DeepSeek - Qwen Cloud

This guide shows how to call DeepSeek models via the OpenAI-compatible API or DashScope SDK.

The models deepseek-v3, deepseek-v3.1, deepseek-v3.2, deepseek-v3.2-exp, deepseek-r1, deepseek-r1-0528, and deepseek-r1-distill-qwen-7b/14b/32b will be deprecated on July 9, 2026. Migrate to qwen3.7-plus, qwen3.7-max, or qwen3.6-flash.

Quick start

deepseek-v4-pro is the latest model in the DeepSeek series and delivers top-tier performance across coding, math, and general tasks. You can use the enable_thinking parameter to switch between thinking and non-thinking modes. The following example calls deepseek-v4-pro in thinking mode. Before you begin, get an API key and set it as an environment variable. If you call the model through an SDK, install the OpenAI or DashScope SDK.

OpenAI compatible
DashScope

The enable_thinking parameter is not part of the standard OpenAI API. In the OpenAI Python SDK, pass it through extra_body. In the Node.js SDK, pass it as a top-level parameter. The reasoning_effort parameter is a standard OpenAI parameter that you can pass directly as a top-level parameter.

Python
Node.js
curl

Example code

from openai import OpenAI
import os

# Client for the OpenAI-compatible endpoint; the API key comes from the environment.
client = OpenAI(
  base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
  api_key=os.getenv("DASHSCOPE_API_KEY"),
)


def banner(title):
  """Return the section separator line printed around each part of the output."""
  rule = "=" * 20
  return "\n" + rule + title + rule + "\n"


messages = [{"role": "user", "content": "Who are you?"}]

# enable_thinking is not a standard OpenAI argument, so it travels in extra_body.
completion = client.chat.completions.create(
  model="deepseek-v4-pro",
  messages=messages,
  stream=True,
  stream_options={"include_usage": True},
  extra_body={"enable_thinking": True},
)

reasoning_content = ""  # accumulated thinking text
answer_content = ""  # accumulated reply text
is_answering = False  # flips to True when the first reply text arrives
print(banner("Thinking process"))

for event in completion:
  # A chunk with no choices is treated as the usage report.
  if not event.choices:
    print(banner("Token usage"))
    print(event.usage)
    continue

  piece = event.choices[0].delta

  # Thinking text: always collected, but only echoed before the reply starts.
  thought = getattr(piece, "reasoning_content", None)
  if thought is not None:
    if not is_answering:
      print(thought, end="", flush=True)
    reasoning_content += thought

  # Reply text: print the banner once, then stream every non-empty piece.
  text = getattr(piece, "content", None)
  if text:
    if not is_answering:
      print(banner("Full response"))
      is_answering = True
    print(text, end="", flush=True)
    answer_content += text

Example code

import OpenAI from "openai";
import process from 'process';

// Client for the OpenAI-compatible endpoint; the API key comes from the environment.
const openai = new OpenAI({
  baseURL: 'https://dashscope-intl.aliyuncs.com/compatible-mode/v1',
  apiKey: process.env.DASHSCOPE_API_KEY
});

let reasoningContent = ''; // accumulated thinking text
let answerContent = '';    // accumulated reply text
let isAnswering = false;   // becomes true when the first reply text arrives

// Builds the separator line printed around each section of the output.
const banner = (title) => '\n' + '='.repeat(20) + title + '='.repeat(20) + '\n';

/**
 * Streams one chat completion in thinking mode, echoing the thinking text,
 * then the reply, then the token usage.
 */
async function main() {
  // enable_thinking is passed as a top-level field of the request.
  const stream = await openai.chat.completions.create({
    model: 'deepseek-v4-pro',
    messages: [{ role: 'user', content: 'Who are you?' }],
    enable_thinking: true,
    stream: true,
    stream_options: { include_usage: true },
  });

  console.log(banner('Thinking process'));

  for await (const chunk of stream) {
    const choices = chunk.choices;

    // A chunk with no choices is treated as the usage report.
    if (!choices?.length) {
      console.log(banner('Token usage'));
      console.log(chunk.usage);
      continue;
    }

    const { reasoning_content: thought, content: text } = choices[0].delta;

    // Thinking text: always collected, but only echoed before the reply starts.
    if (thought != null) {
      if (!isAnswering) {
        process.stdout.write(thought);
      }
      reasoningContent += thought;
    }

    // Reply text: print the banner once, then stream every non-empty piece.
    if (text) {
      if (!isAnswering) {
        console.log(banner('Full response'));
        isAnswering = true;
      }
      process.stdout.write(text);
      answerContent += text;
    }
  }
}

main();

Example code

# Streaming chat-completion request to the OpenAI-compatible endpoint with
# thinking mode enabled and token usage included in the stream.
# The API key is read from the DASHSCOPE_API_KEY environment variable.
curl -X POST https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions \
-H "Authorization: Bearer $DASHSCOPE_API_KEY" \
-H "Content-Type: application/json" \
-d '{
    "model": "deepseek-v4-pro",
    "messages": [
        {
            "role": "user",
            "content": "Who are you?"
        }
    ],
    "stream": true,
    "stream_options": {
        "include_usage": true
    },
    "enable_thinking": true
}'

Python
Java
curl

Example code

import os
from dashscope import Generation

messages = [{"role": "user", "content": "Who are you?"}]

# Stream the reply incrementally with thinking mode enabled.
completion = Generation.call(
  api_key=os.getenv("DASHSCOPE_API_KEY"),
  model="deepseek-v4-pro",
  messages=messages,
  result_format="message",
  enable_thinking=True,
  stream=True,
  incremental_output=True,
)

reasoning_content = ""  # accumulated thinking text
answer_content = ""  # accumulated reply text
is_answering = False  # flips to True when the first reply text arrives
usage = None  # usage from the most recent successful chunk

print("\n" + "=" * 20 + "Thinking process" + "=" * 20 + "\n")

for chunk in completion:
  # The DashScope SDK reports request failures through status_code / code /
  # message on the response object instead of raising, so check before
  # reading chunk.output.
  if chunk.status_code != 200:
    print(
      f"\nRequest failed: status={chunk.status_code}, "
      f"code={chunk.code}, message={chunk.message}"
    )
    break

  usage = chunk.usage
  message = chunk.output.choices[0].message

  # Thinking text: always collected, but only echoed before the reply starts.
  # The truthiness check also skips a None value, which would break the +=.
  if "reasoning_content" in message and message.reasoning_content:
    if not is_answering:
      print(message.reasoning_content, end="", flush=True)
    reasoning_content += message.reasoning_content

  # Reply text: print the banner once, then stream every non-empty piece.
  if message.content:
    if not is_answering:
      print("\n" + "=" * 20 + "Full response" + "=" * 20 + "\n")
      is_answering = True
    print(message.content, end="", flush=True)
    answer_content += message.content

# Report usage only if at least one successful chunk arrived; this no longer
# depends on the loop variable surviving the loop.
if usage is not None:
  print("\n" + "=" * 20 + "Token usage" + "=" * 20 + "\n")
  print(usage)

Example code

This example requires DashScope Java SDK version 2.19.4 or later.

import com.alibaba.dashscope.aigc.generation.Generation;
import com.alibaba.dashscope.aigc.generation.GenerationParam;
import com.alibaba.dashscope.aigc.generation.GenerationResult;
import com.alibaba.dashscope.common.Message;
import com.alibaba.dashscope.common.Role;
import com.alibaba.dashscope.exception.ApiException;
import com.alibaba.dashscope.exception.InputRequiredException;
import com.alibaba.dashscope.exception.NoApiKeyException;
import io.reactivex.Flowable;
import java.lang.System;
import java.util.Arrays;

/**
 * Streams a single deepseek-v4-pro request in thinking mode through the
 * DashScope Java SDK and prints the thinking text followed by the reply.
 */
public class Main {
  // Accumulated thinking text and reply text across all streamed chunks.
  private static StringBuilder reasoningContent = new StringBuilder();
  private static StringBuilder finalContent = new StringBuilder();
  // One flag per banner, so each is printed exactly once and the
  // "Full response" banner does not depend on reasoning having been printed.
  private static boolean thinkingHeaderPrinted = false;
  private static boolean answerHeaderPrinted = false;

  /**
   * Handles one streamed chunk: appends its reasoning/reply text to the
   * accumulators and echoes it, printing each section banner on first use.
   *
   * @param message the streamed chunk returned by the SDK
   */
  private static void handleGenerationResult(GenerationResult message) {
    String reasoning = message.getOutput().getChoices().get(0).getMessage().getReasoningContent();
    String content = message.getOutput().getChoices().get(0).getMessage().getContent();
    if (reasoning != null && !reasoning.isEmpty()) {
      reasoningContent.append(reasoning);
      if (!thinkingHeaderPrinted) {
        System.out.println("====================Thinking process====================");
        thinkingHeaderPrinted = true;
      }
      System.out.print(reasoning);
    }
    if (content != null && !content.isEmpty()) {
      finalContent.append(content);
      if (!answerHeaderPrinted) {
        // Leading newline ends the unterminated line of streamed reasoning text.
        System.out.println("\n====================Full response====================");
        answerHeaderPrinted = true;
      }
      System.out.print(content);
    }
  }

  /**
   * Builds the request: thinking mode on, incremental output, "message" result format.
   *
   * @param userMsg the single user message to send
   * @return the request parameters
   */
  private static GenerationParam buildGenerationParam(Message userMsg) {
    return GenerationParam.builder()
      .apiKey(System.getenv("DASHSCOPE_API_KEY"))
      .model("deepseek-v4-pro")
      .enableThinking(true)
      .incrementalOutput(true)
      .resultFormat("message")
      .messages(Arrays.asList(userMsg))
      .build();
  }

  /**
   * Sends the message as a streaming call and blocks until every chunk has been handled.
   *
   * @param gen the SDK client used to issue the call
   * @param userMsg the user message to send
   */
  public static void streamCallWithMessage(Generation gen, Message userMsg)
    throws NoApiKeyException, ApiException, InputRequiredException {
    GenerationParam param = buildGenerationParam(userMsg);
    Flowable<GenerationResult> result = gen.streamCall(param);
    result.blockingForEach(message -> handleGenerationResult(message));
  }

  public static void main(String[] args) {
    try {
      Generation gen = new Generation();
      Message userMsg = Message.builder().role(Role.USER.getValue()).content("Who are you?").build();
      streamCallWithMessage(gen, userMsg);
    } catch (ApiException | NoApiKeyException | InputRequiredException e) {
      System.err.println("An exception occurred: " + e.getMessage());
    }
  }
}

Example code

# Request to the DashScope text-generation endpoint with thinking mode and
# incremental output enabled, returning results in "message" format.
# The API key is read from the DASHSCOPE_API_KEY environment variable.
curl -X POST "https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/text-generation/generation" \
-H "Authorization: Bearer $DASHSCOPE_API_KEY" \
-H "Content-Type: application/json" \
-H "X-DashScope-SSE: enable" \
-d '{
    "model": "deepseek-v4-pro",
    "input":{
        "messages":[
            {
                "role": "user",
                "content": "Who are you?"
            }
        ]
    },
    "parameters":{
        "enable_thinking": true,
        "incremental_output": true,
        "result_format": "message"
    }
}'

Reasoning effort

deepseek-v4-pro and deepseek-v4-flash have thinking mode enabled by default. You can use the reasoning_effort parameter to control reasoning intensity. Valid values: high and max. The default value is high.

If you set this parameter to low or medium, it is mapped to high. If you set it to xhigh, it is mapped to max.

OpenAI compatible
DashScope

Python
Node.js
curl

from openai import OpenAI
import os

# Client for the OpenAI-compatible endpoint; the API key comes from the environment.
client = OpenAI(
  base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
  api_key=os.getenv("DASHSCOPE_API_KEY"),
)

question = {"role": "user", "content": "Which is larger, 9.9 or 9.11?"}

# reasoning_effort is passed as a regular top-level argument.
completion = client.chat.completions.create(
  model="deepseek-v4-pro",
  messages=[question],
  reasoning_effort="high",
)

# Print only the reply text of the first choice.
reply = completion.choices[0].message
print(reply.content)

import OpenAI from "openai";

// Client for the OpenAI-compatible endpoint; the API key comes from the environment.
const openai = new OpenAI({
  baseURL: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
  apiKey: process.env.DASHSCOPE_API_KEY,
});

const question = { role: "user", content: "Which is larger, 9.9 or 9.11?" };

// reasoning_effort is sent as a regular top-level request field.
const completion = await openai.chat.completions.create({
  model: "deepseek-v4-pro",
  messages: [question],
  reasoning_effort: "high",
});

// Print only the reply text of the first choice.
const reply = completion.choices[0].message;
console.log(reply.content);

# Chat-completion request to the OpenAI-compatible endpoint with
# reasoning_effort set to "high".
# The API key is read from the DASHSCOPE_API_KEY environment variable.
curl -X POST https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions \
-H "Authorization: Bearer $DASHSCOPE_API_KEY" \
-H "Content-Type: application/json" \
-d '{
    "model": "deepseek-v4-pro",
    "messages": [{"role": "user", "content": "Which is larger, 9.9 or 9.11?"}],
    "reasoning_effort": "high"
}'

import os
from dashscope import Generation

question = {"role": "user", "content": "Which is larger, 9.9 or 9.11?"}

# Single call through the DashScope SDK with reasoning_effort set to "high".
response = Generation.call(
  model="deepseek-v4-pro",
  api_key=os.getenv("DASHSCOPE_API_KEY"),
  messages=[question],
  result_format="message",
  reasoning_effort="high",
)

# Print only the reply text of the first choice.
first_choice = response.output.choices[0]
print(first_choice.message.content)

Other features

Model	Multi-turn	Function calling	Web search	Context cache	Structured output
deepseek-v4-pro	✓	✓	✓	✓	—
deepseek-v4-flash	✓	✓	✓	✓	—
deepseek-v3.2	✓	✓	✓	✓	—

Parameter defaults

Model	temperature	top_p	repetition_penalty	presence_penalty	max_tokens	thinking_budget
deepseek-v4-pro	1.0	1.0	-	-	393,216 shared	393,216 shared
deepseek-v4-flash	1.0	1.0	-	-	393,216 shared	393,216 shared
deepseek-v3.2	1.0	0.95	-	-	65,536	32,768

A hyphen (-) indicates that the parameter is not supported.
The deepseek-r1, deepseek-r1-0528, and distilled models do not support overriding their default parameter values.
For parameter descriptions, see the OpenAI-compatible Chat API.

Quick start

Reasoning effort

Other features

Parameter defaults

Quick start

Reasoning effort

Other features

Parameter defaults