This guide shows how to call DeepSeek models via the OpenAI-compatible API or DashScope SDK.
The models deepseek-v3, deepseek-v3.1, deepseek-v3.2, deepseek-v3.2-exp, deepseek-r1, deepseek-r1-0528, and deepseek-r1-distill-qwen-7b/14b/32b will be deprecated on July 9, 2026. Migrate to qwen3.7-plus, qwen3.7-max, or qwen3.6-flash.
Quick start
deepseek-v4-pro is the latest model in the DeepSeek series and delivers top-tier performance across coding, math, and general tasks. You can use the enable_thinking parameter to switch between thinking and non-thinking modes. The following example calls deepseek-v4-pro in thinking mode.
Before you begin, get an API key and set it as an environment variable. If you call the model through an SDK, install the OpenAI or DashScope SDK.
The enable_thinking parameter is not part of the standard OpenAI API. In the OpenAI Python SDK, pass it through extra_body. In the Node.js SDK, pass it as a top-level parameter. The reasoning_effort parameter is a standard OpenAI parameter that you can pass directly as a top-level parameter.
# Example code
import os

from openai import OpenAI

# OpenAI-compatible client pointed at the DashScope international endpoint.
# The API key is read from the DASHSCOPE_API_KEY environment variable.
client = OpenAI(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
)

messages = [{"role": "user", "content": "Who are you?"}]

completion = client.chat.completions.create(
    model="deepseek-v4-pro",
    messages=messages,
    # enable_thinking is not a standard OpenAI parameter, so it is sent via extra_body.
    extra_body={"enable_thinking": True},
    stream=True,
    stream_options={"include_usage": True},
)

reasoning_content = ""  # accumulated thinking text
answer_content = ""  # accumulated answer text
is_answering = False  # becomes True once the first answer fragment has been printed

print("\n" + "=" * 20 + "Thinking process" + "=" * 20 + "\n")

for chunk in completion:
    # A chunk without choices is treated as the usage report.
    if not chunk.choices:
        print("\n" + "=" * 20 + "Token usage" + "=" * 20 + "\n")
        print(chunk.usage)
        continue

    delta = chunk.choices[0].delta

    # Thinking fragments: always accumulated, echoed only before the answer starts.
    if hasattr(delta, "reasoning_content") and delta.reasoning_content is not None:
        if not is_answering:
            print(delta.reasoning_content, end="", flush=True)
        reasoning_content += delta.reasoning_content

    # Answer fragments: print the section header once, then stream the text.
    if hasattr(delta, "content") and delta.content:
        if not is_answering:
            print("\n" + "=" * 20 + "Full response" + "=" * 20 + "\n")
            is_answering = True
        print(delta.content, end="", flush=True)
        answer_content += delta.content
// Example code
import OpenAI from "openai";
import process from 'process';

// OpenAI-compatible client pointed at the DashScope international endpoint.
const openai = new OpenAI({
  apiKey: process.env.DASHSCOPE_API_KEY,
  baseURL: 'https://dashscope-intl.aliyuncs.com/compatible-mode/v1'
});

let reasoningContent = ''; // accumulated thinking text
let answerContent = '';    // accumulated answer text
let isAnswering = false;   // becomes true once the first answer fragment has been printed

/**
 * Streams one chat completion in thinking mode, printing the thinking text,
 * then the answer, then the token usage.
 */
async function main() {
  const messages = [{ role: 'user', content: 'Who are you?' }];

  const stream = await openai.chat.completions.create({
    model: 'deepseek-v4-pro',
    messages,
    // Non-standard parameter; the Node.js SDK accepts it at the top level.
    enable_thinking: true,
    stream: true,
    stream_options: { include_usage: true },
  });

  console.log('\n' + '='.repeat(20) + 'Thinking process' + '='.repeat(20) + '\n');

  for await (const chunk of stream) {
    // A chunk without choices is treated as the usage report.
    if (!chunk.choices?.length) {
      console.log('\n' + '='.repeat(20) + 'Token usage' + '='.repeat(20) + '\n');
      console.log(chunk.usage);
      continue;
    }

    const delta = chunk.choices[0].delta;

    // Thinking fragments: always accumulated, echoed only before the answer starts.
    // `!= null` covers both undefined and null.
    if (delta.reasoning_content != null) {
      if (!isAnswering) {
        process.stdout.write(delta.reasoning_content);
      }
      reasoningContent += delta.reasoning_content;
    }

    // Answer fragments: print the section header once, then stream the text.
    if (delta.content) {
      if (!isAnswering) {
        console.log('\n' + '='.repeat(20) + 'Full response' + '='.repeat(20) + '\n');
        isAnswering = true;
      }
      process.stdout.write(delta.content);
      answerContent += delta.content;
    }
  }
}

main();
# Example code
# Streaming chat completion in thinking mode via the OpenAI-compatible endpoint.
# Requires DASHSCOPE_API_KEY to be set in the environment.
curl -X POST https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions \
  -H "Authorization: Bearer $DASHSCOPE_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
"model": "deepseek-v4-pro",
"messages": [
{
"role": "user",
"content": "Who are you?"
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"enable_thinking": true
}'
# Example code
import os
from http import HTTPStatus

from dashscope import Generation

messages = [{"role": "user", "content": "Who are you?"}]

completion = Generation.call(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    model="deepseek-v4-pro",
    messages=messages,
    result_format="message",
    enable_thinking=True,
    stream=True,
    incremental_output=True,
)

reasoning_content = ""  # accumulated thinking text
answer_content = ""  # accumulated answer text
is_answering = False  # becomes True once the first answer fragment has been printed
last_usage = None  # usage of the most recent successful chunk; stays None if nothing arrives

print("\n" + "=" * 20 + "Thinking process" + "=" * 20 + "\n")

for chunk in completion:
    # Report a failed chunk explicitly instead of failing on a missing output below.
    if chunk.status_code != HTTPStatus.OK:
        print(f"\nRequest failed: HTTP {chunk.status_code}, code={chunk.code}, message={chunk.message}")
        break

    last_usage = chunk.usage
    message = chunk.output.choices[0].message

    # Thinking fragments: always accumulated, echoed only before the answer starts.
    if "reasoning_content" in message:
        if not is_answering:
            print(message.reasoning_content, end="", flush=True)
        reasoning_content += message.reasoning_content

    # Answer fragments: print the section header once, then stream the text.
    if message.content:
        if not is_answering:
            print("\n" + "=" * 20 + "Full response" + "=" * 20 + "\n")
            is_answering = True
        print(message.content, end="", flush=True)
        answer_content += message.content

# Printed after the stream ends, using the last chunk's usage figures.
print("\n" + "=" * 20 + "Token usage" + "=" * 20 + "\n")
print(last_usage)
Example code
The DashScope Java SDK version must be 2.19.4 or later.
import com.alibaba.dashscope.aigc.generation.Generation;
import com.alibaba.dashscope.aigc.generation.GenerationParam;
import com.alibaba.dashscope.aigc.generation.GenerationResult;
import com.alibaba.dashscope.common.Message;
import com.alibaba.dashscope.common.Role;
import com.alibaba.dashscope.exception.ApiException;
import com.alibaba.dashscope.exception.InputRequiredException;
import com.alibaba.dashscope.exception.NoApiKeyException;
import io.reactivex.Flowable;
import java.lang.System;
import java.util.Arrays;
/**
 * Streams a single deepseek-v4-pro reply in thinking mode through the DashScope SDK,
 * printing the reasoning text first and the answer text after it.
 */
public class Main {
    /** Collects every reasoning fragment received from the stream. */
    private static final StringBuilder reasoningContent = new StringBuilder();
    /** Collects every answer fragment received from the stream. */
    private static final StringBuilder finalContent = new StringBuilder();
    /** Set once the "Thinking process" header has been written. */
    private static boolean reasoningHeaderPrinted = false;
    /** Set once the "Full response" header has been written. */
    private static boolean answerHeaderPrinted = false;

    /**
     * Handles one streamed chunk: appends its reasoning and answer fragments to the
     * buffers and echoes them to standard output, writing each section header the
     * first time that section produces text.
     *
     * @param message the streamed chunk; its first choice is read
     */
    private static void handleGenerationResult(GenerationResult message) {
        String reasoning = message.getOutput().getChoices().get(0).getMessage().getReasoningContent();
        String content = message.getOutput().getChoices().get(0).getMessage().getContent();

        if (reasoning != null && !reasoning.isEmpty()) {
            reasoningContent.append(reasoning);
            if (!reasoningHeaderPrinted) {
                System.out.println("====================Thinking process====================");
                reasoningHeaderPrinted = true;
            }
            System.out.print(reasoning);
        }

        if (content != null && !content.isEmpty()) {
            finalContent.append(content);
            // Independent of the reasoning header, so the answer is labelled even when
            // no reasoning text preceded it.
            if (!answerHeaderPrinted) {
                System.out.println("\n====================Full response====================");
                answerHeaderPrinted = true;
            }
            System.out.print(content);
        }
    }

    /**
     * Builds the request: deepseek-v4-pro, thinking enabled, incremental output,
     * "message" result format, with the API key taken from DASHSCOPE_API_KEY.
     *
     * @param userMsg the single user message to send
     * @return the request parameters
     */
    private static GenerationParam buildGenerationParam(Message userMsg) {
        return GenerationParam.builder()
                .apiKey(System.getenv("DASHSCOPE_API_KEY"))
                .model("deepseek-v4-pro")
                .enableThinking(true)
                .incrementalOutput(true)
                .resultFormat("message")
                .messages(Arrays.asList(userMsg))
                .build();
    }

    /**
     * Sends the message as a streaming call and blocks until every chunk has been handled.
     *
     * @param gen     the generation client
     * @param userMsg the user message to send
     * @throws NoApiKeyException      propagated from the SDK call
     * @throws ApiException           propagated from the SDK call
     * @throws InputRequiredException propagated from the SDK call
     */
    public static void streamCallWithMessage(Generation gen, Message userMsg)
            throws NoApiKeyException, ApiException, InputRequiredException {
        GenerationParam param = buildGenerationParam(userMsg);
        Flowable<GenerationResult> result = gen.streamCall(param);
        result.blockingForEach(Main::handleGenerationResult);
    }

    /**
     * Entry point: asks "Who are you?" and streams the reply; SDK exceptions are
     * reported on standard error.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            Generation gen = new Generation();
            Message userMsg = Message.builder().role(Role.USER.getValue()).content("Who are you?").build();
            streamCallWithMessage(gen, userMsg);
        } catch (ApiException | NoApiKeyException | InputRequiredException e) {
            System.err.println("An exception occurred: " + e.getMessage());
        }
    }
}
# Example code
# Streaming generation request in thinking mode via the native DashScope endpoint.
# Requires DASHSCOPE_API_KEY to be set in the environment.
curl -X POST "https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/text-generation/generation" \
  -H "Authorization: Bearer $DASHSCOPE_API_KEY" \
  -H "Content-Type: application/json" \
  -H "X-DashScope-SSE: enable" \
  -d '{
"model": "deepseek-v4-pro",
"input":{
"messages":[
{
"role": "user",
"content": "Who are you?"
}
]
},
"parameters":{
"enable_thinking": true,
"incremental_output": true,
"result_format": "message"
}
}'
Reasoning effort
deepseek-v4-pro and deepseek-v4-flash have thinking mode enabled by default. You can use the reasoning_effort parameter to control reasoning intensity. Valid values: high and max. The default value is high.
If you set this parameter to low or medium, it is mapped to high. If you set it to xhigh, it is mapped to max.
import os

from openai import OpenAI

# Client for the OpenAI-compatible DashScope endpoint; the key comes from the environment.
client = OpenAI(
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
    api_key=os.getenv("DASHSCOPE_API_KEY"),
)

# Single-turn conversation sent with an explicit reasoning_effort.
conversation = [{"role": "user", "content": "Which is larger, 9.9 or 9.11?"}]
completion = client.chat.completions.create(
    messages=conversation,
    model="deepseek-v4-pro",
    reasoning_effort="high",
)

# Print only the answer text of the first choice.
first_choice = completion.choices[0]
print(first_choice.message.content)
import OpenAI from "openai";

// Client for the OpenAI-compatible DashScope endpoint; the key comes from the environment.
const apiKey = process.env.DASHSCOPE_API_KEY;
const baseURL = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1";
const openai = new OpenAI({ apiKey, baseURL });

// Single-turn request sent with an explicit reasoning_effort.
const request = {
  model: "deepseek-v4-pro",
  messages: [{ role: "user", content: "Which is larger, 9.9 or 9.11?" }],
  reasoning_effort: "high",
};
const completion = await openai.chat.completions.create(request);

// Print only the answer text of the first choice.
const [firstChoice] = completion.choices;
console.log(firstChoice.message.content);
# Chat completion request that sets reasoning_effort on the OpenAI-compatible endpoint.
# Requires DASHSCOPE_API_KEY to be set in the environment.
curl --request POST https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions \
  --header "Authorization: Bearer $DASHSCOPE_API_KEY" \
  --header "Content-Type: application/json" \
  --data '{
"model": "deepseek-v4-pro",
"messages": [{"role": "user", "content": "Which is larger, 9.9 or 9.11?"}],
"reasoning_effort": "high"
}'
import os
from http import HTTPStatus

from dashscope import Generation

# Single-turn request sent with an explicit reasoning_effort.
response = Generation.call(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    model="deepseek-v4-pro",
    messages=[{"role": "user", "content": "Which is larger, 9.9 or 9.11?"}],
    reasoning_effort="high",
    result_format="message",
)

# Check the status before reading the output, so a failed request shows the
# service's own error code and message rather than an attribute error.
if response.status_code == HTTPStatus.OK:
    print(response.output.choices[0].message.content)
else:
    print(f"Request failed: HTTP {response.status_code}, code={response.code}, message={response.message}")
Other features
| Model | Multi-turn | Function calling | Web search | Context cache | Structured output |
|---|---|---|---|---|---|
| deepseek-v4-pro | ✓ | ✓ | ✓ | ✓ | — |
| deepseek-v4-flash | ✓ | ✓ | ✓ | ✓ | — |
| deepseek-v3.2 | ✓ | ✓ | ✓ | ✓ | — |
Parameter defaults
| Model | temperature | top_p | repetition_penalty | presence_penalty | max_tokens | thinking_budget |
|---|---|---|---|---|---|---|
| deepseek-v4-pro | 1.0 | 1.0 | - | - | 393,216 shared | 393,216 shared |
| deepseek-v4-flash | 1.0 | 1.0 | - | - | 393,216 shared | 393,216 shared |
| deepseek-v3.2 | 1.0 | 0.95 | - | - | 65,536 | 32,768 |
- A hyphen (-) indicates that the parameter is not supported.
- The deepseek-r1, deepseek-r1-0528, and distilled models do not support overriding their default parameter values.
- For parameter descriptions, see the OpenAI-compatible Chat API.