Call GLM models through the OpenAI-compatible API or DashScope SDK on Qwen Cloud.
Quick start
glm-5.1 is the latest GLM model, supporting thinking and non-thinking modes via the enable_thinking parameter. Run the following code to call glm-5.1 in thinking mode.
Prerequisites: obtain an API key and configure it as an environment variable. If calling via SDK, install the OpenAI or DashScope SDK.
- OpenAI compatible
- DashScope
enable_thinking is not a standard OpenAI parameter. In the Python SDK, pass it via extra_body; in the Node.js SDK, pass it as a top-level parameter.
- Python
- Node.js
- curl
Example code
Copy
from openai import OpenAI
import os
# Client for the OpenAI-compatible endpoint; the API key is read from the
# DASHSCOPE_API_KEY environment variable.
client = OpenAI(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
)

messages = [{"role": "user", "content": "Who are you?"}]

completion = client.chat.completions.create(
    model="glm-5.1",
    messages=messages,
    # enable_thinking is not a standard OpenAI parameter, so the Python SDK
    # needs it passed through extra_body.
    extra_body={"enable_thinking": True},
    stream=True,
    stream_options={"include_usage": True},
)

reasoning_content = ""  # thinking text accumulated across chunks
answer_content = ""  # answer text accumulated across chunks
is_answering = False  # becomes True once the first answer text arrives

print("\n" + "=" * 20 + " Thinking " + "=" * 20 + "\n")

for chunk in completion:
    # A chunk without choices is treated as the token-usage report.
    if not chunk.choices:
        print("\n" + "=" * 20 + " Token Usage " + "=" * 20 + "\n")
        print(chunk.usage)
        continue

    delta = chunk.choices[0].delta

    # Thinking text: always accumulated, but echoed only before the answer
    # section has started.
    if hasattr(delta, "reasoning_content") and delta.reasoning_content is not None:
        if not is_answering:
            print(delta.reasoning_content, end="", flush=True)
        reasoning_content += delta.reasoning_content

    # Answer text: print the "Response" banner once, then stream the text.
    if hasattr(delta, "content") and delta.content:
        if not is_answering:
            print("\n" + "=" * 20 + " Response " + "=" * 20 + "\n")
            is_answering = True
        print(delta.content, end="", flush=True)
        answer_content += delta.content
Example code
Copy
import OpenAI from "openai";
import process from 'process';
// Client for the OpenAI-compatible endpoint; the API key is read from the
// DASHSCOPE_API_KEY environment variable.
const openai = new OpenAI({
  apiKey: process.env.DASHSCOPE_API_KEY,
  baseURL: 'https://dashscope-intl.aliyuncs.com/compatible-mode/v1'
});

// Text accumulated across streamed chunks.
let reasoningContent = '';
let answerContent = '';
// Becomes true once the first answer text arrives.
let isAnswering = false;

// Builds a section banner: the title surrounded by 20 "=" on each side and
// wrapped in newlines.
const banner = (title) => '\n' + '='.repeat(20) + ` ${title} ` + '='.repeat(20) + '\n';

/**
 * Streams a glm-5.1 reply in thinking mode, printing the thinking text, then
 * the answer, then the token usage.
 */
async function main() {
  const stream = await openai.chat.completions.create({
    model: 'glm-5.1',
    messages: [{ role: 'user', content: 'Who are you?' }],
    // Passed as a top-level parameter in the Node.js SDK.
    enable_thinking: true,
    stream: true,
    stream_options: { include_usage: true },
  });

  console.log(banner('Thinking'));

  for await (const chunk of stream) {
    // A chunk without choices is treated as the token-usage report.
    if (!chunk.choices?.length) {
      console.log(banner('Token Usage'));
      console.log(chunk.usage);
      continue;
    }

    const { reasoning_content: thinking, content } = chunk.choices[0].delta;

    // Thinking text: always accumulated, echoed only before the answer starts.
    if (thinking != null) {
      if (!isAnswering) {
        process.stdout.write(thinking);
      }
      reasoningContent += thinking;
    }

    // Answer text: print the "Response" banner once, then stream the text.
    if (content) {
      if (!isAnswering) {
        console.log(banner('Response'));
        isAnswering = true;
      }
      process.stdout.write(content);
      answerContent += content;
    }
  }
}

main();
Example code
Copy
# Streaming chat-completions request to the OpenAI-compatible endpoint for
# glm-5.1 with enable_thinking set to true and usage reporting requested.
# The bearer token is taken from the DASHSCOPE_API_KEY environment variable.
curl -X POST https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions \
-H "Authorization: Bearer $DASHSCOPE_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"model": "glm-5.1",
"messages": [
{
"role": "user",
"content": "Who are you?"
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"enable_thinking": true
}'
- Python
- Java
- curl
Example code
Copy
import os
from dashscope import Generation
messages = [{"role": "user", "content": "Who are you?"}]

# Streaming call through the DashScope SDK; the API key is read from the
# DASHSCOPE_API_KEY environment variable.
completion = Generation.call(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    model="glm-5.1",
    messages=messages,
    result_format="message",
    enable_thinking=True,
    stream=True,
    incremental_output=True,
)

reasoning_content = ""  # thinking text accumulated across chunks
answer_content = ""  # answer text accumulated across chunks
is_answering = False  # becomes True once the first answer text arrives

print("\n" + "=" * 20 + " Thinking " + "=" * 20 + "\n")

for chunk in completion:
    message = chunk.output.choices[0].message

    # Thinking text: always accumulated, but echoed only before the answer
    # section has started.
    if "reasoning_content" in message:
        if not is_answering:
            print(message.reasoning_content, end="", flush=True)
        reasoning_content += message.reasoning_content

    # Answer text: print the "Response" banner once, then stream the text.
    if message.content:
        if not is_answering:
            print("\n" + "=" * 20 + " Response " + "=" * 20 + "\n")
            is_answering = True
        print(message.content, end="", flush=True)
        answer_content += message.content

# Usage is read from the last chunk yielded by the stream.
print("\n" + "=" * 20 + " Token Usage " + "=" * 20 + "\n")
print(chunk.usage)
Example code
DashScope Java SDK version must be 2.19.4 or later.
Copy
import com.alibaba.dashscope.aigc.generation.Generation;
import com.alibaba.dashscope.aigc.generation.GenerationParam;
import com.alibaba.dashscope.aigc.generation.GenerationResult;
import com.alibaba.dashscope.common.Message;
import com.alibaba.dashscope.common.Role;
import com.alibaba.dashscope.exception.ApiException;
import com.alibaba.dashscope.exception.InputRequiredException;
import com.alibaba.dashscope.exception.NoApiKeyException;
import io.reactivex.Flowable;
import java.lang.System;
import java.util.Arrays;
/**
 * Streams a glm-5.1 reply in thinking mode through the DashScope Java SDK,
 * printing the reasoning text followed by the answer text.
 */
public class Main {
    // Reasoning (thinking) text accumulated across streamed results.
    private static final StringBuilder reasoningContent = new StringBuilder();
    // Answer text accumulated across streamed results.
    private static final StringBuilder finalContent = new StringBuilder();
    // True after the "Thinking" banner has been printed and until the
    // "Response" banner is printed.
    private static boolean inThinkingSection = false;

    /**
     * Prints and accumulates the reasoning and answer text carried by one
     * streamed result. The "Response" banner is printed only if a "Thinking"
     * banner was printed before it.
     *
     * @param result one streamed generation result
     */
    private static void handleGenerationResult(GenerationResult result) {
        String thinkingDelta = result.getOutput().getChoices().get(0).getMessage().getReasoningContent();
        String answerDelta = result.getOutput().getChoices().get(0).getMessage().getContent();

        boolean hasThinking = thinkingDelta != null && !thinkingDelta.isEmpty();
        if (hasThinking) {
            reasoningContent.append(thinkingDelta);
            if (!inThinkingSection) {
                System.out.println("==================== Thinking ====================");
                inThinkingSection = true;
            }
            System.out.print(thinkingDelta);
        }

        boolean hasAnswer = answerDelta != null && !answerDelta.isEmpty();
        if (hasAnswer) {
            finalContent.append(answerDelta);
            if (inThinkingSection) {
                System.out.println("\n==================== Response ====================");
                inThinkingSection = false;
            }
            System.out.print(answerDelta);
        }
    }

    /**
     * Builds the request parameters: glm-5.1, thinking enabled, incremental
     * output, "message" result format, with the API key read from the
     * DASHSCOPE_API_KEY environment variable.
     *
     * @param userMsg the single user message to send
     * @return the assembled generation parameters
     */
    private static GenerationParam buildGenerationParam(Message userMsg) {
        return GenerationParam.builder()
                .apiKey(System.getenv("DASHSCOPE_API_KEY"))
                .model("glm-5.1")
                .enableThinking(true)
                .incrementalOutput(true)
                .resultFormat("message")
                .messages(Arrays.asList(userMsg))
                .build();
    }

    /**
     * Sends the message as a streaming call and blocks until every streamed
     * result has been handled.
     *
     * @param gen     the generation client
     * @param userMsg the user message to send
     */
    public static void streamCallWithMessage(Generation gen, Message userMsg)
            throws NoApiKeyException, ApiException, InputRequiredException {
        Flowable<GenerationResult> results = gen.streamCall(buildGenerationParam(userMsg));
        results.blockingForEach(Main::handleGenerationResult);
    }

    /** Entry point: asks "Who are you?" and reports SDK exceptions on stderr. */
    public static void main(String[] args) {
        try {
            Generation client = new Generation();
            Message question = Message.builder()
                    .role(Role.USER.getValue())
                    .content("Who are you?")
                    .build();
            streamCallWithMessage(client, question);
        } catch (ApiException | NoApiKeyException | InputRequiredException e) {
            System.err.println("An exception occurred: " + e.getMessage());
        }
    }
}
Example code
Copy
# Request to the DashScope text-generation endpoint for glm-5.1 with
# enable_thinking and incremental_output set to true. The X-DashScope-SSE
# header is set to "enable"; the bearer token is taken from the
# DASHSCOPE_API_KEY environment variable.
curl -X POST "https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/text-generation/generation" \
-H "Authorization: Bearer $DASHSCOPE_API_KEY" \
-H "Content-Type: application/json" \
-H "X-DashScope-SSE: enable" \
-d '{
"model": "glm-5.1",
"input":{
"messages":[
{
"role": "user",
"content": "Who are you?"
}
]
},
"parameters":{
"enable_thinking": true,
"incremental_output": true,
"result_format": "message"
}
}'
Streaming tool calling
glm-5.1 supports the tool_stream parameter (boolean, default false), which takes effect only when stream is true. When enabled, function calling arguments are returned incrementally across multiple chunks rather than all at once.
| stream | tool_stream | tool_call behavior |
|---|---|---|
| true | true | arguments returned incrementally across chunks |
| true | false (default) | arguments returned in a single chunk |
| false | true/false | tool_stream has no effect |
- OpenAI compatible
- DashScope
- Python
- Node.js
- curl
Copy
from openai import OpenAI
import os
# Client for the OpenAI-compatible endpoint; the API key is read from the
# DASHSCOPE_API_KEY environment variable.
client = OpenAI(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
)

# A single function tool that takes one required string argument, "city".
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get weather information for a city",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {"type": "string", "description": "City name"}
                },
                "required": ["city"],
            },
        },
    }
]

messages = [{"role": "user", "content": "What's the weather like in Singapore?"}]

completion = client.chat.completions.create(
    model="glm-5.1",
    tools=tools,
    messages=messages,
    # tool_stream is passed through extra_body, like other non-standard
    # parameters in the Python SDK.
    extra_body={"tool_stream": True},
    stream=True,
    stream_options={"include_usage": True},
)

for chunk in completion:
    if chunk.choices:
        choice = chunk.choices[0]
        delta = choice.delta

        if hasattr(delta, "content") and delta.content:
            print(f"[content] {delta.content}")

        if hasattr(delta, "tool_calls") and delta.tool_calls:
            for tc in delta.tool_calls:
                # `function` is optional on streamed tool-call deltas, so
                # guard it instead of raising AttributeError mid-stream.
                fn = tc.function
                name = fn.name if fn else None
                args = fn.arguments if fn else None
                print(f"[tool_call] id={tc.id}, name={name}, args={args}")

        if choice.finish_reason:
            print(f"[finish_reason] {choice.finish_reason}")
    elif chunk.usage:
        # A chunk without choices is treated as the token-usage report.
        print(f"[usage] {chunk.usage}")
Copy
import OpenAI from "openai";
import process from 'process';
// Client for the OpenAI-compatible endpoint; the API key is read from the
// DASHSCOPE_API_KEY environment variable.
const openai = new OpenAI({
  apiKey: process.env.DASHSCOPE_API_KEY,
  baseURL: 'https://dashscope-intl.aliyuncs.com/compatible-mode/v1'
});

// A single function tool that takes one required string argument, "city".
const tools = [
  {
    type: "function",
    function: {
      name: "get_weather",
      description: "Get weather information for a city",
      parameters: {
        type: "object",
        properties: {
          city: { type: "string", description: "City name" }
        },
        required: ["city"]
      }
    }
  }
];

/**
 * Streams a glm-5.1 reply with tool_stream enabled and logs each content
 * fragment, tool-call fragment, finish reason, and the token usage.
 */
async function main() {
  const stream = await openai.chat.completions.create({
    model: 'glm-5.1',
    messages: [{ role: 'user', content: "What's the weather like in Singapore?" }],
    tools: tools,
    // Passed as a top-level parameter in the Node.js SDK.
    tool_stream: true,
    stream: true,
    stream_options: { include_usage: true },
  });

  for await (const chunk of stream) {
    // A chunk without choices is treated as the token-usage report.
    if (!chunk.choices?.length) {
      if (chunk.usage) {
        console.log(`[usage] ${JSON.stringify(chunk.usage)}`);
      }
      continue;
    }

    const choice = chunk.choices[0];
    const delta = choice.delta;

    if (delta.content) {
      console.log(`[content] ${delta.content}`);
    }

    if (delta.tool_calls) {
      for (const tc of delta.tool_calls) {
        // `function` may be absent on a streamed tool-call delta; optional
        // chaining logs "undefined" instead of throwing mid-stream.
        console.log(`[tool_call] id=${tc.id}, name=${tc.function?.name}, args=${tc.function?.arguments}`);
      }
    }

    if (choice.finish_reason) {
      console.log(`[finish_reason] ${choice.finish_reason}`);
    }
  }
}

main();
Copy
# Streaming chat-completions request to the OpenAI-compatible endpoint for
# glm-5.1 with one function tool (get_weather) and tool_stream set to true.
# The bearer token is taken from the DASHSCOPE_API_KEY environment variable.
curl -X POST https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions \
-H "Authorization: Bearer $DASHSCOPE_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"model": "glm-5.1",
"messages": [
{
"role": "user",
"content": "What is the weather like in Singapore?"
}
],
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get weather information for a city",
"parameters": {
"type": "object",
"properties": {
"city": {"type": "string", "description": "City name"}
},
"required": ["city"]
}
}
}
],
"stream": true,
"stream_options": {"include_usage": true},
"tool_stream": true
}'
- Python
- curl
Copy
import os
from dashscope import Generation
# A single function tool that takes one required string argument, "city".
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get weather information for a city",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {"type": "string", "description": "City name"}
                },
                "required": ["city"],
            },
        },
    }
]

messages = [{"role": "user", "content": "What's the weather like in Singapore?"}]

# Streaming call through the DashScope SDK with tool_stream enabled; the API
# key is read from the DASHSCOPE_API_KEY environment variable.
completion = Generation.call(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    model="glm-5.1",
    messages=messages,
    tools=tools,
    result_format="message",
    stream=True,
    tool_stream=True,
    incremental_output=True,
)

for chunk in completion:
    choice = chunk.output.choices[0]
    msg = choice.message

    if msg.content:
        print(f"[content] {msg.content}")

    if "tool_calls" in msg and msg.tool_calls:
        for tc in msg.tool_calls:
            # Tool calls are read as dicts; missing fields print as "".
            fn = tc.get("function", {})
            print(f"[tool_call] id={tc.get('id','')}, name={fn.get('name','')}, args={fn.get('arguments','')}")

    # Skip both an empty finish reason and the literal string "null".
    finish = choice.get("finish_reason", "")
    if finish and finish != "null":
        print(f"[finish_reason] {finish}")
Copy
# Request to the DashScope text-generation endpoint for glm-5.1 with one
# function tool (get_weather), tool_stream and incremental_output set to true.
# The X-DashScope-SSE header is set to "enable"; the bearer token is taken
# from the DASHSCOPE_API_KEY environment variable.
curl -X POST "https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/text-generation/generation" \
-H "Authorization: Bearer $DASHSCOPE_API_KEY" \
-H "Content-Type: application/json" \
-H "X-DashScope-SSE: enable" \
-d '{
"model": "glm-5.1",
"input": {
"messages": [
{
"role": "user",
"content": "What is the weather like in Singapore?"
}
]
},
"parameters": {
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get weather information for a city",
"parameters": {
"type": "object",
"properties": {
"city": {"type": "string", "description": "City name"}
},
"required": ["city"]
}
}
}
],
"tool_stream": true,
"incremental_output": true,
"result_format": "message"
}
}'
Other features
| Model | Multi-turn | Function calling | Web search | Context cache |
|---|---|---|---|---|
| glm-5.1 | ✓ | ✓ | ✓ (non-thinking mode only) | ✓ (explicit and implicit) |
Parameter defaults
| Model | enable_thinking | temperature | top_p | top_k | repetition_penalty |
|---|---|---|---|---|---|
| glm-5.1 | true | 1.0 | 0.95 | 20 | 1.0 |