GLM - Qwen Cloud

Quick start

glm-5.1 is the latest GLM model. It supports both thinking and non-thinking modes, which you switch with the enable_thinking parameter. The following examples call glm-5.1 in thinking mode. Prerequisites: obtain an API key and set it as the DASHSCOPE_API_KEY environment variable. If you call the model through an SDK, install the OpenAI or DashScope SDK first.

OpenAI compatible
DashScope

enable_thinking is not a standard OpenAI parameter. In the Python SDK, pass it via extra_body; in the Node.js SDK, pass it as a top-level parameter; with curl, include it as a top-level field of the JSON request body.

Python
Node.js
curl

Example code

from openai import OpenAI
import os

# Client for the OpenAI-compatible endpoint; the API key is read from the
# DASHSCOPE_API_KEY environment variable.
client = OpenAI(
  base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
  api_key=os.getenv("DASHSCOPE_API_KEY"),
)


def _banner(title):
  """Return the separator line printed before each output section."""
  rule = "=" * 20
  return f"\n{rule} {title} {rule}\n"


messages = [{"role": "user", "content": "Who are you?"}]
completion = client.chat.completions.create(
  model="glm-5.1",
  messages=messages,
  stream=True,
  # Usage is reported by the chunk that carries no choices (handled below).
  stream_options={"include_usage": True},
  # enable_thinking is not a standard OpenAI parameter, so it goes in extra_body.
  extra_body={"enable_thinking": True},
)

reasoning_content = ""  # full reasoning text, accumulated across chunks
answer_content = ""  # full answer text, accumulated across chunks
is_answering = False  # set once the first answer text has been seen
print(_banner("Thinking"))

for chunk in completion:
  # A chunk without choices is treated as the usage report.
  if not chunk.choices:
    print(_banner("Token Usage"))
    print(chunk.usage)
    continue

  delta = chunk.choices[0].delta
  thought = getattr(delta, "reasoning_content", None)
  text = getattr(delta, "content", None)

  if thought is not None:
    reasoning_content += thought
    # Reasoning is echoed only until the answer has started.
    if not is_answering:
      print(thought, end="", flush=True)

  if text:
    if not is_answering:
      # First answer text: switch sections exactly once.
      is_answering = True
      print(_banner("Response"))
    answer_content += text
    print(text, end="", flush=True)

Example code

import OpenAI from "openai";
import process from 'process';

// OpenAI-compatible client; the key is read from the DASHSCOPE_API_KEY environment variable.
const openai = new OpenAI({
  baseURL: 'https://dashscope-intl.aliyuncs.com/compatible-mode/v1',
  apiKey: process.env.DASHSCOPE_API_KEY
});

// Accumulators for the complete reasoning text and the complete answer text.
let reasoningContent = '';
let answerContent = '';
// Set once the first answer text arrives; reasoning is no longer echoed after that.
let isAnswering = false;

// Builds the separator line printed before each output section.
const banner = (title) => `\n${'='.repeat(20)} ${title} ${'='.repeat(20)}\n`;

// Streams one glm-5.1 reply in thinking mode and prints it section by section.
async function main() {
  const stream = await openai.chat.completions.create({
    model: 'glm-5.1',
    messages: [{ role: 'user', content: 'Who are you?' }],
    stream: true,
    stream_options: { include_usage: true },
    // Non-standard parameter; the Node.js SDK takes it at the top level.
    enable_thinking: true,
  });

  console.log(banner('Thinking'));

  for await (const { choices, usage } of stream) {
    // A chunk without choices is treated as the usage report.
    if (!choices?.length) {
      console.log(banner('Token Usage'));
      console.log(usage);
      continue;
    }

    const { reasoning_content: thought, content: text } = choices[0].delta;

    // `!= null` rejects both undefined and null, nothing else.
    if (thought != null) {
      reasoningContent += thought;
      if (!isAnswering) {
        process.stdout.write(thought);
      }
    }

    if (text) {
      if (!isAnswering) {
        // First answer text: switch sections exactly once.
        isAnswering = true;
        console.log(banner('Response'));
      }
      answerContent += text;
      process.stdout.write(text);
    }
  }
}

main();

Example code

# Streaming chat completion request to the OpenAI-compatible endpoint with thinking enabled.
# The API key is taken from the DASHSCOPE_API_KEY environment variable.
# "enable_thinking" is sent as a top-level field of the JSON body, next to "stream" and "stream_options".
curl -X POST https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions \
-H "Authorization: Bearer $DASHSCOPE_API_KEY" \
-H "Content-Type: application/json" \
-d '{
    "model": "glm-5.1",
    "messages": [
        {
            "role": "user",
            "content": "Who are you?"
        }
    ],
    "stream": true,
    "stream_options": {
        "include_usage": true
    },
    "enable_thinking": true
}'

Python
Java
curl

Example code

import os
from dashscope import Generation

messages = [{"role": "user", "content": "Who are you?"}]

# Streaming call in thinking mode. The loop below concatenates the text of every
# chunk, which relies on incremental_output=True (each chunk presumably carries
# only the newly generated text -- confirm against the DashScope reference).
completion = Generation.call(
  api_key=os.getenv("DASHSCOPE_API_KEY"),
  model="glm-5.1",
  messages=messages,
  result_format="message",
  enable_thinking=True,
  stream=True,
  incremental_output=True,
)

reasoning_content = ""  # full reasoning text, accumulated across chunks
answer_content = ""  # full answer text, accumulated across chunks
is_answering = False  # set once the first answer text has been seen
last_chunk = None  # remembered so usage can be reported after the stream ends

print("\n" + "=" * 20 + " Thinking " + "=" * 20 + "\n")

for chunk in completion:
  last_chunk = chunk
  message = chunk.output.choices[0].message

  # The key may be absent, and a present key may hold None or "", so only a
  # non-empty value is printed and accumulated (None would break the "+=").
  reasoning = message.reasoning_content if "reasoning_content" in message else None
  if reasoning:
    # Reasoning is echoed only until the answer has started.
    if not is_answering:
      print(reasoning, end="", flush=True)
    reasoning_content += reasoning

  if message.content:
    if not is_answering:
      print("\n" + "=" * 20 + " Response " + "=" * 20 + "\n")
      is_answering = True
    print(message.content, end="", flush=True)
    answer_content += message.content

# Usage is read from the final chunk. Skip the report when the stream produced
# no chunks at all; reading the loop variable there would raise NameError.
if last_chunk is not None:
  print("\n" + "=" * 20 + " Token Usage " + "=" * 20 + "\n")
  print(last_chunk.usage)

Example code

DashScope Java SDK version must be 2.19.4 or later.

import com.alibaba.dashscope.aigc.generation.Generation;
import com.alibaba.dashscope.aigc.generation.GenerationParam;
import com.alibaba.dashscope.aigc.generation.GenerationResult;
import com.alibaba.dashscope.common.Message;
import com.alibaba.dashscope.common.Role;
import com.alibaba.dashscope.exception.ApiException;
import com.alibaba.dashscope.exception.InputRequiredException;
import com.alibaba.dashscope.exception.NoApiKeyException;
import io.reactivex.Flowable;
import java.lang.System;
import java.util.Arrays;

/**
 * Streams one glm-5.1 reply in thinking mode through the DashScope SDK and prints the
 * reasoning text and the answer text under separate headers as chunks arrive.
 */
public class Main {
  // Complete reasoning text and complete answer text, accumulated over all chunks.
  private static StringBuilder reasoningContent = new StringBuilder();
  private static StringBuilder finalContent = new StringBuilder();
  // Header toggle. true: the next reasoning chunk prints the "Thinking" header.
  // false: reasoning has been printed, so the next answer chunk prints the
  // "Response" header and sets the flag back to true.
  private static boolean thinkingHeaderPending = true;

  /** Prints one streamed chunk and appends its reasoning/answer text to the accumulators. */
  private static void printChunk(GenerationResult chunk) {
    Message reply = chunk.getOutput().getChoices().get(0).getMessage();
    String thought = reply.getReasoningContent();
    String answer = reply.getContent();

    boolean hasThought = thought != null && !thought.isEmpty();
    if (hasThought) {
      if (thinkingHeaderPending) {
        System.out.println("==================== Thinking ====================");
        thinkingHeaderPending = false;
      }
      reasoningContent.append(thought);
      System.out.print(thought);
    }

    boolean hasAnswer = answer != null && !answer.isEmpty();
    if (hasAnswer) {
      // The header is printed only if reasoning output preceded this answer text.
      if (!thinkingHeaderPending) {
        System.out.println("\n==================== Response ====================");
        thinkingHeaderPending = true;
      }
      finalContent.append(answer);
      System.out.print(answer);
    }
  }

  /** Builds the request: glm-5.1, thinking enabled, incremental output, "message" result format. */
  private static GenerationParam buildGenerationParam(Message userMsg) {
    return GenerationParam.builder()
      .model("glm-5.1")
      .apiKey(System.getenv("DASHSCOPE_API_KEY"))
      .messages(Arrays.asList(userMsg))
      .resultFormat("message")
      .incrementalOutput(true)
      .enableThinking(true)
      .build();
  }

  /** Sends {@code userMsg} as a streaming call and prints every chunk, blocking until the stream ends. */
  public static void streamCallWithMessage(Generation gen, Message userMsg)
    throws NoApiKeyException, ApiException, InputRequiredException {
    Flowable<GenerationResult> chunks = gen.streamCall(buildGenerationParam(userMsg));
    chunks.blockingForEach(Main::printChunk);
  }

  /** Entry point: asks "Who are you?" and reports SDK exceptions on stderr. */
  public static void main(String[] args) {
    try {
      Message userMsg = Message.builder()
        .role(Role.USER.getValue())
        .content("Who are you?")
        .build();
      streamCallWithMessage(new Generation(), userMsg);
    } catch (ApiException | NoApiKeyException | InputRequiredException e) {
      System.err.println("An exception occurred: " + e.getMessage());
    }
  }
}

Example code

# Streaming text-generation request to the native DashScope endpoint with thinking enabled.
# The API key is taken from the DASHSCOPE_API_KEY environment variable.
# Here the messages go under "input" and the options (enable_thinking, incremental_output,
# result_format) go under "parameters"; the X-DashScope-SSE header is set to "enable".
curl -X POST "https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/text-generation/generation" \
-H "Authorization: Bearer $DASHSCOPE_API_KEY" \
-H "Content-Type: application/json" \
-H "X-DashScope-SSE: enable" \
-d '{
    "model": "glm-5.1",
    "input":{
        "messages":[
            {
                "role": "user",
                "content": "Who are you?"
            }
        ]
    },
    "parameters":{
        "enable_thinking": true,
        "incremental_output": true,
        "result_format": "message"
    }
}'

Streaming tool calling

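glm-5.1 supports the tool_stream parameter (boolean, default false), which takes effect only when stream is true. When it is enabled, function-calling arguments are returned incrementally across multiple chunks instead of all at once in a single chunk. In the OpenAI Python SDK, pass tool_stream via extra_body; in the Node.js SDK and in curl, pass it as a top-level parameter; in the DashScope Python SDK, pass it as a keyword argument.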

stream	tool_stream	tool_call behavior
true	true	arguments returned incrementally across chunks
true	false (default)	arguments returned in a single chunk
false	true/false	tool_stream has no effect

OpenAI compatible
DashScope

Python
Node.js
curl

from openai import OpenAI
import os

# Client for the OpenAI-compatible endpoint; the API key is read from the
# DASHSCOPE_API_KEY environment variable.
client = OpenAI(
  base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
  api_key=os.getenv("DASHSCOPE_API_KEY"),
)

# The single function tool offered to the model; it takes one required string, "city".
weather_tool = {
  "type": "function",
  "function": {
    "name": "get_weather",
    "description": "Get weather information for a city",
    "parameters": {
      "type": "object",
      "properties": {
        "city": {"type": "string", "description": "City name"},
      },
      "required": ["city"],
    },
  },
}
tools = [weather_tool]

messages = [{"role": "user", "content": "What's the weather like in Singapore?"}]

completion = client.chat.completions.create(
  model="glm-5.1",
  messages=messages,
  tools=tools,
  stream=True,
  stream_options={"include_usage": True},
  # tool_stream is passed through extra_body.
  extra_body={"tool_stream": True},
)

for chunk in completion:
  # A chunk without choices can only contribute a usage report.
  if not chunk.choices:
    if chunk.usage:
      print(f"[usage] {chunk.usage}")
    continue

  choice = chunk.choices[0]
  delta = choice.delta

  if getattr(delta, "content", None):
    print(f"[content] {delta.content}")

  # Each tool-call fragment in the chunk is printed on its own line.
  for tc in getattr(delta, "tool_calls", None) or ():
    print(f"[tool_call] id={tc.id}, name={tc.function.name}, args={tc.function.arguments}")

  if choice.finish_reason:
    print(f"[finish_reason] {choice.finish_reason}")

import OpenAI from "openai";
import process from 'process';

// OpenAI-compatible client; the key is read from the DASHSCOPE_API_KEY environment variable.
const openai = new OpenAI({
  baseURL: 'https://dashscope-intl.aliyuncs.com/compatible-mode/v1',
  apiKey: process.env.DASHSCOPE_API_KEY
});

// The single function tool offered to the model; it takes one required string, "city".
const weatherTool = {
  type: "function",
  function: {
    name: "get_weather",
    description: "Get weather information for a city",
    parameters: {
      type: "object",
      properties: {
        city: { type: "string", description: "City name" }
      },
      required: ["city"]
    }
  }
};
const tools = [weatherTool];

// Streams one tool-calling reply and logs content, tool-call fragments, finish reason and usage.
async function main() {
  const stream = await openai.chat.completions.create({
    model: 'glm-5.1',
    messages: [{ role: 'user', content: "What's the weather like in Singapore?" }],
    tools,
    stream: true,
    stream_options: { include_usage: true },
    // Passed at the top level in the Node.js SDK.
    tool_stream: true,
  });

  for await (const { choices, usage } of stream) {
    // A chunk without choices can only contribute a usage report.
    if (!choices?.length) {
      if (usage) {
        console.log(`[usage] ${JSON.stringify(usage)}`);
      }
      continue;
    }

    const first = choices[0];
    const delta = first.delta;

    if (delta.content) {
      console.log(`[content] ${delta.content}`);
    }

    // Each tool-call fragment in the chunk is logged on its own line.
    for (const tc of delta.tool_calls || []) {
      console.log(`[tool_call] id=${tc.id}, name=${tc.function.name}, args=${tc.function.arguments}`);
    }

    if (first.finish_reason) {
      console.log(`[finish_reason] ${first.finish_reason}`);
    }
  }
}

main();

# Streaming tool-calling request to the OpenAI-compatible endpoint.
# The API key is taken from the DASHSCOPE_API_KEY environment variable.
# "tool_stream" is sent as a top-level field of the JSON body together with "stream": true.
curl -X POST https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions \
-H "Authorization: Bearer $DASHSCOPE_API_KEY" \
-H "Content-Type: application/json" \
-d '{
    "model": "glm-5.1",
    "messages": [
        {
            "role": "user",
            "content": "What is the weather like in Singapore?"
        }
    ],
    "tools": [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get weather information for a city",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "city": {"type": "string", "description": "City name"}
                    },
                    "required": ["city"]
                }
            }
        }
    ],
    "stream": true,
    "stream_options": {"include_usage": true},
    "tool_stream": true
}'

Python
curl

import os
from dashscope import Generation

# The single function tool offered to the model; it takes one required string, "city".
weather_tool = {
  "type": "function",
  "function": {
    "name": "get_weather",
    "description": "Get weather information for a city",
    "parameters": {
      "type": "object",
      "properties": {
        "city": {"type": "string", "description": "City name"},
      },
      "required": ["city"],
    },
  },
}
tools = [weather_tool]

messages = [{"role": "user", "content": "What's the weather like in Singapore?"}]

# Streaming call with tool_stream enabled; the API key is read from the
# DASHSCOPE_API_KEY environment variable.
completion = Generation.call(
  api_key=os.getenv("DASHSCOPE_API_KEY"),
  model="glm-5.1",
  messages=messages,
  tools=tools,
  result_format="message",
  stream=True,
  incremental_output=True,
  tool_stream=True,
)

for chunk in completion:
  choice = chunk.output.choices[0]
  msg = choice.message

  if msg.content:
    print(f"[content] {msg.content}")

  # The key may be missing from the message, so test membership before reading it.
  pending_calls = msg.tool_calls if "tool_calls" in msg else None
  for tc in pending_calls or ():
    fn = tc.get("function", {})
    call_id = tc.get("id", "")
    fn_name = fn.get("name", "")
    fn_args = fn.get("arguments", "")
    print(f"[tool_call] id={call_id}, name={fn_name}, args={fn_args}")

  # An empty value and the literal string "null" both mean there is nothing to report.
  finish = choice.get("finish_reason", "")
  if not finish or finish == "null":
    continue
  print(f"[finish_reason] {finish}")

# Streaming tool-calling request to the native DashScope endpoint.
# The API key is taken from the DASHSCOPE_API_KEY environment variable.
# Here the messages go under "input", while "tools", "tool_stream", "incremental_output"
# and "result_format" go under "parameters"; the X-DashScope-SSE header is set to "enable".
curl -X POST "https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/text-generation/generation" \
-H "Authorization: Bearer $DASHSCOPE_API_KEY" \
-H "Content-Type: application/json" \
-H "X-DashScope-SSE: enable" \
-d '{
    "model": "glm-5.1",
    "input": {
        "messages": [
            {
                "role": "user",
                "content": "What is the weather like in Singapore?"
            }
        ]
    },
    "parameters": {
        "tools": [
            {
                "type": "function",
                "function": {
                    "name": "get_weather",
                    "description": "Get weather information for a city",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "city": {"type": "string", "description": "City name"}
                        },
                        "required": ["city"]
                    }
                }
            }
        ],
        "tool_stream": true,
        "incremental_output": true,
        "result_format": "message"
    }
}'

Other features

Model	Multi-turn	Function calling	Web search	Context cache
glm-5.1	✓	✓	✓ (non-thinking mode only)	✓ (explicit and implicit)

Parameter defaults

Model	enable_thinking	temperature	top_p	top_k	repetition_penalty
glm-5.1	true	1.0	0.95	20	1.0

​Quick start

​Streaming tool calling

​Other features

​Parameter defaults

Quick start

Streaming tool calling

Other features

Parameter defaults