Skip to main content
Qwen-Omni-Realtime

Qwen-Omni server events

WebSocket server reference

Server events for the Qwen-Omni-Realtime API, including function calling events.

error

Server error message.
Example
{
  "event_id": "event_RoUu4T8yExPMI37GKwaOC",
  "type": "error",
  "error": {
  "type": "invalid_request_error",
  "code": "invalid_value",
  "message": "Invalid modalities: ['audio']. Supported combinations are: ['text'] and ['audio', 'text'].",
  "param": "session.modalities"
  }
}
event_id
string
body
Unique event identifier.
type
string
body
Always error.
error
object
body
Error details.

session.created

First event after connection. Contains the default session configuration.
Example
{
  "event_id": "event_RdvlSpbBb2ssyBjYrDHjt",
  "type": "session.created",
  "session": {
  "object": "realtime.session",
  "model": "qwen3-omni-flash-realtime",
  "modalities": [
      "text",
      "audio"
  ],
  "voice": "Cherry",
  "input_audio_format": "pcm",
  "output_audio_format": "pcm",
  "input_audio_transcription": {
      "model": "qwen3-asr-flash-realtime"
  },
  "turn_detection": {
      // Either server_vad or semantic_vad; semantic_vad is supported only by qwen3.5-omni-realtime.
      "type": "server_vad",
      "threshold": 0.5,
      "prefix_padding_ms": 300,
      "silence_duration_ms": 800,
      "create_response": true,
      "interrupt_response": true
  },
  "enable_search": false,
  "search_options": {},
  "tools": [],
  "temperature": 0.8,
  "id": "sess_Ov7GOXoNXhNjlxXtOGKQS"
  }
}
event_id
string
body
Unique event identifier.
type
string
body
Always session.created.
session
object
body
Session configuration.

session.updated

Sent after a successful session.update request. On error, the server sends an error event instead.
Example
{
  "event_id": "event_X1HsXS4b4uptp6yo1LgKd",
  "type": "session.updated",
  "session": {
  "id": "sess_Aih6vAcY5Ddt6jwFx1tCa",
  "object": "realtime.session",
  "model": "qwen3-omni-flash-realtime",
  "modalities": [
      "text",
      "audio"
  ],
  "instructions": "You are Xiao Yun, a personal assistant. Answer user questions accurately and in a friendly manner. Always respond with a helpful attitude.",
  "voice": "Cherry",
  "input_audio_format": "pcm",
  "output_audio_format": "pcm",
  "input_audio_transcription": {
      "model": "qwen3-asr-flash-realtime"
  },
  "turn_detection": {
      // Either server_vad or semantic_vad; semantic_vad is supported only by qwen3.5-omni-realtime.
      "type": "server_vad",
      "threshold": 0.1,
      "prefix_padding_ms": 500,
      "silence_duration_ms": 900,
      "create_response": true,
      "interrupt_response": true
  },
  "enable_search": true,
  "search_options": {
      "enable_source": true
  },
  "tools": [
      {
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "Useful for querying the weather in a specific city.",
        "parameters": {
      "type": "object",
      "properties": {
          "location": {"type": "string", "description": "The city name"}
      },
      "required": ["location"]
        }
    }
      }
  ],
  "temperature": 0.8,
  "max_response_output_token": "inf",
  "max_tokens": 16384,
  "repetition_penalty": 1.05,
  "presence_penalty": 0.0,
  "top_k": 50,
  "top_p": 1.0,
  "seed": -1
  }
}
event_id
string
body
Unique event identifier.
type
string
body
Always session.updated.
session
object
body
Session configuration.

input_audio_buffer.speech_started

Sent in VAD mode when speech starts in the audio buffer.
It can be triggered whenever audio is appended to the buffer, unless speech has already been detected.
Example
{
  "event_id": "event_Pvp8nEhsQuGCQbFJ9x58n",
  "type": "input_audio_buffer.speech_started",
  "audio_start_ms": 3647,
  "item_id": "item_YbAiGvK2H7YaS34o4R6Ba"
}
event_id
string
body
Unique event identifier.
type
string
body
Always input_audio_buffer.speech_started.
audio_start_ms
integer
body
Milliseconds from the start of audio input to the first detected speech.
item_id
string
body
User message item ID, created when speech stops. This item appends user input to the conversation history for inference.

input_audio_buffer.speech_stopped

Sent in VAD mode when speech ends in the audio buffer. The server also sends conversation.item.created to create the user message item.
Example
{
  "event_id": "event_UhQiqNVRsgUiq4KUS5Xb5",
  "type": "input_audio_buffer.speech_stopped",
  "audio_end_ms": 4453,
  "item_id": "item_YbAiGvK2H7YaS34o4R6Ba"
}
event_id
string
body
Unique event identifier.
type
string
body
Always input_audio_buffer.speech_stopped.
audio_end_ms
integer
body
Milliseconds from session start to speech end.
item_id
string
body
User message item ID (will be created).

input_audio_buffer.committed

Sent when the input audio buffer is committed.
  • In VAD mode, the buffer commits automatically when the user finishes speaking.
  • In manual mode, sent after the client sends input_audio_buffer.commit.
Example
{
  "event_id": "event_Iy6sUzL1nmdFgshFYxJEz",
  "type": "input_audio_buffer.committed",
  "item_id": "item_YbAiGvK2H7YaS34o4R6Ba"
}
event_id
string
body
Unique event identifier.
type
string
body
Always input_audio_buffer.committed.
item_id
string
body
User message item ID (will be created).

input_audio_buffer.cleared

Sent after the client sends input_audio_buffer.clear.
Example
{
  "event_id": "event_RoUu4T8yExPMI37GKwaOC",
  "type": "input_audio_buffer.cleared"
}
event_id
string
body
Unique event identifier.
type
string
body
Always input_audio_buffer.cleared.

conversation.item.created

Sent when a conversation item is created.
Example
{
  "event_id": "event_JEfkrr9gO3Ny7Xcv9bGVd",
  "type": "conversation.item.created",
  "item": {
  "id": "item_YbAiGvK2H7YaS34o4R6Ba",
  "object": "realtime.item",
  "type": "message",
  "status": "in_progress",
  "role": "assistant",
  "content": [
      {
    "type": "input_audio"
      }
  ]
  }
}
// Tool calling scenario
{
  "event_id": "event_S1hkaIQgcuQD8OEdOpGHQ",
  "type": "conversation.item.created",
  "item": {
  "id": "item_FEG9qJGNkPcdf4et3p7BV",
  "object": "realtime.item",
  "type": "function_call",
  "status": "in_progress",
  "call_id": "call_bc0a7fb7235840f69ecfe4",
  "name": "get_current_weather",
  "arguments": ""
  }
}
event_id
string
body
Unique event identifier.
type
string
body
Always conversation.item.created.
item
object
body
Conversation item.

conversation.item.input_audio_transcription.delta

When input audio transcription is enabled, this event is sent frequently while the user is speaking. It provides real-time intermediate transcription results. Concatenate text + stash to get the most complete sentence preview at any point in time.
Example
{
  "event_id": "event_C7jzoeSFuiwOZS6tR14yx",
  "type": "conversation.item.input_audio_transcription.delta",
  "item_id": "item_ThVYhLHOdeXb4bBSvzSFF",
  "content_index": 0,
  "text": "",
  "stash": "How is the weather today?",
  "language": "en",
  "emotion": "neutral",
  "obfuscation": "ABEXGYmxdmc97u"
}
event_id
string
body
Unique event identifier.
type
string
body
Always conversation.item.input_audio_transcription.delta.
item_id
string
body
The ID of the associated conversation item.
content_index
integer
body
The index of the content part that contains the audio.
text
string
body
The confirmed text prefix. This portion of the current sentence has been confirmed by the model and will not change.
stash
string
body
The preliminary text suffix. This temporary draft follows the confirmed portion and may be revised by the model.
language
string
body
The detected language of the recognized audio.
emotion
string
body
The detected emotion of the recognized audio. Valid values: neutral, happy, sad, angry, surprised, disgusted, fearful.
Suppose the user says: "The weather is nice today, sunny and warm."
| Time | User speech | text | stash | Display (text + stash) |
| --- | --- | --- | --- | --- |
| T1 | "The weather..." | "" | "The weather" | The weather |
| T2 | "...is nice..." | "" | "The weather is nice" | The weather is nice |
| T3 | "...today," | "The weather" | " is nice today," | The weather is nice today, |
| T4 | (brief pause) | "The weather is nice today, " | "" | The weather is nice today, |
| T5 | "sunny..." | "The weather is nice today, " | "sunny" | The weather is nice today, sunny |
| T6 | "...and warm." | "The weather is nice today, " | "sunny and warm." | The weather is nice today, sunny and warm. |
| T7 | (stops) | - | - | Use conversation.item.input_audio_transcription.completed as the final result. |

conversation.item.input_audio_transcription.completed

Sent after audio is buffered and transcribed. Transcription uses a separate model (qwen3-asr-flash-realtime).
The transcribed text may differ from text processed by Qwen-Omni-Realtime. Treat it as a reference.
Example
{
  "event_id": "event_FrrZcxiDfTB9LD9p4pVng",
  "type": "conversation.item.input_audio_transcription.completed",
  "item_id": "item_YbAiGvK2H7YaS34o4R6Ba",
  "content_index": 0,
  "transcript": "Hello."
}
event_id
string
body
Unique event identifier.
type
string
body
Always conversation.item.input_audio_transcription.completed.
item_id
string
body
User message item ID.
content_index
integer
body
Fixed to 0.
transcript
string
body
Transcribed text.

conversation.item.input_audio_transcription.failed

Sent when input audio transcription fails (if enabled). Separate from the error event.
Example
{
  "event_id": "<event_id>",
  "type": "conversation.item.input_audio_transcription.failed",
  "item_id": "<item_id>",
  "content_index": 0,
  "error": {
  "code": "<code>",
  "message": "<message>",
  "param": "<param>"
  }
}
event_id
string
body
Unique event identifier.
type
string
body
Always conversation.item.input_audio_transcription.failed.
item_id
string
body
User message item ID.
content_index
integer
body
Fixed to 0.
error
object
body
Error details.

response.created

Sent when the model starts generating a response.
Example
{
  "event_id": "event_XuDavMzQN3KKepqGu3KRh",
  "type": "response.created",
  "response": {
  "id": "resp_HaVOPdbmX6vifiV5pAfJY",
  "object": "realtime.response",
  "conversation_id": "conv_FjJaccpnvwHNo9cPVuzGc",
  "status": "in_progress",
  "modalities": [
      "text",
      "audio"
  ],
  "voice": "Cherry",
  "output_audio_format": "pcm24",
  "output": []
  }
}
event_id
string
body
Unique event identifier.
type
string
body
Always response.created.
response
object
body
Response object.

response.done

Sent after response generation completes. The response object contains all output items except raw audio data.
Example
{
  "event_id": "event_CSaxRRYLvbrfexDXAEuDG",
  "type": "response.done",
  "response": {
  "id": "resp_HaVOPdbmX6vifiV5pAfJY",
  "object": "realtime.response",
  "conversation_id": "conv_FjJaccpnvwHNo9cPVuzGc",
  "status": "completed",
  "modalities": [
      "text",
      "audio"
  ],
  "voice": "Cherry",
  "output_audio_format": "pcm",
  "output": [
      {
    "id": "item_Ls6MtCUWO7LM4E59QziNv",
    "object": "realtime.item",
    "type": "message",
    "status": "completed",
    "role": "assistant",
    "content": [
          {
      "type": "audio",
      "transcript": "Hello! How can I help you?"
          }
    ]
      }
  ],
  "usage": {
      "total_tokens": 377,
      "input_tokens": 336,
      "output_tokens": 41,
      "input_tokens_details": {
    "text_tokens": 228,
    "audio_tokens": 108
      },
      "output_tokens_details": {
    "text_tokens": 9,
    "audio_tokens": 32
      },
      "plugins": {
    "search": {
        "count": 1,
        "strategy": "agent"
    }
      }
  }
  }
}
// Tool calling scenario
{
  "event_id": "event_T1EFAJp43X2DWtDRmxTtx",
  "type": "response.done",
  "response": {
  "id": "resp_TucN5QgymL5MA8vkJvFlS",
  "object": "realtime.response",
  "conversation_id": "conv_SEDZESRlefT8WvLSmEn6E",
  "status": "completed",
  "modalities": ["text", "audio"],
  "voice": "Ethan",
  "output_audio_format": "pcm16",
  "output": [
      {
    "id": "item_FEG9qJGNkPcdf4et3p7BV",
    "object": "realtime.item",
    "type": "function_call",
    "status": "completed",
    "call_id": "call_bc0a7fb7235840f69ecfe4",
    "name": "get_current_weather",
    "arguments": " {\"location\": \"Hangzhou\"}"
      }
  ],
  "usage": {
      "total_tokens": 567,
      "input_tokens": 524,
      "output_tokens": 43,
      "input_tokens_details": {
    "text_tokens": 487,
    "audio_tokens": 37
      },
      "output_tokens_details": {
    "text_tokens": 43
      }
  }
  }
}
event_id
string
body
Unique event identifier.
type
string
body
Always response.done.
response
object
body
Response object.

response.text.delta

Sent when the output modality is text-only and the model generates a text chunk.
Example
{
  "delta": "Hello",
  "event_id": "event_TH49MauuPmRo1RGaMSlP7",
  "type": "response.text.delta",
  "response_id": "resp_PrRSvPVpnCExdUOGHHLuP",
  "item_id": "item_L8IRm9kRXFpxoOjDqDC96",
  "output_index": 0,
  "content_index": 0
}
event_id
string
body
Unique event identifier.
type
string
body
Always response.text.delta.
delta
string
body
Incremental text chunk.
response_id
string
body
Response ID.
item_id
string
body
Message item ID. Use this to associate items from the same message.
output_index
integer
body
Output item index. Fixed to 0.
content_index
integer
body
Content part index. Fixed to 0.

response.text.done

Sent when text-only output finishes generating.
Also sent when the response is interrupted, incomplete, or canceled.
Example
{
  "event_id": "event_B1lIeE2Nac33zn5V7h2mm",
  "type": "response.text.done",
  "response_id": "resp_B1lIdtjF4Noqpn5NOjznj",
  "item_id": "item_B1lIdJsAJlJiFs8ztWpJt",
  "output_index": 0,
  "content_index": 0,
  "text": "How can I assist you today?"
}
event_id
string
body
Unique event identifier.
type
string
body
Always response.text.done.
response_id
string
body
Response ID.
item_id
string
body
Message item ID.
output_index
integer
body
Output item index.
content_index
integer
body
Content part index.
text
string
body
Complete text output.

response.audio.delta

Sent when the output modality includes audio and the model generates an audio chunk.
Example
{
  "event_id": "event_B1osWMZBtrEQbiIwW0qHQ",
  "type": "response.audio.delta",
  "response_id": "resp_P79OOMs8LnrXVpiIHUCKR",
  "item_id": "item_OFaPGtzfWCPyGzxnuEX9i",
  "output_index": 0,
  "content_index": 0,
  "delta": "{base64 audio}"
}
event_id
string
body
Unique event identifier.
type
string
body
Always response.audio.delta.
response_id
string
body
Response ID.
item_id
string
body
Message item ID.
output_index
integer
body
Output item index.
content_index
integer
body
Content part index.
delta
string
body
Base64-encoded audio chunk.

response.audio.done

Sent when audio output finishes generating.
Also sent when the response is interrupted, incomplete, or canceled.
Example
{
  "event_id": "event_Le1TDl7VfyHQxl47DtGxI",
  "type": "response.audio.done",
  "response_id": "resp_HaVOPdbmX6vifiV5pAfJY",
  "item_id": "item_Ls6MtCUWO7LM4E59QziNv",
  "output_index": 0,
  "content_index": 0
}
event_id
string
body
Unique event identifier.
type
string
body
Always response.audio.done.
response_id
string
body
Response ID.
item_id
string
body
Message item ID.
output_index
integer
body
Output item index.
content_index
integer
body
Content part index.

response.audio_transcript.delta

Sent when the output modality includes audio and the model generates a transcript chunk.
Example
{
  "event_id": "event_BksW7fOwnyavZdDxIzZYM",
  "type": "response.audio_transcript.delta",
  "response_id": "resp_HaVOPdbmX6vifiV5pAfJY",
  "item_id": "item_Ls6MtCUWO7LM4E59QziNv",
  "output_index": 0,
  "content_index": 0,
  "delta": "Is there anything"
}
event_id
string
body
Unique event identifier.
type
string
body
Always response.audio_transcript.delta.
response_id
string
body
Response ID.
item_id
string
body
Message item ID.
output_index
integer
body
Output item index.
content_index
integer
body
Content part index.
delta
string
body
Incremental transcript text.

response.audio_transcript.done

Sent when the audio transcript finishes generating.
Example
{
  "event_id": "event_X49tL2WerT4WjxcmH16lS",
  "type": "response.audio_transcript.done",
  "response_id": "resp_HaVOPdbmX6vifiV5pAfJY",
  "item_id": "item_Ls6MtCUWO7LM4E59QziNv",
  "output_index": 0,
  "content_index": 0,
  "transcript": "Hello! Is there anything I can help you with?"
}
event_id
string
body
Unique event identifier.
type
string
body
Always response.audio_transcript.done.
response_id
string
body
Response ID.
item_id
string
body
Message item ID.
output_index
integer
body
Output item index.
content_index
integer
body
Content part index.
transcript
string
body
Complete transcript.

response.function_call_arguments.delta

Sent for each new segment while the model streams the argument string of a function call. Concatenate the delta fields in order. The complete content is provided in the subsequent response.function_call_arguments.done event.
Example
{
  "event_id": "event_SlKoJyEbPEqLq14DSM1u5",
  "type": "response.function_call_arguments.delta",
  "response_id": "resp_JnTOsWXlFhKcFohZbtfz6",
  "item_id": "item_Rhcms7CauTNsQprV5S4Hr",
  "output_index": 0,
  "call_id": "call_2be200f4cafe419b9530dd",
  "delta": " {\"location\": \"Beijing\"}"
}
event_id
string
body
Unique event identifier.
type
string
body
Always response.function_call_arguments.delta.
response_id
string
body
Response ID.
item_id
string
body
Function call item ID.
output_index
integer
body
Output item index.
call_id
string
body
Unique ID for this function invocation. Consistent with the done event in the same turn.
delta
string
body
New segment of the argument string. Concatenate segments in order.

response.function_call_arguments.done

Indicates that the function call arguments have been fully generated. The arguments field contains the complete argument string. After receiving this event, parse the arguments and call the local tool function. Use the complete arguments from this event, not the concatenated delta result.
Example
{
  "event_id": "event_X6suLyuL5agdH7r6koesM",
  "type": "response.function_call_arguments.done",
  "response_id": "resp_JnTOsWXlFhKcFohZbtfz6",
  "item_id": "item_Rhcms7CauTNsQprV5S4Hr",
  "output_index": 0,
  "name": "get_current_weather",
  "call_id": "call_2be200f4cafe419b9530dd",
  "arguments": " {\"location\": \"Beijing\"}"
}
event_id
string
body
Unique event identifier.
type
string
body
Always response.function_call_arguments.done.
response_id
string
body
Response ID.
item_id
string
body
Function call item ID.
output_index
integer
body
Output item index.
call_id
string
body
Unique ID for this function invocation.
name
string
body
Name of the function that was called.
arguments
string
body
Complete arguments for the function invocation, typically a JSON string.

response.output_item.added

Sent when a new item is created during response generation. The item type can be message or function_call.
Example
{
  "event_id": "event_DsCO341DEVtiATtCB6BUY",
  "type": "response.output_item.added",
  "response_id": "resp_HaVOPdbmX6vifiV5pAfJY",
  "output_index": 0,
  "item": {
  "id": "item_Ls6MtCUWO7LM4E59QziNv",
  "object": "realtime.item",
  "type": "message",
  "status": "in_progress",
  "role": "assistant",
  "content": []
  }
}
// Tool calling scenario
{
  "event_id": "event_HXmKt5pGoiRtXx7Hq7zpN",
  "type": "response.output_item.added",
  "response_id": "resp_TucN5QgymL5MA8vkJvFlS",
  "output_index": 0,
  "item": {
  "id": "item_FEG9qJGNkPcdf4et3p7BV",
  "object": "realtime.item",
  "type": "function_call",
  "status": "in_progress",
  "call_id": "call_bc0a7fb7235840f69ecfe4",
  "name": "get_current_weather",
  "arguments": ""
  }
}
event_id
string
body
Unique event identifier.
type
string
body
Always response.output_item.added.
response_id
string
body
Response ID.
output_index
integer
body
Output item index.
item
object
body
Output item.

response.output_item.done

Sent when an output item is complete.
Example
{
  "event_id": "event_MEu5nlLw1LsOguHiehIP8",
  "type": "response.output_item.done",
  "response_id": "resp_HaVOPdbmX6vifiV5pAfJY",
  "output_index": 0,
  "item": {
  "id": "item_Ls6MtCUWO7LM4E59QziNv",
  "object": "realtime.item",
  "type": "message",
  "status": "completed",
  "role": "assistant",
  "content": [
      {
    "type": "audio",
    "text": "Hello! How can I help you?"
      }
  ]
  }
}
// Tool calling scenario
{
  "event_id": "event_FHspdfAnCyjuME3mmAwSY",
  "type": "response.output_item.done",
  "response_id": "resp_TucN5QgymL5MA8vkJvFlS",
  "output_index": 0,
  "item": {
  "id": "item_FEG9qJGNkPcdf4et3p7BV",
  "object": "realtime.item",
  "type": "function_call",
  "status": "completed",
  "call_id": "call_bc0a7fb7235840f69ecfe4",
  "name": "get_current_weather",
  "arguments": " {\"location\": \"Hangzhou\"}"
  }
}
event_id
string
body
Unique event identifier.
type
string
body
Always response.output_item.done.
response_id
string
body
Response ID.
output_index
integer
body
Output item index.
item
object
body
Output item.

response.content_part.added

Sent when a new content part is added to an assistant message during response generation.
Example
{
  "event_id": "event_AVBOmrgY3C8bjlRajfSUT",
  "type": "response.content_part.added",
  "response_id": "resp_HaVOPdbmX6vifiV5pAfJY",
  "item_id": "item_Ls6MtCUWO7LM4E59QziNv",
  "output_index": 0,
  "content_index": 0,
  "part": {
  "type": "audio",
  "text": ""
  }
}
event_id
string
body
Unique event identifier.
type
string
body
Always response.content_part.added.
response_id
string
body
Response ID.
item_id
string
body
Message item ID.
output_index
integer
body
Output item index. Fixed to 0.
content_index
integer
body
Content part index. Fixed to 0.
part
object
body
Content part.

response.content_part.done

Sent when a content part in an assistant message finishes streaming.
Example
{
  "event_id": "event_Il8HD19v58Qr5IBkw7LtN",
  "type": "response.content_part.done",
  "response_id": "resp_HaVOPdbmX6vifiV5pAfJY",
  "item_id": "item_Ls6MtCUWO7LM4E59QziNv",
  "output_index": 0,
  "content_index": 0,
  "part": {
  "type": "audio",
  "text": "Hello! Is there anything I can help you with?"
  }
}
event_id
string
body
Unique event identifier.
type
string
body
Always response.content_part.done.
response_id
string
body
Response ID.
item_id
string
body
Message item ID.
output_index
integer
body
Output item index. Fixed to 0.
content_index
integer
body
Content part index. Fixed to 0.
part
object
body
Content part.