LiveTranslate Python SDK

Call Qwen-LiveTranslate with the DashScope Python SDK for real-time speech translation.

Prerequisites
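Install the DashScope Python SDK (pip install dashscope) and, for the microphone example at the end of this page, PyAudio (pip install pyaudio). You also need a valid API key: either pass it through the api_key constructor parameter or set the DASHSCOPE_API_KEY environment variable, which the SDK reads by default.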

Request parameters

Set these in the OmniRealtimeConversation constructor. The example below defines a callback class first, then constructs the conversation:
from dashscope.audio.qwen_omni import (
  OmniRealtimeConversation,
  OmniRealtimeCallback,
  MultiModality,
)
from dashscope.audio.qwen_omni.omni_realtime import TranslationParams


class MyCallback(OmniRealtimeCallback):
  """Callback handler for real-time translation"""
  def __init__(self, conversation=None):
    self.conversation = conversation
    self.handlers = {
      'session.created': self._handle_session_created,
      'response.audio_transcript.done': self._handle_translation_done,
      'response.audio.delta': self._handle_audio_delta,
      'response.done': lambda r: print('======Response Done======'),
      'input_audio_buffer.speech_started': lambda r: print('======Speech Start======'),
      'input_audio_buffer.speech_stopped': lambda r: print('======Speech Stop======'),
    }

  def on_open(self):
    print('Connection opened')

  def on_close(self, code, msg):
    print(f'Connection closed, code: {code}, msg: {msg}')

  def on_event(self, response):
    try:
      handler = self.handlers.get(response['type'])
      if handler:
        handler(response)
    except Exception as e:
      print(f'[Error] {e}')

  def _handle_session_created(self, response):
    print(f"Session created: {response['session']['id']}")

  def _handle_translation_done(self, response):
    print(f"Translation result: {response['transcript']}")

  def _handle_audio_delta(self, response):
    # Incremental Base64-encoded audio chunk. Decode it for playback
    # or to save it to a file; the complete example below plays it back.
    audio_b64 = response.get('delta', '')

conversation = OmniRealtimeConversation(
  model='qwen3-livetranslate-flash-realtime',
  url='wss://dashscope-intl.aliyuncs.com/api-ws/v1/realtime',
  callback=MyCallback(conversation=None)  # Temporarily pass None. It will be injected later.
)
# Give the callback a reference to the conversation.
conversation.callback.conversation = conversation

Parameter | Type | Required | Description
model | str | Yes | Model name. Set to qwen3-livetranslate-flash-realtime.
callback | OmniRealtimeCallback | Yes | Callback object that handles server events.
url | str | No | Service endpoint, for example wss://dashscope-intl.aliyuncs.com/api-ws/v1/realtime. Defaults to the DashScope endpoint.
api_key | str | No | API key for authentication. If this parameter is not provided, the SDK uses the DASHSCOPE_API_KEY environment variable.
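If you do not want to rely on the DASHSCOPE_API_KEY environment variable, pass the key explicitly through api_key. A minimal sketch (the key value is a placeholder; MyCallback is the class defined above):

conversation = OmniRealtimeConversation(
  model='qwen3-livetranslate-flash-realtime',
  url='wss://dashscope-intl.aliyuncs.com/api-ws/v1/realtime',
  api_key='sk-xxx',  # Placeholder: your DashScope API key.
  callback=MyCallback(conversation=None),
)
conversation.callback.conversation = conversation
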
Set these with OmniRealtimeConversation.update_session:
# Set translation parameters
translation_params = TranslationParams(
  language='en',  # Target language
  corpus=TranslationParams.Corpus(
    phrases={
      'Inteligencia Artificial': 'Artificial Intelligence',
      'Aprendizaje Automático': 'Machine Learning'
    }
  )
)

# Update session configuration
conversation.update_session(
  output_modalities=[MultiModality.TEXT, MultiModality.AUDIO],
  voice='Cherry',
  translation_params=translation_params,
)

Parameter | Type | Required | Description
output_modalities | List[MultiModality] | No | Output types. Default: [MultiModality.TEXT, MultiModality.AUDIO]. Valid values: [MultiModality.TEXT] (text only) or [MultiModality.TEXT, MultiModality.AUDIO] (text and audio).
voice | str | No | Voice for audio output. Default: Cherry. See Supported voices.
input_audio_transcription_model | str | No | Set to qwen3-asr-flash-realtime to get speech recognition results for the source language.
translation_params | TranslationParams | No | Translation settings.
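For example, a text-only session that also returns the source-language transcript could be configured like this (a sketch using only the parameters documented above):

conversation.update_session(
  output_modalities=[MultiModality.TEXT],  # Translated text only, no synthesized audio.
  input_audio_transcription_model='qwen3-asr-flash-realtime',  # Also emit source-language ASR events.
  translation_params=TranslationParams(language='en'),
)
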
Set these in the TranslationParams constructor:
translation_params = TranslationParams(
  language='en',  # Target language code
  corpus=TranslationParams.Corpus(
    phrases={
      'Inteligencia Artificial': 'Artificial Intelligence',  # Source phrase: Target translation
      'Aprendizaje Automático': 'Machine Learning'
    }
  )
)

Parameter | Type | Required | Description
language | str | No | Target language code. Default: en. See Supported languages.
corpus | TranslationParams.Corpus | No | Hotword settings that improve accuracy for specific terms.
corpus.phrases | dict | No | Hotword map (key: source term, value: target translation). Example: {'Inteligencia Artificial': 'Artificial Intelligence'}
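For a larger glossary, the hotword map can be loaded from a file instead of being hard-coded. A hypothetical helper (glossary.json and its format are assumptions, not part of the SDK):

import json

def load_phrases(path: str) -> dict:
  # Expects a JSON object that maps source terms to target translations,
  # e.g. {"Inteligencia Artificial": "Artificial Intelligence"}.
  with open(path, encoding='utf-8') as f:
    return json.load(f)

translation_params = TranslationParams(
  language='en',
  corpus=TranslationParams.Corpus(phrases=load_phrases('glossary.json')),  # Hypothetical file.
)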

Key interfaces

OmniRealtimeConversation class

Import: from dashscope.audio.qwen_omni import OmniRealtimeConversation

Method signature | Server event (via callback) | Description
def connect(self) -> None | Session created; session config updated | Connects to the server.
def update_session(self, output_modalities: List[MultiModality], voice: str = None, translation_params: TranslationParams = None, **kwargs) -> None | Session config updated | Updates session settings. Call it right after connecting; if not called, the defaults apply. See the update_session parameters above.
def end_session(self, timeout: int = 20) -> None | session.finished (the server finishes translation and ends the session) | Ends the session. The server finishes any remaining translation before closing.
def append_audio(self, audio_b64: str) -> None | None | Sends Base64-encoded audio to the input buffer. The server auto-detects speech boundaries and triggers translation.
def close(self) -> None | None | Stops the task and closes the connection.
def get_session_id(self) -> str | None | Returns the current session ID.
def get_last_response_id(self) -> str | None | Returns the last response ID.
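Taken together, a typical lifecycle is connect, update_session, repeated append_audio calls, then end_session and close. A minimal sketch that streams a local raw PCM file (input.pcm is a placeholder; 16 kHz, 16-bit mono audio is assumed, matching the microphone settings in the complete example; conversation, MultiModality, and TranslationParams come from the setup above):

import base64

conversation.connect()
conversation.update_session(
  output_modalities=[MultiModality.TEXT],
  translation_params=TranslationParams(language='en'),
)

with open('input.pcm', 'rb') as f:  # Placeholder: raw 16 kHz, 16-bit mono PCM.
  while chunk := f.read(3200):  # About 100 ms of audio per chunk.
    conversation.append_audio(base64.b64encode(chunk).decode('ascii'))

conversation.end_session()  # Let the server finish any in-flight translation.
conversation.close()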

Callback interface (OmniRealtimeCallback)

The server sends events to the client through callbacks. Inherit this class and implement its methods to handle them. Import: from dashscope.audio.qwen_omni import OmniRealtimeCallback

Method signature | Parameters | Description
def on_open(self) -> None | None | Called when the WebSocket connection opens.
def on_event(self, message: dict) -> None | message: the server event | Called when a server event arrives.
def on_close(self, close_status_code, close_msg) -> None | close_status_code: status code; close_msg: log message | Called when the WebSocket connection closes.
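For instance, instead of playing translated audio, a callback can write it to disk. A sketch that appends response.audio.delta chunks to a WAV file (output.wav is a placeholder; the 24 kHz, 16-bit mono format is an assumption that matches the speaker settings in the complete example below):

import base64
import wave

from dashscope.audio.qwen_omni import OmniRealtimeCallback

class WavFileCallback(OmniRealtimeCallback):
  """Writes translated audio deltas to a WAV file."""

  def __init__(self, path='output.wav'):  # Placeholder output path.
    self.wav = wave.open(path, 'wb')
    self.wav.setnchannels(1)      # Mono.
    self.wav.setsampwidth(2)      # 16-bit samples.
    self.wav.setframerate(24000)  # Assumed output sample rate.

  def on_open(self):
    print('Connection opened')

  def on_event(self, response):
    if response.get('type') == 'response.audio.delta':
      self.wav.writeframes(base64.b64decode(response.get('delta', '')))

  def on_close(self, code, msg):
    self.wav.close()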

Complete example

Record and translate audio from a microphone in real time:
import os
import sys
import base64
import signal
import pyaudio
from dashscope.audio.qwen_omni import (
  OmniRealtimeConversation,
  OmniRealtimeCallback,
  MultiModality,
)
from dashscope.audio.qwen_omni.omni_realtime import TranslationParams


class Callback(OmniRealtimeCallback):
  """Callback handler class for real-time translation"""

  def __init__(self, speaker):
    self.speaker = speaker

  def on_open(self):
    print("[Connection established]")

  def on_close(self, code, msg):
    print(f"[Connection closed] code: {code}, msg: {msg}")

  def on_event(self, response):
    event_type = response.get("type", "")
    if event_type == "input_audio_buffer.speech_started":
      print("====== Speech input detected ======")
    elif event_type == "input_audio_buffer.speech_stopped":
      print("====== Speech input ended ======")
    elif event_type == "conversation.item.input_audio_transcription.text":
      # text: confirmed text, stash: temporary text being processed
      print(f"[Original text] {response.get('text', '')}{response.get('stash', '')}")
    elif event_type == "response.audio_transcript.text":
      # text: confirmed text, stash: temporary text being processed
      print(f"[Translation result] {response.get('text', '')}{response.get('stash', '')}")
    elif event_type == "response.audio.delta":
      audio_b64 = response.get("delta", "")
      if audio_b64:
        self.speaker.write(base64.b64decode(audio_b64))
    elif event_type == "error":
      print(f"[Error] {response.get('error', {}).get('message', '')}")


def main():
  # Check for the API key.
  if not os.environ.get("DASHSCOPE_API_KEY"):
    print("Set the DASHSCOPE_API_KEY environment variable.")
    sys.exit(1)

  # Initialize PyAudio.
  pya = pyaudio.PyAudio()

  # Initialize the speaker for playing back the translated audio.
  speaker = pya.open(
    format=pyaudio.paInt16,
    channels=1,
    rate=24000,
    output=True,
    frames_per_buffer=2400
  )

  # Initialize the microphone for capturing speech input.
  mic = pya.open(
    format=pyaudio.paInt16,
    channels=1,
    rate=16000,
    input=True,
    frames_per_buffer=1600
  )

  # Create a callback instance.
  callback = Callback(speaker=speaker)

  # Create a real-time session.
  conversation = OmniRealtimeConversation(
    model="qwen3-livetranslate-flash-realtime",
    url="wss://dashscope-intl.aliyuncs.com/api-ws/v1/realtime",
    callback=callback
  )

  # Connect to the server.
  conversation.connect()

  # Configure translation parameters.
  translation_params = TranslationParams(
    language="en",  # Target language for translation: English
    corpus=TranslationParams.Corpus(
      phrases={
        "Source Term 1": "Target Translation 1",
        "Source Term 2": "Target Translation 2"
      }
    )
  )

  # Update the session configuration.
  conversation.update_session(
    output_modalities=[MultiModality.TEXT, MultiModality.AUDIO],
    input_audio_transcription_model="qwen3-asr-flash-realtime",
    voice="Cherry",
    translation_params=translation_params,
  )

  # Register the exit signal handler.
  def on_exit(sig, frame):
    print("\n[Exiting...]")
    mic.stop_stream()
    mic.close()
    speaker.stop_stream()
    speaker.close()
    pya.terminate()
    conversation.close()
    sys.exit(0)

  signal.signal(signal.SIGINT, on_exit)

  print("[Starting real-time translation] Speak into the microphone. Press Ctrl+C to exit.")

  # Continuously capture and send microphone audio.
  while True:
    audio_data = mic.read(1600, exception_on_overflow=False)
    conversation.append_audio(base64.b64encode(audio_data).decode("ascii"))


if __name__ == "__main__":
  main()
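
When you run the script with a valid DASHSCOPE_API_KEY, the console prints the speech start and stop markers, the incremental source transcript from qwen3-asr-flash-realtime, and the incremental translation, while the translated audio plays through the default output device. Press Ctrl+C to release the audio devices and close the connection.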