Real-time ASR WebSocket
Use the WebSocket API to connect to Fun-ASR real-time speech recognition from any programming language. For easier integration, use the Python SDK or Java SDK instead.
User guide: For model details and selection, see Realtime speech recognition.
Getting started
Prerequisites
- Get an API key and export it as an environment variable.
- Download the sample audio: asr_example.wav.
Sample code
- Node.js
- C#
- PHP
- Go
Install dependencies, then run the sample code for your language.
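The language-specific samples are not reproduced here. The following is a minimal Python sketch of the full flow. It assumes the third-party websockets package (pip install websockets); ENDPOINT is a placeholder you must replace with the connection endpoint from the API reference below.

```python
"""Minimal Fun-ASR real-time recognition client (sketch, not official sample code)."""
import json
import os
import uuid

# Placeholder; replace with the connection endpoint from the API reference.
ENDPOINT = "wss://example.com/api-ws/v1/inference"


def run_task_message(task_id: str, audio_format: str = "pcm",
                     sample_rate: int = 16000) -> str:
    """Build the run-task instruction from the parameter tables."""
    return json.dumps({
        "header": {"action": "run-task", "task_id": task_id, "streaming": "duplex"},
        "payload": {
            "task_group": "audio", "task": "asr", "function": "recognition",
            "model": "fun-asr-realtime",
            "parameters": {"format": audio_format, "sample_rate": sample_rate},
            "input": {},
        },
    })


def finish_task_message(task_id: str) -> str:
    """Build the finish-task instruction; task_id must match run-task."""
    return json.dumps({
        "header": {"action": "finish-task", "task_id": task_id, "streaming": "duplex"},
        "payload": {"input": {}},
    })


async def recognize(path: str) -> None:
    import websockets  # third-party; on older versions use extra_headers=

    task_id = uuid.uuid4().hex
    headers = {"Authorization": f"Bearer {os.environ['DASHSCOPE_API_KEY']}"}
    async with websockets.connect(ENDPOINT, additional_headers=headers) as ws:
        await ws.send(run_task_message(task_id))
        # Wait for task-started before streaming audio.
        while json.loads(await ws.recv())["header"]["event"] != "task-started":
            pass
        with open(path, "rb") as f:
            while chunk := f.read(3200):  # ~100 ms of 16 kHz 16-bit mono PCM
                await ws.send(chunk)
        await ws.send(finish_task_message(task_id))
        # Print results until the task finishes or fails.
        while True:
            msg = json.loads(await ws.recv())
            event = msg["header"]["event"]
            if event == "result-generated":
                print(msg["payload"]["output"]["sentence"]["text"])
            elif event in ("task-finished", "task-failed"):
                break
```

The chunk size is a judgment call: around 100 ms of audio per frame keeps latency low without flooding the server.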
Core concepts
Interaction flow
The client and server interact in this sequence:
1. Connect: Send a WebSocket connection request with authentication in the header.
2. Start the task: Send a run-task instruction with the model and audio parameters.
3. Confirm the task: The server returns a task-started event. You can now send audio.
4. Stream audio: Send binary audio data continuously. The server returns result-generated events with intermediate and final results in real time.
5. End the task: Send a finish-task instruction after all audio is sent.
6. Confirm completion: The server returns a task-finished event after processing remaining audio.
7. Disconnect: Either side closes the WebSocket connection.
Audio requirements
- Channels: Mono only.
- Formats: pcm, wav, mp3, opus, speex, aac, amr. WAV files must use PCM encoding. Opus and Speex files must use an Ogg container. The amr format supports AMR-NB only.
- Sample rate: Must match sample_rate in the run-task instruction.
Models
| Model | Version | Unit price | Free quota |
|---|---|---|---|
| fun-asr-realtime (currently fun-asr-realtime-2025-11-07) | Stable | $0.00009/second | 36,000 seconds (10 hours), valid for 90 days |
| fun-asr-realtime-2025-11-07 | Snapshot | $0.00009/second | 36,000 seconds (10 hours), valid for 90 days |
- Languages: Mandarin, Cantonese, Wu, Minnan, Hakka, Gan, Xiang, and Jin. Also supports regional Mandarin accents (Zhongyuan, Southwest, Jilu, Jianghuai, Lanyin, Jiaoliao, Northeast, Beijing, and Hong Kong-Taiwan), covering Henan, Shaanxi, Hubei, Sichuan, Chongqing, Yunnan, Guizhou, Guangdong, Guangxi, Hebei, Tianjin, Shandong, Anhui, Nanjing, Jiangsu, Hangzhou, Gansu, and Ningxia. English and Japanese are also supported.
- Sample rate: 16 kHz
- Audio formats: pcm, wav, mp3, opus, speex, aac, amr
API reference
Connection endpoint
Headers
| Parameter | Type | Required | Description |
|---|---|---|---|
| Authorization | string | Yes | Authentication token. Format: Bearer $DASHSCOPE_API_KEY. |
| user-agent | string | No | Client identifier. Helps the server track request sources. |
| X-DashScope-WorkSpace | string | No | Qwen Cloud workspace ID. |
| X-DashScope-DataInspection | string | No | Enable data compliance checks. Default: enable. Disable only when necessary. |
Instructions (client to server)
Instructions are JSON messages that control the task lifecycle.
1. run-task instruction: Start a task
Start a recognition task and set its parameters after connecting.
Example:
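The original example payload is not shown on this page. Reconstructed from the header and payload tables below, a run-task instruction has this shape (the task_id value is illustrative):

```json
{
  "header": {
    "action": "run-task",
    "task_id": "2bf83b9a-baeb-4fda-8d9a-f01a12345678",
    "streaming": "duplex"
  },
  "payload": {
    "task_group": "audio",
    "task": "asr",
    "function": "recognition",
    "model": "fun-asr-realtime",
    "parameters": {
      "format": "pcm",
      "sample_rate": 16000
    },
    "input": {}
  }
}
```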
header parameters:
| Parameter | Type | Required | Description |
|---|---|---|---|
| header.action | string | Yes | Instruction type. Set to run-task. |
| header.task_id | string | Yes | Unique task ID. Use the same value in the finish-task instruction. |
| header.streaming | string | Yes | Communication pattern. Set to duplex. |
payload parameters:
| Parameter | Type | Required | Description |
|---|---|---|---|
| payload.task_group | string | Yes | Task group. Set to audio. |
| payload.task | string | Yes | Task type. Set to asr. |
| payload.function | string | Yes | Function type. Set to recognition. |
| payload.model | string | Yes | Model name. See the model list. |
| payload.input | object | Yes | Input configuration. Set to {}. |
payload.parameters fields:

| Parameter | Type | Required | Description |
|---|---|---|---|
| format | string | Yes | Audio format: pcm, wav, mp3, opus, speex, aac, amr. See Audio requirements. |
| sample_rate | integer | Yes | Audio sample rate in Hz. fun-asr-realtime supports 16000 Hz. |
| vocabulary_id | string | No | Vocabulary ID for hotword recognition. See Customize hotwords. |
| semantic_punctuation_enabled | boolean | No | Enable semantic punctuation. Default: false. - true: High-accuracy punctuation suited for meetings. Disables VAD punctuation. - false: Low-latency VAD punctuation suited for interactive use. Semantic punctuation finds sentence boundaries more accurately. VAD responds faster. |
| max_sentence_silence | integer | No | VAD silence threshold in ms. A sentence ends when silence exceeds this value. Default: 1300. Range: [200, 6000]. Only applies when semantic_punctuation_enabled is false. |
| multi_threshold_mode_enabled | boolean | No | Prevent overly long sentences in VAD mode. Default: false. Only applies when semantic_punctuation_enabled is false. |
| heartbeat | boolean | No | Enable keep-alive. Default: false. - true: Connection stays open when you send silent audio continuously. - false: Connection times out after 60 seconds of silent audio. |
| language_hints | array[string] | No | Language codes for recognition. Leave unset for automatic detection. Supported codes: zh (Chinese), en (English), ja (Japanese). |
| speech_noise_threshold | float | No | Speech-noise detection threshold for VAD sensitivity. Range: [-1.0, 1.0]. Near -1: more noise may be transcribed as speech. Near +1: some speech may be filtered as noise. |
speech_noise_threshold is an advanced parameter. Small changes significantly affect recognition quality. Adjust it in steps of 0.1 and test thoroughly.

2. finish-task instruction: End a task
Tell the server that audio transmission is complete.
Example:
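The original example payload is not shown on this page. Reconstructed from the tables below, a finish-task instruction has this shape (the task_id must match the one from run-task; the value here is illustrative):

```json
{
  "header": {
    "action": "finish-task",
    "task_id": "2bf83b9a-baeb-4fda-8d9a-f01a12345678",
    "streaming": "duplex"
  },
  "payload": {
    "input": {}
  }
}
```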
header parameters:
| Parameter | Type | Required | Description |
|---|---|---|---|
| header.action | string | Yes | Instruction type. Set to finish-task. |
| header.task_id | string | Yes | Task ID. Must match task_id from the run-task instruction. |
| header.streaming | string | Yes | Communication pattern. Set to duplex. |
payload parameters:
| Parameter | Type | Required | Description |
|---|---|---|---|
| payload.input | object | Yes | Input configuration. Set to {}. |
Events (server to client)
Events are JSON messages that report task status and recognition results.
1. task-started
Returned when the server processes the run-task instruction. You can now send audio.
Example:
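The original example payload is not shown on this page. Based on the header table below, a task-started event has this shape (the task_id is illustrative, and the empty payload is an assumption since no payload fields are documented for this event):

```json
{
  "header": {
    "event": "task-started",
    "task_id": "2bf83b9a-baeb-4fda-8d9a-f01a12345678"
  },
  "payload": {}
}
```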
header parameters:
| Parameter | Type | Description |
|---|---|---|
| header.event | string | Event type. Set to task-started. |
| header.task_id | string | Task ID. |
2. result-generated
Returned when the server produces a recognition result. Contains intermediate and final sentences.
Example:
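The original example payload is not shown on this page. Reconstructed from the header and payload tables below, a result-generated event for a completed sentence has this shape (all values are illustrative):

```json
{
  "header": {
    "event": "result-generated",
    "task_id": "2bf83b9a-baeb-4fda-8d9a-f01a12345678"
  },
  "payload": {
    "output": {
      "sentence": {
        "begin_time": 170,
        "end_time": 1540,
        "text": "Hello world.",
        "words": [
          {"begin_time": 170, "end_time": 480, "text": "Hello", "punctuation": ""},
          {"begin_time": 480, "end_time": 1540, "text": "world", "punctuation": "."}
        ],
        "sentence_end": true
      }
    },
    "usage": {
      "duration": 2
    }
  }
}
```

For an intermediate result, end_time would be null, sentence_end would be false, and usage would be null.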
header parameters:
| Parameter | Type | Description |
|---|---|---|
| header.event | string | Event type. Set to result-generated. |
| header.task_id | string | Task ID. |
payload parameters:
| Parameter | Type | Description |
|---|---|---|
| output | object | output.sentence contains the recognition result. See below. |
| usage | object | null when the sentence is incomplete (sentence_end = false). When complete (sentence_end = true), usage.duration is the billable duration in seconds. |
payload.usage parameters:
| Parameter | Type | Description |
|---|---|---|
| duration | integer | Billable duration in seconds. |
payload.output.sentence parameters:
| Parameter | Type | Description |
|---|---|---|
| begin_time | integer | Sentence start time in ms. |
| end_time | integer | null | Sentence end time in ms. null for intermediate results. |
| text | string | Recognized text. |
| words | array | Word-level timestamps. |
| heartbeat | boolean | null | If true, skip this result. Matches the heartbeat setting in the run-task instruction. |
| sentence_end | boolean | Whether the sentence has ended. |
payload.output.sentence.words parameters:
| Parameter | Type | Description |
|---|---|---|
| begin_time | integer | Word start time in ms. |
| end_time | integer | Word end time in ms. |
| text | string | Recognized word. |
| punctuation | string | Trailing punctuation. |
3. task-finished
Returned after the server receives the finish-task instruction and finishes processing remaining audio.
Example:
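The original example payload is not shown on this page. Based on the header table below, a task-finished event has this shape (the task_id is illustrative, and the empty payload is an assumption since no payload fields are documented for this event):

```json
{
  "header": {
    "event": "task-finished",
    "task_id": "2bf83b9a-baeb-4fda-8d9a-f01a12345678"
  },
  "payload": {}
}
```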
header parameters:
| Parameter | Type | Description |
|---|---|---|
| header.event | string | Event type. Set to task-finished. |
| header.task_id | string | Task ID. |
4. task-failed
Returned when an error occurs during task processing. Close the connection and handle the error.
Example:
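The original example payload is not shown on this page. Based on the header table below, a task-failed event has this shape (the task_id, error_code, and error_message values are illustrative, not actual server output):

```json
{
  "header": {
    "event": "task-failed",
    "task_id": "2bf83b9a-baeb-4fda-8d9a-f01a12345678",
    "error_code": "CLIENT_ERROR",
    "error_message": "request timeout"
  },
  "payload": {}
}
```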
header parameters:
| Parameter | Type | Description |
|---|---|---|
| header.event | string | Event type. Set to task-failed. |
| header.task_id | string | Task ID. |
| header.error_code | string | Error type. |
| header.error_message | string | Error details. |
Connection reuse
You can reuse a WebSocket connection across tasks. After the server returns a task-finished event, send another run-task instruction on the same connection.
- Each task on a reused connection must have a unique task_id.
- Failed tasks trigger a task-failed event and close the connection (no reuse).
- Connections time out after 60 seconds of inactivity.