Skip to main content
CosyVoice

Control pronunciation (SSML)

Rate, pitch, pauses, volume

SSML (Speech Synthesis Markup Language) is an XML markup language that controls speech rate, pitch, pauses, volume, and background music in CosyVoice.

Limitations

  • Models: cosyvoice-v3-flash, cosyvoice-v3-plus.
  • Voices: Cloned voices and system voices marked as SSML-enabled in the Voice list.
  • APIs:
    • Java SDK (2.20.3+): Non-streaming and unidirectional streaming only. See the Java SDK docs.
    • Python SDK (1.23.4+): Non-streaming and unidirectional streaming only. See the Python SDK docs.
    • WebSocket API: Set enable_ssml to true in run-task and send continue-task only once. See the WebSocket API docs.

Getting started

Check the Limitations section for supported models, voices, and APIs before using SSML.
Before you run the code:
  1. Get an API key
  2. Install the SDK (for Java/Python examples)

Java SDK

import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisParam;
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesizer;
import com.alibaba.dashscope.utils.Constants;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;

// See the Limitations section for SSML support requirements (models, voices, APIs)
/**
 * Minimal non-streaming SSML example: sends one {@code <speak>} document to CosyVoice,
 * saves the returned audio to {@code output.mp3}, and prints request metrics.
 */
public class Main {
  private static final String MODEL = "cosyvoice-v3-flash";
  private static final String VOICE = "longanyang";
  private static final String OUTPUT_PATH = "output.mp3";
  // XML special characters inside the SSML text must be escaped
  private static final String SSML_TEXT =
      "<speak rate=\"2\">My speaking rate is faster than a normal person's.</speak>";

  public static void main(String[] args) {
    Constants.baseWebsocketApiUrl = "wss://dashscope-intl.aliyuncs.com/api-ws/v1/inference";
    streamAudioDataToSpeaker();
    System.exit(0);
  }

  /**
   * Runs one blocking synthesis call, writes the audio (if any) to {@code output.mp3},
   * and prints the request ID and first-packet latency.
   *
   * @throws RuntimeException if the synthesis call or the file write fails
   */
  public static void streamAudioDataToSpeaker() {
    SpeechSynthesisParam request =
        SpeechSynthesisParam.builder()
            // If you have not configured an environment variable, replace the following line with: .apiKey("sk-xxx")
            .apiKey(System.getenv("DASHSCOPE_API_KEY"))
            .model(MODEL)
            .voice(VOICE)
            .build();

    SpeechSynthesizer tts = new SpeechSynthesizer(request, null);

    ByteBuffer synthesized = synthesizeAndClose(tts);
    if (synthesized != null) {
      saveToFile(synthesized);
    }

    // The first packet latency includes the time required to establish the WebSocket connection
    System.out.println(
        "[Metric] Request ID: " + tts.getLastRequestId()
            + ", First packet latency (ms): " + tts.getFirstPackageDelay());
  }

  /**
   * Performs the non-streaming call (blocks until audio is returned) and closes the
   * WebSocket connection afterwards, whether or not the call succeeded.
   */
  private static ByteBuffer synthesizeAndClose(SpeechSynthesizer tts) {
    try {
      return tts.call(SSML_TEXT);
    } catch (Exception e) {
      throw new RuntimeException(e);
    } finally {
      // Close the WebSocket connection after the task ends
      tts.getDuplexApi().close(1000, "bye");
    }
  }

  /** Writes the buffer's backing array to the local file "output.mp3". */
  private static void saveToFile(ByteBuffer audio) {
    File target = new File(OUTPUT_PATH);
    try (FileOutputStream sink = new FileOutputStream(target)) {
      sink.write(audio.array());
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }
}

Python SDK

# coding=utf-8
# See the Limitations section for SSML support requirements (models, voices, APIs)

import dashscope
from dashscope.audio.tts_v2 import *
import os

# Credentials: if you have not configured an environment variable,
# replace the following line with: dashscope.api_key = "sk-xxx"
dashscope.api_key = os.getenv('DASHSCOPE_API_KEY')

# WebSocket endpoint used by the SDK
dashscope.base_websocket_api_url = 'wss://dashscope-intl.aliyuncs.com/api-ws/v1/inference'

# Model and voice used for this request
MODEL_NAME = "cosyvoice-v3-flash"
VOICE_NAME = "longanyang"

# SSML document to synthesize; XML special characters in the text must be escaped
SSML_TEXT = "<speak rate=\"2\">My speaking rate is faster than a normal person's.</speak>"

# Build the synthesizer with the model, voice, and any other request parameters
tts = SpeechSynthesizer(model=MODEL_NAME, voice=VOICE_NAME)

# Non-streaming call; blocks until audio is returned
audio_bytes = tts.call(SSML_TEXT)

# Save the audio locally
with open('output.mp3', mode='wb') as out_file:
  out_file.write(audio_bytes)

# The first packet latency includes the time required to establish the WebSocket connection
request_id = tts.get_last_request_id()
first_packet_ms = tts.get_first_package_delay()
print(f'[Metric] Request ID: {request_id}, First packet latency: {first_packet_ms} ms')

WebSocket API

  • Go
  • C#
  • PHP
  • Node.js
  • Java (WebSocket)
  • Python (WebSocket)
// See the Limitations section for SSML support requirements (models, voices, APIs)

package main

import (
  "encoding/json"
  "fmt"
  "net/http"
  "os"
  "strings"
  "time"

  "github.com/google/uuid"
  "github.com/gorilla/websocket"
)

const (
  wsURL      = "wss://dashscope-intl.aliyuncs.com/api-ws/v1/inference/"
  outputFile = "output.mp3"
)

// main synthesizes a single SSML request over the WebSocket API and writes the
// returned audio frames to outputFile.
//
// Message flow implemented below: run-task (enable_ssml: true) -> task-started
// -> exactly one continue-task carrying the SSML text -> finish-task ->
// binary audio frames -> task-finished (or task-failed).
func main() {
  // If you have not configured an environment variable, replace the following line with: apiKey := "sk-xxx"
  apiKey := os.Getenv("DASHSCOPE_API_KEY")

  // Create (or truncate) the output file once and keep it open for the whole
  // run, so the handle is always closed and every I/O error is reported.
  out, err := os.Create(outputFile)
  if err != nil {
    fmt.Println("Failed to create output file:", err)
    return
  }
  defer out.Close()

  // Connect to WebSocket
  header := make(http.Header)
  header.Add("X-DashScope-DataInspection", "enable")
  header.Add("Authorization", fmt.Sprintf("bearer %s", apiKey))

  conn, resp, err := websocket.DefaultDialer.Dial(wsURL, header)
  if err != nil {
    if resp != nil {
      fmt.Printf("Connection failed. HTTP status code: %d\n", resp.StatusCode)
    }
    fmt.Println("Connection failed:", err)
    return
  }
  defer conn.Close()

  // sendCommand marshals cmd to JSON, logs it, and sends it as a text frame.
  sendCommand := func(action string, cmd map[string]interface{}) error {
    payload, err := json.Marshal(cmd)
    if err != nil {
      return err
    }
    fmt.Printf("Sending %s command: %s\n", action, string(payload))
    return conn.WriteMessage(websocket.TextMessage, payload)
  }

  // Generate task ID
  taskID := uuid.New().String()
  fmt.Printf("Generated task ID: %s\n", taskID)

  // Send run-task command
  runTaskCmd := map[string]interface{}{
    "header": map[string]interface{}{
      "action":    "run-task",
      "task_id":   taskID,
      "streaming": "duplex",
    },
    "payload": map[string]interface{}{
      "task_group": "audio",
      "task":       "tts",
      "function":   "SpeechSynthesizer",
      "model":      "cosyvoice-v3-flash",
      "parameters": map[string]interface{}{
        "text_type":   "PlainText",
        "voice":       "longanyang",
        "format":      "mp3",
        "sample_rate": 22050,
        "volume":      50,
        "rate":        1,
        "pitch":       1,
        // With enable_ssml: true, send continue-task only once
        "enable_ssml": true,
      },
      "input": map[string]interface{}{},
    },
  }
  if err := sendCommand("run-task", runTaskCmd); err != nil {
    fmt.Println("Failed to send run-task:", err)
    return
  }

  // Guards against sending the SSML text more than once
  textSent := false

  // Process messages until the task finishes, fails, or the connection breaks
  for {
    messageType, message, err := conn.ReadMessage()
    if err != nil {
      fmt.Println("Failed to read message:", err)
      return
    }

    // Binary messages carry audio data; append them to the output file
    if messageType == websocket.BinaryMessage {
      fmt.Printf("Received binary message, length: %d\n", len(message))
      if _, err := out.Write(message); err != nil {
        fmt.Println("Failed to write audio data:", err)
        return
      }
      continue
    }

    // Text messages carry JSON events
    messageStr := string(message)
    fmt.Printf("Received text message: %s\n", strings.ReplaceAll(messageStr, "\n", ""))

    // Parse JSON to get the event type; messages without one are ignored
    var msgMap map[string]interface{}
    if err := json.Unmarshal(message, &msgMap); err != nil {
      continue
    }
    eventHeader, ok := msgMap["header"].(map[string]interface{})
    if !ok {
      continue
    }
    event, ok := eventHeader["event"].(string)
    if !ok {
      continue
    }
    fmt.Printf("Event type: %s\n", event)

    switch event {
    case "task-started":
      fmt.Println("=== Received task-started event ===")
      if textSent {
        continue
      }

      // Send continue-task command; when using SSML, you can send this command only once
      continueTaskCmd := map[string]interface{}{
        "header": map[string]interface{}{
          "action":    "continue-task",
          "task_id":   taskID,
          "streaming": "duplex",
        },
        "payload": map[string]interface{}{
          "input": map[string]interface{}{
            // Escape special characters
            "text": "<speak rate=\"2\">My speaking rate is faster than a normal person's.</speak>",
          },
        },
      }
      if err := sendCommand("continue-task", continueTaskCmd); err != nil {
        fmt.Println("Failed to send continue-task:", err)
        return
      }
      textSent = true

      // Delay sending finish-task
      time.Sleep(500 * time.Millisecond)

      // Send finish-task command
      finishTaskCmd := map[string]interface{}{
        "header": map[string]interface{}{
          "action":    "finish-task",
          "task_id":   taskID,
          "streaming": "duplex",
        },
        "payload": map[string]interface{}{
          "input": map[string]interface{}{},
        },
      }
      if err := sendCommand("finish-task", finishTaskCmd); err != nil {
        fmt.Println("Failed to send finish-task:", err)
        return
      }

    case "task-finished":
      fmt.Println("=== Task finished ===")
      return

    case "task-failed":
      fmt.Println("=== Task failed ===")
      if eventHeader["error_message"] != nil {
        fmt.Printf("Error message: %s\n", eventHeader["error_message"])
      }
      return

    case "result-generated":
      fmt.Println("Received result-generated event")
    }
  }
}

Tags

CosyVoice SSML is based on W3C SSML 1.0 but supports only a subset of tags. Syntax rules:
  • Wrap all SSML content in <speak></speak> tags.
  • You can use multiple <speak> tags consecutively, but do not nest them.
  • Escape XML special characters: " → &quot;, ' → &apos;, & → &amp;, < → &lt;, > → &gt;.

<speak>: Root tag

Description Wrap all SSML content in <speak></speak> tags. Syntax
<speak>Text that requires SSML features</speak>
Properties
Property | Type | Required | Description
voice | String | No | Voice name. Overrides the voice API parameter. See Voice list.
rate | String | No | Speech rate. Overrides the speech_rate API parameter. Range: 0.5 to 2. Default: 1. Values above 1 are faster; below 1 are slower.
pitch | String | No | Pitch. Overrides the pitch_rate API parameter. Range: 0.5 to 2. Default: 1. Values above 1 are higher; below 1 are lower.
volume | String | No | Volume. Overrides the volume API parameter. Range: 0 to 100. Default: 50.
effect | String | No | Sound effect. Values: robot, lolita (lively female voice), lowpass, echo, eq (equalizer, advanced), lpfilter (low-pass filter, advanced), hpfilter (high-pass filter, advanced). Use effectValue to customize eq, lpfilter, and hpfilter. Only one effect per tag. Sound effects increase latency.
effectValue | String | No | Customizes the effect. For eq: a string of 8 space-separated integers (-20 to 20) for gain at ["40 Hz", "100 Hz", "200 Hz", "400 Hz", "800 Hz", "1600 Hz", "4000 Hz", "12000 Hz"]. Example: "1 1 1 1 1 1 1 1". For lpfilter: integer frequency in (0, sample_rate/2]. Example: "800". For hpfilter: integer frequency in (0, sample_rate/2]. Example: "1200".
bgm | String | No | Background music URL. The file must be in OSS with at least public-read permissions. Escape XML special characters in the URL. Requirements: 16 kHz sample rate, mono, WAV, 16-bit. If the synthesized audio is longer than the music, the music loops.
backgroundMusicVolume | String | No | Background music volume.
Examples Voice:
<speak voice="longcheng_v2">
  I am a male voice.
</speak>
Rate:
<speak rate="2">
  My speech rate is faster than normal.
</speak>
Pitch:
<speak pitch="0.5">
  However, my pitch is lower than others.
</speak>
Volume:
<speak volume="80">
  My volume is also very high.
</speak>
Effect:
<speak effect="robot">
  Do you like the robot WALL-E?
</speak>
Effect with effectValue:
<speak effect="eq" effectValue="1 -20 1 1 1 1 20 1">
  Do you like the robot WALL-E?
</speak>

<speak effect="lpfilter" effectValue="1200">
  Do you like the robot WALL-E?
</speak>

<speak effect="hpfilter" effectValue="1200">
  Do you like the robot WALL-E?
</speak>
If the audio is not in WAV format, convert it with ffmpeg:
ffmpeg -i input_audio -acodec pcm_s16le -ac 1 -ar 16000 output.wav
Background music (bgm):
<speak bgm="http://nls.alicdn.com/bgm/2.wav" backgroundMusicVolume="30" rate="0.5" volume="40">
  <break time="2s"/>
  The old trees on the shady cliff are shrouded in mist
  <break time="700ms"/>
  The sound of rain is still in the bamboo forest
  <break time="700ms"/>
  I know that cotton contributes to the country's plan
  <break time="700ms"/>
  The scenery of Mianzhou is always pitiable
  <break time="2s"/>
</speak>
You are legally responsible for the copyright of the uploaded audio.
Combined properties:
<speak>
  Text that requires SSML tags
</speak>
<speak rate="2" pitch="0.5" volume="80">
  So when put together, my voice sounds like this.
</speak>

<break>: Pause

Description Insert a pause. Set the duration in seconds (s) or milliseconds (ms). Syntax
# Without attributes
<break/>
# With the time attribute
<break time="string"/>
Break tag behavior:
  • Without attributes, <break/> defaults to a 1-second pause.
  • Warning: Consecutive <break> tags are summed, but the total is capped at 10 seconds.
For example, these three tags total 15 seconds, but only the first 10 seconds take effect:
<speak>
  Please close your eyes and take a rest.<break time="5s"/><break time="5s"/><break time="5s"/>Okay, please open your eyes.
</speak>
Properties
Property | Type | Required | Description
time | String | No | Pause duration, such as "2s" or "50ms". In seconds: 1 to 10. In milliseconds: 50 to 10000.
Example
<speak>
  Please close your eyes and take a rest.<break time="500ms"/>Okay, please open your eyes.
</speak>

<sub>: Replace text

Description Replace displayed text with a different pronunciation. Syntax
<sub alias="string"></sub>
Properties
Property | Type | Required | Description
alias | String | Yes | The text to read instead.
Example
<speak>
   <sub alias="network protocol">W3C</sub>
 </speak>

<phoneme>: Set pronunciation

Description Specify pronunciation using Pinyin (Chinese) or the CMU phonetic alphabet (English). Syntax
<phoneme alphabet="string" ph="string">text</phoneme>
Properties
Property | Type | Required | Description
alphabet | String | Yes | Pronunciation type: "py" (Pinyin) or "cmu" (phonetic alphabet). See The CMU Pronouncing Dictionary.
ph | String | Yes | The Pinyin or phonetic symbols. Separate each character's Pinyin with a space. The number of syllables must match the number of characters. Each syllable has a tone number (1 to 5, where 5 is neutral).
Example
<speak>
<phoneme alphabet="py" ph="dian3 dang4 hang2">典当行</phoneme>把这个玩意<phoneme alphabet="py" ph="dang4 diao4">当掉</phoneme>
</speak>

<speak>
  How to spell <phoneme alphabet="cmu" ph="S AY N">sin</phoneme>?
</speak>

<soundEvent>: Insert a sound effect

Description Insert an external sound file (prompt tones, ambient sounds) into synthesized speech. Syntax
<soundEvent src="URL"/>
Properties
Property | Type | Required | Description
src | String | Yes | Audio URL. The file must be in OSS with at least public-read permissions. Escape XML special characters in the URL. Requirements: 16 kHz sample rate, mono, WAV, 16-bit, max 2 MB.
If the audio is not in WAV format, convert it with ffmpeg:
ffmpeg -i input_audio -acodec pcm_s16le -ac 1 -ar 16000 output.wav
You are legally responsible for the copyright of the uploaded audio.
Example
<speak>
  A horse was frightened<soundEvent src="http://nls.alicdn.com/sound-event/horse-neigh.wav"/>and people scattered to avoid it.
</speak>

<say-as>: Set reading format

Description Specify how text is read (as numbers, dates, phone numbers, etc.). Syntax
<say-as interpret-as="string">text</say-as>
Properties
Property | Type | Required | Description
interpret-as | String | Yes | Text type. Values: cardinal (number), digits (individual digits), telephone (phone number), name, address, id (account name/nickname), characters (character by character), punctuation, date, time, currency, measure (unit of measure).

cardinal

Supported formats for cardinal:
FormatExampleEnglish outputDescription
Number string145one hundred forty fiveInteger range: up to 13 digits, [-999999999999, 999999999999]. Decimal: up to 13-digit integer part, up to 10-digit decimal part.
Number string starting with zero0145one hundred forty five
Negative sign + number string-145minus one hundred forty five
Three-digit number string separated by commas60,000sixty thousand
Negative sign + three-digit number string separated by commas-208,000minus two hundred eight thousand
Number string + decimal point + zero12.00twelve
Number string + decimal point + number string12.34twelve point three four
Three-digit number string separated by commas + decimal point + number string1,000.1one thousand point one
Negative sign + number string + decimal point + number string-12.34minus twelve point three four
Negative sign + three-digit number string separated by commas + decimal point + number string-1,000.1minus one thousand point one
(Three-digit comma-separated) number string + hyphen + (three-digit comma-separated) number1-1,000one to one thousand
Other default readings012.34twelve point three four
1/2one half
-3/4minus three quarters
5.1/6five point one over six
-3 1/2minus three and a half
1,000.3^3one thousand point three to the power of three
3e9.1three times ten to the power of nine point one
23.10%twenty three point one percent
Example
<speak>
  <say-as interpret-as="cardinal">12345</say-as>
</speak>
<speak>
  <say-as interpret-as="cardinal">10234</say-as>
</speak>

digits

Supported formats for digits:
FormatExampleEnglish outputDescription
Number string12034one two zero three fourNo strict length limit, but keep under 20 characters.
Number string + space or hyphen + number string + ...1-23-456 7890one, two three, four five six, seven eight nine zero
Example
<speak>
  <say-as interpret-as="digits">12345</say-as>
</speak>
<speak>
  <say-as interpret-as="digits">10234</say-as>
</speak>

telephone

Supported formats for telephone:
FormatExampleEnglish outputDescription
Number string12034one two oh three fourNo strict length limit, but keep under 20 characters.
Number string + space or hyphen + number string + ...1-23-456 7890one, two three, four five six, seven eight nine oh
Plus sign + number string + space or hyphen + number string+43-211-0567plus four three, two one one, oh five six seven
Left parenthesis + number string + right parenthesis + space + number string + space or hyphen + number string(21) 654-3210(two one) six five four, three two one oh
Example
<speak>
  <say-as interpret-as="telephone">12345</say-as>
</speak>
<speak>
  <say-as interpret-as="telephone">10234</say-as>
</speak>

name

Example
<speak>
  Her former name is <say-as interpret-as="name">Zeng Xiaofan</say-as>
</speak>

address

Not supported for English text.
Example
<speak>
  <say-as interpret-as="address">Fulu International, Building 1, Unit 3, Room 304</say-as>
</speak>

id

For English text, this works the same as characters.
Example
<speak>
  <say-as interpret-as="id">myid_1998</say-as>
</speak>

characters

Supported formats for characters:
FormatExampleEnglish outputDescription
string*b+3$.c-0'=αasterisk B plus three dollar dot C dash zero apostrophe equals alphaSupports Chinese characters, English letters, digits 0-9, and common symbols.
Example
<speak>
  <say-as interpret-as="characters">Greek letters αβ</say-as>
</speak>
<speak>
  <say-as interpret-as="characters">*b+3.c$=α</say-as>
</speak>

punctuation

For English text, this works the same as characters.
Example
<speak>
  <say-as interpret-as="punctuation"> -./:;</say-as>
</speak>

date

Supported formats for date:
FormatExampleEnglish outputDescription
Four digits/two digits or four digits-two digits2000/01two thousand, oh oneYear spans.
1900-01nineteen hundred, oh one
2001-02twenty oh one, oh two
2019-20twenty nineteen, twenty
1998-99nineteen ninety eight, ninety nine
1999-00nineteen ninety nine, oh oh
Four-digit number starting with 1 or 22000two thousandFour-digit year.
1900nineteen hundred
1905nineteen oh five
2021twenty twenty one
Day of the week-Day of the week or Day of the week~Day of the week or Day of the week&Day of the weekmon-wedmonday to wednesdayEscape XML special characters in range separators.
tue~frituesday to friday
sat&sunsaturday and sunday
DD-DD MMM, YYYY or DD~DD MMM, YYYY or DD&DD MMM, YYYY19-20 Jan, 2000the nineteenth to the twentieth of january two thousandDD = two-digit day. MMM = month abbreviation or full name. YYYY = four-digit year.
01 ~ 10 Jul, 2020the first to the tenth of july twenty twenty
05&06 Apr, 2009the fifth and the sixth of april two thousand nine
MMM DD-DD or MMM DD~DD or MMM DD&DDFeb 01 - 03february the first to the thirdMMM = month. DD = day.
Aug 10-20august the tenth to the twentieth
Dec 11&12december the eleventh and the twelfth
MMM-MMM or MMM~MMM or MMM&MMMJan-Junjanuary to juneMMM = month.
Jul - Decjuly to december
sep&octseptember and october
YYYY-YYYY or YYYY~YYYY1990 - 2000nineteen ninety to two thousandYYYY = four-digit year starting with 1 or 2.
2001-2021two thousand one to twenty twenty one
WWW DD MMM YYYYSun 20 Nov 2011sunday the twentieth of november twenty elevenWWW = day of week (abbreviation or full). DD = day. MMM = month. YYYY = year.
WWW DD MMMSun 20 Novsunday the twentieth of november
WWW MMM DD YYYYSun Nov 20 2011sunday november the twentieth twenty eleven
WWW MMM DDSun Nov 20sunday november the twentieth
WWW YYYY-MM-DDSat 2010-10-01saturday october the first twenty ten
WWW YYYY/MM/DDSat 2010/10/01saturday october the first twenty ten
WWW MM/DD/YYYYSun 11/20/2011sunday november the twentieth twenty eleven
MM/DD/YYYY11/20/2011november the twentieth twenty eleven
YYYY1998nineteen ninety eight
Other default readings10 Mar, 2001the tenth of march two thousand one
10 Marthe tenth of march
Mar 2001march two thousand one
Fri. 10/Mar/2001friday the tenth of march two thousand one
Mar 10th, 2001march the tenth two thousand one
Mar 10march the tenth
2001/03/10march the tenth two thousand one
2001-03-10march the tenth two thousand one
2000stwo thousands
2010'stwenty tens
1900'snineteen hundreds
1990snineteen nineties
Example
<speak>
  <say-as interpret-as="date">1000-10-10</say-as>
</speak>
<speak>
  <say-as interpret-as="date">10-01-2020</say-as>
</speak>

time

Supported formats for time:
FormatExampleEnglish outputDescription
HH:MM AM or PM09:00 AMnine A MHH = hour (1-2 digits). MM = minute (2 digits). AM/PM = morning or afternoon.
09:03 PMnine oh three P M
09:13 p.m.nine thirteen p m
HH:MM21:00twenty one hundred
HHMM100one oclock
Time point-Time point8:00 am - 05:30 pmeight a m to five thirty p mTime range formats.
7:05~10:15 AMseven oh five to ten fifteen A M
09:00-13:00nine oclock to thirteen hundred
Example
<speak>
  <say-as interpret-as="time">5:00am</say-as>
</speak>
<speak>
  <say-as interpret-as="time">0500</say-as>
</speak>

currency

Supported formats for currency:
FormatExampleEnglish outputDescription
Number + Currency identifier1.00 RMBone yuanSupports integers, decimals, and comma-separated thousands.
2.02 CNYtwo point zero two yuan
1,000.23 CN¥one thousand point two three yuan
1.01 SGDone singapore dollar and one cent
2.01 CADtwo canadian dollars and one cent
3.1 HKDthree hong kong dollars and ten cents
1,000.00 EURone thousand euros
Currency identifier + NumberUS$ 1.00one US dollarSupports integers, decimals, and comma-separated thousands.
$0.01one cent
JPY 1.01one japanese yen and one sen
£1.1one pound and ten pence
€2.01two euros and one cent
USD 1,000one thousand united states dollars
Number + Quantifier + Currency identifier or Currency identifier + Number + Quantifier1.23 Tn RMBone point two three trillion yuanQuantifiers: thousand, million, billion, trillion, Mil, mil, K, k, Bn, bn, Tn, tn.
$1.2 Kone point two thousand dollars
Example
<speak>
  <say-as interpret-as="currency">13,000,000.00RMB</say-as>
</speak>
<speak>
  <say-as interpret-as="currency">$1,000.01</say-as>
</speak>

measure

Supported formats for measure:
FormatExampleEnglish outputDescription
Number + Unit of measurement1.0 kgone kilogramSupports integers, decimals, and comma-separated thousands. Supports common unit abbreviations.
1,234.01 kmone thousand two hundred thirty-four point zero one kilometers
Unit of measurementmm2square millimeter
Example
<speak>
  <say-as interpret-as="measure">100m12cm6mm</say-as>
</speak>
<speak>
  <say-as interpret-as="measure">1,000.01kg</say-as>
</speak>

Symbol pronunciations

Common symbol pronunciations for <say-as>:
SymbolEnglish pronunciation
!exclamation mark
"double quote
#pound
$dollar
%percent
&and
'left quote
(left parenthesis
)right parenthesis
*asterisk
+plus
,comma
-dash
.dot
/slash
:colon
;semicolon
<less than
=equals
>greater than
?question mark
@at
[left bracket
\backslash
]right bracket
^caret
_underscore
`backtick
{left brace
|vertical bar
}right brace
~tilde
Full-width and special symbols:
SymbolEnglish pronunciation
！exclamation mark
“left double quote
”right double quote
‘left quote
’right quote
（left parenthesis
）right parenthesis
，comma
。full stop
—em dash
：colon
；semicolon
？question mark
、enumeration comma
…ellipsis
……ellipsis
《left guillemet
》right guillemet
￥yuan
≥greater than or equal to
≤less than or equal to
≠not equal
≈approximately equal
±plus or minus
×times
πpi
Greek letters (uppercase):
SymbolEnglish pronunciation
Αalpha
Βbeta
Γgamma
Δdelta
Εepsilon
Ζzeta
Θtheta
Ιiota
Κkappa
Λlambda
Μmu
Νnu
Ξksi
Οomicron
Πpi
Ρrho
Σsigma
Τtau
Υupsilon
Φphi
Χchi
Ψpsi
Ωomega
Greek letters (lowercase):
SymbolEnglish pronunciation
αalpha
βbeta
γgamma
δdelta
εepsilon
ζzeta
ηeta
θtheta
ιiota
κkappa
λlambda
μmu
νnu
ξksi
οomicron
πpi
ρrho
σsigma
τtau
υupsilon
φphi
χchi
ψpsi
ωomega

Common units of measurement

Common units for <say-as>:
CategoryUnits
Lengthnm (nanometer), μm (micrometer), mm (millimeter), cm (centimeter), m (meter), km (kilometer), ft (foot), in (inch)
Areacm² (square centimeter), m² (square meter), km² (square kilometer), SqFt (square foot)
Volumecm³ (cubic centimeter), m³ (cubic meter), km³ (cubic kilometer), mL (milliliter), L (liter), gal (gallon)
Weightμg (microgram), mg (milligram), g (gram), kg (kilogram)
Timemin (minute), sec (second), ms (millisecond)
ElectromagnetismμA (microamp), mA (milliamp), Hz (hertz), kHz (kilohertz), MHz (megahertz), GHz (gigahertz), V (volt), kV (kilovolt), kWh (kilowatt hour)
SounddB (decibel)
Atmospheric pressurePa (pascal), kPa (kilopascal), MPa (megapascal)
OtherAlso supports units like tsp (teaspoon), rpm (revolutions per minute), KB (kilobyte), mmHg (millimetre of mercury), and more.
Control pronunciation (SSML) | Qwen Cloud