/**
 * Text-to-Speech Example with TinyLM
 *
 * This demonstrates speech generation with TinyLM, including:
 * - Basic speech generation
 * - Streaming for better handling of long texts
 * - Comparing streaming vs non-streaming approaches
 */
129
13- import { TinyLM , ProgressUpdate , FileInfo , OverallProgress } from '../../../src/index' ;
10+ import { TinyLM , ProgressUpdate , SpeechResult , SpeechStreamResult } from '../../../src/index' ;
1411import * as fs from 'fs' ;
1512import * as path from 'path' ;
1613import { fileURLToPath } from 'url' ;
@@ -19,31 +16,9 @@ import { fileURLToPath } from 'url';
1916const __filename = fileURLToPath ( import . meta. url ) ;
2017const __dirname = path . dirname ( __filename ) ;
2118
22- // Format bytes to human-readable size
23- function formatBytes ( bytes : number | undefined ) : string {
24- if ( bytes === 0 || ! bytes ) return '0 B' ;
25- const sizes = [ 'B' , 'KB' , 'MB' , 'GB' ] ;
26- const i = Math . floor ( Math . log ( bytes ) / Math . log ( 1024 ) ) ;
27- return `${ ( bytes / Math . pow ( 1024 , i ) ) . toFixed ( 2 ) } ${ sizes [ i ] } ` ;
28- }
29-
30- // Format seconds to human-readable time
31- function formatTime ( seconds : number | null ) : string {
32- if ( ! seconds || seconds === 0 ) return '' ;
33- if ( seconds < 60 ) return `${ Math . ceil ( seconds ) } s` ;
34- if ( seconds < 3600 ) {
35- const minutes = Math . floor ( seconds / 60 ) ;
36- const secs = Math . ceil ( seconds % 60 ) ;
37- return `${ minutes } m ${ secs } s` ;
38- }
39- const hours = Math . floor ( seconds / 3600 ) ;
40- const minutes = Math . floor ( ( seconds % 3600 ) / 60 ) ;
41- return `${ hours } h ${ minutes } m` ;
42- }
43-
44- // Format overall progress information nicely
19+ // Format progress for console output
4520function formatProgress ( progress : ProgressUpdate ) : string {
46- const { type, status, percentComplete, message, files , overall } = progress ;
21+ const { type, status, percentComplete, message } = progress ;
4722
4823 // Progress bar for numeric progress
4924 let progressBar = '' ;
@@ -56,67 +31,11 @@ function formatProgress(progress: ProgressUpdate): string {
5631 `] ${ percentComplete } %` ;
5732 }
5833
59- // Color based on type
60- let color = '' ;
61- let resetColor = '' ;
62- if ( typeof process !== 'undefined' && process . stdout &&
63- // TypeScript-safe check for hasColors method
64- typeof ( process . stdout as any ) . hasColors === 'function' &&
65- ( process . stdout as any ) . hasColors ( ) ) {
66- // Terminal colors
67- switch ( type ) {
68- case 'system' : color = '\x1b[36m' ; break ; // Cyan
69- case 'tts_model' : color = '\x1b[32m' ; break ; // Green
70- case 'speech' : color = '\x1b[35m' ; break ; // Magenta
71- default : color = '' ; break ;
72- }
73- resetColor = '\x1b[0m' ;
74- }
75-
7634 // Format output lines
77- let output = `${ color } [${ status } ]${ resetColor } ${ type ? `(${ type } )` : '' } ` ;
35+ let output = `[${ status } ] ${ type ? `(${ type } )` : '' } ` ;
7836 if ( progressBar ) output += ` ${ progressBar } ` ;
7937 if ( message ) output += ` ${ message } ` ;
8038
81- // Add overall stats if available
82- const overallProgress = overall as OverallProgress | undefined ;
83- if ( overallProgress && type === 'tts_model' && status === 'loading' ) {
84- output += `\n Total: ${ overallProgress . formattedLoaded } /${ overallProgress . formattedTotal } ` ;
85- if ( overallProgress . formattedSpeed ) {
86- output += ` at ${ overallProgress . formattedSpeed } ` ;
87- }
88- if ( overallProgress . formattedRemaining ) {
89- output += ` - ETA: ${ overallProgress . formattedRemaining } ` ;
90- }
91- }
92-
93- // Add file-specific progress if available
94- if ( Array . isArray ( files ) && files . length > 0 && type === 'tts_model' ) {
95- // Show active files first
96- const activeFiles = files . filter ( f => f . status !== 'done' && f . status !== 'error' ) ;
97- if ( activeFiles . length > 0 ) {
98- output += '\n Active downloads:' ;
99- activeFiles . forEach ( ( file : FileInfo ) => {
100- output += `\n ${ file . name } : ${ file . percentComplete } % (${ formatBytes ( file . bytesLoaded ) } /${ formatBytes ( file . bytesTotal ) } )` ;
101- if ( file . speed > 0 ) {
102- output += ` at ${ formatBytes ( file . speed ) } /s` ;
103- }
104- if ( file . timeRemaining ) {
105- output += ` - ETA: ${ formatTime ( file . timeRemaining ) } ` ;
106- }
107- } ) ;
108- }
109-
110- // Show recently completed files (last 2)
111- const doneFiles = files . filter ( f => f . status === 'done' ) . slice ( - 2 ) ;
112- if ( doneFiles . length > 0 ) {
113- output += '\n Recently completed:' ;
114- doneFiles . forEach ( ( file : FileInfo ) => {
115- output += `\n ${ file . name } : Complete (${ formatBytes ( file . bytesTotal ) } )` ;
116- } ) ;
117- }
118- }
119-
12039 return output ;
12140}
12241
@@ -129,6 +48,11 @@ function ensureOutputDirExists(): string {
12948 return outputDir ;
13049}
13150
51+ // Type guard to check if result is a streaming result
52+ function isStreamResult ( result : SpeechResult | SpeechStreamResult ) : result is SpeechStreamResult {
53+ return 'chunks' in result && Array . isArray ( ( result as SpeechStreamResult ) . chunks ) ;
54+ }
55+
13256// Main text-to-speech example
13357async function runTextToSpeechExample ( ) : Promise < void > {
13458 console . log ( '=== TinyLM Text-to-Speech Example ===' ) ;
@@ -139,15 +63,9 @@ async function runTextToSpeechExample(): Promise<void> {
13963 // Create a new TinyLM instance with custom progress tracking
14064 const tiny = new TinyLM ( {
14165 progressCallback : ( progress : ProgressUpdate ) => {
142- try {
143- console . log ( formatProgress ( progress ) ) ;
144- } catch ( error ) {
145- // Fallback to simple logging
146- console . log ( `[${ progress . status } ] ${ progress . message || '' } ` ) ;
147- console . error ( 'Error formatting progress:' , error ) ;
148- }
66+ console . log ( formatProgress ( progress ) ) ;
14967 } ,
150- progressThrottleTime : 100 , // Update frequently to show progress
68+ progressThrottleTime : 100
15169 } ) ;
15270
15371 try {
@@ -163,150 +81,70 @@ async function runTextToSpeechExample(): Promise<void> {
16381 // Initialize TinyLM
16482 console . log ( "\nInitializing TinyLM..." ) ;
16583 await tiny . init ( {
166- ttsModels : [ 'onnx-community/Kokoro-82M-v1.0-ONNX' ] , // The TTS model from the implementation
84+ ttsModels : [ 'onnx-community/Kokoro-82M-v1.0-ONNX' ]
16785 } ) ;
16886
169- // Example 1: Basic speech generation
87+ // Example 1: Basic speech generation (non-streaming)
17088 console . log ( "\n=== Example 1: Basic Speech Generation ===" ) ;
89+ const shortText = "Welcome to TinyLM. This is a library for running language models and text-to-speech in browsers and Node.js." ;
90+ console . log ( `\nGenerating speech for: "${ shortText } "` ) ;
17191
172- const text = "Hello world! This is an example of text-to-speech with TinyLM." ;
173- console . log ( `\nGenerating speech for: "${ text } "` ) ;
174-
175- const outputPath = path . join ( outputDir , 'basic_speech.wav' ) ;
176-
177- const result = await tiny . audio . speech . create ( {
92+ const basicResult = await tiny . audio . speech . create ( {
17893 model : 'onnx-community/Kokoro-82M-v1.0-ONNX' ,
179- input : text ,
180- voice : 'af' , // Default voice
94+ input : shortText ,
95+ voice : 'af_bella' ,
18196 response_format : 'wav'
18297 } ) ;
18398
184- // Save the audio buffer to a file
185- fs . writeFileSync ( outputPath , Buffer . from ( result . audio ) ) ;
186- console . log ( `\nSpeech saved to: ${ outputPath } ` ) ;
187- console . log ( `Generation time: ${ result . _tinylm ?. time_ms } ms` ) ;
188-
189- // Example 2: Using different voices
190- console . log ( "\n=== Example 2: Different Voices ===" ) ;
191-
192- // Create a function to generate speech with different voices
193- async function generateWithVoice ( voice : string ) : Promise < void > {
194- console . log ( `\nGenerating speech with voice: ${ voice } ` ) ;
195- const text = `This is an example of the ${ voice } voice.` ;
196-
197- const result = await tiny . audio . speech . create ( {
198- model : 'onnx-community/Kokoro-82M-v1.0-ONNX' ,
199- input : text ,
200- voice,
201- response_format : 'wav'
202- } ) ;
203-
204- const outputPath = path . join ( outputDir , `${ voice } _example.wav` ) ;
205- fs . writeFileSync ( outputPath , Buffer . from ( result . audio ) ) ;
206- console . log ( `Speech saved to: ${ outputPath } ` ) ;
207- }
208-
209- // Generate examples with different voices
210- await generateWithVoice ( 'af_bella' ) ; // American female
211- await generateWithVoice ( 'am_adam' ) ; // American male
212- await generateWithVoice ( 'bf_emma' ) ; // British female
213-
214- // Example 3: Speed adjustment
215- console . log ( "\n=== Example 3: Speed Adjustment ===" ) ;
216-
217- const speedText = "This is a demonstration of different speech speeds." ;
218-
219- async function generateWithSpeed ( speed : number ) : Promise < void > {
220- console . log ( `\nGenerating speech with speed: ${ speed } ` ) ;
221-
222- const result = await tiny . audio . speech . create ( {
223- model : 'onnx-community/Kokoro-82M-v1.0-ONNX' ,
224- input : speedText ,
225- voice : 'af_bella' ,
226- speed,
227- response_format : 'wav'
228- } ) ;
229-
230- const outputPath = path . join ( outputDir , `speed_${ speed . toString ( ) . replace ( '.' , '_' ) } .wav` ) ;
231- fs . writeFileSync ( outputPath , Buffer . from ( result . audio ) ) ;
232- console . log ( `Speech saved to: ${ outputPath } ` ) ;
99+ // Output result is a regular SpeechResult
100+ if ( ! isStreamResult ( basicResult ) ) {
101+ const basicPath = path . join ( outputDir , 'basic_speech.wav' ) ;
102+ fs . writeFileSync ( basicPath , Buffer . from ( basicResult . audio ) ) ;
103+ console . log ( `Speech saved to: ${ basicPath } ` ) ;
104+ console . log ( `Generation time: ${ basicResult . _tinylm ?. time_ms } ms` ) ;
233105 }
234106
235- // Generate examples with different speeds
236- await generateWithSpeed ( 0.8 ) ; // Slower
237- await generateWithSpeed ( 1.0 ) ; // Normal
238- await generateWithSpeed ( 1.2 ) ; // Faster
239-
240- // Example 4: Multi-language support
241- console . log ( "\n=== Example 4: Multi-language Support ===" ) ;
242-
243- async function generateInLanguage ( text : string , voice : string , description : string ) : Promise < void > {
244- console . log ( `\nGenerating speech in ${ description } ` ) ;
245-
246- const result = await tiny . audio . speech . create ( {
247- model : 'onnx-community/Kokoro-82M-v1.0-ONNX' ,
248- input : text ,
249- voice,
250- response_format : 'wav'
251- } ) ;
252-
253- const outputPath = path . join ( outputDir , `${ voice } _language_example.wav` ) ;
254- fs . writeFileSync ( outputPath , Buffer . from ( result . audio ) ) ;
255- console . log ( `Speech saved to: ${ outputPath } ` ) ;
256- }
257-
258- // Generate examples in different languages
259- await generateInLanguage ( "Hello, this is English text with an American accent." , "af_bella" , "American English" ) ;
260- await generateInLanguage ( "Hello, this is English text with a British accent." , "bf_emma" , "British English" ) ;
261- await generateInLanguage ( "Hola, este es un texto en español." , "ef_dora" , "Spanish" ) ;
262- await generateInLanguage ( "नमस्ते, यह हिंदी में एक उदाहरण है।" , "hf_alpha" , "Hindi" ) ;
263-
264- // Example 5: Advanced use - Generate a paragraph with natural pauses
265- console . log ( "\n=== Example 5: Advanced Use - Paragraph with Natural Pauses ===" ) ;
266-
267- const paragraph = "Welcome to the world of speech synthesis. Artificial voices have come a long way. They now sound much more natural and expressive. This technology enables many accessibility features. It's also used in virtual assistants and automated systems." ;
268-
269- // Split into sentences and add pauses
270- const sentences = paragraph . split ( '.' ) ;
271- const sentencesWithPauses = sentences . map ( s => s . trim ( ) ) . filter ( s => s . length > 0 ) ;
272-
273- console . log ( `\nGenerating paragraph speech with ${ sentencesWithPauses . length } sentences` ) ;
274-
275- // Generate speech for the full paragraph
276- const paragraphResult = await tiny . audio . speech . create ( {
107+ // Example 2: Streaming speech generation for long text
108+ console . log ( "\n=== Example 2: Streaming TTS for Long Text ===" ) ;
109+ const longText = `
110+ Streaming text-to-speech processes content in semantically meaningful chunks.
111+ This creates more natural speech with proper phrasing and intonation.
112+ Unlike non-streaming approaches, this maintains consistent prosody across sentence boundaries.
113+ The implementation handles sentence boundaries, ensuring natural pauses between thoughts.
114+ It's particularly useful for longer texts like articles or stories.
115+ When texts are processed as a whole, long content can lose natural cadence and timing.
116+ Streaming solves this by breaking content into manageable pieces.
117+ Each piece receives appropriate voice styling based on its content and length.
118+ The result is more human-like speech that's easier to follow and understand.
119+ ` ;
120+
121+ console . log ( `\nGenerating streaming speech for long text (${ longText . length } characters)` ) ;
122+
123+ // Generate speech with streaming enabled
124+ const streamResult = await tiny . audio . speech . create ( {
277125 model : 'onnx-community/Kokoro-82M-v1.0-ONNX' ,
278- input : paragraph ,
126+ input : longText ,
279127 voice : 'af_bella' ,
280- response_format : 'wav'
281- } ) ;
282-
283- const paragraphOutputPath = path . join ( outputDir , 'paragraph_speech.wav' ) ;
284- fs . writeFileSync ( paragraphOutputPath , Buffer . from ( paragraphResult . audio ) ) ;
285- console . log ( `Full paragraph speech saved to: ${ paragraphOutputPath } ` ) ;
286-
287- // Example 6: Offloading the model
288- console . log ( "\n=== Example 6: Model Management ===" ) ;
289-
290- // Get the list of loaded models
291- console . log ( "\nCurrently loaded TTS models:" , tiny . models . listTTS ( ) ) ;
292-
293- // Offload the model
294- console . log ( "\nOffloading TTS model..." ) ;
295- const offloadResult = await tiny . models . offloadTTS ( {
296- model : 'onnx-community/Kokoro-82M-v1.0-ONNX'
128+ response_format : 'wav' ,
129+ stream : true // Enable streaming
297130 } ) ;
298131
299- console . log ( "Model offloaded:" , offloadResult ) ;
300- console . log ( "TTS models still loaded:" , tiny . models . listTTS ( ) ) ;
301-
302- // Re-load the model
303- console . log ( "\nRe-loading TTS model..." ) ;
304- await tiny . models . loadTTS ( {
305- model : 'onnx-community/Kokoro-82M-v1.0-ONNX'
306- } ) ;
307-
308- console . log ( "TTS models after reloading:" , tiny . models . listTTS ( ) ) ;
309-
132+ // Check if result is a streaming result
133+ if ( isStreamResult ( streamResult ) ) {
134+ // Instead of trying to concatenate the chunks:
135+ console . log ( "\nNOTE: Streaming mode produces multiple audio files - one per chunk." ) ;
136+ console . log ( "For production use, consider using a proper audio library for concatenation." ) ;
137+
138+ // Save each chunk separately
139+ for ( let i = 0 ; i < streamResult . chunks . length ; i ++ ) {
140+ const chunk = streamResult . chunks [ i ] ;
141+ if ( chunk ) {
142+ const chunkPath = path . join ( outputDir , `stream_chunk_${ i + 1 } .wav` ) ;
143+ fs . writeFileSync ( chunkPath , Buffer . from ( chunk . audio ) ) ;
144+ console . log ( `Chunk ${ i + 1 } : "${ chunk . text . substring ( 0 , 40 ) } ..." saved to ${ path . basename ( chunkPath ) } ` ) ;
145+ }
146+ }
147+ }
310148 } catch ( error ) {
311149 const errorMessage = error instanceof Error ? error . message : String ( error ) ;
312150 console . error ( "\nError during execution:" , errorMessage ) ;
0 commit comments