
Commit 7f9d74a

feat: add examples for kokoro
1 parent c6faf4e commit 7f9d74a

File tree

1 file changed: +65 -227 lines

examples/node/audio-generation/kokoro-example.ts

Lines changed: 65 additions & 227 deletions
@@ -1,16 +1,13 @@
 /**
  * Text-to-Speech Example with TinyLM
  *
- * This example demonstrates the speech generation capabilities:
- * - Hardware capability detection
- * - TTS model loading with progress tracking
- * - Speech generation with different voices
- * - Speed adjustments
- * - Saving audio to files
- * - Multiple languages support
+ * This demonstrates speech generation with TinyLM, including:
+ * - Basic speech generation
+ * - Streaming for better handling of long texts
+ * - Comparing streaming vs non-streaming approaches
  */
 
-import { TinyLM, ProgressUpdate, FileInfo, OverallProgress } from '../../../src/index';
+import { TinyLM, ProgressUpdate, SpeechResult, SpeechStreamResult } from '../../../src/index';
 import * as fs from 'fs';
 import * as path from 'path';
 import { fileURLToPath } from 'url';
@@ -19,31 +16,9 @@ import { fileURLToPath } from 'url';
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = path.dirname(__filename);
 
-// Format bytes to human-readable size
-function formatBytes(bytes: number | undefined): string {
-  if (bytes === 0 || !bytes) return '0 B';
-  const sizes = ['B', 'KB', 'MB', 'GB'];
-  const i = Math.floor(Math.log(bytes) / Math.log(1024));
-  return `${(bytes / Math.pow(1024, i)).toFixed(2)} ${sizes[i]}`;
-}
-
-// Format seconds to human-readable time
-function formatTime(seconds: number | null): string {
-  if (!seconds || seconds === 0) return '';
-  if (seconds < 60) return `${Math.ceil(seconds)}s`;
-  if (seconds < 3600) {
-    const minutes = Math.floor(seconds / 60);
-    const secs = Math.ceil(seconds % 60);
-    return `${minutes}m ${secs}s`;
-  }
-  const hours = Math.floor(seconds / 3600);
-  const minutes = Math.floor((seconds % 3600) / 60);
-  return `${hours}h ${minutes}m`;
-}
-
-// Format overall progress information nicely
+// Format progress for console output
 function formatProgress(progress: ProgressUpdate): string {
-  const { type, status, percentComplete, message, files, overall } = progress;
+  const { type, status, percentComplete, message } = progress;
 
   // Progress bar for numeric progress
   let progressBar = '';
@@ -56,67 +31,11 @@ function formatProgress(progress: ProgressUpdate): string {
       `] ${percentComplete}%`;
   }
 
-  // Color based on type
-  let color = '';
-  let resetColor = '';
-  if (typeof process !== 'undefined' && process.stdout &&
-      // TypeScript-safe check for hasColors method
-      typeof (process.stdout as any).hasColors === 'function' &&
-      (process.stdout as any).hasColors()) {
-    // Terminal colors
-    switch (type) {
-      case 'system': color = '\x1b[36m'; break; // Cyan
-      case 'tts_model': color = '\x1b[32m'; break; // Green
-      case 'speech': color = '\x1b[35m'; break; // Magenta
-      default: color = ''; break;
-    }
-    resetColor = '\x1b[0m';
-  }
-
   // Format output lines
-  let output = `${color}[${status}]${resetColor} ${type ? `(${type})` : ''}`;
+  let output = `[${status}] ${type ? `(${type})` : ''}`;
   if (progressBar) output += ` ${progressBar}`;
   if (message) output += ` ${message}`;
 
-  // Add overall stats if available
-  const overallProgress = overall as OverallProgress | undefined;
-  if (overallProgress && type === 'tts_model' && status === 'loading') {
-    output += `\n Total: ${overallProgress.formattedLoaded}/${overallProgress.formattedTotal}`;
-    if (overallProgress.formattedSpeed) {
-      output += ` at ${overallProgress.formattedSpeed}`;
-    }
-    if (overallProgress.formattedRemaining) {
-      output += ` - ETA: ${overallProgress.formattedRemaining}`;
-    }
-  }
-
-  // Add file-specific progress if available
-  if (Array.isArray(files) && files.length > 0 && type === 'tts_model') {
-    // Show active files first
-    const activeFiles = files.filter(f => f.status !== 'done' && f.status !== 'error');
-    if (activeFiles.length > 0) {
-      output += '\n Active downloads:';
-      activeFiles.forEach((file: FileInfo) => {
-        output += `\n ${file.name}: ${file.percentComplete}% (${formatBytes(file.bytesLoaded)}/${formatBytes(file.bytesTotal)})`;
-        if (file.speed > 0) {
-          output += ` at ${formatBytes(file.speed)}/s`;
-        }
-        if (file.timeRemaining) {
-          output += ` - ETA: ${formatTime(file.timeRemaining)}`;
-        }
-      });
-    }
-
-    // Show recently completed files (last 2)
-    const doneFiles = files.filter(f => f.status === 'done').slice(-2);
-    if (doneFiles.length > 0) {
-      output += '\n Recently completed:';
-      doneFiles.forEach((file: FileInfo) => {
-        output += `\n ${file.name}: Complete (${formatBytes(file.bytesTotal)})`;
-      });
-    }
-  }
-
   return output;
 }
 
@@ -129,6 +48,11 @@ function ensureOutputDirExists(): string {
   return outputDir;
 }
 
+// Type guard to check if result is a streaming result
+function isStreamResult(result: SpeechResult | SpeechStreamResult): result is SpeechStreamResult {
+  return 'chunks' in result && Array.isArray((result as SpeechStreamResult).chunks);
+}
+
 // Main text-to-speech example
 async function runTextToSpeechExample(): Promise<void> {
   console.log('=== TinyLM Text-to-Speech Example ===');
@@ -139,15 +63,9 @@ async function runTextToSpeechExample(): Promise<void> {
   // Create a new TinyLM instance with custom progress tracking
   const tiny = new TinyLM({
     progressCallback: (progress: ProgressUpdate) => {
-      try {
-        console.log(formatProgress(progress));
-      } catch (error) {
-        // Fallback to simple logging
-        console.log(`[${progress.status}] ${progress.message || ''}`);
-        console.error('Error formatting progress:', error);
-      }
+      console.log(formatProgress(progress));
     },
-    progressThrottleTime: 100, // Update frequently to show progress
+    progressThrottleTime: 100
   });
 
   try {
@@ -163,150 +81,70 @@ async function runTextToSpeechExample(): Promise<void> {
     // Initialize TinyLM
     console.log("\nInitializing TinyLM...");
     await tiny.init({
-      ttsModels: ['onnx-community/Kokoro-82M-v1.0-ONNX'], // The TTS model from the implementation
+      ttsModels: ['onnx-community/Kokoro-82M-v1.0-ONNX']
     });
 
-    // Example 1: Basic speech generation
+    // Example 1: Basic speech generation (non-streaming)
     console.log("\n=== Example 1: Basic Speech Generation ===");
+    const shortText = "Welcome to TinyLM. This is a library for running language models and text-to-speech in browsers and Node.js.";
+    console.log(`\nGenerating speech for: "${shortText}"`);
 
-    const text = "Hello world! This is an example of text-to-speech with TinyLM.";
-    console.log(`\nGenerating speech for: "${text}"`);
-
-    const outputPath = path.join(outputDir, 'basic_speech.wav');
-
-    const result = await tiny.audio.speech.create({
+    const basicResult = await tiny.audio.speech.create({
       model: 'onnx-community/Kokoro-82M-v1.0-ONNX',
-      input: text,
-      voice: 'af', // Default voice
+      input: shortText,
+      voice: 'af_bella',
       response_format: 'wav'
     });
 
-    // Save the audio buffer to a file
-    fs.writeFileSync(outputPath, Buffer.from(result.audio));
-    console.log(`\nSpeech saved to: ${outputPath}`);
-    console.log(`Generation time: ${result._tinylm?.time_ms}ms`);
-
-    // Example 2: Using different voices
-    console.log("\n=== Example 2: Different Voices ===");
-
-    // Create a function to generate speech with different voices
-    async function generateWithVoice(voice: string): Promise<void> {
-      console.log(`\nGenerating speech with voice: ${voice}`);
-      const text = `This is an example of the ${voice} voice.`;
-
-      const result = await tiny.audio.speech.create({
-        model: 'onnx-community/Kokoro-82M-v1.0-ONNX',
-        input: text,
-        voice,
-        response_format: 'wav'
-      });
-
-      const outputPath = path.join(outputDir, `${voice}_example.wav`);
-      fs.writeFileSync(outputPath, Buffer.from(result.audio));
-      console.log(`Speech saved to: ${outputPath}`);
-    }
-
-    // Generate examples with different voices
-    await generateWithVoice('af_bella'); // American female
-    await generateWithVoice('am_adam'); // American male
-    await generateWithVoice('bf_emma'); // British female
-
-    // Example 3: Speed adjustment
-    console.log("\n=== Example 3: Speed Adjustment ===");
-
-    const speedText = "This is a demonstration of different speech speeds.";
-
-    async function generateWithSpeed(speed: number): Promise<void> {
-      console.log(`\nGenerating speech with speed: ${speed}`);
-
-      const result = await tiny.audio.speech.create({
-        model: 'onnx-community/Kokoro-82M-v1.0-ONNX',
-        input: speedText,
-        voice: 'af_bella',
-        speed,
-        response_format: 'wav'
-      });
-
-      const outputPath = path.join(outputDir, `speed_${speed.toString().replace('.', '_')}.wav`);
-      fs.writeFileSync(outputPath, Buffer.from(result.audio));
-      console.log(`Speech saved to: ${outputPath}`);
+    // Output result is a regular SpeechResult
+    if (!isStreamResult(basicResult)) {
+      const basicPath = path.join(outputDir, 'basic_speech.wav');
+      fs.writeFileSync(basicPath, Buffer.from(basicResult.audio));
+      console.log(`Speech saved to: ${basicPath}`);
+      console.log(`Generation time: ${basicResult._tinylm?.time_ms}ms`);
     }
 
-    // Generate examples with different speeds
-    await generateWithSpeed(0.8); // Slower
-    await generateWithSpeed(1.0); // Normal
-    await generateWithSpeed(1.2); // Faster
-
-    // Example 4: Multi-language support
-    console.log("\n=== Example 4: Multi-language Support ===");
-
-    async function generateInLanguage(text: string, voice: string, description: string): Promise<void> {
-      console.log(`\nGenerating speech in ${description}`);
-
-      const result = await tiny.audio.speech.create({
-        model: 'onnx-community/Kokoro-82M-v1.0-ONNX',
-        input: text,
-        voice,
-        response_format: 'wav'
-      });
-
-      const outputPath = path.join(outputDir, `${voice}_language_example.wav`);
-      fs.writeFileSync(outputPath, Buffer.from(result.audio));
-      console.log(`Speech saved to: ${outputPath}`);
-    }
-
-    // Generate examples in different languages
-    await generateInLanguage("Hello, this is English text with an American accent.", "af_bella", "American English");
-    await generateInLanguage("Hello, this is English text with a British accent.", "bf_emma", "British English");
-    await generateInLanguage("Hola, este es un texto en español.", "ef_dora", "Spanish");
-    await generateInLanguage("नमस्ते, यह हिंदी में एक उदाहरण है।", "hf_alpha", "Hindi");
-
-    // Example 5: Advanced use - Generate a paragraph with natural pauses
-    console.log("\n=== Example 5: Advanced Use - Paragraph with Natural Pauses ===");
-
-    const paragraph = "Welcome to the world of speech synthesis. Artificial voices have come a long way. They now sound much more natural and expressive. This technology enables many accessibility features. It's also used in virtual assistants and automated systems.";
-
-    // Split into sentences and add pauses
-    const sentences = paragraph.split('.');
-    const sentencesWithPauses = sentences.map(s => s.trim()).filter(s => s.length > 0);
-
-    console.log(`\nGenerating paragraph speech with ${sentencesWithPauses.length} sentences`);
-
-    // Generate speech for the full paragraph
-    const paragraphResult = await tiny.audio.speech.create({
+    // Example 2: Streaming speech generation for long text
+    console.log("\n=== Example 2: Streaming TTS for Long Text ===");
+    const longText = `
+Streaming text-to-speech processes content in semantically meaningful chunks.
+This creates more natural speech with proper phrasing and intonation.
+Unlike non-streaming approaches, this maintains consistent prosody across sentence boundaries.
+The implementation handles sentence boundaries, ensuring natural pauses between thoughts.
+It's particularly useful for longer texts like articles or stories.
+When texts are processed as a whole, long content can lose natural cadence and timing.
+Streaming solves this by breaking content into manageable pieces.
+Each piece receives appropriate voice styling based on its content and length.
+The result is more human-like speech that's easier to follow and understand.
+`;
+
+    console.log(`\nGenerating streaming speech for long text (${longText.length} characters)`);
+
+    // Generate speech with streaming enabled
+    const streamResult = await tiny.audio.speech.create({
       model: 'onnx-community/Kokoro-82M-v1.0-ONNX',
-      input: paragraph,
+      input: longText,
       voice: 'af_bella',
-      response_format: 'wav'
-    });
-
-    const paragraphOutputPath = path.join(outputDir, 'paragraph_speech.wav');
-    fs.writeFileSync(paragraphOutputPath, Buffer.from(paragraphResult.audio));
-    console.log(`Full paragraph speech saved to: ${paragraphOutputPath}`);
-
-    // Example 6: Offloading the model
-    console.log("\n=== Example 6: Model Management ===");
-
-    // Get the list of loaded models
-    console.log("\nCurrently loaded TTS models:", tiny.models.listTTS());
-
-    // Offload the model
-    console.log("\nOffloading TTS model...");
-    const offloadResult = await tiny.models.offloadTTS({
-      model: 'onnx-community/Kokoro-82M-v1.0-ONNX'
+      response_format: 'wav',
+      stream: true // Enable streaming
     });
 
-    console.log("Model offloaded:", offloadResult);
-    console.log("TTS models still loaded:", tiny.models.listTTS());
-
-    // Re-load the model
-    console.log("\nRe-loading TTS model...");
-    await tiny.models.loadTTS({
-      model: 'onnx-community/Kokoro-82M-v1.0-ONNX'
-    });
-
-    console.log("TTS models after reloading:", tiny.models.listTTS());
-
+    // Check if result is a streaming result
+    if (isStreamResult(streamResult)) {
+      // Instead of trying to concatenate the chunks:
+      console.log("\nNOTE: Streaming mode produces multiple audio files - one per chunk.");
+      console.log("For production use, consider using a proper audio library for concatenation.");
+
+      // Save each chunk separately
+      for (let i = 0; i < streamResult.chunks.length; i++) {
+        const chunk = streamResult.chunks[i];
+        if (chunk) {
+          const chunkPath = path.join(outputDir, `stream_chunk_${i+1}.wav`);
+          fs.writeFileSync(chunkPath, Buffer.from(chunk.audio));
+          console.log(`Chunk ${i+1}: "${chunk.text.substring(0, 40)}..." saved to ${path.basename(chunkPath)}`);
+        }
+      }
+    }
   } catch (error) {
     const errorMessage = error instanceof Error ? error.message : String(error);
     console.error("\nError during execution:", errorMessage);

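The streaming path above writes one WAV file per chunk, and the example's own note recommends a proper audio library for joining them. For a quick local check, a naive concatenation can work when every chunk shares the same format. The sketch below is not part of this commit: it assumes each chunk is a canonical 44-byte-header PCM WAV (a single fmt and data chunk) with identical sample rate, channel count, and bit depth, which should hold when all chunks come from the same Kokoro voice and settings, but verify before relying on it. The file name wav-concat-sketch.ts and the chunk paths are hypothetical.

// wav-concat-sketch.ts (hypothetical helper, not part of this commit)
// Joins the stream_chunk_*.wav files written by the streaming example into one WAV.
// Assumes canonical 44-byte PCM WAV headers and identical audio formats across chunks.
import * as fs from 'fs';

function concatWavBuffers(buffers: Buffer[]): Buffer {
  const [first] = buffers;
  if (!first) throw new Error('No WAV buffers to concatenate');

  const HEADER_SIZE = 44; // canonical PCM WAV header length
  const header = Buffer.from(first.subarray(0, HEADER_SIZE));
  const data = Buffer.concat(buffers.map(b => b.subarray(HEADER_SIZE)));

  // Patch the RIFF chunk size (offset 4) and the data sub-chunk size (offset 40).
  header.writeUInt32LE(36 + data.length, 4);
  header.writeUInt32LE(data.length, 40);

  return Buffer.concat([header, data]);
}

// Usage: pass the chunk files in order, e.g.
//   wav-concat-sketch.ts path/to/stream_chunk_1.wav path/to/stream_chunk_2.wav
const inputs = process.argv.slice(2);
if (inputs.length > 0) {
  const joined = concatWavBuffers(inputs.map(f => fs.readFileSync(f)));
  fs.writeFileSync('stream_joined.wav', joined);
  console.log(`Wrote stream_joined.wav (${joined.length} bytes)`);
}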
0 commit comments
