Chrome Speech Synthesis with longer texts

Javascript, Google Chrome, Speech Synthesis

Javascript Problem Overview


I am getting a problem when trying to use Speech Synthesis API in Chrome 33. It works perfectly with a shorter text, but if I try longer text, it just stops in the middle. After it has stopped once like that, the Speech Synthesis does not work anywhere within Chrome until the browser is restarted.

Example code (http://jsfiddle.net/Mdm47/1/):

/**
 * Queues `text` on the browser's speech synthesizer.
 *
 * The utterance uses the 11th entry of the voice list, full volume,
 * normal rate, the highest pitch and the US English locale.
 *
 * @param {string} text - The text to be spoken.
 */
function speak(text) {
    var utterance = new SpeechSynthesisUtterance();
    var availableVoices = speechSynthesis.getVoices();

    // Properties are assigned in the listed order.
    Object.assign(utterance, {
        voice: availableVoices[10],
        voiceURI: 'native',
        volume: 1,
        rate: 1,
        pitch: 2,
        text: text,
        lang: 'en-US'
    });

    speechSynthesis.speak(utterance);
}

// Queue three utterances back to back: a short text, a much longer text,
// and another short text.
speak('Short text');
speak('Collaboratively administrate empowered markets via plug-and-play networks. Dynamically procrastinate B2C users after installed base benefits. Dramatically visualize customer directed convergence without revolutionary ROI. Efficiently unleash cross-media information without cross-media value. Quickly maximize timely deliverables for real-time schemas. Dramatically maintain clicks-and-mortar solutions without functional solutions.');
speak('Another short text');

It stops speaking in the middle of the second text, and I can't get any other page to speak after that.

Is it a browser bug or some kind of security limitation?

Javascript Solutions


Solution 1 - Javascript

I've had this issue for a while now with Google Chrome Speech Synthesis. After some investigation, I discovered the following:

  • The breaking of the utterances only happens when the voice is not a native voice,
  • The cutting out usually occurs between 200-300 characters,
  • When it does break you can un-freeze it by doing speechSynthesis.cancel();
  • The 'onend' event sometimes decides not to fire. A quirky work-around to this is to console.log() out the utterance object before speaking it. Also I found wrapping the speak invocation in a setTimeout callback helps smooth these issues out.

In response to these problems, I have written a function that overcomes the character limit, by chunking the text up into smaller utterances, and playing them one after another. Obviously you'll get some odd sounds sometimes as sentences might be chunked into two separate utterances with a small time delay in between each, however the code will try and split these points at punctuation marks as to make the breaks in sound less obvious.

Update

I've made this work-around publicly available at https://gist.github.com/woollsta/2d146f13878a301b36d7#file-chunkify-js. Many thanks to Brett Zamir for his contributions.

The function:

/**
 * Speaks a (possibly long) utterance as a sequence of short utterances.
 *
 * When the utterance's voice reports `voiceURI === 'native'` the utterance is
 * spoken as a whole. Otherwise its text is cut into chunks of roughly
 * `settings.chunkLength` characters, preferably ending at punctuation or a
 * space, and each chunk is spoken once the previous chunk's `end` event fires.
 *
 * Setting `speechUtteranceChunker.cancel = true` stops the chain after the
 * chunk that is currently being spoken; in that case `callback` is not called.
 *
 * @param {SpeechSynthesisUtterance} utt - Utterance carrying the full text.
 * @param {Object} [settings]
 * @param {number} [settings.chunkLength=160] - Target maximum chunk size.
 * @param {number} [settings.offset] - Index in `utt.text` to start from;
 *     updated in place as chunks are spoken.
 * @param {function(SpeechSynthesisUtterance)} [settings.modifier] - Called
 *     with every utterance right before it is queued.
 * @param {function()} [callback] - Called once all text has been spoken.
 */
var speechUtteranceChunker = function (utt, settings, callback) {
    settings = settings || {};
    var newUtt;
    var txt = (settings.offset !== undefined ? utt.text.substring(settings.offset) : utt.text);
    if (utt.voice && utt.voice.voiceURI === 'native') { // Not part of the spec
        newUtt = utt;
        newUtt.text = txt;
        newUtt.addEventListener('end', function () {
            if (speechUtteranceChunker.cancel) {
                speechUtteranceChunker.cancel = false;
            }
            if (callback !== undefined) {
                callback();
            }
        });
    }
    else {
        // Nothing but whitespace left: all text has been spoken.
        if (!/\S/.test(txt)) {
            if (callback !== undefined) {
                callback();
            }
            return;
        }

        var chunkLength = settings.chunkLength || 160;
        // Alternatives, in order of preference:
        //   1. half..full chunk length followed by a punctuation mark,
        //   2. the whole remaining text when it fits into one chunk,
        //   3. as many characters as fit, ending at a space.
        var pattRegex = new RegExp('^[\\s\\S]{' + Math.floor(chunkLength / 2) + ',' + chunkLength + '}[.!?,]{1}|^[\\s\\S]{1,' + chunkLength + '}$|^[\\s\\S]{1,' + chunkLength + '} ');
        var chunkArr = txt.match(pattRegex);

        // match() yields null when the text starts with a run longer than
        // chunkLength that contains no usable break point; cut it hard
        // instead of failing on chunkArr[0].
        var chunk = chunkArr !== null ? chunkArr[0] : txt.substring(0, chunkLength);

        newUtt = new SpeechSynthesisUtterance(chunk);
        var x;
        for (x in utt) {
            if (utt.hasOwnProperty(x) && x !== 'text') {
                newUtt[x] = utt[x];
            }
        }
        // The standard utterance attributes are accessors on the prototype in
        // current browsers, so the own-property loop above does not see them.
        ['lang', 'voice', 'volume', 'rate', 'pitch'].forEach(function (attr) {
            if (utt[attr] !== undefined) {
                newUtt[attr] = utt[attr];
            }
        });
        newUtt.addEventListener('end', function () {
            if (speechUtteranceChunker.cancel) {
                speechUtteranceChunker.cancel = false;
                return;
            }
            // Continue right after this chunk so that its trailing delimiter
            // is not spoken again at the start of the next chunk.
            settings.offset = (settings.offset || 0) + chunk.length;
            speechUtteranceChunker(utt, settings, callback);
        });
    }

    if (settings.modifier) {
        settings.modifier(newUtt);
    }
    console.log(newUtt); //IMPORTANT!! Do not remove: Logging the object out fixes some onend firing issues.
    //placing the speak invocation inside a callback fixes ordering and onend issues.
    setTimeout(function () {
        speechSynthesis.speak(newUtt);
    }, 0);
};

How to use it...

// Build the utterance the same way as for a direct speechSynthesis.speak() call.
var myLongText = "This is some long text, oh my goodness look how long I'm getting, wooooohooo!";
var utterance = new SpeechSynthesisUtterance(myLongText);

// Customise it as usual, here by picking the third entry of the voice list.
var voiceArr = speechSynthesis.getVoices();
utterance.voice = voiceArr[2];

// Hand the utterance to the chunker instead of speaking it directly.
//  - chunkLength caps the number of characters per spoken chunk.
//  - the optional third argument runs once the entire text has been spoken.
speechUtteranceChunker(utterance, { chunkLength: 120 }, () => {
    //some code to execute when done
    console.log('done');
});

Hope people find this useful.

Solution 2 - Javascript

I solved the problem with a timer function that calls pause() and resume() and then sets the timer again. In the onend event I clear the timer.

    // Handle of the pending keep-alive timer, kept so it can be cleared later.
    var myTimeout;

    /**
     * Pauses and immediately resumes speech synthesis, then re-arms itself
     * to run again in 10 seconds.
     */
    function myTimer() {
        var synth = window.speechSynthesis;
        synth.pause();
        synth.resume();
        myTimeout = setTimeout(myTimer, 10000);
    }
    ...
        window.speechSynthesis.cancel();
	    myTimeout = setTimeout(myTimer, 10000);
	    var toSpeak = "some text";
    	var utt = new SpeechSynthesisUtterance(toSpeak);
        ...
    	utt.onend =  function() { clearTimeout(myTimeout); }
    	window.speechSynthesis.speak(utt);
    ...

This seems to work well.

Solution 3 - Javascript

A simple and effective solution is to resume periodically.

// Handle of the pending resume timer. Declared explicitly so that assigning
// it does not rely on an implicit global (which throws in strict mode and in
// ES modules); code that wants to stop the loop passes it to clearTimeout().
var timeoutResumeInfinity;

/**
 * Calls speechSynthesis.resume() now and then again every second until the
 * timer stored in `timeoutResumeInfinity` is cleared.
 */
function resumeInfinity() {
    // Drop a timer left over from an earlier call so that repeated calls
    // never leave more than one timer chain running.
    clearTimeout(timeoutResumeInfinity);
    window.speechSynthesis.resume();
    timeoutResumeInfinity = setTimeout(resumeInfinity, 1000);
}

You can associate this with the onend and onstart events, so you will only be invoking the resume if necessary. Something like:

var utterance = new SpeechSynthesisUtterance();

// Start the periodic resume only once the utterance has begun to be spoken.
utterance.onstart = function(event) {
    resumeInfinity();
};

// Stop the periodic resume when the utterance finishes, and also when it
// fails or is cancelled: in those cases an `error` event is reported and the
// timer would otherwise keep firing forever.
utterance.onend = utterance.onerror = function(event) {
    // The timer variable only exists after resumeInfinity() has run once.
    if (typeof timeoutResumeInfinity !== 'undefined') {
        clearTimeout(timeoutResumeInfinity);
    }
};

I discovered this by chance!

Hope this helps!

Solution 4 - Javascript

The problem with Peter's answer is it doesn't work when you have a queue of speech synthesis set up. The script will put the new chunk at the end of the queue, and thus out of order. Example: https://jsfiddle.net/1gzkja90/

<script type='text/javascript' src='http://code.jquery.com/jquery-2.1.0.js'></script>
<script type='text/javascript'>    
    u = new SpeechSynthesisUtterance();
    // Once the DOM is ready, hand every ".t" paragraph to the chunker in
    // document order, one utterance per paragraph.
    $(document).ready(function () {
        $('.t').each(function (index, paragraph) {
            u = new SpeechSynthesisUtterance($(paragraph).text());

            var chunkerSettings = { chunkLength: 120 };
            var logEnd = function () {
                console.log('end');
            };
            speechUtteranceChunker(u, chunkerSettings, logEnd);
        });
    });
     /**
     * Chunkify
     * Google Chrome Speech Synthesis Chunking Pattern
     * Fixes inconsistencies with speaking long texts in speechUtterance objects 
     * Licensed under the MIT License
     *
     * Peter Woolley and Brett Zamir
     */
    var speechUtteranceChunker = function (utt, settings, callback) {
        // utt:      utterance carrying the full text.
        // settings: optional { chunkLength, offset, modifier }; `offset` is
        //           updated in place as chunks are spoken.
        // callback: optional, called once all text has been spoken.
        settings = settings || {};
        var newUtt;
        var txt = (settings.offset !== undefined ? utt.text.substring(settings.offset) : utt.text);
        if (utt.voice && utt.voice.voiceURI === 'native') { // Not part of the spec
            newUtt = utt;
            newUtt.text = txt;
            newUtt.addEventListener('end', function () {
                if (speechUtteranceChunker.cancel) {
                    speechUtteranceChunker.cancel = false;
                }
                if (callback !== undefined) {
                    callback();
                }
            });
        }
        else {
            // Nothing but whitespace left: all text has been spoken.
            if (!/\S/.test(txt)) {
                if (callback !== undefined) {
                    callback();
                }
                return;
            }

            var chunkLength = settings.chunkLength || 160;
            // Alternatives, in order of preference:
            //   1. half..full chunk length followed by a punctuation mark,
            //   2. the whole remaining text when it fits into one chunk,
            //   3. as many characters as fit, ending at a space.
            var pattRegex = new RegExp('^[\\s\\S]{' + Math.floor(chunkLength / 2) + ',' + chunkLength + '}[.!?,]{1}|^[\\s\\S]{1,' + chunkLength + '}$|^[\\s\\S]{1,' + chunkLength + '} ');
            var chunkArr = txt.match(pattRegex);

            // match() yields null when the text starts with a run longer than
            // chunkLength that contains no usable break point; cut it hard
            // instead of failing on chunkArr[0].
            var chunk = chunkArr !== null ? chunkArr[0] : txt.substring(0, chunkLength);

            newUtt = new SpeechSynthesisUtterance(chunk);
            var x;
            for (x in utt) {
                if (utt.hasOwnProperty(x) && x !== 'text') {
                    newUtt[x] = utt[x];
                }
            }
            // The standard utterance attributes are accessors on the prototype
            // in current browsers, so the own-property loop does not see them.
            ['lang', 'voice', 'volume', 'rate', 'pitch'].forEach(function (attr) {
                if (utt[attr] !== undefined) {
                    newUtt[attr] = utt[attr];
                }
            });
            newUtt.addEventListener('end', function () {
                if (speechUtteranceChunker.cancel) {
                    speechUtteranceChunker.cancel = false;
                    return;
                }
                // Continue right after this chunk so that its trailing
                // delimiter is not spoken again at the start of the next one.
                settings.offset = (settings.offset || 0) + chunk.length;
                speechUtteranceChunker(utt, settings, callback);
            });
        }

        if (settings.modifier) {
            settings.modifier(newUtt);
        }
        console.log(newUtt); //IMPORTANT!! Do not remove: Logging the object out fixes some onend firing issues.
        //placing the speak invocation inside a callback fixes ordering and onend issues.
        setTimeout(function () {
            speechSynthesis.speak(newUtt);
        }, 0);
    };
</script>
<p class="t">MLA format follows the author-page method of in-text citation. This means that the author's last name and the page number(s) from which the quotation or paraphrase is taken must appear in the text, and a complete reference should appear on your Works Cited page. The author's name may appear either in the sentence itself or in parentheses following the quotation or paraphrase, but the page number(s) should always appear in the parentheses, not in the text of your sentence.</p>
<p class="t">Joe waited for the train.</p>
<p class="t">The train was late.</p>
<p class="t">Mary and Samantha took the bus.</p>

In my case, the answer was to "chunk" the string before adding them to the queue. See here: http://jsfiddle.net/vqvyjzq4/

Many props to Peter for the idea as well as the regex (which I still have yet to conquer.) I'm sure the javascript can be cleaned up, this is more of a proof of concept.

<script type='text/javascript' src='http://code.jquery.com/jquery-2.1.0.js'></script>
<script type='text/javascript'>    
    // Target maximum number of characters per spoken chunk.
    var chunkLength = 120;
    // Matches the next chunk at the start of the remaining text, preferring a
    // cut after punctuation, then the whole remainder, then a cut at a space.
    var pattRegex = new RegExp('^[\\s\\S]{' + Math.floor(chunkLength / 2) + ',' + chunkLength + '}[.!?,]{1}|^[\\s\\S]{1,' + chunkLength + '}$|^[\\s\\S]{1,' + chunkLength + '} ');

    // Split the text into chunks first, then queue all chunks in order.
    $(document).ready(function () {
        var element = this;
        var arr = [];
        // NOTE(review): replaceBlank is not defined in this snippet; presumably
        // it comes from the linked fiddle - confirm before reuse.
        var txt = replaceBlank($(element).text());
        while (txt.length > 0) {
            var match = txt.match(pattRegex);
            // match() is null when the text starts with a run longer than
            // chunkLength without any break point; cut it hard instead of
            // failing on match[0].
            var piece = match !== null ? match[0] : txt.substring(0, chunkLength);
            arr.push(piece);
            txt = txt.substring(piece.length);
        }
        $.each(arr, function (index, piece) {
            var chunk = piece.trim();
            // Do not queue utterances that contain nothing to say.
            if (chunk.length === 0) {
                return;
            }
            var u = new SpeechSynthesisUtterance(chunk);
            window.speechSynthesis.speak(u);
        });
    });
</script>
<p class="t">MLA format follows the author-page method of in-text citation. This means that the author's last name and the page number(s) from which the quotation or paraphrase is taken must appear in the text, and a complete reference should appear on your Works Cited page. The author's name may appear either in the sentence itself or in parentheses following the quotation or paraphrase, but the page number(s) should always appear in the parentheses, not in the text of your sentence.</p>
<p class="t">Joe waited for the train.</p>
<p class="t">The train was late.</p>
<p class="t">Mary and Samantha took the bus.</p>

Solution 5 - Javascript

2017 and this bug is still around. I happen to understand this problem quite well, being the developer of the award-winning Chrome extension Read Aloud. OK, just kidding about the award winning part.

  1. Your speech will get stuck if it's longer than 15 seconds.
  2. I discovered that Chrome uses a 15 second idle timer to decide when to deactivate an extension's event/background page. I believe this is the culprit.

The workaround I've used is a fairly complicated chunking algorithm that respects punctuation. For Latin languages, I set max chunk size at 36 words. The code is open-source, if you're inclined: https://github.com/ken107/read-aloud/blob/315f1e1d5be6b28ba47fe0c309961025521de516/js/speech.js#L212

The 36-word limit works well most of the time, staying within 15 seconds. But there'll be cases where it still gets stuck. To recover from that, I use a 16 second timer.

Solution 6 - Javascript

Here is what I ended up with; it simply splits the text into sentences on the period ".".

// Voice list captured once, when this script is evaluated; sayit() picks its voice from it.
// NOTE(review): getVoices() may still return an empty list at this point in some
// browsers - confirm, or refresh the list on the `voiceschanged` event.
var voices = window.speechSynthesis.getVoices();

/**
 * Creates a pre-configured utterance (voice, volume, rate, pitch, language)
 * whose lifecycle events are logged to the console. The caller is expected
 * to set `text` on the returned object before speaking it.
 *
 * @returns {SpeechSynthesisUtterance} The configured utterance, without text.
 */
var sayit = function () {
    // Builds a handler that logs `label` followed by the event's string form.
    var logWithEvent = function (label) {
        return function (event) {
            console.log(label + event);
        };
    };

    var utterance = new SpeechSynthesisUtterance();

    utterance.voice = voices[10]; // Note: some voices don't support altering params
    utterance.voiceURI = 'native';
    utterance.volume = 1; // 0 to 1
    utterance.rate = 1; // 0.1 to 10
    utterance.pitch = 2; // 0 to 2
    utterance.lang = 'en-GB';

    utterance.onstart = function () {
        console.log("started");
    };
    utterance.onend = function (event) {
        console.log('Finished in ' + event.elapsedTime + ' seconds.');
    };
    utterance.onerror = logWithEvent('Errored ');
    utterance.onpause = logWithEvent('paused ');
    utterance.onboundary = logWithEvent('onboundary ');

    return utterance;
}


/**
 * Speaks `text` one sentence at a time so that no single utterance gets long
 * enough to stall the synthesizer. Anything still queued is cancelled first.
 *
 * @param {string} text - Text to speak; sentences are separated by ".".
 */
var speekResponse = function (text)
{
    speechSynthesis.cancel(); // if it errors, this clears out the error.

    var sentences = text.split(".");
    for (var i=0;i< sentences.length;i++)
    {
        var sentence = sentences[i].trim();

        // split() leaves an empty piece after a trailing "." and between
        // consecutive periods; queuing those would speak blank utterances.
        if (sentence === '')
        {
            continue;
        }

        var toSay = sayit();
        toSay.text = sentence;
        speechSynthesis.speak(toSay);
    }
}

Solution 7 - Javascript

I ended up chunking up the text and adding some intelligence around the handling of various punctuation marks such as periods, commas, etc. For example, you don't want to break the text up on a comma if it's part of a number (e.g., $10,000).

I have tested it and it seems to work on arbitrarily large sets of input and it also appears to work not just on the desktop but on android phones and iphones.

Set up a github page for the synthesizer at: https://github.com/unk1911/speech

You can see it live at: http://edeliverables.com/tts/

Solution 8 - Javascript

// Vue app with two actions: `talk` reads `text` aloud and `stop_reading`
// cancels it. While an utterance is being spoken, speechSynthesis.resume()
// is called once per second; the timer is stopped as soon as the utterance
// ends, fails or is cancelled, so it no longer runs forever.
new Vue({
  el: "#app",
  data: {
    text: `Collaboratively administrate empowered markets via plug-and-play networks. Dynamically procrastinate B2C users after installed base benefits. Dramatically visualize customer directed convergence without revolutionary ROI. Efficiently unleash cross-media information without cross-media value. Quickly maximize timely deliverables for real-time schemas. Dramatically maintain clicks-and-mortar solutions without functional solutions.`
  },

  methods:{
    stop_reading() {
      // Stop the keep-alive timer before cancelling the speech itself.
      clearTimeout(this.resumeTimer);
      const synth = window.speechSynthesis;
      synth.cancel();
    },

    talk() {
      const synth = window.speechSynthesis;
      const textInput = this.text;

      const utterThis = new SpeechSynthesisUtterance(textInput);
      utterThis.pitch = 0;
      utterThis.rate = 1;

      // The timer handle lives on the component instance (not reactive data)
      // so that stop_reading() and the event handlers below can clear it.
      const stopResuming = () => {
        clearTimeout(this.resumeTimer);
      };

      const resumeInfinity = () => {
        synth.resume();
        this.resumeTimer = setTimeout(resumeInfinity, 1000);
      };

      utterThis.onstart = () => {
        // Never keep more than one timer chain alive.
        stopResuming();
        resumeInfinity();
      };
      utterThis.onend = stopResuming;
      utterThis.onerror = stopResuming;

      synth.speak(utterThis);
    }
  }
})

<script src="https://cdnjs.cloudflare.com/ajax/libs/vue/2.5.17/vue.js"></script>
<div id="app">
  <button @click="talk">Speak</button>
  <button @click="stop_reading">Stop</button>
</div>

Solution 9 - Javascript

As Michael pointed out, Peter's solution is really great except when your text is on different lines. Michael created a demo to better illustrate the problem with it - https://jsfiddle.net/1gzkja90/ - and proposed another solution.

Another, maybe simpler, way to solve this is to remove the line breaks from the textarea's text in Peter's solution; then it works just great.

//javascript
// The textarea's text is in its `value` property; the element itself has no replace() method.
var noLineBreaks = document.getElementById('mytextarea').value.replace(/\n/g,'');

//jquery
var noLineBreaks = $('#mytextarea').val().replace(/\n/g,'');

So in Peter's solution it might look the following way :

utterance.text = $('#mytextarea').val().replace(/\n/g,'');

But there's still a problem with canceling the speech. It just goes on to the next sequence and won't stop.

Solution 10 - Javascript

Other suggestions do weird things with the dot (or say "DOT") and do not respect speech intonation at the end of a sentence.

// Texts longer than this many characters are split into several utterances by speak().
var CHARACTER_LIMIT = 200;
// Language tag passed to speak() and assigned to each utterance's `lang`.
var lang = "en";
    
var text = "MLA format follows the author-page method of in-text citation. This means that the author's last name and the page number(s) from which the quotation or paraphrase is taken must appear in the text, and a complete reference should appear on your Works Cited page. The author's name may appear either in the sentence itself or in parentheses following the quotation or paraphrase, but the page number(s) should always appear in the parentheses, not in the text of your sentence. Joe waited for the train. The train was late. Mary and Samantha took the bus.";
    
    speak(text, lang)
    
    /**
     * Splits `text` into consecutive pieces for separate utterances.
     *
     * While more than `limit` characters remain, the next piece ends right
     * after the first phrase delimiter (: ! ? . ;) if that lies within the
     * limit, otherwise right after the first comma within the limit,
     * otherwise after as many whole words as fit. A single word that is
     * longer than the limit is cut at the limit so the loop always advances.
     *
     * @param {string} text - Text to split.
     * @param {number} limit - Maximum number of characters per piece.
     * @returns {string[]} The pieces; concatenated they equal `text`.
     */
    function splitSpeechText(text, limit) {
      var parts = [];
      var rest = text;

      // A limit below 1 could never make progress.
      limit = Math.max(1, limit);

      while (rest.length > limit) {

        //Split by common phrase delimiters
        var cut = rest.search(/[:!?.;]+/);
        var part = '';

        //Couldn't split by priority characters, try commas
        if (cut == -1 || cut >= limit) {
          cut = rest.search(/[,]+/);
        }

        if (cut == -1 || cut >= limit) {

          //Couldn't split by normal characters, then we use spaces
          var words = rest.split(' ');

          for (var w = 0; w < words.length; w++) {

            if (part.length + words[w].length + 1 > limit)
              break;

            part += (w != 0 ? ' ' : '') + words[w];

          }

          //Not even one word fits: without a hard cut `rest` would never
          //shrink and this loop would spin forever.
          if (part.length === 0) {
            part = rest.substring(0, limit);
          }

        } else {

          part = rest.substring(0, cut + 1);

        }

        rest = rest.substring(part.length);
        parts.push(part);

      }

      //Add the remaining text
      if (rest.length > 0) {
        parts.push(rest);
      }

      return parts;
    }

    /**
     * Speaks `text` in the given language. Texts longer than CHARACTER_LIMIT
     * are split into several utterances, which are queued in order.
     *
     * @param {string} text - Text to speak.
     * @param {string} lang - Language tag assigned to every utterance.
     */
    function speak(text, lang) {

      //Support for multipart text (there is a limit on characters)
      var multipartText = text.length > CHARACTER_LIMIT
        ? splitSpeechText(text, CHARACTER_LIMIT)
        : [text];

      //Play multipart text
      for (var i = 0; i < multipartText.length; i++) {

        //Create msg object
        var msg = new SpeechSynthesisUtterance();
        msg.volume = 1; // 0 to 1
        msg.rate = 1; // 0.1 to 10
        msg.pitch = 1; //0 to 2
        msg.text = multipartText[i];
        // NOTE(review): `speak` is not a standard utterance attribute; kept
        // in case other code reads the full list of parts from it - confirm.
        msg.speak = multipartText;
        msg.lang = lang;
        // NOTE(review): OnFinishedPlaying is not defined in this snippet;
        // presumably an optional global callback - confirm.
        msg.onend = self.OnFinishedPlaying;
        msg.onerror = function (e) {
          console.log('Error');
          console.log(e);
        };
        //Log each part when it starts to be spoken
        msg.onstart = function (e) {
          var curenttxt = e.currentTarget.text;
          console.log(curenttxt);
        };
        speechSynthesis.speak(msg);

      }

    }

https://jsfiddle.net/onigetoc/9r27Ltqz/

Solution 11 - Javascript

I want to say that through Chrome Extensions and Applications, I solved this quite irritating issue through using chrome.tts, since chrome.tts allows you to speak through the browser, instead of the window which stops the talk when you close the window.

Using the below code, you can fix the above issue with large speakings:

// Nudge the engine with resume() every 100 ms while the text is being spoken.
var resumeTimer = setInterval(() => { chrome.tts.resume(); }, 100);
chrome.tts.speak("Abnormally large string, over 250 characters, etc...", {
  // Stop nudging once the utterance is over, whatever the reason; without
  // this the interval would keep running forever.
  onEvent: (event) => {
    if (['end', 'interrupted', 'cancelled', 'error'].includes(event.type)) {
      clearInterval(resumeTimer);
    }
  }
});

I'm sure that will work, but I did this just to be safe:

var largeData = "";
// Cut the text into pieces of at most 250 characters. match() returns null
// when nothing matches (e.g. for an empty string), so fall back to an empty
// list instead of letting the loop below throw.
var smallChunks = largeData.match(/.{1,250}/g) || [];
for (var chunk of smallChunks) {
  // enqueue: append to the speech queue instead of interrupting current speech.
  chrome.tts.speak(chunk, {'enqueue': true});
}

Hope this helps someone! It helped make my application work more functionally, and epicly.

Solution 12 - Javascript

Yes, the google synthesis api will stop at some point during speaking a long text.

We can see that the onend, onpause and onerror events of SpeechSynthesisUtterance are not fired as they normally would be when the sudden stop happens, and neither is the speechSynthesis onerror event.

After several trials, I found that speechSynthesis.paused does work, and that speechSynthesis.resume() can resume the speaking.

Hence we just need a timer that checks the paused status while speaking and calls speechSynthesis.resume() to continue. The interval should be small enough to prevent glitches when the speech continues.

// Handle of the interval that watches for an unexpected pause.
let timer = null;
// True while an utterance started by readText() has not yet ended or failed.
let reading = false;

/**
 * Reads `text` aloud. Calls made while a previous text is still being read
 * are ignored. While reading, a 100 ms interval checks
 * `speechSynthesis.paused` and calls `resume()` whenever it is set.
 *
 * @param {string} text - The text to read.
 */
let readText = function(text) {

    // Ignore the request while something is already being read.
    if (reading) {
        return;
    }

    // Shared clean-up: mark reading as finished and stop the watchdog.
    const finishReading = function() {
        reading = false;
        clearInterval(timer);
    };

    speechSynthesis.cancel();
    if (timer) {
        clearInterval(timer);
    }

    const utterance = new SpeechSynthesisUtterance();
    const availableVoices = window.speechSynthesis.getVoices();
    Object.assign(utterance, {
        voice: availableVoices[82],
        voiceURI: 'native',
        volume: 1, // 0 to 1
        rate: 1.0, // 0.1 to 10
        pitch: 1, // 0 to 2
        text: text,
        lang: 'zh-TW'
    });

    utterance.onerror = function() {
        speechSynthesis.cancel();
        finishReading();
    };

    utterance.onpause = function(e) {
        console.log('onpause in ' + e.elapsedTime + ' seconds.');
    };

    utterance.onend = function(e) {
        console.log('onend in ' + e.elapsedTime + ' seconds.');
        finishReading();
    };

    speechSynthesis.onerror = function(e) {
        console.log('speechSynthesis onerror in ' + e.elapsedTime + ' seconds.');
        speechSynthesis.cancel();
        finishReading();
    };

    speechSynthesis.speak(utterance);

    // Watchdog: resume as soon as the synthesizer reports being paused.
    timer = setInterval(function() {
        if (!speechSynthesis.paused) {
            return;
        }
        console.log("#continue")
        speechSynthesis.resume();
    }, 100);

    reading = true;
}

Attributions

All content for this solution is sourced from the original question on Stackoverflow.

The content on this page is licensed under the Attribution-ShareAlike 4.0 International (CC BY-SA 4.0) license.

Content TypeOriginal AuthorOriginal Content on Stackoverflow
QuestionAndrey ShchekinView Question on Stackoverflow
Solution 1 - JavascriptPeter WoolleyView Answer on Stackoverflow
Solution 2 - JavascriptjjsaView Answer on Stackoverflow
Solution 3 - JavascriptMhagnumDwView Answer on Stackoverflow
Solution 4 - JavascriptMichaelView Answer on Stackoverflow
Solution 5 - JavascriptSarsaparillaView Answer on Stackoverflow
Solution 6 - JavascriptEmileView Answer on Stackoverflow
Solution 7 - Javascriptuser3892260View Answer on Stackoverflow
Solution 8 - JavascriptRukkiecodesView Answer on Stackoverflow
Solution 9 - JavascriptmhlavackaView Answer on Stackoverflow
Solution 10 - JavascriptGinoView Answer on Stackoverflow
Solution 11 - JavascriptJack HalesView Answer on Stackoverflow
Solution 12 - JavascriptJackie ChaoView Answer on Stackoverflow