diff options
-rw-r--r-- | CHANGELOG.markdown | 5 | ||||
-rw-r--r-- | src/com/darkprograms/speech/synthesiser/Synthesiser.java | 285 |
2 files changed, 264 insertions, 26 deletions
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.SequenceInputStream;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;

/**
 * Synthesiser class that connects to Google's unofficial API to retrieve MP3 speech data.
 *
 * <p>As of version 1.06 (tag v1.016) the synthesiser:
 * <ul>
 *   <li>accepts strings longer than 100 characters by splitting them into fragments and
 *       concatenating the returned audio;</li>
 *   <li>supports multiple languages, accents and voices through a language code;</li>
 *   <li>can auto-detect the language of the text (language code {@code "auto"},
 *       {@code ""} or {@code null}).</li>
 * </ul>
 *
 * <p>The {@code getMP3Data} methods never modify the configured language code; a language that
 * was auto-detected for one request is only used for that request.
 *
 * @author Luke Kuza, Aaron Gokaslan (Skylion)
 */
public class Synthesiser {

    /**
     * URL to query for Google synthesiser. The language code and the {@code q} parameter are
     * appended per request.
     */
    private final static String GOOGLE_SYNTHESISER_URL = "http://translate.google.com/translate_tts?tl=";

    /**
     * URL to query for Google auto detection. The encoded text is appended per request.
     */
    private final static String GOOGLE_AUTODETECT_URL = "http://translate.google.com/translate_a/t?client=t&sl=auto&text=";

    /**
     * User agent sent with every request; the requests are made with a browser-like header.
     */
    private final static String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0) Gecko/20100101 Firefox/4.0";

    /**
     * Language used when auto-detection fails or yields no result.
     */
    private final static String DEFAULT_LANGUAGE = "en-us";

    /**
     * Longest text, in chars, that is sent to the synthesiser in a single request.
     */
    private final static int MAX_FRAGMENT_LENGTH = 100;

    /**
     * Longest prefix, in chars, used for language detection when no word boundary is found.
     */
    private final static int MAX_DETECT_LENGTH = 99;

    /**
     * LANG_XX_XXXX variables are language codes.
     * Please add more regional languages as you find them, including the accent code if you can.
     */
    public static final String LANG_AU_ENGLISH = "en-AU";
    public static final String LANG_US_ENGLISH = "en-US";
    public static final String LANG_UK_ENGLISH = "en-GB";
    public static final String LANG_ES_SPANISH = "es";
    public static final String LANG_FR_FRENCH = "fr";
    public static final String LANG_DE_GERMAN = "de";

    /**
     * Language of the text you want to synthesise; {@code "auto"}, {@code ""} or {@code null}
     * requests auto-detection.
     */
    private String languageCode;

    /**
     * Constructor. The language is set to {@code "auto"}, i.e. it is detected per request.
     */
    public Synthesiser() {
        languageCode = "auto";
    }

    /**
     * Constructor that takes a language code parameter. Specify {@code "auto"} for language
     * auto-detection.
     *
     * @param languageCode the language code to use, e.g. {@link #LANG_US_ENGLISH}
     */
    public Synthesiser(String languageCode) {
        this.languageCode = languageCode;
    }

    /**
     * Returns the current language code for the Synthesiser.
     * Example: English (generic) = en, English (US) = en-US, English (UK) = en-GB, Spanish = es.
     *
     * @return the current language code parameter
     */
    public String getLanguage() {
        return languageCode;
    }

    /**
     * Sets the language code. Set it to {@code "auto"} to enable automatic language detection;
     * {@code null} and the empty string are treated the same way.
     *
     * @param languageCode the language code you would like to use
     */
    public void setLanguage(String languageCode) {
        this.languageCode = languageCode;
    }

    /**
     * Gets an input stream to MP3 data for the returned information from a request.
     *
     * <p>If the configured language requests auto-detection, the language is detected once from
     * the text (falling back to {@code "en-us"} when detection fails). Text longer than 100
     * characters is split into fragments which are requested one by one with that same
     * language; the returned stream is the concatenation of all responses.
     *
     * @param synthText text you want to be synthesised into MP3 data; must not be {@code null}
     * @return an input stream of the MP3 data that is returned from Google
     * @throws Exception if it cannot complete the request
     */
    public InputStream getMP3Data(String synthText) throws Exception {
        if (synthText == null) {
            throw new NullPointerException("synthText must not be null");
        }
        // Resolve once so that the fragments of a long text are not auto-detected individually.
        String language = resolveLanguage(synthText);
        if (synthText.length() > MAX_FRAGMENT_LENGTH) {
            return fetchAll(parseString(synthText), language);
        }
        return requestMP3(synthText, language);
    }

    /**
     * Gets an input stream to MP3 data for a list of texts. Each element is synthesised with
     * {@link #getMP3Data(String)} and the results are concatenated in list order.
     *
     * <p>The given list is not modified. An empty list yields an empty stream.
     *
     * @param synthText list of strings you want to be synthesised into MP3 data
     * @return an input stream of all the MP3 data that is returned from Google
     * @throws Exception if it cannot complete the request
     */
    public InputStream getMP3Data(List<String> synthText) throws Exception {
        return fetchAll(synthText, null);
    }

    /**
     * Requests every part in order and chains the responses into one stream.
     *
     * @param parts         the texts to synthesise
     * @param fixedLanguage language to use for every part, or {@code null} to let
     *                      {@link #getMP3Data(String)} resolve the language for each part
     * @return the concatenated MP3 data, or an empty stream if {@code parts} is empty
     * @throws Exception if any request fails; streams opened so far are closed first
     */
    private InputStream fetchAll(List<String> parts, String fixedLanguage) throws Exception {
        InputStream complete = null;
        try {
            for (String part : parts) {
                InputStream next = (fixedLanguage == null)
                        ? getMP3Data(part)
                        : requestMP3(part, fixedLanguage);
                complete = (complete == null) ? next : new SequenceInputStream(complete, next);
            }
        } catch (Exception ex) {
            closeQuietly(complete); // do not leak the connections that were already opened
            throw ex;
        }
        return (complete != null) ? complete : new ByteArrayInputStream(new byte[0]);
    }

    /**
     * Performs a single synthesiser request.
     *
     * @param text     the text to synthesise
     * @param language the concrete language code to send
     * @return the response body stream
     * @throws IOException if the URL cannot be built or the connection fails
     */
    private InputStream requestMP3(String text, String language) throws IOException {
        String encodedText = URLEncoder.encode(text, "UTF-8");
        // Encoding is a no-op for codes such as "en-US" but keeps odd input from breaking the query.
        String encodedLanguage = URLEncoder.encode(language, "UTF-8");
        URL url = new URL(GOOGLE_SYNTHESISER_URL + encodedLanguage + "&q=" + encodedText);
        URLConnection urlConn = url.openConnection();
        urlConn.addRequestProperty("User-Agent", USER_AGENT);
        return urlConn.getInputStream();
    }

    /**
     * Determines the language to use for a request without touching the configured setting.
     *
     * @param text the text of the request, used for auto-detection if needed
     * @return the configured language, the detected language, or {@code "en-us"} when
     *         detection fails or finds nothing
     */
    private String resolveLanguage(String text) {
        String configured = this.languageCode;
        boolean autoDetect = configured == null
                || configured.equals("")
                || configured.equalsIgnoreCase("auto");
        if (!autoDetect) {
            return configured;
        }
        try {
            String detected = detectLanguage(text);
            return (detected != null) ? detected : DEFAULT_LANGUAGE;
        } catch (Exception ex) {
            // Best effort: report the failure and fall back to the default language.
            ex.printStackTrace();
            return DEFAULT_LANGUAGE;
        }
    }

    /**
     * Closes a stream, ignoring failures; used only while another exception is propagating.
     *
     * @param stream the stream to close, may be {@code null}
     */
    private void closeQuietly(InputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ignored) {
            // The original failure is the one worth reporting.
        }
    }

    /**
     * Separates a string into smaller parts so that Google will not reject the request.
     *
     * @param input the string you want to separate
     * @return a list of the string fragments from your input, in order
     */
    private List<String> parseString(String input) {
        return parseString(input, new ArrayList<String>());
    }

    /**
     * Separates a string into parts of at most 100 chars and appends them to {@code fragments}.
     * Concatenating the appended fragments yields the input unchanged.
     *
     * <p>Implemented as a loop over an offset, so the work is linear in the input length and
     * does not grow the call stack.
     *
     * @param input     the string you want to break up into smaller parts
     * @param fragments list the fragments are appended to
     * @return {@code fragments}
     */
    private List<String> parseString(String input, List<String> fragments) {
        final int length = input.length();
        int start = 0;
        while (length - start > MAX_FRAGMENT_LENGTH) {
            int end = findBreak(input, start);
            if (end < 0) {
                // No punctuation or space in range (e.g. languages without spaces): hard cut.
                end = hardCut(input, start + MAX_FRAGMENT_LENGTH);
            }
            fragments.add(input.substring(start, end));
            start = end;
        }
        fragments.add(input.substring(start));
        return fragments;
    }

    /**
     * Finds where the last word of the first 100 chars ends by searching for spaces and ending
     * punctuation. Punctuation is preferred over spaces to alleviate mid-sentence pausing.
     *
     * @param input the string you want to search through
     * @return the length of the input if it is shorter than 100 chars; otherwise the index at
     *         which to cut, or -1 if the first 100 chars contain no usable boundary
     */
    private int findLastWord(String input) {
        if (input.length() < MAX_FRAGMENT_LENGTH) {
            return input.length();
        }
        return findBreak(input, 0);
    }

    /**
     * Searches the 100-char window starting at {@code start} for a place to cut.
     * The first char of the window is never used as a boundary, so a cut always makes progress.
     *
     * @param input the text to search; must contain at least 100 chars from {@code start}
     * @param start index of the first char of the window
     * @return the exclusive end index of the fragment: just after the right-most ending
     *         punctuation if there is one, otherwise the index of the right-most space,
     *         otherwise -1
     */
    private int findBreak(String input, int start) {
        int space = -1;
        for (int i = start + MAX_FRAGMENT_LENGTH - 1; i > start; i--) {
            char current = input.charAt(i);
            if (isEndingPunctuation(current)) {
                return i + 1; // keep the punctuation with the sentence it ends
            }
            if (space == -1 && current == ' ') {
                space = i; // remember only the right-most space
            }
        }
        return space;
    }

    /**
     * Adjusts a forced cut position so that it does not separate a UTF-16 surrogate pair,
     * which would otherwise be URL-encoded as two malformed characters.
     *
     * @param input the text being cut
     * @param index the intended exclusive end index
     * @return {@code index}, or {@code index - 1} if the cut would fall inside a surrogate pair
     */
    private int hardCut(String input, int index) {
        if (index > 0 && index < input.length()
                && Character.isHighSurrogate(input.charAt(index - 1))
                && Character.isLowSurrogate(input.charAt(index))) {
            return index - 1;
        }
        return index;
    }

    /**
     * Checks if a char is treated as ending punctuation when splitting text.
     *
     * @param input the char you want to check
     * @return true if it is one of {@code . ! ? ; : |}, false if not
     */
    private boolean isEndingPunctuation(char input) {
        return input == '.' || input == '!' || input == '?' || input == ';' || input == ':' || input == '|';
    }

    /**
     * Automatically determines the language of the original text.
     * Only a prefix of long text is sent, cut at a word boundary where possible; the whole text
     * is not needed to determine the language.
     *
     * @param text the text you want to check the language of
     * @return the language code, or {@code null} if none could be found in the response
     * @throws Exception if it cannot complete the request
     */
    public String detectLanguage(String text) throws Exception {
        if (text.length() > MAX_DETECT_LENGTH) {
            int lastWord = findLastWord(text);
            int end = (lastWord < 0) ? hardCut(text, MAX_DETECT_LENGTH) : lastWord;
            text = text.substring(0, end);
        }
        String encoded = URLEncoder.encode(text, "UTF-8");
        URL url = new URL(GOOGLE_AUTODETECT_URL + encoded);
        URLConnection urlConn = url.openConnection();
        urlConn.addRequestProperty("User-Agent", USER_AGENT);
        String rawData = urlToText(urlConn);
        return parseRawData(rawData);
    }

    /**
     * Reads the whole response of a URL connection as text and closes the stream.
     *
     * @param urlConn the open URLConnection that you want to generate a String from
     * @return the generated String
     * @throws Exception if it cannot complete the request
     */
    private String urlToText(URLConnection urlConn) throws Exception {
        // A fixed charset keeps decoding independent of the platform default.
        Reader reader = new InputStreamReader(urlConn.getInputStream(), "UTF-8");
        try {
            StringBuilder buf = new StringBuilder();
            char[] chunk = new char[1024];
            int read;
            while ((read = reader.read(chunk)) >= 0) {
                buf.append(chunk, 0, read);
            }
            return buf.toString();
        } finally {
            reader.close();
        }
    }

    /**
     * Searches the raw response for a two-letter language code, i.e. the first occurrence of
     * the pattern {@code ,"xx",} where both {@code x} are letters.
     *
     * @param rawData the raw String directly from Google you want to search through
     * @return the language parsed from the rawData, or {@code null} if none is found
     */
    private String parseRawData(String rawData) {
        if (rawData == null) {
            return null;
        }
        for (int i = 0; i + 5 < rawData.length(); i++) {
            boolean quotedPair = rawData.charAt(i) == ',' && rawData.charAt(i + 1) == '"'
                    && rawData.charAt(i + 4) == '"' && rawData.charAt(i + 5) == ',';
            if (quotedPair) {
                String possible = rawData.substring(i + 2, i + 4);
                // Required due to Google's inconsistent formatting.
                if (containsLettersOnly(possible)) {
                    return possible;
                }
            }
        }
        return null;
    }

    /**
     * Checks if all characters in text are letters.
     *
     * @param text the text you want to determine the validity of
     * @return true if all characters are letters, otherwise false
     */
    private boolean containsLettersOnly(String text) {
        for (int i = 0; i < text.length(); i++) {
            if (!Character.isLetter(text.charAt(i))) {
                return false;
            }
        }
        return true;
    }
}