From 8f2c33fd8938269de283936975386c3de101e70d Mon Sep 17 00:00:00 2001 From: stonefruit Date: Mon, 14 Dec 2020 23:55:04 +0800 Subject: Add languages constant for languages with traineddata --- src/constants/languages.js | 218 +++++++++++++++++++++++++++++++++++++++++++++ src/index.js | 2 + 2 files changed, 220 insertions(+) create mode 100644 src/constants/languages.js diff --git a/src/constants/languages.js b/src/constants/languages.js new file mode 100644 index 0000000..3d19403 --- /dev/null +++ b/src/constants/languages.js @@ -0,0 +1,218 @@ +/* + * Language codes for which Tesseract traineddata exists; each exported value is the lower-case form of its key (e.g. ENG maps to eng) + * https://tesseract-ocr.github.io/tessdoc/Data-Files#data-files-for-version-400-november-29-2016 + */ + +/** + * @typedef {object} Languages + * @property {string} AFR Afrikaans + * @property {string} AMH Amharic + * @property {string} ARA Arabic + * @property {string} ASM Assamese + * @property {string} AZE Azerbaijani + * @property {string} AZE_CYRL Azerbaijani - Cyrillic + * @property {string} BEL Belarusian + * @property {string} BEN Bengali + * @property {string} BOD Tibetan + * @property {string} BOS Bosnian + * @property {string} BUL Bulgarian + * @property {string} CAT Catalan; Valencian + * @property {string} CEB Cebuano + * @property {string} CES Czech + * @property {string} CHI_SIM Chinese - Simplified + * @property {string} CHI_TRA Chinese - Traditional + * @property {string} CHR Cherokee + * @property {string} CYM Welsh + * @property {string} DAN Danish + * @property {string} DEU German + * @property {string} DZO Dzongkha + * @property {string} ELL Greek, Modern (1453-) + * @property {string} ENG English + * @property {string} ENM English, Middle (1100-1500) + * @property {string} EPO Esperanto + * @property {string} EST Estonian + * @property {string} EUS Basque + * @property {string} FAS Persian + * @property {string} FIN Finnish + * @property {string} FRA French + * @property {string} FRK German Fraktur + * @property {string} FRM French, Middle (ca. 
1400-1600) + * @property {string} GLE Irish + * @property {string} GLG Galician + * @property {string} GRC Greek, Ancient (-1453) + * @property {string} GUJ Gujarati + * @property {string} HAT Haitian; Haitian Creole + * @property {string} HEB Hebrew + * @property {string} HIN Hindi + * @property {string} HRV Croatian + * @property {string} HUN Hungarian + * @property {string} IKU Inuktitut + * @property {string} IND Indonesian + * @property {string} ISL Icelandic + * @property {string} ITA Italian + * @property {string} ITA_OLD Italian - Old + * @property {string} JAV Javanese + * @property {string} JPN Japanese + * @property {string} KAN Kannada + * @property {string} KAT Georgian + * @property {string} KAT_OLD Georgian - Old + * @property {string} KAZ Kazakh + * @property {string} KHM Central Khmer + * @property {string} KIR Kirghiz; Kyrgyz + * @property {string} KOR Korean + * @property {string} KUR Kurdish + * @property {string} LAO Lao + * @property {string} LAT Latin + * @property {string} LAV Latvian + * @property {string} LIT Lithuanian + * @property {string} MAL Malayalam + * @property {string} MAR Marathi + * @property {string} MKD Macedonian + * @property {string} MLT Maltese + * @property {string} MSA Malay + * @property {string} MYA Burmese + * @property {string} NEP Nepali + * @property {string} NLD Dutch; Flemish + * @property {string} NOR Norwegian + * @property {string} ORI Oriya + * @property {string} PAN Panjabi; Punjabi + * @property {string} POL Polish + * @property {string} POR Portuguese + * @property {string} PUS Pushto; Pashto + * @property {string} RON Romanian; Moldavian; Moldovan + * @property {string} RUS Russian + * @property {string} SAN Sanskrit + * @property {string} SIN Sinhala; Sinhalese + * @property {string} SLK Slovak + * @property {string} SLV Slovenian + * @property {string} SPA Spanish; Castilian + * @property {string} SPA_OLD Spanish; Castilian - Old + * @property {string} SQI Albanian + * @property {string} SRP Serbian + 
* @property {string} SRP_LATN Serbian - Latin + * @property {string} SWA Swahili + * @property {string} SWE Swedish + * @property {string} SYR Syriac + * @property {string} TAM Tamil + * @property {string} TEL Telugu + * @property {string} TGK Tajik + * @property {string} TGL Tagalog + * @property {string} THA Thai + * @property {string} TIR Tigrinya + * @property {string} TUR Turkish + * @property {string} UIG Uighur; Uyghur + * @property {string} UKR Ukrainian + * @property {string} URD Urdu + * @property {string} UZB Uzbek + * @property {string} UZB_CYRL Uzbek - Cyrillic + * @property {string} VIE Vietnamese + * @property {string} YID Yiddish + */ + +/** + * @type {Languages} + */ +module.exports = { + AFR: 'afr', + AMH: 'amh', + ARA: 'ara', + ASM: 'asm', + AZE: 'aze', + AZE_CYRL: 'aze_cyrl', + BEL: 'bel', + BEN: 'ben', + BOD: 'bod', + BOS: 'bos', + BUL: 'bul', + CAT: 'cat', + CEB: 'ceb', + CES: 'ces', + CHI_SIM: 'chi_sim', + CHI_TRA: 'chi_tra', + CHR: 'chr', + CYM: 'cym', + DAN: 'dan', + DEU: 'deu', + DZO: 'dzo', + ELL: 'ell', + ENG: 'eng', + ENM: 'enm', + EPO: 'epo', + EST: 'est', + EUS: 'eus', + FAS: 'fas', + FIN: 'fin', + FRA: 'fra', + FRK: 'frk', + FRM: 'frm', + GLE: 'gle', + GLG: 'glg', + GRC: 'grc', + GUJ: 'guj', + HAT: 'hat', + HEB: 'heb', + HIN: 'hin', + HRV: 'hrv', + HUN: 'hun', + IKU: 'iku', + IND: 'ind', + ISL: 'isl', + ITA: 'ita', + ITA_OLD: 'ita_old', + JAV: 'jav', + JPN: 'jpn', + KAN: 'kan', + KAT: 'kat', + KAT_OLD: 'kat_old', + KAZ: 'kaz', + KHM: 'khm', + KIR: 'kir', + KOR: 'kor', + KUR: 'kur', + LAO: 'lao', + LAT: 'lat', + LAV: 'lav', + LIT: 'lit', + MAL: 'mal', + MAR: 'mar', + MKD: 'mkd', + MLT: 'mlt', + MSA: 'msa', + MYA: 'mya', + NEP: 'nep', + NLD: 'nld', + NOR: 'nor', + ORI: 'ori', + PAN: 'pan', + POL: 'pol', + POR: 'por', + PUS: 'pus', + RON: 'ron', + RUS: 'rus', + SAN: 'san', + SIN: 'sin', + SLK: 'slk', + SLV: 'slv', + SPA: 'spa', + SPA_OLD: 'spa_old', + SQI: 'sqi', + SRP: 'srp', + SRP_LATN: 'srp_latn', + SWA: 'swa', + SWE: 'swe', + SYR: 
'syr', + TAM: 'tam', + TEL: 'tel', + TGK: 'tgk', + TGL: 'tgl', + THA: 'tha', + TIR: 'tir', + TUR: 'tur', + UIG: 'uig', + UKR: 'ukr', + URD: 'urd', + UZB: 'uzb', + UZB_CYRL: 'uzb_cyrl', + VIE: 'vie', + YID: 'yid', +}; diff --git a/src/index.js b/src/index.js index 6f0fb1f..fd1b742 100644 --- a/src/index.js +++ b/src/index.js @@ -11,11 +11,13 @@ require('regenerator-runtime/runtime'); const createScheduler = require('./createScheduler'); const createWorker = require('./createWorker'); const Tesseract = require('./Tesseract'); +const languages = require('./constants/languages'); const OEM = require('./constants/OEM'); const PSM = require('./constants/PSM'); const { setLogging } = require('./utils/log'); module.exports = { + languages, OEM, PSM, createScheduler, -- cgit v1.2.3