Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/naptha/tesseract.js.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author stonefruit <jeromeng888@gmail.com> 2020-12-14 18:55:04 +0300
committer stonefruit <jeromeng888@gmail.com> 2020-12-14 18:55:04 +0300
commit 8f2c33fd8938269de283936975386c3de101e70d (patch)
tree ddde4dd264f555b47f754d2412913e9eec69db08
parent 83a424f9b9058e940070979327ef255d875f6c7b (diff)
Add languages constant for languages
with traineddata
-rw-r--r-- src/constants/languages.js 218
-rw-r--r-- src/index.js 2
2 files changed, 220 insertions, 0 deletions
diff --git a/src/constants/languages.js b/src/constants/languages.js
new file mode 100644
index 0000000..3d19403
--- /dev/null
+++ b/src/constants/languages.js
@@ -0,0 +1,218 @@
+/*
+ * languages with existing tesseract traineddata
+ * https://tesseract-ocr.github.io/tessdoc/Data-Files#data-files-for-version-400-november-29-2016
+ */
+
+/**
+ * @typedef {object} Languages
+ * @property {string} AFR Afrikaans
+ * @property {string} AMH Amharic
+ * @property {string} ARA Arabic
+ * @property {string} ASM Assamese
+ * @property {string} AZE Azerbaijani
+ * @property {string} AZE_CYRL Azerbaijani - Cyrillic
+ * @property {string} BEL Belarusian
+ * @property {string} BEN Bengali
+ * @property {string} BOD Tibetan
+ * @property {string} BOS Bosnian
+ * @property {string} BUL Bulgarian
+ * @property {string} CAT Catalan; Valencian
+ * @property {string} CEB Cebuano
+ * @property {string} CES Czech
+ * @property {string} CHI_SIM Chinese - Simplified
+ * @property {string} CHI_TRA Chinese - Traditional
+ * @property {string} CHR Cherokee
+ * @property {string} CYM Welsh
+ * @property {string} DAN Danish
+ * @property {string} DEU German
+ * @property {string} DZO Dzongkha
+ * @property {string} ELL Greek, Modern (1453-)
+ * @property {string} ENG English
+ * @property {string} ENM English, Middle (1100-1500)
+ * @property {string} EPO Esperanto
+ * @property {string} EST Estonian
+ * @property {string} EUS Basque
+ * @property {string} FAS Persian
+ * @property {string} FIN Finnish
+ * @property {string} FRA French
+ * @property {string} FRK German Fraktur
+ * @property {string} FRM French, Middle (ca. 1400-1600)
+ * @property {string} GLE Irish
+ * @property {string} GLG Galician
+ * @property {string} GRC Greek, Ancient (-1453)
+ * @property {string} GUJ Gujarati
+ * @property {string} HAT Haitian; Haitian Creole
+ * @property {string} HEB Hebrew
+ * @property {string} HIN Hindi
+ * @property {string} HRV Croatian
+ * @property {string} HUN Hungarian
+ * @property {string} IKU Inuktitut
+ * @property {string} IND Indonesian
+ * @property {string} ISL Icelandic
+ * @property {string} ITA Italian
+ * @property {string} ITA_OLD Italian - Old
+ * @property {string} JAV Javanese
+ * @property {string} JPN Japanese
+ * @property {string} KAN Kannada
+ * @property {string} KAT Georgian
+ * @property {string} KAT_OLD Georgian - Old
+ * @property {string} KAZ Kazakh
+ * @property {string} KHM Central Khmer
+ * @property {string} KIR Kirghiz; Kyrgyz
+ * @property {string} KOR Korean
+ * @property {string} KUR Kurdish
+ * @property {string} LAO Lao
+ * @property {string} LAT Latin
+ * @property {string} LAV Latvian
+ * @property {string} LIT Lithuanian
+ * @property {string} MAL Malayalam
+ * @property {string} MAR Marathi
+ * @property {string} MKD Macedonian
+ * @property {string} MLT Maltese
+ * @property {string} MSA Malay
+ * @property {string} MYA Burmese
+ * @property {string} NEP Nepali
+ * @property {string} NLD Dutch; Flemish
+ * @property {string} NOR Norwegian
+ * @property {string} ORI Oriya
+ * @property {string} PAN Panjabi; Punjabi
+ * @property {string} POL Polish
+ * @property {string} POR Portuguese
+ * @property {string} PUS Pushto; Pashto
+ * @property {string} RON Romanian; Moldavian; Moldovan
+ * @property {string} RUS Russian
+ * @property {string} SAN Sanskrit
+ * @property {string} SIN Sinhala; Sinhalese
+ * @property {string} SLK Slovak
+ * @property {string} SLV Slovenian
+ * @property {string} SPA Spanish; Castilian
+ * @property {string} SPA_OLD Spanish; Castilian - Old
+ * @property {string} SQI Albanian
+ * @property {string} SRP Serbian
+ * @property {string} SRP_LATN Serbian - Latin
+ * @property {string} SWA Swahili
+ * @property {string} SWE Swedish
+ * @property {string} SYR Syriac
+ * @property {string} TAM Tamil
+ * @property {string} TEL Telugu
+ * @property {string} TGK Tajik
+ * @property {string} TGL Tagalog
+ * @property {string} THA Thai
+ * @property {string} TIR Tigrinya
+ * @property {string} TUR Turkish
+ * @property {string} UIG Uighur; Uyghur
+ * @property {string} UKR Ukrainian
+ * @property {string} URD Urdu
+ * @property {string} UZB Uzbek
+ * @property {string} UZB_CYRL Uzbek - Cyrillic
+ * @property {string} VIE Vietnamese
+ * @property {string} YID Yiddish
+ */
+
+/**
+ * @type {Languages} Exported lookup table; each value is the lower-cased form of its key.
+ */
+module.exports = {
+ AFR: 'afr',
+ AMH: 'amh',
+ ARA: 'ara',
+ ASM: 'asm',
+ AZE: 'aze',
+ AZE_CYRL: 'aze_cyrl',
+ BEL: 'bel',
+ BEN: 'ben',
+ BOD: 'bod',
+ BOS: 'bos',
+ BUL: 'bul',
+ CAT: 'cat',
+ CEB: 'ceb',
+ CES: 'ces',
+ CHI_SIM: 'chi_sim',
+ CHI_TRA: 'chi_tra',
+ CHR: 'chr',
+ CYM: 'cym',
+ DAN: 'dan',
+ DEU: 'deu',
+ DZO: 'dzo',
+ ELL: 'ell',
+ ENG: 'eng',
+ ENM: 'enm',
+ EPO: 'epo',
+ EST: 'est',
+ EUS: 'eus',
+ FAS: 'fas',
+ FIN: 'fin',
+ FRA: 'fra',
+ FRK: 'frk',
+ FRM: 'frm',
+ GLE: 'gle',
+ GLG: 'glg',
+ GRC: 'grc',
+ GUJ: 'guj',
+ HAT: 'hat',
+ HEB: 'heb',
+ HIN: 'hin',
+ HRV: 'hrv',
+ HUN: 'hun',
+ IKU: 'iku',
+ IND: 'ind',
+ ISL: 'isl',
+ ITA: 'ita',
+ ITA_OLD: 'ita_old',
+ JAV: 'jav',
+ JPN: 'jpn',
+ KAN: 'kan',
+ KAT: 'kat',
+ KAT_OLD: 'kat_old',
+ KAZ: 'kaz',
+ KHM: 'khm',
+ KIR: 'kir',
+ KOR: 'kor',
+ KUR: 'kur',
+ LAO: 'lao',
+ LAT: 'lat',
+ LAV: 'lav',
+ LIT: 'lit',
+ MAL: 'mal',
+ MAR: 'mar',
+ MKD: 'mkd',
+ MLT: 'mlt',
+ MSA: 'msa',
+ MYA: 'mya',
+ NEP: 'nep',
+ NLD: 'nld',
+ NOR: 'nor',
+ ORI: 'ori',
+ PAN: 'pan',
+ POL: 'pol',
+ POR: 'por',
+ PUS: 'pus',
+ RON: 'ron',
+ RUS: 'rus',
+ SAN: 'san',
+ SIN: 'sin',
+ SLK: 'slk',
+ SLV: 'slv',
+ SPA: 'spa',
+ SPA_OLD: 'spa_old',
+ SQI: 'sqi',
+ SRP: 'srp',
+ SRP_LATN: 'srp_latn',
+ SWA: 'swa',
+ SWE: 'swe',
+ SYR: 'syr',
+ TAM: 'tam',
+ TEL: 'tel',
+ TGK: 'tgk',
+ TGL: 'tgl',
+ THA: 'tha',
+ TIR: 'tir',
+ TUR: 'tur',
+ UIG: 'uig',
+ UKR: 'ukr',
+ URD: 'urd',
+ UZB: 'uzb',
+ UZB_CYRL: 'uzb_cyrl',
+ VIE: 'vie',
+ YID: 'yid',
+};
diff --git a/src/index.js b/src/index.js
index 6f0fb1f..fd1b742 100644
--- a/src/index.js
+++ b/src/index.js
@@ -11,11 +11,13 @@ require('regenerator-runtime/runtime');
const createScheduler = require('./createScheduler');
const createWorker = require('./createWorker');
const Tesseract = require('./Tesseract');
+const languages = require('./constants/languages');
const OEM = require('./constants/OEM');
const PSM = require('./constants/PSM');
const { setLogging } = require('./utils/log');
module.exports = {
+ languages,
OEM,
PSM,
createScheduler,