Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/naptha/tesseract.js.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBalearica <admin@scribeocr.com>2022-08-20 06:40:14 +0300
committerGitHub <noreply@github.com>2022-08-20 06:40:14 +0300
commitf372818146629fc6cdf0e16b42e048d9ebcdb5b0 (patch)
tree5ed62ad9614d1974553b199c688add19717baa6f
parent8b567609e38e728a225852632731dd19483342db (diff)
Added automatic detection of simd support (#641)
-rw-r--r--package-lock.json11
-rw-r--r--package.json1
-rw-r--r--src/worker-script/browser/getCore.js27
-rw-r--r--src/worker-script/index.js4
-rw-r--r--src/worker-script/node/getCore.js11
-rw-r--r--src/worker-script/utils/setImage.js6
-rw-r--r--src/worker/browser/defaultOptions.js4
7 files changed, 47 insertions, 17 deletions
diff --git a/package-lock.json b/package-lock.json
index b3dca23..095b0b3 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -21,6 +21,7 @@
"regenerator-runtime": "^0.13.3",
"resolve-url": "^0.2.1",
"tesseract.js-core": "^3.0.1",
+ "wasm-feature-detect": "^1.2.11",
"zlibjs": "^0.3.1"
},
"devDependencies": {
@@ -8740,6 +8741,11 @@
"node": ">=4.0.0"
}
},
+ "node_modules/wasm-feature-detect": {
+ "version": "1.2.11",
+ "resolved": "https://registry.npmjs.org/wasm-feature-detect/-/wasm-feature-detect-1.2.11.tgz",
+ "integrity": "sha512-HUqwaodrQGaZgz1lZaNioIkog9tkeEJjrM3eq4aUL04whXOVDRc/o2EGb/8kV0QX411iAYWEqq7fMBmJ6dKS6w=="
+ },
"node_modules/watchpack": {
"version": "2.4.0",
"resolved": "https://registry.npmjs.org/watchpack/-/watchpack-2.4.0.tgz",
@@ -16052,6 +16058,11 @@
"rx": "^4.1.0"
}
},
+ "wasm-feature-detect": {
+ "version": "1.2.11",
+ "resolved": "https://registry.npmjs.org/wasm-feature-detect/-/wasm-feature-detect-1.2.11.tgz",
+ "integrity": "sha512-HUqwaodrQGaZgz1lZaNioIkog9tkeEJjrM3eq4aUL04whXOVDRc/o2EGb/8kV0QX411iAYWEqq7fMBmJ6dKS6w=="
+ },
"watchpack": {
"version": "2.4.0",
"resolved": "https://registry.npmjs.org/watchpack/-/watchpack-2.4.0.tgz",
diff --git a/package.json b/package.json
index 4aaba7c..dbf1dd4 100644
--- a/package.json
+++ b/package.json
@@ -69,6 +69,7 @@
"regenerator-runtime": "^0.13.3",
"resolve-url": "^0.2.1",
"tesseract.js-core": "^3.0.1",
+ "wasm-feature-detect": "^1.2.11",
"zlibjs": "^0.3.1"
},
"repository": {
diff --git a/src/worker-script/browser/getCore.js b/src/worker-script/browser/getCore.js
index faea34e..f9d256e 100644
--- a/src/worker-script/browser/getCore.js
+++ b/src/worker-script/browser/getCore.js
@@ -1,15 +1,26 @@
-module.exports = (corePath, res) => {
+const { simd } = require('wasm-feature-detect');
+const { dependencies } = require('../../../package.json');
+
+module.exports = async (corePath, res) => {
if (typeof global.TesseractCore === 'undefined') {
res.progress({ status: 'loading tesseract core', progress: 0 });
- global.importScripts(corePath);
- /*
- * Depending on whether the browser supports WebAssembly,
- * the version of the TesseractCore will be different.
- */
+
+ // If the user specifies a core path, we use that as-is
+ // Otherwise, we pick the SIMD build only when the runtime reports SIMD support
+ let corePathImport = corePath;
+ if (!corePathImport) {
+ const simdSupport = await simd();
+ if (simdSupport) {
+ corePathImport = `https://unpkg.com/tesseract.js-core@v${dependencies['tesseract.js-core'].substring(1)}/tesseract-core-simd.wasm.js`;
+ } else {
+ corePathImport = `https://unpkg.com/tesseract.js-core@v${dependencies['tesseract.js-core'].substring(1)}/tesseract-core.wasm.js`;
+ }
+ }
+
+ global.importScripts(corePathImport);
+
if (typeof global.TesseractCoreWASM !== 'undefined' && typeof WebAssembly === 'object') {
global.TesseractCore = global.TesseractCoreWASM;
- } else if (typeof global.TesseractCoreASM !== 'undefined') {
- global.TesseractCore = global.TesseractCoreASM;
} else {
throw Error('Failed to load TesseractCore');
}
diff --git a/src/worker-script/index.js b/src/worker-script/index.js
index 8eeda62..1e2cfb6 100644
--- a/src/worker-script/index.js
+++ b/src/worker-script/index.js
@@ -28,10 +28,10 @@ let latestJob;
let adapter = {};
let params = defaultParams;
-const load = ({ workerId, jobId, payload: { options: { corePath, logging } } }, res) => {
+const load = async ({ workerId, jobId, payload: { options: { corePath, logging } } }, res) => {
setLogging(logging);
if (!TessModule) {
- const Core = adapter.getCore(corePath, res);
+ const Core = await adapter.getCore(corePath, res);
res.progress({ workerId, status: 'initializing tesseract', progress: 0 });
diff --git a/src/worker-script/node/getCore.js b/src/worker-script/node/getCore.js
index f3783b5..03469dd 100644
--- a/src/worker-script/node/getCore.js
+++ b/src/worker-script/node/getCore.js
@@ -1,12 +1,19 @@
+const { simd } = require('wasm-feature-detect');
+
let TesseractCore = null;
/*
* getCore is a sync function to load and return
* TesseractCore.
*/
-module.exports = (_, res) => {
+module.exports = async (_, res) => {
if (TesseractCore === null) {
+ const simdSupport = await simd();
res.progress({ status: 'loading tesseract core', progress: 0 });
- TesseractCore = require('tesseract.js-core');
+ if (simdSupport) {
+ TesseractCore = require('tesseract.js-core/tesseract-core-simd');
+ } else {
+ TesseractCore = require('tesseract.js-core/tesseract-core');
+ }
res.progress({ status: 'loaded tesseract core', progress: 1 });
}
return TesseractCore;
diff --git a/src/worker-script/utils/setImage.js b/src/worker-script/utils/setImage.js
index 351205c..3e09045 100644
--- a/src/worker-script/utils/setImage.js
+++ b/src/worker-script/utils/setImage.js
@@ -20,9 +20,9 @@ module.exports = (TessModule, api, image) => {
const exif = buf.slice(0, 500).toString().match(/\x01\x12\x00\x03\x00\x00\x00\x01\x00(.)/)?.[1]?.charCodeAt(0) || 1;
/*
- * Although leptonica should support reading bmp, there is a bug of "compressed BMP files".
- * As there is no solution, we need to use bmp-js for now.
- * @see https://groups.google.com/forum/#!topic/tesseract-ocr/4mPD9zTxdxE
+ * Leptonica supports uncompressed but not compressed bmp files
+ * @see https://github.com/DanBloomberg/leptonica/issues/607#issuecomment-1068802516
+ * We therefore use bmp-js to process all bmp files
*/
if (type && type.mime === 'image/bmp') {
const bmpBuf = bmp.decode(buf);
diff --git a/src/worker/browser/defaultOptions.js b/src/worker/browser/defaultOptions.js
index cef5e58..8127078 100644
--- a/src/worker/browser/defaultOptions.js
+++ b/src/worker/browser/defaultOptions.js
@@ -1,5 +1,5 @@
const resolveURL = require('resolve-url');
-const { version, dependencies } = require('../../../package.json');
+const { version } = require('../../../package.json');
const defaultOptions = require('../../constants/defaultOptions');
/*
@@ -14,5 +14,5 @@ module.exports = {
* If browser doesn't support WebAssembly,
* load ASM version instead
*/
- corePath: `https://unpkg.com/tesseract.js-core@v${dependencies['tesseract.js-core'].substring(1)}/tesseract-core.${typeof WebAssembly === 'object' ? 'wasm' : 'asm'}.js`,
+ corePath: null,
};