Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/naptha/tesseract.js.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: WintrySnowman <52296952+WintrySnowman@users.noreply.github.com> 2022-08-11 07:02:14 +0300
committer: GitHub <noreply@github.com> 2022-08-11 07:02:14 +0300
commit: 67848464ac6d89d00c3404e0ac90f36cf98c7233 (patch)
tree: 19579cd8161f4972790b408812cafb3b53897e9e
parent: be956cd8898acedf792101811e0b3b00351fd8ea (diff)
Add support for ImageData and fix a hang in buffer handling (#610)
-rw-r--r-- src/worker-script/utils/setImage.js 30
-rw-r--r-- src/worker/browser/loadImage.js 4
2 files changed, 25 insertions, 9 deletions
diff --git a/src/worker-script/utils/setImage.js b/src/worker-script/utils/setImage.js
index e9918db..c8fabcd 100644
--- a/src/worker-script/utils/setImage.js
+++ b/src/worker-script/utils/setImage.js
@@ -9,27 +9,37 @@ const fileType = require('file-type');
* @access public
*/
module.exports = (TessModule, api, image) => {
- const buf = Buffer.from(Array.from({ ...image, length: Object.keys(image).length }));
- const type = fileType(buf);
+ const buf = (image instanceof Uint8Array) ? Buffer.from(image) : null;
+ const type = buf ? fileType(buf) : null;
let bytesPerPixel = 0;
let data = null;
let pix = null;
let w = 0;
let h = 0;
- /*
- * Although leptonica should support reading bmp, there is a bug of "compressed BMP files".
- * As there is no solution, we need to use bmp-js for now.
- * @see https://groups.google.com/forum/#!topic/tesseract-ocr/4mPD9zTxdxE
- */
- if (type && type.mime === 'image/bmp') {
+
+ if (image instanceof ImageData) {
+ // The pixel format of ImageData is RGBA and technically
+ // Tesseract is expecting ABGR, but it does not seem to matter
+ // in practice, so to save effort the bytes are not rearranged.
+ data = TessModule._malloc(image.data.byteLength);
+ TessModule.HEAPU8.set(image.data, data);
+ w = image.width;
+ h = image.height;
+ bytesPerPixel = 4;
+ } else if (buf && type && type.mime === 'image/bmp') {
+ /*
+ * Although leptonica should support reading bmp, there is a bug of "compressed BMP files".
+ * As there is no solution, we need to use bmp-js for now.
+ * @see https://groups.google.com/forum/#!topic/tesseract-ocr/4mPD9zTxdxE
+ */
const bmpBuf = bmp.decode(buf);
data = TessModule._malloc(bmpBuf.data.length * Uint8Array.BYTES_PER_ELEMENT);
TessModule.HEAPU8.set(bmpBuf.data, data);
w = bmpBuf.width;
h = bmpBuf.height;
bytesPerPixel = 4;
- } else {
+ } else if (buf) {
const ptr = TessModule._malloc(buf.length * Uint8Array.BYTES_PER_ELEMENT);
TessModule.HEAPU8.set(buf, ptr);
pix = TessModule._pixReadMem(ptr, buf.length);
@@ -44,6 +54,8 @@ module.exports = (TessModule, api, image) => {
.map((v, idx) => (
TessModule.getValue(pix + (idx * 4), 'i32')
));
+ } else {
+ throw Error('Unsupported image data container');
}
/*
diff --git a/src/worker/browser/loadImage.js b/src/worker/browser/loadImage.js
index 3102967..34f047d 100644
--- a/src/worker/browser/loadImage.js
+++ b/src/worker/browser/loadImage.js
@@ -82,6 +82,10 @@ const loadImage = async (image) => {
img = await fixOrientationFromUrlOrBlobOrFile(img);
}
data = await readFromBlobOrFile(img);
+ } else if (image instanceof ImageData) {
+ // Support a canvas' ImageData by passing it directly,
+ // where it is handled by setImage() without conversion.
+ return image;
}
return new Uint8Array(data);