import { createWorker } from "tesseract.js";
import _ from "lodash";
import XRegExp from "xregexp";
// Name of the Tesseract language handed to worker.loadLanguage / worker.initialize.
const LANGUAGE_CODE = "micr";
// Character whitelist given to Tesseract: the ten digits plus the letters a-d.
// NOTE(review): a-d presumably stand in for the MICR special symbols in the
// trained data — confirm against the traineddata file.
const MICR_CHARACTERS = "0123456789abcd";
// Recognized symbols are kept only when their confidence is strictly greater
// than this value.
const SYMBOL_CONFIDENCE_THRESHOLD_PERCENT = 45;
// Pattern applied to the parsed cheque line. In order it expects: one or more
// digits, two letters, a 4-5 digit `transit` group, one letter, a 3 digit
// `institution` group, one letter, and an `account` group made of digits
// and/or the letters c/d.
const CANADIAN_CHEQUE_REGEX = XRegExp(
  /[0-9]+[a-z]{2}(?<transit>[0-9]{4,5})[a-z]{1}(?<institution>[0-9]{3})[a-z]{1}(?<account>[dc0-9]+)/
);

/**
 * Runs OCR on a cheque image and extracts the transit, institution and
 * account numbers from the last recognized line of text.
 *
 * The outcome is delivered through `callback`, which receives one object:
 * - always `text`: the raw text recognized by Tesseract;
 * - `error: "NO_TEXT_BLOCKS_FOUND"` when no blocks or no lines were recognized;
 * - otherwise `parsedChequeLine`: the last line rebuilt from the symbols whose
 *   confidence exceeds `SYMBOL_CONFIDENCE_THRESHOLD_PERCENT`;
 * - `error: "NO_CHEQUE_NUMBERS_FOUND"` when that line does not match
 *   `CANADIAN_CHEQUE_REGEX`;
 * - on success, `confidence` (mean confidence of the retained symbols) and
 *   `numbers` (`transit`, `institution`, `account`, with non-digits stripped).
 *
 * If the Tesseract worker fails, the returned promise rejects and `callback`
 * is not invoked. The worker is terminated in every case.
 *
 * @param img - Image handed unchanged to `worker.recognize`.
 * @param langPath - Location from which the worker loads its language data.
 * @param callback - Function invoked with the response object described above.
 * @returns A promise resolving to whatever `callback` returns on the error
 *   paths, or to `undefined` on success.
 */
const OCRCheque = async (img: string, langPath: string, callback: any) => {
  const getConfidentSymbols = (words: Tesseract.Word[]) =>
    _.flatten(words.map(word => word.symbols)).filter(
      symbol => symbol.confidence > SYMBOL_CONFIDENCE_THRESHOLD_PERCENT
    );

  const removeNonNumericSymbols = (text: string) => text.replace(/\D/g, "");

  // Owns the whole worker lifecycle so the worker is terminated even when
  // loading, initialization or recognition rejects.
  const recognizeImage = async () => {
    const worker = createWorker({
      langPath: langPath
      // logger: m => console.log(m),
    });

    try {
      await worker.load();
      await worker.loadLanguage(LANGUAGE_CODE);
      await worker.initialize(LANGUAGE_CODE);
      await worker.setParameters({
        tessedit_char_whitelist: MICR_CHARACTERS
      });
      return await worker.recognize(img);
    } finally {
      await worker.terminate();
    }
  };

  const result = await recognizeImage();

  const response: any = { text: result.data.text };
  if (result.data.blocks.length === 0 || result.data.lines.length === 0) {
    return callback({ ...response, error: "NO_TEXT_BLOCKS_FOUND" });
  }

  // Only the last recognized line is treated as the cheque line.
  const lines = result.data.lines;
  const chequeLineWords = lines[lines.length - 1].words;
  const confidentSymbols = getConfidentSymbols(chequeLineWords);
  // NaN when no symbol passes the threshold; in that case the regex below
  // cannot match the empty line, so the value is never reported.
  const averageConfidence =
    confidentSymbols.reduce((value, symbol) => value + symbol.confidence, 0) /
    confidentSymbols.length;
  const parsedChequeLine = confidentSymbols.map(symbol => symbol.text).join("");
  response.parsedChequeLine = parsedChequeLine;
  const chequeMatches = XRegExp.exec(parsedChequeLine, CANADIAN_CHEQUE_REGEX);

  if (!chequeMatches) {
    return callback({ ...response, error: "NO_CHEQUE_NUMBERS_FOUND" });
  }

  response.confidence = averageConfidence;
  // Capture groups 1-3 are transit, institution and account, in that order.
  response.numbers = {
    transit: removeNonNumericSymbols(chequeMatches[1]),
    institution: removeNonNumericSymbols(chequeMatches[2]),
    account: removeNonNumericSymbols(chequeMatches[3])
  };
  callback(response);
};

export default OCRCheque;
