const SS = require('string-similarity');

// Short alias for console.log; used for the per-page diagnostics emitted by
// matchPageTextsToTranscription.
const l = console.log;

/**
 * Split the transcription into sections wherever the pause between two
 * consecutive words is at least `divideTime`, then pick, for every page text,
 * the section whose text is most similar to the cleaned page text.
 *
 * @param {Array[String]}   pageTexts       Raw page texts; the array index is reported as `pageNo`
 * @param {Array[Object]}   transcription   Entries exposing `word`, `start` and `end`
 * @param {Number}          divideTime      Minimum gap (`start` minus the previous `end`) that opens a new section
 *
 * @returns {Array[Object]} One entry per page containing:
 *                            * "pageNo", "pageText", "divideTime"
 *                            * "matchedText" and "similarity" of the best section
 *                              ('' and 0 when no section scores above 0)
 *                            * "startIndex", "endIndex", "start", "end" of that section
 *                              (undefined when no section scores above 0)
 */
export function matchBestPageTextByGap(pageTexts, transcription, divideTime) {
  const sections = [];
  let lastEnd = null;

  transcription.forEach(({ word, start, end }, index) => {
    const current = sections[sections.length - 1];

    // The very first word always opens a section. Relying on a sentinel
    // "previous end" breaks for negative timestamps or a non-positive /
    // missing divideTime and would dereference a section that does not exist.
    if (current === undefined || start - lastEnd >= divideTime) {
      sections.push({
        startIndex: index,
        endIndex: index,
        start,
        end,
        text: word,
      });
    } else {
      current.endIndex = index;
      current.end = end;
      current.text += ` ${word}`;
    }

    lastEnd = end;
  });

  return pageTexts.map((pageText, pageNo) => {
    const cleanedPageText = getCleanedWordArray(pageText).join(' ');

    // Strict ">" keeps the earliest section when several share the top score.
    let best = { matchedText: '', similarity: 0 };
    sections.forEach((section) => {
      const similarity = SS.compareTwoStrings(cleanedPageText, section.text);
      if (similarity > best.similarity) {
        best = {
          similarity,
          matchedText: section.text,
          startIndex: section.startIndex,
          endIndex: section.endIndex,
          start: section.start,
          end: section.end,
        };
      }
    });

    return {
      pageNo,
      pageText,
      matchedText: best.matchedText,
      similarity: best.similarity,
      startIndex: best.startIndex,
      endIndex: best.endIndex,
      start: best.start,
      end: best.end,
      divideTime,
    };
  });
}

/**
 * Run matchBestPageTextByGap once per candidate pause length and keep, for
 * every page, the candidate with the highest similarity.
 *
 * A candidate is only accepted when its similarity is truthy, the threshold is
 * truthy, the similarity reaches the threshold, and it strictly beats the
 * candidate stored so far for that page. Note that a falsy threshold
 * (0, undefined, ...) therefore accepts nothing and yields an empty object.
 *
 * @param {Array[String]}   pageTexts
 * @param {Array[Object]}   transcription
 * @param {Number}          similarityThreshold
 *
 * @returns {Object}        Accepted matches keyed by page number; pages without
 *                          an accepted match have no key
 */
export function matchBestPageTextByGaps(pageTexts, transcription, similarityThreshold) {
  // Candidate pause lengths, tried from the longest to the shortest.
  const gapCandidates = [
    3, 2.75, 2.5, 2.25, 2,
    1.9, 1.8, 1.7, 1.6, 1.5, 1.4, 1.3, 1.2, 1.1, 1.0,
    0.9, 0.8,
  ];
  const winners = {};

  for (const gap of gapCandidates) {
    matchBestPageTextByGap(pageTexts, transcription, gap).forEach((candidate) => {
      const score = candidate.similarity;
      if (!score || !similarityThreshold) {
        return;
      }

      const incumbent = winners[candidate.pageNo];
      const beatsIncumbent = !incumbent || score > incumbent.similarity;
      if (beatsIncumbent && score >= similarityThreshold) {
        winners[candidate.pageNo] = candidate;
      }
    });
  }

  return winners;
}

/**
 * Lower-case `text` and split it into runs of letters and digits; everything
 * else (punctuation, whitespace, symbols) acts as a separator.
 *
 * @param {String}          text
 *
 * @returns {Array[String]} The extracted tokens, or an empty array when there are none
 */
export function getCleanedWordArray(text) {
  // \p{L} needs the "u" flag, see
  // https://github.com/tc39/proposal-regexp-unicode-property-escapes
  const tokens = text.toLowerCase().match(/[\d\p{L}]+/gu);
  return tokens === null ? [] : tokens;
}

/**
 * Score how well `wordArray` matches `textArray` at position `textIndex`.
 *
 * The checks run from strictest to loosest and the first hit decides:
 *   1    - the same words, compared space-joined over an equally long window
 *   0.95 - the same characters once both sides are concatenated without spaces
 *   0.85 - same as above, with the text window one entry longer
 *   0.85 - same as above, with the text window one entry shorter
 *
 * @param {Array[String]}   textArray
 * @param {Array[String]}   wordArray
 * @param {Number}          textIndex
 *
 * @returns {Number|null}   Weight of the first matching check, or null when none matches
 */
function matchWordArrays(textArray, wordArray, textIndex) {
  const span = wordArray.length;
  const windowOf = (size) => textArray.slice(textIndex, textIndex + size);
  const sameSizeWindow = windowOf(span);

  if (wordArray.join(' ') === sameSizeWindow.join(' ')) {
    return 1;
  }

  const glued = wordArray.join('');
  if (glued === sameSizeWindow.join('')) {
    return 0.95;
  }
  if (glued === windowOf(span + 1).join('')) {
    return 0.85;
  }
  if (glued === windowOf(span - 1).join('')) {
    return 0.85;
  }

  return null;
}

/**
 * Find the first position at or after `offset` where `words` matches `textArray`.
 *
 * Only positions whose entry equals the first word are considered; each of
 * them is scored with matchWordArrays and the first truthy score wins.
 *
 * @param {Array[String]}   textArray
 * @param {Array[String]}   words
 * @param {Number}          offset      Index at which the search starts
 *
 * @returns {Object|null}   `{ offset, weight }` of the first match, or null when there is none
 */
function getFirstMatchingArrayPosition(textArray, words, offset = 0) {
  const anchorWord = words[0];

  for (
    let position = textArray.indexOf(anchorWord, offset);
    position !== -1;
    position = textArray.indexOf(anchorWord, position + 1)
  ) {
    const weight = matchWordArrays(textArray, words, position);
    if (weight) {
      return { offset: position, weight };
    }
  }

  return null;
}

/**
 * Collect every position at or after `initialOffset` where `wordsToMatch`
 * matches `textArray`, resuming the search one entry after each hit.
 *
 * @param {Array[String]}   textArray
 * @param {Array[String]}   wordsToMatch
 * @param {Number}          initialOffset   Index at which the search starts
 *
 * @returns {Array[Object]|null}  The `{ offset, weight }` hits in ascending
 *                                offset order, or null (not an empty array)
 *                                when nothing matches
 */
export function getMatchingArrayPositions(textArray, wordsToMatch, initialOffset = 0) {
  const found = [];
  let cursor = initialOffset;

  for (;;) {
    const hit = getFirstMatchingArrayPosition(textArray, wordsToMatch, cursor);
    if (!hit) {
      break;
    }
    found.push(hit);
    cursor = hit.offset + 1;
  }

  return found.length > 0 ? found : null;
}

/**
 * Find all matches of `wordsToMatch` in `textArray` and single out the one
 * with the highest weight (the earliest one when weights tie).
 *
 * @param {Array[String]}   textArray
 * @param {Array[String]}   wordsToMatch
 * @param {Number}          initialOffset   Index at which the search starts
 *
 * @returns {Object|null}   null when nothing matches, otherwise an object containing:
 *                            * "bestMatch" the `{ offset, weight }` hit with the highest weight
 *                            * "allMatches" every hit, as returned by getMatchingArrayPositions
 */
export function getBestMatchingArrayPosition(textArray, wordsToMatch, initialOffset = 0) {
  const allMatches = getMatchingArrayPositions(textArray, wordsToMatch, initialOffset);
  if (!allMatches) {
    return null;
  }

  let bestMatch = allMatches[0];
  for (const candidate of allMatches.slice(1)) {
    // Strict comparison: a later hit must be strictly heavier to take over.
    if (candidate.weight > bestMatch.weight) {
      bestMatch = candidate;
    }
  }

  return { bestMatch, allMatches };
}

/**
 * Join the words of the transcription entries from `startIndex` up to and
 * including `endIndex` into one space-separated string.
 *
 * @param {Array[Object]}   transcription   Entries exposing a `word` property
 * @param {Number}          startIndex      First entry to include
 * @param {Number}          endIndex        Last entry to include
 *
 * @returns {String}
 */
export function generateTranscribedPageText(transcription, startIndex, endIndex) {
  const selectedEntries = transcription.slice(startIndex, endIndex + 1);
  const selectedWords = selectedEntries.map((entry) => entry.word);
  return selectedWords.join(' ');
}

/**
 * Locate the part of `transcription` that corresponds to `text` by anchoring
 * the first and the last few words of the cleaned text in the transcription.
 *
 * The start anchor is searched with the first `matchingWordsLength` words; if
 * they are not found, the window slides one word to the right, at most up to
 * the middle of the text. The end anchor is searched the same way from the end
 * of the text, and only at or after the start anchor's transcription index.
 *
 * @param {String}          text
 * @param {Array[Object]}   transcription         Entries exposing `word`, `start` and `end`
 * @param {Number}          matchingWordsLength   Number of words used per anchor
 *
 * @returns {Object}        Return object containing:
 *                            * "startIndex" int|null   index of the first anchored transcription entry
 *                            * "start"                 `start` of that entry, or null
 *                            * "startMatches"          result of getBestMatchingArrayPosition, or null
 *                            * "startShift" int|null   words disregarded at the beginning of the text
 *                            * "endIndex" int|null     index of the last anchored transcription entry
 *                            * "end"                   `end` of that entry, or null
 *                            * "endMatches"            result of getBestMatchingArrayPosition, or null
 *                            * "endShift" int|null     words disregarded at the end of the text
 */
export function matchTextToTranscription(text, transcription, matchingWordsLength = 3) {
  const wordArray = transcription.map(({ word }) => word);
  const cleanedArray = getCleanedWordArray(text);
  const middle = cleanedArray.length / 2;

  // Look for the first 3 (or matchingWordsLength) matching words in the beginning of the cleaned word array
  // If the first words are not matched, then continue with disregarding the first word, etc
  let startMatches = null;
  let wordOffset = 0;
  while (startMatches === null && wordOffset < middle) {
    const startWords = cleanedArray.slice(wordOffset, wordOffset + matchingWordsLength);
    startMatches = getBestMatchingArrayPosition(wordArray, startWords);
    wordOffset += 1;
  }

  const startIndex = startMatches ? startMatches.bestMatch.offset : null;
  // wordOffset was advanced once more after the successful attempt.
  const startShift = startMatches ? wordOffset - 1 : null;

  // Look for the last 3 (or matchingWordsLength) matching words in the end of the cleaned word array
  // If the last words are not matched, then continue with disregarding the last word, etc
  let endMatches = null;
  let endWords = [];
  wordOffset = cleanedArray.length;
  while (endMatches === null && wordOffset > middle) {
    // Clamp the window start: a negative slice start would wrap around and
    // silently select a different (shorter) window for short texts.
    endWords = cleanedArray.slice(Math.max(0, wordOffset - matchingWordsLength), wordOffset);
    endMatches = getBestMatchingArrayPosition(wordArray, endWords, startIndex || 0);
    wordOffset -= 1;
  }

  let endIndex = null;
  let endShift = null;
  if (endMatches) {
    // The match offset points at the first anchor word; move to the last one.
    // Use the number of words actually searched and never run past the final
    // transcription entry.
    endIndex = Math.min(
      endMatches.bestMatch.offset + endWords.length - 1,
      transcription.length - 1
    );
    // wordOffset was decreased once more after the successful attempt, so the
    // matched window ended at wordOffset + 1 (mirrors startShift: 0 when the
    // very last words matched).
    endShift = cleanedArray.length - (wordOffset + 1);
  }

  return {
    startIndex,
    // Explicit null checks: index 0 is a valid match.
    start: startIndex !== null ? transcription[startIndex].start : null,
    startMatches,
    startShift,
    endIndex,
    end: endIndex !== null ? transcription[endIndex].end : null,
    endMatches,
    endShift,
  };
}

/**
 * Match every page text against the transcription with two strategies and
 * return, per page, whichever result is more similar to the page text:
 *
 *   * the anchor-based result of matchTextToTranscription, scored by comparing
 *     the transcription words between its start and end index with the
 *     cleaned page text, or
 *   * the pause-based result of matchBestPageTextByGaps (threshold 0.5).
 *
 * Anchor-based results with both indexes set are extended in place with
 * "pageNo", "pageText" and "similarity". Results without both indexes are left
 * untouched and are only returned when there is no pause-based result for the
 * page.
 *
 * @param {Array[String]}   pageTexts
 * @param {Array[Object]}   transcription
 *
 * @returns {Array[Object]} One result per page, in page order
 */
export function matchPageTextsToTranscription(pageTexts, transcription) {
  const gapResults = matchBestPageTextByGaps(pageTexts, transcription, 0.5);

  return pageTexts.map((pageText, pageNo) => {
    const item = matchTextToTranscription(pageText, transcription);
    const isAnchored = item.startIndex !== null && item.endIndex !== null;

    if (isAnchored) {
      const matchedText = generateTranscribedPageText(
        transcription,
        item.startIndex,
        item.endIndex
      );
      const cleanedPageText = getCleanedWordArray(pageText).join(' ');
      item.pageNo = pageNo;
      item.pageText = pageText;
      item.similarity = SS.compareTwoStrings(matchedText, cleanedPageText);
    }

    // Look the gap result up by page index rather than by item.pageNo: the
    // latter is only set for anchored pages, which made the fallback
    // unreachable for exactly the pages that need it.
    const gapResult = gapResults[pageNo];
    const useGapResult =
      Boolean(gapResult) && (!isAnchored || gapResult.similarity > item.similarity);

    if (useGapResult) {
      l(`page ${pageNo}: Use gapResults`);
      return gapResult;
    }
    l(`page ${pageNo}: Use transResults`);
    return item;
  });
}
