import { findSpans } from "unicode-default-word-boundary";
import escapeStringRegexp from "escape-string-regexp";

// Unicode property names whose union forms the "word-like" character class.
// Based on the Unicode-aware `\w` described at:
// https://github.com/tc39/proposal-regexp-unicode-property-escapes?tab=readme-ov-file#unicode-aware-version-of-w
const WORD_LIKE_PROPERTIES = [
  "Alphabetic",
  "Mark",
  "Decimal_Number",
  "Connector_Punctuation",
  "Join_Control",
  "Emoji_Presentation",
];

/**
 * Matches a single character that belongs to any of the Unicode properties
 * listed in `WORD_LIKE_PROPERTIES`. The pattern is unanchored and has only
 * the `u` flag, so testing a string answers "does it contain at least one
 * word-like character?".
 */
export const WORD_LIKE = new RegExp(
  `[${WORD_LIKE_PROPERTIES.map((property) => `\\p{${property}}`).join("")}]`,
  "u"
);

/**
 * Trims `context` down to the text surrounding the given search strings.
 *
 * The function locates the first, case-sensitive occurrence of every
 * non-empty search string, takes the smallest index range covering all of
 * those occurrences, widens that range to whole word-like spans and then adds
 * up to `contextWindowSize` additional word-like spans on each side. Spans
 * that are not word-like (whitespace, punctuation, ...) lying between the
 * chosen boundaries are kept as-is.
 *
 * If none of the search strings occurs in `context`, the beginning of
 * `context` is returned instead, cut off once `contextMaxWordLength`
 * word-like spans have been collected. Note that `contextMaxWordLength` is
 * only consulted in this no-match case.
 *
 * @param context - The full text to trim.
 * @param searchStrings - Literal strings to look for; empty strings are ignored.
 * @param options - `contextMaxWordLength` (default 200) caps the number of
 *   word-like spans returned when nothing matches; `contextWindowSize`
 *   (default 5) is the number of extra word-like spans kept on each side of
 *   the matched region, with `0` disabling the extra expansion.
 * @returns The trimmed excerpt of `context`.
 */
const trimHighlightContext = (
  context: string,
  searchStrings: string[],
  { contextMaxWordLength = 200, contextWindowSize = 5 } = {}
) => {
  let minIndex: number | undefined;
  let maxIndex: number | undefined;

  // find the convex hull of the first match of every search string
  for (const searchString of searchStrings) {
    // An empty string "matches" at index 0 of any context, which would drag
    // the hull to the very start of the text and mask the no-match fallback.
    if (searchString === "") continue;

    // The search string is escaped so it is matched literally; a non-global
    // match only reports the first occurrence.
    const match = context.match(escapeStringRegexp(searchString));
    if (!match || match.index === undefined) continue;

    const matchStart = match.index;
    const matchEnd = matchStart + searchString.length;

    minIndex =
      minIndex === undefined ? matchStart : Math.min(minIndex, matchStart);
    maxIndex =
      maxIndex === undefined ? matchEnd : Math.max(maxIndex, matchEnd);
  }

  if (minIndex === undefined || maxIndex === undefined) {
    // Nothing matched: keep the leading spans until enough words were seen.
    let trimmed = "";
    let wordCount = 0;
    for (const span of findSpans(context)) {
      if (wordCount >= contextMaxWordLength) break;

      trimmed += span.text;
      if (WORD_LIKE.test(span.text)) {
        wordCount++;
      }
    }

    return trimmed;
  }

  const spans = Array.from(findSpans(context));

  // expand the hull to the word boundaries of the context window on either side

  // Last word-like span starting at or before the hull start; falls back to
  // the first span when the hull begins before any word.
  let leftSpanIndex = 0;
  for (let spanIndex = spans.length - 1; spanIndex >= 0; spanIndex--) {
    const span = spans[spanIndex];
    if (!WORD_LIKE.test(span.text)) continue;

    if (span.start <= minIndex) {
      leftSpanIndex = spanIndex;
      break;
    }
  }

  // First word-like span ending at or after the hull end; falls back to the
  // last span when the hull ends after every word.
  let rightSpanIndex = spans.length - 1;
  for (let spanIndex = 0; spanIndex < spans.length; spanIndex++) {
    const span = spans[spanIndex];
    if (!WORD_LIKE.test(span.text)) continue;

    if (span.start + span.length >= maxIndex) {
      rightSpanIndex = spanIndex;
      break;
    }
  }

  if (contextWindowSize) {
    // further expand the hull to include contextWindowSize extra whole words
    let leftEnlargements = 0;
    for (let spanIndex = leftSpanIndex - 1; spanIndex >= 0; spanIndex--) {
      if (!WORD_LIKE.test(spans[spanIndex].text)) continue;

      leftSpanIndex = spanIndex;
      leftEnlargements++;

      if (leftEnlargements >= contextWindowSize) break;
    }

    let rightEnlargements = 0;
    for (
      let spanIndex = rightSpanIndex + 1;
      spanIndex < spans.length;
      spanIndex++
    ) {
      if (!WORD_LIKE.test(spans[spanIndex].text)) continue;

      rightSpanIndex = spanIndex;
      rightEnlargements++;

      if (rightEnlargements >= contextWindowSize) break;
    }
  }

  return spans
    .slice(leftSpanIndex, rightSpanIndex + 1)
    .map((s) => s.text)
    .join("");
};

export default trimHighlightContext;
