import { useEffect, useState } from 'react';
import * as pdfjsLib from 'pdfjs-dist';
import { TextContent, TextItem } from 'pdfjs-dist/types/src/display/api';

// Path of the pdf.js worker script, appended to the app's PUBLIC_URL below.
const PDF_WORKER_PATH = '/pdfjs/pdf.worker.min.mjs';

// Configure the URL pdf.js uses for its worker script.
pdfjsLib.GlobalWorkerOptions.workerSrc = `${process.env.PUBLIC_URL}${PDF_WORKER_PATH}`;

import {
  EXCEPTIONS_NEW_ROW,
  EXCEPTIONS,
  EXCEPTIONS_LISTS,
  BOLD_FONT,
} from './constants';

/**
 * Whether a text item uses the configured bold font. Zero-height items are
 * never treated as bold.
 */
const isBold = (item: TextItem): boolean =>
  item.fontName === BOLD_FONT && item.height !== 0;

/**
 * Returns the item's text, wrapped in `<span>` when the item is bold.
 *
 * NOTE(review): `item.str` is interpolated without escaping. If consumers
 * render the result as HTML, text coming from the PDF could inject markup —
 * confirm how the returned strings are rendered.
 */
const printText = (item: TextItem): string =>
  isBold(item) ? `<span>${item.str}</span>` : item.str;

/**
 * Formats one accumulated line and decides which separator surrounds it:
 * - empty / whitespace-only line: nothing is emitted;
 * - line ending with '.': leading space, trailing newline;
 * - line starting with a digit: leading newline;
 * - line whose first character equals its upper-cased form (upper-case
 *   letters, but also punctuation such as '<'): leading newline, unless the
 *   first word is listed in EXCEPTIONS_NEW_ROW, in which case a space;
 * - anything else: leading space.
 */
const formatLine = (rawLine: string): string => {
  const line = rawLine.trim();

  // Guard: without this, an empty line would reach `firstChar.toUpperCase()`
  // with `firstChar === undefined` and throw, failing the whole document.
  if (line === '') {
    return '';
  }

  if (line.endsWith('.')) {
    return ` ${line}\n`;
  }

  const firstChar = line.charAt(0);

  if (!Number.isNaN(Number(firstChar))) {
    return `\n${line}`;
  }

  if (firstChar === firstChar.toUpperCase()) {
    const firstWord = line.split(' ')[0] ?? '';

    return (EXCEPTIONS_NEW_ROW.includes(firstWord) ? ' ' : '\n') + line;
  }

  return ` ${line}`;
};

/**
 * Converts the text items of a single page into one string.
 *
 * Items are appended to a pending line until an item with zero width and zero
 * height is met; that item flushes the pending line through `formatLine`.
 * The final item of the page, when it is a regular text item, flushes the
 * pending line with a single leading space.
 */
const formatPageText = (textContent: TextContent): string => {
  const { items } = textContent;
  const lastIndex = items.length - 1;
  const chunks: string[] = [];
  let pending = '';

  items.forEach((item, index) => {
    // Entries without a `width` are not text items; they contribute no text.
    if (!('width' in item)) {
      return;
    }

    if (item.width === 0 && item.height === 0) {
      chunks.push(formatLine(pending));
      pending = '';
      return;
    }

    if (index === lastIndex) {
      chunks.push(` ${pending}${printText(item)}`);
      pending = '';
      return;
    }

    const firstWord = item.str.split(' ')[0] ?? '';
    if (EXCEPTIONS.includes(firstWord) && !isBold(item)) {
      pending += ' ';
    }

    pending += printText(item);
  });

  // Only reachable when the last entry was not a text item; keeps the text
  // accumulated before it instead of dropping it.
  if (pending !== '') {
    chunks.push(` ${pending}`);
  }

  // NOTE(review): `replace` with a string pattern only replaces the first
  // occurrence per EXCEPTIONS_LISTS entry — confirm whether every occurrence
  // should be replaced.
  return EXCEPTIONS_LISTS.reduce(
    (acc, strSearch) => acc.replace(`${strSearch}</span>\n<span>`, ' '),
    chunks.join(''),
  );
};

/**
 * Loads a PDF with pdf.js and exposes its text, one formatted string per page.
 *
 * The document is (re)loaded whenever `pdf` changes. When `pdf` changes or the
 * component unmounts, the in-flight load is cancelled: its loading task is
 * destroyed and its result is ignored, so a stale load can neither overwrite
 * newer state nor update an unmounted component.
 *
 * @param pdf - Source of the PDF document, passed to `pdfjsLib.getDocument`.
 * @returns `textContent` — the formatted text of each page, in page order
 *   (empty until a load succeeds); `isLoading` — true while a load is running.
 */
const usePdfViewer = (pdf: string | URL) => {
  const [textContent, setTextContent] = useState<string[]>([]);
  const [isLoading, setIsLoading] = useState(false);

  useEffect(() => {
    // Set by the cleanup function; checked before every state update.
    let cancelled = false;

    setIsLoading(true);
    const loadingTask = pdfjsLib.getDocument(pdf);

    const load = async (): Promise<void> => {
      try {
        const doc = await loadingTask.promise;

        // Pages are 1-indexed; all pages are fetched and formatted in parallel.
        const texts = await Promise.all(
          Array.from({ length: doc.numPages }, async (_, pageIndex) => {
            const page = await doc.getPage(pageIndex + 1);

            return formatPageText(await page.getTextContent());
          }),
        );

        if (cancelled) {
          return;
        }

        setTextContent(texts);
        setIsLoading(false);
      } catch (err: unknown) {
        // A cancelled load may reject because its task was destroyed; that is
        // expected and must not touch state or be reported.
        if (cancelled) {
          return;
        }

        const message = err instanceof Error ? err.message : String(err);
        console.error('Error: ' + message);
        setIsLoading(false);
      }
    };

    void load();

    return () => {
      cancelled = true;
      // Best-effort teardown: a failure here is not actionable by callers.
      void loadingTask.destroy().catch(() => undefined);
    };
  }, [pdf]);

  return { textContent, isLoading };
};

export default usePdfViewer;
