import * as pdfjsLib from 'pdfjs-dist';
import mammoth from 'mammoth';

/**
 * Extracts the plain text of every page of a PDF file using pdf.js.
 *
 * Text items within a page are joined with a single space; pages are joined
 * with a newline so that the last word of one page does not run into the
 * first word of the next.
 *
 * This is a best-effort helper: any failure while loading or reading the
 * document is logged and an empty string is returned instead of rejecting.
 *
 * @param file - The PDF file to read.
 * @returns The extracted text, or `''` if the document could not be read.
 */
export const extractTextFromPDF = async (file: File): Promise<string> => {
    // The worker script must match the pdf.js API version exactly, so derive the
    // URL from the installed library version instead of pinning a literal.
    // (A self-hosted alternative: window.location.origin + "/pdf.worker.min.mjs".)
    pdfjsLib.GlobalWorkerOptions.workerSrc = `https://unpkg.com/pdfjs-dist@${pdfjsLib.version}/build/pdf.worker.min.mjs`;

    let objectUrl: string | undefined;
    let loadingTask: ReturnType<typeof pdfjsLib.getDocument> | undefined;
    try {
        objectUrl = URL.createObjectURL(file);
        loadingTask = pdfjsLib.getDocument(objectUrl);
        const pdf = await loadingTask.promise;

        const pageTexts: string[] = [];
        for (let pageNum = 1; pageNum <= pdf.numPages; pageNum++) {
            const page = await pdf.getPage(pageNum);
            const content = await page.getTextContent();
            // Items without a `str` property carry no text and contribute ''.
            pageTexts.push(
                content.items.map((item) => ('str' in item ? item.str : '')).join(' ')
            );
        }
        return pageTexts.join('\n');
    } catch (error) {
        console.error('Failed to extract text from PDF:', error);
        return '';
    } finally {
        // Release the document/worker resources and the blob URL on every path.
        if (loadingTask) {
            // A cleanup failure must not turn a finished extraction into a rejection.
            await loadingTask.destroy().catch(() => undefined);
        }
        if (objectUrl !== undefined) {
            URL.revokeObjectURL(objectUrl);
        }
    }
};

/**
 * Extracts the raw text of a DOCX file using mammoth.
 *
 * The file is read fully into memory and handed to `mammoth.extractRawText`.
 * Errors from reading the file or from mammoth are not caught here; they
 * propagate to the caller as a rejected promise.
 *
 * @param file - The DOCX file to read.
 * @returns The `value` field of mammoth's raw-text extraction result.
 */
export const extractTextFromDOCX = async (file: File) => {
    const buffer = await file.arrayBuffer();
    const result = await mammoth.extractRawText({ arrayBuffer: buffer });
    return result.value;
};
