feat: Integrate pdfjs-dist for improved PDF parsing fallback
Test Suite / test (24.x) (push) Has been cancelled

Co-authored-by: Copilot <copilot@github.com>
This commit is contained in:
Nils-Johan Gynther
2026-05-03 20:09:45 +02:00
parent 99343f74af
commit 19ef7a4ea5
3 changed files with 294 additions and 5 deletions
@@ -288,13 +288,23 @@ export class ReceiptParsingService {
} catch (err) {
this.logger.warn(`pdf-parse misslyckades: ${err}`);
// Fallback to pdf-lib for more complex PDFs
// Fallback to pdfjs-dist for more complex PDFs
try {
const { PDFDocument } = await import('pdf-lib');
const pdfDoc = await PDFDocument.load(buffer);
text = pdfDoc.getPages().map(page => page.getText()).join(' ');
const pdfjsLib = await import('pdfjs-dist');
const loadingTask = pdfjsLib.getDocument({ data: buffer });
const pdfDocument = await loadingTask.promise;
let fullText = '';
for (let i = 1; i <= pdfDocument.numPages; i++) {
const page = await pdfDocument.getPage(i);
const textContent = await page.getTextContent();
const pageText = textContent.items.map(item => item.str).join(' ');
fullText += pageText + ' ';
}
text = fullText;
} catch (fallbackErr) {
this.logger.error(`Både pdf-parse och pdf-lib misslyckades: ${fallbackErr}`);
this.logger.error(`Både pdf-parse och pdfjs-dist misslyckades: ${fallbackErr}`);
throw new BadRequestException('PDF-filen kunde inte läsas. Kontrollera att filen inte är skadad eller krypterad.');
}
}