feat: Integrate pdfjs-dist for improved PDF parsing fallback
Test Suite / test (24.x) (push) Has been cancelled
Test Suite / test (24.x) (push) Has been cancelled
Co-authored-by: Copilot <copilot@github.com>
This commit is contained in:
@@ -288,13 +288,23 @@ export class ReceiptParsingService {
|
||||
} catch (err) {
|
||||
this.logger.warn(`pdf-parse misslyckades: ${err}`);
|
||||
|
||||
// Fallback to pdf-lib for more complex PDFs
|
||||
// Fallback to pdfjs-dist for more complex PDFs
|
||||
try {
|
||||
const { PDFDocument } = await import('pdf-lib');
|
||||
const pdfDoc = await PDFDocument.load(buffer);
|
||||
text = pdfDoc.getPages().map(page => page.getText()).join(' ');
|
||||
const pdfjsLib = await import('pdfjs-dist');
|
||||
const loadingTask = pdfjsLib.getDocument({ data: buffer });
|
||||
const pdfDocument = await loadingTask.promise;
|
||||
|
||||
let fullText = '';
|
||||
for (let i = 1; i <= pdfDocument.numPages; i++) {
|
||||
const page = await pdfDocument.getPage(i);
|
||||
const textContent = await page.getTextContent();
|
||||
const pageText = textContent.items.map(item => item.str).join(' ');
|
||||
fullText += pageText + ' ';
|
||||
}
|
||||
|
||||
text = fullText;
|
||||
} catch (fallbackErr) {
|
||||
this.logger.error(`Både pdf-parse och pdf-lib misslyckades: ${fallbackErr}`);
|
||||
this.logger.error(`Både pdf-parse och pdfjs-dist misslyckades: ${fallbackErr}`);
|
||||
throw new BadRequestException('PDF-filen kunde inte läsas. Kontrollera att filen inte är skadad eller krypterad.');
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user