feat: Enhance PDF parsing with fallback to pdf-lib for improved handling of complex PDFs
Test Suite / test (24.x) (push) Has been cancelled
Test Suite / test (24.x) (push) Has been cancelled
Co-authored-by: Copilot <copilot@github.com>
This commit is contained in:
@@ -282,11 +282,21 @@ export class ReceiptParsingService {
|
||||
private async parseReceiptFromPdf(buffer: Buffer, apiKey: string): Promise<ParsedReceiptItemRaw[]> {
|
||||
let text: string;
|
||||
try {
|
||||
// Try pdf-parse first
|
||||
const data = await pdfParse(buffer);
|
||||
text = data.text;
|
||||
} catch (err) {
|
||||
this.logger.warn(`pdf-parse misslyckades: ${err}`);
|
||||
throw new BadRequestException('PDF-filen kunde inte läsas. Kontrollera att filen inte är skadad.');
|
||||
|
||||
// Fallback to pdf-lib for more complex PDFs
|
||||
try {
|
||||
const { PDFDocument } = await import('pdf-lib');
|
||||
const pdfDoc = await PDFDocument.load(buffer);
|
||||
text = pdfDoc.getPages().map(page => page.getText()).join(' ');
|
||||
} catch (fallbackErr) {
|
||||
this.logger.error(`Både pdf-parse och pdf-lib misslyckades: ${fallbackErr}`);
|
||||
throw new BadRequestException('PDF-filen kunde inte läsas. Kontrollera att filen inte är skadad eller krypterad.');
|
||||
}
|
||||
}
|
||||
|
||||
const combinedLines = preprocessPdfLines(text);
|
||||
|
||||
Reference in New Issue
Block a user