diff --git a/backend/package-lock.json b/backend/package-lock.json index 426ef1f..d8ca77c 100644 --- a/backend/package-lock.json +++ b/backend/package-lock.json @@ -16,6 +16,7 @@ "multer": "^1.4.5-lts.1", "pdf-lib": "^1.17.1", "pdf-parse": "^1.1.1", + "pdfjs-dist": "^5.7.284", "reflect-metadata": "^0.2.2", "rxjs": "^7.8.1", "tesseract.js": "^5.1.1" @@ -411,6 +412,271 @@ "node": ">=8" } }, + "node_modules/@napi-rs/canvas": { + "version": "0.1.100", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas/-/canvas-0.1.100.tgz", + "integrity": "sha512-xglYA6q3XO5P3BNJYxVZ1IV7DLVjp1Py6nwag88YntrS+3vKHyYcMqXVS4ZztJmwz2uGvz1FWhI/4LgbR5uQDA==", + "license": "MIT", + "optional": true, + "workspaces": [ + "e2e/*" + ], + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + }, + "optionalDependencies": { + "@napi-rs/canvas-android-arm64": "0.1.100", + "@napi-rs/canvas-darwin-arm64": "0.1.100", + "@napi-rs/canvas-darwin-x64": "0.1.100", + "@napi-rs/canvas-linux-arm-gnueabihf": "0.1.100", + "@napi-rs/canvas-linux-arm64-gnu": "0.1.100", + "@napi-rs/canvas-linux-arm64-musl": "0.1.100", + "@napi-rs/canvas-linux-riscv64-gnu": "0.1.100", + "@napi-rs/canvas-linux-x64-gnu": "0.1.100", + "@napi-rs/canvas-linux-x64-musl": "0.1.100", + "@napi-rs/canvas-win32-arm64-msvc": "0.1.100", + "@napi-rs/canvas-win32-x64-msvc": "0.1.100" + } + }, + "node_modules/@napi-rs/canvas-android-arm64": { + "version": "0.1.100", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-android-arm64/-/canvas-android-arm64-0.1.100.tgz", + "integrity": "sha512-hjhCKhntPv9+t4ckHymdx0phYNcVW+GKQR6Lzw2zE+pOVjOplSmtx9nNNknTjbEDLcuLZqA1y8ufKg1XfgftzQ==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + } + }, + "node_modules/@napi-rs/canvas-darwin-arm64": { + "version": "0.1.100", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-arm64/-/canvas-darwin-arm64-0.1.100.tgz", + "integrity": "sha512-2PcswRaC7Ly645DGt88///zuFDhJxJYdKAs1uU3mfk1atYkXufgcgLfBpk6Tm12nCQBaNt1wpybuPZ4qOhTo8A==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + } + }, + "node_modules/@napi-rs/canvas-darwin-x64": { + "version": "0.1.100", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-x64/-/canvas-darwin-x64-0.1.100.tgz", + "integrity": "sha512-ePNZtj7pNIva/siZMg+HmbeozkIjqUIYdoymH8HaA3qK7LfzFN4WMBM8G6HQ9ZC+H3+Dnn5pqtiXpgLykaPOhw==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + } + }, + "node_modules/@napi-rs/canvas-linux-arm-gnueabihf": { + "version": "0.1.100", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm-gnueabihf/-/canvas-linux-arm-gnueabihf-0.1.100.tgz", + "integrity": "sha512-d5cDB48oWFGU8/XPhUOFAlySgb/VAu7D+s8fi55K1Pcfg8aPplHWqMgibhVLU8ky7Pyg/fuiVLz4Nf3JrSTuUA==", + "cpu": [ + "arm" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + } + }, + "node_modules/@napi-rs/canvas-linux-arm64-gnu": { + "version": "0.1.100", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-gnu/-/canvas-linux-arm64-gnu-0.1.100.tgz", + "integrity": "sha512-rDxgxRu69RvDlX/bh9o22DxLsGr8EqsNgotL9+RwQE1S0b0cqeatqsw6aW45mukm0B42DIAaAacKaYQ8cqS1nw==", + "cpu": [ + "arm64" + ], + "libc": [ + "glibc" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + } + }, + "node_modules/@napi-rs/canvas-linux-arm64-musl": { + "version": "0.1.100", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-musl/-/canvas-linux-arm64-musl-0.1.100.tgz", + "integrity": "sha512-K3mDW66N+xT2/V439u1alFANiBUjdEx2gLiNYnCmUsva5jZMxWTjafBYwTzYK+EMFMHrUoabuU+T1BIP5CgbYQ==", + "cpu": [ + "arm64" + ], + "libc": [ + "musl" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + } + }, + "node_modules/@napi-rs/canvas-linux-riscv64-gnu": { + "version": "0.1.100", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-riscv64-gnu/-/canvas-linux-riscv64-gnu-0.1.100.tgz", + "integrity": "sha512-mooqUBTIsccZpnoQC4NgrC1v6C1vof39etLNMnBwCY+p0gajWJvAHLGQ6g/gGyS5YrpDW+GefSN4+Cvcr08UWw==", + "cpu": [ + "riscv64" + ], + "libc": [ + "glibc" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + } + }, + "node_modules/@napi-rs/canvas-linux-x64-gnu": { + "version": "0.1.100", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-gnu/-/canvas-linux-x64-gnu-0.1.100.tgz", + "integrity": "sha512-1eCvkDCazm7FFhsT7DfGOdSaHgZVK3bt/dSBl5EWHOWmnz+I7j8tPseJqqD81NF+MH21jKUK4wQSDjN0mdhnTg==", + "cpu": [ + "x64" + ], + "libc": [ + "glibc" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + } + }, + "node_modules/@napi-rs/canvas-linux-x64-musl": { + "version": "0.1.100", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-musl/-/canvas-linux-x64-musl-0.1.100.tgz", + "integrity": "sha512-20arT6lnI19S68qNlii73TSEDbECNgzMz2EpldC1V3mZFuRkeujXkcebRk0LRJe9SEUAooYiLokfMViY8IX7yA==", + "cpu": [ + "x64" + ], + "libc": [ + "musl" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + } + }, + "node_modules/@napi-rs/canvas-win32-arm64-msvc": { + "version": "0.1.100", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-win32-arm64-msvc/-/canvas-win32-arm64-msvc-0.1.100.tgz", + "integrity": "sha512-DZFFT1wIAg37LJw37yhMRFfjATd3vTQzjZ1Yki8u2vhO6Hi5VE6BVaGQ1aaDu7xb4iMErz+9EOwjpS7xcxFeBw==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + } + }, + "node_modules/@napi-rs/canvas-win32-x64-msvc": { + "version": "0.1.100", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-win32-x64-msvc/-/canvas-win32-x64-msvc-0.1.100.tgz", + "integrity": "sha512-MyT1j3mHC2+Lu4pBi9mKyMJhtP6U7k7EldY7sj/uS5gJA65gTXt8MefJQXLJo5d/vZbuWmfxzkEUNc/urV3pHA==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + } + }, "node_modules/@nestjs/cli": { "version": "10.4.9", "resolved": "https://registry.npmjs.org/@nestjs/cli/-/cli-10.4.9.tgz", @@ -3494,6 +3760,18 @@ "url": "https://github.com/sponsors/mehmet-kozan" } }, + "node_modules/pdfjs-dist": { + "version": "5.7.284", + "resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-5.7.284.tgz", + "integrity": "sha512-h4EdYQczmGhbOlqc3PPZwxevn7ApdWPbovAuWXOB/DjIyigSnwfy2oze7c6mRcSr9XgLp3eN3EeL4DyySTPMFw==", + "license": "Apache-2.0", + "engines": { + "node": ">=22.13.0 || >=24" + }, + "optionalDependencies": { + "@napi-rs/canvas": "^0.1.100" + } + }, "node_modules/picocolors": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", diff --git a/backend/package.json b/backend/package.json index 2076b4a..f072fdb 100644 --- a/backend/package.json +++ b/backend/package.json @@ -16,6 +16,7 @@ "multer": "^1.4.5-lts.1", "pdf-lib": "^1.17.1", "pdf-parse": "^1.1.1", + "pdfjs-dist": "^5.7.284", "reflect-metadata": "^0.2.2", "rxjs": "^7.8.1", "tesseract.js": "^5.1.1" diff --git a/backend/src/receipt-parsing/receipt-parsing.service.ts b/backend/src/receipt-parsing/receipt-parsing.service.ts index 3022893..c9edcef 100644 --- a/backend/src/receipt-parsing/receipt-parsing.service.ts +++ b/backend/src/receipt-parsing/receipt-parsing.service.ts @@ -288,13 +288,23 @@ export class ReceiptParsingService { } catch (err) { this.logger.warn(`pdf-parse misslyckades: ${err}`); - // Fallback to pdf-lib for more complex PDFs + // Fallback to pdfjs-dist for more complex PDFs try { - const { PDFDocument } = await import('pdf-lib'); - const pdfDoc = await PDFDocument.load(buffer); - text = pdfDoc.getPages().map(page => page.getText()).join(' '); + const pdfjsLib = await import('pdfjs-dist'); + const loadingTask = pdfjsLib.getDocument({ data: buffer }); + const pdfDocument = await loadingTask.promise; + + let fullText = ''; + for (let i = 1; i <= pdfDocument.numPages; i++) { + const page = await pdfDocument.getPage(i); + const textContent = await page.getTextContent(); + const pageText = textContent.items.map(item => item.str).join(' '); + fullText += pageText + ' '; + } + + text = fullText; } catch (fallbackErr) { - this.logger.error(`Både pdf-parse och pdf-lib misslyckades: ${fallbackErr}`); + this.logger.error(`Både pdf-parse och pdfjs-dist misslyckades: ${fallbackErr}`); throw new BadRequestException('PDF-filen kunde inte läsas. Kontrollera att filen inte är skadad eller krypterad.'); } }