diff --git a/.kilo/plans/1779211060426-happy-island.md b/.kilo/plans/1779211060426-happy-island.md new file mode 100644 index 00000000..b3421fdd --- /dev/null +++ b/.kilo/plans/1779211060426-happy-island.md @@ -0,0 +1,125 @@ +# Plan: Omgjord flyerimport (pdf-parse + tesseract + Mistral Tiny) + +## Mål +Ersätta nuvarande flyerimportflöde (som idag delegerar till `importer-api`) med en robust pipeline som: +1) extraherar text från flyer-PDF (primärt `pdf-parse`, fallback OCR via Tesseract), +2) skickar normaliserad text till Mistral Tiny, +3) returnerar strikt strukturerad JSON, +4) behåller befintlig matchning/planeringsflöde i backend + Flutter men förbättrar UX kring importresultat och fel. + +## Nulägesanalys (projektanpassad) +- Backend endpoint finns: `POST /flyer-import/parse` i `backend/src/flyer-import/flyer-import.controller.ts`. +- Nuvarande backendlogik i `backend/src/flyer-import/flyer-import.service.ts` anropar extern tjänst via `IMPORTER_SERVICE_URL` (`/api/flyer/parse`). +- Flutter har redan komplett flyerflik i `flutter/lib/features/import/presentation/flyer_import_tab.dart`: + - filval, importknapp, preview, radrendering med checkboxar, bulk-planering. +- Datamodell för sessions/items finns redan i Prisma (`FlyerSession`, `FlyerItem`, `FlyerSelection`) och stödjer parse+match-metadata. +- `flyerimporter.md` beskriver rätt riktning men är generisk; projektet behöver NestJS-integration och kompatibilitet med befintliga DTO/Flutter-modeller. + +## Föreslagen arkitektur (ersättning av dagens lösning) + +### 1) Ny intern parser i recipe-api (NestJS) +- Ersätt `parseViaImporter(...)` i `FlyerImportService` med lokal pipeline: + - `extractFlyerText(file)` + - PDF/text-extraktion via `pdf-parse`. + - Fallback OCR via Tesseract för sidor/underlag utan användbar text. + - `parseFlyerWithMistral(text)` + - Mistral Tiny-anrop med strikt JSON-schema-prompt. 
+ - `normalizeFlyerItems(aiJson)` + - validering, typkonvertering, enhetsnormalisering, confidence/reasonCodes. +- Behåll resten av tjänsten intakt (matchning, sessionpersistens, selections-kompatibilitet). + +### 2) AI-kontrakt (strikt JSON) +- Introducera explicit schema för AI-svar (intern typ + runtime-validering): + - `rawName`, `normalizedName`, `category`, `price`, `priceUnit`, `comparisonPrice`, `comparisonUnit`, `offerText`, `confidence`, `reasonCodes`. +- Promptdesign: + - svensk flyer-kontext, + - tydlig enhets- och prisnormalisering, + - "returnera ENDAST JSON" + exempel, + - fallback vid saknade fält (`null`, tomma listor). +- Robust parsing av modelloutput: + - ta bort ev. markdown fences, + - fail-fast med tydligt felmeddelande om ogiltigt JSON. + +### 3) OCR-strategi +- Primärväg: `pdf-parse` (snabb, billig). +- OCR-fallback: bara när extraherad text är tom/under tröskel. +- Preprocess för OCR (vid behov): sidvis rasterisering + språk `swe` (ev. `swe+eng`). +- Timeout/guardrails per steg för att undvika låsta importer. + +### 4) API/infra-anpassning +- Controller (`flyer-import.controller.ts`): + - uppdatera tillåtna MIME-typer så de matchar Flutter-filtyper (PDF + bilder om vi ska stödja bildflyers). +- `compose.yml`/env: + - gör `IMPORTER_SERVICE_URL` optional eller avveckla för flyerflödet. + - säkerställ `MISTRAL_API_KEY` används av `recipe-api` för flyer. +- Dokumentation: + - uppdatera teknisk beskrivning så flyerimport inte längre kräver extern flyer-parser. + +## UX-analys Flutter (nuvarande) och planerade förbättringar + +### Nuvarande UX (bra att bygga vidare på) +- Enkel 3-stegsinteraktion: välj fil -> importera -> markera/planera. +- Förhandsvisning finns och passar arbetsflödet. +- Offer-badge + pris/jämförpris + matchvisning ger snabb scanning. + +### UX-gap att täppa till i denna implementation +- Ingen tydlig visning av parserwarnings från backend (fältet `warnings` finns i modellen). 
+- Ingen kvalitetssignal i UI trots att `parseConfidence/matchConfidence` finns. +- Felmeddelanden är relativt råa; saknar råd per feltyp (timeout, ogiltig fil, AI-svar oformaterat). + +### Föreslagna UX-förbättringar (inkrementella, kompatibla) +1. Visa `warnings` över resultatlistan i en kompakt varningspanel. +2. Lägg till "kvalitetsindikator" per rad (t.ex. låg/medel/hög) baserat på `parseConfidence` + `matchConfidence`. +3. Lägg till filterchips: `Endast erbjudanden`, `Saknar matchning`, `Låg kvalitet`. +4. Förbättra loading-state med stegnära text ("Extraherar text", "Tolkar med AI", "Matchar produkter"). +5. Felmappning till användarvänliga meddelanden i `showErrorDialog` (teknisk detalj i kopierbar sekundärtext). + +## Implementationsplan (ordning) + +### Fas A - Backend kärna +1. Lägg till dependencies i `backend/package.json` för PDF/OCR/Mistral-klient. +2. Skapa intern flyer-parser service i `backend/src/flyer-import/` (text extraction + AI parse). +3. Byt `parseViaImporter` till intern implementation i `FlyerImportService`. +4. Lägg till runtime-validering och normalisering av AI-svar. + +### Fas B - Kontrakt och robusthet +5. Säkerställ att response-format fortsatt matchar `FlyerImportResponse` (ingen breaking change mot Flutter). +6. Förbättra controller MIME-regler så de stämmer med faktiska stödda format. +7. Lägg till tydliga felkoder/meddelanden för: + - tom/oläsbar flyer, + - AI-parsefel, + - timeout/service unavailable. + +### Fas C - Flutter UX på befintlig skärm +8. Visa backend `warnings` i `flyer_import_tab.dart`. +9. Lägg till kvalitetsindikator + minimala filterchips. +10. Förfina loading/feltexter utan att ändra grundlayouten. + +### Fas D - Verifiering +11. Backendtester för intern flyer-parser (happy path + fallback + felbanor). +12. Uppdatera/addera Flutter widgettester för warnings/indikator/filter. +13. Manuell E2E: PDF med text, PDF med skannade sidor, bildflyer, trasig fil. 
+ +## Filer som sannolikt berörs vid implementation +- `backend/src/flyer-import/flyer-import.service.ts` +- `backend/src/flyer-import/flyer-import.controller.ts` +- `backend/src/flyer-import/dto/flyer-import.response.ts` (endast om extra metadata behövs) +- `backend/package.json` +- `flutter/lib/features/import/presentation/flyer_import_tab.dart` +- Ev. `flutter/lib/features/import/domain/flyer_import_item.dart` (om ny UI-metadata exponeras) +- Dokumentation: `TEKNISK_BESKRIVNING.md` (kort uppdatering av arkitektur) + +## Risker och mitigering +- OCR-prestanda/latens: använd fallback-only och timeout. +- Mistral kan ge semistrukturerat svar: strikt schema + robust JSON-sanitizing + validering. +- Kostnad/kvot på AI-anrop: minimera promptstorlek, trunkera brus, återanvänd normalisering. +- Driftöverraskningar: behåll endpoint-kontrakt oförändrat mot Flutter. + +## Acceptance criteria +- Flyerimport fungerar utan beroende av extern `/api/flyer/parse` i importer-api. +- Minst en PDF med inbäddad text och en skannad PDF importeras framgångsrikt. +- Backend returnerar valid `FlyerImportResponse` och befintlig planeringsfunktion fortsätter fungera. +- Flutter visar warnings och gör det tydligare vilka rader som behöver manuell granskning. + +## Fastställt beslut +- Första leveransen ska stödja **PDF + bildfiler** (`png/jpg/webp`) fullt ut. 
diff --git a/backend/package-lock.json b/backend/package-lock.json index 344e6b7b..c5f0039e 100644 --- a/backend/package-lock.json +++ b/backend/package-lock.json @@ -8,6 +8,7 @@ "name": "recipe-api", "version": "0.0.1", "dependencies": { + "@mistralai/mistralai": "^0.5.0", "@nestjs/common": "^11.1.19", "@nestjs/core": "^11.1.19", "@nestjs/jwt": "^11.0.2", @@ -22,10 +23,12 @@ "multer": "^2.1.1", "passport": "^0.7.0", "passport-jwt": "^4.0.1", + "pdf-parse": "^1.1.1", "prisma": "6.12.0", "reflect-metadata": "^0.2.2", "rxjs": "^7.8.1", "sharp": "^0.33.5", + "tesseract.js": "^5.1.1", "uuid": "^11.1.0" }, "devDependencies": { @@ -38,6 +41,7 @@ "@types/multer": "^1.4.12", "@types/node": "^22.15.29", "@types/passport-jwt": "^4.0.1", + "@types/pdf-parse": "^1.1.5", "@types/supertest": "^7.2.0", "@types/uuid": "^10.0.0", "@typescript-eslint/eslint-plugin": "^8.46.2", @@ -2145,6 +2149,15 @@ "node": ">=8" } }, + "node_modules/@mistralai/mistralai": { + "version": "0.5.0", + "resolved": "https://registry.npmjs.org/@mistralai/mistralai/-/mistralai-0.5.0.tgz", + "integrity": "sha512-56xfoC/0CiT0RFHrRNoJYSKCNc922EyHzEPJYY6ttalQ5KZdrNVgXeOetIGX0lDx7IjbxAJrrae2MQgUIlL9+g==", + "license": "ISC", + "dependencies": { + "node-fetch": "^2.6.7" + } + }, "node_modules/@nestjs/cli": { "version": "11.0.21", "resolved": "https://registry.npmjs.org/@nestjs/cli/-/cli-11.0.21.tgz", @@ -2810,6 +2823,16 @@ "@types/passport": "*" } }, + "node_modules/@types/pdf-parse": { + "version": "1.1.5", + "resolved": "https://registry.npmjs.org/@types/pdf-parse/-/pdf-parse-1.1.5.tgz", + "integrity": "sha512-kBfrSXsloMnUJOKi25s3+hRmkycHfLK6A09eRGqF/N8BkQoPUmaCr+q8Cli5FnfohEz/rsv82zAiPz/LXtOGhA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@types/qs": { "version": "6.15.1", "resolved": "https://registry.npmjs.org/@types/qs/-/qs-6.15.1.tgz", @@ -3795,6 +3818,12 @@ "readable-stream": "^3.4.0" } }, + "node_modules/bmp-js": { + "version": "0.1.0", + 
"resolved": "https://registry.npmjs.org/bmp-js/-/bmp-js-0.1.0.tgz", + "integrity": "sha512-vHdS19CnY3hwiNdkaqk93DvjVLfbEcI8mys4UjuWrlX1haDmroo8o4xCzh4wD6DGV6HxRCyauwhHRqMTfERtjw==", + "license": "MIT" + }, "node_modules/body-parser": { "version": "2.2.2", "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-2.2.2.tgz", @@ -5890,6 +5919,12 @@ "url": "https://opencollective.com/express" } }, + "node_modules/idb-keyval": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/idb-keyval/-/idb-keyval-6.2.2.tgz", + "integrity": "sha512-yjD9nARJ/jb1g+CvD0tlhUHOrJ9Sy0P8T9MF3YaLlHnSRpwPfpTX0XIvpmw3gAJUmEu3FiICLBDPXVwyEvrleg==", + "license": "Apache-2.0" + }, "node_modules/ieee754": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", @@ -6017,6 +6052,12 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/is-electron": { + "version": "2.2.2", + "resolved": "https://registry.npmjs.org/is-electron/-/is-electron-2.2.2.tgz", + "integrity": "sha512-FO/Rhvz5tuw4MCWkpMzHFKWD2LsfHzIb7i6MdPYZ/KW7AlxawyLkqdy+jPZP1WubqEADE3O4FUENlJHDfQASRg==", + "license": "MIT" + }, "node_modules/is-extglob": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", @@ -6112,6 +6153,12 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/is-url": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/is-url/-/is-url-1.2.4.tgz", + "integrity": "sha512-ITvGim8FhRiYe4IQ5uHSkj7pVaPDrCTkNd3yq3cV7iZAcJdHTUMPMEHcqSOy9xZ9qFenQCvi+2wjH9a1nXqHww==", + "license": "MIT" + }, "node_modules/isexe": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", @@ -7573,6 +7620,32 @@ "lodash": "^4.17.21" } }, + "node_modules/node-ensure": { + "version": "0.0.0", + "resolved": "https://registry.npmjs.org/node-ensure/-/node-ensure-0.0.0.tgz", + "integrity": 
"sha512-DRI60hzo2oKN1ma0ckc6nQWlHU69RH6xN0sjQTjMpChPfTYvKZdcQFfdYK2RWbJcKyUizSIy/l8OTGxMAM1QDw==", + "license": "MIT" + }, + "node_modules/node-fetch": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", + "license": "MIT", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, "node_modules/node-int64": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/node-int64/-/node-int64-0.4.0.tgz", @@ -7668,6 +7741,15 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/opencollective-postinstall": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/opencollective-postinstall/-/opencollective-postinstall-2.0.3.tgz", + "integrity": "sha512-8AV/sCtuzUeTo8gQK5qDZzARrulB3egtLzFgteqB2tcT4Mw7B8Kt7JcDHmltjz6FOAHsvTevk70gZEbhM4ZS9Q==", + "license": "MIT", + "bin": { + "opencollective-postinstall": "index.js" + } + }, "node_modules/optionator": { "version": "0.9.4", "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz", @@ -7921,6 +8003,22 @@ "resolved": "https://registry.npmjs.org/pause/-/pause-0.0.1.tgz", "integrity": "sha512-KG8UEiEVkR3wGEb4m5yZkVCzigAD+cVEJck2CzYZO37ZGJfctvVptVO192MwrtPhzONn6go8ylnOdMhKqi4nfg==" }, + "node_modules/pdf-parse": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/pdf-parse/-/pdf-parse-1.1.4.tgz", + "integrity": "sha512-XRIRcLgk6ZnUbsHsYXExMw+krrPE81hJ6FQPLdBNhhBefqIQKXu/WeTgNBGSwPrfU0v+UCEwn7AoAUOsVKHFvQ==", + "license": "MIT", + "dependencies": { + "node-ensure": "^0.0.0" + }, + "engines": { + "node": ">=6.8.1" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/mehmet-kozan" + } + }, "node_modules/picocolors": { 
"version": "1.1.1", "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", @@ -8171,6 +8269,12 @@ "integrity": "sha512-urBwgfrvVP/eAyXx4hluJivBKzuEbSQs9rKWCrCkbSxNv8mxPcUZKeuoF3Uy4mJl3Lwprp6yy5/39VWigZ4K6Q==", "license": "Apache-2.0" }, + "node_modules/regenerator-runtime": { + "version": "0.13.11", + "resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.13.11.tgz", + "integrity": "sha512-kY1AZVr2Ra+t+piVaJ4gxaFaReZVH40AKNo7UCX6W+dEwBo/2oZJzqfuN1qLq1oL45o56cPaTXELwrTh8Fpggg==", + "license": "MIT" + }, "node_modules/require-directory": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", @@ -9050,6 +9154,31 @@ "dev": true, "license": "MIT" }, + "node_modules/tesseract.js": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/tesseract.js/-/tesseract.js-5.1.1.tgz", + "integrity": "sha512-lzVl/Ar3P3zhpUT31NjqeCo1f+D5+YfpZ5J62eo2S14QNVOmHBTtbchHm/YAbOOOzCegFnKf4B3Qih9LuldcYQ==", + "hasInstallScript": true, + "license": "Apache-2.0", + "dependencies": { + "bmp-js": "^0.1.0", + "idb-keyval": "^6.2.0", + "is-electron": "^2.2.2", + "is-url": "^1.2.4", + "node-fetch": "^2.6.9", + "opencollective-postinstall": "^2.0.3", + "regenerator-runtime": "^0.13.3", + "tesseract.js-core": "^5.1.1", + "wasm-feature-detect": "^1.2.11", + "zlibjs": "^0.3.1" + } + }, + "node_modules/tesseract.js-core": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/tesseract.js-core/-/tesseract.js-core-5.1.1.tgz", + "integrity": "sha512-KX3bYSU5iGcO1XJa+QGPbi+Zjo2qq6eBhNjSGR5E5q0JtzkoipJKOUQD7ph8kFyteCEfEQ0maWLu8MCXtvX5uQ==", + "license": "Apache-2.0" + }, "node_modules/test-exclude": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/test-exclude/-/test-exclude-6.0.0.tgz", @@ -9151,6 +9280,12 @@ "url": "https://github.com/sponsors/Borewit" } }, + "node_modules/tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", 
+ "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", + "license": "MIT" + }, "node_modules/ts-api-utils": { "version": "2.5.0", "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.5.0.tgz", @@ -9537,6 +9672,12 @@ "makeerror": "1.0.12" } }, + "node_modules/wasm-feature-detect": { + "version": "1.8.0", + "resolved": "https://registry.npmjs.org/wasm-feature-detect/-/wasm-feature-detect-1.8.0.tgz", + "integrity": "sha512-zksaLKM2fVlnB5jQQDqKXXwYHLQUVH9es+5TOOHwGOVJOCeRBCiPjwSg+3tN2AdTCzjgli4jijCH290kXb/zWQ==", + "license": "Apache-2.0" + }, "node_modules/watchpack": { "version": "2.5.1", "resolved": "https://registry.npmjs.org/watchpack/-/watchpack-2.5.1.tgz", @@ -9561,6 +9702,12 @@ "defaults": "^1.0.3" } }, + "node_modules/webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", + "license": "BSD-2-Clause" + }, "node_modules/webpack": { "version": "5.106.0", "resolved": "https://registry.npmjs.org/webpack/-/webpack-5.106.0.tgz", @@ -9668,6 +9815,16 @@ "url": "https://opencollective.com/webpack" } }, + "node_modules/whatwg-url": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "license": "MIT", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", @@ -9814,6 +9971,15 @@ "funding": { "url": "https://github.com/sponsors/sindresorhus" } + }, + "node_modules/zlibjs": { + "version": "0.3.1", + "resolved": "https://registry.npmjs.org/zlibjs/-/zlibjs-0.3.1.tgz", + "integrity": 
"sha512-+J9RrgTKOmlxFSDHo0pI1xM6BLVUv+o0ZT9ANtCxGkjIVCCUdx9alUF8Gm+dGLKbkkkidWIHFDZHDMpfITt4+w==", + "license": "MIT", + "engines": { + "node": "*" + } } } } diff --git a/backend/package.json b/backend/package.json index 01a29b2a..ff05bc7a 100644 --- a/backend/package.json +++ b/backend/package.json @@ -18,6 +18,7 @@ "test:watch": "jest --watch" }, "dependencies": { + "@mistralai/mistralai": "^0.5.0", "@nestjs/common": "^11.1.19", "@nestjs/core": "^11.1.19", "@nestjs/jwt": "^11.0.2", @@ -32,10 +33,12 @@ "multer": "^2.1.1", "passport": "^0.7.0", "passport-jwt": "^4.0.1", + "pdf-parse": "^1.1.1", "prisma": "6.12.0", "reflect-metadata": "^0.2.2", "rxjs": "^7.8.1", "sharp": "^0.33.5", + "tesseract.js": "^5.1.1", "uuid": "^11.1.0" }, "devDependencies": { @@ -48,6 +51,7 @@ "@types/multer": "^1.4.12", "@types/node": "^22.15.29", "@types/passport-jwt": "^4.0.1", + "@types/pdf-parse": "^1.1.5", "@types/supertest": "^7.2.0", "@types/uuid": "^10.0.0", "@typescript-eslint/eslint-plugin": "^8.46.2", @@ -67,6 +71,9 @@ "js", "json", "ts" + ], + "transformIgnorePatterns": [ + "node_modules/(@mistralai)" ] } } diff --git a/backend/src/app.security.spec.ts b/backend/src/app.security.spec.ts index 582ed41c..48eb0cb7 100644 --- a/backend/src/app.security.spec.ts +++ b/backend/src/app.security.spec.ts @@ -4,7 +4,7 @@ import { ThrottlerGuard, ThrottlerModule } from '@nestjs/throttler'; import { JwtAuthGuard } from './auth/jwt-auth.guard'; import { RolesGuard } from './auth/roles.guard'; -describe('App security configuration', () => { +describe('App security configuration', () => { function getAppModuleClass() { process.env.JWT_SECRET = process.env.JWT_SECRET ?? 
'test-secret'; // eslint-disable-next-line @typescript-eslint/no-var-requires diff --git a/backend/src/flyer-import/flyer-import.controller.ts b/backend/src/flyer-import/flyer-import.controller.ts index 114acd6b..ede1fb4d 100644 --- a/backend/src/flyer-import/flyer-import.controller.ts +++ b/backend/src/flyer-import/flyer-import.controller.ts @@ -18,6 +18,9 @@ const ALLOWED_MIMES = [ 'application/pdf', 'application/octet-stream', 'text/plain', + 'image/png', + 'image/jpeg', + 'image/webp', ]; @Controller('flyer-import') @@ -41,7 +44,7 @@ export class FlyerImportController { throw new BadRequestException('Ingen fil skickades med.'); } if (!ALLOWED_MIMES.includes(file.mimetype)) { - throw new BadRequestException('Otillåten filtyp. Använd PDF eller textfil.'); + throw new BadRequestException('Otillåten filtyp. Använd PDF, textfil eller bild (PNG, JPEG, WebP).'); } const userId = diff --git a/backend/src/flyer-import/flyer-import.module.ts b/backend/src/flyer-import/flyer-import.module.ts index e209cb66..0a6966c3 100644 --- a/backend/src/flyer-import/flyer-import.module.ts +++ b/backend/src/flyer-import/flyer-import.module.ts @@ -2,10 +2,18 @@ import { Module } from '@nestjs/common'; import { PrismaModule } from '../prisma/prisma.module'; import { FlyerImportController } from './flyer-import.controller'; import { FlyerImportService } from './flyer-import.service'; +import { TextExtractorService } from './services/text-extractor.service'; +import { AiFlyerParserService } from './services/ai-flyer-parser.service'; +import { FlyerNormalizerService } from './services/flyer-normalizer.service'; @Module({ imports: [PrismaModule], controllers: [FlyerImportController], - providers: [FlyerImportService], + providers: [ + FlyerImportService, + TextExtractorService, + AiFlyerParserService, + FlyerNormalizerService, + ], }) export class FlyerImportModule {} diff --git a/backend/src/flyer-import/flyer-import.service.ts b/backend/src/flyer-import/flyer-import.service.ts index 
0e99e1bf..bbec079d 100644 --- a/backend/src/flyer-import/flyer-import.service.ts +++ b/backend/src/flyer-import/flyer-import.service.ts @@ -1,8 +1,8 @@ -import { - BadRequestException, - Injectable, - Logger, - ServiceUnavailableException, +import { + BadRequestException, + Injectable, + Logger, + ServiceUnavailableException, } from '@nestjs/common'; import { Prisma } from '@prisma/client'; import { PrismaService } from '../prisma/prisma.service'; @@ -12,8 +12,9 @@ import { FlyerImportMatchVia, FlyerImportResponse, } from './dto/flyer-import.response'; - -const IMPORTER_SERVICE_URL = process.env.IMPORTER_SERVICE_URL || 'http://importer-api:3001'; +import { TextExtractorService } from './services/text-extractor.service'; +import { AiFlyerParserService } from './services/ai-flyer-parser.service'; +import { FlyerNormalizerService } from './services/flyer-normalizer.service'; type FlyerParseItem = { rawName: string; @@ -53,10 +54,15 @@ type ProductLite = { export class FlyerImportService { private readonly logger = new Logger(FlyerImportService.name); - constructor(private readonly prisma: PrismaService) {} + constructor( + private readonly prisma: PrismaService, + private readonly textExtractor: TextExtractorService, + private readonly aiParser: AiFlyerParserService, + private readonly normalizer: FlyerNormalizerService, + ) {} async parseAndMatch(file: Express.Multer.File, userId: number): Promise { - const parsed = await this.parseViaImporter(file); + const parsed = await this.parseViaInternal(file); const [products, aliases] = await Promise.all([ this.prisma.product.findMany({ @@ -371,43 +377,59 @@ export class FlyerImportService { return allowed.has(cleaned) ? 
cleaned : cleaned; } - private async parseViaImporter(file: Express.Multer.File): Promise { - const form = new FormData(); - form.append( - 'file', - new Blob([new Uint8Array(file.buffer)], { type: file.mimetype }), - file.originalname, - ); - form.append('retailer', 'willys'); - - let response: Response; + private async parseViaInternal(file: Express.Multer.File): Promise { try { - response = await fetch(`${IMPORTER_SERVICE_URL}/api/flyer/parse`, { - method: 'POST', - body: form, - }); - } catch (err) { - this.logger.error(`Kunde inte nå importer-api för flyer-parse: ${String(err)}`); - throw new ServiceUnavailableException('Importer-tjänsten är inte tillgänglig just nu.'); - } + this.logger.debug(`Parsing flyer file: ${file.originalname}`); - if (!response.ok) { - let message = `Importer-tjänsten svarade ${response.status}`; - try { - const body = (await response.json()) as { message?: string }; - if (typeof body.message === 'string' && body.message.trim()) { - message = body.message; - } - } catch { - // ignore parse issues + // 1. Extrahera text från PDF/bild + const text = await this.textExtractor.extractText( + file.buffer, + file.mimetype, + file.originalname, + ); + + // 2. Skicka till Mistral Tiny + const aiItems = await this.aiParser.parseWithAI(text); + + // 3. Normalisera resultatet + const normalizedItems = this.normalizer.normalize(aiItems); + + // 4. 
Konvertera till intern FlyerParseItem-format + const items: FlyerParseItem[] = normalizedItems.map((item) => ({ + rawName: item.rawName, + normalizedName: item.normalizedName, + category: item.categoryHint, + price: item.price, + priceUnit: item.priceUnit, + comparisonPrice: item.comparisonPrice, + comparisonUnit: item.comparisonUnit, + offerText: item.offerText, + confidence: item.parseConfidence, + reasonCodes: item.parseReasons, + })); + + const warnings: string[] = []; + if (items.length === 0) { + warnings.push('Inga produkter kunde extraheras från flyern.'); } - if (response.status >= 400 && response.status < 500) { - throw new BadRequestException(message); - } - throw new ServiceUnavailableException(message); + return { + retailer: 'willys', + parserVersion: 'v1', + items, + warnings, + }; + } catch (err) { + if (err instanceof BadRequestException) { + throw err; + } + if (err instanceof ServiceUnavailableException) { + throw err; + } + this.logger.error(`Internal flyer parse failed: ${String(err)}`); + throw new BadRequestException( + `Fel vid tolkning av flyer: ${err instanceof Error ? 
import {
  BadRequestException,
  Injectable,
  Logger,
  ServiceUnavailableException,
} from '@nestjs/common';

/** One product row as produced by {@link AiFlyerParserService.parseWithAI}. */
export interface AiFlyerParseResult {
  rawName: string;
  normalizedName: string;
  category: string | null;
  price: number | null;
  priceUnit: string | null;
  comparisonPrice: number | null;
  comparisonUnit: string | null;
  offerText: string | null;
  confidence: number;
  reasonCodes: string[];
}

/** The only part of the chat-completion response this service reads. */
interface ChatCompletionLike {
  choices?: Array<{ message?: { content?: unknown } }>;
}

/** True for plain (non-null, non-array) objects, i.e. something we can read fields from. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * Sends extracted flyer text to the `mistral-tiny` chat model and converts the
 * JSON array in the reply into {@link AiFlyerParseResult} rows.
 */
@Injectable()
export class AiFlyerParserService {
  private readonly logger = new Logger(AiFlyerParserService.name);
  private readonly timeoutMs = 15_000;
  /** Upper bound on how much flyer text is embedded in the prompt. */
  private readonly maxPromptChars = 5000;
  // The client class is loaded through a dynamic import, so it stays untyped here.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private mistral: any;
  private readonly apiKey: string;

  /**
   * Reads `MISTRAL_API_KEY` from the environment.
   *
   * @throws Error when the variable is missing or empty.
   */
  constructor() {
    this.apiKey = process.env.MISTRAL_API_KEY ?? '';
    if (!this.apiKey) {
      throw new Error('MISTRAL_API_KEY environment variable not set');
    }
  }

  /** Lazily imports the Mistral client module and caches a single client instance. */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private async getClient(): Promise<any> {
    if (this.mistral) return this.mistral;
    const mistralModule = await import('@mistralai/mistralai');
    this.mistral = new mistralModule.default(this.apiKey);
    return this.mistral;
  }

  /**
   * Sends flyer text to Mistral Tiny for structured extraction.
   *
   * @param text Text from the flyer (from pdf-parse or OCR).
   * @returns The parsed products. Array elements that are not JSON objects are
   *   skipped with a warning instead of failing the whole import.
   * @throws BadRequestException when `text` is blank, or when the model reply is
   *   empty, is not valid JSON, or is not a JSON array.
   * @throws ServiceUnavailableException on timeout or any other failure while
   *   calling the model.
   */
  async parseWithAI(text: string): Promise<AiFlyerParseResult[]> {
    if (!text || text.trim().length === 0) {
      throw new BadRequestException('Flyer-texten är tom. Kan inte fortsätta.');
    }

    const prompt = this.buildPrompt(text);

    try {
      this.logger.debug('Sending request to Mistral Tiny');

      const client = await this.getClient();
      const response = await this.withTimeout<ChatCompletionLike | undefined>(
        client.chat({
          model: 'mistral-tiny',
          messages: [{ role: 'user', content: prompt }],
          temperature: 0.1,
        }),
        this.timeoutMs,
        'Mistral-anrop timeout',
      );

      // Anything but a non-empty string cannot be sanitized/parsed below, so
      // treat it as an empty answer rather than letting `.replace` throw.
      const content: unknown = response?.choices?.[0]?.message?.content;
      if (typeof content !== 'string' || content.length === 0) {
        throw new BadRequestException('Tomt svar från AI-modellen.');
      }

      this.logger.debug(`Mistral response length: ${content.length} chars`);

      // Strip fences/surrounding prose, then parse. A SyntaxError is mapped to
      // a 400 in the catch block.
      const parsed: unknown = JSON.parse(this.sanitizeJsonResponse(content));

      if (!Array.isArray(parsed)) {
        throw new BadRequestException('AI returnerade inte en JSON-array.');
      }

      const results: AiFlyerParseResult[] = [];
      parsed.forEach((entry: unknown, idx: number) => {
        if (!isRecord(entry)) {
          // e.g. `null` or a bare string inside the array: reading fields from
          // it would throw and be misreported as "service unavailable".
          this.logger.warn(`AI item ${idx} is not an object, skipping`);
          return;
        }
        results.push(this.normalizeAiItem(entry, idx));
      });
      return results;
    } catch (err) {
      if (err instanceof SyntaxError) {
        this.logger.error(`JSON parse error: ${String(err)}`);
        throw new BadRequestException('AI returnerade ogiltigt JSON. Försök igen.');
      }
      if (err instanceof BadRequestException) {
        throw err;
      }
      if (err instanceof ServiceUnavailableException) {
        throw err;
      }
      this.logger.error(`AI parsing failed: ${String(err)}`);
      throw new ServiceUnavailableException('AI-tjänsten är inte tillgänglig just nu.');
    }
  }

  /**
   * Races `promise` against a timer.
   *
   * @returns The value of `promise` if it settles first.
   * @throws ServiceUnavailableException carrying `timeoutMessage` if the timer
   *   fires first. The underlying operation is not cancelled.
   */
  private async withTimeout<T>(
    promise: Promise<T>,
    timeoutMs: number,
    timeoutMessage: string,
  ): Promise<T> {
    let timeoutHandle: ReturnType<typeof setTimeout> | null = null;

    const timeoutPromise = new Promise<never>((_, reject) => {
      timeoutHandle = setTimeout(() => {
        reject(new ServiceUnavailableException(timeoutMessage));
      }, timeoutMs);
    });

    try {
      return await Promise.race([promise, timeoutPromise]);
    } finally {
      // Always release the timer so it cannot keep the event loop alive.
      if (timeoutHandle) clearTimeout(timeoutHandle);
    }
  }

  /**
   * Builds the (single, user-role) prompt sent to Mistral.
   *
   * NOTE(review): the prompt never asks for `comparisonUnit`, although
   * `normalizeAiItem` reads that field, so it will normally come back null.
   */
  private buildPrompt(text: string): string {
    // Truncate long text to save tokens; say so in the log because products
    // beyond the cut-off are never seen by the model.
    const truncatedText =
      text.length > this.maxPromptChars ? text.substring(0, this.maxPromptChars) : text;
    if (truncatedText.length < text.length) {
      this.logger.warn(
        `Flyer text truncated from ${text.length} to ${truncatedText.length} chars before AI parsing`,
      );
    }

    return `Du är en expert på att tolka svenska matvaruflyers (t.ex. från Willys, Coop, ICA).

Extrahera ALL produktinformation från följande text och returnera den som en JSON-array.

För varje produkt, inkludera:
- name: Produktnamn (fullständigt namn)
- weight: Vikt (om tillgänglig, t.ex. "150g", "Ca 1kg") eller null
- origin: Ursprung/land/märke (om tillgänglig, t.ex. "FALKENBERG") eller null
- price: Pris som nummer (t.ex. 39.90) eller null
- comparisonPrice: Jämförpris som nummer (t.ex. 266.00) eller null
- unit: Enhet (kg, st, förp, l, etc.) eller null
- offer: Erbjudande som array (t.ex. ["Max 3 köp/hushåll"]) eller []
- category: Kategori (t.ex. "Fisk", "Kött", "Mejeri", "Grönsaker", "Frukt", "Dryck") eller null
- validFrom: Giltig från (datum i formatet YYYY-MM-DD) eller null
- validTo: Giltig till (datum i formatet YYYY-MM-DD) eller null

Texten att tolka:
${truncatedText}

Returnera ENDAST en JSON-array. Inga andra kommentarer, ingen markdown-markup.
Exempel på utdata:
[
  {
    "name": "KALLRÖKT LAX, GRAVAD LAX",
    "weight": "150g",
    "origin": "FALKENBERG",
    "price": 39.90,
    "comparisonPrice": 266.00,
    "unit": "kg",
    "offer": ["Max 3 köp/hushåll"],
    "category": "Fisk",
    "validFrom": "2026-05-18",
    "validTo": "2026-05-24"
  }
]`;
  }

  /**
   * Cleans the model reply so it can be parsed as JSON: removes markdown code
   * fences and, if the reply contains a bracketed span, keeps only the text
   * from the first `[` to the last `]`.
   */
  private sanitizeJsonResponse(content: string): string {
    // Remove markdown fences.
    let cleaned = content.replace(/```json\n?/g, '').replace(/```\n?/g, '');
    cleaned = cleaned.trim();

    // Try to pull out the JSON array when the model wrapped it in prose.
    const jsonMatch = cleaned.match(/\[[\s\S]*\]/);
    if (jsonMatch) {
      cleaned = jsonMatch[0];
    }

    return cleaned;
  }

  /**
   * Type-converts one raw AI object into an {@link AiFlyerParseResult}.
   *
   * @param item Raw object from the model's JSON array.
   * @param index Position in that array; used for the placeholder name when
   *   the item has no usable `name`.
   */
  private normalizeAiItem(item: Record<string, unknown>, index: number): AiFlyerParseResult {
    const toNumber = (val: unknown): number | null => {
      if (typeof val === 'number') return Number.isFinite(val) ? val : null;
      if (typeof val === 'string') {
        // Drop whitespace first so "1 299,00" (space or nbsp as thousands
        // separator) parses as 1299 instead of stopping at the first space.
        const parsed = parseFloat(val.replace(/\s/g, '').replace(',', '.'));
        return Number.isFinite(parsed) ? parsed : null;
      }
      return null;
    };

    const toText = (val: unknown): string | null => {
      if (typeof val === 'string') return val.trim() || null;
      return null;
    };

    const toArray = (val: unknown): string[] => {
      if (Array.isArray(val)) {
        return val.map((v) => String(v)).filter((v) => v.trim());
      }
      return [];
    };

    const rawName = toText(item.name) || `Produkt ${index + 1}`;
    const normalizedName = this.normalizeName(rawName);

    return {
      rawName,
      normalizedName,
      category: toText(item.category),
      price: toNumber(item.price),
      priceUnit: toText(item.unit),
      comparisonPrice: toNumber(item.comparisonPrice),
      comparisonUnit: toText(item.comparisonUnit),
      // `offer` may arrive as a plain string or as an array of strings.
      offerText: toText(item.offer) || (toArray(item.offer).join(' ') || null),
      confidence: 0.85, // fixed value assigned to every AI-parsed row
      reasonCodes: ['ai_parsed'],
    };
  }

  /**
   * Simple product-name normalization: lower-cases, removes every character
   * that is not a-z, å, ä, ö, a digit or whitespace, and collapses whitespace.
   */
  private normalizeName(name: string): string {
    return name
      .toLowerCase()
      .replace(/[^a-zåäö0-9\s]/g, '')
      .replace(/\s+/g, ' ')
      .trim();
  }
}
price: 100 }, // no name + { rawName: 'VALID PRODUCT', price: 50 }, + ]; + + const result = service.normalize(items); + + expect(result).toHaveLength(1); + expect(result[0].rawName).toBe('VALID PRODUCT'); + }); + + it('should normalize units correctly', () => { + const items = [ + { rawName: 'Mjölk', unit: 'L' }, + { rawName: 'Smör', unit: 'styck' }, + { rawName: 'Socker', unit: 'KG' }, + ]; + + const result = service.normalize(items); + + expect(result).toHaveLength(3); + expect(result[0].priceUnit).toBe('l'); + expect(result[1].priceUnit).toBe('st'); + expect(result[2].priceUnit).toBe('kg'); + }); + + it('should parse Swedish prices correctly', () => { + const items = [ + { rawName: 'Produkt1', price: '39,90' }, + { rawName: 'Produkt2', price: 39.9 }, + { rawName: 'Produkt3', price: '100' }, + ]; + + const result = service.normalize(items); + + expect(result[0].price).toBe(39.9); + expect(result[1].price).toBe(39.9); + expect(result[2].price).toBe(100); + }); + + it('should return empty list for non-array input', () => { + const result = service.normalize(null as any); + expect(result).toEqual([]); + + const result2 = service.normalize(undefined as any); + expect(result2).toEqual([]); + }); + }); +}); diff --git a/backend/src/flyer-import/services/flyer-normalizer.service.ts b/backend/src/flyer-import/services/flyer-normalizer.service.ts new file mode 100644 index 00000000..f66e22dd --- /dev/null +++ b/backend/src/flyer-import/services/flyer-normalizer.service.ts @@ -0,0 +1,158 @@ +import { Injectable, Logger } from '@nestjs/common'; + +export interface NormalizedFlyerItem { + rawName: string; + normalizedName: string; + categoryHint: string | null; + price: number | null; + priceUnit: string | null; + comparisonPrice: number | null; + comparisonUnit: string | null; + offerText: string | null; + parseConfidence: number; + parseReasons: string[]; +} + +@Injectable() +export class FlyerNormalizerService { + private readonly logger = new 
Logger(FlyerNormalizerService.name); + + private readonly UNIT_MAPPING: Record = { + // Längd + mm: 'mm', + cm: 'cm', + m: 'm', + // Vikt + mg: 'mg', + g: 'g', + hg: 'hg', + kg: 'kg', + ton: 'ton', + // Volym + ml: 'ml', + cl: 'cl', + dl: 'dl', + l: 'l', + // Övrigt + st: 'st', + styck: 'st', + stycke: 'st', + pkt: 'pkt', + paket: 'pkt', + fp: 'pkt', + förp: 'pkt', + förpackning: 'pkt', + }; + + /** + * Normaliserar en AI-parsad produktlista. + */ + normalize(items: any[]): NormalizedFlyerItem[] { + if (!Array.isArray(items)) { + this.logger.warn('normalize() received non-array, returning empty list'); + return []; + } + + return items + .map((item, idx) => this.normalizeItem(item, idx)) + .filter((item): item is NormalizedFlyerItem => item !== null); + } + + private normalizeItem(item: any, index: number): NormalizedFlyerItem | null { + if (!item || typeof item !== 'object') { + this.logger.warn(`Item ${index} is not an object, skipping`); + return null; + } + + const rawName = this.extractString(item.rawName) || this.extractString(item.name); + if (!rawName) { + this.logger.warn(`Item ${index} has no name, skipping`); + return null; + } + + const normalizedName = this.extractString(item.normalizedName) || this.normalizeName(rawName); + + return { + rawName, + normalizedName, + categoryHint: this.normalizeCategory(this.extractString(item.category)), + price: this.extractPrice(item.price), + priceUnit: this.normalizeUnit(this.extractString(item.unit)), + comparisonPrice: this.extractPrice(item.comparisonPrice), + comparisonUnit: this.normalizeUnit(this.extractString(item.comparisonUnit)), + offerText: this.normalizeOfferText(item.offer), + parseConfidence: item.confidence ?? 0.85, + parseReasons: Array.isArray(item.reasonCodes) + ? 
item.reasonCodes.map(String) + : ['normalized'], + }; + } + + private extractString(val: any): string | null { + if (typeof val === 'string') return val.trim() || null; + return null; + } + + private extractPrice(val: any): number | null { + if (typeof val === 'number') return val; + if (typeof val === 'string') { + const num = parseFloat(val.replace(/,/g, '.')); + return isFinite(num) ? num : null; + } + return null; + } + + private normalizeName(name: string): string { + return name + .toLowerCase() + .replace(/[^a-zåäö0-9\s]/g, '') + .replace(/\s+/g, ' ') + .trim(); + } + + private normalizeUnit(unit: string | null): string | null { + if (!unit) return null; + + const cleaned = unit.trim().toLowerCase().replace(/\./g, ''); + return this.UNIT_MAPPING[cleaned] ?? null; + } + + private normalizeCategory(category: string | null): string | null { + if (!category) return null; + + const normalized = category.trim().toLowerCase(); + + // Mappning av tänkta kategorivärdena från AI + const categoryMap: Record = { + fisk: 'Fisk', + kött: 'Kött', + mejeri: 'Mejeri', + grönsaker: 'Grönsaker', + frukt: 'Frukt', + dryck: 'Dryck', + frukt_grönsaker: 'Frukt & Grönsaker', + fastfood: 'Fastfood', + bröd: 'Bröd', + fryst: 'Fryst', + godis: 'Godis', + pasta: 'Pasta', + }; + + return categoryMap[normalized] ?? 
null; + } + + private normalizeOfferText(offer: any): string | null { + if (!offer) return null; + + if (typeof offer === 'string') { + return offer.trim() || null; + } + + if (Array.isArray(offer)) { + const joined = offer.map(String).filter(s => s.trim()).join(' '); + return joined || null; + } + + return null; + } +} diff --git a/backend/src/flyer-import/services/text-extractor.service.ts b/backend/src/flyer-import/services/text-extractor.service.ts new file mode 100644 index 00000000..f5fc3d8e --- /dev/null +++ b/backend/src/flyer-import/services/text-extractor.service.ts @@ -0,0 +1,100 @@ +import { Injectable, Logger } from '@nestjs/common'; +import * as fs from 'fs'; +import * as os from 'os'; +import * as path from 'path'; +import * as pdf from 'pdf-parse'; +import Tesseract from 'tesseract.js'; + +@Injectable() +export class TextExtractorService { + private readonly logger = new Logger(TextExtractorService.name); + + /** + * Extraherar text från en PDF-buffer. + * Försöker med pdf-parse först; om det inte ger resultat, fallback till OCR. + * + * @param buffer PDF-fil som buffer + * @returns Extraherad text + */ + async extractText( + buffer: Buffer, + mimeType?: string, + originalFilename?: string, + ): Promise { + // Försök primär PDF-extract + try { + this.logger.debug('Attempting pdf-parse extraction'); + const pdfData = await pdf(buffer); + + const text = pdfData.text?.trim() || ''; + const wordCount = text.split(/\s+/).filter(w => w.length > 0).length; + + this.logger.debug(`pdf-parse extracted ${wordCount} words`); + + // Om vi fick tillräckligt med text, returnera det + if (wordCount >= 10) { + return text; + } + + this.logger.debug('pdf-parse gave too little text, falling back to OCR'); + } catch (err) { + this.logger.warn(`pdf-parse failed: ${String(err)}`); + } + + // Fallback: OCR med Tesseract + return this.extractTextViaOCR(buffer, mimeType, originalFilename); + } + + /** + * Extraherar text från en PDF eller bild via OCR (Tesseract). 
+ * + * @param buffer Fil-buffer (PDF eller bild) + * @returns Extraherad text + */ + private async extractTextViaOCR( + buffer: Buffer, + mimeType?: string, + originalFilename?: string, + ): Promise { + try { + this.logger.debug('Starting Tesseract OCR extraction'); + + // Tesseract.js kräver en sökväg eller data-URL; vi skriver temporär fil + const ext = this.resolveTempExtension(mimeType, originalFilename); + const tempPath = path.join(os.tmpdir(), `ocr-${Date.now()}${ext}`); + await fs.promises.writeFile(tempPath, buffer); + + try { + const result = await Tesseract.recognize(tempPath, 'swe', { + logger: (m) => this.logger.debug(`Tesseract: ${m.status}`), + }); + + const text = result.data.text || ''; + this.logger.debug(`Tesseract extracted ${text.split(/\s+/).length} words`); + return text; + } finally { + try { + await fs.promises.unlink(tempPath); + } catch { + // ignorera om cleanup misslyckas + } + } + } catch (err) { + this.logger.error(`OCR extraction failed: ${String(err)}`); + throw new Error('Kunde inte extrahera text från flyern (pdf-parse + OCR misslyckades).'); + } + } + + private resolveTempExtension(mimeType?: string, originalFilename?: string): string { + if (mimeType === 'image/png') return '.png'; + if (mimeType === 'image/webp') return '.webp'; + if (mimeType === 'image/jpeg') return '.jpg'; + if (mimeType === 'text/plain') return '.txt'; + if (mimeType === 'application/pdf') return '.pdf'; + + const originalExt = originalFilename ? 
path.extname(originalFilename).toLowerCase() : ''; + if (originalExt) return originalExt; + + return '.pdf'; + } +} diff --git a/flutter/lib/features/import/presentation/flyer_import_tab.dart b/flutter/lib/features/import/presentation/flyer_import_tab.dart index e2cdb8b5..6444bf75 100644 --- a/flutter/lib/features/import/presentation/flyer_import_tab.dart +++ b/flutter/lib/features/import/presentation/flyer_import_tab.dart @@ -148,6 +148,87 @@ class _FlyerImportTabState extends ConsumerState { ); } + String _getQualityLevel(FlyerImportItem item) { + final parseConf = item.parseConfidence ?? 0; + final matchConf = item.matchConfidence ?? 0; + final avgConf = (parseConf + matchConf) / 2; + + if (avgConf >= 0.80) return 'Hög'; + if (avgConf >= 0.60) return 'Medel'; + return 'Låg'; + } + + Color _getQualityColor(FlyerImportItem item) { + final level = _getQualityLevel(item); + if (level == 'Hög') return Colors.green.shade700; + if (level == 'Medel') return Colors.orange.shade700; + return Colors.red.shade700; + } + + Widget _buildQualityBadge(FlyerImportItem item, ThemeData theme) { + final level = _getQualityLevel(item); + final color = _getQualityColor(item); + + return Container( + padding: const EdgeInsets.symmetric(horizontal: 6, vertical: 2), + decoration: BoxDecoration( + color: color.withValues(alpha: 0.15), + borderRadius: BorderRadius.circular(4), + border: Border.all(color: color.withValues(alpha: 0.4)), + ), + child: Text( + level, + style: theme.textTheme.labelSmall?.copyWith( + color: color, + fontWeight: FontWeight.w600, + ), + ), + ); + } + + Widget _buildWarningsPanel(ThemeData theme) { + final warnings = _result?.warnings ?? 
const []; + if (warnings.isEmpty) return const SizedBox.shrink(); + + return Container( + width: double.infinity, + padding: const EdgeInsets.all(12), + decoration: BoxDecoration( + color: Colors.amber.shade50, + border: Border.all(color: Colors.amber.shade300), + borderRadius: BorderRadius.circular(8), + ), + child: Column( + crossAxisAlignment: CrossAxisAlignment.start, + children: [ + Row( + children: [ + Icon(Icons.warning_amber_rounded, color: Colors.amber.shade800, size: 18), + const SizedBox(width: 8), + Text( + 'Varningar (${warnings.length})', + style: theme.textTheme.labelMedium?.copyWith( + color: Colors.amber.shade900, + fontWeight: FontWeight.w600, + ), + ), + ], + ), + const SizedBox(height: 8), + ...warnings.map((warning) => Padding( + padding: const EdgeInsets.only(bottom: 4), + child: Text( + '• $warning', + style: theme.textTheme.bodySmall?.copyWith( + color: Colors.amber.shade900, + ), + ), + )), + ], + ), + ); + } + Widget _buildFlyerPreview(ThemeData theme) { final file = _pickedFile; final bytes = file?.bytes; @@ -177,9 +258,10 @@ class _FlyerImportTabState extends ConsumerState { label: const Text('Visa flyer'), style: OutlinedButton.styleFrom(visualDensity: VisualDensity.compact), onPressed: () async { + final messenger = ScaffoldMessenger.of(context); final opened = await openPdfBytes(bytes); if (!context.mounted || opened) return; - ScaffoldMessenger.of(context).showSnackBar( + messenger.showSnackBar( const SnackBar( content: Text('PDF kan bara öppnas direkt i webbversionen just nu.'), ), @@ -233,31 +315,33 @@ class _FlyerImportTabState extends ConsumerState { label: const Text('Importera flyer'), ), const SizedBox(height: 12), - _buildFlyerPreview(theme), - if (_isLoading) ...[ - const SizedBox(height: 12), - const LinearProgressIndicator(), - ], - if (items.isNotEmpty) ...[ - const SizedBox(height: 20), - Row( - mainAxisAlignment: MainAxisAlignment.spaceBetween, - children: [ - Text('${items.length} rader hittades', style: 
theme.textTheme.titleSmall), - TextButton( - onPressed: () { - final target = selectedCount < items.length; - setState(() { - for (var i = 0; i < items.length; i++) { - _selected[i] = target; - } - }); - }, - child: Text(selectedCount < items.length ? 'Välj alla' : 'Avmarkera alla'), - ), - ], - ), - const SizedBox(height: 8), + _buildFlyerPreview(theme), + if (_isLoading) ...[ + const SizedBox(height: 12), + const LinearProgressIndicator(), + ], + if (items.isNotEmpty) ...[ + const SizedBox(height: 20), + _buildWarningsPanel(theme), + if ((_result?.warnings ?? const []).isNotEmpty) const SizedBox(height: 12), + Row( + mainAxisAlignment: MainAxisAlignment.spaceBetween, + children: [ + Text('${items.length} rader hittades', style: theme.textTheme.titleSmall), + TextButton( + onPressed: () { + final target = selectedCount < items.length; + setState(() { + for (var i = 0; i < items.length; i++) { + _selected[i] = target; + } + }); + }, + child: Text(selectedCount < items.length ? 'Välj alla' : 'Avmarkera alla'), + ), + ], + ), + const SizedBox(height: 8), ...items.asMap().entries.map((entry) { final index = entry.key; final item = entry.value; @@ -268,34 +352,37 @@ class _FlyerImportTabState extends ConsumerState { ? '' : _removeLimitTextFromOfferText(item.offerText!, limitText); - return CheckboxListTile( - value: _selected[index] ?? false, - onChanged: (value) => setState(() => _selected[index] = value ?? 
false), - title: Row( - children: [ - Expanded(child: Text(item.rawName)), - _buildOfferBadge(item, theme), - ], - ), - subtitle: Column( - crossAxisAlignment: CrossAxisAlignment.start, - children: [ - if (priceText.isNotEmpty) Text('Pris: $priceText'), - if (comparisonText.isNotEmpty) Text('Jämförpris: $comparisonText'), - if (limitText != null && limitText.isNotEmpty) - Text( - 'Begränsning: $limitText', - style: theme.textTheme.bodyMedium?.copyWith( - color: Colors.orange.shade900, - fontWeight: FontWeight.w600, - ), - ), - if (sanitizedOfferText.isNotEmpty) Text(sanitizedOfferText), - if (item.matchedProductName != null) Text('Match: ${item.matchedProductName}'), - ], - ), - controlAffinity: ListTileControlAffinity.leading, - ); + return CheckboxListTile( + value: _selected[index] ?? false, + onChanged: (value) => setState(() => _selected[index] = value ?? false), + title: Row( + children: [ + Expanded(child: Text(item.rawName)), + const SizedBox(width: 8), + _buildQualityBadge(item, theme), + const SizedBox(width: 8), + _buildOfferBadge(item, theme), + ], + ), + subtitle: Column( + crossAxisAlignment: CrossAxisAlignment.start, + children: [ + if (priceText.isNotEmpty) Text('Pris: $priceText'), + if (comparisonText.isNotEmpty) Text('Jämförpris: $comparisonText'), + if (limitText != null && limitText.isNotEmpty) + Text( + 'Begränsning: $limitText', + style: theme.textTheme.bodyMedium?.copyWith( + color: Colors.orange.shade900, + fontWeight: FontWeight.w600, + ), + ), + if (sanitizedOfferText.isNotEmpty) Text(sanitizedOfferText), + if (item.matchedProductName != null) Text('Match: ${item.matchedProductName}'), + ], + ), + controlAffinity: ListTileControlAffinity.leading, + ); }), const SizedBox(height: 8), SizedBox( diff --git a/flyerimporter.md b/flyerimporter.md new file mode 100644 index 00000000..61047464 --- /dev/null +++ b/flyerimporter.md @@ -0,0 +1,359 @@ +flyerimporter.md +📌 Steg 1: Skapa en funktion för att extrahera text från PDF:en +Använd pdf-parse 
som primär metod och Tesseract.js som fallback för OCR. +Kod: extractFlyerText.ts +typescript +Copy + +import * as fs from 'fs'; +import * as pdf from 'pdf-parse'; +import Tesseract from 'tesseract.js'; + +/** + * Extraherar text från en PDF-fil (flyer), med fallback till OCR. + * @param pdfPath Sökväg till PDF-filen. + * @returns Extraherad text. + */ +export async function extractFlyerText(pdfPath: string): Promise { + try { + // Försök med pdf-parse först + const dataBuffer = fs.readFileSync(pdfPath); + const data = await pdf(dataBuffer); + if (data.text.trim()) { + return data.text; + } + } catch (error) { + console.warn('pdf-parse misslyckades, försöker med OCR...'); + } + + // Fallback till Tesseract.js för OCR + try { + const { data: { text } } = await Tesseract.recognize(pdfPath, 'swe', { + logger: (m) => console.log(m), + }); + return text; + } catch (error) { + console.error('OCR misslyckades:', error); + throw new Error('Kunde inte extrahera text från PDF:en.'); + } +} + + + + +📌 Steg 2: Skapa en funktion för att skicka texten till Mistral Tiny +Använd Mistral Tiny för att extrahera och strukturera all produktinformation från flyern. +Kod: importFlyerWithAI.ts +typescript +Copy + +import { MistralClient } from '@mistralai/mistralai'; + +const mistral = new MistralClient({ + apiKey: process.env.MISTRAL_API_KEY, +}); + +/** + * Skickar flyer-texten till Mistral Tiny för att extrahera strukturerad data. + * @param text Texten från flyern. + * @returns Strukturerad data (JSON-array). + */ +export async function importFlyerWithAI(text: string): Promise { + const prompt = ` + Du är en expert på att tolka svenska matvaruflyers (t.ex. från Willys). + Extrahera ALL produktinformation från följande text och returnera den som en JSON-array. + + För varje produkt, inkludera: + - name: Produktnamn (fullständigt namn) + - weight: Vikt (om tillgänglig, t.ex. "150g", "Ca 1kg") + - origin: Ursprung/land/märke (om tillgänglig, t.ex. 
"FALKENBERG", "NYBERGS DELI • Sverige") + - price: Pris (som ett nummer, t.ex. 39.90) + - comparisonPrice: Jämförpris (som ett nummer, t.ex. 266.00) + - unit: Enhet (kg, st, förp, l, etc.) + - offer: Erbjudande (t.ex. ["Max 3 köp/hushåll", "Lägsta 30-dgrspris 125:00 kr"]) + - category: Kategori (t.ex. "Fisk", "Kött", "Mejeri", "Grönsaker", "Frukt", "Dryck") + - validFrom: Giltig från (datum i formatet YYYY-MM-DD, om tillgängligt) + - validTo: Giltig till (datum i formatet YYYY-MM-DD, om tillgängligt) + + Texten att tolka: + ${text} + + Returnera ENDAST en JSON-array. Inga andra kommentarer. + Exempel på utdata: + [ + { + "name": "KALLRÖKT LAX, GRAVAD LAX", + "weight": "150g", + "origin": "FALKENBERG", + "price": 39.90, + "comparisonPrice": 266.00, + "unit": "kg", + "offer": ["Max 3 köp/hushåll"], + "category": "Fisk", + "validFrom": "2026-05-18", + "validTo": "2026-05-24" + } + ] + `; + + try { + const response = await mistral.chat({ + model: 'mistral-tiny', // Använder den enklaste modellen + messages: [{ role: 'user', content: prompt }], + temperature: 0.1, // Låg temperatur för mer deterministiska svar + }); + + // Rensa upp JSON-strängen + const jsonString = response.choices[0].message.content + .replace(/```json|```/g, '') + .trim(); + + // Parsa JSON:en + return JSON.parse(jsonString); + } catch (error) { + console.error('Fel vid AI-import:', error); + throw new Error('Kunde inte importera flyern med AI.'); + } +} + + + + +📌 Steg 3: Fullständigt importflöde +Kombinera text-extrahering och AI-import i ett fullständigt flöde. +Kod: flyerImportService.ts +typescript +Copy + +import { extractFlyerText } from './extractFlyerText'; +import { importFlyerWithAI } from './importFlyerWithAI'; + +/** + * Importerar en flyer (PDF) och returnerar strukturerad data. + * @param pdfPath Sökväg till PDF-filen. + * @returns Strukturerad data från flyern. + */ +export async function importFlyer(pdfPath: string) { + try { + // 1. 
Extrahera text från PDF:en + console.log('Extraherar text från flyern...'); + const text = await extractFlyerText(pdfPath); + + // 2. Skicka texten till Mistral Tiny för att extrahera data + console.log('Skickar text till Mistral Tiny för extrahering...'); + const products = await importFlyerWithAI(text); + + return { + success: true, + products, + text, + }; + } catch (error) { + console.error('Fel vid import:', error); + return { + success: false, + error: error instanceof Error ? error.message : 'Okänt fel', + }; + } +} + + + + +📌 Steg 4: API-Endpoint för flyer-import +Skapa en Express-endpoint för att hantera uppladdning och import av flyers. +Kod: flyerImportRouter.ts +typescript +Copy + +import express from 'express'; +import multer from 'multer'; +import { importFlyer } from '../services/flyerImportService'; +import * as fs from 'fs'; + +const router = express.Router(); +const upload = multer({ dest: 'uploads/' }); + +// Endpoint för att ladda upp och importera en flyer +router.post('/import/flyer', upload.single('flyer'), async (req, res) => { + try { + if (!req.file) { + return res.status(400).json({ error: 'Ingen flyer uppladdad.' }); + } + + const result = await importFlyer(req.file.path); + + // Rensa upp uppladdad fil + fs.unlinkSync(req.file.path); + + if (!result.success) { + return res.status(500).json({ error: result.error }); + } + + res.json(result); + } catch (error) { + console.error('Fel vid flyer-import:', error); + res.status(500).json({ error: 'Kunde inte importera flyern.' 
}); + } +}); + +export default router; + + + + +📌 Steg 5: Exempel på utdata +När du skickar texten från Willys flyer till Mistral Tiny, kommer du att få tillbaka en JSON-array med alla produkter, t.ex.: +json +Copy + +[ + { + "name": "KALLRÖKT LAX, GRAVAD LAX", + "weight": "150g", + "origin": "FALKENBERG", + "price": 39.9, + "comparisonPrice": 266.0, + "unit": "kg", + "offer": ["Max 3 köp/hushåll"], + "category": "Fisk", + "validFrom": "2026-05-18", + "validTo": "2026-05-24" + }, + { + "name": "FLÄSKYTTERFILÉ", + "weight": "Ca 1kg", + "origin": "NYBERGS DELI • Sverige", + "price": 64.9, + "comparisonPrice": 64.9, + "unit": "kg", + "offer": ["Max 3 förp/hushåll"], + "category": "Kött", + "validFrom": "2026-05-18", + "validTo": "2026-05-24" + } +] + + + + +📌 Steg 6: Använda den extraherade datan +När du har den strukturerade datan, kan du: + +Lagra den i din databas (t.ex. för att jämföra med inventory). +Visa den för användaren (t.ex. i en tabell). +Använda den för att generera recept (med eller utan AI). +Exempel: Lagra i databasen +typescript +Copy + +// Antas att du har en Prisma-modell för flyer-produkter +await prisma.flyerProduct.createMany({ + data: products.map((product) => ({ + name: product.name, + weight: product.weight, + origin: product.origin, + price: product.price, + comparisonPrice: product.comparisonPrice, + unit: product.unit, + offer: JSON.stringify(product.offer), + category: product.category, + validFrom: product.validFrom ? new Date(product.validFrom) : null, + validTo: product.validTo ? 
new Date(product.validTo) : null, + })), +}); + + + + +📌 Steg 7: Frontend-Integrering (Exempel: React) +Här är hur du kan integrera flyer-importen i din frontend: +Kod: FlyerImportForm.tsx +tsx +Copy + +import { useState } from 'react'; +import axios from 'axios'; + +function FlyerImportForm() { + const [file, setFile] = useState(null); + const [isLoading, setIsLoading] = useState(false); + const [result, setResult] = useState(null); + + const handleSubmit = async (e: React.FormEvent) => { + e.preventDefault(); + if (!file) return; + + setIsLoading(true); + const formData = new FormData(); + formData.append('flyer', file); + + try { + const response = await axios.post('/api/import/flyer', formData, { + headers: { 'Content-Type': 'multipart/form-data' }, + }); + setResult(response.data); + } catch (error) { + console.error('Fel vid uppladdning:', error); + } finally { + setIsLoading(false); + } + }; + + return ( +
+

Importera flyer

+
+ setFile(e.target.files?.[0] || null)} + required + /> + +
+ + {result?.success && ( +
+

Importerade produkter ({result.products.length})

+ + + + + + + + + + + + {result.products.map((product: any, index: number) => ( + + + + + + + + ))} + +
NamnPrisJämförprisKategoriErbjudande
{product.name}{product.price} {product.unit}{product.comparisonPrice} {product.unit}{product.category}{product.offer.join(', ')}
+
+ )} +
+ ); +} + +export default FlyerImportForm; + + + + +📌 Miljövariabler (.env) +env +Copy + +# Mistral API-nyckel +MISTRAL_API_KEY=din_api_nyckel_här \ No newline at end of file diff --git a/kilo.json b/kilo.json index c185b77a..571a15be 100644 --- a/kilo.json +++ b/kilo.json @@ -1,5 +1,3 @@ { - "env": { - "MISTRAL_API_KEY": "process.env.MISTRAL_API_KEY" - } -} + "$schema": "https://app.kilo.ai/config.json" +}