import {
  BadRequestException,
  Injectable,
  Logger,
  ServiceUnavailableException,
} from '@nestjs/common';
import { PrismaService } from '../prisma/prisma.service';
import { ParsedReceiptItem } from './dto/parsed-receipt-item.dto';
import { AiService, CategorySuggestion } from '../ai/ai.service';
import { CategoriesService } from '../categories/categories.service';

/** Base URL of the importer service; taken from IMPORTER_SERVICE_URL when that variable is set and non-empty. */
const IMPORTER_SERVICE_URL = process.env.IMPORTER_SERVICE_URL || 'http://importer-api:3001';

/**
 * Descriptive words (smoked, boiled, grilled, ...) that are too generic to
 * justify a product suggestion on their own. Both the accented and the
 * accent-free spelling are listed where they differ.
 */
const WEAK_DESCRIPTORS = new Set<string>([
  'rokt',
  'rökt',
  'kokt',
  'grillad',
  'stekt',
  'skivad',
  'strimlad',
  'fryst',
  'farsk',
  'färsk',
]);

/** Minimal product shape used by the word matcher. */
type ProductCandidate = { id: number; name: string; canonicalName: string | null };

/** Category list as returned by `CategoriesService.findFlattened()`. */
type FlattenedCategories = Awaited<ReturnType<CategoriesService['findFlattened']>>;

/** Whatever `AiService.suggestCategory()` resolves to. */
type AiCategorySuggestion = Awaited<ReturnType<AiService['suggestCategory']>>;

// Rule patterns are tested against the output of normalizeForRules(), i.e.
// lower-case ASCII with single spaces, which is why they contain no diacritics.
const TEA_PATTERNS: readonly RegExp[] = [/\bte\b/, /\btea\b/, /\bchai\b/, /\btepa(se|k|r)?\b/];

const KAFFEBROD_PATTERNS: readonly RegExp[] = [
  /\bwienerbrod\b/,
  /\bdonut\b/,
  /\bmunk\b/,
  /\bcroissant\b/,
  /\bkanelbulle\b/,
  /\bbakelse\b/,
  /\bsemla\b/,
  /\bdammsugare\b/,
  /\bkladdkaka\b/,
  /\bmuffin\b/,
  /\bcupcake\b/,
  /\bchokladboll\b/,
];

const COOKING_BASE_PATTERNS: readonly RegExp[] = [
  /\bmatlagningsbas\b/,
  /\bmatlagnings\b/,
  /\bplant\s+cream\b/,
  /\bcreme\s+fraiche\b/,
  /\bgradde\b/,
  /\bvispgradde\b/,
];

const PLANT_OR_ALLERGY_PATTERNS: readonly RegExp[] = [
  /\blaktosfri\b/,
  /\bvegetabilisk\b/,
  /\bhavre\b/,
  /\bsoja\b/,
  /\brisdryck\b/,
  /\bplant\b/,
];

/**
 * Splits a string into lower-case words of at least three characters.
 * Anything other than a-z, 0-9, å, ä and ö acts as a separator.
 */
function tokenize(value: string): string[] {
  return value
    .toLowerCase()
    .split(/[^a-z0-9åäö]+/)
    .filter((w) => w.length >= 3);
}

/** Replaces å/ä with a, ö with o and é/è with e so that e.g. "gradde" equals "grädde". */
function normalizeToken(s: string): string {
  return s.replace(/[åä]/g, 'a').replace(/ö/g, 'o').replace(/[éè]/g, 'e');
}

/**
 * Canonical form used by the rule-based categoriser: lower-case, combining
 * diacritics removed, every run of non-alphanumerics collapsed to one space,
 * and surrounding whitespace trimmed.
 */
function normalizeForRules(value: string): string {
  return value
    .toLowerCase()
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    .replace(/[^a-z0-9]+/g, ' ')
    .trim();
}

/** True when at least one of the patterns matches the text. */
function matchesAny(text: string, patterns: readonly RegExp[]): boolean {
  return patterns.some((pattern) => pattern.test(text));
}

/** Builds a non-fallback suggestion that points at the given category. */
function toSuggestion(
  category: FlattenedCategories[number],
  confidence: CategorySuggestion['confidence'],
): CategorySuggestion {
  return {
    categoryId: category.id,
    categoryName: category.name,
    path: category.path,
    confidence,
    usedFallback: false,
  };
}

/** An item gets a category suggestion only when it has a name and no product match or suggestion. */
function needsCategorySuggestion(item: ParsedReceiptItem): boolean {
  return !item.matchedProductId && !item.suggestedProductId && Boolean(item.rawName);
}

/**
 * Turns an uploaded receipt into parsed items: parsing is delegated to the
 * importer service, items are matched against aliases and products from the
 * database, and (for premium users) unmatched items get a category suggestion.
 */
@Injectable()
export class ReceiptImportService {
  private readonly logger = new Logger(ReceiptImportService.name);

  constructor(
    private readonly prisma: PrismaService,
    private readonly aiService: AiService,
    private readonly categoriesService: CategoriesService,
  ) {}

  /**
   * Parses a receipt file and enriches the resulting items.
   *
   * @param file Uploaded receipt file.
   * @param isPremium When true, unmatched items also receive category suggestions.
   * @returns The parsed items with match / suggestion fields filled in where found.
   * @throws BadRequestException when the importer answers with a 4xx status.
   * @throws ServiceUnavailableException when the importer cannot be reached,
   *   answers with another non-OK status, or returns an unusable body.
   */
  async parseReceipt(file: Express.Multer.File, isPremium = false): Promise<ParsedReceiptItem[]> {
    // Step 1: delegate the parsing to the importer service.
    const rawItems = await this.parseReceiptViaImporter(file);

    // Step 2: match against the product database (needs DB access, so it happens here).
    const matched = await this.matchProducts(rawItems);

    // Step 3: category suggestions for premium users.
    if (isPremium) {
      return this.enrichWithAiCategories(matched);
    }
    return matched;
  }

  /**
   * Posts the file as multipart form data to the importer's parse endpoint and
   * returns the parsed items from the response body.
   */
  private async parseReceiptViaImporter(file: Express.Multer.File): Promise<ParsedReceiptItem[]> {
    const form = new FormData();
    form.append(
      'file',
      new Blob([new Uint8Array(file.buffer)], { type: file.mimetype }),
      file.originalname,
    );

    let response: Response;
    try {
      response = await fetch(`${IMPORTER_SERVICE_URL}/api/receipt-import/parse`, {
        method: 'POST',
        body: form,
      });
    } catch (err) {
      this.logger.error(`Kunde inte nå importer-api för kvittoparsning: ${err}`);
      throw new ServiceUnavailableException(
        'Import-tjänsten är inte tillgänglig. Försök igen senare.',
      );
    }

    if (!response.ok) {
      let message = `Importer svarade ${response.status}`;
      try {
        const body = (await response.json()) as { message?: string };
        if (body.message) message = body.message;
      } catch {
        // The error body is optional; keep the generic status message.
      }
      this.logger.error(`Importer-api kvittoparsfel: ${message}`);
      if (response.status >= 400 && response.status < 500) {
        throw new BadRequestException(message);
      }
      throw new ServiceUnavailableException(message);
    }

    // Validate the payload here so a malformed answer is reported as an
    // importer problem instead of a TypeError further down the pipeline.
    let payload: unknown;
    try {
      payload = await response.json();
    } catch (err) {
      this.logger.error(`Importer-api returnerade ogiltig JSON: ${err}`);
      throw new ServiceUnavailableException('Import-tjänsten returnerade ett ogiltigt svar.');
    }
    if (!Array.isArray(payload)) {
      this.logger.error('Importer-api returnerade ett svar som inte är en lista');
      throw new ServiceUnavailableException('Import-tjänsten returnerade ett ogiltigt svar.');
    }
    return payload as ParsedReceiptItem[];
  }

  /**
   * Adds product information to each item: an exact alias hit fills the
   * `matched*` fields, otherwise the word matcher may fill the `suggested*`
   * fields. Items without a name are returned untouched.
   */
  private async matchProducts(items: ParsedReceiptItem[]): Promise<ParsedReceiptItem[]> {
    // Fetch aliases and products in parallel.
    const [aliases, products] = await Promise.all([
      this.prisma.receiptAlias.findMany({
        select: {
          receiptName: true,
          productId: true,
          product: { select: { id: true, name: true, canonicalName: true } },
        },
      }),
      this.prisma.product.findMany({
        where: { isActive: true },
        select: { id: true, name: true, canonicalName: true },
      }),
    ]);

    // Index aliases by receipt name. Inserting in reverse order makes the
    // FIRST alias win for a duplicated name, the same row a linear find() picks.
    const aliasByReceiptName = new Map(
      [...aliases].reverse().map((alias) => [alias.receiptName, alias] as const),
    );

    return items.map((item) => {
      const raw = (item.rawName ?? '').toLowerCase().trim();
      if (!raw) return item;

      // 1. Alias match (safe; the user does not need to confirm it).
      // NOTE(review): the lookup key is lower-cased but the stored receiptName
      // is compared as-is — presumably aliases are saved lower-cased; confirm.
      const alias = aliasByReceiptName.get(raw);
      if (alias) {
        return {
          ...item,
          matchedProductId: alias.product.id,
          matchedProductName: alias.product.canonicalName ?? alias.product.name,
        };
      }

      // 2. Word-based match (a suggestion that requires confirmation).
      const suggestion = this.findWordMatch(raw, products);
      return {
        ...item,
        suggestedProductId: suggestion?.id,
        suggestedProductName: suggestion
          ? (suggestion.canonicalName ?? suggestion.name)
          : undefined,
      };
    });
  }

  /**
   * Scores every product against the receipt name and returns the best one.
   *
   * Scoring per product: +5 when the full product phrase occurs in the receipt
   * name, +8 per exactly matching strong word, +1 per exactly matching weak
   * descriptor, +3 per partially matching strong word. On equal scores the
   * earlier product in the list is kept.
   *
   * @param raw Receipt name; the caller passes it lower-cased and trimmed.
   * @param products Candidate products.
   * @returns The highest scoring product that passes the signal and score
   *   thresholds, or undefined when none does.
   */
  private findWordMatch(
    raw: string,
    products: { id: number; name: string; canonicalName: string | null }[],
  ): { id: number; name: string; canonicalName: string | null } | undefined {
    // Split the receipt name into words (minimum three characters).
    const rawWords = tokenize(raw);
    if (rawWords.length === 0) return undefined;
    const rawWordSet = new Set(rawWords);

    // Diacritic-free variants, so that e.g. "gradde" equals "grädde".
    const rawWordsNorm = rawWords.map(normalizeToken);
    const rawWordSetNorm = new Set(rawWordsNorm);

    let best: { product: ProductCandidate; score: number } | undefined;

    for (const product of products) {
      const label = product.canonicalName ?? product.name;
      const productWords = tokenize(label);
      if (productWords.length === 0) continue;

      let score = 0;
      let exactStrong = 0;
      let exactAny = 0;
      let partialStrong = 0;

      if (raw.includes(label.toLowerCase())) {
        score += 5;
      }

      for (const pw of productWords) {
        const isWeak = WEAK_DESCRIPTORS.has(pw);
        const pwNorm = normalizeToken(pw);

        if (rawWordSet.has(pw) || rawWordSetNorm.has(pwNorm)) {
          exactAny += 1;
          if (isWeak) {
            score += 1;
          } else {
            exactStrong += 1;
            score += 8;
          }
          continue;
        }

        // Partial matches are only allowed for words of at least four
        // characters, and descriptive words (e.g. "rökt") never count partially.
        if (pw.length < 4 || isWeak) continue;

        const hasPartial =
          rawWords.some((rw) => rw.includes(pw) || pw.includes(rw)) ||
          rawWordsNorm.some((rw) => rw.includes(pwNorm) || pwNorm.includes(rw));
        if (!hasPartial) continue;

        partialStrong += 1;
        score += 3;
      }

      // Require at least one strong exact word, or several cooperating signals.
      // Exception: one strong partial word is enough when the product has a
      // word of five or more characters, e.g. "vispgrädde" -> "grädde".
      // NOTE(review): the length test looks at ANY product word, not at the
      // word that matched partially — confirm that this is intended.
      const hasLongPartial = partialStrong >= 1 && productWords.some((pw) => pw.length >= 5);
      const hasStrongSignal =
        exactStrong >= 1 || exactAny + partialStrong >= 2 || hasLongPartial;
      if (!hasStrongSignal) continue;

      // Threshold that filters out weak single hits.
      if (score < 8) continue;

      if (!best || score > best.score) {
        best = { product, score };
      }
    }

    return best?.product;
  }

  /**
   * Adds a `categorySuggestion` to every item that has neither a product match
   * nor a product suggestion. Hard-coded rules are tried first; the AI service
   * is only called when no rule applies. The step is best-effort: a failure
   * leaves the affected items unchanged and never rejects.
   *
   * A suggestion is looked up once per distinct name and then merged into each
   * row individually, so rows sharing a name keep their own remaining fields.
   */
  private async enrichWithAiCategories(items: ParsedReceiptItem[]): Promise<ParsedReceiptItem[]> {
    if (!items.some(needsCategorySuggestion)) return items;

    let categories: FlattenedCategories;
    try {
      categories = await this.categoriesService.findFlattened();
    } catch (err) {
      // Category service unavailable: return the items without suggestions.
      this.logger.warn(`Kunde inte hämta kategorier för kategoriförslag: ${err}`);
      return items;
    }

    const suggestionByName = new Map<string, AiCategorySuggestion>();
    for (const item of items) {
      if (!needsCategorySuggestion(item) || suggestionByName.has(item.rawName)) continue;
      try {
        const suggestion =
          this.ruleBasedCategorySuggestion(item.rawName, categories) ??
          (await this.aiService.suggestCategory(item.rawName, categories));
        suggestionByName.set(item.rawName, suggestion);
      } catch (err) {
        // A failed lookup for a single item is skipped; a later row with the
        // same name gets another attempt.
        this.logger.warn(`Kategoriförslag misslyckades för "${item.rawName}": ${err}`);
      }
    }

    return items.map((item) => {
      if (!needsCategorySuggestion(item)) return item;
      const suggestion = suggestionByName.get(item.rawName);
      return suggestion === undefined ? item : { ...item, categorySuggestion: suggestion };
    });
  }

  /**
   * Applies the hard-coded category rules (tea, coffee bread, lactose-free or
   * plant-based cooking dairy) to a receipt name.
   *
   * Each rule prefers its specific category ('high' confidence) and falls back
   * to the parent category ('medium'). When a rule matches but neither category
   * exists, evaluation continues with the next rule.
   *
   * @returns A suggestion, or null when no rule yields an existing category.
   */
  private ruleBasedCategorySuggestion(
    rawName: string,
    categories: FlattenedCategories,
  ): CategorySuggestion | null {
    const normalized = normalizeForRules(rawName);

    // Rule: tea
    if (matchesAny(normalized, TEA_PATTERNS)) {
      const l3Te = categories.find(
        (c) => c.name.toLowerCase() === 'te' && c.path.toLowerCase().includes('te & choklad'),
      );
      if (l3Te) return toSuggestion(l3Te, 'high');

      const l2TeChoklad = categories.find(
        (c) =>
          c.name.toLowerCase() === 'te & choklad' && c.path.toLowerCase().startsWith('dryck'),
      );
      if (l2TeChoklad) return toSuggestion(l2TeChoklad, 'medium');
    }

    // Rule: coffee bread (pastries)
    if (matchesAny(normalized, KAFFEBROD_PATTERNS)) {
      const l3Kaffebrod = categories.find(
        (c) =>
          c.name.toLowerCase() === 'kaffebröd' && c.path.toLowerCase().includes('kondis & fika'),
      );
      if (l3Kaffebrod) return toSuggestion(l3Kaffebrod, 'high');

      const l2Kondis = categories.find(
        (c) =>
          c.name.toLowerCase() === 'kondis & fika' &&
          c.path.toLowerCase().startsWith('bröd & kakor'),
      );
      if (l2Kondis) return toSuggestion(l2Kondis, 'medium');
    }

    // Rule: lactose-free / plant-based dairy for cooking. Both a cooking-base
    // word and a plant/allergy word must be present.
    const isCookingBase = matchesAny(normalized, COOKING_BASE_PATTERNS);
    const isPlantOrAllergy = matchesAny(normalized, PLANT_OR_ALLERGY_PATTERNS);
    if (!isCookingBase || !isPlantOrAllergy) return null;

    const l3AllergyCooking = categories.find(
      (c) =>
        c.name.toLowerCase() === 'allergi matlagning' &&
        c.path.toLowerCase().startsWith('matlagning > '),
    );
    if (l3AllergyCooking) return toSuggestion(l3AllergyCooking, 'high');

    const l2Cooking = categories.find(
      (c) =>
        c.name.toLowerCase() === 'matlagning' &&
        c.path.toLowerCase() === 'mejeri, ost & ägg > matlagning',
    );
    if (l2Cooking) return toSuggestion(l2Cooking, 'medium');

    return null;
  }
}