diff --git a/backend/src/receipt-import/dto/match-decision.ts b/backend/src/receipt-import/dto/match-decision.ts new file mode 100644 index 00000000..3b630780 --- /dev/null +++ b/backend/src/receipt-import/dto/match-decision.ts @@ -0,0 +1,36 @@ +// Structured result från receipt item matching +export interface MatchDecision { + // Produkt-matchning + matchedProductId?: number; + suggestedProductId?: number; + matchedProductName?: string; + suggestedProductName?: string; + matchedVia: 'alias' | 'wordmatch' | 'none'; + + // Kategori + category?: { + id: number; + name: string; + path: string; + confidence: 'high' | 'medium' | 'low'; + source: 'alias' | 'product' | 'rule' | 'ai' | 'guard'; + usedFallback: boolean; + }; + + // Enhet + unit?: string; + + // Debug + debug?: { + rawName: string; + signal: string; + steps: string[]; + decisionTree: { + alias?: { found: boolean; productId?: number }; + wordMatch?: { found: boolean; productId?: number; score?: number }; + rule?: { found: boolean; path?: string }; + ai?: { called: boolean; result?: string }; + guard?: { applied: boolean; oldPath?: string; newPath?: string }; + }; + }; +} diff --git a/backend/src/receipt-import/receipt-import.service.ts b/backend/src/receipt-import/receipt-import.service.ts index 708ca8a5..cb767261 100644 --- a/backend/src/receipt-import/receipt-import.service.ts +++ b/backend/src/receipt-import/receipt-import.service.ts @@ -158,11 +158,96 @@ export class ReceiptImportService { // Steg 1: Delegera AI-parsning till microservice-importer const rawItems = await this.parseReceiptViaImporter(file); - // Steg 2: Matchning mot produktdatabas (kräver DB — stannar i recipe-app) - const matched = await this.matchProducts(rawItems, userId); + // Steg 2 & 3: Unified matching + categorization + // Samla context en gång för alla items + const context = await this.prepareMatchingContext(userId); - // Steg 3: Regel + AI-kategorisering för alla användare - return this.enrichWithAiCategories(matched, userId); + // Mappa alla items genom unified matcher + return Promise.all( + rawItems.map((item) => + this.matchAndEnrichReceiptItem(item, context), + ), + ); + } + + private async prepareMatchingContext(userId?: number) { + type UnitMappingLite = { productId: number; originalUnit: string; preferredUnit: string }; + type AliasLite = { + receiptName: string; + product: { + id: number; + name: string; + canonicalName: string | null; + categoryRef: { id: number; name: string } | null; + }; + }; + + const prismaAny = this.prisma as any; + + const productFilter = userId ? { isActive: true, ownerId: userId } : { isActive: true }; + const aliasFilter = userId + ? { OR: [{ ownerId: userId, isGlobal: false }, { isGlobal: true }] } + : { isGlobal: true }; + + const unitMappingsPromise = + userId && prismaAny.unitMapping?.findMany + ? (prismaAny.unitMapping.findMany({ + where: { userId }, + select: { productId: true, originalUnit: true, preferredUnit: true }, + }) as Promise) + : Promise.resolve([] as UnitMappingLite[]); + + let categories: Awaited>; + try { + categories = await this.categoriesService.findFlattened(); + } catch { + categories = []; + } + + const [aliases, products, unitMappings] = await Promise.all([ + this.prisma.receiptAlias.findMany({ + where: aliasFilter, + orderBy: [{ isGlobal: 'asc' }, { id: 'asc' }], + select: { + receiptName: true, + product: { + select: { + id: true, + name: true, + canonicalName: true, + categoryRef: { select: { id: true, name: true } }, + }, + }, + }, + }), + this.prisma.product.findMany({ + where: productFilter, + select: { + id: true, + name: true, + canonicalName: true, + categoryRef: { select: { id: true, name: true } }, + }, + }), + unitMappingsPromise, + ]) as [ + AliasLite[], + Array<{ id: number; name: string; canonicalName: string | null; categoryRef: { id: number; name: string } | null }>, + UnitMappingLite[] + ]; + + const user = userId + ? await this.prisma.user.findUnique({ where: { id: userId }, select: { aiEngineEnabled: true } }) + : null; + + return { + userId, + aliases, + products, + unitMappings, + categories, + aiEnabled: user?.aiEngineEnabled ?? false, + }; } async upsertUnitMapping( @@ -375,11 +460,12 @@ export class ReceiptImportService { if (item.learnAlias) { const normalizedReceiptName = (item.rawName ?? '').trim().toLowerCase(); if (normalizedReceiptName) { + const aliasOwnerId: number | null = dto.isAdminLearning ? null : userId || null; await tx.receiptAlias.upsert({ where: { receiptName_ownerId_isGlobal: { receiptName: normalizedReceiptName, - ownerId: dto.isAdminLearning ? null : userId, + ownerId: aliasOwnerId as any, isGlobal: dto.isAdminLearning ? true : false, }, }, @@ -389,7 +475,7 @@ export class ReceiptImportService { create: { receiptName: normalizedReceiptName, productId, - ownerId: dto.isAdminLearning ? null : userId, + ownerId: dto.isAdminLearning ? undefined : userId, isGlobal: dto.isAdminLearning ? true : false, }, }); @@ -635,6 +721,293 @@ export class ReceiptImportService { return best?.product; } + // ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + // UNIFIED MATCHER: Kombinerar product matching + categorization + // ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + + private async matchAndEnrichReceiptItem( + item: ParsedReceiptItem, + context: { + userId?: number; + aliases: Array<{ + receiptName: string; + product: { + id: number; + name: string; + canonicalName: string | null; + categoryRef: { id: number; name: string } | null; + }; + }>; + products: Array<{ + id: number; + name: string; + canonicalName: string | null; + categoryRef: { id: number; name: string } | null; + }>; + unitMappings: Array<{ productId: number; originalUnit: string; preferredUnit: string }>; + categories: Awaited>; + aiEnabled: boolean; + }, + ): Promise { + if (!item.rawName) return item; + + const raw = item.rawName.toLowerCase().trim(); + const debug = { steps: [], tree: >{} }; + + try { + // ┌─ STEG 1: Alias-lookup (certifierad match) ─────────────────────────┐ + debug.steps.push('Step 1: Alias lookup'); + const aliasMatch = context.aliases.find((a) => a.receiptName === raw); + if (aliasMatch) { + debug.tree.alias = { found: true, productId: aliasMatch.product.id }; + debug.steps.push(` ✓ Alias found → productId ${aliasMatch.product.id}`); + + const mappedUnit = context.unitMappings.find( + (um) => um.productId === aliasMatch.product.id && um.originalUnit === (item.unit ?? '').trim().toLowerCase(), + )?.preferredUnit; + + return { + ...item, + matchedProductId: aliasMatch.product.id, + matchedProductName: aliasMatch.product.canonicalName ?? aliasMatch.product.name, + unit: mappedUnit ?? item.unit, + matchedVia: 'alias' as const, + ...(aliasMatch.product.categoryRef + ? { + categorySuggestion: { + categoryId: aliasMatch.product.categoryRef.id, + categoryName: aliasMatch.product.categoryRef.name, + path: aliasMatch.product.categoryRef.name, + confidence: 'high' as const, + usedFallback: false, + }, + } + : {}), + }; + } + debug.steps.push(` ✗ No alias match`); + debug.tree.alias = { found: false }; + + // ┌─ STEG 2: Ordet-baserad matchning (förslag) ────────────────────────┐ + debug.steps.push('Step 2: Word match'); + const wordMatchResult = this.findWordMatchWithScore(raw, context.products); + if (wordMatchResult) { + debug.tree.wordMatch = { found: true, productId: wordMatchResult.id, score: wordMatchResult.score }; + debug.steps.push(` ✓ Word match found → productId ${wordMatchResult.id} (score ${wordMatchResult.score})`); + + const unitMapping = context.unitMappings.find( + (um) => um.productId === wordMatchResult.id && um.originalUnit === (item.unit ?? '').trim().toLowerCase(), + ); + const preferredUnit = unitMapping ? unitMapping.preferredUnit : item.unit; + + const result: ParsedReceiptItem = { + ...item, + suggestedProductId: wordMatchResult.id, + suggestedProductName: wordMatchResult.canonicalName ?? wordMatchResult.name, + unit: preferredUnit, + matchedVia: 'wordmatch' as const, + }; + + // Lägg på kategori från produkt om den finns + if (wordMatchResult.categoryRef) { + result.categorySuggestion = { + categoryId: wordMatchResult.categoryRef.id, + categoryName: wordMatchResult.categoryRef.name, + path: wordMatchResult.categoryRef.name, + confidence: 'medium' as const, + usedFallback: false, + }; + } + + // Gå vidare till kategorisering för wordmatch + return await this.enrichCategoryForItem(result, context, debug); + } + debug.steps.push(` ✗ No word match`); + debug.tree.wordMatch = { found: false }; + + // ┌─ STEG 3: Regel-baserad kategorisering (no product match) ──────────┐ + return await this.enrichCategoryForItem( + { ...item, matchedVia: 'none' as const }, + context, + debug, + ); + } catch (err) { + this.logger.warn(`matchAndEnrichReceiptItem error for "${item.rawName}": ${err}`); + return item; + } + } + + private async enrichCategoryForItem( + item: ParsedReceiptItem, + context: { + userId?: number; + categories: Awaited>; + aiEnabled: boolean; + }, + debug: any, + ): Promise { + debug.steps.push('Step 3: Categorization'); + + const signalText = [item.rawName, item.matchedProductName, item.suggestedProductName] + .filter((v): v is string => typeof v === 'string' && v.trim().length > 0) + .join(' '); + + let nextCategory = item.categorySuggestion ?? null; + + // ┌─ Försök regel-baserad kategorisering ─────────────────────────────┐ + debug.steps.push(' Trying rule-based categorization'); + const ruleResult = this.ruleBasedCategorySuggestion(signalText || item.rawName, context.categories); + debug.tree.rule = { found: !!ruleResult, path: ruleResult?.path }; + + if (ruleResult?.confidence === 'high') { + const sameAsExisting = nextCategory && nextCategory.categoryId === ruleResult.categoryId; + if (!sameAsExisting) { + debug.steps.push(` ✓ Rule-based HIGH: ${ruleResult.path}`); + nextCategory = ruleResult; + } else { + debug.steps.push(` ✓ Rule-based HIGH (same as existing): ${ruleResult.path}`); + } + } else if (!nextCategory && ruleResult) { + debug.steps.push(` ✓ Rule-based fallback: ${ruleResult.path}`); + nextCategory = ruleResult; + } else { + debug.steps.push(` ✗ Rule-based miss or lower priority`); + } + + // ┌─ AI-kategorisering som fallback ──────────────────────────────────┐ + if (!nextCategory) { + debug.steps.push(' Trying AI categorization'); + if (context.aiEnabled) { + debug.tree.ai = { called: true }; + try { + nextCategory = await this.aiService.suggestCategory(item.rawName, context.categories); + debug.steps.push(` ✓ AI suggestion: ${nextCategory.path}`); + } catch (err) { + debug.steps.push(` ✗ AI failed: ${err}`); + debug.tree.ai = { called: true, error: String(err) }; + } + } else { + debug.steps.push(` ✗ AI disabled for user`); + debug.tree.ai = { called: false }; + } + } + + // ┌─ Contradiction guard (final sanity check) ────────────────────────┐ + if (nextCategory) { + debug.steps.push(' Applying contradiction guard'); + const beforePath = nextCategory.path; + const guardedCategory = this.applyContradictionGuard(signalText || item.rawName, nextCategory, context.categories); + if (guardedCategory && guardedCategory.path !== beforePath) { + debug.steps.push(` ⚠️ Guard remapped: ${beforePath} → ${guardedCategory.path}`); + nextCategory = guardedCategory; + debug.tree.guard = { applied: true, oldPath: beforePath, newPath: guardedCategory.path }; + } else { + debug.steps.push(` ✓ Guard OK`); + } + } + + // ┌─ Hard overrides (special rules for problematic cases) ─────────────┐ + if (nextCategory) { + debug.steps.push(' Applying hard overrides'); + const beforePath = nextCategory.path; + const finalCategory = this.applyHardCategoryOverrides(signalText || item.rawName, nextCategory, context.categories); + if (finalCategory && finalCategory.path !== beforePath) { + debug.steps.push(` ⚠️ Override applied: ${beforePath} → ${finalCategory.path}`); + nextCategory = finalCategory; + debug.tree.hardOverride = { applied: true, oldPath: beforePath, newPath: finalCategory.path }; + } else { + debug.steps.push(` ✓ No hard override needed`); + } + } + + if (nextCategory) { + debug.steps.push(`✅ FINAL: ${nextCategory.path} (${nextCategory.confidence})`); + } else { + debug.steps.push(`❌ FINAL: No category assigned`); + } + + if (this.shouldTraceDecision(signalText || item.rawName)) { + this.logger.log(`[ReceiptDecision] ${item.rawName}\n${debug.steps.join('\n')}`); + } + + return nextCategory ? { ...item, categorySuggestion: nextCategory } : item; + } + + // Helper: findWordMatch som returnerar både product OCH score + private findWordMatchWithScore( + raw: string, + products: Array<{ + id: number; + name: string; + canonicalName: string | null; + categoryRef: { id: number; name: string } | null; + }>, + ): (typeof products[0] & { score: number }) | undefined { + const rawWords = tokenize(raw); + if (rawWords.length === 0) return undefined; + + const rawWordSet = new Set(rawWords); + const rawWordsNorm = rawWords.map(normalizeToken); + const rawWordSetNorm = new Set(rawWordsNorm); + + let best: (typeof products[0] & { score: number }) | undefined; + + for (const product of products) { + const productWords = tokenize(product.canonicalName ?? product.name); + if (productWords.length === 0) continue; + + let score = 0; + let exactStrong = 0; + let exactAny = 0; + let partialStrong = 0; + + const phrase = (product.canonicalName ?? product.name).toLowerCase(); + if (raw.includes(phrase)) { + score += 5; + } + + for (const pw of productWords) { + const isWeak = WEAK_DESCRIPTORS.has(pw); + const pwNorm = normalizeToken(pw); + + if (rawWordSet.has(pw) || rawWordSetNorm.has(pwNorm)) { + exactAny += 1; + if (isWeak) { + score += 1; + } else { + exactStrong += 1; + score += 8; + } + continue; + } + + if (pw.length < 4) continue; + + const hasPartial = + rawWords.some((rw) => rw.includes(pw) || pw.includes(rw)) || + rawWordsNorm.some((rw) => rw.includes(pwNorm) || pwNorm.includes(rw)); + if (!hasPartial) continue; + + if (isWeak) continue; + + partialStrong += 1; + score += 3; + } + + const hasLongPartial = partialStrong >= 1 && productWords.some((pw) => pw.length >= 5); + const hasStrongSignal = exactStrong >= 1 || exactAny + partialStrong >= 2 || hasLongPartial; + if (!hasStrongSignal) continue; + + if (score < 8) continue; + + if (!best || score > best.score) { + best = { ...product, score }; + } + } + + return best; + } + private async enrichWithAiCategories(items: ParsedReceiptItem[], userId?: number): Promise { let categories: Awaited>; try {