feat: implement unified matching and categorization for receipt items with detailed debug information
Test Suite / test (24.15.0) (push) Has been cancelled

This commit is contained in:
Nils-Johan Gynther
2026-05-09 15:11:06 +02:00
parent 8354abbc8f
commit 1966a92a87
2 changed files with 415 additions and 6 deletions
@@ -0,0 +1,36 @@
/**
 * Structured result of receipt item matching.
 *
 * NOTE(review): the field notes below are derived from the field names and
 * literal unions only; confirm them against the code that produces this object.
 */
export interface MatchDecision {
// Product matching
// NOTE(review): presumably `matched*` is a confirmed match and `suggested*`
// a proposal that still needs confirmation — verify against the producer.
matchedProductId?: number;
suggestedProductId?: number;
matchedProductName?: string;
suggestedProductName?: string;
// Which matching strategy produced the product reference, if any.
matchedVia: 'alias' | 'wordmatch' | 'none';
// Category
category?: {
id: number;
name: string;
path: string;
confidence: 'high' | 'medium' | 'low';
// Which stage of the pipeline supplied the category.
source: 'alias' | 'product' | 'rule' | 'ai' | 'guard';
usedFallback: boolean;
};
// Unit
unit?: string;
// Debug
debug?: {
rawName: string;
// NOTE(review): presumably the text that categorization was run on — confirm.
signal: string;
// Human-readable log lines, one per decision step.
steps: string[];
// Per-stage outcome; a stage's key is optional and may be absent.
decisionTree: {
alias?: { found: boolean; productId?: number };
wordMatch?: { found: boolean; productId?: number; score?: number };
rule?: { found: boolean; path?: string };
ai?: { called: boolean; result?: string };
guard?: { applied: boolean; oldPath?: string; newPath?: string };
};
};
}
@@ -158,11 +158,96 @@ export class ReceiptImportService {
// Steg 1: Delegera AI-parsning till microservice-importer
const rawItems = await this.parseReceiptViaImporter(file);
// Steg 2: Matchning mot produktdatabas (kräver DB — stannar i recipe-app)
const matched = await this.matchProducts(rawItems, userId);
// Steg 2 & 3: Unified matching + categorization
// Samla context en gång för alla items
const context = await this.prepareMatchingContext(userId);
// Steg 3: Regel + AI-kategorisering för alla användare
return this.enrichWithAiCategories(matched, userId);
// Mappa alla items genom unified matcher
return Promise.all(
rawItems.map((item) =>
this.matchAndEnrichReceiptItem(item, context),
),
);
}
/**
 * Loads, once per import, every lookup table the unified matcher needs so that
 * individual receipt items can be matched without further database access.
 *
 * All lookups are independent of each other, so they are issued together
 * through a single `Promise.all` instead of in sequential steps.
 *
 * @param userId Optional user. When truthy, products are restricted to that
 *   owner, aliases include the user's non-global aliases in addition to global
 *   ones, unit mappings are loaded, and the user's AI flag is read. When
 *   falsy, only active products and global aliases are loaded.
 * @returns The matching context: `aliases` (ordered by `isGlobal` ascending,
 *   then `id`), `products`, `unitMappings` (empty without a user or when the
 *   `unitMapping` delegate is missing), `categories` (empty if loading them
 *   fails) and `aiEnabled` (defaults to `false`).
 */
private async prepareMatchingContext(userId?: number) {
  type UnitMappingLite = { productId: number; originalUnit: string; preferredUnit: string };
  type ProductLite = {
    id: number;
    name: string;
    canonicalName: string | null;
    categoryRef: { id: number; name: string } | null;
  };
  type AliasLite = { receiptName: string; product: ProductLite };
  type FlatCategories = Awaited<ReturnType<CategoriesService['findFlattened']>>;

  // The unitMapping delegate is probed dynamically because it may be absent
  // from the Prisma client this service was built against.
  const prismaAny = this.prisma as any;

  const productFilter = userId ? { isActive: true, ownerId: userId } : { isActive: true };
  const aliasFilter = userId
    ? { OR: [{ ownerId: userId, isGlobal: false }, { isGlobal: true }] }
    : { isGlobal: true };

  // Categories are best-effort: any failure (sync throw or rejection) degrades
  // to an empty list instead of aborting the whole import.
  const loadCategories = async (): Promise<FlatCategories> => {
    try {
      return await this.categoriesService.findFlattened();
    } catch {
      return [];
    }
  };

  const unitMappingsPromise =
    userId && prismaAny.unitMapping?.findMany
      ? (prismaAny.unitMapping.findMany({
          where: { userId },
          select: { productId: true, originalUnit: true, preferredUnit: true },
        }) as Promise<UnitMappingLite[]>)
      : Promise.resolve([] as UnitMappingLite[]);

  const [lookups, categories, user] = await Promise.all([
    Promise.all([
      this.prisma.receiptAlias.findMany({
        where: aliasFilter,
        orderBy: [{ isGlobal: 'asc' }, { id: 'asc' }],
        select: {
          receiptName: true,
          product: {
            select: {
              id: true,
              name: true,
              canonicalName: true,
              categoryRef: { select: { id: true, name: true } },
            },
          },
        },
      }),
      this.prisma.product.findMany({
        where: productFilter,
        select: {
          id: true,
          name: true,
          canonicalName: true,
          categoryRef: { select: { id: true, name: true } },
        },
      }),
      unitMappingsPromise,
    ]),
    loadCategories(),
    userId
      ? this.prisma.user.findUnique({ where: { id: userId }, select: { aiEngineEnabled: true } })
      : null,
  ]);

  const [aliases, products, unitMappings] = lookups as [
    AliasLite[],
    ProductLite[],
    UnitMappingLite[],
  ];

  return {
    userId,
    aliases,
    products,
    unitMappings,
    categories,
    aiEnabled: user?.aiEngineEnabled ?? false,
  };
}
async upsertUnitMapping(
@@ -375,11 +460,12 @@ export class ReceiptImportService {
if (item.learnAlias) {
const normalizedReceiptName = (item.rawName ?? '').trim().toLowerCase();
if (normalizedReceiptName) {
const aliasOwnerId: number | null = dto.isAdminLearning ? null : userId || null;
await tx.receiptAlias.upsert({
where: {
receiptName_ownerId_isGlobal: {
receiptName: normalizedReceiptName,
ownerId: dto.isAdminLearning ? null : userId,
ownerId: aliasOwnerId as any,
isGlobal: dto.isAdminLearning ? true : false,
},
},
@@ -389,7 +475,7 @@ export class ReceiptImportService {
create: {
receiptName: normalizedReceiptName,
productId,
ownerId: dto.isAdminLearning ? null : userId,
ownerId: dto.isAdminLearning ? undefined : userId,
isGlobal: dto.isAdminLearning ? true : false,
},
});
@@ -635,6 +721,293 @@ export class ReceiptImportService {
return best?.product;
}
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
// UNIFIED MATCHER: Combines product matching + categorization
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
/**
 * Runs one parsed receipt line through the unified pipeline:
 *
 *  1. Exact alias lookup on the lower-cased, trimmed raw name. A hit is
 *     returned immediately as a matched product (`matchedVia: 'alias'`),
 *     carrying the product's category with `high` confidence when it has one.
 *  2. Token-based word match. A hit becomes a *suggested* product
 *     (`matchedVia: 'wordmatch'`) with the product's category at `medium`
 *     confidence, and is then passed on to categorization.
 *  3. Otherwise the item is passed to categorization with `matchedVia: 'none'`.
 *
 * Items without a `rawName` are returned untouched. Any error is logged as a
 * warning and the original item is returned unchanged.
 *
 * @param item    The parsed receipt line.
 * @param context Lookup tables prepared once per import.
 * @returns The item enriched with match, unit and category information.
 */
private async matchAndEnrichReceiptItem(
  item: ParsedReceiptItem,
  context: {
    userId?: number;
    aliases: Array<{
      receiptName: string;
      product: {
        id: number;
        name: string;
        canonicalName: string | null;
        categoryRef: { id: number; name: string } | null;
      };
    }>;
    products: Array<{
      id: number;
      name: string;
      canonicalName: string | null;
      categoryRef: { id: number; name: string } | null;
    }>;
    unitMappings: Array<{ productId: number; originalUnit: string; preferredUnit: string }>;
    categories: Awaited<ReturnType<CategoriesService['findFlattened']>>;
    aiEnabled: boolean;
  },
): Promise<ParsedReceiptItem> {
  if (!item.rawName) {
    return item;
  }

  const raw = item.rawName.toLowerCase().trim();
  const debug = { steps: [] as string[], tree: {} as Record<string, any> };

  // Finds the unit mapping for a product whose original unit equals the
  // item's unit (trimmed, lower-cased).
  const lookupUnitMapping = (productId: number) =>
    context.unitMappings.find(
      (um) => um.productId === productId && um.originalUnit === (item.unit ?? '').trim().toLowerCase(),
    );

  try {
    // ── Step 1: alias lookup (confirmed match) ──
    debug.steps.push('Step 1: Alias lookup');
    const aliasHit = context.aliases.find(({ receiptName }) => receiptName === raw);

    if (aliasHit) {
      const { product } = aliasHit;
      debug.tree.alias = { found: true, productId: product.id };
      debug.steps.push(` ✓ Alias found → productId ${product.id}`);

      const mappedUnit = lookupUnitMapping(product.id)?.preferredUnit;
      const confirmed: ParsedReceiptItem = {
        ...item,
        matchedProductId: product.id,
        matchedProductName: product.canonicalName ?? product.name,
        unit: mappedUnit ?? item.unit,
        matchedVia: 'alias' as const,
      };

      if (!product.categoryRef) {
        return confirmed;
      }
      return {
        ...confirmed,
        categorySuggestion: {
          categoryId: product.categoryRef.id,
          categoryName: product.categoryRef.name,
          path: product.categoryRef.name,
          confidence: 'high' as const,
          usedFallback: false,
        },
      };
    }

    debug.steps.push(' ✗ No alias match');
    debug.tree.alias = { found: false };

    // ── Step 2: word-based matching (suggestion only) ──
    debug.steps.push('Step 2: Word match');
    const wordHit = this.findWordMatchWithScore(raw, context.products);

    if (!wordHit) {
      debug.steps.push(' ✗ No word match');
      debug.tree.wordMatch = { found: false };

      // ── Step 3: categorization without any product match ──
      return await this.enrichCategoryForItem({ ...item, matchedVia: 'none' as const }, context, debug);
    }

    debug.tree.wordMatch = { found: true, productId: wordHit.id, score: wordHit.score };
    debug.steps.push(` ✓ Word match found → productId ${wordHit.id} (score ${wordHit.score})`);

    const unitMapping = lookupUnitMapping(wordHit.id);
    const suggested: ParsedReceiptItem = {
      ...item,
      suggestedProductId: wordHit.id,
      suggestedProductName: wordHit.canonicalName ?? wordHit.name,
      unit: unitMapping ? unitMapping.preferredUnit : item.unit,
      matchedVia: 'wordmatch' as const,
    };

    // Seed the category from the suggested product when it has one.
    if (wordHit.categoryRef) {
      suggested.categorySuggestion = {
        categoryId: wordHit.categoryRef.id,
        categoryName: wordHit.categoryRef.name,
        path: wordHit.categoryRef.name,
        confidence: 'medium' as const,
        usedFallback: false,
      };
    }

    // A word match still goes through categorization.
    return await this.enrichCategoryForItem(suggested, context, debug);
  } catch (err) {
    this.logger.warn(`matchAndEnrichReceiptItem error for "${item.rawName}": ${err}`);
    return item;
  }
}
/**
 * Decides the final category suggestion for one receipt item and records each
 * decision in `debug`.
 *
 * Order of precedence:
 *  1. Rule-based suggestion: a `high`-confidence rule replaces an existing
 *     suggestion unless it points at the same category; a lower-confidence
 *     rule is only used when the item has no suggestion yet.
 *  2. AI suggestion: only when nothing has been chosen and AI is enabled.
 *     A failing or empty AI answer leaves the item without a category.
 *  3. Contradiction guard, then hard overrides: each may remap the chosen
 *     category; a remap is recorded with the old and the new path.
 *
 * @param item    Item to categorize; an existing `categorySuggestion` is the
 *                starting point.
 * @param context Flattened categories and the AI toggle.
 * @param debug   Mutable trace object with `steps` (string log) and `tree`
 *                (per-stage outcome); it is appended to in place.
 * @returns The item with `categorySuggestion` set, or the item unchanged when
 *          no category could be determined.
 */
private async enrichCategoryForItem(
  item: ParsedReceiptItem,
  context: {
    userId?: number;
    categories: Awaited<ReturnType<CategoriesService['findFlattened']>>;
    aiEnabled: boolean;
  },
  debug: any,
): Promise<ParsedReceiptItem> {
  debug.steps.push('Step 3: Categorization');

  // Text used for rules/guards: raw name plus any matched/suggested product name.
  const signalText = [item.rawName, item.matchedProductName, item.suggestedProductName]
    .filter((v): v is string => typeof v === 'string' && v.trim().length > 0)
    .join(' ');
  const signal = signalText || item.rawName;

  let nextCategory = item.categorySuggestion ?? null;

  // ── Rule-based categorization ──
  debug.steps.push(' Trying rule-based categorization');
  const ruleResult = this.ruleBasedCategorySuggestion(signal, context.categories);
  debug.tree.rule = { found: !!ruleResult, path: ruleResult?.path };
  if (ruleResult?.confidence === 'high') {
    const sameAsExisting = nextCategory && nextCategory.categoryId === ruleResult.categoryId;
    if (!sameAsExisting) {
      debug.steps.push(` ✓ Rule-based HIGH: ${ruleResult.path}`);
      nextCategory = ruleResult;
    } else {
      debug.steps.push(` ✓ Rule-based HIGH (same as existing): ${ruleResult.path}`);
    }
  } else if (!nextCategory && ruleResult) {
    debug.steps.push(` ✓ Rule-based fallback: ${ruleResult.path}`);
    nextCategory = ruleResult;
  } else {
    debug.steps.push(` ✗ Rule-based miss or lower priority`);
  }

  // ── AI categorization as fallback ──
  if (!nextCategory) {
    debug.steps.push(' Trying AI categorization');
    if (context.aiEnabled) {
      debug.tree.ai = { called: true };
      try {
        const aiSuggestion = await this.aiService.suggestCategory(item.rawName, context.categories);
        // Handle an empty answer explicitly instead of relying on a TypeError
        // from dereferencing it, which would be mis-reported as an AI failure.
        if (aiSuggestion) {
          nextCategory = aiSuggestion;
          debug.steps.push(` ✓ AI suggestion: ${aiSuggestion.path}`);
          debug.tree.ai = { called: true, result: aiSuggestion.path };
        } else {
          debug.steps.push(` ✗ AI returned no suggestion`);
        }
      } catch (err) {
        debug.steps.push(` ✗ AI failed: ${err}`);
        debug.tree.ai = { called: true, error: String(err) };
      }
    } else {
      debug.steps.push(` ✗ AI disabled for user`);
      debug.tree.ai = { called: false };
    }
  }

  // ── Contradiction guard (final sanity check) ──
  if (nextCategory) {
    debug.steps.push(' Applying contradiction guard');
    const beforePath = nextCategory.path;
    const guardedCategory = this.applyContradictionGuard(signal, nextCategory, context.categories);
    if (guardedCategory && guardedCategory.path !== beforePath) {
      // Old and new path are separated so the trace line stays readable.
      debug.steps.push(` ⚠️ Guard remapped: ${beforePath} → ${guardedCategory.path}`);
      nextCategory = guardedCategory;
      debug.tree.guard = { applied: true, oldPath: beforePath, newPath: guardedCategory.path };
    } else {
      debug.steps.push(` ✓ Guard OK`);
    }
  }

  // ── Hard overrides (special rules for problematic cases) ──
  if (nextCategory) {
    debug.steps.push(' Applying hard overrides');
    const beforePath = nextCategory.path;
    const finalCategory = this.applyHardCategoryOverrides(signal, nextCategory, context.categories);
    if (finalCategory && finalCategory.path !== beforePath) {
      debug.steps.push(` ⚠️ Override applied: ${beforePath} → ${finalCategory.path}`);
      nextCategory = finalCategory;
      debug.tree.hardOverride = { applied: true, oldPath: beforePath, newPath: finalCategory.path };
    } else {
      debug.steps.push(` ✓ No hard override needed`);
    }
  }

  if (nextCategory) {
    debug.steps.push(`✅ FINAL: ${nextCategory.path} (${nextCategory.confidence})`);
  } else {
    debug.steps.push(`❌ FINAL: No category assigned`);
  }

  // The full trace is only logged for items selected by shouldTraceDecision.
  if (this.shouldTraceDecision(signal)) {
    this.logger.log(`[ReceiptDecision] ${item.rawName}\n${debug.steps.join('\n')}`);
  }

  return nextCategory ? { ...item, categorySuggestion: nextCategory } : item;
}
/**
 * Word-match helper that returns both the best product AND its score.
 *
 * Each product's display name (`canonicalName`, falling back to `name`) is
 * tokenized and compared with the tokens of the receipt text:
 *  - +5 when the receipt text contains the whole lower-cased product name,
 *  - +8 for an exact token hit on a non-weak word, +1 on a weak descriptor,
 *  - +3 for a substring overlap (either direction, raw or normalized) of a
 *    non-weak product token of at least 4 characters.
 *
 * A product qualifies only with a strong signal (one exact non-weak hit, two
 * hits of any kind, or a partial hit while the product has a token of 5+
 * characters) and a score of at least 8. On equal scores the earlier product
 * in `products` wins.
 *
 * @param raw      Receipt text to match (the caller passes it lower-cased and trimmed).
 * @param products Candidate products.
 * @returns The best product with its `score`, or `undefined` when `raw`
 *          yields no tokens or no product qualifies.
 */
private findWordMatchWithScore(
  raw: string,
  products: Array<{
    id: number;
    name: string;
    canonicalName: string | null;
    categoryRef: { id: number; name: string } | null;
  }>,
): (typeof products[0] & { score: number }) | undefined {
  const receiptTokens = tokenize(raw);
  if (receiptTokens.length === 0) {
    return undefined;
  }

  const receiptTokenSet = new Set(receiptTokens);
  const receiptTokensNorm = receiptTokens.map(normalizeToken);
  const receiptTokenSetNorm = new Set(receiptTokensNorm);

  // True when either string contains the other.
  const overlaps = (receiptToken: string, productToken: string): boolean =>
    receiptToken.includes(productToken) || productToken.includes(receiptToken);

  let winner: (typeof products[0] & { score: number }) | undefined;

  for (const candidate of products) {
    const label = candidate.canonicalName ?? candidate.name;
    const labelTokens = tokenize(label);
    if (labelTokens.length === 0) {
      continue;
    }

    // Whole-phrase bonus.
    let score = raw.includes(label.toLowerCase()) ? 5 : 0;
    let exactStrong = 0;
    let exactAny = 0;
    let partialStrong = 0;

    for (const token of labelTokens) {
      const weak = WEAK_DESCRIPTORS.has(token);
      const tokenNorm = normalizeToken(token);

      if (receiptTokenSet.has(token) || receiptTokenSetNorm.has(tokenNorm)) {
        exactAny += 1;
        if (weak) {
          score += 1;
        } else {
          exactStrong += 1;
          score += 8;
        }
        continue;
      }

      // Partial matching is only attempted for product tokens of 4+ characters.
      if (token.length < 4) {
        continue;
      }
      const partialHit =
        receiptTokens.some((rw) => overlaps(rw, token)) ||
        receiptTokensNorm.some((rw) => overlaps(rw, tokenNorm));
      if (!partialHit || weak) {
        continue;
      }
      partialStrong += 1;
      score += 3;
    }

    const hasLongPartial = partialStrong >= 1 && labelTokens.some((t) => t.length >= 5);
    const hasStrongSignal = exactStrong >= 1 || exactAny + partialStrong >= 2 || hasLongPartial;
    if (!hasStrongSignal || score < 8) {
      continue;
    }

    // Strictly greater: the first product reaching a score keeps it on ties.
    if (winner === undefined || score > winner.score) {
      winner = { ...candidate, score };
    }
  }

  return winner;
}
private async enrichWithAiCategories(items: ParsedReceiptItem[], userId?: number): Promise<ParsedReceiptItem[]> {
let categories: Awaited<ReturnType<CategoriesService['findFlattened']>>;
try {