feat: implement unified matching and categorization for receipt items with detailed debug information
Test Suite / test (24.15.0) (push) Has been cancelled
Test Suite / test (24.15.0) (push) Has been cancelled
This commit is contained in:
@@ -0,0 +1,36 @@
|
|||||||
|
// Structured result of receipt item matching
|
||||||
|
/**
 * Structured outcome of matching one receipt item: the product decision,
 * the chosen category, the unit, and an optional debug trace.
 *
 * All product fields are optional; `matchedVia` is always present and says
 * which strategy produced the product decision.
 */
export interface MatchDecision {
  // Product matching
  matchedProductId?: number;
  suggestedProductId?: number;
  matchedProductName?: string;
  suggestedProductName?: string;
  matchedVia: 'alias' | 'wordmatch' | 'none';

  // Category
  category?: {
    id: number;
    name: string;
    path: string;
    confidence: 'high' | 'medium' | 'low';
    source: 'alias' | 'product' | 'rule' | 'ai' | 'guard';
    usedFallback: boolean;
  };

  // Unit
  unit?: string;

  // Debug
  debug?: {
    rawName: string;
    signal: string;
    /** Human-readable log of the steps taken, in order. */
    steps: string[];
    /** One optional entry per decision stage. */
    decisionTree: {
      alias?: { found: boolean; productId?: number };
      wordMatch?: { found: boolean; productId?: number; score?: number };
      rule?: { found: boolean; path?: string };
      /** `error` holds the stringified failure when the AI call throws. */
      ai?: { called: boolean; result?: string; error?: string };
      guard?: { applied: boolean; oldPath?: string; newPath?: string };
      /** Set when a hard category override remaps the category path. */
      hardOverride?: { applied: boolean; oldPath?: string; newPath?: string };
    };
  };
}
|
||||||
@@ -158,11 +158,96 @@ export class ReceiptImportService {
|
|||||||
// Steg 1: Delegera AI-parsning till microservice-importer
|
// Steg 1: Delegera AI-parsning till microservice-importer
|
||||||
const rawItems = await this.parseReceiptViaImporter(file);
|
const rawItems = await this.parseReceiptViaImporter(file);
|
||||||
|
|
||||||
// Steg 2: Matchning mot produktdatabas (kräver DB — stannar i recipe-app)
|
// Steg 2 & 3: Unified matching + categorization
|
||||||
const matched = await this.matchProducts(rawItems, userId);
|
// Samla context en gång för alla items
|
||||||
|
const context = await this.prepareMatchingContext(userId);
|
||||||
|
|
||||||
// Steg 3: Regel + AI-kategorisering för alla användare
|
// Mappa alla items genom unified matcher
|
||||||
return this.enrichWithAiCategories(matched, userId);
|
return Promise.all(
|
||||||
|
rawItems.map((item) =>
|
||||||
|
this.matchAndEnrichReceiptItem(item, context),
|
||||||
|
),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Loads, once per import, everything the per-item matcher needs: receipt
 * aliases, active products, the user's unit mappings, the flattened category
 * list and the user's AI flag.
 *
 * All lookups are independent, so they run concurrently in one `Promise.all`.
 * Every promise is created and handed to `Promise.all` in the same
 * synchronous stretch, so none is left without a rejection handler across an
 * `await`.
 *
 * @param userId Optional user id. Without it only global aliases and all
 *   active products are loaded, no unit mappings are fetched, and AI is
 *   reported as disabled.
 * @returns The matching context (`userId`, `aliases`, `products`,
 *   `unitMappings`, `categories`, `aiEnabled`).
 * @throws Whatever the alias, product, unit-mapping or user query throws.
 *   A failing category load is not fatal: it is logged and replaced by `[]`.
 */
private async prepareMatchingContext(userId?: number) {
  type UnitMappingLite = { productId: number; originalUnit: string; preferredUnit: string };
  type ProductLite = {
    id: number;
    name: string;
    canonicalName: string | null;
    categoryRef: { id: number; name: string } | null;
  };
  type AliasLite = { receiptName: string; product: ProductLite };
  type FlattenedCategories = Awaited<ReturnType<CategoriesService['findFlattened']>>;

  // NOTE(review): the unitMapping delegate is reached through `any` and probed
  // at runtime, presumably because it may be missing from the generated client
  // (or from test doubles) — confirm and drop the cast if it is always present.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const prismaAny = this.prisma as any;

  const productFilter = userId ? { isActive: true, ownerId: userId } : { isActive: true };
  const aliasFilter = userId
    ? { OR: [{ ownerId: userId, isGlobal: false }, { isGlobal: true }] }
    : { isGlobal: true };

  const productSelect = {
    id: true,
    name: true,
    canonicalName: true,
    categoryRef: { select: { id: true, name: true } },
  };

  // Categories are best-effort: matching still works without them, so a
  // failure is logged and an empty list is used instead of aborting the import.
  const loadCategories = async (): Promise<FlattenedCategories> => {
    try {
      return await this.categoriesService.findFlattened();
    } catch (err: unknown) {
      const reason = err instanceof Error ? err.message : String(err);
      this.logger.warn(`prepareMatchingContext: could not load categories, continuing without them: ${reason}`);
      return [];
    }
  };

  const loadCatalog = async () =>
    (await Promise.all([
      this.prisma.receiptAlias.findMany({
        where: aliasFilter,
        // Non-global (user-owned) aliases sort first, so a `find` over the
        // result prefers them over global ones with the same receipt name.
        orderBy: [{ isGlobal: 'asc' }, { id: 'asc' }],
        select: {
          receiptName: true,
          product: { select: productSelect },
        },
      }),
      this.prisma.product.findMany({
        where: productFilter,
        select: productSelect,
      }),
      userId && prismaAny.unitMapping?.findMany
        ? (prismaAny.unitMapping.findMany({
            where: { userId },
            select: { productId: true, originalUnit: true, preferredUnit: true },
          }) as Promise<UnitMappingLite[]>)
        : Promise.resolve([] as UnitMappingLite[]),
    ])) as [AliasLite[], ProductLite[], UnitMappingLite[]];

  const [[aliases, products, unitMappings], categories, user] = await Promise.all([
    loadCatalog(),
    loadCategories(),
    userId
      ? this.prisma.user.findUnique({ where: { id: userId }, select: { aiEngineEnabled: true } })
      : null,
  ]);

  return {
    userId,
    aliases,
    products,
    unitMappings,
    categories,
    aiEnabled: user?.aiEngineEnabled ?? false,
  };
}
|
||||||
|
|
||||||
async upsertUnitMapping(
|
async upsertUnitMapping(
|
||||||
@@ -375,11 +460,12 @@ export class ReceiptImportService {
|
|||||||
if (item.learnAlias) {
|
if (item.learnAlias) {
|
||||||
const normalizedReceiptName = (item.rawName ?? '').trim().toLowerCase();
|
const normalizedReceiptName = (item.rawName ?? '').trim().toLowerCase();
|
||||||
if (normalizedReceiptName) {
|
if (normalizedReceiptName) {
|
||||||
|
const aliasOwnerId: number | null = dto.isAdminLearning ? null : userId || null;
|
||||||
await tx.receiptAlias.upsert({
|
await tx.receiptAlias.upsert({
|
||||||
where: {
|
where: {
|
||||||
receiptName_ownerId_isGlobal: {
|
receiptName_ownerId_isGlobal: {
|
||||||
receiptName: normalizedReceiptName,
|
receiptName: normalizedReceiptName,
|
||||||
ownerId: dto.isAdminLearning ? null : userId,
|
ownerId: aliasOwnerId as any,
|
||||||
isGlobal: dto.isAdminLearning ? true : false,
|
isGlobal: dto.isAdminLearning ? true : false,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -389,7 +475,7 @@ export class ReceiptImportService {
|
|||||||
create: {
|
create: {
|
||||||
receiptName: normalizedReceiptName,
|
receiptName: normalizedReceiptName,
|
||||||
productId,
|
productId,
|
||||||
ownerId: dto.isAdminLearning ? null : userId,
|
ownerId: dto.isAdminLearning ? undefined : userId,
|
||||||
isGlobal: dto.isAdminLearning ? true : false,
|
isGlobal: dto.isAdminLearning ? true : false,
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
@@ -635,6 +721,293 @@ export class ReceiptImportService {
|
|||||||
return best?.product;
|
return best?.product;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||||||
|
// UNIFIED MATCHER: combines product matching and categorization
|
||||||
|
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||||||
|
|
||||||
|
/**
 * Resolves one parsed receipt line against the preloaded matching context.
 *
 * Attempts, in order:
 *  1. Exact alias hit on the lower-cased, trimmed raw name. Returned at once
 *     as a confirmed match (`matchedVia: 'alias'`), carrying the product's
 *     category at high confidence when the product has one.
 *  2. Word-based match. Stored as a suggestion (`matchedVia: 'wordmatch'`),
 *     seeded with the product's category at medium confidence when present,
 *     then passed through categorization.
 *  3. No product. Categorization only (`matchedVia: 'none'`).
 *
 * In cases 1 and 2 the item's unit is replaced by the preferred unit when a
 * unit mapping exists for that product and the item's normalized unit.
 *
 * @param item Parsed receipt line; returned untouched when it has no raw name.
 * @param context Preloaded aliases, products, unit mappings, categories and AI flag.
 * @returns The enriched item. On any error a warning is logged and the
 *   original item is returned unchanged.
 */
private async matchAndEnrichReceiptItem(
  item: ParsedReceiptItem,
  context: {
    userId?: number;
    aliases: Array<{
      receiptName: string;
      product: {
        id: number;
        name: string;
        canonicalName: string | null;
        categoryRef: { id: number; name: string } | null;
      };
    }>;
    products: Array<{
      id: number;
      name: string;
      canonicalName: string | null;
      categoryRef: { id: number; name: string } | null;
    }>;
    unitMappings: Array<{ productId: number; originalUnit: string; preferredUnit: string }>;
    categories: Awaited<ReturnType<CategoriesService['findFlattened']>>;
    aiEnabled: boolean;
  },
): Promise<ParsedReceiptItem> {
  if (!item.rawName) {
    return item;
  }

  const lookupName = item.rawName.toLowerCase().trim();
  const trace = { steps: [] as string[], tree: {} as Record<string, any> };

  // Shared by both product branches: the unit mapping for this product and
  // the item's normalized unit, if one exists.
  const unitMappingFor = (productId: number) =>
    context.unitMappings.find(
      (mapping) =>
        mapping.productId === productId && mapping.originalUnit === (item.unit ?? '').trim().toLowerCase(),
    );

  try {
    // ── Step 1: alias lookup (confirmed match) ──
    trace.steps.push('Step 1: Alias lookup');
    const aliasHit = context.aliases.find((alias) => alias.receiptName === lookupName);

    if (aliasHit) {
      const { product } = aliasHit;
      trace.tree.alias = { found: true, productId: product.id };
      trace.steps.push(` ✓ Alias found → productId ${product.id}`);

      const confirmed: ParsedReceiptItem = {
        ...item,
        matchedProductId: product.id,
        matchedProductName: product.canonicalName ?? product.name,
        unit: unitMappingFor(product.id)?.preferredUnit ?? item.unit,
        matchedVia: 'alias' as const,
      };

      if (product.categoryRef) {
        confirmed.categorySuggestion = {
          categoryId: product.categoryRef.id,
          categoryName: product.categoryRef.name,
          path: product.categoryRef.name,
          confidence: 'high' as const,
          usedFallback: false,
        };
      }

      return confirmed;
    }

    trace.steps.push(` ✗ No alias match`);
    trace.tree.alias = { found: false };

    // ── Step 2: word-based match (suggestion only) ──
    trace.steps.push('Step 2: Word match');
    const wordHit = this.findWordMatchWithScore(lookupName, context.products);

    if (wordHit) {
      trace.tree.wordMatch = { found: true, productId: wordHit.id, score: wordHit.score };
      trace.steps.push(` ✓ Word match found → productId ${wordHit.id} (score ${wordHit.score})`);

      const mapping = unitMappingFor(wordHit.id);

      const suggested: ParsedReceiptItem = {
        ...item,
        suggestedProductId: wordHit.id,
        suggestedProductName: wordHit.canonicalName ?? wordHit.name,
        unit: mapping ? mapping.preferredUnit : item.unit,
        matchedVia: 'wordmatch' as const,
        // Seed with the product's own category when it has one.
        ...(wordHit.categoryRef
          ? {
              categorySuggestion: {
                categoryId: wordHit.categoryRef.id,
                categoryName: wordHit.categoryRef.name,
                path: wordHit.categoryRef.name,
                confidence: 'medium' as const,
                usedFallback: false,
              },
            }
          : {}),
      };

      // Word matches still go through categorization.
      return await this.enrichCategoryForItem(suggested, context, trace);
    }

    trace.steps.push(` ✗ No word match`);
    trace.tree.wordMatch = { found: false };

    // ── Step 3: no product match, categorize from the raw name alone ──
    const unmatched = { ...item, matchedVia: 'none' as const };
    return await this.enrichCategoryForItem(unmatched, context, trace);
  } catch (err) {
    this.logger.warn(`matchAndEnrichReceiptItem error for "${item.rawName}": ${err}`);
    return item;
  }
}
|
||||||
|
|
||||||
|
/**
 * Decides the category suggestion for an item and appends each decision to
 * the caller's debug trace.
 *
 * Stages:
 *  1. Rule-based suggestion on the combined name signal (raw name plus any
 *     matched/suggested product name). A high-confidence rule replaces an
 *     existing suggestion unless it names the same category. Any other rule
 *     result is used only when the item has no suggestion yet.
 *  2. AI suggestion, only when there is still no category and AI is enabled.
 *     AI errors are recorded in the trace and otherwise ignored.
 *  3. Contradiction guard, then hard overrides; each may remap the category.
 *
 * The step log is written to the logger only when `shouldTraceDecision`
 * says so for this signal.
 *
 * @param item Item to categorize; may already carry a `categorySuggestion`.
 * @param context Category list and AI flag (`userId` is not read here).
 * @param debug Mutable trace with `steps: string[]` and a `tree` record.
 * @returns A copy of the item with the final `categorySuggestion`, or the
 *   same item when no category could be determined.
 */
private async enrichCategoryForItem(
  item: ParsedReceiptItem,
  context: {
    userId?: number;
    categories: Awaited<ReturnType<CategoriesService['findFlattened']>>;
    aiEnabled: boolean;
  },
  debug: any,
): Promise<ParsedReceiptItem> {
  debug.steps.push('Step 3: Categorization');

  const nameParts = [item.rawName, item.matchedProductName, item.suggestedProductName].filter(
    (part): part is string => typeof part === 'string' && part.trim().length > 0,
  );
  const signalText = nameParts.join(' ');
  // Text fed to the rule engine, guard and overrides; raw name if the signal is empty.
  const matchText = signalText || item.rawName;

  let chosen = item.categorySuggestion ?? null;

  // ── Rule-based categorization ──
  debug.steps.push(' Trying rule-based categorization');
  const ruleHit = this.ruleBasedCategorySuggestion(matchText, context.categories);
  debug.tree.rule = { found: !!ruleHit, path: ruleHit?.path };

  if (ruleHit?.confidence === 'high') {
    if (chosen && chosen.categoryId === ruleHit.categoryId) {
      debug.steps.push(` ✓ Rule-based HIGH (same as existing): ${ruleHit.path}`);
    } else {
      debug.steps.push(` ✓ Rule-based HIGH: ${ruleHit.path}`);
      chosen = ruleHit;
    }
  } else if (ruleHit && !chosen) {
    debug.steps.push(` ✓ Rule-based fallback: ${ruleHit.path}`);
    chosen = ruleHit;
  } else {
    debug.steps.push(` ✗ Rule-based miss or lower priority`);
  }

  // ── AI categorization as fallback ──
  if (!chosen) {
    debug.steps.push(' Trying AI categorization');
    if (!context.aiEnabled) {
      debug.steps.push(` ✗ AI disabled for user`);
      debug.tree.ai = { called: false };
    } else {
      debug.tree.ai = { called: true };
      try {
        chosen = await this.aiService.suggestCategory(item.rawName, context.categories);
        debug.steps.push(` ✓ AI suggestion: ${chosen.path}`);
      } catch (err) {
        debug.steps.push(` ✗ AI failed: ${err}`);
        debug.tree.ai = { called: true, error: String(err) };
      }
    }
  }

  if (chosen) {
    // ── Contradiction guard (final sanity check) ──
    debug.steps.push(' Applying contradiction guard');
    const pathBeforeGuard = chosen.path;
    const guarded = this.applyContradictionGuard(matchText, chosen, context.categories);
    if (guarded && guarded.path !== pathBeforeGuard) {
      debug.steps.push(` ⚠️ Guard remapped: ${pathBeforeGuard} → ${guarded.path}`);
      chosen = guarded;
      debug.tree.guard = { applied: true, oldPath: pathBeforeGuard, newPath: guarded.path };
    } else {
      debug.steps.push(` ✓ Guard OK`);
    }

    // ── Hard overrides (special rules for problematic cases) ──
    debug.steps.push(' Applying hard overrides');
    const pathBeforeOverride = chosen.path;
    const overridden = this.applyHardCategoryOverrides(matchText, chosen, context.categories);
    if (overridden && overridden.path !== pathBeforeOverride) {
      debug.steps.push(` ⚠️ Override applied: ${pathBeforeOverride} → ${overridden.path}`);
      chosen = overridden;
      debug.tree.hardOverride = { applied: true, oldPath: pathBeforeOverride, newPath: overridden.path };
    } else {
      debug.steps.push(` ✓ No hard override needed`);
    }
  }

  debug.steps.push(
    chosen ? `✅ FINAL: ${chosen.path} (${chosen.confidence})` : `❌ FINAL: No category assigned`,
  );

  if (this.shouldTraceDecision(matchText)) {
    this.logger.log(`[ReceiptDecision] ${item.rawName}\n${debug.steps.join('\n')}`);
  }

  if (!chosen) {
    return item;
  }
  return { ...item, categorySuggestion: chosen };
}
|
||||||
|
|
||||||
|
// Helper: variant of findWordMatch that returns both the product AND its score
|
||||||
|
/**
 * Scores every product against the tokens of a receipt name and returns the
 * best-scoring one together with its score.
 *
 * Scoring per product (on `canonicalName`, falling back to `name`):
 *  - +5 when the lower-cased product name occurs verbatim in `raw`;
 *  - per product word with an exact (or normalized) token hit: +1 if the
 *    word is a weak descriptor, +8 otherwise;
 *  - per non-weak product word of length >= 4 that only overlaps a receipt
 *    token as a substring, in either direction: +3.
 *
 * A product qualifies only with a strong signal (one exact non-weak hit, at
 * least two hits of any kind, or a partial hit on a product containing a word
 * of length >= 5) and a score of at least 8. Ties keep the earlier product.
 *
 * @param raw Receipt name; the caller passes it lower-cased and trimmed.
 * @param products Candidate products.
 * @returns The winning product extended with `score`, or `undefined`.
 */
private findWordMatchWithScore(
  raw: string,
  products: Array<{
    id: number;
    name: string;
    canonicalName: string | null;
    categoryRef: { id: number; name: string } | null;
  }>,
): (typeof products[0] & { score: number }) | undefined {
  const receiptTokens = tokenize(raw);
  if (receiptTokens.length === 0) {
    return undefined;
  }

  const receiptTokenSet = new Set(receiptTokens);
  const receiptTokensNorm = receiptTokens.map(normalizeToken);
  const receiptTokenSetNorm = new Set(receiptTokensNorm);

  // Substring overlap in either direction, on the plain or normalized form.
  const overlapsReceipt = (word: string, wordNorm: string): boolean =>
    receiptTokens.some((token) => token.includes(word) || word.includes(token)) ||
    receiptTokensNorm.some((token) => token.includes(wordNorm) || wordNorm.includes(token));

  let winner: (typeof products[0] & { score: number }) | undefined;

  for (const candidate of products) {
    const label = candidate.canonicalName ?? candidate.name;
    const labelWords = tokenize(label);
    if (labelWords.length === 0) {
      continue;
    }

    // Whole-name bonus when the product name appears verbatim in the receipt text.
    let score = raw.includes(label.toLowerCase()) ? 5 : 0;
    let exactStrong = 0;
    let exactAny = 0;
    let partialStrong = 0;

    for (const word of labelWords) {
      const weak = WEAK_DESCRIPTORS.has(word);
      const wordNorm = normalizeToken(word);

      if (receiptTokenSet.has(word) || receiptTokenSetNorm.has(wordNorm)) {
        exactAny += 1;
        if (weak) {
          score += 1;
        } else {
          exactStrong += 1;
          score += 8;
        }
      } else if (word.length >= 4 && overlapsReceipt(word, wordNorm) && !weak) {
        partialStrong += 1;
        score += 3;
      }
    }

    const hasLongPartial = partialStrong >= 1 && labelWords.some((word) => word.length >= 5);
    const hasStrongSignal = exactStrong >= 1 || exactAny + partialStrong >= 2 || hasLongPartial;
    if (!hasStrongSignal || score < 8) {
      continue;
    }

    // Strictly greater: the first product with the top score wins ties.
    if (!winner || score > winner.score) {
      winner = { ...candidate, score };
    }
  }

  return winner;
}
|
||||||
|
|
||||||
private async enrichWithAiCategories(items: ParsedReceiptItem[], userId?: number): Promise<ParsedReceiptItem[]> {
|
private async enrichWithAiCategories(items: ParsedReceiptItem[], userId?: number): Promise<ParsedReceiptItem[]> {
|
||||||
let categories: Awaited<ReturnType<CategoriesService['findFlattened']>>;
|
let categories: Awaited<ReturnType<CategoriesService['findFlattened']>>;
|
||||||
try {
|
try {
|
||||||
|
|||||||
Reference in New Issue
Block a user