feat: implement unified matching and categorization for receipt items with detailed debug information
Test Suite / test (24.15.0) (push) Has been cancelled
Test Suite / test (24.15.0) (push) Has been cancelled
This commit is contained in:
@@ -158,11 +158,96 @@ export class ReceiptImportService {
|
||||
// Steg 1: Delegera AI-parsning till microservice-importer
|
||||
const rawItems = await this.parseReceiptViaImporter(file);
|
||||
|
||||
// Steg 2: Matchning mot produktdatabas (kräver DB — stannar i recipe-app)
|
||||
const matched = await this.matchProducts(rawItems, userId);
|
||||
// Steg 2 & 3: Unified matching + categorization
|
||||
// Samla context en gång för alla items
|
||||
const context = await this.prepareMatchingContext(userId);
|
||||
|
||||
// Steg 3: Regel + AI-kategorisering för alla användare
|
||||
return this.enrichWithAiCategories(matched, userId);
|
||||
// Mappa alla items genom unified matcher
|
||||
return Promise.all(
|
||||
rawItems.map((item) =>
|
||||
this.matchAndEnrichReceiptItem(item, context),
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Loads everything the unified matcher needs once per import, so that each
 * receipt item can be matched in memory afterwards.
 *
 * The lookups are independent of each other and are therefore issued
 * concurrently instead of one after another.
 *
 * @param userId Owner of the import. When omitted (or falsy) only global
 *   aliases are loaded, products are filtered on `isActive` alone, no unit
 *   mappings are fetched and `aiEnabled` is reported as `false`.
 * @returns The aliases, products, unit mappings and flattened categories to
 *   match against, plus the user's `aiEngineEnabled` flag.
 */
private async prepareMatchingContext(userId?: number) {
  type CategoryRefLite = { id: number; name: string } | null;
  type ProductLite = { id: number; name: string; canonicalName: string | null; categoryRef: CategoryRefLite };
  type AliasLite = { receiptName: string; product: ProductLite };
  type UnitMappingLite = { productId: number; originalUnit: string; preferredUnit: string };
  type FlattenedCategories = Awaited<ReturnType<CategoriesService['findFlattened']>>;

  // The unitMapping delegate is reached through `any` and feature-detected.
  // NOTE(review): presumably the generated Prisma client may not expose it yet — confirm.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const prismaAny = this.prisma as any;

  const productFilter = userId ? { isActive: true, ownerId: userId } : { isActive: true };
  // With a user: that user's private aliases plus every global alias.
  const aliasFilter = userId
    ? { OR: [{ ownerId: userId, isGlobal: false }, { isGlobal: true }] }
    : { isGlobal: true };

  const unitMappingsPromise: Promise<UnitMappingLite[]> =
    userId && prismaAny.unitMapping?.findMany
      ? prismaAny.unitMapping.findMany({
          where: { userId },
          select: { productId: true, originalUnit: true, preferredUnit: true },
        })
      : Promise.resolve([]);

  // Categories are best-effort: a failure degrades to "no categories" rather
  // than aborting the import, but it is logged so it does not go unnoticed.
  const loadCategories = async (): Promise<FlattenedCategories> => {
    try {
      return await this.categoriesService.findFlattened();
    } catch (err: unknown) {
      const reason = err instanceof Error ? err.message : String(err);
      this.logger.warn(`prepareMatchingContext: could not load categories, continuing without them: ${reason}`);
      return [];
    }
  };

  const productSelect = {
    id: true,
    name: true,
    canonicalName: true,
    categoryRef: { select: { id: true, name: true } },
  };

  const [matchingData, categories, user] = await Promise.all([
    Promise.all([
      this.prisma.receiptAlias.findMany({
        where: aliasFilter,
        // Non-global (user-owned) aliases sort first, so a first-hit lookup
        // over this list sees them before the global ones.
        orderBy: [{ isGlobal: 'asc' }, { id: 'asc' }],
        select: {
          receiptName: true,
          product: { select: productSelect },
        },
      }),
      this.prisma.product.findMany({
        where: productFilter,
        select: productSelect,
      }),
      unitMappingsPromise,
    ]),
    loadCategories(),
    userId
      ? this.prisma.user.findUnique({ where: { id: userId }, select: { aiEngineEnabled: true } })
      : null,
  ]);

  const [aliases, products, unitMappings] = matchingData as [AliasLite[], ProductLite[], UnitMappingLite[]];

  return {
    userId,
    aliases,
    products,
    unitMappings,
    categories,
    aiEnabled: user?.aiEngineEnabled ?? false,
  };
}
|
||||
|
||||
async upsertUnitMapping(
|
||||
@@ -375,11 +460,12 @@ export class ReceiptImportService {
|
||||
if (item.learnAlias) {
|
||||
const normalizedReceiptName = (item.rawName ?? '').trim().toLowerCase();
|
||||
if (normalizedReceiptName) {
|
||||
const aliasOwnerId: number | null = dto.isAdminLearning ? null : userId || null;
|
||||
await tx.receiptAlias.upsert({
|
||||
where: {
|
||||
receiptName_ownerId_isGlobal: {
|
||||
receiptName: normalizedReceiptName,
|
||||
ownerId: dto.isAdminLearning ? null : userId,
|
||||
ownerId: aliasOwnerId as any,
|
||||
isGlobal: dto.isAdminLearning ? true : false,
|
||||
},
|
||||
},
|
||||
@@ -389,7 +475,7 @@ export class ReceiptImportService {
|
||||
create: {
|
||||
receiptName: normalizedReceiptName,
|
||||
productId,
|
||||
ownerId: dto.isAdminLearning ? null : userId,
|
||||
ownerId: dto.isAdminLearning ? undefined : userId,
|
||||
isGlobal: dto.isAdminLearning ? true : false,
|
||||
},
|
||||
});
|
||||
@@ -635,6 +721,293 @@ export class ReceiptImportService {
|
||||
return best?.product;
|
||||
}
|
||||
|
||||
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||||
// UNIFIED MATCHER: Kombinerar product matching + categorization
|
||||
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||||
|
||||
/**
 * Runs a single parsed receipt line through the unified pipeline.
 *
 * 1. Alias lookup — an exact hit on the lower-cased, trimmed raw name is a
 *    confirmed match and is returned immediately (with the product's category
 *    at `high` confidence when it has one); no further categorization runs.
 * 2. Word match — the best-scoring product becomes a *suggestion*; its
 *    category (if any) is attached at `medium` confidence and the item then
 *    continues to categorization.
 * 3. No product hit — the item is sent to categorization with
 *    `matchedVia: 'none'`.
 *
 * Items without a `rawName` are returned untouched. Any error inside the
 * pipeline is logged as a warning and the original item is returned.
 *
 * @param item Parsed receipt line to match.
 * @param context Pre-loaded matching data shared by all items of the import.
 * @returns The item enriched with match and category information.
 */
private async matchAndEnrichReceiptItem(
  item: ParsedReceiptItem,
  context: {
    userId?: number;
    aliases: Array<{
      receiptName: string;
      product: {
        id: number;
        name: string;
        canonicalName: string | null;
        categoryRef: { id: number; name: string } | null;
      };
    }>;
    products: Array<{
      id: number;
      name: string;
      canonicalName: string | null;
      categoryRef: { id: number; name: string } | null;
    }>;
    unitMappings: Array<{ productId: number; originalUnit: string; preferredUnit: string }>;
    categories: Awaited<ReturnType<CategoriesService['findFlattened']>>;
    aiEnabled: boolean;
  },
): Promise<ParsedReceiptItem> {
  if (!item.rawName) return item;

  const raw = item.rawName.toLowerCase().trim();
  const debug = { steps: [] as string[], tree: {} as Record<string, any> };

  // Unit mapping for a product and this item's unit. The unit is normalized
  // inside the predicate on purpose, so it is only evaluated when a mapping
  // for the product actually exists (and only from within the try below).
  const findUnitMapping = (productId: number) =>
    context.unitMappings.find(
      (um) => um.productId === productId && um.originalUnit === (item.unit ?? '').trim().toLowerCase(),
    );

  try {
    // Step 1: alias lookup (confirmed match).
    debug.steps.push('Step 1: Alias lookup');
    const aliasHit = context.aliases.find((a) => a.receiptName === raw);
    if (aliasHit) {
      const { product } = aliasHit;
      debug.tree.alias = { found: true, productId: product.id };
      debug.steps.push(` ✓ Alias found → productId ${product.id}`);

      const mappedUnit = findUnitMapping(product.id)?.preferredUnit;

      const aliased: ParsedReceiptItem = {
        ...item,
        matchedProductId: product.id,
        matchedProductName: product.canonicalName ?? product.name,
        unit: mappedUnit ?? item.unit,
        matchedVia: 'alias' as const,
      };
      if (!product.categoryRef) return aliased;

      return {
        ...aliased,
        categorySuggestion: {
          categoryId: product.categoryRef.id,
          categoryName: product.categoryRef.name,
          path: product.categoryRef.name,
          confidence: 'high' as const,
          usedFallback: false,
        },
      };
    }
    debug.steps.push(` ✗ No alias match`);
    debug.tree.alias = { found: false };

    // Step 2: word-based match (suggestion only).
    debug.steps.push('Step 2: Word match');
    const wordHit = this.findWordMatchWithScore(raw, context.products);
    if (wordHit) {
      debug.tree.wordMatch = { found: true, productId: wordHit.id, score: wordHit.score };
      debug.steps.push(` ✓ Word match found → productId ${wordHit.id} (score ${wordHit.score})`);

      const mapping = findUnitMapping(wordHit.id);

      const suggested: ParsedReceiptItem = {
        ...item,
        suggestedProductId: wordHit.id,
        suggestedProductName: wordHit.canonicalName ?? wordHit.name,
        unit: mapping ? mapping.preferredUnit : item.unit,
        matchedVia: 'wordmatch' as const,
      };

      // Seed the category from the suggested product when it has one.
      if (wordHit.categoryRef) {
        suggested.categorySuggestion = {
          categoryId: wordHit.categoryRef.id,
          categoryName: wordHit.categoryRef.name,
          path: wordHit.categoryRef.name,
          confidence: 'medium' as const,
          usedFallback: false,
        };
      }

      // A word match still goes through categorization.
      return await this.enrichCategoryForItem(suggested, context, debug);
    }
    debug.steps.push(` ✗ No word match`);
    debug.tree.wordMatch = { found: false };

    // Step 3: no product match — categorize the raw item.
    const unmatched = { ...item, matchedVia: 'none' as const };
    return await this.enrichCategoryForItem(unmatched, context, debug);
  } catch (err) {
    this.logger.warn(`matchAndEnrichReceiptItem error for "${item.rawName}": ${err}`);
    return item;
  }
}
|
||||
|
||||
/**
 * Decides the category suggestion for an item and records each decision in
 * the supplied debug trace.
 *
 * Order of precedence:
 * 1. A rule-based result with `high` confidence replaces any existing
 *    suggestion that points at a different category.
 * 2. Otherwise a rule-based result is used only when the item has no
 *    suggestion yet.
 * 3. If there is still no category, the AI service is asked — only when AI
 *    is enabled in the context; an AI error is recorded in the trace and
 *    otherwise ignored.
 * 4. Any resulting category is passed through the contradiction guard and
 *    then the hard overrides, each of which may remap it.
 *
 * The collected steps are written to the log when `shouldTraceDecision`
 * accepts the item's signal text.
 *
 * @param item Item to categorize (may already carry a category suggestion).
 * @param context Flattened categories and the AI-enabled flag.
 * @param debug Mutable trace object with `steps` and `tree`; appended to here.
 * @returns The item with the final `categorySuggestion`, or the item
 *   unchanged when no category could be determined.
 */
private async enrichCategoryForItem(
  item: ParsedReceiptItem,
  context: {
    userId?: number;
    categories: Awaited<ReturnType<CategoriesService['findFlattened']>>;
    aiEnabled: boolean;
  },
  debug: any,
): Promise<ParsedReceiptItem> {
  const trace = (line: string): void => {
    debug.steps.push(line);
  };

  trace('Step 3: Categorization');

  // Text the rules operate on: raw name plus any matched/suggested product name.
  const signalText = [item.rawName, item.matchedProductName, item.suggestedProductName]
    .filter((v): v is string => typeof v === 'string' && v.trim().length > 0)
    .join(' ');
  const text = signalText || item.rawName;

  let nextCategory = item.categorySuggestion ?? null;

  // Rule-based categorization.
  trace(' Trying rule-based categorization');
  const rule = this.ruleBasedCategorySuggestion(text, context.categories);
  debug.tree.rule = { found: Boolean(rule), path: rule?.path };

  if (rule?.confidence === 'high') {
    const sameAsExisting = nextCategory && nextCategory.categoryId === rule.categoryId;
    if (sameAsExisting) {
      trace(` ✓ Rule-based HIGH (same as existing): ${rule.path}`);
    } else {
      trace(` ✓ Rule-based HIGH: ${rule.path}`);
      nextCategory = rule;
    }
  } else if (!nextCategory && rule) {
    trace(` ✓ Rule-based fallback: ${rule.path}`);
    nextCategory = rule;
  } else {
    trace(` ✗ Rule-based miss or lower priority`);
  }

  // AI categorization as a fallback.
  if (!nextCategory) {
    trace(' Trying AI categorization');
    if (!context.aiEnabled) {
      trace(` ✗ AI disabled for user`);
      debug.tree.ai = { called: false };
    } else {
      debug.tree.ai = { called: true };
      try {
        nextCategory = await this.aiService.suggestCategory(item.rawName, context.categories);
        trace(` ✓ AI suggestion: ${nextCategory.path}`);
      } catch (err) {
        trace(` ✗ AI failed: ${err}`);
        debug.tree.ai = { called: true, error: String(err) };
      }
    }
  }

  if (nextCategory) {
    // Contradiction guard (final sanity check).
    trace(' Applying contradiction guard');
    const pathBeforeGuard = nextCategory.path;
    const guarded = this.applyContradictionGuard(text, nextCategory, context.categories);
    if (guarded && guarded.path !== pathBeforeGuard) {
      trace(` ⚠️ Guard remapped: ${pathBeforeGuard} → ${guarded.path}`);
      nextCategory = guarded;
      debug.tree.guard = { applied: true, oldPath: pathBeforeGuard, newPath: guarded.path };
    } else {
      trace(` ✓ Guard OK`);
    }

    // Hard overrides (special rules for problematic cases).
    trace(' Applying hard overrides');
    const pathBeforeOverride = nextCategory.path;
    const overridden = this.applyHardCategoryOverrides(text, nextCategory, context.categories);
    if (overridden && overridden.path !== pathBeforeOverride) {
      trace(` ⚠️ Override applied: ${pathBeforeOverride} → ${overridden.path}`);
      nextCategory = overridden;
      debug.tree.hardOverride = { applied: true, oldPath: pathBeforeOverride, newPath: overridden.path };
    } else {
      trace(` ✓ No hard override needed`);
    }
  }

  trace(
    nextCategory
      ? `✅ FINAL: ${nextCategory.path} (${nextCategory.confidence})`
      : `❌ FINAL: No category assigned`,
  );

  if (this.shouldTraceDecision(text)) {
    this.logger.log(`[ReceiptDecision] ${item.rawName}\n${debug.steps.join('\n')}`);
  }

  if (!nextCategory) return item;
  return { ...item, categorySuggestion: nextCategory };
}
|
||||
|
||||
/**
 * Helper: variant of `findWordMatch` that returns both the product and its
 * score.
 *
 * Scoring per product (name used: `canonicalName`, falling back to `name`):
 * - +5 when the raw text contains the whole lower-cased product name;
 * - +8 for each product word found exactly (raw or normalized form) that is
 *   not a weak descriptor, +1 when it is one;
 * - +3 for each non-weak product word of at least 4 characters that only
 *   overlaps a raw word as a substring (in either direction).
 *
 * A product qualifies only with a strong signal (one exact non-weak word, or
 * two hits of any kind, or a partial hit on a product that has a word of 5+
 * characters) and a total score of at least 8. On equal scores the earliest
 * product in `products` wins.
 *
 * @param raw Lower-cased, trimmed receipt text.
 * @param products Candidate products to score.
 * @returns The best product with its `score`, or `undefined` when `raw` has
 *   no tokens or no product qualifies.
 */
private findWordMatchWithScore(
  raw: string,
  products: Array<{
    id: number;
    name: string;
    canonicalName: string | null;
    categoryRef: { id: number; name: string } | null;
  }>,
): (typeof products[0] & { score: number }) | undefined {
  const rawTokens = tokenize(raw);
  if (rawTokens.length === 0) return undefined;

  const rawTokenSet = new Set(rawTokens);
  const rawTokensNorm = rawTokens.map(normalizeToken);
  const rawTokenSetNorm = new Set(rawTokensNorm);

  let best: (typeof products[0] & { score: number }) | undefined;

  for (const product of products) {
    const displayName = product.canonicalName ?? product.name;
    const productTokens = tokenize(displayName);
    if (productTokens.length === 0) continue;

    // Whole-phrase bonus.
    let score = raw.includes(displayName.toLowerCase()) ? 5 : 0;
    let exactStrong = 0;
    let exactAny = 0;
    let partialStrong = 0;

    for (const token of productTokens) {
      const weak = WEAK_DESCRIPTORS.has(token);
      const tokenNorm = normalizeToken(token);

      if (rawTokenSet.has(token) || rawTokenSetNorm.has(tokenNorm)) {
        exactAny += 1;
        if (weak) {
          score += 1;
        } else {
          exactStrong += 1;
          score += 8;
        }
        continue;
      }

      // Short tokens are too ambiguous for substring matching.
      if (token.length < 4) continue;

      const overlaps =
        rawTokens.some((rw) => rw.includes(token) || token.includes(rw)) ||
        rawTokensNorm.some((rw) => rw.includes(tokenNorm) || tokenNorm.includes(rw));
      // Weak descriptors never count as partial hits.
      if (!overlaps || weak) continue;

      partialStrong += 1;
      score += 3;
    }

    const hasLongPartial = partialStrong >= 1 && productTokens.some((t) => t.length >= 5);
    const hasStrongSignal = exactStrong >= 1 || exactAny + partialStrong >= 2 || hasLongPartial;
    if (!hasStrongSignal || score < 8) continue;

    // Strictly greater: the first product keeps the lead on a tie.
    if (!best || score > best.score) {
      best = { ...product, score };
    }
  }

  return best;
}
|
||||
|
||||
private async enrichWithAiCategories(items: ParsedReceiptItem[], userId?: number): Promise<ParsedReceiptItem[]> {
|
||||
let categories: Awaited<ReturnType<CategoriesService['findFlattened']>>;
|
||||
try {
|
||||
|
||||
Reference in New Issue
Block a user