feat: enhance product matching logic with improved scoring and tokenization
This commit is contained in:
@@ -12,6 +12,26 @@ import { CategoriesService } from '../categories/categories.service';
|
|||||||
const IMPORTER_SERVICE_URL =
|
const IMPORTER_SERVICE_URL =
|
||||||
process.env.IMPORTER_SERVICE_URL || 'http://importer-api:3001';
|
process.env.IMPORTER_SERVICE_URL || 'http://importer-api:3001';
|
||||||
|
|
||||||
|
/**
 * Descriptive words (preparation/state adjectives such as "smoked", "frozen",
 * "fresh") that are treated as weak signals when matching product names.
 *
 * Both the Swedish spelling and an ASCII-folded spelling are listed for words
 * containing ö/ä. Typed as ReadonlySet so this shared module-level constant
 * cannot be mutated by accident.
 */
const WEAK_DESCRIPTORS: ReadonlySet<string> = new Set([
  'rokt',
  'rökt',
  'kokt',
  'grillad',
  'stekt',
  'skivad',
  'strimlad',
  'fryst',
  'farsk',
  'färsk',
]);
|
||||||
|
|
||||||
|
/**
 * Splits text into lowercase word tokens.
 *
 * The input is first normalized to Unicode NFC so that letters written with
 * combining marks (e.g. "o" followed by U+0308) become the single precomposed
 * letters å/ä/ö. Without this, the separator pattern would split such words
 * at the combining mark and the fragments would be discarded as too short.
 *
 * @param value - Text to tokenize.
 * @returns Tokens of at least three characters, each consisting only of
 *   a-z, 0-9, å, ä and ö; every other character acts as a separator.
 */
function tokenize(value: string): string[] {
  return value
    .normalize('NFC')
    .toLowerCase()
    .split(/[^a-z0-9åäö]+/)
    // Drop empty fragments and very short words (fewer than 3 characters).
    .filter((w) => w.length >= 3);
}
|
||||||
|
|
||||||
@Injectable()
|
@Injectable()
|
||||||
export class ReceiptImportService {
|
export class ReceiptImportService {
|
||||||
private readonly logger = new Logger(ReceiptImportService.name);
|
private readonly logger = new Logger(ReceiptImportService.name);
|
||||||
@@ -120,20 +140,71 @@ export class ReceiptImportService {
|
|||||||
products: { id: number; name: string; canonicalName: string | null }[],
|
products: { id: number; name: string; canonicalName: string | null }[],
|
||||||
): { id: number; name: string; canonicalName: string | null } | undefined {
|
): { id: number; name: string; canonicalName: string | null } | undefined {
|
||||||
// Dela upp kvittonamnet i ord (min 3 tecken)
|
// Dela upp kvittonamnet i ord (min 3 tecken)
|
||||||
const rawWords = raw.split(/[\s\-_]+/).filter((w) => w.length >= 3);
|
const rawWords = tokenize(raw);
|
||||||
if (rawWords.length === 0) return undefined;
|
if (rawWords.length === 0) return undefined;
|
||||||
|
|
||||||
// Fortsätt med att hitta produkter där ett produktnamn-ord finns i kvittonamnet
|
const rawWordSet = new Set(rawWords);
|
||||||
// Exempel: produktord "ost" finns i kvittoord "prästost", "herrgårdsost", "brieost"
|
|
||||||
return products.find((p) => {
|
let best:
|
||||||
const productWords = (p.canonicalName ?? p.name)
|
| { product: { id: number; name: string; canonicalName: string | null }; score: number }
|
||||||
.toLowerCase()
|
| undefined;
|
||||||
.split(/[\s\-_]+/)
|
|
||||||
.filter((w) => w.length >= 3);
|
for (const product of products) {
|
||||||
return productWords.some((pw) =>
|
const productWords = tokenize(product.canonicalName ?? product.name);
|
||||||
rawWords.some((rw) => rw.includes(pw) || pw.includes(rw)),
|
if (productWords.length === 0) continue;
|
||||||
);
|
|
||||||
});
|
let score = 0;
|
||||||
|
let exactStrong = 0;
|
||||||
|
let exactAny = 0;
|
||||||
|
let partialStrong = 0;
|
||||||
|
|
||||||
|
const phrase = (product.canonicalName ?? product.name).toLowerCase();
|
||||||
|
if (raw.includes(phrase)) {
|
||||||
|
score += 5;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const pw of productWords) {
|
||||||
|
const isWeak = WEAK_DESCRIPTORS.has(pw);
|
||||||
|
|
||||||
|
if (rawWordSet.has(pw)) {
|
||||||
|
exactAny += 1;
|
||||||
|
if (isWeak) {
|
||||||
|
score += 1;
|
||||||
|
} else {
|
||||||
|
exactStrong += 1;
|
||||||
|
score += 8;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delmatchning tillåts bara för ord med minst 4 tecken.
|
||||||
|
if (pw.length < 4) continue;
|
||||||
|
|
||||||
|
const hasPartial = rawWords.some((rw) => rw.includes(pw) || pw.includes(rw));
|
||||||
|
if (!hasPartial) continue;
|
||||||
|
|
||||||
|
if (isWeak) {
|
||||||
|
// Deskriptiva ord (t.ex. rökt) ska inte driva förslag ensamma.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
partialStrong += 1;
|
||||||
|
score += 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Kräv antingen minst ett starkt exakt ord, eller flera samverkande signaler.
|
||||||
|
const hasStrongSignal = exactStrong >= 1 || exactAny + partialStrong >= 2;
|
||||||
|
if (!hasStrongSignal) continue;
|
||||||
|
|
||||||
|
// Tröskel för att undvika svaga enkelträffar.
|
||||||
|
if (score < 8) continue;
|
||||||
|
|
||||||
|
if (!best || score > best.score) {
|
||||||
|
best = { product, score };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return best?.product;
|
||||||
}
|
}
|
||||||
|
|
||||||
private async enrichWithAiCategories(items: ParsedReceiptItem[]): Promise<ParsedReceiptItem[]> {
|
private async enrichWithAiCategories(items: ParsedReceiptItem[]): Promise<ParsedReceiptItem[]> {
|
||||||
|
|||||||
Reference in New Issue
Block a user