From 504cf1c9470a4910c0181cf7167e7333d0d040da Mon Sep 17 00:00:00 2001
From: Nils-Johan Gynther
Date: Sun, 3 May 2026 14:11:25 +0200
Subject: [PATCH] feat: Exclude monetary and weight tokens from name extraction in receipt parsing

Co-authored-by: Copilot
---
 backend/src/receipt-parsing/receipt-parsing.service.ts | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/backend/src/receipt-parsing/receipt-parsing.service.ts b/backend/src/receipt-parsing/receipt-parsing.service.ts
index 625df05..d9fda90 100644
--- a/backend/src/receipt-parsing/receipt-parsing.service.ts
+++ b/backend/src/receipt-parsing/receipt-parsing.service.ts
@@ -99,6 +99,8 @@ function isLikelyNameLikeText(value: string): boolean {
     if (NON_NAME_TOKENS.has(token)) return false;
     if (/^\d+$/.test(token)) return false;
     if (/^\d+(?:[\.,]\d+)?$/.test(token)) return false;
+    // Exkludera kvitto-token som "98kr", "997kg", "15st"
+    if (/^\d+(?:[\.,]\d+)?(?:kr|sek|kg|g|mg|l|dl|cl|ml|st|fp|pkt|pak|förp)$/.test(token)) return false;
     return /[a-zåäö]/i.test(token);
   });
 
@@ -109,6 +111,9 @@ function extractNameCandidate(line: string): string | null {
   const cleaned = line
     .replace(/\b\d+\s*[x×]\s*\d+(?:[\.,]\d+)?\s*(ml|cl|dl|l|g|kg)\b/gi, ' ')
     .replace(/\b\d+(?:[\.,]\d+)?\s*(ml|cl|dl|l|g|kg|st|fp|pkt|pak|förp)\b/gi, ' ')
+    .replace(/\b\d+(?:[\.,]\d+)?\s*(kr|sek)\s*\/\s*(kg|g|mg|l|dl|cl|ml)\b/gi, ' ')
+    .replace(/\b(kr|sek)\s*\/\s*(kg|g|mg|l|dl|cl|ml)\b/gi, ' ')
+    .replace(/\b(kr|sek)\b/gi, ' ')
     .replace(/\b\d+(?:[\.,]\d{2})\b/g, ' ')
     .replace(/[|*]/g, ' ')
     .replace(/\s+/g, ' ')