feat: Exclude monetary and weight tokens from name extraction in receipt parsing

Co-authored-by: Copilot <copilot@github.com>
This commit is contained in:
Nils-Johan Gynther
2026-05-03 14:11:25 +02:00
parent f270f8510e
commit 504cf1c947
@@ -99,6 +99,8 @@ function isLikelyNameLikeText(value: string): boolean {
if (NON_NAME_TOKENS.has(token)) return false; if (NON_NAME_TOKENS.has(token)) return false;
if (/^\d+$/.test(token)) return false; if (/^\d+$/.test(token)) return false;
if (/^\d+(?:[\.,]\d+)?$/.test(token)) return false; if (/^\d+(?:[\.,]\d+)?$/.test(token)) return false;
// Exkludera kvitto-token som "98kr", "997kg", "15st"
if (/^\d+(?:[\.,]\d+)?(?:kr|sek|kg|g|mg|l|dl|cl|ml|st|fp|pkt|pak|förp)$/.test(token)) return false;
return /[a-zåäö]/i.test(token); return /[a-zåäö]/i.test(token);
}); });
@@ -109,6 +111,9 @@ function extractNameCandidate(line: string): string | null {
const cleaned = line const cleaned = line
.replace(/\b\d+\s*[x×]\s*\d+(?:[\.,]\d+)?\s*(ml|cl|dl|l|g|kg)\b/gi, ' ') .replace(/\b\d+\s*[x×]\s*\d+(?:[\.,]\d+)?\s*(ml|cl|dl|l|g|kg)\b/gi, ' ')
.replace(/\b\d+(?:[\.,]\d+)?\s*(ml|cl|dl|l|g|kg|st|fp|pkt|pak|förp)\b/gi, ' ') .replace(/\b\d+(?:[\.,]\d+)?\s*(ml|cl|dl|l|g|kg|st|fp|pkt|pak|förp)\b/gi, ' ')
.replace(/\b\d+(?:[\.,]\d+)?\s*(kr|sek)\s*\/\s*(kg|g|mg|l|dl|cl|ml)\b/gi, ' ')
.replace(/\b(kr|sek)\s*\/\s*(kg|g|mg|l|dl|cl|ml)\b/gi, ' ')
.replace(/\b(kr|sek)\b/gi, ' ')
.replace(/\b\d+(?:[\.,]\d{2})\b/g, ' ') .replace(/\b\d+(?:[\.,]\d{2})\b/g, ' ')
.replace(/[|*]/g, ' ') .replace(/[|*]/g, ' ')
.replace(/\s+/g, ' ') .replace(/\s+/g, ' ')