feat: Exclude monetary and weight tokens from name extraction in receipt parsing
Co-authored-by: Copilot <copilot@github.com>
This commit is contained in:
@@ -99,6 +99,8 @@ function isLikelyNameLikeText(value: string): boolean {
|
||||
if (NON_NAME_TOKENS.has(token)) return false;
|
||||
if (/^\d+$/.test(token)) return false;
|
||||
if (/^\d+(?:[\.,]\d+)?$/.test(token)) return false;
|
||||
// Exkludera kvitto-token som "98kr", "997kg", "15st"
|
||||
if (/^\d+(?:[\.,]\d+)?(?:kr|sek|kg|g|mg|l|dl|cl|ml|st|fp|pkt|pak|förp)$/.test(token)) return false;
|
||||
return /[a-zåäö]/i.test(token);
|
||||
});
|
||||
|
||||
@@ -109,6 +111,9 @@ function extractNameCandidate(line: string): string | null {
|
||||
const cleaned = line
|
||||
.replace(/\b\d+\s*[x×]\s*\d+(?:[\.,]\d+)?\s*(ml|cl|dl|l|g|kg)\b/gi, ' ')
|
||||
.replace(/\b\d+(?:[\.,]\d+)?\s*(ml|cl|dl|l|g|kg|st|fp|pkt|pak|förp)\b/gi, ' ')
|
||||
.replace(/\b\d+(?:[\.,]\d+)?\s*(kr|sek)\s*\/\s*(kg|g|mg|l|dl|cl|ml)\b/gi, ' ')
|
||||
.replace(/\b(kr|sek)\s*\/\s*(kg|g|mg|l|dl|cl|ml)\b/gi, ' ')
|
||||
.replace(/\b(kr|sek)\b/gi, ' ')
|
||||
.replace(/\b\d+(?:[\.,]\d{2})\b/g, ' ')
|
||||
.replace(/[|*]/g, ' ')
|
||||
.replace(/\s+/g, ' ')
|
||||
|
||||
Reference in New Issue
Block a user