feat: Exclude monetary and weight tokens from name extraction in receipt parsing
Co-authored-by: Copilot <copilot@github.com>
This commit is contained in:
@@ -99,6 +99,8 @@ function isLikelyNameLikeText(value: string): boolean {
|
|||||||
if (NON_NAME_TOKENS.has(token)) return false;
|
if (NON_NAME_TOKENS.has(token)) return false;
|
||||||
if (/^\d+$/.test(token)) return false;
|
if (/^\d+$/.test(token)) return false;
|
||||||
if (/^\d+(?:[\.,]\d+)?$/.test(token)) return false;
|
if (/^\d+(?:[\.,]\d+)?$/.test(token)) return false;
|
||||||
|
// Exclude receipt tokens such as "98kr", "997kg", "15st"
|
||||||
|
if (/^\d+(?:[\.,]\d+)?(?:kr|sek|kg|g|mg|l|dl|cl|ml|st|fp|pkt|pak|förp)$/.test(token)) return false;
|
||||||
return /[a-zåäö]/i.test(token);
|
return /[a-zåäö]/i.test(token);
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -109,6 +111,9 @@ function extractNameCandidate(line: string): string | null {
|
|||||||
const cleaned = line
|
const cleaned = line
|
||||||
.replace(/\b\d+\s*[x×]\s*\d+(?:[\.,]\d+)?\s*(ml|cl|dl|l|g|kg)\b/gi, ' ')
|
.replace(/\b\d+\s*[x×]\s*\d+(?:[\.,]\d+)?\s*(ml|cl|dl|l|g|kg)\b/gi, ' ')
|
||||||
.replace(/\b\d+(?:[\.,]\d+)?\s*(ml|cl|dl|l|g|kg|st|fp|pkt|pak|förp)\b/gi, ' ')
|
.replace(/\b\d+(?:[\.,]\d+)?\s*(ml|cl|dl|l|g|kg|st|fp|pkt|pak|förp)\b/gi, ' ')
|
||||||
|
.replace(/\b\d+(?:[\.,]\d+)?\s*(kr|sek)\s*\/\s*(kg|g|mg|l|dl|cl|ml)\b/gi, ' ')
|
||||||
|
.replace(/\b(kr|sek)\s*\/\s*(kg|g|mg|l|dl|cl|ml)\b/gi, ' ')
|
||||||
|
.replace(/\b(kr|sek)\b/gi, ' ')
|
||||||
.replace(/\b\d+(?:[\.,]\d{2})\b/g, ' ')
|
.replace(/\b\d+(?:[\.,]\d{2})\b/g, ' ')
|
||||||
.replace(/[|*]/g, ' ')
|
.replace(/[|*]/g, ' ')
|
||||||
.replace(/\s+/g, ' ')
|
.replace(/\s+/g, ' ')
|
||||||
|
|||||||
Reference in New Issue
Block a user