feat(flyer-import): add cheese variant splitting and normalization rules
Test Suite / backend-pr-quick (push) Has been skipped
Test Suite / quick-import-pr-quick (push) Has been skipped
Test Suite / backend-full (push) Successful in 5m48s
Test Suite / flutter-quality (push) Failing after 1m9s

- Add logic to split Swedish cheese variants (Präst, Herrgård, Grevé) into separate products
- Implement brand normalization for "Arla Ko" and category assignment to "Hårdost"
- Update flyer parser with detailed rules for bundle/group announcements
- Add unit tests for variant splitting and brand/category normalization
- Replace single-item return with flatMap for expanded product lists
This commit is contained in:
Nils-Johan Gynther
2026-05-21 14:44:37 +02:00
parent 47c89c9915
commit 2d94a83e73
3 changed files with 161 additions and 9 deletions
@@ -182,12 +182,23 @@ Returnera objekt med exakt dessa fält:
- unit: string | null (enhet for jamforpris, t.ex. kg/l/st)
- offer: string[]
Arbetssatt (viktigt):
Steg A) Identifiera om texten ar en gruppannons med flera varianter + gemensamma attribut.
Steg B) Returnera en post per faktisk produktvariant med arvd metadata.
Regler:
1) Vanlig produkt (ej bundle): isBundle=false, bundleWeight=null, bundleItems=[].
2) Kombipaket/bundle: isBundle=true, name ska vara paketets huvudnamn, bundleWeight totalvikt.
3) For bundle ska bundleItems innehalla de ingaende produkterna, t.ex. ["Chumlax 3x100g", "Alaska pollock 3x100g"].
4) price ar priset for hela forpackningen. comparisonPrice ar jamforpris som tal ("83:17" -> 83.17).
5) offer innehaller kampanjtext som "Max 10 kop/hushall".
6) Om en rubrik/lista innehaller flera kommaseparerade namn och efterfoljande rad/rader innehaller gemensam brand, vikt, pris eller kampanjvillkor: expandera till separata objekt (en per namn) och arv all gemensam metadata.
7) Tillämpa samma split-regel generellt for liknande tillbud (inte bara ost), nar listan tydligt representerar produktvarianter/smaker/sorter.
8) Splitta INTE om listan snarare ar ingredienser, avdelningar, eller otydlig marknadsforing utan tydlig produktvariant.
9) Specialregel ost: namn som PRAST/HERRGARD/GREVE ska normaliseras till Prastost/Herrgardsost/Greveost.
10) Om texten innehaller "ARLA KO" ska brand vara exakt "Arla Ko".
11) For ovan ostsorter ska category vara "Hardost".
12) Returnera aldrig extra nycklar, text, markdown eller forklaringar utanfor JSON-arrayen.
Exempel bundle utdata:
[
@@ -223,6 +234,42 @@ Exempel enkel produkt utdata:
}
]
Exempel gruppannons med varianter (ska splittas):
Input-idé: "PRAST, HERRGARD, GREVE" + "ARLA KO" + gemensam vikt/pris.
Output-idé:
[
{
"name": "Prastost",
"brand": "Arla Ko",
"category": "Hardost",
"isBundle": false,
"weight": "667g",
"bundleWeight": null,
"bundleItems": [],
"price": null,
"comparisonPrice": 79.90,
"unit": "kg",
"offer": ["Max 3 forp/hushall"]
},
{
"name": "Herrgardsost",
"brand": "Arla Ko",
"category": "Hardost",
"isBundle": false,
"weight": "667g",
"bundleWeight": null,
"bundleItems": [],
"price": null,
"comparisonPrice": 79.90,
"unit": "kg",
"offer": ["Max 3 forp/hushall"]
}
]
Exempel negativt fall (ska INTE splittas):
Input-idé: "Ingredienser: tomat, lok, vitlok".
Output-idé: en produktpost (ingen variant-expansion).
Text att tolka:
${truncatedText}`;
}
@@ -105,5 +105,41 @@ describe('FlyerNormalizerService', () => {
const result2 = service.normalize(undefined as any);
expect(result2).toEqual([]);
});
// A single flyer entry whose name lists several cheese variants (upper-case, with
// diacritics and registered-trademark signs) must come back as one item per variant.
it('splits listed cheese variants into separate products', () => {
const items = [
{
rawName: 'PRÄST®, HERRGÅRD®, GREVÉ®',
brand: 'ARLA KO',
unit: 'kg',
comparisonPrice: '79,90',
offer: ['Max 3 förp/hushåll'],
},
];
const result = service.normalize(items);
// One output item per listed variant, in the order they appear in the input name.
expect(result).toHaveLength(3);
expect(result.map((item) => item.rawName)).toEqual(['Prastost', 'Herrgardsost', 'Greveost']);
// Brand casing and the category hint are shared by every split item.
expect(result.every((item) => item.brand === 'Arla Ko')).toBe(true);
expect(result.every((item) => item.categoryHint === 'Hårdost')).toBe(true);
// The split is recorded in parseReasons so it can be traced later.
expect(result[0].parseReasons).toContain('split_cheese_variants');
});
// An entry naming only one cheese variant must not be expanded or renamed, but its
// brand casing and category hint are still normalized.
it('keeps single cheese item unsplit but normalizes brand/category', () => {
const items = [
{
rawName: 'Prästost',
brand: 'arla ko',
},
];
const result = service.normalize(items);
expect(result).toHaveLength(1);
// The original name, including its diacritics, is preserved when no split happens.
expect(result[0].rawName).toBe('Prästost');
expect(result[0].brand).toBe('Arla Ko');
// No category was supplied; the hint is derived from the cheese name alone.
expect(result[0].categoryHint).toBe('Hårdost');
});
});
});
@@ -23,6 +23,11 @@ export class FlyerNormalizerService {
// Logger named after this service class.
private readonly logger = new Logger(FlyerNormalizerService.name);
// Bundle limits. Their use is outside this excerpt; presumably they cap the number of
// bundle items and the length of each entry — TODO confirm against the bundle parsing code.
private readonly MAX_BUNDLE_ITEMS = 20;
private readonly MAX_BUNDLE_ITEM_LENGTH = 120;
/**
 * Maps a lower-case, diacritic-free cheese variant token to the product name that is
 * emitted when a grouped listing is split into separate items. expandCheeseVariants
 * matches the keys as substrings of the diacritic-stripped, lower-cased raw name.
 */
private readonly CHEESE_VARIANT_TO_NAME: Record<string, string> = {
prast: 'Prastost',
herrgard: 'Herrgardsost',
greve: 'Greveost',
};
private readonly UNIT_MAPPING: Record<string, string> = {
// Längd
@@ -61,29 +66,30 @@ export class FlyerNormalizerService {
}
return items
.map((item, idx) => this.normalizeItem(item, idx))
.flatMap((item, idx) => this.normalizeItem(item, idx))
.filter((item): item is NormalizedFlyerItem => item !== null);
}
private normalizeItem(item: any, index: number): NormalizedFlyerItem | null {
private normalizeItem(item: any, index: number): Array<NormalizedFlyerItem | null> {
if (!item || typeof item !== 'object') {
this.logger.warn(`Item ${index} is not an object, skipping`);
return null;
return [null];
}
const rawName = this.extractString(item.rawName) || this.extractString(item.name);
if (!rawName) {
this.logger.warn(`Item ${index} has no name, skipping`);
return null;
return [null];
}
const normalizedName = this.extractString(item.normalizedName) || this.normalizeName(rawName);
return {
const normalizedBrand = this.normalizeBrand(this.extractString(item.brand), rawName);
const categoryHint = this.normalizeCategory(this.extractString(item.category), rawName);
const baseItem: NormalizedFlyerItem = {
rawName,
normalizedName,
brand: this.extractString(item.brand),
categoryHint: this.normalizeCategory(this.extractString(item.category)),
brand: normalizedBrand,
categoryHint,
price: this.extractPrice(item.price),
priceUnit: this.normalizeUnit(this.extractString(item.unit)),
comparisonPrice: this.extractPrice(item.comparisonPrice),
@@ -98,6 +104,13 @@ export class FlyerNormalizerService {
? item.reasonCodes.map(String)
: ['normalized'],
};
const expandedItems = this.expandCheeseVariants(baseItem);
if (expandedItems.length > 0) {
return expandedItems;
}
return [baseItem];
}
private extractString(val: any): string | null {
@@ -138,7 +151,10 @@ export class FlyerNormalizerService {
return this.UNIT_MAPPING[cleaned] ?? null;
}
private normalizeCategory(category: string | null): string | null {
private normalizeCategory(category: string | null, rawName?: string): string | null {
if (this.containsSwedishCheeseVariant(rawName)) {
return 'Hårdost';
}
if (!category) return null;
const normalized = category.trim().toLowerCase();
@@ -162,6 +178,59 @@ export class FlyerNormalizerService {
return categoryMap[normalized] ?? null;
}
/**
 * Canonicalizes the brand of a flyer item.
 *
 * When the brand or the raw product name mentions "Arla Ko" (any casing), the
 * canonical spelling "Arla Ko" is returned; otherwise the brand is passed through
 * unchanged.
 *
 * @param brand   Brand as extracted from the item, or null when absent.
 * @param rawName Raw product name, searched as well because the brand is sometimes
 *                only printed as part of the name.
 * @returns "Arla Ko" on a match, otherwise the original `brand` (possibly null).
 */
private normalizeBrand(brand: string | null, rawName?: string): string | null {
  // NFC first, so a decomposed "ö" (o + combining diaeresis) is not mistaken for a bare "o".
  const haystack = `${brand ?? ''} ${rawName ?? ''}`.normalize('NFC').toLowerCase();

  // "arla ko" must stand as whole words. A plain substring test would also fire on
  // brand "Arla" + name "Kokosdryck" (joined: "arla kokosdryck") or on "Arla Köket".
  // Trailing symbols such as "®" or punctuation are still accepted.
  const arlaKoPattern = /(?<![\p{L}\p{N}])arla\s+ko(?![\p{L}\p{M}\p{N}])/u;
  if (arlaKoPattern.test(haystack)) {
    return 'Arla Ko';
  }

  return brand;
}
/**
 * Reports whether a string mentions one of the known Swedish cheese variants.
 *
 * The input is lower-cased and stripped of diacritics before matching, so "PRÄST",
 * "Grevé" and "herrgård" are all recognised.
 *
 * @param value Text to inspect; null, undefined and empty strings yield false.
 * @returns true when any known variant token occurs in the text.
 */
private containsSwedishCheeseVariant(value?: string | null): boolean {
  if (!value) return false;
  const normalized = this.stripDiacritics(value.toLowerCase());
  // Take the tokens from the shared variant table so detection here and the splitting
  // logic can never drift apart when a variant is added or renamed.
  // NOTE(review): matching is by substring, so longer words that merely contain a
  // token (e.g. "grevens") also count — confirm this is acceptable.
  return Object.keys(this.CHEESE_VARIANT_TO_NAME).some((token) => normalized.includes(token));
}
/**
 * Splits a grouped cheese listing into one item per recognised variant.
 *
 * The raw name is lower-cased, stripped of diacritics and cut at ",", "/", "&",
 * "och" and "and". Every known variant key found in a segment is collected once,
 * in order of first appearance.
 *
 * @param item Already-normalized flyer item to inspect.
 * @returns One copy of `item` per variant (renamed, categorised as "Hårdost" and
 *          tagged with "split_cheese_variants"), or an empty array when the item is
 *          a bundle or fewer than two distinct variants were found.
 */
private expandCheeseVariants(item: NormalizedFlyerItem): NormalizedFlyerItem[] {
  if (item.isBundle) {
    return [];
  }

  const knownKeys = Object.keys(this.CHEESE_VARIANT_TO_NAME);
  const segments = this.stripDiacritics(item.rawName.toLowerCase()).split(/[,/&]|\boch\b|\band\b/g);

  // A Set keeps first-seen order and removes repeated variants.
  const foundKeys = new Set<string>();
  for (const segment of segments) {
    const trimmed = segment.trim();
    if (!trimmed) {
      continue;
    }
    // Drop symbols such as "®" before looking for variant keys.
    const cleaned = trimmed.replace(/[^a-z0-9\s]/g, '');
    for (const key of knownKeys) {
      if (cleaned.includes(key)) {
        foundKeys.add(key);
      }
    }
  }

  // Zero or one variant means there is nothing to split.
  if (foundKeys.size <= 1) {
    return [];
  }

  return Array.from(foundKeys).map((key) => {
    const productName = this.CHEESE_VARIANT_TO_NAME[key];
    return {
      ...item,
      rawName: productName,
      normalizedName: this.normalizeName(productName),
      categoryHint: 'Hårdost',
      parseReasons: [...item.parseReasons, 'split_cheese_variants'],
    };
  });
}
/**
 * Removes combining diacritical marks from a string (e.g. "å" becomes "a").
 *
 * @param value Text to strip.
 * @returns The text with characters in U+0300–U+036F removed after canonical decomposition.
 */
private stripDiacritics(value: string): string {
  // Decompose first so that accents become separate combining code points.
  const decomposed = value.normalize('NFD');
  return decomposed.replace(/[\u0300-\u036f]/g, '');
}
private normalizeOfferText(offer: any): string | null {
if (!offer) return null;