From 2d94a83e73d51c62e36792ebac08cc00ce905b43 Mon Sep 17 00:00:00 2001 From: Nils-Johan Gynther Date: Thu, 21 May 2026 14:44:37 +0200 Subject: [PATCH] feat(flyer-import): add cheese variant splitting and normalization rules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add logic to split Swedish cheese variants (Präst, Herrgård, Grevé) into separate products - Implement brand normalization for "Arla Ko" and category assignment to "Hårdost" - Update flyer parser with detailed rules for bundle/group announcements - Add unit tests for variant splitting and brand/category normalization - Replace single-item return with flatMap for expanded product lists --- .../services/ai-flyer-parser.service.ts | 47 ++++++++++ .../services/flyer-normalizer.service.spec.ts | 36 ++++++++ .../services/flyer-normalizer.service.ts | 87 +++++++++++++++++-- 3 files changed, 161 insertions(+), 9 deletions(-) diff --git a/backend/src/flyer-import/services/ai-flyer-parser.service.ts b/backend/src/flyer-import/services/ai-flyer-parser.service.ts index 2a4d6b7b..8b4712fa 100644 --- a/backend/src/flyer-import/services/ai-flyer-parser.service.ts +++ b/backend/src/flyer-import/services/ai-flyer-parser.service.ts @@ -182,12 +182,23 @@ Returnera objekt med exakt dessa fält: - unit: string | null (enhet for jamforpris, t.ex. kg/l/st) - offer: string[] +Arbetssatt (viktigt): +Steg A) Identifiera om texten ar en gruppannons med flera varianter + gemensamma attribut. +Steg B) Returnera en post per faktisk produktvariant med arvd metadata. + Regler: 1) Vanlig produkt (ej bundle): isBundle=false, bundleWeight=null, bundleItems=[]. 2) Kombipaket/bundle: isBundle=true, name ska vara paketets huvudnamn, bundleWeight totalvikt. 3) For bundle ska bundleItems innehalla de ingaende produkterna, t.ex. ["Chumlax 3x100g", "Alaska pollock 3x100g"]. 4) price ar priset for hela forpackningen. comparisonPrice ar jamforpris som tal ("83:17" -> 83.17). 
5) offer innehaller kampanjtext som "Max 10 kop/hushall". +6) Om en rubrik/lista innehaller flera kommaseparerade namn och efterfoljande rad/rader innehaller gemensam brand, vikt, pris eller kampanjvillkor: expandera till separata objekt (en per namn) och arv all gemensam metadata. +7) Tillämpa samma split-regel generellt for liknande tillbud (inte bara ost), nar listan tydligt representerar produktvarianter/smaker/sorter. +8) Splitta INTE om listan snarare ar ingredienser, avdelningar, eller otydlig marknadsforing utan tydlig produktvariant. +9) Specialregel ost: namn som PRAST/HERRGARD/GREVE ska normaliseras till Prastost/Herrgardsost/Greveost. +10) Om texten innehaller "ARLA KO" ska brand vara exakt "Arla Ko". +11) For ovan ostsorter ska category vara "Hardost". +12) Returnera aldrig extra nycklar, text, markdown eller forklaringar utanfor JSON-arrayen. Exempel bundle utdata: [ @@ -223,6 +234,42 @@ Exempel enkel produkt utdata: } ] +Exempel gruppannons med varianter (ska splittas): +Input-idé: "PRAST, HERRGARD, GREVE" + "ARLA KO" + gemensam vikt/pris. +Output-idé: +[ + { + "name": "Prastost", + "brand": "Arla Ko", + "category": "Hardost", + "isBundle": false, + "weight": "667g", + "bundleWeight": null, + "bundleItems": [], + "price": null, + "comparisonPrice": 79.90, + "unit": "kg", + "offer": ["Max 3 forp/hushall"] + }, + { + "name": "Herrgardsost", + "brand": "Arla Ko", + "category": "Hardost", + "isBundle": false, + "weight": "667g", + "bundleWeight": null, + "bundleItems": [], + "price": null, + "comparisonPrice": 79.90, + "unit": "kg", + "offer": ["Max 3 forp/hushall"] + } +] + +Exempel negativt fall (ska INTE splittas): +Input-idé: "Ingredienser: tomat, lok, vitlok". +Output-idé: en produktpost (ingen variant-expansion). 
+ Text att tolka: ${truncatedText}`; } diff --git a/backend/src/flyer-import/services/flyer-normalizer.service.spec.ts b/backend/src/flyer-import/services/flyer-normalizer.service.spec.ts index 11cf0b7d..1dd66db8 100644 --- a/backend/src/flyer-import/services/flyer-normalizer.service.spec.ts +++ b/backend/src/flyer-import/services/flyer-normalizer.service.spec.ts @@ -105,5 +105,41 @@ describe('FlyerNormalizerService', () => { const result2 = service.normalize(undefined as any); expect(result2).toEqual([]); }); + + it('splits listed cheese variants into separate products', () => { + const items = [ + { + rawName: 'PRÄST®, HERRGÅRD®, GREVÉ®', + brand: 'ARLA KO', + unit: 'kg', + comparisonPrice: '79,90', + offer: ['Max 3 förp/hushåll'], + }, + ]; + + const result = service.normalize(items); + + expect(result).toHaveLength(3); + expect(result.map((item) => item.rawName)).toEqual(['Prastost', 'Herrgardsost', 'Greveost']); + expect(result.every((item) => item.brand === 'Arla Ko')).toBe(true); + expect(result.every((item) => item.categoryHint === 'Hårdost')).toBe(true); + expect(result[0].parseReasons).toContain('split_cheese_variants'); + }); + + it('keeps single cheese item unsplit but normalizes brand/category', () => { + const items = [ + { + rawName: 'Prästost', + brand: 'arla ko', + }, + ]; + + const result = service.normalize(items); + + expect(result).toHaveLength(1); + expect(result[0].rawName).toBe('Prästost'); + expect(result[0].brand).toBe('Arla Ko'); + expect(result[0].categoryHint).toBe('Hårdost'); + }); }); }); diff --git a/backend/src/flyer-import/services/flyer-normalizer.service.ts b/backend/src/flyer-import/services/flyer-normalizer.service.ts index 8c03b92a..a3133ea5 100644 --- a/backend/src/flyer-import/services/flyer-normalizer.service.ts +++ b/backend/src/flyer-import/services/flyer-normalizer.service.ts @@ -23,6 +23,11 @@ export class FlyerNormalizerService { private readonly logger = new Logger(FlyerNormalizerService.name); private readonly 
MAX_BUNDLE_ITEMS = 20; private readonly MAX_BUNDLE_ITEM_LENGTH = 120; + private readonly CHEESE_VARIANT_TO_NAME: Record = { + prast: 'Prastost', + herrgard: 'Herrgardsost', + greve: 'Greveost', + }; private readonly UNIT_MAPPING: Record = { // Längd @@ -61,29 +66,30 @@ export class FlyerNormalizerService { } return items - .map((item, idx) => this.normalizeItem(item, idx)) + .flatMap((item, idx) => this.normalizeItem(item, idx)) .filter((item): item is NormalizedFlyerItem => item !== null); } - private normalizeItem(item: any, index: number): NormalizedFlyerItem | null { + private normalizeItem(item: any, index: number): Array { if (!item || typeof item !== 'object') { this.logger.warn(`Item ${index} is not an object, skipping`); - return null; + return [null]; } const rawName = this.extractString(item.rawName) || this.extractString(item.name); if (!rawName) { this.logger.warn(`Item ${index} has no name, skipping`); - return null; + return [null]; } const normalizedName = this.extractString(item.normalizedName) || this.normalizeName(rawName); - - return { + const normalizedBrand = this.normalizeBrand(this.extractString(item.brand), rawName); + const categoryHint = this.normalizeCategory(this.extractString(item.category), rawName); + const baseItem: NormalizedFlyerItem = { rawName, normalizedName, - brand: this.extractString(item.brand), - categoryHint: this.normalizeCategory(this.extractString(item.category)), + brand: normalizedBrand, + categoryHint, price: this.extractPrice(item.price), priceUnit: this.normalizeUnit(this.extractString(item.unit)), comparisonPrice: this.extractPrice(item.comparisonPrice), @@ -98,6 +104,13 @@ export class FlyerNormalizerService { ? 
item.reasonCodes.map(String)
         : ['normalized'],
     };
+
+    const expandedItems = this.expandCheeseVariants(baseItem);
+    if (expandedItems.length > 0) {
+      return expandedItems;
+    }
+
+    return [baseItem];
   }
 
   private extractString(val: any): string | null {
@@ -138,7 +151,10 @@ export class FlyerNormalizerService {
     return this.UNIT_MAPPING[cleaned] ?? null;
   }
 
-  private normalizeCategory(category: string | null): string | null {
+  private normalizeCategory(category: string | null, rawName?: string): string | null {
+    if (this.containsSwedishCheeseVariant(rawName)) {
+      return 'Hårdost';
+    }
     if (!category) return null;
 
     const normalized = category.trim().toLowerCase();
@@ -162,6 +178,85 @@ export class FlyerNormalizerService {
     return categoryMap[normalized] ?? null;
   }
 
+  /**
+   * Canonicalises the brand to "Arla Ko" when brand and raw name, read as one
+   * lowercased text, contain "arla ko" as a standalone phrase.
+   *
+   * @param brand - Brand string extracted from the item, or null.
+   * @param rawName - Raw product name, which may also carry the brand text.
+   * @returns 'Arla Ko' on a match, otherwise `brand` unchanged.
+   */
+  private normalizeBrand(brand: string | null, rawName?: string): string | null {
+    const value = `${brand ?? ''} ${rawName ?? ''}`.trim().toLowerCase();
+    // Demand non-alphanumeric edges: a bare substring test turned brand "Arla"
+    // plus a name such as "Korv" ("arla korv") into "Arla Ko".
+    if (/(?:^|[^0-9a-zåäöé])arla\s+ko(?![0-9a-zåäöé])/.test(value)) {
+      return 'Arla Ko';
+    }
+    return brand;
+  }
+
+  /**
+   * Reports whether `value`, lowercased and stripped of diacritics, contains
+   * any key of CHEESE_VARIANT_TO_NAME as a substring.
+   */
+  private containsSwedishCheeseVariant(value?: string | null): boolean {
+    if (!value) return false;
+    const normalized = this.stripDiacritics(value.toLowerCase());
+    return Object.keys(this.CHEESE_VARIANT_TO_NAME).some((token) => normalized.includes(token));
+  }
+
+  /**
+   * Splits an item whose raw name lists several known cheese variants into
+   * one item per variant.
+   *
+   * @param item - Normalized item to inspect.
+   * @returns A copy of `item` per distinct variant found (renamed, categoryHint
+   *   'Hårdost', 'split_cheese_variants' appended to parseReasons), or an
+   *   empty array when the item is a bundle or names at most one variant.
+   */
+  private expandCheeseVariants(item: NormalizedFlyerItem): NormalizedFlyerItem[] {
+    if (item.isBundle) return [];
+
+    const normalizedRaw = this.stripDiacritics(item.rawName.toLowerCase());
+    // Separators: comma, slash, ampersand and the standalone words "och"/"and".
+    const tokens = normalizedRaw
+      .split(/[,/&]|\boch\b|\band\b/g)
+      .map((part) => part.trim())
+      .filter(Boolean);
+
+    const variants = Array.from(
+      new Set(
+        tokens
+          // Drop leftover symbols such as "®" before looking for variant keys.
+          .map((token) => token.replace(/[^a-z0-9\s]/g, ''))
+          .flatMap((token) => Object.keys(this.CHEESE_VARIANT_TO_NAME).filter((key) => token.includes(key))),
+      ),
+    );
+
+    if (variants.length <= 1) {
+      return [];
+    }
+
+    return variants.map((variant) => {
+      const productName = this.CHEESE_VARIANT_TO_NAME[variant];
+      return {
+        ...item,
+        rawName: productName,
+        normalizedName: this.normalizeName(productName),
+        categoryHint: 'Hårdost',
+        parseReasons: [...item.parseReasons, 'split_cheese_variants'],
+      };
+    });
+  }
+
+  /** Removes combining marks (U+0300–U+036F) left by NFD decomposition, e.g. "ä" becomes "a". */
+  private stripDiacritics(value: string): string {
+    return value
+      .normalize('NFD')
+      .replace(/[\u0300-\u036f]/g, '');
+  }
+
   private normalizeOfferText(offer: any): string | null {
     if (!offer) return null;
 