feat(flyer-import): add cheese variant splitting and normalization rules
- Add logic to split Swedish cheese variants (Präst, Herrgård, Grevé) into separate products
- Implement brand normalization for "Arla Ko" and category assignment to "Hårdost"
- Update flyer parser with detailed rules for bundle/group announcements
- Add unit tests for variant splitting and brand/category normalization
- Replace single-item return with flatMap for expanded product lists
This commit is contained in:
@@ -182,12 +182,23 @@ Returnera objekt med exakt dessa fält:
|
||||
- unit: string | null (enhet for jamforpris, t.ex. kg/l/st)
|
||||
- offer: string[]
|
||||
|
||||
Arbetssatt (viktigt):
|
||||
Steg A) Identifiera om texten ar en gruppannons med flera varianter + gemensamma attribut.
|
||||
Steg B) Returnera en post per faktisk produktvariant med arvd metadata.
|
||||
|
||||
Regler:
|
||||
1) Vanlig produkt (ej bundle): isBundle=false, bundleWeight=null, bundleItems=[].
|
||||
2) Kombipaket/bundle: isBundle=true, name ska vara paketets huvudnamn, bundleWeight totalvikt.
|
||||
3) For bundle ska bundleItems innehalla de ingaende produkterna, t.ex. ["Chumlax 3x100g", "Alaska pollock 3x100g"].
|
||||
4) price ar priset for hela forpackningen. comparisonPrice ar jamforpris som tal ("83:17" -> 83.17).
|
||||
5) offer innehaller kampanjtext som "Max 10 kop/hushall".
|
||||
6) Om en rubrik/lista innehaller flera kommaseparerade namn och efterfoljande rad/rader innehaller gemensam brand, vikt, pris eller kampanjvillkor: expandera till separata objekt (en per namn) och arv all gemensam metadata.
|
||||
7) Tillämpa samma split-regel generellt for liknande tillbud (inte bara ost), nar listan tydligt representerar produktvarianter/smaker/sorter.
|
||||
8) Splitta INTE om listan snarare ar ingredienser, avdelningar, eller otydlig marknadsforing utan tydlig produktvariant.
|
||||
9) Specialregel ost: namn som PRAST/HERRGARD/GREVE ska normaliseras till Prastost/Herrgardsost/Greveost.
|
||||
10) Om texten innehaller "ARLA KO" ska brand vara exakt "Arla Ko".
|
||||
11) For ovan ostsorter ska category vara "Hardost".
|
||||
12) Returnera aldrig extra nycklar, text, markdown eller forklaringar utanfor JSON-arrayen.
|
||||
|
||||
Exempel bundle utdata:
|
||||
[
|
||||
@@ -223,6 +234,42 @@ Exempel enkel produkt utdata:
|
||||
}
|
||||
]
|
||||
|
||||
Exempel gruppannons med varianter (ska splittas):
|
||||
Input-idé: "PRAST, HERRGARD, GREVE" + "ARLA KO" + gemensam vikt/pris.
|
||||
Output-idé:
|
||||
[
|
||||
{
|
||||
"name": "Prastost",
|
||||
"brand": "Arla Ko",
|
||||
"category": "Hardost",
|
||||
"isBundle": false,
|
||||
"weight": "667g",
|
||||
"bundleWeight": null,
|
||||
"bundleItems": [],
|
||||
"price": null,
|
||||
"comparisonPrice": 79.90,
|
||||
"unit": "kg",
|
||||
"offer": ["Max 3 forp/hushall"]
|
||||
},
|
||||
{
|
||||
"name": "Herrgardsost",
|
||||
"brand": "Arla Ko",
|
||||
"category": "Hardost",
|
||||
"isBundle": false,
|
||||
"weight": "667g",
|
||||
"bundleWeight": null,
|
||||
"bundleItems": [],
|
||||
"price": null,
|
||||
"comparisonPrice": 79.90,
|
||||
"unit": "kg",
|
||||
"offer": ["Max 3 forp/hushall"]
|
||||
}
|
||||
]
|
||||
|
||||
Exempel negativt fall (ska INTE splittas):
|
||||
Input-idé: "Ingredienser: tomat, lok, vitlok".
|
||||
Output-idé: en produktpost (ingen variant-expansion).
|
||||
|
||||
Text att tolka:
|
||||
${truncatedText}`;
|
||||
}
|
||||
|
||||
@@ -105,5 +105,41 @@ describe('FlyerNormalizerService', () => {
|
||||
const result2 = service.normalize(undefined as any);
|
||||
expect(result2).toEqual([]);
|
||||
});
|
||||
|
||||
it('splits listed cheese variants into separate products', () => {
  // A single grouped flyer entry: three trademarked cheese names that share
  // one brand, unit, comparison price and offer text.
  const groupedEntry = {
    rawName: 'PRÄST®, HERRGÅRD®, GREVÉ®',
    brand: 'ARLA KO',
    unit: 'kg',
    comparisonPrice: '79,90',
    offer: ['Max 3 förp/hushåll'],
  };

  const result = service.normalize([groupedEntry]);

  // One output product per listed variant, in the order they were listed.
  expect(result).toHaveLength(3);
  const producedNames = result.map((item) => item.rawName);
  expect(producedNames).toEqual(['Prastost', 'Herrgardsost', 'Greveost']);

  // Shared metadata is canonicalized on every expanded product.
  expect(result.every((item) => item.brand === 'Arla Ko')).toBe(true);
  expect(result.every((item) => item.categoryHint === 'Hårdost')).toBe(true);

  // The split is recorded in the parse reasons.
  expect(result[0].parseReasons).toContain('split_cheese_variants');
});
|
||||
|
||||
it('keeps single cheese item unsplit but normalizes brand/category', () => {
  // Only one variant is named, so no expansion should happen.
  const singleCheese = {
    rawName: 'Prästost',
    brand: 'arla ko',
  };

  const result = service.normalize([singleCheese]);

  expect(result).toHaveLength(1);

  const [only] = result;
  // The raw name is left as written; brand and category are still normalized.
  expect(only.rawName).toBe('Prästost');
  expect(only.brand).toBe('Arla Ko');
  expect(only.categoryHint).toBe('Hårdost');
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -23,6 +23,11 @@ export class FlyerNormalizerService {
|
||||
private readonly logger = new Logger(FlyerNormalizerService.name);
|
||||
private readonly MAX_BUNDLE_ITEMS = 20;
|
||||
private readonly MAX_BUNDLE_ITEM_LENGTH = 120;
|
||||
/**
 * Maps a cheese-variant token to the product name emitted for that variant.
 *
 * Keys are written the way the token appears after a flyer name has been
 * lowercased and stripped of diacritics (e.g. "PRÄST" -> "prast").
 */
private readonly CHEESE_VARIANT_TO_NAME: Record<string, string> = {
  prast: 'Prastost',
  herrgard: 'Herrgardsost',
  greve: 'Greveost',
};
|
||||
|
||||
private readonly UNIT_MAPPING: Record<string, string> = {
|
||||
// Längd
|
||||
@@ -61,29 +66,30 @@ export class FlyerNormalizerService {
|
||||
}
|
||||
|
||||
return items
|
||||
.map((item, idx) => this.normalizeItem(item, idx))
|
||||
.flatMap((item, idx) => this.normalizeItem(item, idx))
|
||||
.filter((item): item is NormalizedFlyerItem => item !== null);
|
||||
}
|
||||
|
||||
private normalizeItem(item: any, index: number): NormalizedFlyerItem | null {
|
||||
private normalizeItem(item: any, index: number): Array<NormalizedFlyerItem | null> {
|
||||
if (!item || typeof item !== 'object') {
|
||||
this.logger.warn(`Item ${index} is not an object, skipping`);
|
||||
return null;
|
||||
return [null];
|
||||
}
|
||||
|
||||
const rawName = this.extractString(item.rawName) || this.extractString(item.name);
|
||||
if (!rawName) {
|
||||
this.logger.warn(`Item ${index} has no name, skipping`);
|
||||
return null;
|
||||
return [null];
|
||||
}
|
||||
|
||||
const normalizedName = this.extractString(item.normalizedName) || this.normalizeName(rawName);
|
||||
|
||||
return {
|
||||
const normalizedBrand = this.normalizeBrand(this.extractString(item.brand), rawName);
|
||||
const categoryHint = this.normalizeCategory(this.extractString(item.category), rawName);
|
||||
const baseItem: NormalizedFlyerItem = {
|
||||
rawName,
|
||||
normalizedName,
|
||||
brand: this.extractString(item.brand),
|
||||
categoryHint: this.normalizeCategory(this.extractString(item.category)),
|
||||
brand: normalizedBrand,
|
||||
categoryHint,
|
||||
price: this.extractPrice(item.price),
|
||||
priceUnit: this.normalizeUnit(this.extractString(item.unit)),
|
||||
comparisonPrice: this.extractPrice(item.comparisonPrice),
|
||||
@@ -98,6 +104,13 @@ export class FlyerNormalizerService {
|
||||
? item.reasonCodes.map(String)
|
||||
: ['normalized'],
|
||||
};
|
||||
|
||||
const expandedItems = this.expandCheeseVariants(baseItem);
|
||||
if (expandedItems.length > 0) {
|
||||
return expandedItems;
|
||||
}
|
||||
|
||||
return [baseItem];
|
||||
}
|
||||
|
||||
private extractString(val: any): string | null {
|
||||
@@ -138,7 +151,10 @@ export class FlyerNormalizerService {
|
||||
return this.UNIT_MAPPING[cleaned] ?? null;
|
||||
}
|
||||
|
||||
private normalizeCategory(category: string | null): string | null {
|
||||
private normalizeCategory(category: string | null, rawName?: string): string | null {
|
||||
if (this.containsSwedishCheeseVariant(rawName)) {
|
||||
return 'Hårdost';
|
||||
}
|
||||
if (!category) return null;
|
||||
|
||||
const normalized = category.trim().toLowerCase();
|
||||
@@ -162,6 +178,59 @@ export class FlyerNormalizerService {
|
||||
return categoryMap[normalized] ?? null;
|
||||
}
|
||||
|
||||
/**
 * Canonicalizes the brand of a flyer item.
 *
 * Returns the exact spelling "Arla Ko" when the brand followed by the raw
 * product name mentions "arla ko" (case-insensitive) as a standalone phrase;
 * otherwise returns `brand` unchanged.
 *
 * @param brand - Brand as extracted from the item, or null when absent.
 * @param rawName - Raw product name; searched together with the brand.
 * @returns "Arla Ko" on a match, else the original `brand` (possibly null).
 */
private normalizeBrand(brand: string | null, rawName?: string): string | null {
  const combined = `${brand ?? ''} ${rawName ?? ''}`;

  // A plain substring test would also fire on longer words: brand "Arla" with
  // name "Kokosyoghurt", or "Arla Kokos…", all contain "arla ko". The guards
  // require that no letter or digit touches the phrase on either side, and
  // that no combining mark follows it (so a decomposed "Kö…" is not read as "Ko").
  const arlaKoPhrase = /(?<![\p{L}\p{N}])arla\s+ko(?![\p{L}\p{N}\p{M}])/iu;

  return arlaKoPhrase.test(combined) ? 'Arla Ko' : brand;
}
|
||||
|
||||
/**
 * Reports whether a text mentions one of the known cheese variants.
 *
 * The text is lowercased and stripped of diacritics before a substring
 * search, so "PRÄST", "Grevé" and "herrgård" all match their ASCII keys.
 *
 * @param value - Text to inspect; null, undefined and empty yield false.
 * @returns true when any key of `CHEESE_VARIANT_TO_NAME` occurs in the text.
 */
private containsSwedishCheeseVariant(value?: string | null): boolean {
  if (!value) return false;

  const folded = this.stripDiacritics(value.toLowerCase());

  // Take the tokens from the variant map instead of a second hard-coded list,
  // so detection and variant splitting cannot drift apart.
  return Object.keys(this.CHEESE_VARIANT_TO_NAME).some((key) => folded.includes(key));
}
|
||||
|
||||
/**
 * Expands an item whose raw name lists several known cheese variants into
 * one item per variant.
 *
 * The raw name is lowercased, stripped of diacritics and split on ",", "/",
 * "&", "och" and "and". Each piece is searched for the keys of
 * `CHEESE_VARIANT_TO_NAME`. Every expanded item copies the input item and
 * overrides the name fields, sets the category hint to "Hårdost", and appends
 * the "split_cheese_variants" parse reason.
 *
 * @param item - Already normalized item to inspect.
 * @returns The expanded items, or an empty array when the item is a bundle
 *          or fewer than two distinct variants were found.
 */
private expandCheeseVariants(item: NormalizedFlyerItem): NormalizedFlyerItem[] {
  if (item.isBundle) {
    return [];
  }

  const knownKeys = Object.keys(this.CHEESE_VARIANT_TO_NAME);
  const foldedName = this.stripDiacritics(item.rawName.toLowerCase());

  // A Set keeps first-seen order, so variants come out in the order listed.
  const found = new Set<string>();
  for (const piece of foldedName.split(/[,/&]|\boch\b|\band\b/g)) {
    const trimmed = piece.trim();
    if (!trimmed) {
      continue;
    }

    // Drop symbols such as "®" before looking for variant keys.
    const cleaned = trimmed.replace(/[^a-z0-9\s]/g, '');
    for (const key of knownKeys) {
      if (cleaned.includes(key)) {
        found.add(key);
      }
    }
  }

  // A single (or no) variant is not a group listing; the caller keeps the item as is.
  if (found.size <= 1) {
    return [];
  }

  return Array.from(found, (key) => {
    const productName = this.CHEESE_VARIANT_TO_NAME[key];
    return {
      ...item,
      rawName: productName,
      normalizedName: this.normalizeName(productName),
      categoryHint: 'Hårdost',
      parseReasons: [...item.parseReasons, 'split_cheese_variants'],
    };
  });
}
|
||||
|
||||
/**
 * Removes combining diacritical marks from a string.
 *
 * @param value - Text to process.
 * @returns The text in NFD form with code points U+0300–U+036F removed
 *          (e.g. "ä" becomes "a").
 */
private stripDiacritics(value: string): string {
  // NFD separates base letters from their combining marks so the marks can be dropped.
  const decomposed = value.normalize('NFD');
  return decomposed.replace(/[\u0300-\u036f]/g, '');
}
|
||||
|
||||
private normalizeOfferText(offer: any): string | null {
|
||||
if (!offer) return null;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user