feat(flyer-import): add cheese variant splitting and normalization rules
- Add logic to split Swedish cheese variants (Präst, Herrgård, Grevé) into separate products
- Implement brand normalization for "Arla Ko" and category assignment to "Hårdost"
- Update flyer parser with detailed rules for bundle/group announcements
- Add unit tests for variant splitting and brand/category normalization
- Replace single-item return with flatMap for expanded product lists
This commit is contained in:
@@ -182,12 +182,23 @@ Returnera objekt med exakt dessa fält:
|
|||||||
- unit: string | null (enhet for jamforpris, t.ex. kg/l/st)
|
- unit: string | null (enhet for jamforpris, t.ex. kg/l/st)
|
||||||
- offer: string[]
|
- offer: string[]
|
||||||
|
|
||||||
|
Arbetssatt (viktigt):
|
||||||
|
Steg A) Identifiera om texten ar en gruppannons med flera varianter + gemensamma attribut.
|
||||||
|
Steg B) Returnera en post per faktisk produktvariant med arvd metadata.
|
||||||
|
|
||||||
Regler:
|
Regler:
|
||||||
1) Vanlig produkt (ej bundle): isBundle=false, bundleWeight=null, bundleItems=[].
|
1) Vanlig produkt (ej bundle): isBundle=false, bundleWeight=null, bundleItems=[].
|
||||||
2) Kombipaket/bundle: isBundle=true, name ska vara paketets huvudnamn, bundleWeight totalvikt.
|
2) Kombipaket/bundle: isBundle=true, name ska vara paketets huvudnamn, bundleWeight totalvikt.
|
||||||
3) For bundle ska bundleItems innehalla de ingaende produkterna, t.ex. ["Chumlax 3x100g", "Alaska pollock 3x100g"].
|
3) For bundle ska bundleItems innehalla de ingaende produkterna, t.ex. ["Chumlax 3x100g", "Alaska pollock 3x100g"].
|
||||||
4) price ar priset for hela forpackningen. comparisonPrice ar jamforpris som tal ("83:17" -> 83.17).
|
4) price ar priset for hela forpackningen. comparisonPrice ar jamforpris som tal ("83:17" -> 83.17).
|
||||||
5) offer innehaller kampanjtext som "Max 10 kop/hushall".
|
5) offer innehaller kampanjtext som "Max 10 kop/hushall".
|
||||||
|
6) Om en rubrik/lista innehaller flera kommaseparerade namn och efterfoljande rad/rader innehaller gemensam brand, vikt, pris eller kampanjvillkor: expandera till separata objekt (en per namn) och arv all gemensam metadata.
|
||||||
|
7) Tillämpa samma split-regel generellt for liknande tillbud (inte bara ost), nar listan tydligt representerar produktvarianter/smaker/sorter.
|
||||||
|
8) Splitta INTE om listan snarare ar ingredienser, avdelningar, eller otydlig marknadsforing utan tydlig produktvariant.
|
||||||
|
9) Specialregel ost: namn som PRAST/HERRGARD/GREVE ska normaliseras till Prastost/Herrgardsost/Greveost.
|
||||||
|
10) Om texten innehaller "ARLA KO" ska brand vara exakt "Arla Ko".
|
||||||
|
11) For ovan ostsorter ska category vara "Hardost".
|
||||||
|
12) Returnera aldrig extra nycklar, text, markdown eller forklaringar utanfor JSON-arrayen.
|
||||||
|
|
||||||
Exempel bundle utdata:
|
Exempel bundle utdata:
|
||||||
[
|
[
|
||||||
@@ -223,6 +234,42 @@ Exempel enkel produkt utdata:
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
Exempel gruppannons med varianter (ska splittas):
|
||||||
|
Input-idé: "PRAST, HERRGARD, GREVE" + "ARLA KO" + gemensam vikt/pris.
|
||||||
|
Output-idé:
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"name": "Prastost",
|
||||||
|
"brand": "Arla Ko",
|
||||||
|
"category": "Hardost",
|
||||||
|
"isBundle": false,
|
||||||
|
"weight": "667g",
|
||||||
|
"bundleWeight": null,
|
||||||
|
"bundleItems": [],
|
||||||
|
"price": null,
|
||||||
|
"comparisonPrice": 79.90,
|
||||||
|
"unit": "kg",
|
||||||
|
"offer": ["Max 3 forp/hushall"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Herrgardsost",
|
||||||
|
"brand": "Arla Ko",
|
||||||
|
"category": "Hardost",
|
||||||
|
"isBundle": false,
|
||||||
|
"weight": "667g",
|
||||||
|
"bundleWeight": null,
|
||||||
|
"bundleItems": [],
|
||||||
|
"price": null,
|
||||||
|
"comparisonPrice": 79.90,
|
||||||
|
"unit": "kg",
|
||||||
|
"offer": ["Max 3 forp/hushall"]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
Exempel negativt fall (ska INTE splittas):
|
||||||
|
Input-idé: "Ingredienser: tomat, lok, vitlok".
|
||||||
|
Output-idé: en produktpost (ingen variant-expansion).
|
||||||
|
|
||||||
Text att tolka:
|
Text att tolka:
|
||||||
${truncatedText}`;
|
${truncatedText}`;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -105,5 +105,41 @@ describe('FlyerNormalizerService', () => {
|
|||||||
const result2 = service.normalize(undefined as any);
|
const result2 = service.normalize(undefined as any);
|
||||||
expect(result2).toEqual([]);
|
expect(result2).toEqual([]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('splits listed cheese variants into separate products', () => {
  // One grouped flyer row: three comma-separated variants sharing brand,
  // unit, comparison price and offer text.
  const groupedAd = {
    rawName: 'PRÄST®, HERRGÅRD®, GREVÉ®',
    brand: 'ARLA KO',
    unit: 'kg',
    comparisonPrice: '79,90',
    offer: ['Max 3 förp/hushåll'],
  };

  const products = service.normalize([groupedAd]);

  // Each listed variant becomes its own product, in listing order.
  expect(products).toHaveLength(3);
  expect(products.map(({ rawName }) => rawName)).toEqual(['Prastost', 'Herrgardsost', 'Greveost']);

  // Shared metadata is canonicalised on every expanded product.
  expect(products.every(({ brand }) => brand === 'Arla Ko')).toBe(true);
  expect(products.every(({ categoryHint }) => categoryHint === 'Hårdost')).toBe(true);

  // The split is recorded in the parse reasons.
  expect(products[0].parseReasons).toContain('split_cheese_variants');
});
|
||||||
|
|
||||||
|
it('keeps single cheese item unsplit but normalizes brand/category', () => {
  // A lone variant must not be expanded, but brand casing and category
  // are still canonicalised.
  const singleCheese = {
    rawName: 'Prästost',
    brand: 'arla ko',
  };

  const products = service.normalize([singleCheese]);

  expect(products).toHaveLength(1);

  const [only] = products;
  expect(only.rawName).toBe('Prästost');
  expect(only.brand).toBe('Arla Ko');
  expect(only.categoryHint).toBe('Hårdost');
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -23,6 +23,11 @@ export class FlyerNormalizerService {
|
|||||||
private readonly logger = new Logger(FlyerNormalizerService.name);
|
private readonly logger = new Logger(FlyerNormalizerService.name);
|
||||||
private readonly MAX_BUNDLE_ITEMS = 20;
|
private readonly MAX_BUNDLE_ITEMS = 20;
|
||||||
private readonly MAX_BUNDLE_ITEM_LENGTH = 120;
|
private readonly MAX_BUNDLE_ITEM_LENGTH = 120;
|
||||||
|
/**
 * Lookup from a cheese variant keyword to the product name emitted for it.
 *
 * Keys are lower-case and free of diacritics; they are matched as
 * substrings against product text that has been lower-cased and stripped
 * of diacritics. Values are the replacement product names, also spelled
 * without diacritics.
 */
private readonly CHEESE_VARIANT_TO_NAME: Record<string, string> = {
  prast: 'Prastost',
  herrgard: 'Herrgardsost',
  greve: 'Greveost',
};
|
||||||
|
|
||||||
private readonly UNIT_MAPPING: Record<string, string> = {
|
private readonly UNIT_MAPPING: Record<string, string> = {
|
||||||
// Längd
|
// Längd
|
||||||
@@ -61,29 +66,30 @@ export class FlyerNormalizerService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
return items
|
return items
|
||||||
.map((item, idx) => this.normalizeItem(item, idx))
|
.flatMap((item, idx) => this.normalizeItem(item, idx))
|
||||||
.filter((item): item is NormalizedFlyerItem => item !== null);
|
.filter((item): item is NormalizedFlyerItem => item !== null);
|
||||||
}
|
}
|
||||||
|
|
||||||
private normalizeItem(item: any, index: number): NormalizedFlyerItem | null {
|
private normalizeItem(item: any, index: number): Array<NormalizedFlyerItem | null> {
|
||||||
if (!item || typeof item !== 'object') {
|
if (!item || typeof item !== 'object') {
|
||||||
this.logger.warn(`Item ${index} is not an object, skipping`);
|
this.logger.warn(`Item ${index} is not an object, skipping`);
|
||||||
return null;
|
return [null];
|
||||||
}
|
}
|
||||||
|
|
||||||
const rawName = this.extractString(item.rawName) || this.extractString(item.name);
|
const rawName = this.extractString(item.rawName) || this.extractString(item.name);
|
||||||
if (!rawName) {
|
if (!rawName) {
|
||||||
this.logger.warn(`Item ${index} has no name, skipping`);
|
this.logger.warn(`Item ${index} has no name, skipping`);
|
||||||
return null;
|
return [null];
|
||||||
}
|
}
|
||||||
|
|
||||||
const normalizedName = this.extractString(item.normalizedName) || this.normalizeName(rawName);
|
const normalizedName = this.extractString(item.normalizedName) || this.normalizeName(rawName);
|
||||||
|
const normalizedBrand = this.normalizeBrand(this.extractString(item.brand), rawName);
|
||||||
return {
|
const categoryHint = this.normalizeCategory(this.extractString(item.category), rawName);
|
||||||
|
const baseItem: NormalizedFlyerItem = {
|
||||||
rawName,
|
rawName,
|
||||||
normalizedName,
|
normalizedName,
|
||||||
brand: this.extractString(item.brand),
|
brand: normalizedBrand,
|
||||||
categoryHint: this.normalizeCategory(this.extractString(item.category)),
|
categoryHint,
|
||||||
price: this.extractPrice(item.price),
|
price: this.extractPrice(item.price),
|
||||||
priceUnit: this.normalizeUnit(this.extractString(item.unit)),
|
priceUnit: this.normalizeUnit(this.extractString(item.unit)),
|
||||||
comparisonPrice: this.extractPrice(item.comparisonPrice),
|
comparisonPrice: this.extractPrice(item.comparisonPrice),
|
||||||
@@ -98,6 +104,13 @@ export class FlyerNormalizerService {
|
|||||||
? item.reasonCodes.map(String)
|
? item.reasonCodes.map(String)
|
||||||
: ['normalized'],
|
: ['normalized'],
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const expandedItems = this.expandCheeseVariants(baseItem);
|
||||||
|
if (expandedItems.length > 0) {
|
||||||
|
return expandedItems;
|
||||||
|
}
|
||||||
|
|
||||||
|
return [baseItem];
|
||||||
}
|
}
|
||||||
|
|
||||||
private extractString(val: any): string | null {
|
private extractString(val: any): string | null {
|
||||||
@@ -138,7 +151,10 @@ export class FlyerNormalizerService {
|
|||||||
return this.UNIT_MAPPING[cleaned] ?? null;
|
return this.UNIT_MAPPING[cleaned] ?? null;
|
||||||
}
|
}
|
||||||
|
|
||||||
private normalizeCategory(category: string | null): string | null {
|
private normalizeCategory(category: string | null, rawName?: string): string | null {
|
||||||
|
if (this.containsSwedishCheeseVariant(rawName)) {
|
||||||
|
return 'Hårdost';
|
||||||
|
}
|
||||||
if (!category) return null;
|
if (!category) return null;
|
||||||
|
|
||||||
const normalized = category.trim().toLowerCase();
|
const normalized = category.trim().toLowerCase();
|
||||||
@@ -162,6 +178,59 @@ export class FlyerNormalizerService {
|
|||||||
return categoryMap[normalized] ?? null;
|
return categoryMap[normalized] ?? null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Canonicalises the brand of a flyer item.
 *
 * The brand and the raw product name are joined and lower-cased; when the
 * combined text contains the phrase "arla ko" as whole words, the canonical
 * spelling 'Arla Ko' is returned. In every other case the brand is returned
 * exactly as it was passed in.
 *
 * The phrase must be delimited: a plain substring test would also fire on
 * brand "Arla" followed by a name such as "Kokosdryck" ("arla kokosdryck"),
 * or on a Unicode-decomposed "Arla Köket" (an "o" followed by a combining
 * diaeresis), and would mislabel those items as 'Arla Ko'.
 *
 * @param brand - Brand string extracted from the item, or null when absent.
 * @param rawName - Raw product name, also searched for the brand phrase.
 * @returns 'Arla Ko' when the phrase is present, otherwise `brand` unchanged.
 */
private normalizeBrand(brand: string | null, rawName?: string): string | null {
  const haystack = `${brand ?? ''} ${rawName ?? ''}`.trim().toLowerCase();

  // Left side: no letter or digit may precede "arla".
  // Right side: no letter, combining mark, or digit may follow "ko".
  // Any run of whitespace is accepted between the two words.
  const arlaKoPhrase = /(?<![\p{L}\p{N}])arla\s+ko(?![\p{L}\p{M}\p{N}])/u;

  if (arlaKoPhrase.test(haystack)) {
    return 'Arla Ko';
  }

  return brand;
}
|
||||||
|
|
||||||
|
/**
 * Reports whether a text mentions one of the recognised cheese variants.
 *
 * The text is lower-cased and stripped of diacritics, then searched for the
 * substrings 'prast', 'herrgard' and 'greve'.
 *
 * @param value - Text to inspect; may be null, undefined or empty.
 * @returns true when any of the keywords occurs as a substring; false for
 *          missing or empty input.
 */
private containsSwedishCheeseVariant(value?: string | null): boolean {
  if (!value) {
    return false;
  }

  const haystack = this.stripDiacritics(value.toLowerCase());

  for (const keyword of ['prast', 'herrgard', 'greve']) {
    if (haystack.includes(keyword)) {
      return true;
    }
  }

  return false;
}
|
||||||
|
|
||||||
|
/**
 * Expands an item whose name lists several cheese variants into one item
 * per variant.
 *
 * The raw name is lower-cased, stripped of diacritics and split on ',',
 * '/', '&' and the words "och" / "and". Every segment is searched for the
 * keys of CHEESE_VARIANT_TO_NAME; distinct keys are collected in the order
 * they are first found.
 *
 * @param item - Already normalised item to inspect.
 * @returns One copy of `item` per variant found, with the name fields
 *          replaced, the category set to 'Hårdost' and the reason
 *          'split_cheese_variants' appended. Returns an empty array for
 *          bundles and when fewer than two distinct variants are found.
 */
private expandCheeseVariants(item: NormalizedFlyerItem): NormalizedFlyerItem[] {
  if (item.isBundle) {
    return [];
  }

  const variantKeys = Object.keys(this.CHEESE_VARIANT_TO_NAME);
  const foldedName = this.stripDiacritics(item.rawName.toLowerCase());

  // A Set keeps first-seen order while dropping repeated variants.
  const found = new Set<string>();
  for (const piece of foldedName.split(/[,/&]|\boch\b|\band\b/g)) {
    const segment = piece.trim();
    if (!segment) {
      continue;
    }

    // Drop symbols such as the registered-trademark sign before matching.
    const cleaned = segment.replace(/[^a-z0-9\s]/g, '');
    for (const key of variantKeys) {
      if (cleaned.includes(key)) {
        found.add(key);
      }
    }
  }

  // A single variant (or none) is not a group listing; nothing to expand.
  if (found.size <= 1) {
    return [];
  }

  const expanded: NormalizedFlyerItem[] = [];
  for (const key of found) {
    const productName = this.CHEESE_VARIANT_TO_NAME[key];
    expanded.push({
      ...item,
      rawName: productName,
      normalizedName: this.normalizeName(productName),
      categoryHint: 'Hårdost',
      parseReasons: [...item.parseReasons, 'split_cheese_variants'],
    });
  }

  return expanded;
}
|
||||||
|
|
||||||
|
/**
 * Removes combining diacritical marks from a string.
 *
 * The input is first put into Unicode normalisation form NFD, after which
 * every code point in the range U+0300–U+036F is deleted.
 *
 * @param value - Text to fold.
 * @returns The text without code points in U+0300–U+036F.
 */
private stripDiacritics(value: string): string {
  const decomposed = value.normalize('NFD');
  return decomposed.replace(/[\u0300-\u036f]/g, '');
}
|
||||||
|
|
||||||
private normalizeOfferText(offer: any): string | null {
|
private normalizeOfferText(offer: any): string | null {
|
||||||
if (!offer) return null;
|
if (!offer) return null;
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user