ca1eed5061
- Added `productName` field to `AdminAiWarning` to include product context in warnings - Updated `collectWarnings` to extract and include `rawName` as `productName` in AI trace warnings - Added `signals` field to `FlyerParseItem` type for detailed product signals - Enhanced Flutter admin panel to display product names in AI trace warnings - Added new `AdminAiTraceResponse` DTO for AI trace data structure
1050 lines
34 KiB
TypeScript
1050 lines
34 KiB
TypeScript
import {
|
|
BadRequestException,
|
|
ForbiddenException,
|
|
Injectable,
|
|
Logger,
|
|
NotFoundException,
|
|
ServiceUnavailableException,
|
|
} from '@nestjs/common';
|
|
import { Prisma } from '@prisma/client';
|
|
import { PrismaService } from '../prisma/prisma.service';
|
|
import { normalizeName } from '../common/utils/normalize-name';
|
|
import { CategoriesService } from '../categories/categories.service';
|
|
import {
|
|
FlyerImportItem,
|
|
FlyerImportMatchVia,
|
|
FlyerImportResponse,
|
|
} from './dto/flyer-import.response';
|
|
import { TextExtractorService } from './services/text-extractor.service';
|
|
import { AiFlyerParserService } from './services/ai-flyer-parser.service';
|
|
import { FlyerNormalizerService } from './services/flyer-normalizer.service';
|
|
import { describeMatchReason, describeParseReason } from './services/reason-codes';
|
|
import { CategoryResolverService } from '../import-common/category-resolver.service';
|
|
import { buildDisplayNameDetailed } from '../import-common/import-display-name.util';
|
|
import { extractImportSignals } from '../import-common/import-signals.util';
|
|
import { ImportedItemSignals } from '../import-common/import-item.types';
|
|
|
|
/**
 * One product row as produced by the internal flyer parse pipeline
 * (text extraction -> AI parse -> normalizer), before matching against the
 * user's product catalogue.
 */
type FlyerParseItem = {
  /** Product name as it was read from the flyer. */
  rawName: string;
  /** Name after the normalizer has processed it. */
  normalizedName: string;
  brand: string | null;
  /** Free-text category hint from the parser (not a category id). */
  category: string | null;
  price: number | null;
  priceUnit: string | null;
  comparisonPrice: number | null;
  comparisonUnit: string | null;
  weight: string | null;
  bundleWeight: string | null;
  isBundle: boolean;
  bundleItems: string[];
  /** Free-text offer/campaign description, e.g. limits or multi-buy text. */
  offerText: string | null;
  /** Parse confidence reported by the normalizer. */
  confidence: number;
  /** Machine-readable codes explaining the parse result. */
  reasonCodes: string[];
  /** Optional detailed product signals supplied by the parser. */
  signals?: ImportedItemSignals | null;
};
|
|
|
|
/**
 * Result of the internal parse step: the parsed items plus warnings and the
 * AI trace data that is later persisted for diagnostics.
 */
type FlyerParseResponse = {
  retailer: 'willys';
  parserVersion: 'v1';
  items: FlyerParseItem[];
  /** Human-readable warnings (user-facing, Swedish). */
  warnings: string[];
  /** Diagnostics copied from the AI parser result. */
  trace: {
    prompt: string | null;
    rawOutput: string | null;
    chunkCount: number | null;
    retryCount: number | null;
  };
};
|
|
|
|
/**
 * Price information recovered from an item's free-text offer description.
 * Used as a fallback when the parser did not return structured prices.
 */
type ExtractedOfferSignals = {
  price: number | null;
  priceUnit: string | null;
  comparisonPrice: number | null;
  comparisonUnit: string | null;
  /** True when the text contains a multi-buy pattern such as "3 för 20". */
  hasCampaignPattern: boolean;
};
|
|
|
|
/** The subset of product columns that flyer matching needs. */
type ProductLite = {
  id: number;
  name: string;
  canonicalName: string | null;
  categoryId: number | null;
};
|
|
|
|
@Injectable()
|
|
export class FlyerImportService {
  /**
   * Price followed by a unit, e.g. `25:90/kg` or `25 kr/st`. A "kr" and/or a
   * "/" is required between amount and unit so that weights and counts
   * ("500 g", "2 gurkor") are not mistaken for prices.
   */
  private static readonly PRICE_WITH_UNIT_RE =
    /(\d{1,3}[:.,]\d{2}|\d{1,3})\s*(?:kr\s*\/?|\/)\s*(kg|hg|g|l|dl|cl|ml|st|styck|pkt|förp|fp)/i;

  /** Bare price in kronor, e.g. `30 kr`. */
  private static readonly PRICE_ONLY_RE = /(\d{1,3}[:.,]\d{2}|\d{1,3})\s*kr\b/i;

  /** Comparison price clause, e.g. `jfr pris 45:90/kg` or `Jfr-pris 45:90/kg`. */
  private static readonly COMPARISON_PRICE_RE =
    /(?:jfr[\s-]*pris|jamforpris|jämförpris|jfr)\s*:?\s*(\d{1,3}[:.,]\d{2}|\d{1,3})\s*(?:kr)?\s*\/?\s*(kg|hg|g|l|dl|cl|ml|st|styck|pkt|förp|fp)/i;

  /** Multi-buy campaign text, e.g. `3 för 20` or `ta 3 betala för 2`. */
  private static readonly CAMPAIGN_RE =
    /(\b\d+\s*för\s*\d+[,.:]?\d*\b)|(ta\s*\d+\s*betala\s*för\s*\d+)/i;

  private readonly logger = new Logger(FlyerImportService.name);
  /** Upper bound on how many bundle entries are kept per item. */
  private readonly MAX_BUNDLE_ITEMS = 20;
  /** Upper bound on the length of each bundle entry. */
  private readonly MAX_BUNDLE_ITEM_LENGTH = 120;

  constructor(
    private readonly prisma: PrismaService,
    private readonly categoriesService: CategoriesService,
    private readonly categoryResolver: CategoryResolverService,
    private readonly textExtractor: TextExtractorService,
    private readonly aiParser: AiFlyerParserService,
    private readonly normalizer: FlyerNormalizerService,
  ) {}

  /**
   * Parses an uploaded flyer, matches every parsed row against the user's
   * products, persists a draft session with its items and an AI trace, and
   * returns the import response.
   *
   * @param file   Uploaded flyer file (buffer, mimetype and original name are used).
   * @param userId Owner of the products and of the created session.
   * @throws BadRequestException or ServiceUnavailableException from the parse step.
   */
  async parseAndMatch(file: Express.Multer.File, userId: number): Promise<FlyerImportResponse> {
    const startedAt = Date.now();
    const parsed = await this.parseViaInternal(file);

    const [products, aliases, categories] = await Promise.all([
      this.prisma.product.findMany({
        where: { ownerId: userId, isActive: true },
        select: { id: true, name: true, canonicalName: true, categoryId: true },
      }),
      this.prisma.receiptAlias.findMany({
        where: {
          OR: [{ ownerId: userId, isGlobal: false }, { isGlobal: true }],
        },
        select: { receiptName: true, productId: true },
      }),
      // Categories are optional input for the resolver: log and continue without them.
      this.categoriesService.findFlattened().catch((error) => {
        this.logger.warn(
          `Could not load categories for flyer import, proceeding without rule categories: ${error instanceof Error ? error.message : String(error)}`,
        );
        return [];
      }),
    ]);

    const productById = new Map<number, ProductLite>();
    for (const product of products) {
      productById.set(product.id, product);
    }

    // First alias wins per normalized name, except that an alias resolving to
    // one of the loaded products replaces one that does not (the alias query
    // also returns global aliases, which may reference products not loaded here).
    const aliasToProduct = new Map<string, number>();
    for (const alias of aliases) {
      const key = normalizeName(alias.receiptName);
      if (!key) continue;
      const existing = aliasToProduct.get(key);
      const replacesDangling =
        existing !== undefined && !productById.has(existing) && productById.has(alias.productId);
      if (existing === undefined || replacesDangling) {
        aliasToProduct.set(key, alias.productId);
      }
    }

    const items: FlyerImportItem[] = parsed.items.map((item) => {
      const signalData = extractImportSignals({
        rawName: item.rawName,
        brand: item.brand,
        offerText: item.offerText,
      });

      const match = this.matchItem(
        item,
        signalData.normalizedMatchName,
        signalData.signals,
        products,
        aliasToProduct,
        productById,
      );

      // Structured parser values win; offer-text extraction is only a fallback.
      const offerSignals = this.extractOfferSignals(item.offerText);
      const price = item.price ?? offerSignals.price;
      const priceUnit = this.normalizeUnit(item.priceUnit) ?? offerSignals.priceUnit;
      const comparisonPrice = item.comparisonPrice ?? offerSignals.comparisonPrice;
      const comparisonUnit = this.normalizeUnit(item.comparisonUnit) ?? offerSignals.comparisonUnit;
      const offerLimitText = this.extractOfferLimitText(item.offerText);

      const bundleItems = this.sanitizeBundleItems(item.bundleItems);
      const displayNameDetailed = buildDisplayNameDetailed({
        rawName: item.rawName,
        isBundle: item.isBundle,
        bundleItems,
      });

      const categoryId = this.categoryResolver.resolveForFlyer({
        categories,
        signalText: [item.rawName, item.brand ?? '', item.offerText ?? ''].join(' ').trim(),
        categoryHint: item.category,
        matchedProductCategoryId: match.product?.categoryId ?? null,
        matchConfidence: match.confidence,
      });

      // Drop a "brand" that is really the origin country. NOTE(review):
      // parseViaInternal currently sets item.signals to null, so origin is
      // always null here until the parser starts supplying signals.
      const origin = item.signals?.originCountries?.[0] || null;
      const brand = item.brand && item.brand.trim() !== origin ? item.brand : null;

      return {
        flyerItemId: null,
        rawName: item.rawName,
        normalizedName: signalData.normalizedMatchName || item.normalizedName,
        brand,
        category: item.category,
        categoryId,
        price,
        priceUnit,
        comparisonPrice,
        comparisonUnit,
        weight: item.weight,
        bundleWeight: item.bundleWeight,
        isBundle: item.isBundle,
        bundleItems,
        displayNameDetailed,
        signals: signalData.signals,
        offerText: item.offerText,
        isOffer: this.isOfferItem(item, offerSignals.hasCampaignPattern),
        offerLimitText,
        parseConfidence: item.confidence,
        parseReasons: item.reasonCodes,
        parseReasonsDetailed: this.describeParseReasons(item.reasonCodes),
        matchedProductId: match.product?.id ?? null,
        matchedProductName: match.product?.name ?? null,
        matchedVia: match.via,
        matchConfidence: match.confidence,
        matchReasons: match.reasons,
        matchReasonsDetailed: this.describeMatchReasons(match.reasons),
      };
    });

    this.logImportMetrics(items);

    const persistedItems = await this.persistSessionWithItems(userId, parsed.retailer, items, file);
    const itemCount = persistedItems.items.length;

    await this.persistFlyerTrace({
      userId,
      sessionId: persistedItems.sessionId,
      model: 'ministral-8b-2512',
      prompt: parsed.trace.prompt,
      rawOutput: parsed.trace.rawOutput,
      normalizedOutput: {
        sessionId: persistedItems.sessionId,
        warnings: parsed.warnings,
        itemCount,
        chunkCount: parsed.trace.chunkCount,
        retryCount: parsed.trace.retryCount,
      },
      status: itemCount === 0 ? 'error' : parsed.warnings.length > 0 ? 'warning' : 'success',
      error: itemCount === 0 ? 'Inga produkter kunde extraheras från flyern.' : null,
      durationMs: Date.now() - startedAt,
    });

    return {
      sessionId: persistedItems.sessionId,
      retailer: parsed.retailer,
      parserVersion: parsed.parserVersion,
      sourceAvailable: true,
      sourceFileName: file.originalname ?? null,
      sourceMimeType: file.mimetype ?? null,
      sourceFileSize: file.size ?? null,
      items: persistedItems.items,
      warnings: parsed.warnings,
    };
  }

  /**
   * Returns the stored source file of a flyer session.
   *
   * @throws NotFoundException  when the session or its source file is missing.
   * @throws ForbiddenException when the session belongs to another user.
   */
  async getSessionSource(sessionId: number, userId: number): Promise<{
    fileName: string;
    mimeType: string;
    contentLength: number;
    data: Buffer;
  }> {
    const session = await this.prisma.flyerSession.findUnique({
      where: { id: sessionId },
      select: {
        userId: true,
        sourceFileName: true,
        sourceMimeType: true,
        sourceFileSize: true,
        sourceData: true,
      },
    });

    if (!session) {
      throw new NotFoundException('Flyer-session hittades inte.');
    }
    if (session.userId !== userId) {
      throw new ForbiddenException('Du saknar åtkomst till denna session.');
    }
    if (!session.sourceData || !session.sourceFileName || !session.sourceMimeType) {
      throw new NotFoundException('Källfil saknas för denna flyer-session.');
    }

    const data = Buffer.from(session.sourceData);
    return {
      fileName: session.sourceFileName,
      mimeType: session.sourceMimeType,
      contentLength: session.sourceFileSize ?? data.length,
      data,
    };
  }

  /**
   * Updates the name and/or category of one item in a session owned by the user.
   *
   * @param payload `rawName` renames the item (and recomputes its normalized
   *                name); `categoryId` sets or, when null, clears the category.
   * @throws NotFoundException   when the session or item is not found.
   * @throws ForbiddenException  when the session belongs to another user.
   * @throws BadRequestException on an empty name, unknown category, or empty payload.
   */
  async updateSessionItem(
    sessionId: number,
    itemId: number,
    userId: number,
    payload: { rawName?: string; categoryId?: number | null },
  ): Promise<FlyerImportItem> {
    const session = await this.prisma.flyerSession.findUnique({
      where: { id: sessionId },
      select: { id: true, userId: true },
    });
    if (!session) {
      throw new NotFoundException('Flyer-session hittades inte.');
    }
    if (session.userId !== userId) {
      throw new ForbiddenException('Du saknar åtkomst till denna session.');
    }

    const item = await this.prisma.flyerItem.findUnique({
      where: { id: itemId },
      select: { id: true, sessionId: true, rawName: true },
    });
    if (!item || item.sessionId !== sessionId) {
      throw new NotFoundException('Flyer-rad hittades inte i sessionen.');
    }

    const updateData: Prisma.FlyerItemUncheckedUpdateInput = {};

    if (typeof payload.rawName === 'string') {
      const trimmed = payload.rawName.trim();
      if (!trimmed) {
        throw new BadRequestException('Namn får inte vara tomt.');
      }
      updateData.rawName = trimmed;
      // Fall back to the previous name if the new one normalizes to nothing.
      updateData.normalizedName = normalizeName(trimmed) || normalizeName(item.rawName);
    }

    if (payload.categoryId !== undefined) {
      if (payload.categoryId === null) {
        updateData.categoryId = null;
        updateData.categoryHint = null;
      } else {
        // Also validates that the category exists.
        const path = await this.resolveCategoryPath(payload.categoryId);
        updateData.categoryId = payload.categoryId;
        updateData.categoryHint = path;
      }
    }

    if (Object.keys(updateData).length === 0) {
      throw new BadRequestException('Inga giltiga fält att uppdatera.');
    }

    const updated = await this.prisma.flyerItem.update({
      where: { id: itemId },
      data: updateData,
      include: {
        categoryRef: {
          include: {
            parent: {
              include: {
                parent: true,
              },
            },
          },
        },
      },
    });

    // NOTE(review): cast kept from the original; the generated Prisma row type
    // is not visible here, so compatibility with PersistedFlyerItem is unverified.
    return this.toFlyerImportItem(updated as any);
  }

  /**
   * Loads one session (with items and up to three category levels) owned by the user.
   *
   * @throws NotFoundException when no such session exists for this user.
   */
  async getSession(sessionId: number, userId: number): Promise<FlyerImportResponse> {
    const session = await this.prisma.flyerSession.findFirst({
      where: { id: sessionId, userId },
      select: {
        id: true,
        sourceFileName: true,
        sourceMimeType: true,
        sourceFileSize: true,
        sourceStorageKey: true,
        items: {
          include: {
            categoryRef: {
              include: {
                parent: {
                  include: {
                    parent: true,
                  },
                },
              },
            },
          },
          orderBy: { id: 'asc' },
        },
      },
    });

    if (!session) {
      throw new NotFoundException('Flyer-session hittades inte.');
    }

    return this.toFlyerImportResponseFromSession(session);
  }

  /**
   * Loads the user's most recently created session, or an empty response
   * (sessionId null, no items) when the user has none.
   */
  async getLatestSession(userId: number): Promise<FlyerImportResponse> {
    const latest = await this.prisma.flyerSession.findFirst({
      where: { userId },
      orderBy: { createdAt: 'desc' },
      select: {
        id: true,
        sourceFileName: true,
        sourceMimeType: true,
        sourceFileSize: true,
        sourceStorageKey: true,
        items: {
          include: {
            categoryRef: {
              include: {
                parent: {
                  include: {
                    parent: true,
                  },
                },
              },
            },
          },
          orderBy: { id: 'asc' },
        },
      },
    });

    if (!latest) {
      return {
        sessionId: null,
        retailer: 'willys',
        parserVersion: 'v1',
        sourceAvailable: false,
        sourceFileName: null,
        sourceMimeType: null,
        sourceFileSize: null,
        items: [],
        warnings: [],
      };
    }

    return this.toFlyerImportResponseFromSession(latest);
  }

  /**
   * Creates a draft session holding the uploaded file, then creates one row
   * per item (sequentially, not in a transaction) and returns the items with
   * their new ids.
   */
  private async persistSessionWithItems(
    userId: number,
    retailer: 'willys',
    items: FlyerImportItem[],
    file: Express.Multer.File,
  ): Promise<{ sessionId: number; items: FlyerImportItem[] }> {
    const weekKey = this.toWeekKey(new Date());

    const session = await this.prisma.flyerSession.create({
      data: {
        userId,
        retailer,
        weekKey,
        status: 'draft',
        sourceFileName: file.originalname ?? null,
        sourceMimeType: file.mimetype ?? null,
        sourceFileSize: file.size ?? file.buffer.length,
        sourceStorageKey: this.buildSourceStorageKey(userId, weekKey),
        sourceData: Buffer.from(file.buffer),
      },
      select: { id: true },
    });

    const savedItems: FlyerImportItem[] = [];
    for (const item of items) {
      const createData: Prisma.FlyerItemUncheckedCreateInput = {
        sessionId: session.id,
        rawName: item.rawName,
        normalizedName: item.normalizedName,
        brand: item.brand,
        categoryHint: item.category,
        categoryId: item.categoryId,
        price: item.price != null ? new Prisma.Decimal(item.price) : null,
        priceUnit: item.priceUnit,
        comparisonPrice:
          item.comparisonPrice != null ? new Prisma.Decimal(item.comparisonPrice) : null,
        comparisonUnit: item.comparisonUnit,
        weight: item.weight,
        bundleWeight: item.bundleWeight,
        isBundle: item.isBundle,
        bundleItems: item.bundleItems,
        displayNameDetailed: item.displayNameDetailed,
        signals: item.signals as Prisma.InputJsonValue,
        offerText: item.offerText,
        parseConfidence: item.parseConfidence,
        parseReasons: item.parseReasons,
        matchedProductId: item.matchedProductId,
        matchedProductName: item.matchedProductName,
        matchedVia: item.matchedVia,
        matchConfidence: item.matchConfidence,
        matchReasons: item.matchReasons,
      };

      const created = await this.prisma.flyerItem.create({
        data: createData,
        select: { id: true },
      });

      savedItems.push({ ...item, flyerItemId: created.id });
    }

    return { sessionId: session.id, items: savedItems };
  }

  /**
   * Formats the ISO-8601 week of the given date as `YYYY-Www`, using the
   * date's local calendar day.
   */
  private toWeekKey(date: Date): string {
    const d = new Date(Date.UTC(date.getFullYear(), date.getMonth(), date.getDate()));
    // ISO weeks belong to the year of their Thursday: shift to that Thursday.
    const dayNum = d.getUTCDay() || 7;
    d.setUTCDate(d.getUTCDate() + 4 - dayNum);
    const yearStart = new Date(Date.UTC(d.getUTCFullYear(), 0, 1));
    const weekNo = Math.ceil((((d.getTime() - yearStart.getTime()) / 86400000) + 1) / 7);
    return `${d.getUTCFullYear()}-W${String(weekNo).padStart(2, '0')}`;
  }

  /**
   * Matches one parsed item against the user's products in three stages:
   * alias lookup (confidence 1), exact normalized name (0.96), then best
   * token overlap with small boosts for brand, weight and eco signals
   * (accepted at >= 0.66, capped at 0.95).
   *
   * An alias that points to a product missing from `productById` no longer
   * stops matching: the later stages still run, and only if they also find
   * nothing is `alias_points_to_missing_product` reported.
   */
  private matchItem(
    item: FlyerParseItem,
    normalizedMatchName: string,
    itemSignals: ImportedItemSignals,
    products: ProductLite[],
    aliasToProduct: Map<string, number>,
    productById: Map<number, ProductLite>,
  ): {
    product: ProductLite | null;
    via: FlyerImportMatchVia;
    confidence: number;
    reasons: string[];
  } {
    const normalized = normalizedMatchName || normalizeName(item.normalizedName || item.rawName);
    if (!normalized) {
      return { product: null, via: 'none', confidence: 0, reasons: ['empty_name'] };
    }

    // Stage 1: alias.
    const aliasedProductId = aliasToProduct.get(normalized);
    const hasAlias = aliasedProductId !== undefined;
    const aliasedProduct = hasAlias ? productById.get(aliasedProductId) ?? null : null;
    if (aliasedProduct) {
      return { product: aliasedProduct, via: 'alias', confidence: 1, reasons: ['alias_exact'] };
    }

    // Stage 2: exact normalized name or canonical name.
    for (const product of products) {
      const productName = normalizeName(product.name);
      const canonical = product.canonicalName ? normalizeName(product.canonicalName) : null;
      if (normalized === productName || (canonical && normalized === canonical)) {
        return { product, via: 'exact', confidence: 0.96, reasons: ['normalized_exact'] };
      }
    }

    // Stage 3: best token overlap.
    let best: { product: ProductLite; confidence: number; overlap: number } | null = null;
    const itemTokens = this.tokenize(normalized);
    for (const product of products) {
      const productTokens = this.tokenize(product.canonicalName ?? product.name);
      const overlap = this.tokenOverlap(itemTokens, productTokens);
      if (overlap <= 0) continue;

      let confidence = Math.min(0.93, 0.48 + overlap * 0.42);
      if (this.hasBrandSignal(item.brand, product)) confidence += 0.04;
      if (this.hasWeightSignal(item.weight, product)) confidence += 0.03;
      if (this.hasQualitySignal(itemSignals, product)) confidence += 0.03;
      // Keep token matches strictly below the exact-match confidence.
      confidence = Math.min(0.95, confidence);

      if (!best || confidence > best.confidence) {
        best = { product, confidence, overlap };
      }
    }

    if (best && best.confidence >= 0.66) {
      return {
        product: best.product,
        via: 'token',
        confidence: best.confidence,
        reasons: [`token_overlap:${best.overlap.toFixed(2)}`],
      };
    }

    return {
      product: null,
      via: 'none',
      confidence: 0,
      reasons: [hasAlias ? 'alias_points_to_missing_product' : 'no_match'],
    };
  }

  /** Lower-cases and splits on non-alphanumerics (a-z, 0-9, å, ä, ö), keeping tokens of 3+ chars. */
  private tokenize(value: string): string[] {
    return value
      .toLowerCase()
      .split(/[^a-z0-9åäö]+/)
      .map((part) => part.trim())
      .filter((part) => part.length >= 3);
  }

  /** Jaccard similarity (|A ∩ B| / |A ∪ B|) of two token lists; 0 when either is empty. */
  private tokenOverlap(a: string[], b: string[]): number {
    if (a.length === 0 || b.length === 0) return 0;
    const left = new Set(a);
    const right = new Set(b);
    let intersection = 0;
    for (const token of left) {
      if (right.has(token)) intersection++;
    }
    const union = new Set([...left, ...right]).size;
    if (union === 0) return 0;
    return intersection / union;
  }

  /** True when the normalized brand occurs in the product's normalized name/canonical name. */
  private hasBrandSignal(brand: string | null, product: ProductLite): boolean {
    if (!brand) return false;
    const normalizedBrand = normalizeName(brand);
    if (!normalizedBrand) return false;

    const normalizedProduct = normalizeName(`${product.name} ${product.canonicalName ?? ''}`);
    return normalizedProduct.includes(normalizedBrand);
  }

  /** True when the normalized weight occurs in the product's normalized name/canonical name. */
  private hasWeightSignal(weight: string | null, product: ProductLite): boolean {
    if (!weight) return false;
    const normalizedWeight = normalizeName(weight);
    if (!normalizedWeight) return false;

    const normalizedProduct = normalizeName(`${product.name} ${product.canonicalName ?? ''}`);
    return normalizedProduct.includes(normalizedWeight);
  }

  /** True when the item is flagged `eco` and the product name mentions "eko"/"ekolog…". */
  private hasQualitySignal(signals: ImportedItemSignals, product: ProductLite): boolean {
    if (!signals.qualityFlags.includes('eco')) return false;
    const normalizedProduct = normalizeName(`${product.name} ${product.canonicalName ?? ''}`);
    return /\beko\b|\bekolog/i.test(normalizedProduct);
  }

  /**
   * An item counts as an offer when it has any price, any non-blank offer
   * text, or a campaign pattern. Accepts both parsed items and stored rows.
   */
  private isOfferItem(
    item: { price: unknown; comparisonPrice: unknown; offerText: string | null },
    hasCampaignPattern: boolean,
  ): boolean {
    return (
      item.price != null
      || item.comparisonPrice != null
      || !!item.offerText?.trim()
      || hasCampaignPattern
    );
  }

  /**
   * Extracts the purchase-limit clause from offer text (e.g. "Max 2 köp/hushåll").
   * Patterns are tried from most to least specific; returns null when none match.
   */
  private extractOfferLimitText(offerText: string | null): string | null {
    if (!offerText) return null;

    const normalized = offerText.replace(/\s+/g, ' ').trim();
    if (!normalized) return null;

    const patterns: RegExp[] = [
      // Explicit limit keyword: "max …", "högst …", "begränsat antal …".
      /(?:max|högst|begränsat\s+antal)\s+[^,.;]*(?:hushåll|kund|köp)?(?:\s*\/\s*(?:hushåll|kund))?/i,
      // "per hushåll" / "/kund" style clause.
      /[^,.;]*(?:per\s+(?:hushåll|kund)|\/\s*(?:hushåll|kund))[^,.;]*/i,
      // Any clause mentioning household/customer.
      /[^,.;]*(?:hushåll|kund)[^,.;]*/i,
    ];

    for (const pattern of patterns) {
      const found = normalized.match(pattern);
      if (found?.[0]) {
        return found[0].trim();
      }
    }

    return null;
  }

  /**
   * Recovers price, comparison price and campaign information from offer text.
   *
   * The comparison-price clause is matched first and removed before the
   * regular price is searched, so a lone "Jfr-pris 45:90/kg" is not also
   * reported as the item price.
   */
  private extractOfferSignals(offerText: string | null): ExtractedOfferSignals {
    const signals: ExtractedOfferSignals = {
      price: null,
      priceUnit: null,
      comparisonPrice: null,
      comparisonUnit: null,
      hasCampaignPattern: false,
    };

    if (!offerText?.trim()) return signals;

    const normalized = offerText.replace(/\s+/g, ' ').trim().toLowerCase();
    signals.hasCampaignPattern = FlyerImportService.CAMPAIGN_RE.test(normalized);

    const comparison = normalized.match(FlyerImportService.COMPARISON_PRICE_RE);
    if (comparison) {
      signals.comparisonPrice = this.parseSwedishPrice(comparison[1]);
      signals.comparisonUnit = this.normalizeUnit(comparison[2]);
    }

    // Non-global regex: replaces only the clause matched above.
    const priceText = comparison
      ? normalized.replace(FlyerImportService.COMPARISON_PRICE_RE, ' ')
      : normalized;

    const priceWithUnit = priceText.match(FlyerImportService.PRICE_WITH_UNIT_RE);
    if (priceWithUnit) {
      signals.price = this.parseSwedishPrice(priceWithUnit[1]);
      signals.priceUnit = this.normalizeUnit(priceWithUnit[2]);
    } else {
      const priceOnly = priceText.match(FlyerImportService.PRICE_ONLY_RE);
      if (priceOnly) {
        signals.price = this.parseSwedishPrice(priceOnly[1]);
      }
    }

    return signals;
  }

  /** Parses "25:90", "25,90" or "25.90" into a number; null when not numeric. */
  private parseSwedishPrice(value: string | null | undefined): number | null {
    if (!value) return null;
    const normalized = value.trim().replace(':', '.').replace(',', '.');
    const parsed = Number.parseFloat(normalized);
    if (!Number.isFinite(parsed)) return null;
    return parsed;
  }

  /**
   * Lower-cases a unit, strips dots and maps synonyms ("styck" -> "st";
   * "fp"/"forp"/"förp" -> "pkt"). Unrecognised units are passed through
   * unchanged, as before; the original allow-list check had no effect.
   */
  private normalizeUnit(unit: string | null | undefined): string | null {
    if (!unit) return null;
    const cleaned = unit.trim().toLowerCase().replace(/\./g, '');
    if (!cleaned) return null;

    if (cleaned === 'styck') return 'st';
    if (cleaned === 'fp' || cleaned === 'forp' || cleaned === 'förp' || cleaned === 'pkt') {
      return 'pkt';
    }

    return cleaned;
  }

  /**
   * Runs the internal parse pipeline: extract text, parse it with the AI
   * parser, normalize, and convert to `FlyerParseItem`s.
   *
   * @throws BadRequestException, ServiceUnavailableException rethrown as-is;
   *         any other error is logged and wrapped in a BadRequestException.
   */
  private async parseViaInternal(file: Express.Multer.File): Promise<FlyerParseResponse> {
    try {
      this.logger.debug(`Parsing flyer file: ${file.originalname}`);

      // 1. Extract text from the PDF/image.
      const text = await this.textExtractor.extractText(
        file.buffer,
        file.mimetype,
        file.originalname,
      );

      // 2. Send the text to the AI parser.
      const aiParseResult = await this.aiParser.parseWithAI(text);

      // 3. Normalize the result.
      const normalizedItems = this.normalizer.normalize(aiParseResult.items);

      // 4. Convert to the internal FlyerParseItem format.
      const items: FlyerParseItem[] = normalizedItems.map((item) => ({
        rawName: item.rawName,
        normalizedName: item.normalizedName,
        brand: item.brand,
        category: item.categoryHint,
        price: item.price,
        priceUnit: item.priceUnit,
        comparisonPrice: item.comparisonPrice,
        comparisonUnit: item.comparisonUnit,
        weight: item.weight,
        bundleWeight: item.bundleWeight,
        isBundle: item.isBundle,
        bundleItems: item.bundleItems,
        offerText: item.offerText,
        confidence: item.parseConfidence,
        reasonCodes: item.parseReasons,
        signals: null,
      }));

      const warnings: string[] = [];
      if (items.length === 0) {
        warnings.push('Inga produkter kunde extraheras från flyern.');
      }

      return {
        retailer: 'willys',
        parserVersion: 'v1',
        items,
        warnings,
        trace: {
          prompt: aiParseResult.trace.prompt,
          rawOutput: aiParseResult.trace.rawOutput,
          chunkCount: aiParseResult.trace.chunkCount,
          retryCount: aiParseResult.trace.retryCount,
        },
      };
    } catch (err) {
      if (err instanceof BadRequestException) {
        throw err;
      }
      if (err instanceof ServiceUnavailableException) {
        throw err;
      }
      this.logger.error(`Internal flyer parse failed: ${String(err)}`);
      throw new BadRequestException(
        `Fel vid tolkning av flyer: ${err instanceof Error ? err.message : String(err)}`,
      );
    }
  }

  /**
   * Stores an AI trace row for the import. Best effort: a failure is logged
   * as a warning and never propagates to the caller.
   */
  private async persistFlyerTrace(params: {
    userId: number;
    sessionId: number;
    model: string;
    prompt: string | null;
    rawOutput: string | null;
    normalizedOutput: Record<string, unknown> | null;
    status: 'success' | 'warning' | 'error';
    error: string | null;
    durationMs: number | null;
  }): Promise<void> {
    try {
      await this.prisma.aiTrace.create({
        data: {
          source: 'flyer',
          userId: params.userId,
          sessionId: params.sessionId,
          model: params.model,
          prompt: params.prompt,
          rawOutput: params.rawOutput,
          // Omit the JSON column entirely rather than writing null.
          ...(params.normalizedOutput == null
            ? {}
            : { normalizedOutput: params.normalizedOutput as Prisma.InputJsonValue }),
          status: params.status,
          error: params.error,
          durationMs: params.durationMs,
        },
      });
    } catch (err) {
      this.logger.warn(
        `Kunde inte spara flyer AI-trace: ${err instanceof Error ? err.message : String(err)}`,
      );
    }
  }

  /**
   * Converts a stored flyer item row into the API item shape. Missing
   * prices/units fall back to values extracted from the offer text, and JSON
   * columns are coerced defensively into string arrays / signal objects.
   */
  private toFlyerImportItem(item: PersistedFlyerItem): FlyerImportItem {
    const toStringArray = (value: Prisma.JsonValue | null): string[] => {
      if (!Array.isArray(value)) return [];
      return value.map((entry) => String(entry));
    };

    const toSignals = (value: Prisma.JsonValue | null | undefined): ImportedItemSignals | null => {
      if (!value || typeof value !== 'object' || Array.isArray(value)) return null;
      const record = value as Record<string, unknown>;
      const toArray = (key: string): string[] => {
        const maybeArray = record[key];
        if (!Array.isArray(maybeArray)) return [];
        return maybeArray.map((entry) => String(entry));
      };

      return {
        originCountries: toArray('originCountries'),
        labels: toArray('labels'),
        qualityFlags: toArray('qualityFlags'),
        variant: typeof record.variant === 'string' ? record.variant : null,
        packaging: typeof record.packaging === 'string' ? record.packaging : null,
      };
    };

    // Anything other than a known match kind is reported as 'none'.
    const normalizedMatchVia =
      item.matchedVia === 'alias' || item.matchedVia === 'exact' || item.matchedVia === 'token'
        ? item.matchedVia
        : 'none';

    const categoryPath = this.buildCategoryPath(item.categoryRef) ?? item.categoryHint;
    const offerLimitText = this.extractOfferLimitText(item.offerText);
    const offerSignals = this.extractOfferSignals(item.offerText);

    const bundleItems = this.sanitizeBundleItems(toStringArray(item.bundleItems));
    const parseReasons = toStringArray(item.parseReasons);
    const matchReasons = toStringArray(item.matchReasons);

    return {
      flyerItemId: item.id,
      rawName: item.rawName,
      normalizedName: item.normalizedName,
      brand: item.brand,
      category: categoryPath,
      categoryId: item.categoryId,
      price: item.price != null ? item.price.toNumber() : offerSignals.price,
      priceUnit: this.normalizeUnit(item.priceUnit) ?? offerSignals.priceUnit,
      comparisonPrice:
        item.comparisonPrice != null ? item.comparisonPrice.toNumber() : offerSignals.comparisonPrice,
      comparisonUnit: this.normalizeUnit(item.comparisonUnit) ?? offerSignals.comparisonUnit,
      weight: item.weight,
      bundleWeight: item.bundleWeight,
      isBundle: item.isBundle,
      bundleItems,
      displayNameDetailed:
        item.displayNameDetailed ??
        buildDisplayNameDetailed({
          rawName: item.rawName,
          isBundle: item.isBundle,
          bundleItems,
        }),
      signals: toSignals(item.signals),
      offerText: item.offerText,
      isOffer: this.isOfferItem(item, offerSignals.hasCampaignPattern),
      offerLimitText,
      parseConfidence: item.parseConfidence,
      parseReasons,
      parseReasonsDetailed: this.describeParseReasons(parseReasons),
      matchedProductId: item.matchedProductId,
      matchedProductName: item.matchedProductName,
      matchedVia: normalizedMatchVia,
      matchConfidence: item.matchConfidence ?? 0,
      matchReasons,
      matchReasonsDetailed: this.describeMatchReasons(matchReasons),
    };
  }

  /** Maps parse reason codes to their detailed descriptions. */
  private describeParseReasons(codes: string[]) {
    return codes.map((code) => describeParseReason(code));
  }

  /** Maps match reason codes to their detailed descriptions. */
  private describeMatchReasons(codes: string[]) {
    return codes.map((code) => describeMatchReason(code));
  }

  /**
   * Builds a "Root > Parent > Leaf" path by walking the `parent` chain.
   * Returns null when no category is given.
   */
  private buildCategoryPath(categoryRef?: CategoryNode | null): string | null {
    if (!categoryRef) return null;
    const names: string[] = [];
    let current: CategoryNode | null | undefined = categoryRef;
    while (current) {
      names.unshift(current.name);
      current = current.parent;
    }
    return names.length > 0 ? names.join(' > ') : null;
  }

  /** Converts a loaded session (with items) into the API response shape. */
  private toFlyerImportResponseFromSession(session: {
    id: number;
    sourceFileName?: string | null;
    sourceMimeType?: string | null;
    sourceFileSize?: number | null;
    sourceStorageKey?: string | null;
    items: PersistedFlyerItem[];
  }): FlyerImportResponse {
    return {
      sessionId: session.id,
      retailer: 'willys',
      parserVersion: 'v1',
      // The storage key is used as the availability indicator; the file bytes are not loaded here.
      sourceAvailable: !!session.sourceStorageKey,
      sourceFileName: session.sourceFileName ?? null,
      sourceMimeType: session.sourceMimeType ?? null,
      sourceFileSize: session.sourceFileSize ?? null,
      items: session.items.map((item) => this.toFlyerImportItem(item)),
      warnings: [],
    };
  }

  /**
   * Loads a category with up to two ancestor levels and returns its
   * "Root > Parent > Leaf" path.
   *
   * @throws BadRequestException when the category does not exist.
   */
  private async resolveCategoryPath(categoryId: number): Promise<string> {
    const category = await this.prisma.category.findUnique({
      where: { id: categoryId },
      include: {
        parent: {
          include: {
            parent: true,
          },
        },
      },
    });

    if (!category) {
      throw new BadRequestException(`Kategori med id ${categoryId} hittades inte.`);
    }

    // buildCategoryPath only returns null for a missing category, handled above.
    return this.buildCategoryPath(category) ?? '';
  }

  /** Builds the storage key `flyer/<userId>/<weekKey>/<timestamp>`. */
  private buildSourceStorageKey(userId: number, weekKey: string): string {
    return `flyer/${userId}/${weekKey}/${Date.now()}`;
  }

  /** Trims entries, drops empty ones, and caps both the count and each entry's length. */
  private sanitizeBundleItems(items: string[] | null | undefined): string[] {
    if (!Array.isArray(items)) return [];
    return items
      .map((entry) => String(entry).trim())
      .filter(Boolean)
      .slice(0, this.MAX_BUNDLE_ITEMS)
      .map((entry) => entry.slice(0, this.MAX_BUNDLE_ITEM_LENGTH));
  }

  /** Logs the share of unmatched items and of items that received a category id. */
  private logImportMetrics(items: FlyerImportItem[]): void {
    if (items.length === 0) return;

    const noMatchCount = items.filter((item) => item.matchReasons.includes('no_match')).length;
    const categoryAssignedCount = items.filter((item) => item.categoryId != null).length;
    const noMatchRatio = (noMatchCount / items.length) * 100;
    const categoryAssignedRatio = (categoryAssignedCount / items.length) * 100;

    this.logger.log(
      `Flyer import metrics: no_match=${noMatchCount}/${items.length} (${noMatchRatio.toFixed(1)}%), category_id=${categoryAssignedCount}/${items.length} (${categoryAssignedRatio.toFixed(1)}%)`,
    );
  }
}

/** A category with an optional chain of ancestors, as loaded via nested `parent` includes. */
type CategoryNode = {
  name: string;
  parent?: CategoryNode | null;
};

/** Shape of a stored flyer item row (with optional category relation) read by the service. */
type PersistedFlyerItem = {
  id: number;
  rawName: string;
  normalizedName: string;
  brand: string | null;
  categoryHint: string | null;
  categoryId: number | null;
  categoryRef?: CategoryNode | null;
  price: Prisma.Decimal | null;
  priceUnit: string | null;
  comparisonPrice: Prisma.Decimal | null;
  comparisonUnit: string | null;
  weight: string | null;
  bundleWeight: string | null;
  isBundle: boolean;
  bundleItems: Prisma.JsonValue | null;
  displayNameDetailed?: string | null;
  signals?: Prisma.JsonValue | null;
  offerText: string | null;
  parseConfidence: number;
  parseReasons: Prisma.JsonValue | null;
  matchedProductId: number | null;
  matchedProductName: string | null;
  matchedVia: string | null;
  matchConfidence: number | null;
  matchReasons: Prisma.JsonValue | null;
};
|