d9f992ca9a
- Added structured warning system with `AdminAiWarning` type in backend and Flutter
- Implemented detailed reason descriptors with `FlyerReasonDescriptor` for parse and match operations
- Added `legacyWarnings` field to maintain backward compatibility
- Enhanced AI trace service to collect and format warnings with item-level context
- Updated flyer import services to include detailed reason descriptions in responses
- Added Swedish diacritic preservation for cheese variants (Prästost, Herrgårdsost, Grevéost)
- Implemented UTF-8 content validation for AI responses
- Added new reason code definitions in `reason-codes.ts`
- Updated Flutter UI to display structured warnings with severity indicators
- Added error report generation and copy functionality in admin panel
- Added comprehensive test coverage for new warning system and cheese normalization

BREAKING CHANGE: AI trace warnings are now structured objects instead of simple strings
922 lines
29 KiB
TypeScript
922 lines
29 KiB
TypeScript
import {
|
|
BadRequestException,
|
|
ForbiddenException,
|
|
Injectable,
|
|
Logger,
|
|
NotFoundException,
|
|
ServiceUnavailableException,
|
|
} from '@nestjs/common';
|
|
import { Prisma } from '@prisma/client';
|
|
import { PrismaService } from '../prisma/prisma.service';
|
|
import { normalizeName } from '../common/utils/normalize-name';
|
|
import {
|
|
FlyerImportItem,
|
|
FlyerImportMatchVia,
|
|
FlyerImportResponse,
|
|
} from './dto/flyer-import.response';
|
|
import { TextExtractorService } from './services/text-extractor.service';
|
|
import { AiFlyerParserService } from './services/ai-flyer-parser.service';
|
|
import { FlyerNormalizerService } from './services/flyer-normalizer.service';
|
|
import { describeMatchReason, describeParseReason } from './services/reason-codes';
|
|
|
|
/**
 * One flyer item in the internal parse format.
 *
 * Built by `parseViaInternal` from the normalizer output and consumed by
 * `matchItem` / `parseAndMatch`.
 */
type FlyerParseItem = {
  /** Item name as delivered by the normalizer. */
  rawName: string;
  /** Normalized form of the item name. */
  normalizedName: string;
  brand: string | null;
  /** Category hint for the item, if any. */
  category: string | null;
  price: number | null;
  priceUnit: string | null;
  comparisonPrice: number | null;
  comparisonUnit: string | null;
  weight: string | null;
  bundleWeight: string | null;
  isBundle: boolean;
  bundleItems: string[];
  /** Free-text offer description; also mined for price and limit signals. */
  offerText: string | null;
  /** Parse confidence reported for this item. */
  confidence: number;
  /** Parse reason codes attached to this item. */
  reasonCodes: string[];
};
|
|
|
|
/**
 * Result of the internal flyer parse step (`parseViaInternal`): the parsed
 * items, any warnings, and trace data from the AI parser call.
 */
type FlyerParseResponse = {
  retailer: 'willys';
  parserVersion: 'v1';
  items: FlyerParseItem[];
  /** Human-readable warnings collected during parsing. */
  warnings: string[];
  /** Trace values copied from the AI parser result. */
  trace: {
    prompt: string | null;
    rawOutput: string | null;
    chunkCount: number | null;
    retryCount: number | null;
  };
};
|
|
|
|
/**
 * Price information recovered from an item's free-text offer description
 * by `extractOfferSignals`.
 */
type ExtractedOfferSignals = {
  price: number | null;
  priceUnit: string | null;
  comparisonPrice: number | null;
  comparisonUnit: string | null;
  /** True when the text matches a campaign pattern such as "3 för 20". */
  hasCampaignPattern: boolean;
};
|
|
|
|
/** Minimal product projection (id and names) used when matching flyer items. */
type ProductLite = {
  id: number;
  name: string;
  canonicalName: string | null;
};
|
|
|
|
@Injectable()
|
|
export class FlyerImportService {
|
|
/** Logger scoped to this service. */
private readonly logger = new Logger(FlyerImportService.name);

/** Maximum number of bundle entries kept per item (see `sanitizeBundleItems`). */
private readonly MAX_BUNDLE_ITEMS = 20;

/** Maximum length of a single bundle entry (see `sanitizeBundleItems`). */
private readonly MAX_BUNDLE_ITEM_LENGTH = 120;

/**
 * @param prisma Database access.
 * @param textExtractor Extracts text from the uploaded flyer file.
 * @param aiParser Turns the extracted text into flyer items.
 * @param normalizer Normalizes the items returned by the AI parser.
 */
constructor(
  private readonly prisma: PrismaService,
  private readonly textExtractor: TextExtractorService,
  private readonly aiParser: AiFlyerParserService,
  private readonly normalizer: FlyerNormalizerService,
) {}
|
|
|
|
/**
 * Parses an uploaded flyer file and matches every parsed item against the
 * user's active products and receipt aliases.
 *
 * The matched items are stored as a new flyer session, an AI trace row is
 * written (failures there are only logged), and the stored items are
 * returned together with the parser warnings.
 *
 * @param file Uploaded flyer file.
 * @param userId Id of the user owning the products, aliases and session.
 * @returns Import response describing the newly created session.
 */
async parseAndMatch(file: Express.Multer.File, userId: number): Promise<FlyerImportResponse> {
  const startedAt = Date.now();
  const parsed = await this.parseViaInternal(file);

  const [products, aliases] = await Promise.all([
    this.prisma.product.findMany({
      where: { ownerId: userId, isActive: true },
      select: { id: true, name: true, canonicalName: true },
    }),
    this.prisma.receiptAlias.findMany({
      where: {
        OR: [{ ownerId: userId, isGlobal: false }, { isGlobal: true }],
      },
      select: { receiptName: true, productId: true },
    }),
  ]);

  // Normalized alias name -> product id; the first alias seen for a name wins.
  const aliasToProduct = new Map<string, number>();
  for (const { receiptName, productId } of aliases) {
    const key = normalizeName(receiptName);
    if (key && !aliasToProduct.has(key)) {
      aliasToProduct.set(key, productId);
    }
  }

  const productById = new Map<number, ProductLite>(
    products.map((product): [number, ProductLite] => [product.id, product]),
  );

  const items: FlyerImportItem[] = parsed.items.map((parsedItem) => {
    const match = this.matchItem(parsedItem, products, aliasToProduct, productById);
    // Values recovered from the offer text only fill gaps left by the parser.
    const signals = this.extractOfferSignals(parsedItem.offerText);

    return {
      flyerItemId: null,
      rawName: parsedItem.rawName,
      normalizedName: parsedItem.normalizedName,
      brand: parsedItem.brand,
      category: parsedItem.category,
      categoryId: null,
      price: parsedItem.price ?? signals.price,
      priceUnit: this.normalizeUnit(parsedItem.priceUnit) ?? signals.priceUnit,
      comparisonPrice: parsedItem.comparisonPrice ?? signals.comparisonPrice,
      comparisonUnit: this.normalizeUnit(parsedItem.comparisonUnit) ?? signals.comparisonUnit,
      weight: parsedItem.weight,
      bundleWeight: parsedItem.bundleWeight,
      isBundle: parsedItem.isBundle,
      bundleItems: this.sanitizeBundleItems(parsedItem.bundleItems),
      offerText: parsedItem.offerText,
      isOffer: this.isOfferItem(parsedItem, signals.hasCampaignPattern),
      offerLimitText: this.extractOfferLimitText(parsedItem.offerText),
      parseConfidence: parsedItem.confidence,
      parseReasons: parsedItem.reasonCodes,
      parseReasonsDetailed: this.describeParseReasons(parsedItem.reasonCodes),
      matchedProductId: match.product?.id ?? null,
      matchedProductName: match.product?.name ?? null,
      matchedVia: match.via,
      matchConfidence: match.confidence,
      matchReasons: match.reasons,
      matchReasonsDetailed: this.describeMatchReasons(match.reasons),
    };
  });

  const persisted = await this.persistSessionWithItems(userId, parsed.retailer, items, file);
  const storedCount = persisted.items.length;
  const nothingExtracted = storedCount === 0;

  let traceStatus: 'success' | 'warning' | 'error';
  if (nothingExtracted) {
    traceStatus = 'error';
  } else if (parsed.warnings.length > 0) {
    traceStatus = 'warning';
  } else {
    traceStatus = 'success';
  }

  await this.persistFlyerTrace({
    userId,
    sessionId: persisted.sessionId,
    model: 'ministral-8b-2512',
    prompt: parsed.trace.prompt,
    rawOutput: parsed.trace.rawOutput,
    normalizedOutput: {
      sessionId: persisted.sessionId,
      warnings: parsed.warnings,
      itemCount: storedCount,
      chunkCount: parsed.trace.chunkCount,
      retryCount: parsed.trace.retryCount,
    },
    status: traceStatus,
    error: nothingExtracted ? 'Inga produkter kunde extraheras från flyern.' : null,
    durationMs: Date.now() - startedAt,
  });

  return {
    sessionId: persisted.sessionId,
    retailer: parsed.retailer,
    parserVersion: parsed.parserVersion,
    sourceAvailable: true,
    sourceFileName: file.originalname ?? null,
    sourceMimeType: file.mimetype ?? null,
    sourceFileSize: file.size ?? null,
    items: persisted.items,
    warnings: parsed.warnings,
  };
}
|
|
|
|
/**
 * Returns the stored source file of a flyer session.
 *
 * @param sessionId Id of the flyer session.
 * @param userId Id of the requesting user; must own the session.
 * @returns File name, MIME type, byte length and content of the source file.
 * @throws NotFoundException If the session does not exist or has no stored source file.
 * @throws ForbiddenException If the session belongs to another user.
 */
async getSessionSource(sessionId: number, userId: number): Promise<{
  fileName: string;
  mimeType: string;
  contentLength: number;
  data: Buffer;
}> {
  const session = await this.prisma.flyerSession.findUnique({
    where: { id: sessionId },
    select: {
      userId: true,
      sourceFileName: true,
      sourceMimeType: true,
      sourceFileSize: true,
      sourceData: true,
    },
  });

  if (!session) {
    throw new NotFoundException('Flyer-session hittades inte.');
  }
  if (session.userId !== userId) {
    throw new ForbiddenException('Du saknar åtkomst till denna session.');
  }

  const { sourceData, sourceFileName, sourceMimeType, sourceFileSize } = session;
  if (!sourceData || !sourceFileName || !sourceMimeType) {
    throw new NotFoundException('Källfil saknas för denna flyer-session.');
  }

  const data = Buffer.from(sourceData);
  return {
    fileName: sourceFileName,
    mimeType: sourceMimeType,
    // Fall back to the actual buffer length when no size was recorded.
    contentLength: sourceFileSize ?? data.length,
    data,
  };
}
|
|
|
|
/**
 * Updates the name and/or category of one item in a flyer session.
 *
 * @param sessionId Id of the session the item must belong to.
 * @param itemId Id of the flyer item to update.
 * @param userId Id of the requesting user; must own the session.
 * @param payload Fields to change. `categoryId: null` clears the category.
 * @returns The updated item in response format.
 * @throws NotFoundException If the session or the item (within that session) is missing.
 * @throws ForbiddenException If the session belongs to another user.
 * @throws BadRequestException If the name is blank, the category is unknown,
 *   or the payload contains nothing to update.
 */
async updateSessionItem(
  sessionId: number,
  itemId: number,
  userId: number,
  payload: { rawName?: string; categoryId?: number | null },
): Promise<FlyerImportItem> {
  const session = await this.prisma.flyerSession.findUnique({
    where: { id: sessionId },
    select: { id: true, userId: true },
  });
  if (!session) {
    throw new NotFoundException('Flyer-session hittades inte.');
  }
  if (session.userId !== userId) {
    throw new ForbiddenException('Du saknar åtkomst till denna session.');
  }

  const existing = await this.prisma.flyerItem.findUnique({
    where: { id: itemId },
    select: { id: true, sessionId: true, rawName: true },
  });
  if (!existing || existing.sessionId !== sessionId) {
    throw new NotFoundException('Flyer-rad hittades inte i sessionen.');
  }

  const changes: Prisma.FlyerItemUncheckedUpdateInput = {};

  const { rawName, categoryId } = payload;

  if (typeof rawName === 'string') {
    const newName = rawName.trim();
    if (!newName) {
      throw new BadRequestException('Namn får inte vara tomt.');
    }
    changes.rawName = newName;
    // If the new name normalizes to nothing, keep a normalized form of the old one.
    changes.normalizedName = normalizeName(newName) || normalizeName(existing.rawName);
  }

  if (categoryId !== undefined) {
    // A concrete id is resolved first so an unknown category rejects the request.
    const categoryHint = categoryId === null ? null : await this.resolveCategoryPath(categoryId);
    changes.categoryId = categoryId;
    changes.categoryHint = categoryHint;
  }

  if (Object.keys(changes).length === 0) {
    throw new BadRequestException('Inga giltiga fält att uppdatera.');
  }

  const updated = await this.prisma.flyerItem.update({
    where: { id: itemId },
    data: changes,
    include: {
      categoryRef: {
        include: {
          parent: {
            include: {
              parent: true,
            },
          },
        },
      },
    },
  });

  return this.toFlyerImportItem(updated as any);
}
|
|
|
|
/**
 * Loads one flyer session, including its items ordered by id, for the given user.
 *
 * @param sessionId Id of the session to load.
 * @param userId Id of the requesting user; only that user's sessions are searched.
 * @returns The session in response format.
 * @throws NotFoundException If no such session exists for this user.
 */
async getSession(sessionId: number, userId: number): Promise<FlyerImportResponse> {
  const found = await this.prisma.flyerSession.findFirst({
    where: { id: sessionId, userId },
    select: {
      id: true,
      sourceFileName: true,
      sourceMimeType: true,
      sourceFileSize: true,
      sourceStorageKey: true,
      items: {
        include: {
          categoryRef: {
            include: {
              parent: {
                include: {
                  parent: true,
                },
              },
            },
          },
        },
        orderBy: { id: 'asc' },
      },
    },
  });

  if (found) {
    return this.toFlyerImportResponseFromSession(found);
  }
  throw new NotFoundException('Flyer-session hittades inte.');
}
|
|
|
|
/**
 * Loads the user's most recently created flyer session.
 *
 * @param userId Id of the user whose sessions are searched.
 * @returns The newest session in response format, or an empty response
 *   (`sessionId: null`, no items) when the user has no sessions.
 */
async getLatestSession(userId: number): Promise<FlyerImportResponse> {
  const newest = await this.prisma.flyerSession.findFirst({
    where: { userId },
    orderBy: { createdAt: 'desc' },
    select: {
      id: true,
      sourceFileName: true,
      sourceMimeType: true,
      sourceFileSize: true,
      sourceStorageKey: true,
      items: {
        include: {
          categoryRef: {
            include: {
              parent: {
                include: {
                  parent: true,
                },
              },
            },
          },
        },
        orderBy: { id: 'asc' },
      },
    },
  });

  if (newest) {
    return this.toFlyerImportResponseFromSession(newest);
  }

  // No session yet: answer with an empty placeholder instead of an error.
  return {
    sessionId: null,
    retailer: 'willys',
    parserVersion: 'v1',
    sourceAvailable: false,
    sourceFileName: null,
    sourceMimeType: null,
    sourceFileSize: null,
    items: [],
    warnings: [],
  };
}
|
|
|
|
/**
 * Creates a draft flyer session (including the uploaded file content) and
 * one flyer item row per matched item.
 *
 * NOTE(review): the session and its items are written with separate,
 * sequential calls and no transaction, so a failure part-way leaves a
 * partially populated session behind.
 *
 * @param userId Owner of the new session.
 * @param retailer Retailer identifier stored on the session.
 * @param items Matched items to store, in order.
 * @param file Uploaded source file stored with the session.
 * @returns The new session id and the items with `flyerItemId` filled in.
 */
private async persistSessionWithItems(
  userId: number,
  retailer: 'willys',
  items: FlyerImportItem[],
  file: Express.Multer.File,
): Promise<{ sessionId: number; items: FlyerImportItem[] }> {
  const weekKey = this.toWeekKey(new Date());
  const toDecimal = (value: number | null | undefined): Prisma.Decimal | null =>
    value != null ? new Prisma.Decimal(value) : null;

  const { id: sessionId } = await this.prisma.flyerSession.create({
    data: {
      userId,
      retailer,
      weekKey,
      status: 'draft',
      sourceFileName: file.originalname ?? null,
      sourceMimeType: file.mimetype ?? null,
      sourceFileSize: file.size ?? file.buffer.length,
      sourceStorageKey: this.buildSourceStorageKey(userId, weekKey),
      sourceData: Buffer.from(file.buffer),
    },
    select: { id: true },
  });

  const stored: FlyerImportItem[] = [];
  for (const entry of items) {
    const row = await this.prisma.flyerItem.create({
      data: {
        sessionId,
        rawName: entry.rawName,
        normalizedName: entry.normalizedName,
        brand: entry.brand,
        categoryHint: entry.category,
        categoryId: entry.categoryId,
        price: toDecimal(entry.price),
        priceUnit: entry.priceUnit,
        comparisonPrice: toDecimal(entry.comparisonPrice),
        comparisonUnit: entry.comparisonUnit,
        weight: entry.weight,
        bundleWeight: entry.bundleWeight,
        isBundle: entry.isBundle,
        bundleItems: entry.bundleItems,
        offerText: entry.offerText,
        parseConfidence: entry.parseConfidence,
        parseReasons: entry.parseReasons,
        matchedProductId: entry.matchedProductId,
        matchedProductName: entry.matchedProductName,
        matchedVia: entry.matchedVia,
        matchConfidence: entry.matchConfidence,
        matchReasons: entry.matchReasons,
      },
      select: { id: true },
    });

    stored.push({ ...entry, flyerItemId: row.id });
  }

  return { sessionId, items: stored };
}
|
|
|
|
/**
 * Formats a date as a week key of the form `YYYY-Www`.
 *
 * The date's local calendar day is shifted to the Thursday of its
 * Monday-based week; the year and week number are taken from that Thursday.
 *
 * @param date Date whose local year/month/day are used.
 * @returns Week key, e.g. `2025-W07`.
 */
private toWeekKey(date: Date): string {
  const MS_PER_DAY = 86400000;

  const thursday = new Date(Date.UTC(date.getFullYear(), date.getMonth(), date.getDate()));
  // getUTCDay() returns 0 for Sunday; treat Sunday as day 7 so weeks start on Monday.
  const weekday = thursday.getUTCDay() || 7;
  thursday.setUTCDate(thursday.getUTCDate() + 4 - weekday);

  const weekYear = thursday.getUTCFullYear();
  const januaryFirst = Date.UTC(weekYear, 0, 1);
  const dayOfYear = (thursday.getTime() - januaryFirst) / MS_PER_DAY + 1;
  const weekNumber = Math.ceil(dayOfYear / 7);

  return `${weekYear}-W${String(weekNumber).padStart(2, '0')}`;
}
|
|
|
|
/**
 * Matches one parsed flyer item against the user's products.
 *
 * Strategies are tried in order:
 *  1. alias: the normalized name equals a known receipt alias (confidence 1);
 *  2. exact: the normalized name equals a product's normalized name or
 *     canonical name (confidence 0.96);
 *  3. token: best token overlap, accepted from confidence 0.66 upwards.
 *
 * An alias hit whose product is not in `productById` ends matching with
 * `alias_points_to_missing_product`; later strategies are not tried.
 *
 * @param item Parsed item to match.
 * @param products Candidate products.
 * @param aliasToProduct Normalized alias name -> product id.
 * @param productById Candidate products indexed by id.
 * @returns The matched product (or null), the strategy used, a confidence
 *   and the reason codes explaining the outcome.
 */
private matchItem(
  item: FlyerParseItem,
  products: ProductLite[],
  aliasToProduct: Map<string, number>,
  productById: Map<number, ProductLite>,
): {
  product: ProductLite | null;
  via: FlyerImportMatchVia;
  confidence: number;
  reasons: string[];
} {
  // Normalization and tokenization use the same source name, so an item with
  // an empty rawName but a usable normalizedName can still match on tokens.
  const sourceName = item.rawName || item.normalizedName;
  const normalized = normalizeName(sourceName);
  if (!normalized) {
    return { product: null, via: 'none', confidence: 0, reasons: ['empty_name'] };
  }

  const aliasedProductId = aliasToProduct.get(normalized);
  // Explicit undefined check: "no alias" must not be confused with a falsy id.
  if (aliasedProductId !== undefined) {
    const product = productById.get(aliasedProductId) ?? null;
    return {
      product,
      via: product ? 'alias' : 'none',
      confidence: product ? 1 : 0,
      reasons: product ? ['alias_exact'] : ['alias_points_to_missing_product'],
    };
  }

  for (const product of products) {
    const pn = normalizeName(product.name);
    const cn = product.canonicalName ? normalizeName(product.canonicalName) : null;
    if (normalized === pn || (cn && normalized === cn)) {
      return {
        product,
        via: 'exact',
        confidence: 0.96,
        reasons: ['normalized_exact'],
      };
    }
  }

  let best: { product: ProductLite; confidence: number; overlap: number } | null = null;
  const itemTokens = this.tokenize(sourceName);
  for (const product of products) {
    const productTokens = this.tokenize(product.canonicalName ?? product.name);
    const overlap = this.tokenOverlap(itemTokens, productTokens);
    if (overlap <= 0) continue;
    const confidence = Math.min(0.92, 0.5 + overlap * 0.4);
    // Strict comparison: on equal confidence the earlier product is kept.
    if (!best || confidence > best.confidence) {
      best = { product, confidence, overlap };
    }
  }

  if (best && best.confidence >= 0.66) {
    return {
      product: best.product,
      via: 'token',
      confidence: best.confidence,
      reasons: [`token_overlap:${best.overlap.toFixed(2)}`],
    };
  }

  return {
    product: null,
    via: 'none',
    confidence: 0,
    reasons: ['no_match'],
  };
}
|
|
|
|
/**
 * Splits a name into lowercase tokens of at least three characters.
 *
 * Digits, a-z and the accented Latin-1 letters (å, ä, ö, é, ü, ...) are
 * token characters; everything else separates tokens. Accented letters
 * other than å/ä/ö are kept so that names such as "Grevéost" stay a single
 * token instead of being split at the accent.
 *
 * @param value Name to tokenize.
 * @returns Tokens in order of appearance (duplicates are kept).
 */
private tokenize(value: string): string[] {
  return value
    // Compose "e" + combining accent into one code point so it is not a separator.
    .normalize('NFC')
    .toLowerCase()
    // \u00df-\u00f6 and \u00f8-\u00ff are the lowercase Latin-1 letters; \u00f7 (÷) is excluded.
    .split(/[^a-z0-9\u00df-\u00f6\u00f8-\u00ff]+/)
    .filter((part) => part.length >= 3);
}
|
|
|
|
/**
 * Computes the overlap of two token lists as
 * (distinct shared tokens) / (distinct tokens in either list).
 *
 * @param a First token list.
 * @param b Second token list.
 * @returns A value between 0 and 1; 0 when either list is empty.
 */
private tokenOverlap(a: string[], b: string[]): number {
  if (a.length === 0 || b.length === 0) {
    return 0;
  }

  const left = new Set(a);
  const right = new Set(b);
  const shared = [...left].filter((token) => right.has(token)).length;
  // |A ∪ B| = |A| + |B| - |A ∩ B|
  const unionSize = left.size + right.size - shared;

  return unionSize === 0 ? 0 : shared / unionSize;
}
|
|
|
|
/**
 * Decides whether a parsed item counts as an offer: it has a price, a
 * comparison price, non-blank offer text, or a campaign pattern was found.
 *
 * @param item Parsed item to inspect.
 * @param hasCampaignPattern Whether the offer text matched a campaign pattern.
 */
private isOfferItem(item: FlyerParseItem, hasCampaignPattern: boolean): boolean {
  if (item.price != null || item.comparisonPrice != null) {
    return true;
  }
  const hasOfferText = Boolean(item.offerText?.trim());
  return hasOfferText || hasCampaignPattern;
}
|
|
|
|
/**
 * Extracts the purchase-limit phrase (e.g. a "max ... per hushåll" clause)
 * from an offer text.
 *
 * Three patterns are tried in order and the first hit is returned, trimmed:
 *  1. a phrase starting with "max", "högst" or "begränsat antal";
 *  2. a clause containing "per hushåll/kund" or "/hushåll" / "/kund";
 *  3. any clause mentioning "hushåll" or "kund".
 *
 * @param offerText Offer text, or null.
 * @returns The limit phrase, or null when nothing matches.
 */
private extractOfferLimitText(offerText: string | null): string | null {
  if (!offerText) return null;

  const normalized = offerText.replace(/\s+/g, ' ').trim();
  if (!normalized) return null;

  const patterns: RegExp[] = [
    /(?:max|högst|begränsat\s+antal)\s+[^,.;]*(?:hushåll|kund|köp)?(?:\s*\/\s*(?:hushåll|kund))?/i,
    /[^,.;]*(?:per\s+(?:hushåll|kund)|\/\s*(?:hushåll|kund))[^,.;]*/i,
    /[^,.;]*(?:hushåll|kund)[^,.;]*/i,
  ];

  for (const pattern of patterns) {
    const hit = normalized.match(pattern);
    if (hit?.[0]) {
      return hit[0].trim();
    }
  }

  return null;
}
|
|
|
|
/**
 * Recovers price information from a free-text offer description.
 *
 * Detects:
 *  - a campaign pattern ("3 för 20", "ta 3 betala för 2");
 *  - a comparison price introduced by "jfr", "jfr pris", "jfr-pris",
 *    "jämförpris" or "jamforpris", followed by an amount and a unit;
 *  - a price, either "amount [kr] [/] unit" or "amount kr".
 *
 * The comparison-price fragment is removed before the price is searched, so
 * a text that only states a comparison price is not also reported as the
 * price. A unit only counts when it is not the start of a longer word
 * ("25 kr lösvikt" does not yield the unit "l").
 *
 * @param offerText Offer text, or null.
 * @returns Extracted signals; fields that could not be found are null/false.
 */
private extractOfferSignals(offerText: string | null): ExtractedOfferSignals {
  const signals: ExtractedOfferSignals = {
    price: null,
    priceUnit: null,
    comparisonPrice: null,
    comparisonUnit: null,
    hasCampaignPattern: false,
  };

  if (!offerText?.trim()) return signals;

  // Lowercased once here, so the patterns below need no case-insensitive flag.
  const normalized = offerText.replace(/\s+/g, ' ').trim().toLowerCase();

  signals.hasCampaignPattern =
    /(\b\d+\s*för\s*\d+[,.:]?\d*\b)|(ta\s*\d+\s*betala\s*för\s*\d+)/.test(normalized);

  const comparison = normalized.match(
    /(?:jfr[\s.-]*pris|jamforpris|jämförpris|jfr)\s*:?\s*(\d{1,3}[:.,]\d{2}|\d{1,3})\s*(?:kr)?\s*\/?\s*(kg|hg|g|l|dl|cl|ml|styck|st|pkt|förp|fp)(?![a-zåäö])/,
  );

  // Text searched for the actual price: everything except the comparison fragment.
  let priceText = normalized;
  if (comparison) {
    signals.comparisonPrice = this.parseSwedishPrice(comparison[1]);
    signals.comparisonUnit = this.normalizeUnit(comparison[2]);

    const start = comparison.index ?? 0;
    const end = start + comparison[0].length;
    priceText = `${normalized.slice(0, start)} ${normalized.slice(end)}`;
  }

  const priceWithUnit = priceText.match(
    /(\d{1,3}[:.,]\d{2}|\d{1,3})\s*(?:kr)?\s*\/?\s*(kg|hg|g|l|dl|cl|ml|styck|st|pkt|förp|fp)(?![a-zåäö])/,
  );
  if (priceWithUnit) {
    signals.price = this.parseSwedishPrice(priceWithUnit[1]);
    signals.priceUnit = this.normalizeUnit(priceWithUnit[2]);
  } else {
    const priceOnly = priceText.match(/(\d{1,3}[:.,]\d{2}|\d{1,3})\s*kr\b/);
    if (priceOnly) {
      signals.price = this.parseSwedishPrice(priceOnly[1]);
    }
  }

  return signals;
}
|
|
|
|
/**
 * Parses a price written with ":" or "," as decimal separator
 * (e.g. "12:90" or "12,90").
 *
 * Only the first ":" and the first "," are converted to ".".
 *
 * @param value Price text, or null/undefined.
 * @returns The parsed number, or null for empty or unparsable input.
 */
private parseSwedishPrice(value: string | null | undefined): number | null {
  if (!value) {
    return null;
  }
  const dotted = value.trim().replace(/:/, '.').replace(/,/, '.');
  const amount = Number.parseFloat(dotted);
  return Number.isFinite(amount) ? amount : null;
}
|
|
|
|
/**
 * Normalizes a unit string: trims, lowercases, removes dots and maps known
 * synonyms ("styck" -> "st"; "fp", "forp", "förp" -> "pkt").
 *
 * Units that are not known synonyms are returned in their cleaned form
 * unchanged; unknown units are not rejected.
 *
 * @param unit Unit text, or null/undefined.
 * @returns The normalized unit, or null when the input is empty.
 */
private normalizeUnit(unit: string | null | undefined): string | null {
  if (!unit) return null;

  const cleaned = unit.trim().toLowerCase().replace(/\./g, '');
  if (!cleaned) return null;

  switch (cleaned) {
    case 'styck':
      return 'st';
    case 'fp':
    case 'forp':
    case 'förp':
      return 'pkt';
    default:
      return cleaned;
  }
}
|
|
|
|
/**
 * Runs the internal parse pipeline on an uploaded flyer: text extraction,
 * AI parsing, normalization, and conversion to `FlyerParseItem`s.
 *
 * @param file Uploaded flyer file.
 * @returns Parsed items, warnings (one is added when no items were found)
 *   and the AI parser trace.
 * @throws BadRequestException Re-thrown as is, or wrapping any other failure.
 * @throws ServiceUnavailableException Re-thrown as is.
 */
private async parseViaInternal(file: Express.Multer.File): Promise<FlyerParseResponse> {
  try {
    this.logger.debug(`Parsing flyer file: ${file.originalname}`);

    // 1. Extract text from the PDF/image.
    const text = await this.textExtractor.extractText(
      file.buffer,
      file.mimetype,
      file.originalname,
    );

    // 2. Send the text to the AI parser.
    const aiParseResult = await this.aiParser.parseWithAI(text);

    // 3. Normalize the result.
    const normalizedItems = this.normalizer.normalize(aiParseResult.items);

    // 4. Convert to the internal FlyerParseItem format.
    const items: FlyerParseItem[] = normalizedItems.map((entry) => ({
      rawName: entry.rawName,
      normalizedName: entry.normalizedName,
      brand: entry.brand,
      category: entry.categoryHint,
      price: entry.price,
      priceUnit: entry.priceUnit,
      comparisonPrice: entry.comparisonPrice,
      comparisonUnit: entry.comparisonUnit,
      weight: entry.weight,
      bundleWeight: entry.bundleWeight,
      isBundle: entry.isBundle,
      bundleItems: entry.bundleItems,
      offerText: entry.offerText,
      confidence: entry.parseConfidence,
      reasonCodes: entry.parseReasons,
    }));

    const warnings: string[] =
      items.length === 0 ? ['Inga produkter kunde extraheras från flyern.'] : [];

    const { prompt, rawOutput, chunkCount, retryCount } = aiParseResult.trace;
    return {
      retailer: 'willys',
      parserVersion: 'v1',
      items,
      warnings,
      trace: { prompt, rawOutput, chunkCount, retryCount },
    };
  } catch (err) {
    // These two exception types pass through untouched.
    const passThrough =
      err instanceof BadRequestException || err instanceof ServiceUnavailableException;
    if (passThrough) {
      throw err;
    }

    this.logger.error(`Internal flyer parse failed: ${String(err)}`);
    const detail = err instanceof Error ? err.message : String(err);
    throw new BadRequestException(`Fel vid tolkning av flyer: ${detail}`);
  }
}
|
|
|
|
/**
 * Stores an AI trace row for a flyer import.
 *
 * Best effort: a failure to write the trace is logged as a warning and
 * never propagated to the caller.
 *
 * @param params Trace values; `normalizedOutput` is left out of the row
 *   when it is null.
 */
private async persistFlyerTrace(params: {
  userId: number;
  sessionId: number;
  model: string;
  prompt: string | null;
  rawOutput: string | null;
  normalizedOutput: Record<string, unknown> | null;
  status: 'success' | 'warning' | 'error';
  error: string | null;
  durationMs: number | null;
}): Promise<void> {
  const { userId, sessionId, model, prompt, rawOutput, normalizedOutput, status, error, durationMs } =
    params;

  try {
    await this.prisma.aiTrace.create({
      data: {
        source: 'flyer',
        userId,
        sessionId,
        model,
        prompt,
        rawOutput,
        ...(normalizedOutput != null
          ? { normalizedOutput: normalizedOutput as Prisma.InputJsonValue }
          : {}),
        status,
        error,
        durationMs,
      },
    });
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    this.logger.warn(`Kunde inte spara flyer AI-trace: ${reason}`);
  }
}
|
|
|
|
/**
 * Converts a stored flyer item row (optionally with its category chain)
 * into the response format.
 *
 * Missing price/unit values are filled from signals found in the offer
 * text, JSON columns are read as string arrays, and an unrecognised
 * `matchedVia` value becomes `'none'`.
 *
 * @param item Stored flyer item.
 * @returns The item in response format.
 */
private toFlyerImportItem(item: {
  id: number;
  rawName: string;
  normalizedName: string;
  brand: string | null;
  categoryHint: string | null;
  categoryId: number | null;
  categoryRef?: {
    name: string;
    parent?: {
      name: string;
      parent?: {
        name: string;
      } | null;
    } | null;
  } | null;
  price: Prisma.Decimal | null;
  priceUnit: string | null;
  comparisonPrice: Prisma.Decimal | null;
  comparisonUnit: string | null;
  weight: string | null;
  bundleWeight: string | null;
  isBundle: boolean;
  bundleItems: Prisma.JsonValue | null;
  offerText: string | null;
  parseConfidence: number;
  parseReasons: Prisma.JsonValue | null;
  matchedProductId: number | null;
  matchedProductName: string | null;
  matchedVia: string | null;
  matchConfidence: number | null;
  matchReasons: Prisma.JsonValue | null;
}): FlyerImportItem {
  // Anything that is not a JSON array is treated as an empty list.
  const asStrings = (value: Prisma.JsonValue | null): string[] =>
    Array.isArray(value) ? value.map((entry) => String(entry)) : [];

  let matchedVia: FlyerImportMatchVia;
  switch (item.matchedVia) {
    case 'alias':
    case 'exact':
    case 'token':
      matchedVia = item.matchedVia;
      break;
    default:
      matchedVia = 'none';
  }

  const signals = this.extractOfferSignals(item.offerText);
  const parseReasons = asStrings(item.parseReasons);
  const matchReasons = asStrings(item.matchReasons);

  const hasStoredPrice = item.price != null || item.comparisonPrice != null;
  const isOffer = hasStoredPrice || !!item.offerText?.trim() || signals.hasCampaignPattern;

  return {
    flyerItemId: item.id,
    rawName: item.rawName,
    normalizedName: item.normalizedName,
    brand: item.brand,
    // Prefer the path built from the linked category; fall back to the stored hint.
    category: this.buildCategoryPath(item.categoryRef) ?? item.categoryHint,
    categoryId: item.categoryId,
    price: item.price != null ? item.price.toNumber() : signals.price,
    priceUnit: this.normalizeUnit(item.priceUnit) ?? signals.priceUnit,
    comparisonPrice:
      item.comparisonPrice != null ? item.comparisonPrice.toNumber() : signals.comparisonPrice,
    comparisonUnit: this.normalizeUnit(item.comparisonUnit) ?? signals.comparisonUnit,
    weight: item.weight,
    bundleWeight: item.bundleWeight,
    isBundle: item.isBundle,
    bundleItems: this.sanitizeBundleItems(asStrings(item.bundleItems)),
    offerText: item.offerText,
    isOffer,
    offerLimitText: this.extractOfferLimitText(item.offerText),
    parseConfidence: item.parseConfidence,
    parseReasons,
    parseReasonsDetailed: this.describeParseReasons(parseReasons),
    matchedProductId: item.matchedProductId,
    matchedProductName: item.matchedProductName,
    matchedVia,
    matchConfidence: item.matchConfidence ?? 0,
    matchReasons,
    matchReasonsDetailed: this.describeMatchReasons(matchReasons),
  };
}
|
|
|
|
/** Maps each parse reason code to its descriptor via `describeParseReason`. */
private describeParseReasons(codes: string[]) {
  return Array.from(codes, (code) => describeParseReason(code));
}
|
|
|
|
/** Maps each match reason code to its descriptor via `describeMatchReason`. */
private describeMatchReasons(codes: string[]) {
  return Array.from(codes, (code) => describeMatchReason(code));
}
|
|
|
|
/**
 * Builds a "Root > Child > Leaf" path from a category and its loaded
 * ancestors.
 *
 * @param categoryRef Category with optional parent chain, or null/undefined.
 * @returns The path with the outermost ancestor first, or null when no
 *   category is given.
 */
private buildCategoryPath(categoryRef?: {
  name: string;
  parent?: {
    name: string;
    parent?: { name: string } | null;
  } | null;
} | null): string | null {
  if (!categoryRef) return null;

  type CategoryNode = { name: string; parent?: CategoryNode | null };

  // Walk leaf -> root, then reverse to get root -> leaf.
  const leafToRoot: string[] = [];
  for (let node: CategoryNode | null = categoryRef; node; node = node.parent ?? null) {
    leafToRoot.push(node.name);
  }

  return leafToRoot.length > 0 ? leafToRoot.reverse().join(' > ') : null;
}
|
|
|
|
/**
 * Converts a stored flyer session with its items into the response format.
 *
 * The source is reported as available when the session has a storage key;
 * warnings are not stored with the session, so the list is always empty.
 *
 * @param session Stored session including its items.
 * @returns The session in response format.
 */
private toFlyerImportResponseFromSession(session: {
  id: number;
  sourceFileName?: string | null;
  sourceMimeType?: string | null;
  sourceFileSize?: number | null;
  sourceStorageKey?: string | null;
  items: Array<{
    id: number;
    rawName: string;
    normalizedName: string;
    brand: string | null;
    categoryHint: string | null;
    categoryId: number | null;
    categoryRef?: {
      name: string;
      parent?: {
        name: string;
        parent?: {
          name: string;
        } | null;
      } | null;
    } | null;
    price: Prisma.Decimal | null;
    priceUnit: string | null;
    comparisonPrice: Prisma.Decimal | null;
    comparisonUnit: string | null;
    weight: string | null;
    bundleWeight: string | null;
    isBundle: boolean;
    bundleItems: Prisma.JsonValue | null;
    offerText: string | null;
    parseConfidence: number;
    parseReasons: Prisma.JsonValue | null;
    matchedProductId: number | null;
    matchedProductName: string | null;
    matchedVia: string | null;
    matchConfidence: number | null;
    matchReasons: Prisma.JsonValue | null;
  }>;
}): FlyerImportResponse {
  const { id, sourceFileName, sourceMimeType, sourceFileSize, sourceStorageKey } = session;
  const items = session.items.map((row) => this.toFlyerImportItem(row));

  return {
    sessionId: id,
    retailer: 'willys',
    parserVersion: 'v1',
    sourceAvailable: !!sourceStorageKey,
    sourceFileName: sourceFileName ?? null,
    sourceMimeType: sourceMimeType ?? null,
    sourceFileSize: sourceFileSize ?? null,
    items,
    warnings: [],
  };
}
|
|
|
|
/**
 * Loads a category with up to two ancestor levels and returns its path as
 * "Root > Child > Leaf".
 *
 * @param categoryId Id of the category to resolve.
 * @returns The category path, outermost loaded ancestor first.
 * @throws BadRequestException If the category does not exist.
 */
private async resolveCategoryPath(categoryId: number): Promise<string> {
  const category = await this.prisma.category.findUnique({
    where: { id: categoryId },
    include: {
      parent: {
        include: {
          parent: true,
        },
      },
    },
  });

  if (!category) {
    throw new BadRequestException(`Kategori med id ${categoryId} hittades inte.`);
  }

  // Walk leaf -> root along the loaded parents, then reverse.
  const leafToRoot: string[] = [];
  let node: { name: string; parent: any } | null = category as any;
  while (node) {
    leafToRoot.push(node.name);
    node = node.parent;
  }

  return leafToRoot.reverse().join(' > ');
}
|
|
|
|
/**
 * Builds the storage key for an uploaded flyer source:
 * `flyer/<userId>/<weekKey>/<current timestamp in ms>`.
 */
private buildSourceStorageKey(userId: number, weekKey: string): string {
  const segments = ['flyer', userId, weekKey, Date.now()];
  return segments.join('/');
}
|
|
|
|
/**
 * Cleans a list of bundle entries: stringifies and trims each entry, drops
 * empty ones, keeps at most `MAX_BUNDLE_ITEMS` entries and cuts each entry
 * to `MAX_BUNDLE_ITEM_LENGTH` characters.
 *
 * @param items Raw bundle entries; anything that is not an array yields [].
 * @returns The cleaned entries.
 */
private sanitizeBundleItems(items: string[] | null | undefined): string[] {
  if (!Array.isArray(items)) {
    return [];
  }

  const nonEmpty = items
    .map((entry) => String(entry).trim())
    .filter((entry) => entry.length > 0);
  const limited = nonEmpty.slice(0, this.MAX_BUNDLE_ITEMS);

  return limited.map((entry) => entry.slice(0, this.MAX_BUNDLE_ITEM_LENGTH));
}
|
|
}
|