Files
recipe-app/backend/src/receipt-import/receipt-import.service.ts
T

372 lines
13 KiB
TypeScript

import {
BadRequestException,
Injectable,
Logger,
ServiceUnavailableException,
} from '@nestjs/common';
import { PrismaService } from '../prisma/prisma.service';
import { ParsedReceiptItem } from './dto/parsed-receipt-item.dto';
import { AiService, CategorySuggestion } from '../ai/ai.service';
import { CategoriesService } from '../categories/categories.service';
/**
 * Base URL of the importer microservice.
 *
 * Read from the IMPORTER_SERVICE_URL environment variable; an unset or empty
 * value falls back to the default address below.
 */
const { IMPORTER_SERVICE_URL: importerUrlOverride } = process.env;
const IMPORTER_SERVICE_URL = importerUrlOverride || 'http://importer-api:3001';
/**
 * Swedish preparation/state words (smoked, boiled, grilled, fried, sliced,
 * shredded, frozen, fresh). The word matcher in this file looks tokens up in
 * this set to decide whether a product word counts as a weak descriptor.
 * Lookups use the raw token, which is why "rökt" and "färsk" are listed both
 * with and without diacritics.
 */
const WEAK_DESCRIPTOR_WORDS: readonly string[] = [
  'rokt',
  'rökt',
  'kokt',
  'grillad',
  'stekt',
  'skivad',
  'strimlad',
  'fryst',
  'farsk',
  'färsk',
];
const WEAK_DESCRIPTORS = new Set<string>(WEAK_DESCRIPTOR_WORDS);
/**
 * Splits a name into lowercase word tokens.
 *
 * The value is lowercased and NFC-composed, then split on every run of
 * characters that is neither a Unicode letter nor a Unicode digit. Composing
 * first keeps decomposed input ("a" + combining diaeresis) in one piece, and
 * splitting on letters in general keeps accented words such as "crème" whole
 * instead of breaking them into fragments that the length filter then drops.
 *
 * @param value Text to tokenize.
 * @returns Tokens of at least three characters, in order of appearance.
 */
function tokenize(value: string): string[] {
  return value
    .toLowerCase()
    .normalize('NFC')
    .split(/[^\p{L}\p{N}]+/u)
    .filter((word) => word.length >= 3);
}
/**
 * Removes diacritics from a token so that spellings with and without accents
 * compare equal (for example "grädde" and "gradde").
 *
 * The token is decomposed (NFD) and all combining marks in U+0300–U+036F are
 * stripped. This covers å/ä/ö/é/è as well as any other accented letter that
 * has a canonical decomposition, and also input that arrives already
 * decomposed.
 *
 * @param s Token to normalize; case is left unchanged.
 * @returns The token without combining diacritical marks.
 */
function normalizeToken(s: string): string {
  return s.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
}
/**
 * Produces the plain-ASCII form that the keyword rules are tested against:
 * lowercase, diacritics removed, and every run of characters other than
 * a–z / 0–9 collapsed into a single space, with no leading or trailing space.
 *
 * @param value Text to normalize.
 * @returns The normalized text.
 */
function normalizeForRules(value: string): string {
  const lowered = value.toLowerCase();
  // Decompose, then drop the combining marks (U+0300–U+036F).
  const withoutMarks = lowered.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
  const spaced = withoutMarks.replace(/[^a-z0-9]+/g, ' ');
  return spaced.trim();
}
/**
 * Imports a receipt: the uploaded file is parsed by the importer service, the
 * parsed lines are matched against stored receipt aliases and active products,
 * and (for premium users) lines without any product link get a category
 * suggestion.
 */
@Injectable()
export class ReceiptImportService {
  private readonly logger = new Logger(ReceiptImportService.name);

  constructor(
    private readonly prisma: PrismaService,
    private readonly aiService: AiService,
    private readonly categoriesService: CategoriesService,
  ) {}

  /**
   * Parses an uploaded receipt and links its lines to products.
   *
   * @param file Uploaded receipt file.
   * @param isPremium When true, lines without a matched or suggested product
   *   are additionally given a category suggestion.
   * @returns The parsed items, enriched with match/suggestion fields.
   * @throws ServiceUnavailableException when the importer cannot be reached,
   *   answers with a non-4xx error, or returns an unusable body.
   * @throws BadRequestException when the importer answers with a 4xx status.
   */
  async parseReceipt(file: Express.Multer.File, isPremium = false): Promise<ParsedReceiptItem[]> {
    // Step 1: delegate parsing to the importer microservice.
    const rawItems = await this.parseReceiptViaImporter(file);
    // Step 2: match against the product database (needs DB access, so it stays here).
    const matched = await this.matchProducts(rawItems);
    // Step 3: category suggestions for premium users.
    if (isPremium) {
      return this.enrichWithAiCategories(matched);
    }
    return matched;
  }

  /**
   * Posts the file as multipart form data to the importer's parse endpoint and
   * returns the parsed items.
   *
   * @throws ServiceUnavailableException on network failure, on a non-4xx error
   *   status, or when the success body is not a JSON array.
   * @throws BadRequestException on a 4xx status (using the importer's
   *   `message` when the error body provides one).
   */
  private async parseReceiptViaImporter(file: Express.Multer.File): Promise<ParsedReceiptItem[]> {
    const form = new FormData();
    form.append(
      'file',
      new Blob([new Uint8Array(file.buffer)], { type: file.mimetype }),
      file.originalname,
    );

    let response: Response;
    try {
      response = await fetch(`${IMPORTER_SERVICE_URL}/api/receipt-import/parse`, {
        method: 'POST',
        body: form,
      });
    } catch (err) {
      this.logger.error(`Kunde inte nå importer-api för kvittoparsning: ${err}`);
      throw new ServiceUnavailableException(
        'Import-tjänsten är inte tillgänglig. Försök igen senare.',
      );
    }

    if (!response.ok) {
      let message = `Importer svarade ${response.status}`;
      try {
        const body = (await response.json()) as { message?: string };
        if (body.message) message = body.message;
      } catch {
        // The error body is optional; keep the status-based message if it cannot be read.
      }
      this.logger.error(`Importer-api kvittoparsfel: ${message}`);
      if (response.status >= 400 && response.status < 500) {
        throw new BadRequestException(message);
      }
      throw new ServiceUnavailableException(message);
    }

    // A 2xx answer is only usable if it is valid JSON holding a list. Anything
    // else is reported as an unavailable service instead of leaking a raw
    // parse error or passing a non-array on to the matching step.
    let payload: unknown;
    try {
      payload = await response.json();
    } catch (err) {
      this.logger.error(`Importer-api returnerade ett oläsbart svar: ${err}`);
      throw new ServiceUnavailableException(
        'Import-tjänsten är inte tillgänglig. Försök igen senare.',
      );
    }
    if (!Array.isArray(payload)) {
      this.logger.error('Importer-api returnerade ett oväntat svar (förväntade en lista).');
      throw new ServiceUnavailableException(
        'Import-tjänsten är inte tillgänglig. Försök igen senare.',
      );
    }
    return payload as ParsedReceiptItem[];
  }

  /**
   * Links each parsed item to a product.
   *
   * An item whose lowercased, trimmed `rawName` equals a stored alias gets
   * `matchedProductId`/`matchedProductName`. Every other item with a name gets
   * `suggestedProductId`/`suggestedProductName` from word-based matching
   * (both `undefined` when nothing scores high enough). Items without a name
   * are returned unchanged.
   */
  private async matchProducts(
    items: ParsedReceiptItem[],
  ): Promise<ParsedReceiptItem[]> {
    // Nothing to match: skip the database round trips.
    if (items.length === 0) return [];

    // Fetch aliases and products in parallel.
    const [aliases, products] = await Promise.all([
      this.prisma.receiptAlias.findMany({
        select: { receiptName: true, productId: true, product: { select: { id: true, name: true, canonicalName: true } } },
      }),
      this.prisma.product.findMany({
        where: { isActive: true },
        select: { id: true, name: true, canonicalName: true },
      }),
    ]);

    // Index aliases once instead of scanning the list for every item. The
    // first alias for a given receipt name wins, as a linear find() would.
    type AliasRow = (typeof aliases)[number];
    const aliasByReceiptName = new Map<AliasRow['receiptName'], AliasRow>();
    for (const alias of aliases) {
      if (!aliasByReceiptName.has(alias.receiptName)) {
        aliasByReceiptName.set(alias.receiptName, alias);
      }
    }

    return items.map((item) => {
      const raw = (item.rawName ?? '').toLowerCase().trim();
      if (!raw) return item;

      // 1. Alias match (certain; the user does not need to confirm it).
      const alias = aliasByReceiptName.get(raw);
      if (alias) {
        return {
          ...item,
          matchedProductId: alias.product.id,
          matchedProductName: alias.product.canonicalName ?? alias.product.name,
        };
      }

      // 2. Word-based match (a suggestion that needs confirmation).
      const suggestion = this.findWordMatch(raw, products);
      return {
        ...item,
        suggestedProductId: suggestion?.id,
        suggestedProductName: suggestion
          ? (suggestion.canonicalName ?? suggestion.name)
          : undefined,
      };
    });
  }

  /**
   * Picks the product whose name words best overlap the receipt name.
   *
   * Scoring per product (canonical name preferred over name):
   * - +5 when the receipt name contains the whole product name;
   * - per product word that equals a receipt word (with or without
   *   diacritics): +8, or +1 if the word is a weak descriptor;
   * - per non-weak product word of at least 4 characters that contains, or is
   *   contained in, a receipt word: +3.
   *
   * A product qualifies only with a strong signal (see below) and a score of
   * at least 8. The highest score wins; on a tie the earlier product is kept.
   *
   * @param raw Lowercased, trimmed receipt name.
   * @param products Candidate products.
   * @returns The best candidate, or `undefined` when none qualifies.
   */
  private findWordMatch(
    raw: string,
    products: { id: number; name: string; canonicalName: string | null }[],
  ): { id: number; name: string; canonicalName: string | null } | undefined {
    // Split the receipt name into words (min. 3 characters).
    const rawWords = tokenize(raw);
    if (rawWords.length === 0) return undefined;
    const rawWordSet = new Set(rawWords);
    // Diacritic-free versions, so that e.g. "gradde" equals "grädde".
    const rawWordsNorm = rawWords.map(normalizeToken);
    const rawWordSetNorm = new Set(rawWordsNorm);

    let best:
      | { product: { id: number; name: string; canonicalName: string | null }; score: number }
      | undefined;

    for (const product of products) {
      const displayName = product.canonicalName ?? product.name;
      const productWords = tokenize(displayName);
      if (productWords.length === 0) continue;

      let score = 0;
      let exactStrong = 0;
      let exactAny = 0;
      let partialStrong = 0;
      // True once a non-weak word of >= 5 characters has matched partially.
      let hasLongPartial = false;

      const phrase = displayName.toLowerCase();
      if (raw.includes(phrase)) {
        score += 5;
      }

      for (const pw of productWords) {
        const isWeak = WEAK_DESCRIPTORS.has(pw);
        const pwNorm = normalizeToken(pw);

        if (rawWordSet.has(pw) || rawWordSetNorm.has(pwNorm)) {
          exactAny += 1;
          if (isWeak) {
            score += 1;
          } else {
            exactStrong += 1;
            score += 8;
          }
          continue;
        }

        // Partial matching is only allowed for words of at least 4 characters.
        if (pw.length < 4) continue;
        const hasPartial =
          rawWords.some((rw) => rw.includes(pw) || pw.includes(rw)) ||
          rawWordsNorm.some((rw) => rw.includes(pwNorm) || pwNorm.includes(rw));
        if (!hasPartial) continue;
        if (isWeak) {
          // Descriptive words (e.g. "rökt") must not drive a suggestion on their own.
          continue;
        }
        partialStrong += 1;
        score += 3;
        // The length requirement applies to the word that actually matched,
        // not to any other word in the product name.
        if (pw.length >= 5) hasLongPartial = true;
      }

      // Require at least one strong exact word, or several cooperating signals.
      // Exception: a single strong partial word (>= 5 characters) is enough,
      // e.g. "vispgrädde" → "grädde".
      const hasStrongSignal = exactStrong >= 1 || exactAny + partialStrong >= 2 || hasLongPartial;
      if (!hasStrongSignal) continue;

      // Threshold to avoid weak single hits.
      if (score < 8) continue;

      if (!best || score > best.score) {
        best = { product, score };
      }
    }

    return best?.product;
  }

  /**
   * Adds `categorySuggestion` to items that have a name but neither a matched
   * nor a suggested product. The keyword rules are tried first; the AI service
   * is asked only when no rule applies.
   *
   * Best effort: if the category list cannot be loaded the items are returned
   * as they are, and a failure for one name leaves the items with that name
   * without a suggestion.
   */
  private async enrichWithAiCategories(items: ParsedReceiptItem[]): Promise<ParsedReceiptItem[]> {
    const needsCategory = (i: ParsedReceiptItem): boolean =>
      Boolean(!i.matchedProductId && !i.suggestedProductId && i.rawName);

    const unmatched = items.filter(needsCategory);
    if (unmatched.length === 0) return items;

    let categories: Awaited<ReturnType<CategoriesService['findFlattened']>>;
    try {
      categories = await this.categoriesService.findFlattened();
    } catch (err) {
      // Category service unavailable: return the items without suggestions.
      this.logger.warn(`Kunde inte hämta kategorier för kategoriförslag: ${err}`);
      return items;
    }

    // Cache only the suggestion per receipt name. Keeping whole item copies
    // here would make receipt lines that share a name overwrite each other's
    // remaining fields when the result is merged back.
    const suggestionByName = new Map<string, ParsedReceiptItem['categorySuggestion']>();
    for (const item of unmatched) {
      const rawName = item.rawName;
      // Same name already resolved: reuse it instead of asking again.
      if (suggestionByName.has(rawName)) continue;
      try {
        const byRule = this.ruleBasedCategorySuggestion(rawName, categories);
        if (byRule) {
          suggestionByName.set(rawName, byRule);
          continue;
        }
        const suggestion = await this.aiService.suggestCategory(rawName, categories);
        suggestionByName.set(rawName, suggestion);
      } catch (err) {
        // A failed lookup for one name must not fail the whole import.
        this.logger.warn(`Kategoriförslag misslyckades för "${rawName}": ${err}`);
      }
    }

    return items.map((item) => {
      if (!needsCategory(item) || !suggestionByName.has(item.rawName)) return item;
      return { ...item, categorySuggestion: suggestionByName.get(item.rawName) };
    });
  }

  /**
   * Keyword rules that map a receipt name straight to a category, without AI.
   *
   * Rules are tried in order: tea, pastries ("kaffebröd"), then plant-based or
   * lactose-free cooking dairy. Each rule prefers a specific category
   * (confidence 'high') and falls back to its parent (confidence 'medium').
   * When a tea or pastry keyword matches but neither category exists, the
   * later rules are still evaluated.
   *
   * @param rawName Receipt name as parsed.
   * @param categories Flattened category list; `id`, `name` and `path` are read.
   * @returns A suggestion, or `null` when no rule yields a category.
   */
  private ruleBasedCategorySuggestion(
    rawName: string,
    categories: Awaited<ReturnType<CategoriesService['findFlattened']>>,
  ): CategorySuggestion | null {
    const normalized = normalizeForRules(rawName);

    // ── Rule: tea ────────────────────────────────────────────────────────
    // NOTE(review): the last pattern accepts "tepa", "tepase", "tepak" and
    // "tepar"; the plural "tepasar" is not matched — confirm whether intended.
    const isTea =
      /\bte\b/.test(normalized) ||
      /\btea\b/.test(normalized) ||
      /\bchai\b/.test(normalized) ||
      /\btepa(se|k|r)?\b/.test(normalized);
    if (isTea) {
      const l3Te = categories.find(
        (c) => c.name.toLowerCase() === 'te' && c.path.toLowerCase().includes('te & choklad'),
      );
      if (l3Te) {
        return { categoryId: l3Te.id, categoryName: l3Te.name, path: l3Te.path, confidence: 'high', usedFallback: false };
      }
      const l2TeChoklad = categories.find(
        (c) => c.name.toLowerCase() === 'te & choklad' && c.path.toLowerCase().startsWith('dryck'),
      );
      if (l2TeChoklad) {
        return { categoryId: l2TeChoklad.id, categoryName: l2TeChoklad.name, path: l2TeChoklad.path, confidence: 'medium', usedFallback: false };
      }
    }

    // ── Rule: pastries ("kaffebröd") ─────────────────────────────────────
    const isKaffebrod =
      /\bwienerbrod\b/.test(normalized) ||
      /\bdonut\b/.test(normalized) ||
      /\bmunk\b/.test(normalized) ||
      /\bcroissant\b/.test(normalized) ||
      /\bkanelbulle\b/.test(normalized) ||
      /\bbakelse\b/.test(normalized) ||
      /\bsemla\b/.test(normalized) ||
      /\bdammsugare\b/.test(normalized) ||
      /\bkladdkaka\b/.test(normalized) ||
      /\bmuffin\b/.test(normalized) ||
      /\bcupcake\b/.test(normalized) ||
      /\bchokladboll\b/.test(normalized);
    if (isKaffebrod) {
      const l3Kaffebrod = categories.find(
        (c) => c.name.toLowerCase() === 'kaffebröd' && c.path.toLowerCase().includes('kondis & fika'),
      );
      if (l3Kaffebrod) {
        return { categoryId: l3Kaffebrod.id, categoryName: l3Kaffebrod.name, path: l3Kaffebrod.path, confidence: 'high', usedFallback: false };
      }
      const l2Kondis = categories.find(
        (c) => c.name.toLowerCase() === 'kondis & fika' && c.path.toLowerCase().startsWith('bröd & kakor'),
      );
      if (l2Kondis) {
        return { categoryId: l2Kondis.id, categoryName: l2Kondis.name, path: l2Kondis.path, confidence: 'medium', usedFallback: false };
      }
    }

    // ── Rule: lactose-free / plant-based cooking dairy ───────────────────
    // Requires both a cooking-base keyword and a plant/allergy keyword.
    const isCookingBase =
      /\bmatlagningsbas\b/.test(normalized) ||
      /\bmatlagnings\b/.test(normalized) ||
      /\bplant\s+cream\b/.test(normalized) ||
      /\bcreme\s+fraiche\b/.test(normalized) ||
      /\bgradde\b/.test(normalized) ||
      /\bvispgradde\b/.test(normalized);
    const isPlantOrAllergy =
      /\blaktosfri\b/.test(normalized) ||
      /\bvegetabilisk\b/.test(normalized) ||
      /\bhavre\b/.test(normalized) ||
      /\bsoja\b/.test(normalized) ||
      /\brisdryck\b/.test(normalized) ||
      /\bplant\b/.test(normalized);
    if (!isCookingBase || !isPlantOrAllergy) return null;

    const l3AllergyCooking = categories.find(
      (c) =>
        c.name.toLowerCase() === 'allergi matlagning' &&
        c.path.toLowerCase().startsWith('matlagning > '),
    );
    if (l3AllergyCooking) {
      return {
        categoryId: l3AllergyCooking.id,
        categoryName: l3AllergyCooking.name,
        path: l3AllergyCooking.path,
        confidence: 'high',
        usedFallback: false,
      };
    }

    const l2Cooking = categories.find(
      (c) =>
        c.name.toLowerCase() === 'matlagning' &&
        c.path.toLowerCase() === 'mejeri, ost & ägg > matlagning',
    );
    if (l2Cooking) {
      return {
        categoryId: l2Cooking.id,
        categoryName: l2Cooking.name,
        path: l2Cooking.path,
        confidence: 'medium',
        usedFallback: false,
      };
    }

    return null;
  }
}