372 lines
13 KiB
TypeScript
372 lines
13 KiB
TypeScript
import {
|
|
BadRequestException,
|
|
Injectable,
|
|
Logger,
|
|
ServiceUnavailableException,
|
|
} from '@nestjs/common';
|
|
import { PrismaService } from '../prisma/prisma.service';
|
|
import { ParsedReceiptItem } from './dto/parsed-receipt-item.dto';
|
|
import { AiService, CategorySuggestion } from '../ai/ai.service';
|
|
import { CategoriesService } from '../categories/categories.service';
|
|
|
|
/**
 * Base URL of the importer service that receipt files are posted to.
 *
 * Taken from the `IMPORTER_SERVICE_URL` environment variable. Surrounding
 * whitespace and trailing slashes are removed so that appending `/api/...`
 * never yields a `//` in the request path. When the variable is unset, or is
 * empty after that clean-up, `http://importer-api:3001` is used.
 */
const IMPORTER_SERVICE_URL =
  process.env.IMPORTER_SERVICE_URL?.trim().replace(/\/+$/, '') || 'http://importer-api:3001';
|
|
|
|
/**
 * Swedish preparation/state adjectives that describe a product without
 * identifying it. Each word is listed both with and without diacritics where
 * the two spellings differ.
 *
 * The word matcher scores a hit on one of these far lower than a hit on any
 * other word and never counts a partial hit on them.
 */
const WEAK_DESCRIPTORS = new Set<string>([
  // smoked
  'rokt', 'rökt',
  // boiled, grilled, fried
  'kokt', 'grillad', 'stekt',
  // sliced, shredded
  'skivad', 'strimlad',
  // frozen
  'fryst',
  // fresh
  'farsk', 'färsk',
]);
|
|
|
|
/**
 * Splits a name into lower-case word tokens for product matching.
 *
 * The input is NFC-normalised first so that a decomposed "a + combining
 * diaeresis" is treated as the single letter "ä". It is then lower-cased and
 * split on every run of characters that are neither Unicode letters nor
 * Unicode digits, so accented letters such as "é" or "ô" stay inside their
 * word instead of cutting it in two.
 *
 * @param value Free-text name, e.g. a receipt line or a product name.
 * @returns The tokens in order of appearance; tokens shorter than three
 *   characters are dropped.
 */
function tokenize(value: string): string[] {
  return value
    .normalize('NFC')
    .toLowerCase()
    .split(/[^\p{L}\p{N}]+/u)
    .filter((word) => word.length >= 3);
}

/**
 * Folds diacritics away so that e.g. "grädde" and "gradde" compare equal.
 *
 * The string is decomposed (NFD), every combining mark in U+0300–U+036F is
 * removed, and the remainder is recomposed (NFC) so characters whose marks lie
 * outside that range come back unchanged. Case is left as is; callers are
 * expected to pass already lower-cased tokens.
 *
 * @param s Token to fold.
 * @returns The token without Latin diacritics (å/ä → a, ö → o, é/è → e, ü → u, …).
 */
function normalizeToken(s: string): string {
  return s
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    .normalize('NFC');
}
|
|
|
|
/**
 * Reduces a receipt name to plain `a-z`, `0-9` and single spaces so the
 * category rules can be written as simple ASCII regular expressions.
 *
 * @param value Raw receipt name.
 * @returns The lower-cased name with combining marks stripped, every run of
 *   other characters replaced by one space, and no leading/trailing space.
 */
function normalizeForRules(value: string): string {
  const lowered = value.toLowerCase();
  // Decompose so that accents become separate combining marks, then drop them.
  const withoutMarks = lowered.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
  const asciiWords = withoutMarks.replace(/[^a-z0-9]+/g, ' ');
  return asciiWords.trim();
}
|
|
|
|
/**
 * Turns an uploaded receipt into a list of parsed items.
 *
 * The file is parsed by the importer service, each parsed item is then matched
 * against the product database, and for premium users the items that matched
 * nothing additionally receive a category suggestion.
 */
@Injectable()
export class ReceiptImportService {
  // The patterns below are applied to the output of `normalizeForRules`
  // (lower-case ASCII words separated by single spaces).

  /** Receipt words that indicate tea. */
  private static readonly TEA_PATTERN = /\b(?:te|tea|chai|tepa(?:se|k|r)?)\b/;

  /** Receipt words that indicate pastries ("kaffebröd"). */
  private static readonly PASTRY_PATTERN =
    /\b(?:wienerbrod|donut|munk|croissant|kanelbulle|bakelse|semla|dammsugare|kladdkaka|muffin|cupcake|chokladboll)\b/;

  /** Receipt words that indicate a cream-like cooking base. */
  private static readonly COOKING_BASE_PATTERN =
    /\b(?:matlagningsbas|matlagnings|plant\s+cream|creme\s+fraiche|gradde|vispgradde)\b/;

  /** Receipt words that indicate a lactose-free or plant-based product. */
  private static readonly PLANT_OR_ALLERGY_PATTERN =
    /\b(?:laktosfri|vegetabilisk|havre|soja|risdryck|plant)\b/;

  private readonly logger = new Logger(ReceiptImportService.name);

  constructor(
    private readonly prisma: PrismaService,
    private readonly aiService: AiService,
    private readonly categoriesService: CategoriesService,
  ) {}

  /**
   * Parses a receipt file and matches its lines against known products.
   *
   * @param file Uploaded receipt file.
   * @param isPremium When true, items without any product match also get a
   *   category suggestion.
   * @returns One entry per parsed receipt line.
   * @throws ServiceUnavailableException If the importer service cannot be
   *   reached, answers with a non-4xx error, or returns an unusable body.
   * @throws BadRequestException If the importer service answers with a 4xx status.
   */
  async parseReceipt(file: Express.Multer.File, isPremium = false): Promise<ParsedReceiptItem[]> {
    // Step 1: delegate the AI parsing to the importer microservice.
    const rawItems = await this.parseReceiptViaImporter(file);

    // Step 2: match against the product database (needs DB access, so it stays here).
    const matched = await this.matchProducts(rawItems);

    // Step 3: AI categorisation for premium users.
    if (isPremium) {
      return this.enrichWithAiCategories(matched);
    }
    return matched;
  }

  /**
   * Posts the file as multipart form data to the importer service and returns
   * the parsed items.
   *
   * @throws ServiceUnavailableException On network failure, on a non-4xx error
   *   status, or when a success response is not a JSON array.
   * @throws BadRequestException On a 4xx status, carrying the importer's
   *   `message` when the error body provides one.
   */
  private async parseReceiptViaImporter(file: Express.Multer.File): Promise<ParsedReceiptItem[]> {
    const form = new FormData();
    form.append(
      'file',
      new Blob([new Uint8Array(file.buffer)], { type: file.mimetype }),
      file.originalname,
    );

    let response: Response;
    try {
      response = await fetch(`${IMPORTER_SERVICE_URL}/api/receipt-import/parse`, {
        method: 'POST',
        body: form,
      });
    } catch (err) {
      this.logger.error(`Kunde inte nå importer-api för kvittoparsning: ${err}`);
      throw new ServiceUnavailableException(
        'Import-tjänsten är inte tillgänglig. Försök igen senare.',
      );
    }

    if (!response.ok) {
      let message = `Importer svarade ${response.status}`;
      try {
        const body = (await response.json()) as { message?: string };
        if (body.message) message = body.message;
      } catch {
        // The error body is optional; keep the status-based message if it cannot be read.
      }
      this.logger.error(`Importer-api kvittoparsfel: ${message}`);
      if (response.status >= 400 && response.status < 500) {
        throw new BadRequestException(message);
      }
      throw new ServiceUnavailableException(message);
    }

    // A success status does not guarantee a usable body. Fail with a controlled
    // error instead of letting a SyntaxError/TypeError surface further down.
    let payload: unknown;
    try {
      payload = await response.json();
    } catch (err) {
      this.logger.error(`Importer-api returnerade ogiltig JSON: ${err}`);
      throw new ServiceUnavailableException(
        'Import-tjänsten returnerade ett ogiltigt svar. Försök igen senare.',
      );
    }
    if (!Array.isArray(payload)) {
      this.logger.error('Importer-api returnerade ett oväntat svar (ingen lista).');
      throw new ServiceUnavailableException(
        'Import-tjänsten returnerade ett ogiltigt svar. Försök igen senare.',
      );
    }
    return payload as ParsedReceiptItem[];
  }

  /**
   * Attaches product information to each parsed item.
   *
   * An item whose lower-cased, trimmed `rawName` equals a stored receipt alias
   * gets `matchedProductId`/`matchedProductName`. Every other item with a
   * non-empty name gets `suggestedProductId`/`suggestedProductName` from the
   * word matcher (both `undefined` when nothing qualifies). Items without a
   * name are returned untouched.
   */
  private async matchProducts(
    items: ParsedReceiptItem[],
  ): Promise<ParsedReceiptItem[]> {
    // Fetch aliases and products in parallel.
    const [aliases, products] = await Promise.all([
      this.prisma.receiptAlias.findMany({
        select: { receiptName: true, productId: true, product: { select: { id: true, name: true, canonicalName: true } } },
      }),
      this.prisma.product.findMany({
        where: { isActive: true },
        select: { id: true, name: true, canonicalName: true },
      }),
    ]);

    // Index the aliases once instead of scanning the list for every item.
    // The first alias for a given name wins, as with a linear search.
    const aliasByReceiptName = new Map<string, (typeof aliases)[number]>();
    for (const alias of aliases) {
      if (!aliasByReceiptName.has(alias.receiptName)) {
        aliasByReceiptName.set(alias.receiptName, alias);
      }
    }

    return items.map((item) => {
      const raw = (item.rawName ?? '').toLowerCase().trim();
      if (!raw) return item;

      // 1. Alias match (certain; the user does not have to confirm it).
      const alias = aliasByReceiptName.get(raw);
      if (alias) {
        return {
          ...item,
          matchedProductId: alias.product.id,
          matchedProductName: alias.product.canonicalName ?? alias.product.name,
        };
      }

      // 2. Word-based match (a suggestion that requires confirmation).
      const suggestion = this.findWordMatch(raw, products);
      return {
        ...item,
        suggestedProductId: suggestion?.id,
        suggestedProductName: suggestion
          ? (suggestion.canonicalName ?? suggestion.name)
          : undefined,
      };
    });
  }

  /**
   * Finds the product whose name shares the strongest word overlap with a
   * receipt name.
   *
   * Scoring per product, using its canonical name when present:
   * - +5 if the whole lower-cased product name occurs inside `raw`;
   * - +8 per product word found exactly among the receipt words (with or
   *   without diacritics), or +1 if that word is a weak descriptor;
   * - +3 per non-weak product word of at least 4 characters that contains, or
   *   is contained in, a receipt word.
   *
   * A product qualifies only with a strong signal (see below) and a score of
   * at least 8. The highest score wins; on a tie the earlier product is kept.
   *
   * @param raw Lower-cased, trimmed receipt name.
   * @param products Candidate products.
   * @returns The best candidate, or `undefined` when none qualifies.
   */
  private findWordMatch(
    raw: string,
    products: { id: number; name: string; canonicalName: string | null }[],
  ): { id: number; name: string; canonicalName: string | null } | undefined {
    // Split the receipt name into words (min 3 characters).
    const rawWords = tokenize(raw);
    if (rawWords.length === 0) return undefined;

    const rawWordSet = new Set(rawWords);
    // Diacritic-free versions, so that e.g. "gradde" equals "grädde".
    const rawWordsNorm = rawWords.map((word) => normalizeToken(word));
    const rawWordSetNorm = new Set(rawWordsNorm);

    let best:
      | { product: { id: number; name: string; canonicalName: string | null }; score: number }
      | undefined;

    for (const product of products) {
      const displayName = product.canonicalName ?? product.name;
      const productWords = tokenize(displayName);
      if (productWords.length === 0) continue;

      let score = 0;
      let exactStrong = 0;
      let exactAny = 0;
      let partialStrong = 0;
      let hasLongPartial = false;

      if (raw.includes(displayName.toLowerCase())) {
        score += 5;
      }

      for (const pw of productWords) {
        const isWeak = WEAK_DESCRIPTORS.has(pw);
        const pwNorm = normalizeToken(pw);

        if (rawWordSet.has(pw) || rawWordSetNorm.has(pwNorm)) {
          exactAny += 1;
          if (isWeak) {
            score += 1;
          } else {
            exactStrong += 1;
            score += 8;
          }
          continue;
        }

        // Partial matching is only allowed for words of at least 4 characters.
        if (pw.length < 4) continue;

        const hasPartial =
          rawWords.some((rw) => rw.includes(pw) || pw.includes(rw)) ||
          rawWordsNorm.some((rw) => rw.includes(pwNorm) || pwNorm.includes(rw));
        if (!hasPartial) continue;

        // Descriptive words (e.g. "rökt") must not drive a suggestion via a partial hit.
        if (isWeak) continue;

        partialStrong += 1;
        score += 3;
        // Record the length of the word that actually matched; an unrelated
        // long word elsewhere in the product name must not count.
        if (pw.length >= 5) hasLongPartial = true;
      }

      // Require at least one strong exact word, or several cooperating signals.
      // Exception: a single strong partial word of >= 5 characters is enough,
      // e.g. "vispgrädde" -> "grädde".
      const hasStrongSignal = exactStrong >= 1 || exactAny + partialStrong >= 2 || hasLongPartial;
      if (!hasStrongSignal) continue;

      // Threshold that filters out weak single hits.
      if (score < 8) continue;

      if (!best || score > best.score) {
        best = { product, score };
      }
    }

    return best?.product;
  }

  /**
   * Adds a `categorySuggestion` to every item that has a name but neither a
   * matched nor a suggested product.
   *
   * The suggestion is computed once per distinct `rawName` (rule-based first,
   * AI otherwise) and then applied to each affected item individually, so
   * receipt lines that share a name keep their own remaining fields. This is
   * best effort: if the categories cannot be loaded the items are returned
   * unchanged, and a name whose suggestion fails is left without one.
   */
  private async enrichWithAiCategories(items: ParsedReceiptItem[]): Promise<ParsedReceiptItem[]> {
    const needsSuggestion = (item: ParsedReceiptItem): boolean =>
      !item.matchedProductId && !item.suggestedProductId && Boolean(item.rawName);

    // Array.from keeps this independent of the compiler's iteration settings.
    const pendingNames = Array.from(
      new Set(items.filter(needsSuggestion).map((item) => item.rawName)),
    );
    if (pendingNames.length === 0) return items;

    let categories: Awaited<ReturnType<CategoriesService['findFlattened']>>;
    try {
      categories = await this.categoriesService.findFlattened();
    } catch (err) {
      // Category service unavailable: return the items without suggestions.
      this.logger.warn(`Kunde inte hämta kategorier för AI-förslag: ${err}`);
      return items;
    }

    const suggestionByName = new Map<string, ParsedReceiptItem['categorySuggestion']>();
    for (const rawName of pendingNames) {
      try {
        const byRule = this.ruleBasedCategorySuggestion(rawName, categories);
        if (byRule) {
          suggestionByName.set(rawName, byRule);
          continue;
        }
        suggestionByName.set(rawName, await this.aiService.suggestCategory(rawName, categories));
      } catch (err) {
        // A failure for one name must not fail the import; skip that name.
        this.logger.warn(`AI-kategorisering misslyckades för "${rawName}": ${err}`);
      }
    }

    return items.map((item) =>
      needsSuggestion(item) && suggestionByName.has(item.rawName)
        ? { ...item, categorySuggestion: suggestionByName.get(item.rawName) }
        : item,
    );
  }

  /**
   * Suggests a category from fixed keyword rules, without calling the AI.
   *
   * Rules are tried in order: tea, pastries, lactose-free/plant-based cooking
   * base. Each rule prefers a specific category (confidence `high`) and falls
   * back to its parent (confidence `medium`). A rule whose categories are both
   * missing from `categories` falls through to the next rule.
   *
   * @param rawName Receipt name as parsed.
   * @param categories Flattened category list; each entry's `name` and `path`
   *   are compared case-insensitively.
   * @returns The suggestion, or `null` when no rule applies.
   */
  private ruleBasedCategorySuggestion(
    rawName: string,
    categories: Awaited<ReturnType<CategoriesService['findFlattened']>>,
  ): CategorySuggestion | null {
    const normalized = normalizeForRules(rawName);

    // Returns a suggestion for the first category accepted by `accepts`, if any.
    const suggest = (
      confidence: CategorySuggestion['confidence'],
      accepts: (category: (typeof categories)[number]) => boolean,
    ): CategorySuggestion | null => {
      const hit = categories.find(accepts);
      if (!hit) return null;
      return {
        categoryId: hit.id,
        categoryName: hit.name,
        path: hit.path,
        confidence,
        usedFallback: false,
      };
    };

    // ── Rule: tea ────────────────────────────────────────────────────────
    if (ReceiptImportService.TEA_PATTERN.test(normalized)) {
      const tea =
        suggest(
          'high',
          (c) => c.name.toLowerCase() === 'te' && c.path.toLowerCase().includes('te & choklad'),
        ) ??
        suggest(
          'medium',
          (c) => c.name.toLowerCase() === 'te & choklad' && c.path.toLowerCase().startsWith('dryck'),
        );
      if (tea) return tea;
    }

    // ── Rule: pastries ("kaffebröd") ─────────────────────────────────────
    if (ReceiptImportService.PASTRY_PATTERN.test(normalized)) {
      const pastry =
        suggest(
          'high',
          (c) => c.name.toLowerCase() === 'kaffebröd' && c.path.toLowerCase().includes('kondis & fika'),
        ) ??
        suggest(
          'medium',
          (c) => c.name.toLowerCase() === 'kondis & fika' && c.path.toLowerCase().startsWith('bröd & kakor'),
        );
      if (pastry) return pastry;
    }

    // ── Rule: lactose-free / plant-based dairy ───────────────────────────
    // Both a cooking-base word and a plant/allergy word must be present.
    const isCookingBase = ReceiptImportService.COOKING_BASE_PATTERN.test(normalized);
    const isPlantOrAllergy = ReceiptImportService.PLANT_OR_ALLERGY_PATTERN.test(normalized);
    if (!isCookingBase || !isPlantOrAllergy) return null;

    return (
      suggest(
        'high',
        (c) =>
          c.name.toLowerCase() === 'allergi matlagning' &&
          c.path.toLowerCase().startsWith('matlagning > '),
      ) ??
      suggest(
        'medium',
        (c) =>
          c.name.toLowerCase() === 'matlagning' &&
          c.path.toLowerCase() === 'mejeri, ost & ägg > matlagning',
      )
    );
  }
}
|