feat: migrate import functionality to microservice-importer for quick-import, receipt parsing, and markdown parsing
This commit is contained in:
@@ -1,20 +1,14 @@
|
|||||||
import {
|
import {
|
||||||
BadRequestException,
|
BadRequestException,
|
||||||
Injectable,
|
Injectable,
|
||||||
Logger,
|
Logger,
|
||||||
ServiceUnavailableException,
|
ServiceUnavailableException,
|
||||||
UnsupportedMediaTypeException,
|
|
||||||
} from '@nestjs/common';
|
} from '@nestjs/common';
|
||||||
import * as fs from 'node:fs/promises';
|
|
||||||
import * as path from 'node:path';
|
|
||||||
import * as pdfParse from 'pdf-parse';
|
|
||||||
import { createWorker } from 'tesseract.js';
|
|
||||||
import { IcaRecipeParser } from './parsers/ica.parser';
|
|
||||||
import { GenericRecipeParser } from './parsers/generic.parser';
|
|
||||||
import { RecipeParser } from './parsers/base.parser';
|
|
||||||
import { downloadAndOptimizeImage } from '../common/utils/download-image';
|
import { downloadAndOptimizeImage } from '../common/utils/download-image';
|
||||||
|
|
||||||
const IMAGE_DEST_DIR = process.env.IMAGE_DEST_DIR || '/app/recipe-images';
|
const IMAGE_DEST_DIR = process.env.IMAGE_DEST_DIR || '/app/recipe-images';
|
||||||
|
const IMPORTER_SERVICE_URL =
|
||||||
|
process.env.IMPORTER_SERVICE_URL || 'http://importer-api:3001';
|
||||||
|
|
||||||
export interface QuickImportResult {
|
export interface QuickImportResult {
|
||||||
markdown: string;
|
markdown: string;
|
||||||
@@ -23,246 +17,100 @@ export interface QuickImportResult {
|
|||||||
imageWarning?: string;
|
imageWarning?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
type UploadKind = 'pdf' | 'image';
|
|
||||||
|
|
||||||
@Injectable()
|
@Injectable()
|
||||||
export class QuickImportService {
|
export class QuickImportService {
|
||||||
private readonly logger = new Logger(QuickImportService.name);
|
private readonly logger = new Logger(QuickImportService.name);
|
||||||
|
|
||||||
constructor() {}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Detekterar typ av input (URL eller filsökväg) och importerar från lämplig källa
|
|
||||||
*/
|
|
||||||
async importFromInput(input: string): Promise<QuickImportResult> {
|
async importFromInput(input: string): Promise<QuickImportResult> {
|
||||||
const trimmed = input.trim();
|
const trimmed = input.trim();
|
||||||
this.logger.log(`Mottog input: ${trimmed}`);
|
this.logger.log(`Delegerar URL-import till microservice: ${trimmed}`);
|
||||||
|
|
||||||
if (!trimmed) {
|
if (!trimmed) {
|
||||||
throw new BadRequestException('Du måste ange en URL eller ladda upp en fil');
|
throw new BadRequestException('Du måste ange en URL eller ladda upp en fil');
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this.isUrl(trimmed)) {
|
let response: Response;
|
||||||
this.logger.log('Detekterade URL, försöker scrapa...');
|
|
||||||
return this.scrapeRecipeFromUrl(trimmed);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (this.looksLikeLocalFile(trimmed)) {
|
|
||||||
this.logger.log(`Försöker läsa lokal fil: ${trimmed}`);
|
|
||||||
try {
|
|
||||||
const buffer = await fs.readFile(trimmed);
|
|
||||||
return this.importFromBuffer(buffer, path.extname(trimmed).slice(1) as UploadKind);
|
|
||||||
} catch (error) {
|
|
||||||
this.logger.error(`Kunde inte läsa fil: ${error}`);
|
|
||||||
throw new ServiceUnavailableException('Kunde inte läsa filen');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
throw new BadRequestException('Ogiltig input. Ange en URL eller en filsökväg.');
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Importerar från en uppladdad fil
|
|
||||||
*/
|
|
||||||
async importFromUpload(file: Express.Multer.File): Promise<QuickImportResult> {
|
|
||||||
this.logger.log(`MIME-typ: ${file.mimetype}, filnamn: ${file.originalname}`);
|
|
||||||
const kind = file.mimetype.startsWith('image/') ? 'image' : 'pdf';
|
|
||||||
return this.importFromBuffer(file.buffer, kind);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Importerar från en buffert (PDF eller bild)
|
|
||||||
*/
|
|
||||||
async importFromBuffer(
|
|
||||||
buffer: Buffer,
|
|
||||||
kind: UploadKind,
|
|
||||||
): Promise<QuickImportResult> {
|
|
||||||
try {
|
try {
|
||||||
if (kind === 'pdf') {
|
response = await fetch(`${IMPORTER_SERVICE_URL}/api/quick-import`, {
|
||||||
this.logger.log('Parsar PDF med pdf-parse...');
|
method: 'POST',
|
||||||
const data = await pdfParse(buffer);
|
headers: { 'Content-Type': 'application/json' },
|
||||||
const markdown = data.text || '(Tom PDF)';
|
body: JSON.stringify({ input: trimmed }),
|
||||||
return { markdown, source: 'pdf' };
|
|
||||||
} else {
|
|
||||||
this.logger.log('Parsar bild med tesseract...');
|
|
||||||
const worker = await createWorker('swe+eng');
|
|
||||||
const ret = await worker.recognize(buffer);
|
|
||||||
await worker.terminate();
|
|
||||||
const markdown = ret.data.text || '(Tom bild)';
|
|
||||||
return { markdown, source: 'image' };
|
|
||||||
}
|
|
||||||
} catch (error) {
|
|
||||||
this.logger.error(`Fel vid import av ${kind}: ${error}`);
|
|
||||||
throw new ServiceUnavailableException(`Kunde inte importera ${kind}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private isUrl(input: string): boolean {
|
|
||||||
return input.startsWith('http://') || input.startsWith('https://');
|
|
||||||
}
|
|
||||||
|
|
||||||
private looksLikeLocalFile(input: string): boolean {
|
|
||||||
return input.includes('/') || input.includes('\\');
|
|
||||||
}
|
|
||||||
|
|
||||||
private async scrapeRecipeFromUrl(url: string): Promise<QuickImportResult> {
|
|
||||||
try {
|
|
||||||
this.logger.log(`Hämtar HTML från: ${url}`);
|
|
||||||
|
|
||||||
const response = await fetch(url, {
|
|
||||||
headers: {
|
|
||||||
'User-Agent':
|
|
||||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
|
|
||||||
},
|
|
||||||
});
|
});
|
||||||
|
|
||||||
this.logger.log(`HTTP status: ${response.status}`);
|
|
||||||
|
|
||||||
if (!response.ok) {
|
|
||||||
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
const html = await response.text();
|
|
||||||
this.logger.log(`HTML längd: ${html.length} tecken`);
|
|
||||||
|
|
||||||
const parsers: RecipeParser[] = [
|
|
||||||
new IcaRecipeParser(),
|
|
||||||
new GenericRecipeParser(),
|
|
||||||
];
|
|
||||||
|
|
||||||
let recipe = null;
|
|
||||||
for (const parser of parsers) {
|
|
||||||
if (parser.canHandle(url)) {
|
|
||||||
this.logger.log(`Använder parser: ${parser.constructor.name}`);
|
|
||||||
recipe = parser.parse(html);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!recipe) {
|
|
||||||
throw new Error('Ingen parserutrustning tillgänglig');
|
|
||||||
}
|
|
||||||
|
|
||||||
this.logger.log(`Parsad recept: ${recipe.name} (${recipe.ingredients.length} ingredienser)`);
|
|
||||||
|
|
||||||
if (!recipe.name) {
|
|
||||||
throw new Error('Kunde inte hitta receptnamn på sidan. Försök med en annan länk.');
|
|
||||||
}
|
|
||||||
|
|
||||||
const markdown = this.recipeToMarkdown(recipe, url);
|
|
||||||
this.logger.log(`Markdown genererad, längd: ${markdown.length}`);
|
|
||||||
|
|
||||||
let source: 'ica' | 'pdf' | 'image' | 'other' = 'other';
|
|
||||||
if (/ica\.se/i.test(url)) {
|
|
||||||
source = 'ica';
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ladda ner och optimera bild om parser hittade en
|
|
||||||
let imageUrl: string | undefined;
|
|
||||||
let imageWarning: string | undefined;
|
|
||||||
if (recipe.imageUrl) {
|
|
||||||
this.logger.log(`Bildkandidat från parser: ${recipe.imageUrl}`);
|
|
||||||
const normalizedImageUrl = this.normalizeImageUrl(recipe.imageUrl, url);
|
|
||||||
this.logger.log(`Normaliserad bild-URL: ${normalizedImageUrl ?? 'null'}`);
|
|
||||||
if (!normalizedImageUrl) {
|
|
||||||
imageWarning = 'Receptbild kunde inte tolkas till en giltig URL.';
|
|
||||||
this.logger.warn(
|
|
||||||
`Kunde inte normalisera bild-URL: "${recipe.imageUrl}" (källsida: ${url})`,
|
|
||||||
);
|
|
||||||
} else {
|
|
||||||
try {
|
|
||||||
imageUrl = await downloadAndOptimizeImage(normalizedImageUrl, IMAGE_DEST_DIR);
|
|
||||||
this.logger.log(`Bild optimerad och sparad: ${imageUrl}`);
|
|
||||||
} catch (imgErr) {
|
|
||||||
// Fallback: behåll extern URL så klienten ändå kan visa bild.
|
|
||||||
imageUrl = normalizedImageUrl;
|
|
||||||
imageWarning = 'Receptbild kunde inte laddas ner lokalt; extern URL används.';
|
|
||||||
this.logger.warn(
|
|
||||||
`Kunde inte ladda ner bild: ${imgErr} (källa: ${normalizedImageUrl})`,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
|
||||||
markdown,
|
|
||||||
source,
|
|
||||||
imageUrl,
|
|
||||||
imageWarning,
|
|
||||||
};
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
const message = err instanceof Error ? err.message : 'Okänt fel vid scraping';
|
this.logger.error(`Kunde inte nå importer-api: ${err}`);
|
||||||
this.logger.error(`Scraping misslyckades: ${message}`);
|
throw new ServiceUnavailableException(
|
||||||
throw new BadRequestException(
|
'Import-tjänsten är inte tillgänglig. Försök igen senare.',
|
||||||
`Kunde inte hämta recept: ${message}. Kontrollera att länken är korrekt och försök igen.`
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const result = await this.handleImporterResponse(response);
|
||||||
|
return this.downloadImageIfNeeded(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
private normalizeImageUrl(rawImageUrl: string, pageUrl: string): string | null {
|
async importFromUpload(file: Express.Multer.File): Promise<QuickImportResult> {
|
||||||
const trimmed = rawImageUrl.trim();
|
this.logger.log(`Delegerar filuploading till microservice: ${file.originalname} (${file.mimetype})`);
|
||||||
if (!trimmed) return null;
|
|
||||||
|
|
||||||
const protocolNormalized =
|
const form = new FormData();
|
||||||
trimmed.startsWith('//') ? `https:${trimmed}` : trimmed;
|
form.append(
|
||||||
|
'file',
|
||||||
|
new Blob([file.buffer], { type: file.mimetype }),
|
||||||
|
file.originalname,
|
||||||
|
);
|
||||||
|
|
||||||
|
let response: Response;
|
||||||
try {
|
try {
|
||||||
return new URL(protocolNormalized, pageUrl).toString();
|
response = await fetch(`${IMPORTER_SERVICE_URL}/api/quick-import`, {
|
||||||
} catch {
|
method: 'POST',
|
||||||
return null;
|
body: form,
|
||||||
|
});
|
||||||
|
} catch (err) {
|
||||||
|
this.logger.error(`Kunde inte nå importer-api: ${err}`);
|
||||||
|
throw new ServiceUnavailableException(
|
||||||
|
'Import-tjänsten är inte tillgänglig. Försök igen senare.',
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const result = await this.handleImporterResponse(response);
|
||||||
|
return this.downloadImageIfNeeded(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
private async handleImporterResponse(response: Response): Promise<QuickImportResult> {
|
||||||
* Konvertera receptobjekt till Markdown-format
|
if (!response.ok) {
|
||||||
*/
|
let message = `Import-tjänsten svarade ${response.status}`;
|
||||||
private recipeToMarkdown(
|
try {
|
||||||
recipe: {
|
const body = (await response.json()) as { message?: string };
|
||||||
name: string;
|
if (body.message) message = body.message;
|
||||||
description?: string;
|
} catch {
|
||||||
ingredients: Array<{
|
// ignorera parse-fel
|
||||||
quantity: number;
|
|
||||||
unit: string;
|
|
||||||
name: string;
|
|
||||||
note?: string;
|
|
||||||
}>;
|
|
||||||
instructions?: string;
|
|
||||||
},
|
|
||||||
sourceUrl?: string,
|
|
||||||
): string {
|
|
||||||
const lines: string[] = [];
|
|
||||||
|
|
||||||
lines.push(`# ${recipe.name}`);
|
|
||||||
lines.push('');
|
|
||||||
|
|
||||||
if (recipe.description) {
|
|
||||||
lines.push(recipe.description);
|
|
||||||
lines.push('');
|
|
||||||
}
|
|
||||||
|
|
||||||
if (recipe.ingredients.length > 0) {
|
|
||||||
lines.push('## Ingredienser');
|
|
||||||
for (const ing of recipe.ingredients) {
|
|
||||||
const quantity = ing.quantity > 0 ? `${ing.quantity} ` : '';
|
|
||||||
const unit = ing.unit ? `${ing.unit} ` : '';
|
|
||||||
const note = ing.note ? ` (${ing.note})` : '';
|
|
||||||
lines.push(`- ${quantity}${unit}${ing.name}${note}`);
|
|
||||||
}
|
}
|
||||||
lines.push('');
|
this.logger.error(`Importer-api fel: ${message}`);
|
||||||
|
if (response.status >= 400 && response.status < 500) {
|
||||||
|
throw new BadRequestException(message);
|
||||||
|
}
|
||||||
|
throw new ServiceUnavailableException(message);
|
||||||
|
}
|
||||||
|
return response.json() as Promise<QuickImportResult>;
|
||||||
|
}
|
||||||
|
|
||||||
|
private async downloadImageIfNeeded(result: QuickImportResult): Promise<QuickImportResult> {
|
||||||
|
if (!result.imageUrl) return result;
|
||||||
|
|
||||||
|
const imageUrl = result.imageUrl;
|
||||||
|
if (!imageUrl.startsWith('http://') && !imageUrl.startsWith('https://')) {
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (recipe.instructions) {
|
this.logger.log(`Laddar ner receptbild: ${imageUrl}`);
|
||||||
lines.push('## Tillvägagångssätt');
|
try {
|
||||||
lines.push(recipe.instructions);
|
const localPath = await downloadAndOptimizeImage(imageUrl, IMAGE_DEST_DIR);
|
||||||
lines.push('');
|
this.logger.log(`Bild sparad lokalt: ${localPath}`);
|
||||||
|
return { ...result, imageUrl: localPath };
|
||||||
|
} catch (imgErr) {
|
||||||
|
this.logger.warn(`Kunde inte ladda ner bild: ${imgErr} (källa: ${imageUrl})`);
|
||||||
|
return {
|
||||||
|
...result,
|
||||||
|
imageWarning: result.imageWarning ?? 'Receptbild kunde inte laddas ner lokalt; extern URL används.',
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sourceUrl) {
|
|
||||||
lines.push('---');
|
|
||||||
lines.push('');
|
|
||||||
lines.push(`Källa: [${sourceUrl}](${sourceUrl})`);
|
|
||||||
}
|
|
||||||
|
|
||||||
return lines.join('\n');
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,71 +4,17 @@ import {
|
|||||||
Logger,
|
Logger,
|
||||||
ServiceUnavailableException,
|
ServiceUnavailableException,
|
||||||
} from '@nestjs/common';
|
} from '@nestjs/common';
|
||||||
import * as pdfParse from 'pdf-parse';
|
|
||||||
import { PrismaService } from '../prisma/prisma.service';
|
import { PrismaService } from '../prisma/prisma.service';
|
||||||
import { ParsedReceiptItem } from './dto/parsed-receipt-item.dto';
|
import { ParsedReceiptItem } from './dto/parsed-receipt-item.dto';
|
||||||
import { AiService } from '../ai/ai.service';
|
import { AiService } from '../ai/ai.service';
|
||||||
import { CategoriesService } from '../categories/categories.service';
|
import { CategoriesService } from '../categories/categories.service';
|
||||||
|
|
||||||
const MISTRAL_API_URL = 'https://api.mistral.ai/v1/chat/completions';
|
const IMPORTER_SERVICE_URL =
|
||||||
export const RECEIPT_IMPORT_MODEL = 'mistral-small-2603';
|
process.env.IMPORTER_SERVICE_URL || 'http://importer-api:3001';
|
||||||
|
|
||||||
const IMAGE_PROMPT = `Du är en kvittoläsare. Analysera detta kvitto och returnera ENDAST en JSON-array med alla köpta varor.
|
|
||||||
Varje vara ska ha följande fält:
|
|
||||||
- "rawName": varans namn som det står på kvittot (sträng)
|
|
||||||
- "quantity": antal eller mängd som ett tal (t.ex. 1, 2, 0.5)
|
|
||||||
- "unit": enhet — välj ett av: "st", "kg", "g", "l", "dl", "cl", "ml", "förp", "pak", "burk", "flaska"
|
|
||||||
- "price": pris i SEK som ett tal, eller null
|
|
||||||
- "brand": märke eller leverantör om det tydligt framgår av varunamnet (t.ex. "Arla", "ICA", "Oatly"), annars null
|
|
||||||
- "origin": ursprungsland om det framgår av varunamnet (t.ex. "Brasilien", "Sverige", "Italien"), annars null
|
|
||||||
|
|
||||||
Returnera BARA JSON-arrayen utan markdown-formatering.`;
|
|
||||||
|
|
||||||
const TEXT_PROMPT = (text: string) =>
|
|
||||||
`Du är en kvittoläsare. Nedan följer rå text från ett kvitto. Analysera texten och returnera ENDAST en JSON-array med alla köpta varor.
|
|
||||||
Varje vara ska ha följande fält:
|
|
||||||
- "rawName": varans namn som det står på kvittot (sträng)
|
|
||||||
- "quantity": antal eller mängd som ett tal (t.ex. 1, 2, 0.5)
|
|
||||||
- "unit": enhet — välj ett av: "st", "kg", "g", "l", "dl", "cl", "ml", "förp", "pak", "burk", "flaska"
|
|
||||||
- "price": pris i SEK som ett tal, eller null
|
|
||||||
- "brand": märke eller leverantör om det tydligt framgår av varunamnet (t.ex. "Arla", "ICA", "Oatly"), annars null
|
|
||||||
- "origin": ursprungsland om det framgår av varunamnet (t.ex. "Brasilien", "Sverige", "Italien"), annars null
|
|
||||||
|
|
||||||
Returnera BARA JSON-arrayen utan markdown-formatering.
|
|
||||||
|
|
||||||
Kvittotext:
|
|
||||||
${text}`;
|
|
||||||
|
|
||||||
@Injectable()
|
@Injectable()
|
||||||
export class ReceiptImportService {
|
export class ReceiptImportService {
|
||||||
private readonly logger = new Logger(ReceiptImportService.name);
|
private readonly logger = new Logger(ReceiptImportService.name);
|
||||||
private readonly MAX_RETRIES = 3;
|
|
||||||
|
|
||||||
private async callMistralWithRetry(body: object, apiKey: string, source: string): Promise<Response> {
|
|
||||||
for (let attempt = 1; attempt <= this.MAX_RETRIES; attempt++) {
|
|
||||||
const response = await fetch(MISTRAL_API_URL, {
|
|
||||||
method: 'POST',
|
|
||||||
headers: {
|
|
||||||
'Content-Type': 'application/json',
|
|
||||||
Authorization: `Bearer ${apiKey}`,
|
|
||||||
},
|
|
||||||
body: JSON.stringify(body),
|
|
||||||
});
|
|
||||||
|
|
||||||
if (response.status === 503 || response.status === 429) {
|
|
||||||
const err = await response.text();
|
|
||||||
this.logger.warn(`Mistral ${response.status} (${source}, försök ${attempt}/${this.MAX_RETRIES}): ${err}`);
|
|
||||||
if (attempt < this.MAX_RETRIES) {
|
|
||||||
await new Promise((r) => setTimeout(r, attempt * 2000));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
throw new ServiceUnavailableException('Mistral API returnerade ett fel: Tjänsten tillfälligt otillgänglig (503)');
|
|
||||||
}
|
|
||||||
|
|
||||||
return response;
|
|
||||||
}
|
|
||||||
throw new ServiceUnavailableException('Kunde inte nå Mistral API efter flera försök');
|
|
||||||
}
|
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
private readonly prisma: PrismaService,
|
private readonly prisma: PrismaService,
|
||||||
@@ -77,120 +23,56 @@ export class ReceiptImportService {
|
|||||||
) {}
|
) {}
|
||||||
|
|
||||||
async parseReceipt(file: Express.Multer.File, isPremium = false): Promise<ParsedReceiptItem[]> {
|
async parseReceipt(file: Express.Multer.File, isPremium = false): Promise<ParsedReceiptItem[]> {
|
||||||
const apiKey = process.env.MISTRAL_API_KEY;
|
// Steg 1: Delegera AI-parsning till microservice-importer
|
||||||
if (!apiKey) {
|
const rawItems = await this.parseReceiptViaImporter(file);
|
||||||
throw new ServiceUnavailableException(
|
|
||||||
'MISTRAL_API_KEY är inte konfigurerad i miljövariabler',
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
const isPdf =
|
|
||||||
file.mimetype === 'application/pdf' ||
|
|
||||||
file.mimetype === 'application/octet-stream' ||
|
|
||||||
file.originalname?.toLowerCase().endsWith('.pdf');
|
|
||||||
const rawItems = isPdf
|
|
||||||
? await this.parseReceiptFromPdf(file.buffer, apiKey)
|
|
||||||
: await this.parseReceiptFromImage(file.buffer, file.mimetype, apiKey);
|
|
||||||
|
|
||||||
|
// Steg 2: Matchning mot produktdatabas (kräver DB — stannar i recipe-app)
|
||||||
const matched = await this.matchProducts(rawItems);
|
const matched = await this.matchProducts(rawItems);
|
||||||
|
|
||||||
|
// Steg 3: AI-kategorisering för premium-användare
|
||||||
if (isPremium) {
|
if (isPremium) {
|
||||||
return this.enrichWithAiCategories(matched);
|
return this.enrichWithAiCategories(matched);
|
||||||
}
|
}
|
||||||
return matched;
|
return matched;
|
||||||
}
|
}
|
||||||
|
|
||||||
private async parseReceiptFromImage(
|
private async parseReceiptViaImporter(file: Express.Multer.File): Promise<ParsedReceiptItem[]> {
|
||||||
buffer: Buffer,
|
const form = new FormData();
|
||||||
mimeType: string,
|
form.append(
|
||||||
apiKey: string,
|
'file',
|
||||||
): Promise<ParsedReceiptItem[]> {
|
new Blob([file.buffer], { type: file.mimetype }),
|
||||||
const base64 = buffer.toString('base64');
|
file.originalname,
|
||||||
const response = await this.callMistralWithRetry({
|
);
|
||||||
model: RECEIPT_IMPORT_MODEL,
|
|
||||||
messages: [
|
|
||||||
{
|
|
||||||
role: 'user',
|
|
||||||
content: [
|
|
||||||
{
|
|
||||||
type: 'image_url',
|
|
||||||
image_url: { url: `data:${mimeType};base64,${base64}` },
|
|
||||||
},
|
|
||||||
{ type: 'text', text: IMAGE_PROMPT },
|
|
||||||
],
|
|
||||||
},
|
|
||||||
],
|
|
||||||
max_tokens: 2000,
|
|
||||||
temperature: 0.1,
|
|
||||||
}, apiKey, 'bild');
|
|
||||||
|
|
||||||
return this.extractItemsFromMistralResponse(response, 'bild');
|
let response: Response;
|
||||||
}
|
|
||||||
|
|
||||||
private async parseReceiptFromPdf(
|
|
||||||
buffer: Buffer,
|
|
||||||
apiKey: string,
|
|
||||||
): Promise<ParsedReceiptItem[]> {
|
|
||||||
let pdfText: string;
|
|
||||||
try {
|
try {
|
||||||
const parsed = await pdfParse(buffer);
|
response = await fetch(`${IMPORTER_SERVICE_URL}/api/receipt-import/parse`, {
|
||||||
pdfText = parsed.text?.trim();
|
method: 'POST',
|
||||||
} catch {
|
body: form,
|
||||||
throw new BadRequestException('Kunde inte läsa PDF-filen. Kontrollera att filen inte är skadad.');
|
});
|
||||||
}
|
} catch (err) {
|
||||||
|
this.logger.error(`Kunde inte nå importer-api för kvittoparsning: ${err}`);
|
||||||
if (!pdfText || pdfText.length < 20) {
|
|
||||||
throw new BadRequestException(
|
|
||||||
'PDF-filen verkar inte innehålla läsbar text. Prova att fotografera kvittot istället.',
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
this.logger.log(`PDF-text extraherad (${pdfText.length} tecken)`);
|
|
||||||
|
|
||||||
const response = await this.callMistralWithRetry({
|
|
||||||
model: RECEIPT_IMPORT_MODEL,
|
|
||||||
messages: [{ role: 'user', content: TEXT_PROMPT(pdfText) }],
|
|
||||||
max_tokens: 2000,
|
|
||||||
temperature: 0.1,
|
|
||||||
}, apiKey, 'PDF');
|
|
||||||
|
|
||||||
return this.extractItemsFromMistralResponse(response, 'PDF');
|
|
||||||
}
|
|
||||||
|
|
||||||
private async extractItemsFromMistralResponse(
|
|
||||||
response: Response,
|
|
||||||
source: string,
|
|
||||||
): Promise<ParsedReceiptItem[]> {
|
|
||||||
if (!response.ok) {
|
|
||||||
const err = await response.text();
|
|
||||||
this.logger.error(`Mistral API svarade ${response.status} (${source}): ${err}`);
|
|
||||||
const hint =
|
|
||||||
response.status === 401
|
|
||||||
? 'Ogiltig API-nyckel (401)'
|
|
||||||
: response.status === 429
|
|
||||||
? 'För många förfrågningar — försök igen om en stund (429)'
|
|
||||||
: `HTTP ${response.status}`;
|
|
||||||
throw new ServiceUnavailableException(
|
throw new ServiceUnavailableException(
|
||||||
`Mistral API returnerade ett fel: ${hint}`,
|
'Import-tjänsten är inte tillgänglig. Försök igen senare.',
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
const data = (await response.json()) as {
|
if (!response.ok) {
|
||||||
choices: { message: { content: string } }[];
|
let message = `Importer svarade ${response.status}`;
|
||||||
};
|
try {
|
||||||
const content = data.choices?.[0]?.message?.content ?? '[]';
|
const body = (await response.json()) as { message?: string };
|
||||||
|
if (body.message) message = body.message;
|
||||||
try {
|
} catch {
|
||||||
const clean = content.replace(/```(?:json)?/gi, '').trim();
|
// ignorera parse-fel
|
||||||
const items = JSON.parse(clean);
|
}
|
||||||
if (!Array.isArray(items)) throw new Error('Inte en array');
|
this.logger.error(`Importer-api kvittoparsfel: ${message}`);
|
||||||
return items as ParsedReceiptItem[];
|
if (response.status >= 400 && response.status < 500) {
|
||||||
} catch {
|
throw new BadRequestException(message);
|
||||||
this.logger.error(`Kunde inte parsa Mistral-svar (${source}):`, content);
|
}
|
||||||
throw new BadRequestException(
|
throw new ServiceUnavailableException(message);
|
||||||
`Kvittot kunde inte tolkas. Försök med en tydligare ${source === 'PDF' ? 'PDF' : 'bild'}.`,
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return response.json() as Promise<ParsedReceiptItem[]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
private async matchProducts(
|
private async matchProducts(
|
||||||
|
|||||||
@@ -353,7 +353,24 @@ export class RecipesService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async parseMarkdown(dto: ParseMarkdownDto) {
|
async parseMarkdown(dto: ParseMarkdownDto) {
|
||||||
const parsed = parseRecipeMarkdown(dto.markdown);
|
// Delegera markdown-parsning till microservice-importer
|
||||||
|
const importerUrl = process.env.IMPORTER_SERVICE_URL || 'http://importer-api:3001';
|
||||||
|
let parsed: ParsedRecipe;
|
||||||
|
try {
|
||||||
|
const response = await fetch(`${importerUrl}/api/recipes/parse-markdown`, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify({ markdown: dto.markdown }),
|
||||||
|
});
|
||||||
|
if (!response.ok) {
|
||||||
|
throw new Error(`Importer svarade ${response.status}`);
|
||||||
|
}
|
||||||
|
parsed = (await response.json()) as ParsedRecipe;
|
||||||
|
} catch (err) {
|
||||||
|
this.logger.error(`Kunde inte nå importer-api för parse-markdown: ${err}`);
|
||||||
|
// Fallback: använd lokal parser vid driftavbrott
|
||||||
|
parsed = parseRecipeMarkdown(dto.markdown);
|
||||||
|
}
|
||||||
|
|
||||||
const allProducts = await this.prisma.product.findMany({
|
const allProducts = await this.prisma.product.findMany({
|
||||||
where: { isActive: true },
|
where: { isActive: true },
|
||||||
|
|||||||
+24
@@ -48,11 +48,14 @@ services:
|
|||||||
ADMIN_PADMIN_PASSWORD: "${ADMIN_PADMIN_PASSWORD}"
|
ADMIN_PADMIN_PASSWORD: "${ADMIN_PADMIN_PASSWORD}"
|
||||||
SEED_USER1_PASSWORD: "${SEED_USER1_PASSWORD}"
|
SEED_USER1_PASSWORD: "${SEED_USER1_PASSWORD}"
|
||||||
SEED_USER2_PASSWORD: "${SEED_USER2_PASSWORD}"
|
SEED_USER2_PASSWORD: "${SEED_USER2_PASSWORD}"
|
||||||
|
IMPORTER_SERVICE_URL: "http://importer-api:3001"
|
||||||
volumes:
|
volumes:
|
||||||
- recipe_images:/app/recipe-images
|
- recipe_images:/app/recipe-images
|
||||||
depends_on:
|
depends_on:
|
||||||
recipe-db:
|
recipe-db:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
importer-api:
|
||||||
|
condition: service_healthy
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD-SHELL", "wget -qO- http://127.0.0.1:8080/api/health >/dev/null 2>&1 || exit 1"]
|
test: ["CMD-SHELL", "wget -qO- http://127.0.0.1:8080/api/health >/dev/null 2>&1 || exit 1"]
|
||||||
interval: 20s
|
interval: 20s
|
||||||
@@ -63,6 +66,27 @@ services:
|
|||||||
- proxy
|
- proxy
|
||||||
- recipe-internal
|
- recipe-internal
|
||||||
|
|
||||||
|
importer-api:
|
||||||
|
build:
|
||||||
|
context: ../microservice-importer
|
||||||
|
dockerfile: backend/Dockerfile
|
||||||
|
image: recipe-importer-api:local
|
||||||
|
pull_policy: never
|
||||||
|
container_name: importer-api
|
||||||
|
restart: unless-stopped
|
||||||
|
environment:
|
||||||
|
NODE_ENV: "production"
|
||||||
|
PORT: "3001"
|
||||||
|
MISTRAL_API_KEY: "${MISTRAL_API_KEY:-}"
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "wget -qO- http://127.0.0.1:3001/api/health >/dev/null 2>&1 || exit 1"]
|
||||||
|
interval: 20s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 5
|
||||||
|
start_period: 30s
|
||||||
|
networks:
|
||||||
|
- recipe-internal
|
||||||
|
|
||||||
recipe-db:
|
recipe-db:
|
||||||
image: mariadb:11
|
image: mariadb:11
|
||||||
container_name: recipe-db
|
container_name: recipe-db
|
||||||
|
|||||||
@@ -0,0 +1,107 @@
|
|||||||
|
# Migrering: Import-funktion → microservice-importer
|
||||||
|
|
||||||
|
## Beslut
|
||||||
|
|
||||||
|
- **Scope:** quick-import, parse-markdown, receipt-import
|
||||||
|
- **Arkitektur:** Backend-till-backend — recipe-app NestJS-backend anropar microservice-importer internt via HTTP. Frontend ändras inte.
|
||||||
|
- **OCR:** Läggs till i microservice-importer (tesseract.js)
|
||||||
|
- **Infra:** Separata Docker Compose-filer, microservice-importer på port 3001
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Fas 1 — Utöka microservice-importer
|
||||||
|
|
||||||
|
*Steg 1–3 är oberoende och kan utföras parallellt.*
|
||||||
|
|
||||||
|
**1. Lägg till OCR-stöd (tesseract.js)**
|
||||||
|
Ny `ImageParser` i `backend/src/web-scraping-service/parsers/`. Controllern
|
||||||
|
`quick-import.controller.ts` utökas till att acceptera `multipart/form-data` för
|
||||||
|
bilder vid sidan av JSON-body för URL-anrop.
|
||||||
|
|
||||||
|
**2. Lägg till `imageUrl` i quick-import-svaret**
|
||||||
|
`quick-import.service.ts` returnerar idag `{ markdown, source }`. Komplettera
|
||||||
|
med `imageUrl?` (original-URL från skrapad sida).
|
||||||
|
|
||||||
|
**3. Ny `ReceiptParsingModule` – stateless kvittoparsning**
|
||||||
|
Ny modul `backend/src/receipt-parsing/` med endpoint `POST /api/receipt-import/parse`.
|
||||||
|
- PDF → text via `pdf-parse`; bild → base64
|
||||||
|
- Anropar Mistral AI med kvitto-prompt
|
||||||
|
- Returnerar: `[{ rawName, quantity, unit, price, brand, origin }]`
|
||||||
|
- Ingen databaskoppling — rent stateless
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Fas 2 — Anpassa recipe-app backend
|
||||||
|
|
||||||
|
*Beror på Fas 1. Steg 5–7 kan utföras parallellt.*
|
||||||
|
|
||||||
|
**4. Lägg till `HttpModule` + `IMPORTER_SERVICE_URL`**
|
||||||
|
recipe-app backend registrerar NestJS:s `HttpModule` (axios-wrapper).
|
||||||
|
`IMPORTER_SERVICE_URL` sätts som env-variabel (`http://importer-api:3001` i Docker).
|
||||||
|
|
||||||
|
**5. Refaktorera `QuickImportService`**
|
||||||
|
Ta bort lokal ICA-parsning, pdf-parse och tesseract — anropa istället
|
||||||
|
microservice-importer `POST /api/quick-import` (eller `POST /api/document-import`
|
||||||
|
för PDF). `QuickImportModule` behåller sin controller och DTO (API-kontrakt oförändrat).
|
||||||
|
|
||||||
|
**6. Refaktorera `ReceiptImportService`**
|
||||||
|
- AI-parsning → delegeras till `POST $IMPORTER_URL/api/receipt-import/parse`
|
||||||
|
- Produktmatchning (Levenshtein mot `Product`, `ReceiptAlias`) — behålls i recipe-app (DB-krav)
|
||||||
|
- Slår ihop och returnerar samma svar som idag till frontend
|
||||||
|
|
||||||
|
**7. Refaktorera `RecipesService.parseMarkdown()`**
|
||||||
|
- Anropar `POST $IMPORTER_URL/api/recipes/parse-markdown` → `{ name, ingredients[], ... }`
|
||||||
|
- Kör befintlig Levenshtein-produktmatchning mot `Product`-tabellen i recipe-app
|
||||||
|
- Returnerar sammansatt svar — API-kontraktet mot frontend oförändrat
|
||||||
|
|
||||||
|
**8. Ta bort lokala parsningsberoenden**
|
||||||
|
Ta bort `pdf-parse`, `tesseract.js`, `node-fetch` etc. ur recipe-app backend
|
||||||
|
`package.json` när steg 5–7 är verifierade.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Fas 3 — Infrastruktur
|
||||||
|
|
||||||
|
*Kan påbörjas parallellt med Fas 1.*
|
||||||
|
|
||||||
|
**9. Länka microservice-importer i recipe-app:s Docker Compose**
|
||||||
|
Lägg till `importer-api`-tjänst i `recipe-app/compose.yml` (byggs från
|
||||||
|
`../microservice-importer` med `backend/Dockerfile`). Delar nätverket `recipe-internal` med recipe-app
|
||||||
|
backend. Sätt `IMPORTER_SERVICE_URL=http://importer-api:3001` i recipe-app
|
||||||
|
backend-tjänstens env.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Relevanta filer
|
||||||
|
|
||||||
|
| Fil | Förändring |
|
||||||
|
|---|---|
|
||||||
|
| `microservice-importer/backend/src/web-scraping-service/` | Ny ImageParser, imageUrl i svar |
|
||||||
|
| `microservice-importer/backend/src/` | Ny `receipt-parsing/` modul |
|
||||||
|
| `recipe-app/backend/src/quick-import/quick-import.service.ts` | Ersätt lokal parsning med HTTP-anrop |
|
||||||
|
| `recipe-app/backend/src/receipt-import/receipt-import.service.ts` | AI-del delegeras, matchning behålls |
|
||||||
|
| `recipe-app/backend/src/recipes/recipes.service.ts` | parseMarkdown delegeras, matchning behålls |
|
||||||
|
| `recipe-app/backend/src/app.module.ts` | Registrera HttpModule |
|
||||||
|
| `recipe-app/backend/package.json` | Ta bort pdf-parse, tesseract.js |
|
||||||
|
| `recipe-app/compose.yml` | Lägg till importer-api tjänst |
|
||||||
|
| `recipe-app/frontend/` | **Ändras inte** |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Verifiering
|
||||||
|
|
||||||
|
1. `POST /api/quick-import` (recipe-app backend) med ICA-URL → samma svar som idag
|
||||||
|
2. `POST /api/quick-import` med PDF-fil → samma svar
|
||||||
|
3. `POST /api/recipes/parse-markdown` med markdown → ingredienser med produkt-ID:n
|
||||||
|
4. `POST /api/receipt-import` med kvittobild → matchade items med DB-produkt-ID:n
|
||||||
|
5. Autentisering fungerar (hanteras av recipe-app backend som tidigare)
|
||||||
|
6. `docker compose up` startar microservice-importer som intern tjänst
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Avgränsningar
|
||||||
|
|
||||||
|
- **Frontend ändras inte** — samma proxy-routes, samma API-kontrakt
|
||||||
|
- **Auth stannar i recipe-app backend** — microservice-importer exponeras bara internt på Docker-nätverket
|
||||||
|
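- **Bildoptimering** behålls i recipe-app: vid sparande sker den i `RecipesService.create()`, och vid quick-import laddar `QuickImportService` dessutom ner och optimerar receptbilden lokalt via `downloadAndOptimizeImage`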
|
||||||
|
- `receipt-import` splittad: AI-del → microservice, produktmatchning + DB → recipe-app backend
|
||||||
Reference in New Issue
Block a user