diff --git a/backend/Dockerfile b/backend/Dockerfile
index 1d34461..adc11e6 100644
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -19,6 +19,12 @@ FROM node:22-alpine AS runner
 WORKDIR /app
 ENV NODE_ENV=production
 
+# Tesseract OCR system packages (NOTE(review): tesseract.js bundles its own WASM core — confirm these are actually used)
+RUN apk add --no-cache \
+    tesseract-ocr \
+    tesseract-ocr-data-swe \
+    tesseract-ocr-data-eng
+
 COPY --from=builder /app/package.json ./package.json
 COPY --from=builder /app/node_modules ./node_modules
 COPY --from=builder /app/dist ./dist
diff --git a/backend/src/app.module.ts b/backend/src/app.module.ts
index b2edd87..4a160c2 100644
--- a/backend/src/app.module.ts
+++ b/backend/src/app.module.ts
@@ -1,13 +1,25 @@
-import { Module } from '@nestjs/common';
+import { Module, Controller, Get } from '@nestjs/common';
 import { WebScrapingModule } from './web-scraping-service/web-scraping.module';
 import { RecipesModule } from './recipes/recipes.module';
 import { DocumentServiceModule } from './document-service/document-service.module';
+import { ReceiptParsingModule } from './receipt-parsing/receipt-parsing.module';
+
+/** Health endpoint: GET /health answers with a static "ok" payload. */
+@Controller('health')
+class HealthController {
+  @Get()
+  check() {
+    return { status: 'ok' };
+  }
+}
 
 @Module({
   imports: [
     DocumentServiceModule,
     WebScrapingModule,
     RecipesModule,
+    ReceiptParsingModule,
   ],
+  controllers: [HealthController],
 })
 export class AppModule {}
diff --git a/backend/src/receipt-parsing/receipt-parsing.controller.ts b/backend/src/receipt-parsing/receipt-parsing.controller.ts
new file mode 100644
index 0000000..c8443e8
--- /dev/null
+++ b/backend/src/receipt-parsing/receipt-parsing.controller.ts
@@ -0,0 +1,64 @@
+import {
+  Controller,
+  HttpCode,
+  Post,
+  UploadedFile,
+  UseInterceptors,
+  BadRequestException,
+} from '@nestjs/common';
+import { FileInterceptor } from '@nestjs/platform-express';
+import { memoryStorage } from 'multer';
+import { ReceiptParsingService, ParsedReceiptItemRaw } from './receipt-parsing.service';
+
+/** MIME types accepted by the upload filter (octet-stream is presumably for clients that send no specific type). */
+const ALLOWED_MIMES = new Set([
+  'image/jpeg',
+  'image/jpg',
+  'image/png',
+  'image/webp',
+  'image/heic',
+  'image/heif',
+  'application/pdf',
+  'application/octet-stream',
+]);
+
+/** HTTP entry point for receipt uploads (POST /receipt-import/parse). */
+@Controller('receipt-import')
+export class ReceiptParsingController {
+  constructor(private readonly receiptParsingService: ReceiptParsingService) {}
+
+  /**
+   * Accepts one multipart upload in the "file" field (max 15 MiB, kept in memory)
+   * and returns the items the service extracted from it.
+   *
+   * @throws BadRequestException when the MIME type is not allowed or no file was sent.
+   */
+  @Post('parse')
+  @HttpCode(200)
+  @UseInterceptors(
+    FileInterceptor('file', {
+      storage: memoryStorage(),
+      limits: { fileSize: 15 * 1024 * 1024 },
+      fileFilter: (_req, file, cb) => {
+        if (ALLOWED_MIMES.has(file.mimetype)) {
+          cb(null, true);
+        } else {
+          cb(
+            new BadRequestException(
+              `Filtypen "${file.mimetype}" är inte tillåten. Använd JPEG, PNG, WebP, HEIC eller PDF.`,
+            ),
+            false,
+          );
+        }
+      },
+    }),
+  )
+  async parseReceipt(
+    @UploadedFile() file: Express.Multer.File,
+  ): Promise<ParsedReceiptItemRaw[]> {
+    if (!file) {
+      throw new BadRequestException('Ingen fil bifogades. Skicka en bild eller PDF under fältnamnet "file".');
+    }
+    return this.receiptParsingService.parseReceipt(file);
+  }
+}
diff --git a/backend/src/receipt-parsing/receipt-parsing.module.ts b/backend/src/receipt-parsing/receipt-parsing.module.ts
new file mode 100644
index 0000000..c496e88
--- /dev/null
+++ b/backend/src/receipt-parsing/receipt-parsing.module.ts
@@ -0,0 +1,9 @@
+import { Module } from '@nestjs/common';
+import { ReceiptParsingController } from './receipt-parsing.controller';
+import { ReceiptParsingService } from './receipt-parsing.service';
+
+@Module({
+  controllers: [ReceiptParsingController],
+  providers: [ReceiptParsingService],
+})
+export class ReceiptParsingModule {}
diff --git a/backend/src/receipt-parsing/receipt-parsing.service.ts b/backend/src/receipt-parsing/receipt-parsing.service.ts
new file mode 100644
index 0000000..b480ecd
--- /dev/null
+++ b/backend/src/receipt-parsing/receipt-parsing.service.ts
@@ -0,0 +1,231 @@
+import {
+  BadRequestException,
+  Injectable,
+  Logger,
+  ServiceUnavailableException,
+} from '@nestjs/common';
+import * as pdfParse from 'pdf-parse';
+
+const MISTRAL_API_URL = 'https://api.mistral.ai/v1/chat/completions';
+const RECEIPT_IMPORT_MODEL = 'mistral-small-2603';
+/** Maximum number of attempts per Mistral request; only HTTP 429/503 are retried. */
+const MAX_RETRIES = 3;
+
+/** Swedish instruction prompt sent together with a receipt image. */
+const IMAGE_PROMPT = `Du är en kvittoläsare. Analysera detta kvitto och returnera ENDAST en JSON-array med alla köpta varor.
+Varje vara ska ha följande fält:
+- "rawName": varans namn som det står på kvittot (sträng)
+- "quantity": antal eller mängd som ett tal (t.ex. 1, 2, 0.5)
+- "unit": enhet — välj ett av: "st", "kg", "g", "l", "dl", "cl", "ml", "förp", "pak", "burk", "flaska"
+- "price": pris i SEK som ett tal, eller null
+- "brand": märke eller leverantör om det tydligt framgår av varunamnet (t.ex. "Arla", "ICA", "Oatly"), annars null
+- "origin": ursprungsland om det framgår av varunamnet (t.ex. "Brasilien", "Sverige", "Italien"), annars null
+
+Returnera BARA JSON-arrayen utan markdown-formatering.`;
+
+/** Builds the Swedish instruction prompt for raw receipt text extracted from a PDF. */
+const TEXT_PROMPT = (text: string) =>
+  `Du är en kvittoläsare. Nedan följer rå text från ett kvitto. Analysera texten och returnera ENDAST en JSON-array med alla köpta varor.
+Varje vara ska ha följande fält:
+- "rawName": varans namn som det står på kvittot (sträng)
+- "quantity": antal eller mängd som ett tal (t.ex. 1, 2, 0.5)
+- "unit": enhet — välj ett av: "st", "kg", "g", "l", "dl", "cl", "ml", "förp", "pak", "burk", "flaska"
+- "price": pris i SEK som ett tal, eller null
+- "brand": märke eller leverantör om det tydligt framgår av varunamnet (t.ex. "Arla", "ICA", "Oatly"), annars null
+- "origin": ursprungsland om det framgår av varunamnet (t.ex. "Brasilien", "Sverige", "Italien"), annars null
+
+Returnera BARA JSON-arrayen utan markdown-formatering.
+
+Kvittotext:
+${text}`;
+
+/** One purchased item as returned by the model; individual fields are not validated here. */
+export interface ParsedReceiptItemRaw {
+  rawName: string;
+  quantity: number;
+  unit: string;
+  price?: number | null;
+  brand?: string | null;
+  origin?: string | null;
+}
+
+/** Extracts purchased items from receipt images and PDFs via the Mistral chat-completions API. */
+@Injectable()
+export class ReceiptParsingService {
+  private readonly logger = new Logger(ReceiptParsingService.name);
+
+  /**
+   * Routes an uploaded receipt to the PDF or the image pipeline.
+   *
+   * @param file Multer upload whose content is held in memory (`file.buffer`).
+   * @returns The items parsed from the model response.
+   * @throws ServiceUnavailableException when MISTRAL_API_KEY is not set.
+   */
+  async parseReceipt(file: Express.Multer.File): Promise<ParsedReceiptItemRaw[]> {
+    const apiKey = process.env.MISTRAL_API_KEY;
+    if (!apiKey) {
+      throw new ServiceUnavailableException('MISTRAL_API_KEY är inte konfigurerad');
+    }
+
+    // application/octet-stream says nothing about the content, so such uploads
+    // count as PDF only when the payload carries a PDF header; otherwise they
+    // fall through to the image path, which maps octet-stream to image/jpeg.
+    const isPdf =
+      file.mimetype === 'application/pdf' ||
+      file.originalname?.toLowerCase().endsWith('.pdf') ||
+      (file.mimetype === 'application/octet-stream' && this.looksLikePdf(file.buffer));
+
+    if (isPdf) {
+      return this.parseReceiptFromPdf(file.buffer, apiKey);
+    }
+    return this.parseReceiptFromImage(file.buffer, file.mimetype, apiKey);
+  }
+
+  /** Returns true when "%PDF-" occurs within the first 1024 bytes of the buffer. */
+  private looksLikePdf(buffer: Buffer): boolean {
+    return buffer.subarray(0, 1024).includes('%PDF-');
+  }
+
+  /**
+   * POSTs the request body to Mistral, retrying on HTTP 429/503 with a linearly
+   * growing delay (2 s, 4 s, ...). Any other response is returned as-is.
+   *
+   * @throws ServiceUnavailableException when the final attempt still receives 429/503.
+   */
+  private async callMistralWithRetry(body: object, apiKey: string, source: string): Promise<Response> {
+    for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
+      const response = await fetch(MISTRAL_API_URL, {
+        method: 'POST',
+        headers: {
+          'Content-Type': 'application/json',
+          Authorization: `Bearer ${apiKey}`,
+        },
+        body: JSON.stringify(body),
+      });
+
+      if (response.status === 503 || response.status === 429) {
+        const err = await response.text();
+        this.logger.warn(`Mistral ${response.status} (${source}, försök ${attempt}/${MAX_RETRIES}): ${err}`);
+        if (attempt < MAX_RETRIES) {
+          await new Promise((r) => setTimeout(r, attempt * 2000));
+          continue;
+        }
+        throw new ServiceUnavailableException('Mistral API: Tjänsten tillfälligt otillgänglig');
+      }
+
+      return response;
+    }
+    throw new ServiceUnavailableException('Kunde inte nå Mistral API efter flera försök');
+  }
+
+  /** Sends the image as a base64 data URL together with IMAGE_PROMPT and parses the reply. */
+  private async parseReceiptFromImage(
+    buffer: Buffer,
+    mimeType: string,
+    apiKey: string,
+  ): Promise<ParsedReceiptItemRaw[]> {
+    const effectiveMime = mimeType === 'application/octet-stream' ? 'image/jpeg' : mimeType;
+    const base64 = buffer.toString('base64');
+    const response = await this.callMistralWithRetry(
+      {
+        model: RECEIPT_IMPORT_MODEL,
+        messages: [
+          {
+            role: 'user',
+            content: [
+              {
+                type: 'image_url',
+                image_url: { url: `data:${effectiveMime};base64,${base64}` },
+              },
+              { type: 'text', text: IMAGE_PROMPT },
+            ],
+          },
+        ],
+        max_tokens: 2000,
+        temperature: 0.1,
+      },
+      apiKey,
+      'bild',
+    );
+    return this.extractItemsFromMistralResponse(response, 'bild');
+  }
+
+  /**
+   * Extracts the PDF's text with pdf-parse and sends it with TEXT_PROMPT.
+   *
+   * @throws BadRequestException when the PDF is unreadable or yields fewer than 20 characters of text.
+   */
+  private async parseReceiptFromPdf(
+    buffer: Buffer,
+    apiKey: string,
+  ): Promise<ParsedReceiptItemRaw[]> {
+    let pdfText: string;
+    try {
+      const parsed = await pdfParse(buffer);
+      pdfText = parsed.text?.trim();
+    } catch {
+      throw new BadRequestException('Kunde inte läsa PDF-filen. Kontrollera att filen inte är skadad.');
+    }
+
+    if (!pdfText || pdfText.length < 20) {
+      throw new BadRequestException(
+        'PDF-filen verkar inte innehålla läsbar text. Prova att fotografera kvittot istället.',
+      );
+    }
+
+    this.logger.log(`PDF-text extraherad (${pdfText.length} tecken)`);
+
+    const response = await this.callMistralWithRetry(
+      {
+        model: RECEIPT_IMPORT_MODEL,
+        messages: [{ role: 'user', content: TEXT_PROMPT(pdfText) }],
+        max_tokens: 2000,
+        temperature: 0.1,
+      },
+      apiKey,
+      'PDF',
+    );
+    return this.extractItemsFromMistralResponse(response, 'PDF');
+  }
+
+  /**
+   * Turns a Mistral HTTP response into receipt items.
+   *
+   * @throws ServiceUnavailableException on a non-OK HTTP response.
+   * @throws BadRequestException when the model output is not a JSON array.
+   */
+  private async extractItemsFromMistralResponse(
+    response: Response,
+    source: string,
+  ): Promise<ParsedReceiptItemRaw[]> {
+    if (!response.ok) {
+      const err = await response.text();
+      this.logger.error(`Mistral API svarade ${response.status} (${source}): ${err}`);
+      const hint =
+        response.status === 401
+          ? 'Ogiltig API-nyckel (401)'
+          : response.status === 429
+            ? 'För många förfrågningar (429)'
+            : `HTTP ${response.status}`;
+      throw new ServiceUnavailableException(`Mistral API returnerade ett fel: ${hint}`);
+    }
+
+    const data = (await response.json()) as {
+      choices: { message: { content: string } }[];
+    };
+    const content = data.choices?.[0]?.message?.content ?? '[]';
+
+    try {
+      // Strip markdown code fences the model may add despite the prompt.
+      const clean = content.replace(/```(?:json)?/gi, '').trim();
+      const items = JSON.parse(clean);
+      if (!Array.isArray(items)) throw new Error('Inte en array');
+      return items as ParsedReceiptItemRaw[];
+    } catch {
+      this.logger.error(`Kunde inte parsa Mistral-svar (${source}):`, content);
+      throw new BadRequestException(
+        `Kvittot kunde inte tolkas. Försök med en tydligare ${source === 'PDF' ? 'PDF' : 'bild'}.`,
+      );
+    }
+  }
+}
diff --git a/backend/src/web-scraping-service/controllers/quick-import.controller.ts b/backend/src/web-scraping-service/controllers/quick-import.controller.ts
index f0d9195..791d777 100644
--- a/backend/src/web-scraping-service/controllers/quick-import.controller.ts
+++ b/backend/src/web-scraping-service/controllers/quick-import.controller.ts
@@ -1,4 +1,13 @@
-import { Controller, Post, Body } from '@nestjs/common';
+import {
+  Controller,
+  Post,
+  Body,
+  HttpCode,
+  UploadedFile,
+  UseInterceptors,
+} from '@nestjs/common';
+import { FileInterceptor } from '@nestjs/platform-express';
+import { memoryStorage } from 'multer';
 import { QuickImportService, QuickImportResult } from '../services/quick-import.service';
 
 @Controller('quick-import')
@@ -6,9 +15,21 @@ export class QuickImportController {
   constructor(private readonly quickImportService: QuickImportService) {}
 
   @Post()
+  @HttpCode(200)
+  @UseInterceptors(
+    FileInterceptor('file', {
+      storage: memoryStorage(),
+      limits: { fileSize: 10 * 1024 * 1024 },
+    }),
+  )
   async importFromInput(
-    @Body() body: { input: string }
+    @Body() body: { input?: string },
+    @UploadedFile() file?: Express.Multer.File,
   ): Promise<QuickImportResult> {
-    return this.quickImportService.importFromInput(body.input);
+    // An uploaded file takes precedence; the body may be absent when nothing was parsed.
+    if (file) {
+      return this.quickImportService.importFromUpload(file);
+    }
+    return this.quickImportService.importFromInput(body?.input ?? '');
   }
 }
diff --git a/backend/src/web-scraping-service/parsers/base.parser.ts b/backend/src/web-scraping-service/parsers/base.parser.ts
index b698495..6413680 100644
--- a/backend/src/web-scraping-service/parsers/base.parser.ts
+++ b/backend/src/web-scraping-service/parsers/base.parser.ts
@@ -12,6 +12,7 @@ export interface ParsedRecipe {
     note?: string;
   }>;
   instructions?: string;
+  imageUrl?: string;
 }
 
 export abstract class RecipeParser {
diff --git a/backend/src/web-scraping-service/parsers/ica.parser.ts b/backend/src/web-scraping-service/parsers/ica.parser.ts
index 361a710..9073157 100644
--- a/backend/src/web-scraping-service/parsers/ica.parser.ts
+++ b/backend/src/web-scraping-service/parsers/ica.parser.ts
@@ -76,11 +76,24 @@ export class IcaRecipeParser extends RecipeParser {
       }
     }
 
+    // Extract the image URL; `image` may be a string, an array, or an object with a `url`
+    let imageUrl: string | undefined;
+    if (recipe.image) {
+      if (typeof recipe.image === 'string') {
+        imageUrl = recipe.image;
+      } else if (Array.isArray(recipe.image) && recipe.image.length > 0) {
+        imageUrl = typeof recipe.image[0] === 'string' ? recipe.image[0] : recipe.image[0]?.url;
+      } else if (typeof recipe.image === 'object' && recipe.image.url) {
+        imageUrl = recipe.image.url;
+      }
+    }
+
     return {
       name,
       description,
       ingredients,
       instructions,
+      imageUrl,
     };
   }
 
diff --git a/backend/src/web-scraping-service/services/quick-import.service.ts b/backend/src/web-scraping-service/services/quick-import.service.ts
index 7f876dd..39bddc8 100644
--- a/backend/src/web-scraping-service/services/quick-import.service.ts
+++ b/backend/src/web-scraping-service/services/quick-import.service.ts
@@ -1,51 +1,92 @@
-import { Injectable, BadRequestException } from '@nestjs/common';
-import { IcaRecipeParser } from './parsers/ica.parser';
-import { GenericRecipeParser } from './parsers/generic.parser';
-import { RecipeParser } from './parsers/base.parser';
+import { Injectable, BadRequestException, Logger } from '@nestjs/common';
+import { IcaRecipeParser } from '../parsers/ica.parser';
+import { GenericRecipeParser } from '../parsers/generic.parser';
+import { RecipeParser } from '../parsers/base.parser';
+import { createWorker } from 'tesseract.js';
+import * as pdfParse from 'pdf-parse';
 
 export interface QuickImportResult {
   markdown: string;
-  source: 'ica' | 'pdf' | 'other';
+  source: 'ica' | 'pdf' | 'image' | 'other';
+  imageUrl?: string;
+  imageWarning?: string;
 }
 
 @Injectable()
 export class QuickImportService {
-  /**
-   * Detekterar typ av input (URL eller filsökväg) och importerar från lämplig källa
-   */
+  private readonly logger = new Logger(QuickImportService.name);
+
   async importFromInput(input: string): Promise<QuickImportResult> {
     input = input.trim();
-    console.log('[QuickImport] Mottog input:', input);
+    this.logger.log(`Mottog input: ${input}`);
 
     if (!input) {
       throw new BadRequestException('Du måste ange en URL eller filsökväg');
     }
 
-    // Detektera typ
-    const isUrl = this.isUrl(input);
-    const isPdf = this.isPdfPath(input);
-
-    console.log('[QuickImport] isUrl:', isUrl, 'isPdf:', isPdf);
-
-    if (isUrl) {
-      console.log('[QuickImport] Detekterade URL, försöker scrapa...');
+    if (this.isUrl(input)) {
+      this.logger.log('Detekterade URL, försöker scrapa...');
       return this.scrapeRecipeFromUrl(input);
-    } else if (isPdf) {
-      console.log('[QuickImport] Detekterade PDF-fil');
-      throw new BadRequestException(
-        'PDF-import under utveckling. Försök med en URL från ICA.se eller annat receptsida.'
-      );
-    } else {
-      console.log('[QuickImport] Input är inte URL eller PDF');
-      throw new BadRequestException(
-        'Ogültig input. Ange en gyltig URL (t.ex. ica.se/recept/...) eller filsökväg'
-      );
+    }
+
+    throw new BadRequestException(
+      'Ogiltig input. Ange en giltig URL (t.ex. ica.se/recept/...) eller ladda upp en fil.',
+    );
+  }
+
+  /** Routes an uploaded file to the PDF or the OCR pipeline based on its MIME type and file name. */
+  async importFromUpload(file: Express.Multer.File): Promise<QuickImportResult> {
+    this.logger.log(`Fil mottagen: ${file.originalname} (${file.mimetype})`);
+
+    if (
+      file.mimetype === 'application/pdf' ||
+      file.mimetype === 'application/octet-stream' ||
+      file.originalname?.toLowerCase().endsWith('.pdf')
+    ) {
+      return this.importFromPdf(file.buffer);
+    }
+
+    if (file.mimetype.startsWith('image/')) {
+      return this.importFromImage(file.buffer);
+    }
+
+    throw new BadRequestException('Otillåten filtyp. Använd JPEG, PNG, WebP eller PDF.');
+  }
+
+  /** Returns the text extracted by pdf-parse in `markdown`, or a placeholder when the PDF has no text. */
+  private async importFromPdf(buffer: Buffer): Promise<QuickImportResult> {
+    try {
+      this.logger.log('Parsar PDF med pdf-parse...');
+      const data = await pdfParse(buffer);
+      const markdown = data.text?.trim() || '(Tom PDF)';
+      return { markdown, source: 'pdf' };
+    } catch (err) {
+      const message = err instanceof Error ? err.message : 'Okänt fel';
+      this.logger.error(`PDF-parsning misslyckades: ${message}`);
+      throw new BadRequestException('Kunde inte läsa PDF-filen. Kontrollera att filen inte är skadad.');
+    }
+  }
+
+  /** Runs Tesseract OCR (Swedish + English) on the image and returns the recognized text in `markdown`. */
+  private async importFromImage(buffer: Buffer): Promise<QuickImportResult> {
+    try {
+      this.logger.log('Parsar bild med Tesseract OCR...');
+      const worker = await createWorker('swe+eng');
+      try {
+        const result = await worker.recognize(buffer);
+        const markdown = result.data.text?.trim() || '(Tom bild)';
+        return { markdown, source: 'image' };
+      } finally {
+        // Always release the OCR worker, even when recognition throws.
+        await worker.terminate();
+      }
+    } catch (err) {
+      const message = err instanceof Error ? err.message : 'Okänt fel';
+      this.logger.error(`OCR misslyckades: ${message}`);
+      throw new BadRequestException('Kunde inte läsa text ur bilden. Prova en tydligare bild.');
     }
   }
 
-  /**
-   * Kontrollerar om input är en URL
-   */
   private isUrl(input: string): boolean {
     try {
       new URL(input);
@@ -55,28 +96,10 @@ export class QuickImportService {
     }
   }
 
-  /**
-   * Kontrollerar om input är en PDF-filsökväg
-   */
-  private isPdfPath(input: string): boolean {
-    const normalized = input.toLowerCase();
-    return normalized.endsWith('.pdf');
-  }
-
-  /**
-   * Skrapar recept från en URL
-   *
-   * Använder site-specifika parsers om tillgängliga,
-   * annars fallback till generisk parser.
-   *
-   * @param url URL till receptsidan
-   * @returns Markdown-format
-   */
   private async scrapeRecipeFromUrl(url: string): Promise<QuickImportResult> {
     try {
-      console.log('[QuickImport] Hämtar HTML från:', url);
+      this.logger.log(`Hämtar HTML från: ${url}`);
 
-      // Hämta HTML från URL
       const response = await fetch(url, {
         headers: {
           'User-Agent':
@@ -84,16 +107,15 @@ export class QuickImportService {
         },
       });
 
-      console.log('[QuickImport] HTTP status:', response.status);
+      this.logger.log(`HTTP status: ${response.status}`);
 
       if (!response.ok) {
         throw new Error(`HTTP ${response.status}: ${response.statusText}`);
       }
 
       const html = await response.text();
-      console.log('[QuickImport] HTML längd:', html.length, 'tecken');
+      this.logger.log(`HTML längd: ${html.length} tecken`);
 
-      // Välj lämplig parser
       const parsers: RecipeParser[] = [
         new IcaRecipeParser(),
         new GenericRecipeParser(),
@@ -102,7 +124,7 @@ export class QuickImportService {
       let recipe = null;
       for (const parser of parsers) {
         if (parser.canHandle(url)) {
-          console.log('[QuickImport] Använder parser:', parser.constructor.name);
+          this.logger.log(`Använder parser: ${parser.constructor.name}`);
           recipe = parser.parse(html);
           break;
         }
@@ -112,68 +134,74 @@ export class QuickImportService {
         throw new Error('Ingen parserutrustning tillgänglig');
       }
 
-      console.log('[QuickImport] Parsad recept:', {
-        name: recipe.name,
-        ingredienser: recipe.ingredients.length,
-      });
+      this.logger.log(`Parsad recept: ${recipe.name} (${recipe.ingredients.length} ingredienser)`);
 
       if (!recipe.name) {
         throw new Error('Kunde inte hitta receptnamn på sidan. Försök med en annan länk.');
       }
 
-      // Konvertera till Markdown-format
       const markdown = this.recipeToMarkdown(recipe, url);
-      console.log('[QuickImport] Markdown genererad, längd:', markdown.length);
+      this.logger.log(`Markdown genererad, längd: ${markdown.length}`);
 
-      // Detektera källa från URL
-      let source: 'ica' | 'pdf' | 'other' = 'other';
+      let source: 'ica' | 'pdf' | 'image' | 'other' = 'other';
       if (/ica\.se/i.test(url)) {
         source = 'ica';
       }
 
-      return {
-        markdown,
-        source,
-      };
+      // Normalize the parser's image URL (no download here; per the original note, recipe-app fetches it on save)
+      let imageUrl: string | undefined;
+      let imageWarning: string | undefined;
+      if (recipe.imageUrl) {
+        const normalized = this.normalizeImageUrl(recipe.imageUrl, url);
+        if (normalized) {
+          imageUrl = normalized;
+        } else {
+          imageWarning = 'Receptbild kunde inte tolkas till en giltig URL.';
+          this.logger.warn(`Kunde inte normalisera bild-URL: "${recipe.imageUrl}"`);
+        }
+      }
+
+      return { markdown, source, imageUrl, imageWarning };
     } catch (err) {
       const message = err instanceof Error ? err.message : 'Okänt fel vid scraping';
-      console.error('[QuickImport] ERROR:', message);
+      this.logger.error(`Scraping misslyckades: ${message}`);
       throw new BadRequestException(
-        `Kunde inte hämta recept: ${message}. Kontrollera att länken är korrekt och försök igen.`
+        `Kunde inte hämta recept: ${message}. Kontrollera att länken är korrekt och försök igen.`,
       );
     }
   }
 
-  /**
-   * Konvertera receptobjekt till Markdown-format
-   */
+  /** Resolves a relative or protocol-relative image URL against the page URL; null when it cannot be parsed. */
+  private normalizeImageUrl(rawImageUrl: string, pageUrl: string): string | null {
+    const trimmed = rawImageUrl.trim();
+    if (!trimmed) return null;
+    const protocolNormalized = trimmed.startsWith('//') ? `https:${trimmed}` : trimmed;
+    try {
+      return new URL(protocolNormalized, pageUrl).toString();
+    } catch {
+      return null;
+    }
+  }
+
   private recipeToMarkdown(
     recipe: {
       name: string;
       description?: string;
-      ingredients: Array<{
-        quantity: number;
-        unit: string;
-        name: string;
-        note?: string;
-      }>;
+      ingredients: Array<{ quantity: number; unit: string; name: string; note?: string }>;
       instructions?: string;
     },
     sourceUrl?: string,
   ): string {
     const lines: string[] = [];
 
-    // Titel
     lines.push(`# ${recipe.name}`);
     lines.push('');
 
-    // Beskrivning
     if (recipe.description) {
       lines.push(recipe.description);
       lines.push('');
     }
 
-    // Ingredienser
     if (recipe.ingredients.length > 0) {
       lines.push('## Ingredienser');
       for (const ing of recipe.ingredients) {
@@ -185,14 +213,12 @@ export class QuickImportService {
       lines.push('');
     }
 
-    // Instruktioner
     if (recipe.instructions) {
       lines.push('## Tillvägagångssätt');
       lines.push(recipe.instructions);
       lines.push('');
     }
 
-    // Källa
     if (sourceUrl) {
       lines.push('---');
       lines.push('');
@@ -202,3 +228,4 @@ export class QuickImportService {
     return lines.join('\n');
   }
 }
+
diff --git a/backend/src/web-scraping-service/services/web-scraping.module.ts b/backend/src/web-scraping-service/services/web-scraping.module.ts
deleted file mode 100644
index 4a05346..0000000
--- a/backend/src/web-scraping-service/services/web-scraping.module.ts
+++ /dev/null
@@ -1,9 +0,0 @@
-import { Module } from '@nestjs/common';
-import { QuickImportController } from './controllers/quick-import.controller';
-import { QuickImportService } from './services/quick-import.service';
-
-@Module({
-  controllers: [QuickImportController],
-  providers: [QuickImportService],
-})
-export class WebScrapingModule {}
\ No newline at end of file
diff --git a/backend/src/web-scraping-service/web-scraping.module.ts b/backend/src/web-scraping-service/web-scraping.module.ts
index 0dc61f8..652722a 100644
--- a/backend/src/web-scraping-service/web-scraping.module.ts
+++ b/backend/src/web-scraping-service/web-scraping.module.ts
@@ -1,9 +1,9 @@
 import { Module } from '@nestjs/common';
-import { QuickImportController } from './quick-import.controller';
-import { QuickImportService } from './quick-import.service';
+import { QuickImportController } from './controllers/quick-import.controller';
+import { QuickImportService } from './services/quick-import.service';
 
 @Module({
   controllers: [QuickImportController],
   providers: [QuickImportService],
 })
-export class QuickImportModule {}
+export class WebScrapingModule {}