import {
  BadRequestException,
  Injectable,
  ServiceUnavailableException,
  UnsupportedMediaTypeException,
} from '@nestjs/common';
import * as fs from 'node:fs/promises';
import * as path from 'node:path';
import * as pdfParse from 'pdf-parse';
import { createWorker } from 'tesseract.js';
import { IcaRecipeParser } from './parsers/ica.parser';
import { GenericRecipeParser } from './parsers/generic.parser';
import { RecipeParser } from './parsers/base.parser';

/** Outcome of a quick import: the recipe rendered as Markdown and the kind of source it came from. */
export interface QuickImportResult {
  markdown: string;
  source: 'ica' | 'pdf' | 'image' | 'other';
}

type UploadKind = 'pdf' | 'image';

/** Image file extensions (lower-case, with leading dot) accepted for OCR import. */
const IMAGE_EXTENSIONS: readonly string[] = ['.png', '.jpg', '.jpeg', '.webp', '.bmp'];

/** MIME type reported for each supported file extension when reading a file from disk. */
const MIME_TYPE_BY_EXTENSION = new Map<string, string>([
  ['.pdf', 'application/pdf'],
  ['.png', 'image/png'],
  ['.jpg', 'image/jpeg'],
  ['.jpeg', 'image/jpeg'],
  ['.webp', 'image/webp'],
  ['.bmp', 'image/bmp'],
]);

/**
 * Imports recipes from web pages, PDF files and images and converts them to Markdown.
 */
@Injectable()
export class QuickImportService {
  /**
   * Detects the type of input (URL or file path) and imports from the matching source.
   *
   * @param input An http(s) URL or a path to a local PDF/image file.
   * @returns The imported recipe as Markdown together with its source kind.
   * @throws BadRequestException if the input is empty, is neither a URL nor a
   *   file-like path, or the local file cannot be read.
   */
  async importFromInput(input: string): Promise<QuickImportResult> {
    const trimmed = input.trim();
    console.log('[QuickImport] Mottog input:', trimmed);

    if (!trimmed) {
      throw new BadRequestException('Du måste ange en URL eller ladda upp en fil');
    }

    if (this.isUrl(trimmed)) {
      console.log('[QuickImport] Detekterade URL, försöker scrapa...');
      return this.scrapeRecipeFromUrl(trimmed);
    }

    if (this.looksLikeLocalFile(trimmed)) {
      console.log('[QuickImport] Försöker läsa lokal fil:', trimmed);

      // NOTE(review): this reads an arbitrary path on the machine running the
      // service. If `input` can come from an untrusted client, restrict the
      // path to an allowed directory.
      let buffer: Buffer;
      try {
        buffer = await fs.readFile(trimmed);
      } catch (error) {
        console.error('[QuickImport] Kunde inte läsa lokal fil:', error);
        throw new BadRequestException(
          'Kunde inte läsa filen. Använd filuppladdning i gränssnittet eller kontrollera sökvägen.',
        );
      }

      // Only the fields read by importFromUpload are populated, hence the assertion.
      return this.importFromUpload({
        buffer,
        originalname: path.basename(trimmed),
        mimetype: this.getMimeTypeFromExtension(trimmed),
      } as Express.Multer.File);
    }

    throw new BadRequestException(
      'Ogiltig input. Ange en giltig URL eller ladda upp en PDF- eller bildfil.',
    );
  }

  /**
   * Imports a recipe from an uploaded PDF or image file.
   *
   * @param file The uploaded file; `buffer`, `originalname` and `mimetype` are read.
   * @returns The extracted text as Markdown, with source `'pdf'` or `'image'`.
   * @throws BadRequestException if no file buffer is present or no text is found.
   * @throws UnsupportedMediaTypeException if the file is neither a PDF nor a supported image.
   * @throws ServiceUnavailableException if PDF parsing or OCR fails.
   */
  async importFromUpload(file: Express.Multer.File): Promise<QuickImportResult> {
    if (!file?.buffer) {
      throw new BadRequestException('Ingen fil skickades med.');
    }

    console.log('[QuickImport] Mottog uppladdad fil:', file.originalname, file.mimetype);

    const kind = this.getUploadKind(file);
    const text =
      kind === 'pdf'
        ? await this.extractTextFromPdf(file.buffer)
        : await this.extractTextFromImage(file.buffer);

    return {
      markdown: this.normalizeImportedTextToMarkdown(text, file.originalname),
      source: kind,
    };
  }

  /**
   * Checks whether the input is an http(s) URL.
   *
   * Only `http:` and `https:` are accepted. `new URL()` also parses strings
   * such as `C:\recipes\a.pdf` (scheme `c:`), which must be treated as file
   * paths rather than sent to `fetch`.
   */
  private isUrl(input: string): boolean {
    try {
      const { protocol } = new URL(input);
      return protocol === 'http:' || protocol === 'https:';
    } catch {
      return false;
    }
  }

  /**
   * Heuristic: the input contains a path separator or ends with a supported
   * file extension.
   */
  private looksLikeLocalFile(input: string): boolean {
    if (/[\\/]/.test(input)) {
      return true;
    }
    const lower = input.toLowerCase();
    return lower.endsWith('.pdf') || IMAGE_EXTENSIONS.some((ext) => lower.endsWith(ext));
  }

  /**
   * Maps a file name's extension to a MIME type.
   *
   * @returns The MIME type, or `application/octet-stream` for unknown extensions.
   */
  private getMimeTypeFromExtension(filename: string): string {
    const ext = path.extname(filename).toLowerCase();
    return MIME_TYPE_BY_EXTENSION.get(ext) ?? 'application/octet-stream';
  }

  /**
   * Classifies an upload as PDF or image from its MIME type or file name.
   *
   * @throws UnsupportedMediaTypeException if it is neither.
   */
  private getUploadKind(
    file: Pick<Express.Multer.File, 'mimetype' | 'originalname'>,
  ): UploadKind {
    const type = (file.mimetype ?? '').toLowerCase();
    const name = (file.originalname ?? '').toLowerCase();

    if (type.includes('pdf') || name.endsWith('.pdf')) {
      return 'pdf';
    }

    if (type.startsWith('image/') || IMAGE_EXTENSIONS.some((ext) => name.endsWith(ext))) {
      return 'image';
    }

    throw new UnsupportedMediaTypeException(
      'Endast PDF, PNG, JPG, JPEG, WEBP och BMP stöds.',
    );
  }

  /**
   * Extracts the text layer of a PDF.
   *
   * @throws BadRequestException if the PDF contains no readable text.
   * @throws ServiceUnavailableException if parsing fails for any other reason.
   */
  private async extractTextFromPdf(buffer: Buffer): Promise<string> {
    try {
      const result = await pdfParse(buffer);
      // Strip NUL characters from the extracted text before trimming.
      const text = result.text?.replace(/\u0000/g, '').trim();

      if (!text) {
        throw new BadRequestException(
          'PDF-filen saknar läsbar text. Prova bildimport om det är en skannad sida.',
        );
      }

      return text;
    } catch (error) {
      if (error instanceof BadRequestException) {
        throw error;
      }
      console.error('[QuickImport] PDF ERROR:', error);
      throw new ServiceUnavailableException('PDF-importen misslyckades.');
    }
  }

  /**
   * Runs OCR (Swedish + English) on an image and returns the recognised text.
   *
   * @throws BadRequestException if no text is recognised.
   * @throws ServiceUnavailableException if the OCR worker cannot be created or recognition fails.
   */
  private async extractTextFromImage(buffer: Buffer): Promise<string> {
    let worker: Awaited<ReturnType<typeof createWorker>> | undefined;

    try {
      // Created inside the try so that initialisation failures are also
      // reported as ServiceUnavailableException.
      worker = await createWorker('swe+eng');
      const result = await worker.recognize(buffer);
      const text = result.data.text?.trim();

      if (!text) {
        throw new BadRequestException('Ingen text hittades i bilden.');
      }

      return text;
    } catch (error) {
      if (error instanceof BadRequestException) {
        throw error;
      }
      console.error('[QuickImport] OCR ERROR:', error);
      throw new ServiceUnavailableException('OCR-importen misslyckades.');
    } finally {
      await worker?.terminate();
    }
  }

  /**
   * Turns raw extracted text into a simple recipe Markdown document.
   *
   * The first line longer than three characters becomes the title. Remaining
   * lines are sorted into ingredients or instructions based on section
   * headings; before any heading is seen, the first content line decides the
   * section via {@link looksLikeIngredientLine}.
   *
   * @param text Raw text from PDF extraction or OCR.
   * @param sourceName Optional name appended as a `Källa:` line.
   * @throws BadRequestException if the text is empty after clean-up.
   */
  private normalizeImportedTextToMarkdown(text: string, sourceName?: string): string {
    const cleanedText = text
      .replace(/\r/g, '')
      .replace(/[ \t]+/g, ' ')
      .replace(/\n{3,}/g, '\n\n')
      .trim();

    if (!cleanedText) {
      throw new BadRequestException('Ingen läsbar text hittades i filen.');
    }

    const rawLines = cleanedText.split('\n');
    const title =
      rawLines.find((line) => line.trim().length > 3)?.trim() ?? 'Importerat recept';

    const ingredients: string[] = [];
    const instructions: string[] = [];
    let section: 'unknown' | 'ingredients' | 'instructions' = 'unknown';

    for (const rawLine of rawLines) {
      const line = rawLine.trim();
      // Skip blank lines and any line that repeats the title.
      if (!line || line === title) {
        continue;
      }

      const lower = line.toLowerCase();

      if (/^ingred/i.test(lower)) {
        section = 'ingredients';
        continue;
      }

      if (
        /^(gör så här|gor sa har|instruktioner|tillvägagångssätt|tillvagagangssatt|method|instructions)/i.test(
          lower,
        )
      ) {
        section = 'instructions';
        continue;
      }

      if (section === 'unknown') {
        section = this.looksLikeIngredientLine(line) ? 'ingredients' : 'instructions';
      }

      if (section === 'ingredients') {
        ingredients.push(line.startsWith('-') ? line : `- ${line}`);
      } else {
        instructions.push(line);
      }
    }

    // Blank separator lines are kept on purpose; only the optional source
    // line is conditional.
    const output: string[] = [
      `# ${title}`,
      '',
      '## Ingredienser',
      ...(ingredients.length > 0 ? ingredients : ['- Komplettera ingredienser manuellt']),
      '',
      '## Tillvägagångssätt',
      ...(instructions.length > 0 ? instructions : ['Komplettera tillagningsstegen manuellt.']),
    ];

    if (sourceName) {
      output.push('', `Källa: ${sourceName}`);
    }

    return output.join('\n');
  }

  /**
   * Heuristic: the line starts with a bullet or a number, or contains one of
   * the listed measurement units as a whole word.
   */
  private looksLikeIngredientLine(line: string): boolean {
    return (
      /^[-*•]\s+/.test(line) ||
      /^\d+[.,]?\d*\s+/.test(line) ||
      /\b(g|kg|hg|mg|ml|dl|cl|l|tsk|msk|krm|st|pkt|förp|klyfta)\b/i.test(line)
    );
  }

  /**
   * Scrapes a recipe from a URL.
   *
   * Uses the first parser in the list whose `canHandle(url)` returns true;
   * the generic parser is listed last as the fallback.
   *
   * @param url URL of the recipe page.
   * @returns The recipe as Markdown, with source `'ica'` when the URL matches
   *   `ica.se` and `'other'` otherwise.
   * @throws BadRequestException for any failure (HTTP error, no parser result,
   *   missing recipe name).
   */
  private async scrapeRecipeFromUrl(url: string): Promise<QuickImportResult> {
    try {
      console.log('[QuickImport] Hämtar HTML från:', url);

      // NOTE(review): the URL is caller-supplied and no timeout is set on this
      // request; consider host restrictions and an abort signal if the input
      // is untrusted.
      const response = await fetch(url, {
        headers: {
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        },
      });

      console.log('[QuickImport] HTTP status:', response.status);
      if (!response.ok) {
        throw new Error(`HTTP ${response.status}: ${response.statusText}`);
      }

      const html = await response.text();
      console.log('[QuickImport] HTML längd:', html.length, 'tecken');

      const parsers: RecipeParser[] = [new IcaRecipeParser(), new GenericRecipeParser()];
      const parser = parsers.find((candidate) => candidate.canHandle(url));
      if (parser) {
        console.log('[QuickImport] Använder parser:', parser.constructor.name);
      }

      const recipe = parser ? parser.parse(html) : null;
      if (!recipe) {
        throw new Error('Ingen parserutrustning tillgänglig');
      }

      console.log('[QuickImport] Parsad recept:', {
        name: recipe.name,
        ingredienser: recipe.ingredients.length,
      });

      if (!recipe.name) {
        throw new Error('Kunde inte hitta receptnamn på sidan. Försök med en annan länk.');
      }

      const markdown = this.recipeToMarkdown(recipe, url);
      console.log('[QuickImport] Markdown genererad, längd:', markdown.length);

      const source: QuickImportResult['source'] = /ica\.se/i.test(url) ? 'ica' : 'other';

      return { markdown, source };
    } catch (err) {
      const message = err instanceof Error ? err.message : 'Okänt fel vid scraping';
      console.error('[QuickImport] ERROR:', message);
      throw new BadRequestException(
        `Kunde inte hämta recept: ${message}. Kontrollera att länken är korrekt och försök igen.`,
      );
    }
  }

  /**
   * Converts a recipe object to Markdown.
   *
   * Sections (description, ingredients, instructions, source link) are only
   * emitted when the corresponding data is present. A quantity is printed
   * only when it is greater than zero.
   *
   * @param recipe Parsed recipe.
   * @param sourceUrl Optional URL rendered as a source link at the bottom.
   */
  private recipeToMarkdown(
    recipe: {
      name: string;
      description?: string;
      ingredients: Array<{
        quantity: number;
        unit: string;
        name: string;
        note?: string;
      }>;
      instructions?: string;
    },
    sourceUrl?: string,
  ): string {
    const lines: string[] = [`# ${recipe.name}`, ''];

    if (recipe.description) {
      lines.push(recipe.description, '');
    }

    if (recipe.ingredients.length > 0) {
      lines.push('## Ingredienser');
      for (const ing of recipe.ingredients) {
        const quantity = ing.quantity > 0 ? `${ing.quantity} ` : '';
        const unit = ing.unit ? `${ing.unit} ` : '';
        const note = ing.note ? ` (${ing.note})` : '';
        lines.push(`- ${quantity}${unit}${ing.name}${note}`);
      }
      lines.push('');
    }

    if (recipe.instructions) {
      lines.push('## Tillvägagångssätt', recipe.instructions, '');
    }

    if (sourceUrl) {
      lines.push('---', '', `Källa: [${sourceUrl}](${sourceUrl})`);
    }

    return lines.join('\n');
  }
}