Files
recipe-app/backend/src/quick-import/quick-import.service.ts
T

380 lines
11 KiB
TypeScript

import {
BadRequestException,
Injectable,
ServiceUnavailableException,
UnsupportedMediaTypeException,
} from '@nestjs/common';
import * as fs from 'node:fs/promises';
import * as path from 'node:path';
import * as pdfParse from 'pdf-parse';
import { createWorker } from 'tesseract.js';
import { IcaRecipeParser } from './parsers/ica.parser';
import { GenericRecipeParser } from './parsers/generic.parser';
import { RecipeParser } from './parsers/base.parser';
/** Where an imported recipe came from. */
export type QuickImportSource = 'ica' | 'pdf' | 'image' | 'other';

/** Outcome of a quick import: the recipe rendered as Markdown plus its origin. */
export interface QuickImportResult {
  /** The recipe as a Markdown document. */
  markdown: string;
  /** The kind of source the recipe was imported from. */
  source: QuickImportSource;
}

/** File categories accepted as uploads. */
type UploadKind = 'pdf' | 'image';
/**
 * Imports recipes from a URL, a local file path, or an uploaded PDF/image
 * and returns them as Markdown.
 */
@Injectable()
export class QuickImportService {
  /** Supported file extensions (lower-case, with leading dot) and their MIME types. */
  private static readonly mimeTypeByExtension: ReadonlyMap<string, string> = new Map([
    ['.pdf', 'application/pdf'],
    ['.png', 'image/png'],
    ['.jpg', 'image/jpeg'],
    ['.jpeg', 'image/jpeg'],
    ['.webp', 'image/webp'],
    ['.bmp', 'image/bmp'],
  ]);

  /** Matches a line that starts the ingredients section. */
  private static readonly ingredientHeading = /^ingred/i;

  /** Matches a line that starts the instructions section. */
  private static readonly instructionHeading =
    /^(gör så här|gor sa har|instruktioner|tillvägagångssätt|tillvagagangssatt|method|instructions)/i;

  /**
   * Detects the kind of input (URL or file path) and imports from the
   * matching source.
   *
   * @param input An http(s) URL or a path to a PDF/image file.
   * @returns The imported recipe as Markdown together with its source kind.
   * @throws BadRequestException when the input is empty, is neither a URL nor
   *   a file path, or the file cannot be read.
   */
  async importFromInput(input: string): Promise<QuickImportResult> {
    // The static type is not enforced at runtime, so tolerate a missing value
    // instead of crashing on `.trim()`.
    const trimmed = typeof input === 'string' ? input.trim() : '';
    console.log('[QuickImport] Mottog input:', trimmed);

    if (!trimmed) {
      throw new BadRequestException('Du måste ange en URL eller ladda upp en fil');
    }

    if (this.isUrl(trimmed)) {
      console.log('[QuickImport] Detekterade URL, försöker scrapa...');
      return this.scrapeRecipeFromUrl(trimmed);
    }

    if (this.looksLikeLocalFile(trimmed)) {
      console.log('[QuickImport] Försöker läsa lokal fil:', trimmed);

      // NOTE(review): this reads whatever path the caller supplies. If `input`
      // can come from an untrusted client, restrict it to an allowed directory.
      let buffer: Buffer;
      try {
        buffer = await fs.readFile(trimmed);
      } catch (error: unknown) {
        console.error('[QuickImport] Kunde inte läsa lokal fil:', error);
        throw new BadRequestException(
          'Kunde inte läsa filen. Använd filuppladdning i gränssnittet eller kontrollera sökvägen.',
        );
      }

      // Deliberately outside the try: parsing errors keep their own exception
      // types instead of being reported as an unreadable file.
      return this.importFromUpload({
        buffer,
        originalname: path.basename(trimmed),
        mimetype: this.getMimeTypeFromExtension(trimmed),
      } as Express.Multer.File);
    }

    throw new BadRequestException(
      'Ogiltig input. Ange en giltig URL eller ladda upp en PDF- eller bildfil.',
    );
  }

  /**
   * Extracts text from an uploaded PDF or image and converts it to Markdown.
   *
   * @param file The uploaded file; its `buffer` must hold the file contents.
   * @returns The imported recipe as Markdown together with its source kind.
   * @throws BadRequestException when no file content was provided or no text
   *   could be extracted.
   * @throws UnsupportedMediaTypeException when the file is neither PDF nor image.
   */
  async importFromUpload(file: Express.Multer.File): Promise<QuickImportResult> {
    if (!file?.buffer || file.buffer.length === 0) {
      throw new BadRequestException('Ingen fil skickades med.');
    }
    console.log('[QuickImport] Mottog uppladdad fil:', file.originalname, file.mimetype);

    const kind = this.getUploadKind(file);
    const text =
      kind === 'pdf'
        ? await this.extractTextFromPdf(file.buffer)
        : await this.extractTextFromImage(file.buffer);

    return {
      markdown: this.normalizeImportedTextToMarkdown(text, file.originalname),
      source: kind,
    };
  }

  /**
   * Checks whether the input is an http(s) URL.
   *
   * `new URL()` alone is too permissive: a Windows path such as `C:\a.pdf`
   * parses as a URL with scheme `c:`, so the protocol is checked explicitly.
   */
  private isUrl(input: string): boolean {
    try {
      const { protocol } = new URL(input);
      return protocol === 'http:' || protocol === 'https:';
    } catch {
      return false;
    }
  }

  /** Returns the supported extension that `filename` ends with, if any. */
  private findSupportedExtension(filename: string): string | undefined {
    const lower = filename.toLowerCase();
    for (const extension of QuickImportService.mimeTypeByExtension.keys()) {
      if (lower.endsWith(extension)) {
        return extension;
      }
    }
    return undefined;
  }

  /**
   * Heuristic: the input contains a path separator or ends with a supported
   * file extension.
   */
  private looksLikeLocalFile(input: string): boolean {
    return /[\\/]/.test(input) || this.findSupportedExtension(input) !== undefined;
  }

  /**
   * Maps a file name's extension to a MIME type, falling back to
   * `application/octet-stream` for unknown extensions.
   */
  private getMimeTypeFromExtension(filename: string): string {
    const extension = path.extname(filename).toLowerCase();
    return (
      QuickImportService.mimeTypeByExtension.get(extension) ?? 'application/octet-stream'
    );
  }

  /**
   * Classifies an upload as PDF or image from its MIME type or file name.
   *
   * @throws UnsupportedMediaTypeException when it is neither.
   */
  private getUploadKind(
    file: Pick<Express.Multer.File, 'mimetype' | 'originalname'>,
  ): UploadKind {
    const type = (file.mimetype ?? '').toLowerCase();
    const name = (file.originalname ?? '').toLowerCase();

    if (type.includes('pdf') || name.endsWith('.pdf')) {
      return 'pdf';
    }
    // PDF is already handled above, so any remaining supported extension is an image.
    if (type.startsWith('image/') || this.findSupportedExtension(name) !== undefined) {
      return 'image';
    }
    throw new UnsupportedMediaTypeException(
      'Endast PDF, PNG, JPG, JPEG, WEBP och BMP stöds.',
    );
  }

  /**
   * Extracts the text layer of a PDF.
   *
   * @throws BadRequestException when the PDF contains no readable text.
   * @throws ServiceUnavailableException when parsing fails.
   */
  private async extractTextFromPdf(buffer: Buffer): Promise<string> {
    try {
      const result = await pdfParse(buffer);
      // Strip NUL characters from the extracted text before checking for emptiness.
      const text = result.text?.replace(/\u0000/g, '').trim();
      if (!text) {
        throw new BadRequestException(
          'PDF-filen saknar läsbar text. Prova bildimport om det är en skannad sida.',
        );
      }
      return text;
    } catch (error: unknown) {
      if (error instanceof BadRequestException) {
        throw error;
      }
      console.error('[QuickImport] PDF ERROR:', error);
      throw new ServiceUnavailableException('PDF-importen misslyckades.');
    }
  }

  /**
   * Runs OCR (Swedish + English) on an image.
   *
   * @throws BadRequestException when no text is recognised.
   * @throws ServiceUnavailableException when the OCR worker cannot be created
   *   or recognition fails.
   */
  private async extractTextFromImage(buffer: Buffer): Promise<string> {
    let worker: Awaited<ReturnType<typeof createWorker>> | undefined;
    try {
      // Created inside the try so that a start-up failure is reported as a
      // service error like any other OCR failure.
      worker = await createWorker('swe+eng');
      const result = await worker.recognize(buffer);
      const text = result.data.text?.trim();
      if (!text) {
        throw new BadRequestException('Ingen text hittades i bilden.');
      }
      return text;
    } catch (error: unknown) {
      if (error instanceof BadRequestException) {
        throw error;
      }
      console.error('[QuickImport] OCR ERROR:', error);
      throw new ServiceUnavailableException('OCR-importen misslyckades.');
    } finally {
      if (worker) {
        try {
          await worker.terminate();
        } catch (terminateError: unknown) {
          // A failed shutdown must not replace the actual result or error.
          console.error('[QuickImport] Kunde inte stänga OCR-worker:', terminateError);
        }
      }
    }
  }

  /**
   * Turns raw extracted text into a Markdown recipe with a title, an
   * ingredients section and an instructions section.
   *
   * The first line longer than three characters becomes the title. Lines are
   * assigned to a section by headings when present; otherwise the first
   * content line decides via {@link looksLikeIngredientLine}.
   *
   * @param text Raw text from a PDF or OCR.
   * @param sourceName Optional source label (e.g. file name) appended at the end.
   * @throws BadRequestException when the text is empty after clean-up.
   */
  private normalizeImportedTextToMarkdown(text: string, sourceName?: string): string {
    const cleanedText = text
      .replace(/\r/g, '')
      .replace(/[ \t]+/g, ' ')
      .replace(/\n{3,}/g, '\n\n')
      .trim();
    if (!cleanedText) {
      throw new BadRequestException('Ingen läsbar text hittades i filen.');
    }

    const rows = cleanedText.split('\n').map((row) => row.trim());
    const title = rows.find((row) => row.length > 3) ?? 'Importerat recept';

    const ingredients: string[] = [];
    const instructions: string[] = [];
    let section: 'unknown' | 'ingredients' | 'instructions' = 'unknown';

    for (const line of rows) {
      if (!line || line === title) {
        continue;
      }
      if (QuickImportService.ingredientHeading.test(line)) {
        section = 'ingredients';
        continue;
      }
      if (QuickImportService.instructionHeading.test(line)) {
        section = 'instructions';
        continue;
      }
      if (section === 'unknown') {
        section = this.looksLikeIngredientLine(line) ? 'ingredients' : 'instructions';
      }

      if (section === 'ingredients') {
        // Normalise "• x" / "* x" / "- x" to a single Markdown bullet.
        const item = line.replace(/^[-*•]\s+/, '');
        ingredients.push(item.startsWith('-') ? item : `- ${item}`);
      } else {
        instructions.push(line);
      }
    }

    // Blank lines are kept on purpose: they separate the Markdown blocks.
    const output = [
      `# ${title}`,
      '',
      '## Ingredienser',
      ...(ingredients.length > 0 ? ingredients : ['- Komplettera ingredienser manuellt']),
      '',
      '## Tillvägagångssätt',
      ...(instructions.length > 0 ? instructions : ['Komplettera tillagningsstegen manuellt.']),
    ];
    if (sourceName) {
      output.push('', `Källa: ${sourceName}`);
    }
    return output.join('\n');
  }

  /**
   * Heuristic: does the line look like an ingredient (bullet, leading
   * quantity, or a measurement unit) rather than an instruction step?
   */
  private looksLikeIngredientLine(line: string): boolean {
    // "1. Värm ugnen" / "2) Blanda" are numbered steps, not quantities.
    if (/^\d+[.)]\s/.test(line)) {
      return false;
    }
    return (
      /^[-*•]\s+/.test(line) ||
      // Leading quantity; a decimal separator must be followed by digits.
      /^\d+(?:[.,]\d+)?\s+/.test(line) ||
      // Unicode-aware boundaries: ASCII `\b` treats å/ä/ö as non-word
      // characters, which made "mjöl" match the unit "l".
      /(?<!\p{L})(?:g|kg|hg|mg|ml|dl|cl|l|tsk|msk|krm|st|pkt|förp|klyfta)(?![\p{L}\p{N}])/iu.test(
        line,
      )
    );
  }

  /**
   * Scrapes a recipe from a URL.
   *
   * Uses the first parser whose `canHandle(url)` returns true; the ICA parser
   * is tried before the generic one.
   *
   * @param url URL of the recipe page.
   * @returns The recipe as Markdown together with its source kind.
   * @throws BadRequestException when fetching or parsing fails.
   */
  private async scrapeRecipeFromUrl(url: string): Promise<QuickImportResult> {
    try {
      console.log('[QuickImport] Hämtar HTML från:', url);
      // NOTE(review): the URL is user-supplied and fetched server-side with no
      // timeout or host restrictions — consider SSRF protection and a timeout.
      const response = await fetch(url, {
        headers: {
          'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        },
      });
      console.log('[QuickImport] HTTP status:', response.status);
      if (!response.ok) {
        throw new Error(`HTTP ${response.status}: ${response.statusText}`);
      }

      const html = await response.text();
      console.log('[QuickImport] HTML längd:', html.length, 'tecken');

      const parsers: RecipeParser[] = [new IcaRecipeParser(), new GenericRecipeParser()];
      const parser = parsers.find((candidate) => candidate.canHandle(url));
      if (parser) {
        console.log('[QuickImport] Använder parser:', parser.constructor.name);
      }
      const recipe = parser ? parser.parse(html) : null;
      if (!recipe) {
        throw new Error('Ingen parserutrustning tillgänglig');
      }

      console.log('[QuickImport] Parsad recept:', {
        name: recipe.name,
        ingredienser: recipe.ingredients.length,
      });
      if (!recipe.name) {
        throw new Error('Kunde inte hitta receptnamn på sidan. Försök med en annan länk.');
      }

      const markdown = this.recipeToMarkdown(recipe, url);
      console.log('[QuickImport] Markdown genererad, längd:', markdown.length);

      // Decide on the hostname only, so "ica.se" in a path or query string
      // of another site is not mistaken for ICA.
      const { hostname } = new URL(url);
      const source: QuickImportResult['source'] = /(^|\.)ica\.se$/i.test(hostname)
        ? 'ica'
        : 'other';

      return { markdown, source };
    } catch (err: unknown) {
      const message = err instanceof Error ? err.message : 'Okänt fel vid scraping';
      console.error('[QuickImport] ERROR:', message);
      // Strip trailing punctuation so the combined sentence does not contain "..".
      const reason = message.replace(/[.\s]+$/, '');
      throw new BadRequestException(
        `Kunde inte hämta recept: ${reason}. Kontrollera att länken är korrekt och försök igen.`,
      );
    }
  }

  /**
   * Converts a parsed recipe object to Markdown.
   *
   * @param recipe Parsed recipe; empty sections are omitted from the output.
   * @param sourceUrl Optional source URL, appended as a link after a rule.
   */
  private recipeToMarkdown(
    recipe: {
      name: string;
      description?: string;
      ingredients: Array<{
        quantity: number;
        unit: string;
        name: string;
        note?: string;
      }>;
      instructions?: string;
    },
    sourceUrl?: string,
  ): string {
    const lines: string[] = [`# ${recipe.name}`, ''];

    if (recipe.description) {
      lines.push(recipe.description, '');
    }

    if (recipe.ingredients.length > 0) {
      lines.push('## Ingredienser');
      for (const ingredient of recipe.ingredients) {
        // Quantity, unit and note are each left out when missing or zero.
        const quantity = ingredient.quantity > 0 ? `${ingredient.quantity} ` : '';
        const unit = ingredient.unit ? `${ingredient.unit} ` : '';
        const note = ingredient.note ? ` (${ingredient.note})` : '';
        lines.push(`- ${quantity}${unit}${ingredient.name}${note}`);
      }
      lines.push('');
    }

    if (recipe.instructions) {
      lines.push('## Tillvägagångssätt', recipe.instructions, '');
    }

    if (sourceUrl) {
      lines.push('---', '', `Källa: [${sourceUrl}](${sourceUrl})`);
    }

    return lines.join('\n');
  }
}