87372f0d15
Co-authored-by: Copilot <copilot@github.com>
273 lines
8.3 KiB
TypeScript
273 lines
8.3 KiB
TypeScript
import {
|
|
BadRequestException,
|
|
Injectable,
|
|
Logger,
|
|
ServiceUnavailableException,
|
|
UnsupportedMediaTypeException,
|
|
} from '@nestjs/common';
|
|
import * as fs from 'node:fs/promises';
|
|
import * as path from 'node:path';
|
|
import * as pdfParse from 'pdf-parse';
|
|
import { createWorker } from 'tesseract.js';
|
|
import { IcaRecipeParser } from './parsers/ica.parser';
|
|
import { GenericRecipeParser } from './parsers/generic.parser';
|
|
import { ReceiptParser } from './parsers/receipt.parser';
|
|
import { RecipeParser } from './parsers/base.parser';
|
|
import { downloadAndOptimizeImage } from '../common/utils/download-image';
|
|
|
|
// Destination directory handed to downloadAndOptimizeImage when a scraped
// recipe has an image. Read from the IMAGE_DEST_DIR environment variable;
// falls back to '/app/recipe-images' when the variable is unset or empty.
const IMAGE_DEST_DIR = process.env.IMAGE_DEST_DIR || '/app/recipe-images';
|
|
|
|
/**
 * Result of importing a recipe, as produced by the URL scraping path of
 * QuickImportService.
 */
export interface QuickImportResult {
  /** The recipe rendered as a Markdown document. */
  markdown: string;

  /** Label describing where the recipe came from. */
  source: 'ica' | 'pdf' | 'image' | 'other';

  /**
   * Image reference for the recipe, when one was found: either the value
   * returned by the image download helper or, if the download failed, the
   * normalized external URL.
   */
  imageUrl?: string;

  /** Human-readable note set when the image could not be resolved or downloaded. */
  imageWarning?: string;
}
|
|
|
|
/**
 * Result of importing a receipt from a PDF or an image buffer.
 */
export interface ReceiptImportResult {
  /** Line items extracted from the receipt. */
  items: {
    name: string;
    quantity: number;
    price: number;
  }[];

  /** Which kind of input the items were extracted from. */
  source: 'pdf' | 'image';
}
|
|
|
|
// The two kinds of binary input that importFromBuffer dispatches on.
type UploadKind = 'pdf' | 'image';
|
|
|
|
/**
 * Quick-import service.
 *
 * Accepts a URL, a local file path, or an uploaded file and turns it into
 * either recipe Markdown (URL scraping) or parsed receipt data (PDF/image
 * buffers, delegated to ReceiptParser).
 */
@Injectable()
export class QuickImportService {
  /** Lower-case file extensions (without the dot) that are routed to the image parser. */
  private static readonly IMAGE_EXTENSIONS: ReadonlySet<string> = new Set([
    'jpg',
    'jpeg',
    'png',
    'webp',
    'gif',
    'bmp',
    'tif',
    'tiff',
  ]);

  private readonly logger = new Logger(QuickImportService.name);
  private readonly receiptParser = new ReceiptParser();

  /**
   * Detects the type of input (URL or file path) and imports from the
   * matching source.
   *
   * NOTE(review): the file-path branch resolves to whatever importFromBuffer
   * returns, which can be a ReceiptImportResult even though this method is
   * declared to return QuickImportResult. The declared type is kept unchanged
   * for caller compatibility — confirm and widen together with the callers.
   *
   * NOTE(review): this reads an arbitrary path from the server's file system.
   * If `input` can come from an untrusted client, restrict it to an allowed
   * directory.
   *
   * @param input A http(s) URL or a local file path.
   * @throws BadRequestException when the input is empty or is neither a URL nor a path.
   * @throws UnsupportedMediaTypeException when the file extension is neither PDF nor a known image type.
   * @throws ServiceUnavailableException when the file cannot be read or imported.
   */
  async importFromInput(input: string): Promise<QuickImportResult> {
    // Tolerate a missing value (e.g. an absent request field) instead of crashing on .trim().
    const trimmed = (input ?? '').trim();
    this.logger.log(`Mottog input: ${trimmed}`);

    if (!trimmed) {
      throw new BadRequestException('Du måste ange en URL eller ladda upp en fil');
    }

    if (this.isUrl(trimmed)) {
      this.logger.log('Detekterade URL, försöker scrapa...');
      return this.scrapeRecipeFromUrl(trimmed);
    }

    if (this.looksLikeLocalFile(trimmed)) {
      // Resolve the kind before touching the disk so an unsupported extension
      // is reported as such rather than as a read failure.
      const kind = this.kindFromExtension(trimmed);
      this.logger.log(`Försöker läsa lokal fil: ${trimmed}`);

      let buffer: Buffer;
      try {
        buffer = await fs.readFile(trimmed);
      } catch (error) {
        this.logger.error(`Kunde inte läsa fil: ${error}`);
        throw new ServiceUnavailableException('Kunde inte läsa filen');
      }

      // Outside the try block: import failures carry their own error and must
      // not be relabelled as "could not read the file".
      return this.importFromBuffer(buffer, kind);
    }

    throw new BadRequestException('Ogiltig input. Ange en URL eller en filsökväg.');
  }

  /**
   * Imports from an uploaded file.
   *
   * @param file The uploaded file; its in-memory buffer is parsed.
   * @throws BadRequestException when no file was provided.
   * @throws UnsupportedMediaTypeException when the MIME type is neither an image nor a PDF.
   */
  async importFromUpload(file: Express.Multer.File): Promise<QuickImportResult | ReceiptImportResult> {
    if (!file) {
      throw new BadRequestException('Du måste ange en URL eller ladda upp en fil');
    }

    // Logger.log treats a second argument as the logging context, so the
    // value has to be part of the message itself.
    this.logger.log(`MIME-typ: ${file.mimetype}`);
    this.logger.log(`Filnamn: ${file.originalname}`);

    const kind = this.kindFromMimeType(file.mimetype);
    return this.importFromBuffer(file.buffer, kind);
  }

  /**
   * Imports from a buffer (PDF or image) by delegating to the receipt parser.
   *
   * @param buffer Raw file contents.
   * @param kind Whether the buffer holds a PDF or an image.
   * @throws BadRequestException when the buffer is missing or empty.
   * @throws ServiceUnavailableException when the parser fails.
   */
  async importFromBuffer(
    buffer: Buffer,
    kind: UploadKind,
  ): Promise<QuickImportResult | ReceiptImportResult> {
    if (!buffer || buffer.length === 0) {
      throw new BadRequestException('Filen är tom eller kunde inte läsas');
    }

    try {
      // `return await` is required here: without it a rejected promise from
      // the parser would skip this catch block entirely.
      if (kind === 'pdf') {
        return await this.receiptParser.parseFromPdf(buffer);
      }
      return await this.receiptParser.parseFromImage(buffer);
    } catch (error) {
      this.logger.error(`Fel vid import av ${kind}: ${error}`);
      throw new ServiceUnavailableException(`Kunde inte importera ${kind}`);
    }
  }

  /** True when the input starts with an http:// or https:// scheme (case-insensitive). */
  private isUrl(input: string): boolean {
    return /^https?:\/\//i.test(input);
  }

  /** True when the input contains a path separator (forward slash or backslash). */
  private looksLikeLocalFile(input: string): boolean {
    return input.includes('/') || input.includes('\\');
  }

  /** Maps a file path's extension to an upload kind; throws 415 for anything else. */
  private kindFromExtension(filePath: string): UploadKind {
    const extension = path.extname(filePath).slice(1).toLowerCase();
    if (extension === 'pdf') {
      return 'pdf';
    }
    if (QuickImportService.IMAGE_EXTENSIONS.has(extension)) {
      return 'image';
    }
    throw new UnsupportedMediaTypeException(
      `Filtypen stöds inte: ${extension || 'okänd'}. Ange en PDF eller en bild.`,
    );
  }

  /** Maps a MIME type to an upload kind; throws 415 for anything that is not an image or a PDF. */
  private kindFromMimeType(mimeType: string): UploadKind {
    const normalized = (mimeType ?? '').toLowerCase();
    if (normalized.startsWith('image/')) {
      return 'image';
    }
    if (normalized === 'application/pdf') {
      return 'pdf';
    }
    throw new UnsupportedMediaTypeException(
      `Filtypen stöds inte: ${normalized || 'okänd'}. Ladda upp en PDF eller en bild.`,
    );
  }

  /** True when the URL's host is ica.se or one of its subdomains. */
  private isIcaUrl(url: string): boolean {
    try {
      const host = new URL(url).hostname;
      return host === 'ica.se' || host.endsWith('.ica.se');
    } catch {
      return false;
    }
  }

  /**
   * Fetches a page, runs the first parser that accepts the URL, and converts
   * the parsed recipe to Markdown. Any failure is reported to the caller as a
   * BadRequestException carrying the underlying message.
   *
   * NOTE(review): this performs a server-side request to a caller-supplied
   * URL; if the input is untrusted, consider blocking internal/private hosts.
   */
  private async scrapeRecipeFromUrl(url: string): Promise<QuickImportResult> {
    try {
      this.logger.log(`Hämtar HTML från: ${url}`);

      const response = await fetch(url, {
        headers: {
          'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        },
      });

      this.logger.log(`HTTP status: ${response.status}`);

      if (!response.ok) {
        throw new Error(`HTTP ${response.status}: ${response.statusText}`);
      }

      const html = await response.text();
      this.logger.log(`HTML längd: ${html.length} tecken`);

      // Order matters: the site-specific parser is tried before the generic one.
      const parsers: RecipeParser[] = [
        new IcaRecipeParser(),
        new GenericRecipeParser(),
      ];

      const parser = parsers.find((candidate) => candidate.canHandle(url));
      if (!parser) {
        throw new Error('Ingen parserutrustning tillgänglig');
      }

      this.logger.log(`Använder parser: ${parser.constructor.name}`);
      const recipe = parser.parse(html);
      if (!recipe) {
        throw new Error('Ingen parserutrustning tillgänglig');
      }

      this.logger.log(`Parsad recept: ${recipe.name} (${recipe.ingredients.length} ingredienser)`);

      if (!recipe.name) {
        throw new Error('Kunde inte hitta receptnamn på sidan. Försök med en annan länk.');
      }

      const markdown = this.recipeToMarkdown(recipe, url);
      this.logger.log(`Markdown genererad, längd: ${markdown.length}`);

      // Decide on the host rather than a substring match, so that e.g.
      // "musica.se" or a query parameter containing "ica.se" is not mislabelled.
      const source: QuickImportResult['source'] = this.isIcaUrl(url) ? 'ica' : 'other';

      // Download and optimize the image if the parser found one.
      const image: Pick<QuickImportResult, 'imageUrl' | 'imageWarning'> = recipe.imageUrl
        ? await this.resolveRecipeImage(recipe.imageUrl, url)
        : {};

      return {
        markdown,
        source,
        imageUrl: image.imageUrl,
        imageWarning: image.imageWarning,
      };
    } catch (err) {
      const message = err instanceof Error ? err.message : 'Okänt fel vid scraping';
      this.logger.error(`Scraping misslyckades: ${message}`);
      throw new BadRequestException(
        `Kunde inte hämta recept: ${message}. Kontrollera att länken är korrekt och försök igen.`
      );
    }
  }

  /**
   * Normalizes the parser's image candidate and tries to download it.
   * Never throws: on failure it returns a warning, and — when the URL itself
   * was valid — the external URL as a fallback so the client can still show it.
   */
  private async resolveRecipeImage(
    rawImageUrl: string,
    pageUrl: string,
  ): Promise<Pick<QuickImportResult, 'imageUrl' | 'imageWarning'>> {
    this.logger.log(`Bildkandidat från parser: ${rawImageUrl}`);
    const normalizedImageUrl = this.normalizeImageUrl(rawImageUrl, pageUrl);
    this.logger.log(`Normaliserad bild-URL: ${normalizedImageUrl ?? 'null'}`);

    if (!normalizedImageUrl) {
      this.logger.warn(
        `Kunde inte normalisera bild-URL: "${rawImageUrl}" (källsida: ${pageUrl})`,
      );
      return { imageWarning: 'Receptbild kunde inte tolkas till en giltig URL.' };
    }

    try {
      const imageUrl = await downloadAndOptimizeImage(normalizedImageUrl, IMAGE_DEST_DIR);
      this.logger.log(`Bild optimerad och sparad: ${imageUrl}`);
      return { imageUrl };
    } catch (imgErr) {
      // Fallback: keep the external URL so the client can still display an image.
      this.logger.warn(
        `Kunde inte ladda ner bild: ${imgErr} (källa: ${normalizedImageUrl})`,
      );
      return {
        imageUrl: normalizedImageUrl,
        imageWarning: 'Receptbild kunde inte laddas ner lokalt; extern URL används.',
      };
    }
  }

  /**
   * Turns a raw image reference into an absolute URL, resolving relative
   * references against the page URL. Protocol-relative references ("//host/…")
   * are forced to https. Returns null when the value is blank or unparsable.
   */
  private normalizeImageUrl(rawImageUrl: string, pageUrl: string): string | null {
    const trimmed = rawImageUrl.trim();
    if (!trimmed) return null;

    const protocolNormalized =
      trimmed.startsWith('//') ? `https:${trimmed}` : trimmed;

    try {
      return new URL(protocolNormalized, pageUrl).toString();
    } catch {
      return null;
    }
  }

  /**
   * Converts a recipe object to Markdown.
   *
   * Layout: title heading, optional description, ingredient list, optional
   * instructions, and an optional source link after a horizontal rule.
   * Quantities that are not greater than zero and empty units are omitted
   * from the ingredient line.
   */
  private recipeToMarkdown(
    recipe: {
      name: string;
      description?: string;
      ingredients: Array<{
        quantity: number;
        unit: string;
        name: string;
        note?: string;
      }>;
      instructions?: string;
    },
    sourceUrl?: string,
  ): string {
    const lines: string[] = [];

    lines.push(`# ${recipe.name}`);
    lines.push('');

    if (recipe.description) {
      lines.push(recipe.description);
      lines.push('');
    }

    if (recipe.ingredients.length > 0) {
      lines.push('## Ingredienser');
      for (const ing of recipe.ingredients) {
        const quantity = ing.quantity > 0 ? `${ing.quantity} ` : '';
        const unit = ing.unit ? `${ing.unit} ` : '';
        const note = ing.note ? ` (${ing.note})` : '';
        lines.push(`- ${quantity}${unit}${ing.name}${note}`);
      }
      lines.push('');
    }

    if (recipe.instructions) {
      lines.push('## Tillvägagångssätt');
      lines.push(recipe.instructions);
      lines.push('');
    }

    if (sourceUrl) {
      lines.push('---');
      lines.push('');
      lines.push(`Källa: [${sourceUrl}](${sourceUrl})`);
    }

    return lines.join('\n');
  }
}
|