feat: Implement PDF recipe parser and quick import service for file and URL inputs
This commit is contained in:
@@ -17,15 +17,20 @@
|
||||
"@prisma/client": "^6.12.0",
|
||||
"class-transformer": "^0.5.1",
|
||||
"class-validator": "^0.15.1",
|
||||
"multer": "^1.4.5-lts.2",
|
||||
"pdf-parse": "^1.1.1",
|
||||
"reflect-metadata": "^0.2.2",
|
||||
"rxjs": "^7.8.1"
|
||||
"rxjs": "^7.8.1",
|
||||
"tesseract.js": "^6.0.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@nestjs/cli": "^10.3.0",
|
||||
"@nestjs/schematics": "^10.1.1",
|
||||
"@nestjs/testing": "^10.3.0",
|
||||
"@types/express": "^4.17.21",
|
||||
"@types/multer": "^1.4.12",
|
||||
"@types/node": "^22.15.29",
|
||||
"@types/pdf-parse": "^1.1.5",
|
||||
"prisma": "^6.12.0",
|
||||
"typescript": "^5.4.5"
|
||||
}
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
import { IsOptional, IsString, MaxLength } from 'class-validator';
|
||||
|
||||
/**
 * Request body accepted by the quick-import endpoint.
 */
export class QuickImportDto {
  /**
   * Optional free-text input. When present it must be a string of at most
   * 2048 characters; the field may be omitted entirely.
   */
  @IsOptional()
  @IsString()
  @MaxLength(2048)
  input?: string;
}
|
||||
@@ -0,0 +1,116 @@
|
||||
/**
|
||||
* Parser för PDF-filer
|
||||
* Använder pdf-parse för att extrahera text från PDF-dokument
|
||||
*/
|
||||
import { RecipeParser, ParsedRecipe } from './base.parser';
|
||||
import * as pdf from 'pdf-parse';
|
||||
|
||||
/**
 * Recipe parser for PDF documents.
 *
 * Extracts the raw text with pdf-parse and then applies line-based heuristics
 * (Swedish section headings) to split it into name, description, ingredients
 * and instructions.
 */
export class PdfRecipeParser extends RecipeParser {
  /** Name used when the document offers no usable title line. */
  private static readonly DEFAULT_NAME = 'Okänt recept';

  /** Longest first line that is still accepted verbatim as the recipe name. */
  private static readonly MAX_NAME_LENGTH = 50;

  /** Lower-case prefixes that open the ingredient list. */
  private static readonly INGREDIENT_HEADINGS: readonly string[] = ['ingredienser'];

  /** Lower-case prefixes that open the instruction section. */
  private static readonly INSTRUCTION_HEADINGS: readonly string[] = [
    'tillvägagångssätt',
    'instruktioner',
    'gör så här',
  ];

  /**
   * @param url File name or path to test.
   * @returns `true` when the name ends with `.pdf` (case-insensitive).
   */
  canHandle(url: string): boolean {
    return url.toLowerCase().endsWith('.pdf');
  }

  /**
   * Extracts the text of a PDF and converts it into a structured recipe.
   *
   * @param fileBuffer Raw bytes of the PDF file.
   * @returns The parsed recipe.
   * @throws Error with a user-facing message when extraction or parsing fails;
   *         the underlying error is logged.
   */
  async parse(fileBuffer: Buffer): Promise<ParsedRecipe> {
    console.log('[PdfParser] Parsing PDF file...');

    try {
      const data = await pdf(fileBuffer);
      const text = data.text ?? '';
      console.log('[PdfParser] Extraherad text längd:', text.length);

      return this.parseRecipeText(text);
    } catch (err) {
      console.error('[PdfParser] Fel vid PDF-parsing:', err);
      throw new Error('Kunde inte tolka PDF-filen. Kontrollera att det är ett giltigt recept.');
    }
  }

  /**
   * Turns raw PDF text into a recipe structure.
   *
   * The first non-empty line is taken as the recipe name unless it is itself a
   * section heading. A regular expression over the whole text is deliberately
   * not used for this: with the `i` flag and `\s` it ran across line breaks and
   * could return several lines, or only whitespace, as the name.
   *
   * Lines before the first heading form the description, lines after an
   * ingredient heading are parsed as ingredients, and lines after an
   * instruction heading are collected as instructions.
   */
  private parseRecipeText(text: string): ParsedRecipe {
    const lines = text
      .split('\n')
      .map((line) => line.trim())
      .filter((line) => line.length > 0);

    let name = PdfRecipeParser.DEFAULT_NAME;
    // True when line 0 was used verbatim as the name and must not be repeated
    // in the description.
    let titleLineConsumed = false;

    const firstLine = lines[0];
    if (firstLine !== undefined && this.detectSection(firstLine) === null) {
      if (firstLine.length > PdfRecipeParser.MAX_NAME_LENGTH) {
        // A long first line is more likely a sentence than a title: keep a
        // shortened name and leave the full line in the description.
        name = `${firstLine.substring(0, PdfRecipeParser.MAX_NAME_LENGTH)}...`;
      } else {
        name = firstLine;
        titleLineConsumed = true;
      }
    }

    const ingredients: Array<{ quantity: number; unit: string; name: string; note?: string }> = [];
    const instructionLines: string[] = [];
    let description = '';
    let currentSection: 'ingredients' | 'instructions' | null = null;

    for (const [index, line] of lines.entries()) {
      const heading = this.detectSection(line);
      if (heading !== null) {
        currentSection = heading;
        continue;
      }

      if (currentSection === 'ingredients') {
        // parseIngredientLine is not defined in this class; presumably it is
        // inherited from RecipeParser — verify against base.parser.
        const ingredient = this.parseIngredientLine(line);
        if (ingredient) {
          ingredients.push(ingredient);
        }
      } else if (currentSection === 'instructions') {
        instructionLines.push(line);
      } else if (!(index === 0 && titleLineConsumed) && !description.includes(line)) {
        // Before the first heading: free text. Lines already contained in the
        // description are skipped (crude de-duplication kept from the original).
        description += description.length > 0 ? ` ${line}` : line;
      }
    }

    return {
      name,
      description: description || undefined,
      ingredients,
      instructions: instructionLines.join('\n') || undefined,
    };
  }

  /**
   * Classifies a line as a section heading.
   *
   * Only lines that START with a known heading word count. A substring match
   * would treat an instruction step such as "Blanda alla ingredienser" as a
   * new ingredient heading.
   *
   * @returns The section the heading opens, or `null` for ordinary content.
   */
  private detectSection(line: string): 'ingredients' | 'instructions' | null {
    const lower = line.toLowerCase();

    if (PdfRecipeParser.INGREDIENT_HEADINGS.some((heading) => lower.startsWith(heading))) {
      return 'ingredients';
    }
    if (PdfRecipeParser.INSTRUCTION_HEADINGS.some((heading) => lower.startsWith(heading))) {
      return 'instructions';
    }
    return null;
  }
}
|
||||
@@ -0,0 +1,25 @@
|
||||
import { Controller, Post, Body, UseInterceptors, UploadedFile } from '@nestjs/common';
|
||||
import { FileInterceptor } from '@nestjs/platform-express';
|
||||
import { QuickImportService, QuickImportResult } from './quick-import.service';
|
||||
|
||||
/**
 * HTTP entry point for quick import: accepts either an uploaded file
 * (multipart field `file`) or a text field `input` containing a URL.
 */
@Controller('quick-import')
export class QuickImportController {
  constructor(private readonly quickImportService: QuickImportService) {}

  /**
   * Imports a recipe from the uploaded file if there is one, otherwise from
   * the text input.
   *
   * @param body Request body; `input` may be missing, e.g. for file-only uploads.
   * @param file Uploaded file, if any.
   * @returns The imported recipe as Markdown together with its source kind.
   */
  @Post()
  // Cap uploads at 10 MiB so a single request cannot buffer an unbounded file.
  @UseInterceptors(FileInterceptor('file', { limits: { fileSize: 10 * 1024 * 1024 } }))
  async importFromInput(
    @Body() body: { input?: string },
    @UploadedFile() file?: Express.Multer.File,
  ): Promise<QuickImportResult> {
    // An uploaded file takes precedence over the text field.
    if (file) {
      console.log('[QuickImportController] Mottog fil:', file.originalname);
      return this.quickImportService.importFromInput(file.originalname, file.buffer);
    }

    // No file: forward the text input. A missing body or field becomes an empty
    // string so the service answers with its 400 instead of crashing on
    // `undefined.trim()`.
    const input = body?.input ?? '';
    console.log('[QuickImportController] Mottog text-input:', input);
    return this.quickImportService.importFromInput(input);
  }
}
|
||||
@@ -1,4 +1,7 @@
|
||||
import { Controller, Post, Body } from '@nestjs/common';
|
||||
import { Body, Controller, Post, UploadedFile, UseInterceptors } from '@nestjs/common';
|
||||
import { FileInterceptor } from '@nestjs/platform-express';
|
||||
import { memoryStorage } from 'multer';
|
||||
import { QuickImportDto } from './dto/quick-import.dto';
|
||||
import { QuickImportService, QuickImportResult } from './quick-import.service';
|
||||
|
||||
@Controller('quick-import')
|
||||
@@ -6,9 +9,20 @@ export class QuickImportController {
|
||||
constructor(private readonly quickImportService: QuickImportService) {}
|
||||
|
||||
/**
 * Imports a recipe from an uploaded file (multipart field `file`) or, when no
 * file is sent, from the `input` text field.
 *
 * The removed single-parameter signature and its early `return` are dropped:
 * left in place they declare `@Body() body` twice and make the upload branch
 * unreachable.
 *
 * @param body Validated request body; `input` is optional.
 * @param file Uploaded file, if any (kept in memory, at most 10 MiB).
 * @returns The imported recipe as Markdown together with its source kind.
 */
@Post()
@UseInterceptors(
  FileInterceptor('file', {
    storage: memoryStorage(),
    limits: { fileSize: 10 * 1024 * 1024 },
  }),
)
async importFromInput(
  @Body() body: QuickImportDto,
  @UploadedFile() file?: Express.Multer.File,
): Promise<QuickImportResult> {
  if (file) {
    return this.quickImportService.importFromUpload(file);
  }

  // A missing body or field is forwarded as '' so the service can reject it
  // with a 400.
  return this.quickImportService.importFromInput(body?.input ?? '');
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,242 @@
|
||||
import { Injectable, BadRequestException } from '@nestjs/common';
|
||||
import { IcaRecipeParser } from './parsers/ica.parser';
|
||||
import { GenericRecipeParser } from './parsers/generic.parser';
|
||||
import { PdfRecipeParser } from './parsers/pdf.parser';
|
||||
import { RecipeParser } from './parsers/base.parser';
|
||||
|
||||
/**
 * Result returned by the quick-import service.
 */
export interface QuickImportResult {
  /** The imported recipe rendered as Markdown. */
  markdown: string;
  /**
   * Where the recipe came from: `'ica'` for ICA URLs, `'pdf'` for parsed PDF
   * files, `'other'` for any other URL.
   */
  source: 'ica' | 'pdf' | 'other';
}
|
||||
|
||||
/**
 * Imports a recipe from a web page or an uploaded PDF and renders it as
 * Markdown.
 */
@Injectable()
export class QuickImportService {
  /**
   * Detects whether the input is a URL or a PDF file name and imports from the
   * matching source.
   *
   * @param input URL to scrape, or the file name of an uploaded file.
   * @param fileBuffer Content of the uploaded file; required for PDF input.
   * @returns The recipe as Markdown plus the detected source kind.
   * @throws BadRequestException when the input is empty, is neither an
   *         http(s) URL nor a PDF name, a PDF name arrives without content,
   *         or the import itself fails.
   */
  async importFromInput(input: string, fileBuffer?: Buffer): Promise<QuickImportResult> {
    // Callers may forward an absent request field; treat it as empty instead of
    // failing with a TypeError (HTTP 500) on `.trim()`.
    const trimmed = (input ?? '').trim();
    console.log('[QuickImport] Mottog input:', trimmed);

    if (!trimmed) {
      throw new BadRequestException('Du måste ange en URL eller ladda upp en fil');
    }

    const isUrl = this.isUrl(trimmed);
    const isPdf = this.isPdfPath(trimmed);
    console.log('[QuickImport] isUrl:', isUrl, 'isPdf:', isPdf);

    if (isUrl) {
      console.log('[QuickImport] Detekterade URL, försöker scrapa...');
      return this.scrapeRecipeFromUrl(trimmed);
    }

    if (isPdf) {
      console.log('[QuickImport] Detekterade PDF-fil');
      if (!fileBuffer) {
        throw new BadRequestException('PDF-fil kräver filinnehåll (fileBuffer)');
      }
      return this.parsePdfFile(fileBuffer);
    }

    console.log('[QuickImport] Input är inte URL eller PDF');
    throw new BadRequestException(
      'Ogiltig input. Ange en giltig URL (t.ex. https://www.ica.se/recept/...) eller ladda upp en PDF-fil',
    );
  }

  /**
   * Checks whether the input is an absolute http(s) URL.
   *
   * Parsing alone is not enough: `new URL()` also accepts `file:` and
   * `javascript:`, and a Windows path such as `C:\recept.pdf` parses as a URL
   * with scheme `c:`. Only web URLs may reach `fetch`.
   *
   * NOTE(review): the host is not restricted, so a client can still make the
   * server request internal addresses (SSRF). Consider an allow-list.
   */
  private isUrl(input: string): boolean {
    try {
      const { protocol } = new URL(input);
      return protocol === 'http:' || protocol === 'https:';
    } catch {
      return false;
    }
  }

  /** Checks whether the input looks like a PDF file name (ends with `.pdf`). */
  private isPdfPath(input: string): boolean {
    return input.toLowerCase().endsWith('.pdf');
  }

  /**
   * Downloads a recipe page and converts it to Markdown.
   *
   * The first parser whose `canHandle(url)` returns true is used; the ICA
   * parser is tried before the generic one.
   *
   * @param url Absolute http(s) URL of the recipe page.
   * @returns Markdown plus the source kind (`'ica'` or `'other'`).
   * @throws BadRequestException wrapping any download or parsing failure.
   */
  private async scrapeRecipeFromUrl(url: string): Promise<QuickImportResult> {
    try {
      console.log('[QuickImport] Hämtar HTML från:', url);

      const response = await fetch(url, {
        headers: {
          'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        },
      });
      console.log('[QuickImport] HTTP status:', response.status);

      if (!response.ok) {
        throw new Error(`HTTP ${response.status}: ${response.statusText}`);
      }

      const html = await response.text();
      console.log('[QuickImport] HTML längd:', html.length, 'tecken');

      const parsers: RecipeParser[] = [new IcaRecipeParser(), new GenericRecipeParser()];
      const parser = parsers.find((candidate) => candidate.canHandle(url));
      if (parser) {
        console.log('[QuickImport] Använder parser:', parser.constructor.name);
      }

      // `await` is harmless for a synchronous parser and required if the parser
      // returns a promise (PdfRecipeParser.parse does).
      const recipe = parser ? await parser.parse(html) : null;
      if (!recipe) {
        throw new Error('Ingen parserutrustning tillgänglig');
      }

      console.log('[QuickImport] Parsad recept:', {
        name: recipe.name,
        ingredienser: recipe.ingredients.length,
      });

      if (!recipe.name) {
        throw new Error('Kunde inte hitta receptnamn på sidan. Försök med en annan länk.');
      }

      const markdown = this.recipeToMarkdown(recipe, url);
      console.log('[QuickImport] Markdown genererad, längd:', markdown.length);

      // Compare the host name, not the whole URL: a substring test for "ica.se"
      // also matches hosts such as musica.se or a query parameter.
      const host = new URL(url).hostname;
      const source: QuickImportResult['source'] =
        host === 'ica.se' || host.endsWith('.ica.se') ? 'ica' : 'other';

      return { markdown, source };
    } catch (err) {
      const message = err instanceof Error ? err.message : 'Okänt fel vid scraping';
      console.error('[QuickImport] ERROR:', message);
      throw new BadRequestException(
        `Kunde inte hämta recept: ${message}. Kontrollera att länken är korrekt och försök igen.`,
      );
    }
  }

  /**
   * Parses an uploaded PDF and converts it to Markdown.
   *
   * @param fileBuffer Raw bytes of the PDF.
   * @throws BadRequestException wrapping any parsing failure.
   */
  private async parsePdfFile(fileBuffer: Buffer): Promise<QuickImportResult> {
    try {
      console.log('[QuickImport] Parsar PDF-fil...');

      const pdfParser = new PdfRecipeParser();
      const recipe = await pdfParser.parse(fileBuffer);

      console.log('[QuickImport] PDF parsad:', {
        name: recipe.name,
        ingredienser: recipe.ingredients.length,
      });

      if (!recipe.name) {
        throw new Error('Kunde inte hitta receptnamn i PDF-filen.');
      }

      const markdown = this.recipeToMarkdown(recipe);
      console.log('[QuickImport] Markdown genererad från PDF, längd:', markdown.length);

      return { markdown, source: 'pdf' };
    } catch (err) {
      const message = err instanceof Error ? err.message : 'Okänt fel vid PDF-parsing';
      console.error('[QuickImport] PDF ERROR:', message);
      throw new BadRequestException(
        `Kunde inte läsa PDF-filen: ${message}. Kontrollera att det är ett giltigt recept i PDF-format.`,
      );
    }
  }

  /**
   * Renders a recipe as Markdown: title, optional description, ingredient
   * list, instructions and, for web imports, a source link.
   *
   * @param recipe Parsed recipe.
   * @param sourceUrl URL the recipe was fetched from; omitted for files.
   */
  private recipeToMarkdown(
    recipe: {
      name: string;
      description?: string;
      ingredients: Array<{
        quantity: number;
        unit: string;
        name: string;
        note?: string;
      }>;
      instructions?: string;
    },
    sourceUrl?: string,
  ): string {
    const lines: string[] = [`# ${recipe.name}`, ''];

    if (recipe.description) {
      lines.push(recipe.description, '');
    }

    if (recipe.ingredients.length > 0) {
      lines.push('## Ingredienser');
      for (const ing of recipe.ingredients) {
        // A quantity of 0 and an empty unit are left out of the line.
        const quantity = ing.quantity > 0 ? `${ing.quantity} ` : '';
        const unit = ing.unit ? `${ing.unit} ` : '';
        const note = ing.note ? ` (${ing.note})` : '';
        lines.push(`- ${quantity}${unit}${ing.name}${note}`);
      }
      lines.push('');
    }

    if (recipe.instructions) {
      lines.push('## Tillvägagångssätt', recipe.instructions, '');
    }

    if (sourceUrl) {
      lines.push('---', '', `Källa: [${sourceUrl}](${sourceUrl})`);
    }

    return lines.join('\n');
  }
}
|
||||
@@ -1,46 +1,85 @@
|
||||
import { Injectable, BadRequestException } from '@nestjs/common';
|
||||
import {
|
||||
BadRequestException,
|
||||
Injectable,
|
||||
ServiceUnavailableException,
|
||||
UnsupportedMediaTypeException,
|
||||
} from '@nestjs/common';
|
||||
import * as fs from 'node:fs/promises';
|
||||
import * as path from 'node:path';
|
||||
import * as pdfParse from 'pdf-parse';
|
||||
import { createWorker } from 'tesseract.js';
|
||||
import { IcaRecipeParser } from './parsers/ica.parser';
|
||||
import { GenericRecipeParser } from './parsers/generic.parser';
|
||||
import { RecipeParser } from './parsers/base.parser';
|
||||
|
||||
/**
 * Result returned by the quick-import service.
 *
 * Only the widened `source` declaration is kept; two `source` properties with
 * different types cannot coexist in one interface.
 */
export interface QuickImportResult {
  /** The imported recipe rendered as Markdown. */
  markdown: string;
  /**
   * Where the recipe came from: `'ica'` for ICA URLs, `'pdf'` / `'image'` for
   * uploaded files of that kind, `'other'` for any other URL.
   */
  source: 'ica' | 'pdf' | 'image' | 'other';
}
|
||||
|
||||
type UploadKind = 'pdf' | 'image';
|
||||
|
||||
@Injectable()
|
||||
export class QuickImportService {
|
||||
/**
 * Detects whether the input is a URL or a local file path and imports from
 * the matching source.
 *
 * SECURITY NOTE(review): `input` comes straight from the HTTP request, so the
 * local-file branch lets a client make the server open a path of the client's
 * choosing. The checks below bound the damage: supported extensions only,
 * regular files only, and the same 10 MiB cap as uploads. Consider disabling
 * this branch, or confining it to a dedicated directory, in production.
 *
 * @param input URL or server-local path of a PDF/image file.
 * @returns The imported recipe as Markdown together with its source kind.
 * @throws BadRequestException when the input is empty, unreadable, or neither
 *         a URL nor a file path.
 * @throws UnsupportedMediaTypeException when the path has an unsupported
 *         extension.
 */
async importFromInput(input: string): Promise<QuickImportResult> {
  const maxLocalFileBytes = 10 * 1024 * 1024;

  // Tolerate an absent value instead of failing with a TypeError on `.trim()`.
  const trimmed = (input ?? '').trim();
  console.log('[QuickImport] Mottog input:', trimmed);

  if (!trimmed) {
    throw new BadRequestException('Du måste ange en URL eller ladda upp en fil');
  }

  if (this.isUrl(trimmed)) {
    console.log('[QuickImport] Detekterade URL, försöker scrapa...');
    return this.scrapeRecipeFromUrl(trimmed);
  }

  if (this.looksLikeLocalFile(trimmed)) {
    console.log('[QuickImport] Försöker läsa lokal fil:', trimmed);

    const originalname = path.basename(trimmed);
    const mimetype = this.getMimeTypeFromExtension(trimmed);
    // Reject unsupported types BEFORE touching the file system, so arbitrary
    // paths (e.g. /etc/passwd, /dev/zero) are never opened or buffered.
    this.getUploadKind({ mimetype, originalname });

    let buffer: Buffer;
    try {
      const stats = await fs.stat(trimmed);
      if (!stats.isFile() || stats.size > maxLocalFileBytes) {
        throw new Error('Not a regular file or larger than the import limit');
      }
      buffer = await fs.readFile(trimmed);
    } catch (error) {
      console.error('[QuickImport] Kunde inte läsa lokal fil:', error);
      throw new BadRequestException(
        'Kunde inte läsa filen. Använd filuppladdning i gränssnittet eller kontrollera sökvägen.',
      );
    }

    // Only the fields importFromUpload reads are provided, hence the assertion.
    return this.importFromUpload({ buffer, originalname, mimetype } as Express.Multer.File);
  }

  throw new BadRequestException(
    'Ogiltig input. Ange en giltig URL eller ladda upp en PDF- eller bildfil.',
  );
}
|
||||
|
||||
/**
 * Imports a recipe from an uploaded PDF or image.
 *
 * PDFs go through text extraction, images through OCR; the resulting text is
 * then normalised into Markdown.
 *
 * @param file Uploaded file; its `buffer` must be populated.
 * @returns Markdown plus the upload kind as `source`.
 * @throws BadRequestException when no file content was provided.
 */
async importFromUpload(file: Express.Multer.File): Promise<QuickImportResult> {
  if (!file?.buffer) {
    throw new BadRequestException('Ingen fil skickades med.');
  }

  console.log('[QuickImport] Mottog uppladdad fil:', file.originalname, file.mimetype);

  // The upload kind ('pdf' | 'image') doubles as the reported source.
  const kind = this.getUploadKind(file);
  const text =
    kind === 'pdf'
      ? await this.extractTextFromPdf(file.buffer)
      : await this.extractTextFromImage(file.buffer);

  return {
    markdown: this.normalizeImportedTextToMarkdown(text, file.originalname),
    source: kind,
  };
}
|
||||
|
||||
/**
|
||||
@@ -55,12 +94,157 @@ export class QuickImportService {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Heuristic check for a local file path.
 *
 * The input counts as a path when it contains a path separator (`/` or `\`)
 * or ends with a supported document/image extension.
 */
private looksLikeLocalFile(input: string): boolean {
  return /[\\/]/.test(input) || /\.(pdf|png|jpg|jpeg|webp|bmp)$/i.test(input);
}
|
||||
|
||||
/**
 * Maps a file name's extension to a MIME type.
 *
 * @param filename File name or path; only its extension is inspected
 *                 (case-insensitive).
 * @returns The MIME type for pdf/png/jpg/jpeg/webp/bmp, otherwise
 *          `application/octet-stream`.
 */
private getMimeTypeFromExtension(filename: string): string {
  const mimeByExtension = new Map<string, string>([
    ['.pdf', 'application/pdf'],
    ['.png', 'image/png'],
    ['.jpg', 'image/jpeg'],
    ['.jpeg', 'image/jpeg'],
    ['.webp', 'image/webp'],
    ['.bmp', 'image/bmp'],
  ]);

  const extension = path.extname(filename).toLowerCase();
  return mimeByExtension.get(extension) ?? 'application/octet-stream';
}
|
||||
|
||||
/**
 * Classifies an upload as PDF or image from its MIME type and file name.
 *
 * PDF is checked first, so a file with a PDF MIME type or a `.pdf` name is
 * always treated as a PDF.
 *
 * @throws UnsupportedMediaTypeException when the file is neither.
 */
private getUploadKind(
  file: Pick<Express.Multer.File, 'mimetype' | 'originalname'>,
): UploadKind {
  const mime = (file.mimetype ?? '').toLowerCase();
  const filename = (file.originalname ?? '').toLowerCase();

  const isPdf = mime.includes('pdf') || filename.endsWith('.pdf');
  if (isPdf) {
    return 'pdf';
  }

  const isImage = mime.startsWith('image/') || /\.(png|jpg|jpeg|webp|bmp)$/.test(filename);
  if (!isImage) {
    throw new UnsupportedMediaTypeException(
      'Endast PDF, PNG, JPG, JPEG, WEBP och BMP stöds.',
    );
  }

  return 'image';
}
|
||||
|
||||
/**
 * Extracts the text layer of a PDF.
 *
 * @param buffer Raw bytes of the PDF.
 * @returns The extracted text with NUL characters removed and trimmed.
 * @throws ServiceUnavailableException when pdf-parse fails.
 * @throws BadRequestException when the PDF contains no readable text.
 */
private async extractTextFromPdf(buffer: Buffer): Promise<string> {
  let extracted: string | undefined;

  try {
    const parsed = await pdfParse(buffer);
    extracted = parsed.text?.replace(/\u0000/g, '').trim();
  } catch (error) {
    console.error('[QuickImport] PDF ERROR:', error);
    throw new ServiceUnavailableException('PDF-importen misslyckades.');
  }

  // Checked outside the try block so this client error is not re-wrapped as a
  // service failure.
  if (!extracted) {
    throw new BadRequestException(
      'PDF-filen saknar läsbar text. Prova bildimport om det är en skannad sida.',
    );
  }

  return extracted;
}
|
||||
|
||||
/**
 * Runs OCR (Swedish + English) on an image and returns the recognised text.
 *
 * @param buffer Raw bytes of the image.
 * @returns The recognised text, trimmed.
 * @throws ServiceUnavailableException when the OCR worker cannot be started
 *         or recognition fails.
 * @throws BadRequestException when no text is found in the image.
 */
private async extractTextFromImage(buffer: Buffer): Promise<string> {
  let worker: Awaited<ReturnType<typeof createWorker>> | undefined;
  let recognised: string | undefined;

  try {
    // Created inside the try block: a start-up failure (e.g. missing language
    // data) must be logged and reported like any other OCR error.
    worker = await createWorker('swe+eng');
    const result = await worker.recognize(buffer);
    recognised = result.data.text?.trim();
  } catch (error) {
    console.error('[QuickImport] OCR ERROR:', error);
    throw new ServiceUnavailableException('OCR-importen misslyckades.');
  } finally {
    if (worker) {
      try {
        await worker.terminate();
      } catch (terminateError) {
        // Cleanup is best effort; it must not mask the OCR result or error.
        console.error('[QuickImport] OCR ERROR:', terminateError);
      }
    }
  }

  if (!recognised) {
    throw new BadRequestException('Ingen text hittades i bilden.');
  }

  return recognised;
}
|
||||
|
||||
/**
 * Converts raw text extracted from a PDF or image into the recipe Markdown
 * layout: title, ingredient list, instructions and an optional source line.
 *
 * The title is the first line longer than three characters. Section headings
 * switch between ingredients and instructions. Until a heading is seen, the
 * first content line decides the section via `looksLikeIngredientLine`.
 *
 * @param text Raw extracted text.
 * @param sourceName File name shown in the trailing "Källa:" line, if given.
 * @returns The Markdown document.
 * @throws BadRequestException when the text is empty after clean-up.
 */
private normalizeImportedTextToMarkdown(text: string, sourceName?: string): string {
  const cleanedText = text
    .replace(/\r/g, '')
    .replace(/[ \t]+/g, ' ')
    .replace(/\n{3,}/g, '\n\n')
    .trim();

  if (!cleanedText) {
    throw new BadRequestException('Ingen läsbar text hittades i filen.');
  }

  const rawLines = cleanedText.split('\n').map((line) => line.trim());
  const titleIndex = rawLines.findIndex((line) => line.length > 3);
  const title = rawLines[titleIndex] ?? 'Importerat recept';

  const ingredients: string[] = [];
  const instructions: string[] = [];
  let section: 'unknown' | 'ingredients' | 'instructions' = 'unknown';

  for (const [index, line] of rawLines.entries()) {
    // Skip blanks and the title line itself. Only that one occurrence is
    // skipped, so a later line that repeats the title text is kept.
    if (!line || index === titleIndex) {
      continue;
    }

    if (/^ingred/i.test(line)) {
      section = 'ingredients';
      continue;
    }

    if (/^(gör så här|gor sa har|instruktioner|tillvägagångssätt|tillvagagangssatt|method|instructions)/i.test(line)) {
      section = 'instructions';
      continue;
    }

    if (section === 'unknown') {
      section = this.looksLikeIngredientLine(line) ? 'ingredients' : 'instructions';
    }

    if (section === 'ingredients') {
      ingredients.push(line.startsWith('-') ? line : `- ${line}`);
    } else {
      instructions.push(line);
    }
  }

  // Built explicitly rather than with `.filter(Boolean)`: that also removed
  // the intentional blank separator lines, so the source line merged into the
  // last instruction paragraph.
  const output: string[] = [
    `# ${title}`,
    '',
    '## Ingredienser',
    ...(ingredients.length > 0 ? ingredients : ['- Komplettera ingredienser manuellt']),
    '',
    '## Tillvägagångssätt',
    ...(instructions.length > 0 ? instructions : ['Komplettera tillagningsstegen manuellt.']),
  ];

  if (sourceName) {
    output.push('', `Källa: ${sourceName}`);
  }

  return output.join('\n');
}
|
||||
|
||||
/**
 * Heuristic: does this line look like an ingredient rather than an
 * instruction?
 *
 * A line qualifies when it starts with a list bullet, starts with a quantity
 * (integer or decimal followed by whitespace), or contains a measurement unit
 * as a stand-alone token.
 *
 * @param line A single trimmed line of recipe text.
 */
private looksLikeIngredientLine(line: string): boolean {
  const startsWithBullet = /^[-*•]\s+/.test(line);

  // The decimal separator must be followed by digits, so a numbered step such
  // as "1. Värm ugnen" is not mistaken for a quantity.
  const startsWithQuantity = /^\d+(?:[.,]\d+)?\s+/.test(line);

  // `\b` only knows ASCII word characters, so it saw a boundary inside "Låt"
  // or "Gör" and matched the units "l" and "g". Unicode-aware lookarounds
  // require the unit to stand alone; a leading digit ("2dl") is still allowed.
  const containsUnit =
    /(?<!\p{L})(?:g|kg|hg|mg|ml|dl|cl|l|tsk|msk|krm|st|pkt|förp|klyfta)(?![\p{L}\p{N}])/iu.test(line);

  return startsWithBullet || startsWithQuantity || containsUnit;
}
|
||||
|
||||
/**
|
||||
@@ -76,7 +260,6 @@ export class QuickImportService {
|
||||
try {
|
||||
console.log('[QuickImport] Hämtar HTML från:', url);
|
||||
|
||||
// Hämta HTML från URL
|
||||
const response = await fetch(url, {
|
||||
headers: {
|
||||
'User-Agent':
|
||||
@@ -93,7 +276,6 @@ export class QuickImportService {
|
||||
const html = await response.text();
|
||||
console.log('[QuickImport] HTML längd:', html.length, 'tecken');
|
||||
|
||||
// Välj lämplig parser
|
||||
const parsers: RecipeParser[] = [
|
||||
new IcaRecipeParser(),
|
||||
new GenericRecipeParser(),
|
||||
@@ -121,12 +303,10 @@ export class QuickImportService {
|
||||
throw new Error('Kunde inte hitta receptnamn på sidan. Försök med en annan länk.');
|
||||
}
|
||||
|
||||
// Konvertera till Markdown-format
|
||||
const markdown = this.recipeToMarkdown(recipe, url);
|
||||
console.log('[QuickImport] Markdown genererad, längd:', markdown.length);
|
||||
|
||||
// Detektera källa från URL
|
||||
let source: 'ica' | 'pdf' | 'other' = 'other';
|
||||
let source: 'ica' | 'pdf' | 'image' | 'other' = 'other';
|
||||
if (/ica\.se/i.test(url)) {
|
||||
source = 'ica';
|
||||
}
|
||||
@@ -163,17 +343,14 @@ export class QuickImportService {
|
||||
): string {
|
||||
const lines: string[] = [];
|
||||
|
||||
// Titel
|
||||
lines.push(`# ${recipe.name}`);
|
||||
lines.push('');
|
||||
|
||||
// Beskrivning
|
||||
if (recipe.description) {
|
||||
lines.push(recipe.description);
|
||||
lines.push('');
|
||||
}
|
||||
|
||||
// Ingredienser
|
||||
if (recipe.ingredients.length > 0) {
|
||||
lines.push('## Ingredienser');
|
||||
for (const ing of recipe.ingredients) {
|
||||
@@ -185,14 +362,12 @@ export class QuickImportService {
|
||||
lines.push('');
|
||||
}
|
||||
|
||||
// Instruktioner
|
||||
if (recipe.instructions) {
|
||||
lines.push('## Tillvägagångssätt');
|
||||
lines.push(recipe.instructions);
|
||||
lines.push('');
|
||||
}
|
||||
|
||||
// Källa
|
||||
if (sourceUrl) {
|
||||
lines.push('---');
|
||||
lines.push('');
|
||||
|
||||
Reference in New Issue
Block a user