import { RecipeParser, ParsedRecipe } from './base.parser';

/**
 * Generic fallback parser for recipe pages from unknown sites.
 *
 * Strategy: look for a schema.org `Recipe` node in any embedded JSON-LD
 * script first; if none is usable, fall back to permissive regex-based HTML
 * scraping. It is intentionally more lenient than the site-specific parsers.
 */
export class GenericRecipeParser extends RecipeParser {
  /**
   * Always returns `true`: this parser is the catch-all fallback.
   *
   * @param url - Page URL (not inspected).
   */
  canHandle(url: string): boolean {
    return true;
  }

  /**
   * Parses a recipe out of raw page HTML.
   *
   * @param html - Full HTML source of the page.
   * @returns The recipe extracted from JSON-LD when available, otherwise the
   *   best-effort result of HTML scraping (fields may be empty).
   */
  parse(html: string): ParsedRecipe {
    console.log('[GenericParser] Parsing recipe from unknown site...');

    // A page may embed several JSON-LD scripts (WebSite, BreadcrumbList, ...),
    // so every one is inspected rather than only the first.
    const scriptPattern =
      /<script\b[^>]*type=["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/gi;
    let scriptMatch: RegExpExecArray | null;
    while ((scriptMatch = scriptPattern.exec(html)) !== null) {
      try {
        const recipe = this.findRecipeNode(JSON.parse(scriptMatch[1]));
        if (recipe) {
          console.log('[GenericParser] ✓ JSON-LD data found');
          return this.extractFromJsonLd(recipe);
        }
      } catch {
        // Malformed JSON or an unexpected shape: try the next script, and
        // ultimately fall back to HTML parsing.
        console.log('[GenericParser] JSON-LD parsing failed');
      }
    }

    console.log('[GenericParser] No JSON-LD found, using HTML parsing');
    return this.parseFromHtml(html);
  }

  /**
   * Locates a schema.org `Recipe` node inside parsed JSON-LD.
   *
   * Handles a bare node, a top-level array of nodes, and a `@graph`
   * container; `@type` may be a string or an array of strings.
   */
  private findRecipeNode(data: unknown): Record<string, unknown> | undefined {
    if (Array.isArray(data)) {
      for (const item of data) {
        const found = this.findRecipeNode(item);
        if (found) return found;
      }
      return undefined;
    }
    if (typeof data !== 'object' || data === null) return undefined;

    const node = data as Record<string, unknown>;
    const type = node['@type'];
    if (type === 'Recipe' || (Array.isArray(type) && type.includes('Recipe'))) {
      return node;
    }
    return this.findRecipeNode(node['@graph']);
  }

  /**
   * Builds a {@link ParsedRecipe} from a JSON-LD `Recipe` node.
   *
   * @param recipe - The JSON-LD node whose `@type` is `Recipe`.
   */
  private extractFromJsonLd(recipe: any): ParsedRecipe {
    const name = typeof recipe.name === 'string' ? recipe.name : '';

    const ingredients: Array<{ quantity: number; unit: string; name: string }> = [];
    if (Array.isArray(recipe.recipeIngredient)) {
      for (const line of recipe.recipeIngredient) {
        if (typeof line !== 'string') continue;
        const parsed = this.parseIngredientLine(line);
        if (parsed) {
          ingredients.push(parsed);
        }
      }
    }

    const instructions = this.instructionText(recipe.recipeInstructions);

    return { name, ingredients, instructions };
  }

  /**
   * Flattens a JSON-LD `recipeInstructions` value into plain text.
   *
   * Accepts a plain string (returned unchanged), a step object with `text`,
   * a section object with `itemListElement`, or an array of any of these.
   * Array entries are joined with a blank line; unusable entries are dropped.
   */
  private instructionText(value: unknown): string {
    if (typeof value === 'string') return value;
    if (Array.isArray(value)) {
      return value
        .map((step) => this.instructionText(step))
        .filter((text) => text)
        .join('\n\n');
    }
    if (typeof value !== 'object' || value === null) return '';

    const step = value as Record<string, unknown>;
    const text = step['text'];
    if (typeof text === 'string' && text) return text;
    return this.instructionText(step['itemListElement']);
  }

  /**
   * Best-effort scraping of title, ingredients and instructions from raw HTML
   * using regular expressions. Any field that cannot be found is left empty.
   */
  private parseFromHtml(html: string): ParsedRecipe {
    // Title: prefer the first <h1>, otherwise the document <title>.
    let name = '';
    const titleMatch =
      html.match(/<h1\b[^>]*>([^<]+)<\/h1>/i) ||
      html.match(/<title\b[^>]*>([^<]+)<\/title>/i);
    if (titleMatch) {
      name = titleMatch[1].trim();
    }

    // Ingredients: try common container patterns in order and keep the
    // results of the first pattern that yields anything.
    const ingredients: Array<{ quantity: number; unit: string; name: string }> = [];
    const ingredientPatterns = [
      /<li\b[^>]*>([\s\S]*?)<\/li>/gi,
      /<div\b[^>]*class="ingredient"[^>]*>([\s\S]*?)<\/div>/gi,
      /<p\b[^>]*class="ingredient"[^>]*>([\s\S]*?)<\/p>/gi,
    ];

    for (const pattern of ingredientPatterns) {
      let match: RegExpExecArray | null;
      while ((match = pattern.exec(html)) !== null) {
        // Drop nested markup and collapse whitespace so the line parser only
        // sees the visible text.
        const line = match[1]
          .replace(/<[^>]+>/g, ' ')
          .replace(/\s+/g, ' ')
          .trim();
        const parsed = this.parseIngredientLine(line);
        // Very short names are most likely noise, so they are skipped.
        if (parsed && parsed.name.length > 2) {
          ingredients.push(parsed);
        }
      }
      if (ingredients.length > 0) break;
    }

    // Instructions: first a container whose class mentions
    // instruction/method/step, then the first ordered list.
    let instructions = '';
    const instructionsPatterns = [
      /<(?:div|section)[^>]*class="[^"]*(?:instruction|method|step)[^"]*"[^>]*>(.*?)<\/(?:div|section)>/is,
      /<ol\b[^>]*>(.*?)<\/ol>/is,
    ];

    for (const pattern of instructionsPatterns) {
      const match = html.match(pattern);
      if (match) {
        instructions = match[1].replace(/<[^>]+>/g, '').trim();
        // Accept the first candidate that is long enough to be real text.
        if (instructions.length > 10) break;
      }
    }

    return { name, ingredients, instructions };
  }
}