feat: Implement site-specific recipe parsers for ICA and generic fallback
This commit is contained in:
@@ -0,0 +1,136 @@
|
||||
import { RecipeParser, ParsedRecipe } from './base.parser';
|
||||
|
||||
/**
 * Generic parser for unknown recipe sites.
 *
 * Tries schema.org JSON-LD first and falls back to regex-based HTML
 * scraping. It is deliberately more permissive than the site-specific
 * parsers.
 */
export class GenericRecipeParser extends RecipeParser {
  /**
   * Always accepts the URL: this parser acts as the fallback.
   *
   * @param url - Page URL (not inspected).
   * @returns Always `true`.
   */
  canHandle(url: string): boolean {
    return true;
  }

  /**
   * Extracts a recipe from raw page HTML.
   *
   * Every `<script type="application/ld+json">` block is examined in
   * document order; the first one containing a schema.org `Recipe` node wins.
   * If none is usable (missing, malformed JSON, or extraction throws) the
   * HTML heuristics in {@link parseFromHtml} are used instead.
   *
   * @param html - Full HTML document as a string.
   * @returns The parsed recipe; fields are empty when nothing could be found.
   */
  parse(html: string): ParsedRecipe {
    console.log('[GenericParser] Parsing recipe from unknown site...');

    for (const payload of extractJsonLdPayloads(html)) {
      try {
        const recipe = findRecipeNode(JSON.parse(payload));
        if (recipe) {
          console.log('[GenericParser] ✓ JSON-LD data found');
          return this.extractFromJsonLd(recipe);
        }
      } catch {
        // Best effort: a broken block must not prevent later blocks or the
        // HTML fallback from being tried.
        console.log('[GenericParser] JSON-LD parsing failed');
      }
    }

    console.log('[GenericParser] No JSON-LD found, using HTML parsing');
    return this.parseFromHtml(html);
  }

  /**
   * Maps a schema.org `Recipe` JSON-LD node onto a {@link ParsedRecipe}.
   *
   * @param recipe - Decoded JSON-LD node whose `@type` includes `Recipe`.
   * @returns Name, parsed ingredients and instructions (steps separated by
   *   a blank line).
   */
  private extractFromJsonLd(recipe: Record<string, unknown>): ParsedRecipe {
    const rawName = recipe['name'];
    const name = typeof rawName === 'string' ? rawName.trim() : '';

    const ingredients: Array<{ quantity: number; unit: string; name: string }> = [];
    const rawIngredients = recipe['recipeIngredient'];
    if (Array.isArray(rawIngredients)) {
      for (const line of rawIngredients) {
        // Skip non-string entries instead of handing them to the line parser.
        if (typeof line !== 'string') continue;
        const parsed = this.parseIngredientLine(line);
        if (parsed) {
          ingredients.push(parsed);
        }
      }
    }

    const instructions = collectInstructionSteps(recipe['recipeInstructions'])
      .map((step) => step.trim())
      .filter((step) => step.length > 0)
      .join('\n\n');

    return { name, ingredients, instructions };
  }

  /**
   * Heuristic fallback that scrapes title, ingredients and instructions
   * straight from the markup using regular expressions.
   *
   * @param html - Full HTML document as a string.
   * @returns The parsed recipe; fields are empty when nothing matched.
   */
  private parseFromHtml(html: string): ParsedRecipe {
    // Title: first candidate that yields non-empty text wins.
    const titlePatterns = [
      /<h1\b[^>]*>([\s\S]*?)<\/h1>/i,
      /<meta\s+property="og:title"\s+content="([^"]+)"/i,
      /<title\b[^>]*>([\s\S]*?)<\/title>/i,
    ];
    let name = '';
    for (const pattern of titlePatterns) {
      const candidate = htmlToText(pattern.exec(html)?.[1] ?? '');
      if (candidate) {
        name = candidate;
        break;
      }
    }

    // Ingredients: try each structure in turn and keep the first one that
    // produces any parsable lines.
    // `\b` stops `<li` / `<p` from matching `<link>` / `<pre>`; `[\s\S]`
    // lets an element span several lines.
    const ingredients: Array<{ quantity: number; unit: string; name: string }> = [];
    const ingredientPatterns = [
      /<li\b[^>]*>([\s\S]*?)<\/li>/gi,
      /<div\b[^>]*class="ingredient"[^>]*>([\s\S]*?)<\/div>/gi,
      /<p\b[^>]*class="ingredient"[^>]*>([\s\S]*?)<\/p>/gi,
    ];
    for (const pattern of ingredientPatterns) {
      let match: RegExpExecArray | null;
      while ((match = pattern.exec(html)) !== null) {
        const line = htmlToText(match[1] ?? '');
        if (!line) continue;
        const parsed = this.parseIngredientLine(line);
        // Very short names are most likely noise.
        if (parsed && parsed.name.length > 2) {
          ingredients.push(parsed);
        }
      }
      if (ingredients.length > 0) break;
    }

    // Instructions: stop at the first candidate longer than 10 characters,
    // otherwise keep the longest short candidate seen.
    const instructionPatterns = [
      /<(?:div|section)\b[^>]*class="[^"]*(?:instruction|method|step)[^"]*"[^>]*>([\s\S]*?)<\/(?:div|section)>/i,
      /<ol\b[^>]*>([\s\S]*?)<\/ol>/i,
    ];
    let instructions = '';
    for (const pattern of instructionPatterns) {
      const candidate = htmlBlockToText(pattern.exec(html)?.[1] ?? '');
      if (candidate.length > instructions.length) {
        instructions = candidate;
      }
      if (instructions.length > 10) break;
    }

    return { name, ingredients, instructions };
  }
}

/** Returns the raw text content of every JSON-LD `<script>` block, in document order. */
function extractJsonLdPayloads(html: string): string[] {
  // Accepts double-quoted, single-quoted and unquoted `type` attributes.
  const scriptPattern =
    /<script\b[^>]*type\s*=\s*["']?application\/ld\+json["']?[^>]*>([\s\S]*?)<\/script>/gi;
  const payloads: string[] = [];
  let match: RegExpExecArray | null;
  while ((match = scriptPattern.exec(html)) !== null) {
    payloads.push(match[1] ?? '');
  }
  return payloads;
}

/** Finds the first node typed `Recipe` in a JSON-LD value (object, top-level array or `@graph`). */
function findRecipeNode(data: unknown): Record<string, unknown> | undefined {
  if (Array.isArray(data)) {
    for (const item of data) {
      const found = findRecipeNode(item);
      if (found) return found;
    }
    return undefined;
  }
  if (typeof data !== 'object' || data === null) {
    return undefined;
  }

  const node = data as Record<string, unknown>;
  const type = node['@type'];
  // `@type` may be a single string or a list of types.
  if (type === 'Recipe' || (Array.isArray(type) && type.includes('Recipe'))) {
    return node;
  }

  const graph = node['@graph'];
  return Array.isArray(graph) ? findRecipeNode(graph) : undefined;
}

/** Flattens `recipeInstructions` (string, step object, section, or arrays of these) into step texts. */
function collectInstructionSteps(node: unknown): string[] {
  if (typeof node === 'string') {
    return [node];
  }
  if (Array.isArray(node)) {
    const steps: string[] = [];
    for (const child of node) {
      steps.push(...collectInstructionSteps(child));
    }
    return steps;
  }
  if (typeof node === 'object' && node !== null) {
    const step = node as Record<string, unknown>;
    const text = step['text'];
    if (typeof text === 'string') {
      return [text];
    }
    // HowToSection nests its steps under `itemListElement`.
    return collectInstructionSteps(step['itemListElement']);
  }
  return [];
}

/** Strips tags, decodes a few common entities and collapses whitespace into single spaces. */
function htmlToText(fragment: string): string {
  return fragment
    .replace(/<[^>]+>/g, ' ')
    .replace(/&nbsp;|&#160;/gi, ' ')
    .replace(/&quot;/gi, '"')
    .replace(/&#0?39;|&apos;/gi, "'")
    .replace(/&lt;/gi, '<')
    .replace(/&gt;/gi, '>')
    // `&amp;` is decoded last so that e.g. `&amp;lt;` is not decoded twice.
    .replace(/&amp;/gi, '&')
    .replace(/\s+/g, ' ')
    .trim();
}

/** Converts an HTML block to plain text, keeping block-level boundaries as blank-line separators. */
function htmlBlockToText(fragment: string): string {
  return fragment
    // Source line breaks carry no meaning in HTML; only element boundaries do.
    .replace(/\s+/g, ' ')
    .replace(/<br\s*\/?>|<\/(?:li|p|div|h[1-6])\s*>/gi, '\n')
    .split('\n')
    .map((line) => htmlToText(line))
    .filter((line) => line.length > 0)
    .join('\n\n');
}
|
||||
Reference in New Issue
Block a user