139 lines
4.0 KiB
TypeScript
139 lines
4.0 KiB
TypeScript
import { RecipeParser, ParsedRecipe } from './base.parser';
|
|
|
|
/**
 * Parser for ica.se recipe pages.
 *
 * Uses the page's JSON-LD structured data as the primary source and falls
 * back to regex-based HTML scraping when no usable JSON-LD recipe is found.
 */
export class IcaRecipeParser extends RecipeParser {
  /**
   * @param url Address of the page to be parsed.
   * @returns `true` when the URL contains `ica.se/recept` (case-insensitive).
   */
  canHandle(url: string): boolean {
    return /ica\.se\/recept/i.test(url);
  }

  /**
   * Parses a recipe page.
   *
   * @param html Raw HTML of the page.
   * @returns The recipe extracted from JSON-LD when available, otherwise the
   *          result of the HTML fallback.
   */
  parse(html: string): ParsedRecipe {
    console.log('[IcaParser] Parsing ICA recipe...');

    const recipe = this.findJsonLdRecipe(html);
    if (recipe) {
      console.log('[IcaParser] ✓ JSON-LD recipe found');
      try {
        return this.extractFromJsonLd(recipe);
      } catch (err) {
        // Best effort: a failure while reading the structured data must not
        // abort parsing, the HTML fallback below still gets a chance.
        console.log('[IcaParser] JSON-LD parsing failed:', err);
      }
    }

    // Fallback: HTML parsing.
    console.log('[IcaParser] Falling back to HTML parsing');
    return this.parseFromHtml(html);
  }

  /**
   * Scans every `<script type="application/ld+json">` block in the page and
   * returns the first Recipe node found. A page may carry several JSON-LD
   * blocks and the recipe is not necessarily in the first one; a block that
   * is not valid JSON is logged and skipped.
   */
  private findJsonLdRecipe(html: string): Record<string, unknown> | undefined {
    const scriptRegex =
      /<script[^>]*type=["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/gi;

    let match: RegExpExecArray | null;
    while ((match = scriptRegex.exec(html)) !== null) {
      try {
        const recipe = this.findRecipeNode(JSON.parse(match[1] ?? ''));
        if (recipe) {
          return recipe;
        }
      } catch (err) {
        console.log('[IcaParser] JSON-LD parsing failed:', err);
      }
    }
    return undefined;
  }

  /**
   * Searches a parsed JSON-LD value for a node whose `@type` is (or includes)
   * `Recipe`. Handles a single object, a top-level array of objects and
   * objects that wrap their nodes in `@graph`.
   */
  private findRecipeNode(node: unknown): Record<string, unknown> | undefined {
    if (Array.isArray(node)) {
      for (const item of node) {
        const found = this.findRecipeNode(item);
        if (found) {
          return found;
        }
      }
      return undefined;
    }
    if (typeof node !== 'object' || node === null) {
      return undefined;
    }

    const candidate = node as Record<string, unknown>;
    const type = candidate['@type'];
    const isRecipe = Array.isArray(type)
      ? type.some((entry) => entry === 'Recipe')
      : type === 'Recipe';
    if (isRecipe) {
      return candidate;
    }
    return this.findRecipeNode(candidate['@graph']);
  }

  /**
   * Builds a ParsedRecipe from a JSON-LD Recipe node.
   *
   * @param recipe The Recipe node; anything that is not an object yields an
   *               empty recipe.
   * @returns Name and description (empty string when missing or not a
   *          string), the ingredients accepted by `parseIngredientLine`, and
   *          the instruction steps joined by blank lines.
   */
  private extractFromJsonLd(recipe: unknown): ParsedRecipe {
    const data: Record<string, unknown> =
      typeof recipe === 'object' && recipe !== null
        ? (recipe as Record<string, unknown>)
        : {};

    const name = this.textOrEmpty(data['name']);
    const description = this.textOrEmpty(data['description']);

    const rawIngredients = data['recipeIngredient'];
    const ingredients = Array.isArray(rawIngredients)
      ? this.collectIngredients(rawIngredients)
      : [];

    const instructions = this.collectInstructionSteps(
      data['recipeInstructions']
    ).join('\n\n');

    return {
      name,
      description,
      ingredients,
      instructions,
    };
  }

  /** Returns the value when it is a string, otherwise an empty string. */
  private textOrEmpty(value: unknown): string {
    return typeof value === 'string' ? value : '';
  }

  /**
   * Runs every string entry through `parseIngredientLine` and keeps the
   * truthy results; non-string entries are skipped.
   */
  private collectIngredients(
    lines: readonly unknown[]
  ): Array<{ quantity: number; unit: string; name: string; note?: string }> {
    const ingredients: Array<{
      quantity: number;
      unit: string;
      name: string;
      note?: string;
    }> = [];

    for (const line of lines) {
      if (typeof line !== 'string') {
        continue;
      }
      const parsed = this.parseIngredientLine(line);
      if (parsed) {
        ingredients.push(parsed);
      }
    }
    return ingredients;
  }

  /**
   * Flattens a `recipeInstructions` value into a list of non-empty step
   * texts. Accepts a plain string, an object with a `text` property, an
   * object grouping its steps under `itemListElement`, or an array of any of
   * these.
   */
  private collectInstructionSteps(node: unknown): string[] {
    if (typeof node === 'string') {
      return node ? [node] : [];
    }
    if (Array.isArray(node)) {
      const steps: string[] = [];
      for (const item of node) {
        steps.push(...this.collectInstructionSteps(item));
      }
      return steps;
    }
    if (typeof node !== 'object' || node === null) {
      return [];
    }

    const step = node as Record<string, unknown>;
    const text = step['text'];
    if (typeof text === 'string' && text) {
      return [text];
    }
    return this.collectInstructionSteps(step['itemListElement']);
  }

  /**
   * Regex-based fallback used when the page has no usable JSON-LD recipe.
   *
   * Reads the title from the first `<h1>` (or `og:title`), the description
   * from the description meta tag, ingredients from `<li>` elements whose
   * class contains "ingredient", and instructions from the first
   * `<div>`/`<section>` whose class contains "instruction" or "howto" and
   * that yields non-empty text.
   */
  private parseFromHtml(html: string): ParsedRecipe {
    let name = this.decodeEntities(
      html.match(/<h1[^>]*>([^<]+)<\/h1>/i)?.[1] ?? ''
    ).trim();
    if (!name) {
      name = this.decodeEntities(
        html.match(/<meta\s+property="og:title"\s+content="([^"]+)"/i)?.[1] ?? ''
      ).trim();
    }

    // Description from the meta tag.
    const description = this.decodeEntities(
      html.match(/<meta\s+name="description"\s+content="([^"]+)"/i)?.[1] ?? ''
    ).trim();

    const ingredientRegex =
      /<li[^>]*class="[^"]*ingredient[^"]*"[^>]*>([^<]+)<\/li>/gi;
    const ingredientLines: string[] = [];
    let item: RegExpExecArray | null;
    while ((item = ingredientRegex.exec(html)) !== null) {
      ingredientLines.push(this.decodeEntities(item[1] ?? ''));
    }
    const ingredients = this.collectIngredients(ingredientLines);

    // The container normally holds nested markup (<ol>, <li>, <p>), so the
    // content is captured lazily up to the first matching closing tag and
    // converted to text afterwards. A container with nested elements of the
    // same tag name is cut at the first closing tag; the loop therefore
    // moves on to the next candidate while the extracted text is empty.
    const blockRegex =
      /<(div|section)[^>]*class="[^"]*(?:instruction|howto)[^"]*"[^>]*>([\s\S]*?)<\/\1>/gi;
    let instructions = '';
    let block: RegExpExecArray | null;
    while (!instructions && (block = blockRegex.exec(html)) !== null) {
      instructions = this.htmlToText(block[2] ?? '');
    }

    return {
      name,
      description,
      ingredients,
      instructions,
    };
  }

  /**
   * Converts an HTML fragment to plain text: closing block-level tags become
   * paragraph breaks, `<br>` becomes a line break, remaining tags are
   * removed, entities are decoded and every line is trimmed.
   */
  private htmlToText(fragment: string): string {
    const withoutTags = fragment
      .replace(/<br\s*\/?>/gi, '\n')
      .replace(/<\/(?:li|p|div|h[1-6])>/gi, '\n\n')
      .replace(/<[^>]+>/g, '');

    // Entities are decoded after tag removal so that an escaped "<" in the
    // text is not mistaken for markup.
    return this.decodeEntities(withoutTags)
      .split('\n')
      .map((line) => line.trim())
      .join('\n')
      .replace(/\n{2,}/g, '\n\n')
      .trim();
  }

  /**
   * Decodes numeric character references and a small set of named entities
   * in a single pass (so `&amp;lt;` becomes `&lt;`, not `<`). Unknown
   * entities are left as they are. `&nbsp;` is mapped to a regular space to
   * keep whitespace uniform.
   */
  private decodeEntities(text: string): string {
    const named = new Map<string, string>([
      ['amp', '&'],
      ['lt', '<'],
      ['gt', '>'],
      ['quot', '"'],
      ['apos', "'"],
      ['nbsp', ' '],
      ['aring', 'å'],
      ['auml', 'ä'],
      ['ouml', 'ö'],
      ['eacute', 'é'],
      ['Aring', 'Å'],
      ['Auml', 'Ä'],
      ['Ouml', 'Ö'],
      ['Eacute', 'É'],
    ]);

    return text.replace(
      /&(#x[0-9a-f]+|#\d+|[a-z]+);/gi,
      (entity: string, body: string) => {
        if (body.charAt(0) === '#') {
          const isHex = body.charAt(1).toLowerCase() === 'x';
          const code = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
          // Out-of-range values would make String.fromCodePoint throw.
          return code > 0 && code <= 0x10ffff
            ? String.fromCodePoint(code)
            : entity;
        }
        return named.get(body) ?? entity;
      }
    );
  }
}
|