feat: Enhance ingredient parsing to support mixed fractions and add description extraction in recipe parsers

This commit is contained in:
Nils-Johan Gynther
2026-04-12 10:50:59 +02:00
parent 9ca7fcce96
commit 3d4994f24d
5 changed files with 314 additions and 218 deletions
@@ -59,15 +59,25 @@ export abstract class RecipeParser {
cleaned = cleaned.replace(/\s*\([^)]*\)/, '').trim(); cleaned = cleaned.replace(/\s*\([^)]*\)/, '').trim();
} }
// Hantera bråkdelar: "1/2" eller "1 / 2" // Hantera bråkdelar: "1/2" eller "1 1/2" eller "1 1 / 2"
const fractionMatch = cleaned.match(/^([\d.]+)\s*\/\s*([\d.]+)/); // Regex: (optional whole)? numerator / denominator
const fractionMatch = cleaned.match(/^(\d+)?\s*(\d+)\s*\/\s*([\d.]+)/);
let quantity = 0; let quantity = 0;
let remainingText = cleaned; let remainingText = cleaned;
if (fractionMatch) { if (fractionMatch) {
const numerator = parseFloat(fractionMatch[1]); if (fractionMatch[1]) {
const denominator = parseFloat(fractionMatch[2]); // Heltal + bråk: "1 1/2"
const whole = parseFloat(fractionMatch[1]);
const numerator = parseFloat(fractionMatch[2]);
const denominator = parseFloat(fractionMatch[3]);
quantity = whole + (numerator / denominator);
} else {
// Bara bråk: "1/2"
const numerator = parseFloat(fractionMatch[2]);
const denominator = parseFloat(fractionMatch[3]);
quantity = numerator / denominator; quantity = numerator / denominator;
}
remainingText = cleaned.substring(fractionMatch[0].length).trim(); remainingText = cleaned.substring(fractionMatch[0].length).trim();
} else { } else {
const numberMatch = remainingText.match(/^([\d.,]+)/); const numberMatch = remainingText.match(/^([\d.,]+)/);
@@ -42,6 +42,7 @@ export class GenericRecipeParser extends RecipeParser {
private extractFromJsonLd(recipe: any): ParsedRecipe { private extractFromJsonLd(recipe: any): ParsedRecipe {
const name = recipe.name || ''; const name = recipe.name || '';
const description = recipe.description || '';
const ingredients: Array<{ quantity: number; unit: string; name: string; note?: string }> = []; const ingredients: Array<{ quantity: number; unit: string; name: string; note?: string }> = [];
if (recipe.recipeIngredient && Array.isArray(recipe.recipeIngredient)) { if (recipe.recipeIngredient && Array.isArray(recipe.recipeIngredient)) {
@@ -71,6 +72,7 @@ export class GenericRecipeParser extends RecipeParser {
return { return {
name, name,
description,
ingredients, ingredients,
instructions, instructions,
}; };
@@ -90,6 +92,15 @@ export class GenericRecipeParser extends RecipeParser {
name = titleMatch[1].trim(); name = titleMatch[1].trim();
} }
// Försöka extrahera beskrivning från meta-taggar
let description = '';
const descMatch = html.match(
/<meta\s+name="description"\s+content="([^"]+)"/i
);
if (descMatch) {
description = descMatch[1].trim();
}
// Försöka extrahera ingredienser från vanliga strukturer // Försöka extrahera ingredienser från vanliga strukturer
const ingredients: Array<{ quantity: number; unit: string; name: string; note?: string }> = []; const ingredients: Array<{ quantity: number; unit: string; name: string; note?: string }> = [];
@@ -129,6 +140,7 @@ export class GenericRecipeParser extends RecipeParser {
return { return {
name, name,
description,
ingredients, ingredients,
instructions, instructions,
}; };
@@ -45,6 +45,9 @@ export class IcaRecipeParser extends RecipeParser {
// Extrahera titel // Extrahera titel
const name = recipe.name || ''; const name = recipe.name || '';
// Extrahera beskrivning
const description = recipe.description || '';
// Extrahera ingredienser // Extrahera ingredienser
const ingredients: Array<{ quantity: number; unit: string; name: string; note?: string }> = []; const ingredients: Array<{ quantity: number; unit: string; name: string; note?: string }> = [];
if (recipe.recipeIngredient && Array.isArray(recipe.recipeIngredient)) { if (recipe.recipeIngredient && Array.isArray(recipe.recipeIngredient)) {
@@ -75,6 +78,7 @@ export class IcaRecipeParser extends RecipeParser {
return { return {
name, name,
description,
ingredients, ingredients,
instructions, instructions,
}; };
@@ -96,6 +100,15 @@ export class IcaRecipeParser extends RecipeParser {
} }
} }
// Extrahera beskrivning från meta-taggar
let description = '';
const descMatch = html.match(
/<meta\s+name="description"\s+content="([^"]+)"/i
);
if (descMatch) {
description = descMatch[1].trim();
}
const ingredients: Array<{ quantity: number; unit: string; name: string; note?: string }> = []; const ingredients: Array<{ quantity: number; unit: string; name: string; note?: string }> = [];
const ingredientRegex = const ingredientRegex =
/<li[^>]*class="[^"]*ingredient[^"]*"[^>]*>([^<]+)<\/li>/gi; /<li[^>]*class="[^"]*ingredient[^"]*"[^>]*>([^<]+)<\/li>/gi;
@@ -117,6 +130,7 @@ export class IcaRecipeParser extends RecipeParser {
return { return {
name, name,
description,
ingredients, ingredients,
instructions, instructions,
}; };
+42 -2
View File
@@ -529,7 +529,7 @@ function parseRecipeMarkdown(markdown: string): ParsedRecipe {
const heading = trimmed.replace(/^##\s+/, '').trim().toLowerCase(); const heading = trimmed.replace(/^##\s+/, '').trim().toLowerCase();
if (/ingrediens/.test(heading)) { if (/ingrediens/.test(heading)) {
currentSection = 'ingredients'; currentSection = 'ingredients';
} else if (/instruktion|tillagning|gör så här|steg/.test(heading)) { } else if (/instruktion|tillagning|gör så här|steg|tillväg|metod/.test(heading)) {
currentSection = 'instructions'; currentSection = 'instructions';
} else { } else {
currentSection = 'none'; currentSection = 'none';
@@ -570,12 +570,21 @@ function parseRecipeMarkdown(markdown: string): ParsedRecipe {
* Parsar en ingrediensrad, t.ex.: * Parsar en ingrediensrad, t.ex.:
* "400 g kycklingfilé" * "400 g kycklingfilé"
* "2 dl grädde (eller crème fraiche)" * "2 dl grädde (eller crème fraiche)"
* "1 1/2 dl crème fraiche"
* "1 polka- eller gulbeta"
* "1 kruka basilika" * "1 kruka basilika"
* "salt" * "salt"
*/ */
function parseIngredientLine(text: string): ParsedIngredient { function parseIngredientLine(text: string): ParsedIngredient {
const trimmed = text.trim(); const trimmed = text.trim();
// Kända enheter
const knownUnits = [
'g', 'kg', 'hg', 'mg', 'ml', 'dl', 'l', 'tl',
'st', 'tsk', 'msk', 'krm', 'matsled', 'tesled',
'pris', 'portion', 'burk', 'förp', 'paket',
];
// Extrahera eventuell parentes-not i slutet // Extrahera eventuell parentes-not i slutet
let note: string | null = null; let note: string | null = null;
let main = trimmed; let main = trimmed;
@@ -585,16 +594,47 @@ function parseIngredientLine(text: string): ParsedIngredient {
main = trimmed.slice(0, parenMatch.index).trim(); main = trimmed.slice(0, parenMatch.index).trim();
} }
// Försök matcha bråk först: "1 1/2 dl crème fraiche" eller "1/2 dl"
const fractionMatch = main.match(/^(\d+)?\s*(\d+)\s*\/\s*([\d.]+)\s+(\S+)\s+(.*)$/);
if (fractionMatch) {
let quantity = 0;
if (fractionMatch[1]) {
quantity = parseFloat(fractionMatch[1]) + parseFloat(fractionMatch[2]) / parseFloat(fractionMatch[3]);
} else {
quantity = parseFloat(fractionMatch[2]) / parseFloat(fractionMatch[3]);
}
const candidateUnit = fractionMatch[4].toLowerCase();
if (knownUnits.includes(candidateUnit)) {
return {
quantity,
unit: candidateUnit,
rawName: fractionMatch[5].trim(),
note,
};
}
}
// Försök matcha "kvantitet enhet namn" — t.ex. "400 g kycklingfilé" eller "2.5 dl grädde" // Försök matcha "kvantitet enhet namn" — t.ex. "400 g kycklingfilé" eller "2.5 dl grädde"
const fullMatch = main.match(/^(\d+(?:[.,]\d+)?)\s+(\S+)\s+(.+)$/); const fullMatch = main.match(/^(\d+(?:[.,]\d+)?)\s+(\S+)\s+(.+)$/);
if (fullMatch) { if (fullMatch) {
const candidateUnit = fullMatch[2].toLowerCase();
// Validera att det andra ordet är en känd enhet
if (knownUnits.includes(candidateUnit)) {
return { return {
quantity: parseNumber(fullMatch[1]), quantity: parseNumber(fullMatch[1]),
unit: fullMatch[2], unit: candidateUnit,
rawName: fullMatch[3].trim(), rawName: fullMatch[3].trim(),
note, note,
}; };
} }
// Om inte känd enhet, behandla som "kvantitet namn" utan enhet
return {
quantity: parseNumber(fullMatch[1]),
unit: 'st',
rawName: fullMatch[2] + ' ' + fullMatch[3],
note,
};
}
// Försök matcha "kvantitet namn" utan enhet — t.ex. "3 ägg" // Försök matcha "kvantitet namn" utan enhet — t.ex. "3 ägg"
const noUnitMatch = main.match(/^(\d+(?:[.,]\d+)?)\s+(.+)$/); const noUnitMatch = main.match(/^(\d+(?:[.,]\d+)?)\s+(.+)$/);
+80 -60
View File
@@ -311,66 +311,9 @@ Stek löken i lite smör. Tillsätt köttfärsen...`}</pre>
{/* STEG 2: Granskning */} {/* STEG 2: Granskning */}
{step === 'review' && parsed && ( {step === 'review' && parsed && (
<section style={{ display: 'grid', gap: '1.5rem' }}> <section style={{ display: 'grid', gridTemplateColumns: '2fr 1fr', gap: '1.5rem' }}>
{/* Debug Panel - Import Output */} {/* Vänster: Receptdetaljer + Ingredienser */}
<details <div style={{ display: 'grid', gap: '1.5rem' }}>
open={showDebugPanel}
onChange={(e) => setShowDebugPanel((e.target as HTMLDetailsElement).open)}
style={{
border: '1px solid #ddd',
borderRadius: '8px',
padding: '1rem',
background: '#f9f9f9',
}}
>
<summary style={{ cursor: 'pointer', fontWeight: 600, fontSize: '0.95rem', color: '#666' }}>
🔍 Import Debug Output {showDebugPanel ? '▼' : '▶'}
</summary>
<div style={{ marginTop: '1rem', display: 'grid', gap: '1rem' }}>
{/* Raw Markdown */}
<div>
<h4 style={{ margin: '0 0 0.5rem 0', fontSize: '0.85rem', color: '#555' }}>Raw Markdown:</h4>
<pre
style={{
background: 'white',
border: '1px solid #ddd',
borderRadius: '4px',
padding: '0.75rem',
fontSize: '0.8rem',
overflow: 'auto',
maxHeight: '200px',
margin: 0,
fontFamily: 'monospace',
color: '#333',
}}
>
{markdown}
</pre>
</div>
{/* Parse Results as JSON */}
<div>
<h4 style={{ margin: '0 0 0.5rem 0', fontSize: '0.85rem', color: '#555' }}>Parse Result:</h4>
<pre
style={{
background: 'white',
border: '1px solid #ddd',
borderRadius: '4px',
padding: '0.75rem',
fontSize: '0.75rem',
overflow: 'auto',
maxHeight: '250px',
margin: 0,
fontFamily: 'monospace',
color: '#333',
}}
>
{JSON.stringify(parsed, null, 2)}
</pre>
</div>
</div>
</details>
{/* Receptdetaljer */} {/* Receptdetaljer */}
<div style={{ display: 'grid', gap: '1rem', padding: '1rem', border: '1px solid #ddd', borderRadius: '8px' }}> <div style={{ display: 'grid', gap: '1rem', padding: '1rem', border: '1px solid #ddd', borderRadius: '8px' }}>
<h2 style={{ margin: 0, fontSize: '1.1rem' }}>Receptdetaljer</h2> <h2 style={{ margin: 0, fontSize: '1.1rem' }}>Receptdetaljer</h2>
@@ -549,6 +492,83 @@ Stek löken i lite smör. Tillsätt köttfärsen...`}</pre>
Avbryt Avbryt
</button> </button>
</div> </div>
</div>
{/* Höger: Debug Panel (Sticky) */}
<div style={{ position: 'sticky', top: '1rem', height: 'fit-content' }}>
<details
open={showDebugPanel}
onChange={(e) => setShowDebugPanel((e.target as HTMLDetailsElement).open)}
style={{
border: '1px solid #ddd',
borderRadius: '8px',
padding: '0.75rem',
background: '#f9f9f9',
}}
>
<summary style={{ cursor: 'pointer', fontWeight: 600, fontSize: '0.9rem', color: '#666', userSelect: 'none' }}>
🔍 Import Debug {showDebugPanel ? '▼' : '▶'}
</summary>
{/* Raw Markdown */}
<details
style={{
marginTop: '0.75rem',
paddingTop: '0.75rem',
borderTop: '1px solid #ddd',
}}
>
<summary style={{ cursor: 'pointer', fontSize: '0.8rem', fontWeight: 500, color: '#555', userSelect: 'none' }}>
Raw Markdown
</summary>
<pre
style={{
background: 'white',
border: '1px solid #ddd',
borderRadius: '4px',
padding: '0.5rem',
fontSize: '0.7rem',
overflow: 'auto',
maxHeight: '200px',
margin: '0.5rem 0 0',
fontFamily: 'monospace',
color: '#333',
}}
>
{markdown}
</pre>
</details>
{/* Parse Result */}
<details
style={{
marginTop: '0.5rem',
paddingTop: '0.5rem',
borderTop: '1px solid #ddd',
}}
>
<summary style={{ cursor: 'pointer', fontSize: '0.8rem', fontWeight: 500, color: '#555', userSelect: 'none' }}>
Parse Result
</summary>
<pre
style={{
background: 'white',
border: '1px solid #ddd',
borderRadius: '4px',
padding: '0.5rem',
fontSize: '0.65rem',
overflow: 'auto',
maxHeight: '200px',
margin: '0.5rem 0 0',
fontFamily: 'monospace',
color: '#333',
}}
>
{JSON.stringify(parsed, null, 2)}
</pre>
</details>
</details>
</div>
</section> </section>
)} )}
</main> </main>