feat: Enhance ingredient parsing to support mixed fractions and add description extraction in recipe parsers
This commit is contained in:
@@ -59,15 +59,25 @@ export abstract class RecipeParser {
|
|||||||
cleaned = cleaned.replace(/\s*\([^)]*\)/, '').trim();
|
cleaned = cleaned.replace(/\s*\([^)]*\)/, '').trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Hantera bråkdelar: "1/2" eller "1 / 2"
|
// Hantera bråkdelar: "1/2" eller "1 1/2" eller "1 1 / 2"
|
||||||
const fractionMatch = cleaned.match(/^([\d.]+)\s*\/\s*([\d.]+)/);
|
// Regex: (optional whole)? numerator / denominator
|
||||||
|
const fractionMatch = cleaned.match(/^(\d+)?\s*(\d+)\s*\/\s*([\d.]+)/);
|
||||||
let quantity = 0;
|
let quantity = 0;
|
||||||
let remainingText = cleaned;
|
let remainingText = cleaned;
|
||||||
|
|
||||||
if (fractionMatch) {
|
if (fractionMatch) {
|
||||||
const numerator = parseFloat(fractionMatch[1]);
|
if (fractionMatch[1]) {
|
||||||
const denominator = parseFloat(fractionMatch[2]);
|
// Heltal + bråk: "1 1/2"
|
||||||
|
const whole = parseFloat(fractionMatch[1]);
|
||||||
|
const numerator = parseFloat(fractionMatch[2]);
|
||||||
|
const denominator = parseFloat(fractionMatch[3]);
|
||||||
|
quantity = whole + (numerator / denominator);
|
||||||
|
} else {
|
||||||
|
// Bara bråk: "1/2"
|
||||||
|
const numerator = parseFloat(fractionMatch[2]);
|
||||||
|
const denominator = parseFloat(fractionMatch[3]);
|
||||||
quantity = numerator / denominator;
|
quantity = numerator / denominator;
|
||||||
|
}
|
||||||
remainingText = cleaned.substring(fractionMatch[0].length).trim();
|
remainingText = cleaned.substring(fractionMatch[0].length).trim();
|
||||||
} else {
|
} else {
|
||||||
const numberMatch = remainingText.match(/^([\d.,]+)/);
|
const numberMatch = remainingText.match(/^([\d.,]+)/);
|
||||||
|
|||||||
@@ -42,6 +42,7 @@ export class GenericRecipeParser extends RecipeParser {
|
|||||||
|
|
||||||
private extractFromJsonLd(recipe: any): ParsedRecipe {
|
private extractFromJsonLd(recipe: any): ParsedRecipe {
|
||||||
const name = recipe.name || '';
|
const name = recipe.name || '';
|
||||||
|
const description = recipe.description || '';
|
||||||
|
|
||||||
const ingredients: Array<{ quantity: number; unit: string; name: string; note?: string }> = [];
|
const ingredients: Array<{ quantity: number; unit: string; name: string; note?: string }> = [];
|
||||||
if (recipe.recipeIngredient && Array.isArray(recipe.recipeIngredient)) {
|
if (recipe.recipeIngredient && Array.isArray(recipe.recipeIngredient)) {
|
||||||
@@ -71,6 +72,7 @@ export class GenericRecipeParser extends RecipeParser {
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
name,
|
name,
|
||||||
|
description,
|
||||||
ingredients,
|
ingredients,
|
||||||
instructions,
|
instructions,
|
||||||
};
|
};
|
||||||
@@ -90,6 +92,15 @@ export class GenericRecipeParser extends RecipeParser {
|
|||||||
name = titleMatch[1].trim();
|
name = titleMatch[1].trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Försöka extrahera beskrivning från meta-taggar
|
||||||
|
let description = '';
|
||||||
|
const descMatch = html.match(
|
||||||
|
/<meta\s+name="description"\s+content="([^"]+)"/i
|
||||||
|
);
|
||||||
|
if (descMatch) {
|
||||||
|
description = descMatch[1].trim();
|
||||||
|
}
|
||||||
|
|
||||||
// Försöka extrahera ingredienser från vanliga strukturer
|
// Försöka extrahera ingredienser från vanliga strukturer
|
||||||
const ingredients: Array<{ quantity: number; unit: string; name: string; note?: string }> = [];
|
const ingredients: Array<{ quantity: number; unit: string; name: string; note?: string }> = [];
|
||||||
|
|
||||||
@@ -129,6 +140,7 @@ export class GenericRecipeParser extends RecipeParser {
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
name,
|
name,
|
||||||
|
description,
|
||||||
ingredients,
|
ingredients,
|
||||||
instructions,
|
instructions,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -45,6 +45,9 @@ export class IcaRecipeParser extends RecipeParser {
|
|||||||
// Extrahera titel
|
// Extrahera titel
|
||||||
const name = recipe.name || '';
|
const name = recipe.name || '';
|
||||||
|
|
||||||
|
// Extrahera beskrivning
|
||||||
|
const description = recipe.description || '';
|
||||||
|
|
||||||
// Extrahera ingredienser
|
// Extrahera ingredienser
|
||||||
const ingredients: Array<{ quantity: number; unit: string; name: string; note?: string }> = [];
|
const ingredients: Array<{ quantity: number; unit: string; name: string; note?: string }> = [];
|
||||||
if (recipe.recipeIngredient && Array.isArray(recipe.recipeIngredient)) {
|
if (recipe.recipeIngredient && Array.isArray(recipe.recipeIngredient)) {
|
||||||
@@ -75,6 +78,7 @@ export class IcaRecipeParser extends RecipeParser {
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
name,
|
name,
|
||||||
|
description,
|
||||||
ingredients,
|
ingredients,
|
||||||
instructions,
|
instructions,
|
||||||
};
|
};
|
||||||
@@ -96,6 +100,15 @@ export class IcaRecipeParser extends RecipeParser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Extrahera beskrivning från meta-taggar
|
||||||
|
let description = '';
|
||||||
|
const descMatch = html.match(
|
||||||
|
/<meta\s+name="description"\s+content="([^"]+)"/i
|
||||||
|
);
|
||||||
|
if (descMatch) {
|
||||||
|
description = descMatch[1].trim();
|
||||||
|
}
|
||||||
|
|
||||||
const ingredients: Array<{ quantity: number; unit: string; name: string; note?: string }> = [];
|
const ingredients: Array<{ quantity: number; unit: string; name: string; note?: string }> = [];
|
||||||
const ingredientRegex =
|
const ingredientRegex =
|
||||||
/<li[^>]*class="[^"]*ingredient[^"]*"[^>]*>([^<]+)<\/li>/gi;
|
/<li[^>]*class="[^"]*ingredient[^"]*"[^>]*>([^<]+)<\/li>/gi;
|
||||||
@@ -117,6 +130,7 @@ export class IcaRecipeParser extends RecipeParser {
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
name,
|
name,
|
||||||
|
description,
|
||||||
ingredients,
|
ingredients,
|
||||||
instructions,
|
instructions,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -529,7 +529,7 @@ function parseRecipeMarkdown(markdown: string): ParsedRecipe {
|
|||||||
const heading = trimmed.replace(/^##\s+/, '').trim().toLowerCase();
|
const heading = trimmed.replace(/^##\s+/, '').trim().toLowerCase();
|
||||||
if (/ingrediens/.test(heading)) {
|
if (/ingrediens/.test(heading)) {
|
||||||
currentSection = 'ingredients';
|
currentSection = 'ingredients';
|
||||||
} else if (/instruktion|tillagning|gör så här|steg/.test(heading)) {
|
} else if (/instruktion|tillagning|gör så här|steg|tillväg|metod/.test(heading)) {
|
||||||
currentSection = 'instructions';
|
currentSection = 'instructions';
|
||||||
} else {
|
} else {
|
||||||
currentSection = 'none';
|
currentSection = 'none';
|
||||||
@@ -570,12 +570,21 @@ function parseRecipeMarkdown(markdown: string): ParsedRecipe {
|
|||||||
* Parsar en ingrediensrad, t.ex.:
|
* Parsar en ingrediensrad, t.ex.:
|
||||||
* "400 g kycklingfilé"
|
* "400 g kycklingfilé"
|
||||||
* "2 dl grädde (eller crème fraiche)"
|
* "2 dl grädde (eller crème fraiche)"
|
||||||
|
* "1 1/2 dl crème fraiche"
|
||||||
|
* "1 polka- eller gulbeta"
|
||||||
* "1 kruka basilika"
|
* "1 kruka basilika"
|
||||||
* "salt"
|
* "salt"
|
||||||
*/
|
*/
|
||||||
function parseIngredientLine(text: string): ParsedIngredient {
|
function parseIngredientLine(text: string): ParsedIngredient {
|
||||||
const trimmed = text.trim();
|
const trimmed = text.trim();
|
||||||
|
|
||||||
|
// Kända enheter
|
||||||
|
const knownUnits = [
|
||||||
|
'g', 'kg', 'hg', 'mg', 'ml', 'dl', 'l', 'tl',
|
||||||
|
'st', 'tsk', 'msk', 'krm', 'matsled', 'tesled',
|
||||||
|
'pris', 'portion', 'burk', 'förp', 'paket',
|
||||||
|
];
|
||||||
|
|
||||||
// Extrahera eventuell parentes-not i slutet
|
// Extrahera eventuell parentes-not i slutet
|
||||||
let note: string | null = null;
|
let note: string | null = null;
|
||||||
let main = trimmed;
|
let main = trimmed;
|
||||||
@@ -585,16 +594,47 @@ function parseIngredientLine(text: string): ParsedIngredient {
|
|||||||
main = trimmed.slice(0, parenMatch.index).trim();
|
main = trimmed.slice(0, parenMatch.index).trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Försök matcha bråk först: "1 1/2 dl crème fraiche" eller "1/2 dl"
|
||||||
|
const fractionMatch = main.match(/^(\d+)?\s*(\d+)\s*\/\s*([\d.]+)\s+(\S+)\s+(.*)$/);
|
||||||
|
if (fractionMatch) {
|
||||||
|
let quantity = 0;
|
||||||
|
if (fractionMatch[1]) {
|
||||||
|
quantity = parseFloat(fractionMatch[1]) + parseFloat(fractionMatch[2]) / parseFloat(fractionMatch[3]);
|
||||||
|
} else {
|
||||||
|
quantity = parseFloat(fractionMatch[2]) / parseFloat(fractionMatch[3]);
|
||||||
|
}
|
||||||
|
const candidateUnit = fractionMatch[4].toLowerCase();
|
||||||
|
if (knownUnits.includes(candidateUnit)) {
|
||||||
|
return {
|
||||||
|
quantity,
|
||||||
|
unit: candidateUnit,
|
||||||
|
rawName: fractionMatch[5].trim(),
|
||||||
|
note,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Försök matcha "kvantitet enhet namn" — t.ex. "400 g kycklingfilé" eller "2.5 dl grädde"
|
// Försök matcha "kvantitet enhet namn" — t.ex. "400 g kycklingfilé" eller "2.5 dl grädde"
|
||||||
const fullMatch = main.match(/^(\d+(?:[.,]\d+)?)\s+(\S+)\s+(.+)$/);
|
const fullMatch = main.match(/^(\d+(?:[.,]\d+)?)\s+(\S+)\s+(.+)$/);
|
||||||
if (fullMatch) {
|
if (fullMatch) {
|
||||||
|
const candidateUnit = fullMatch[2].toLowerCase();
|
||||||
|
// Validera att det andra ordet är en känd enhet
|
||||||
|
if (knownUnits.includes(candidateUnit)) {
|
||||||
return {
|
return {
|
||||||
quantity: parseNumber(fullMatch[1]),
|
quantity: parseNumber(fullMatch[1]),
|
||||||
unit: fullMatch[2],
|
unit: candidateUnit,
|
||||||
rawName: fullMatch[3].trim(),
|
rawName: fullMatch[3].trim(),
|
||||||
note,
|
note,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
// Om inte känd enhet, behandla som "kvantitet namn" utan enhet
|
||||||
|
return {
|
||||||
|
quantity: parseNumber(fullMatch[1]),
|
||||||
|
unit: 'st',
|
||||||
|
rawName: fullMatch[2] + ' ' + fullMatch[3],
|
||||||
|
note,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
// Försök matcha "kvantitet namn" utan enhet — t.ex. "3 ägg"
|
// Försök matcha "kvantitet namn" utan enhet — t.ex. "3 ägg"
|
||||||
const noUnitMatch = main.match(/^(\d+(?:[.,]\d+)?)\s+(.+)$/);
|
const noUnitMatch = main.match(/^(\d+(?:[.,]\d+)?)\s+(.+)$/);
|
||||||
|
|||||||
@@ -311,66 +311,9 @@ Stek löken i lite smör. Tillsätt köttfärsen...`}</pre>
|
|||||||
|
|
||||||
{/* STEG 2: Granskning */}
|
{/* STEG 2: Granskning */}
|
||||||
{step === 'review' && parsed && (
|
{step === 'review' && parsed && (
|
||||||
<section style={{ display: 'grid', gap: '1.5rem' }}>
|
<section style={{ display: 'grid', gridTemplateColumns: '2fr 1fr', gap: '1.5rem' }}>
|
||||||
{/* Debug Panel - Import Output */}
|
{/* Vänster: Receptdetaljer + Ingredienser */}
|
||||||
<details
|
<div style={{ display: 'grid', gap: '1.5rem' }}>
|
||||||
open={showDebugPanel}
|
|
||||||
onChange={(e) => setShowDebugPanel((e.target as HTMLDetailsElement).open)}
|
|
||||||
style={{
|
|
||||||
border: '1px solid #ddd',
|
|
||||||
borderRadius: '8px',
|
|
||||||
padding: '1rem',
|
|
||||||
background: '#f9f9f9',
|
|
||||||
}}
|
|
||||||
>
|
|
||||||
<summary style={{ cursor: 'pointer', fontWeight: 600, fontSize: '0.95rem', color: '#666' }}>
|
|
||||||
🔍 Import Debug Output {showDebugPanel ? '▼' : '▶'}
|
|
||||||
</summary>
|
|
||||||
<div style={{ marginTop: '1rem', display: 'grid', gap: '1rem' }}>
|
|
||||||
{/* Raw Markdown */}
|
|
||||||
<div>
|
|
||||||
<h4 style={{ margin: '0 0 0.5rem 0', fontSize: '0.85rem', color: '#555' }}>Raw Markdown:</h4>
|
|
||||||
<pre
|
|
||||||
style={{
|
|
||||||
background: 'white',
|
|
||||||
border: '1px solid #ddd',
|
|
||||||
borderRadius: '4px',
|
|
||||||
padding: '0.75rem',
|
|
||||||
fontSize: '0.8rem',
|
|
||||||
overflow: 'auto',
|
|
||||||
maxHeight: '200px',
|
|
||||||
margin: 0,
|
|
||||||
fontFamily: 'monospace',
|
|
||||||
color: '#333',
|
|
||||||
}}
|
|
||||||
>
|
|
||||||
{markdown}
|
|
||||||
</pre>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{/* Parse Results as JSON */}
|
|
||||||
<div>
|
|
||||||
<h4 style={{ margin: '0 0 0.5rem 0', fontSize: '0.85rem', color: '#555' }}>Parse Result:</h4>
|
|
||||||
<pre
|
|
||||||
style={{
|
|
||||||
background: 'white',
|
|
||||||
border: '1px solid #ddd',
|
|
||||||
borderRadius: '4px',
|
|
||||||
padding: '0.75rem',
|
|
||||||
fontSize: '0.75rem',
|
|
||||||
overflow: 'auto',
|
|
||||||
maxHeight: '250px',
|
|
||||||
margin: 0,
|
|
||||||
fontFamily: 'monospace',
|
|
||||||
color: '#333',
|
|
||||||
}}
|
|
||||||
>
|
|
||||||
{JSON.stringify(parsed, null, 2)}
|
|
||||||
</pre>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</details>
|
|
||||||
|
|
||||||
{/* Receptdetaljer */}
|
{/* Receptdetaljer */}
|
||||||
<div style={{ display: 'grid', gap: '1rem', padding: '1rem', border: '1px solid #ddd', borderRadius: '8px' }}>
|
<div style={{ display: 'grid', gap: '1rem', padding: '1rem', border: '1px solid #ddd', borderRadius: '8px' }}>
|
||||||
<h2 style={{ margin: 0, fontSize: '1.1rem' }}>Receptdetaljer</h2>
|
<h2 style={{ margin: 0, fontSize: '1.1rem' }}>Receptdetaljer</h2>
|
||||||
@@ -549,6 +492,83 @@ Stek löken i lite smör. Tillsätt köttfärsen...`}</pre>
|
|||||||
Avbryt
|
Avbryt
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Höger: Debug Panel (Sticky) */}
|
||||||
|
<div style={{ position: 'sticky', top: '1rem', height: 'fit-content' }}>
|
||||||
|
<details
|
||||||
|
open={showDebugPanel}
|
||||||
|
onChange={(e) => setShowDebugPanel((e.target as HTMLDetailsElement).open)}
|
||||||
|
style={{
|
||||||
|
border: '1px solid #ddd',
|
||||||
|
borderRadius: '8px',
|
||||||
|
padding: '0.75rem',
|
||||||
|
background: '#f9f9f9',
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<summary style={{ cursor: 'pointer', fontWeight: 600, fontSize: '0.9rem', color: '#666', userSelect: 'none' }}>
|
||||||
|
🔍 Import Debug {showDebugPanel ? '▼' : '▶'}
|
||||||
|
</summary>
|
||||||
|
|
||||||
|
{/* Raw Markdown */}
|
||||||
|
<details
|
||||||
|
style={{
|
||||||
|
marginTop: '0.75rem',
|
||||||
|
paddingTop: '0.75rem',
|
||||||
|
borderTop: '1px solid #ddd',
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<summary style={{ cursor: 'pointer', fontSize: '0.8rem', fontWeight: 500, color: '#555', userSelect: 'none' }}>
|
||||||
|
Raw Markdown
|
||||||
|
</summary>
|
||||||
|
<pre
|
||||||
|
style={{
|
||||||
|
background: 'white',
|
||||||
|
border: '1px solid #ddd',
|
||||||
|
borderRadius: '4px',
|
||||||
|
padding: '0.5rem',
|
||||||
|
fontSize: '0.7rem',
|
||||||
|
overflow: 'auto',
|
||||||
|
maxHeight: '200px',
|
||||||
|
margin: '0.5rem 0 0',
|
||||||
|
fontFamily: 'monospace',
|
||||||
|
color: '#333',
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{markdown}
|
||||||
|
</pre>
|
||||||
|
</details>
|
||||||
|
|
||||||
|
{/* Parse Result */}
|
||||||
|
<details
|
||||||
|
style={{
|
||||||
|
marginTop: '0.5rem',
|
||||||
|
paddingTop: '0.5rem',
|
||||||
|
borderTop: '1px solid #ddd',
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<summary style={{ cursor: 'pointer', fontSize: '0.8rem', fontWeight: 500, color: '#555', userSelect: 'none' }}>
|
||||||
|
Parse Result
|
||||||
|
</summary>
|
||||||
|
<pre
|
||||||
|
style={{
|
||||||
|
background: 'white',
|
||||||
|
border: '1px solid #ddd',
|
||||||
|
borderRadius: '4px',
|
||||||
|
padding: '0.5rem',
|
||||||
|
fontSize: '0.65rem',
|
||||||
|
overflow: 'auto',
|
||||||
|
maxHeight: '200px',
|
||||||
|
margin: '0.5rem 0 0',
|
||||||
|
fontFamily: 'monospace',
|
||||||
|
color: '#333',
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{JSON.stringify(parsed, null, 2)}
|
||||||
|
</pre>
|
||||||
|
</details>
|
||||||
|
</details>
|
||||||
|
</div>
|
||||||
</section>
|
</section>
|
||||||
)}
|
)}
|
||||||
</main>
|
</main>
|
||||||
|
|||||||
Reference in New Issue
Block a user