Files
microservice-importer/backend/src/quick-import/parsers/base.parser.ts
T

159 lines
4.7 KiB
TypeScript

/**
* Bas-parser för receptsidor
* Alla site-specifika parsers bör extenda denna
*/
/**
 * Structured recipe data returned by a parser's `parse` method.
 */
export interface ParsedRecipe {
  /** Recipe title. */
  name: string;
  /** Optional descriptive text for the recipe. */
  description?: string;
  /** One entry per ingredient: amount, unit, ingredient name and an optional note. */
  ingredients: {
    quantity: number;
    unit: string;
    name: string;
    note?: string;
  }[];
  /** Optional preparation instructions as a single string. */
  instructions?: string;
}
/**
 * Base class for recipe-site parsers.
 *
 * Site-specific parsers extend this class, implement `canHandle` and `parse`,
 * and can use `parseIngredientLine` to split a single ingredient line into
 * quantity, unit, name and note.
 */
export abstract class RecipeParser {
  /**
   * Units recognised by `parseIngredientLine` (compared in lower case).
   * The multi-word entry 'efter smak' cannot be produced by the single-word
   * unit regexes below; it is retained from the original unit list.
   */
  private static readonly KNOWN_UNITS: ReadonlySet<string> = new Set([
    'g', 'kg', 'hg', 'mg', 'ml', 'dl', 'l', 'tl',
    'st', 'tsk', 'msk', 'krm', 'matsked', 'tesked',
    'pris', 'portion', 'port', 'burk', 'förp', 'paket', 'efter smak', 'klyfta',
  ]);

  /**
   * Check whether this parser can handle the given URL.
   */
  abstract canHandle(url: string): boolean;

  /**
   * Parse HTML and extract the recipe data.
   */
  abstract parse(html: string): ParsedRecipe;

  /**
   * Helper: parse a single ingredient line.
   *
   * Handles formats such as:
   * - "3 ägg"
   * - "150 g lax"
   * - "1/2 citron"
   * - "1 msk senap"
   * - "salt och peppar"
   * - "1 förp handskalade räkor i lake (à 570 g)"
   *
   * HTML tags are stripped first. The first parenthetical group is removed from
   * the text and always returned as `note`, so its content is never lost.
   *
   * @param line Raw ingredient line, possibly containing HTML tags.
   * @returns `null` when the line is empty after tag stripping. Otherwise the
   *   parsed parts; when no positive quantity could be determined, `quantity`
   *   is 0, `unit` is '' and `name` is the whole line without the parenthetical.
   */
  protected parseIngredientLine(line: string): {
    quantity: number;
    unit: string;
    name: string;
    note?: string;
  } | null {
    let cleaned = line.replace(/<[^>]+>/g, '').trim();
    if (!cleaned) return null;

    // Extract the first parenthetical group, e.g. "(à 570 g)".
    let parentheticalText = '';
    const parenMatch = cleaned.match(/\s*\(([^)]*)\)/);
    if (parenMatch) {
      parentheticalText = (parenMatch[1] ?? '').trim();
      cleaned = cleaned.replace(/\s*\([^)]*\)/, '').trim();
    }

    const leading = RecipeParser.parseLeadingQuantity(cleaned);
    let quantity = leading.quantity;
    const remainingText = leading.rest;

    // Extract a potential unit: the first word, if it is a known unit.
    // `\b` is ASCII-only in JavaScript and would treat å/ä/ö as word breaks,
    // so the end of the word is checked with an explicit lookahead instead.
    let unit = '';
    let productName = remainingText;
    const unitMatch = remainingText.match(/^([a-zåäö]+)(?![\d_])/i);
    if (unitMatch) {
      const candidate = (unitMatch[1] ?? '').toLowerCase();
      if (RecipeParser.KNOWN_UNITS.has(candidate)) {
        unit = candidate;
        productName = remainingText.substring(candidate.length).trim();
      }
    }

    // The parenthetical is always kept as the note.
    const note: string | undefined = parentheticalText || undefined;

    // No quantity in the main text: try to read "<number> <known unit>" from the
    // parenthetical, skipping any leading words such as "à" or "ca".
    if (quantity === 0 && parentheticalText) {
      const inner = parentheticalText.match(/(\d+(?:[.,]\d+)?)\s*([a-zåäö]+)/i);
      if (inner) {
        const innerQuantity = parseFloat((inner[1] ?? '').replace(',', '.'));
        const innerUnit = (inner[2] ?? '').toLowerCase();
        if (RecipeParser.KNOWN_UNITS.has(innerUnit) && innerQuantity > 0) {
          quantity = innerQuantity;
          unit = innerUnit;
        }
      }
    }

    // Still no quantity: return the whole (parenthetical-free) text as the name.
    if (quantity === 0) {
      return { quantity: 0, unit: '', name: cleaned, note };
    }

    return { quantity, unit, name: productName, note };
  }

  /**
   * Read a leading quantity ("3", "1,5", "1/2", "1 1/2", "1 1 / 2") from `text`.
   *
   * @returns The numeric quantity and the trimmed remainder of the text. When no
   *   finite quantity can be read, returns quantity 0 and the text unchanged.
   */
  private static parseLeadingQuantity(text: string): { quantity: number; rest: string } {
    // The optional whole part must be followed by whitespace; otherwise "11/2"
    // would be split into whole "1" and numerator "1".
    const fraction = text.match(/^(?:(\d+)\s+)?(\d+)\s*\/\s*([\d.]+)/);
    if (fraction) {
      const whole = fraction[1] ? parseFloat(fraction[1]) : 0;
      const numerator = parseFloat(fraction[2] ?? '');
      const denominator = parseFloat(fraction[3] ?? '');
      const value = whole + numerator / denominator;
      // A zero or malformed denominator gives Infinity/NaN: treat as "no quantity".
      if (!Number.isFinite(value)) {
        return { quantity: 0, rest: text };
      }
      return { quantity: value, rest: text.substring(fraction[0].length).trim() };
    }

    const plain = text.match(/^([\d.,]+)/);
    if (plain) {
      const value = parseFloat((plain[1] ?? '').replace(',', '.'));
      // Punctuation-only prefixes such as "," parse to NaN: ignore them.
      if (Number.isFinite(value)) {
        return { quantity: value, rest: text.substring(plain[0].length).trim() };
      }
    }

    return { quantity: 0, rest: text };
  }
}