Initial microservice-importer setup with NestJS backend and Next.js frontend
This commit is contained in:
@@ -0,0 +1,158 @@
|
||||
/**
|
||||
* Bas-parser för receptsidor
|
||||
* Alla site-specifika parsers bör extenda denna
|
||||
*/
|
||||
/**
 * Structured recipe data returned by {@link RecipeParser.parse}.
 */
export interface ParsedRecipe {
  /** Name of the recipe. */
  name: string;
  /** Optional description text. */
  description?: string;
  /** Ingredient rows, one entry per ingredient. */
  ingredients: Array<{
    /** Numeric amount; `parseIngredientLine` uses 0 when no amount was found. */
    quantity: number;
    /** Unit of measure; `parseIngredientLine` uses '' when none was recognised. */
    unit: string;
    /** Ingredient name. */
    name: string;
    /** Optional extra information, e.g. text that appeared in parentheses. */
    note?: string;
  }>;
  /** Optional instruction text. */
  instructions?: string;
}
|
||||
|
||||
/**
 * Units recognised by `RecipeParser.parseIngredientLine`, in lower case.
 *
 * Note: 'efter smak' contains a space, so it can never equal a single-word
 * unit candidate; it is kept so the list stays identical to the original.
 */
const KNOWN_UNITS: ReadonlySet<string> = new Set([
  'g', 'kg', 'hg', 'mg', 'ml', 'dl', 'l', 'tl',
  'st', 'tsk', 'msk', 'krm', 'matsked', 'tesked',
  'pris', 'portion', 'port', 'burk', 'förp', 'paket', 'efter smak', 'klyfta',
]);

/**
 * Base parser for recipe pages.
 * All site-specific parsers should extend this class.
 */
export abstract class RecipeParser {
  /**
   * Check whether this parser can handle the given URL.
   */
  abstract canHandle(url: string): boolean;

  /**
   * Parse HTML and extract the recipe data.
   */
  abstract parse(html: string): ParsedRecipe;

  /**
   * Helper: parse a single ingredient line.
   *
   * Handles formats such as:
   * - "3 ägg"
   * - "150 g lax"
   * - "1/2 citron"
   * - "1 msk senap"
   * - "salt och peppar"
   * - "1 förp handskalade räkor i lake (à 570 g)"
   *
   * Behaviour:
   * - HTML tags are stripped first; an empty result yields `null`.
   * - The first parenthesised group is removed from the text and always
   *   returned as `note` (it is never silently discarded).
   * - A leading fraction, mixed number or decimal number becomes `quantity`.
   * - The word following the number becomes `unit` if it is a known unit.
   * - When the main text has no amount but the parenthetical contains
   *   "<number> <known unit>" (optionally after a prefix such as "à" or
   *   "ca"), that amount and unit are used instead.
   * - When no amount is found at all, `quantity` is 0, `unit` is '' and
   *   `name` is the whole line without the parenthetical.
   *
   * @param line Raw ingredient text, possibly containing HTML tags.
   * @returns The parsed parts, or `null` when the line is empty.
   */
  protected parseIngredientLine(line: string): {
    quantity: number;
    unit: string;
    name: string;
    note?: string;
  } | null {
    let cleaned = line.replace(/<[^>]+>/g, '').trim();
    if (!cleaned) return null;

    // Pull out the first "(...)" group; only the first one is extracted,
    // any further groups stay in the name.
    let parentheticalText = '';
    const parenthetical = /\s*\(([^)]*)\)/.exec(cleaned);
    if (parenthetical) {
      parentheticalText = (parenthetical[1] ?? '').trim();
      cleaned = cleaned.replace(/\s*\([^)]*\)/, '').trim();
    }

    let quantity = 0;
    let remainingText = cleaned;

    // Fractions: "1/2", "1 1/2" or "1 1 / 2".
    // The whitespace between whole part and numerator is optional, so a
    // run of digits is split before its last digit: "11/2" is read as
    // 1 + 1/2. This matches the original behaviour and is kept on purpose,
    // since tag stripping can glue a mixed number together.
    const fraction = /^(\d+)?\s*(\d+)\s*\/\s*([\d.]+)/.exec(cleaned);
    if (fraction) {
      const whole = fraction[1] ? parseFloat(fraction[1]) : 0;
      const value =
        whole + parseFloat(fraction[2] ?? '') / parseFloat(fraction[3] ?? '');
      // A zero or malformed denominator gives Infinity/NaN; treat the line
      // as having no amount rather than returning a non-finite quantity.
      if (Number.isFinite(value)) {
        quantity = value;
        remainingText = cleaned.substring(fraction[0].length).trim();
      }
    } else {
      const leadingNumber = /^([\d.,]+)/.exec(remainingText);
      if (leadingNumber) {
        const value = parseFloat((leadingNumber[1] ?? '').replace(',', '.'));
        // Input such as "..." matches the pattern but is not a number.
        if (Number.isFinite(value)) {
          quantity = value;
          remainingText = remainingText.substring(leadingNumber[0].length).trim();
        }
      }
    }

    // Potential unit: the first whole word after the amount. The lookahead
    // is Unicode-aware so a word is never cut in front of a non-ASCII
    // letter (plain `\b` only knows ASCII word characters).
    let unit = '';
    let productName = remainingText;
    const unitCandidate = /^([a-zåäö]+)(?![\p{L}\p{N}_])/iu.exec(remainingText);
    if (unitCandidate) {
      const candidate = (unitCandidate[1] ?? '').toLowerCase();
      if (KNOWN_UNITS.has(candidate)) {
        unit = candidate;
        productName = remainingText.substring(unitCandidate[0].length).trim();
      }
    }

    // The parenthetical is always preserved as the note.
    const note = parentheticalText || undefined;

    // No amount in the main text: look for "<number> <known unit>" inside
    // the parenthetical, allowing a non-numeric prefix ("à 570 g").
    if (quantity === 0 && parentheticalText) {
      const inParens =
        /(\d+(?:[.,]\d+)?|[.,]\d+)\s*([a-zåäö]+)(?![\p{L}\p{N}_])/iu.exec(parentheticalText);
      if (inParens) {
        const parenQuantity = parseFloat((inParens[1] ?? '').replace(',', '.'));
        const parenUnit = (inParens[2] ?? '').toLowerCase();
        if (parenQuantity > 0 && KNOWN_UNITS.has(parenUnit)) {
          quantity = parenQuantity;
          unit = parenUnit;
        }
      }
    }

    // No amount anywhere: return the whole (parenthetical-free) line as name.
    if (quantity === 0) {
      return {
        quantity: 0,
        unit: '',
        name: cleaned,
        note,
      };
    }

    return {
      quantity,
      unit,
      name: productName,
      note,
    };
  }
}
|
||||
Reference in New Issue
Block a user