feat: refactor recipe parsing logic; move parseRecipeMarkdown and related functions to a new utility file
Test Suite / test (24.15.0) (push) Has been cancelled

This commit is contained in:
Nils-Johan Gynther
2026-05-04 21:28:06 +02:00
parent a164b42bdc
commit 648e1856a1
2 changed files with 200 additions and 192 deletions
+199
View File
@@ -0,0 +1,199 @@
/**
* Markdown-parser för recept
* Extraherar namn, beskrivning, instruktioner och ingredienser från Markdown.
*/
// ============================================================================
// Local Type Definitions
// ============================================================================
/**
 * One ingredient line broken down into its parts.
 */
interface ParsedIngredient {
  /** Amount read from the start of the line; `0` when the line carries no quantity. */
  quantity: number;
  /**
   * Lower-cased unit word. `'st'` when a quantity is present without a
   * recognised unit, `''` when the line has no quantity at all.
   */
  unit: string;
  /** Ingredient text that remains once quantity, unit and trailing note are removed. */
  rawName: string;
  /** Content of a trailing parenthesis, or `null` when the line has none. */
  note: string | null;
}
/**
 * Result of parsing one Markdown recipe.
 */
interface ParsedRecipe {
  /** Text of the `#` title heading; `''` when the document has none. */
  name: string;
  /** Non-blank lines that follow the title, up to the next `##` heading, joined with `\n`. */
  description: string;
  /** Ingredient lines collected from the ingredient section(s), in document order. */
  ingredients: ParsedIngredient[];
  /** Non-blank lines of the instruction section(s), joined with `\n`. */
  instructions: string;
}
// ============================================================================
// Parser Functions
// ============================================================================
/**
 * Parses a recipe written in Markdown and extracts its name, description,
 * instructions and ingredients.
 *
 * Expected format:
 *   # Recipe name
 *   Description (optional paragraph after the title)
 *
 *   ## Ingredienser
 *   - 400 g kycklingfilé
 *   - 2 dl grädde (eller crème fraiche)
 *
 *   ## Instruktioner
 *   1. Stek kycklingen …
 *
 * How lines are classified:
 * - A `#` heading sets the name and starts the description.
 * - A `##` heading whose lower-cased text contains "ingrediens" starts the
 *   ingredient section; one containing "instruktion", "tillagning",
 *   "gör så här", "steg", "tillväg" or "metod" starts the instruction
 *   section. Content under any other `##` heading is ignored.
 * - In the ingredient section only bullet lines (`-`, `*` or `+`) are used.
 * - Blank lines are dropped from the description and the instructions.
 *
 * @param markdown Raw Markdown source of a single recipe. LF, CRLF and lone
 *   CR line endings are all accepted.
 * @returns The extracted recipe. Parts that are not found are returned as
 *   empty strings / an empty ingredient array.
 */
export function parseRecipeMarkdown(markdown: string): ParsedRecipe {
  let name = '';
  const ingredients: ParsedIngredient[] = [];
  const descriptionLines: string[] = [];
  const instructionLines: string[] = [];
  let currentSection: 'none' | 'description' | 'ingredients' | 'instructions' = 'none';

  for (const line of markdown.split(/\r\n|\r|\n/)) {
    const trimmed = line.trim();

    // H1 — recipe name. Requiring whitespace directly after the first "#"
    // already rules out "##" and deeper headings.
    if (/^#\s+/.test(trimmed)) {
      name = trimmed.replace(/^#\s+/, '').trim();
      currentSection = 'description';
      continue;
    }

    // H2 — section headings.
    if (/^##\s+/.test(trimmed)) {
      const heading = trimmed.replace(/^##\s+/, '').trim().toLowerCase();
      if (/ingrediens/.test(heading)) {
        currentSection = 'ingredients';
      } else if (/instruktion|tillagning|gör så här|steg|tillväg|metod/.test(heading)) {
        currentSection = 'instructions';
      } else {
        currentSection = 'none';
      }
      continue;
    }

    // Collect the line according to the section it belongs to.
    switch (currentSection) {
      case 'description':
        if (trimmed.length > 0) {
          descriptionLines.push(trimmed);
        }
        break;
      case 'ingredients':
        // Markdown allows "-", "*" and "+" as bullet markers.
        if (/^[-*+]\s+/.test(trimmed)) {
          const ingredientText = trimmed.replace(/^[-*+]\s+/, '');
          ingredients.push(parseIngredientLine(ingredientText));
        }
        break;
      case 'instructions':
        if (trimmed.length > 0) {
          instructionLines.push(trimmed);
        }
        break;
    }
  }

  return {
    name,
    description: descriptionLines.join('\n'),
    instructions: instructionLines.join('\n'),
    ingredients,
  };
}
/**
 * Parses a single ingredient line (list marker already removed) into
 * quantity, unit, name and optional note.
 *
 * Examples:
 *   "400 g kycklingfilé"                → 400 g, "kycklingfilé"
 *   "2 dl grädde (eller crème fraiche)" → 2 dl, "grädde", note "eller crème fraiche"
 *   "1 1/2 dl crème fraiche"            → 1.5 dl, "crème fraiche"
 *   "1/2 citron"                        → 0.5 st, "citron"
 *   "1 polka- eller gulbeta"            → 1 st, "polka- eller gulbeta"
 *   "1 kruka basilika"                  → 1 st, "kruka basilika"
 *   "salt"                              → quantity 0, unit "", "salt"
 *
 * The quantity may be an integer, a decimal written with "." or ",", a
 * fraction ("1/2") or a mixed number ("1 1/2"; the whole part must be
 * separated from the fraction by whitespace). It has to be followed by
 * whitespace and more text, otherwise the whole line is treated as a name.
 * A fraction that does not evaluate to a finite number (zero denominator)
 * is not accepted as a quantity.
 *
 * @param text Ingredient text without the leading list marker.
 * @returns The parsed ingredient. `unit` is the lower-cased unit when the
 *   first word after the quantity is a known unit, `'st'` when a quantity is
 *   present without a known unit, and `''` when there is no quantity.
 */
function parseIngredientLine(text: string): ParsedIngredient {
  const trimmed = text.trim();

  // Known units, compared against the lower-cased first word after the quantity.
  // NOTE(review): 'efter smak' contains a space, so it can never equal a
  // single word; it is kept only to leave the list unchanged.
  const knownUnits = [
    'g', 'kg', 'hg', 'mg', 'ml', 'dl', 'l', 'tl',
    'st', 'tsk', 'msk', 'krm', 'matsled', 'tesled',
    'pris', 'portion', 'port', 'burk', 'förp', 'paket', 'efter smak', 'klyfta',
  ];

  // Split off a trailing "(…)" as the note.
  let note: string | null = null;
  let main = trimmed;
  const parenMatch = trimmed.match(/\(([^)]+)\)\s*$/);
  if (parenMatch) {
    note = parenMatch[1].trim();
    main = trimmed.slice(0, parenMatch.index).trim();
  }

  // Leading quantity followed by the rest of the line.
  // Groups: 1 = whole part, 2 = numerator, 3 = denominator (fraction form)
  //         4 = plain integer/decimal, 5 = rest of the line.
  const quantityMatch = main.match(
    /^(?:(?:(\d+)\s+)?(\d+)\s*\/\s*(\d+(?:\.\d+)?)|(\d+(?:[.,]\d+)?))\s+(.+)$/,
  );
  if (quantityMatch) {
    const [, whole, numerator, denominator, decimal, rest] = quantityMatch;
    const quantity =
      decimal !== undefined
        ? parseNumber(decimal)
        : (whole !== undefined ? parseFloat(whole) : 0) +
          parseFloat(numerator) / parseFloat(denominator);

    // A zero denominator yields Infinity/NaN; such a line is handled as a plain name below.
    if (Number.isFinite(quantity)) {
      const unitMatch = rest.match(/^(\S+)\s+(.+)$/);
      if (unitMatch) {
        const candidateUnit = unitMatch[1].toLowerCase();
        if (knownUnits.includes(candidateUnit)) {
          // "quantity unit name"
          return {
            quantity,
            unit: candidateUnit,
            rawName: unitMatch[2].trim(),
            note,
          };
        }
        // First word is not a unit: it belongs to the name.
        return {
          quantity,
          unit: 'st',
          rawName: unitMatch[1] + ' ' + unitMatch[2],
          note,
        };
      }
      // "quantity name" with a single-word name, e.g. "3 ägg".
      return {
        quantity,
        unit: 'st',
        rawName: rest.trim(),
        note,
      };
    }
  }

  // Name only, no usable quantity — e.g. "salt".
  return {
    quantity: 0,
    unit: '',
    rawName: main,
    note,
  };
}
/**
 * Converts a numeric string to a number, accepting a decimal comma.
 *
 * Only the first "," is replaced with "." before parsing; the result is
 * whatever `parseFloat` yields for the normalised text (NaN when it does not
 * start with a number).
 */
function parseNumber(s: string): number {
  const normalized = s.replace(',', '.');
  return Number.parseFloat(normalized);
}
+1 -192
View File
@@ -6,25 +6,11 @@ import { PrismaService } from '../prisma/prisma.service';
import { CreateRecipeDto } from './dto/create-recipe.dto'; import { CreateRecipeDto } from './dto/create-recipe.dto';
import { ParseMarkdownDto } from './dto/parse-markdown.dto'; import { ParseMarkdownDto } from './dto/parse-markdown.dto';
import { downloadAndOptimizeImage } from '../common/utils/download-image'; import { downloadAndOptimizeImage } from '../common/utils/download-image';
import { parseRecipeMarkdown } from '../common/utils/recipe-parser';
import { normalizeUnit, getUnitType, convertUnit, canConvert } from '../common/utils/units'; import { normalizeUnit, getUnitType, convertUnit, canConvert } from '../common/utils/units';
const IMAGE_DEST_DIR = process.env.IMAGE_DEST_DIR || '/app/recipe-images'; const IMAGE_DEST_DIR = process.env.IMAGE_DEST_DIR || '/app/recipe-images';
// Lokala typdefinitioner (tidigare från recipe-document-converter)
/**
 * One ingredient line broken down into its parts.
 */
interface ParsedIngredient {
  /** Amount read from the start of the line; `0` when the line carries no quantity. */
  quantity: number;
  /**
   * Lower-cased unit word. `'st'` when a quantity is present without a
   * recognised unit, `''` when the line has no quantity at all.
   */
  unit: string;
  /** Ingredient text that remains once quantity, unit and trailing note are removed. */
  rawName: string;
  /** Content of a trailing parenthesis, or `null` when the line has none. */
  note: string | null;
}
/**
 * Result of parsing one Markdown recipe.
 */
interface ParsedRecipe {
  /** Text of the `#` title heading; `''` when the document has none. */
  name: string;
  /** Non-blank lines that follow the title, up to the next `##` heading, joined with `\n`. */
  description: string;
  /** Ingredient lines collected from the ingredient section(s), in document order. */
  ingredients: ParsedIngredient[];
  /** Non-blank lines of the instruction section(s), joined with `\n`. */
  instructions: string;
}
@Injectable() @Injectable()
export class RecipesService { export class RecipesService {
private readonly logger = new Logger(RecipesService.name); private readonly logger = new Logger(RecipesService.name);
@@ -539,181 +525,4 @@ export class RecipesService {
ingredients: ingredientsWithSuggestions, ingredients: ingredientsWithSuggestions,
}; };
} }
}
// ============================================================================
// Parser Functions (previously from recipe-document-converter library)
// ============================================================================
/**
 * Parses a recipe written in Markdown and extracts its name, description,
 * instructions and ingredients.
 *
 * Expected format:
 *   # Recipe name
 *   Description (optional paragraph after the title)
 *
 *   ## Ingredienser
 *   - 400 g kycklingfilé
 *   - 2 dl grädde (eller crème fraiche)
 *
 *   ## Instruktioner
 *   1. Stek kycklingen …
 *
 * How lines are classified:
 * - A `#` heading sets the name and starts the description.
 * - A `##` heading whose lower-cased text contains "ingrediens" starts the
 *   ingredient section; one containing "instruktion", "tillagning",
 *   "gör så här", "steg", "tillväg" or "metod" starts the instruction
 *   section. Content under any other `##` heading is ignored.
 * - In the ingredient section only bullet lines (`-`, `*` or `+`) are used.
 * - Blank lines are dropped from the description and the instructions.
 *
 * @param markdown Raw Markdown source of a single recipe. LF, CRLF and lone
 *   CR line endings are all accepted.
 * @returns The extracted recipe. Parts that are not found are returned as
 *   empty strings / an empty ingredient array.
 */
function parseRecipeMarkdown(markdown: string): ParsedRecipe {
  let name = '';
  const ingredients: ParsedIngredient[] = [];
  const descriptionLines: string[] = [];
  const instructionLines: string[] = [];
  let currentSection: 'none' | 'description' | 'ingredients' | 'instructions' = 'none';

  for (const line of markdown.split(/\r\n|\r|\n/)) {
    const trimmed = line.trim();

    // H1 — recipe name. Requiring whitespace directly after the first "#"
    // already rules out "##" and deeper headings.
    if (/^#\s+/.test(trimmed)) {
      name = trimmed.replace(/^#\s+/, '').trim();
      currentSection = 'description';
      continue;
    }

    // H2 — section headings.
    if (/^##\s+/.test(trimmed)) {
      const heading = trimmed.replace(/^##\s+/, '').trim().toLowerCase();
      if (/ingrediens/.test(heading)) {
        currentSection = 'ingredients';
      } else if (/instruktion|tillagning|gör så här|steg|tillväg|metod/.test(heading)) {
        currentSection = 'instructions';
      } else {
        currentSection = 'none';
      }
      continue;
    }

    // Collect the line according to the section it belongs to.
    switch (currentSection) {
      case 'description':
        if (trimmed.length > 0) {
          descriptionLines.push(trimmed);
        }
        break;
      case 'ingredients':
        // Markdown allows "-", "*" and "+" as bullet markers.
        if (/^[-*+]\s+/.test(trimmed)) {
          const ingredientText = trimmed.replace(/^[-*+]\s+/, '');
          ingredients.push(parseIngredientLine(ingredientText));
        }
        break;
      case 'instructions':
        if (trimmed.length > 0) {
          instructionLines.push(trimmed);
        }
        break;
    }
  }

  return {
    name,
    description: descriptionLines.join('\n'),
    instructions: instructionLines.join('\n'),
    ingredients,
  };
}
/**
 * Parses a single ingredient line (list marker already removed) into
 * quantity, unit, name and optional note.
 *
 * Examples:
 *   "400 g kycklingfilé"                → 400 g, "kycklingfilé"
 *   "2 dl grädde (eller crème fraiche)" → 2 dl, "grädde", note "eller crème fraiche"
 *   "1 1/2 dl crème fraiche"            → 1.5 dl, "crème fraiche"
 *   "1/2 citron"                        → 0.5 st, "citron"
 *   "1 polka- eller gulbeta"            → 1 st, "polka- eller gulbeta"
 *   "1 kruka basilika"                  → 1 st, "kruka basilika"
 *   "salt"                              → quantity 0, unit "", "salt"
 *
 * The quantity may be an integer, a decimal written with "." or ",", a
 * fraction ("1/2") or a mixed number ("1 1/2"; the whole part must be
 * separated from the fraction by whitespace). It has to be followed by
 * whitespace and more text, otherwise the whole line is treated as a name.
 * A fraction that does not evaluate to a finite number (zero denominator)
 * is not accepted as a quantity.
 *
 * @param text Ingredient text without the leading list marker.
 * @returns The parsed ingredient. `unit` is the lower-cased unit when the
 *   first word after the quantity is a known unit, `'st'` when a quantity is
 *   present without a known unit, and `''` when there is no quantity.
 */
function parseIngredientLine(text: string): ParsedIngredient {
  const trimmed = text.trim();

  // Known units, compared against the lower-cased first word after the quantity.
  // NOTE(review): 'efter smak' contains a space, so it can never equal a
  // single word; it is kept only to leave the list unchanged.
  const knownUnits = [
    'g', 'kg', 'hg', 'mg', 'ml', 'dl', 'l', 'tl',
    'st', 'tsk', 'msk', 'krm', 'matsled', 'tesled',
    'pris', 'portion', 'port', 'burk', 'förp', 'paket', 'efter smak', 'klyfta',
  ];

  // Split off a trailing "(…)" as the note.
  let note: string | null = null;
  let main = trimmed;
  const parenMatch = trimmed.match(/\(([^)]+)\)\s*$/);
  if (parenMatch) {
    note = parenMatch[1].trim();
    main = trimmed.slice(0, parenMatch.index).trim();
  }

  // Leading quantity followed by the rest of the line.
  // Groups: 1 = whole part, 2 = numerator, 3 = denominator (fraction form)
  //         4 = plain integer/decimal, 5 = rest of the line.
  const quantityMatch = main.match(
    /^(?:(?:(\d+)\s+)?(\d+)\s*\/\s*(\d+(?:\.\d+)?)|(\d+(?:[.,]\d+)?))\s+(.+)$/,
  );
  if (quantityMatch) {
    const [, whole, numerator, denominator, decimal, rest] = quantityMatch;
    const quantity =
      decimal !== undefined
        ? parseNumber(decimal)
        : (whole !== undefined ? parseFloat(whole) : 0) +
          parseFloat(numerator) / parseFloat(denominator);

    // A zero denominator yields Infinity/NaN; such a line is handled as a plain name below.
    if (Number.isFinite(quantity)) {
      const unitMatch = rest.match(/^(\S+)\s+(.+)$/);
      if (unitMatch) {
        const candidateUnit = unitMatch[1].toLowerCase();
        if (knownUnits.includes(candidateUnit)) {
          // "quantity unit name"
          return {
            quantity,
            unit: candidateUnit,
            rawName: unitMatch[2].trim(),
            note,
          };
        }
        // First word is not a unit: it belongs to the name.
        return {
          quantity,
          unit: 'st',
          rawName: unitMatch[1] + ' ' + unitMatch[2],
          note,
        };
      }
      // "quantity name" with a single-word name, e.g. "3 ägg".
      return {
        quantity,
        unit: 'st',
        rawName: rest.trim(),
        note,
      };
    }
  }

  // Name only, no usable quantity — e.g. "salt".
  return {
    quantity: 0,
    unit: '',
    rawName: main,
    note,
  };
}
/**
 * Converts a numeric string to a number, accepting a decimal comma.
 *
 * Only the first "," is replaced with "." before the text is handed to
 * `parseFloat`, whose result is returned unchanged.
 */
function parseNumber(s: string): number {
return parseFloat(s.replace(',', '.'));
} }