refactor: remove unused parser files and update ParsedReceiptItem model with additional fields

This commit is contained in:
Nils-Johan Gynther
2026-05-01 01:16:10 +02:00
parent 879501292d
commit bfe9cb46fa
9 changed files with 26 additions and 854 deletions
-3
View File
@@ -25,11 +25,9 @@
"class-transformer": "^0.5.1", "class-transformer": "^0.5.1",
"class-validator": "^0.15.1", "class-validator": "^0.15.1",
"multer": "^1.4.5-lts.2", "multer": "^1.4.5-lts.2",
"pdf-parse": "^1.1.1",
"reflect-metadata": "^0.2.2", "reflect-metadata": "^0.2.2",
"rxjs": "^7.8.1", "rxjs": "^7.8.1",
"sharp": "^0.33.5", "sharp": "^0.33.5",
"tesseract.js": "^6.0.1",
"uuid": "^11.1.0", "uuid": "^11.1.0",
"helmet": "^8.0.0", "helmet": "^8.0.0",
"@nestjs/throttler": "^6.4.0" "@nestjs/throttler": "^6.4.0"
@@ -43,7 +41,6 @@
"@types/multer": "^1.4.12", "@types/multer": "^1.4.12",
"@types/node": "^22.15.29", "@types/node": "^22.15.29",
"@types/passport-jwt": "^4.0.1", "@types/passport-jwt": "^4.0.1",
"@types/pdf-parse": "^1.1.5",
"@types/uuid": "^10.0.0", "@types/uuid": "^10.0.0",
"prisma": "6.12.0", "prisma": "6.12.0",
"typescript": "^5.4.5", "typescript": "^5.4.5",
@@ -1,57 +0,0 @@
# Site-Specifika Parsers
Denna mapp innehåller parsers för olika receptsidor. Varje webbplats kan ha sina egna selectors och datastrukturer.
## Arkitektur
- **`base.parser.ts`** Bas-klass som alla parsers extendar
- **`ica.parser.ts`** Optimerad parser för ica.se
- **`generic.parser.ts`** Generisk fallback-parser för okända sidor
## Så lägger du till en ny parser
1. Skapa en ny fil, t.ex. `mathem.parser.ts`:
```typescript
import { RecipeParser, ParsedRecipe } from './base.parser';
export class MathemsRecipeParser extends RecipeParser {
canHandle(url: string): boolean {
return /mathem\.se/i.test(url); // Matchar bara mathem.se-URLs
}
parse(html: string): ParsedRecipe {
// Din site-specifika parsing-logik här
// Returnera { name, ingredients, instructions }
}
}
```
2. Registrera parsern i `quick-import.service.ts`:
```typescript
const parsers: RecipeParser[] = [
new IcaRecipeParser(),
new MathemsRecipeParser(), // Din nya parser här
new GenericRecipeParser(), // Måste vara sist (fallback)
];
```
## Bästa praxis
- **I18n**: Använd svenska användarmeddelanden
- **Säkerhet**: Sanitera HTML-output innan du använder det
- **Robusthet**: Testa edge-cases (tomma ingredienser, långa instruktioner)
- **Prioritering**: Mer specifika parsers måste komma före generiska
## Tips för att debugga en ny sida
1. Kolla om sidan använder JSON-LD: Öppna DevTools → Sök efter `<script type="application/ld+json">`
2. Om JSON-LD finns → kopiera strukturen och anpassa `parseIngredientLine()`
3. Om inte → analysera HTML-strukturen och justera CSS-selectors
## Framtida förbättringar
- [ ] Stöd för Puppeteer/Playwright för JavaScript-heavy webbplatser
- [ ] Plugin-system för community-bidrag
- [ ] Tester per parser
@@ -1,107 +0,0 @@
import { RecipeParser, ParsedRecipe } from './base.parser';
/**
 * Minimal concrete subclass used only by the tests: the abstract members are
 * stubbed, and the protected `parseIngredientLine` helper is re-exposed
 * through a public wrapper so it can be called directly.
 */
class TestParser extends RecipeParser {
  canHandle(_url: string): boolean {
    return true;
  }

  parse(_html: string): ParsedRecipe {
    const emptyRecipe: ParsedRecipe = { name: '', ingredients: [] };
    return emptyRecipe;
  }

  /** Public pass-through to the protected helper under test. */
  public testParseIngredientLine(line: string) {
    const parsed = this.parseIngredientLine(line);
    return parsed;
  }
}
// Unit tests for the ingredient-line helper, grouped by input shape.
describe('RecipeParser.parseIngredientLine', () => {
  const subject = new TestParser();
  const parseLine = (text: string) => subject.testParseIngredientLine(text);

  // Plain "<quantity> <unit> <name>" lines.
  describe('enkla mängd + enhet + namn', () => {
    it('parsar "150 g lax"', () => {
      const parsed = parseLine('150 g lax');
      expect(parsed?.quantity).toBe(150);
      expect(parsed?.unit).toBe('g');
      expect(parsed?.name).toBe('lax');
    });

    it('parsar "2 dl grädde"', () => {
      const parsed = parseLine('2 dl grädde');
      expect(parsed?.quantity).toBe(2);
      expect(parsed?.unit).toBe('dl');
      expect(parsed?.name).toBe('grädde');
    });

    it('parsar "1 msk olivolja"', () => {
      const parsed = parseLine('1 msk olivolja');
      expect(parsed?.quantity).toBe(1);
      expect(parsed?.unit).toBe('msk');
      expect(parsed?.name).toBe('olivolja');
    });

    it('parsar "3 st ägg"', () => {
      const parsed = parseLine('3 st ägg');
      expect(parsed?.quantity).toBe(3);
      expect(parsed?.unit).toBe('st');
      expect(parsed?.name).toBe('ägg');
    });

    it('parsar "3 ägg" (utan enhet)', () => {
      const parsed = parseLine('3 ägg');
      expect(parsed?.quantity).toBe(3);
      expect(parsed?.unit).toBe('');
      expect(parsed?.name).toBe('ägg');
    });
  });

  // Fractions, with and without a leading whole number.
  describe('bråktal', () => {
    it('parsar "1/2 citron"', () => {
      const parsed = parseLine('1/2 citron');
      expect(parsed?.quantity).toBeCloseTo(0.5);
      expect(parsed?.name).toBe('citron');
    });

    it('parsar "1 1/2 dl mjölk"', () => {
      const parsed = parseLine('1 1/2 dl mjölk');
      expect(parsed?.quantity).toBeCloseTo(1.5);
      expect(parsed?.unit).toBe('dl');
    });
  });

  // Lines that carry no quantity at all, including the empty string.
  describe('utan mängd', () => {
    it('parsar "salt och peppar" (ingen mängd)', () => {
      const parsed = parseLine('salt och peppar');
      expect(parsed?.quantity).toBe(0);
      expect(parsed?.unit).toBe('');
      expect(parsed?.name).toBe('salt och peppar');
    });

    it('returnerar null för tom sträng', () => {
      expect(parseLine('')).toBeNull();
    });
  });

  // Parenthetical text is expected to end up in `note`.
  describe('med parenteser', () => {
    it('parsar "1 förp handskalade räkor (à 570 g)" med note', () => {
      const parsed = parseLine('1 förp handskalade räkor (à 570 g)');
      expect(parsed?.quantity).toBe(1);
      expect(parsed?.unit).toBe('förp');
      expect(parsed?.name).toBe('handskalade räkor');
      expect(parsed?.note).toBe('à 570 g');
    });
  });

  // Decimal comma.
  describe('kommatalstal', () => {
    it('parsar "2,5 dl buljong"', () => {
      const parsed = parseLine('2,5 dl buljong');
      expect(parsed?.quantity).toBeCloseTo(2.5);
      expect(parsed?.unit).toBe('dl');
    });
  });

  // HTML tags are removed before parsing.
  describe('strips HTML-taggar', () => {
    it('parsar rad med HTML', () => {
      const parsed = parseLine('<b>200</b> g köttfärs');
      expect(parsed?.quantity).toBe(200);
      expect(parsed?.unit).toBe('g');
    });
  });
});
@@ -1,159 +0,0 @@
/**
* Bas-parser för receptsidor
* Alla site-specifika parsers bör extenda denna
*/
/** Structured recipe data returned by a recipe parser. */
export interface ParsedRecipe {
  /** Recipe title. */
  name: string;
  /** Optional free-text description. */
  description?: string;
  /** Parsed ingredient rows, one entry per ingredient line. */
  ingredients: {
    /** Numeric amount. */
    quantity: number;
    /** Unit of measure as text. */
    unit: string;
    /** Ingredient name. */
    name: string;
    /** Optional extra information attached to the ingredient. */
    note?: string;
  }[];
  /** Optional preparation instructions as a single text. */
  instructions?: string;
  /** Optional URL of an image of the dish. */
  imageUrl?: string;
}
/**
 * Base class for recipe-page parsers. Site-specific parsers extend it and
 * implement `canHandle` and `parse`; `parseIngredientLine` is a shared helper.
 */
export abstract class RecipeParser {
  /**
   * Reports whether this parser can handle the given URL.
   */
  abstract canHandle(url: string): boolean;

  /**
   * Parses an HTML document and extracts the recipe data.
   */
  abstract parse(html: string): ParsedRecipe;

  /**
   * Parses a single ingredient line into quantity, unit, name and optional note.
   *
   * Handles formats such as:
   * - "3 ägg"
   * - "150 g lax"
   * - "1/2 citron", "1 1/2 dl mjölk"
   * - "2,5 dl buljong"
   * - "salt och peppar" (no quantity: returned with `quantity: 0`, `unit: ''`)
   * - "1 förp handskalade räkor i lake (à 570 g)" (parenthetical becomes `note`)
   *
   * HTML tags are stripped before parsing. When the main text carries no
   * quantity but the parenthetical contains "<number> <known unit>", that pair
   * is used as quantity and unit.
   *
   * @param line Raw ingredient text, possibly containing HTML tags.
   * @returns The parsed ingredient, or `null` when the line is empty or not a string.
   */
  protected parseIngredientLine(line: string): {
    quantity: number;
    unit: string;
    name: string;
    note?: string;
  } | null {
    // Callers feed values from untyped JSON-LD; reject anything that is not text.
    if (typeof line !== 'string') return null;

    let cleaned = line.replace(/<[^>]+>/g, '').trim();
    if (!cleaned) return null;

    // Known units (lower-case). Multi-word entries cannot match the
    // single-word unit pattern below; they are kept for compatibility.
    const knownUnits = [
      'g', 'kg', 'hg', 'mg', 'ml', 'dl', 'l', 'tl',
      'st', 'tsk', 'msk', 'krm', 'matsked', 'tesked',
      'pris', 'portion', 'port', 'burk', 'förp', 'paket', 'efter smak', 'klyfta',
    ];

    // Pull the first "(...)" group out of the line.
    let parentheticalText = '';
    const parentheticalMatch = cleaned.match(/\s*\(([^)]*)\)/);
    if (parentheticalMatch) {
      parentheticalText = (parentheticalMatch[1] ?? '').trim();
      cleaned = cleaned.replace(/\s*\([^)]*\)/, '').trim();
    }

    // A line that was only "(...)": treat the inner text as the ingredient
    // itself rather than returning an ingredient with an empty name.
    if (!cleaned) {
      if (!parentheticalText) return null;
      cleaned = parentheticalText;
      parentheticalText = '';
    }

    let quantity = 0;
    let remainingText = cleaned;

    // Fractions: "1/2", "1 1/2", "1 1 / 2". The whole part must be separated
    // from the numerator by whitespace, otherwise "11/2" would be split into
    // whole=1 and 1/2 by regex backtracking.
    const fractionMatch = cleaned.match(/^(?:(\d+)\s+)?(\d+)\s*\/\s*(\d+(?:\.\d+)?)/);
    if (fractionMatch) {
      const whole = fractionMatch[1] ? parseFloat(fractionMatch[1]) : 0;
      const numerator = parseFloat(fractionMatch[2] ?? '');
      const denominator = parseFloat(fractionMatch[3] ?? '');
      // A zero denominator contributes nothing instead of producing Infinity.
      quantity = denominator > 0 ? whole + numerator / denominator : whole;
      remainingText = cleaned.substring(fractionMatch[0].length).trim();
    } else {
      const numberMatch = remainingText.match(/^[\d.,]+/);
      if (numberMatch) {
        const parsedNumber = parseFloat(numberMatch[0].replace(',', '.'));
        // A lone "," or "." parses to NaN; leave the text untouched in that case.
        if (Number.isFinite(parsedNumber)) {
          quantity = parsedNumber;
          remainingText = remainingText.substring(numberMatch[0].length).trim();
        }
      }
    }

    // Unit: the first whole word, if it is a known unit. A negative lookahead
    // is used instead of \b because \b is ASCII-only and would cut a word
    // short in front of a trailing å/ä/ö.
    let unit = '';
    let productName = remainingText;
    const unitMatch = remainingText.match(/^([a-zåäö]+)(?![a-zåäö])/i);
    if (unitMatch) {
      const candidateUnit = (unitMatch[1] ?? '').toLowerCase();
      if (knownUnits.includes(candidateUnit)) {
        unit = candidateUnit;
        productName = remainingText.substring(candidateUnit.length).trim();
      }
    }

    // No quantity in the main text: look for "<number> <known unit>" anywhere
    // in the parenthetical, e.g. "à 570 g" or "ca 2 tsk".
    if (quantity === 0 && parentheticalText) {
      const parentheticalQuantity = parentheticalText.match(
        /(\d+(?:[.,]\d+)?)\s*([a-zåäö]+)(?![a-zåäö])/i,
      );
      if (parentheticalQuantity) {
        const pQuantity = parseFloat((parentheticalQuantity[1] ?? '').replace(',', '.'));
        const pUnit = (parentheticalQuantity[2] ?? '').toLowerCase();
        if (knownUnits.includes(pUnit) && pQuantity > 0) {
          quantity = pQuantity;
          unit = pUnit;
        }
      }
    }

    // The parenthetical is always preserved as the note so no text is lost.
    const note = parentheticalText || undefined;

    // Still no quantity: return the whole cleaned text as the name.
    if (quantity === 0) {
      return {
        quantity: 0,
        unit: '',
        name: cleaned,
        note,
      };
    }

    return {
      quantity,
      unit,
      name: productName,
      note,
    };
  }
}
@@ -1,231 +0,0 @@
import { Logger } from '@nestjs/common';
import { RecipeParser, ParsedRecipe } from './base.parser';
/**
 * Generic fallback parser for recipe pages on unknown sites.
 *
 * Tries schema.org JSON-LD first and falls back to regex-based HTML scraping.
 * It is more permissive than the site-specific parsers.
 */
export class GenericRecipeParser extends RecipeParser {
  private readonly logger = new Logger(GenericRecipeParser.name);

  /** Always returns true: this parser is the catch-all fallback. */
  canHandle(url: string): boolean {
    return true;
  }

  /**
   * Extracts a recipe from raw HTML.
   *
   * Every `application/ld+json` script is inspected in document order; the
   * first one containing a `Recipe` node wins. A script that fails to parse
   * is logged and skipped. If none yields a recipe, HTML scraping is used.
   */
  parse(html: string): ParsedRecipe {
    this.logger.log('Parsing recipe from unknown site...');

    // og:image is the image fallback for both code paths.
    const ogImage = this.extractOgImage(html);

    // Pages commonly contain several JSON-LD scripts; accept either quote style.
    const jsonLdRegex =
      /<script[^>]*type=["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/gi;
    let jsonLdMatch: RegExpExecArray | null;

    while ((jsonLdMatch = jsonLdRegex.exec(html)) !== null) {
      const rawJson = jsonLdMatch[1]?.trim();
      if (!rawJson) continue;

      try {
        const recipe = this.findRecipeInJsonLd(JSON.parse(rawJson));
        if (recipe) {
          this.logger.log('JSON-LD data found');
          return this.extractFromJsonLd(recipe, ogImage);
        }
      } catch (err: unknown) {
        // Keep going with the next script, but record why this one was skipped.
        const reason = err instanceof Error ? err.message : String(err);
        this.logger.warn(`JSON-LD parsing failed: ${reason}`);
      }
    }

    this.logger.log('No JSON-LD found, using HTML parsing');
    return this.parseFromHtml(html, ogImage);
  }

  /**
   * Finds the first node typed `Recipe` in parsed JSON-LD: the node itself,
   * any element of a top-level array (recursively), or a direct member of `@graph`.
   * Returns `null` when there is none.
   */
  private findRecipeInJsonLd(jsonData: any): any {
    if (!jsonData) return null;

    if (Array.isArray(jsonData)) {
      for (const item of jsonData) {
        const recipe = this.findRecipeInJsonLd(item);
        if (recipe) return recipe;
      }
      return null;
    }

    if (this.hasRecipeType(jsonData)) return jsonData;

    const graph = jsonData['@graph'];
    if (Array.isArray(graph)) {
      return graph.find((item: any) => this.hasRecipeType(item)) ?? null;
    }

    return null;
  }

  /** True when the node's `@type` is `'Recipe'` or an array containing it. */
  private hasRecipeType(node: any): boolean {
    const type = node?.['@type'];
    return type === 'Recipe' || (Array.isArray(type) && type.includes('Recipe'));
  }

  /** Reads the `og:image` meta tag, accepting either attribute order. */
  private extractOgImage(html: string): string | undefined {
    const match =
      html.match(/<meta[^>]+property="og:image"[^>]+content="([^"]+)"/i) ||
      html.match(/<meta[^>]+content="([^"]+)"[^>]+property="og:image"/i);
    return match ? this.decodeHtmlEntities(match[1].trim()) : undefined;
  }

  /**
   * Decodes the five entities this parser cares about. `&amp;` is decoded
   * last so that an escaped entity such as `&amp;lt;` becomes the literal
   * text `&lt;` instead of being decoded twice into `<`.
   */
  private decodeHtmlEntities(value: string): string {
    return value
      .replace(/&quot;/g, '"')
      .replace(/&#39;/g, "'")
      .replace(/&lt;/g, '<')
      .replace(/&gt;/g, '>')
      .replace(/&amp;/g, '&');
  }

  /**
   * Resolves a JSON-LD `image` value (string, array, or object with
   * `url` / `@id` / `contentUrl` / `thumbnailUrl` / nested `image`) to the
   * first usable value.
   */
  private extractImageValue(image: any): string | undefined {
    if (!image) return undefined;
    if (typeof image === 'string') return image;

    if (Array.isArray(image)) {
      for (const item of image) {
        const extracted = this.extractImageValue(item);
        if (extracted) return extracted;
      }
      return undefined;
    }

    if (typeof image === 'object') {
      return (
        image.url ||
        image['@id'] ||
        image.contentUrl ||
        image.thumbnailUrl ||
        this.extractImageValue(image.image)
      );
    }

    return undefined;
  }

  /**
   * Flattens a `recipeInstructions` value into step texts. Accepts plain
   * strings, objects with a `text` field, objects with `itemListElement`
   * (sections), and arrays of any of these. `null` entries and unknown
   * shapes yield no steps instead of throwing.
   */
  private collectInstructionSteps(node: any): string[] {
    if (!node) return [];
    if (typeof node === 'string') return [node];

    if (Array.isArray(node)) {
      const steps: string[] = [];
      for (const child of node) {
        steps.push(...this.collectInstructionSteps(child));
      }
      return steps;
    }

    if (typeof node === 'object') {
      if (node.text) return [node.text];
      if (node.itemListElement) return this.collectInstructionSteps(node.itemListElement);
    }

    return [];
  }

  /** Maps a JSON-LD `Recipe` node to a `ParsedRecipe`; `ogImage` is the image fallback. */
  private extractFromJsonLd(recipe: any, ogImage?: string): ParsedRecipe {
    const name = recipe.name || '';
    const description = recipe.description || '';

    // Prefer the image from JSON-LD; fall back to og:image.
    let imageUrl: string | undefined = ogImage;
    const extractedImage = this.extractImageValue(recipe.image);
    if (extractedImage) {
      imageUrl = this.decodeHtmlEntities(extractedImage);
    }

    const ingredients: Array<{ quantity: number; unit: string; name: string; note?: string }> = [];
    if (Array.isArray(recipe.recipeIngredient)) {
      for (const ing of recipe.recipeIngredient) {
        // The data is untyped; skip anything that is not a text line.
        if (typeof ing !== 'string') continue;
        const parsed = this.parseIngredientLine(ing);
        if (parsed) {
          ingredients.push(parsed);
        }
      }
    }

    // A plain string is used verbatim; structured steps are joined by blank lines.
    const rawInstructions = recipe.recipeInstructions;
    const instructions =
      typeof rawInstructions === 'string'
        ? rawInstructions
        : this.collectInstructionSteps(rawInstructions).join('\n\n');

    return {
      name,
      description,
      ingredients,
      instructions,
      imageUrl,
    };
  }

  /**
   * Regex-based scraping used when no JSON-LD recipe exists. This is a
   * heuristic: nested elements of the same tag end a match early.
   */
  private parseFromHtml(html: string, ogImage?: string): ParsedRecipe {
    // Title: first <h1>, then og:title, then <title>.
    let name = '';
    const titleMatch =
      html.match(/<h1[^>]*>([^<]+)<\/h1>/i) ||
      html.match(/<meta\s+property="og:title"\s+content="([^"]+)"/i) ||
      html.match(/<title>([^<]+)<\/title>/i);
    if (titleMatch) {
      name = this.decodeHtmlEntities(titleMatch[1].trim());
    }

    // Description from the meta tag.
    let description = '';
    const descMatch = html.match(
      /<meta\s+name="description"\s+content="([^"]+)"/i
    );
    if (descMatch) {
      description = this.decodeHtmlEntities(descMatch[1].trim());
    }

    const ingredients: Array<{ quantity: number; unit: string; name: string; note?: string }> = [];

    // Candidate ingredient containers, tried in order. The `s` flag lets the
    // content span several lines, which is how most pages format list items.
    const ingredientPatterns = [
      /<li[^>]*>(.*?)<\/li>/gis,
      /<div[^>]*class="ingredient"[^>]*>(.*?)<\/div>/gis,
      /<p[^>]*class="ingredient"[^>]*>(.*?)<\/p>/gis,
    ];

    for (const pattern of ingredientPatterns) {
      let match: RegExpExecArray | null;
      while ((match = pattern.exec(html)) !== null) {
        // Collapse line breaks and indentation inside the element.
        const parsed = this.parseIngredientLine((match[1] ?? '').replace(/\s+/g, ' '));
        if (parsed && parsed.name.length > 2) {
          // Very short names are most likely noise.
          ingredients.push(parsed);
        }
      }
      if (ingredients.length > 0) break; // First pattern that finds anything wins.
    }

    // Instructions: a container with a telling class name, otherwise the first <ol>.
    let instructions = '';
    const instructionsPatterns = [
      /<(?:div|section)[^>]*class="[^"]*(?:instruction|method|step)[^"]*"[^>]*>(.*?)<\/(?:div|section)>/is,
      /<ol[^>]*>(.*?)<\/ol>/is,
    ];

    for (const pattern of instructionsPatterns) {
      const match = html.match(pattern);
      if (match) {
        instructions = match[1].replace(/<[^>]+>/g, '').trim();
        if (instructions.length > 10) break;
      }
    }

    return {
      name,
      description,
      ingredients,
      instructions,
      imageUrl: ogImage,
    };
  }
}
@@ -1,219 +0,0 @@
import { Logger } from '@nestjs/common';
import { RecipeParser, ParsedRecipe } from './base.parser';
/**
 * Parser for ica.se recipe pages.
 * Uses JSON-LD structured data as the primary source and falls back to
 * regex-based HTML scraping.
 */
export class IcaRecipeParser extends RecipeParser {
  private readonly logger = new Logger(IcaRecipeParser.name);

  /** Matches URLs containing `ica.se/recept` (case-insensitive). */
  canHandle(url: string): boolean {
    return /ica\.se\/recept/i.test(url);
  }

  /**
   * Extracts a recipe from raw HTML.
   *
   * Every `application/ld+json` script is inspected in document order; the
   * first one containing a `Recipe` node wins. A script that fails to parse
   * is logged and skipped. If none yields a recipe, HTML scraping is used.
   */
  parse(html: string): ParsedRecipe {
    this.logger.log('Parsing ICA recipe...');

    // og:image is the image fallback for both code paths.
    const ogImage = this.extractOgImage(html);

    // Several JSON-LD scripts per page are common; accept either quote style.
    const jsonLdRegex =
      /<script[^>]*type=["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/gi;
    let jsonLdMatch: RegExpExecArray | null;

    while ((jsonLdMatch = jsonLdRegex.exec(html)) !== null) {
      const rawJson = jsonLdMatch[1]?.trim();
      if (!rawJson) continue;

      try {
        const recipe = this.findRecipeInJsonLd(JSON.parse(rawJson));
        if (recipe) {
          this.logger.log('JSON-LD recipe found');
          return this.extractFromJsonLd(recipe, ogImage);
        }
      } catch (err) {
        this.logger.warn(`JSON-LD parsing failed: ${err}`);
      }
    }

    // Fallback: HTML scraping.
    this.logger.log('Falling back to HTML parsing');
    return this.parseFromHtml(html, ogImage);
  }

  /**
   * Finds the first node typed `Recipe` in parsed JSON-LD: the node itself,
   * any element of a top-level array (recursively), or a direct member of `@graph`.
   * Returns `null` when there is none.
   */
  private findRecipeInJsonLd(jsonData: any): any {
    if (!jsonData) return null;

    if (Array.isArray(jsonData)) {
      for (const item of jsonData) {
        const recipe = this.findRecipeInJsonLd(item);
        if (recipe) return recipe;
      }
      return null;
    }

    if (this.hasRecipeType(jsonData)) return jsonData;

    const graph = jsonData['@graph'];
    if (Array.isArray(graph)) {
      return graph.find((item: any) => this.hasRecipeType(item)) ?? null;
    }

    return null;
  }

  /** True when the node's `@type` is `'Recipe'` or an array containing it. */
  private hasRecipeType(node: any): boolean {
    const type = node?.['@type'];
    return type === 'Recipe' || (Array.isArray(type) && type.includes('Recipe'));
  }

  /** Reads the `og:image` meta tag, accepting either attribute order. */
  private extractOgImage(html: string): string | undefined {
    const match =
      html.match(/<meta[^>]+property="og:image"[^>]+content="([^"]+)"/i) ||
      html.match(/<meta[^>]+content="([^"]+)"[^>]+property="og:image"/i);
    return match ? this.decodeHtmlEntities(match[1].trim()) : undefined;
  }

  /**
   * Decodes the five entities this parser cares about. `&amp;` is decoded
   * last so that an escaped entity such as `&amp;lt;` becomes the literal
   * text `&lt;` instead of being decoded twice into `<`.
   */
  private decodeHtmlEntities(value: string): string {
    return value
      .replace(/&quot;/g, '"')
      .replace(/&#39;/g, "'")
      .replace(/&lt;/g, '<')
      .replace(/&gt;/g, '>')
      .replace(/&amp;/g, '&');
  }

  /**
   * Resolves a JSON-LD `image` value (string, array, or object with
   * `url` / `@id` / `contentUrl` / `thumbnailUrl` / nested `image`) to the
   * first usable value.
   */
  private extractImageValue(image: any): string | undefined {
    if (!image) return undefined;
    if (typeof image === 'string') return image;

    if (Array.isArray(image)) {
      for (const item of image) {
        const extracted = this.extractImageValue(item);
        if (extracted) return extracted;
      }
      return undefined;
    }

    if (typeof image === 'object') {
      return (
        image.url ||
        image['@id'] ||
        image.contentUrl ||
        image.thumbnailUrl ||
        this.extractImageValue(image.image)
      );
    }

    return undefined;
  }

  /**
   * Flattens a `recipeInstructions` value into step texts. Accepts plain
   * strings, objects with a `text` field, objects with `itemListElement`
   * (sections), and arrays of any of these. `null` entries and unknown
   * shapes yield no steps instead of throwing.
   */
  private collectInstructionSteps(node: any): string[] {
    if (!node) return [];
    if (typeof node === 'string') return [node];

    if (Array.isArray(node)) {
      const steps: string[] = [];
      for (const child of node) {
        steps.push(...this.collectInstructionSteps(child));
      }
      return steps;
    }

    if (typeof node === 'object') {
      if (node.text) return [node.text];
      if (node.itemListElement) return this.collectInstructionSteps(node.itemListElement);
    }

    return [];
  }

  /** Maps a JSON-LD `Recipe` node to a `ParsedRecipe`; `ogImage` is the image fallback. */
  private extractFromJsonLd(recipe: any, ogImage?: string): ParsedRecipe {
    const name = recipe.name || '';
    const description = recipe.description || '';

    // Image from JSON-LD (string, array or object); og:image otherwise.
    let imageUrl: string | undefined = ogImage;
    const extractedImage = this.extractImageValue(recipe.image);
    if (extractedImage) {
      imageUrl = this.decodeHtmlEntities(extractedImage);
    }

    const ingredients: Array<{ quantity: number; unit: string; name: string; note?: string }> = [];
    if (Array.isArray(recipe.recipeIngredient)) {
      for (const ing of recipe.recipeIngredient) {
        // The data is untyped; skip anything that is not a text line.
        if (typeof ing !== 'string') continue;
        const parsed = this.parseIngredientLine(ing);
        if (parsed) {
          ingredients.push(parsed);
        }
      }
    }

    // A plain string is used verbatim; structured steps are joined by blank lines.
    const rawInstructions = recipe.recipeInstructions;
    const instructions =
      typeof rawInstructions === 'string'
        ? rawInstructions
        : this.collectInstructionSteps(rawInstructions).join('\n\n');

    return {
      name,
      description,
      ingredients,
      instructions,
      imageUrl,
    };
  }

  /** Regex-based scraping used when no JSON-LD recipe exists. */
  private parseFromHtml(html: string, ogImage?: string): ParsedRecipe {
    // Title: first <h1>, otherwise og:title.
    let name = '';
    const titleMatch = html.match(/<h1[^>]*>([^<]+)<\/h1>/i);
    if (titleMatch) {
      name = this.decodeHtmlEntities(titleMatch[1].trim());
    }

    if (!name) {
      const ogTitleMatch = html.match(
        /<meta\s+property="og:title"\s+content="([^"]+)"/i
      );
      if (ogTitleMatch) {
        name = this.decodeHtmlEntities(ogTitleMatch[1].trim());
      }
    }

    // Description from the meta tag.
    let description = '';
    const descMatch = html.match(
      /<meta\s+name="description"\s+content="([^"]+)"/i
    );
    if (descMatch) {
      description = this.decodeHtmlEntities(descMatch[1].trim());
    }

    // Ingredients: <li> elements whose class contains "ingredient" and whose
    // content is plain text (no nested tags).
    const ingredients: Array<{ quantity: number; unit: string; name: string; note?: string }> = [];
    const ingredientRegex =
      /<li[^>]*class="[^"]*ingredient[^"]*"[^>]*>([^<]+)<\/li>/gi;
    let match: RegExpExecArray | null;
    while ((match = ingredientRegex.exec(html)) !== null) {
      const parsed = this.parseIngredientLine(match[1]);
      if (parsed) {
        ingredients.push(parsed);
      }
    }

    // Instructions: a div/section whose class contains "instruction" or "howto".
    let instructions = '';
    const instructionsMatch = html.match(
      /<(?:div|section)[^>]*class="[^"]*(?:instruction|howto)[^"]*"[^>]*>([^<]*)<\/(?:div|section)>/is
    );
    if (instructionsMatch) {
      instructions = instructionsMatch[1].replace(/<[^>]+>/g, '').trim();
    }

    return {
      name,
      description,
      ingredients,
      instructions,
      imageUrl: ogImage,
    };
  }
}
@@ -1,71 +0,0 @@
import { Logger } from '@nestjs/common';
import * as pdfParse from 'pdf-parse';
import { createWorker } from 'tesseract.js';
import { RecipeParser, ParsedRecipe } from './base.parser';
/** One line item extracted from receipt text. */
interface ParsedReceiptItem {
// Item name as captured from the receipt line.
name: string;
// Number of units captured from the receipt line.
quantity: number;
// Price captured from the receipt line.
price: number;
}
/**
 * Extracts line items from receipts supplied as PDF or image buffers.
 *
 * It extends `RecipeParser` but is not URL-driven: `canHandle` always
 * returns false and `parse` throws.
 */
export class ReceiptParser extends RecipeParser {
  private readonly logger = new Logger(ReceiptParser.name);

  /** Always false: this parser is for receipts, not URLs, and is used directly. */
  canHandle(url: string): boolean {
    return false;
  }

  /**
   * Not supported for receipts.
   * @throws Error always.
   */
  parse(_html: string): ParsedRecipe {
    throw new Error('ReceiptParser does not support HTML parsing');
  }

  /**
   * Extracts the text of a PDF receipt and parses it into items.
   * @throws Error with a generic message when extraction fails; the cause is logged.
   */
  async parseFromPdf(buffer: Buffer): Promise<ParsedReceiptItem[]> {
    try {
      this.logger.log('Parsing PDF receipt...');
      const data = await pdfParse(buffer);
      return this.parseReceiptText(data.text);
    } catch (error) {
      this.logger.error('Failed to parse PDF receipt', error);
      throw new Error('Failed to parse PDF receipt');
    }
  }

  /**
   * Runs OCR on an image receipt and parses the recognised text into items.
   * The OCR worker is always terminated, including when recognition fails.
   * @throws Error with a generic message when OCR fails; the cause is logged.
   */
  async parseFromImage(buffer: Buffer): Promise<ParsedReceiptItem[]> {
    this.logger.log('Parsing image receipt...');
    // NOTE(review): the worker uses the 'eng' language model although the
    // line format below expects SEK prices; confirm whether 'swe' is intended.
    let worker: Awaited<ReturnType<typeof createWorker>> | undefined;
    try {
      worker = await createWorker('eng');
      const ret = await worker.recognize(buffer);
      return this.parseReceiptText(ret.data.text);
    } catch (error) {
      this.logger.error('Failed to parse image receipt', error);
      throw new Error('Failed to parse image receipt');
    } finally {
      if (worker) {
        try {
          await worker.terminate();
        } catch (terminateError) {
          // A failed shutdown must not replace the result or the original error.
          this.logger.warn(`Failed to terminate OCR worker: ${terminateError}`);
        }
      }
    }
  }

  /**
   * Parses receipt text line by line.
   *
   * Placeholder logic: only lines shaped like "2x Apple 10.00 SEK" are
   * recognised; a decimal comma ("10,00 SEK") is accepted as well. Lines
   * that do not match, or whose price is not a number, are skipped.
   */
  parseReceiptText(text: string): ParsedReceiptItem[] {
    this.logger.log('Parsing receipt text...');
    const items: ParsedReceiptItem[] = [];

    for (const line of text.split('\n')) {
      if (line.trim() === '') continue;

      // "<quantity>x <name> <price> SEK"
      const match = line.match(/(\d+)x\s+(.+?)\s+([\d.,]+)\s*SEK/);
      if (!match) continue;

      const quantity = parseInt(match[1], 10);
      const name = match[2].trim();
      const price = parseFloat(match[3].replace(',', '.'));
      if (!Number.isFinite(price)) continue;

      items.push({ name, quantity, price });
    }

    return items;
  }
}
@@ -1,4 +1,4 @@
import { Body, Controller, Post, UploadedFile, UseInterceptors } from '@nestjs/common'; import { Body, Controller, HttpCode, Post, UploadedFile, UseInterceptors } from '@nestjs/common';
import { Throttle } from '@nestjs/throttler'; import { Throttle } from '@nestjs/throttler';
import { FileInterceptor } from '@nestjs/platform-express'; import { FileInterceptor } from '@nestjs/platform-express';
import { memoryStorage } from 'multer'; import { memoryStorage } from 'multer';
@@ -10,6 +10,7 @@ export class QuickImportController {
constructor(private readonly quickImportService: QuickImportService) {} constructor(private readonly quickImportService: QuickImportService) {}
@Post() @Post()
@HttpCode(200)
@Throttle({ default: { ttl: 60_000, limit: 20 } }) @Throttle({ default: { ttl: 60_000, limit: 20 } })
@UseInterceptors( @UseInterceptors(
FileInterceptor('file', { FileInterceptor('file', {
@@ -3,25 +3,43 @@ class ParsedReceiptItem {
final String rawName; final String rawName;
final double? quantity; final double? quantity;
final String? unit; final String? unit;
final String? suggestedProductId; final double? price;
final String? brand;
final String? origin;
// alias-match (säker, ingen bekräftelse behövs)
final int? matchedProductId;
final String? matchedProductName;
// ordbaserad match (kräver bekräftelse)
final int? suggestedProductId;
final String? suggestedProductName; final String? suggestedProductName;
final String? categorySuggestion; // AI-kategorisuggestion (premium)
final String? categorySuggestionName;
ParsedReceiptItem({ ParsedReceiptItem({
required this.rawName, required this.rawName,
this.quantity, this.quantity,
this.unit, this.unit,
this.price,
this.brand,
this.origin,
this.matchedProductId,
this.matchedProductName,
this.suggestedProductId, this.suggestedProductId,
this.suggestedProductName, this.suggestedProductName,
this.categorySuggestion, this.categorySuggestionName,
}); });
factory ParsedReceiptItem.fromJson(Map<String, dynamic> json) => ParsedReceiptItem( factory ParsedReceiptItem.fromJson(Map<String, dynamic> json) => ParsedReceiptItem(
rawName: json['rawName'] as String, rawName: json['rawName'] as String? ?? '',
quantity: (json['quantity'] as num?)?.toDouble(), quantity: (json['quantity'] as num?)?.toDouble(),
unit: json['unit'] as String?, unit: json['unit'] as String?,
suggestedProductId: json['suggestedProductId'] as String?, price: (json['price'] as num?)?.toDouble(),
brand: json['brand'] as String?,
origin: json['origin'] as String?,
matchedProductId: (json['matchedProductId'] as num?)?.toInt(),
matchedProductName: json['matchedProductName'] as String?,
suggestedProductId: (json['suggestedProductId'] as num?)?.toInt(),
suggestedProductName: json['suggestedProductName'] as String?, suggestedProductName: json['suggestedProductName'] as String?,
categorySuggestion: json['categorySuggestion'] as String?, categorySuggestionName: (json['categorySuggestion'] as Map<String, dynamic>?)?['categoryName'] as String?,
); );
} }