feat(flyer-import): integrate AI-based flyer parsing with image support
Test Suite / quick-import-pr-quick (push) Has been skipped
Test Suite / backend-full (push) Successful in 2m31s
Test Suite / flutter-quality (push) Failing after 3m48s
Test Suite / backend-pr-quick (push) Failing after 13m57s

- Add support for PNG, JPEG, and WebP image formats in flyer import
- Replace external importer service with internal AI-based parsing pipeline
- Add new services: TextExtractorService, AiFlyerParserService, FlyerNormalizerService
- Integrate Mistral AI, pdf-parse, and tesseract.js dependencies
- Add quality confidence indicators and warning panels in Flutter UI
- Update package.json with new dependencies and transform ignore patterns
- Add documentation for flyer importer system
- Add Kilo AI planning file for Happy Island project

BREAKING CHANGE: Flyer import now uses internal AI parsing instead of external importer service
This commit is contained in:
Nils-Johan Gynther
2026-05-19 19:57:54 +02:00
parent 0ce1db5471
commit 187d0283a5
14 changed files with 1479 additions and 103 deletions
@@ -0,0 +1,234 @@
import {
BadRequestException,
Injectable,
Logger,
ServiceUnavailableException,
} from '@nestjs/common';
/**
 * One product row extracted from a flyer.
 *
 * Field notes below describe how AiFlyerParserService in this file populates
 * the row; other producers of this type may behave differently.
 */
export interface AiFlyerParseResult {
/** Trimmed product name from the model, or a "Produkt N" placeholder when the model gave none. */
rawName: string;
/** rawName lower-cased, with everything except a-z, å, ä, ö, digits and whitespace removed and whitespace runs collapsed. */
normalizedName: string;
/** Category string from the model, or null when absent or blank. */
category: string | null;
/** Price as a number, or null when absent or unparseable. */
price: number | null;
/** Taken from the model's `unit` field, or null. */
priceUnit: string | null;
/** Comparison price as a number, or null when absent or unparseable. */
comparisonPrice: number | null;
/** Taken from the model's `comparisonUnit` field, or null. */
comparisonUnit: string | null;
/** The model's `offer` field: used as-is when it is a string, space-joined when it is an array, otherwise null. */
offerText: string | null;
/** Confidence score; the AI parser assigns a fixed 0.85 to every row. */
confidence: number;
/** Tags describing how the row was produced; the AI parser sets ['ai_parsed']. */
reasonCodes: string[];
}
/**
 * Turns raw flyer text into structured product rows by prompting the
 * `mistral-tiny` chat model and normalising the JSON array it returns.
 *
 * Construction throws when `MISTRAL_API_KEY` is unset. The Mistral SDK is
 * imported on first use and the client instance is cached afterwards.
 */
@Injectable()
export class AiFlyerParserService {
  private readonly logger = new Logger(AiFlyerParserService.name);
  /** How long a single Mistral call may take before it is reported as unavailable. */
  private readonly timeoutMs = 15_000;
  /** Maximum number of flyer characters embedded in the prompt (keeps token usage down). */
  private readonly maxPromptChars = 5000;
  // Left untyped on purpose: the SDK is loaded through a dynamic import, so no
  // static client type is in scope here.
  private mistral: any;
  private readonly apiKey: string;

  /**
   * Reads the API key from the environment.
   *
   * @throws Error when `MISTRAL_API_KEY` is missing or empty.
   */
  constructor() {
    this.apiKey = process.env.MISTRAL_API_KEY ?? '';
    if (!this.apiKey) {
      throw new Error('MISTRAL_API_KEY environment variable not set');
    }
  }

  /**
   * Returns the cached Mistral client, importing the SDK and creating the
   * client on the first call.
   */
  private async getClient(): Promise<any> {
    if (this.mistral) return this.mistral;
    const mistralModule = await import('@mistralai/mistralai');
    this.mistral = new mistralModule.default(this.apiKey);
    return this.mistral;
  }

  /**
   * Sends flyer text to Mistral Tiny for structured extraction.
   *
   * Array entries in the model's answer that are not JSON objects (null,
   * strings, nested arrays, ...) are skipped with a warning instead of failing
   * the whole import or producing empty placeholder rows.
   *
   * @param text Text from the flyer (from pdf-parse or OCR).
   * @returns The parsed products, in the order the model returned them.
   * @throws BadRequestException when `text` is blank, or when the model's
   *   answer is empty, is not valid JSON, or is not a JSON array.
   * @throws ServiceUnavailableException when the call times out or fails for
   *   any other reason.
   */
  async parseWithAI(text: string): Promise<AiFlyerParseResult[]> {
    if (!text || text.trim().length === 0) {
      throw new BadRequestException('Flyer-texten är tom. Kan inte fortsätta.');
    }
    const prompt = this.buildPrompt(text);
    try {
      this.logger.debug('Sending request to Mistral Tiny');
      const client = await this.getClient();
      const response = await this.withTimeout<any>(
        client.chat({
          model: 'mistral-tiny',
          messages: [{ role: 'user', content: prompt }],
          temperature: 0.1,
        }),
        this.timeoutMs,
        'Mistral-anrop timeout',
      );
      const content: unknown = response?.choices?.[0]?.message?.content;
      // A non-string payload cannot be sanitised or parsed below, so it is
      // treated the same way as an empty answer.
      if (typeof content !== 'string' || content.trim().length === 0) {
        throw new BadRequestException('Tomt svar från AI-modellen.');
      }
      this.logger.debug(`Mistral response length: ${content.length} chars`);

      // Clean up the answer and parse it as JSON.
      const parsed: unknown = JSON.parse(this.sanitizeJsonResponse(content));
      if (!Array.isArray(parsed)) {
        throw new BadRequestException('AI returnerade inte en JSON-array.');
      }

      const results: AiFlyerParseResult[] = [];
      parsed.forEach((entry: unknown, idx: number) => {
        if (!this.isRecord(entry)) {
          this.logger.warn(`Skipping AI item ${idx}: not a JSON object`);
          return;
        }
        // The original array position is kept so placeholder names stay
        // aligned with the model's output.
        results.push(this.normalizeAiItem(entry, idx));
      });
      return results;
    } catch (err: unknown) {
      if (err instanceof SyntaxError) {
        this.logger.error(`JSON parse error: ${String(err)}`);
        throw new BadRequestException('AI returnerade ogiltigt JSON. Försök igen.');
      }
      // Errors that already carry the intended HTTP status pass through untouched.
      if (err instanceof BadRequestException || err instanceof ServiceUnavailableException) {
        throw err;
      }
      this.logger.error(`AI parsing failed: ${String(err)}`);
      throw new ServiceUnavailableException('AI-tjänsten är inte tillgänglig just nu.');
    }
  }

  /**
   * Races `promise` against a timer.
   *
   * @returns The value of `promise` when it settles first.
   * @throws ServiceUnavailableException carrying `timeoutMessage` when the
   *   timer fires first. The timer is always cleared afterwards.
   */
  private async withTimeout<T>(
    promise: Promise<T>,
    timeoutMs: number,
    timeoutMessage: string,
  ): Promise<T> {
    let timeoutHandle: ReturnType<typeof setTimeout> | null = null;
    const timeoutPromise = new Promise<never>((_, reject) => {
      timeoutHandle = setTimeout(() => {
        reject(new ServiceUnavailableException(timeoutMessage));
      }, timeoutMs);
    });
    try {
      return await Promise.race([promise, timeoutPromise]);
    } finally {
      if (timeoutHandle) clearTimeout(timeoutHandle);
    }
  }

  /**
   * Builds the prompt sent to Mistral.
   *
   * Input longer than `maxPromptChars` is cut off to save tokens; a warning is
   * logged because products past the cut are never seen by the model.
   */
  private buildPrompt(text: string): string {
    let flyerText = text;
    if (text.length > this.maxPromptChars) {
      this.logger.warn(
        `Flyer text truncated from ${text.length} to ${this.maxPromptChars} chars before AI parsing`,
      );
      flyerText = text.substring(0, this.maxPromptChars);
    }
    return `Du är en expert på att tolka svenska matvaruflyers (t.ex. från Willys, Coop, ICA).
Extrahera ALL produktinformation från följande text och returnera den som en JSON-array.
För varje produkt, inkludera:
- name: Produktnamn (fullständigt namn)
- weight: Vikt (om tillgänglig, t.ex. "150g", "Ca 1kg") eller null
- origin: Ursprung/land/märke (om tillgänglig, t.ex. "FALKENBERG") eller null
- price: Pris som nummer (t.ex. 39.90) eller null
- comparisonPrice: Jämförpris som nummer (t.ex. 266.00) eller null
- unit: Enhet (kg, st, förp, l, etc.) eller null
- offer: Erbjudande som array (t.ex. ["Max 3 köp/hushåll"]) eller []
- category: Kategori (t.ex. "Fisk", "Kött", "Mejeri", "Grönsaker", "Frukt", "Dryck") eller null
- validFrom: Giltig från (datum i formatet YYYY-MM-DD) eller null
- validTo: Giltig till (datum i formatet YYYY-MM-DD) eller null
Texten att tolka:
${flyerText}
Returnera ENDAST en JSON-array. Inga andra kommentarer, ingen markdown-markup.
Exempel på utdata:
[
{
"name": "KALLRÖKT LAX, GRAVAD LAX",
"weight": "150g",
"origin": "FALKENBERG",
"price": 39.90,
"comparisonPrice": 266.00,
"unit": "kg",
"offer": ["Max 3 köp/hushåll"],
"category": "Fisk",
"validFrom": "2026-05-18",
"validTo": "2026-05-24"
}
]`;
  }

  /**
   * Cleans the AI answer so it can be parsed as JSON: strips markdown code
   * fences and, when the text contains a bracketed span, keeps only the part
   * from the first `[` to the last `]`.
   */
  private sanitizeJsonResponse(content: string): string {
    // Remove markdown fences.
    const withoutFences = content
      .replace(/```json\n?/g, '')
      .replace(/```\n?/g, '')
      .trim();
    // Extract the JSON array if the model wrapped it in surrounding text.
    const arrayMatch = withoutFences.match(/\[[\s\S]*\]/);
    return arrayMatch ? arrayMatch[0] : withoutFences;
  }

  /** True for non-null, non-array objects, i.e. values whose fields can be read safely. */
  private isRecord(value: unknown): value is Record<string, unknown> {
    return typeof value === 'object' && value !== null && !Array.isArray(value);
  }

  /**
   * Normalises and type-converts one AI item into an AiFlyerParseResult.
   *
   * @param item One object from the model's JSON array.
   * @param index Zero-based position of the item, used for the placeholder name.
   */
  private normalizeAiItem(item: Record<string, unknown>, index: number): AiFlyerParseResult {
    // Accepts numbers and numeric strings (decimal comma allowed). Non-finite
    // results such as Infinity (which JSON.parse yields for e.g. 1e999) are
    // rejected.
    const toNumber = (val: unknown): number | null => {
      if (typeof val === 'number') return Number.isFinite(val) ? val : null;
      if (typeof val === 'string') {
        const parsed = parseFloat(val.replace(',', '.'));
        return Number.isFinite(parsed) ? parsed : null;
      }
      return null;
    };
    // Trimmed string, or null for blank and non-string values.
    const toText = (val: unknown): string | null => {
      if (typeof val === 'string') return val.trim() || null;
      return null;
    };
    // Keeps only textual entries so null or object entries do not end up as
    // "null" / "[object Object]" in the joined offer text.
    const toTextList = (val: unknown): string[] => {
      if (!Array.isArray(val)) return [];
      const entries: string[] = [];
      for (const entry of val) {
        if (typeof entry === 'string') {
          const trimmed = entry.trim();
          if (trimmed) entries.push(trimmed);
        } else if (typeof entry === 'number' && Number.isFinite(entry)) {
          entries.push(String(entry));
        }
      }
      return entries;
    };

    const rawName = toText(item.name) ?? `Produkt ${index + 1}`;
    const offerText = toText(item.offer) ?? (toTextList(item.offer).join(' ') || null);

    return {
      rawName,
      normalizedName: this.normalizeName(rawName),
      category: toText(item.category),
      price: toNumber(item.price),
      priceUnit: toText(item.unit),
      comparisonPrice: toNumber(item.comparisonPrice),
      // NOTE(review): the prompt built by buildPrompt does not list a
      // comparisonUnit field, so this stays null unless the model adds one on
      // its own accord.
      comparisonUnit: toText(item.comparisonUnit),
      offerText,
      confidence: 0.85, // AI-parsed rows get a fixed, medium-high confidence.
      reasonCodes: ['ai_parsed'],
    };
  }

  /**
   * Simple product-name normalisation: lower-case, drop every character that
   * is not a-z, å, ä, ö, a digit or whitespace, collapse whitespace runs and
   * trim.
   */
  private normalizeName(name: string): string {
    return name
      .toLowerCase()
      .replace(/[^a-zåäö0-9\s]/g, '')
      .replace(/\s+/g, ' ')
      .trim();
  }
}