feat(flyer-import): integrate AI-based flyer parsing with image support
- Add support for PNG, JPEG, and WebP image formats in flyer import
- Replace external importer service with internal AI-based parsing pipeline
- Add new services: TextExtractorService, AiFlyerParserService, FlyerNormalizerService
- Integrate Mistral AI, pdf-parse, and tesseract.js dependencies
- Add quality confidence indicators and warning panels in Flutter UI
- Update package.json with new dependencies and transform ignore patterns
- Add documentation for flyer importer system
- Add Kilo AI planning file for Happy Island project

BREAKING CHANGE: Flyer import now uses internal AI parsing instead of external importer service
This commit is contained in:
@@ -0,0 +1,234 @@
|
||||
import {
|
||||
BadRequestException,
|
||||
Injectable,
|
||||
Logger,
|
||||
ServiceUnavailableException,
|
||||
} from '@nestjs/common';
|
||||
|
||||
/**
 * One product row extracted from a flyer by the AI parser.
 *
 * Every field except the two names, `confidence` and `reasonCodes` is
 * nullable: `null` means the value was absent from the model output or could
 * not be converted to the expected type.
 */
export interface AiFlyerParseResult {
  /** Product name as returned by the model (trimmed), or a generated placeholder. */
  rawName: string;
  /** Lower-cased, punctuation-free form of `rawName`. */
  normalizedName: string;
  /** Category label from the model's `category` field. */
  category: string | null;
  /** Price from the model's `price` field. */
  price: number | null;
  /** Unit from the model's `unit` field. */
  priceUnit: string | null;
  /** Comparison price from the model's `comparisonPrice` field. */
  comparisonPrice: number | null;
  /** Unit from the model's `comparisonUnit` field. */
  comparisonUnit: string | null;
  /** Offer conditions from the model's `offer` field, flattened to one string. */
  offerText: string | null;
  /** Confidence score assigned to the row by the parser. */
  confidence: number;
  /** Machine-readable tags describing how the row was produced. */
  reasonCodes: string[];
}
|
||||
|
||||
/**
 * Extracts structured product rows from flyer text by prompting a Mistral
 * chat model and normalising the JSON array it returns.
 *
 * Construction fails when the `MISTRAL_API_KEY` environment variable is
 * missing or empty. The Mistral SDK itself is only imported on first use.
 */
@Injectable()
export class AiFlyerParserService {
  private readonly logger = new Logger(AiFlyerParserService.name);

  /** Upper bound, in milliseconds, on how long one model call may take. */
  private readonly timeoutMs = 15_000;

  /** Maximum number of flyer-text characters embedded in the prompt. */
  private readonly maxPromptChars = 5000;

  // The SDK is loaded dynamically, so its types are not available here.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private mistral: any;

  private readonly apiKey: string;

  /**
   * Reads the API key from `process.env.MISTRAL_API_KEY`.
   *
   * @throws Error when the variable is unset or empty.
   */
  constructor() {
    this.apiKey = process.env.MISTRAL_API_KEY ?? '';
    if (!this.apiKey) {
      throw new Error('MISTRAL_API_KEY environment variable not set');
    }
  }

  /**
   * Returns the cached Mistral client, creating it on first call by
   * dynamically importing `@mistralai/mistralai`.
   *
   * NOTE(review): the default export is constructed with the bare API key and
   * later called as `client.chat({...})`; confirm this matches the SDK version
   * pinned in package.json.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private async getClient(): Promise<any> {
    if (this.mistral) return this.mistral;

    const mistralModule = await import('@mistralai/mistralai');
    this.mistral = new mistralModule.default(this.apiKey);
    return this.mistral;
  }

  /**
   * Sends flyer text to the `mistral-tiny` model for structured extraction.
   *
   * Array entries in the model's answer that are not JSON objects are skipped
   * (and logged) instead of failing the whole request.
   *
   * @param text Flyer text to interpret.
   * @returns The parsed products, one entry per JSON object in the answer.
   * @throws BadRequestException when `text` is blank, or when the model's
   *   answer is empty, is not valid JSON, or is not a JSON array.
   * @throws ServiceUnavailableException when the call times out or fails for
   *   any other reason.
   */
  async parseWithAI(text: string): Promise<AiFlyerParseResult[]> {
    if (!text || text.trim().length === 0) {
      throw new BadRequestException('Flyer-texten är tom. Kan inte fortsätta.');
    }

    const prompt = this.buildPrompt(text);

    try {
      this.logger.debug('Sending request to Mistral Tiny');

      const client = await this.getClient();
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const response = await this.withTimeout<any>(
        client.chat({
          model: 'mistral-tiny',
          messages: [{ role: 'user', content: prompt }],
          temperature: 0.1,
        }),
        this.timeoutMs,
        'Mistral-anrop timeout',
      );

      // Anything other than a non-empty string cannot be sanitised or parsed,
      // so treat it the same way as an empty answer.
      const content: unknown = response?.choices?.[0]?.message?.content;
      if (typeof content !== 'string' || content.length === 0) {
        throw new BadRequestException('Tomt svar från AI-modellen.');
      }

      this.logger.debug(`Mistral response length: ${content.length} chars`);

      // Strip wrappers, then parse; a SyntaxError is mapped in the catch below.
      const parsed: unknown = JSON.parse(this.sanitizeJsonResponse(content));
      if (!Array.isArray(parsed)) {
        throw new BadRequestException('AI returnerade inte en JSON-array.');
      }

      const results: AiFlyerParseResult[] = [];
      let skipped = 0;
      parsed.forEach((entry: unknown, idx: number) => {
        if (this.isRecord(entry)) {
          // The original array position is kept so placeholder names stay stable.
          results.push(this.normalizeAiItem(entry, idx));
        } else {
          skipped += 1;
        }
      });
      if (skipped > 0) {
        this.logger.warn(`Skipped ${skipped} non-object entries in AI response`);
      }

      return results;
    } catch (err: unknown) {
      if (err instanceof SyntaxError) {
        this.logger.error(`JSON parse error: ${String(err)}`);
        throw new BadRequestException('AI returnerade ogiltigt JSON. Försök igen.');
      }
      // Errors already mapped to an HTTP exception pass through unchanged.
      if (err instanceof BadRequestException || err instanceof ServiceUnavailableException) {
        throw err;
      }
      this.logger.error(`AI parsing failed: ${String(err)}`);
      throw new ServiceUnavailableException('AI-tjänsten är inte tillgänglig just nu.');
    }
  }

  /**
   * Races `promise` against a timer.
   *
   * The timer is always cleared once the race settles. The underlying
   * operation is not cancelled when the timer wins; only the wait is abandoned.
   *
   * @param promise Operation to wait for.
   * @param timeoutMs Maximum wait in milliseconds.
   * @param timeoutMessage Message carried by the timeout exception.
   * @returns The value `promise` resolves to.
   * @throws ServiceUnavailableException when the timer fires first.
   */
  private async withTimeout<T>(
    promise: Promise<T>,
    timeoutMs: number,
    timeoutMessage: string,
  ): Promise<T> {
    let timeoutHandle: ReturnType<typeof setTimeout> | null = null;

    const timeoutPromise = new Promise<never>((_, reject) => {
      timeoutHandle = setTimeout(() => {
        reject(new ServiceUnavailableException(timeoutMessage));
      }, timeoutMs);
    });

    try {
      return await Promise.race([promise, timeoutPromise]);
    } finally {
      if (timeoutHandle) clearTimeout(timeoutHandle);
    }
  }

  /**
   * Builds the prompt sent to the model: extraction instructions, the flyer
   * text, and one example of the expected JSON output.
   *
   * Text longer than `maxPromptChars` is cut to save tokens; a warning is
   * logged because products past the cut are not seen by the model.
   */
  private buildPrompt(text: string): string {
    let truncatedText = text;
    if (text.length > this.maxPromptChars) {
      this.logger.warn(
        `Flyer text truncated from ${text.length} to ${this.maxPromptChars} chars before AI parsing`,
      );
      truncatedText = text.substring(0, this.maxPromptChars);
    }

    return [
      'Du är en expert på att tolka svenska matvaruflyers (t.ex. från Willys, Coop, ICA).',
      '',
      'Extrahera ALL produktinformation från följande text och returnera den som en JSON-array.',
      '',
      'För varje produkt, inkludera:',
      '- name: Produktnamn (fullständigt namn)',
      '- weight: Vikt (om tillgänglig, t.ex. "150g", "Ca 1kg") eller null',
      '- origin: Ursprung/land/märke (om tillgänglig, t.ex. "FALKENBERG") eller null',
      '- price: Pris som nummer (t.ex. 39.90) eller null',
      '- comparisonPrice: Jämförpris som nummer (t.ex. 266.00) eller null',
      '- unit: Enhet (kg, st, förp, l, etc.) eller null',
      '- offer: Erbjudande som array (t.ex. ["Max 3 köp/hushåll"]) eller []',
      '- category: Kategori (t.ex. "Fisk", "Kött", "Mejeri", "Grönsaker", "Frukt", "Dryck") eller null',
      '- validFrom: Giltig från (datum i formatet YYYY-MM-DD) eller null',
      '- validTo: Giltig till (datum i formatet YYYY-MM-DD) eller null',
      '',
      'Texten att tolka:',
      truncatedText,
      '',
      'Returnera ENDAST en JSON-array. Inga andra kommentarer, ingen markdown-markup.',
      'Exempel på utdata:',
      '[',
      '  {',
      '    "name": "KALLRÖKT LAX, GRAVAD LAX",',
      '    "weight": "150g",',
      '    "origin": "FALKENBERG",',
      '    "price": 39.90,',
      '    "comparisonPrice": 266.00,',
      '    "unit": "kg",',
      '    "offer": ["Max 3 köp/hushåll"],',
      '    "category": "Fisk",',
      '    "validFrom": "2026-05-18",',
      '    "validTo": "2026-05-24"',
      '  }',
      ']',
    ].join('\n');
  }

  /**
   * Cleans the model's answer so it can be handed to `JSON.parse`.
   *
   * Markdown code fences are removed, and when the remaining text contains a
   * `[` ... `]` span, only the span from the first `[` to the last `]` is kept.
   */
  private sanitizeJsonResponse(content: string): string {
    // Remove markdown fences.
    let cleaned = content.replace(/```json\n?/g, '').replace(/```\n?/g, '');
    cleaned = cleaned.trim();

    // Drop any prose surrounding the array.
    const jsonMatch = cleaned.match(/\[[\s\S]*\]/);
    if (jsonMatch) {
      cleaned = jsonMatch[0];
    }

    return cleaned;
  }

  /** Type guard: true for non-null, non-array objects. */
  private isRecord(value: unknown): value is Record<string, unknown> {
    return typeof value === 'object' && value !== null && !Array.isArray(value);
  }

  /**
   * Converts a model value to a finite number, or `null`.
   *
   * Strings have all whitespace removed (so "1 299,00" is read as 1299) and
   * the first comma turned into a decimal point before parsing.
   */
  private toNumber(val: unknown): number | null {
    if (typeof val === 'number') {
      return Number.isFinite(val) ? val : null;
    }
    if (typeof val === 'string') {
      const compact = val.replace(/\s/g, '').replace(',', '.');
      const parsed = Number.parseFloat(compact);
      return Number.isFinite(parsed) ? parsed : null;
    }
    return null;
  }

  /** Returns the trimmed string, or `null` for non-strings and blank strings. */
  private toText(val: unknown): string | null {
    if (typeof val === 'string') return val.trim() || null;
    return null;
  }

  /**
   * Returns the string/number entries of an array as trimmed, non-empty
   * strings. Non-arrays yield an empty list; other entry types are dropped so
   * they do not surface as "null" or "[object Object]".
   */
  private toStringList(val: unknown): string[] {
    if (!Array.isArray(val)) return [];
    return val
      .filter((v: unknown): v is string | number => typeof v === 'string' || typeof v === 'number')
      .map((v) => String(v).trim())
      .filter((v) => v.length > 0);
  }

  /**
   * Normalises and type-converts one AI item into an `AiFlyerParseResult`.
   *
   * @param item JSON object returned by the model for one product.
   * @param index Position of the item in the model's array; used to build the
   *   placeholder name `Produkt <index + 1>` when `name` is missing or blank.
   */
  private normalizeAiItem(item: Record<string, unknown>, index: number): AiFlyerParseResult {
    const rawName = this.toText(item.name) ?? `Produkt ${index + 1}`;
    const normalizedName = this.normalizeName(rawName);

    // `offer` may arrive as a plain string or as an array of strings.
    const offerText =
      this.toText(item.offer) ?? (this.toStringList(item.offer).join(' ') || null);

    return {
      rawName,
      normalizedName,
      category: this.toText(item.category),
      price: this.toNumber(item.price),
      priceUnit: this.toText(item.unit),
      comparisonPrice: this.toNumber(item.comparisonPrice),
      // NOTE(review): the prompt does not ask for `comparisonUnit`, so this is
      // null unless the model volunteers the field — confirm intended mapping.
      comparisonUnit: this.toText(item.comparisonUnit),
      offerText,
      confidence: 0.85, // fixed score given to every AI-parsed row
      reasonCodes: ['ai_parsed'],
    };
  }

  /**
   * Simple product-name normalisation: lower-case, drop every character that
   * is not a-z, å, ä, ö, a digit or whitespace, collapse whitespace runs to a
   * single space, and trim.
   */
  private normalizeName(name: string): string {
    return name
      .toLowerCase()
      .replace(/[^a-zåäö0-9\s]/g, '')
      .replace(/\s+/g, ' ')
      .trim();
  }
}
|
||||
Reference in New Issue
Block a user