refactor(ai): enhance AI trace integration and OCR normalization
- Add FlyerTraceSupplement type for AI trace metadata
- Implement getFlyerTraceSupplements method to fetch trace supplements
- Update AiTraceService to include prompt/rawOutput and counters in flyer traces
- Add persistFlyerTrace method to FlyerImportService for trace persistence
- Enhance AiFlyerParserService to return structured trace data with prompts and retries
- Update FlyerNormalizerService with OCR typo fixes for cheese variants and spröd bakad firre
- Improve Flutter admin panel with selectable text, warnings display, and tooltips
- Add comprehensive tests for AI trace supplements and normalization rules
This commit is contained in:
@@ -25,6 +25,13 @@ export interface AiFlyerParseResult {
|
||||
reasonCodes: string[];
|
||||
}
|
||||
|
||||
/**
 * Trace metadata describing one AI flyer parse run, returned by
 * `parseWithAI` next to the parsed items.
 */
export interface AiFlyerParseTrace {
  /**
   * Prompts of all processed chunks, joined with a `-----` separator line;
   * `null` when no prompt was collected.
   */
  prompt: string | null;

  /**
   * Raw AI outputs of all processed chunks, joined with a `-----` separator
   * line; `null` when no output was collected.
   */
  rawOutput: string | null;

  /** Number of text chunks the flyer text was split into. */
  chunkCount: number;

  /** Attempts beyond the first one, summed over all chunks. */
  retryCount: number;
}
|
||||
|
||||
@Injectable()
|
||||
export class AiFlyerParserService {
|
||||
private readonly logger = new Logger(AiFlyerParserService.name);
|
||||
@@ -66,7 +73,7 @@ export class AiFlyerParserService {
|
||||
* @param text Text from the flyer (from pdf-parse or OCR)
|
||||
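* @returns The deduplicated parsed products (`items`) together with the AI trace (`trace`: joined prompts, joined raw output, chunk count and retry count)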
|
||||
*/
|
||||
async parseWithAI(text: string): Promise<AiFlyerParseResult[]> {
|
||||
async parseWithAI(text: string): Promise<{ items: AiFlyerParseResult[]; trace: AiFlyerParseTrace }> {
|
||||
if (!text || text.trim().length === 0) {
|
||||
throw new BadRequestException('Flyer-texten är tom. Kan inte fortsätta.');
|
||||
}
|
||||
@@ -95,18 +102,30 @@ export class AiFlyerParserService {
|
||||
}
|
||||
|
||||
const allItems: AiFlyerParseResult[] = [];
|
||||
const prompts: string[] = [];
|
||||
const rawResponses: string[] = [];
|
||||
let retryCount = 0;
|
||||
for (let i = 0; i < chunks.length; i++) {
|
||||
const chunkItems = await this.parseChunkWithRetry(
|
||||
const chunkResult = await this.parseChunkWithRetry(
|
||||
client,
|
||||
chunks[i],
|
||||
i + 1,
|
||||
chunks.length,
|
||||
debugSession,
|
||||
);
|
||||
allItems.push(...chunkItems);
|
||||
allItems.push(...chunkResult.items);
|
||||
prompts.push(chunkResult.prompt);
|
||||
rawResponses.push(chunkResult.rawOutput);
|
||||
retryCount += Math.max(0, chunkResult.attemptsUsed - 1);
|
||||
}
|
||||
|
||||
const deduped = this.dedupeItems(allItems);
|
||||
const trace: AiFlyerParseTrace = {
|
||||
prompt: prompts.length > 0 ? prompts.join('\n\n-----\n\n') : null,
|
||||
rawOutput: rawResponses.length > 0 ? rawResponses.join('\n\n-----\n\n') : null,
|
||||
chunkCount: chunks.length,
|
||||
retryCount,
|
||||
};
|
||||
|
||||
if (debugSession) {
|
||||
await this.writeDebugFile(
|
||||
@@ -116,7 +135,7 @@ export class AiFlyerParserService {
|
||||
);
|
||||
}
|
||||
|
||||
return deduped;
|
||||
return { items: deduped, trace };
|
||||
} catch (err) {
|
||||
if (debugSession) {
|
||||
await this.writeDebugFile(
|
||||
@@ -371,7 +390,12 @@ ${truncatedText}`;
|
||||
chunkIndex: number,
|
||||
totalChunks: number,
|
||||
debugSession: { dirPath: string; baseName: string } | null,
|
||||
): Promise<AiFlyerParseResult[]> {
|
||||
): Promise<{
|
||||
items: AiFlyerParseResult[];
|
||||
prompt: string;
|
||||
rawOutput: string;
|
||||
attemptsUsed: number;
|
||||
}> {
|
||||
const textWindows = [3000, 2200, 1600];
|
||||
const attempts = Math.max(1, Math.min(this.maxRetries + 1, textWindows.length));
|
||||
let lastError: unknown = null;
|
||||
@@ -425,7 +449,12 @@ ${truncatedText}`;
|
||||
throw new BadRequestException('AI returnerade inte en JSON-array.');
|
||||
}
|
||||
|
||||
return items.map((aiItem, idx) => this.normalizeAiItem(aiItem, idx));
|
||||
return {
|
||||
items: items.map((aiItem, idx) => this.normalizeAiItem(aiItem, idx)),
|
||||
prompt,
|
||||
rawOutput: String(content),
|
||||
attemptsUsed: i + 1,
|
||||
};
|
||||
} catch (attemptErr) {
|
||||
lastError = attemptErr;
|
||||
if (debugSession) {
|
||||
@@ -454,14 +483,24 @@ ${truncatedText}`;
|
||||
const deduped: AiFlyerParseResult[] = [];
|
||||
|
||||
for (const item of items) {
|
||||
const normalizedName = item.normalizedName.trim();
|
||||
const normalizedBrand = (item.brand ?? '').trim().toLowerCase();
|
||||
const normalizedPrice = item.price == null ? '' : Number(item.price).toFixed(2);
|
||||
const normalizedPriceUnit = (item.priceUnit ?? '').trim().toLowerCase();
|
||||
const normalizedComparisonPrice =
|
||||
item.comparisonPrice == null ? '' : Number(item.comparisonPrice).toFixed(2);
|
||||
const normalizedComparisonUnit = (item.comparisonUnit ?? '').trim().toLowerCase();
|
||||
const offerSignature = this.offerSignature(item.offerText);
|
||||
|
||||
const key = [
|
||||
item.normalizedName,
|
||||
item.price ?? '',
|
||||
item.priceUnit ?? '',
|
||||
item.offerText ?? '',
|
||||
normalizedName,
|
||||
normalizedBrand,
|
||||
normalizedPrice,
|
||||
normalizedPriceUnit,
|
||||
normalizedComparisonPrice,
|
||||
normalizedComparisonUnit,
|
||||
offerSignature,
|
||||
item.isBundle ? '1' : '0',
|
||||
item.bundleWeight ?? '',
|
||||
JSON.stringify(item.bundleItems ?? []),
|
||||
].join('|');
|
||||
if (seen.has(key)) continue;
|
||||
seen.add(key);
|
||||
@@ -471,6 +510,27 @@ ${truncatedText}`;
|
||||
return deduped;
|
||||
}
|
||||
|
||||
/**
 * Builds the offer-text component of the dedupe key.
 *
 * The text is lower-cased, stripped of diacritics (so "för" becomes "for"
 * and "hushåll" becomes "hushall"), reduced to `a-z`, digits and single
 * spaces, and returned only when it contains campaign wording such as a
 * purchase limit or a multi-buy deal. Any other offer text yields an empty
 * signature.
 *
 * @param offerText Raw offer text of a parsed item; may be null or undefined.
 * @returns The normalized offer text when campaign markers are present,
 *          otherwise an empty string.
 */
private offerSignature(offerText: string | null | undefined): string {
  if (!offerText || offerText.trim().length === 0) return '';

  const normalized = offerText
    .toLowerCase()
    // Decompose accented letters so the combining marks can be removed.
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    .replace(/[^a-z0-9\s]/g, ' ')
    .replace(/\s+/g, ' ')
    .trim();

  if (!normalized) return '';

  // A keyword only counts when it is not embedded in a longer word.
  // Without these guards, short tokens such as "ta", "for" or "kop" match
  // inside ordinary words ("potatis", "forpackning", "koppar"), which makes
  // nearly every offer text look like a campaign. Adjacent digits are
  // allowed so glued forms like "max2" or "3for20" still qualify.
  const keywordMarker =
    /(?<![a-z])(?:max|hogst|begransat|hushall|kund|kop|for|betala|ta)(?![a-z])/;

  // Explicit deal shapes ("3 for 20", "ta 3 betala for 2"), kept as a
  // fallback for text where the words run together.
  const multiBuyMarker = /\d+\s*for\s*\d+/;
  const takePayMarker = /ta\s*\d+\s*betala\s*for\s*\d+/;

  const hasCampaignMarkers =
    keywordMarker.test(normalized)
    || multiBuyMarker.test(normalized)
    || takePayMarker.test(normalized);

  return hasCampaignMarkers ? normalized : '';
}
|
||||
|
||||
private readPositiveIntEnv(key: string, fallback: number): number {
|
||||
const raw = process.env[key];
|
||||
if (!raw) return fallback;
|
||||
|
||||
Reference in New Issue
Block a user