Update flyerimport. flutter timeout 300 sek
This commit is contained in:
Generated
+4
-4
@@ -39,7 +39,7 @@
|
||||
"@types/express": "^5.0.5",
|
||||
"@types/jest": "^29.5.14",
|
||||
"@types/multer": "^1.4.12",
|
||||
"@types/node": "^22.15.29",
|
||||
"@types/node": "^22.19.19",
|
||||
"@types/passport-jwt": "^4.0.1",
|
||||
"@types/pdf-parse": "^1.1.5",
|
||||
"@types/supertest": "^7.2.0",
|
||||
@@ -2783,9 +2783,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@types/node": {
|
||||
"version": "22.19.17",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.19.17.tgz",
|
||||
"integrity": "sha512-wGdMcf+vPYM6jikpS/qhg6WiqSV/OhG+jeeHT/KlVqxYfD40iYJf9/AE1uQxVWFvU7MipKRkRv8NSHiCGgPr8Q==",
|
||||
"version": "22.19.19",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.19.19.tgz",
|
||||
"integrity": "sha512-dyh/xO2Fh5bYrfWaaqGrRQQGkNdmYw6AmaAUvYeUMNTWQtvb796ikLdmTchRmOlOiIJ1TDXfWgVx1QkUlQ6Hew==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"undici-types": "~6.21.0"
|
||||
|
||||
@@ -49,7 +49,7 @@
|
||||
"@types/express": "^5.0.5",
|
||||
"@types/jest": "^29.5.14",
|
||||
"@types/multer": "^1.4.12",
|
||||
"@types/node": "^22.15.29",
|
||||
"@types/node": "^22.19.19",
|
||||
"@types/passport-jwt": "^4.0.1",
|
||||
"@types/pdf-parse": "^1.1.5",
|
||||
"@types/supertest": "^7.2.0",
|
||||
|
||||
@@ -4,6 +4,8 @@ import {
|
||||
Logger,
|
||||
ServiceUnavailableException,
|
||||
} from '@nestjs/common';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
export interface AiFlyerParseResult {
|
||||
rawName: string;
|
||||
@@ -26,6 +28,8 @@ export class AiFlyerParserService {
|
||||
private readonly chunkSizeChars: number;
|
||||
private readonly chunkOverlapChars: number;
|
||||
private readonly maxChunks: number;
|
||||
private readonly debugEnabled: boolean;
|
||||
private readonly debugDirectory: string;
|
||||
private mistral: any;
|
||||
private apiKey: string;
|
||||
|
||||
@@ -40,6 +44,8 @@ export class AiFlyerParserService {
|
||||
this.chunkSizeChars = this.readPositiveIntEnv('FLYER_AI_CHUNK_SIZE_CHARS', 3_000);
|
||||
this.chunkOverlapChars = this.readPositiveIntEnv('FLYER_AI_CHUNK_OVERLAP_CHARS', 300);
|
||||
this.maxChunks = this.readPositiveIntEnv('FLYER_AI_MAX_CHUNKS', 8);
|
||||
this.debugEnabled = this.readBooleanEnv('FLYER_AI_DEBUG', false);
|
||||
this.debugDirectory = process.env.FLYER_AI_DEBUG_DIR?.trim() || path.join(process.cwd(), 'debug');
|
||||
}
|
||||
|
||||
private async getClient(): Promise<any> {
|
||||
@@ -60,19 +66,61 @@ export class AiFlyerParserService {
|
||||
throw new BadRequestException('Flyer-texten är tom. Kan inte fortsätta.');
|
||||
}
|
||||
|
||||
const debugSession = this.createDebugSession('AI-flyerimporter');
|
||||
|
||||
try {
|
||||
if (debugSession) {
|
||||
await this.writeDebugFile(
|
||||
debugSession,
|
||||
`${debugSession.baseName}-input.txt`,
|
||||
text,
|
||||
);
|
||||
}
|
||||
|
||||
const client = await this.getClient();
|
||||
const chunks = this.splitIntoChunks(text);
|
||||
this.logger.debug(`Parsing flyer text in ${chunks.length} chunk(s)`);
|
||||
|
||||
if (debugSession) {
|
||||
await this.writeDebugFile(
|
||||
debugSession,
|
||||
`${debugSession.baseName}-chunks.json`,
|
||||
JSON.stringify(chunks, null, 2),
|
||||
);
|
||||
}
|
||||
|
||||
const allItems: AiFlyerParseResult[] = [];
|
||||
for (let i = 0; i < chunks.length; i++) {
|
||||
const chunkItems = await this.parseChunkWithRetry(client, chunks[i], i + 1, chunks.length);
|
||||
const chunkItems = await this.parseChunkWithRetry(
|
||||
client,
|
||||
chunks[i],
|
||||
i + 1,
|
||||
chunks.length,
|
||||
debugSession,
|
||||
);
|
||||
allItems.push(...chunkItems);
|
||||
}
|
||||
|
||||
return this.dedupeItems(allItems);
|
||||
const deduped = this.dedupeItems(allItems);
|
||||
|
||||
if (debugSession) {
|
||||
await this.writeDebugFile(
|
||||
debugSession,
|
||||
`${debugSession.baseName}-result.json`,
|
||||
JSON.stringify(deduped, null, 2),
|
||||
);
|
||||
}
|
||||
|
||||
return deduped;
|
||||
} catch (err) {
|
||||
if (debugSession) {
|
||||
await this.writeDebugFile(
|
||||
debugSession,
|
||||
`${debugSession.baseName}-error.txt`,
|
||||
this.toErrorMessage(err),
|
||||
);
|
||||
}
|
||||
|
||||
if (err instanceof SyntaxError) {
|
||||
this.logger.error(`JSON parse error: ${String(err)}`);
|
||||
throw new BadRequestException('AI returnerade ogiltigt JSON. Försök igen.');
|
||||
@@ -155,11 +203,9 @@ Exempel på utdata:
|
||||
* Rensa AI-svaret för att kunna parse som JSON.
|
||||
*/
|
||||
private sanitizeJsonResponse(content: string): string {
|
||||
// Ta bort markdown fences
|
||||
let cleaned = content.replace(/```json\n?/g, '').replace(/```\n?/g, '');
|
||||
cleaned = cleaned.trim();
|
||||
|
||||
// Försök att extrahera JSON om det finns omgivande text
|
||||
const jsonMatch = cleaned.match(/\[[\s\S]*\]/);
|
||||
if (jsonMatch) {
|
||||
cleaned = jsonMatch[0];
|
||||
@@ -205,14 +251,11 @@ Exempel på utdata:
|
||||
comparisonPrice: toNumber(item.comparisonPrice),
|
||||
comparisonUnit: toString(item.comparisonUnit),
|
||||
offerText: toString(item.offer) || (toArray(item.offer).join(' ') || null),
|
||||
confidence: 0.85, // AI-parse får medelhög confidence
|
||||
confidence: 0.85,
|
||||
reasonCodes: ['ai_parsed'],
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Enkel normalisering av produktnamn.
|
||||
*/
|
||||
private normalizeName(name: string): string {
|
||||
return name
|
||||
.toLowerCase()
|
||||
@@ -247,6 +290,7 @@ Exempel på utdata:
|
||||
chunkText: string,
|
||||
chunkIndex: number,
|
||||
totalChunks: number,
|
||||
debugSession: { dirPath: string; baseName: string } | null,
|
||||
): Promise<AiFlyerParseResult[]> {
|
||||
const textWindows = [3000, 2200, 1600];
|
||||
const attempts = Math.max(1, Math.min(this.maxRetries + 1, textWindows.length));
|
||||
@@ -261,6 +305,14 @@ Exempel på utdata:
|
||||
`Sending request to Mistral Tiny (chunk ${chunkIndex}/${totalChunks}, attempt ${i + 1}/${attempts}, timeout=${this.timeoutMs}ms, textWindow=${window})`,
|
||||
);
|
||||
|
||||
if (debugSession) {
|
||||
await this.writeDebugFile(
|
||||
debugSession,
|
||||
`${debugSession.baseName}-chunk-${chunkIndex}-attempt-${i + 1}-prompt.txt`,
|
||||
prompt,
|
||||
);
|
||||
}
|
||||
|
||||
const response = await this.withTimeout<any>(
|
||||
client.chat({
|
||||
model: 'mistral-tiny',
|
||||
@@ -278,6 +330,14 @@ Exempel på utdata:
|
||||
|
||||
this.logger.debug(`Mistral response length: ${content.length} chars`);
|
||||
|
||||
if (debugSession) {
|
||||
await this.writeDebugFile(
|
||||
debugSession,
|
||||
`${debugSession.baseName}-chunk-${chunkIndex}-attempt-${i + 1}-response.txt`,
|
||||
String(content),
|
||||
);
|
||||
}
|
||||
|
||||
const jsonString = this.sanitizeJsonResponse(content);
|
||||
const items = JSON.parse(jsonString) as Array<Record<string, unknown>>;
|
||||
|
||||
@@ -288,6 +348,13 @@ Exempel på utdata:
|
||||
return items.map((item, idx) => this.normalizeAiItem(item, idx));
|
||||
} catch (attemptErr) {
|
||||
lastError = attemptErr;
|
||||
if (debugSession) {
|
||||
await this.writeDebugFile(
|
||||
debugSession,
|
||||
`${debugSession.baseName}-chunk-${chunkIndex}-attempt-${i + 1}-error.txt`,
|
||||
this.toErrorMessage(attemptErr),
|
||||
);
|
||||
}
|
||||
if (!this.isRetryableError(attemptErr) || i === attempts - 1) {
|
||||
throw attemptErr;
|
||||
}
|
||||
@@ -332,6 +399,44 @@ Exempel på utdata:
|
||||
return parsed;
|
||||
}
|
||||
|
||||
/**
 * Reads a boolean flag from an environment variable.
 *
 * The raw value is trimmed and compared case-insensitively against the
 * accepted truthy spellings `1`, `true`, `yes` and `on`. Any other non-blank
 * value yields `false`.
 *
 * @param key - Name of the environment variable to read.
 * @param fallback - Value returned when the variable is unset, empty or
 *   contains only whitespace.
 * @returns The parsed flag, or `fallback` when there is nothing to parse.
 */
private readBooleanEnv(key: string, fallback: boolean): boolean {
  // Trim BEFORE the emptiness check so a whitespace-only value counts as
  // "not configured" instead of silently overriding the fallback with false.
  const normalized = process.env[key]?.trim().toLowerCase();
  if (!normalized) return fallback;

  return ['1', 'true', 'yes', 'on'].includes(normalized);
}
|
||||
|
||||
/**
 * Describes the folder that debug artifacts for one run are written to.
 *
 * The folder name is `<prefix>-<YYMMDD>-<HHMMSS>`, built from the local clock.
 * Nothing is created on disk here; this only computes the names.
 * The timestamp has one-second resolution, so two sessions started with the
 * same prefix within the same second get the same folder name.
 *
 * @param prefix - Leading part of the folder name.
 * @returns The target directory path and base name, or `null` when debug
 *   output is disabled.
 */
private createDebugSession(prefix: string): { dirPath: string; baseName: string } | null {
  if (!this.debugEnabled) {
    return null;
  }

  const pad2 = (value: number): string => String(value).padStart(2, '0');
  const stamp = new Date();

  // Two-digit year followed by zero-padded month and day.
  const dateStamp =
    String(stamp.getFullYear()).slice(-2) + pad2(stamp.getMonth() + 1) + pad2(stamp.getDate());
  // Zero-padded hours, minutes and seconds.
  const timeStamp = pad2(stamp.getHours()) + pad2(stamp.getMinutes()) + pad2(stamp.getSeconds());

  const baseName = [prefix, dateStamp, timeStamp].join('-');
  return { dirPath: path.join(this.debugDirectory, baseName), baseName };
}
|
||||
|
||||
/**
 * Writes one debug artifact into the session directory, best effort.
 *
 * The directory is created (recursively) on every call, so callers do not
 * need to prepare it. Any failure is logged as a warning and swallowed;
 * this method does not rethrow.
 *
 * @param debugSession - Session descriptor; a falsy value makes this a no-op.
 * @param filename - File name relative to the session directory.
 * @param content - Text written to the file as UTF-8.
 */
private async writeDebugFile(
  debugSession: { dirPath: string; baseName: string } | null,
  filename: string,
  content: string,
): Promise<void> {
  if (!debugSession) {
    return;
  }

  try {
    const targetDir = debugSession.dirPath;
    await fs.promises.mkdir(targetDir, { recursive: true });
    await fs.promises.writeFile(path.join(targetDir, filename), content, 'utf8');
  } catch (writeError) {
    const reason = this.toErrorMessage(writeError);
    this.logger.warn(`Failed to write flyer debug file ${filename}: ${reason}`);
  }
}
|
||||
|
||||
private isRetryableError(err: unknown): boolean {
|
||||
if (err instanceof ServiceUnavailableException) return true;
|
||||
const message = this.toErrorMessage(err).toLowerCase();
|
||||
|
||||
Reference in New Issue
Block a user