Update flyer import (add AI debug dumps); raise Flutter request timeout to 300 s
This commit is contained in:
@@ -21,3 +21,5 @@ JWT_SECRET=uK9yRQpyyWOcHYcYbpAdsJ7NJcEsyCYZcgF82OnBz2k=
|
|||||||
MISTRAL_API_KEY=JGPjLuNnzaLSYMxKbexLZohUOegrSLye
|
MISTRAL_API_KEY=JGPjLuNnzaLSYMxKbexLZohUOegrSLye
|
||||||
FLYER_AI_TIMEOUT_MS=45000
|
FLYER_AI_TIMEOUT_MS=45000
|
||||||
FLYER_AI_RETRIES=2
|
FLYER_AI_RETRIES=2
|
||||||
|
FLYER_AI_DEBUG=1
|
||||||
|
FLYER_AI_DEBUG_DIR=/app/debug
|
||||||
|
|||||||
@@ -21,6 +21,9 @@ JWT_SECRET=byt-ut-mig
|
|||||||
MISTRAL_API_KEY=
|
MISTRAL_API_KEY=
|
||||||
FLYER_AI_TIMEOUT_MS=45000
|
FLYER_AI_TIMEOUT_MS=45000
|
||||||
FLYER_AI_RETRIES=2
|
FLYER_AI_RETRIES=2
|
||||||
|
FLYER_AI_DEBUG=0
|
||||||
|
# Linux-container: /app/debug, lokalt: ./debug
|
||||||
|
FLYER_AI_DEBUG_DIR=/app/debug
|
||||||
|
|
||||||
# Publik URL (används av frontend)
|
# Publik URL (används av frontend)
|
||||||
NEXT_PUBLIC_APP_URL=https://recept.gynther.se
|
NEXT_PUBLIC_APP_URL=https://recept.gynther.se
|
||||||
|
|||||||
Generated
+4
-4
@@ -39,7 +39,7 @@
|
|||||||
"@types/express": "^5.0.5",
|
"@types/express": "^5.0.5",
|
||||||
"@types/jest": "^29.5.14",
|
"@types/jest": "^29.5.14",
|
||||||
"@types/multer": "^1.4.12",
|
"@types/multer": "^1.4.12",
|
||||||
"@types/node": "^22.15.29",
|
"@types/node": "^22.19.19",
|
||||||
"@types/passport-jwt": "^4.0.1",
|
"@types/passport-jwt": "^4.0.1",
|
||||||
"@types/pdf-parse": "^1.1.5",
|
"@types/pdf-parse": "^1.1.5",
|
||||||
"@types/supertest": "^7.2.0",
|
"@types/supertest": "^7.2.0",
|
||||||
@@ -2783,9 +2783,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@types/node": {
|
"node_modules/@types/node": {
|
||||||
"version": "22.19.17",
|
"version": "22.19.19",
|
||||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.19.17.tgz",
|
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.19.19.tgz",
|
||||||
"integrity": "sha512-wGdMcf+vPYM6jikpS/qhg6WiqSV/OhG+jeeHT/KlVqxYfD40iYJf9/AE1uQxVWFvU7MipKRkRv8NSHiCGgPr8Q==",
|
"integrity": "sha512-dyh/xO2Fh5bYrfWaaqGrRQQGkNdmYw6AmaAUvYeUMNTWQtvb796ikLdmTchRmOlOiIJ1TDXfWgVx1QkUlQ6Hew==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"undici-types": "~6.21.0"
|
"undici-types": "~6.21.0"
|
||||||
|
|||||||
@@ -49,7 +49,7 @@
|
|||||||
"@types/express": "^5.0.5",
|
"@types/express": "^5.0.5",
|
||||||
"@types/jest": "^29.5.14",
|
"@types/jest": "^29.5.14",
|
||||||
"@types/multer": "^1.4.12",
|
"@types/multer": "^1.4.12",
|
||||||
"@types/node": "^22.15.29",
|
"@types/node": "^22.19.19",
|
||||||
"@types/passport-jwt": "^4.0.1",
|
"@types/passport-jwt": "^4.0.1",
|
||||||
"@types/pdf-parse": "^1.1.5",
|
"@types/pdf-parse": "^1.1.5",
|
||||||
"@types/supertest": "^7.2.0",
|
"@types/supertest": "^7.2.0",
|
||||||
|
|||||||
@@ -4,6 +4,8 @@ import {
|
|||||||
Logger,
|
Logger,
|
||||||
ServiceUnavailableException,
|
ServiceUnavailableException,
|
||||||
} from '@nestjs/common';
|
} from '@nestjs/common';
|
||||||
|
import * as fs from 'fs';
|
||||||
|
import * as path from 'path';
|
||||||
|
|
||||||
export interface AiFlyerParseResult {
|
export interface AiFlyerParseResult {
|
||||||
rawName: string;
|
rawName: string;
|
||||||
@@ -26,6 +28,8 @@ export class AiFlyerParserService {
|
|||||||
private readonly chunkSizeChars: number;
|
private readonly chunkSizeChars: number;
|
||||||
private readonly chunkOverlapChars: number;
|
private readonly chunkOverlapChars: number;
|
||||||
private readonly maxChunks: number;
|
private readonly maxChunks: number;
|
||||||
|
private readonly debugEnabled: boolean;
|
||||||
|
private readonly debugDirectory: string;
|
||||||
private mistral: any;
|
private mistral: any;
|
||||||
private apiKey: string;
|
private apiKey: string;
|
||||||
|
|
||||||
@@ -40,6 +44,8 @@ export class AiFlyerParserService {
|
|||||||
this.chunkSizeChars = this.readPositiveIntEnv('FLYER_AI_CHUNK_SIZE_CHARS', 3_000);
|
this.chunkSizeChars = this.readPositiveIntEnv('FLYER_AI_CHUNK_SIZE_CHARS', 3_000);
|
||||||
this.chunkOverlapChars = this.readPositiveIntEnv('FLYER_AI_CHUNK_OVERLAP_CHARS', 300);
|
this.chunkOverlapChars = this.readPositiveIntEnv('FLYER_AI_CHUNK_OVERLAP_CHARS', 300);
|
||||||
this.maxChunks = this.readPositiveIntEnv('FLYER_AI_MAX_CHUNKS', 8);
|
this.maxChunks = this.readPositiveIntEnv('FLYER_AI_MAX_CHUNKS', 8);
|
||||||
|
this.debugEnabled = this.readBooleanEnv('FLYER_AI_DEBUG', false);
|
||||||
|
this.debugDirectory = process.env.FLYER_AI_DEBUG_DIR?.trim() || path.join(process.cwd(), 'debug');
|
||||||
}
|
}
|
||||||
|
|
||||||
private async getClient(): Promise<any> {
|
private async getClient(): Promise<any> {
|
||||||
@@ -60,19 +66,61 @@ export class AiFlyerParserService {
|
|||||||
throw new BadRequestException('Flyer-texten är tom. Kan inte fortsätta.');
|
throw new BadRequestException('Flyer-texten är tom. Kan inte fortsätta.');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const debugSession = this.createDebugSession('AI-flyerimporter');
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
if (debugSession) {
|
||||||
|
await this.writeDebugFile(
|
||||||
|
debugSession,
|
||||||
|
`${debugSession.baseName}-input.txt`,
|
||||||
|
text,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
const client = await this.getClient();
|
const client = await this.getClient();
|
||||||
const chunks = this.splitIntoChunks(text);
|
const chunks = this.splitIntoChunks(text);
|
||||||
this.logger.debug(`Parsing flyer text in ${chunks.length} chunk(s)`);
|
this.logger.debug(`Parsing flyer text in ${chunks.length} chunk(s)`);
|
||||||
|
|
||||||
|
if (debugSession) {
|
||||||
|
await this.writeDebugFile(
|
||||||
|
debugSession,
|
||||||
|
`${debugSession.baseName}-chunks.json`,
|
||||||
|
JSON.stringify(chunks, null, 2),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
const allItems: AiFlyerParseResult[] = [];
|
const allItems: AiFlyerParseResult[] = [];
|
||||||
for (let i = 0; i < chunks.length; i++) {
|
for (let i = 0; i < chunks.length; i++) {
|
||||||
const chunkItems = await this.parseChunkWithRetry(client, chunks[i], i + 1, chunks.length);
|
const chunkItems = await this.parseChunkWithRetry(
|
||||||
|
client,
|
||||||
|
chunks[i],
|
||||||
|
i + 1,
|
||||||
|
chunks.length,
|
||||||
|
debugSession,
|
||||||
|
);
|
||||||
allItems.push(...chunkItems);
|
allItems.push(...chunkItems);
|
||||||
}
|
}
|
||||||
|
|
||||||
return this.dedupeItems(allItems);
|
const deduped = this.dedupeItems(allItems);
|
||||||
|
|
||||||
|
if (debugSession) {
|
||||||
|
await this.writeDebugFile(
|
||||||
|
debugSession,
|
||||||
|
`${debugSession.baseName}-result.json`,
|
||||||
|
JSON.stringify(deduped, null, 2),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
return deduped;
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
|
if (debugSession) {
|
||||||
|
await this.writeDebugFile(
|
||||||
|
debugSession,
|
||||||
|
`${debugSession.baseName}-error.txt`,
|
||||||
|
this.toErrorMessage(err),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
if (err instanceof SyntaxError) {
|
if (err instanceof SyntaxError) {
|
||||||
this.logger.error(`JSON parse error: ${String(err)}`);
|
this.logger.error(`JSON parse error: ${String(err)}`);
|
||||||
throw new BadRequestException('AI returnerade ogiltigt JSON. Försök igen.');
|
throw new BadRequestException('AI returnerade ogiltigt JSON. Försök igen.');
|
||||||
@@ -155,11 +203,9 @@ Exempel på utdata:
|
|||||||
* Rensa AI-svaret för att kunna parse som JSON.
|
* Rensa AI-svaret för att kunna parse som JSON.
|
||||||
*/
|
*/
|
||||||
private sanitizeJsonResponse(content: string): string {
|
private sanitizeJsonResponse(content: string): string {
|
||||||
// Ta bort markdown fences
|
|
||||||
let cleaned = content.replace(/```json\n?/g, '').replace(/```\n?/g, '');
|
let cleaned = content.replace(/```json\n?/g, '').replace(/```\n?/g, '');
|
||||||
cleaned = cleaned.trim();
|
cleaned = cleaned.trim();
|
||||||
|
|
||||||
// Försök att extrahera JSON om det finns omgivande text
|
|
||||||
const jsonMatch = cleaned.match(/\[[\s\S]*\]/);
|
const jsonMatch = cleaned.match(/\[[\s\S]*\]/);
|
||||||
if (jsonMatch) {
|
if (jsonMatch) {
|
||||||
cleaned = jsonMatch[0];
|
cleaned = jsonMatch[0];
|
||||||
@@ -205,14 +251,11 @@ Exempel på utdata:
|
|||||||
comparisonPrice: toNumber(item.comparisonPrice),
|
comparisonPrice: toNumber(item.comparisonPrice),
|
||||||
comparisonUnit: toString(item.comparisonUnit),
|
comparisonUnit: toString(item.comparisonUnit),
|
||||||
offerText: toString(item.offer) || (toArray(item.offer).join(' ') || null),
|
offerText: toString(item.offer) || (toArray(item.offer).join(' ') || null),
|
||||||
confidence: 0.85, // AI-parse får medelhög confidence
|
confidence: 0.85,
|
||||||
reasonCodes: ['ai_parsed'],
|
reasonCodes: ['ai_parsed'],
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Enkel normalisering av produktnamn.
|
|
||||||
*/
|
|
||||||
private normalizeName(name: string): string {
|
private normalizeName(name: string): string {
|
||||||
return name
|
return name
|
||||||
.toLowerCase()
|
.toLowerCase()
|
||||||
@@ -247,6 +290,7 @@ Exempel på utdata:
|
|||||||
chunkText: string,
|
chunkText: string,
|
||||||
chunkIndex: number,
|
chunkIndex: number,
|
||||||
totalChunks: number,
|
totalChunks: number,
|
||||||
|
debugSession: { dirPath: string; baseName: string } | null,
|
||||||
): Promise<AiFlyerParseResult[]> {
|
): Promise<AiFlyerParseResult[]> {
|
||||||
const textWindows = [3000, 2200, 1600];
|
const textWindows = [3000, 2200, 1600];
|
||||||
const attempts = Math.max(1, Math.min(this.maxRetries + 1, textWindows.length));
|
const attempts = Math.max(1, Math.min(this.maxRetries + 1, textWindows.length));
|
||||||
@@ -261,6 +305,14 @@ Exempel på utdata:
|
|||||||
`Sending request to Mistral Tiny (chunk ${chunkIndex}/${totalChunks}, attempt ${i + 1}/${attempts}, timeout=${this.timeoutMs}ms, textWindow=${window})`,
|
`Sending request to Mistral Tiny (chunk ${chunkIndex}/${totalChunks}, attempt ${i + 1}/${attempts}, timeout=${this.timeoutMs}ms, textWindow=${window})`,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
if (debugSession) {
|
||||||
|
await this.writeDebugFile(
|
||||||
|
debugSession,
|
||||||
|
`${debugSession.baseName}-chunk-${chunkIndex}-attempt-${i + 1}-prompt.txt`,
|
||||||
|
prompt,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
const response = await this.withTimeout<any>(
|
const response = await this.withTimeout<any>(
|
||||||
client.chat({
|
client.chat({
|
||||||
model: 'mistral-tiny',
|
model: 'mistral-tiny',
|
||||||
@@ -278,6 +330,14 @@ Exempel på utdata:
|
|||||||
|
|
||||||
this.logger.debug(`Mistral response length: ${content.length} chars`);
|
this.logger.debug(`Mistral response length: ${content.length} chars`);
|
||||||
|
|
||||||
|
if (debugSession) {
|
||||||
|
await this.writeDebugFile(
|
||||||
|
debugSession,
|
||||||
|
`${debugSession.baseName}-chunk-${chunkIndex}-attempt-${i + 1}-response.txt`,
|
||||||
|
String(content),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
const jsonString = this.sanitizeJsonResponse(content);
|
const jsonString = this.sanitizeJsonResponse(content);
|
||||||
const items = JSON.parse(jsonString) as Array<Record<string, unknown>>;
|
const items = JSON.parse(jsonString) as Array<Record<string, unknown>>;
|
||||||
|
|
||||||
@@ -288,6 +348,13 @@ Exempel på utdata:
|
|||||||
return items.map((item, idx) => this.normalizeAiItem(item, idx));
|
return items.map((item, idx) => this.normalizeAiItem(item, idx));
|
||||||
} catch (attemptErr) {
|
} catch (attemptErr) {
|
||||||
lastError = attemptErr;
|
lastError = attemptErr;
|
||||||
|
if (debugSession) {
|
||||||
|
await this.writeDebugFile(
|
||||||
|
debugSession,
|
||||||
|
`${debugSession.baseName}-chunk-${chunkIndex}-attempt-${i + 1}-error.txt`,
|
||||||
|
this.toErrorMessage(attemptErr),
|
||||||
|
);
|
||||||
|
}
|
||||||
if (!this.isRetryableError(attemptErr) || i === attempts - 1) {
|
if (!this.isRetryableError(attemptErr) || i === attempts - 1) {
|
||||||
throw attemptErr;
|
throw attemptErr;
|
||||||
}
|
}
|
||||||
@@ -332,6 +399,44 @@ Exempel på utdata:
|
|||||||
return parsed;
|
return parsed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reads a boolean flag from `process.env`.
 *
 * An unset, empty, or whitespace-only variable yields `fallback`. Otherwise the
 * trimmed, lower-cased value is `true` only for `1`, `true`, `yes` or `on`;
 * any other text yields `false`.
 *
 * @param key Name of the environment variable to read.
 * @param fallback Value returned when the variable carries no usable text.
 * @returns The parsed flag.
 */
private readBooleanEnv(key: string, fallback: boolean): boolean {
  // Trim before the emptiness check so a value such as " " counts as "not set"
  // instead of silently becoming `false`.
  const normalized = process.env[key]?.trim().toLowerCase();
  if (!normalized) return fallback;
  return ['1', 'true', 'yes', 'on'].includes(normalized);
}
|
||||||
|
|
||||||
|
/**
 * Computes the directory path and file-name prefix for one debug session.
 *
 * Returns `null` when `this.debugEnabled` is false, so callers can skip all
 * debug output with a single null check. This method only builds strings; it
 * does not touch the file system.
 *
 * The base name has the form `<prefix>-<YYMMDD>-<HHMMSS><mmm>-<random>`, using
 * the local-time getters of `Date`. The millisecond and random parts keep two
 * sessions that start within the same second from sharing a directory and
 * overwriting each other's files.
 *
 * @param prefix Leading part of the session name.
 * @returns The session's `dirPath` and `baseName`, or `null` when disabled.
 */
private createDebugSession(prefix: string): { dirPath: string; baseName: string } | null {
  if (!this.debugEnabled) return null;

  const now = new Date();
  const pad = (value: number, width = 2): string => String(value).padStart(width, '0');

  const datePart = `${String(now.getFullYear()).slice(-2)}${pad(now.getMonth() + 1)}${pad(now.getDate())}`;
  const timePart =
    `${pad(now.getHours())}${pad(now.getMinutes())}${pad(now.getSeconds())}` +
    pad(now.getMilliseconds(), 3);

  // Six base-36 characters; padded in case the random string comes out short.
  const uniquePart = Math.random().toString(36).slice(2, 8).padEnd(6, '0');

  const baseName = `${prefix}-${datePart}-${timePart}-${uniquePart}`;
  const dirPath = path.join(this.debugDirectory, baseName);
  return { dirPath, baseName };
}
|
||||||
|
|
||||||
|
/**
 * Writes one debug artifact into the session's directory.
 *
 * Does nothing when `debugSession` is falsy. Otherwise it creates the session
 * directory (recursively) and writes `content` as UTF-8 to `filename` inside
 * it. Errors from either step are caught and reported through
 * `this.logger.warn` instead of being rethrown.
 *
 * @param debugSession Session descriptor, or `null` when debugging is off.
 * @param filename File name, joined onto the session directory.
 * @param content Text to store in the file.
 */
private async writeDebugFile(
  debugSession: { dirPath: string; baseName: string } | null,
  filename: string,
  content: string,
): Promise<void> {
  if (debugSession) {
    const { dirPath } = debugSession;
    try {
      await fs.promises.mkdir(dirPath, { recursive: true });
      await fs.promises.writeFile(path.join(dirPath, filename), content, 'utf8');
    } catch (writeErr) {
      const reason = this.toErrorMessage(writeErr);
      this.logger.warn(`Failed to write flyer debug file ${filename}: ${reason}`);
    }
  }
}
|
||||||
|
|
||||||
private isRetryableError(err: unknown): boolean {
|
private isRetryableError(err: unknown): boolean {
|
||||||
if (err instanceof ServiceUnavailableException) return true;
|
if (err instanceof ServiceUnavailableException) return true;
|
||||||
const message = this.toErrorMessage(err).toLowerCase();
|
const message = this.toErrorMessage(err).toLowerCase();
|
||||||
|
|||||||
@@ -12,6 +12,8 @@ services:
|
|||||||
MISTRAL_API_KEY: "${MISTRAL_API_KEY:-}"
|
MISTRAL_API_KEY: "${MISTRAL_API_KEY:-}"
|
||||||
FLYER_AI_TIMEOUT_MS: "${FLYER_AI_TIMEOUT_MS:-30000}"
|
FLYER_AI_TIMEOUT_MS: "${FLYER_AI_TIMEOUT_MS:-30000}"
|
||||||
FLYER_AI_RETRIES: "${FLYER_AI_RETRIES:-2}"
|
FLYER_AI_RETRIES: "${FLYER_AI_RETRIES:-2}"
|
||||||
|
FLYER_AI_DEBUG: "${FLYER_AI_DEBUG:-0}"
|
||||||
|
FLYER_AI_DEBUG_DIR: "${FLYER_AI_DEBUG_DIR:-/app/debug}"
|
||||||
JWT_SECRET: "${JWT_SECRET}"
|
JWT_SECRET: "${JWT_SECRET}"
|
||||||
ALLOWED_ORIGIN: "${NEXT_PUBLIC_APP_URL}"
|
ALLOWED_ORIGIN: "${NEXT_PUBLIC_APP_URL}"
|
||||||
ADMIN_NADMIN_PASSWORD: "${ADMIN_NADMIN_PASSWORD}"
|
ADMIN_NADMIN_PASSWORD: "${ADMIN_NADMIN_PASSWORD}"
|
||||||
|
|||||||
@@ -162,7 +162,7 @@ class ImportRepository {
|
|||||||
);
|
);
|
||||||
|
|
||||||
final streamed = await _client.send(request).timeout(
|
final streamed = await _client.send(request).timeout(
|
||||||
const Duration(seconds: 120),
|
const Duration(seconds: 300),
|
||||||
onTimeout: () {
|
onTimeout: () {
|
||||||
throw ApiException(
|
throw ApiException(
|
||||||
type: ApiErrorType.network,
|
type: ApiErrorType.network,
|
||||||
|
|||||||
Reference in New Issue
Block a user