feat(import): enhance image URL handling and error reporting during recipe import
This commit is contained in:
@@ -20,19 +20,24 @@ export async function downloadAndOptimizeImage(
|
||||
sourceUrl: string,
|
||||
destDir: string,
|
||||
): Promise<string> {
|
||||
// Protokollvalidering
|
||||
if (!sourceUrl.startsWith('https://')) {
|
||||
throw new Error('Bild-URL måste använda https://');
|
||||
}
|
||||
const raw = sourceUrl.trim();
|
||||
const protocolNormalized = raw.startsWith('//') ? `https:${raw}` : raw;
|
||||
|
||||
// SSRF: blockera privata hostnames
|
||||
let hostname: string;
|
||||
let parsedUrl: URL;
|
||||
try {
|
||||
hostname = new URL(sourceUrl).hostname;
|
||||
parsedUrl = new URL(protocolNormalized);
|
||||
} catch {
|
||||
throw new Error('Ogiltig bild-URL');
|
||||
}
|
||||
|
||||
// Protokollvalidering
|
||||
if (parsedUrl.protocol !== 'https:') {
|
||||
throw new Error('Bild-URL måste använda https://');
|
||||
}
|
||||
|
||||
const hostname = parsedUrl.hostname;
|
||||
|
||||
if (BLOCKED_HOSTNAMES.test(hostname)) {
|
||||
throw new Error('Bild-URL pekar på ett blockerat nätverk');
|
||||
}
|
||||
@@ -42,7 +47,7 @@ export async function downloadAndOptimizeImage(
|
||||
const timeout = setTimeout(() => controller.abort(), 10_000);
|
||||
let response: Response;
|
||||
try {
|
||||
response = await fetch(sourceUrl, {
|
||||
response = await fetch(parsedUrl.toString(), {
|
||||
signal: controller.signal,
|
||||
headers: { 'User-Agent': 'Mozilla/5.0 (compatible; RecipeApp/1.0)' },
|
||||
});
|
||||
|
||||
@@ -19,24 +19,22 @@ export class GenericRecipeParser extends RecipeParser {
|
||||
// Extrahera og:image för bildurl-fallback
|
||||
const ogImage = this.extractOgImage(html);
|
||||
|
||||
// Försöka extrahera JSON-LD recipe data
|
||||
const jsonLdMatch = html.match(
|
||||
/<script[^>]*type="application\/ld\+json"[^>]*>([\s\S]*?)<\/script>/i
|
||||
);
|
||||
// Försöka extrahera JSON-LD recipe data (flera script-taggar är vanligt)
|
||||
const jsonLdRegex =
|
||||
/<script[^>]*type="application\/ld\+json"[^>]*>([\s\S]*?)<\/script>/gi;
|
||||
let jsonLdMatch: RegExpExecArray | null;
|
||||
while ((jsonLdMatch = jsonLdRegex.exec(html)) !== null) {
|
||||
const rawJson = jsonLdMatch[1]?.trim();
|
||||
if (!rawJson) continue;
|
||||
|
||||
if (jsonLdMatch) {
|
||||
try {
|
||||
const jsonData = JSON.parse(jsonLdMatch[1]);
|
||||
const recipe =
|
||||
jsonData['@type'] === 'Recipe'
|
||||
? jsonData
|
||||
: jsonData['@graph']?.find((item: any) => item['@type'] === 'Recipe');
|
||||
|
||||
const parsedJson = JSON.parse(rawJson);
|
||||
const recipe = this.findRecipeInJsonLd(parsedJson);
|
||||
if (recipe) {
|
||||
this.logger.log('JSON-LD data found');
|
||||
return this.extractFromJsonLd(recipe, ogImage);
|
||||
}
|
||||
} catch (err) {
|
||||
} catch {
|
||||
this.logger.warn('JSON-LD parsing failed');
|
||||
}
|
||||
}
|
||||
@@ -45,6 +43,37 @@ export class GenericRecipeParser extends RecipeParser {
|
||||
return this.parseFromHtml(html, ogImage);
|
||||
}
|
||||
|
||||
/**
 * Locate the first Recipe node inside a parsed JSON-LD payload.
 *
 * Supported shapes:
 *  - a single node whose `@type` is `'Recipe'` or an array containing `'Recipe'`
 *  - an array of nodes (searched in order, recursively)
 *  - a node carrying an `@graph` that is an array or a single node; its entries
 *    are searched recursively, so nested arrays and nested graphs are covered
 *
 * @param jsonData - Value produced by `JSON.parse` on a ld+json script body; any shape is tolerated.
 * @returns The first matching node, or `null` when no Recipe node exists.
 */
private findRecipeInJsonLd(jsonData: any): any {
  // null/undefined and primitives (string, number, boolean) cannot hold a recipe node.
  if (!jsonData || typeof jsonData !== 'object') return null;

  if (Array.isArray(jsonData)) {
    for (const item of jsonData) {
      const recipe = this.findRecipeInJsonLd(item);
      if (recipe) return recipe;
    }
    return null;
  }

  // `@type` may be a single string or a list of types.
  const nodeType = jsonData['@type'];
  if (nodeType === 'Recipe' || (Array.isArray(nodeType) && nodeType.includes('Recipe'))) {
    return jsonData;
  }

  // Reuse the recursion for `@graph` instead of a one-level scan, so recipes
  // nested below the first graph level are found. A missing `@graph` is
  // undefined and falls out through the guard at the top.
  return this.findRecipeInJsonLd(jsonData['@graph']);
}
|
||||
|
||||
private extractOgImage(html: string): string | undefined {
|
||||
const match = html.match(/<meta[^>]+property="og:image"[^>]+content="([^"]+)"/i)
|
||||
|| html.match(/<meta[^>]+content="([^"]+)"[^>]+property="og:image"/i);
|
||||
|
||||
@@ -17,21 +17,17 @@ export class IcaRecipeParser extends RecipeParser {
|
||||
// Extrahera og:image för bildurl-fallback
|
||||
const ogImage = this.extractOgImage(html);
|
||||
|
||||
// Försöka extrahera JSON-LD recipe data (ICA använder detta)
|
||||
const jsonLdMatch = html.match(
|
||||
/<script[^>]*type="application\/ld\+json"[^>]*>([\s\S]*?)<\/script>/i
|
||||
);
|
||||
// Försöka extrahera JSON-LD recipe data (ICA använder ofta flera script-taggar)
|
||||
const jsonLdRegex =
|
||||
/<script[^>]*type="application\/ld\+json"[^>]*>([\s\S]*?)<\/script>/gi;
|
||||
let jsonLdMatch: RegExpExecArray | null;
|
||||
while ((jsonLdMatch = jsonLdRegex.exec(html)) !== null) {
|
||||
const rawJson = jsonLdMatch[1]?.trim();
|
||||
if (!rawJson) continue;
|
||||
|
||||
if (jsonLdMatch) {
|
||||
try {
|
||||
const jsonData = JSON.parse(jsonLdMatch[1]);
|
||||
|
||||
// Hitta recipe-objektet
|
||||
const recipe =
|
||||
jsonData['@type'] === 'Recipe'
|
||||
? jsonData
|
||||
: jsonData['@graph']?.find((item: any) => item['@type'] === 'Recipe');
|
||||
|
||||
const parsedJson = JSON.parse(rawJson);
|
||||
const recipe = this.findRecipeInJsonLd(parsedJson);
|
||||
if (recipe) {
|
||||
this.logger.log('JSON-LD recipe found');
|
||||
return this.extractFromJsonLd(recipe, ogImage);
|
||||
@@ -46,6 +42,37 @@ export class IcaRecipeParser extends RecipeParser {
|
||||
return this.parseFromHtml(html, ogImage);
|
||||
}
|
||||
|
||||
/**
 * Locate the first Recipe node inside a parsed JSON-LD payload.
 *
 * Supported shapes:
 *  - a single node whose `@type` is `'Recipe'` or an array containing `'Recipe'`
 *  - an array of nodes (searched in order, recursively)
 *  - a node carrying an `@graph` that is an array or a single node; its entries
 *    are searched recursively, so nested arrays and nested graphs are covered
 *
 * @param jsonData - Value produced by `JSON.parse` on a ld+json script body; any shape is tolerated.
 * @returns The first matching node, or `null` when no Recipe node exists.
 */
private findRecipeInJsonLd(jsonData: any): any {
  // null/undefined and primitives (string, number, boolean) cannot hold a recipe node.
  if (!jsonData || typeof jsonData !== 'object') return null;

  if (Array.isArray(jsonData)) {
    for (const item of jsonData) {
      const recipe = this.findRecipeInJsonLd(item);
      if (recipe) return recipe;
    }
    return null;
  }

  // `@type` may be a single string or a list of types.
  const nodeType = jsonData['@type'];
  if (nodeType === 'Recipe' || (Array.isArray(nodeType) && nodeType.includes('Recipe'))) {
    return jsonData;
  }

  // Reuse the recursion for `@graph` instead of a one-level scan, so recipes
  // nested below the first graph level are found. A missing `@graph` is
  // undefined and falls out through the guard at the top.
  return this.findRecipeInJsonLd(jsonData['@graph']);
}
|
||||
|
||||
private extractOgImage(html: string): string | undefined {
|
||||
const match = html.match(/<meta[^>]+property="og:image"[^>]+content="([^"]+)"/i)
|
||||
|| html.match(/<meta[^>]+content="([^"]+)"[^>]+property="og:image"/i);
|
||||
|
||||
@@ -20,6 +20,7 @@ export interface QuickImportResult {
|
||||
markdown: string;
|
||||
source: 'ica' | 'pdf' | 'image' | 'other';
|
||||
imageUrl?: string;
|
||||
imageWarning?: string;
|
||||
}
|
||||
|
||||
type UploadKind = 'pdf' | 'image';
|
||||
@@ -317,12 +318,24 @@ export class QuickImportService {
|
||||
|
||||
// Ladda ner och optimera bild om parser hittade en
|
||||
let imageUrl: string | undefined;
|
||||
let imageWarning: string | undefined;
|
||||
if (recipe.imageUrl) {
|
||||
try {
|
||||
imageUrl = await downloadAndOptimizeImage(recipe.imageUrl, IMAGE_DEST_DIR);
|
||||
this.logger.log(`Bild optimerad och sparad: ${imageUrl}`);
|
||||
} catch (imgErr) {
|
||||
this.logger.warn(`Kunde inte ladda ner bild: ${imgErr}`);
|
||||
const normalizedImageUrl = this.normalizeImageUrl(recipe.imageUrl, url);
|
||||
if (!normalizedImageUrl) {
|
||||
imageWarning = 'Receptbild kunde inte tolkas till en giltig URL.';
|
||||
this.logger.warn(
|
||||
`Kunde inte normalisera bild-URL: "${recipe.imageUrl}" (källsida: ${url})`,
|
||||
);
|
||||
} else {
|
||||
try {
|
||||
imageUrl = await downloadAndOptimizeImage(normalizedImageUrl, IMAGE_DEST_DIR);
|
||||
this.logger.log(`Bild optimerad och sparad: ${imageUrl}`);
|
||||
} catch (imgErr) {
|
||||
imageWarning = 'Receptbild kunde inte laddas ner.';
|
||||
this.logger.warn(
|
||||
`Kunde inte ladda ner bild: ${imgErr} (källa: ${normalizedImageUrl})`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -330,6 +343,7 @@ export class QuickImportService {
|
||||
markdown,
|
||||
source,
|
||||
imageUrl,
|
||||
imageWarning,
|
||||
};
|
||||
} catch (err) {
|
||||
const message = err instanceof Error ? err.message : 'Okänt fel vid scraping';
|
||||
@@ -340,6 +354,20 @@ export class QuickImportService {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Turn an image reference scraped from a page into an absolute URL string.
 *
 * Surrounding whitespace is stripped; a protocol-relative reference (`//host/path`)
 * is given an explicit `https:` scheme; anything else is resolved against the
 * address of the page it was found on.
 *
 * @param rawImageUrl - Image reference exactly as extracted by the parser.
 * @param pageUrl - Address of the source page, used as the base for relative references.
 * @returns The absolute URL, or `null` when the reference is blank or cannot be parsed.
 */
private normalizeImageUrl(rawImageUrl: string, pageUrl: string): string | null {
  const candidate = rawImageUrl.trim();
  if (candidate.length === 0) {
    return null;
  }

  // Pin protocol-relative references to https before resolving.
  let reference = candidate;
  if (candidate.startsWith('//')) {
    reference = `https:${candidate}`;
  }

  let resolved: URL;
  try {
    resolved = new URL(reference, pageUrl);
  } catch {
    // The URL constructor throws when the reference (or the base) is unparseable.
    return null;
  }

  return resolved.toString();
}
|
||||
|
||||
/**
|
||||
* Konvertera receptobjekt till Markdown-format
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user