feat: OG image scraper - auto-fetch recipe images from Pinterest/URLs

- New backend service: og-scraper.service.ts (extracts og:image, og:title, og:description)
- Pinterest support via Twitterbot UA (gets original resolution from i.pinimg.com)
- Works with Chefkoch, Allrecipes, blogs, any site with og:image meta tags
- GET /api/og-preview?url= for preview
- POST /api/recipes/:id/fetch-image to download + process with sharp
- Frontend: 'Bild holen' button appears when source URL is filled
- Auto-fills title & description from OG data if empty
- Images processed to WebP, max 1200px wide
This commit is contained in:
clawd
2026-02-18 10:15:18 +00:00
parent ee452efa6a
commit 60ca01fb94
8 changed files with 216 additions and 10 deletions

View File

@@ -0,0 +1,73 @@
/**
* OG Image Scraper — holt og:image aus beliebiger URL
* Funktioniert mit Pinterest, Chefkoch, Allrecipes, Blogs etc.
*/
/** Browser-like User-Agent used for every host that has no dedicated entry. */
const DEFAULT_USER_AGENT =
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';

/**
 * User-Agent overrides keyed by domain; the `default` key holds the fallback.
 * Pinterest domains are requested as Twitterbot (presumably because Pinterest
 * serves usable Open Graph markup to that crawler — confirm before changing).
 */
const USER_AGENTS: Record<string, string> = {
  'pinterest.com': 'Twitterbot/1.0',
  'pinterest.de': 'Twitterbot/1.0',
  'pin.it': 'Twitterbot/1.0',
  default: DEFAULT_USER_AGENT,
};

/**
 * Picks the User-Agent header value for a request to `url`.
 *
 * The decision is based on the parsed hostname: an entry applies when the host
 * equals the listed domain or is a subdomain of it. Matching on the hostname
 * (instead of a substring of the whole URL) prevents false positives such as
 * `https://spin.it.example.org/` or `https://example.com/?ref=pinterest.com`.
 *
 * @param url Absolute URL that is about to be fetched.
 * @returns The matching override, or the default browser User-Agent when no
 *   entry applies or `url` cannot be parsed.
 */
function getUserAgent(url: string): string {
  let host: string;
  try {
    host = new URL(url).hostname.toLowerCase();
  } catch {
    // Unparseable input: let the caller's fetch report the error, use the default UA.
    return DEFAULT_USER_AGENT;
  }

  for (const [domain, ua] of Object.entries(USER_AGENTS)) {
    if (domain === 'default') continue; // fallback entry, not a real domain
    if (host === domain || host.endsWith(`.${domain}`)) return ua;
  }
  return DEFAULT_USER_AGENT;
}
/**
 * Open Graph metadata extracted from a page.
 * Every field is optional and stays unset when the page does not provide it.
 */
export interface OgData {
/** Image URL taken from the `og:image` meta tag (for Pinterest pages it may be replaced by the "orig" URL found in the page). */
image?: string;
/** Text of the `og:title` meta tag. */
title?: string;
/** Text of the `og:description` meta tag. */
description?: string;
}
/**
 * Finds the first `<meta>` tag whose `property` or `name` attribute equals
 * `property` (case-insensitive) and returns its non-empty `content` value.
 * Accepts double-quoted, single-quoted and unquoted attribute values in any order.
 */
function extractMetaContent(html: string, property: string): string | undefined {
  // Quoted attribute values are consumed as a unit so a '>' inside them does not end the tag.
  const tagPattern = /<meta\b(?:[^>"']|"[^"]*"|'[^']*')*>/gi;
  const attrPattern = /([^\s"'<>\/=]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+))/g;
  const wanted = property.toLowerCase();

  for (const tag of html.match(tagPattern) ?? []) {
    let isWanted = false;
    let content: string | undefined;
    for (const [, rawName = '', doubleQuoted, singleQuoted, bare] of tag.matchAll(attrPattern)) {
      const attrName = rawName.toLowerCase();
      const value = doubleQuoted ?? singleQuoted ?? bare ?? '';
      if (attrName === 'property' || attrName === 'name') {
        if (value.trim().toLowerCase() === wanted) isWanted = true;
      } else if (attrName === 'content' && content === undefined) {
        content = value;
      }
    }
    if (isWanted && content) return content;
  }
  return undefined;
}

/** Resolves `candidate` against `base`; returns `candidate` unchanged if it cannot be resolved. */
function resolveUrl(candidate: string, base: string): string {
  try {
    return new URL(candidate, base).href;
  } catch {
    return candidate;
  }
}

/** Decodes the inside of a JSON string literal (e.g. `\/`, `\u002F`); returns `raw` if it is not valid JSON. */
function decodeJsonString(raw: string): string {
  try {
    const parsed: unknown = JSON.parse(`"${raw}"`);
    return typeof parsed === 'string' ? parsed : raw;
  } catch {
    return raw;
  }
}

/** True when the hostname of `candidate` contains a `pinterest` label (pinterest.com, de.pinterest.com, pinterest.co.uk, ...). */
function isPinterestHost(candidate: string): boolean {
  try {
    return new URL(candidate).hostname.toLowerCase().split('.').includes('pinterest');
  } catch {
    return false;
  }
}

/**
 * Downloads `url` and extracts its Open Graph image, title and description.
 *
 * @param url Absolute http(s) URL of the page to scrape.
 * @returns The Open Graph values that were found; missing values are left unset.
 * @throws TypeError if `url` is malformed or does not use http/https.
 * @throws Error with message `HTTP <status>` if the server answers with a non-2xx status.
 *   Network failures and the 10 s timeout surface as the errors thrown by `fetch`.
 */
export async function scrapeOgData(url: string): Promise<OgData> {
  // The URL is caller-supplied: only allow web schemes before handing it to fetch.
  // NOTE(review): hosts on private/internal networks are not blocked here (SSRF) —
  // add an allow/deny check if this endpoint is reachable by untrusted users.
  const target = new URL(url);
  if (target.protocol !== 'http:' && target.protocol !== 'https:') {
    throw new TypeError(`Unsupported URL protocol: ${target.protocol}`);
  }

  const res = await fetch(url, {
    headers: { 'User-Agent': getUserAgent(url) },
    redirect: 'follow',
    signal: AbortSignal.timeout(10000),
  });
  if (!res.ok) throw new Error(`HTTP ${res.status}`);

  const html = await res.text();
  // After redirects (e.g. short links) the final URL is the correct base for relative paths.
  const finalUrl = res.url || url;
  const result: OgData = {};

  // Attribute values are HTML-escaped; an image URL with '&amp;' in its query would
  // otherwise be unusable. Relative or protocol-relative URLs are made absolute.
  const rawImage = extractMetaContent(html, 'og:image');
  if (rawImage) {
    const image = decodeHtmlEntities(rawImage).trim();
    if (image) result.image = resolveUrl(image, finalUrl);
  }

  const rawTitle = extractMetaContent(html, 'og:title');
  if (rawTitle) result.title = decodeHtmlEntities(rawTitle);

  const rawDescription = extractMetaContent(html, 'og:description');
  if (rawDescription) result.description = decodeHtmlEntities(rawDescription);

  // Pinterest special case: prefer the "orig" image URL embedded in the page's JSON
  // over the og:image value. Checking the final URL too covers redirecting short links.
  if (result.image && (isPinterestHost(url) || isPinterestHost(finalUrl))) {
    const origMatch = html.match(/"orig":\s*\{[^}]*"url"\s*:\s*"((?:[^"\\]|\\.)+)"/);
    // The capture is a JSON string body and may contain escapes such as '\/'.
    if (origMatch?.[1]) result.image = decodeJsonString(origMatch[1]);
  }

  return result;
}
/** Named character references that are decoded; lookups are case-sensitive, as in HTML. */
const NAMED_HTML_ENTITIES: ReadonlyMap<string, string> = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
  ['nbsp', '\u00a0'],
  ['auml', '\u00e4'],
  ['ouml', '\u00f6'],
  ['uuml', '\u00fc'],
  ['Auml', '\u00c4'],
  ['Ouml', '\u00d6'],
  ['Uuml', '\u00dc'],
  ['szlig', '\u00df'],
]);

/**
 * Decodes HTML character references in `str`.
 *
 * Handles decimal (`&#228;`) and hexadecimal (`&#xE4;`) numeric references plus a
 * small set of named ones. The string is processed in a single pass, so text that
 * was escaped once is unescaped exactly once (`&amp;lt;` becomes `&lt;`, not `<`).
 *
 * @param str Text taken from an HTML attribute value.
 * @returns The decoded text; unknown names and invalid code points are left untouched.
 */
function decodeHtmlEntities(str: string): string {
  return str.replace(
    /&(#[xX][0-9a-fA-F]+|#[0-9]+|[a-zA-Z][a-zA-Z0-9]*);/g,
    (entity: string, body: string): string => {
      if (!body.startsWith('#')) {
        return NAMED_HTML_ENTITIES.get(body) ?? entity;
      }
      const isHex = body[1] === 'x' || body[1] === 'X';
      const codePoint = isHex ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10);
      // Reject NUL, lone surrogates and values beyond the Unicode range.
      const isValid =
        codePoint > 0 && codePoint <= 0x10ffff && (codePoint < 0xd800 || codePoint > 0xdfff);
      return isValid ? String.fromCodePoint(codePoint) : entity;
    },
  );
}