/**
 * OG image scraper — fetches Open Graph metadata (og:image, og:title,
 * og:description) from an arbitrary URL.
 * Intended to work with Pinterest, Chefkoch, Allrecipes, blogs, etc.
 */

/** Browser-like User-Agent sent to every host without a specific override. */
const DEFAULT_USER_AGENT =
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';

/**
 * User-Agent overrides keyed by registrable host name; `default` is the fallback.
 * NOTE(review): the Pinterest hosts use a crawler UA, presumably because
 * Pinterest only serves OG tags to known crawlers — confirm.
 */
const USER_AGENTS: Record<string, string> = {
  'pinterest.com': 'Twitterbot/1.0',
  'pinterest.de': 'Twitterbot/1.0',
  'pin.it': 'Twitterbot/1.0',
  default: DEFAULT_USER_AGENT,
};

/**
 * Returns the lower-cased host name of `url`, or an empty string when the
 * URL cannot be parsed.
 */
function hostnameOf(url: string): string {
  try {
    return new URL(url).hostname.toLowerCase();
  } catch {
    return '';
  }
}

/**
 * Picks the User-Agent header for a request to `url`.
 *
 * Matching is done against the URL's host name (exact host or any
 * sub-domain of a configured entry), not against the whole URL string, so a
 * path or query string that merely mentions e.g. "pinterest.com" does not
 * trigger the override.
 *
 * @param url Absolute URL that is about to be fetched.
 * @returns The configured override, or the default browser User-Agent.
 */
function getUserAgent(url: string): string {
  const host = hostnameOf(url);
  if (host === '') return DEFAULT_USER_AGENT;

  for (const [domain, ua] of Object.entries(USER_AGENTS)) {
    if (domain === 'default') continue;
    if (host === domain || host.endsWith(`.${domain}`)) return ua;
  }
  return DEFAULT_USER_AGENT;
}

/** Open Graph fields extracted from a page; a field is absent when its tag was not found. */
export interface OgData {
  image?: string;
  title?: string;
  description?: string;
}

/**
 * Finds the `content` value of the first `<meta>` tag whose `property` or
 * `name` attribute equals `property`.
 *
 * Both attribute orders (property before content and content before
 * property) and both quote styles are accepted. The value is returned raw,
 * i.e. HTML entities are not decoded here.
 *
 * @param html Raw HTML document text.
 * @param property Meta property to look up, e.g. `og:image`.
 * @returns The raw attribute value, or `undefined` when no tag matches.
 */
function extractMetaContent(html: string, property: string): string | undefined {
  // Escape regex metacharacters so the property is matched literally.
  const key = property.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const attr = `(?:property|name)=["']${key}["']`;
  const content = `content=(?:"([^"]+)"|'([^']+)')`;
  const patterns = [
    new RegExp(`<meta[^>]+${attr}[^>]+${content}`, 'i'),
    new RegExp(`<meta[^>]+${content}[^>]+${attr}`, 'i'),
  ];

  for (const pattern of patterns) {
    const match = pattern.exec(html);
    // Group 1 is the double-quoted value, group 2 the single-quoted one.
    const value = match?.[1] ?? match?.[2];
    if (value) return value;
  }
  return undefined;
}

/**
 * Decodes JSON string escapes (`\/`, `\uXXXX`, ...) in the inner text of a
 * JSON string literal. Returns the input unchanged if it is not valid JSON
 * string content.
 */
function unescapeJsonString(raw: string): string {
  try {
    const parsed: unknown = JSON.parse(`"${raw}"`);
    return typeof parsed === 'string' ? parsed : raw;
  } catch {
    return raw;
  }
}

/**
 * Fetches `url` and extracts its Open Graph image, title and description.
 *
 * The request follows redirects and is aborted after 10 seconds. For URLs
 * whose host name contains "pinterest", the og:image is replaced by the
 * `orig` image URL found in the page's embedded JSON when one is present.
 *
 * @param url Absolute URL of the page to scrape.
 * @returns The Open Graph fields that were found; missing tags are omitted.
 * @throws Error with message `HTTP <status>` when the response is not OK;
 *         errors raised by `fetch` (network failure, timeout) propagate.
 */
export async function scrapeOgData(url: string): Promise<OgData> {
  const res = await fetch(url, {
    headers: { 'User-Agent': getUserAgent(url) },
    redirect: 'follow',
    signal: AbortSignal.timeout(10000),
  });
  if (!res.ok) throw new Error(`HTTP ${res.status}`);

  const html = await res.text();
  const result: OgData = {};

  // Attribute values are HTML-escaped in the markup (e.g. `&amp;` inside
  // image URLs), so every extracted value is entity-decoded.
  const image = extractMetaContent(html, 'og:image');
  if (image) result.image = decodeHtmlEntities(image);

  const title = extractMetaContent(html, 'og:title');
  if (title) result.title = decodeHtmlEntities(title);

  const description = extractMetaContent(html, 'og:description');
  if (description) result.description = decodeHtmlEntities(description);

  // Pinterest special case: prefer the original-resolution image from the
  // embedded JSON data over the (smaller) og:image.
  if (hostnameOf(url).includes('pinterest') && result.image) {
    const origMatch = html.match(/"orig":\s*\{[^}]*"url"\s*:\s*"([^"]+)"/);
    // The capture is JSON string content, so JSON escapes must be undone.
    if (origMatch?.[1]) result.image = unescapeJsonString(origMatch[1]);
  }

  return result;
}
/**
 * Decodes the HTML character references that commonly appear in meta-tag
 * attribute values.
 *
 * Handles the named entities `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`
 * and `&nbsp;` as well as decimal (`&#39;`) and hexadecimal (`&#x27;`)
 * numeric references. Unknown or out-of-range references are left
 * untouched.
 *
 * Decoding happens in a single pass, so already-escaped text is not decoded
 * twice (`&amp;lt;` becomes `&lt;`, not `<`).
 *
 * @param str Text that may contain HTML character references.
 * @returns The text with recognised references replaced by their characters.
 */
function decodeHtmlEntities(str: string): string {
  // A Map (rather than a plain object) avoids accidental hits on inherited
  // keys such as "constructor".
  const named = new Map<string, string>([
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
    ['nbsp', '\u00a0'],
  ]);

  return str.replace(
    /&(#x[0-9a-f]+|#\d+|[a-z][a-z0-9]*);/gi,
    (match: string, body: string): string => {
      if (body.startsWith('#')) {
        const isHex = body.charAt(1).toLowerCase() === 'x';
        const codePoint = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
        // String.fromCodePoint throws outside the Unicode range; keep such
        // references (and NUL) as written.
        const valid = Number.isInteger(codePoint) && codePoint > 0 && codePoint <= 0x10ffff;
        return valid ? String.fromCodePoint(codePoint) : match;
      }
      return named.get(body) ?? match;
    },
  );
}