feat: OG image scraper - auto-fetch recipe images from Pinterest/URLs
- New backend service: og-scraper.service.ts (extracts og:image, og:title, og:description)
- Pinterest support via Twitterbot UA (gets original resolution from i.pinimg.com)
- Works with Chefkoch, Allrecipes, blogs, any site with og:image meta tags
- GET /api/og-preview?url= for preview
- POST /api/recipes/:id/fetch-image to download + process with sharp
- Frontend: 'Bild holen' button appears when source URL is filled
- Auto-fills title & description from OG data if empty
- Images processed to WebP, max 1200px wide
This commit is contained in:
73
backend/src/services/og-scraper.service.ts
Normal file
73
backend/src/services/og-scraper.service.ts
Normal file
@@ -0,0 +1,73 @@
|
||||
/**
 * OG image scraper — fetches og:image (plus og:title and og:description) from an arbitrary URL.
 * Works with Pinterest, Chefkoch, Allrecipes, blogs, etc.
 */
|
||||
|
||||
/** Browser-like User-Agent sent to every host without a dedicated entry. */
const DEFAULT_USER_AGENT =
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';

/**
 * User-Agent overrides keyed by registrable domain; the `default` key holds
 * the fallback. The Pinterest domains are requested as Twitterbot.
 */
const USER_AGENTS: Record<string, string> = {
  'pinterest.com': 'Twitterbot/1.0',
  'pinterest.de': 'Twitterbot/1.0',
  'pin.it': 'Twitterbot/1.0',
  default: DEFAULT_USER_AGENT,
};

/**
 * Picks the User-Agent header to use when requesting `url`.
 *
 * The decision is made on the URL's hostname: a table entry applies when the
 * host equals the listed domain or is a subdomain of it. Matching on the whole
 * URL string would also fire on look-alike hosts (`spin.it`) or on a domain
 * that merely appears in the path or query string.
 *
 * @param url - Address that is about to be fetched.
 * @returns The matching override, or the default browser User-Agent.
 */
function getUserAgent(url: string): string {
  let host: string | undefined;
  try {
    host = new URL(url).hostname.toLowerCase();
  } catch {
    // Not a parseable absolute URL (e.g. missing scheme): keep the historical
    // substring behaviour below so such inputs are classified as before.
    host = undefined;
  }

  for (const [domain, ua] of Object.entries(USER_AGENTS)) {
    if (domain === 'default') continue;
    const matches =
      host !== undefined
        ? host === domain || host.endsWith(`.${domain}`)
        : url.includes(domain);
    if (matches) return ua;
  }
  return USER_AGENTS.default ?? DEFAULT_USER_AGENT;
}
|
||||
|
||||
/**
 * Open Graph preview data scraped from a page.
 * Every field is optional; a field is left unset when no matching meta tag
 * was found in the page's HTML.
 */
export interface OgData {
  /** Content of the `og:image` meta tag (an image URL). */
  image?: string;
  /** Content of the `og:title` meta tag. */
  title?: string;
  /** Content of the `og:description` meta tag. */
  description?: string;
}
|
||||
|
||||
/**
 * Returns the `content` value of the first `<meta>` tag whose `property` or
 * `name` attribute equals `property`. Both attribute orders and both quote
 * styles are accepted. The value is returned raw (HTML entities not decoded).
 */
function extractMetaContent(html: string, property: string): string | undefined {
  const nameAttr = `(?:property|name)=["']${property}["']`;
  const contentAttr = `content=(?:"([^"]+)"|'([^']+)')`;
  const patterns = [
    new RegExp(`<meta[^>]+${nameAttr}[^>]+${contentAttr}`, 'i'),
    new RegExp(`<meta[^>]+${contentAttr}[^>]+${nameAttr}`, 'i'),
  ];
  for (const pattern of patterns) {
    const match = pattern.exec(html);
    const value = match?.[1] ?? match?.[2];
    if (value) return value;
  }
  return undefined;
}

/** Resolves a relative or protocol-relative URL against `base`; absolute URLs pass through untouched. */
function toAbsoluteUrl(candidate: string, base: string): string {
  if (/^[a-z][a-z0-9+.-]*:/i.test(candidate)) return candidate;
  try {
    return new URL(candidate, base).toString();
  } catch {
    return candidate;
  }
}

/** Undoes JSON string escaping (`\/`, `\uXXXX`, ...); returns the input unchanged if it is not a valid JSON string body. */
function unescapeJsonString(raw: string): string {
  try {
    const parsed: unknown = JSON.parse(`"${raw}"`);
    return typeof parsed === 'string' ? parsed : raw;
  } catch {
    return raw;
  }
}

/**
 * Downloads a page and extracts its Open Graph image, title and description.
 *
 * The request follows redirects, uses a host-specific User-Agent and is
 * aborted after 10 seconds. For Pinterest pages the image is replaced by the
 * `orig` rendition found in the page's embedded JSON, when present.
 *
 * @param url - Absolute http(s) URL of the page to inspect.
 * @returns The fields that were found; missing tags leave the field unset.
 * @throws TypeError if `url` is not a valid absolute URL.
 * @throws Error if the protocol is not http(s) or the response status is not 2xx.
 */
export async function scrapeOgData(url: string): Promise<OgData> {
  // Reject anything that is not plain web traffic before touching the network.
  const { protocol } = new URL(url);
  if (protocol !== 'http:' && protocol !== 'https:') {
    throw new Error(`Unsupported URL protocol: ${protocol}`);
  }
  // NOTE(review): the URL appears to come from API callers (og-preview?url=).
  // Nothing here prevents requests to loopback/private addresses (SSRF) and the
  // response body size is unbounded — confirm whether a guard exists upstream.

  const res = await fetch(url, {
    headers: { 'User-Agent': getUserAgent(url) },
    redirect: 'follow',
    signal: AbortSignal.timeout(10000),
  });

  if (!res.ok) throw new Error(`HTTP ${res.status}`);

  const html = await res.text();
  // Final address after redirects; needed for relative URLs and short links.
  const pageUrl = res.url || url;
  const result: OgData = {};

  const image = extractMetaContent(html, 'og:image');
  // Attribute values are entity-encoded in HTML, so `&amp;` in a query string
  // must be decoded before the URL is usable.
  if (image) result.image = toAbsoluteUrl(decodeHtmlEntities(image), pageUrl);

  const title = extractMetaContent(html, 'og:title');
  if (title) result.title = decodeHtmlEntities(title);

  const description = extractMetaContent(html, 'og:description');
  if (description) result.description = decodeHtmlEntities(description);

  // Pinterest special case: prefer the original-resolution image from the
  // embedded JSON over the downscaled og:image. The redirected URL is checked
  // too so pin.it short links qualify.
  const isPinterest = url.includes('pinterest') || pageUrl.includes('pinterest');
  if (isPinterest && result.image) {
    const rawOrig = /"orig":\s*\{[^}]*"url"\s*:\s*"([^"]+)"/.exec(html)?.[1];
    if (rawOrig) result.image = unescapeJsonString(rawOrig);
  }

  return result;
}
|
||||
|
||||
/** Named character references understood by {@link decodeHtmlEntities}. */
const NAMED_HTML_ENTITIES: ReadonlyMap<string, string> = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
  ['nbsp', '\u00A0'],
  ['auml', 'ä'],
  ['ouml', 'ö'],
  ['uuml', 'ü'],
  ['Auml', 'Ä'],
  ['Ouml', 'Ö'],
  ['Uuml', 'Ü'],
  ['szlig', 'ß'],
]);

/**
 * Decodes HTML character references in `str`.
 *
 * Handles the named entities listed in `NAMED_HTML_ENTITIES` plus decimal
 * (`&#39;`) and hexadecimal (`&#x27;`) numeric references. Everything is
 * replaced in a single pass, so already-decoded output is never decoded a
 * second time (`&amp;lt;` becomes `&lt;`, not `<`). Unknown or invalid
 * references are left untouched.
 *
 * @param str - Text taken from an HTML attribute or text node.
 * @returns The text with recognised references replaced by their characters.
 */
function decodeHtmlEntities(str: string): string {
  return str.replace(
    /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|([a-zA-Z][a-zA-Z0-9]*));/g,
    (match: string, dec?: string, hex?: string, name?: string): string => {
      if (name !== undefined) return NAMED_HTML_ENTITIES.get(name) ?? match;

      const codePoint = dec !== undefined ? parseInt(dec, 10) : parseInt(hex ?? '', 16);
      const isValid =
        Number.isInteger(codePoint) &&
        codePoint > 0 &&
        codePoint <= 0x10ffff &&
        // Lone surrogates are not valid characters on their own.
        !(codePoint >= 0xd800 && codePoint <= 0xdfff);
      return isValid ? String.fromCodePoint(codePoint) : match;
    },
  );
}
|
||||
Reference in New Issue
Block a user