feat: OG image scraper - auto-fetch recipe images from Pinterest/URLs

- New backend service: og-scraper.service.ts (extracts og:image, og:title, og:description)
- Pinterest support via Twitterbot UA (gets original resolution from i.pinimg.com)
- Works with Chefkoch, Allrecipes, blogs, and any site with og:image meta tags
- GET /api/og-preview?url= for preview
- POST /api/recipes/:id/fetch-image to download + process with sharp
- Frontend: 'Bild holen' button appears when source URL is filled
- Auto-fills title & description from OG data if empty
- Images processed to WebP, max 1200px wide
2026-02-18 10:15:18 +00:00
parent ee452efa6a
commit 60ca01fb94
8 changed files with 216 additions and 10 deletions
--- a/backend/data/images/recipes/01KHPTHD73XRVXTKRMFMAWAPA9/hero.webp
+++ b/backend/data/images/recipes/01KHPTHD73XRVXTKRMFMAWAPA9/hero.webp
--- a/backend/data/recipes.db-shm
+++ b/backend/data/recipes.db-shm
--- a/backend/data/recipes.db-wal
+++ b/backend/data/recipes.db-wal
--- a/backend/src/app.ts
+++ b/backend/src/app.ts
@@ -8,6 +8,7 @@ import { shoppingRoutes } from './routes/shopping.js';
 import { tagRoutes } from './routes/tags.js';
 import { imageRoutes } from './routes/images.js';
 import { botRoutes } from './routes/bot.js';
+import { ogScrapeRoutes } from './routes/og-scrape.js';

 export async function buildApp() {
  const app = Fastify({ logger: true });
@@ -39,5 +40,8 @@ export async function buildApp() {
  await app.register(botRoutes);
  await app.after();

+  await app.register(ogScrapeRoutes);
+  await app.after();
+
  return app;
 }
--- a/backend/src/routes/og-scrape.ts
+++ b/backend/src/routes/og-scrape.ts
@@ -0,0 +1,76 @@
+import { FastifyInstance } from 'fastify';
+import { scrapeOgData } from '../services/og-scraper.service.js';
+import { getDb } from '../db/connection.js';
+import sharp from 'sharp';
+import fs from 'fs';
+import path from 'path';
+import { fileURLToPath } from 'url';
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+const DATA_DIR = path.resolve(__dirname, '../../data');
+
+/** Converts an unknown thrown value into a message suitable for an error response. */
+function describeError(err: unknown): string {
+  return err instanceof Error ? err.message : String(err);
+}
+
+/**
+ * Parses `raw` (optionally relative to `base`) and returns the URL only when it
+ * uses the http or https scheme; returns null for anything else or on parse failure.
+ */
+function parseHttpUrl(raw: string, base?: URL): URL | null {
+  try {
+    const parsed = new URL(raw.trim(), base);
+    return parsed.protocol === 'http:' || parsed.protocol === 'https:' ? parsed : null;
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Registers the Open Graph scraping routes.
+ *
+ * - `GET /api/og-preview?url=` returns the scraped OG data without downloading anything.
+ * - `POST /api/recipes/:id/fetch-image` (JSON body `{ url }`) downloads the page's OG image,
+ *   converts it to WebP (max 1200px wide) as `hero.webp` in the recipe's image directory,
+ *   and updates the recipe's `image_url` and `source_url`.
+ *
+ * Responses: 400 for a missing/invalid URL, 404 for an unknown recipe or a page without
+ * a usable image, 502 when scraping, downloading or processing fails.
+ *
+ * NOTE(review): only the URL scheme is validated. Requests to private/internal hosts are
+ * not blocked and the download size is unbounded — consider restricting both if this
+ * endpoint is reachable by untrusted clients.
+ */
+export async function ogScrapeRoutes(app: FastifyInstance) {
+  // Preview: Just fetch OG data without downloading
+  app.get('/api/og-preview', async (request, reply) => {
+    const { url } = request.query as { url?: unknown };
+    if (!url) return reply.status(400).send({ error: 'url parameter required' });
+
+    // A repeated query parameter arrives as an array; only a single string is valid.
+    const target = typeof url === 'string' ? parseHttpUrl(url) : null;
+    if (!target) return reply.status(400).send({ error: 'url must be an absolute http(s) URL' });
+
+    try {
+      return await scrapeOgData(target.href);
+    } catch (err: unknown) {
+      return reply.status(502).send({ error: `Failed to scrape: ${describeError(err)}` });
+    }
+  });
+
+  // Download OG image and attach to recipe
+  app.post('/api/recipes/:id/fetch-image', async (request, reply) => {
+    const { id } = request.params as { id: string };
+    // The body is undefined when the client sends no JSON payload.
+    const { url } = (request.body ?? {}) as { url?: unknown };
+
+    if (typeof url !== 'string' || url === '') {
+      return reply.status(400).send({ error: 'url required' });
+    }
+    const target = parseHttpUrl(url);
+    if (!target) return reply.status(400).send({ error: 'url must be an absolute http(s) URL' });
+
+    const db = getDb();
+    const recipe = db.prepare('SELECT id FROM recipes WHERE id = ?').get(id) as { id: string } | undefined;
+    if (!recipe) return reply.status(404).send({ error: 'Recipe not found' });
+
+    try {
+      // Scrape OG data
+      const ogData = await scrapeOgData(target.href);
+      // Resolve against the page URL so relative and protocol-relative image paths work.
+      const imageSource = ogData.image ? parseHttpUrl(ogData.image, target) : null;
+      if (!imageSource) return reply.status(404).send({ error: 'No image found at URL' });
+
+      // Download image
+      const imgRes = await fetch(imageSource, {
+        headers: { 'User-Agent': 'Mozilla/5.0' },
+        signal: AbortSignal.timeout(15000),
+      });
+      if (!imgRes.ok) throw new Error(`Image download failed: ${imgRes.status}`);
+
+      const buffer = Buffer.from(await imgRes.arrayBuffer());
+
+      // Process with sharp → WebP, max 1200px wide. Encoding to memory first means an
+      // existing hero.webp is only replaced after processing has succeeded.
+      const webp = await sharp(buffer)
+        .resize(1200, null, { withoutEnlargement: true })
+        .webp({ quality: 85 })
+        .toBuffer();
+
+      const imgDir = path.join(DATA_DIR, 'images', 'recipes', id);
+      await fs.promises.mkdir(imgDir, { recursive: true });
+      await fs.promises.writeFile(path.join(imgDir, 'hero.webp'), webp);
+
+      // Update recipe
+      const imageUrl = `/images/recipes/${id}/hero.webp`;
+      db.prepare("UPDATE recipes SET image_url = ?, source_url = ?, updated_at = datetime('now') WHERE id = ?")
+        .run(imageUrl, url, id);
+
+      return {
+        ok: true,
+        image_url: imageUrl,
+        og_title: ogData.title,
+        og_description: ogData.description,
+      };
+    } catch (err: unknown) {
+      return reply.status(502).send({ error: `Failed: ${describeError(err)}` });
+    }
+  });
+}
--- a/backend/src/services/og-scraper.service.ts
+++ b/backend/src/services/og-scraper.service.ts
@@ -0,0 +1,73 @@
+/**
+ * OG Image Scraper — holt og:image aus beliebiger URL
+ * Funktioniert mit Pinterest, Chefkoch, Allrecipes, Blogs etc.
+ */
+
+/** Browser-like User-Agent sent to every host without a specific override. */
+const DEFAULT_USER_AGENT =
+  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
+
+/**
+ * User-Agent overrides keyed by domain; the `default` entry is the fallback.
+ * Pinterest domains are requested with a Twitterbot UA.
+ */
+const USER_AGENTS: Record<string, string> = {
+  'pinterest.com': 'Twitterbot/1.0',
+  'pinterest.de': 'Twitterbot/1.0',
+  'pin.it': 'Twitterbot/1.0',
+  default: DEFAULT_USER_AGENT,
+};
+
+/**
+ * Picks the User-Agent header for a request to `url`.
+ *
+ * The domain is matched against the URL's hostname on label boundaries, so
+ * `www.pinterest.com` and `www.pinterest.com.mx` match `pinterest.com`, while a
+ * domain that merely appears in the path or query string (e.g. `/spin.it-cake`)
+ * does not.
+ *
+ * @param url Absolute URL about to be fetched.
+ * @returns The domain-specific User-Agent, or the default one when no domain
+ *   matches or `url` cannot be parsed.
+ */
+function getUserAgent(url: string): string {
+  let hostname: string;
+  try {
+    hostname = new URL(url).hostname.toLowerCase();
+  } catch {
+    return DEFAULT_USER_AGENT;
+  }
+
+  // Wrapping in dots turns "domain occurs as whole labels" into a substring test.
+  const dottedHost = `.${hostname}.`;
+  for (const [domain, ua] of Object.entries(USER_AGENTS)) {
+    if (domain !== 'default' && dottedHost.includes(`.${domain}.`)) return ua;
+  }
+  return DEFAULT_USER_AGENT;
+}
+
+/**
+ * Open Graph metadata extracted from a page by `scrapeOgData`.
+ * Each field is left unset when no corresponding value was found.
+ */
+export interface OgData {
+  /**
+   * Image URL from the page's `og:image` meta tag; for Pinterest URLs it may be
+   * replaced by the `orig` image URL found in the page's embedded JSON.
+   */
+  image?: string;
+  /** Content of the `og:title` meta tag with HTML entities decoded. */
+  title?: string;
+  /** Content of the `og:description` meta tag with HTML entities decoded. */
+  description?: string;
+}
+
+/**
+ * Returns the raw `content` attribute of the first `<meta>` tag whose `property`
+ * or `name` attribute equals `key` (case-insensitive). Attribute order does not
+ * matter and values may be double-quoted, single-quoted or unquoted.
+ */
+function findMetaContent(html: string, key: string): string | undefined {
+  for (const tag of html.match(/<meta\b[^>]*>/gi) ?? []) {
+    const attrs = new Map<string, string>();
+    for (const m of tag.matchAll(/([^\s"'<>\/=]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+))/g)) {
+      const attrName = (m[1] ?? '').toLowerCase();
+      // Like HTML parsers, keep the first occurrence of a duplicated attribute.
+      if (!attrs.has(attrName)) attrs.set(attrName, m[2] ?? m[3] ?? m[4] ?? '');
+    }
+    const isWanted = [attrs.get('property'), attrs.get('name')].some(
+      (value) => value?.trim().toLowerCase() === key,
+    );
+    const content = attrs.get('content');
+    if (isWanted && content) return content;
+  }
+  return undefined;
+}
+
+/** Resolves `candidate` against `base`; returns `candidate` unchanged if it cannot be parsed. */
+function toAbsoluteUrl(candidate: string, base: string): string {
+  try {
+    return new URL(candidate.trim(), base).href;
+  } catch {
+    return candidate;
+  }
+}
+
+/** True when the hostname of `raw` contains "pinterest"; false for unparseable input. */
+function isPinterestHost(raw: string): boolean {
+  try {
+    return new URL(raw).hostname.toLowerCase().includes('pinterest');
+  } catch {
+    return false;
+  }
+}
+
+/** Decodes JSON string escapes (e.g. `\/`, `\u002F`); returns `raw` if it is not a valid JSON string body. */
+function decodeJsonString(raw: string): string {
+  try {
+    const parsed: unknown = JSON.parse(`"${raw}"`);
+    return typeof parsed === 'string' ? parsed : raw;
+  } catch {
+    return raw;
+  }
+}
+
+/**
+ * Fetches `url` and extracts its Open Graph image, title and description.
+ *
+ * Redirects are followed and the request is aborted after 10 seconds. All
+ * extracted values are HTML-entity-decoded; the image URL is additionally
+ * resolved against the final (post-redirect) page URL so relative paths work.
+ * For Pinterest pages — including `pin.it` short links that redirect there —
+ * the `orig` image URL from the embedded JSON replaces `og:image` when present.
+ *
+ * @param url Absolute URL of the page to scrape.
+ * @returns The OG fields that were found; missing fields are left unset.
+ * @throws Error with message `HTTP <status>` on a non-2xx response; fetch
+ *   errors (invalid URL, network failure, timeout) propagate unchanged.
+ */
+export async function scrapeOgData(url: string): Promise<OgData> {
+  const res = await fetch(url, {
+    headers: { 'User-Agent': getUserAgent(url) },
+    redirect: 'follow',
+    signal: AbortSignal.timeout(10000),
+  });
+
+  if (!res.ok) throw new Error(`HTTP ${res.status}`);
+
+  const html = await res.text();
+  // After redirects the response URL is the real page location.
+  const pageUrl = res.url || url;
+  const result: OgData = {};
+
+  // Extract og:image (entity-decoded: URLs in attributes commonly contain &amp;)
+  const rawImage = findMetaContent(html, 'og:image');
+  if (rawImage) result.image = toAbsoluteUrl(decodeHtmlEntities(rawImage), pageUrl);
+
+  // Extract og:title
+  const rawTitle = findMetaContent(html, 'og:title');
+  if (rawTitle) result.title = decodeHtmlEntities(rawTitle);
+
+  // Extract og:description
+  const rawDescription = findMetaContent(html, 'og:description');
+  if (rawDescription) result.description = decodeHtmlEntities(rawDescription);
+
+  // Pinterest special: higher-res image from JSON data
+  if (result.image && (isPinterestHost(url) || isPinterestHost(pageUrl))) {
+    // Try to get orig resolution instead of 736x
+    const origMatch = html.match(/"orig":\s*\{[^}]*"url"\s*:\s*"([^"]+)"/);
+    if (origMatch?.[1]) result.image = decodeJsonString(origMatch[1]);
+  }
+
+  return result;
+}
+
+/** Named character references handled by `decodeHtmlEntities` (names are case-sensitive). */
+const NAMED_HTML_ENTITIES = new Map<string, string>([
+  ['amp', '&'],
+  ['lt', '<'],
+  ['gt', '>'],
+  ['quot', '"'],
+  ['apos', "'"],
+  ['nbsp', '\u00a0'],
+  ['auml', 'ä'],
+  ['ouml', 'ö'],
+  ['uuml', 'ü'],
+  ['Auml', 'Ä'],
+  ['Ouml', 'Ö'],
+  ['Uuml', 'Ü'],
+  ['szlig', 'ß'],
+]);
+
+/**
+ * Decodes HTML character references in `str`.
+ *
+ * Handles decimal (`&#228;`) and hexadecimal (`&#xE4;`) numeric references plus
+ * a small set of common named ones. Decoding happens in a single pass, so text
+ * that is itself escaped is decoded exactly once (`&amp;lt;` becomes `&lt;`,
+ * not `<`). Unknown names and invalid code points are left untouched.
+ *
+ * @param str Text taken from an HTML attribute value.
+ * @returns The text with recognised character references replaced.
+ */
+function decodeHtmlEntities(str: string): string {
+  return str.replace(/&(#x[0-9a-f]+|#\d+|[a-z][a-z0-9]*);/gi, (entity: string, body: string) => {
+    if (!body.startsWith('#')) {
+      return NAMED_HTML_ENTITIES.get(body) ?? entity;
+    }
+
+    const isHex = body[1] === 'x' || body[1] === 'X';
+    const codePoint = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
+    // Reject NUL, out-of-range values and lone surrogates rather than emitting garbage.
+    const isValid =
+      Number.isInteger(codePoint) &&
+      codePoint > 0 &&
+      codePoint <= 0x10ffff &&
+      !(codePoint >= 0xd800 && codePoint <= 0xdfff);
+    return isValid ? String.fromCodePoint(codePoint) : entity;
+  });
+}