feat: OG image scraper - auto-fetch recipe images from Pinterest/URLs
- New backend service: og-scraper.service.ts (extracts og:image, og:title, og:description)
- Pinterest support via Twitterbot UA (gets original resolution from i.pinimg.com)
- Works with Chefkoch, Allrecipes, blogs, and any site with og:image meta tags
- GET /api/og-preview?url= for preview
- POST /api/recipes/:id/fetch-image to download + process with sharp
- Frontend: 'Bild holen' button appears when source URL is filled
- Auto-fills title & description from OG data if empty
- Images processed to WebP, max 1200px wide
This commit is contained in:
Binary file not shown.
|
Before Width: | Height: | Size: 110 B After Width: | Height: | Size: 158 KiB |
Binary file not shown.
Binary file not shown.
@@ -8,6 +8,7 @@ import { shoppingRoutes } from './routes/shopping.js';
|
||||
import { tagRoutes } from './routes/tags.js';
|
||||
import { imageRoutes } from './routes/images.js';
|
||||
import { botRoutes } from './routes/bot.js';
|
||||
import { ogScrapeRoutes } from './routes/og-scrape.js';
|
||||
|
||||
export async function buildApp() {
|
||||
const app = Fastify({ logger: true });
|
||||
@@ -39,5 +40,8 @@ export async function buildApp() {
|
||||
await app.register(botRoutes);
|
||||
await app.after();
|
||||
|
||||
await app.register(ogScrapeRoutes);
|
||||
await app.after();
|
||||
|
||||
return app;
|
||||
}
|
||||
|
||||
76
backend/src/routes/og-scrape.ts
Normal file
76
backend/src/routes/og-scrape.ts
Normal file
@@ -0,0 +1,76 @@
|
||||
import { FastifyInstance } from 'fastify';
|
||||
import { scrapeOgData } from '../services/og-scraper.service.js';
|
||||
import { getDb } from '../db/connection.js';
|
||||
import sharp from 'sharp';
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
|
||||
// ES modules have no built-in __dirname, so derive this module's directory from import.meta.url.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Data directory, resolved two levels above this module's directory.
// The fetch-image route stores processed recipe images below it.
const DATA_DIR = path.resolve(__dirname, '../../data');
|
||||
|
||||
/**
 * Registers the Open Graph scraping routes on the given Fastify instance.
 *
 * - `GET /api/og-preview?url=` returns the scraped OG data without downloading anything.
 *   Responds 400 when `url` is missing or not an http(s) URL, 502 when scraping fails.
 * - `POST /api/recipes/:id/fetch-image` (JSON body `{ url }`) scrapes the page, downloads its
 *   OG image, converts it to WebP (at most 1200px wide) and stores it as the recipe's hero image.
 *   Responds 400 for a missing/invalid `url`, 404 when the recipe or the image does not exist,
 *   and 502 when scraping, downloading or image processing fails.
 *
 * NOTE(review): both routes make the server fetch a caller-supplied URL. Only the scheme is
 * validated here; requests to internal/private hosts are not blocked — confirm whether that
 * is acceptable for the deployment.
 */
export async function ogScrapeRoutes(app: FastifyInstance) {
  // Preview: just fetch OG data without downloading the image.
  app.get('/api/og-preview', async (request, reply) => {
    const { url } = request.query as { url?: string };
    if (!url) return reply.status(400).send({ error: 'url parameter required' });
    if (!isHttpUrl(url)) return reply.status(400).send({ error: 'url must be an http(s) URL' });

    try {
      return await scrapeOgData(url);
    } catch (err: unknown) {
      return reply.status(502).send({ error: `Failed to scrape: ${describeError(err)}` });
    }
  });

  // Download the OG image and attach it to the recipe.
  app.post('/api/recipes/:id/fetch-image', async (request, reply) => {
    const { id } = request.params as { id: string };
    // A request without a JSON body leaves `request.body` undefined; treat it like a missing url.
    const { url } = (request.body ?? {}) as { url?: string };

    if (!url) return reply.status(400).send({ error: 'url required' });
    if (!isHttpUrl(url)) return reply.status(400).send({ error: 'url must be an http(s) URL' });

    const db = getDb();
    const recipe = db.prepare('SELECT id FROM recipes WHERE id = ?').get(id) as { id: unknown } | undefined;
    if (!recipe) return reply.status(404).send({ error: 'Recipe not found' });

    try {
      // Scrape OG data
      const ogData = await scrapeOgData(url);
      if (!ogData.image) return reply.status(404).send({ error: 'No image found at URL' });

      // Download image
      const imgRes = await fetch(ogData.image, {
        headers: { 'User-Agent': 'Mozilla/5.0' },
        signal: AbortSignal.timeout(15000),
      });
      if (!imgRes.ok) throw new Error(`Image download failed: ${imgRes.status}`);

      const buffer = Buffer.from(await imgRes.arrayBuffer());

      // Process with sharp → WebP, max 1200px wide
      const imgDir = path.join(DATA_DIR, 'images', 'recipes', id);
      // Async mkdir so the event loop is not blocked while the directory is created.
      await fs.promises.mkdir(imgDir, { recursive: true });
      const imgPath = path.join(imgDir, 'hero.webp');

      await sharp(buffer)
        .resize(1200, null, { withoutEnlargement: true })
        .webp({ quality: 85 })
        .toFile(imgPath);

      // Update recipe
      const imageUrl = `/images/recipes/${id}/hero.webp`;
      db.prepare('UPDATE recipes SET image_url = ?, source_url = ?, updated_at = datetime(\'now\') WHERE id = ?')
        .run(imageUrl, url, id);

      return {
        ok: true,
        image_url: imageUrl,
        og_title: ogData.title,
        og_description: ogData.description,
      };
    } catch (err: unknown) {
      return reply.status(502).send({ error: `Failed: ${describeError(err)}` });
    }
  });
}

/** Returns true when `value` parses as an absolute URL using the http: or https: scheme. */
function isHttpUrl(value: string): boolean {
  try {
    const { protocol } = new URL(value);
    return protocol === 'http:' || protocol === 'https:';
  } catch {
    return false;
  }
}

/** Produces a printable message from a caught value of unknown type. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}
|
||||
73
backend/src/services/og-scraper.service.ts
Normal file
73
backend/src/services/og-scraper.service.ts
Normal file
@@ -0,0 +1,73 @@
|
||||
/**
|
||||
* OG Image Scraper — holt og:image aus beliebiger URL
|
||||
* Funktioniert mit Pinterest, Chefkoch, Allrecipes, Blogs etc.
|
||||
*/
|
||||
|
||||
/** User-Agent sent to every site that has no dedicated entry in USER_AGENTS. */
const DEFAULT_USER_AGENT =
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';

/**
 * User-Agent overrides keyed by registrable domain; the `default` key holds the fallback.
 * The Pinterest domains are requested with a Twitterbot User-Agent.
 */
const USER_AGENTS: Record<string, string> = {
  'pinterest.com': 'Twitterbot/1.0',
  'pinterest.de': 'Twitterbot/1.0',
  'pin.it': 'Twitterbot/1.0',
  default: DEFAULT_USER_AGENT,
};

/**
 * Picks the User-Agent header value to use when requesting `url`.
 *
 * The decision is made on the URL's hostname (exact match or any subdomain of a listed
 * domain), not on the raw string, so a listed domain that merely appears in the path or
 * query string — or inside an unrelated hostname — does not trigger an override.
 *
 * @param url Absolute URL that is about to be fetched.
 * @returns The domain-specific User-Agent, or the default one when no entry applies or
 *          `url` cannot be parsed.
 */
function getUserAgent(url: string): string {
  let host: string;
  try {
    host = new URL(url).hostname.toLowerCase();
  } catch {
    return DEFAULT_USER_AGENT;
  }

  for (const [domain, ua] of Object.entries(USER_AGENTS)) {
    if (domain === 'default') continue;
    if (host === domain || host.endsWith(`.${domain}`)) return ua;
  }
  return DEFAULT_USER_AGENT;
}
|
||||
|
||||
/**
 * Open Graph metadata extracted from a page.
 * Every field is optional because a page may omit any of the tags.
 */
export interface OgData {
  /** Image URL taken from the page's `og:image` tag. */
  image?: string;
  /** Text of the page's `og:title` tag. */
  title?: string;
  /** Text of the page's `og:description` tag. */
  description?: string;
}
|
||||
|
||||
/**
 * Fetches `url` and extracts its Open Graph image, title and description.
 *
 * The request follows redirects, uses a per-domain User-Agent and is aborted after 10 seconds.
 * Extracted values are HTML-entity-decoded; the image URL is additionally resolved against the
 * final response URL so relative and protocol-relative `og:image` values become absolute.
 * For Pinterest URLs, an `"orig"` image URL found in the page's embedded JSON replaces the
 * `og:image` value.
 *
 * @param url Absolute URL of the page to scrape.
 * @returns The OG fields that were found; missing tags leave the field unset.
 * @throws Error with message `HTTP <status>` when the response status is not 2xx; network
 *         failures and timeouts from `fetch` propagate unchanged.
 */
export async function scrapeOgData(url: string): Promise<OgData> {
  const res = await fetch(url, {
    headers: { 'User-Agent': getUserAgent(url) },
    redirect: 'follow',
    signal: AbortSignal.timeout(10000),
  });

  if (!res.ok) throw new Error(`HTTP ${res.status}`);

  const html = await res.text();
  const result: OgData = {};

  // Extract og:image. Attribute values are entity-encoded in HTML, so decode before use,
  // then make the URL absolute relative to where the page was finally served from.
  const rawImage = extractOgContent(html, 'image');
  if (rawImage) result.image = resolveOgUrl(decodeHtmlEntities(rawImage), res.url || url);

  // Extract og:title
  const rawTitle = extractOgContent(html, 'title');
  if (rawTitle) result.title = decodeHtmlEntities(rawTitle);

  // Extract og:description
  const rawDescription = extractOgContent(html, 'description');
  if (rawDescription) result.description = decodeHtmlEntities(rawDescription);

  // Pinterest special: higher-res image from JSON data
  if (url.includes('pinterest') && result.image) {
    // Try to get orig resolution instead of 736x
    const origMatch = html.match(/"orig":\s*\{[^}]*"url"\s*:\s*"([^"]+)"/);
    if (origMatch?.[1]) result.image = unescapeJsonFragment(origMatch[1]);
  }

  return result;
}

/**
 * Returns the non-empty `content` value of the first `<meta>` tag whose `property` or `name`
 * is `og:<property>`, accepting either attribute order and single or double quotes.
 */
function extractOgContent(html: string, property: string): string | undefined {
  const key = `(?:property|name)\\s*=\\s*["']og:${property}["']`;
  const content = `content\\s*=\\s*(?:"([^"]*)"|'([^']*)')`;
  const patterns = [
    new RegExp(`<meta\\b[^>]*?\\s${key}[^>]*?\\s${content}`, 'i'),
    new RegExp(`<meta\\b[^>]*?\\s${content}[^>]*?\\s${key}`, 'i'),
  ];

  for (const pattern of patterns) {
    const match = pattern.exec(html);
    const value = match?.[1] ?? match?.[2];
    if (value) return value;
  }
  return undefined;
}

/** Resolves `candidate` against `base`; returns `candidate` unchanged if it cannot be parsed. */
function resolveOgUrl(candidate: string, base: string): string {
  try {
    return new URL(candidate, base).href;
  } catch {
    return candidate;
  }
}

/** Decodes JSON string escapes (e.g. `\/`, `\u002F`) in `raw`; returns `raw` if it is not valid JSON string content. */
function unescapeJsonFragment(raw: string): string {
  try {
    const parsed: unknown = JSON.parse(`"${raw}"`);
    return typeof parsed === 'string' ? parsed : raw;
  } catch {
    return raw;
  }
}
|
||||
|
||||
/** Named character references understood by decodeHtmlEntities, keyed by lower-case name. */
const NAMED_HTML_ENTITIES: Record<string, string> = {
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"',
  apos: "'",
};

/**
 * Decodes HTML character references in text taken from a meta tag's `content` attribute.
 *
 * Handles the named references amp, lt, gt, quot and apos, plus decimal and hexadecimal
 * numeric references (for example the apostrophe written as decimal 39 or hex 27, or
 * umlauts written numerically). Unknown references are left untouched.
 *
 * Decoding happens in a single pass, so an escaped ampersand followed by "lt;" yields the
 * literal reference text rather than being decoded a second time into "<".
 *
 * @param str Text that may contain character references.
 * @returns The text with the supported references replaced by their characters.
 */
function decodeHtmlEntities(str: string): string {
  return str.replace(/&(#x[0-9a-f]+|#[0-9]+|amp|lt|gt|quot|apos);/gi, (match: string, ref: string) => {
    const key = ref.toLowerCase();
    if (key.startsWith('#')) {
      const codePoint = key.startsWith('#x') ? parseInt(key.slice(2), 16) : parseInt(key.slice(1), 10);
      // String.fromCodePoint throws above U+10FFFF; keep such references verbatim.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
    }
    return NAMED_HTML_ENTITIES[key] ?? match;
  });
}
|
||||
@@ -72,3 +72,20 @@ export function uploadRecipeImage(id: string, file: File) {
|
||||
return r.json() as Promise<{ image_url: string }>
|
||||
})
|
||||
}
|
||||
|
||||
/**
 * Open Graph metadata returned by the backend's OG preview endpoint.
 * Every field is optional because the scraped page may omit any of the tags.
 */
export interface OgData {
  /** URL of the page's Open Graph image, if one was found. */
  image?: string
  /** Open Graph title, if one was found. */
  title?: string
  /** Open Graph description, if one was found. */
  description?: string
}
|
||||
|
||||
/**
 * Requests the Open Graph preview (image, title, description) for a page
 * via the `/og-preview` endpoint, without downloading or storing anything.
 *
 * @param url Address of the page to inspect; it is URL-encoded into the query string.
 * @returns Whatever `apiFetch` resolves with, typed as the OG data found on the page.
 */
export function fetchOgPreview(url: string) {
  return apiFetch<OgData>(`/og-preview?url=${encodeURIComponent(url)}`)
}
|
||||
|
||||
/**
 * Asks the backend to fetch the Open Graph image of `url` and attach it to a recipe
 * by POSTing `{ url }` to `/recipes/:id/fetch-image`.
 *
 * @param recipeId Id of the recipe that should receive the image; encoded so that
 *                 characters such as `/` or `?` cannot alter the request path.
 * @param url Source page whose OG image should be fetched.
 * @returns Whatever `apiFetch` resolves with: the stored image URL plus the page's OG
 *          title and description when present.
 */
export function fetchImageFromUrl(recipeId: string, url: string) {
  return apiFetch<{ ok: boolean; image_url: string; og_title?: string; og_description?: string }>(
    `/recipes/${encodeURIComponent(recipeId)}/fetch-image`,
    { method: 'POST', body: JSON.stringify({ url }) }
  )
}
|
||||
|
||||
@@ -2,8 +2,8 @@ import { useState, useEffect, useRef } from 'react'
|
||||
import { useParams, useNavigate } from 'react-router'
|
||||
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query'
|
||||
import toast from 'react-hot-toast'
|
||||
import { ArrowLeft, Plus, Trash2, Camera, X, GripVertical } from 'lucide-react'
|
||||
import { fetchRecipe, createRecipe, updateRecipe, deleteRecipe, uploadRecipeImage } from '../api/recipes'
|
||||
import { ArrowLeft, Plus, Trash2, Camera, X, GripVertical, Link, Loader2 } from 'lucide-react'
|
||||
import { fetchRecipe, createRecipe, updateRecipe, deleteRecipe, uploadRecipeImage, fetchOgPreview } from '../api/recipes'
|
||||
import { fetchCategories } from '../api/categories'
|
||||
import type { RecipeFormData } from '../api/recipes'
|
||||
import type { Ingredient, Step } from '../api/types'
|
||||
@@ -61,6 +61,7 @@ export function RecipeFormPage() {
|
||||
{ key: nextKey(), instruction: '' },
|
||||
])
|
||||
const [showDeleteConfirm, setShowDeleteConfirm] = useState(false)
|
||||
const [fetchingOg, setFetchingOg] = useState(false)
|
||||
|
||||
// Populate form when editing
|
||||
useEffect(() => {
|
||||
@@ -331,16 +332,51 @@ export function RecipeFormPage() {
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Source URL */}
|
||||
{/* Source URL + OG Fetch */}
|
||||
<div>
|
||||
<label className={labelClass}>Quelle (URL)</label>
|
||||
<div className="flex gap-2">
|
||||
<input
|
||||
type="url"
|
||||
value={sourceUrl}
|
||||
onChange={e => setSourceUrl(e.target.value)}
|
||||
placeholder="https://pinterest.com/..."
|
||||
className={inputClass}
|
||||
className={`${inputClass} flex-1`}
|
||||
/>
|
||||
{sourceUrl.trim() && !imagePreview && (
|
||||
<button
|
||||
type="button"
|
||||
disabled={fetchingOg}
|
||||
onClick={async () => {
|
||||
setFetchingOg(true)
|
||||
try {
|
||||
const og = await fetchOgPreview(sourceUrl.trim())
|
||||
if (og.image) {
|
||||
setImagePreview(og.image)
|
||||
setImageUrl(og.image)
|
||||
toast.success('Bild gefunden! 📸')
|
||||
} else {
|
||||
toast.error('Kein Bild auf der Seite gefunden')
|
||||
}
|
||||
// Auto-fill title & description if empty
|
||||
if (!title.trim() && og.title) setTitle(og.title)
|
||||
if (!description.trim() && og.description) setDescription(og.description)
|
||||
} catch {
|
||||
toast.error('Konnte die Seite nicht laden')
|
||||
} finally {
|
||||
setFetchingOg(false)
|
||||
}
|
||||
}}
|
||||
className="bg-secondary text-white px-4 py-3 rounded-xl font-medium min-h-[44px] min-w-[44px] flex items-center justify-center disabled:opacity-50 whitespace-nowrap gap-2"
|
||||
>
|
||||
{fetchingOg ? <Loader2 size={18} className="animate-spin" /> : <Link size={18} />}
|
||||
{!fetchingOg && <span className="hidden sm:inline text-sm">Bild holen</span>}
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
{sourceUrl.trim() && (
|
||||
<p className="text-xs text-warm-grey mt-1">💡 Pinterest, Chefkoch, Blogs — Bild wird automatisch geholt</p>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Ingredients */}
|
||||
|
||||
Reference in New Issue
Block a user