#!/usr/bin/env node
/**
 * Pulls down the upstream prompt corpora (CC BY 4.0) and emits curated
 * JSON files under `prompt-templates/{image,video}/`. Re-run anytime to
 * pick up new featured prompts.
 *
 * Usage:
 *   node scripts/import-prompt-templates.mjs
 *
 * Source READMEs:
 *   - https://github.com/YouMind-OpenLab/awesome-gpt-image-2 (CC BY 4.0)
 *   - https://github.com/YouMind-OpenLab/awesome-seedance-2-prompts (CC BY 4.0)
 *
 * Each upstream README is a structured catalog. Two patterns we care about:
 *
 *   Featured block:
 *     ### No. N: <Title>
 *     <badges>
 *     #### 📖 Description
 *     <description paragraph>
 *     #### 📝 Prompt
 *     ```
 *     <prompt body>
 *     ```
 *     #### 🎬 Video (or 🖼️ Generated Images)
 *     <preview img / video link>
 *     #### 📌 Details
 *     - **Author:** [Name](url)
 *     - **Source:** [Twitter Post](url)
 *     - **Published:** ...
 *
 *   All-Prompts block:
 *     ### <Title>
 *     <badges>
 *     > <description>
 *     #### 📝 Prompt
 *     ```
 *     <prompt body>
 *     ```
 *     <img src="<thumb>"> | <a href=...>
 *     **Author:** [Name](url) | **Source:** [Link](url) | **Published:** ...
 *
 * We pick the featured 6 from each repo (always good) plus a sampled slice
 * of the All-Prompts head so the gallery has breadth across categories.
 *
 * All output JSON carries a `source` block so attribution stays intact.
 */
|
|||
|
|
|
|||
|
|
import { mkdir, writeFile, readdir, unlink, readFile } from 'node:fs/promises';
|
|||
|
|
import path from 'node:path';
|
|||
|
|
import { fileURLToPath } from 'node:url';
|
|||
|
|
|
|||
|
|
// Filesystem anchors. ROOT is the parent of the directory holding this
// script; generated templates land in one folder per surface beneath
// `prompt-templates/`.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const ROOT = path.join(__dirname, '..');
const OUT_IMAGE = path.resolve(ROOT, 'prompt-templates', 'image');
const OUT_VIDEO = path.resolve(ROOT, 'prompt-templates', 'video');

// One descriptor per upstream catalog. Each descriptor is handed to the
// parsers as `ctx`: `surface` selects the output folder and category rules,
// `defaultModel` / `defaultAspect` are stamped onto every emitted template,
// and `repo` / `license` populate the attribution block.
const SOURCES = [
  {
    surface: 'image',
    repo: 'YouMind-OpenLab/awesome-gpt-image-2',
    license: 'CC-BY-4.0',
    readmeUrl:
      'https://raw.githubusercontent.com/YouMind-OpenLab/awesome-gpt-image-2/main/README.md',
    defaultModel: 'gpt-image-2',
    defaultAspect: '1:1',
    // Upper bound on entries taken from the "All Prompts" section so the
    // committed dataset stays reviewable; featured entries are always kept.
    sampleAllPrompts: 30,
  },
  {
    surface: 'video',
    repo: 'YouMind-OpenLab/awesome-seedance-2-prompts',
    license: 'CC-BY-4.0',
    readmeUrl:
      'https://raw.githubusercontent.com/YouMind-OpenLab/awesome-seedance-2-prompts/main/README.md',
    defaultModel: 'seedance-2.0',
    defaultAspect: '16:9',
    sampleAllPrompts: 30,
  },
];
|
|||
|
|
|
|||
|
|
/**
 * Downloads a URL and resolves with its body as text.
 *
 * @param {string} url - Address to GET.
 * @returns {Promise<string>} The response body.
 * @throws {Error} When the response status is not in the 2xx range.
 */
async function fetchText(url) {
  const response = await fetch(url);
  if (response.ok) {
    return response.text();
  }
  throw new Error(`failed ${url}: ${response.status}`);
}
|
|||
|
|
|
|||
|
|
/**
 * Converts free text into a lowercase, hyphen-separated ASCII slug that is
 * at most 64 characters long.
 *
 * @param {string} input - Text to convert (for example an entry title).
 * @returns {string} The slug; empty when `input` has no ASCII letters/digits.
 */
function slugify(input) {
  return (
    input
      .toLowerCase()
      // Decompose accented letters so the combining marks can be dropped.
      .normalize('NFKD')
      .replace(/[\u0300-\u036f]/g, '')
      .replace(/[^a-z0-9]+/g, '-')
      .replace(/^-+|-+$/g, '')
      .slice(0, 64)
      // The 64-char cut can land right after a separator; trim again so the
      // slug (and the file name derived from it) never ends in "-".
      .replace(/-+$/, '')
  );
}
|
|||
|
|
|
|||
|
|
/**
 * Returns the text that follows the first match of `headerRe`, up to (but
 * not including) the newline that precedes the next "## " heading. Used to
 * cut out e.g. the "## 🔥 Featured Prompts" section of a README.
 *
 * @param {string} md - Full markdown document.
 * @param {RegExp} headerRe - Pattern locating the section header.
 * @returns {string} Section body, or '' when the header is not found.
 */
function sliceSection(md, headerRe) {
  const header = headerRe.exec(md);
  if (header === null) {
    return '';
  }
  const afterHeader = md.slice(header.index + header[0].length);
  const nextH2 = afterHeader.search(/\n## /);
  return nextH2 === -1 ? afterHeader : afterHeader.slice(0, nextH2);
}
|
|||
|
|
|
|||
|
|
/**
 * Splits the featured section at every "### No. N: Title" header and parses
 * the markdown under each header into a template record.
 *
 * @param {string} block - Markdown of the featured section.
 * @param {object} ctx - Source descriptor forwarded to parseEntryBody.
 * @returns {object[]} Parsed entries in document order; unparseable ones are skipped.
 */
function parseFeaturedBlock(block, ctx) {
  const headings = Array.from(
    block.matchAll(/^### No\. \d+: (.+?)\s*$/gm),
    (hit) => ({
      index: hit.index,
      end: hit.index + hit[0].length,
      title: hit[1],
    }),
  );

  const parsed = [];
  headings.forEach((heading, position) => {
    // An entry's body runs until the next header, or to the end of the section.
    const following = headings[position + 1];
    const stop = following?.index ?? block.length;
    const entry = parseEntryBody(block.slice(heading.end, stop), heading.title, ctx, true);
    if (entry) {
      parsed.push(entry);
    }
  });
  return parsed;
}
|
|||
|
|
|
|||
|
|
/**
 * Parses the "All Prompts" section, collecting entries in document order
 * until `ctx.sampleAllPrompts` of them have been accepted.
 *
 * Headers are "### <Title>"; a leading "No. N:" (present in one README,
 * absent in the other) is removed from the title before parsing.
 *
 * @param {string} block - Markdown of the "All Prompts" section.
 * @param {object} ctx - Source descriptor; `sampleAllPrompts` caps the result.
 * @returns {object[]} Up to `ctx.sampleAllPrompts` parsed entries.
 */
function parseAllPromptsBlock(block, ctx) {
  const headings = Array.from(block.matchAll(/^### (.+?)\s*$/gm), (hit) => ({
    index: hit.index,
    end: hit.index + hit[0].length,
    title: hit[1].replace(/^No\.\s*\d+:\s*/, '').trim(),
  }));

  const picked = [];
  for (const [position, heading] of headings.entries()) {
    // Stop as soon as the sample quota is filled.
    if (!(picked.length < ctx.sampleAllPrompts)) {
      break;
    }
    const stop = headings[position + 1]?.index ?? block.length;
    const entry = parseEntryBody(block.slice(heading.end, stop), heading.title, ctx, false);
    if (entry) {
      picked.push(entry);
    }
  }
  return picked;
}
|
|||
|
|
|
|||
|
|
/**
 * Builds one template record from the markdown found under an entry header.
 *
 * @param {string} body - Markdown between this entry's header and the next one.
 * @param {string} title - Header title with any "No. N:" prefix already removed.
 * @param {object} ctx - Source descriptor; reads `surface`, `defaultModel`,
 *   `defaultAspect`, `repo` and `license`.
 * @param {boolean} featured - Not used by this function; kept so existing
 *   call sites stay valid.
 * @returns {object|null} The record, or null when the entry has no fenced
 *   prompt, the prompt is shorter than 40 characters, or the title yields
 *   an empty slug.
 */
function parseEntryBody(body, title, ctx, featured) {
  const promptMatch = /#### 📝 Prompt\s*\n+```[a-zA-Z0-9_-]*\n([\s\S]*?)```/m.exec(
    body,
  );
  if (!promptMatch) return null;
  const prompt = promptMatch[1].trim();
  if (prompt.length < 40) return null;

  // A title without ASCII letters/digits slugifies to ''. Such a record
  // would be written as ".json" and collide with every other empty-id
  // entry, so skip it instead of emitting an unusable template.
  const id = slugify(title);
  if (!id) return null;

  // Prefer the structured "#### 📖 Description" block; entries that lack
  // one fall back to their leading blockquote line.
  const description =
    extractDescription(body) || extractBlockquoteSummary(body);
  const displayTitle = cleanTitle(title);
  const author = extractAuthor(body);
  const sourceUrl = extractSourceUrl(body);
  const previewImage = extractFirstImage(body);
  const previewVideo = extractVideoLink(body);

  return {
    id,
    surface: ctx.surface,
    title: displayTitle,
    summary: (description || displayTitle).slice(0, 200),
    category: inferCategory(title, ctx.surface),
    tags: inferTags(title, prompt, ctx.surface),
    model: ctx.defaultModel,
    aspect: ctx.defaultAspect,
    prompt,
    // null → undefined so JSON.stringify omits the key entirely.
    previewImageUrl: previewImage ?? undefined,
    previewVideoUrl: previewVideo ?? undefined,
    source: {
      repo: ctx.repo,
      license: ctx.license,
      author: author ?? undefined,
      url: sourceUrl ?? undefined,
    },
  };
}
|
|||
|
|
|
|||
|
|
/**
 * Pulls the paragraph under "#### 📖 Description", stopping before the
 * next "####" heading or "---" rule, and collapses all whitespace runs
 * to single spaces.
 *
 * @param {string} body - Entry markdown.
 * @returns {string} The description, or '' when no such block is found.
 */
function extractDescription(body) {
  const found = body.match(/#### 📖 Description\s*\n+([\s\S]*?)(?=\n+####|\n+---)/m);
  if (!found) {
    return '';
  }
  return found[1].trim().replace(/\s+/g, ' ');
}
|
|||
|
|
|
|||
|
|
/**
 * Returns the text of the first markdown blockquote line ("> …") in the
 * entry, trimmed.
 *
 * @param {string} body - Entry markdown.
 * @returns {string} The quoted text, or '' when there is no blockquote.
 */
function extractBlockquoteSummary(body) {
  const quote = body.match(/^>\s*(.+?)\s*$/m);
  return quote ? quote[1].trim() : '';
}
|
|||
|
|
|
|||
|
|
/**
 * Reads the author's display name from a "**Author:** [Name](url)" field.
 * The same pattern covers the bulleted form used by featured entries and
 * the inline, pipe-separated form used in the All-Prompts list.
 *
 * @param {string} body - Entry markdown.
 * @returns {string|null} Trimmed author name, or null when absent.
 */
function extractAuthor(body) {
  const hit = /\*\*Author:\*\*\s*\[([^\]]+)\]/.exec(body);
  if (hit === null) {
    return null;
  }
  return hit[1].trim();
}
|
|||
|
|
|
|||
|
|
/**
 * Reads the link target from a "**Source:** [label](url)" field.
 *
 * @param {string} body - Entry markdown.
 * @returns {string|null} Trimmed URL, or null when the field is missing.
 */
function extractSourceUrl(body) {
  const hit = /\*\*Source:\*\*\s*\[[^\]]+\]\(([^)]+)\)/.exec(body);
  if (hit === null) {
    return null;
  }
  return hit[1].trim();
}
|
|||
|
|
|
|||
|
|
/**
 * Finds the `src` of the first HTML `<img>` tag in the entry.
 *
 * @param {string} body - Entry markdown (may embed raw HTML).
 * @returns {string|null} The image URL, or null when no `<img src>` exists.
 */
function extractFirstImage(body) {
  const tag = body.match(/<img[^>]*src=["']([^"']+)["']/);
  return tag ? tag[1] : null;
}
|
|||
|
|
|
|||
|
|
/**
 * Resolves a playable video URL for an entry.
 *
 * Two sources are tried in order:
 *  1. An explicit `href="….mp4"` link. Per the original author's notes,
 *     featured entries link a GitHub release asset this way, and it is
 *     preferred because it does not depend on a per-request signed redirect.
 *  2. A Cloudflare Stream thumbnail URL. The host and video id are lifted
 *     from it and recombined into `/downloads/default.mp4` on the same host.
 *     Per the original author's notes, that endpoint redirects to a freshly
 *     signed CDN URL on each request, which a `<video src>` follows.
 *
 * @param {string} body - Entry markdown (may embed raw HTML).
 * @returns {string|null} The video URL, or null when neither form is found.
 */
function extractVideoLink(body) {
  const directMp4 = body.match(/href=["']([^"']+\.mp4)["']/);
  if (directMp4) {
    return directMp4[1];
  }

  const thumbnail = body.match(
    /https?:\/\/([a-z0-9-]+\.cloudflarestream\.com)\/([a-f0-9]{20,})\/thumbnails\/thumbnail\.jpg/i,
  );
  if (!thumbnail) {
    return null;
  }
  const [, host, videoId] = thumbnail;
  return `https://${host}/${videoId}/downloads/default.mp4`;
}
|
|||
|
|
|
|||
|
|
/**
 * Tidies a raw header title for display on cards: removes one trailing
 * parenthetical (e.g. "Neon Portrait (v2)" → "Neon Portrait") and a
 * leading "-" / "–" separator, then trims.
 *
 * @param {string} raw - Title as captured from the markdown header.
 * @returns {string} The cleaned title.
 */
function cleanTitle(raw) {
  return (
    raw
      // Match only the LAST balanced "(…)" group (one nesting level allowed).
      // A greedy `\(.*\)` would start at the first "(" and swallow everything
      // after it, turning "Logo (flat) for Acme (2024)" into "Logo".
      .replace(/\s*\((?:[^()]|\([^()]*\))*\)\s*$/, '')
      .replace(/^\s*[-–]\s*/, '')
      .trim()
  );
}
|
|||
|
|
|
|||
|
|
/**
 * Guesses a gallery category from keywords in an entry title.
 *
 * Rules are evaluated top to bottom and the first hit wins. Short keywords
 * that commonly occur inside unrelated words are anchored with `\b`:
 * unanchored, "post" shadowed "poster" (making 'Poster / Flyer' reachable
 * only via "flyer"), "ui" matched "fruit", "app" matched "apple" and "ad"
 * matched "shadow".
 *
 * @param {string} title - Entry title.
 * @param {string} surface - 'image' selects the image rules; any other
 *   value selects the video rules.
 * @returns {string} Category label; 'Illustration' (image) or 'General'
 *   (video) when no rule matches.
 */
function inferCategory(title, surface) {
  const lower = title.toLowerCase();

  const imageRules = [
    [/profile|avatar|portrait/, 'Profile / Avatar'],
    [/social|\bposts?\b|carousel/, 'Social Media Post'],
    [/info[ -]?graphic|chart|diagram/, 'Infographic'],
    [/youtube|thumbnail/, 'YouTube Thumbnail'],
    [/comic|storyboard|panel/, 'Comic / Storyboard'],
    [/poster|flyer/, 'Poster / Flyer'],
    [/\bui\b|\bapps?\b|web design|mockup|landing/, 'App / Web Design'],
    [/product|exploded|merch|packaging/, 'Product Marketing'],
    [/anime|manga/, 'Anime / Manga'],
    [/cinematic|film/, 'Cinematic'],
    [/3d|render|isometric/, '3D Render'],
    [/sketch|line art|pencil/, 'Sketch / Line Art'],
    [/pixel/, 'Pixel Art'],
    [/\boils?\b|water[- ]?color/, 'Painterly'],
    [/cyberpunk|sci[- ]?fi|futuristic/, 'Cyberpunk / Sci-Fi'],
    [/landscape|\bnature\b/, 'Landscape'],
  ];

  const videoRules = [
    [/cinematic|film|movie|noir/, 'Cinematic'],
    [/anime|manga/, 'Anime'],
    [/\bads?\b|advert|commercial|brand/, 'Advertising'],
    [/\bugc\b|tutorial|vlog/, 'UGC / Vlog'],
    [/meme|tiktok|viral/, 'Social / Meme'],
    [/drama|short film|romance/, 'Short Film / Drama'],
    [/\bintro\b|motion graphics|title sequence/, 'Motion Graphics'],
    [/vfx|fantasy|magic/, 'VFX / Fantasy'],
    [/\brace[rs]?\b|\baction\b|combat|fight/, 'Action'],
  ];

  const isImage = surface === 'image';
  const rules = isImage ? imageRules : videoRules;
  const hit = rules.find(([pattern]) => pattern.test(lower));
  if (hit) {
    return hit[1];
  }
  return isImage ? 'Illustration' : 'General';
}
|
|||
|
|
|
|||
|
|
/**
 * Derives a short list of tags by keyword-matching the title and prompt.
 *
 * Tags are returned in the fixed order of the rule table and capped at 4
 * for the 'image' surface and 3 otherwise. Keywords that are frequent
 * substrings of unrelated words are anchored with `\b`: unanchored, "mage"
 * matched "image" and "elf" matched "herself"/"shelf" (tagging almost
 * every prompt as fantasy), and "race"/"action"/"chase"/"river" matched
 * "embrace"/"reaction"/"purchase"/"driver".
 *
 * @param {string} title - Entry title.
 * @param {string} prompt - Prompt body.
 * @param {string} surface - 'image' or another surface name.
 * @returns {string[]} Matching tags, possibly empty.
 */
function inferTags(title, prompt, surface) {
  const blob = `${title} ${prompt}`.toLowerCase();
  const checks = [
    ['portrait', /portrait|selfie|headshot/],
    ['anime', /anime|manga/],
    ['cinematic', /cinematic|filmic|grain|8k/],
    ['cyberpunk', /cyberpunk|neon/],
    ['fantasy', /fantasy|\bmages?\b|\belf\b|\belves\b|dragon/],
    ['3d-render', /3d render|unreal engine|render/],
    ['isometric', /isometric/],
    ['typography', /typography|kerning|font|lettering/],
    ['product', /product|packaging|exploded/],
    ['ugc', /\bugc\b|vlog|selfie cam/],
    ['cinematic-romance', /romance|pure love|romantic/],
    ['action', /\bchase|\baction\b|combat|\brace[rs]?\b/],
    ['food', /food|coffee|kitchen/],
    ['nature', /forest|\brivers?\b|mountain|landscape/],
  ];
  const limit = surface === 'image' ? 4 : 3;
  return checks
    .filter(([, pattern]) => pattern.test(blob))
    .map(([tag]) => tag)
    .slice(0, limit);
}
|
|||
|
|
|
|||
|
|
/**
 * Deletes previously generated `*.json` files from `dir`.
 *
 * A file is kept only when it parses as JSON and carries a `source.repo`
 * that differs from `upstreamRepo` (i.e. a hand-authored template), so
 * first-party curated prompts are not wiped on re-run. Files that cannot
 * be read or parsed, and files without a `source.repo`, are treated as
 * generated and removed. Non-JSON files are left untouched.
 *
 * @param {string} dir - Directory to clean.
 * @param {string} upstreamRepo - `source.repo` value marking generated files.
 * @returns {Promise<void>}
 * @throws {Error} Any filesystem error other than the directory not
 *   existing (for example a permission failure while deleting).
 */
async function clearDir(dir, upstreamRepo) {
  let files;
  try {
    files = await readdir(dir);
  } catch (err) {
    // A missing directory simply means there is nothing to clean. Anything
    // else (permissions, not a directory, …) is a real problem and must
    // surface instead of being silently ignored.
    if (err?.code === 'ENOENT') return;
    throw err;
  }

  for (const f of files) {
    if (!f.endsWith('.json')) continue;
    const filePath = path.join(dir, f);
    let keep = false;
    try {
      const parsed = JSON.parse(await readFile(filePath, 'utf8'));
      const repo = parsed?.source?.repo;
      if (repo && repo !== upstreamRepo) keep = true;
    } catch {
      // Unreadable or unparseable file — treat as generated and remove.
    }
    if (!keep) await unlink(filePath);
  }
}
|
|||
|
|
|
|||
|
|
/**
 * Writes each entry to `<outDir>/<id>.json`, after ensuring the directory
 * exists and clearing earlier generated output.
 *
 * Ids are de-duplicated: names of JSON files still on disk after clearing
 * are reserved first, then entries are written in order and an id that
 * has already been seen is skipped, so earlier entries win.
 *
 * @param {object[]} entries - Records to write; each needs an `id`.
 * @param {string} outDir - Destination directory.
 * @param {string} upstreamRepo - Passed to clearDir to identify generated files.
 * @returns {Promise<number>} Number of files written.
 */
async function writeAll(entries, outDir, upstreamRepo) {
  await mkdir(outDir, { recursive: true });
  await clearDir(outDir, upstreamRepo);

  const suffix = '.json';
  const takenIds = new Set();
  try {
    const names = await readdir(outDir);
    names
      .filter((name) => name.endsWith(suffix))
      .forEach((name) => takenIds.add(name.slice(0, -suffix.length)));
  } catch {
    // noop
  }

  let written = 0;
  for (const entry of entries) {
    if (!takenIds.has(entry.id)) {
      takenIds.add(entry.id);
      const target = path.join(outDir, `${entry.id}.json`);
      await writeFile(target, `${JSON.stringify(entry, null, 2)}\n`, 'utf8');
      written += 1;
    }
  }
  return written;
}
|
|||
|
|
|
|||
|
|
/**
 * Script entry point: for every configured source, downloads the README,
 * parses its featured and "All Prompts" sections, writes the resulting
 * templates, and logs a per-source summary followed by overall totals.
 *
 * A source that yields no entries is reported on stderr and sets a
 * non-zero exit code, but the remaining sources are still processed.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // Returns the first non-empty section among the candidate header patterns.
  const firstSection = (md, patterns) => {
    for (const pattern of patterns) {
      const section = sliceSection(md, pattern);
      if (section) {
        return section;
      }
    }
    return '';
  };

  const totals = { image: 0, video: 0 };

  for (const ctx of SOURCES) {
    const md = await fetchText(ctx.readmeUrl);
    const featuredBlock = firstSection(md, [
      /## 🔥 Featured Prompts/m,
      /## ⭐ Featured Prompts/m,
      /## Featured/m,
    ]);
    const allPromptsBlock = firstSection(md, [
      /## (📋|🎬) All Prompts/m,
      /## All Prompts/m,
    ]);

    const featured = parseFeaturedBlock(featuredBlock, ctx);
    const sampled = parseAllPromptsBlock(allPromptsBlock, ctx);
    const entries = featured.concat(sampled);
    if (entries.length === 0) {
      console.error(`No entries parsed for ${ctx.repo}; check headers.`);
      process.exitCode = 1;
      continue;
    }

    const isImage = ctx.surface === 'image';
    const outDir = isImage ? OUT_IMAGE : OUT_VIDEO;
    const written = await writeAll(entries, outDir, ctx.repo);
    if (isImage) {
      totals.image += written;
    } else {
      totals.video += written;
    }
    console.log(
      `[${ctx.repo}] featured=${featured.length} sampled=${sampled.length} written=${written} → ${path.relative(ROOT, outDir)}`,
    );
  }

  console.log(`\nDone. ${totals.image} image + ${totals.video} video templates.`);
}
|
|||
|
|
|
|||
|
|
// Run the importer; on any uncaught failure print it and exit non-zero.
try {
  await main();
} catch (err) {
  console.error(err);
  process.exit(1);
}
|