// scripts/build-para-index.mjs
import fs from "node:fs/promises";
import path from "node:path";

/**
 * Parse command-line arguments.
 *
 * Supported flags (both `--flag value` and `--flag=value` forms):
 *   --in   input directory to scan   (default: "dist")
 *   --out  output JSON file to write (default: "dist/para-index.json")
 * Unknown arguments, and a trailing `--in` / `--out` without a value, are ignored.
 *
 * @param {string[]} argv - Arguments without the node binary and script path.
 * @returns {{ inDir: string, outFile: string }}
 */
function parseArgs(argv) {
  const out = { inDir: "dist", outFile: "dist/para-index.json" };

  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];

    if (a === "--in" && argv[i + 1]) {
      out.inDir = argv[++i];
      continue;
    }
    if (a.startsWith("--in=")) {
      out.inDir = a.slice("--in=".length);
      continue;
    }
    if (a === "--out" && argv[i + 1]) {
      out.outFile = argv[++i];
      continue;
    }
    if (a.startsWith("--out=")) {
      out.outFile = a.slice("--out=".length);
      continue;
    }
  }

  return out;
}

/**
 * Report whether a path is accessible.
 *
 * @param {string} p - Path to probe.
 * @returns {Promise<boolean>} true when `fs.access` succeeds, false when it rejects.
 */
async function exists(p) {
  try {
    await fs.access(p);
    return true;
  } catch {
    // Any access failure is reported as "not there"; callers only need a yes/no.
    return false;
  }
}

/**
 * Recursively list every non-directory entry below `dir`.
 *
 * @param {string} dir - Directory to traverse.
 * @returns {Promise<string[]>} Paths built by joining `dir` with each entry name.
 */
async function walk(dir) {
  const out = [];
  const ents = await fs.readdir(dir, { withFileTypes: true });

  for (const e of ents) {
    const p = path.join(dir, e.name);
    if (e.isDirectory()) out.push(...(await walk(p)));
    else out.push(p);
  }

  return out;
}

/**
 * Replace markup with spaces so only text content remains.
 *
 * <script> and <style> elements are removed together with their content
 * (their bodies are not prose); every other tag is replaced by a single space
 * so adjacent words do not get glued together.
 *
 * @param {string} html - HTML fragment; falsy values are treated as "".
 * @returns {string} Text with tags replaced by spaces (whitespace not yet collapsed).
 */
function stripTags(html) {
  return String(html || "")
    .replace(/<script[\s\S]*?<\/script>/gi, " ")
    .replace(/<style[\s\S]*?<\/style>/gi, " ")
    .replace(/<[^>]+>/g, " ");
}

/**
 * Decode a small, fixed set of HTML entities.
 *
 * Deliberately minimal to avoid pulling in a dependency. Handles
 * `&nbsp;`, `&lt;`, `&gt;`, `&quot;`, `&#39;` and `&amp;`.
 *
 * @param {string} s - Text that may contain entities; falsy values are treated as "".
 * @returns {string} Text with the supported entities decoded.
 */
function decodeEntities(s) {
  return String(s || "")
    .replace(/&nbsp;/g, " ")
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&quot;/g, '"')
    .replace(/&#39;/g, "'")
    // `&amp;` must be decoded last, otherwise "&amp;lt;" would be decoded twice
    // and come out as "<" instead of the literal text "&lt;".
    .replace(/&amp;/g, "&");
}

/**
 * Decode entities, collapse every whitespace run to one space, and trim.
 *
 * @param {string} s - Raw text.
 * @returns {string} Normalized single-line text.
 */
function normalizeSpaces(s) {
  return decodeEntities(s).replace(/\s+/g, " ").trim();
}

/**
 * Map an `index.html` file to its page path relative to the input directory.
 *
 * Examples (inDirAbs = "/site/dist"):
 *   /site/dist/index.html      -> "/"
 *   /site/dist/a/b/index.html  -> "/a/b/"
 *   /site/dist/a/page.html     -> null
 *
 * @param {string} inDirAbs - Absolute input directory.
 * @param {string} fileAbs - Absolute path of a file below `inDirAbs`.
 * @returns {string|null} Page path with leading and trailing "/", or null when
 *   the file is not named index.html (case-insensitive).
 */
function relPageFromIndexHtml(inDirAbs, fileAbs) {
  // Normalize Windows separators so the result is always a URL-style path.
  const rel = path.relative(inDirAbs, fileAbs).replace(/\\/g, "/");

  // Anchor on a path-segment boundary so "myindex.html" is not mistaken for
  // an index page.
  const indexRe = /(^|\/)index\.html$/i;
  if (!indexRe.test(rel)) return null;

  return "/" + rel.replace(indexRe, "$1");
}

// Matches <p ... id="p-<digits>-<rest>" ...>inner</p>.
// The id pattern is deliberately strict to avoid false positives.
// Groups: 1 = raw attribute text, 2 = id value, 3 = inner HTML.
const PARAGRAPH_RE =
  /<p\b([^>]*\bid\s*=\s*["'](p-\d+-[^"']+)["'][^>]*)>([\s\S]*?)<\/p>/gi;

/**
 * Extract the identified paragraphs of one page and append them to the index.
 *
 * A paragraph is skipped when its id is already present in `byId` (first
 * occurrence wins) or when its text is empty after stripping and normalizing.
 *
 * @param {string} html - Full HTML of the page.
 * @param {string} page - Page path stored with each item.
 * @param {Array<{id: string, page: string, text: string}>} items - Mutated: new items are pushed.
 * @param {Record<string, number>} byId - Mutated: maps id -> index in `items`.
 * @returns {void}
 */
function collectParagraphs(html, page, items, byId) {
  // matchAll iterates over a private copy of the regex, so the shared /g
  // pattern never carries lastIndex state from one page to the next.
  for (const m of html.matchAll(PARAGRAPH_RE)) {
    const id = m[2];
    const inner = m[3];

    if (byId[id] != null) continue; // guards against duplicate ids

    const text = normalizeSpaces(stripTags(inner));
    if (!text) continue;

    byId[id] = items.length;
    items.push({ id, page, text });
  }
}

/**
 * Entry point: scan every index.html below the input directory and write a
 * JSON index `{ schema, generatedAt, items, byId }` of identified paragraphs.
 *
 * A missing input directory, or one containing no index.html, is not an
 * error: the script logs a notice and finishes without writing anything.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { inDir, outFile } = parseArgs(process.argv.slice(2));
  const CWD = process.cwd();

  const inDirAbs = path.isAbsolute(inDir) ? inDir : path.join(CWD, inDir);
  const outAbs = path.isAbsolute(outFile) ? outFile : path.join(CWD, outFile);

  // Be forgiving: if the input directory is absent, skip cleanly.
  // Returning (instead of process.exit) still yields exit code 0 and lets
  // stdout flush normally.
  if (!(await exists(inDirAbs))) {
    console.log(`ℹ️ para-index: skip (input missing): ${inDir}`);
    return;
  }

  const files = (await walk(inDirAbs)).filter(
    (p) => path.basename(p).toLowerCase() === "index.html",
  );
  if (!files.length) {
    console.log(`ℹ️ para-index: skip (no index.html found in): ${inDir}`);
    return;
  }

  const items = [];
  // Null-prototype dictionary: ids come from page content, so keys such as
  // "constructor" must not collide with Object.prototype members.
  const byId = Object.create(null);

  for (const f of files) {
    const page = relPageFromIndexHtml(inDirAbs, f);
    if (!page) continue;

    const html = await fs.readFile(f, "utf8");
    collectParagraphs(html, page, items, byId);
  }

  const out = {
    schema: 1,
    generatedAt: new Date().toISOString(),
    items,
    byId,
  };

  await fs.mkdir(path.dirname(outAbs), { recursive: true });
  await fs.writeFile(outAbs, JSON.stringify(out), "utf8");

  console.log(`✅ para-index: items=${items.length} -> ${path.relative(CWD, outAbs)}`);
}

main().catch((e) => {
  console.error("FAIL: build-para-index crashed:", e);
  process.exit(1);
});