// scripts/build-para-index.mjs
import fs from "node:fs/promises";
import path from "node:path";
/**
 * Parse the command-line flags understood by this script.
 *
 * Accepted forms: `--in <dir>`, `--in=<dir>`, `--out <file>`, `--out=<file>`.
 * Unrecognized arguments are ignored, and a later occurrence of a flag
 * overrides an earlier one. An empty value (`--in=` or `--in ""`) is ignored
 * in both forms, so the default is kept.
 *
 * @param {string[]} argv - Arguments with the node/script prefix already removed.
 * @returns {{ inDir: string, outFile: string }} Parsed options; defaults are
 *   `dist` and `dist/para-index.json`.
 */
function parseArgs(argv) {
  const out = { inDir: "dist", outFile: "dist/para-index.json" };

  // Flag name -> key of `out` that the flag fills.
  const flags = { "--in": "inDir", "--out": "outFile" };

  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];

    for (const [flag, key] of Object.entries(flags)) {
      // Space-separated form: the next argument (if non-empty) is the value.
      if (a === flag) {
        if (argv[i + 1]) out[key] = argv[++i];
        break;
      }

      // `flag=value` form: skip empty values so both forms behave alike.
      const prefix = `${flag}=`;
      if (a.startsWith(prefix)) {
        const value = a.slice(prefix.length);
        if (value) out[key] = value;
        break;
      }
    }
  }

  return out;
}
/**
 * Report whether a filesystem path can be accessed.
 *
 * Any error raised by `fs.access` (missing entry, permission problem, ...)
 * is reported as `false`; this helper never rejects.
 *
 * @param {string} p - Path to probe.
 * @returns {Promise<boolean>} `true` when `fs.access(p)` succeeds, else `false`.
 */
async function exists(p) {
  try {
    await fs.access(p);
    return true;
  } catch {
    return false;
  }
}
/**
 * Recursively list every non-directory entry below `dir`.
 *
 * Entries of each directory are visited in ascending name order (plain
 * code-unit comparison), so the result does not depend on the order in
 * which the filesystem happens to return them.
 *
 * @param {string} dir - Directory to traverse.
 * @returns {Promise<string[]>} Paths built with `path.join(dir, name)`.
 * @throws Rejects with the `fs.readdir` error if a directory cannot be read.
 */
async function walk(dir) {
  const ents = await fs.readdir(dir, { withFileTypes: true });
  ents.sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));

  const out = [];
  for (const e of ents) {
    const p = path.join(dir, e.name);
    if (e.isDirectory()) {
      // Append one by one: spreading a very large array into push() can
      // exceed the engine's argument-count limit.
      for (const sub of await walk(p)) out.push(sub);
    } else {
      out.push(p);
    }
  }
  return out;
}
/**
 * Remove markup from an HTML fragment, leaving only its text.
 *
 * HTML comments, `<script>` / `<style>` elements (with their content) and
 * every remaining tag are each replaced by a single space. Entities are
 * NOT decoded here and whitespace is not collapsed.
 *
 * @param {*} html - HTML fragment; falsy values are treated as "".
 * @returns {string} The fragment with markup replaced by spaces.
 */
function stripTags(html) {
  return String(html || "")
    // Comments go first: one containing ">" would otherwise be cut short by
    // the generic tag pattern and leak its tail into the text.
    .replace(/<!--[\s\S]*?-->/g, " ")
    // Closing tags may legally carry whitespace before ">" (e.g. "</script >").
    .replace(/<script\b[\s\S]*?<\/script\s*>/gi, " ")
    .replace(/<style\b[\s\S]*?<\/style\s*>/gi, " ")
    .replace(/<[^>]+>/g, " ");
}
/**
 * Decode a small, fixed set of HTML entities.
 *
 * Intentionally minimal (avoids pulling in a dependency): only `&nbsp;`,
 * `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&#39;` and `&apos;` are handled;
 * anything else is left untouched. Decoding happens in a single pass, so
 * already-escaped text such as `&amp;lt;` becomes `&lt;` and not `<`.
 *
 * @param {*} s - Text to decode; falsy values are treated as "".
 * @returns {string} The text with the supported entities replaced.
 */
function decodeEntities(s) {
  const entities = {
    "&nbsp;": " ",
    "&amp;": "&",
    "&lt;": "<",
    "&gt;": ">",
    "&quot;": '"',
    "&#39;": "'",
    "&apos;": "'",
  };

  return String(s || "").replace(
    /&(?:nbsp|amp|lt|gt|quot|#39|apos);/g,
    (entity) => entities[entity],
  );
}
/**
 * Decode entities, then collapse every run of whitespace into one space
 * and trim both ends.
 *
 * @param {*} s - Text to normalize (passed through `decodeEntities`).
 * @returns {string} Single-spaced, trimmed text.
 */
function normalizeSpaces(s) {
  return decodeEntities(s).replace(/\s+/g, " ").trim();
}
/**
 * Map an `index.html` file to the site-relative page path it serves.
 *
 * Example: `<inDir>/foo/index.html` -> `/foo/`, `<inDir>/index.html` -> `/`.
 * The file name must be exactly `index.html` (case-insensitive); names that
 * merely end with it, such as `myindex.html`, are rejected.
 *
 * @param {string} inDirAbs - Absolute path of the input root directory.
 * @param {string} fileAbs - Absolute path of the candidate file.
 * @returns {string|null} Page path with leading and trailing "/", or `null`
 *   when the file is not an `index.html`.
 */
function relPageFromIndexHtml(inDirAbs, fileAbs) {
  // Use forward slashes regardless of platform.
  const rel = path.relative(inDirAbs, fileAbs).replace(/\\/g, "/");

  // Anchor on a full path segment so "xindex.html" does not qualify.
  const indexRe = /(^|\/)index\.html$/i;
  if (!indexRe.test(rel)) return null;

  // Drop the file name, keep the directory part and its trailing slash.
  return "/" + rel.replace(indexRe, "$1");
}
/**
 * Build the paragraph index.
 *
 * Scans every `index.html` below the input directory, extracts each
 * `<p id="p-<digits>-...">` element, and writes a JSON file of the form
 * `{ schema, generatedAt, items: [{ id, page, text }], byId: { id: position } }`.
 * The first occurrence of an id wins; paragraphs with no text are skipped.
 *
 * A missing input directory, or one without any `index.html`, is a clean
 * skip: a message is logged and nothing is written.
 *
 * @returns {Promise<void>}
 * @throws Rejects on filesystem errors while reading pages or writing output.
 */
async function main() {
  const { inDir, outFile } = parseArgs(process.argv.slice(2));
  const CWD = process.cwd();

  const inDirAbs = path.isAbsolute(inDir) ? inDir : path.join(CWD, inDir);
  const outAbs = path.isAbsolute(outFile) ? outFile : path.join(CWD, outFile);

  // Deliberately lenient: if the input directory is absent, skip cleanly.
  // Returning (rather than process.exit) lets stdout flush before exit.
  if (!(await exists(inDirAbs))) {
    console.log(`ℹ️ para-index: skip (input missing): ${inDir}`);
    return;
  }

  // Compare the basename so files like "myindex.html" are not picked up.
  const files = (await walk(inDirAbs)).filter(
    (p) => path.basename(p).toLowerCase() === "index.html",
  );

  if (!files.length) {
    console.log(`ℹ️ para-index: skip (no index.html found in): ${inDir}`);
    return;
  }

  const items = [];
  // Prototype-less map: id -> position of the entry in `items`.
  const byId = Object.create(null);

  // <p ... id="p-...">...</p>
  // The id pattern is intentionally strict to avoid false positives, and
  // whitespace is required before `id` so `data-id="p-..."` does not match.
  const reP = /<p\b[^>]*?\sid\s*=\s*["'](p-\d+-[^"']+)["'][^>]*>([\s\S]*?)<\/p>/gi;

  for (const f of files) {
    const page = relPageFromIndexHtml(inDirAbs, f);
    if (!page) continue;

    const html = await fs.readFile(f, "utf8");

    // matchAll works on a private copy of the regex, so no lastIndex state
    // is shared between files.
    for (const [, id, inner] of html.matchAll(reP)) {
      if (byId[id] != null) continue; // guard against duplicate ids

      const text = normalizeSpaces(stripTags(inner));
      if (!text) continue;

      byId[id] = items.length;
      items.push({ id, page, text });
    }
  }

  const out = {
    schema: 1,
    generatedAt: new Date().toISOString(),
    items,
    byId,
  };

  await fs.mkdir(path.dirname(outAbs), { recursive: true });
  await fs.writeFile(outAbs, JSON.stringify(out), "utf8");

  console.log(`✅ para-index: items=${items.length} -> ${path.relative(CWD, outAbs)}`);
}
// Script entry point: run main() and turn any rejection into a logged
// failure with a non-zero exit status.
main().catch((e) => {
  console.error("FAIL: build-para-index crashed:", e);
  process.exit(1);
});