160 lines
5.0 KiB
JavaScript
160 lines
5.0 KiB
JavaScript
// scripts/build-annotations-index.mjs
|
||
import fs from "node:fs/promises";
|
||
import path from "node:path";
|
||
import YAML from "yaml";
|
||
|
||
function parseArgs(argv) {
  // CLI options with repo-layout defaults. Both flags accept the
  // two-token form ("--in value") and the single-token form ("--in=value").
  const out = {
    inDir: "src/annotations",
    outFile: "dist/annotations-index.json",
  };

  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];

    if (arg === "--in" && argv[i + 1]) {
      out.inDir = argv[++i];
    } else if (arg.startsWith("--in=")) {
      out.inDir = arg.slice("--in=".length);
    }

    if (arg === "--out" && argv[i + 1]) {
      out.outFile = argv[++i];
    } else if (arg.startsWith("--out=")) {
      out.outFile = arg.slice("--out=".length);
    }
  }

  return out;
}
|
||
|
||
async function exists(p) {
  // fs.access rejects when the path is absent/unreachable; fold that into a boolean.
  try {
    await fs.access(p);
    return true;
  } catch {
    return false;
  }
}
|
||
|
||
async function walk(dir) {
  // Recursive depth-first listing of every file under `dir`
  // (paths are joined onto `dir`, so they stay relative/absolute as given).
  const entries = await fs.readdir(dir, { withFileTypes: true });
  const files = [];

  for (const entry of entries) {
    const full = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      files.push(...(await walk(full)));
    } else {
      files.push(full);
    }
  }

  return files;
}
|
||
|
||
function inferPageKeyFromFile(inDirAbs, fileAbs) {
  // src/annotations/<page>.yml -> "<page>"
  // Normalize Windows separators to "/" and strip the recognised extension.
  const rel = path.relative(inDirAbs, fileAbs);
  const posix = rel.replace(/\\/g, "/");
  return posix.replace(/\.(ya?ml|json)$/i, "");
}
|
||
|
||
function assert(cond, msg) {
  // Validation helper: fail loudly with a file-scoped message.
  if (cond) return;
  throw new Error(msg);
}
|
||
|
||
function isPlainObject(x) {
  // True for object values that are not arrays; note typeof null === "object",
  // so null is rejected explicitly first.
  if (x === null || x === undefined) return false;
  return typeof x === "object" && Array.isArray(x) === false;
}
|
||
|
||
function normalizePageKey(s) {
  // No leading or trailing slashes; null/undefined/falsy become "".
  const key = String(s || "");
  return key.replace(/^\/+|\/+$/g, "");
}
|
||
|
||
function validateAndNormalizeDoc(doc, pageKey, fileRel) {
  // Validates one annotations document and returns its normalized `paras` map.
  // - doc: parsed YAML/JSON document; must be { schema: 1, paras: {...} },
  //   optionally with a `page` key cross-checked against the file path.
  // - pageKey: page key inferred from the file location under the input dir.
  // - fileRel: CWD-relative file path, used only to prefix error messages.
  // Throws (via assert) on any structural violation.
  assert(isPlainObject(doc), `${fileRel}: document must be an object`);
  assert(doc.schema === 1, `${fileRel}: schema must be 1`);
  if (doc.page != null) {
    assert(
      normalizePageKey(doc.page) === pageKey,
      `${fileRel}: page mismatch (page="${doc.page}" vs path="${pageKey}")`
    );
  }
  assert(isPlainObject(doc.paras), `${fileRel}: missing object key "paras"`);

  // Null-prototype map: para ids come from user-authored YAML, so avoid
  // prototype-key surprises ("__proto__", "constructor", ...).
  const parasOut = Object.create(null);

  // Optional entry fields that must be arrays when present
  // (sanity checks only — non-destructive, nothing is overwritten).
  const arrayFields = ["refs", "authors", "quotes", "media", "comments_editorial"];

  for (const [paraId, entry] of Object.entries(doc.paras)) {
    assert(/^p-\d+-/i.test(paraId), `${fileRel}: invalid para id "${paraId}"`);

    // An entry may be empty/null, but must be a plain object when present.
    assert(entry == null || isPlainObject(entry), `${fileRel}: paras.${paraId} must be an object`);

    const e = entry ? { ...entry } : {};

    for (const field of arrayFields) {
      if (e[field] != null) {
        assert(Array.isArray(e[field]), `${fileRel}: paras.${paraId}.${field} must be an array`);
      }
    }

    parasOut[paraId] = e;
  }

  return parasOut;
}
|
||
|
||
async function readDoc(fileAbs) {
  // Parse a document, dispatching on extension: .json via JSON, anything else
  // (i.e. .yml/.yaml, the only other extensions the caller passes) via YAML.
  const raw = await fs.readFile(fileAbs, "utf8");
  const isJson = /\.json$/i.test(fileAbs);
  return isJson ? JSON.parse(raw) : YAML.parse(raw);
}
|
||
|
||
async function main() {
  // Build dist/annotations-index.json from per-page YAML/JSON files under
  // src/annotations. Exits 0 (skip) when the input is missing or empty;
  // throws on any malformed document.
  const { inDir, outFile } = parseArgs(process.argv.slice(2));
  const CWD = process.cwd();

  const inDirAbs = path.isAbsolute(inDir) ? inDir : path.join(CWD, inDir);
  const outAbs = path.isAbsolute(outFile) ? outFile : path.join(CWD, outFile);

  // Antifragile: a missing input directory is a no-op, not a build failure.
  if (!(await exists(inDirAbs))) {
    console.log(`ℹ️ annotations-index: skip (input missing): ${inDir}`);
    process.exit(0);
  }

  // Sort so the emitted index is deterministic regardless of readdir order
  // (which varies by platform/filesystem).
  const files = (await walk(inDirAbs))
    .filter((p) => /\.(ya?ml|json)$/i.test(p))
    .sort();
  if (!files.length) {
    console.log(`ℹ️ annotations-index: skip (no .yml/.yaml/.json found in): ${inDir}`);
    process.exit(0);
  }

  const pages = Object.create(null);
  let paraCount = 0;

  for (const f of files) {
    const fileRel = path.relative(CWD, f).replace(/\\/g, "/");
    const pageKey = normalizePageKey(inferPageKeyFromFile(inDirAbs, f));
    assert(pageKey, `${fileRel}: cannot infer page key`);

    let doc;
    try {
      doc = await readDoc(f);
    } catch (e) {
      // Re-wrap parser errors so the failing file is always named.
      throw new Error(`${fileRel}: parse failed: ${String(e?.message ?? e)}`);
    }

    const paras = validateAndNormalizeDoc(doc, pageKey, fileRel);

    // Canon: one file per page.
    assert(!pages[pageKey], `${fileRel}: duplicate page "${pageKey}" (only one file per page)`);
    pages[pageKey] = { paras };
    paraCount += Object.keys(paras).length;
  }

  const out = {
    schema: 1,
    generatedAt: new Date().toISOString(),
    pages,
    stats: {
      pages: Object.keys(pages).length,
      paras: paraCount,
    },
  };

  await fs.mkdir(path.dirname(outAbs), { recursive: true });
  await fs.writeFile(outAbs, JSON.stringify(out), "utf8");

  console.log(`✅ annotations-index: pages=${out.stats.pages} paras=${out.stats.paras} -> ${path.relative(CWD, outAbs)}`);
}
|
||
|
||
main().catch((err) => {
  // Any uncaught failure is fatal: report it and exit non-zero for CI.
  console.error("FAIL: build-annotations-index crashed:", err);
  process.exit(1);
});
|