#!/usr/bin/env node
// scripts/build-annotations-index.mjs
// Builds dist/annotations-index.json from src/annotations/**/*.yml
// Supported layouts:
// - monolith : src/annotations/<pageKey>.yml
// - shard    : src/annotations/<pageKey>/<paraId>.yml (paraId = p-<n>-...)
// Invariants:
// - doc.schema === 1
// - doc.page (when present) == pageKey inferred from the file path
// - shard: doc.paras must contain EXACTLY the key paraId (otherwise the build fails)
//
// Non-destructive deep merge (media/refs/comments de-duplicated), stable ordering.

import fs from "node:fs/promises";
import path from "node:path";
import YAML from "yaml";

const ROOT = process.cwd();
const ANNO_ROOT = path.join(ROOT, "src", "annotations");
const DIST_DIR = path.join(ROOT, "dist");
const OUT = path.join(DIST_DIR, "annotations-index.json");

/**
 * Throws an Error carrying `msg` when `cond` is falsy.
 * @param {unknown} cond
 * @param {string} msg
 */
function assert(cond, msg) {
  if (!cond) throw new Error(msg);
}

/** @returns {boolean} true for non-null, non-array values whose typeof is "object". */
function isObj(x) {
  return !!x && typeof x === "object" && !Array.isArray(x);
}

/** @returns {boolean} true when `x` is an array. */
function isArr(x) {
  return Array.isArray(x);
}

/**
 * Normalizes a path-like value: backslashes become forward slashes and
 * leading/trailing slashes are removed. Falsy input yields "".
 * @param {unknown} s
 * @returns {string}
 */
function normPath(s) {
  return String(s || "")
    .replace(/\\/g, "/")
    .replace(/^\/+|\/+$/g, "");
}

/**
 * Extracts the number from a paragraph id of the form `p-<n>-...`.
 * @param {unknown} pid
 * @returns {number} the parsed number, or +Infinity when the id has no such prefix.
 */
function paraNum(pid) {
  const m = String(pid).match(/^p-(\d+)-/i);
  return m ? Number(m[1]) : Number.POSITIVE_INFINITY;
}

/**
 * Total order over paragraph ids: ids with a `p-<n>-` prefix come first,
 * ordered by <n>; ids with the same number (or no number at all) are
 * ordered by name.
 *
 * Unnumbered ids are always placed after numbered ones so the comparator
 * stays transitive; mixing numeric and textual comparison between a numbered
 * and an unnumbered id can produce ordering cycles.
 * @param {string} a
 * @param {string} b
 * @returns {number}
 */
function compareParaIds(a, b) {
  const na = paraNum(a);
  const nb = paraNum(b);
  if (na !== nb) return na < nb ? -1 : 1;
  return String(a).localeCompare(String(b));
}

/**
 * Sorts `arr` in place by ascending `ts` (parsed with Date.parse; a missing
 * or unparsable `ts` counts as 0). Ties are broken by comparing the JSON
 * serialization of the items. Does nothing when `arr` is not an array.
 * @param {unknown} arr
 */
function stableSortByTs(arr) {
  if (!Array.isArray(arr)) return;
  arr.sort((a, b) => {
    const ta = Date.parse(a?.ts || "") || 0;
    const tb = Date.parse(b?.ts || "") || 0;
    if (ta !== tb) return ta - tb;
    return JSON.stringify(a).localeCompare(JSON.stringify(b));
  });
}

/** De-duplication key for a media item: its `src`. */
function keyMedia(x) {
  return String(x?.src || "");
}

/**
 * De-duplication key for a reference: url, label, kind and citation joined
 * with "||". The key is never empty, even when all four fields are missing.
 */
function keyRef(x) {
  return `${x?.url || ""}||${x?.label || ""}||${x?.kind || ""}||${x?.citation || ""}`;
}

/** De-duplication key for an editorial comment: its trimmed `text`. */
function keyComment(x) {
  return String(x?.text || "").trim();
}

/**
 * Returns a new array holding every item of `dst` followed by the items of
 * `src` whose key is not already present. Items of `src` with an empty key
 * are dropped; items already in `dst` are kept as they are.
 * @param {unknown} dst - existing items (anything other than an array counts as empty)
 * @param {unknown} src - incoming items (anything other than an array counts as empty)
 * @param {(item: any) => string} keyFn - computes the de-duplication key
 * @returns {any[]}
 */
function uniqUnion(dst, src, keyFn) {
  const out = isArr(dst) ? [...dst] : [];
  const seen = new Set(out.map((x) => keyFn(x)));
  for (const it of isArr(src) ? src : []) {
    const k = keyFn(it);
    if (!k) continue;
    if (!seen.has(k)) {
      seen.add(k);
      out.push(it);
    }
  }
  return out;
}

/**
 * Merges `src` into `dst` in place without overwriting existing data:
 * - `media`, `refs`, `comments_editorial` arrays: union by their dedicated key;
 * - nested objects: merged recursively;
 * - other arrays: union by JSON serialization;
 * - scalars: written only when `dst` has no own value, or holds null/undefined/"".
 * Does nothing unless both arguments are plain objects.
 * @param {object} dst - merge target (mutated)
 * @param {object} src - values to merge in
 */
function deepMergeEntry(dst, src) {
  if (!isObj(dst) || !isObj(src)) return;
  for (const [k, v] of Object.entries(src)) {
    // A parsed document can carry an own "__proto__" key; following it would
    // merge the value into Object.prototype instead of into `dst`.
    if (k === "__proto__") continue;

    if (k === "media" && isArr(v)) {
      dst.media = uniqUnion(dst.media, v, keyMedia);
      continue;
    }
    if (k === "refs" && isArr(v)) {
      dst.refs = uniqUnion(dst.refs, v, keyRef);
      continue;
    }
    if (k === "comments_editorial" && isArr(v)) {
      dst.comments_editorial = uniqUnion(dst.comments_editorial, v, keyComment);
      continue;
    }

    if (isObj(v)) {
      if (!isObj(dst[k])) dst[k] = {};
      deepMergeEntry(dst[k], v);
      continue;
    }

    if (isArr(v)) {
      const cur = isArr(dst[k]) ? dst[k] : [];
      const seen = new Set(cur.map((x) => JSON.stringify(x)));
      const out = [...cur];
      for (const it of v) {
        const s = JSON.stringify(it);
        if (!seen.has(s)) {
          seen.add(s);
          out.push(it);
        }
      }
      dst[k] = out;
      continue;
    }

    // scalar: set only if missing/empty. Only own properties count as
    // "present", so a field that shares its name with an inherited member
    // (e.g. "toString") is not silently discarded.
    const hasOwn = Object.prototype.hasOwnProperty.call(dst, k);
    if (!hasOwn || dst[k] == null || dst[k] === "") dst[k] = v;
  }
}

/**
 * Recursively collects the paths of all `.yml` / `.yaml` files under `dir`.
 * The order of the result follows fs.readdir and is therefore not guaranteed.
 * @param {string} dir
 * @returns {Promise<string[]>}
 */
async function walk(dir) {
  const out = [];
  const ents = await fs.readdir(dir, { withFileTypes: true });
  for (const e of ents) {
    const p = path.join(dir, e.name);
    if (e.isDirectory()) out.push(...(await walk(p)));
    else if (e.isFile() && /\.ya?ml$/i.test(e.name)) out.push(p);
  }
  return out;
}

/**
 * Infers what a file is expected to describe from its path relative to the
 * annotations root (forward slashes, extension removed).
 * @param {string} relNoExt
 * @returns {{ isShard: boolean, pageKey: string, paraId: string | null }}
 *   A shard is a file nested at least one directory deep whose base name
 *   starts with `p-<n>-`; its pageKey is the directory part and its paraId
 *   the base name. Anything else is a monolith whose pageKey is the whole
 *   relative path.
 */
function inferExpectedFromRel(relNoExt) {
  const parts = relNoExt.split("/").filter(Boolean);
  const last = parts.at(-1) || "";
  // Hardening: a shard must both be nested and look like a paragraph id.
  const isShard = parts.length > 1 && /^p-\d+-/i.test(last);
  const pageKey = isShard ? parts.slice(0, -1).join("/") : relNoExt;
  const paraId = isShard ? last : null;
  return { isShard, pageKey, paraId };
}

/**
 * Checks the invariants of a parsed annotation document and fills in
 * `doc.page` when it is absent.
 * @param {any} doc - parsed YAML document (mutated: `page` may be set)
 * @param {string} relFile - file label used as prefix of error messages
 * @param {string} expectedPageKey - page key inferred from the file path
 * @param {string | null} expectedParaId - paragraph id for shards, null for monoliths
 * @returns {any} the same `doc`
 * @throws {Error} when the document is not an object, its schema is not 1,
 *   `paras` is not an object, `page` disagrees with the path, or a shard does
 *   not contain exactly the expected paragraph key.
 */
function validateAndNormalizeDoc(doc, relFile, expectedPageKey, expectedParaId) {
  assert(isObj(doc), `${relFile}: doc must be an object`);
  assert(doc.schema === 1, `${relFile}: schema must be 1`);
  assert(isObj(doc.paras), `${relFile}: missing object key "paras"`);

  const gotPage = doc.page != null ? normPath(doc.page) : "";
  const expPage = normPath(expectedPageKey);
  if (gotPage) {
    assert(
      gotPage === expPage,
      `${relFile}: page mismatch (page="${doc.page}" vs path="${expectedPageKey}")`
    );
  } else {
    doc.page = expPage;
  }

  if (expectedParaId) {
    const keys = Object.keys(doc.paras || {}).map(String);
    assert(
      keys.includes(expectedParaId),
      `${relFile}: shard mismatch: must contain paras["${expectedParaId}"]`
    );
    assert(
      keys.length === 1 && keys[0] === expectedParaId,
      `${relFile}: shard invariant violated: shard file must contain ONLY paras["${expectedParaId}"] (got: ${keys.join(", ")})`
    );
  }

  return doc;
}

/**
 * Merges one paragraph entry into the page accumulator and re-sorts the
 * timestamped lists of the merged paragraph.
 * @param {{ paras: Record<string, any> }} pg - page accumulator (mutated)
 * @param {string} pid - paragraph id
 * @param {unknown} entry - paragraph data; ignored unless it is a plain object
 * @throws {Error} when the paragraph id is "__proto__"
 */
function mergePara(pg, pid, entry) {
  const id = String(pid);
  // Looking up "__proto__" on a plain object yields Object.prototype, which
  // would then become the merge target.
  assert(id !== "__proto__", `invalid paragraph id "${id}"`);

  if (!isObj(pg.paras[id])) pg.paras[id] = {};
  const target = pg.paras[id];
  if (isObj(entry)) deepMergeEntry(target, entry);

  stableSortByTs(target.media);
  stableSortByTs(target.refs);
  stableSortByTs(target.comments_editorial);
}

/**
 * Reads every annotation file, validates and merges it into a per-page
 * index, then writes the index to dist/annotations-index.json.
 * @throws {Error} when at least one file failed to load, parse or validate;
 *   nothing is written in that case.
 */
async function main() {
  // Null-prototype map: page keys come from directory and file names, so they
  // must not collide with members inherited from Object.prototype.
  const pages = Object.create(null);
  const errors = [];

  await fs.mkdir(DIST_DIR, { recursive: true });

  // Scalars are merged first-writer-wins, so the processing order affects the
  // result. Sorting by normalized relative path makes the output independent
  // of directory listing order and of the platform's path separator.
  const files = (await walk(ANNO_ROOT))
    .map((fp) => ({ fp, rel: normPath(path.relative(ANNO_ROOT, fp)) }))
    .sort((a, b) => (a.rel < b.rel ? -1 : a.rel > b.rel ? 1 : 0));

  for (const { fp, rel } of files) {
    const relNoExt = rel.replace(/\.ya?ml$/i, "");
    const { isShard, pageKey, paraId } = inferExpectedFromRel(relNoExt);

    try {
      const raw = await fs.readFile(fp, "utf8");
      const doc = YAML.parse(raw) || {};

      // Files that are empty, not an object, or not schema 1 are skipped
      // without being reported as errors.
      if (!isObj(doc) || doc.schema !== 1) continue;

      validateAndNormalizeDoc(
        doc,
        `src/annotations/${rel}`,
        pageKey,
        isShard ? paraId : null
      );

      const pg = (pages[pageKey] ??= { paras: {} });

      // A validated shard holds exactly one key (its paraId), so shards and
      // monoliths can share the same merge loop.
      for (const [pid, entry] of Object.entries(doc.paras)) {
        mergePara(pg, pid, entry);
      }
    } catch (e) {
      errors.push({ file: `src/annotations/${rel}`, error: String(e?.message || e) });
    }
  }

  if (errors.length) {
    // Report every failure so that a single run shows all files to fix.
    for (const { file, error } of errors) {
      console.error(`✗ ${file}: ${error}`);
    }
    throw new Error(`${errors[0].file}: ${errors[0].error}`);
  }

  // Rebuild each page's paragraph map in sorted key order.
  for (const pg of Object.values(pages)) {
    const sorted = {};
    for (const k of Object.keys(pg.paras || {}).sort(compareParaIds)) {
      sorted[k] = pg.paras[k];
    }
    pg.paras = sorted;
  }

  const out = {
    schema: 1,
    generatedAt: new Date().toISOString(),
    pages,
    stats: {
      pages: Object.keys(pages).length,
      paras: Object.values(pages).reduce((n, p) => n + Object.keys(p.paras || {}).length, 0),
      errors: errors.length,
    },
    errors,
  };

  await fs.writeFile(OUT, JSON.stringify(out), "utf8");
  console.log(`✅ annotations-index: pages=${out.stats.pages} paras=${out.stats.paras} -> dist/annotations-index.json`);
}

main().catch((e) => {
  console.error(`FAIL: build-annotations-index crashed: ${e?.stack || e?.message || e}`);
  process.exit(1);
});