Files
archicratie-edition/scripts/build-annotations-index.mjs
Archicratia 210f621487
All checks were successful
CI / build-and-anchors (push) Successful in 1m58s
SMOKE / smoke (push) Successful in 13s
ci: support shard annotations in checks + endpoint (pageKey inference)
2026-02-27 13:13:31 +01:00

246 lines
7.3 KiB
JavaScript

#!/usr/bin/env node
// scripts/build-annotations-index.mjs
// Construit dist/annotations-index.json à partir de src/annotations/**/*.yml
// Supporte:
// - monolith : src/annotations/<pageKey>.yml
// - shard : src/annotations/<pageKey>/<paraId>.yml (paraId = p-<n>-...)
// Invariants:
// - doc.schema === 1
// - doc.page (si présent) == pageKey déduit du chemin
// - shard: doc.paras doit contenir EXACTEMENT la clé paraId (sinon fail)
//
// Deep-merge non destructif (media/refs/comments dédupliqués), tri stable.
import fs from "node:fs/promises";
import path from "node:path";
import YAML from "yaml";
const ROOT = process.cwd();
const ANNO_ROOT = path.join(ROOT, "src", "annotations");
const DIST_DIR = path.join(ROOT, "dist");
const OUT = path.join(DIST_DIR, "annotations-index.json");
function assert(cond, msg) {
if (!cond) throw new Error(msg);
}
function isObj(x) {
return !!x && typeof x === "object" && !Array.isArray(x);
}
function isArr(x) {
return Array.isArray(x);
}
function normPath(s) {
return String(s || "")
.replace(/\\/g, "/")
.replace(/^\/+|\/+$/g, "");
}
function paraNum(pid) {
const m = String(pid).match(/^p-(\d+)-/i);
return m ? Number(m[1]) : Number.POSITIVE_INFINITY;
}
function stableSortByTs(arr) {
if (!Array.isArray(arr)) return;
arr.sort((a, b) => {
const ta = Date.parse(a?.ts || "") || 0;
const tb = Date.parse(b?.ts || "") || 0;
if (ta !== tb) return ta - tb;
return JSON.stringify(a).localeCompare(JSON.stringify(b));
});
}
function keyMedia(x) { return String(x?.src || ""); }
function keyRef(x) {
return `${x?.url || ""}||${x?.label || ""}||${x?.kind || ""}||${x?.citation || ""}`;
}
function keyComment(x) { return String(x?.text || "").trim(); }
function uniqUnion(dst, src, keyFn) {
const out = isArr(dst) ? [...dst] : [];
const seen = new Set(out.map((x) => keyFn(x)));
for (const it of (isArr(src) ? src : [])) {
const k = keyFn(it);
if (!k) continue;
if (!seen.has(k)) {
seen.add(k);
out.push(it);
}
}
return out;
}
function deepMergeEntry(dst, src) {
if (!isObj(dst) || !isObj(src)) return;
for (const [k, v] of Object.entries(src)) {
if (k === "media" && isArr(v)) { dst.media = uniqUnion(dst.media, v, keyMedia); continue; }
if (k === "refs" && isArr(v)) { dst.refs = uniqUnion(dst.refs, v, keyRef); continue; }
if (k === "comments_editorial" && isArr(v)) { dst.comments_editorial = uniqUnion(dst.comments_editorial, v, keyComment); continue; }
if (isObj(v)) {
if (!isObj(dst[k])) dst[k] = {};
deepMergeEntry(dst[k], v);
continue;
}
if (isArr(v)) {
const cur = isArr(dst[k]) ? dst[k] : [];
const seen = new Set(cur.map((x) => JSON.stringify(x)));
const out = [...cur];
for (const it of v) {
const s = JSON.stringify(it);
if (!seen.has(s)) { seen.add(s); out.push(it); }
}
dst[k] = out;
continue;
}
// scalar: set only if missing/empty
if (!(k in dst) || dst[k] == null || dst[k] === "") dst[k] = v;
}
}
async function walk(dir) {
const out = [];
const ents = await fs.readdir(dir, { withFileTypes: true });
for (const e of ents) {
const p = path.join(dir, e.name);
if (e.isDirectory()) out.push(...await walk(p));
else if (e.isFile() && /\.ya?ml$/i.test(e.name)) out.push(p);
}
return out;
}
function inferExpectedFromRel(relNoExt) {
const parts = relNoExt.split("/").filter(Boolean);
const last = parts.at(-1) || "";
const isShard = parts.length > 1 && /^p-\d+-/i.test(last); // ✅ durcissement
const pageKey = isShard ? parts.slice(0, -1).join("/") : relNoExt;
const paraId = isShard ? last : null;
return { isShard, pageKey, paraId };
}
function validateAndNormalizeDoc(doc, relFile, expectedPageKey, expectedParaId) {
assert(isObj(doc), `${relFile}: doc must be an object`);
assert(doc.schema === 1, `${relFile}: schema must be 1`);
assert(isObj(doc.paras), `${relFile}: missing object key "paras"`);
const gotPage = doc.page != null ? normPath(doc.page) : "";
const expPage = normPath(expectedPageKey);
if (gotPage) {
assert(
gotPage === expPage,
`${relFile}: page mismatch (page="${doc.page}" vs path="${expectedPageKey}")`
);
} else {
doc.page = expPage;
}
if (expectedParaId) {
const keys = Object.keys(doc.paras || {}).map(String);
assert(
keys.includes(expectedParaId),
`${relFile}: shard mismatch: must contain paras["${expectedParaId}"]`
);
assert(
keys.length === 1 && keys[0] === expectedParaId,
`${relFile}: shard invariant violated: shard file must contain ONLY paras["${expectedParaId}"] (got: ${keys.join(", ")})`
);
}
return doc;
}
async function main() {
const pages = {};
const errors = [];
await fs.mkdir(DIST_DIR, { recursive: true });
const files = await walk(ANNO_ROOT);
for (const fp of files) {
const rel = normPath(path.relative(ANNO_ROOT, fp));
const relNoExt = rel.replace(/\.ya?ml$/i, "");
const { isShard, pageKey, paraId } = inferExpectedFromRel(relNoExt);
try {
const raw = await fs.readFile(fp, "utf8");
const doc = YAML.parse(raw) || {};
if (!isObj(doc) || doc.schema !== 1) continue;
validateAndNormalizeDoc(
doc,
`src/annotations/${rel}`,
pageKey,
isShard ? paraId : null
);
const pg = (pages[pageKey] ??= { paras: {} });
if (isShard) {
const entry = doc.paras[paraId];
if (!isObj(pg.paras[paraId])) pg.paras[paraId] = {};
if (isObj(entry)) deepMergeEntry(pg.paras[paraId], entry);
stableSortByTs(pg.paras[paraId].media);
stableSortByTs(pg.paras[paraId].refs);
stableSortByTs(pg.paras[paraId].comments_editorial);
} else {
for (const [pid, entry] of Object.entries(doc.paras || {})) {
const p = String(pid);
if (!isObj(pg.paras[p])) pg.paras[p] = {};
if (isObj(entry)) deepMergeEntry(pg.paras[p], entry);
stableSortByTs(pg.paras[p].media);
stableSortByTs(pg.paras[p].refs);
stableSortByTs(pg.paras[p].comments_editorial);
}
}
} catch (e) {
errors.push({ file: `src/annotations/${rel}`, error: String(e?.message || e) });
}
}
for (const [pageKey, pg] of Object.entries(pages)) {
const keys = Object.keys(pg.paras || {});
keys.sort((a, b) => {
const ia = paraNum(a);
const ib = paraNum(b);
if (Number.isFinite(ia) && Number.isFinite(ib) && ia !== ib) return ia - ib;
return String(a).localeCompare(String(b));
});
const next = {};
for (const k of keys) next[k] = pg.paras[k];
pg.paras = next;
}
const out = {
schema: 1,
generatedAt: new Date().toISOString(),
pages,
stats: {
pages: Object.keys(pages).length,
paras: Object.values(pages).reduce((n, p) => n + Object.keys(p.paras || {}).length, 0),
errors: errors.length,
},
errors,
};
if (errors.length) {
throw new Error(`${errors[0].file}: ${errors[0].error}`);
}
await fs.writeFile(OUT, JSON.stringify(out), "utf8");
console.log(`✅ annotations-index: pages=${out.stats.pages} paras=${out.stats.paras} -> dist/annotations-index.json`);
}
main().catch((e) => {
console.error(`FAIL: build-annotations-index crashed: ${e?.stack || e?.message || e}`);
process.exit(1);
});