Compare commits

...

2 Commits

Author SHA1 Message Date
210f621487 ci: support shard annotations in checks + endpoint (pageKey inference)
All checks were successful
CI / build-and-anchors (push) Successful in 1m58s
SMOKE / smoke (push) Successful in 13s
2026-02-27 13:13:31 +01:00
8ad960dc69 anno: build-annotations-index supports shard annotations
Some checks failed
SMOKE / smoke (push) Successful in 16s
CI / build-and-anchors (push) Failing after 1m48s
2026-02-27 12:27:35 +01:00
4 changed files with 271 additions and 125 deletions

View File

@@ -1,28 +1,106 @@
#!/usr/bin/env node
// scripts/build-annotations-index.mjs // scripts/build-annotations-index.mjs
// Construit dist/annotations-index.json à partir de src/annotations/**/*.yml
// Supporte:
// - monolith : src/annotations/<pageKey>.yml
// - shard : src/annotations/<pageKey>/<paraId>.yml (paraId = p-<n>-...)
// Invariants:
// - doc.schema === 1
// - doc.page (si présent) == pageKey déduit du chemin
// - shard: doc.paras doit contenir EXACTEMENT la clé paraId (sinon fail)
//
// Deep-merge non destructif (media/refs/comments dédupliqués), tri stable.
import fs from "node:fs/promises"; import fs from "node:fs/promises";
import path from "node:path"; import path from "node:path";
import YAML from "yaml"; import YAML from "yaml";
function parseArgs(argv) { const ROOT = process.cwd();
const out = { const ANNO_ROOT = path.join(ROOT, "src", "annotations");
inDir: "src/annotations", const DIST_DIR = path.join(ROOT, "dist");
outFile: "dist/annotations-index.json", const OUT = path.join(DIST_DIR, "annotations-index.json");
};
for (let i = 0; i < argv.length; i++) { function assert(cond, msg) {
const a = argv[i]; if (!cond) throw new Error(msg);
}
if (a === "--in" && argv[i + 1]) out.inDir = argv[++i]; function isObj(x) {
else if (a.startsWith("--in=")) out.inDir = a.slice("--in=".length); return !!x && typeof x === "object" && !Array.isArray(x);
}
function isArr(x) {
return Array.isArray(x);
}
if (a === "--out" && argv[i + 1]) out.outFile = argv[++i]; function normPath(s) {
else if (a.startsWith("--out=")) out.outFile = a.slice("--out=".length); return String(s || "")
.replace(/\\/g, "/")
.replace(/^\/+|\/+$/g, "");
}
function paraNum(pid) {
const m = String(pid).match(/^p-(\d+)-/i);
return m ? Number(m[1]) : Number.POSITIVE_INFINITY;
}
function stableSortByTs(arr) {
if (!Array.isArray(arr)) return;
arr.sort((a, b) => {
const ta = Date.parse(a?.ts || "") || 0;
const tb = Date.parse(b?.ts || "") || 0;
if (ta !== tb) return ta - tb;
return JSON.stringify(a).localeCompare(JSON.stringify(b));
});
}
function keyMedia(x) { return String(x?.src || ""); }
function keyRef(x) {
return `${x?.url || ""}||${x?.label || ""}||${x?.kind || ""}||${x?.citation || ""}`;
}
function keyComment(x) { return String(x?.text || "").trim(); }
function uniqUnion(dst, src, keyFn) {
const out = isArr(dst) ? [...dst] : [];
const seen = new Set(out.map((x) => keyFn(x)));
for (const it of (isArr(src) ? src : [])) {
const k = keyFn(it);
if (!k) continue;
if (!seen.has(k)) {
seen.add(k);
out.push(it);
}
} }
return out; return out;
} }
async function exists(p) { function deepMergeEntry(dst, src) {
try { await fs.access(p); return true; } catch { return false; } if (!isObj(dst) || !isObj(src)) return;
for (const [k, v] of Object.entries(src)) {
if (k === "media" && isArr(v)) { dst.media = uniqUnion(dst.media, v, keyMedia); continue; }
if (k === "refs" && isArr(v)) { dst.refs = uniqUnion(dst.refs, v, keyRef); continue; }
if (k === "comments_editorial" && isArr(v)) { dst.comments_editorial = uniqUnion(dst.comments_editorial, v, keyComment); continue; }
if (isObj(v)) {
if (!isObj(dst[k])) dst[k] = {};
deepMergeEntry(dst[k], v);
continue;
}
if (isArr(v)) {
const cur = isArr(dst[k]) ? dst[k] : [];
const seen = new Set(cur.map((x) => JSON.stringify(x)));
const out = [...cur];
for (const it of v) {
const s = JSON.stringify(it);
if (!seen.has(s)) { seen.add(s); out.push(it); }
}
dst[k] = out;
continue;
}
// scalar: set only if missing/empty
if (!(k in dst) || dst[k] == null || dst[k] === "") dst[k] = v;
}
} }
async function walk(dir) { async function walk(dir) {
@@ -30,111 +108,116 @@ async function walk(dir) {
const ents = await fs.readdir(dir, { withFileTypes: true }); const ents = await fs.readdir(dir, { withFileTypes: true });
for (const e of ents) { for (const e of ents) {
const p = path.join(dir, e.name); const p = path.join(dir, e.name);
if (e.isDirectory()) out.push(...(await walk(p))); if (e.isDirectory()) out.push(...await walk(p));
else out.push(p); else if (e.isFile() && /\.ya?ml$/i.test(e.name)) out.push(p);
} }
return out; return out;
} }
function inferPageKeyFromFile(inDirAbs, fileAbs) { function inferExpectedFromRel(relNoExt) {
// src/annotations/<page>.yml -> "<page>" const parts = relNoExt.split("/").filter(Boolean);
const rel = path.relative(inDirAbs, fileAbs).replace(/\\/g, "/"); const last = parts.at(-1) || "";
return rel.replace(/\.(ya?ml|json)$/i, ""); const isShard = parts.length > 1 && /^p-\d+-/i.test(last); // ✅ durcissement
const pageKey = isShard ? parts.slice(0, -1).join("/") : relNoExt;
const paraId = isShard ? last : null;
return { isShard, pageKey, paraId };
} }
function assert(cond, msg) { function validateAndNormalizeDoc(doc, relFile, expectedPageKey, expectedParaId) {
if (!cond) throw new Error(msg); assert(isObj(doc), `${relFile}: doc must be an object`);
} assert(doc.schema === 1, `${relFile}: schema must be 1`);
assert(isObj(doc.paras), `${relFile}: missing object key "paras"`);
function isPlainObject(x) { const gotPage = doc.page != null ? normPath(doc.page) : "";
return !!x && typeof x === "object" && !Array.isArray(x); const expPage = normPath(expectedPageKey);
}
function normalizePageKey(s) { if (gotPage) {
// pas de / en tête/fin
return String(s || "").replace(/^\/+/, "").replace(/\/+$/, "");
}
function validateAndNormalizeDoc(doc, pageKey, fileRel) {
assert(isPlainObject(doc), `${fileRel}: document must be an object`);
assert(doc.schema === 1, `${fileRel}: schema must be 1`);
if (doc.page != null) {
assert( assert(
normalizePageKey(doc.page) === pageKey, gotPage === expPage,
`${fileRel}: page mismatch (page="${doc.page}" vs path="${pageKey}")` `${relFile}: page mismatch (page="${doc.page}" vs path="${expectedPageKey}")`
);
} else {
doc.page = expPage;
}
if (expectedParaId) {
const keys = Object.keys(doc.paras || {}).map(String);
assert(
keys.includes(expectedParaId),
`${relFile}: shard mismatch: must contain paras["${expectedParaId}"]`
);
assert(
keys.length === 1 && keys[0] === expectedParaId,
`${relFile}: shard invariant violated: shard file must contain ONLY paras["${expectedParaId}"] (got: ${keys.join(", ")})`
); );
} }
assert(isPlainObject(doc.paras), `${fileRel}: missing object key "paras"`);
const parasOut = Object.create(null); return doc;
for (const [paraId, entry] of Object.entries(doc.paras)) {
assert(/^p-\d+-/i.test(paraId), `${fileRel}: invalid para id "${paraId}"`);
// entry peut être vide, mais doit être un objet si présent
assert(entry == null || isPlainObject(entry), `${fileRel}: paras.${paraId} must be an object`);
const e = entry ? { ...entry } : {};
// Sanity checks (non destructifs : on n'écrase pas, on vérifie juste les types)
if (e.refs != null) assert(Array.isArray(e.refs), `${fileRel}: paras.${paraId}.refs must be an array`);
if (e.authors != null) assert(Array.isArray(e.authors), `${fileRel}: paras.${paraId}.authors must be an array`);
if (e.quotes != null) assert(Array.isArray(e.quotes), `${fileRel}: paras.${paraId}.quotes must be an array`);
if (e.media != null) assert(Array.isArray(e.media), `${fileRel}: paras.${paraId}.media must be an array`);
if (e.comments_editorial != null) assert(Array.isArray(e.comments_editorial), `${fileRel}: paras.${paraId}.comments_editorial must be an array`);
parasOut[paraId] = e;
}
return parasOut;
}
async function readDoc(fileAbs) {
const raw = await fs.readFile(fileAbs, "utf8");
if (/\.json$/i.test(fileAbs)) return JSON.parse(raw);
return YAML.parse(raw);
} }
async function main() { async function main() {
const { inDir, outFile } = parseArgs(process.argv.slice(2)); const pages = {};
const CWD = process.cwd(); const errors = [];
const inDirAbs = path.isAbsolute(inDir) ? inDir : path.join(CWD, inDir); await fs.mkdir(DIST_DIR, { recursive: true });
const outAbs = path.isAbsolute(outFile) ? outFile : path.join(CWD, outFile);
// antifragile const files = await walk(ANNO_ROOT);
if (!(await exists(inDirAbs))) {
console.log(` annotations-index: skip (input missing): ${inDir}`);
process.exit(0);
}
const files = (await walk(inDirAbs)).filter((p) => /\.(ya?ml|json)$/i.test(p)); for (const fp of files) {
if (!files.length) { const rel = normPath(path.relative(ANNO_ROOT, fp));
console.log(` annotations-index: skip (no .yml/.yaml/.json found in): ${inDir}`); const relNoExt = rel.replace(/\.ya?ml$/i, "");
process.exit(0); const { isShard, pageKey, paraId } = inferExpectedFromRel(relNoExt);
}
const pages = Object.create(null);
let paraCount = 0;
for (const f of files) {
const fileRel = path.relative(CWD, f).replace(/\\/g, "/");
const pageKey = normalizePageKey(inferPageKeyFromFile(inDirAbs, f));
assert(pageKey, `${fileRel}: cannot infer page key`);
let doc;
try { try {
doc = await readDoc(f); const raw = await fs.readFile(fp, "utf8");
const doc = YAML.parse(raw) || {};
if (!isObj(doc) || doc.schema !== 1) continue;
validateAndNormalizeDoc(
doc,
`src/annotations/${rel}`,
pageKey,
isShard ? paraId : null
);
const pg = (pages[pageKey] ??= { paras: {} });
if (isShard) {
const entry = doc.paras[paraId];
if (!isObj(pg.paras[paraId])) pg.paras[paraId] = {};
if (isObj(entry)) deepMergeEntry(pg.paras[paraId], entry);
stableSortByTs(pg.paras[paraId].media);
stableSortByTs(pg.paras[paraId].refs);
stableSortByTs(pg.paras[paraId].comments_editorial);
} else {
for (const [pid, entry] of Object.entries(doc.paras || {})) {
const p = String(pid);
if (!isObj(pg.paras[p])) pg.paras[p] = {};
if (isObj(entry)) deepMergeEntry(pg.paras[p], entry);
stableSortByTs(pg.paras[p].media);
stableSortByTs(pg.paras[p].refs);
stableSortByTs(pg.paras[p].comments_editorial);
}
}
} catch (e) { } catch (e) {
throw new Error(`${fileRel}: parse failed: ${String(e?.message ?? e)}`); errors.push({ file: `src/annotations/${rel}`, error: String(e?.message || e) });
} }
}
const paras = validateAndNormalizeDoc(doc, pageKey, fileRel); for (const [pageKey, pg] of Object.entries(pages)) {
const keys = Object.keys(pg.paras || {});
// 1 fichier = 1 page (canon) keys.sort((a, b) => {
assert(!pages[pageKey], `${fileRel}: duplicate page "${pageKey}" (only one file per page)`); const ia = paraNum(a);
pages[pageKey] = { paras }; const ib = paraNum(b);
paraCount += Object.keys(paras).length; if (Number.isFinite(ia) && Number.isFinite(ib) && ia !== ib) return ia - ib;
return String(a).localeCompare(String(b));
});
const next = {};
for (const k of keys) next[k] = pg.paras[k];
pg.paras = next;
} }
const out = { const out = {
@@ -143,17 +226,21 @@ async function main() {
pages, pages,
stats: { stats: {
pages: Object.keys(pages).length, pages: Object.keys(pages).length,
paras: paraCount, paras: Object.values(pages).reduce((n, p) => n + Object.keys(p.paras || {}).length, 0),
errors: errors.length,
}, },
errors,
}; };
await fs.mkdir(path.dirname(outAbs), { recursive: true }); if (errors.length) {
await fs.writeFile(outAbs, JSON.stringify(out), "utf8"); throw new Error(`${errors[0].file}: ${errors[0].error}`);
}
console.log(`✅ annotations-index: pages=${out.stats.pages} paras=${out.stats.paras} -> ${path.relative(CWD, outAbs)}`); await fs.writeFile(OUT, JSON.stringify(out), "utf8");
console.log(`✅ annotations-index: pages=${out.stats.pages} paras=${out.stats.paras} -> dist/annotations-index.json`);
} }
main().catch((e) => { main().catch((e) => {
console.error("FAIL: build-annotations-index crashed:", e); console.error(`FAIL: build-annotations-index crashed: ${e?.stack || e?.message || e}`);
process.exit(1); process.exit(1);
}); });

View File

@@ -48,6 +48,9 @@ async function main() {
let missing = 0; let missing = 0;
const notes = []; const notes = [];
// Optim: éviter de vérifier 100 fois le même fichier media
const seenMedia = new Set(); // src string
for (const f of files) { for (const f of files) {
const rel = path.relative(CWD, f).replace(/\\/g, "/"); const rel = path.relative(CWD, f).replace(/\\/g, "/");
const raw = await fs.readFile(f, "utf8"); const raw = await fs.readFile(f, "utf8");
@@ -70,6 +73,10 @@ async function main() {
const src = String(m?.src || ""); const src = String(m?.src || "");
if (!src.startsWith("/media/")) continue; // externes ok, ou autres conventions futures if (!src.startsWith("/media/")) continue; // externes ok, ou autres conventions futures
// dédupe
if (seenMedia.has(src)) continue;
seenMedia.add(src);
checked++; checked++;
const p = toPublicPathFromUrl(src); const p = toPublicPathFromUrl(src);
if (!p) continue; if (!p) continue;

View File

@@ -27,11 +27,6 @@ function escRe(s) {
return String(s).replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); return String(s).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
} }
function inferPageKeyFromFile(fileAbs) {
const rel = path.relative(ANNO_DIR, fileAbs).replace(/\\/g, "/");
return rel.replace(/\.(ya?ml|json)$/i, "");
}
function normalizePageKey(s) { function normalizePageKey(s) {
return String(s || "").replace(/^\/+/, "").replace(/\/+$/, ""); return String(s || "").replace(/^\/+/, "").replace(/\/+$/, "");
} }
@@ -40,6 +35,31 @@ function isPlainObject(x) {
return !!x && typeof x === "object" && !Array.isArray(x); return !!x && typeof x === "object" && !Array.isArray(x);
} }
function isParaId(s) {
return /^p-\d+-/i.test(String(s || ""));
}
/**
* Supporte:
* - monolith: src/annotations/<pageKey>.yml -> pageKey = rel sans ext
* - shard : src/annotations/<pageKey>/<paraId>.yml -> pageKey = dirname(rel), paraId = basename
*
* shard seulement si le fichier est dans un sous-dossier (anti cas pathologique).
*/
function inferFromFile(fileAbs) {
const rel = path.relative(ANNO_DIR, fileAbs).replace(/\\/g, "/");
const relNoExt = rel.replace(/\.(ya?ml|json)$/i, "");
const parts = relNoExt.split("/").filter(Boolean);
const base = parts[parts.length - 1] || "";
const dirParts = parts.slice(0, -1);
const isShard = dirParts.length > 0 && isParaId(base);
const pageKey = isShard ? dirParts.join("/") : relNoExt;
const paraId = isShard ? base : "";
return { pageKey: normalizePageKey(pageKey), paraId };
}
async function loadAliases() { async function loadAliases() {
if (!(await exists(ALIASES_PATH))) return {}; if (!(await exists(ALIASES_PATH))) return {};
try { try {
@@ -83,7 +103,11 @@ async function main() {
const aliases = await loadAliases(); const aliases = await loadAliases();
const files = (await walk(ANNO_DIR)).filter((p) => /\.(ya?ml|json)$/i.test(p)); const files = (await walk(ANNO_DIR)).filter((p) => /\.(ya?ml|json)$/i.test(p));
let pages = 0; // perf: cache HTML par page (shards = beaucoup de fichiers pour 1 page)
const htmlCache = new Map(); // pageKey -> html
const missingDistPage = new Set(); // pageKey
let pagesSeen = new Set();
let checked = 0; let checked = 0;
let failures = 0; let failures = 0;
const notes = []; const notes = [];
@@ -107,7 +131,7 @@ async function main() {
continue; continue;
} }
const pageKey = normalizePageKey(inferPageKeyFromFile(f)); const { pageKey, paraId: shardParaId } = inferFromFile(f);
if (doc.page != null && normalizePageKey(doc.page) !== pageKey) { if (doc.page != null && normalizePageKey(doc.page) !== pageKey) {
failures++; failures++;
@@ -121,20 +145,44 @@ async function main() {
continue; continue;
} }
// shard invariant (fort) : doit contenir paras[paraId]
if (shardParaId) {
if (!Object.prototype.hasOwnProperty.call(doc.paras, shardParaId)) {
failures++;
notes.push(`- SHARD MISMATCH: ${rel} (expected paras["${shardParaId}"] present)`);
continue;
}
// si extras -> warning (non destructif)
const keys = Object.keys(doc.paras);
if (!(keys.length === 1 && keys[0] === shardParaId)) {
notes.push(`- WARN shard has extra paras: ${rel} (expected only "${shardParaId}", got ${keys.join(", ")})`);
}
}
pagesSeen.add(pageKey);
const distFile = path.join(DIST_DIR, pageKey, "index.html"); const distFile = path.join(DIST_DIR, pageKey, "index.html");
if (!(await exists(distFile))) { if (!(await exists(distFile))) {
failures++; if (!missingDistPage.has(pageKey)) {
notes.push(`- MISSING PAGE: dist/${pageKey}/index.html (from ${rel})`); missingDistPage.add(pageKey);
failures++;
notes.push(`- MISSING PAGE: dist/${pageKey}/index.html (from ${rel})`);
} else {
notes.push(`- WARN missing page already reported: dist/${pageKey}/index.html (from ${rel})`);
}
continue; continue;
} }
pages++; let html = htmlCache.get(pageKey);
const html = await fs.readFile(distFile, "utf8"); if (!html) {
html = await fs.readFile(distFile, "utf8");
htmlCache.set(pageKey, html);
}
for (const paraId of Object.keys(doc.paras)) { for (const paraId of Object.keys(doc.paras)) {
checked++; checked++;
if (!/^p-\d+-/i.test(paraId)) { if (!isParaId(paraId)) {
failures++; failures++;
notes.push(`- INVALID ID: ${rel} (${paraId})`); notes.push(`- INVALID ID: ${rel} (${paraId})`);
continue; continue;
@@ -158,6 +206,7 @@ async function main() {
} }
const warns = notes.filter((x) => x.startsWith("- WARN")); const warns = notes.filter((x) => x.startsWith("- WARN"));
const pages = pagesSeen.size;
if (failures > 0) { if (failures > 0) {
console.error(`FAIL: annotations invalid (pages=${pages} checked=${checked} failures=${failures})`); console.error(`FAIL: annotations invalid (pages=${pages} checked=${checked} failures=${failures})`);

View File

@@ -57,25 +57,24 @@ function deepMergeEntry(dst: any, src: any) {
if (k === "comments_editorial" && isArr(v)) { dst.comments_editorial = uniqUnion(dst.comments_editorial, v, keyComment); continue; } if (k === "comments_editorial" && isArr(v)) { dst.comments_editorial = uniqUnion(dst.comments_editorial, v, keyComment); continue; }
if (isObj(v)) { if (isObj(v)) {
if (!isObj(dst[k])) dst[k] = {}; if (!isObj((dst as any)[k])) (dst as any)[k] = {};
deepMergeEntry(dst[k], v); deepMergeEntry((dst as any)[k], v);
continue; continue;
} }
if (isArr(v)) { if (isArr(v)) {
const cur = isArr(dst[k]) ? dst[k] : []; const cur = isArr((dst as any)[k]) ? (dst as any)[k] : [];
const seen = new Set(cur.map((x:any) => JSON.stringify(x))); const seen = new Set(cur.map((x:any) => JSON.stringify(x)));
const out = [...cur]; const out = [...cur];
for (const it of v) { for (const it of v) {
const s = JSON.stringify(it); const s = JSON.stringify(it);
if (!seen.has(s)) { seen.add(s); out.push(it); } if (!seen.has(s)) { seen.add(s); out.push(it); }
} }
dst[k] = out; (dst as any)[k] = out;
continue; continue;
} }
// scalar: set only if missing/empty if (!(k in (dst as any)) || (dst as any)[k] == null || (dst as any)[k] === "") (dst as any)[k] = v;
if (!(k in dst) || dst[k] == null || dst[k] === "") dst[k] = v;
} }
} }
@@ -93,7 +92,7 @@ async function walk(dir: string): Promise<string[]> {
function inferExpected(relNoExt: string) { function inferExpected(relNoExt: string) {
const parts = relNoExt.split("/").filter(Boolean); const parts = relNoExt.split("/").filter(Boolean);
const last = parts.at(-1) || ""; const last = parts.at(-1) || "";
const isShard = /^p-\d+-/i.test(last); const isShard = parts.length > 1 && /^p-\d+-/i.test(last); // ✅ durcissement
const pageKey = isShard ? parts.slice(0, -1).join("/") : relNoExt; const pageKey = isShard ? parts.slice(0, -1).join("/") : relNoExt;
const paraId = isShard ? last : null; const paraId = isShard ? last : null;
return { isShard, pageKey, paraId }; return { isShard, pageKey, paraId };
@@ -136,6 +135,12 @@ export const GET: APIRoute = async () => {
if (!(paraId in doc.paras)) { if (!(paraId in doc.paras)) {
throw new Error(`shard mismatch: file must contain paras["${paraId}"]`); throw new Error(`shard mismatch: file must contain paras["${paraId}"]`);
} }
// ✅ invariant aligné avec build-annotations-index
const keys = Object.keys(doc.paras).map(String);
if (!(keys.length === 1 && keys[0] === paraId)) {
throw new Error(`shard invariant violated: shard must contain ONLY paras["${paraId}"] (got: ${keys.join(", ")})`);
}
const entry = doc.paras[paraId]; const entry = doc.paras[paraId];
if (!isObj(pg.paras[paraId])) pg.paras[paraId] = {}; if (!isObj(pg.paras[paraId])) pg.paras[paraId] = {};
if (isObj(entry)) deepMergeEntry(pg.paras[paraId], entry); if (isObj(entry)) deepMergeEntry(pg.paras[paraId], entry);
@@ -159,8 +164,7 @@ export const GET: APIRoute = async () => {
} }
} }
// sort paras for (const [pk, pg] of Object.entries(pages)) {
for (const [pageKey, pg] of Object.entries(pages)) {
const keys = Object.keys(pg.paras || {}); const keys = Object.keys(pg.paras || {});
keys.sort((a, b) => { keys.sort((a, b) => {
const ia = paraNum(a); const ia = paraNum(a);
@@ -185,7 +189,6 @@ export const GET: APIRoute = async () => {
errors, errors,
}; };
// 🔥 comportement “pro CI” : si erreurs => build fail
if (errors.length) { if (errors.length) {
throw new Error(`${errors[0].file}: ${errors[0].error}`); throw new Error(`${errors[0].file}: ${errors[0].error}`);
} }