anno: robust verify for para-index (normalize page keys)
Some checks failed
SMOKE / smoke (push) Successful in 33s
CI / build-and-anchors (push) Failing after 1m53s

This commit is contained in:
2026-02-27 10:20:49 +01:00
parent dcf1fc2d0b
commit b6e04a9138

View File

@@ -1,12 +1,17 @@
#!/usr/bin/env node
// scripts/apply-annotation-ticket.mjs
//
// Applique un ticket Gitea "type/media | type/reference | type/comment" vers:
//
// ✅ src/annotations/<oeuvre>/<chapitre>/<paraId>.yml (sharding par paragraphe)
// ✅ src/annotations/<oeuvre>/<chapitre>/<paraId>.yml (sharding par paragraphe)
// ✅ public/media/<oeuvre>/<chapitre>/<paraId>/<file>
//
// Robuste, idempotent, non destructif.
// Compat rétro : lit (si présent) l'ancien monolithe:
// src/annotations/<oeuvre>/<chapitre>.yml
// et deep-merge NON destructif dans le shard lors d'une nouvelle application,
// pour permettre une migration progressive sans perte.
//
// Robuste, idempotent, non destructif.
// DRY RUN si --dry-run
// Options: --dry-run --no-download --verify --strict --commit --close
//
@@ -49,8 +54,8 @@ Flags:
--dry-run : n'écrit rien (affiche un aperçu)
--no-download : n'essaie pas de télécharger les pièces jointes (media)
--verify : vérifie que (page, ancre) existent (dist/para-index.json si dispo, sinon baseline)
--strict : refuse si URL ref invalide (http/https) OU caption media vide
--commit : git add + git commit (le script commit dans la branche courante)
--strict : refuse si URL ref invalide (http/https) OU caption media vide OU verify impossible
--commit : git add + git commit (commit dans la branche courante)
--close : ferme le ticket (nécessite --commit)
Env requis:
@@ -191,6 +196,7 @@ function normalizeChemin(chemin) {
}
function normalizePageKeyFromChemin(chemin) {
// ex: /archicrat-ia/chapitre-4/ => archicrat-ia/chapitre-4
return normalizeChemin(chemin).replace(/^\/+|\/+$/g, "");
}
@@ -226,90 +232,156 @@ function isHttpUrl(u) {
}
}
/* ------------------------------ para-index (verify + sort) ------------------------------ */
function stableSortByTs(arr) {
if (!Array.isArray(arr)) return;
arr.sort((a, b) => {
const ta = Date.parse(a?.ts || "") || 0;
const tb = Date.parse(b?.ts || "") || 0;
if (ta !== tb) return ta - tb;
return JSON.stringify(a).localeCompare(JSON.stringify(b));
});
}
function normPage(s) {
let x = String(s || "").trim();
if (!x) return "";
// retire origin si on a une URL complète
x = x.replace(/^https?:\/\/[^/]+/i, "");
// enlève query/hash
x = x.split("#")[0].split("?")[0];
// enlève index.html
x = x.replace(/index\.html$/i, "");
// enlève slashs de bord
x = x.replace(/^\/+/, "").replace(/\/+$/, "");
return x;
}
/* ------------------------------ para-index (verify + order) ------------------------------ */
async function loadParaOrderFromDist(pageKey) {
const distIdx = path.join(CWD, "dist", "para-index.json");
if (!(await exists(distIdx))) return null;
let j;
try {
j = JSON.parse(await fs.readFile(distIdx, "utf8"));
} catch {
const distIdx = path.join(CWD, "dist", "para-index.json");
if (!(await exists(distIdx))) return null;
let j;
try {
j = JSON.parse(await fs.readFile(distIdx, "utf8"));
} catch {
return null;
}
const want = normPage(pageKey);
// Support A) { items:[{id,page,...}, ...] } (ou variantes)
const items = Array.isArray(j?.items)
? j.items
: Array.isArray(j?.index?.items)
? j.index.items
: null;
if (items) {
const ids = [];
for (const it of items) {
// page peut être dans plein de clés différentes
const pageCand = normPage(
it?.page ??
it?.pageKey ??
it?.path ??
it?.route ??
it?.href ??
it?.url ??
""
);
// id peut être dans plein de clés différentes
let id = String(it?.id ?? it?.paraId ?? it?.anchorId ?? it?.anchor ?? "");
if (id.startsWith("#")) id = id.slice(1);
if (pageCand === want && id) ids.push(id);
}
if (ids.length) return ids;
}
// Support B) { byId: { "p-...": { page:"...", ... }, ... } }
if (j?.byId && typeof j.byId === "object") {
const ids = Object.keys(j.byId)
.filter((id) => {
const meta = j.byId[id] || {};
const pageCand = normPage(meta.page ?? meta.pageKey ?? meta.path ?? meta.route ?? meta.url ?? "");
return pageCand === want;
});
if (ids.length) {
ids.sort((a, b) => {
const ia = paraIndexFromId(a);
const ib = paraIndexFromId(b);
if (Number.isFinite(ia) && Number.isFinite(ib) && ia !== ib) return ia - ib;
return String(a).localeCompare(String(b));
});
return ids;
}
}
// Support C) { pages: { "archicrat-ia/chapitre-4": { ids:[...] } } } (ou variantes)
if (j?.pages && typeof j.pages === "object") {
// essaie de trouver la bonne clé même si elle est /.../ ou .../index.html
const keys = Object.keys(j.pages);
const hit = keys.find((k) => normPage(k) === want);
if (hit) {
const pg = j.pages[hit];
if (Array.isArray(pg?.ids)) return pg.ids.map(String);
if (Array.isArray(pg?.paras)) return pg.paras.map(String);
}
}
return null;
}
// Support several shapes:
// A) { items:[{id,page,...}, ...] }
if (Array.isArray(j?.items)) {
const ids = [];
for (const it of j.items) {
const p = String(it?.page || it?.pageKey || "");
const id = String(it?.id || it?.paraId || "");
if (p === pageKey && id) ids.push(id);
}
if (ids.length) return ids;
}
// B) { byId: { "p-...": { page:"archicrat-ia/chapitre-4", ... }, ... } }
if (j?.byId && typeof j.byId === "object") {
// cannot rebuild full order; but can verify existence
// return a pseudo-order map from known ids sorted by p-<n>- then alpha
const ids = Object.keys(j.byId).filter((id) => String(j.byId[id]?.page || "") === pageKey);
if (ids.length) {
ids.sort((a, b) => {
const ia = paraIndexFromId(a);
const ib = paraIndexFromId(b);
if (Number.isFinite(ia) && Number.isFinite(ib) && ia !== ib) return ia - ib;
return String(a).localeCompare(String(b));
});
return ids;
}
}
// C) { pages: { "archicrat-ia/chapitre-4": { ids:[...]} } }
if (j?.pages && typeof j.pages === "object") {
const pg = j.pages[pageKey];
if (Array.isArray(pg?.ids)) return pg.ids.map(String);
if (Array.isArray(pg?.paras)) return pg.paras.map(String);
}
return null;
}
async function tryVerifyAnchor(pageKey, anchorId) {
// 1) dist/para-index.json
const order = await loadParaOrderFromDist(pageKey);
if (order) return order.includes(anchorId);
// 2) tests/anchors-baseline.json (fallback)
const base = path.join(CWD, "tests", "anchors-baseline.json");
if (await exists(base)) {
try {
const j = JSON.parse(await fs.readFile(base, "utf8"));
const candidates = [];
if (j?.pages && typeof j.pages === "object") {
for (const [k, v] of Object.entries(j.pages)) {
if (!Array.isArray(v)) continue;
if (String(k).includes(pageKey)) candidates.push(...v);
async function tryVerifyAnchor(pageKey, anchorId) {
// 1) dist/para-index.json : order complet si possible
const order = await loadParaOrderFromDist(pageKey);
if (order) return order.includes(anchorId);
// 1bis) dist/para-index.json : fallback “best effort” => recherche brute (IDs quasi uniques)
const distIdx = path.join(CWD, "dist", "para-index.json");
if (await exists(distIdx)) {
try {
const raw = await fs.readFile(distIdx, "utf8");
if (raw.includes(`"${anchorId}"`) || raw.includes(`"#${anchorId}"`)) {
return true;
}
} catch {
// ignore
}
if (Array.isArray(j?.entries)) {
for (const it of j.entries) {
const p = String(it?.page || "");
const ids = it?.ids;
if (Array.isArray(ids) && p.includes(pageKey)) candidates.push(...ids);
}
}
if (candidates.length) return candidates.some((x) => String(x) === anchorId);
} catch {
// ignore
}
// 2) tests/anchors-baseline.json (fallback)
const base = path.join(CWD, "tests", "anchors-baseline.json");
if (await exists(base)) {
try {
const j = JSON.parse(await fs.readFile(base, "utf8"));
const candidates = [];
if (j?.pages && typeof j.pages === "object") {
for (const [k, v] of Object.entries(j.pages)) {
if (!Array.isArray(v)) continue;
if (normPage(k).includes(normPage(pageKey))) candidates.push(...v);
}
}
if (Array.isArray(j?.entries)) {
for (const it of j.entries) {
const p = String(it?.page || "");
const ids = it?.ids;
if (Array.isArray(ids) && normPage(p).includes(normPage(pageKey))) candidates.push(...ids);
}
}
if (candidates.length) return candidates.some((x) => String(x) === anchorId);
} catch {
// ignore
}
}
return null; // cannot verify
}
return null; // cannot verify
}
/* ----------------------------- deep merge helpers (non destructive) ----------------------------- */
function keyMedia(x) {
@@ -360,7 +432,6 @@ function deepMergeEntry(dst, src) {
}
if (Array.isArray(v)) {
// fallback: union by JSON string
const cur = Array.isArray(dst[k]) ? dst[k] : [];
const seen = new Set(cur.map((x) => JSON.stringify(x)));
const out = [...cur];
@@ -382,16 +453,6 @@ function deepMergeEntry(dst, src) {
}
}
function stableSortByTs(arr) {
if (!Array.isArray(arr)) return;
arr.sort((a, b) => {
const ta = Date.parse(a?.ts || "") || 0;
const tb = Date.parse(b?.ts || "") || 0;
if (ta !== tb) return ta - tb;
return JSON.stringify(a).localeCompare(JSON.stringify(b));
});
}
/* ----------------------------- annotations I/O ----------------------------- */
async function loadAnnoDocYaml(fileAbs, pageKey) {
@@ -424,9 +485,7 @@ async function loadAnnoDocYaml(fileAbs, pageKey) {
function sortParasObject(paras, order) {
const keys = Object.keys(paras || {});
const idx = new Map();
if (Array.isArray(order)) {
order.forEach((id, i) => idx.set(String(id), i));
}
if (Array.isArray(order)) order.forEach((id, i) => idx.set(String(id), i));
keys.sort((a, b) => {
const ha = idx.has(a);
@@ -448,9 +507,9 @@ function sortParasObject(paras, order) {
async function saveAnnoDocYaml(fileAbs, doc, order = null) {
await fs.mkdir(path.dirname(fileAbs), { recursive: true });
doc.paras = sortParasObject(doc.paras, order);
// also sort known lists inside each para for stable diffs
for (const e of Object.values(doc.paras || {})) {
if (!isPlainObject(e)) continue;
stableSortByTs(e.media);
@@ -632,7 +691,6 @@ async function main() {
const pageKey = normalizePageKeyFromChemin(chemin);
assert(pageKey, "Ticket: impossible de dériver pageKey.", 2);
// para order (used for verify + sorting)
const paraOrder = DO_VERIFY ? await loadParaOrderFromDist(pageKey) : null;
if (DO_VERIFY) {
@@ -641,46 +699,43 @@ async function main() {
throw Object.assign(new Error(`Ticket verify: ancre introuvable pour page "${pageKey}" => ${ancre}`), { __exitCode: 2 });
}
if (ok === null) {
if (STRICT) throw Object.assign(new Error(`Ticket verify (strict): impossible de vérifier (pas de dist/para-index.json ou baseline)`), { __exitCode: 2 });
if (STRICT) {
throw Object.assign(
new Error(`Ticket verify (strict): impossible de vérifier (pas de dist/para-index.json ou baseline)`),
{ __exitCode: 2 }
);
}
console.warn("⚠️ verify: impossible de vérifier (pas de dist/para-index.json ou baseline) — on continue.");
}
}
// ✅ SHARD FILE: src/annotations/<pageKey>/<paraId>.yml
const annoShardFileAbs = path.join(ANNO_DIR, pageKey, `${ancre}.yml`);
const annoShardFileRel = path.relative(CWD, annoShardFileAbs).replace(/\\/g, "/");
// ✅ shard path: src/annotations/<pageKey>/<paraId>.yml
const shardAbs = path.join(ANNO_DIR, ...pageKey.split("/"), `${ancre}.yml`);
const shardRel = path.relative(CWD, shardAbs).replace(/\\/g, "/");
// legacy (read-only, used as base to avoid losing previously stored data)
const annoLegacyFileAbs = path.join(ANNO_DIR, `${pageKey}.yml`);
// legacy monolith: src/annotations/<pageKey>.yml (read-only, for migration)
const legacyAbs = path.join(ANNO_DIR, `${pageKey}.yml`);
console.log("✅ Parsed:", {
type,
chemin,
ancre: `#${ancre}`,
pageKey,
annoFile: annoShardFileRel,
});
console.log("✅ Parsed:", { type, chemin, ancre: `#${ancre}`, pageKey, annoFile: shardRel });
// load shard doc
const doc = await loadAnnoDocYaml(annoShardFileAbs, pageKey);
// merge legacy para into shard as base (non destructive)
if (await exists(annoLegacyFileAbs)) {
try {
const legacy = await loadAnnoDocYaml(annoLegacyFileAbs, pageKey);
const legacyEntry = legacy?.paras?.[ancre];
if (isPlainObject(legacyEntry)) {
if (!isPlainObject(doc.paras[ancre])) doc.paras[ancre] = {};
deepMergeEntry(doc.paras[ancre], legacyEntry);
}
} catch {
// ignore legacy parse issues (shard still works)
}
}
const doc = await loadAnnoDocYaml(shardAbs, pageKey);
if (!isPlainObject(doc.paras[ancre])) doc.paras[ancre] = {};
const entry = doc.paras[ancre];
// merge legacy entry into shard in-memory (non destructive) to keep compat + enable progressive migration
if (await exists(legacyAbs)) {
try {
const legacy = await loadAnnoDocYaml(legacyAbs, pageKey);
const legacyEntry = legacy?.paras?.[ancre];
if (isPlainObject(legacyEntry)) {
deepMergeEntry(entry, legacyEntry);
}
} catch {
// ignore legacy parse issues; shard still applies new data
}
}
const touchedFiles = [];
const notes = [];
let changed = false;
@@ -696,10 +751,13 @@ async function main() {
const before = entry.comments_editorial.length;
entry.comments_editorial = uniqUnion(entry.comments_editorial, [item], keyComment);
changed = changed || entry.comments_editorial.length !== before;
if (entry.comments_editorial.length !== before) {
changed = true;
notes.push(`+ comment added (len=${text.length})`);
} else {
notes.push(`~ comment already present (dedup)`);
}
stableSortByTs(entry.comments_editorial);
notes.push(changed ? `+ comment added (len=${text.length})` : `~ comment already present (dedup)`);
}
else if (type === "type/reference") {
@@ -722,15 +780,24 @@ async function main() {
const before = entry.refs.length;
entry.refs = uniqUnion(entry.refs, [item], keyRef);
changed = changed || entry.refs.length !== before;
if (entry.refs.length !== before) {
changed = true;
notes.push(`+ reference added (${item.url ? "url" : "label"})`);
} else {
notes.push(`~ reference already present (dedup)`);
}
stableSortByTs(entry.refs);
notes.push(changed ? `+ reference added (${item.url ? "url" : "label"})` : `~ reference already present (dedup)`);
}
else if (type === "type/media") {
if (!Array.isArray(entry.media)) entry.media = [];
const caption = (title || "").trim();
if (STRICT && !caption) {
throw Object.assign(new Error("Ticket media (strict): caption vide (titre de ticket requis)."), { __exitCode: 2 });
}
const captionFinal = caption || ".";
const atts = NO_DOWNLOAD ? [] : await fetchIssueAssets({ forgeApiBase, owner, repo, token, issueNum });
if (!atts.length) notes.push("! no assets found (nothing to download).");
@@ -739,13 +806,7 @@ async function main() {
const dl = a?.browser_download_url || a?.download_url || "";
if (!dl) { notes.push(`! asset missing download url: ${name}`); continue; }
const caption = (title || "").trim();
if (STRICT && !caption) {
throw Object.assign(new Error("Ticket media (strict): caption vide (titre de ticket requis)."), { __exitCode: 2 });
}
const captionFinal = caption || ".";
const mediaDirAbs = path.join(PUBLIC_DIR, "media", pageKey, ancre);
const mediaDirAbs = path.join(PUBLIC_DIR, "media", ...pageKey.split("/"), ancre);
const destAbs = path.join(mediaDirAbs, name);
const urlPath = `${MEDIA_URL_ROOT}/${pageKey}/${ancre}/${name}`.replace(/\/{2,}/g, "/");
@@ -790,7 +851,7 @@ async function main() {
if (DRY_RUN) {
console.log("\n--- DRY RUN (no write) ---");
console.log(`Would update: ${annoShardFileRel}`);
console.log(`Would update: ${shardRel}`);
for (const n of notes) console.log(" ", n);
console.log("\nExcerpt (resulting entry):");
console.log(YAML.stringify({ [ancre]: doc.paras[ancre] }).trimEnd());
@@ -798,10 +859,10 @@ async function main() {
return;
}
await saveAnnoDocYaml(annoShardFileAbs, doc, paraOrder);
touchedFiles.unshift(annoShardFileRel);
await saveAnnoDocYaml(shardAbs, doc, paraOrder);
touchedFiles.unshift(shardRel);
console.log(`✅ Updated: ${annoShardFileRel}`);
console.log(`✅ Updated: ${shardRel}`);
for (const n of notes) console.log(" ", n);
if (DO_COMMIT) {