Files
archicratie-edition/scripts/check-anchors.mjs
Archicratia c07028c052
All checks were successful
SMOKE / smoke (push) Successful in 11s
CI / build-and-anchors (push) Successful in 46s
CI / build-and-anchors (pull_request) Successful in 45s
Add manifest page and refine editorial landing pages
2026-05-05 23:30:18 +02:00

257 lines
7.8 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env node
import fs from "node:fs/promises";
import fssync from "node:fs";
import path from "node:path";
const args = new Set(process.argv.slice(2));

/**
 * Return the value that follows a CLI flag, or `fallback` when the flag is
 * absent or is the last token on the command line.
 *
 * Uses an explicit bounds check rather than a truthiness test so that a
 * deliberately empty value (e.g. `--dist ""`) is honored instead of silently
 * falling back.
 *
 * @param {string} name - Flag to look for, e.g. "--dist".
 * @param {string|null} [fallback=null] - Value returned when the flag is missing.
 * @returns {string|null}
 */
const getArg = (name, fallback = null) => {
  const i = process.argv.indexOf(name);
  if (i >= 0 && i + 1 < process.argv.length) return process.argv[i + 1];
  return fallback;
};

// CLI / environment configuration, resolved once at startup.
const DIST_DIR = getArg("--dist", "dist");
const BASELINE = getArg("--baseline", path.join("tests", "anchors-baseline.json"));
const UPDATE = args.has("--update");
const ACCEPT_GLOSSARY_RESETS =
  process.env.ACCEPT_GLOSSARY_ANCHOR_RESETS === "1";
// Ex: 0.2 => 20%
const THRESHOLD = Number(getArg("--threshold", process.env.ANCHORS_THRESHOLD ?? "0.2"));
const MIN_PREV = Number(getArg("--min-prev", process.env.ANCHORS_MIN_PREV ?? "10"));

// Fail loudly on misconfiguration: a NaN threshold would make every
// `churn > THRESHOLD` comparison false and silently disable the check.
if (!Number.isFinite(THRESHOLD) || THRESHOLD < 0) {
  throw new Error(`Invalid threshold (--threshold / ANCHORS_THRESHOLD): ${THRESHOLD}`);
}
if (!Number.isFinite(MIN_PREV) || MIN_PREV < 0) {
  throw new Error(`Invalid min-prev (--min-prev / ANCHORS_MIN_PREV): ${MIN_PREV}`);
}

// Format a ratio as a percentage with one decimal, e.g. 0.2 -> "20.0%".
const pct = (x) => (Math.round(x * 1000) / 10).toFixed(1) + "%";
/**
 * Recursively collect the absolute paths of every `.html` file under `dir`.
 *
 * @param {string} dir - Directory to scan.
 * @returns {Promise<string[]>} Paths of all HTML files, depth-first.
 */
async function walk(dir) {
  const htmlFiles = [];
  const entries = await fs.readdir(dir, { withFileTypes: true });
  for (const entry of entries) {
    const fullPath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      htmlFiles.push(...(await walk(fullPath)));
    } else if (entry.isFile() && entry.name.endsWith(".html")) {
      htmlFiles.push(fullPath);
    }
  }
  return htmlFiles;
}
// Anchor contract for built pages:
// - citable paragraphs:   .reading p[id^="p-"]
// - web-native aliases:   .reading span.para-alias[id^="p-"]
/**
 * Extract all paragraph anchor IDs (`p-*`) from an HTML page, in document
 * order (paragraph IDs first, then alias spans), de-duplicated while keeping
 * the first occurrence of each ID.
 *
 * @param {string} html - Raw HTML of a built page.
 * @returns {string[]} Unique anchor IDs; empty when the page has no
 *   `class="reading"` container (i.e. is outside the anchor contract).
 */
function extractIds(html) {
  if (!html.includes('class="reading"')) return [];

  const patterns = [
    // 1) Primary IDs (paragraphs)
    /<p\b[^>]*\sid="(p-[^"]+)"/g,
    // 2) Alias IDs (injected spans)
    // case A: id="..." before class="...para-alias..."
    /<span\b[^>]*\bid="(p-[^"]+)"[^>]*\bclass="[^"]*\bpara-alias\b[^"]*"/g,
    // case B: class="...para-alias..." before id="..."
    /<span\b[^>]*\bclass="[^"]*\bpara-alias\b[^"]*"[^>]*\bid="(p-[^"]+)"/g,
  ];

  // A Set preserves insertion order, which gives us stable de-duplication.
  const unique = new Set();
  for (const re of patterns) {
    for (const match of html.matchAll(re)) {
      unique.add(match[1]);
    }
  }
  return [...unique];
}
/**
 * Load the allow-list of pages that may legitimately disappear from the
 * build (scripts/anchors-allow-missing.json).
 *
 * @returns {Set<string>} Page paths allowed to be missing; empty when the
 *   file is absent or blank.
 * @throws {Error} If the file exists but is not a JSON array.
 */
function loadAllowMissing() {
  const file = path.resolve("scripts/anchors-allow-missing.json");
  if (!fssync.existsSync(file)) return new Set();
  const text = fssync.readFileSync(file, "utf8").trim();
  if (text === "") return new Set();
  const parsed = JSON.parse(text);
  if (!Array.isArray(parsed)) throw new Error("anchors-allow-missing.json must be an array");
  // Normalize every entry to a string so lookups by page path are reliable.
  return new Set(parsed.map(String));
}
/**
 * Load the anchor-churn allow-list (config/anchor-churn-allowlist.json).
 *
 * Expected shape: { accepted_resets: {page: reason}, accepted_prefixes:
 * {prefix: reason} }. Both maps default to empty objects.
 *
 * @returns {{acceptedResets: Object<string,string>, acceptedPrefixes: Object<string,string>}}
 * @throws {Error} If the file exists but any part has the wrong shape.
 */
function loadAnchorChurnAllowlist() {
  const emptyConfig = () => ({ acceptedResets: {}, acceptedPrefixes: {} });
  // A plain object: non-null, typeof "object", and not an array.
  const isPlainObject = (v) => v !== null && typeof v === "object" && !Array.isArray(v);

  const file = path.resolve("config/anchor-churn-allowlist.json");
  if (!fssync.existsSync(file)) return emptyConfig();
  const text = fssync.readFileSync(file, "utf8").trim();
  if (text === "") return emptyConfig();

  const data = JSON.parse(text);
  if (!isPlainObject(data)) {
    throw new Error("anchor-churn-allowlist.json must be an object");
  }
  const acceptedResets = data.accepted_resets || {};
  if (!isPlainObject(acceptedResets)) {
    throw new Error("anchor-churn-allowlist.json: accepted_resets must be an object");
  }
  const acceptedPrefixes = data.accepted_prefixes || {};
  if (!isPlainObject(acceptedPrefixes)) {
    throw new Error("anchor-churn-allowlist.json: accepted_prefixes must be an object");
  }
  return { acceptedResets, acceptedPrefixes };
}
/**
 * Return the allow-list reason for an accepted anchor reset on `page`,
 * checking exact-page entries first, then path prefixes; `null` when the
 * page is not covered by the allow-list.
 *
 * @param {string} page - Page path relative to the dist root.
 * @returns {string|null}
 */
function acceptedResetReasonForPage(page) {
  const exactMatch = ACCEPTED_RESETS[page];
  if (exactMatch) return exactMatch;
  const prefixHit = Object.entries(ACCEPTED_PREFIXES).find(([prefix]) =>
    page.startsWith(prefix)
  );
  return prefixHit ? prefixHit[1] : null;
}
// Allow-lists loaded once at module startup; consulted by the main loop
// below when pages go missing or churn past the threshold.
const ALLOW_MISSING = loadAllowMissing();
const { acceptedResets: ACCEPTED_RESETS, acceptedPrefixes: ACCEPTED_PREFIXES } =
  loadAnchorChurnAllowlist();
/**
 * Build the current anchor snapshot from the dist directory: a map of
 * page path (relative, forward-slashed) -> array of paragraph anchor IDs.
 * Pages with no anchors are omitted; keys are sorted for stable diffs.
 *
 * @returns {Promise<Object<string, string[]>>}
 */
async function buildSnapshot() {
  const distRoot = path.resolve(DIST_DIR);
  const snapshot = {};
  for (const file of await walk(distRoot)) {
    const html = await fs.readFile(file, "utf8");
    const ids = extractIds(html);
    if (ids.length === 0) continue;
    // Normalize Windows separators so keys are portable across platforms.
    const pageKey = path.relative(distRoot, file).replace(/\\/g, "/");
    snapshot[pageKey] = ids;
  }
  // Stable key order.
  return Object.fromEntries(
    Object.keys(snapshot)
      .sort()
      .map((key) => [key, snapshot[key]])
  );
}
/**
 * Read and parse a UTF-8 JSON file.
 *
 * @param {string} filePath - Path to the JSON file.
 * @returns {Promise<any>} Parsed value.
 */
async function readJson(filePath) {
  return JSON.parse(await fs.readFile(filePath, "utf8"));
}
/**
 * Serialize `value` as pretty-printed JSON (2-space indent, trailing
 * newline) and write it to `filePath`, creating parent directories.
 *
 * @param {string} filePath - Destination path.
 * @param {any} value - Value to serialize.
 * @returns {Promise<void>}
 */
async function writeJson(filePath, value) {
  // Ensure the parent directory exists before writing.
  await fs.mkdir(path.dirname(filePath), { recursive: true });
  const serialized = `${JSON.stringify(value, null, 2)}\n`;
  await fs.writeFile(filePath, serialized, "utf8");
}
/**
 * Compare a page's baseline anchor IDs against the current build.
 *
 * @param {string[]} prevIds - IDs recorded in the baseline.
 * @param {string[]} curIds - IDs in the current snapshot.
 * @returns {{added: string[], removed: string[]}} IDs only in the current
 *   build, and IDs only in the baseline, each in original order.
 */
function diffPage(prevIds, curIds) {
  const previous = new Set(prevIds);
  const current = new Set(curIds);
  return {
    added: curIds.filter((id) => !previous.has(id)),
    removed: prevIds.filter((id) => !current.has(id)),
  };
}
// Main entry point.
// Exit codes: 0 = stable (or baseline updated), 1 = churn above threshold,
// 2 = baseline file missing.
(async () => {
  const snap = await buildSnapshot();

  // --update mode: overwrite the baseline with the current snapshot and exit.
  if (UPDATE) {
    await writeJson(BASELINE, snap);
    const pages = Object.keys(snap).length;
    const total = Object.values(snap).reduce((a, xs) => a + xs.length, 0);
    console.log(`OK baseline updated -> ${BASELINE}`);
    console.log(`Pages: ${pages}, Total paragraph IDs: ${total}`);
    process.exit(0);
  }

  let base;
  try {
    base = await readJson(BASELINE);
  } catch {
    // Exit code 2 distinguishes "no baseline" from a real churn failure (1).
    console.error(`Baseline missing: ${BASELINE}`);
    console.error(`Run: node scripts/check-anchors.mjs --update`);
    process.exit(2);
  }

  // Compare the union of pages known to the baseline and the fresh build.
  const allPages = new Set([...Object.keys(base), ...Object.keys(snap)]);
  const pages = Array.from(allPages).sort();
  let failed = false;
  let changedPages = 0;
  let acceptedPages = 0;
  for (const p of pages) {
    const prevIds = base[p] || null;
    const curIds = snap[p] || null;
    // New page: informational only, never a failure.
    if (!prevIds && curIds) {
      console.log(`+ PAGE ${p} (new) ids=${curIds.length}`);
      continue;
    }
    // Page removed, or deliberately taken out of the anchor contract.
    if (prevIds && !curIds) {
      const acceptedReason = acceptedResetReasonForPage(p);
      // Exact allow-list entries win over accepted-reset reasons.
      if (ALLOW_MISSING.has(p)) {
        console.log(`~ PAGE ${p} (missing now) ✅ allowed prevIds=${prevIds.length}`);
        continue;
      }
      if (acceptedReason) {
        acceptedPages += 1;
        console.log(`- PAGE ${p} (missing now) prevIds=${prevIds.length}`);
        console.log(` ✅ accepted reset: ${acceptedReason}`);
        continue;
      }
      console.log(`- PAGE ${p} (missing now) prevIds=${prevIds.length}`);
      failed = true;
      continue;
    }
    // (safety) present in neither map — nothing to compare.
    if (!prevIds && !curIds) continue;

    const { added, removed } = diffPage(prevIds, curIds);
    if (added.length === 0 && removed.length === 0) continue;
    changedPages += 1;
    // Guard against division by zero when the baseline page had no IDs.
    const prevN = prevIds.length || 1;
    // churn counts both additions and removals relative to the baseline size.
    const churn = (added.length + removed.length) / prevN;
    const removedRatio = removed.length / prevN;
    const acceptedReason = acceptedResetReasonForPage(p);
    console.log(
      `~ ${p} prev=${prevIds.length} now=${curIds.length}` +
      ` +${added.length} -${removed.length} churn=${pct(churn)}`
    );
    if (removed.length) {
      // Cap the listing at 20 IDs to keep CI logs readable.
      console.log(` removed: ${removed.slice(0, 20).join(", ")}${removed.length > 20 ? " …" : ""}`);
    }
    // Only pages with at least MIN_PREV baseline IDs can fail: tiny pages
    // produce noisy ratios. Either total churn or removals alone may trip it.
    const exceeds =
      (prevIds.length >= MIN_PREV && churn > THRESHOLD) ||
      (prevIds.length >= MIN_PREV && removedRatio > THRESHOLD);
    if (exceeds && acceptedReason) {
      acceptedPages += 1;
      console.log(` ✅ accepted reset: ${acceptedReason}`);
      continue;
    }
    if (exceeds) failed = true;
  }

  console.log(
    `\nSummary: pages compared=${pages.length}, pages changed=${changedPages}, accepted resets=${acceptedPages}`
  );
  if (failed) {
    console.error(`FAIL: anchor churn above threshold (threshold=${pct(THRESHOLD)} minPrev=${MIN_PREV})`);
    process.exit(1);
  }
  console.log("OK: anchors stable within threshold");
})();