257 lines
7.8 KiB
JavaScript
257 lines
7.8 KiB
JavaScript
#!/usr/bin/env node
|
||
import fs from "node:fs/promises";
|
||
import fssync from "node:fs";
|
||
import path from "node:path";
|
||
|
||
// CLI argument handling.
// `args` answers presence checks for boolean flags (e.g. --update).
const args = new Set(process.argv.slice(2));

// Returns the token that follows `name` on the command line, or `fallback`
// when the flag is absent or is the final token.
// Fix: use an explicit bounds check instead of a truthiness test on the
// next token, so an explicitly-empty value (`--dist ""`) is returned as-is
// rather than being silently replaced by the fallback.
const getArg = (name, fallback = null) => {
  const i = process.argv.indexOf(name);
  if (i >= 0 && i + 1 < process.argv.length) return process.argv[i + 1];
  return fallback;
};
|
||
|
||
// Directory containing the built site (HTML output) to scan.
const DIST_DIR = getArg("--dist", "dist");
// Path of the JSON baseline the current snapshot is compared against.
const BASELINE = getArg("--baseline", path.join("tests", "anchors-baseline.json"));
// --update rewrites the baseline instead of checking against it.
const UPDATE = args.has("--update");

// NOTE(review): this constant appears unused in this file — confirm whether
// it is dead code or consumed by tooling elsewhere before removing.
const ACCEPT_GLOSSARY_RESETS =
  process.env.ACCEPT_GLOSSARY_ANCHOR_RESETS === "1";

// Maximum tolerated anchor churn ratio, e.g. 0.2 => 20%.
// NOTE(review): Number(...) yields NaN on malformed input; every `> NaN`
// comparison below is false, so a bad value silently disables the check —
// confirm whether input validation is wanted here.
const THRESHOLD = Number(getArg("--threshold", process.env.ANCHORS_THRESHOLD ?? "0.2"));
// Pages with fewer baseline IDs than this never fail the churn check.
const MIN_PREV = Number(getArg("--min-prev", process.env.ANCHORS_MIN_PREV ?? "10"));
|
||
|
||
const pct = (x) => (Math.round(x * 1000) / 10).toFixed(1) + "%";
|
||
|
||
// Recursively collects the paths of every *.html file under `dir`.
// Returned paths are joined onto `dir`, so they are absolute iff `dir` is.
async function walk(dir) {
  const entries = await fs.readdir(dir, { withFileTypes: true });
  const files = [];
  for (const entry of entries) {
    const full = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      const nested = await walk(full);
      files.push(...nested);
    } else if (entry.isFile() && entry.name.endsWith(".html")) {
      files.push(full);
    }
  }
  return files;
}
|
||
|
||
// Anchor contract:
// - quotable paragraphs: .reading p[id^="p-"]
// - web-native aliases:  .reading span.para-alias[id^="p-"]
// Returns the IDs in document order (paragraphs first, then aliases),
// de-duplicated while keeping the first occurrence.
function extractIds(html) {
  // Pages without a reading container are outside the anchor contract.
  if (!html.includes('class="reading"')) return [];

  const patterns = [
    // 1) primary IDs (paragraphs)
    /<p\b[^>]*\sid="(p-[^"]+)"/g,
    // 2) alias IDs (injected spans), case A: id="..." before class="...para-alias..."
    /<span\b[^>]*\bid="(p-[^"]+)"[^>]*\bclass="[^"]*\bpara-alias\b[^"]*"/g,
    // case B: class="...para-alias..." before id="..."
    /<span\b[^>]*\bclass="[^"]*\bpara-alias\b[^"]*"[^>]*\bid="(p-[^"]+)"/g,
  ];

  const ids = [];
  for (const re of patterns) {
    for (const match of html.matchAll(re)) ids.push(match[1]);
  }

  // Set preserves insertion order, so first-seen order is kept stable.
  return [...new Set(ids)];
}
|
||
|
||
// Reads scripts/anchors-allow-missing.json (when present) and returns the
// set of page paths that are allowed to drop out of the anchor contract.
// Missing or empty file -> empty set. Non-array content -> throws.
function loadAllowMissing() {
  const file = path.resolve("scripts/anchors-allow-missing.json");
  if (!fssync.existsSync(file)) return new Set();

  const text = fssync.readFileSync(file, "utf8").trim();
  if (text === "") return new Set();

  const parsed = JSON.parse(text);
  if (!Array.isArray(parsed)) {
    throw new Error("anchors-allow-missing.json must be an array");
  }
  return new Set(parsed.map(String));
}
|
||
|
||
// Reads config/anchor-churn-allowlist.json (when present) and returns
// { acceptedResets, acceptedPrefixes } — both plain objects mapping page
// paths / path prefixes to a human-readable acceptance reason.
// Missing or empty file -> empty allowlists. Malformed shapes -> throws.
function loadAnchorChurnAllowlist() {
  // Validates that `value` is a plain (non-array) object.
  const requireObject = (value, message) => {
    if (!value || typeof value !== "object" || Array.isArray(value)) {
      throw new Error(message);
    }
    return value;
  };

  const file = path.resolve("config/anchor-churn-allowlist.json");
  if (!fssync.existsSync(file)) return { acceptedResets: {}, acceptedPrefixes: {} };

  const text = fssync.readFileSync(file, "utf8").trim();
  if (text === "") return { acceptedResets: {}, acceptedPrefixes: {} };

  const data = requireObject(
    JSON.parse(text),
    "anchor-churn-allowlist.json must be an object"
  );
  const acceptedResets = requireObject(
    data.accepted_resets || {},
    "anchor-churn-allowlist.json: accepted_resets must be an object"
  );
  const acceptedPrefixes = requireObject(
    data.accepted_prefixes || {},
    "anchor-churn-allowlist.json: accepted_prefixes must be an object"
  );

  return { acceptedResets, acceptedPrefixes };
}
|
||
|
||
// Returns the documented reason when `page` has an accepted anchor reset —
// first via an exact entry in ACCEPTED_RESETS, then via the first matching
// path prefix in ACCEPTED_PREFIXES — or null when neither applies.
function acceptedResetReasonForPage(page) {
  const exact = ACCEPTED_RESETS[page];
  if (exact) return exact;

  const match = Object.entries(ACCEPTED_PREFIXES).find(([prefix]) =>
    page.startsWith(prefix)
  );
  return match ? match[1] : null;
}
|
||
|
||
// Allowlists are loaded once at module scope so every comparison below
// shares the same view of them.
const ALLOW_MISSING = loadAllowMissing();
const { acceptedResets: ACCEPTED_RESETS, acceptedPrefixes: ACCEPTED_PREFIXES } =
  loadAnchorChurnAllowlist();
|
||
|
||
// Scans DIST_DIR for HTML pages and maps each page (dist-relative path,
// normalized to "/" separators) to its ordered list of anchor IDs. Pages
// without IDs are omitted; keys are sorted for a stable serialization.
async function buildSnapshot() {
  const absDist = path.resolve(DIST_DIR);

  const entries = [];
  for (const file of await walk(absDist)) {
    const html = await fs.readFile(file, "utf8");
    const ids = extractIds(html);
    if (ids.length === 0) continue;
    const rel = path.relative(absDist, file).replace(/\\/g, "/");
    entries.push([rel, ids]);
  }

  // Stable key order.
  entries.sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0));
  return Object.fromEntries(entries);
}
|
||
|
||
// Reads and parses the JSON file at `p`.
async function readJson(p) {
  return JSON.parse(await fs.readFile(p, "utf8"));
}
|
||
|
||
// Serializes `obj` as pretty-printed JSON (2-space indent, trailing
// newline) to `p`, creating parent directories as needed.
async function writeJson(p, obj) {
  const dir = path.dirname(p);
  await fs.mkdir(dir, { recursive: true });
  const payload = `${JSON.stringify(obj, null, 2)}\n`;
  await fs.writeFile(p, payload, "utf8");
}
|
||
|
||
// Compares two ID lists and reports which IDs appeared and which
// disappeared, each preserving its source list's order.
function diffPage(prevIds, curIds) {
  const previous = new Set(prevIds);
  const current = new Set(curIds);
  return {
    added: curIds.filter((id) => !previous.has(id)),
    removed: prevIds.filter((id) => !current.has(id)),
  };
}
|
||
|
||
// Entry point: build the current snapshot, then either refresh the
// baseline (--update) or compare against it and fail on excessive churn.
// Exit codes: 0 OK / baseline updated, 1 churn failure, 2 missing baseline.
(async () => {
  const snap = await buildSnapshot();

  // --update: overwrite the baseline with the current snapshot and stop.
  if (UPDATE) {
    await writeJson(BASELINE, snap);
    const pages = Object.keys(snap).length;
    const total = Object.values(snap).reduce((a, xs) => a + xs.length, 0);
    console.log(`OK baseline updated -> ${BASELINE}`);
    console.log(`Pages: ${pages}, Total paragraph IDs: ${total}`);
    process.exit(0);
  }

  // A missing/unreadable baseline is a setup error (exit 2), distinct from
  // a churn failure (exit 1).
  let base;
  try {
    base = await readJson(BASELINE);
  } catch {
    console.error(`Baseline missing: ${BASELINE}`);
    console.error(`Run: node scripts/check-anchors.mjs --update`);
    process.exit(2);
  }

  // Union of pages seen in either the baseline or the current build.
  const allPages = new Set([...Object.keys(base), ...Object.keys(snap)]);
  const pages = Array.from(allPages).sort();

  let failed = false;
  let changedPages = 0;
  let acceptedPages = 0;

  for (const p of pages) {
    const prevIds = base[p] || null;
    const curIds = snap[p] || null;

    // New page: reported, never a failure.
    if (!prevIds && curIds) {
      console.log(`+ PAGE ${p} (new) ids=${curIds.length}`);
      continue;
    }

    // Page removed, or deliberately taken out of the anchor contract.
    if (prevIds && !curIds) {
      const acceptedReason = acceptedResetReasonForPage(p);

      // Explicitly allow-listed pages may disappear without failing.
      if (ALLOW_MISSING.has(p)) {
        console.log(`~ PAGE ${p} (missing now) ✅ allowed prevIds=${prevIds.length}`);
        continue;
      }

      // Accepted resets (exact page or prefix match) pass with a note.
      if (acceptedReason) {
        acceptedPages += 1;
        console.log(`- PAGE ${p} (missing now) prevIds=${prevIds.length}`);
        console.log(`  ✅ accepted reset: ${acceptedReason}`);
        continue;
      }

      console.log(`- PAGE ${p} (missing now) prevIds=${prevIds.length}`);
      failed = true;
      continue;
    }

    // (safety) nothing before / nothing now
    if (!prevIds && !curIds) continue;

    const { added, removed } = diffPage(prevIds, curIds);
    if (added.length === 0 && removed.length === 0) continue;

    changedPages += 1;

    // Guard against division by zero for an (unexpected) empty baseline list.
    const prevN = prevIds.length || 1;
    const churn = (added.length + removed.length) / prevN;
    const removedRatio = removed.length / prevN;
    const acceptedReason = acceptedResetReasonForPage(p);

    console.log(
      `~ ${p} prev=${prevIds.length} now=${curIds.length}` +
        ` +${added.length} -${removed.length} churn=${pct(churn)}`
    );

    if (removed.length) {
      console.log(`  removed: ${removed.slice(0, 20).join(", ")}${removed.length > 20 ? " …" : ""}`);
    }

    // Small pages (prev < MIN_PREV) never fail; larger ones fail when either
    // total churn or the removed-only ratio exceeds THRESHOLD.
    const exceeds =
      (prevIds.length >= MIN_PREV && churn > THRESHOLD) ||
      (prevIds.length >= MIN_PREV && removedRatio > THRESHOLD);

    if (exceeds && acceptedReason) {
      acceptedPages += 1;
      console.log(`  ✅ accepted reset: ${acceptedReason}`);
      continue;
    }

    if (exceeds) failed = true;
  }

  console.log(
    `\nSummary: pages compared=${pages.length}, pages changed=${changedPages}, accepted resets=${acceptedPages}`
  );

  if (failed) {
    console.error(`FAIL: anchor churn above threshold (threshold=${pct(THRESHOLD)} minPrev=${MIN_PREV})`);
    process.exit(1);
  }
  console.log("OK: anchors stable within threshold");
})();