Files
archicratie-edition/scripts/fix-pandoc-false-ordered-lists.mjs
archicratia 60d88939b0
All checks were successful
CI / build-and-anchors (push) Successful in 1m25s
SMOKE / smoke (push) Successful in 11s
CI / build-and-anchors (pull_request) Successful in 1m20s
Seed from NAS prod snapshot 20260130-190531
2026-01-31 10:51:38 +00:00

183 lines
4.6 KiB
JavaScript

// scripts/fix-pandoc-false-ordered-lists.mjs
// Convertit les "fausses listes" Pandoc (1. gros paragraphe / 2. gros paragraphe / ...)
// en paragraphes normaux, sans toucher aux petites listes légitimes.
// Sécurités :
// - ignore les blocs code fences
// - n'agit que sur des listes top-level
// - heuristique (taille/volume) + backup .bak
import fs from "node:fs/promises";
import path from "node:path";
// Heuristic thresholds used to tell a "false" ordered list (numbered prose
// paragraphs) apart from a genuine list.
const MIN_ITEMS = 6; // lists with fewer items than this are never touched
const MIN_AVG_LEN = 140; // average characters per item at which items read as prose
const MAX_SHORT_RATIO = 0.25; // above this share of short items, it is a real list

// Command line: every argument starting with "--" is a flag, the rest are file paths.
const args = process.argv.slice(2);
const DRY = args.includes("--dry-run");
const files = args.filter((arg) => arg.startsWith("--") === false);

// Without at least one file there is nothing to do: print usage and exit non-zero.
if (files.length < 1) {
  console.error("Usage: node scripts/fix-pandoc-false-ordered-lists.mjs <file1.mdx> [file2.mdx ...] [--dry-run]");
  process.exit(1);
}
/**
 * Splits a document into its leading frontmatter block and the remaining body.
 *
 * Frontmatter is recognised only at the very start of the text: an opening
 * `---` line, optional content, then a closing `---` line. Both LF and CRLF
 * line endings are accepted, the block may be empty, and the closing delimiter
 * may be the last line of the file without a trailing newline.
 *
 * @param {string} txt - Full file contents.
 * @returns {{front: string, body: string}} `front` is the frontmatter including
 *   both delimiter lines (empty string when none is found); `body` is the rest,
 *   so `front + body` always equals `txt`.
 */
function splitFrontmatter(txt) {
  // The closing `---` must sit at the start of a line and be followed directly
  // by a line break or the end of the text, so `----` or `---x` do not close it.
  const m = txt.match(/^---\r?\n(?:[\s\S]*?\r?\n)?---(?:\r?\n|$)/);
  if (m === null) return { front: "", body: txt };
  const front = m[0];
  return { front, body: txt.slice(front.length) };
}
/**
 * Tells whether a line opens or closes a fenced code block.
 *
 * @param {string} line - One line of the document.
 * @returns {boolean} True when the line, ignoring surrounding whitespace,
 *   begins with three backticks or three tildes.
 */
function isFence(line) {
  const trimmed = line.trim();
  return ["```", "~~~"].some((marker) => trimmed.startsWith(marker));
}
/**
 * Tells whether a line begins a top-level ordered-list item.
 *
 * The marker must start at column 0 (an indented marker is not top-level) and
 * consists of one to three digits, a `.` or `)`, and at least one whitespace
 * character.
 *
 * @param {string} line - One line of the document.
 * @returns {boolean} True when the line starts with such a marker.
 */
function isOlItemStart(line) {
  const topLevelMarker = /^(\d{1,3})([.)])\s+/;
  return topLevelMarker.exec(line) !== null;
}
/**
 * Removes a leading top-level ordered-list marker from a line.
 *
 * @param {string} line - One line of the document.
 * @returns {string} The line without its marker (digits, `.` or `)`, and the
 *   whitespace that follows); the line unchanged when it has no such marker.
 */
function stripOlMarker(line) {
  const found = line.match(/^(\d{1,3})([.)])\s+/);
  if (found === null) return line;
  return line.slice(found[0].length);
}
/**
 * Approximates how many characters of a Markdown fragment a reader would see.
 *
 * Backticks are dropped, inline links `[label](target)` are reduced to their
 * label, runs of whitespace count as a single space, and surrounding
 * whitespace is ignored.
 *
 * @param {*} s - Fragment to measure; converted with `String()` first.
 * @returns {number} Length of the simplified text.
 */
function visibleLen(s) {
  const withoutBackticks = String(s).replace(/`+/g, "");
  const linksAsLabels = withoutBackticks.replace(/\[([^\]]+)\]\([^)]+\)/g, "$1");
  const singleSpaced = linksAsLabels.replace(/\s+/g, " ");
  return singleSpaced.trim().length;
}
/**
 * Decides whether a parsed ordered list is really a run of numbered prose
 * paragraphs rather than a genuine list.
 *
 * @param {string[][]} items - List items, each an array of its lines.
 * @returns {boolean} False when there are fewer than MIN_ITEMS items, or when
 *   the share of items shorter than 60 visible characters exceeds
 *   MAX_SHORT_RATIO; otherwise true exactly when the average visible length
 *   per item reaches MIN_AVG_LEN.
 */
function looksLikeFalseList(items) {
  const count = items.length;
  if (count < MIN_ITEMS) return false;

  // Single pass: accumulate the total visible length and count the short items.
  let totalLen = 0;
  let shortCount = 0;
  for (const itemLines of items) {
    const len = visibleLen(itemLines.join("\n"));
    totalLen += len;
    if (len < 60) shortCount += 1;
  }

  const divisor = Math.max(1, count);
  // Many short items means these are real bullet points: leave the list alone.
  if (shortCount / divisor > MAX_SHORT_RATIO) return false;
  return totalLen / divisor >= MIN_AVG_LEN;
}
/**
 * Renders list items as plain paragraphs.
 *
 * @param {string[][]} items - List items, each an array of its lines with the
 *   list marker already removed.
 * @returns {string} Each non-empty item (trailing whitespace removed) followed
 *   by a blank line, joined with newlines; an empty string when no item has
 *   any content.
 */
function rewriteFalseList(items) {
  return items
    .map((itemLines) => itemLines.join("\n").trimEnd())
    .filter((paragraph) => paragraph !== "")
    .flatMap((paragraph) => [paragraph, ""]) // the "" yields the separating blank line
    .join("\n");
}
/**
 * Scans a Markdown/MDX body and turns "false" top-level ordered lists into
 * plain paragraphs, leaving everything else exactly as written.
 *
 * Lines inside fenced code blocks are copied through untouched. Each run of
 * top-level ordered-list items is parsed into items and handed to
 * `looksLikeFalseList`; a false list is replaced by the output of
 * `rewriteFalseList`, while a genuine list is emitted from its original lines,
 * so its numbering, `.`/`)` delimiter and indentation are preserved.
 *
 * @param {string} body - Document text without frontmatter.
 * @returns {{text: string, changed: boolean}} The resulting text (lines joined
 *   with "\n") and whether at least one list was rewritten.
 */
function processBody(body) {
  const lines = body.split(/\r?\n/);
  const out = [];
  let inFence = false;
  let changed = false;

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];

    if (isFence(line)) {
      inFence = !inFence;
      out.push(line);
      continue;
    }
    if (inFence || !isOlItemStart(line)) {
      out.push(line);
      continue;
    }

    // Parse one top-level ordered-list block covering lines[start, end).
    const start = i;
    const items = [];
    let cur = [stripOlMarker(line)];
    let end = start + 1;
    for (; end < lines.length; end++) {
      const l = lines[end];
      // A fence ends the list; the outer loop handles the fence line itself.
      if (isFence(l)) break;
      if (isOlItemStart(l)) {
        items.push(cur);
        cur = [stripOlMarker(l)];
        continue;
      }
      // Indented text continues the current item.
      if (/^\s{2,}\S/.test(l)) {
        cur.push(l.replace(/^\s{2}/, ""));
        continue;
      }
      // A blank line stays inside the item and does not end the list by itself.
      if (l.trim() === "") {
        cur.push("");
        continue;
      }
      // Any other non-indented line ends the list block.
      break;
    }
    items.push(cur);

    if (looksLikeFalseList(items)) {
      changed = true;
      out.push(rewriteFalseList(items));
    } else {
      // Genuine list: copy the source lines verbatim instead of rebuilding them,
      // which would renumber from 1, force a "." delimiter and alter indentation.
      for (let k = start; k < end; k++) out.push(lines[k]);
    }

    // Resume the outer loop at the line that ended the block.
    i = end - 1;
  }

  return { text: out.join("\n"), changed };
}
// Process each file given on the command line, one after the other.
for (const file of files) {
  const absPath = path.resolve(file);
  const original = await fs.readFile(absPath, "utf8");
  const parts = splitFrontmatter(original);
  const { text, changed } = processBody(parts.body);

  if (changed && !DRY) {
    // Save the untouched contents next to the file before overwriting it.
    const backupPath = `${absPath}.bak`;
    await fs.writeFile(backupPath, original, "utf8");
    await fs.writeFile(absPath, parts.front + text, "utf8");
    console.log(`${file}: rewritten (backup -> ${path.basename(backupPath)})`);
  } else if (changed) {
    // Dry run: report what would happen without writing anything.
    console.log(`🟡 ${file}: would rewrite false ordered-lists (dry-run)`);
  } else {
    console.log(`${file}: no false ordered-lists detected`);
  }
}