// scripts/fix-pandoc-false-ordered-lists.mjs
// Convertit les "fausses listes" Pandoc (1. gros paragraphe / 2. gros paragraphe / ...)
// en paragraphes normaux, sans toucher aux petites listes légitimes.
// Sécurités :
// - ignore les blocs de code délimités (code fences)
// - n'agit que sur des listes top-level
// - heuristique (taille/volume) + backup .bak

import fs from "node:fs/promises";
import path from "node:path";

// Command-line handling: arguments starting with "--" are options, all others are input files.
const args = process.argv.slice(2);
const KNOWN_FLAGS = new Set(["--dry-run"]);
const USAGE = "Usage: node scripts/fix-pandoc-false-ordered-lists.mjs <file1.mdx> [file2.mdx ...] [--dry-run]";

// When true, only report which files would be rewritten; nothing is written to disk.
const DRY = args.includes("--dry-run");
const files = args.filter((a) => !a.startsWith("--"));

// Abort on unrecognised options: a typo such as "--dryrun" would otherwise be
// silently dropped and the files would really be rewritten.
const unknownFlags = args.filter((a) => a.startsWith("--") && !KNOWN_FLAGS.has(a));
if (unknownFlags.length > 0) {
  console.error(`Unknown option(s): ${unknownFlags.join(", ")}`);
  console.error(USAGE);
  process.exit(1);
}

if (files.length === 0) {
  console.error(USAGE);
  process.exit(1);
}

// Heuristic thresholds used to decide whether an ordered list is really prose.

// Lists with fewer items than this are never touched.
const MIN_ITEMS = 6;

// Minimum average number of visible characters per item for a list to count as prose.
const MIN_AVG_LEN = 140;

// If the share of short items exceeds this ratio, the list is treated as a genuine list.
const MAX_SHORT_RATIO = 0.25;

/**
 * Splits a leading `---` delimited front matter block off a document.
 *
 * @param {string} txt - Full file contents.
 * @returns {{ front: string, body: string }} `front` holds the front matter
 *   including both `---` delimiter lines (empty string when none is found);
 *   `body` is everything after it. `front + body` always equals `txt`.
 */
function splitFrontmatter(txt) {
  // `\r?\n` makes files with CRLF line endings match too; the closing
  // delimiter may also be the very last thing in the input.
  const m = txt.match(/^---\r?\n[\s\S]*?\r?\n---(?:\r?\n|$)/);
  if (!m) return { front: "", body: txt };
  return { front: m[0], body: txt.slice(m[0].length) };
}

/**
 * Tells whether a line is a fenced-code-block delimiter.
 *
 * Leading and trailing whitespace is ignored. A line starting with `~~~` is
 * always a fence. A line starting with three or more backticks is a fence only
 * when no further backtick follows the opening run: a line such as
 * "```code``` text" is an inline code span, and treating it as a fence would
 * flip the caller's fence state for the rest of the document.
 *
 * @param {string} line - One line of Markdown.
 * @returns {boolean} True when the line opens or closes a code fence.
 */
function isFence(line) {
  const t = line.trim();
  if (t.startsWith("~~~")) return true;
  if (!t.startsWith("```")) return false;

  // Whatever follows the opening run of backticks (the info string, if any).
  const afterRun = t.replace(/^`+/, "");
  return !afterRun.includes("`");
}

/**
 * Tells whether a line starts a top-level ordered-list item: one to three
 * digits at column 0, then `.` or `)`, then at least one whitespace character.
 * Indented lines never match, so nested lists are ignored.
 *
 * @param {string} line - One line of Markdown.
 * @returns {boolean} True when the line begins with an ordered-list marker.
 */
function isOlItemStart(line) {
  const olMarker = /^(\d{1,3})([.)])\s+/;
  return olMarker.test(line);
}

/**
 * Removes a leading top-level ordered-list marker (digits, `.` or `)`, and the
 * whitespace after it) from a line. Lines without such a marker are returned
 * unchanged.
 *
 * @param {string} line - One line of Markdown.
 * @returns {string} The line without its list marker.
 */
function stripOlMarker(line) {
  const leadingMarker = /^(\d{1,3})([.)])\s+/;
  const withoutMarker = line.replace(leadingMarker, "");
  return withoutMarker;
}

/**
 * Approximates the rendered length of a Markdown fragment.
 *
 * Backticks are dropped, `[text](url)` links are reduced to their text, runs of
 * whitespace are collapsed to one space, and the result is trimmed.
 *
 * @param {*} s - Value to measure; it is converted with `String()` first.
 * @returns {number} Number of characters left after the clean-up.
 */
function visibleLen(s) {
  const withoutBackticks = String(s).replace(/`+/g, "");
  const linksAsText = withoutBackticks.replace(/\[([^\]]+)\]\([^)]+\)/g, "$1");
  const singleSpaced = linksAsText.replace(/\s+/g, " ");
  return singleSpaced.trim().length;
}

/**
 * Heuristic: decides whether a parsed ordered list is really a sequence of
 * prose paragraphs that were numbered by mistake.
 *
 * A list qualifies when it has at least MIN_ITEMS items, the share of items
 * shorter than 60 visible characters does not exceed MAX_SHORT_RATIO, and the
 * average visible length per item is at least MIN_AVG_LEN.
 *
 * @param {string[][]} items - One array of lines per list item.
 * @returns {boolean} True when the list should be rewritten as paragraphs.
 */
function looksLikeFalseList(items) {
  const count = items.length;
  if (count < MIN_ITEMS) return false;

  let totalLen = 0;
  let shortCount = 0;
  for (const item of items) {
    const len = visibleLen(item.join("\n"));
    totalLen += len;
    if (len < 60) shortCount += 1;
  }

  const divisor = Math.max(1, count);

  // Many short entries means these are genuine bullet points: leave the list alone.
  if (shortCount / divisor > MAX_SHORT_RATIO) return false;

  return totalLen / divisor >= MIN_AVG_LEN;
}

/**
 * Serialises list items as plain paragraphs.
 *
 * Each item's lines are joined with newlines and right-trimmed; items that end
 * up empty are dropped. Every remaining paragraph is followed by an empty line.
 *
 * @param {string[][]} items - One array of lines per list item.
 * @returns {string} The paragraphs as one newline-joined string.
 */
function rewriteFalseList(items) {
  return items
    .map((item) => item.join("\n").trimEnd())
    .filter((paragraph) => paragraph !== "")
    .flatMap((paragraph) => [paragraph, ""]) // the "" yields the separating blank line
    .join("\n");
}

/**
 * Scans a Markdown body and rewrites every top-level ordered list that
 * `looksLikeFalseList` flags as prose into plain paragraphs.
 *
 * Everything else is emitted exactly as it was read:
 * - lines inside fenced code blocks are never inspected; a fence is closed only
 *   by a line made solely of the same fence character, at least as long as the
 *   opening run, so a `~~~` line inside a backtick fence does not end it;
 * - ordered lists that are kept are copied verbatim (original numbering,
 *   `.`/`)` delimiter and indentation are preserved);
 * - the line ending found in the input (CRLF if present, else LF) is reused
 *   for the output.
 *
 * @param {string} body - Document text without its front matter.
 * @returns {{ text: string, changed: boolean }} The resulting text, and whether
 *   at least one list was rewritten.
 */
function processBody(body) {
  // Reuse the input's line ending so a CRLF file is not silently converted to LF.
  const eol = body.includes("\r\n") ? "\r\n" : "\n";
  const lines = body.split(/\r?\n/);

  // Run of fence characters that opened the current code block, or null outside one.
  let openFence = null;
  let changed = false;
  const out = [];

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];

    if (openFence !== null) {
      out.push(line);
      const t = line.trim();
      // Only a bare run of the opening character, at least as long, closes the block.
      const closes = t.length >= openFence.length && t === openFence[0].repeat(t.length);
      if (closes) openFence = null;
      continue;
    }

    if (isFence(line)) {
      const run = /^(?:`{3,}|~{3,})/.exec(line.trim());
      openFence = run ? run[0] : "```";
      out.push(line);
      continue;
    }

    if (!isOlItemStart(line)) {
      out.push(line);
      continue;
    }

    // Collect one top-level ordered-list block starting at line i.
    const blockStart = i;
    const items = [];
    let cur = [stripOlMarker(line)];

    let j = i + 1;
    for (; j < lines.length; j++) {
      const l = lines[j];

      // A fence ends the list; the outer loop handles the fence line itself.
      if (isFence(l)) break;

      if (isOlItemStart(l)) {
        items.push(cur);
        cur = [stripOlMarker(l)];
        continue;
      }

      // Continuation line: indented by two or more, belongs to the current item.
      if (/^\s{2,}\S/.test(l)) {
        cur.push(l.replace(/^\s{2}/, ""));
        continue;
      }

      // Blank lines stay inside the item and do not end the list.
      if (l.trim() === "") {
        cur.push("");
        continue;
      }

      // Any other non-indented line ends the list block.
      break;
    }

    items.push(cur);

    if (looksLikeFalseList(items)) {
      changed = true;
      // Split so the paragraphs are joined with the document's own line ending.
      out.push(...rewriteFalseList(items).split("\n"));
    } else {
      // Genuine list: copy the original lines untouched.
      for (let k = blockStart; k < j; k++) out.push(lines[k]);
    }

    // Resume the outer loop at line j (its i++ brings us there).
    i = j - 1;
  }

  return { text: out.join(eol), changed };
}

// Process each input file in turn. A failure on one file (unreadable, not
// writable, ...) is reported and the remaining files are still processed; the
// process then ends with a non-zero exit code.
for (const file of files) {
  const p = path.resolve(file);

  try {
    const raw = await fs.readFile(p, "utf8");

    // The front matter is set aside and re-attached untouched.
    const { front, body } = splitFrontmatter(raw);
    const res = processBody(body);

    if (!res.changed) {
      console.log(`✅ ${file}: no false ordered-lists detected`);
      continue;
    }

    if (DRY) {
      console.log(`🟡 ${file}: would rewrite false ordered-lists (dry-run)`);
      continue;
    }

    // Save the untouched original next to the file before overwriting it.
    const bak = `${p}.bak`;
    await fs.writeFile(bak, raw, "utf8");
    await fs.writeFile(p, front + res.text, "utf8");

    console.log(`✅ ${file}: rewritten (backup -> ${path.basename(bak)})`);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    console.error(`❌ ${file}: ${reason}`);
    process.exitCode = 1;
  }
}