Seed from NAS prod snapshot 20260130-190531
This commit is contained in:
182
scripts/fix-pandoc-false-ordered-lists.mjs
Normal file
182
scripts/fix-pandoc-false-ordered-lists.mjs
Normal file
@@ -0,0 +1,182 @@
|
||||
// scripts/fix-pandoc-false-ordered-lists.mjs
|
||||
// Convertit les "fausses listes" Pandoc (1. gros paragraphe / 2. gros paragraphe / ...)
|
||||
// en paragraphes normaux, sans toucher aux petites listes légitimes.
|
||||
// Sécurités :
|
||||
// - ignore les blocs code fences
|
||||
// - n'agit que sur des listes top-level
|
||||
// - heuristique (taille/volume) + backup .bak
|
||||
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
|
||||
// ---- CLI parsing -----------------------------------------------------------
// Positional arguments are file paths; the only supported option is --dry-run.
const args = process.argv.slice(2);
const KNOWN_FLAGS = new Set(["--dry-run"]);

// Reject unknown options instead of silently dropping them: a mistyped
// "--dry-run" must never degrade into a real, file-modifying run.
const unknownFlags = args.filter((a) => a.startsWith("--") && !KNOWN_FLAGS.has(a));

const DRY = args.includes("--dry-run");
const files = args.filter((a) => !a.startsWith("--"));

if (unknownFlags.length > 0) {
  console.error(`Unknown option(s): ${unknownFlags.join(", ")}`);
}

if (files.length === 0 || unknownFlags.length > 0) {
  console.error("Usage: node scripts/fix-pandoc-false-ordered-lists.mjs <file1.mdx> [file2.mdx ...] [--dry-run]");
  process.exit(1);
}
|
||||
|
||||
/** Lists with fewer items than this are never touched. */
const MIN_ITEMS = 6;

/** Minimum average number of visible characters per item for a list to count as prose. */
const MIN_AVG_LEN = 140;

/** If more than this fraction of the items are short, the block is treated as a genuine list. */
const MAX_SHORT_RATIO = 0.25;
|
||||
|
||||
/**
 * Split a document into its leading `---` frontmatter block and the remaining body.
 *
 * Both LF and CRLF line endings are accepted, and the closing `---` may be the
 * very last line of the input (no trailing newline).
 *
 * @param {string} txt - Full file contents.
 * @returns {{ front: string, body: string }} `front` is the frontmatter including
 *   both delimiters and the line ending after the closing one ("" when the text
 *   does not start with a frontmatter block); `body` is everything after it.
 */
function splitFrontmatter(txt) {
  // Lazy match so the first closing delimiter line ends the block.
  const m = txt.match(/^---\r?\n[\s\S]*?\r?\n---(?:\r?\n|$)/);
  if (!m) return { front: "", body: txt };
  return { front: m[0], body: txt.slice(m[0].length) };
}
|
||||
|
||||
/**
 * Tell whether a line is a code-fence delimiter (``` or ~~~), ignoring indentation.
 *
 * A backtick fence may carry an info string (e.g. "```js"), but that info string
 * cannot itself contain a backtick; a line such as "```code``` text" is an inline
 * code span, not a fence, and must not flip the caller's fence state.
 *
 * @param {string} line - A single line of the document.
 * @returns {boolean} True when the line opens or closes a fenced code block.
 */
function isFence(line) {
  const t = line.trim();
  if (t.startsWith("~~~")) return true;
  if (!t.startsWith("```")) return false;

  // Everything after the leading run of backticks is the info string.
  const info = t.replace(/^`+/, "");
  return !info.includes("`");
}
|
||||
|
||||
/**
 * Tell whether a line begins a top-level ordered-list item.
 *
 * The marker must sit at column 0 (no indentation), consist of one to three
 * digits followed by "." or ")", and be followed by at least one whitespace
 * character.
 *
 * @param {string} line - A single line of the document.
 * @returns {boolean} True when the line starts with such a marker.
 */
function isOlItemStart(line) {
  const topLevelMarker = /^\d{1,3}[.)]\s+/;
  return topLevelMarker.test(line);
}
|
||||
|
||||
/**
 * Remove a leading top-level ordered-list marker ("12. " / "3) ") from a line.
 *
 * Only a marker at column 0 is removed, together with all whitespace that
 * follows it; a line without such a marker is returned unchanged.
 *
 * @param {string} line - A single line of the document.
 * @returns {string} The line without its leading list marker.
 */
function stripOlMarker(line) {
  const found = /^\d{1,3}[.)]\s+/.exec(line);
  if (found === null) return line;
  return line.slice(found[0].length);
}
|
||||
|
||||
/**
 * Approximate the rendered length of a piece of Markdown.
 *
 * Backticks are dropped, inline links `[label](target)` are reduced to their
 * label, runs of whitespace are collapsed to a single space, and the result is
 * trimmed before measuring.
 *
 * @param {*} s - Value to measure; it is converted with `String()` first.
 * @returns {number} Length of the simplified text.
 */
function visibleLen(s) {
  const source = String(s);
  const withoutTicks = source.replace(/`+/g, "");
  const linksAsLabels = withoutTicks.replace(/\[([^\]]+)\]\([^)]+\)/g, "$1");
  const singleSpaced = linksAsLabels.replace(/\s+/g, " ");
  return singleSpaced.trim().length;
}
|
||||
|
||||
/**
 * Heuristic: decide whether a parsed ordered list is really prose that was
 * numbered by mistake.
 *
 * A list qualifies only when it has at least MIN_ITEMS items, no more than
 * MAX_SHORT_RATIO of them are short (fewer than 60 visible characters), and the
 * average visible length per item reaches MIN_AVG_LEN.
 *
 * @param {string[][]} items - One array of lines per list item.
 * @returns {boolean} True when the list should be rewritten as paragraphs.
 */
function looksLikeFalseList(items) {
  const count = items.length;
  if (count < MIN_ITEMS) return false;

  let totalLen = 0;
  let shortCount = 0;
  for (const item of items) {
    const len = visibleLen(item.join("\n"));
    totalLen += len;
    if (len < 60) shortCount += 1;
  }

  const denom = Math.max(1, count);

  // Many short entries means these are genuine bullet points: leave them alone.
  if (shortCount / denom > MAX_SHORT_RATIO) return false;

  return totalLen / denom >= MIN_AVG_LEN;
}
|
||||
|
||||
/**
 * Render list items as plain paragraphs.
 *
 * Each item's lines are joined with newlines and right-trimmed; items that end
 * up empty are dropped. Every remaining paragraph is followed by an empty
 * entry before the final join, so the returned text ends with a newline.
 *
 * @param {string[][]} items - One array of lines per list item.
 * @returns {string} The paragraphs, or "" when no item has content.
 */
function rewriteFalseList(items) {
  return items
    .map((itemLines) => itemLines.join("\n").trimEnd())
    .filter((paragraph) => paragraph !== "")
    .flatMap((paragraph) => [paragraph, ""])
    .join("\n");
}
|
||||
|
||||
/**
 * Describe the fence opened by `line`: its marker character and run length.
 * Only call this on a line already recognised as a fence.
 */
function fenceOpener(line) {
  const t = line.trim();
  const char = t[0];
  let len = 0;
  while (t[len] === char) len++;
  return { char, len };
}

/**
 * Tell whether `line` closes the given open fence: same marker character, a run
 * at least as long as the opener, and nothing else on the line.
 */
function fenceCloses(line, fence) {
  const t = line.trim();
  let len = 0;
  while (t[len] === fence.char) len++;
  return len >= fence.len && len === t.length;
}

/**
 * Scan a Markdown body and turn "false" top-level ordered lists (long prose
 * paragraphs that happen to be numbered) into plain paragraphs.
 *
 * - Content inside fenced code blocks is copied through untouched. A fence is
 *   only closed by a matching delimiter, so "~~~" inside a "```" block (or a
 *   shorter/annotated fence inside a longer one) does not end it.
 * - Only lists whose markers start at column 0 are considered.
 * - Lists that do not pass `looksLikeFalseList` are emitted verbatim, keeping
 *   their original numbering, delimiter style and continuation indentation.
 *
 * @param {string} body - Document text without frontmatter.
 * @returns {{ text: string, changed: boolean }} The resulting text (lines joined
 *   with "\n") and whether at least one list was rewritten.
 */
function processBody(body) {
  const lines = body.split(/\r?\n/);
  const out = [];
  let changed = false;
  let openFence = null; // { char, len } while inside a fenced code block

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];

    if (openFence !== null) {
      out.push(line);
      if (fenceCloses(line, openFence)) openFence = null;
      continue;
    }

    if (isFence(line)) {
      openFence = fenceOpener(line);
      out.push(line);
      continue;
    }

    if (!isOlItemStart(line)) {
      out.push(line);
      continue;
    }

    // Parse one top-level ordered-list block. `rawBlock` keeps the untouched
    // source lines so a list we decide not to rewrite is restored exactly.
    const rawBlock = [line];
    const items = [];
    let cur = [stripOlMarker(line)];

    let j = i + 1;
    for (; j < lines.length; j++) {
      const l = lines[j];

      // A fence ends the list block; the outer loop handles the fence itself.
      if (isFence(l)) break;

      if (isOlItemStart(l)) {
        items.push(cur);
        cur = [stripOlMarker(l)];
      } else if (/^\s{2,}\S/.test(l)) {
        // Indented continuation line: belongs to the current item.
        cur.push(l.replace(/^\s{2}/, ""));
      } else if (l.trim() === "") {
        // Blank line: kept inside the item, does not necessarily end the list.
        cur.push("");
      } else {
        // Non-indented text: end of the list block.
        break;
      }

      rawBlock.push(l);
    }

    items.push(cur);

    // Resume the outer loop on the line that ended the block (if any).
    i = j - 1;

    if (looksLikeFalseList(items)) {
      changed = true;
      out.push(rewriteFalseList(items));
    } else {
      out.push(...rawBlock);
    }
  }

  return { text: out.join("\n"), changed };
}
|
||||
|
||||
// ---- Main loop -------------------------------------------------------------
// Files are handled one after another. A failure on one file (missing file,
// permission error, ...) is reported and reflected in the exit code, but does
// not prevent the remaining files from being processed.
for (const file of files) {
  const p = path.resolve(file);

  try {
    const raw = await fs.readFile(p, "utf8");

    const { front, body } = splitFrontmatter(raw);
    const res = processBody(body);

    if (!res.changed) {
      console.log(`✅ ${file}: no false ordered-lists detected`);
      continue;
    }

    if (DRY) {
      console.log(`🟡 ${file}: would rewrite false ordered-lists (dry-run)`);
      continue;
    }

    // Write the backup first so the original content is safe before the
    // source file is overwritten.
    const bak = `${p}.bak`;
    await fs.writeFile(bak, raw, "utf8");
    await fs.writeFile(p, front + res.text, "utf8");

    console.log(`✅ ${file}: rewritten (backup -> ${path.basename(bak)})`);
  } catch (err) {
    process.exitCode = 1;
    const reason = err instanceof Error ? err.message : String(err);
    console.error(`❌ ${file}: ${reason}`);
  }
}
|
||||
Reference in New Issue
Block a user