propose: exact paragraph + apply-ticket guardrails

2026-01-20 12:42:30 +01:00
parent ec42c4b2f4
commit 3b8376d6a9
4 changed files with 290 additions and 187 deletions
--- a/scripts/apply-ticket.mjs
+++ b/scripts/apply-ticket.mjs
@@ -20,7 +20,7 @@ Env (recommandé):

 Notes:
  - Si dist/<chemin>/index.html est absent, le script lance "npm run build" sauf si --no-build.
-  - Sauvegarde automatique: <fichier>.bak.issue-<N>
+  - Sauvegarde automatique: <fichier>.bak.issue-<N> (uniquement si on écrit)
 `);
  process.exit(exitCode);
 }
@@ -45,12 +45,16 @@ function normalizeText(s) {
  return String(s ?? "")
    .normalize("NFKD")
    .replace(/\p{Diacritic}/gu, "")
+    .replace(/[’‘]/g, "'")
+    .replace(/[“”]/g, '"')
+    .replace(/[–—]/g, "-")
+    .replace(/…/g, "...")
    .replace(/\s+/g, " ")
    .trim()
    .toLowerCase();
 }

-// stripping très pragmatique (anti-fragile > parfait)
+// stripping très pragmatique
 function stripMd(mdx) {
  let s = String(mdx ?? "");
  s = s.replace(/`[^`]*`/g, " ");              // inline code
@@ -62,6 +66,14 @@ function stripMd(mdx) {
  return s;
 }

+// Significant words (>= 4 chars) of `s` once Markdown is stripped and text normalized.
+function tokenize(s) {
+  const cleaned = normalizeText(stripMd(s)).replace(/[^a-z0-9'\- ]+/g, " ");
+  const words = [];
+  for (const w of cleaned.split(" ")) if (w.length >= 4) words.push(w);
+  return words;
+}
+
 function run(cmd, args, opts = {}) {
  const r = spawnSync(cmd, args, { stdio: "inherit", ...opts });
  if (r.status !== 0) throw new Error(`Command failed: ${cmd} ${args.join(" ")}`);
@@ -79,25 +91,25 @@ function inferOwnerRepoFromGit() {
  const r = spawnSync("git", ["remote", "get-url", "origin"], { encoding: "utf-8" });
  if (r.status !== 0) return null;
  const u = (r.stdout || "").trim();
-  // supports: https://host/owner/repo.git or ssh
  const m = u.match(/[:/](?<owner>[^/]+)\/(?<repo>[^/]+?)(?:\.git)?$/);
  if (!m?.groups) return null;
  return { owner: m.groups.owner, repo: m.groups.repo };
 }

+function escapeRegExp(s) { // backslash-escape RegExp metacharacters so `s` matches literally
+  return String(s).replace(/[.*+?^${}()|[\]\\]/g, (ch) => `\\${ch}`);
+}
+
 function pickLine(body, key) {
-  // tolère espaces/indent
  const re = new RegExp(`^\\s*${escapeRegExp(key)}\\s*:\\s*([^\\n\\r]+)`, "mi");
  const m = body.match(re);
  return m ? m[1].trim() : "";
 }

 function pickHeadingValue(body, headingKey) {
-  // ex: "## Chemin ..." ligne suivante contenant /...
  const re = new RegExp(`^##\\s*${escapeRegExp(headingKey)}[^\\n]*\\n([\\s\\S]*?)(?=\\n##\\s|\\n\\s*$)`, "mi");
  const m = body.match(re);
  if (!m) return "";
-  // première ligne non vide et non commentée
  const lines = m[1].split(/\r?\n/).map(l => l.trim());
  for (const l of lines) {
    if (!l) continue;
@@ -108,7 +120,6 @@ function pickHeadingValue(body, headingKey) {
 }

 function pickSection(body, markers) {
-  // capture bloc après le 1er marker trouvé, jusqu'à un séparateur connu
  const text = body.replace(/\r\n/g, "\n");
  const idx = markers
    .map(m => ({ m, i: text.toLowerCase().indexOf(m.toLowerCase()) }))
@@ -118,7 +129,6 @@ function pickSection(body, markers) {
  const start = idx.i + idx.m.length;
  const tail = text.slice(start);

-  // stop markers (robuste)
  const stops = [
    "\n## ", "\nJustification", "\n---", "\n## Justification", "\n## Sources",
    "\nProblème identifié", "\nSources proposées", "\n## Proposition", "\n## Problème"
@@ -132,7 +142,6 @@ function pickSection(body, markers) {
 }

 function unquoteBlock(s) {
-  // enlève ">" de citation markdown
  return String(s ?? "")
    .split(/\r?\n/)
    .map(l => l.replace(/^\s*>\s?/, ""))
@@ -140,64 +149,66 @@ function unquoteBlock(s) {
    .trim();
 }

-function escapeRegExp(s) {
-  return String(s).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
-}
-
 async function readHtmlParagraphText(htmlPath, anchorId) {
  const html = await fs.readFile(htmlPath, "utf-8");
-  // cherche <p id="anchorId" ...> ... </p>
  const re = new RegExp(`<p[^>]*\\bid=["']${escapeRegExp(anchorId)}["'][^>]*>([\\s\\S]*?)<\\/p>`, "i");
  const m = html.match(re);
  if (!m) return "";
  let inner = m[1];

-  // supprime les outils "para-tools" si présents
  inner = inner.replace(/<span[^>]*class=["'][^"']*para-tools[^"']*["'][^>]*>[\s\S]*?<\/span>/gi, " ");
-
-  // strip tags
  inner = inner.replace(/<[^>]+>/g, " ");
  inner = inner.replace(/\s+/g, " ").trim();
-
-  // enlève artefacts éventuels
  inner = inner.replace(/\b(¶|Citer|Proposer|Copié)\b/gi, "").replace(/\s+/g, " ").trim();
  return inner;
 }

 function splitParagraphBlocks(mdxText) {
-  // bloc = séparé par 2 sauts de ligne (pragmatique)
  const raw = mdxText.replace(/\r\n/g, "\n");
-  const parts = raw.split(/\n{2,}/);
-  return parts;
+  return raw.split(/\n{2,}/);
+}
+
+// True when `s` looks like a truncated excerpt rather than a full paragraph copy.
+function isLikelyExcerpt(s) {
+  const t = String(s || "").trim();
+  if (t.length < 120) return true; // empty or too short to be a whole paragraph
+  // Only a trailing ellipsis signals truncation; a lone "." ends most full paragraphs.
+  if (/(?:\.{3}|…)$/.test(t)) return true;
+  return /tronqu/i.test(t); // "tronqué"/"tronquee" marker, accented or not, any case
+}
+
+/**
+ * Similarity score between a Markdown block and the target paragraph text.
+ * Sum of: number of shared tokens, share of target tokens found (0..100),
+ * and +1000 when the block contains the first <=180 normalized target chars.
+ * Returns 0 as soon as either side yields no token.
+ */
+function scoreBlock(block, targetText) {
+  const targetWords = new Set(tokenize(targetText));
+  const blockWords = new Set(tokenize(block));
+  if (targetWords.size === 0 || blockWords.size === 0) return 0;
+
+  let shared = 0;
+  targetWords.forEach((word) => {
+    if (blockWords.has(word)) shared += 1;
+  });
+  const coverage = Math.round((shared / Math.max(1, targetWords.size)) * 100);
+
+  const targetNorm = normalizeText(stripMd(targetText));
+  const head = targetNorm.slice(0, Math.min(180, targetNorm.length));
+  const containsHead = Boolean(head) && normalizeText(stripMd(block)).includes(head);
+
+  return (containsHead ? 1000 : 0) + shared + coverage;
+}

 function bestBlockMatchIndex(blocks, targetText) {
-  const tgt = normalizeText(stripMd(targetText));
-  if (!tgt) return -1;
-
-  // on compare par inclusion de snippet + score "overlap"
-  const snippet = tgt.slice(0, Math.min(160, tgt.length));
  let best = { i: -1, score: -1 };
-
  for (let i = 0; i < blocks.length; i++) {
-    const b = normalizeText(stripMd(blocks[i]));
-    if (!b) continue;
-
-    let score = 0;
-    if (b.includes(snippet)) score += 1000; // jackpot
-
-    // overlap par mots (cheap mais robuste)
-    const words = new Set(tgt.split(" ").filter(w => w.length >= 4));
-    let hit = 0;
-    for (const w of words) if (b.includes(w)) hit++;
-    score += hit;
-
-    if (score > best.score) best = { i, score };
+    const b = blocks[i];
+    const sc = scoreBlock(b, targetText);
+    if (sc > best.score) best = { i, score: sc };
  }
-
-  // seuil minimal : évite remplacement sauvage
-  if (best.score < 20) return -1;
-  return best.i;
+  return best;
 }

 async function findContentFileFromChemin(chemin) {
@@ -205,11 +216,10 @@ async function findContentFileFromChemin(chemin) {
  const parts = clean.split("/").filter(Boolean);
  if (parts.length < 2) return null;
  const collection = parts[0];
-  const slugPath = parts.slice(1).join("/"); // support nested
+  const slugPath = parts.slice(1).join("/");
  const root = path.join(CONTENT_ROOT, collection);
  if (!(await fileExists(root))) return null;

-  // cherche fichier dont le path relatif (sans ext) == slugPath
  const exts = [".mdx", ".md"];
  async function walk(dir) {
    const entries = await fs.readdir(dir, { withFileTypes: true });
@@ -250,7 +260,7 @@ async function fetchIssue({ forgeApiBase, owner, repo, token, issueNum }) {
    headers: {
      "Authorization": `token ${token}`,
      "Accept": "application/json",
-      "User-Agent": "archicratie-apply-ticket/1.0",
+      "User-Agent": "archicratie-apply-ticket/1.1",
    }
  });
  if (!res.ok) {
@@ -275,7 +285,6 @@ async function main() {
    process.exit(1);
  }

-  // API base: priorise LAN (FORGE_API), sinon FORGE_BASE
  const forgeApiBase = getEnv("FORGE_API") || getEnv("FORGE_BASE");
  if (!forgeApiBase) {
    console.error("❌ FORGE_API ou FORGE_BASE manquant. Ex: export FORGE_API='http://192.168.1.20:3000'");
@@ -285,22 +294,17 @@ async function main() {
  console.log(`🔎 Fetch ticket #${issueNum} from ${owner}/${repo} …`);
  const issue = await fetchIssue({ forgeApiBase, owner, repo, token, issueNum });

-  const title = issue.title || "";
-  const bodyRaw = issue.body || "";
-  const body = bodyRaw.replace(/\r\n/g, "\n");
+  const body = String(issue.body || "").replace(/\r\n/g, "\n");

-  // Chemin / Ancre: support format "Chemin:" OU "## Chemin"
  let chemin = pickLine(body, "Chemin") || pickHeadingValue(body, "Chemin");
  let ancre  = pickLine(body, "Ancre") || pickHeadingValue(body, "Ancre paragraphe") || pickHeadingValue(body, "Ancre");
-  ancre = ancre.trim();
+  ancre = (ancre || "").trim();
  if (ancre.startsWith("#")) ancre = ancre.slice(1);

-  // Texte actuel: support "Texte actuel (copie exacte...)" OU "Texte actuel (extrait)"
-  const current1 = pickSection(body, ["Texte actuel (copie exacte du paragraphe)", "## Texte actuel (copie exacte du paragraphe)"]);
-  const current2 = pickSection(body, ["Texte actuel (extrait)", "## Assertion / passage à vérifier", "Assertion / passage à vérifier"]);
-  const texteActuel = unquoteBlock(current1 || current2);
+  const currentFull = pickSection(body, ["Texte actuel (copie exacte du paragraphe)", "## Texte actuel (copie exacte du paragraphe)"]);
+  const currentEx  = pickSection(body, ["Texte actuel (extrait)", "## Assertion / passage à vérifier", "Assertion / passage à vérifier"]);
+  const texteActuel = unquoteBlock(currentFull || currentEx);

-  // Proposition: support 2 modèles
  const prop1 = pickSection(body, ["Proposition (texte corrigé complet)", "## Proposition (texte corrigé complet)"]);
  const prop2 = pickSection(body, ["Proposition (remplacer par):", "## Proposition (remplacer par)"]);
  const proposition = (prop1 || prop2).trim();
@@ -313,56 +317,62 @@ async function main() {

  const contentFile = await findContentFileFromChemin(chemin);
  if (!contentFile) throw new Error(`Fichier contenu introuvable pour Chemin=${chemin}`);
-
  console.log(`📄 Target content file: ${path.relative(CWD, contentFile)}`);

-  // dist html path
  const distHtmlPath = path.join(DIST_ROOT, chemin.replace(/^\/+|\/+$/g,""), "index.html");
  await ensureBuildIfNeeded(distHtmlPath);

-  // texte cible: priorité au texte actuel du ticket, sinon récup HTML du paragraphe via ancre
+  // targetText: préférence au texte complet (ticket), sinon dist si extrait probable
  let targetText = texteActuel;
-  if (!targetText) {
-    if (await fileExists(distHtmlPath)) {
-      const htmlText = await readHtmlParagraphText(distHtmlPath, ancre);
-      if (htmlText) targetText = htmlText;
-    }
+
+  let distText = "";
+  if (await fileExists(distHtmlPath)) {
+    distText = await readHtmlParagraphText(distHtmlPath, ancre);
  }
+
+  if (!targetText && distText) targetText = distText;
+  if (targetText && distText && isLikelyExcerpt(targetText) && distText.length > targetText.length) {
+    targetText = distText;
+  }
+
  if (!targetText) {
    throw new Error("Impossible de reconstruire le texte du paragraphe (ni texte actuel, ni dist html).");
  }

-  // lecture + split blocs
  const original = await fs.readFile(contentFile, "utf-8");
  const blocks = splitParagraphBlocks(original);

-  const idx = bestBlockMatchIndex(blocks, targetText);
-  if (idx < 0) {
+  const best = bestBlockMatchIndex(blocks, targetText);
+
+  // seuil de sécurité : on veut au moins un overlap raisonnable.
+  // Avec le bonus prefix+ratio, un match correct dépasse très vite ~60–80.
+  if (best.i < 0 || best.score < 40) {
    console.error("❌ Match trop faible: je refuse de remplacer automatiquement.");
-    console.error("➡️  Action: mets 'Texte actuel (copie exacte du paragraphe)' dans le ticket (recommandé).");
+    console.error(`➡️  Score=${best.score}. Recommandation: ticket avec 'Texte actuel (copie exacte du paragraphe)'.`);
+    // debug: top 5
+    const ranked = blocks
+      .map((b, i) => ({ i, score: scoreBlock(b, targetText), excerpt: stripMd(b).slice(0, 140) }))
+      .sort((a, b) => b.score - a.score)
+      .slice(0, 5);
+
+    console.error("Top candidates:");
+    for (const r of ranked) {
+      console.error(`  #${r.i + 1} score=${r.score}  ${r.excerpt}${r.excerpt.length >= 140 ? "…" : ""}`);
+    }
    process.exit(2);
  }

-  const beforeBlock = blocks[idx];
+  const beforeBlock = blocks[best.i];
  const afterBlock = proposition.trim();

-  // garde le style: 1 bloc -> 1 bloc
  const nextBlocks = blocks.slice();
-  nextBlocks[idx] = afterBlock;
-
+  nextBlocks[best.i] = afterBlock;
  const updated = nextBlocks.join("\n\n");

-  // backup
-  const bakPath = `${contentFile}.bak.issue-${issueNum}`;
-  if (!(await fileExists(bakPath))) {
-    await fs.writeFile(bakPath, original, "utf-8");
-  }
-
-  // preview stats
-  console.log(`🧩 Matched block #${idx+1}/${blocks.length} (backup: ${path.relative(CWD, bakPath)})`);
+  console.log(`🧩 Matched block #${best.i + 1}/${blocks.length} score=${best.score}`);

  if (DRY_RUN) {
-    console.log("\n--- DRY RUN (no write) ---\n");
+    console.log("\n--- DRY RUN (no write, no backup) ---\n");
    console.log("=== BEFORE (excerpt) ===");
    console.log(beforeBlock.slice(0, 400) + (beforeBlock.length > 400 ? "…" : ""));
    console.log("\n=== AFTER (excerpt) ===");
@@ -371,6 +381,12 @@ async function main() {
    return;
  }

+  // backup uniquement si on écrit
+  const bakPath = `${contentFile}.bak.issue-${issueNum}`;
+  if (!(await fileExists(bakPath))) {
+    await fs.writeFile(bakPath, original, "utf-8");
+  }
+
  await fs.writeFile(contentFile, updated, "utf-8");
  console.log("✅ Applied. Next:");
  console.log(`  git diff -- ${path.relative(CWD, contentFile)}`);