propose: exact paragraph + apply-ticket guardrails

This commit is contained in:
2026-01-20 12:42:30 +01:00
parent ec42c4b2f4
commit 3b8376d6a9
4 changed files with 290 additions and 187 deletions

View File

@@ -20,7 +20,7 @@ Env (recommandé):
Notes:
- Si dist/<chemin>/index.html est absent, le script lance "npm run build" sauf si --no-build.
- Sauvegarde automatique: <fichier>.bak.issue-<N>
- Sauvegarde automatique: <fichier>.bak.issue-<N> (uniquement si on écrit)
`);
process.exit(exitCode);
}
@@ -45,12 +45,16 @@ function normalizeText(s) {
return String(s ?? "")
.normalize("NFKD")
.replace(/\p{Diacritic}/gu, "")
.replace(/[]/g, "'")
.replace(/[“”]/g, '"')
.replace(/[–—]/g, "-")
.replace(/…/g, "...")
.replace(/\s+/g, " ")
.trim()
.toLowerCase();
}
// stripping très pragmatique (anti-fragile > parfait)
// stripping très pragmatique
function stripMd(mdx) {
let s = String(mdx ?? "");
s = s.replace(/`[^`]*`/g, " "); // inline code
@@ -62,6 +66,14 @@ function stripMd(mdx) {
return s;
}
/**
 * Splits a text into the words used for overlap scoring.
 *
 * The input goes through stripMd() and normalizeText(); every run of characters
 * other than a-z, 0-9, apostrophe, hyphen or space then becomes a single space,
 * and only words of at least 4 characters are kept.
 *
 * @param {string} s - Raw text.
 * @returns {string[]} Kept words, in order of appearance (duplicates included).
 */
function tokenize(s) {
  const cleaned = normalizeText(stripMd(s)).replace(/[^a-z0-9'\- ]+/g, " ");
  const kept = [];
  for (const word of cleaned.split(" ")) {
    // Short words (and the empty strings produced by repeated spaces) are dropped.
    if (word.length >= 4) kept.push(word);
  }
  return kept;
}
function run(cmd, args, opts = {}) {
const r = spawnSync(cmd, args, { stdio: "inherit", ...opts });
if (r.status !== 0) throw new Error(`Command failed: ${cmd} ${args.join(" ")}`);
@@ -79,25 +91,25 @@ function inferOwnerRepoFromGit() {
const r = spawnSync("git", ["remote", "get-url", "origin"], { encoding: "utf-8" });
if (r.status !== 0) return null;
const u = (r.stdout || "").trim();
// supports: https://host/owner/repo.git or ssh
const m = u.match(/[:/](?<owner>[^/]+)\/(?<repo>[^/]+?)(?:\.git)?$/);
if (!m?.groups) return null;
return { owner: m.groups.owner, repo: m.groups.repo };
}
/**
 * Escapes regular-expression metacharacters so the text can be embedded
 * literally inside a RegExp source string.
 *
 * @param {*} s - Value to escape (converted with String()).
 * @returns {string} The text with each metacharacter preceded by a backslash.
 */
function escapeRegExp(s) {
  const text = String(s);
  return text.replace(/[.*+?^${}()|[\]\\]/g, (ch) => `\\${ch}`);
}
/**
 * Reads the value of a "key: value" line from a ticket body.
 *
 * Whitespace before the key and around the colon is tolerated; the key is
 * matched literally, case-insensitively, at the start of any line.
 *
 * @param {string} body - Ticket body.
 * @param {string} key - Label to look for (without the colon).
 * @returns {string} The trimmed value, or "" when no such line exists.
 */
function pickLine(body, key) {
  const pattern = `^\\s*${escapeRegExp(key)}\\s*:\\s*([^\\n\\r]+)`;
  const found = body.match(new RegExp(pattern, "mi"));
  if (!found) return "";
  return found[1].trim();
}
function pickHeadingValue(body, headingKey) {
// ex: "## Chemin ..." ligne suivante contenant /...
const re = new RegExp(`^##\\s*${escapeRegExp(headingKey)}[^\\n]*\\n([\\s\\S]*?)(?=\\n##\\s|\\n\\s*$)`, "mi");
const m = body.match(re);
if (!m) return "";
// première ligne non vide et non commentée
const lines = m[1].split(/\r?\n/).map(l => l.trim());
for (const l of lines) {
if (!l) continue;
@@ -108,7 +120,6 @@ function pickHeadingValue(body, headingKey) {
}
function pickSection(body, markers) {
// capture bloc après le 1er marker trouvé, jusqu'à un séparateur connu
const text = body.replace(/\r\n/g, "\n");
const idx = markers
.map(m => ({ m, i: text.toLowerCase().indexOf(m.toLowerCase()) }))
@@ -118,7 +129,6 @@ function pickSection(body, markers) {
const start = idx.i + idx.m.length;
const tail = text.slice(start);
// stop markers (robuste)
const stops = [
"\n## ", "\nJustification", "\n---", "\n## Justification", "\n## Sources",
"\nProblème identifié", "\nSources proposées", "\n## Proposition", "\n## Problème"
@@ -132,7 +142,6 @@ function pickSection(body, markers) {
}
function unquoteBlock(s) {
// enlève ">" de citation markdown
return String(s ?? "")
.split(/\r?\n/)
.map(l => l.replace(/^\s*>\s?/, ""))
@@ -140,64 +149,66 @@ function unquoteBlock(s) {
.trim();
}
/**
 * Backslash-escapes every regular-expression metacharacter in a value so it
 * can be used as a literal fragment of a RegExp pattern.
 *
 * NOTE(review): a definition with the same name and body also appears earlier
 * in this view; presumably one of the two is the pre-move location — confirm.
 *
 * @param {*} s - Value to escape (stringified first).
 * @returns {string} Escaped text.
 */
function escapeRegExp(s) {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  const source = String(s);
  return source.replace(metaChars, "\\$&");
}
/**
 * Extracts the visible text of one paragraph from an HTML file.
 *
 * Looks for the first `<p>` element whose `id` attribute equals `anchorId`,
 * drops any `<span>` whose class contains "para-tools", strips the remaining
 * tags, collapses whitespace and removes leftover UI labels.
 *
 * @param {string} htmlPath - Path of the HTML file to read (as UTF-8).
 * @param {string} anchorId - Value of the paragraph's `id` attribute.
 * @returns {Promise<string>} The paragraph text, or "" when no matching `<p>` exists.
 */
async function readHtmlParagraphText(htmlPath, anchorId) {
  const html = await fs.readFile(htmlPath, "utf-8");

  // Find <p id="anchorId" ...> ... </p>; the id is escaped so it matches literally.
  const paragraphRe = new RegExp(
    `<p[^>]*\\bid=["']${escapeRegExp(anchorId)}["'][^>]*>([\\s\\S]*?)<\\/p>`,
    "i",
  );
  const m = html.match(paragraphRe);
  if (!m) return "";

  let inner = m[1];

  // Remove the "para-tools" widgets when present.
  inner = inner.replace(/<span[^>]*class=["'][^"']*para-tools[^"']*["'][^>]*>[\s\S]*?<\/span>/gi, " ");

  // Strip the remaining tags, then collapse whitespace.
  inner = inner.replace(/<[^>]+>/g, " ");
  inner = inner.replace(/\s+/g, " ").trim();

  // Remove leftover UI labels. `\b` only treats ASCII [A-Za-z0-9_] as word
  // characters, so it cannot match around a standalone "¶" nor after the "é"
  // of "Copié" when whitespace follows; Unicode-aware lookarounds are used
  // instead to require that the label is not glued to a letter/digit/underscore.
  inner = inner
    .replace(/(?<![\p{L}\p{N}_])(?:¶|Citer|Proposer|Copié)(?![\p{L}\p{N}_])/giu, "")
    .replace(/\s+/g, " ")
    .trim();

  return inner;
}
/**
 * Splits MDX/Markdown source into blocks.
 *
 * A block boundary is any run of two or more consecutive newlines (a pragmatic
 * definition of "paragraph"). CRLF line endings are converted to LF first.
 *
 * @param {string} mdxText - File content.
 * @returns {string[]} The blocks, in document order.
 */
function splitParagraphBlocks(mdxText) {
  const normalized = mdxText.replace(/\r\n/g, "\n");
  return normalized.split(/\n{2,}/);
}
/**
 * Heuristic: does this text look like an excerpt rather than a full paragraph?
 *
 * Returns true when the trimmed text is empty or shorter than 120 characters,
 * ends with "." or "…", or contains the substring "tronqu" (case-sensitive).
 *
 * NOTE(review): a trailing "." alone qualifies, so many complete paragraphs are
 * also reported as excerpts — confirm this breadth is intended.
 *
 * @param {*} s - Candidate text (falsy values are treated as "").
 * @returns {boolean} true when the text is probably an excerpt.
 */
function isLikelyExcerpt(s) {
  const text = String(s || "").trim();
  const tooShort = text.length < 120; // also covers the empty string
  const endsWithDotOrEllipsis = /[.…]$/.test(text);
  const mentionsTruncation = text.includes("tronqu");
  return tooShort || endsWithDotOrEllipsis || mentionsTruncation;
}
/**
 * Scores how well a content block matches a target text.
 *
 * The score is the sum of:
 *  - the number of distinct target words (from tokenize()) also present in the block;
 *  - that hit count as a rounded percentage of the distinct target words (0..100);
 *  - 1000 when the first 180 characters of the normalized target occur in the
 *    normalized block.
 *
 * @param {string} block - Candidate block.
 * @param {string} targetText - Text being searched for.
 * @returns {number} The score; 0 when either side yields no tokens.
 */
function scoreBlock(block, targetText) {
  const targetWords = new Set(tokenize(targetText));
  const blockWords = new Set(tokenize(block));
  if (targetWords.size === 0 || blockWords.size === 0) return 0;

  // Distinct target words that also appear in the block.
  const hits = [...targetWords].filter((word) => blockWords.has(word)).length;

  // Large bonus when a long prefix of the target appears in the block
  // (less strict than requiring the whole target to be included).
  const normalizedTarget = normalizeText(stripMd(targetText));
  const normalizedBlock = normalizeText(stripMd(block));
  const head = normalizedTarget.slice(0, 180);
  let prefixBonus = 0;
  if (head && normalizedBlock.includes(head)) prefixBonus = 1000;

  // Share of target words found, as a 0..100 bonus.
  const ratioBonus = Math.round((hits / Math.max(1, targetWords.size)) * 100);

  return prefixBonus + hits + ratioBonus;
}
/**
 * Picks the entry of `blocks` that best matches `targetText`.
 *
 * NOTE(review): this span appears to interleave two revisions of the function:
 * an inline scorer (snippet inclusion + word overlap) that returns an index or
 * -1, and a scoreBlock()-based scorer that returns `{ i, score }`. As written,
 * `b` is declared twice inside the loop and `return best;` follows another
 * `return` — confirm which revision is intended before relying on the return shape.
 *
 * @param {string[]} blocks - Candidate blocks.
 * @param {string} targetText - Text to locate.
 */
function bestBlockMatchIndex(blocks, targetText) {
const tgt = normalizeText(stripMd(targetText));
if (!tgt) return -1;
// compare by snippet inclusion + word "overlap" score
const snippet = tgt.slice(0, Math.min(160, tgt.length));
let best = { i: -1, score: -1 };
for (let i = 0; i < blocks.length; i++) {
const b = normalizeText(stripMd(blocks[i]));
if (!b) continue;
let score = 0;
if (b.includes(snippet)) score += 1000; // jackpot: the first 160 normalized chars of the target occur in the block
// word overlap (cheap but robust): counts target words of 4+ chars found as substrings of the block
const words = new Set(tgt.split(" ").filter(w => w.length >= 4));
let hit = 0;
for (const w of words) if (b.includes(w)) hit++;
score += hit;
if (score > best.score) best = { i, score };
// NOTE(review): the three lines below look like the scoreBlock()-based revision of the loop body
const b = blocks[i];
const sc = scoreBlock(b, targetText);
if (sc > best.score) best = { i, score: sc };
}
// minimal threshold: avoids a wild replacement
if (best.score < 20) return -1;
return best.i;
// NOTE(review): unreachable after the return above; looks like the newer revision's return value
return best;
}
async function findContentFileFromChemin(chemin) {
@@ -205,11 +216,10 @@ async function findContentFileFromChemin(chemin) {
const parts = clean.split("/").filter(Boolean);
if (parts.length < 2) return null;
const collection = parts[0];
const slugPath = parts.slice(1).join("/"); // support nested
const slugPath = parts.slice(1).join("/");
const root = path.join(CONTENT_ROOT, collection);
if (!(await fileExists(root))) return null;
// cherche fichier dont le path relatif (sans ext) == slugPath
const exts = [".mdx", ".md"];
async function walk(dir) {
const entries = await fs.readdir(dir, { withFileTypes: true });
@@ -250,7 +260,7 @@ async function fetchIssue({ forgeApiBase, owner, repo, token, issueNum }) {
headers: {
"Authorization": `token ${token}`,
"Accept": "application/json",
"User-Agent": "archicratie-apply-ticket/1.0",
"User-Agent": "archicratie-apply-ticket/1.1",
}
});
if (!res.ok) {
@@ -275,7 +285,6 @@ async function main() {
process.exit(1);
}
// API base: priorise LAN (FORGE_API), sinon FORGE_BASE
const forgeApiBase = getEnv("FORGE_API") || getEnv("FORGE_BASE");
if (!forgeApiBase) {
console.error("❌ FORGE_API ou FORGE_BASE manquant. Ex: export FORGE_API='http://192.168.1.20:3000'");
@@ -285,22 +294,17 @@ async function main() {
console.log(`🔎 Fetch ticket #${issueNum} from ${owner}/${repo}`);
const issue = await fetchIssue({ forgeApiBase, owner, repo, token, issueNum });
const title = issue.title || "";
const bodyRaw = issue.body || "";
const body = bodyRaw.replace(/\r\n/g, "\n");
const body = String(issue.body || "").replace(/\r\n/g, "\n");
// Chemin / Ancre: support format "Chemin:" OU "## Chemin"
let chemin = pickLine(body, "Chemin") || pickHeadingValue(body, "Chemin");
let ancre = pickLine(body, "Ancre") || pickHeadingValue(body, "Ancre paragraphe") || pickHeadingValue(body, "Ancre");
ancre = ancre.trim();
ancre = (ancre || "").trim();
if (ancre.startsWith("#")) ancre = ancre.slice(1);
// Texte actuel: support "Texte actuel (copie exacte...)" OU "Texte actuel (extrait)"
const current1 = pickSection(body, ["Texte actuel (copie exacte du paragraphe)", "## Texte actuel (copie exacte du paragraphe)"]);
const current2 = pickSection(body, ["Texte actuel (extrait)", "## Assertion / passage à vérifier", "Assertion / passage à vérifier"]);
const texteActuel = unquoteBlock(current1 || current2);
const currentFull = pickSection(body, ["Texte actuel (copie exacte du paragraphe)", "## Texte actuel (copie exacte du paragraphe)"]);
const currentEx = pickSection(body, ["Texte actuel (extrait)", "## Assertion / passage à vérifier", "Assertion / passage à vérifier"]);
const texteActuel = unquoteBlock(currentFull || currentEx);
// Proposition: support 2 modèles
const prop1 = pickSection(body, ["Proposition (texte corrigé complet)", "## Proposition (texte corrigé complet)"]);
const prop2 = pickSection(body, ["Proposition (remplacer par):", "## Proposition (remplacer par)"]);
const proposition = (prop1 || prop2).trim();
@@ -313,56 +317,62 @@ async function main() {
const contentFile = await findContentFileFromChemin(chemin);
if (!contentFile) throw new Error(`Fichier contenu introuvable pour Chemin=${chemin}`);
console.log(`📄 Target content file: ${path.relative(CWD, contentFile)}`);
// dist html path
const distHtmlPath = path.join(DIST_ROOT, chemin.replace(/^\/+|\/+$/g,""), "index.html");
await ensureBuildIfNeeded(distHtmlPath);
// texte cible: priorité au texte actuel du ticket, sinon récup HTML du paragraphe via ancre
// targetText: préférence au texte complet (ticket), sinon dist si extrait probable
let targetText = texteActuel;
if (!targetText) {
if (await fileExists(distHtmlPath)) {
const htmlText = await readHtmlParagraphText(distHtmlPath, ancre);
if (htmlText) targetText = htmlText;
}
let distText = "";
if (await fileExists(distHtmlPath)) {
distText = await readHtmlParagraphText(distHtmlPath, ancre);
}
if (!targetText && distText) targetText = distText;
if (targetText && distText && isLikelyExcerpt(targetText) && distText.length > targetText.length) {
targetText = distText;
}
if (!targetText) {
throw new Error("Impossible de reconstruire le texte du paragraphe (ni texte actuel, ni dist html).");
}
// lecture + split blocs
const original = await fs.readFile(contentFile, "utf-8");
const blocks = splitParagraphBlocks(original);
const idx = bestBlockMatchIndex(blocks, targetText);
if (idx < 0) {
const best = bestBlockMatchIndex(blocks, targetText);
// seuil de sécurité : on veut au moins un overlap raisonnable.
// Avec le bonus prefix+ratio, un match correct dépasse très vite ~60–80.
if (best.i < 0 || best.score < 40) {
console.error("❌ Match trop faible: je refuse de remplacer automatiquement.");
console.error("➡️ Action: mets 'Texte actuel (copie exacte du paragraphe)' dans le ticket (recommandé).");
console.error(`➡️ Score=${best.score}. Recommandation: ticket avec 'Texte actuel (copie exacte du paragraphe)'.`);
// debug: top 5
const ranked = blocks
.map((b, i) => ({ i, score: scoreBlock(b, targetText), excerpt: stripMd(b).slice(0, 140) }))
.sort((a, b) => b.score - a.score)
.slice(0, 5);
console.error("Top candidates:");
for (const r of ranked) {
console.error(` #${r.i + 1} score=${r.score} ${r.excerpt}${r.excerpt.length >= 140 ? "…" : ""}`);
}
process.exit(2);
}
const beforeBlock = blocks[idx];
const beforeBlock = blocks[best.i];
const afterBlock = proposition.trim();
// garde le style: 1 bloc -> 1 bloc
const nextBlocks = blocks.slice();
nextBlocks[idx] = afterBlock;
nextBlocks[best.i] = afterBlock;
const updated = nextBlocks.join("\n\n");
// backup
const bakPath = `${contentFile}.bak.issue-${issueNum}`;
if (!(await fileExists(bakPath))) {
await fs.writeFile(bakPath, original, "utf-8");
}
// preview stats
console.log(`🧩 Matched block #${idx+1}/${blocks.length} (backup: ${path.relative(CWD, bakPath)})`);
console.log(`🧩 Matched block #${best.i + 1}/${blocks.length} score=${best.score}`);
if (DRY_RUN) {
console.log("\n--- DRY RUN (no write) ---\n");
console.log("\n--- DRY RUN (no write, no backup) ---\n");
console.log("=== BEFORE (excerpt) ===");
console.log(beforeBlock.slice(0, 400) + (beforeBlock.length > 400 ? "…" : ""));
console.log("\n=== AFTER (excerpt) ===");
@@ -371,6 +381,12 @@ async function main() {
return;
}
// backup uniquement si on écrit
const bakPath = `${contentFile}.bak.issue-${issueNum}`;
if (!(await fileExists(bakPath))) {
await fs.writeFile(bakPath, original, "utf-8");
}
await fs.writeFile(contentFile, updated, "utf-8");
console.log("✅ Applied. Next:");
console.log(` git diff -- ${path.relative(CWD, contentFile)}`);