propose: exact paragraph + apply-ticket guardrails
This commit is contained in:
@@ -20,7 +20,7 @@ Env (recommandé):
|
||||
|
||||
Notes:
|
||||
- Si dist/<chemin>/index.html est absent, le script lance "npm run build" sauf si --no-build.
|
||||
- Sauvegarde automatique: <fichier>.bak.issue-<N>
|
||||
- Sauvegarde automatique: <fichier>.bak.issue-<N> (uniquement si on écrit)
|
||||
`);
|
||||
process.exit(exitCode);
|
||||
}
|
||||
@@ -45,12 +45,16 @@ function normalizeText(s) {
|
||||
return String(s ?? "")
|
||||
.normalize("NFKD")
|
||||
.replace(/\p{Diacritic}/gu, "")
|
||||
.replace(/[’‘]/g, "'")
|
||||
.replace(/[“”]/g, '"')
|
||||
.replace(/[–—]/g, "-")
|
||||
.replace(/…/g, "...")
|
||||
.replace(/\s+/g, " ")
|
||||
.trim()
|
||||
.toLowerCase();
|
||||
}
|
||||
|
||||
// stripping très pragmatique (anti-fragile > parfait)
|
||||
// stripping très pragmatique
|
||||
function stripMd(mdx) {
|
||||
let s = String(mdx ?? "");
|
||||
s = s.replace(/`[^`]*`/g, " "); // inline code
|
||||
@@ -62,6 +66,14 @@ function stripMd(mdx) {
|
||||
return s;
|
||||
}
|
||||
|
||||
/**
 * Turns Markdown/MDX text into a list of comparable word tokens.
 *
 * The text is first passed through stripMd() and normalizeText(); every run of
 * characters other than a-z, 0-9, apostrophe, hyphen or space is then replaced
 * by a single space, and the result is split on spaces.
 *
 * @param {*} s - Raw Markdown/MDX text.
 * @returns {string[]} Tokens of at least 4 characters, in order of appearance
 *   (duplicates are kept).
 */
function tokenize(s) {
  const n = normalizeText(stripMd(s));
  return n
    .replace(/[^a-z0-9'\- ]+/g, " ")
    .split(" ")
    // Short tokens are dropped; this also discards the empty strings produced
    // by consecutive spaces.
    .filter((w) => w.length >= 4);
}
|
||||
|
||||
function run(cmd, args, opts = {}) {
|
||||
const r = spawnSync(cmd, args, { stdio: "inherit", ...opts });
|
||||
if (r.status !== 0) throw new Error(`Command failed: ${cmd} ${args.join(" ")}`);
|
||||
@@ -79,25 +91,25 @@ function inferOwnerRepoFromGit() {
|
||||
const r = spawnSync("git", ["remote", "get-url", "origin"], { encoding: "utf-8" });
|
||||
if (r.status !== 0) return null;
|
||||
const u = (r.stdout || "").trim();
|
||||
// supports: https://host/owner/repo.git or ssh
|
||||
const m = u.match(/[:/](?<owner>[^/]+)\/(?<repo>[^/]+?)(?:\.git)?$/);
|
||||
if (!m?.groups) return null;
|
||||
return { owner: m.groups.owner, repo: m.groups.repo };
|
||||
}
|
||||
|
||||
/**
 * Escapes regular-expression metacharacters so that `s` can be embedded
 * literally inside a `new RegExp(...)` pattern.
 *
 * @param {*} s - Value to escape; coerced with String().
 * @returns {string} The input with each of . * + ? ^ $ { } ( ) | [ ] \ prefixed
 *   by a backslash.
 */
function escapeRegExp(s) {
  return String(s).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
|
||||
|
||||
/**
 * Extracts the value of a `Key: value` line from a ticket body.
 *
 * Matching is case-insensitive and tolerates leading indentation and spaces
 * around the colon. Because `\s` also matches line breaks, an empty `Key:`
 * line makes the capture fall through to the next non-empty line.
 *
 * @param {string} body - Ticket body text.
 * @param {string} key - Label to look for (matched literally).
 * @returns {string} The trimmed value, or "" when the key is not found.
 */
function pickLine(body, key) {
  const re = new RegExp(`^\\s*${escapeRegExp(key)}\\s*:\\s*([^\\n\\r]+)`, "mi");
  // Coerce so that a missing body yields "" instead of throwing.
  const m = String(body ?? "").match(re);
  return m ? m[1].trim() : "";
}
|
||||
|
||||
function pickHeadingValue(body, headingKey) {
|
||||
// ex: "## Chemin ..." ligne suivante contenant /...
|
||||
const re = new RegExp(`^##\\s*${escapeRegExp(headingKey)}[^\\n]*\\n([\\s\\S]*?)(?=\\n##\\s|\\n\\s*$)`, "mi");
|
||||
const m = body.match(re);
|
||||
if (!m) return "";
|
||||
// première ligne non vide et non commentée
|
||||
const lines = m[1].split(/\r?\n/).map(l => l.trim());
|
||||
for (const l of lines) {
|
||||
if (!l) continue;
|
||||
@@ -108,7 +120,6 @@ function pickHeadingValue(body, headingKey) {
|
||||
}
|
||||
|
||||
function pickSection(body, markers) {
|
||||
// capture bloc après le 1er marker trouvé, jusqu'à un séparateur connu
|
||||
const text = body.replace(/\r\n/g, "\n");
|
||||
const idx = markers
|
||||
.map(m => ({ m, i: text.toLowerCase().indexOf(m.toLowerCase()) }))
|
||||
@@ -118,7 +129,6 @@ function pickSection(body, markers) {
|
||||
const start = idx.i + idx.m.length;
|
||||
const tail = text.slice(start);
|
||||
|
||||
// stop markers (robuste)
|
||||
const stops = [
|
||||
"\n## ", "\nJustification", "\n---", "\n## Justification", "\n## Sources",
|
||||
"\nProblème identifié", "\nSources proposées", "\n## Proposition", "\n## Problème"
|
||||
@@ -132,7 +142,6 @@ function pickSection(body, markers) {
|
||||
}
|
||||
|
||||
function unquoteBlock(s) {
|
||||
// enlève ">" de citation markdown
|
||||
return String(s ?? "")
|
||||
.split(/\r?\n/)
|
||||
.map(l => l.replace(/^\s*>\s?/, ""))
|
||||
@@ -140,64 +149,66 @@ function unquoteBlock(s) {
|
||||
.trim();
|
||||
}
|
||||
|
||||
/**
 * Escapes regular-expression metacharacters so that `s` can be embedded
 * literally inside a `new RegExp(...)` pattern.
 *
 * NOTE(review): an identical escapeRegExp declaration also appears earlier in
 * this listing (before pickLine). If this file is loaded as an ES module, only
 * one top-level declaration of the name may remain — confirm which copy to keep.
 *
 * @param {*} s - Value to escape; coerced with String().
 * @returns {string} The input with each of . * + ? ^ $ { } ( ) | [ ] \ prefixed
 *   by a backslash.
 */
function escapeRegExp(s) {
  return String(s).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
|
||||
|
||||
/**
 * Reads a built HTML page and returns the plain text of the paragraph whose
 * `id` attribute equals `anchorId`.
 *
 * @param {string} htmlPath - Path of the HTML file to read (UTF-8).
 * @param {string} anchorId - Value of the `id` attribute of the target <p>.
 * @returns {Promise<string>} Whitespace-collapsed text of the paragraph, or ""
 *   when no matching <p> element is found.
 */
async function readHtmlParagraphText(htmlPath, anchorId) {
  const html = await fs.readFile(htmlPath, "utf-8");

  // Looks for <p id="anchorId" ...> ... </p>
  const re = new RegExp(
    `<p[^>]*\\bid=["']${escapeRegExp(anchorId)}["'][^>]*>([\\s\\S]*?)<\\/p>`,
    "i"
  );
  const m = html.match(re);
  if (!m) return "";

  let inner = m[1];

  // Drop the "para-tools" span when present (stops at the first </span>).
  inner = inner.replace(/<span[^>]*class=["'][^"']*para-tools[^"']*["'][^>]*>[\s\S]*?<\/span>/gi, " ");

  // Strip the remaining tags and collapse whitespace.
  inner = inner.replace(/<[^>]+>/g, " ");
  inner = inner.replace(/\s+/g, " ").trim();

  // Remove leftover tool labels. ASCII `\b` cannot delimit "¶" or a word ending
  // in "é" (both are non-word characters for `\b`), so standalone "¶" and
  // "Copié" were never removed; Unicode-aware lookarounds delimit the labels
  // instead.
  inner = inner
    .replace(/(?<![\p{L}\p{N}_])(?:¶|Citer|Proposer|Copié)(?![\p{L}\p{N}_])/giu, "")
    .replace(/\s+/g, " ")
    .trim();

  return inner;
}
|
||||
|
||||
/**
 * Splits MDX text into blocks separated by one or more blank lines.
 *
 * CRLF line endings are converted to LF first. A run of two or more newlines
 * is treated as a single separator, so the exact number of blank lines
 * between blocks is not preserved in the result.
 *
 * @param {string} mdxText - Full file content.
 * @returns {string[]} The blocks, in document order ([""] for empty input).
 */
function splitParagraphBlocks(mdxText) {
  const raw = String(mdxText ?? "").replace(/\r\n/g, "\n");
  return raw.split(/\n{2,}/);
}
|
||||
|
||||
/**
 * Heuristic: does `s` look like a partial excerpt rather than a full paragraph?
 *
 * Returns true when the trimmed text is empty, shorter than 120 characters,
 * ends with "." or "…", or contains the stem "tronqu" (tronqué, tronquée,
 * tronque, …) in any letter case.
 *
 * NOTE(review): the trailing-character rule also matches a single final
 * period, so any text ending with "." is reported as a likely excerpt —
 * confirm whether only an ellipsis was intended.
 *
 * @param {*} s - Candidate text.
 * @returns {boolean} true when the text is probably an excerpt.
 */
function isLikelyExcerpt(s) {
  const t = String(s || "").trim();
  if (!t) return true;
  if (t.length < 120) return true;
  if (/[.…]$/.test(t)) return true;
  // Lower-case first so that "Tronqué" / "TRONQUÉ" are detected as well.
  if (t.toLowerCase().includes("tronqu")) return true;
  return false;
}
|
||||
|
||||
/**
 * Scores how well a content block matches a target paragraph text.
 *
 * The score is the sum of:
 *  - the number of distinct target tokens also present in the block,
 *  - that overlap expressed as a rounded percentage of the target (0..100),
 *  - a 1000-point bonus when the first 180 normalized characters of the
 *    target occur verbatim in the normalized block.
 *
 * @param {string} block - Candidate block (Markdown/MDX).
 * @param {string} targetText - Text the block is compared against.
 * @returns {number} The score; 0 when either side yields no tokens.
 */
function scoreBlock(block, targetText) {
  const tgt = tokenize(targetText);
  const blk = tokenize(block);
  if (!tgt.length || !blk.length) return 0;

  const tgtSet = new Set(tgt);
  const blkSet = new Set(blk);

  let hit = 0;
  for (const w of tgtSet) {
    if (blkSet.has(w)) hit++;
  }

  // Bonus when a long prefix matches (less strict than a raw includes on the
  // whole text).
  const tgtNorm = normalizeText(stripMd(targetText));
  const blkNorm = normalizeText(stripMd(block));
  const prefix = tgtNorm.slice(0, Math.min(180, tgtNorm.length));
  const prefixBonus = prefix && blkNorm.includes(prefix) ? 1000 : 0;

  // Ratio bonus (0..100)
  const ratio = hit / Math.max(1, tgtSet.size);
  const ratioBonus = Math.round(ratio * 100);

  return prefixBonus + hit + ratioBonus;
}
|
||||
|
||||
/**
 * Finds the block with the highest scoreBlock() score against `targetText`.
 *
 * No minimum threshold is applied here: the caller decides whether the
 * returned score is high enough to act on. On equal scores the earliest
 * block wins (strict comparison).
 *
 * @param {string[]} blocks - Candidate blocks, in document order.
 * @param {string} targetText - Text to match.
 * @returns {{ i: number, score: number }} Index and score of the best block;
 *   `{ i: -1, score: -1 }` when `blocks` is empty.
 */
function bestBlockMatchIndex(blocks, targetText) {
  let best = { i: -1, score: -1 };

  for (let i = 0; i < blocks.length; i++) {
    const sc = scoreBlock(blocks[i], targetText);
    if (sc > best.score) best = { i, score: sc };
  }

  return best;
}
|
||||
|
||||
async function findContentFileFromChemin(chemin) {
|
||||
@@ -205,11 +216,10 @@ async function findContentFileFromChemin(chemin) {
|
||||
const parts = clean.split("/").filter(Boolean);
|
||||
if (parts.length < 2) return null;
|
||||
const collection = parts[0];
|
||||
const slugPath = parts.slice(1).join("/"); // support nested
|
||||
const slugPath = parts.slice(1).join("/");
|
||||
const root = path.join(CONTENT_ROOT, collection);
|
||||
if (!(await fileExists(root))) return null;
|
||||
|
||||
// cherche fichier dont le path relatif (sans ext) == slugPath
|
||||
const exts = [".mdx", ".md"];
|
||||
async function walk(dir) {
|
||||
const entries = await fs.readdir(dir, { withFileTypes: true });
|
||||
@@ -250,7 +260,7 @@ async function fetchIssue({ forgeApiBase, owner, repo, token, issueNum }) {
|
||||
headers: {
|
||||
"Authorization": `token ${token}`,
|
||||
"Accept": "application/json",
|
||||
"User-Agent": "archicratie-apply-ticket/1.0",
|
||||
"User-Agent": "archicratie-apply-ticket/1.1",
|
||||
}
|
||||
});
|
||||
if (!res.ok) {
|
||||
@@ -275,7 +285,6 @@ async function main() {
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// API base: priorise LAN (FORGE_API), sinon FORGE_BASE
|
||||
const forgeApiBase = getEnv("FORGE_API") || getEnv("FORGE_BASE");
|
||||
if (!forgeApiBase) {
|
||||
console.error("❌ FORGE_API ou FORGE_BASE manquant. Ex: export FORGE_API='http://192.168.1.20:3000'");
|
||||
@@ -285,22 +294,17 @@ async function main() {
|
||||
console.log(`🔎 Fetch ticket #${issueNum} from ${owner}/${repo} …`);
|
||||
const issue = await fetchIssue({ forgeApiBase, owner, repo, token, issueNum });
|
||||
|
||||
const title = issue.title || "";
|
||||
const bodyRaw = issue.body || "";
|
||||
const body = bodyRaw.replace(/\r\n/g, "\n");
|
||||
const body = String(issue.body || "").replace(/\r\n/g, "\n");
|
||||
|
||||
// Chemin / Ancre: support format "Chemin:" OU "## Chemin"
|
||||
let chemin = pickLine(body, "Chemin") || pickHeadingValue(body, "Chemin");
|
||||
let ancre = pickLine(body, "Ancre") || pickHeadingValue(body, "Ancre paragraphe") || pickHeadingValue(body, "Ancre");
|
||||
ancre = ancre.trim();
|
||||
ancre = (ancre || "").trim();
|
||||
if (ancre.startsWith("#")) ancre = ancre.slice(1);
|
||||
|
||||
// Texte actuel: support "Texte actuel (copie exacte...)" OU "Texte actuel (extrait)"
|
||||
const current1 = pickSection(body, ["Texte actuel (copie exacte du paragraphe)", "## Texte actuel (copie exacte du paragraphe)"]);
|
||||
const current2 = pickSection(body, ["Texte actuel (extrait)", "## Assertion / passage à vérifier", "Assertion / passage à vérifier"]);
|
||||
const texteActuel = unquoteBlock(current1 || current2);
|
||||
const currentFull = pickSection(body, ["Texte actuel (copie exacte du paragraphe)", "## Texte actuel (copie exacte du paragraphe)"]);
|
||||
const currentEx = pickSection(body, ["Texte actuel (extrait)", "## Assertion / passage à vérifier", "Assertion / passage à vérifier"]);
|
||||
const texteActuel = unquoteBlock(currentFull || currentEx);
|
||||
|
||||
// Proposition: support 2 modèles
|
||||
const prop1 = pickSection(body, ["Proposition (texte corrigé complet)", "## Proposition (texte corrigé complet)"]);
|
||||
const prop2 = pickSection(body, ["Proposition (remplacer par):", "## Proposition (remplacer par)"]);
|
||||
const proposition = (prop1 || prop2).trim();
|
||||
@@ -313,56 +317,62 @@ async function main() {
|
||||
|
||||
const contentFile = await findContentFileFromChemin(chemin);
|
||||
if (!contentFile) throw new Error(`Fichier contenu introuvable pour Chemin=${chemin}`);
|
||||
|
||||
console.log(`📄 Target content file: ${path.relative(CWD, contentFile)}`);
|
||||
|
||||
// dist html path
|
||||
const distHtmlPath = path.join(DIST_ROOT, chemin.replace(/^\/+|\/+$/g,""), "index.html");
|
||||
await ensureBuildIfNeeded(distHtmlPath);
|
||||
|
||||
// texte cible: priorité au texte actuel du ticket, sinon récup HTML du paragraphe via ancre
|
||||
// targetText: préférence au texte complet (ticket), sinon dist si extrait probable
|
||||
let targetText = texteActuel;
|
||||
if (!targetText) {
|
||||
if (await fileExists(distHtmlPath)) {
|
||||
const htmlText = await readHtmlParagraphText(distHtmlPath, ancre);
|
||||
if (htmlText) targetText = htmlText;
|
||||
}
|
||||
|
||||
let distText = "";
|
||||
if (await fileExists(distHtmlPath)) {
|
||||
distText = await readHtmlParagraphText(distHtmlPath, ancre);
|
||||
}
|
||||
|
||||
if (!targetText && distText) targetText = distText;
|
||||
if (targetText && distText && isLikelyExcerpt(targetText) && distText.length > targetText.length) {
|
||||
targetText = distText;
|
||||
}
|
||||
|
||||
if (!targetText) {
|
||||
throw new Error("Impossible de reconstruire le texte du paragraphe (ni texte actuel, ni dist html).");
|
||||
}
|
||||
|
||||
// lecture + split blocs
|
||||
const original = await fs.readFile(contentFile, "utf-8");
|
||||
const blocks = splitParagraphBlocks(original);
|
||||
|
||||
const idx = bestBlockMatchIndex(blocks, targetText);
|
||||
if (idx < 0) {
|
||||
const best = bestBlockMatchIndex(blocks, targetText);
|
||||
|
||||
// seuil de sécurité : on veut au moins un overlap raisonnable.
|
||||
// Avec le bonus prefix+ratio, un match correct dépasse très vite ~60–80.
|
||||
if (best.i < 0 || best.score < 40) {
|
||||
console.error("❌ Match trop faible: je refuse de remplacer automatiquement.");
|
||||
console.error("➡️ Action: mets 'Texte actuel (copie exacte du paragraphe)' dans le ticket (recommandé).");
|
||||
console.error(`➡️ Score=${best.score}. Recommandation: ticket avec 'Texte actuel (copie exacte du paragraphe)'.`);
|
||||
// debug: top 5
|
||||
const ranked = blocks
|
||||
.map((b, i) => ({ i, score: scoreBlock(b, targetText), excerpt: stripMd(b).slice(0, 140) }))
|
||||
.sort((a, b) => b.score - a.score)
|
||||
.slice(0, 5);
|
||||
|
||||
console.error("Top candidates:");
|
||||
for (const r of ranked) {
|
||||
console.error(` #${r.i + 1} score=${r.score} ${r.excerpt}${r.excerpt.length >= 140 ? "…" : ""}`);
|
||||
}
|
||||
process.exit(2);
|
||||
}
|
||||
|
||||
const beforeBlock = blocks[idx];
|
||||
const beforeBlock = blocks[best.i];
|
||||
const afterBlock = proposition.trim();
|
||||
|
||||
// garde le style: 1 bloc -> 1 bloc
|
||||
const nextBlocks = blocks.slice();
|
||||
nextBlocks[idx] = afterBlock;
|
||||
|
||||
nextBlocks[best.i] = afterBlock;
|
||||
const updated = nextBlocks.join("\n\n");
|
||||
|
||||
// backup
|
||||
const bakPath = `${contentFile}.bak.issue-${issueNum}`;
|
||||
if (!(await fileExists(bakPath))) {
|
||||
await fs.writeFile(bakPath, original, "utf-8");
|
||||
}
|
||||
|
||||
// preview stats
|
||||
console.log(`🧩 Matched block #${idx+1}/${blocks.length} (backup: ${path.relative(CWD, bakPath)})`);
|
||||
console.log(`🧩 Matched block #${best.i + 1}/${blocks.length} score=${best.score}`);
|
||||
|
||||
if (DRY_RUN) {
|
||||
console.log("\n--- DRY RUN (no write) ---\n");
|
||||
console.log("\n--- DRY RUN (no write, no backup) ---\n");
|
||||
console.log("=== BEFORE (excerpt) ===");
|
||||
console.log(beforeBlock.slice(0, 400) + (beforeBlock.length > 400 ? "…" : ""));
|
||||
console.log("\n=== AFTER (excerpt) ===");
|
||||
@@ -371,6 +381,12 @@ async function main() {
|
||||
return;
|
||||
}
|
||||
|
||||
// backup uniquement si on écrit
|
||||
const bakPath = `${contentFile}.bak.issue-${issueNum}`;
|
||||
if (!(await fileExists(bakPath))) {
|
||||
await fs.writeFile(bakPath, original, "utf-8");
|
||||
}
|
||||
|
||||
await fs.writeFile(contentFile, updated, "utf-8");
|
||||
console.log("✅ Applied. Next:");
|
||||
console.log(` git diff -- ${path.relative(CWD, contentFile)}`);
|
||||
|
||||
Reference in New Issue
Block a user