chore(tooling): add docx source audit and repair helpers
All checks were successful
SMOKE / smoke (push) Successful in 7s
CI / build-and-anchors (push) Successful in 48s
CI / build-and-anchors (pull_request) Successful in 47s

This commit is contained in:
2026-03-28 23:34:42 +01:00
parent ec8e29a313
commit f86704d67e
3 changed files with 233 additions and 0 deletions

72
scripts/audit-docx-source.py Executable file
View File

@@ -0,0 +1,72 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import sys
import unicodedata
import xml.etree.ElementTree as ET
from zipfile import ZipFile
NS = {"w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main"}
FORBIDDEN = [
"coviabilité",
"sacroinstitutionnelle",
"technologistique",
"scripturonormative",
"textesrepères",
"ellemême",
"opérateur de darchicration",
"systèmes plusieurs statuts",
"celle-ci se donne à voir",
"Pour autant il serait",
"Telles peuvent être le cas de",
"la co-viabilité devient ,",
]
def norm(s: str) -> str:
return unicodedata.normalize("NFC", s or "")
def main() -> int:
parser = argparse.ArgumentParser(description="Audit simple dun DOCX source officiel.")
parser.add_argument("docx", help="Chemin du fichier .docx")
args = parser.parse_args()
try:
with ZipFile(args.docx) as zf:
data = zf.read("word/document.xml")
except FileNotFoundError:
print(f"ECHEC: fichier introuvable: {args.docx}", file=sys.stderr)
return 2
except KeyError:
print("ECHEC: word/document.xml introuvable dans le DOCX.", file=sys.stderr)
return 2
except Exception as e:
print(f"ECHEC: impossible douvrir le DOCX: {e}", file=sys.stderr)
return 2
root = ET.fromstring(data)
found = False
for i, p in enumerate(root.findall(".//w:p", NS), start=1):
txt = "".join(t.text or "" for t in p.findall(".//w:t", NS))
txt_n = norm(txt)
hits = [needle for needle in FORBIDDEN if needle in txt_n]
if hits:
found = True
print(f"\n[paragraphe {i}]")
print("Hits :", ", ".join(hits))
print(txt_n)
if found:
print("\nECHEC: formes interdites encore présentes dans le DOCX.")
return 1
print("OK: aucune forme interdite trouvée dans le DOCX.")
return 0
if __name__ == "__main__":
raise SystemExit(main())