chore(tooling): add docx source audit and repair helpers
This commit is contained in:
72
scripts/audit-docx-source.py
Executable file
72
scripts/audit-docx-source.py
Executable file
@@ -0,0 +1,72 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import unicodedata
|
||||
import xml.etree.ElementTree as ET
|
||||
from zipfile import ZipFile
|
||||
|
||||
NS = {"w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main"}
|
||||
|
||||
FORBIDDEN = [
|
||||
"coviabilité",
|
||||
"sacroinstitutionnelle",
|
||||
"technologistique",
|
||||
"scripturonormative",
|
||||
"textesrepères",
|
||||
"ellemême",
|
||||
"opérateur de d’archicration",
|
||||
"systèmes plusieurs statuts",
|
||||
"celle-ci se donne à voir",
|
||||
"Pour autant il serait",
|
||||
"Telles peuvent être le cas de",
|
||||
"la co-viabilité devient ,",
|
||||
]
|
||||
|
||||
|
||||
def norm(s: str) -> str:
|
||||
return unicodedata.normalize("NFC", s or "")
|
||||
|
||||
|
||||
def main() -> int:
|
||||
parser = argparse.ArgumentParser(description="Audit simple d’un DOCX source officiel.")
|
||||
parser.add_argument("docx", help="Chemin du fichier .docx")
|
||||
args = parser.parse_args()
|
||||
|
||||
try:
|
||||
with ZipFile(args.docx) as zf:
|
||||
data = zf.read("word/document.xml")
|
||||
except FileNotFoundError:
|
||||
print(f"ECHEC: fichier introuvable: {args.docx}", file=sys.stderr)
|
||||
return 2
|
||||
except KeyError:
|
||||
print("ECHEC: word/document.xml introuvable dans le DOCX.", file=sys.stderr)
|
||||
return 2
|
||||
except Exception as e:
|
||||
print(f"ECHEC: impossible d’ouvrir le DOCX: {e}", file=sys.stderr)
|
||||
return 2
|
||||
|
||||
root = ET.fromstring(data)
|
||||
found = False
|
||||
|
||||
for i, p in enumerate(root.findall(".//w:p", NS), start=1):
|
||||
txt = "".join(t.text or "" for t in p.findall(".//w:t", NS))
|
||||
txt_n = norm(txt)
|
||||
hits = [needle for needle in FORBIDDEN if needle in txt_n]
|
||||
if hits:
|
||||
found = True
|
||||
print(f"\n[paragraphe {i}]")
|
||||
print("Hits :", ", ".join(hits))
|
||||
print(txt_n)
|
||||
|
||||
if found:
|
||||
print("\nECHEC: formes interdites encore présentes dans le DOCX.")
|
||||
return 1
|
||||
|
||||
print("OK: aucune forme interdite trouvée dans le DOCX.")
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
Reference in New Issue
Block a user