#!/usr/bin/env python3
from __future__ import annotations

import argparse
import shutil
import tempfile
import unicodedata
import xml.etree.ElementTree as ET
from pathlib import Path
from zipfile import ZIP_DEFLATED, ZipFile

# Namespace URI of the WordprocessingML main vocabulary (the "w:" elements).
W_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
# Namespace URI bound to the reserved "xml" prefix (needed for xml:space).
XML_NS = "http://www.w3.org/XML/1998/namespace"
# Prefix map handed to ElementTree find()/findall() lookups.
NS = dict(w=W_NS)

# Make ElementTree serialize W_NS names with the "w" prefix instead of an
# auto-generated one.
ET.register_namespace("w", W_NS)


# Literal substring corrections (wrong -> right) applied to paragraph text.
# Entries are applied one after another in the order written here, so a later
# entry operates on the output of the earlier ones.
# Matching is exact and case-sensitive.
# NOTE(review): the apostrophe in "d’archicration" appears to be the
# typographic U+2019 — confirm the target documents use the same character.
REPLACEMENTS = {
    "coviabilité": "co-viabilité",
    "sacroinstitutionnelle": "sacro-institutionnelle",
    "technologistique": "techno-logistique",
    "scripturonormative": "scripturo-normative",
    "textesrepères": "textes-repères",
    "ellemême": "elle-même",
    "opérateur de d’archicration": "opérateur d’archicration",
    "systèmes plusieurs statuts": "systèmes à plusieurs statuts",
    "celle-ci se donne à voir": "Celle-ci se donne à voir",
    "Pour autant il serait": "Pour autant, il serait",
    "Telles peuvent être le cas de": "Tels peuvent être les cas de",
}

# Deliberately NOT auto-corrected: "la co-viabilité devient ,"
# this case requires a human editorial decision.


def qn(tag: str) -> str:
    """Expand a ``w:local`` shorthand into ElementTree's ``{uri}local`` form.

    Raises:
        ValueError: if *tag* does not consist of exactly one ``prefix:local``
            pair, or if the prefix is anything other than ``w``.
    """
    # Unpacking enforces "exactly one colon"; anything else raises ValueError.
    prefix, local = tag.split(":")
    if prefix == "w":
        return "{%s}%s" % (W_NS, local)
    raise ValueError(tag)


def norm(s: str) -> str:
    """Return *s* in Unicode NFC form; falsy input (``None``, ``""``) gives ``""``."""
    text = s if s else ""
    return unicodedata.normalize("NFC", text)


def paragraph_text(p: ET.Element) -> str:
    """Concatenate the text of every ``w:t`` descendant of *p*, in document order.

    ``w:t`` elements without text contribute nothing.
    """
    pieces = []
    for node in p.findall(".//w:t", NS):
        if node.text:
            pieces.append(node.text)
    return "".join(pieces)


def replaced_text(s: str) -> str:
    """Return *s* NFC-normalized with every REPLACEMENTS entry applied.

    Entries are applied sequentially in table order, each one on the result
    of the previous one. The returned string can differ from *s* through
    normalization alone, even when no entry matches.
    """
    out = norm(s)
    for bad, good in REPLACEMENTS.items():
        # The input has just been normalized to NFC, so the table entries
        # must be in the same form: a key typed with combining accents
        # (decomposed form) would otherwise never match.
        out = out.replace(norm(bad), norm(good))
    return out


def rewrite_paragraph_text(p: ET.Element, new_text: str) -> None:
    """Replace the whole content of paragraph *p* with one run holding *new_text*.

    Every direct child of *p* is removed except its first ``w:pPr`` child
    (if any). A single ``w:r``/``w:t`` pair carrying *new_text* is then
    appended. Everything that lived in the removed children is discarded.
    """
    keep = p.find("w:pPr", NS)

    # Snapshot the children to drop first so removal does not disturb iteration.
    for node in [c for c in p if c is not keep]:
        p.remove(node)

    run = ET.SubElement(p, qn("w:r"))
    # xml:space="preserve" keeps leading/trailing whitespace of the text.
    text_el = ET.SubElement(run, qn("w:t"), {f"{{{XML_NS}}}space": "preserve"})
    text_el.text = new_text


def _register_declared_namespaces(xml_path: Path) -> dict[str, str]:
    """Register the namespace prefixes declared in *xml_path* with ElementTree.

    Returns the ``prefix -> uri`` mapping that was registered. For a prefix
    declared more than once, only the first declaration in document order is
    kept.
    """
    declared: dict[str, str] = {}
    for _event, (prefix, uri) in ET.iterparse(xml_path, events=("start-ns",)):
        # Skip the default namespace (registering "" would also strip the
        # prefix from namespaced attributes), the built-in "xml" prefix and
        # re-declarations of a prefix already seen.
        if not prefix or prefix == "xml" or prefix in declared:
            continue
        try:
            ET.register_namespace(prefix, uri)
        except ValueError:
            # ElementTree reserves prefixes of the form "ns<digits>".
            continue
        declared[prefix] = uri
    return declared


def _keep_unused_declarations(root: ET.Element, declared: dict[str, str]) -> None:
    """Re-add on *root* the declarations ElementTree would drop on write.

    ElementTree only emits ``xmlns:*`` for namespaces that occur in a tag or
    attribute name. Prefixes that are merely referenced by value (for example
    in a prefix-list attribute) would become undeclared, so they are written
    back as plain attributes.
    """
    used = set()
    for element in root.iter():
        for name in (element.tag, *element.attrib):
            if isinstance(name, str) and name.startswith("{"):
                used.add(name[1:].partition("}")[0])

    # When several prefixes share one URI, the last registration is the one
    # ElementTree serializes; the others need an explicit declaration too.
    serialized_as = {uri: prefix for prefix, uri in declared.items()}
    for prefix, uri in declared.items():
        if uri not in used or serialized_as[uri] != prefix:
            root.set(f"xmlns:{prefix}", uri)


def process_document_xml(xml_path: Path) -> int:
    """Apply the text replacements to every paragraph of *xml_path*, in place.

    Each ``w:p`` element (at any depth) whose replaced text differs from its
    current text is rewritten as a single run. The file is always written
    back, even when nothing changed.

    The namespace prefixes of the source file are preserved: without that,
    ElementTree renames every prefix except ``w`` to ``ns0``, ``ns1``, … and
    omits declarations for namespaces no element uses.

    Returns:
        The number of paragraphs that were rewritten.
    """
    declared = _register_declared_namespaces(xml_path)

    tree = ET.parse(xml_path)
    root = tree.getroot()

    changed = 0

    # findall() returns a list, so rewriting paragraphs while looping is safe.
    for p in root.findall(".//w:p", NS):
        old = paragraph_text(p)
        new = replaced_text(old)
        if new != old:
            rewrite_paragraph_text(p, new)
            changed += 1

    _keep_unused_declarations(root, declared)
    tree.write(xml_path, encoding="utf-8", xml_declaration=True)
    return changed


def repack_docx(tmpdir: Path, out_docx: Path) -> None:
    """Zip the files under *tmpdir* into *out_docx*.

    The archive is first written to ``<out_docx>.tmp`` beside the target and
    then moved over it, so *out_docx* is not touched until the archive is
    complete. Files are added in sorted path order; directories themselves
    are not stored.

    Any exception raised while zipping or moving propagates to the caller;
    the temporary file is removed in that case.
    """
    tmp_out = out_docx.with_suffix(out_docx.suffix + ".tmp")
    try:
        with ZipFile(tmp_out, "w", ZIP_DEFLATED) as zf:
            for p in sorted(tmpdir.rglob("*")):
                if p.is_file():
                    # Archive member names always use forward slashes.
                    zf.write(p, p.relative_to(tmpdir).as_posix())
        shutil.move(tmp_out, out_docx)
    finally:
        # After a successful move the temp file is gone and this is a no-op;
        # after a failure it removes the partial archive.
        if tmp_out.exists():
            tmp_out.unlink()


def main() -> int:
    """Command-line entry point: repair one DOCX file.

    Unpacks the DOCX given on the command line into a temporary directory,
    applies the text replacements to ``word/document.xml`` and repacks the
    result, either over the input (``--in-place``) or next to it as
    ``<stem>.fixed.docx``.

    Returns:
        0 on success, 2 when the input is not an existing file or the archive
        has no ``word/document.xml``.
    """
    # Imported locally so the error paths below also work when main() is
    # called from an importing module rather than run as a script.
    import sys

    parser = argparse.ArgumentParser(description="Répare mécaniquement certaines scories DOCX.")
    parser.add_argument("docx", help="Chemin du DOCX")
    parser.add_argument("--in-place", action="store_true", help="Réécrit le DOCX en place")
    args = parser.parse_args()

    src = Path(args.docx)
    # is_file() also rejects directories, which cannot be opened as an archive.
    if not src.is_file():
        print(f"ECHEC: fichier introuvable: {src}", file=sys.stderr)
        return 2

    out = src if args.in_place else src.with_name(src.stem + ".fixed.docx")

    with tempfile.TemporaryDirectory(prefix="docx-fix-") as td:
        td_path = Path(td)
        with ZipFile(src) as zf:
            zf.extractall(td_path)

        document_xml = td_path / "word" / "document.xml"
        if not document_xml.exists():
            print("ECHEC: word/document.xml absent.", file=sys.stderr)
            return 2

        changed = process_document_xml(document_xml)
        repack_docx(td_path, out)

    print(f"OK: DOCX réparé par réécriture paragraphe/XML. Paragraphes modifiés: {changed}")
    return 0


if __name__ == "__main__":
    # Script entry point: hand main()'s return code to the interpreter as the
    # process exit status.
    import sys

    sys.exit(main())