#!/usr/bin/env python3 import argparse import os import re import shutil import subprocess import sys from pathlib import Path try: import yaml except ImportError: print("Erreur : PyYAML n'est pas installé. Lance : pip3 install pyyaml") sys.exit(1) EDITION = "archicrat-ia" STATUS = "essai_these" VERSION = "0.1.0" ORDER_MAP = { "prologue": 10, "chapitre-1": 20, "chapitre-2": 30, "chapitre-3": 40, "chapitre-4": 50, "chapitre-5": 60, "conclusion": 70, } TITLE_MAP = { "prologue": "Prologue — Fondation, finalité sociopolitique et historique", "chapitre-1": "Chapitre 1 — Fondements épistémologiques et modélisation", "chapitre-2": "Chapitre 2 — Archéogenèse des régimes de co-viabilité", "chapitre-3": "Chapitre 3 — Philosophies du pouvoir et archicration", "chapitre-4": "Chapitre 4 — Histoire archicratique des révolutions industrielles", "chapitre-5": "Chapitre 5 — Tensions, co-viabilités et régulations", "conclusion": "Conclusion — ArchiCraT-IA", } def slugify_name(path: Path) -> str: stem = path.stem.lower().strip() replacements = { " ": "-", "_": "-", "—": "-", "–": "-", "é": "e", "è": "e", "ê": "e", "ë": "e", "à": "a", "â": "a", "ä": "a", "î": "i", "ï": "i", "ô": "o", "ö": "o", "ù": "u", "û": "u", "ü": "u", "ç": "c", "'": "", "’": "", } for old, new in replacements.items(): stem = stem.replace(old, new) stem = re.sub(r"-+", "-", stem).strip("-") # normalisations spécifiques stem = stem.replace("chapitre-1-fondements-epistemologiques-et-modelisation-archicratie-version-officielle-revise", "chapitre-1") stem = stem.replace("chapitre-2", "chapitre-2") stem = stem.replace("chapitre-3", "chapitre-3") stem = stem.replace("chapitre-4", "chapitre-4") stem = stem.replace("chapitre-5", "chapitre-5") if "prologue" in stem: return "prologue" if "chapitre-1" in stem: return "chapitre-1" if "chapitre-2" in stem: return "chapitre-2" if "chapitre-3" in stem: return "chapitre-3" if "chapitre-4" in stem: return "chapitre-4" if "chapitre-5" in stem: return "chapitre-5" if "conclusion" in stem: return "conclusion" return stem def extract_title_from_markdown(md_text: str) -> str | None: for line in md_text.splitlines(): line = line.strip() if not line: continue if line.startswith("# "): return line[2:].strip() return None def remove_first_h1(md_text: str) -> str: lines = md_text.splitlines() out = [] removed = False for line in lines: if not removed and line.strip().startswith("# "): removed = True continue out.append(line) text = "\n".join(out).lstrip() return text def clean_markdown(md_text: str) -> str: text = md_text.replace("\r\n", "\n").replace("\r", "\n") # nettoyer espaces multiples text = re.sub(r"\n{3,}", "\n\n", text) # supprimer éventuels signets/artefacts de liens internes Pandoc text = re.sub(r"\[\]\(#.*?\)", "", text) # convertir astérismes parasites text = re.sub(r"[ \t]+$", "", text, flags=re.MULTILINE) return text.strip() + "\n" def compute_level(slug: str) -> int: if slug == "prologue": return 1 if slug.startswith("chapitre-"): return 1 if slug == "conclusion": return 1 return 1 def convert_one_file(input_docx: Path, output_dir: Path, source_root: Path): slug = slugify_name(input_docx) output_mdx = output_dir / f"{slug}.mdx" cmd = [ "pandoc", str(input_docx), "-f", "docx", "-t", "gfm+smart", ] result = subprocess.run(cmd, check=True, capture_output=True, text=True) md_text = result.stdout detected_title = extract_title_from_markdown(md_text) md_body = remove_first_h1(md_text) md_body = clean_markdown(md_body) title = TITLE_MAP.get(slug) or detected_title or input_docx.stem order = ORDER_MAP.get(slug, 999) level = compute_level(slug) relative_source = input_docx try: relative_source = input_docx.relative_to(source_root) except ValueError: relative_source = input_docx.name frontmatter = { "title": title, "edition": EDITION, "status": STATUS, "level": level, "version": VERSION, "concepts": [], "links": [], "order": order, "summary": "", "source": { "kind": "docx", "path": str(relative_source), }, } yaml_block = yaml.safe_dump( frontmatter, allow_unicode=True, sort_keys=False, default_flow_style=False, ).strip() final_text = f"---\n{yaml_block}\n---\n{md_body if md_body.startswith(chr(10)) else chr(10) + md_body}" output_mdx.write_text(final_text, encoding="utf-8") print(f"✅ {input_docx.name} -> {output_mdx.name}") def main(): parser = argparse.ArgumentParser(description="Convertit un dossier DOCX en MDX avec frontmatter.") parser.add_argument("input_dir", help="Dossier source contenant les DOCX") parser.add_argument("output_dir", help="Dossier de sortie pour les MDX") args = parser.parse_args() input_dir = Path(args.input_dir).expanduser().resolve() output_dir = Path(args.output_dir).expanduser().resolve() if not shutil.which("pandoc"): print("Erreur : pandoc n'est pas installé. Lance : brew install pandoc") sys.exit(1) if not input_dir.exists() or not input_dir.is_dir(): print(f"Erreur : dossier source introuvable : {input_dir}") sys.exit(1) output_dir.mkdir(parents=True, exist_ok=True) docx_files = sorted(input_dir.glob("*.docx")) if not docx_files: print(f"Aucun DOCX trouvé dans : {input_dir}") sys.exit(1) for docx_file in docx_files: convert_one_file(docx_file, output_dir, input_dir) print() print("Conversion DOCX -> MDX terminée.") if __name__ == "__main__": main()