anno: support shard annotations in annotations-index endpoint
This commit is contained in:
@@ -1,23 +1,81 @@
|
|||||||
|
// src/pages/annotations-index.json.ts
|
||||||
import type { APIRoute } from "astro";
|
import type { APIRoute } from "astro";
|
||||||
import * as fs from "node:fs/promises";
|
import fs from "node:fs/promises";
|
||||||
import * as path from "node:path";
|
import path from "node:path";
|
||||||
import { parse as parseYAML } from "yaml";
|
import YAML from "yaml";
|
||||||
|
|
||||||
const CWD = process.cwd();
|
const CWD = process.cwd();
|
||||||
const ANNO_DIR = path.join(CWD, "src", "annotations");
|
const ANNO_ROOT = path.join(CWD, "src", "annotations");
|
||||||
|
|
||||||
// Strict en CI (ou override explicite)
|
/** True for non-null, non-array values whose `typeof` is "object". */
const isObj = (x: any) => {
  if (x === null || Array.isArray(x)) return false;
  return typeof x === "object";
};
|
||||||
const STRICT =
|
// True when `x` is an array; thin wrapper over Array.isArray.
const isArr = (x: any) => Array.isArray(x);
|
||||||
process.env.ANNOTATIONS_STRICT === "1" ||
|
|
||||||
process.env.CI === "1" ||
|
|
||||||
process.env.CI === "true";
|
|
||||||
|
|
||||||
async function exists(p: string): Promise<boolean> {
|
function normPath(s: string) {
|
||||||
try {
|
return String(s || "").replace(/\\/g, "/").replace(/^\/+|\/+$/g, "");
|
||||||
await fs.access(p);
|
}
|
||||||
return true;
|
function paraNum(pid: string) {
|
||||||
} catch {
|
const m = String(pid).match(/^p-(\d+)-/i);
|
||||||
return false;
|
return m ? Number(m[1]) : Number.POSITIVE_INFINITY;
|
||||||
|
}
|
||||||
|
/**
 * Normalizes a timestamp-like value to a string.
 *
 * @param v - Value to normalize; may be a `Date`, a string, or anything else.
 * @returns The ISO-8601 form of a valid `Date`; a string unchanged; `""` for
 *   every other input, including an invalid `Date`.
 */
function toIso(v: any): string {
  if (v instanceof Date) {
    // toISOString() throws RangeError on an invalid Date; report "no timestamp" instead.
    return Number.isNaN(v.getTime()) ? "" : v.toISOString();
  }
  return typeof v === "string" ? v : "";
}
|
||||||
|
/**
 * Sorts `arr` in place by each item's `ts` field, oldest first.
 *
 * - `ts` may be a `Date` or a date string; anything missing or unparseable
 *   (including an invalid `Date`) counts as timestamp 0.
 * - Items with equal timestamps are ordered by their JSON serialization,
 *   compared by UTF-16 code units so the result does not depend on the host
 *   locale (unlike `localeCompare`).
 *
 * @param arr - Array to reorder; a non-array argument is ignored.
 */
function stableSortByTs(arr: any[]): void {
  if (!Array.isArray(arr)) return;

  // Epoch milliseconds for an item, or 0 when no usable timestamp exists.
  const millis = (item: any): number => {
    const ts = item?.ts;
    const n =
      ts instanceof Date ? ts.getTime() : typeof ts === "string" ? Date.parse(ts) : NaN;
    return n || 0; // NaN -> 0
  };

  // Compute both sort keys once per item rather than once per comparison.
  const decorated = arr.map((item) => ({
    item,
    t: millis(item),
    json: String(JSON.stringify(item)),
  }));

  decorated.sort((a, b) => {
    if (a.t !== b.t) return a.t - b.t;
    if (a.json === b.json) return 0;
    return a.json < b.json ? -1 : 1;
  });

  // Write back so callers holding a reference to `arr` see the sorted order.
  decorated.forEach((d, i) => {
    arr[i] = d.item;
  });
}
|
||||||
|
|
||||||
|
/** Dedup key for a media item: its `src` as a string, or "" when absent or falsy. */
function keyMedia(x: any) {
  const src = x?.src;
  return src ? String(src) : "";
}
|
||||||
|
/**
 * Dedup key for a reference entry, built from `url`, `label`, `kind` and
 * `citation` (a missing or falsy field counts as "").
 *
 * The fields are serialized as a JSON array rather than joined with a
 * delimiter, so two different references cannot produce the same key just
 * because a field contains the delimiter text. The key is never empty.
 *
 * @param x - Reference-like object; may be null or undefined.
 * @returns An opaque string, equal for two entries iff all four fields match
 *   after string conversion.
 */
function keyRef(x: any): string {
  const fields = [x?.url, x?.label, x?.kind, x?.citation].map((f) => String(f || ""));
  return JSON.stringify(fields);
}
|
||||||
|
/** Dedup key for an editorial comment: its `text`, stringified and trimmed ("" when absent or falsy). */
function keyComment(x: any) {
  const text = x?.text;
  return (text ? String(text) : "").trim();
}
|
||||||
|
|
||||||
|
/**
 * Returns a new array holding every element of `dst` followed by those
 * elements of `src` whose key (per `keyFn`) is non-empty and not already
 * present. Neither input is mutated; a non-array input is treated as empty.
 *
 * @param dst - Existing items; all are kept, in order.
 * @param src - Candidate items to append.
 * @param keyFn - Maps an item to its identity key; items from `src` with a
 *   falsy key are skipped.
 */
function uniqUnion(dst: any[], src: any[], keyFn: (x:any)=>string) {
  const merged: any[] = isArr(dst) ? Array.from(dst) : [];
  const known = new Set<string>();
  merged.forEach((item) => {
    known.add(keyFn(item));
  });

  const incoming: any[] = isArr(src) ? src : [];
  incoming.forEach((item) => {
    const key = keyFn(item);
    if (key && !known.has(key)) {
      known.add(key);
      merged.push(item);
    }
  });

  return merged;
}
|
||||||
|
|
||||||
|
/**
 * Recursively merges `src` into `dst`, mutating `dst`. Does nothing unless
 * both arguments are plain (non-array) objects.
 *
 * Per key of `src`:
 * - `media`, `refs`, `comments_editorial` arrays: union with the existing
 *   array, deduplicated by their dedicated key functions.
 * - object value: merged recursively (a non-object slot in `dst` is replaced
 *   by `{}` first).
 * - any other array: appended to the existing array, deduplicated by JSON
 *   serialization.
 * - scalar: copied only when `dst` has no own value for the key, or that
 *   value is null/undefined/"".
 *
 * Only own properties of `dst` are consulted, and the `__proto__` key is
 * never merged: reading `dst["__proto__"]` yields the object's prototype, so
 * recursing into it would write into `Object.prototype`.
 * NOTE(review): whether the YAML parser can emit an own `__proto__` key
 * depends on the parser — confirm; the guard is cheap either way.
 */
function deepMergeEntry(dst: any, src: any) {
  if (!isObj(dst) || !isObj(src)) return;

  const hasOwn = (o: any, key: string): boolean =>
    Object.prototype.hasOwnProperty.call(o, key);

  for (const [k, v] of Object.entries(src)) {
    // Never touch the prototype chain (prototype-pollution guard).
    if (k === "__proto__") continue;

    if (k === "media" && isArr(v)) { dst.media = uniqUnion(dst.media, v, keyMedia); continue; }
    if (k === "refs" && isArr(v)) { dst.refs = uniqUnion(dst.refs, v, keyRef); continue; }
    if (k === "comments_editorial" && isArr(v)) { dst.comments_editorial = uniqUnion(dst.comments_editorial, v, keyComment); continue; }

    if (isObj(v)) {
      // Inherited members (e.g. `constructor`) must not be used as merge targets.
      if (!hasOwn(dst, k) || !isObj(dst[k])) dst[k] = {};
      deepMergeEntry(dst[k], v);
      continue;
    }

    if (isArr(v)) {
      const cur = hasOwn(dst, k) && isArr(dst[k]) ? dst[k] : [];
      const seen = new Set(cur.map((x:any) => JSON.stringify(x)));
      const out = [...cur];
      for (const it of v) {
        const s = JSON.stringify(it);
        if (!seen.has(s)) { seen.add(s); out.push(it); }
      }
      dst[k] = out;
      continue;
    }

    // scalar: set only if missing/empty (own properties only, so a key such
    // as "toString" is not mistaken for an existing value)
    if (!hasOwn(dst, k) || dst[k] == null || dst[k] === "") dst[k] = v;
  }
}
|
||||||
|
|
||||||
@@ -26,154 +84,93 @@ async function walk(dir: string): Promise<string[]> {
|
|||||||
const ents = await fs.readdir(dir, { withFileTypes: true });
|
const ents = await fs.readdir(dir, { withFileTypes: true });
|
||||||
for (const e of ents) {
|
for (const e of ents) {
|
||||||
const p = path.join(dir, e.name);
|
const p = path.join(dir, e.name);
|
||||||
if (e.isDirectory()) out.push(...(await walk(p)));
|
if (e.isDirectory()) out.push(...await walk(p));
|
||||||
else out.push(p);
|
else if (e.isFile() && /\.ya?ml$/i.test(e.name)) out.push(p);
|
||||||
}
|
}
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
function isPlainObject(x: unknown): x is Record<string, unknown> {
|
function inferExpected(relNoExt: string) {
|
||||||
return !!x && typeof x === "object" && !Array.isArray(x);
|
const parts = relNoExt.split("/").filter(Boolean);
|
||||||
}
|
const last = parts.at(-1) || "";
|
||||||
|
const isShard = /^p-\d+-/i.test(last);
|
||||||
function normalizePageKey(s: unknown): string {
|
const pageKey = isShard ? parts.slice(0, -1).join("/") : relNoExt;
|
||||||
return String(s ?? "")
|
const paraId = isShard ? last : null;
|
||||||
.replace(/^\/+/, "")
|
return { isShard, pageKey, paraId };
|
||||||
.replace(/\/+$/, "")
|
|
||||||
.trim();
|
|
||||||
}
|
|
||||||
|
|
||||||
function inferPageKeyFromFile(inDirAbs: string, fileAbs: string): string {
|
|
||||||
const rel = path.relative(inDirAbs, fileAbs).replace(/\\/g, "/");
|
|
||||||
return rel.replace(/\.(ya?ml|json)$/i, "");
|
|
||||||
}
|
|
||||||
|
|
||||||
function parseDoc(raw: string, fileAbs: string): unknown {
|
|
||||||
if (/\.json$/i.test(fileAbs)) return JSON.parse(raw);
|
|
||||||
return parseYAML(raw);
|
|
||||||
}
|
|
||||||
|
|
||||||
function hardFailOrCollect(errors: string[], msg: string): void {
|
|
||||||
if (STRICT) throw new Error(msg);
|
|
||||||
errors.push(msg);
|
|
||||||
}
|
|
||||||
|
|
||||||
function sanitizeEntry(
|
|
||||||
fileRel: string,
|
|
||||||
paraId: string,
|
|
||||||
entry: unknown,
|
|
||||||
errors: string[]
|
|
||||||
): Record<string, unknown> {
|
|
||||||
if (entry == null) return {};
|
|
||||||
|
|
||||||
if (!isPlainObject(entry)) {
|
|
||||||
hardFailOrCollect(errors, `${fileRel}: paras.${paraId} must be an object`);
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
|
|
||||||
const e: Record<string, unknown> = { ...entry };
|
|
||||||
|
|
||||||
const arrayFields = [
|
|
||||||
"refs",
|
|
||||||
"authors",
|
|
||||||
"quotes",
|
|
||||||
"media",
|
|
||||||
"comments_editorial",
|
|
||||||
] as const;
|
|
||||||
|
|
||||||
for (const k of arrayFields) {
|
|
||||||
if (e[k] == null) continue;
|
|
||||||
if (!Array.isArray(e[k])) {
|
|
||||||
errors.push(`${fileRel}: paras.${paraId}.${k} must be an array (coerced to [])`);
|
|
||||||
e[k] = [];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return e;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export const GET: APIRoute = async () => {
|
export const GET: APIRoute = async () => {
|
||||||
if (!(await exists(ANNO_DIR))) {
|
const pages: Record<string, { paras: Record<string, any> }> = {};
|
||||||
const out = {
|
const errors: Array<{ file: string; error: string }> = [];
|
||||||
schema: 1,
|
|
||||||
generatedAt: new Date().toISOString(),
|
|
||||||
pages: {},
|
|
||||||
stats: { pages: 0, paras: 0, errors: 0 },
|
|
||||||
errors: [] as string[],
|
|
||||||
};
|
|
||||||
|
|
||||||
return new Response(JSON.stringify(out), {
|
let files: string[] = [];
|
||||||
headers: {
|
|
||||||
"Content-Type": "application/json; charset=utf-8",
|
|
||||||
"Cache-Control": "no-store",
|
|
||||||
},
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
const files = (await walk(ANNO_DIR)).filter((p) => /\.(ya?ml|json)$/i.test(p));
|
|
||||||
|
|
||||||
const pages: Record<string, { paras: Record<string, Record<string, unknown>> }> =
|
|
||||||
Object.create(null);
|
|
||||||
|
|
||||||
const errors: string[] = [];
|
|
||||||
let paraCount = 0;
|
|
||||||
|
|
||||||
for (const f of files) {
|
|
||||||
const fileRel = path.relative(CWD, f).replace(/\\/g, "/");
|
|
||||||
const pageKey = normalizePageKey(inferPageKeyFromFile(ANNO_DIR, f));
|
|
||||||
|
|
||||||
if (!pageKey) {
|
|
||||||
hardFailOrCollect(errors, `${fileRel}: cannot infer page key`);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
let doc: unknown;
|
|
||||||
try {
|
try {
|
||||||
const raw = await fs.readFile(f, "utf8");
|
files = await walk(ANNO_ROOT);
|
||||||
doc = parseDoc(raw, f);
|
} catch (e: any) {
|
||||||
} catch (e) {
|
throw new Error(`Missing annotations root: ${ANNO_ROOT} (${e?.message || e})`);
|
||||||
hardFailOrCollect(errors, `${fileRel}: parse failed: ${String((e as any)?.message ?? e)}`);
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!isPlainObject(doc) || (doc as any).schema !== 1) {
|
for (const fp of files) {
|
||||||
hardFailOrCollect(errors, `${fileRel}: schema must be 1`);
|
const rel = normPath(path.relative(ANNO_ROOT, fp));
|
||||||
continue;
|
const relNoExt = rel.replace(/\.ya?ml$/i, "");
|
||||||
}
|
const { isShard, pageKey, paraId } = inferExpected(relNoExt);
|
||||||
|
|
||||||
if ((doc as any).page != null) {
|
try {
|
||||||
const declared = normalizePageKey((doc as any).page);
|
const raw = await fs.readFile(fp, "utf8");
|
||||||
if (declared !== pageKey) {
|
const doc = YAML.parse(raw) || {};
|
||||||
hardFailOrCollect(
|
|
||||||
errors,
|
if (!isObj(doc) || doc.schema !== 1) continue;
|
||||||
`${fileRel}: page mismatch (page="${declared}" vs path="${pageKey}")`
|
|
||||||
);
|
const docPage = normPath(doc.page || "");
|
||||||
|
if (docPage && docPage !== pageKey) {
|
||||||
|
throw new Error(`page mismatch (page="${doc.page}" vs path="${pageKey}")`);
|
||||||
|
}
|
||||||
|
if (!doc.page) doc.page = pageKey;
|
||||||
|
|
||||||
|
if (!isObj(doc.paras)) throw new Error(`missing object key "paras"`);
|
||||||
|
|
||||||
|
const pg = pages[pageKey] ??= { paras: {} };
|
||||||
|
|
||||||
|
if (isShard) {
|
||||||
|
if (!paraId) throw new Error("internal: missing paraId");
|
||||||
|
if (!(paraId in doc.paras)) {
|
||||||
|
throw new Error(`shard mismatch: file must contain paras["${paraId}"]`);
|
||||||
|
}
|
||||||
|
const entry = doc.paras[paraId];
|
||||||
|
if (!isObj(pg.paras[paraId])) pg.paras[paraId] = {};
|
||||||
|
if (isObj(entry)) deepMergeEntry(pg.paras[paraId], entry);
|
||||||
|
|
||||||
|
stableSortByTs(pg.paras[paraId].media);
|
||||||
|
stableSortByTs(pg.paras[paraId].refs);
|
||||||
|
stableSortByTs(pg.paras[paraId].comments_editorial);
|
||||||
|
} else {
|
||||||
|
for (const [pid, entry] of Object.entries(doc.paras)) {
|
||||||
|
const p = String(pid);
|
||||||
|
if (!isObj(pg.paras[p])) pg.paras[p] = {};
|
||||||
|
if (isObj(entry)) deepMergeEntry(pg.paras[p], entry);
|
||||||
|
|
||||||
|
stableSortByTs(pg.paras[p].media);
|
||||||
|
stableSortByTs(pg.paras[p].refs);
|
||||||
|
stableSortByTs(pg.paras[p].comments_editorial);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (e: any) {
|
||||||
|
errors.push({ file: `src/annotations/${rel}`, error: String(e?.message || e) });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const parasAny = (doc as any).paras;
|
// sort paras
|
||||||
if (!isPlainObject(parasAny)) {
|
for (const [pageKey, pg] of Object.entries(pages)) {
|
||||||
hardFailOrCollect(errors, `${fileRel}: missing object key "paras"`);
|
const keys = Object.keys(pg.paras || {});
|
||||||
continue;
|
keys.sort((a, b) => {
|
||||||
}
|
const ia = paraNum(a);
|
||||||
|
const ib = paraNum(b);
|
||||||
if (pages[pageKey]) {
|
if (Number.isFinite(ia) && Number.isFinite(ib) && ia !== ib) return ia - ib;
|
||||||
hardFailOrCollect(errors, `${fileRel}: duplicate page "${pageKey}" (only one file per page)`);
|
return String(a).localeCompare(String(b));
|
||||||
continue;
|
});
|
||||||
}
|
const next: Record<string, any> = {};
|
||||||
|
for (const k of keys) next[k] = pg.paras[k];
|
||||||
const parasOut: Record<string, Record<string, unknown>> = Object.create(null);
|
pg.paras = next;
|
||||||
|
|
||||||
for (const [paraId, entry] of Object.entries(parasAny)) {
|
|
||||||
if (!/^p-\d+-/i.test(paraId)) {
|
|
||||||
hardFailOrCollect(errors, `${fileRel}: invalid para id "${paraId}"`);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
parasOut[paraId] = sanitizeEntry(fileRel, paraId, entry, errors);
|
|
||||||
}
|
|
||||||
|
|
||||||
pages[pageKey] = { paras: parasOut };
|
|
||||||
paraCount += Object.keys(parasOut).length;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const out = {
|
const out = {
|
||||||
@@ -182,16 +179,18 @@ export const GET: APIRoute = async () => {
|
|||||||
pages,
|
pages,
|
||||||
stats: {
|
stats: {
|
||||||
pages: Object.keys(pages).length,
|
pages: Object.keys(pages).length,
|
||||||
paras: paraCount,
|
paras: Object.values(pages).reduce((n, p) => n + Object.keys(p.paras || {}).length, 0),
|
||||||
errors: errors.length,
|
errors: errors.length,
|
||||||
},
|
},
|
||||||
errors,
|
errors,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// 🔥 "pro CI" behavior: any collected error fails the build
|
||||||
|
if (errors.length) {
|
||||||
|
throw new Error(`${errors[0].file}: ${errors[0].error}`);
|
||||||
|
}
|
||||||
|
|
||||||
return new Response(JSON.stringify(out), {
|
return new Response(JSON.stringify(out), {
|
||||||
headers: {
|
headers: { "Content-Type": "application/json; charset=utf-8" },
|
||||||
"Content-Type": "application/json; charset=utf-8",
|
|
||||||
"Cache-Control": "no-store",
|
|
||||||
},
|
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
Reference in New Issue
Block a user