feat(serializer): add class aot serialization support

This commit is contained in:
2026-05-21 17:24:42 +07:00
parent f327e64a6a
commit 720b8fbe2f
15 changed files with 736 additions and 541 deletions
+40 -9
View File
@@ -670,7 +670,20 @@ function emitDec(schema: AnySchema, ctx: Ctx): { pre: string; expr: string } {
const BARE_IDENT = /^[a-zA-Z_$][a-zA-Z0-9_$]*$/;
export function compileObject(schema: ObjectSchema): CodegenResult {
/**
* Options accepted by `compileObject` (and by `compileUnion`, which currently
* ignores them).
*/
export interface CompileOptions {
/**
* Source text of an expression that evaluates to a prototype object.
*
* If set, the generated decoder builds the result with
* `Object.create(${boundProtoExpr})` and assigns fields one by one, so the
* decoded value is an instance of the class whose prototype this expression
* resolves to. When omitted (or an empty string, which is treated the same
* way), the decoder returns a plain object literal.
*
* The string is spliced verbatim into the generated decoder body, so it must
* be a valid expression wherever that body is evaluated.
*/
boundProtoExpr?: string;
}
export function compileObject(
schema: ObjectSchema,
options: CompileOptions = {},
): CodegenResult {
const encCtx = new Ctx('enc');
const decCtx = new Ctx('dec');
@@ -681,23 +694,35 @@ export function compileObject(schema: ObjectSchema): CodegenResult {
}
const encodeBody = encSeg.build();
// Decoder body: emit as inline object literal in return statement.
// For fields whose inner.expr is already a bare identifier (declared via inner.pre),
// skip the wrapping `const tmp = expr;` and use the identifier directly.
// Decoder body. For fields whose inner.expr is already a bare identifier
// (declared via inner.pre), skip the wrapping `const tmp = expr;`.
let pre = '';
const props: string[] = [];
const pairs: Array<{ key: string; expr: string; rawName: string }> = [];
for (const fname of Object.keys(schema.fields)) {
const inner = emitDec(schema.fields[fname]!, decCtx);
let expr: string;
if (inner.pre !== '' && BARE_IDENT.test(inner.expr)) {
pre += inner.pre;
props.push(`${JSON.stringify(fname)}: ${inner.expr}`);
expr = inner.expr;
} else {
const tmp = decCtx.fresh(`f_${sanitize(fname)}`);
pre += `${inner.pre} const ${tmp} = ${inner.expr};`;
props.push(`${JSON.stringify(fname)}: ${tmp}`);
expr = tmp;
}
pairs.push({ key: JSON.stringify(fname), expr, rawName: fname });
}
let decodeBody: string;
if (options.boundProtoExpr) {
// Class-bound: build via Object.create(proto) + sequential assigns so the
// result satisfies `instanceof` and inherits prototype methods.
const out = decCtx.fresh('out');
const assigns = pairs.map((p) => `${out}[${p.key}] = ${p.expr};`).join('');
decodeBody = `${pre} const ${out} = Object.create(${options.boundProtoExpr}); ${assigns} r.pos = pos; return ${out};`;
} else {
const literal = pairs.map((p) => `${p.key}: ${p.expr}`).join(', ');
decodeBody = `${pre} r.pos = pos; return { ${literal} };`;
}
const decodeBody = `${pre} r.pos = pos; return { ${props.join(', ')} };`;
const deps = new Map<string, { mode: 'enc' | 'dec'; targetName: string }>();
for (const [k, v] of encCtx.deps) deps.set(k, v);
@@ -708,7 +733,13 @@ export function compileObject(schema: ObjectSchema): CodegenResult {
return { encodeBody, decodeBody, deps, closure };
}
export function compileUnion(schema: UnionSchema): CodegenResult {
export function compileUnion(
schema: UnionSchema,
_options: CompileOptions = {},
): CodegenResult {
// Class-bound unions are not yet supported (each variant would need its own
// prototype binding). For now, ignore `boundProtoExpr` and emit a plain
// literal-based decoder.
const encCtx = new Ctx('enc');
const decCtx = new Ctx('dec');
+138 -47
View File
@@ -7,9 +7,18 @@
* that constructs the codec inline — no runtime `new Function`, no codegen
* module needed at runtime.
*
* Also detects class declarations with `static [Serializable] = type(...)`:
* - the AOT codec's decoder uses `Object.create(ClassName.prototype)` so
* decoded values are real `instanceof ClassName` instances with prototype
* methods working;
* - the codec auto-registers itself into the runtime registry on module
* load (so `deserialize(bytes)` dispatches by id);
* - top-level `const X = registerClass(ClassName)` calls are rewritten to
* `const X = ClassName[Serializable]` so users can drop the call entirely.
*
* Scope (v1):
* - Same-file only (no cross-file schema references).
* - Top-level `const X = type(...)` declarations (including `export const`).
* - Top-level `const X = type(...)` and `static [Serializable] = type(...)`.
* - Field values may be:
* • imported primitive markers (u8 … f64, bool, str, bytes, *Array)
* • calls to imported combinators (list, opt, enumOf, flags, tuple)
@@ -22,16 +31,9 @@ import { parseSync } from 'oxc-parser';
import MagicString from 'magic-string';
import { compileObject, compileUnion } from '../codegen.ts';
import { s } from '../schema.ts';
import type {
AnySchema,
ObjectSchema,
UnionSchema,
} from '../descriptors.ts';
import type { AnySchema, ObjectSchema, UnionSchema } from '../descriptors.ts';
const PKG_NAMES = new Set([
'@perf/serializer',
'@perf/serializer/index',
]);
const PKG_NAMES = new Set(['@perf/serializer', '@perf/serializer/index']);
interface ImportInfo {
bindings: Map<string, string>;
@@ -41,10 +43,6 @@ interface CompiledCodec {
schemaName: string;
schemaKind: 'object' | 'union';
fieldsDescriptor: string;
encodeBody: string;
decodeBody: string;
closure: Map<string, unknown>;
deps: Map<string, { mode: 'enc' | 'dec'; targetName: string }>;
id: number;
}
@@ -64,8 +62,6 @@ const PRIMITIVES = new Set([
'f32Array', 'f64Array', 'u8Array', 'u16Array', 'u32Array', 'i32Array',
]);
// oxc AST nodes vary widely per `type`; we treat them as loosely-typed objects
// and check `.type` before reading per-shape fields.
type AnyNode = Record<string, any>;
function collectImportsFromSet(program: AnyNode, aliases: Set<string>): ImportInfo {
@@ -196,27 +192,58 @@ function collectFields(obj: AnyNode, scope: Scope): Record<string, AnySchema> |
/** One `type(...)` / `oneOf(...)` call site collected by `findTypeCalls`. */
interface TypeCallInfo {
/** The `CallExpression` node of the call itself. */
call: AnyNode;
/** Const name for `const X = type(...)` or class name for `class X { static [Serializable] = type(...) }`. */
declName: string;
/** Exported combinator name the callee resolves to through the import bindings. */
fn: 'type' | 'oneOf';
/**
* If set, the call is a static-field initializer inside this class.
* `compileCall` derives the decoder's prototype expression
* (`<boundClass>.prototype`) from it.
*/
boundClass?: string;
}
/**
 * Peel an `export` / `export default` wrapper off a top-level statement.
 *
 * @param stmt - A node taken from `program.body`.
 * @returns The wrapped declaration when `stmt` is an export node that carries
 *   one; otherwise `stmt` itself (including export nodes with no declaration).
 */
function unwrapStmt(stmt: AnyNode): AnyNode {
  const isExportWrapper =
    stmt.type === 'ExportNamedDeclaration' || stmt.type === 'ExportDefaultDeclaration';
  // Only export wrappers are inspected for a `declaration`; any other node is
  // handed back untouched even if it happens to have such a property.
  const inner: AnyNode | null | undefined = isExportWrapper ? stmt.declaration : undefined;
  return inner ? inner : stmt;
}
function findTypeCalls(program: AnyNode, imports: ImportInfo): TypeCallInfo[] {
const calls: TypeCallInfo[] = [];
for (const topStmt of program.body as AnyNode[]) {
const stmt: AnyNode =
topStmt.type === 'ExportNamedDeclaration' && topStmt.declaration
? (topStmt.declaration as AnyNode)
: topStmt;
if (stmt.type !== 'VariableDeclaration') continue;
for (const decl of stmt.declarations as AnyNode[]) {
const id = decl.id as AnyNode;
const init = decl.init as AnyNode | null;
if (!init || id.type !== 'Identifier' || init.type !== 'CallExpression') continue;
const callee = init.callee as AnyNode;
if (callee.type !== 'Identifier') continue;
const exported = imports.bindings.get(callee.name as string);
if (exported === 'type' || exported === 'oneOf') {
calls.push({ call: init, declName: id.name as string, fn: exported });
const stmt = unwrapStmt(topStmt);
// Top-level `const X = type(...)` / `const X = oneOf(...)`
if (stmt.type === 'VariableDeclaration') {
for (const decl of stmt.declarations as AnyNode[]) {
const id = decl.id as AnyNode;
const init = decl.init as AnyNode | null;
if (!init || id.type !== 'Identifier' || init.type !== 'CallExpression') continue;
const callee = init.callee as AnyNode;
if (callee.type !== 'Identifier') continue;
const exported = imports.bindings.get(callee.name as string);
if (exported === 'type' || exported === 'oneOf') {
calls.push({ call: init, declName: id.name as string, fn: exported });
}
}
continue;
}
// `class X { static [Serializable] = type(...) }`
if (stmt.type === 'ClassDeclaration') {
const className = (stmt.id as AnyNode | null)?.name as string | undefined;
if (!className) continue;
const body = stmt.body as AnyNode;
for (const member of body.body as AnyNode[]) {
if (member.type !== 'PropertyDefinition' || !member.static || !member.computed) continue;
const key = member.key as AnyNode;
if (key.type !== 'Identifier') continue;
const exported = imports.bindings.get(key.name as string);
if (exported !== 'Serializable') continue;
const value = member.value as AnyNode | null;
if (!value || value.type !== 'CallExpression') continue;
const callee = value.callee as AnyNode;
if (callee.type !== 'Identifier') continue;
const fnName = imports.bindings.get(callee.name as string);
if (fnName !== 'type' && fnName !== 'oneOf') continue;
calls.push({ call: value, declName: className, fn: fnName, boundClass: className });
}
}
}
@@ -298,7 +325,11 @@ function compileCall(
const schema = buildSchemaFromTypeCall(info, scope);
if (!schema) return null;
const cg = schema.kind === 'object' ? compileObject(schema) : compileUnion(schema);
const protoExpr = info.boundClass ? `${info.boundClass}.prototype` : undefined;
const cg =
schema.kind === 'object'
? compileObject(schema, { boundProtoExpr: protoExpr })
: compileUnion(schema);
const id = fnv1a16(schema.name);
const fname = sanitize(schema.name);
if (cg.deps.size > 0) return null; // ref/codec deps not supported yet
@@ -326,7 +357,7 @@ function compileCall(
${cg.decodeBody}
}
const __desc = ${descriptorLit};
const __codec = {
const __codec = Object.freeze({
...__desc,
id: ${id},
encode(v, into) {
@@ -342,10 +373,8 @@ function compileCall(
encodeInto(v, w) { encode_${fname}(w, v); },
decodeFrom: decode_${fname},
$infer: undefined,
};
Object.freeze(__codec);
__serRegisterPrecompiled(__codec, encode_${fname}, decode_${fname});
return __codec;
});
return __serRegisterPrecompiled(__codec);
})()`;
return {
@@ -354,10 +383,6 @@ function compileCall(
schemaName: schema.name,
schemaKind: schema.kind,
fieldsDescriptor: descriptorLit,
encodeBody: cg.encodeBody,
decodeBody: cg.decodeBody,
closure: cg.closure,
deps: cg.deps,
id,
},
};
@@ -384,14 +409,69 @@ function serializeClosureValue(v: unknown): string {
/**
* Build the prelude source text that `transform` prepends to a rewritten
* module. `importPath` is emitted as a JSON-quoted module specifier.
*
* NOTE(review): as shown here the template contains two import lines that
* both bind `__SerWriter` / `__SerReader`, plus a local
* `__serRegisterPrecompiled` function next to an imported binding of the same
* name. Presumably only one variant is meant to be emitted — confirm before
* relying on this text.
*/
function makePrelude(importPath: string): string {
return `
import { Writer as __SerWriter, Reader as __SerReader } from ${JSON.stringify(importPath)};
const __serRegistry = (globalThis.__serRegistry ??= new Map());
function __serRegisterPrecompiled(codec, enc, dec) {
__serRegistry.set(codec.id, codec);
}
import { Writer as __SerWriter, Reader as __SerReader, __registerPrecompiled as __serRegisterPrecompiled } from ${JSON.stringify(importPath)};
`;
}
/**
 * Replace top-level `registerClass(Y)` calls with a direct read of the codec
 * attached to the class.
 *
 * - `const X = registerClass(Y)` (any declaration kind, exported or not) has
 *   its initializer overwritten with `Y[<local Serializable name>]`.
 * - A bare `registerClass(Y);` statement is overwritten with a marker comment.
 *
 * Nothing is rewritten when the file does not import `Serializable`: without
 * it no class here can declare `static [Serializable] = ...`, so every
 * `registerClass` call is on the runtime path.
 *
 * @param program - Parsed program node; only `program.body` is walked.
 * @param imports - Local-name → exported-name bindings for the package.
 * @param ms - Source editor that receives the overwrites.
 * @returns Number of call sites rewritten or elided.
 */
function rewriteRegisterClassCalls(
  program: AnyNode,
  imports: ImportInfo,
  ms: MagicString,
): number {
  // Local identifier under which `Serializable` was imported (first match wins).
  let serializableLocal: string | undefined;
  for (const [localName, exportedName] of imports.bindings.entries()) {
    if (exportedName === 'Serializable') {
      serializableLocal = localName;
      break;
    }
  }
  if (!serializableLocal) return 0;

  let rewritten = 0;
  for (const topLevel of program.body as AnyNode[]) {
    const node = unwrapStmt(topLevel);
    switch (node.type) {
      case 'VariableDeclaration': {
        for (const declarator of node.declarations as AnyNode[]) {
          const init = declarator.init as AnyNode | null;
          if (!init || init.type !== 'CallExpression') continue;
          if (!isRegisterClassCall(init, imports)) continue;
          // Only a plain identifier argument can be turned into `Y[...]`.
          const target = (init.arguments as AnyNode[])[0];
          if (!target || target.type !== 'Identifier') continue;
          ms.overwrite(
            init.start as number,
            init.end as number,
            `${target.name}[${serializableLocal}]`,
          );
          rewritten++;
        }
        break;
      }
      case 'ExpressionStatement': {
        const expr = node.expression as AnyNode;
        if (expr.type === 'CallExpression' && isRegisterClassCall(expr, imports)) {
          // The statement has no value to preserve, so it is dropped entirely.
          ms.overwrite(node.start as number, node.end as number, '/* registerClass elided */');
          rewritten++;
        }
        break;
      }
      default:
        break;
    }
  }
  return rewritten;
}
/**
 * Tell whether a call expression invokes the package's `registerClass`.
 *
 * @param call - A `CallExpression` node.
 * @param imports - Local-name → exported-name bindings for the package.
 * @returns `true` only when the callee is a bare identifier whose import
 *   binding resolves to `registerClass`; member calls and unknown names are
 *   rejected.
 */
function isRegisterClassCall(call: AnyNode, imports: ImportInfo): boolean {
  const target = call.callee as AnyNode;
  const exportedAs =
    target.type === 'Identifier' ? imports.bindings.get(target.name as string) : undefined;
  return exportedAs === 'registerClass';
}
export interface TransformOptions {
importPath?: string;
packageAliases?: string[];
@@ -402,7 +482,11 @@ export interface TransformResult {
transformedCount: number;
}
export function transform(source: string, filename = 'input.ts', options: TransformOptions = {}): TransformResult {
export function transform(
source: string,
filename = 'input.ts',
options: TransformOptions = {},
): TransformResult {
const importPath = options.importPath ?? '@perf/serializer';
const aliases = new Set<string>(PKG_NAMES);
for (const a of options.packageAliases ?? []) aliases.add(a);
@@ -420,7 +504,10 @@ export function transform(source: string, filename = 'input.ts', options: Transf
let hasTypeImport = false;
for (const v of imports.bindings.values()) {
if (v === 'type' || v === 'oneOf') { hasTypeImport = true; break; }
if (v === 'type' || v === 'oneOf') {
hasTypeImport = true;
break;
}
}
if (!hasTypeImport) return { code: source, transformedCount: 0 };
@@ -452,6 +539,10 @@ export function transform(source: string, filename = 'input.ts', options: Transf
}
if (transformedCount === 0) return { code: source, transformedCount: 0 };
// Now that codecs are inlined, rewrite registerClass(X) → X[Serializable].
rewriteRegisterClassCalls(program, imports, ms);
ms.prepend(makePrelude(importPath));
return { code: ms.toString(), transformedCount };
}
+7 -28
View File
@@ -1,4 +1,4 @@
// ── Simplified façade (recommended) ────────────────────────────────────────
// ── Public API ─────────────────────────────────────────────────────────────
export {
type,
oneOf,
@@ -10,32 +10,11 @@ export {
} from './api.ts';
export type { TypeCodec, InferType, Router } from './api.ts';
// ── Low-level API (advanced) ───────────────────────────────────────────────
// ── Low-level (writer/reader for hot paths, framing primitives) ────────────
export { Writer, Reader } from './io.ts';
export { s, defineSchema } from './schema.ts';
export type { SchemaBuilder } from './schema.ts';
// ── Class contract ─────────────────────────────────────────────────────────
export { Serializable } from './symbol.ts';
export {
register,
registerClass,
serialize,
deserialize,
clearRegistry,
} from './register.ts';
export type { Codec } from './register.ts';
export type {
AnySchema,
ObjectSchema,
UnionSchema,
ArraySchema,
OptionalSchema,
EnumSchema,
BitsetSchema,
TupleSchema,
RefSchema,
CodecSchema,
PrimitiveSchema,
TypedArraySchema,
PrimitiveKind,
TypedArrayKind,
} from './descriptors.ts';
// ── Test / AOT helpers ─────────────────────────────────────────────────────
export { clearRegistry, __registerPrecompiled } from './register.ts';
+24 -45
View File
@@ -1,7 +1,6 @@
import { compileObject, compileUnion } from './codegen.ts';
import type { AnySchema, ObjectSchema, UnionSchema } from './descriptors.ts';
import { Reader, Writer } from './io.ts';
import { Serializable } from './symbol.ts';
import type { Reader, Writer } from './io.ts';
export interface Codec<T = unknown> {
readonly id: number;
@@ -14,7 +13,6 @@ type AnyCodec = Codec<any>;
const byName = new Map<string, AnyCodec>();
const byId = new Map<number, AnyCodec>();
const byCtor = new WeakMap<object, AnyCodec>();
function fnv1a16(s: string): number {
let h = 0x811c9dc5;
@@ -57,6 +55,10 @@ function sanIdent(name: string): string {
return name.replace(/[^A-Za-z0-9_]/g, '_');
}
/**
* Compile a schema into a `Codec` and register it in the lookup tables. Used
* internally by `type(...)` / `oneOf(...)`. Idempotent by schema name.
*/
export function register<T = unknown>(schema: ObjectSchema | UnionSchema): Codec<T> {
const existing = byName.get(schema.name);
if (existing) return existing as Codec<T>;
@@ -149,49 +151,26 @@ return function decode_${fname}(r) {
return codec;
}
/**
 * Resolve (and memoize) the codec for a class that carries a
 * `static [Serializable]` schema.
 *
 * The per-constructor cache is a WeakMap that `clearRegistry()` does not (and
 * cannot) reset. A cached codec is therefore reused only while it is still the
 * codec registered under its id; otherwise the schema is registered again and
 * the cache entry refreshed.
 *
 * @param Ctor - Class constructor exposing its schema at `Ctor[Serializable]`.
 * @returns The codec registered for the class's schema.
 * @throws Error if the class has no `[Serializable]` schema.
 */
export function registerClass<T>(Ctor: new (...args: never[]) => T): Codec<T> {
  const cached = byCtor.get(Ctor);
  // Never hand out a stale entry: once the registry has been cleared,
  // `deserialize` can no longer find that codec by id.
  if (cached && byId.get(cached.id) === cached) return cached as Codec<T>;

  const schema = (Ctor as unknown as Record<symbol, unknown>)[Serializable] as
    | ObjectSchema
    | UnionSchema
    | undefined;
  if (!schema) {
    throw new Error(`${Ctor.name} has no [Serializable] schema`);
  }

  const codec = register<T>(schema);
  byCtor.set(Ctor, codec);
  return codec;
}
/**
 * Frame a value as a 2-byte schema id followed by the codec-encoded body.
 *
 * @param value - Value to encode.
 * @param codec - Codec that supplies the id and writes the body.
 * @param writer - Optional caller-owned writer. When given, the frame is
 *   written into it and `writer.bytes()` is returned; when omitted, a
 *   temporary writer is used and `bytesCopy()` is returned instead.
 * @returns The framed bytes.
 */
export function serialize<T>(value: T, codec: Codec<T>, writer?: Writer): Uint8Array {
  const callerOwned = Boolean(writer);
  const sink = writer ? writer : new Writer();
  sink.u16(codec.id);
  codec.encode(sink, value);
  return callerOwned ? sink.bytes() : sink.bytesCopy();
}
/**
 * Decode a framed buffer: read the leading 16-bit schema id, look up the
 * codec registered under that id and let it decode the rest.
 *
 * @param bytes - Framed input as produced by `serialize`.
 * @returns The decoded value, typed as `T` by assertion only.
 * @throws Error if no codec is registered for the id found in the frame.
 */
export function deserialize<T = unknown>(bytes: Uint8Array): T {
  const reader = new Reader(bytes);
  const schemaId = reader.u16();
  const match = byId.get(schemaId);
  if (!match) {
    throw new Error(`Unknown schema ID: 0x${schemaId.toString(16)}`);
  }
  return match.decode(reader) as T;
}
/**
 * Test helper: empty the module-level codec lookup tables (by name and by id)
 * so schemas can be registered afresh.
 */
export function clearRegistry(): void {
  for (const table of [byName, byId]) {
    table.clear();
  }
}
/**
 * Internal hook for AOT-generated codecs: insert an already-built codec into
 * the lookup tables. Not meant for user code.
 *
 * @param codec - Precompiled codec to register.
 * @returns The codec already registered under the same name if there is one
 *   (the argument is then ignored); otherwise `codec` itself after it has been
 *   stored by name and by id.
 * @throws Error if a codec with a different name already occupies the same id.
 */
export function __registerPrecompiled<T = unknown>(codec: Codec<T>): Codec<T> {
  // Same name wins: first registration is kept.
  const sameName = byName.get(codec.name);
  if (sameName) return sameName as Codec<T>;

  const sameId = byId.get(codec.id);
  if (sameId && sameId.name !== codec.name) {
    const hex = codec.id.toString(16);
    throw new Error(
      `Schema ID collision: "${codec.name}" and "${sameId.name}" both hash to 0x${hex}`,
    );
  }

  const entry = codec as AnyCodec;
  byName.set(codec.name, entry);
  byId.set(codec.id, entry);
  return codec;
}