feat(serializer): add aot serializer

This commit is contained in:
2026-05-21 09:11:51 +00:00
parent 6f417ba514
commit f327e64a6a
30 changed files with 6720 additions and 0 deletions
+197
View File
@@ -0,0 +1,197 @@
/**
* Compile-time (AOT) vs runtime codegen benchmark.
*
* The AOT codecs are produced by running our transformer on a sample TS file
* at bench startup, writing the result to a temp file, and importing it. The
* runtime codecs come from the regular `type(...)` runtime path.
*/
import { bench, beforeAll, afterAll, describe } from 'vitest';
import { writeFileSync, rmSync, existsSync } from 'node:fs';
import { join, dirname } from 'node:path';
import { fileURLToPath, pathToFileURL } from 'node:url';
import { transform } from '../../plugin/compile/transformer.ts';
import {
type as runtimeType,
type TypeCodec,
u53,
f64,
str,
list,
enumOf,
flags,
clearRegistry,
Reader,
Writer,
} from '../../plugin/index.ts';
// Directory that contains this bench file.
const HERE = dirname(fileURLToPath(import.meta.url));
// Path of the generated module: written in beforeAll, imported from there, and removed in afterAll.
const GEN_FILE = join(HERE, '__aot_codecs.ts');
// Sample schema module handed to `transform()` in beforeAll. It is a runtime string, so its
// exact contents (including the relative import specifier, which is resolved from GEN_FILE's
// location once the output is written there) are part of the bench's behaviour.
// The 'Aot*' type names differ from the 'Rt*' names used for the runtime codecs.
const AOT_SOURCE = `
import { type, u53, f64, str, list, enumOf, flags } from '../../plugin/index.ts';
export const Ticker = type('AotTicker', {
symbol: str,
last: f64,
bid: f64,
ask: f64,
volume: f64,
});
export const Order = type('AotOrder', {
id: u53,
account: u53,
symbol: str,
side: enumOf(['buy', 'sell'] as const),
price: f64,
qty: f64,
filledQty: f64,
ts: u53,
flags: flags(['ioc', 'post_only', 'reduce_only'] as const),
});
export const Level = type('AotLevel', { p: f64, q: f64 });
export const Book = type('AotBook', {
symbol: str,
ts: u53,
bids: list(Level),
asks: list(Level),
});
`;
/**
 * Structural type this bench assumes for every export of the generated module
 * (the dynamic import result is cast to `Record<string, AotCodec>`).
 * Nothing here validates the generated module against this shape at runtime.
 */
interface AotCodec {
/** Called in beforeAll with only a value to produce the bytes the decode benches read. */
encode: (v: unknown, w?: Writer) => Uint8Array;
/** Not exercised by this bench. */
decode: (b: Uint8Array) => unknown;
/** Called by the encode benches with a value and a caller-owned, reset Writer. */
encodeInto: (v: unknown, w: Writer) => void;
/** Called by the decode benches with a Reader constructed over pre-encoded bytes. */
decodeFrom: (r: Reader) => unknown;
/** Not read by this bench. */
id: number;
}
// Exports of the generated AOT module, keyed by export name. Assigned in beforeAll.
let aot: Record<string, AotCodec>;

// Codecs built through the regular runtime `type(...)` path. Assigned in beforeAll.
let rtTicker: TypeCodec<unknown>;
let rtOrder: TypeCodec<unknown>;
let rtLevel: TypeCodec<unknown>;
let rtBook: TypeCodec<unknown>;

// ---- Fixture values, shared by the AOT and runtime variants of every bench ----

const ticker = {
  symbol: 'BTC-USD',
  last: 67891.23,
  bid: 67890.5,
  ask: 67892.0,
  volume: 1234567.89,
};

const order = {
  id: 9876543210,
  account: 12345678,
  symbol: 'BTC-USD',
  side: 'buy' as const,
  price: 67500.5,
  qty: 0.125,
  filledQty: 0,
  ts: 1716100000123,
  flags: { ioc: false, post_only: true, reduce_only: false },
};

// 1000 levels per side; bid prices step down and ask prices step up by 0.5 per level.
const book = {
  symbol: 'BTC-USD',
  ts: 1716100000123,
  bids: Array.from({ length: 1000 }, (_unused, level) => ({
    p: 67890 - level * 0.5,
    q: 0.1 + (level % 100) * 0.01,
  })),
  asks: Array.from({ length: 1000 }, (_unused, level) => ({
    p: 67891 + level * 0.5,
    q: 0.1 + (level % 100) * 0.01,
  })),
};

// One reusable Writer per payload kind; each bench resets it before encoding.
const wT = new Writer(256);
const wO = new Writer(256);
const wB = new Writer(64 * 1024);

// Pre-encoded bytes for the decode benches, filled in by beforeAll.
let tickerAot: Uint8Array, tickerRt: Uint8Array;
let orderAot: Uint8Array, orderRt: Uint8Array;
let bookAot: Uint8Array, bookRt: Uint8Array;
beforeAll(async () => {
  // Step 1: run the transformer over the sample source and persist its output so that it
  // can be loaded as a real module.
  const { code } = transform(AOT_SOURCE, GEN_FILE, {
    importPath: '../../plugin/index.ts',
    packageAliases: ['../../plugin/index.ts'],
  });
  writeFileSync(GEN_FILE, code, 'utf8');

  // The timestamp query string makes the import specifier differ on every run.
  const moduleUrl = `${pathToFileURL(GEN_FILE).href}?t=${Date.now()}`;
  aot = (await import(/* @vite-ignore */ moduleUrl)) as Record<string, AotCodec>;

  // Step 2: build the runtime equivalents under names that do not collide with the AOT ones.
  // NOTE(review): the registry is cleared *after* the generated module has been imported;
  // confirm the AOT codecs do not rely on registry entries created during that import.
  clearRegistry();
  rtTicker = runtimeType('RtTicker', {
    symbol: str,
    last: f64,
    bid: f64,
    ask: f64,
    volume: f64,
  });
  rtOrder = runtimeType('RtOrder', {
    id: u53,
    account: u53,
    symbol: str,
    side: enumOf(['buy', 'sell'] as const),
    price: f64,
    qty: f64,
    filledQty: f64,
    ts: u53,
    flags: flags(['ioc', 'post_only', 'reduce_only'] as const),
  });
  rtLevel = runtimeType('RtLevel', { p: f64, q: f64 });
  rtBook = runtimeType('RtBook', {
    symbol: str,
    ts: u53,
    bids: list(rtLevel),
    asks: list(rtLevel),
  });

  // Step 3: pre-encode every payload once so the decode benches only measure decoding.
  // The runtime side encodes into the shared Writer and copies the bytes out with slice().
  tickerAot = aot.Ticker!.encode(ticker);
  wT.reset();
  rtTicker.encodeInto(ticker, wT);
  tickerRt = wT.bytes().slice();

  orderAot = aot.Order!.encode(order);
  wO.reset();
  rtOrder.encodeInto(order, wO);
  orderRt = wO.bytes().slice();

  bookAot = aot.Book!.encode(book);
  wB.reset();
  rtBook.encodeInto(book, wB);
  bookRt = wB.bytes().slice();
});
afterAll(() => {
  // Clean up the generated module so it does not linger next to the bench sources.
  if (!existsSync(GEN_FILE)) {
    return;
  }
  rmSync(GEN_FILE, { force: true });
});
// Encode benches. Each iteration resets the shared per-payload Writer and encodes the same
// fixture value into it, once through the AOT codec and once through the runtime codec.
// `aot` and the rt* codecs are assigned in beforeAll, so they are dereferenced inside each
// bench callback rather than captured up front.
describe('encode ticker (AOT vs runtime)', () => {
bench('AOT (compiled)', () => {
wT.reset();
aot.Ticker!.encodeInto(ticker, wT);
});
bench('runtime (new Function)', () => {
wT.reset();
rtTicker.encodeInto(ticker, wT);
});
});
// Same comparison for the order payload, using the order Writer.
describe('encode order (AOT vs runtime)', () => {
bench('AOT (compiled)', () => {
wO.reset();
aot.Order!.encodeInto(order, wO);
});
bench('runtime', () => {
wO.reset();
rtOrder.encodeInto(order, wO);
});
});
// Same comparison for the 1000-level book, using the 64 KiB Writer.
describe('encode book 1000 levels (AOT vs runtime)', () => {
bench('AOT (compiled)', () => {
wB.reset();
aot.Book!.encodeInto(book, wB);
});
bench('runtime', () => {
wB.reset();
rtBook.encodeInto(book, wB);
});
});
// Decode benches. Each iteration constructs a fresh Reader over bytes that were pre-encoded
// in beforeAll; each codec decodes the bytes produced by its own encoder.
describe('decode ticker (AOT vs runtime)', () => {
  bench('AOT (compiled)', () => {
    const r = new Reader(tickerAot);
    aot.Ticker!.decodeFrom(r);
  });
  bench('runtime', () => {
    const r = new Reader(tickerRt);
    rtTicker.decodeFrom(r);
  });
});

// Order decode: mirrors the order encode bench and consumes the `orderAot` / `orderRt`
// buffers that beforeAll already pre-encodes (they were otherwise never read).
describe('decode order (AOT vs runtime)', () => {
  bench('AOT (compiled)', () => {
    const r = new Reader(orderAot);
    aot.Order!.decodeFrom(r);
  });
  bench('runtime', () => {
    const r = new Reader(orderRt);
    rtOrder.decodeFrom(r);
  });
});
// Book decode: a fresh Reader per iteration over the 1000-level snapshot pre-encoded in
// beforeAll, decoded by the codec whose encoder produced those bytes.
describe('decode book 1000 levels (AOT vs runtime)', () => {
bench('AOT (compiled)', () => {
const r = new Reader(bookAot);
aot.Book!.decodeFrom(r);
});
bench('runtime', () => {
const r = new Reader(bookRt);
rtBook.decodeFrom(r);
});
});
+121
View File
@@ -0,0 +1,121 @@
import { bench, describe } from 'vitest';
import { Reader, Writer, deserialize, serialize } from '../../plugin/index.ts';
import {
buildBook,
buildOrder,
buildTicker,
registerAll,
} from './payloads.ts';
// Module-level setup: everything below runs once when the bench file is loaded.
// Codecs for the bench schemas, as returned by registerAll() from payloads.ts.
const codecs = registerAll();
// Fixture values; the book has 1000 levels per side.
const ticker = buildTicker();
const order = buildOrder();
const book = buildBook(1000);
// Pre-allocated pooled Writers (sized generously so we don't measure grow()).
const wTicker = new Writer(256);
const wOrder = new Writer(256);
const wBook = new Writer(64 * 1024);
// Pre-encoded buffers for decode benches.
// JSON text is the input for the JSON.parse benches ...
const tickerJSON = JSON.stringify(ticker);
const orderJSON = JSON.stringify(order);
const bookJSON = JSON.stringify(book);
// ... and serialize() output is the input for the codec.decode benches.
const tickerBin = serialize(ticker, codecs.ticker);
const orderBin = serialize(order, codecs.order);
const bookBin = serialize(book, codecs.book);
// One-time payload-size print on module load so it appears once in bench output.
//
// Sizes are reported in bytes. For the JSON side that means the UTF-8 encoded length:
// `String.prototype.length` counts UTF-16 code units and would under-report any payload
// containing non-ASCII text (for the current all-ASCII fixtures both measures agree).

/** Number of bytes `text` occupies when encoded as UTF-8. */
const utf8ByteLength = (text: string): number => new TextEncoder().encode(text).length;

/**
 * Formats one row of the size report.
 *
 * @param label - Workload name shown in the first column.
 * @param json - JSON text of the payload.
 * @param bin - Binary encoding of the same payload; only its `length` is read.
 * @returns The row text, newline-terminated, with the binary size as a percentage of JSON.
 */
const formatSizeRow = (label: string, json: string, bin: { readonly length: number }): string => {
  const jsonBytes = utf8ByteLength(json);
  const percent = ((bin.length / jsonBytes) * 100).toFixed(0);
  return `${label} | json: ${jsonBytes}b bin: ${bin.length}b (${percent}%)\n`;
};

// eslint-disable-next-line no-console
console.log(
  '\n--- payload sizes ---\n' +
    formatSizeRow('ticker', tickerJSON, tickerBin) +
    formatSizeRow('order', orderJSON, orderBin) +
    formatSizeRow('book', bookJSON, bookBin),
);
// Encode benches: JSON.stringify versus codec.encode into a pooled Writer that is reset at
// the start of every iteration, so no Writer is allocated inside the timed callback.
describe('encode ticker (5 fields)', () => {
bench('JSON.stringify', () => {
JSON.stringify(ticker);
});
bench('codec.encode (pooled)', () => {
wTicker.reset();
codecs.ticker.encode(wTicker, ticker);
});
});
// Same comparison for the order payload.
describe('encode order (10 fields + bitset)', () => {
bench('JSON.stringify', () => {
JSON.stringify(order);
});
bench('codec.encode (pooled)', () => {
wOrder.reset();
codecs.order.encode(wOrder, order);
});
});
// Same comparison for the 1000-level book, using the 64 KiB Writer.
describe('encode book (1000 levels)', () => {
bench('JSON.stringify', () => {
JSON.stringify(book);
});
bench('codec.encode (pooled)', () => {
wBook.reset();
codecs.book.encode(wBook, book);
});
});
// Decode benches: JSON.parse over pre-built JSON text versus codec.decode over the bytes
// produced by serialize() at module load. A fresh Reader is constructed per iteration.
// NOTE(review): `r.pos = 2` starts decoding at byte offset 2 of the serialize() output,
// presumably to skip a 2-byte frame header that serialize() writes — confirm against the
// frame format; a header of a different size would make these benches decode garbage.
describe('decode ticker', () => {
bench('JSON.parse', () => {
JSON.parse(tickerJSON);
});
bench('codec.decode', () => {
const r = new Reader(tickerBin);
r.pos = 2;
codecs.ticker.decode(r);
});
});
// Same comparison for the order payload (same offset-2 assumption as above).
describe('decode order', () => {
bench('JSON.parse', () => {
JSON.parse(orderJSON);
});
bench('codec.decode', () => {
const r = new Reader(orderBin);
r.pos = 2;
codecs.order.decode(r);
});
});
// Same comparison for the 1000-level book (same offset-2 assumption as above).
describe('decode book (1000 levels)', () => {
bench('JSON.parse', () => {
JSON.parse(bookJSON);
});
bench('codec.decode', () => {
const r = new Reader(bookBin);
r.pos = 2;
codecs.book.decode(r);
});
});
// Roundtrip through the codec directly: encode into the pooled Writer, then decode from a
// Reader over `wTicker.bytes()`. serialize()/deserialize() are not involved here, and the
// Reader position is left at its initial value.
describe('roundtrip ticker', () => {
bench('JSON', () => {
JSON.parse(JSON.stringify(ticker));
});
bench('codec (pooled)', () => {
wTicker.reset();
codecs.ticker.encode(wTicker, ticker);
const r = new Reader(wTicker.bytes());
codecs.ticker.decode(r);
});
});
// Roundtrip through the top-level serialize()/deserialize() helpers, with no pooled Writer
// passed in. The JSON baseline is the same expression as in the suite above.
describe('serialize+deserialize ticker (with frame)', () => {
bench('JSON', () => {
JSON.parse(JSON.stringify(ticker));
});
bench('serialize/deserialize (framed)', () => {
deserialize(serialize(ticker, codecs.ticker));
});
});
+118
View File
@@ -0,0 +1,118 @@
import { defineSchema, register, s } from '../../plugin/index.ts';
import type { Codec } from '../../plugin/index.ts';
/** Schema for the ticker payload: one string field followed by four f64 fields. */
export const TickerSchema = defineSchema('BenchTicker', (t) => {
  return {
    symbol: t.str,
    last: t.f64,
    bid: t.f64,
    ask: t.f64,
    volume: t.f64,
  };
});
/**
 * Schema for the order payload: ten fields mixing u53, f64 and string values with two
 * enums (`side`, `type`) and a three-flag bitset.
 */
export const OrderSchema = defineSchema('BenchOrder', (t) => {
  return {
    id: t.u53,
    account: t.u53,
    symbol: t.str,
    side: t.enum(['buy', 'sell'] as const),
    type: t.enum(['limit', 'market', 'stop', 'stop_limit'] as const),
    price: t.f64,
    qty: t.f64,
    filledQty: t.f64,
    ts: t.u53,
    flags: t.bitset(['ioc', 'post_only', 'reduce_only'] as const),
  };
});
/** Schema for a single book level: two f64 fields, `p` and `q`. */
export const LevelSchema = defineSchema('BenchLevel', (t) => {
  return {
    p: t.f64,
    q: t.f64,
  };
});
/** Schema for a book snapshot: a string, a u53, and two arrays of `LevelSchema` entries. */
export const BookSchema = defineSchema('BenchBook', (t) => {
  return {
    symbol: t.str,
    ts: t.u53,
    bids: t.array(LevelSchema),
    asks: t.array(LevelSchema),
  };
});
/**
 * Value shape of the ticker fixture returned by `buildTicker()`.
 * Field names and order match `TickerSchema`.
 */
export interface Ticker {
symbol: string;
last: number;
bid: number;
ask: number;
volume: number;
}
/**
 * Value shape of the order fixture returned by `buildOrder()`.
 * Field names and order match `OrderSchema`; the `side` and `type` unions list the same
 * literals as the schema's enums, and `flags` has one boolean per bitset member.
 */
export interface Order {
id: number;
account: number;
symbol: string;
side: 'buy' | 'sell';
type: 'limit' | 'market' | 'stop' | 'stop_limit';
price: number;
qty: number;
filledQty: number;
ts: number;
flags: { ioc: boolean; post_only: boolean; reduce_only: boolean };
}
/** One entry of a book side; field names match `LevelSchema`. */
export interface Level {
p: number;
q: number;
}
/**
 * Value shape of the book fixture returned by `buildBook()`.
 * Field names and order match `BookSchema`.
 */
export interface Book {
symbol: string;
ts: number;
bids: Level[];
asks: Level[];
}
/**
 * Creates the ticker fixture used by the benches.
 *
 * @returns A freshly allocated object with fixed values; every call returns a new instance.
 */
export function buildTicker(): Ticker {
  const sample: Ticker = {
    symbol: 'BTC-USD',
    last: 67891.23,
    bid: 67890.5,
    ask: 67892.0,
    volume: 1234567.89,
  };
  return sample;
}
/**
 * Creates the order fixture used by the benches.
 *
 * @returns A freshly allocated object with fixed values (including a new `flags` object);
 *          every call returns a new instance.
 */
export function buildOrder(): Order {
  const sample: Order = {
    id: 9876543210,
    account: 12345678,
    symbol: 'BTC-USD',
    side: 'buy',
    type: 'limit',
    price: 67500.5,
    qty: 0.125,
    filledQty: 0,
    ts: 1716100000123,
    flags: { ioc: false, post_only: true, reduce_only: false },
  };
  return sample;
}
/**
 * Builds a synthetic book snapshot with `depth` levels on each side.
 *
 * Level `i` has bid price `67890 - i * 0.5`, ask price `67891 + i * 0.5`, and the same
 * quantity `0.1 + (i % 100) * 0.01` on both sides.
 *
 * The side arrays are grown with `push()` from an empty literal instead of being
 * preallocated with `new Array(depth)`: V8 classifies a preallocated array as holey and
 * keeps that classification after every slot is filled, which is an avoidable variable in
 * a fixture that the benches iterate over.
 *
 * @param depth - Number of levels per side; must be a non-negative integer.
 * @returns A new `Book` whose `bids` and `asks` each contain `depth` entries.
 * @throws RangeError if `depth` is negative or not an integer (the preallocating version
 *         threw the same error type for these inputs).
 */
export function buildBook(depth: number): Book {
  if (!Number.isInteger(depth) || depth < 0) {
    throw new RangeError(`Invalid book depth: ${depth}`);
  }
  const bids: Level[] = [];
  const asks: Level[] = [];
  for (let i = 0; i < depth; i++) {
    const qty = 0.1 + (i % 100) * 0.01;
    bids.push({ p: 67890 - i * 0.5, q: qty });
    asks.push({ p: 67891 + i * 0.5, q: qty });
  }
  return { symbol: 'BTC-USD', ts: 1716100000123, bids, asks };
}
/** Bundle of the four codecs returned by `registerAll()`, one per bench schema. */
export interface Codecs {
ticker: Codec<Ticker>;
order: Codec<Order>;
level: Codec<Level>;
book: Codec<Book>;
}
/**
 * Registers the four bench schemas and returns the resulting codecs.
 *
 * Registration happens in the order ticker, order, level, book, so `LevelSchema` is
 * registered before `BookSchema`, which references it.
 *
 * @returns The codecs produced by `register()`, keyed by payload kind.
 */
export function registerAll(): Codecs {
  // Object-literal properties are evaluated top to bottom, which fixes the registration order.
  return {
    ticker: register<Ticker>(TickerSchema),
    order: register<Order>(OrderSchema),
    level: register<Level>(LevelSchema),
    book: register<Book>(BookSchema),
  };
}
+117
View File
@@ -0,0 +1,117 @@
# Benchmark results
Hardware: Intel Xeon (Icelake) @ 2.46 GHz, Windows Server 2019
Runtime: Node.js 24.14.0 (x64)
Tool: mitata
Date: 2026-05-21
Reproduce: `npm run bench` from the `serializer/` directory. Numbers below are taken from the avg column, or from the p75 column where the two diverge.
## Payload sizes
| Workload | JSON bytes | Binary bytes | Binary/JSON ratio |
|---|---:|---:|---:|
| Ticker (5 fields) | 82 | 42 | **0.51** |
| Order (10 fields + bitset) | 203 | 52 | **0.26** |
| Book snapshot (1000 levels) | 48,577 | 32,020 | **0.66** |
## Encode (lower is better)
| Workload | JSON.stringify | codec.encode (pooled) | Speedup vs JSON |
|---|---:|---:|---:|
| Ticker | 598.4 ns | **52.2 ns** | **11.5×** |
| Order | 1,170 ns | **123.4 ns** | **9.5×** |
| Book (1000 levels) | 437 µs | **10.2 µs** | **42.9×** |
## Decode (lower is better)
| Workload | JSON.parse | codec.decode | Speedup vs JSON |
|---|---:|---:|---:|
| Ticker | 696.3 ns | **311.0 ns** | **2.2×** |
| Order | 1,440 ns | **360.6 ns** | **4.0×** |
| Book (1000 levels) | 497 µs | **24–28 µs** (high GC variance) | **17–20×** |
## Roundtrip
| | ns/iter | Note |
|---|---:|---|
| `JSON.parse(JSON.stringify(...))` | 1,400 ns | baseline |
| Pooled codec encode + Reader decode | **418 ns** | **3.35× faster** |
| Un-pooled `serialize` + `deserialize` (framed) | 2,180 ns | 1.55× slower |
The un-pooled `serialize()` allocates a fresh Writer + DataView + Uint8Array on every call. Hot paths must pool a Writer.
## What changed vs v1 baseline
The v1 codec used method-call style for every operation: every `w.f64(v)` was a method dispatch with internal property reads on `this.buf`, `this.view`, `this.pos`. The optimized codec restructures the generated functions around six V8-friendly patterns:
| # | Optimization | Effect |
|---|---|---|
| 1 | Lift `pos`, `buf`, `view` to function-local `let/const` at start; sync `w.pos = pos` at end | Replaces N×3 property loads with N register reads |
| 2 | Inline all bounded-size ops (`u8`–`f64`, `bool`, varints, `enum`, `bitset`) using the lifted locals | Eliminates the method-call cost per primitive |
| 3 | Pre-`ensure` for the bounded prefix of each schema in a single bounds check | One growth check per ~10 fields instead of one per field |
| 4 | Inline nested objects/arrays/unions/tuples — no per-element function dispatch | Tight inner loops for `array<object>` (e.g., order book levels) |
| 5 | Closure-captured frozen map for `enum` with ≥4 values; ternary chain for 2–3 | Avoids string-switch overhead |
| 6 | For array elements that are themselves bounded, pre-`ensure(L * elementMax)` once outside the loop, then run a loop with no per-iteration ensure | Order-book encode goes from method-per-level to inline-per-level |
Unbounded leaves (`str`, `bytes`, `typedArray`, `ref`, `codec`) still go through the Writer/Reader methods, with a small sync/refetch dance around the call.
## Before / after (v1 baseline → v2 optimized, both avg ns)
| Workload | v1 baseline | v2 optimized | Improvement |
|---|---:|---:|---:|
| Ticker encode | 77.4 ns | **52.2 ns** | **1.48×** |
| Order encode | 130.4 ns | **123.4 ns** | 1.06× |
| Book encode | 27.9 µs | **10.2 µs** | **2.73×** |
| Ticker decode | 308.4 ns | 311.0 ns | ~same |
| Order decode | 368.3 ns | 360.6 ns | ~same |
| Book decode | 26.1 µs | 24.4 µs (p75) | 1.07× |
The decode side gains less than encode because Node's `JSON.parse` was already not the bottleneck — most of the decode time goes to allocating the result object and the string for `symbol`/`reason` fields, which the codec also has to do.
The book encode at **2.7× faster than v1 baseline (43× faster than JSON.stringify)** is the headline number: inlining the per-level encoder into the outer loop turned 1000 function calls per snapshot into 1000 inline `view.setFloat64(pos, ...)` pairs sharing one `ensure()`.
## What didn't pan out
We tried `String.fromCharCode.apply(null, buf.subarray(start, end))` for ASCII strings in the 8–64 char range. On Node 24 it was consistently slower than the simple `s += String.fromCharCode(buf[i])` loop for the short strings dominating exchange payloads — the variadic-args wrapper has its own overhead. Reverted.
## Generated source — example
For the Ticker schema (after optimization), the encoder body produced by codegen is:
```js
function encode_BenchTicker(w, o) {
let pos = w.pos;
let buf = w.buf;
let view = w.view;
if (pos + 33 > buf.byteLength) {
w.pos = pos; w.grow(33); buf = w.buf; view = w.view;
}
// varu53 symbol-length and 4 × f64 are bounded, but the str body itself isn't:
// (the str field flushes the bounded prefix, calls w.str, then refetches)
w.pos = pos; w.str(o["symbol"]); pos = w.pos; buf = w.buf; view = w.view;
if (pos + 32 > buf.byteLength) {
w.pos = pos; w.grow(32); buf = w.buf; view = w.view;
}
view.setFloat64(pos, o["last"], true); pos += 8;
view.setFloat64(pos, o["bid"], true); pos += 8;
view.setFloat64(pos, o["ask"], true); pos += 8;
view.setFloat64(pos, o["volume"], true); pos += 8;
w.pos = pos;
}
```
No `this.` indirections, no method dispatch for the floats, one ensure for the 4-float run. The result is **52 ns per Ticker encode**, ~12× faster than `JSON.stringify`.
## Acceptance bar (from plan)
| Target | Actual | Status |
|---|---|---|
| Encode ≥ 3× faster than JSON.stringify on medium-order workload | 9.5× | exceeded |
| Decode ≥ 5× faster than JSON.parse on order-book workload | 17–20× | exceeded |
| Payload ≤ 60% of JSON byte length on numeric-heavy data | 26% (Order) / 66% (Book) | partial (Book is f64-dense, little to compress) |
| Zero deopt events on hot benchmark loop | one-time OSR transition only | acceptable |