Use persistent WebTransport frame stream

This commit is contained in:
2026-06-25 10:48:09 -07:00
parent 3e2ca8057b
commit f6e2b7edda
4 changed files with 79 additions and 25 deletions

View File

@@ -48,6 +48,8 @@ Audio is the playback clock. The server sends JPEG frames over WebSocket by defa
- First 8 bytes: little-endian float64 timestamp in seconds.
- Remaining bytes: one complete JPEG image.
WebSocket sends one binary frame packet per message. WebTransport uses one persistent server-to-client unidirectional frame stream: the stream begins with a single stream-type byte whose value is `WT_STREAM_FRAME`, followed by a sequence of records, each consisting of a 4-byte little-endian uint32 packet length and then one complete binary frame packet of exactly that length. Do not open one WebTransport stream per JPEG frame; doing so caused poor device performance at normal frame rates.
The frontend decodes JPEGs with browser image APIs, queues frames, and paints frames whose timestamps are due relative to `audio.currentTime`. This means the browser decodes only audio and still images, not video.
## Why JPEG Frames

View File

@@ -3,7 +3,7 @@
A small web app that plays a remote video stream without using browser video decoding. The server uses `ffmpeg` to decode the input URL into:
- an MP3 audio stream served to a normal `<audio>` element
- timed JPEG image frames sent over a WebSocket, or optionally WebTransport/QUIC, and painted onto a `<canvas>`
- timed JPEG image frames sent over WebSocket, or optionally WebTransport/QUIC, and painted onto a `<canvas>`
This is meant for machines where image and audio decoding work but browser video decoding is unavailable or unreliable.
@@ -56,9 +56,9 @@ The app sets `FFMPEG_INPUT_SEEKABLE=0` by default so `ffmpeg` reads stream input
YouTube URLs are resolved server-side with `yt-dlp` before they enter the existing ffmpeg pipeline. Recents and favorites keep the original YouTube URL, while the short-lived playback session uses the resolved media URL and headers returned by `yt-dlp`. Tune the selected format with `YT_DLP_FORMAT` and the resolver timeout with `YT_DLP_TIMEOUT_MS`.
JPEG frames are dropped when the browser WebSocket falls behind instead of letting stale frames queue indefinitely. Tune the server-side backlog cap with `MAX_WS_BUFFER_BYTES`; the default is `2097152`.
JPEG frames are dropped when the browser frame transport falls behind instead of letting stale frames queue indefinitely. Tune the server-side backlog cap with `MAX_WS_BUFFER_BYTES`; the default is `2097152`.
WebSocket remains the default frame transport. To try WebTransport/QUIC for frame delivery, set `FRAME_TRANSPORT=webtransport` or `FRAME_TRANSPORT=auto` and expose the WebTransport UDP port. The default WebTransport port is `PORT + 1`; override it with `WEBTRANSPORT_PORT` and, when needed behind Docker or a proxy, `WEBTRANSPORT_PUBLIC_HOST` and `WEBTRANSPORT_PUBLIC_PORT`. The server generates a short-lived local ECDSA certificate under `data/` unless `WEBTRANSPORT_CERT_PATH` and `WEBTRANSPORT_KEY_PATH` are both set. Browser WebTransport requires a secure context: localhost is usually allowed, but plain HTTP over a LAN address may not expose the API. Browsers that cannot connect with WebTransport fall back to WebSocket.
WebSocket remains the default frame transport. To try WebTransport/QUIC for frame delivery, set `FRAME_TRANSPORT=webtransport` or `FRAME_TRANSPORT=auto` and expose the WebTransport UDP port. The WebSocket path sends one binary frame packet per message. The WebTransport path uses one persistent server-to-client unidirectional stream with repeated length-prefixed frame packets. The default WebTransport port is `PORT + 1`; override it with `WEBTRANSPORT_PORT` and, when needed behind Docker or a proxy, `WEBTRANSPORT_PUBLIC_HOST` and `WEBTRANSPORT_PUBLIC_PORT`. The server generates a short-lived local ECDSA certificate under `data/` unless `WEBTRANSPORT_CERT_PATH` and `WEBTRANSPORT_KEY_PATH` are both set. Browser WebTransport requires a secure context: localhost is usually allowed, but plain HTTP over a LAN address may not expose the API. Browsers that cannot connect with WebTransport fall back to WebSocket.
In single mode, audio output from `ffmpeg` is buffered before it is written to the browser so short HTTP backpressure pauses are less likely to stall frame generation. Tune the cap with `MAX_AUDIO_QUEUE_BYTES`; the default is `4194304`.

View File

@@ -73,6 +73,8 @@ const METADATA_REFRESH_INTERVAL_MS = 650;
// Stream-type tags for WebTransport streams.
// NOTE(review): presumably the control tags are the first byte of their
// respective control streams — confirm against the stream-opening code.
const WT_STREAM_CONTROL_TO_CLIENT = 1;
// Tag for the persistent frame stream; compare with the server-side constant of the same name.
const WT_STREAM_FRAME = 2;
const WT_STREAM_CONTROL_TO_SERVER = 3;
// Size of the little-endian uint32 length prefix that precedes every frame packet on the frame stream.
const WT_FRAME_RECORD_HEADER_BYTES = 4;
// Largest packet length (16 MiB) accepted from a record header; larger values are rejected as invalid.
const MAX_FRAME_PACKET_BYTES = 16 * 1024 * 1024;
// Frame connection state codes.
// NOTE(review): values look like they mirror WebSocket readyState OPEN/CLOSING/CLOSED (1/2/3) — confirm.
const FRAME_CONNECTION_OPEN = 1;
const FRAME_CONNECTION_CLOSING = 2;
const FRAME_CONNECTION_CLOSED = 3;
@@ -741,35 +743,64 @@ async function readWebTransportControlStream(reader, initialPayload, events) {
}
/**
 * Consumes the persistent WebTransport frame stream and re-emits every
 * complete frame packet as a `message` event on `events`.
 *
 * Incoming bytes (including any `initialPayload` already read by the caller)
 * are accumulated in `pending` via appendWebTransportFrameChunk, and
 * dispatchWebTransportFrameRecords emits each complete length-prefixed record
 * and returns the unconsumed tail. When the reader reports `done`, leftover
 * bytes in `pending` are treated as a truncated record and an Error is thrown;
 * otherwise the function returns.
 *
 * NOTE(review): this listing is a rendered diff with the +/- markers stripped.
 * The statements that use `chunks`, `byteLength`, and the whole-stream `packet`
 * appear to be the removed one-stream-per-frame implementation interleaved
 * with the added `pending`-based one, so the braces below do not balance as
 * shown. Confirm against the real file before editing the body.
 *
 * @param reader Stream reader whose `read()` resolves to `{ value, done }`.
 * @param initialPayload Bytes already read from the stream; may be empty.
 * @param events Target that receives the `message` events.
 * @throws {Error} If the stream ends with a partial frame record, or a record
 *   is rejected by dispatchWebTransportFrameRecords.
 */
async function readWebTransportFrameStream(reader, initialPayload, events) {
const chunks = [];
let byteLength = 0;
let pending = new Uint8Array(0);
if (initialPayload.byteLength > 0) {
chunks.push(initialPayload);
byteLength += initialPayload.byteLength;
pending = appendWebTransportFrameChunk(pending, initialPayload);
pending = dispatchWebTransportFrameRecords(pending, events);
}
while (true) {
const { value, done } = await reader.read();
if (done) {
const packet = new Uint8Array(byteLength);
let offset = 0;
for (const chunk of chunks) {
packet.set(chunk, offset);
offset += chunk.byteLength;
if (pending.byteLength > 0) {
throw new Error('WebTransport frame stream ended with a partial frame record.');
}
events.dispatchEvent(new MessageEvent('message', { data: packet.buffer }));
return;
}
chunks.push(value);
byteLength += value.byteLength;
pending = appendWebTransportFrameChunk(pending, value);
pending = dispatchWebTransportFrameRecords(pending, events);
}
}
/**
 * Joins the bytes carried over from earlier reads with a newly received chunk.
 *
 * @param {Uint8Array} pending Unconsumed bytes from previous reads (may be empty).
 * @param {Uint8Array} chunk Bytes just read from the stream.
 * @returns {Uint8Array} `chunk` itself when nothing was carried over (no copy),
 *   otherwise a new array holding `pending` followed by `chunk`.
 */
function appendWebTransportFrameChunk(pending, chunk) {
  const carriedBytes = pending.byteLength;
  if (carriedBytes > 0) {
    const combined = new Uint8Array(carriedBytes + chunk.byteLength);
    combined.set(pending);
    combined.set(chunk, carriedBytes);
    return combined;
  }
  // Nothing buffered yet: hand the chunk back as-is instead of copying it.
  return chunk;
}
/**
 * Splits buffered frame-stream bytes into complete records and emits each
 * record's packet as a `message` event.
 *
 * A record is a 4-byte little-endian uint32 packet length followed by that
 * many packet bytes. Every emitted packet is copied into its own ArrayBuffer,
 * so `event.data` covers exactly one packet.
 *
 * @param {Uint8Array} buffer Bytes accumulated so far, starting at a record boundary.
 * @param {EventTarget} events Target that receives one `message` event per complete record.
 * @returns {Uint8Array} The bytes not yet consumed (a partial header or partial
 *   record). When no record could be consumed, `buffer` itself is returned
 *   without copying.
 * @throws {Error} If a record header announces a packet length of 8 bytes or
 *   fewer, or more than MAX_FRAME_PACKET_BYTES.
 */
function dispatchWebTransportFrameRecords(buffer, events) {
  // A frame packet starts with an 8-byte timestamp, so a packet of 8 bytes or
  // fewer cannot contain any image data.
  const frameTimestampBytes = 8;
  // One view over the whole buffer instead of a new DataView per record.
  const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
  let offset = 0;
  while (buffer.byteLength - offset >= WT_FRAME_RECORD_HEADER_BYTES) {
    const packetLength = view.getUint32(offset, true);
    // Validate before waiting for the payload so a corrupt header fails fast
    // instead of buffering up to 4 GiB.
    if (packetLength <= frameTimestampBytes || packetLength > MAX_FRAME_PACKET_BYTES) {
      throw new Error(`Invalid WebTransport frame packet length: ${packetLength}`);
    }
    const packetStart = offset + WT_FRAME_RECORD_HEADER_BYTES;
    const recordEnd = packetStart + packetLength;
    if (recordEnd > buffer.byteLength) {
      // The rest of this record has not arrived yet.
      break;
    }
    // slice() copies, giving the packet a dedicated ArrayBuffer of exactly packetLength bytes.
    const packet = buffer.slice(packetStart, recordEnd);
    events.dispatchEvent(new MessageEvent('message', { data: packet.buffer }));
    offset = recordEnd;
  }
  if (offset === 0) {
    // Nothing was consumed: keep the caller's buffer rather than copying it
    // again on every chunk of a large, still-incomplete record.
    return buffer;
  }
  // Copy the tail so the already-dispatched bytes can be garbage collected.
  return offset === buffer.byteLength ? new Uint8Array(0) : buffer.slice(offset);
}
function dispatchWebTransportControlLines(text, events) {
const lines = text.split(/\n/);
const pending = lines.pop() ?? '';

View File

@@ -65,6 +65,7 @@ const JPEG_EOI = Buffer.from([0xff, 0xd9]);
// Stream-type tags for WebTransport streams.
// NOTE(review): presumably the control tags are written as the first byte of
// their respective control streams — confirm against the stream-opening code.
const WT_STREAM_CONTROL_TO_CLIENT = 1;
// Written as the single leading byte of the persistent frame stream.
const WT_STREAM_FRAME = 2;
const WT_STREAM_CONTROL_TO_SERVER = 3;
// Size of the little-endian uint32 length prefix written before every frame packet.
const WT_FRAME_RECORD_HEADER_BYTES = 4;
// 30 days expressed in seconds.
const BEST_EFFORT_RESUME_MAX_SECONDS = 30 * 24 * 60 * 60;
const RECORDED_MEDIA_EXTENSIONS = new Set([
'.avi',
@@ -2524,6 +2525,8 @@ class WebTransportFrameConnection extends EventEmitter {
this._closedEmitted = false;
this._controlStreamPromise = this.openControlStream();
this._controlWrite = Promise.resolve();
this._frameStreamPromise = null;
this._frameWrite = Promise.resolve();
this.session.closed().then((info) => {
this.markClosed(info?.closeCode ?? 1000, info?.reason ?? '');
@@ -2554,7 +2557,11 @@ class WebTransportFrameConnection extends EventEmitter {
}
const packet = Buffer.isBuffer(data) ? data : Buffer.from(data);
this.writeFrame(packet).then(() => {
this._frameWrite = this._frameWrite
.catch(() => {})
.then(() => this.writeFrame(packet));
this._frameWrite.then(() => {
done?.();
}).catch((error) => {
this.emitError(error);
@@ -2583,6 +2590,17 @@ class WebTransportFrameConnection extends EventEmitter {
return stream;
}
async openFrameStream() {
const stream = await this.session.openUni();
await stream.write(Buffer.from([WT_STREAM_FRAME]));
return stream;
}
getFrameStream() {
this._frameStreamPromise ??= this.openFrameStream();
return this._frameStreamPromise;
}
async writeControl(data) {
if (this.readyState !== WebSocket.OPEN) {
return;
@@ -2597,17 +2615,20 @@ class WebTransportFrameConnection extends EventEmitter {
throw new Error('WebTransport frame connection is closed.');
}
this._bufferedAmount += packet.length;
if (packet.length > 0xffffffff) {
throw new Error('WebTransport frame packet is too large.');
}
const recordHeader = Buffer.allocUnsafe(WT_FRAME_RECORD_HEADER_BYTES);
recordHeader.writeUInt32LE(packet.length, 0);
this._bufferedAmount += WT_FRAME_RECORD_HEADER_BYTES + packet.length;
try {
const stream = await this.session.openUni();
const payload = Buffer.allocUnsafe(packet.length + 1);
payload[0] = WT_STREAM_FRAME;
packet.copy(payload, 1);
await stream.write(payload);
await stream.finish();
const stream = await this.getFrameStream();
await stream.write(recordHeader);
await stream.write(packet);
} finally {
this._bufferedAmount = Math.max(0, this._bufferedAmount - packet.length);
this._bufferedAmount = Math.max(0, this._bufferedAmount - WT_FRAME_RECORD_HEADER_BYTES - packet.length);
}
}