Merge wavelet_video branch: Add TAV encoder with audio buffer management and NTSC frame duplication

- Implemented TAV (TSVM Advanced Video) encoder with DWT-based compression
- Added sophisticated audio buffer deficit tracking system ported from TEV
- Fixed NTSC frame duplication ghosting by emitting extra sync packets
- Resolved merge conflicts in GraphicsJSR223Delegate.kt

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
minjaesong
2025-09-16 23:05:20 +09:00
6 changed files with 4301 additions and 435 deletions

View File

@@ -0,0 +1,741 @@
// Created by Claude on 2025-09-13.
// TSVM Advanced Video (TAV) Format Decoder - DWT-based compression
// Adapted from the working playtev.js decoder
// Usage: playtav moviefile.tav [options]
// Options: -i (interactive), -debug-mv (show motion vector debug visualization)
// -deinterlace=algorithm (yadif or bwdif, default: yadif)
// -deblock (enable post-processing deblocking filter)
const WIDTH = 560
const HEIGHT = 448
const TILE_SIZE = 112 // 112x112 tiles for DWT (perfect fit for TSVM 560x448 resolution)
const TAV_MAGIC = [0x1F, 0x54, 0x53, 0x56, 0x4D, 0x54, 0x41, 0x56] // "\x1FTSVM TAV"
const TAV_VERSION = 1 // Initial DWT version
const SND_BASE_ADDR = audio.getBaseAddr()
const pcm = require("pcm")
const MP2_FRAME_SIZE = [144,216,252,288,360,432,504,576,720,864,1008,1152,1440,1728]
// Tile encoding modes (same as TEV block modes)
const TAV_MODE_SKIP = 0x00
const TAV_MODE_INTRA = 0x01
const TAV_MODE_INTER = 0x02
const TAV_MODE_MOTION = 0x03
// Packet types (same as TEV)
const TAV_PACKET_IFRAME = 0x10
const TAV_PACKET_PFRAME = 0x11
const TAV_PACKET_AUDIO_MP2 = 0x20
const TAV_PACKET_SUBTITLE = 0x30
const TAV_PACKET_SYNC = 0xFF
// Wavelet filter types
const WAVELET_5_3_REVERSIBLE = 0
const WAVELET_9_7_IRREVERSIBLE = 1
// Subtitle opcodes (SSF format - same as TEV)
const SSF_OP_NOP = 0x00
const SSF_OP_SHOW = 0x01
const SSF_OP_HIDE = 0x02
const SSF_OP_MOVE = 0x03
const SSF_OP_UPLOAD_LOW_FONT = 0x80
const SSF_OP_UPLOAD_HIGH_FONT = 0x81
// Subtitle state
let subtitleVisible = false
let subtitleText = ""
let subtitlePosition = 0 // 0=bottom center (default)
// Parse command line options
let interactive = false
let debugMotionVectors = false
let deinterlaceAlgorithm = "yadif"
let enableDeblocking = false // Default: disabled (use -deblock to enable)
// Walk every argument after the movie path (exec_args[1]) and flip the matching option.
// Matching is case-insensitive; switches that are not recognised are ignored.
for (let argIndex = 2; argIndex < exec_args.length; argIndex += 1) {
    const option = exec_args[argIndex].toLowerCase()
    switch (option) {
        case "-i":
            interactive = true
            break
        case "-debug-mv":
            debugMotionVectors = true
            break
        case "-deblock":
            enableDeblocking = true
            break
        default:
            // "-deinterlace=<name>": everything after the 13-character prefix is the algorithm name
            if (option.startsWith("-deinterlace=")) {
                deinterlaceAlgorithm = option.substring(13)
            }
            break
    }
}
const fullFilePath = _G.shell.resolvePathInput(exec_args[1])
const FILE_LENGTH = files.open(fullFilePath.full).size
let videoRateBin = []
let errorlevel = 0
let notifHideTimer = 0
const NOTIF_SHOWUPTIME = 3000000000
let [cy, cx] = con.getyx()
let seqreadserial = require("seqread")
let seqreadtape = require("seqreadtape")
let seqread = undefined
let fullFilePathStr = fullFilePath.full
// Select seqread driver to use
if (fullFilePathStr.startsWith('$:/TAPE') || fullFilePathStr.startsWith('$:\\\\TAPE')) {
seqread = seqreadtape
seqread.prepare(fullFilePathStr)
seqread.seek(0)
} else {
seqread = seqreadserial
seqread.prepare(fullFilePathStr)
}
con.clear()
con.curs_set(0)
graphics.setGraphicsMode(4) // 4096-colour mode
graphics.clearPixels(0)
graphics.clearPixels2(0)
// Initialize audio
audio.resetParams(0)
audio.purgeQueue(0)
audio.setPcmMode(0)
audio.setMasterVolume(0, 255)
// Subtitle display functions
function clearSubtitleArea() {
    // Overwrites text rows 29..32 (the bottom four rows of the 80x32 text layer)
    // with spaces, then puts the console colours back the way they were.
    const savedFore = con.get_color_fore()
    const savedBack = con.get_color_back()

    // Colour 255 for both foreground and background (used here as "transparent")
    con.color_pair(255, 255)

    const FIRST_ROW = 29
    const LAST_ROW = 32
    const COLUMNS = 80
    for (let textRow = FIRST_ROW; textRow <= LAST_ROW; textRow += 1) {
        con.move(textRow, 1)
        // One space per column, printed individually
        for (let printed = 0; printed < COLUMNS; printed += 1) {
            print(" ")
        }
    }

    con.color_pair(savedFore, savedBack)
}
function getVisualLength(line) {
    // Returns how many characters of `line` are actually shown on screen, i.e. its
    // length with every <b>, <i>, </b> and </i> tag (any letter case) left out.
    // Any other '<' is an ordinary character and is counted.
    const OPENING_TAGS = ['<b>', '<i>']
    const CLOSING_TAGS = ['</b>', '</i>']

    // Width of the formatting tag starting at `pos`, or 0 when there is none.
    // A slice that runs off the end of the string is shorter than the tag and cannot match.
    const tagWidthAt = (pos) => {
        if (line[pos] !== '<') return 0
        if (OPENING_TAGS.includes(line.slice(pos, pos + 3).toLowerCase())) return 3
        if (CLOSING_TAGS.includes(line.slice(pos, pos + 4).toLowerCase())) return 4
        return 0
    }

    let shownChars = 0
    for (let pos = 0; pos < line.length; ) {
        const tagWidth = tagWidthAt(pos)
        if (tagWidth > 0) {
            pos += tagWidth // skip the tag, nothing visible
        } else {
            shownChars += 1
            pos += 1
        }
    }
    return shownChars
}
function displayFormattedLine(line) {
    // Prints `line` at the current cursor position, framed by two padding glyphs.
    // <b>/<i> switch the foreground to 254 (white) and </b>/</i> switch it back to
    // 231 (yellow, the normal subtitle colour). The tags themselves are never printed.
    const NORMAL_FG = 231
    const EMPHASIS_FG = 254

    // Leading padding block
    con.color_pair(0, 255)
    con.prnch(0xDE)
    con.color_pair(NORMAL_FG, 0)

    let cursor = 0
    while (cursor < line.length) {
        if (line[cursor] === '<') {
            // Slices that run past the end of the line are too short to equal a tag
            const asOpening = line.substring(cursor, cursor + 3).toLowerCase()
            if (asOpening === '<b>' || asOpening === '<i>') {
                con.color_pair(EMPHASIS_FG, 0)
                cursor += 3
                continue
            }
            const asClosing = line.substring(cursor, cursor + 4).toLowerCase()
            if (asClosing === '</b>' || asClosing === '</i>') {
                con.color_pair(NORMAL_FG, 0)
                cursor += 4
                continue
            }
        }
        // Ordinary character (including a '<' that does not start a known tag)
        print(line[cursor])
        cursor += 1
    }

    // Trailing padding block
    con.color_pair(0, 255)
    con.prnch(0xDD)
    con.color_pair(NORMAL_FG, 0)
}
function displaySubtitle(text, position = 0) {
    // Draws a (possibly multi-line) subtitle on the text layer.
    //   text     - subtitle text; lines are separated by '\n' and may contain <b>/<i> tags.
    //              An empty or missing text just clears the subtitle area.
    //   position - anchor: 0=bottom centre (default), 1=bottom left, 2=centre left, 3=top left,
    //              4=top centre, 5=top right, 6=centre right, 7=bottom right, 8=dead centre.
    // The console colours in effect on entry are restored before returning.
    if (!text || text.length === 0) {
        clearSubtitleArea()
        return
    }

    // Set subtitle colours: yellow (231) on black (0)
    let oldFgColour = con.get_color_fore()
    let oldBgColour = con.get_color_back()
    con.color_pair(231, 0)

    // Split text into lines
    let lines = text.split('\n')

    // Widest line in visible characters (formatting tags excluded); centred layouts
    // position the whole block by it. Math.max compares numerically; sort() without a
    // comparator orders numbers as strings (10 before 9) and would pick the wrong line.
    let longestLineLength = Math.max(...lines.map(s => getVisualLength(s)))

    // First text row of the block, by vertical anchor
    let startRow, startCol
    switch (position) {
        case 2: // center left
        case 6: // center right
        case 8: // dead center
            startRow = 16 - Math.floor(lines.length / 2)
            break
        case 3: // top left
        case 4: // top center
        case 5: // top right
            startRow = 2
            break
        case 0: // bottom center
        case 1: // bottom left
        case 7: // bottom right
        default:
            startRow = 32 - lines.length // Default to bottom
    }

    // Display each line
    for (let i = 0; i < lines.length; i++) {
        let line = lines[i].trim()
        if (line.length === 0) continue // blank lines keep their row but print nothing

        // Keep the row inside the 32-row text layer
        let row = startRow + i
        if (row < 1) row = 1
        if (row > 32) row = 32

        // Calculate column based on horizontal alignment
        switch (position) {
            case 1: // bottom left
            case 2: // center left
            case 3: // top left
                startCol = 1
                break
            case 5: // top right
            case 6: // center right
            case 7: // bottom right
                startCol = Math.max(1, 78 - getVisualLength(line) - 2)
                break
            case 0: // bottom center
            case 4: // top center
            case 8: // dead center
            default:
                // "+ 2" accounts for the two padding glyphs drawn around each line
                startCol = Math.max(1, Math.floor((80 - longestLineLength - 2) / 2) + 1)
                break
        }

        con.move(row, startCol)
        // Parse and display line with formatting tag support
        displayFormattedLine(line)
    }

    con.color_pair(oldFgColour, oldBgColour)
}
function processSubtitlePacket(packetSize) {
    // Reads one subtitle packet body from `seqread` and applies it.
    //   packetSize - size of the packet body in bytes, as read from the packet header.
    // Layout: uint24 index (little-endian) + uint8 opcode + opcode-specific arguments.
    // Exactly `packetSize` bytes are consumed whatever the opcode or the validity of its
    // arguments, so a short or malformed packet cannot shift the stream and make the
    // caller interpret argument bytes as packet types.

    // Read 24-bit index (little-endian)
    let indexByte0 = seqread.readOneByte()
    let indexByte1 = seqread.readOneByte()
    let indexByte2 = seqread.readOneByte()
    let index = indexByte0 | (indexByte1 << 8) | (indexByte2 << 16)

    let opcode = seqread.readOneByte()
    // Argument bytes still unread; every read below decrements it
    let remainingBytes = packetSize - 4 // Subtract 3 bytes for index + 1 byte for opcode

    switch (opcode) {
        case SSF_OP_SHOW: {
            // Text followed by a null terminator
            if (remainingBytes > 1) {
                let textBytes = seqread.readBytes(remainingBytes)
                let textStr = ""
                // Convert bytes to string, stopping at null terminator
                for (let i = 0; i < remainingBytes - 1; i++) { // -1 for null terminator
                    let byte = sys.peek(textBytes + i)
                    if (byte === 0) break
                    textStr += String.fromCharCode(byte)
                }
                sys.free(textBytes)
                remainingBytes = 0

                subtitleText = textStr
                subtitleVisible = true
                displaySubtitle(subtitleText, subtitlePosition)
            }
            break
        }
        case SSF_OP_HIDE: {
            subtitleVisible = false
            subtitleText = ""
            clearSubtitleArea()
            break
        }
        case SSF_OP_MOVE: {
            if (remainingBytes >= 2) { // Need at least 1 byte for position + 1 null terminator
                let newPosition = seqread.readOneByte()
                seqread.readOneByte() // Read null terminator
                remainingBytes -= 2

                // NOTE(review): displaySubtitle() also handles position 8 (dead centre),
                // which this range check rejects - confirm against the SSF specification.
                if (newPosition >= 0 && newPosition <= 7) {
                    subtitlePosition = newPosition
                    // Re-display current subtitle at new position if visible
                    if (subtitleVisible && subtitleText.length > 0) {
                        clearSubtitleArea()
                        displaySubtitle(subtitleText, subtitlePosition)
                    }
                }
            }
            break
        }
        case SSF_OP_UPLOAD_LOW_FONT:
        case SSF_OP_UPLOAD_HIGH_FONT: {
            // Font upload - read payload length and font data
            if (remainingBytes >= 3) { // uint16 length + at least 1 byte data
                let payloadLen = seqread.readShort()
                remainingBytes -= 2
                if (remainingBytes >= payloadLen) {
                    let fontData = seqread.readBytes(payloadLen)
                    remainingBytes -= payloadLen
                    // Copy at most 1920 bytes into the font area, then write 18 (low font) or 19 (high font)
                    // to the control byte at -1299460
                    for (let i = 0; i < Math.min(payloadLen, 1920); i++) sys.poke(-1300607 - i, sys.peek(fontData + i))
                    sys.poke(-1299460, (opcode == SSF_OP_UPLOAD_LOW_FONT) ? 18 : 19)
                    sys.free(fontData)
                }
            }
            break
        }
        case SSF_OP_NOP:
        default: {
            // Skip remaining bytes
            if (remainingBytes > 0) {
                let skipBytes = seqread.readBytes(remainingBytes)
                sys.free(skipBytes)
                remainingBytes = 0
            }
            if (interactive && opcode !== SSF_OP_NOP) {
                serial.println(`[SUBTITLE UNKNOWN] Index: ${index}, Opcode: 0x${opcode.toString(16).padStart(2, '0')}`)
            }
            break
        }
    }

    // Drain whatever the opcode handler did not read (too-short SHOW, oversized MOVE,
    // rejected font payload, HIDE carrying arguments, ...)
    if (remainingBytes > 0) {
        let leftover = seqread.readBytes(remainingBytes)
        sys.free(leftover)
    }
}
// TAV header structure (32 bytes vs TEV's 24 bytes)
let header = {
magic: new Array(8),
version: 0,
width: 0,
height: 0,
fps: 0,
totalFrames: 0,
waveletFilter: 0, // TAV-specific: wavelet filter type
decompLevels: 0, // TAV-specific: decomposition levels
qualityY: 0, // TAV-specific: Y channel quality
qualityCo: 0, // TAV-specific: Co channel quality
qualityCg: 0, // TAV-specific: Cg channel quality
extraFlags: 0,
videoFlags: 0,
reserved: new Array(7)
}
// Read and validate header
for (let i = 0; i < 8; i++) {
header.magic[i] = seqread.readOneByte()
}
// Validate magic number
let magicValid = true
for (let i = 0; i < 8; i++) {
if (header.magic[i] !== TAV_MAGIC[i]) {
magicValid = false
break
}
}
if (!magicValid) {
con.puts("Error: Invalid TAV file format")
errorlevel = 1
return
}
header.version = seqread.readOneByte()
header.width = seqread.readShort()
header.height = seqread.readShort()
header.fps = seqread.readOneByte()
header.totalFrames = seqread.readInt()
header.waveletFilter = seqread.readOneByte()
header.decompLevels = seqread.readOneByte()
header.qualityY = seqread.readOneByte()
header.qualityCo = seqread.readOneByte()
header.qualityCg = seqread.readOneByte()
header.extraFlags = seqread.readOneByte()
header.videoFlags = seqread.readOneByte()
// Skip reserved bytes
for (let i = 0; i < 7; i++) {
seqread.readOneByte()
}
if (header.version < 1 || header.version > 2) {
con.puts(`Error: Unsupported TAV version ${header.version}`)
errorlevel = 1
return
}
const hasAudio = (header.extraFlags & 0x01) !== 0
const hasSubtitles = (header.extraFlags & 0x02) !== 0
const progressiveTransmission = (header.extraFlags & 0x04) !== 0
const roiCoding = (header.extraFlags & 0x08) !== 0
const isInterlaced = (header.videoFlags & 0x01) !== 0
const isNTSC = (header.videoFlags & 0x02) !== 0
const isLossless = (header.videoFlags & 0x04) !== 0
// Calculate tile dimensions (112x112 vs TEV's 16x16 blocks)
const tilesX = Math.ceil(header.width / TILE_SIZE)
const tilesY = Math.ceil(header.height / TILE_SIZE)
const numTiles = tilesX * tilesY
console.log(`TAV Decoder`)
console.log(`Resolution: ${header.width}x${header.height}`)
console.log(`FPS: ${header.fps}`)
console.log(`Total frames: ${header.totalFrames}`)
console.log(`Wavelet filter: ${header.waveletFilter === WAVELET_5_3_REVERSIBLE ? "5/3 reversible" : "9/7 irreversible"}`)
console.log(`Decomposition levels: ${header.decompLevels}`)
console.log(`Quality: Y=${header.qualityY}, Co=${header.qualityCo}, Cg=${header.qualityCg}`)
console.log(`Tiles: ${tilesX}x${tilesY} (${numTiles} total)`)
console.log(`Colour space: ${header.version === 2 ? "ICtCp" : "YCoCg-R"}`)
console.log(`Features: ${hasAudio ? "Audio " : ""}${hasSubtitles ? "Subtitles " : ""}${progressiveTransmission ? "Progressive " : ""}${roiCoding ? "ROI " : ""}`)
// Frame buffer addresses - same as TEV
const FRAME_PIXELS = header.width * header.height
const FRAME_SIZE = FRAME_PIXELS * 3 // RGB buffer size
const RGB_BUFFER_A = sys.malloc(FRAME_SIZE)
const RGB_BUFFER_B = sys.malloc(FRAME_SIZE)
// Ping-pong buffer pointers (swap instead of copy)
let CURRENT_RGB_ADDR = RGB_BUFFER_A
let PREV_RGB_ADDR = RGB_BUFFER_B
// Motion vector storage
let motionVectors = new Array(numTiles)
for (let i = 0; i < numTiles; i++) {
motionVectors[i] = { mvX: 0, mvY: 0, rcf: 1.0 }
}
// Audio state
let audioBufferBytesLastFrame = 0
let frame_cnt = 0
let frametime = 1000000000.0 / header.fps
let nextFrameTime = 0
let mp2Initialised = false
let audioFired = false
// Performance tracking variables (from TEV)
let decompressTime = 0
let decodeTime = 0
let uploadTime = 0
let biasTime = 0
const BIAS_LIGHTING_MIN = 1.0 / 16.0
let oldBgcol = [BIAS_LIGHTING_MIN, BIAS_LIGHTING_MIN, BIAS_LIGHTING_MIN]
let notifHidden = false
function getRGBfromScr(x, y) {
    // Samples the on-screen pixel at (x, y) and returns [r, g, b], each scaled from a
    // 4-bit nibble to the range 0.0..1.0. Pixels are addressed linearly, WIDTH per row,
    // growing downward from two base addresses.
    // NOTE(review): presumably the first plane packs red|green and the second blue|alpha - confirm.
    const pixelIndex = x + WIDTH * y
    const redGreen = sys.peek(-1048577 - pixelIndex)
    const blueAlpha = sys.peek(-1310721 - pixelIndex)

    const nibbleToUnit = (nibble) => nibble / 15.0
    return [
        nibbleToUnit(redGreen >>> 4),  // high nibble of plane 1
        nibbleToUnit(redGreen & 15),   // low nibble of plane 1
        nibbleToUnit(blueAlpha >>> 4)  // high nibble of plane 2
    ]
}
function setBiasLighting() {
    // Sets the screen background to a smoothed, dimmed average of the colours found
    // along the four edges of the video picture.
    const nativeDims = graphics.getPixelDimension()
    const videoW = header.width
    const videoH = header.height

    // Top-left corner of the picture when centred on the native screen
    const originX = Math.floor((nativeDims[0] - videoW) / 2)
    const originY = Math.floor((nativeDims[1] - videoH) / 2)

    // Sampling grid: at least 8 pixels between samples, inset from the edge by up to 8 pixels
    const strideX = Math.max(8, Math.floor(videoW / 18))
    const strideY = Math.max(8, Math.floor(videoH / 17))
    const inset = Math.min(8, Math.floor(videoW / 70))

    const edgeSamples = []
    // Top and bottom edges
    for (let px = inset; px < videoW - inset; px += strideX) {
        edgeSamples.push(getRGBfromScr(px + originX, inset + originY))
        edgeSamples.push(getRGBfromScr(px + originX, videoH - inset - 1 + originY))
    }
    // Left and right edges
    for (let py = inset; py < videoH - inset; py += strideY) {
        edgeSamples.push(getRGBfromScr(inset + originX, py + originY))
        edgeSamples.push(getRGBfromScr(videoW - inset - 1 + originX, py + originY))
    }

    // Per channel: mean of the samples at half strength on top of the minimum level,
    // then blended 5:1 with the previous background colour
    const smoothed = [0, 1, 2].map(channel => {
        const channelSum = edgeSamples.reduce((acc, rgb) => acc + rgb[channel], 0.0)
        const target = BIAS_LIGHTING_MIN + (channelSum / edgeSamples.length / 2.0)
        return (oldBgcol[channel] * 5 + target) / 6.0
    })

    oldBgcol = smoothed
    graphics.setBackground(
        Math.round(smoothed[0] * 255),
        Math.round(smoothed[1] * 255),
        Math.round(smoothed[2] * 255)
    )
}
function updateDataRateBin(rate) {
    // Appends `rate` to the rolling window videoRateBin and, once the window holds
    // more than header.fps entries, drops the oldest one.
    videoRateBin.push(rate)
    const windowOverfull = videoRateBin.length > header.fps
    if (!windowOverfull) return
    videoRateBin.shift()
}
let FRAME_TIME = 1.0 / header.fps
let frameCount = 0
let trueFrameCount = 0
let stopPlay = false
let akku = FRAME_TIME
let akku2 = 0.0
let blockDataPtr = sys.malloc(2377764)
// Playback loop - properly adapted from TEV
// Main playback: pulls packets off `seqread` and dispatches them by type until the user
// stops playback, the whole file has been read, or (when the header declares a non-zero
// frame count) that many frames have been shown.
// Pacing: `akku` banks elapsed wall-clock seconds. Packets are only read while at least
// one frame period (FRAME_TIME) is banked, and only a sync packet spends a frame period,
// so everything between two sync packets is processed back to back.
try {
let t1 = sys.nanoTime()
while (!stopPlay && seqread.getReadCount() < FILE_LENGTH && (header.totalFrames == 0 || header.totalFrames > 0 && frameCount < header.totalFrames)) {
// Handle interactive controls
if (interactive) {
// NOTE(review): presumably a write to -40 requests a key poll and -41 then holds the key code - confirm
sys.poke(-40, 1)
if (sys.peek(-41) == 67) { // Backspace
stopPlay = true
break
}
}
if (akku >= FRAME_TIME) {
// Read packet header
const packetType = seqread.readOneByte()
if (packetType === TAV_PACKET_SYNC) {
// Sync packet - no additional data
// Marks the end of one displayed frame: spend one frame period and advance the counters
akku -= FRAME_TIME
frameCount++
trueFrameCount++
// Swap ping-pong buffers instead of expensive memcpy (752KB copy eliminated!)
let temp = CURRENT_RGB_ADDR
CURRENT_RGB_ADDR = PREV_RGB_ADDR
PREV_RGB_ADDR = temp
} else if (packetType === TAV_PACKET_IFRAME || packetType === TAV_PACKET_PFRAME) {
// Video packet
const compressedSize = seqread.readInt()
const isKeyframe = (packetType === TAV_PACKET_IFRAME)
// Read compressed tile data
// compressedPtr is freed on every path below: in the decompression catch, or in the decode finally
let compressedPtr = seqread.readBytes(compressedSize)
updateDataRateBin(compressedSize)
let actualSize
let decompressStart = sys.nanoTime()
try {
// Use gzip decompression (only compression format supported in TSVM JS)
actualSize = gzip.decompFromTo(compressedPtr, compressedSize, blockDataPtr)
decompressTime = (sys.nanoTime() - decompressStart) / 1000000.0
} catch (e) {
decompressTime = (sys.nanoTime() - decompressStart) / 1000000.0
console.log(`Frame ${frameCount}: Gzip decompression failed, skipping (compressed size: ${compressedSize}, error: ${e})`)
sys.free(compressedPtr)
// Note: `continue` also skips the akku/t1 bookkeeping at the bottom of the loop for this pass
continue
}
try {
// serial.println(actualSize)
let decodeStart = sys.nanoTime()
// Call TAV hardware decoder (like TEV's tevDecode but with RGB buffer outputs)
graphics.tavDecode(
blockDataPtr,
CURRENT_RGB_ADDR, PREV_RGB_ADDR, // RGB buffer pointers (not float arrays!)
header.width, header.height,
header.qualityY, header.qualityCo, header.qualityCg,
frameCount,
debugMotionVectors,
header.waveletFilter, // TAV-specific parameter
header.decompLevels, // TAV-specific parameter
enableDeblocking,
isLossless,
header.version // TAV version for colour space detection
)
decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0
// Upload RGB buffer to display framebuffer (like TEV)
let uploadStart = sys.nanoTime()
graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, header.width, header.height, frameCount, true)
uploadTime = (sys.nanoTime() - uploadStart) / 1000000.0
// Defer audio playback until a first frame is sent
if (isInterlaced) {
// fire audio after frame 1
if (!audioFired && frameCount > 0) {
audio.play(0)
audioFired = true
}
}
else {
// fire audio after frame 0
if (!audioFired) {
audio.play(0)
audioFired = true
}
}
} catch (e) {
// A frame that fails to decode is only logged; playback carries on with the next packet
console.log(`Frame ${frameCount}: decode failed: ${e}`)
} finally {
sys.free(compressedPtr)
}
let biasStart = sys.nanoTime()
setBiasLighting()
biasTime = (sys.nanoTime() - biasStart) / 1000000.0
// Log performance data every 60 frames
if (frameCount % 60 == 0 || frameCount == 0) {
let totalTime = decompressTime + decodeTime + uploadTime + biasTime
console.log(`Frame ${frameCount}: Decompress=${decompressTime.toFixed(1)}ms, Decode=${decodeTime.toFixed(1)}ms, Upload=${uploadTime.toFixed(1)}ms, Bias=${biasTime.toFixed(1)}ms, Total=${totalTime.toFixed(1)}ms`)
}
} else if (packetType === TAV_PACKET_AUDIO_MP2) {
// MP2 Audio packet
let audioLen = seqread.readInt()
// The MP2 decoder is initialised once, on the first audio packet
if (!mp2Initialised) {
mp2Initialised = true
audio.mp2Init()
}
// The packet body is read straight to address SND_BASE_ADDR - 2368, then decoded and uploaded
seqread.readBytes(audioLen, SND_BASE_ADDR - 2368)
audio.mp2Decode()
audio.mp2UploadDecoded(0)
} else if (packetType === TAV_PACKET_SUBTITLE) {
// Subtitle packet - same format as TEV
let packetSize = seqread.readInt()
processSubtitlePacket(packetSize)
} else if (packetType == 0x00) {
// Silently discard, faulty subtitle creation can cause this as 0x00 is used as an argument terminator
} else {
// Any other packet type ends playback; errorlevel is left unchanged
println(`Unknown packet type: 0x${packetType.toString(16)}`)
break
}
}
// Bank the time this pass took (seconds); akku2 is a running total that is never spent
let t2 = sys.nanoTime()
akku += (t2 - t1) / 1000000000.0
akku2 += (t2 - t1) / 1000000000.0
// Simple progress display
if (interactive) {
notifHideTimer += (t2 - t1)
// Blank the first text row once, after NOTIF_SHOWUPTIME nanoseconds have passed
if (!notifHidden && notifHideTimer > (NOTIF_SHOWUPTIME + FRAME_TIME)) {
con.move(1, 1)
print(' '.repeat(79))
notifHidden = true
}
if (notifHidden) {
// The cursor and colours are set, but the progress print below is commented out
con.move(31, 1)
con.color_pair(253, 0)
//print(`Frame: ${frameCount}/${header.totalFrames} (${((frameCount / akku2 * 100)|0) / 100}f) `)
}
}
t1 = t2
}
}
catch (e) {
printerrln(`TAV decode error: ${e}`)
errorlevel = 1
}
finally {
// Cleanup
// Runs on normal end, user stop and error alike: release the three buffers and restore the console
sys.free(blockDataPtr)
sys.free(RGB_BUFFER_A)
sys.free(RGB_BUFFER_B)
con.curs_set(1)
con.clear()
if (errorlevel === 0) {
console.log(`Playback completed: ${frameCount} frames`)
} else {
console.log(`Playback failed with error ${errorlevel}`)
}
}
graphics.setPalette(0, 0, 0, 0, 0)
con.move(cy, cx) // restore cursor
// The script's result is the error level: 0 on success, 1 after a failure
return errorlevel

View File

@@ -709,6 +709,7 @@ DCT-based compression, motion compensation, and efficient temporal coding.
uint8 Video Flags
- bit 0 = is interlaced (should be default for most non-archival TEV videos)
- bit 1 = is NTSC framerate (repeat every 1000th frame)
- bit 2 = is lossless mode
uint8 Reserved, fill with zero
## Packet Types
@@ -792,6 +793,168 @@ The format is designed to be compatible with SubRip and SAMI (without markups).
text argument may be terminated by 0x00 BEFORE the entire arguments being terminated by 0x00,
leaving extra 0x00 on the byte stream. A decoder must be able to handle the extra zeros.
## NTSC Framerate handling
The encoder encodes the frames as-is. The decoder must duplicate every 1000th frame to keep the decoding
in-sync.
--------------------------------------------------------------------------------
TSVM Advanced Video (TAV) Format
Created by Claude on 2025-09-13
TAV is a next-generation video codec for TSVM utilizing Discrete Wavelet Transform (DWT)
similar to JPEG2000, providing superior compression efficiency and scalability compared
to DCT-based codecs like TEV. Features include multi-resolution encoding, progressive
transmission capability, and region-of-interest coding.
## Version History
- Version 1.0: Initial DWT-based implementation with 5/3 reversible filter
- Version 1.1: Added 9/7 irreversible filter for higher compression
- Version 1.2: Multi-resolution pyramid encoding with up to 4 decomposition levels
- Version 1.3: Optimized 112x112 tiles for TSVM resolution with up to 6 decomposition levels
# File Structure
\x1F T S V M T A V
[HEADER]
[PACKET 0]
[PACKET 1]
[PACKET 2]
...
## Header (32 bytes)
uint8 Magic[8]: "\x1FTSVM TAV"
uint8 Version: 1 = YCoCg-R colour space, 2 = ICtCp colour space
uint16 Width: video width in pixels
uint16 Height: video height in pixels
uint8 FPS: frames per second
uint32 Total Frames: number of video frames
uint8 Wavelet Filter Type: 0=5/3 reversible, 1=9/7 irreversible
uint8 Decomposition Levels: number of DWT levels (1-6)
uint8 Quantiser Index for Y channel (1: lossless, 255: potato)
uint8 Quantiser Index for Co channel (1: lossless, 255: potato)
uint8 Quantiser Index for Cg channel (1: lossless, 255: potato)
uint8 Extra Feature Flags
- bit 0 = has audio
- bit 1 = has subtitle
uint8 Video Flags
- bit 0 = is interlaced (unused)
- bit 1 = is NTSC framerate
- bit 2 = is lossless mode
uint8 Reserved[7]: fill with zeros
## Packet Types
0x10: I-frame (intra-coded frame)
0x11: P-frame (predicted frame with motion compensation)
0x20: MP2 audio packet
0x30: Subtitle in "Simple" format
0xFF: sync packet
## Video Packet Structure
uint8 Packet Type
uint32 Compressed Size
* Zstd-compressed Block Data
## Block Data (per 112x112 tile)
uint8 Mode: encoding mode
0x00 = SKIP (copy from previous frame)
0x01 = INTRA (DWT-coded, no prediction)
0x02 = INTER (DWT-coded with motion compensation)
0x03 = MOTION (motion vector only, no residual)
int16 Motion Vector X (1/4 pixel precision)
int16 Motion Vector Y (1/4 pixel precision)
float32 Rate Control Factor (4 bytes, little-endian)
## DWT Coefficient Structure (per tile)
For each decomposition level L (from highest to lowest):
uint16 LL_size: size of LL subband coefficients
uint16 LH_size: size of LH subband coefficients
uint16 HL_size: size of HL subband coefficients
uint16 HH_size: size of HH subband coefficients
int16[] LL_coeffs: quantized LL subband (low-low frequencies)
int16[] LH_coeffs: quantized LH subband (low-high frequencies)
int16[] HL_coeffs: quantized HL subband (high-low frequencies)
int16[] HH_coeffs: quantized HH subband (high-high frequencies)
## DWT Implementation Details
### Wavelet Filters
- 5/3 Reversible Filter (lossless capable):
* Analysis: Low-pass [-1/8, 1/4, 3/4, 1/4, -1/8], High-pass [-1/2, 1, -1/2]
* Synthesis: Low-pass [1/2, 1, 1/2], High-pass [-1/8, -1/4, 3/4, -1/4, -1/8]
- 9/7 Irreversible Filter (higher compression):
* Analysis: Daubechies 9/7 coefficients optimized for image compression
* Provides better energy compaction than 5/3 but lossy reconstruction
### Decomposition Levels
- Level 1: 112x112 → 56x56 (LL) + 3×56x56 subbands (LH,HL,HH)
- Level 2: 56x56 → 28x28 (LL) + 3×28x28 subbands
- Level 3: 28x28 → 14x14 (LL) + 3×14x14 subbands
- Level 4: 14x14 → 7x7 (LL) + 3×7x7 subbands
- Level 5: 7x7 → 3x3 (LL) + 3×3x3 subbands
- Level 6: 3x3 → 1x1 (LL) + 3×1x1 subbands (maximum)
### Quantization Strategy
TAV uses different quantization steps for each subband based on human visual
system sensitivity:
- LL subbands: Fine quantization (preserve DC and low frequencies)
- LH/HL subbands: Medium quantization (horizontal and vertical details are less critical)
- HH subbands: Coarse quantization (diagonal details and high frequency noise can be discarded)
### Progressive Transmission
When enabled, coefficients are transmitted in order of visual importance:
1. LL subband of highest decomposition level (thumbnail)
2. Lower frequency subbands first
3. Higher frequency subbands for refinement
## Motion Compensation
- Search range: ±28 pixels (optimized for 112x112 tiles)
- Sub-pixel precision: 1/4 pixel with bilinear interpolation
- Tile size: 112x112 pixels (perfect fit for TSVM 560x448 resolution)
* Exactly 5×4 = 20 tiles per frame (560÷112 = 5, 448÷112 = 4)
* No partial tiles needed - optimal for processing efficiency
- Uses Sum of Absolute Differences (SAD) for motion estimation
- Overlapped block motion compensation (OBMC) for smooth boundaries
## Colour Space
TAV operates in YCoCg-R colour space with full resolution channels:
- Y: Luma channel (full resolution, fine quantization)
- Co: Orange-Cyan chroma (full resolution, aggressive quantization by default)
- Cg: Green-Magenta chroma (full resolution, very aggressive quantization by default)
## Compression Features
- 112x112 DWT tiles vs 16x16 DCT blocks in TEV
- Multi-resolution representation enables scalable decoding
- Better frequency localization than DCT
- Reduced blocking artifacts due to overlapping basis functions
- Region-of-Interest (ROI) coding for selective quality enhancement
- Progressive transmission for bandwidth adaptation
## Performance Comparison
Expected improvements over TEV:
- 20-30% better compression efficiency
- Reduced blocking artifacts
- Scalable quality/resolution decoding
- Better performance on natural images vs artificial content
- Full resolution chroma preserves color detail while aggressive quantization maintains compression
## Hardware Acceleration Functions
TAV decoder requires new GraphicsJSR223Delegate functions:
- tavDecode(): Main DWT decoding function
- tavDWT2D(): 2D DWT/IDWT transforms
- tavQuantize(): Multi-band quantization
- tavMotionCompensate(): 112x112 tile motion compensation
## Audio Support
Reuses existing MP2 audio infrastructure from TEV/MOV formats for compatibility.
## Subtitle Support
Uses same Simple Subtitle Format (SSF) as TEV for text overlay functionality.
## NTSC Framerate handling
Unlike the TEV format, the TAV encoder emits an extra sync packet after every 1000th frame. The decoder
can simply play the video without any special treatment.
--------------------------------------------------------------------------------
Sound Adapter

File diff suppressed because it is too large Load Diff

View File

@@ -438,13 +438,89 @@ class VM(
(memspace as PeriBase).poke(offset, value)
}
fun peek(addr:Long): Byte? {
/**
 * Writes [value] as two bytes, least-significant byte first, at [addr] and `addr + 1`.
 *
 * @throws ErrorIllegalAccess if [addr] maps to no memory space, or if the two-byte span
 * would run past the end of an [UnsafePtr]-backed space.
 */
fun pokeShort(addr: Long, value: Short) {
    val value0 = value.toByte()
    val value1 = value.toInt().shr(8).toByte()

    val (memspace, offset) = translateAddr(addr)
    if (memspace == null)
        throw ErrorIllegalAccess(this, addr)
    else if (memspace is UnsafePtr) {
        // The LAST byte of the span must be in range; testing only addr would let the
        // high byte be written one past the end of the block.
        if (addr + 1 >= memspace.size)
            throw ErrorIllegalAccess(this, addr)
        else {
            memspace.set(offset+0, value0)
            memspace.set(offset+1, value1)
        }
    }
    else {
        (memspace as PeriBase).poke(offset+0, value0)
        (memspace as PeriBase).poke(offset+1, value1)
    }
}
/**
 * Writes the raw IEEE-754 bits of [value] as four bytes, least-significant byte first,
 * at [addr] .. `addr + 3`.
 *
 * @throws ErrorIllegalAccess if [addr] maps to no memory space, or if the four-byte span
 * would run past the end of an [UnsafePtr]-backed space.
 */
fun pokeFloat(addr: Long, value: Float) {
    val vi = value.toRawBits()
    val value0 = vi.toByte()
    val value1 = vi.shr(8).toByte()
    val value2 = vi.shr(16).toByte()
    val value3 = vi.shr(24).toByte()

    val (memspace, offset) = translateAddr(addr)
    if (memspace == null)
        throw ErrorIllegalAccess(this, addr)
    else if (memspace is UnsafePtr) {
        // The LAST byte of the span must be in range; testing only addr would let up to
        // three bytes be written past the end of the block.
        if (addr + 3 >= memspace.size)
            throw ErrorIllegalAccess(this, addr)
        else {
            memspace.set(offset+0, value0)
            memspace.set(offset+1, value1)
            memspace.set(offset+2, value2)
            memspace.set(offset+3, value3)
        }
    }
    else {
        (memspace as PeriBase).poke(offset+0, value0)
        (memspace as PeriBase).poke(offset+1, value1)
        (memspace as PeriBase).poke(offset+2, value2)
        (memspace as PeriBase).poke(offset+3, value3)
    }
}
/**
 * Writes [value] as four bytes, least-significant byte first, at [addr] .. `addr + 3`.
 *
 * @throws ErrorIllegalAccess if [addr] maps to no memory space, or if the four-byte span
 * would run past the end of an [UnsafePtr]-backed space.
 */
fun pokeInt(addr: Long, value: Int) {
    val value0 = value.toByte()
    val value1 = value.shr(8).toByte()
    val value2 = value.shr(16).toByte()
    val value3 = value.shr(24).toByte()

    val (memspace, offset) = translateAddr(addr)
    if (memspace == null)
        throw ErrorIllegalAccess(this, addr)
    else if (memspace is UnsafePtr) {
        // The LAST byte of the span must be in range; testing only addr would let up to
        // three bytes be written past the end of the block.
        if (addr + 3 >= memspace.size)
            throw ErrorIllegalAccess(this, addr)
        else {
            memspace.set(offset+0, value0)
            memspace.set(offset+1, value1)
            memspace.set(offset+2, value2)
            memspace.set(offset+3, value3)
        }
    }
    else {
        (memspace as PeriBase).poke(offset+0, value0)
        (memspace as PeriBase).poke(offset+1, value1)
        (memspace as PeriBase).poke(offset+2, value2)
        (memspace as PeriBase).poke(offset+3, value3)
    }
}
fun peek(addr:Long): Byte {
val (memspace, offset) = translateAddr(addr)
// println("peek $addr -> ${offset}@${memspace?.javaClass?.canonicalName}")
return if (memspace == null)
null
throw NullPointerException()//null
else if (memspace is UnsafePtr) {
if (addr >= memspace.size)
throw ErrorIllegalAccess(this, addr)
@@ -452,7 +528,76 @@ class VM(
memspace.get(offset)
}
else
(memspace as PeriBase).peek(offset)
(memspace as PeriBase).peek(offset)!!
}
/**
 * Reads two bytes at [addr] and `addr + 1` and combines them, least-significant byte
 * first, into a [Short].
 *
 * @throws NullPointerException if [addr] maps to no memory space, or if a peripheral
 * returns no value for one of the bytes.
 * @throws ErrorIllegalAccess if the two-byte span would run past the end of an
 * [UnsafePtr]-backed space.
 */
fun peekShort(addr: Long): Short {
    val (memspace, offset) = translateAddr(addr)
    return if (memspace == null)
        throw NullPointerException()
    else if (memspace is UnsafePtr) {
        // The LAST byte of the span must be in range; testing only addr would let the
        // high byte be read from one past the end of the block.
        if (addr + 1 >= memspace.size)
            throw ErrorIllegalAccess(this, addr)
        else {
            (memspace.get(offset+0).toUint() or
                    memspace.get(offset+1).toUint().shl(8)).toShort()
        }
    }
    else {
        ((memspace as PeriBase).peek(offset+0)!!.toUint() or
                (memspace as PeriBase).peek(offset+1)!!.toUint().shl(8)).toShort()
    }
}
/**
 * Reads four bytes at [addr] .. `addr + 3`, combines them least-significant byte first,
 * and reinterprets the resulting bits as a [Float].
 *
 * @throws NullPointerException if [addr] maps to no memory space, or if a peripheral
 * returns no value for one of the bytes.
 * @throws ErrorIllegalAccess if the four-byte span would run past the end of an
 * [UnsafePtr]-backed space.
 */
fun peekFloat(addr: Long): Float {
    val (memspace, offset) = translateAddr(addr)
    return if (memspace == null)
        throw NullPointerException()
    else if (memspace is UnsafePtr) {
        // The LAST byte of the span must be in range; testing only addr would let up to
        // three bytes be read from past the end of the block.
        if (addr + 3 >= memspace.size)
            throw ErrorIllegalAccess(this, addr)
        else {
            Float.fromBits(memspace.get(offset+0).toUint() or
                    memspace.get(offset+1).toUint().shl(8) or
                    memspace.get(offset+2).toUint().shl(16) or
                    memspace.get(offset+3).toUint().shl(24)
            )
        }
    }
    else {
        Float.fromBits((memspace as PeriBase).peek(offset+0)!!.toUint() or
                (memspace as PeriBase).peek(offset+1)!!.toUint().shl(8) or
                (memspace as PeriBase).peek(offset+2)!!.toUint().shl(16) or
                (memspace as PeriBase).peek(offset+3)!!.toUint().shl(24)
        )
    }
}
/**
 * Reads four bytes at [addr] .. `addr + 3` and combines them, least-significant byte
 * first, into an [Int].
 *
 * The return type is declared nullable, but no branch returns `null`: an unmapped
 * address throws instead.
 *
 * @throws NullPointerException if [addr] maps to no memory space, or if a peripheral
 * returns no value for one of the bytes.
 * @throws ErrorIllegalAccess if the four-byte span would run past the end of an
 * [UnsafePtr]-backed space.
 */
fun peekInt(addr: Long): Int? {
    val (memspace, offset) = translateAddr(addr)
    return if (memspace == null)
        throw NullPointerException()
    else if (memspace is UnsafePtr) {
        // The LAST byte of the span must be in range; testing only addr would let up to
        // three bytes be read from past the end of the block.
        if (addr + 3 >= memspace.size)
            throw ErrorIllegalAccess(this, addr)
        else {
            (memspace.get(offset+0).toUint() or
                    memspace.get(offset+1).toUint().shl(8) or
                    memspace.get(offset+2).toUint().shl(16) or
                    memspace.get(offset+3).toUint().shl(24)
            )
        }
    }
    else {
        ((memspace as PeriBase).peek(offset+0)!!.toUint() or
                (memspace as PeriBase).peek(offset+1)!!.toUint().shl(8) or
                (memspace as PeriBase).peek(offset+2)!!.toUint().shl(16) or
                (memspace as PeriBase).peek(offset+3)!!.toUint().shl(24)
        )
    }
}
private fun findEmptySpace(blockSize: Int): Int? {

View File

@@ -6,16 +6,19 @@ CFLAGS = -std=c99 -Wall -Wextra -O2 -D_GNU_SOURCE
LIBS = -lm -lzstd
# Source files and targets
SOURCES = encoder_tev.c
TARGETS = encoder_tev
TARGETS = encoder_tev encoder_tav
# Build all encoders
all: $(TARGETS)
# Build main encoder
encoder_tev: encoder_tev.c
tev: encoder_tev.c
rm -f encoder_tev
$(CC) $(CFLAGS) -o $@ $< $(LIBS)
$(CC) $(CFLAGS) -o encoder_tev $< $(LIBS)
tav: encoder_tav.c
rm -f encoder_tav
$(CC) $(CFLAGS) -o encoder_tav $< $(LIBS)
# Default target
$(TARGETS): all
@@ -45,8 +48,8 @@ help:
@echo ""
@echo "Targets:"
@echo " all - Build both encoders (default)"
@echo " encoder_tev - Build the main TEV encoder"
@echo " encoder_tev_xyb - Build the XYB color space encoder"
@echo " tev - Build the main TEV encoder"
@echo " tav - Build the advanced TAV encoder"
@echo " debug - Build with debug symbols"
@echo " clean - Remove build artifacts"
@echo " install - Install to /usr/local/bin"
@@ -54,8 +57,9 @@ help:
@echo " help - Show this help"
@echo ""
@echo "Usage:"
@echo " make # Build both encoders"
@echo " ./encoder_tev input.mp4 -o output.tev"
@echo " ./encoder_tev_xyb input.mp4 -o output.tev"
@echo " make # Build both encoders"
@echo " make tev # Build TEV encoder"
@echo " make tav # Build TAV encoder"
@echo " sudo make install # Install both encoders"
.PHONY: all clean install check-deps help debug

2214
video_encoder/encoder_tav.c Normal file

File diff suppressed because it is too large Load Diff