diff --git a/assets/disk0/tvdos/bin/playtav.js b/assets/disk0/tvdos/bin/playtav.js new file mode 100644 index 0000000..174e6e7 --- /dev/null +++ b/assets/disk0/tvdos/bin/playtav.js @@ -0,0 +1,741 @@ +// Created by Claude on 2025-09-13. +// TSVM Advanced Video (TAV) Format Decoder - DWT-based compression +// Adapted from the working playtev.js decoder +// Usage: playtav moviefile.tav [options] +// Options: -i (interactive), -debug-mv (show motion vector debug visualization) +// -deinterlace=algorithm (yadif or bwdif, default: yadif) +// -deblock (enable post-processing deblocking filter) + +const WIDTH = 560 +const HEIGHT = 448 +const TILE_SIZE = 112 // 112x112 tiles for DWT (perfect fit for TSVM 560x448 resolution) +const TAV_MAGIC = [0x1F, 0x54, 0x53, 0x56, 0x4D, 0x54, 0x41, 0x56] // "\x1FTSVM TAV" +const TAV_VERSION = 1 // Initial DWT version +const SND_BASE_ADDR = audio.getBaseAddr() +const pcm = require("pcm") +const MP2_FRAME_SIZE = [144,216,252,288,360,432,504,576,720,864,1008,1152,1440,1728] + +// Tile encoding modes (same as TEV block modes) +const TAV_MODE_SKIP = 0x00 +const TAV_MODE_INTRA = 0x01 +const TAV_MODE_INTER = 0x02 +const TAV_MODE_MOTION = 0x03 + +// Packet types (same as TEV) +const TAV_PACKET_IFRAME = 0x10 +const TAV_PACKET_PFRAME = 0x11 +const TAV_PACKET_AUDIO_MP2 = 0x20 +const TAV_PACKET_SUBTITLE = 0x30 +const TAV_PACKET_SYNC = 0xFF + +// Wavelet filter types +const WAVELET_5_3_REVERSIBLE = 0 +const WAVELET_9_7_IRREVERSIBLE = 1 + +// Subtitle opcodes (SSF format - same as TEV) +const SSF_OP_NOP = 0x00 +const SSF_OP_SHOW = 0x01 +const SSF_OP_HIDE = 0x02 +const SSF_OP_MOVE = 0x03 +const SSF_OP_UPLOAD_LOW_FONT = 0x80 +const SSF_OP_UPLOAD_HIGH_FONT = 0x81 + +// Subtitle state +let subtitleVisible = false +let subtitleText = "" +let subtitlePosition = 0 // 0=bottom center (default) + +// Parse command line options +let interactive = false +let debugMotionVectors = false +let deinterlaceAlgorithm = "yadif" +let enableDeblocking = false // Default: 
disabled (use -deblock to enable) + +if (exec_args.length > 2) { + for (let i = 2; i < exec_args.length; i++) { + const arg = exec_args[i].toLowerCase() + if (arg === "-i") { + interactive = true + } else if (arg === "-debug-mv") { + debugMotionVectors = true + } else if (arg === "-deblock") { + enableDeblocking = true + } else if (arg.startsWith("-deinterlace=")) { + deinterlaceAlgorithm = arg.substring(13) + } + } +} + +const fullFilePath = _G.shell.resolvePathInput(exec_args[1]) +const FILE_LENGTH = files.open(fullFilePath.full).size + +let videoRateBin = [] +let errorlevel = 0 +let notifHideTimer = 0 +const NOTIF_SHOWUPTIME = 3000000000 +let [cy, cx] = con.getyx() + +let seqreadserial = require("seqread") +let seqreadtape = require("seqreadtape") +let seqread = undefined +let fullFilePathStr = fullFilePath.full + +// Select seqread driver to use +if (fullFilePathStr.startsWith('$:/TAPE') || fullFilePathStr.startsWith('$:\\\\TAPE')) { + seqread = seqreadtape + seqread.prepare(fullFilePathStr) + seqread.seek(0) +} else { + seqread = seqreadserial + seqread.prepare(fullFilePathStr) +} + +con.clear() +con.curs_set(0) +graphics.setGraphicsMode(4) // 4096-colour mode +graphics.clearPixels(0) +graphics.clearPixels2(0) + +// Initialize audio +audio.resetParams(0) +audio.purgeQueue(0) +audio.setPcmMode(0) +audio.setMasterVolume(0, 255) + +// Subtitle display functions +function clearSubtitleArea() { + // Clear the subtitle area at the bottom of the screen + // Text mode is 80x32, so clear the bottom few lines + let oldFgColour = con.get_color_fore() + let oldBgColour = con.get_color_back() + + con.color_pair(255, 255) // transparent to clear + + // Clear bottom 4 lines for subtitles + for (let row = 29; row <= 32; row++) { + con.move(row, 1) + for (let col = 1; col <= 80; col++) { + print(" ") + } + } + + con.color_pair(oldFgColour, oldBgColour) +} + +function getVisualLength(line) { + // Calculate the visual length of a line excluding formatting tags + let visualLength 
= 0 + let i = 0 + + while (i < line.length) { + if (i < line.length - 2 && line[i] === '<') { + // Check for formatting tags and skip them + if (line.substring(i, i + 3).toLowerCase() === '' || + line.substring(i, i + 3).toLowerCase() === '') { + i += 3 // Skip tag + } else if (i < line.length - 3 && + (line.substring(i, i + 4).toLowerCase() === '' || + line.substring(i, i + 4).toLowerCase() === '')) { + i += 4 // Skip closing tag + } else { + // Not a formatting tag, count the character + visualLength++ + i++ + } + } else { + // Regular character, count it + visualLength++ + i++ + } + } + + return visualLength +} + +function displayFormattedLine(line) { + // Parse line and handle and tags with colour changes + // Default subtitle colour: yellow (231), formatted text: white (254) + + let i = 0 + let inBoldOrItalic = false + + // insert initial padding block + con.color_pair(0, 255) + con.prnch(0xDE) + con.color_pair(231, 0) + + while (i < line.length) { + if (i < line.length - 2 && line[i] === '<') { + // Check for opening tags + if (line.substring(i, i + 3).toLowerCase() === '' || + line.substring(i, i + 3).toLowerCase() === '') { + con.color_pair(254, 0) // Switch to white for formatted text + inBoldOrItalic = true + i += 3 + } else if (i < line.length - 3 && + (line.substring(i, i + 4).toLowerCase() === '' || + line.substring(i, i + 4).toLowerCase() === '')) { + con.color_pair(231, 0) // Switch back to yellow for normal text + inBoldOrItalic = false + i += 4 + } else { + // Not a formatting tag, print the character + print(line[i]) + i++ + } + } else { + // Regular character, print it + print(line[i]) + i++ + } + } + + // insert final padding block + con.color_pair(0, 255) + con.prnch(0xDD) + con.color_pair(231, 0) +} + +function displaySubtitle(text, position = 0) { + if (!text || text.length === 0) { + clearSubtitleArea() + return + } + + // Set subtitle colours: yellow (231) on black (0) + let oldFgColour = con.get_color_fore() + let oldBgColour = 
con.get_color_back() + con.color_pair(231, 0) + + // Split text into lines + let lines = text.split('\n') + + // Calculate position based on subtitle position setting + let startRow, startCol + // Calculate visual length without formatting tags for positioning + let longestLineLength = lines.map(s => getVisualLength(s)).sort().last() + + switch (position) { + case 2: // center left + case 6: // center right + case 8: // dead center + startRow = 16 - Math.floor(lines.length / 2) + break + case 3: // top left + case 4: // top center + case 5: // top right + startRow = 2 + break + case 0: // bottom center + case 1: // bottom left + case 7: // bottom right + default: + startRow = 32 - lines.length + startRow = 32 - lines.length + startRow = 32 - lines.length // Default to bottom center + } + + // Display each line + for (let i = 0; i < lines.length; i++) { + let line = lines[i].trim() + if (line.length === 0) continue + + let row = startRow + i + if (row < 1) row = 1 + if (row > 32) row = 32 + + // Calculate column based on alignment + switch (position) { + case 1: // bottom left + case 2: // center left + case 3: // top left + startCol = 1 + break + case 5: // top right + case 6: // center right + case 7: // bottom right + startCol = Math.max(1, 78 - getVisualLength(line) - 2) + break + case 0: // bottom center + case 4: // top center + case 8: // dead center + default: + startCol = Math.max(1, Math.floor((80 - longestLineLength - 2) / 2) + 1) + break + } + + con.move(row, startCol) + + // Parse and display line with formatting tag support + displayFormattedLine(line) + } + + con.color_pair(oldFgColour, oldBgColour) +} + +function processSubtitlePacket(packetSize) { + // Read subtitle packet data according to SSF format + // uint24 index + uint8 opcode + variable arguments + + let index = 0 + // Read 24-bit index (little-endian) + let indexByte0 = seqread.readOneByte() + let indexByte1 = seqread.readOneByte() + let indexByte2 = seqread.readOneByte() + index = 
indexByte0 | (indexByte1 << 8) | (indexByte2 << 16) + + let opcode = seqread.readOneByte() + let remainingBytes = packetSize - 4 // Subtract 3 bytes for index + 1 byte for opcode + + switch (opcode) { + case SSF_OP_SHOW: { + // Read UTF-8 text until null terminator + if (remainingBytes > 1) { + let textBytes = seqread.readBytes(remainingBytes) + let textStr = "" + + // Convert bytes to string, stopping at null terminator + for (let i = 0; i < remainingBytes - 1; i++) { // -1 for null terminator + let byte = sys.peek(textBytes + i) + if (byte === 0) break + textStr += String.fromCharCode(byte) + } + + sys.free(textBytes) + subtitleText = textStr + subtitleVisible = true + displaySubtitle(subtitleText, subtitlePosition) + } + break + } + + case SSF_OP_HIDE: { + subtitleVisible = false + subtitleText = "" + clearSubtitleArea() + break + } + + case SSF_OP_MOVE: { + if (remainingBytes >= 2) { // Need at least 1 byte for position + 1 null terminator + let newPosition = seqread.readOneByte() + seqread.readOneByte() // Read null terminator + + if (newPosition >= 0 && newPosition <= 7) { + subtitlePosition = newPosition + + // Re-display current subtitle at new position if visible + if (subtitleVisible && subtitleText.length > 0) { + clearSubtitleArea() + displaySubtitle(subtitleText, subtitlePosition) + } + } + } + break + } + + case SSF_OP_UPLOAD_LOW_FONT: + case SSF_OP_UPLOAD_HIGH_FONT: { + // Font upload - read payload length and font data + if (remainingBytes >= 3) { // uint16 length + at least 1 byte data + let payloadLen = seqread.readShort() + if (remainingBytes >= payloadLen + 2) { + let fontData = seqread.readBytes(payloadLen) + + // upload font data + for (let i = 0; i < Math.min(payloadLen, 1920); i++) sys.poke(-1300607 - i, sys.peek(fontData + i)) + sys.poke(-1299460, (opcode == SSF_OP_UPLOAD_LOW_FONT) ? 
18 : 19) + + sys.free(fontData) + } + } + break + } + + case SSF_OP_NOP: + default: { + // Skip remaining bytes + if (remainingBytes > 0) { + let skipBytes = seqread.readBytes(remainingBytes) + sys.free(skipBytes) + } + + if (interactive && opcode !== SSF_OP_NOP) { + serial.println(`[SUBTITLE UNKNOWN] Index: ${index}, Opcode: 0x${opcode.toString(16).padStart(2, '0')}`) + } + break + } + } +} + + +// TAV header structure (32 bytes vs TEV's 24 bytes) +let header = { + magic: new Array(8), + version: 0, + width: 0, + height: 0, + fps: 0, + totalFrames: 0, + waveletFilter: 0, // TAV-specific: wavelet filter type + decompLevels: 0, // TAV-specific: decomposition levels + qualityY: 0, // TAV-specific: Y channel quality + qualityCo: 0, // TAV-specific: Co channel quality + qualityCg: 0, // TAV-specific: Cg channel quality + extraFlags: 0, + videoFlags: 0, + reserved: new Array(7) +} + +// Read and validate header +for (let i = 0; i < 8; i++) { + header.magic[i] = seqread.readOneByte() +} + +// Validate magic number +let magicValid = true +for (let i = 0; i < 8; i++) { + if (header.magic[i] !== TAV_MAGIC[i]) { + magicValid = false + break + } +} + +if (!magicValid) { + con.puts("Error: Invalid TAV file format") + errorlevel = 1 + return +} + +header.version = seqread.readOneByte() +header.width = seqread.readShort() +header.height = seqread.readShort() +header.fps = seqread.readOneByte() +header.totalFrames = seqread.readInt() +header.waveletFilter = seqread.readOneByte() +header.decompLevels = seqread.readOneByte() +header.qualityY = seqread.readOneByte() +header.qualityCo = seqread.readOneByte() +header.qualityCg = seqread.readOneByte() +header.extraFlags = seqread.readOneByte() +header.videoFlags = seqread.readOneByte() + +// Skip reserved bytes +for (let i = 0; i < 7; i++) { + seqread.readOneByte() +} + +if (header.version < 1 || header.version > 2) { + con.puts(`Error: Unsupported TAV version ${header.version}`) + errorlevel = 1 + return +} + +const hasAudio = 
(header.extraFlags & 0x01) !== 0 +const hasSubtitles = (header.extraFlags & 0x02) !== 0 +const progressiveTransmission = (header.extraFlags & 0x04) !== 0 +const roiCoding = (header.extraFlags & 0x08) !== 0 + +const isInterlaced = (header.videoFlags & 0x01) !== 0 +const isNTSC = (header.videoFlags & 0x02) !== 0 +const isLossless = (header.videoFlags & 0x04) !== 0 + +// Calculate tile dimensions (112x112 vs TEV's 16x16 blocks) +const tilesX = Math.ceil(header.width / TILE_SIZE) +const tilesY = Math.ceil(header.height / TILE_SIZE) +const numTiles = tilesX * tilesY + +console.log(`TAV Decoder`) +console.log(`Resolution: ${header.width}x${header.height}`) +console.log(`FPS: ${header.fps}`) +console.log(`Total frames: ${header.totalFrames}`) +console.log(`Wavelet filter: ${header.waveletFilter === WAVELET_5_3_REVERSIBLE ? "5/3 reversible" : "9/7 irreversible"}`) +console.log(`Decomposition levels: ${header.decompLevels}`) +console.log(`Quality: Y=${header.qualityY}, Co=${header.qualityCo}, Cg=${header.qualityCg}`) +console.log(`Tiles: ${tilesX}x${tilesY} (${numTiles} total)`) +console.log(`Colour space: ${header.version === 2 ? "ICtCp" : "YCoCg-R"}`) +console.log(`Features: ${hasAudio ? "Audio " : ""}${hasSubtitles ? "Subtitles " : ""}${progressiveTransmission ? "Progressive " : ""}${roiCoding ? 
"ROI " : ""}`) + +// Frame buffer addresses - same as TEV +const FRAME_PIXELS = header.width * header.height +const FRAME_SIZE = FRAME_PIXELS * 3 // RGB buffer size + +const RGB_BUFFER_A = sys.malloc(FRAME_SIZE) +const RGB_BUFFER_B = sys.malloc(FRAME_SIZE) + +// Ping-pong buffer pointers (swap instead of copy) +let CURRENT_RGB_ADDR = RGB_BUFFER_A +let PREV_RGB_ADDR = RGB_BUFFER_B + +// Motion vector storage +let motionVectors = new Array(numTiles) +for (let i = 0; i < numTiles; i++) { + motionVectors[i] = { mvX: 0, mvY: 0, rcf: 1.0 } +} + +// Audio state +let audioBufferBytesLastFrame = 0 +let frame_cnt = 0 +let frametime = 1000000000.0 / header.fps +let nextFrameTime = 0 +let mp2Initialised = false +let audioFired = false + + +// Performance tracking variables (from TEV) +let decompressTime = 0 +let decodeTime = 0 +let uploadTime = 0 +let biasTime = 0 + +const BIAS_LIGHTING_MIN = 1.0 / 16.0 +let oldBgcol = [BIAS_LIGHTING_MIN, BIAS_LIGHTING_MIN, BIAS_LIGHTING_MIN] + +let notifHidden = false + +function getRGBfromScr(x, y) { + let offset = y * WIDTH + x + let rg = sys.peek(-1048577 - offset) + let ba = sys.peek(-1310721 - offset) + return [(rg >>> 4) / 15.0, (rg & 15) / 15.0, (ba >>> 4) / 15.0] +} + +function setBiasLighting() { + let samples = [] + let nativeWidth = graphics.getPixelDimension()[0] + let nativeHeight = graphics.getPixelDimension()[1] + let width = header.width; let height = header.height + + let offsetX = Math.floor((nativeWidth - width) / 2) + let offsetY = Math.floor((nativeHeight - height) / 2) + + let sampleStepX = Math.max(8, Math.floor(width / 18)) + let sampleStepY = Math.max(8, Math.floor(height / 17)) + let borderMargin = Math.min(8, Math.floor(width / 70)) + + for (let x = borderMargin; x < width - borderMargin; x += sampleStepX) { + samples.push(getRGBfromScr(x + offsetX, borderMargin + offsetY)) + samples.push(getRGBfromScr(x + offsetX, height - borderMargin - 1 + offsetY)) + } + + for (let y = borderMargin; y < height - borderMargin; y 
+= sampleStepY) { + samples.push(getRGBfromScr(borderMargin + offsetX, y + offsetY)) + samples.push(getRGBfromScr(width - borderMargin - 1 + offsetX, y + offsetY)) + } + + let out = [0.0, 0.0, 0.0] + samples.forEach(rgb=>{ + out[0] += rgb[0] + out[1] += rgb[1] + out[2] += rgb[2] + }) + out[0] = BIAS_LIGHTING_MIN + (out[0] / samples.length / 2.0) + out[1] = BIAS_LIGHTING_MIN + (out[1] / samples.length / 2.0) + out[2] = BIAS_LIGHTING_MIN + (out[2] / samples.length / 2.0) + + let bgr = (oldBgcol[0]*5 + out[0]) / 6.0 + let bgg = (oldBgcol[1]*5 + out[1]) / 6.0 + let bgb = (oldBgcol[2]*5 + out[2]) / 6.0 + + oldBgcol = [bgr, bgg, bgb] + + graphics.setBackground(Math.round(bgr * 255), Math.round(bgg * 255), Math.round(bgb * 255)) +} + +function updateDataRateBin(rate) { + videoRateBin.push(rate) + if (videoRateBin.length > header.fps) { + videoRateBin.shift() + } +} + +let FRAME_TIME = 1.0 / header.fps + +let frameCount = 0 +let trueFrameCount = 0 +let stopPlay = false +let akku = FRAME_TIME +let akku2 = 0.0 + +let blockDataPtr = sys.malloc(2377764) + +// Playback loop - properly adapted from TEV +try { + let t1 = sys.nanoTime() + while (!stopPlay && seqread.getReadCount() < FILE_LENGTH && (header.totalFrames == 0 || header.totalFrames > 0 && frameCount < header.totalFrames)) { + + // Handle interactive controls + if (interactive) { + sys.poke(-40, 1) + if (sys.peek(-41) == 67) { // Backspace + stopPlay = true + break + } + } + + if (akku >= FRAME_TIME) { + // Read packet header + const packetType = seqread.readOneByte() + + if (packetType === TAV_PACKET_SYNC) { + // Sync packet - no additional data + akku -= FRAME_TIME + frameCount++ + trueFrameCount++ + + // Swap ping-pong buffers instead of expensive memcpy (752KB copy eliminated!) 
+ let temp = CURRENT_RGB_ADDR + CURRENT_RGB_ADDR = PREV_RGB_ADDR + PREV_RGB_ADDR = temp + + } else if (packetType === TAV_PACKET_IFRAME || packetType === TAV_PACKET_PFRAME) { + // Video packet + const compressedSize = seqread.readInt() + const isKeyframe = (packetType === TAV_PACKET_IFRAME) + + // Read compressed tile data + let compressedPtr = seqread.readBytes(compressedSize) + updateDataRateBin(compressedSize) + + let actualSize + let decompressStart = sys.nanoTime() + try { + // Use gzip decompression (only compression format supported in TSVM JS) + actualSize = gzip.decompFromTo(compressedPtr, compressedSize, blockDataPtr) + decompressTime = (sys.nanoTime() - decompressStart) / 1000000.0 + } catch (e) { + decompressTime = (sys.nanoTime() - decompressStart) / 1000000.0 + console.log(`Frame ${frameCount}: Gzip decompression failed, skipping (compressed size: ${compressedSize}, error: ${e})`) + sys.free(compressedPtr) + continue + } + + try { +// serial.println(actualSize) + let decodeStart = sys.nanoTime() + + // Call TAV hardware decoder (like TEV's tevDecode but with RGB buffer outputs) + graphics.tavDecode( + blockDataPtr, + CURRENT_RGB_ADDR, PREV_RGB_ADDR, // RGB buffer pointers (not float arrays!) 
+ header.width, header.height, + header.qualityY, header.qualityCo, header.qualityCg, + frameCount, + debugMotionVectors, + header.waveletFilter, // TAV-specific parameter + header.decompLevels, // TAV-specific parameter + enableDeblocking, + isLossless, + header.version // TAV version for colour space detection + ) + + decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0 + + // Upload RGB buffer to display framebuffer (like TEV) + let uploadStart = sys.nanoTime() + graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, header.width, header.height, frameCount, true) + uploadTime = (sys.nanoTime() - uploadStart) / 1000000.0 + + // Defer audio playback until a first frame is sent + if (isInterlaced) { + // fire audio after frame 1 + if (!audioFired && frameCount > 0) { + audio.play(0) + audioFired = true + } + } + else { + // fire audio after frame 0 + if (!audioFired) { + audio.play(0) + audioFired = true + } + } + } catch (e) { + console.log(`Frame ${frameCount}: decode failed: ${e}`) + } finally { + sys.free(compressedPtr) + } + + + let biasStart = sys.nanoTime() + setBiasLighting() + biasTime = (sys.nanoTime() - biasStart) / 1000000.0 + + // Log performance data every 60 frames + if (frameCount % 60 == 0 || frameCount == 0) { + let totalTime = decompressTime + decodeTime + uploadTime + biasTime + console.log(`Frame ${frameCount}: Decompress=${decompressTime.toFixed(1)}ms, Decode=${decodeTime.toFixed(1)}ms, Upload=${uploadTime.toFixed(1)}ms, Bias=${biasTime.toFixed(1)}ms, Total=${totalTime.toFixed(1)}ms`) + } + + } else if (packetType === TAV_PACKET_AUDIO_MP2) { + // MP2 Audio packet + let audioLen = seqread.readInt() + + if (!mp2Initialised) { + mp2Initialised = true + audio.mp2Init() + } + + seqread.readBytes(audioLen, SND_BASE_ADDR - 2368) + audio.mp2Decode() + audio.mp2UploadDecoded(0) + + } else if (packetType === TAV_PACKET_SUBTITLE) { + // Subtitle packet - same format as TEV + let packetSize = seqread.readInt() + processSubtitlePacket(packetSize) + } else if 
(packetType == 0x00) { + // Silently discard, faulty subtitle creation can cause this as 0x00 is used as an argument terminator + } else { + println(`Unknown packet type: 0x${packetType.toString(16)}`) + break + } + } + + let t2 = sys.nanoTime() + akku += (t2 - t1) / 1000000000.0 + akku2 += (t2 - t1) / 1000000000.0 + + // Simple progress display + if (interactive) { + notifHideTimer += (t2 - t1) + if (!notifHidden && notifHideTimer > (NOTIF_SHOWUPTIME + FRAME_TIME)) { + con.move(1, 1) + print(' '.repeat(79)) + notifHidden = true + } + + if (notifHidden) { + con.move(31, 1) + con.color_pair(253, 0) + //print(`Frame: ${frameCount}/${header.totalFrames} (${((frameCount / akku2 * 100)|0) / 100}f) `) + } + } + + t1 = t2 + } +} +catch (e) { + printerrln(`TAV decode error: ${e}`) + errorlevel = 1 +} +finally { + // Cleanup + sys.free(blockDataPtr) + sys.free(RGB_BUFFER_A) + sys.free(RGB_BUFFER_B) + + con.curs_set(1) + con.clear() + + if (errorlevel === 0) { + console.log(`Playback completed: ${frameCount} frames`) + } else { + console.log(`Playback failed with error ${errorlevel}`) + } +} + +graphics.setPalette(0, 0, 0, 0, 0) +con.move(cy, cx) // restore cursor +return errorlevel \ No newline at end of file diff --git a/terranmon.txt b/terranmon.txt index 742653d..774fef0 100644 --- a/terranmon.txt +++ b/terranmon.txt @@ -709,6 +709,7 @@ DCT-based compression, motion compensation, and efficient temporal coding. uint8 Video Flags - bit 0 = is interlaced (should be default for most non-archival TEV videos) - bit 1 = is NTSC framerate (repeat every 1000th frame) + - bit 2 = is lossless mode uint8 Reserved, fill with zero ## Packet Types @@ -792,6 +793,168 @@ The format is designed to be compatible with SubRip and SAMI (without markups). text argument may be terminated by 0x00 BEFORE the entire arguments being terminated by 0x00, leaving extra 0x00 on the byte stream. A decoder must be able to handle the extra zeros. 
+## NTSC Framerate handling +The encoder encodes the frames as-is. The decoder must duplicate every 1000th frame to keep the decoding +in-sync. + +-------------------------------------------------------------------------------- + +TSVM Advanced Video (TAV) Format +Created by Claude on 2025-09-13 + +TAV is a next-generation video codec for TSVM utilizing Discrete Wavelet Transform (DWT) +similar to JPEG2000, providing superior compression efficiency and scalability compared +to DCT-based codecs like TEV. Features include multi-resolution encoding, progressive +transmission capability, and region-of-interest coding. + +## Version History +- Version 1.0: Initial DWT-based implementation with 5/3 reversible filter +- Version 1.1: Added 9/7 irreversible filter for higher compression +- Version 1.2: Multi-resolution pyramid encoding with up to 4 decomposition levels +- Version 1.3: Optimized 112x112 tiles for TSVM resolution with up to 6 decomposition levels + +# File Structure +\x1F T S V M T A V +[HEADER] +[PACKET 0] +[PACKET 1] +[PACKET 2] +... 
+ +## Header (32 bytes) + uint8 Magic[8]: "\x1FTSVM TAV" + uint8 Version: 1 + uint16 Width: video width in pixels + uint16 Height: video height in pixels + uint8 FPS: frames per second + uint32 Total Frames: number of video frames + uint8 Wavelet Filter Type: 0=5/3 reversible, 1=9/7 irreversible + uint8 Decomposition Levels: number of DWT levels (1-4) + uint8 Quantiser Index for Y channel (1: lossless, 255: potato) + uint8 Quantiser Index for Co channel (1: lossless, 255: potato) + uint8 Quantiser Index for Cg channel (1: lossless, 255: potato) + uint8 Extra Feature Flags + - bit 0 = has audio + - bit 1 = has subtitle + uint8 Video Flags + - bit 0 = is interlaced (unused) + - bit 1 = is NTSC framerate + - bit 2 = is lossless mode + uint8 Reserved[7]: fill with zeros + +## Packet Types + 0x10: I-frame (intra-coded frame) + 0x11: P-frame (predicted frame with motion compensation) + 0x20: MP2 audio packet + 0x30: Subtitle in "Simple" format + 0xFF: sync packet + +## Video Packet Structure + uint8 Packet Type + uint32 Compressed Size + * Zstd-compressed Block Data + +## Block Data (per 112x112 tile) + uint8 Mode: encoding mode + 0x00 = SKIP (copy from previous frame) + 0x01 = INTRA (DWT-coded, no prediction) + 0x02 = INTER (DWT-coded with motion compensation) + 0x03 = MOTION (motion vector only, no residual) + int16 Motion Vector X (1/4 pixel precision) + int16 Motion Vector Y (1/4 pixel precision) + float32 Rate Control Factor (4 bytes, little-endian) + + ## DWT Coefficient Structure (per tile) + For each decomposition level L (from highest to lowest): + uint16 LL_size: size of LL subband coefficients + uint16 LH_size: size of LH subband coefficients + uint16 HL_size: size of HL subband coefficients + uint16 HH_size: size of HH subband coefficients + int16[] LL_coeffs: quantized LL subband (low-low frequencies) + int16[] LH_coeffs: quantized LH subband (low-high frequencies) + int16[] HL_coeffs: quantized HL subband (high-low frequencies) + int16[] HH_coeffs: 
quantized HH subband (high-high frequencies) + +## DWT Implementation Details + +### Wavelet Filters +- 5/3 Reversible Filter (lossless capable): + * Analysis: Low-pass [1/2, 1, 1/2], High-pass [-1/8, -1/4, 3/4, -1/4, -1/8] + * Synthesis: Low-pass [1/4, 1/2, 1/4], High-pass [-1/16, -1/8, 3/8, -1/8, -1/16] + +- 9/7 Irreversible Filter (higher compression): + * Analysis: Daubechies 9/7 coefficients optimized for image compression + * Provides better energy compaction than 5/3 but lossy reconstruction + +### Decomposition Levels +- Level 1: 112x112 → 56x56 (LL) + 3×56x56 subbands (LH,HL,HH) +- Level 2: 56x56 → 28x28 (LL) + 3×28x28 subbands +- Level 3: 28x28 → 14x14 (LL) + 3×14x14 subbands +- Level 4: 14x14 → 7x7 (LL) + 3×7x7 subbands +- Level 5: 7x7 → 3x3 (LL) + 3×3x3 subbands +- Level 6: 3x3 → 1x1 (LL) + 3×1x1 subbands (maximum) + +### Quantization Strategy +TAV uses different quantization steps for each subband based on human visual +system sensitivity: +- LL subbands: Fine quantization (preserve DC and low frequencies) +- LH/HL subbands: Medium quantization (diagonal details less critical) +- HH subbands: Coarse quantization (high frequency noise can be discarded) + +### Progressive Transmission +When enabled, coefficients are transmitted in order of visual importance: +1. LL subband of highest decomposition level (thumbnail) +2. Lower frequency subbands first +3. 
Higher frequency subbands for refinement + +## Motion Compensation +- Search range: ±28 pixels (optimized for 112x112 tiles) +- Sub-pixel precision: 1/4 pixel with bilinear interpolation +- Tile size: 112x112 pixels (perfect fit for TSVM 560x448 resolution) + * Exactly 5×4 = 20 tiles per frame (560÷112 = 5, 448÷112 = 4) + * No partial tiles needed - optimal for processing efficiency +- Uses Sum of Absolute Differences (SAD) for motion estimation +- Overlapped block motion compensation (OBMC) for smooth boundaries + +## Colour Space +TAV operates in YCoCg-R colour space with full resolution channels: +- Y: Luma channel (full resolution, fine quantization) +- Co: Orange-Cyan chroma (full resolution, aggressive quantization by default) +- Cg: Green-Magenta chroma (full resolution, very aggressive quantization by default) + +## Compression Features +- 112x112 DWT tiles vs 16x16 DCT blocks in TEV +- Multi-resolution representation enables scalable decoding +- Better frequency localization than DCT +- Reduced blocking artifacts due to overlapping basis functions +- Region-of-Interest (ROI) coding for selective quality enhancement +- Progressive transmission for bandwidth adaptation + +## Performance Comparison +Expected improvements over TEV: +- 20-30% better compression efficiency +- Reduced blocking artifacts +- Scalable quality/resolution decoding +- Better performance on natural images vs artificial content +- Full resolution chroma preserves color detail while aggressive quantization maintains compression + +## Hardware Acceleration Functions +TAV decoder requires new GraphicsJSR223Delegate functions: +- tavDecode(): Main DWT decoding function +- tavDWT2D(): 2D DWT/IDWT transforms +- tavQuantize(): Multi-band quantization +- tavMotionCompensate(): 64x64 tile motion compensation + +## Audio Support +Reuses existing MP2 audio infrastructure from TEV/MOV formats for compatibility. 
+ +## Subtitle Support +Uses same Simple Subtitle Format (SSF) as TEV for text overlay functionality. + +## NTSC Framerate handling +Unlike the TEV format, TAV emits extra sync packet for every 1000th frames. Decoder can just play the video +without any special treatment. + -------------------------------------------------------------------------------- Sound Adapter diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt index 217be35..f86471b 100644 --- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt @@ -12,87 +12,26 @@ import net.torvald.terrarum.modulecomputers.virtualcomputer.tvd.toUint import net.torvald.tsvm.peripheral.GraphicsAdapter import net.torvald.tsvm.peripheral.PeriBase import net.torvald.tsvm.peripheral.fmod -import net.torvald.util.Float16 import kotlin.math.* class GraphicsJSR223Delegate(private val vm: VM) { + // TAV Simulated overlapping tiles constants (must match encoder) + private val TILE_SIZE_X = 280 + private val TILE_SIZE_Y = 224 + private val TAV_TILE_MARGIN = 32 // 32-pixel margin for 3 DWT levels (4 * 2^3 = 32px) + private val PADDED_TILE_SIZE_X = TILE_SIZE_X + 2 * TAV_TILE_MARGIN // 280 + 64 = 344px + private val PADDED_TILE_SIZE_Y = TILE_SIZE_Y + 2 * TAV_TILE_MARGIN // 224 + 64 = 288px + // Reusable working arrays to reduce allocation overhead - private val idct8TempBuffer = FloatArray(64) - private val idct16TempBuffer = FloatArray(256) // For 16x16 IDCT - private val idct16SeparableBuffer = FloatArray(256) // For separable 16x16 IDCT - - // Lossless IDCT functions for float16 coefficients (no quantization) - private fun tevIdct8x8_lossless(coeffs: FloatArray): IntArray { - val result = IntArray(64) - - // Fast separable IDCT (row-column decomposition) for lossless coefficients - // First pass: Process rows (8 1D IDCTs) - for (row in 0 until 8) { - for (col in 0 until 8) { - var sum = 0f 
- for (u in 0 until 8) { - sum += dctBasis8[u][col] * coeffs[row * 8 + u] - } - idct8TempBuffer[row * 8 + col] = sum * 0.5f - } - } - - // Second pass: Process columns (8 1D IDCTs) - for (col in 0 until 8) { - for (row in 0 until 8) { - var sum = 0f - for (v in 0 until 8) { - sum += dctBasis8[v][row] * idct8TempBuffer[v * 8 + col] - } - val finalValue = sum * 0.5f + 128f - result[row * 8 + col] = if (finalValue.isNaN() || finalValue.isInfinite()) { - println("NaN/Inf detected in 8x8 IDCT at ($row,$col): sum=$sum, finalValue=$finalValue") - 128 // Default to middle gray - } else { - finalValue.roundToInt().coerceIn(0, 255) - } - } - } - - return result - } - - private fun tevIdct16x16_lossless(coeffs: FloatArray): IntArray { - val result = IntArray(256) - - // Fast separable IDCT (row-column decomposition) for 16x16 lossless coefficients - // First pass: Process rows (16 1D IDCTs) - for (row in 0 until 16) { - for (col in 0 until 16) { - var sum = 0f - for (u in 0 until 16) { - sum += dctBasis16[u][col] * coeffs[row * 16 + u] - } - idct16TempBuffer[row * 16 + col] = sum * 0.25f - } - } - - // Second pass: Process columns (16 1D IDCTs) - for (col in 0 until 16) { - for (row in 0 until 16) { - var sum = 0f - for (v in 0 until 16) { - sum += dctBasis16[v][row] * idct16TempBuffer[v * 16 + col] - } - val finalValue = sum * 0.25f + 128f - result[row * 16 + col] = if (finalValue.isNaN() || finalValue.isInfinite()) { - println("NaN/Inf detected in 16x16 IDCT at ($row,$col): sum=$sum, finalValue=$finalValue") - 128 // Default to middle gray - } else { - finalValue.roundToInt().coerceIn(0, 255) - } - } - } - - return result - } + private val tevIdct8TempBuffer = FloatArray(64) + private val tevIdct16TempBuffer = FloatArray(256) // For 16x16 IDCT + private val tevIdct16SeparableBuffer = FloatArray(256) // For separable 16x16 IDCT + // TAV coefficient delta storage for previous frame (for efficient P-frames) + private var tavPreviousCoeffsY: MutableMap? 
= null + private var tavPreviousCoeffsCo: MutableMap? = null + private var tavPreviousCoeffsCg: MutableMap? = null private fun getFirstGPU(): GraphicsAdapter? { return vm.findPeribyType(VM.PERITYPE_GPU_AND_TERM)?.peripheral as? GraphicsAdapter @@ -149,19 +88,19 @@ class GraphicsJSR223Delegate(private val vm: VM) { getFirstGPU()?._storebulk(fromAddr, toAddr, length) }*/ - fun plotPixel(x: Int, y: Int, color: Int) { + fun plotPixel(x: Int, y: Int, colour: Int) { getFirstGPU()?.let { if (x in 0 until it.config.width && y in 0 until it.config.height) { - it.poke(y.toLong() * it.config.width + x, color.toByte()) + it.poke(y.toLong() * it.config.width + x, colour.toByte()) it.applyDelay() } } } - fun plotPixel2(x: Int, y: Int, color: Int) { + fun plotPixel2(x: Int, y: Int, colour: Int) { getFirstGPU()?.let { if (x in 0 until it.config.width && y in 0 until it.config.height) { - it.poke(262144 + y.toLong() * it.config.width + x, color.toByte()) + it.poke(262144 + y.toLong() * it.config.width + x, colour.toByte()) it.applyDelay() } } @@ -986,7 +925,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { } private fun clampRGB(f: Float) = f.coerceIn(0f, 1f) - private fun ycocgToRGB(co: Int, cg: Int, ys: Int, As: Int): Array { // ys: 4 Y-values + private fun ipf1YcocgToRGB(co: Int, cg: Int, ys: Int, As: Int): Array { // ys: 4 Y-values // return [R1|G1, B1|A1, R2|G2, B2|A2, R3|G3, B3|A3, R4|G4, B4|A4] // cocg = 0x7777 @@ -1035,7 +974,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { ) } - private fun ycocgToRGB(co1: Int, co2: Int, cg1: Int, cg2: Int, ys: Int, As: Int): Array { // ys: 4 Y-values + private fun ipf2YcocgToRGB(co1: Int, co2: Int, cg1: Int, cg2: Int, ys: Int, As: Int): Array { // ys: 4 Y-values // return [R1|G1, B1|A1, R2|G2, B2|A2, R3|G3, B3|A3, R4|G4, B4|A4] // cocg = 0x7777 @@ -1118,25 +1057,25 @@ class GraphicsJSR223Delegate(private val vm: VM) { a4 = readShort() } - var corner = ycocgToRGB(co and 15, cg and 15, y1, a1) + var corner = ipf1YcocgToRGB(co 
and 15, cg and 15, y1, a1) rg[0] = corner[0];ba[0] = corner[1] rg[1] = corner[2];ba[1] = corner[3] rg[4] = corner[4];ba[4] = corner[5] rg[5] = corner[6];ba[5] = corner[7] - corner = ycocgToRGB((co shr 4) and 15, (cg shr 4) and 15, y2, a2) + corner = ipf1YcocgToRGB((co shr 4) and 15, (cg shr 4) and 15, y2, a2) rg[2] = corner[0];ba[2] = corner[1] rg[3] = corner[2];ba[3] = corner[3] rg[6] = corner[4];ba[6] = corner[5] rg[7] = corner[6];ba[7] = corner[7] - corner = ycocgToRGB((co shr 8) and 15, (cg shr 8) and 15, y3, a3) + corner = ipf1YcocgToRGB((co shr 8) and 15, (cg shr 8) and 15, y3, a3) rg[8] = corner[0];ba[8] = corner[1] rg[9] = corner[2];ba[9] = corner[3] rg[12] = corner[4];ba[12] = corner[5] rg[13] = corner[6];ba[13] = corner[7] - corner = ycocgToRGB((co shr 12) and 15, (cg shr 12) and 15, y4, a4) + corner = ipf1YcocgToRGB((co shr 12) and 15, (cg shr 12) and 15, y4, a4) rg[10] = corner[0];ba[10] = corner[1] rg[11] = corner[2];ba[11] = corner[3] rg[14] = corner[4];ba[14] = corner[5] @@ -1209,25 +1148,25 @@ class GraphicsJSR223Delegate(private val vm: VM) { val rg = IntArray(16) val ba = IntArray(16) - var px = ycocgToRGB(co and 15, cg and 15, y1, 65535) + var px = ipf1YcocgToRGB(co and 15, cg and 15, y1, 65535) rg[0] = px[0]; ba[0] = px[1] rg[1] = px[2]; ba[1] = px[3] rg[4] = px[4]; ba[4] = px[5] rg[5] = px[6]; ba[5] = px[7] - px = ycocgToRGB((co shr 4) and 15, (cg shr 4) and 15, y2, 65535) + px = ipf1YcocgToRGB((co shr 4) and 15, (cg shr 4) and 15, y2, 65535) rg[2] = px[0]; ba[2] = px[1] rg[3] = px[2]; ba[3] = px[3] rg[6] = px[4]; ba[6] = px[5] rg[7] = px[6]; ba[7] = px[7] - px = ycocgToRGB((co shr 8) and 15, (cg shr 8) and 15, y3, 65535) + px = ipf1YcocgToRGB((co shr 8) and 15, (cg shr 8) and 15, y3, 65535) rg[8] = px[0]; ba[8] = px[1] rg[9] = px[2]; ba[9] = px[3] rg[12] = px[4]; ba[12] = px[5] rg[13] = px[6]; ba[13] = px[7] - px = ycocgToRGB((co shr 12) and 15, (cg shr 12) and 15, y4, 65535) + px = ipf1YcocgToRGB((co shr 12) and 15, (cg shr 12) and 15, y4, 
65535) rg[10] = px[0]; ba[10] = px[1] rg[11] = px[2]; ba[11] = px[3] rg[14] = px[4]; ba[14] = px[5] @@ -1302,25 +1241,25 @@ class GraphicsJSR223Delegate(private val vm: VM) { a4 = readShort() } - var corner = ycocgToRGB(co and 15, (co shr 8) and 15, cg and 15, (cg shr 8) and 15, y1, a1) + var corner = ipf2YcocgToRGB(co and 15, (co shr 8) and 15, cg and 15, (cg shr 8) and 15, y1, a1) rg[0] = corner[0];ba[0] = corner[1] rg[1] = corner[2];ba[1] = corner[3] rg[4] = corner[4];ba[4] = corner[5] rg[5] = corner[6];ba[5] = corner[7] - corner = ycocgToRGB((co shr 4) and 15, (co shr 12) and 15, (cg shr 4) and 15, (cg shr 12) and 15, y2, a2) + corner = ipf2YcocgToRGB((co shr 4) and 15, (co shr 12) and 15, (cg shr 4) and 15, (cg shr 12) and 15, y2, a2) rg[2] = corner[0];ba[2] = corner[1] rg[3] = corner[2];ba[3] = corner[3] rg[6] = corner[4];ba[6] = corner[5] rg[7] = corner[6];ba[7] = corner[7] - corner = ycocgToRGB((co shr 16) and 15, (co shr 24) and 15, (cg shr 16) and 15, (cg shr 24) and 15, y3, a3) + corner = ipf2YcocgToRGB((co shr 16) and 15, (co shr 24) and 15, (cg shr 16) and 15, (cg shr 24) and 15, y3, a3) rg[8] = corner[0];ba[8] = corner[1] rg[9] = corner[2];ba[9] = corner[3] rg[12] = corner[4];ba[12] = corner[5] rg[13] = corner[6];ba[13] = corner[7] - corner = ycocgToRGB((co shr 20) and 15, (co shr 28) and 15, (cg shr 20) and 15, (cg shr 28) and 15, y4, a4) + corner = ipf2YcocgToRGB((co shr 20) and 15, (co shr 28) and 15, (cg shr 20) and 15, (cg shr 28) and 15, y4, a4) rg[10] = corner[0];ba[10] = corner[1] rg[11] = corner[2];ba[11] = corner[3] rg[14] = corner[4];ba[14] = corner[5] @@ -1351,7 +1290,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { return (if ((q < 50)) 5000f / q else 200f - 2 * q) / 100f } - // Quality settings for quantization (Y channel) - 16x16 tables + // Quality settings for quantisation (Y channel) - 16x16 tables val QUANT_TABLE_Y: IntArray = intArrayOf( 16, 14, 12, 11, 11, 13, 16, 20, 24, 30, 39, 48, 54, 61, 67, 73, 14, 13, 12, 12, 12, 15, 
18, 21, 25, 33, 46, 57, 61, 65, 67, 70, @@ -1370,7 +1309,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { 73, 82, 92, 98, 103, 107, 110, 117, 126, 132, 134, 136, 138, 138, 133, 127, 86, 98, 109, 112, 114, 116, 118, 124, 133, 135, 129, 125, 128, 130, 128, 127) - // Quality settings for quantization (Co channel - orange-blue, 8x8) + // Quality settings for quantisation (Co channel - orange-blue, 8x8) val QUANT_TABLE_C: IntArray = intArrayOf( 17, 18, 24, 47, 99, 99, 99, 99, 18, 21, 26, 66, 99, 99, 99, 99, @@ -1498,8 +1437,8 @@ class GraphicsJSR223Delegate(private val vm: VM) { val videoX = nativeX * scaleX val videoY = nativeY * scaleY - // Sample RGB values using bilinear interpolation (optimized version) - val rgb = sampleBilinearOptimized(rgbAddr, width, height, videoX, videoY, rgbAddrIncVec) + // Sample RGB values using bilinear interpolation (optimised version) + val rgb = sampleBilinearOptimised(rgbAddr, width, height, videoX, videoY, rgbAddrIncVec) val r = rgb[0] val g = rgb[1] val b = rgb[2] @@ -1525,7 +1464,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { pixelsProcessed += pixelsInChunk } } else { - // Optimized centering logic with bulk memory operations + // Optimised centering logic with bulk memory operations val offsetX = (nativeWidth - width) / 2 val offsetY = (nativeHeight - height) / 2 @@ -1593,10 +1532,10 @@ class GraphicsJSR223Delegate(private val vm: VM) { } /** - * Apply Bayer dithering to reduce banding when quantizing to 4-bit + * Apply Bayer dithering to reduce banding when quantising to 4-bit */ private fun ditherValue(value: Int, x: Int, y: Int, f: Int): Int { - // Preserve pure values (0 and 255) exactly to maintain color primaries + // Preserve pure values (0 and 255) exactly to maintain colour primaries if (value == 0) return 0 if (value == 255) return 15 @@ -1657,9 +1596,9 @@ class GraphicsJSR223Delegate(private val vm: VM) { } /** - * Optimized bilinear sampling with bulk memory access and caching + * Optimised bilinear 
sampling with bulk memory access and caching */ - private fun sampleBilinearOptimized(rgbAddr: Long, width: Int, height: Int, x: Float, y: Float, rgbAddrIncVec: Int): IntArray { + private fun sampleBilinearOptimised(rgbAddr: Long, width: Int, height: Int, x: Float, y: Float, rgbAddrIncVec: Int): IntArray { // Clamp coordinates to valid range val clampedX = x.coerceIn(0f, (width - 1).toFloat()) val clampedY = y.coerceIn(0f, (height - 1).toFloat()) @@ -1678,7 +1617,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { val (memspace, baseOffset) = vm.translateAddr(rgbAddr) if (memspace is UnsafePtr && rgbAddrIncVec == 1) { - // Optimized path for user memory with forward addressing + // Optimised path for user memory with forward addressing val y0RowAddr = baseOffset + (y0 * width + x0) * 3 val y1RowAddr = baseOffset + (y1 * width + x0) * 3 @@ -1721,7 +1660,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { val result = IntArray(64) // Reuse preallocated temp buffer to reduce GC pressure for (i in coeffs.indices) { - idct8TempBuffer[i] = coeffs[i] * (quantTable[i] * jpeg_quality_to_mult(qualityIndex * rateControlFactor)).coerceIn(1f, 255f) + tevIdct8TempBuffer[i] = coeffs[i] * (quantTable[i] * jpeg_quality_to_mult(qualityIndex * rateControlFactor)).coerceIn(1f, 255f) } // Fast separable IDCT (row-column decomposition) @@ -1738,7 +1677,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { } sum += dctBasis8[u][col] * coeff } - idct8TempBuffer[row * 8 + col] = sum + tevIdct8TempBuffer[row * 8 + col] = sum } } @@ -1747,7 +1686,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { for (row in 0 until 8) { var sum = 0f for (v in 0 until 8) { - sum += dctBasis8[v][row] * idct8TempBuffer[v * 8 + col] + sum += dctBasis8[v][row] * tevIdct8TempBuffer[v * 8 + col] } val pixel = if (isChromaResidual) { @@ -1773,7 +1712,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { private fun tevIdct16x16_fast(coeffs: ShortArray, quantTable: IntArray, qualityIndex: Int, 
rateControlFactor: Float): IntArray { val result = IntArray(256) // 16x16 = 256 - // Process coefficients and dequantize using preallocated buffer + // Process coefficients and dequantise using preallocated buffer for (u in 0 until 16) { for (v in 0 until 16) { val idx = u * 16 + v @@ -1782,7 +1721,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { } else { coeffs[idx] * (quantTable[idx] * jpeg_quality_to_mult(qualityIndex * rateControlFactor)).coerceIn(1f, 255f) } - idct16TempBuffer[idx] = coeff + tevIdct16TempBuffer[idx] = coeff } } @@ -1792,9 +1731,9 @@ class GraphicsJSR223Delegate(private val vm: VM) { for (col in 0 until 16) { var sum = 0f for (u in 0 until 16) { - sum += dctBasis16[u][col] * idct16TempBuffer[row * 16 + u] + sum += dctBasis16[u][col] * tevIdct16TempBuffer[row * 16 + u] } - idct16SeparableBuffer[row * 16 + col] = sum + tevIdct16SeparableBuffer[row * 16 + col] = sum } } @@ -1803,7 +1742,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { for (row in 0 until 16) { var sum = 0f for (v in 0 until 16) { - sum += dctBasis16[v][row] * idct16SeparableBuffer[v * 16 + col] + sum += dctBasis16[v][row] * tevIdct16SeparableBuffer[v * 16 + col] } val pixel = (sum + 128f).coerceIn(0f, 255f) result[row * 16 + col] = pixel.toInt() @@ -1822,7 +1761,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { private val interlacedFieldBuffer = IntArray(560 * 224 * 3) // Half-height RGB buffer /** - * YADIF (Yet Another Deinterlacing Filter) implementation - Optimized + * YADIF (Yet Another Deinterlacing Filter) implementation - Optimised * Converts interlaced field to progressive frame with temporal/spatial interpolation */ fun yadifDeinterlace(fieldRGBAddr: Long, outputRGBAddr: Long, width: Int, height: Int, @@ -1929,7 +1868,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } - // Cover up top and bottom lines with border color (optimized) + // Cover up top and bottom lines with border colour (optimised) val destT = 0 val destB = (height - 2) * 
width * 3 val col = (vm.peek(-1299457)!!.toUint() shl 16) or (vm.peek(-1299458)!!.toUint() shl 8) or vm.peek(-1299459)!!.toUint() @@ -1955,7 +1894,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { for (c in 0..2) { val idx = pixelIdx + c - // Get spatial neighbors + // Get spatial neighbours val above = fieldBuffer[aboveRowIdx + idx].toUint() val below = fieldBuffer[belowRowIdx + idx].toUint() val current = fieldBuffer[rowStartIdx + idx].toUint() @@ -1970,7 +1909,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { val nextPixel = nextBuffer[rowStartIdx + idx].toUint() val tempInterp = (prevPixel + nextPixel) / 2 - // YADIF edge-directed decision (optimized) + // YADIF edge-directed decision (optimised) val spatialDiff = kotlin.math.abs(above.toInt() - below.toInt()) val temporalDiff = kotlin.math.abs(prevPixel.toInt() - nextPixel.toInt()) @@ -2028,7 +1967,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { val interpOutputOffset = (interpLine * width + x) * 3 for (c in 0..2) { - // Get spatial neighbors from sequential field data + // Get spatial neighbours from sequential field data val fieldStride = width * 3 val aboveOffset = fieldOffset - fieldStride + c val belowOffset = fieldOffset + fieldStride + c @@ -2053,7 +1992,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { var interpolatedValue = (above + below) / 2 // Default spatial interpolation if (prevFieldAddr != 0L && nextFieldAddr != 0L) { - // Get temporal neighbors + // Get temporal neighbours val tempFieldOffset = (y * width + x) * 3 + c val prevPixel = (vm.peek(prevFieldAddr + tempFieldOffset * fieldIncVec)?.toInt() ?: current) and 0xFF val nextPixel = (vm.peek(nextFieldAddr + tempFieldOffset * fieldIncVec)?.toInt() ?: current) and 0xFF @@ -2061,7 +2000,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { // BWDIF-inspired temporal differences (adapted for 3-frame window) // Note: True BWDIF uses 5 frames, we adapt to 3-frame constraint - // Get spatial neighbors from previous and 
next fields for temporal comparison + // Get spatial neighbours from previous and next fields for temporal comparison // Use same addressing pattern as working YADIF implementation val prevAboveOffset = if (y > 0) ((y-1) * width + x) * 3 + c else tempFieldOffset val prevBelowOffset = if (y < fieldHeight - 1) ((y+1) * width + x) * 3 + c else tempFieldOffset @@ -2176,9 +2115,9 @@ class GraphicsJSR223Delegate(private val vm: VM) { val Sp = I + 1.0212710798422344 * Ct - 0.6052744909924316 * Cp // HLG decode: L'M'S' -> linear LMS - val L = HLG_inverse_OETF(Lp) - val M = HLG_inverse_OETF(Mp) - val S = HLG_inverse_OETF(Sp) + val L = HLG_EOTF(Lp) + val M = HLG_EOTF(Mp) + val S = HLG_EOTF(Sp) // LMS -> linear sRGB (inverse matrix) val rLin = 6.1723815689243215 * L -5.319534979827695 * M + 0.14699442094633924 * S @@ -2204,7 +2143,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { // Helper functions for ICtCp decoding // Inverse HLG OETF (HLG -> linear) - fun HLG_inverse_OETF(V: Double): Double { + fun HLG_EOTF(V: Double): Double { val a = 0.17883277 val b = 1.0 - 4.0 * a val c = 0.5 - a * ln(4.0 * a) @@ -2309,102 +2248,102 @@ class GraphicsJSR223Delegate(private val vm: VM) { } // ENHANCED: Knusperli-inspired boundary discontinuity analysis - fun analyzeBoundaryDiscontinuity(samples: IntArray): Pair { + fun analyseBoundaryDiscontinuity(samples: IntArray): Pair { // samples: 8-pixel samples across the boundary for frequency analysis var delta = 0L var hfPenalty = 0L - + for (u in 0 until 8) { val alpha = kAlphaSqrt2[u] val sign = if (u and 1 != 0) -1 else 1 val leftVal = samples[u] val rightVal = samples[7 - u] // Mirror for boundary analysis - + delta += alpha * (rightVal - sign * leftVal) hfPenalty += (u * u) * (leftVal * leftVal + rightVal * rightVal) } - + return Pair(delta, hfPenalty) } - + // ENHANCED: Adaptive strength based on local complexity fun calculateAdaptiveStrength(baseStrength: Float, hfPenalty: Long, delta: Long): Float { val complexity = 
kotlin.math.sqrt(hfPenalty.toDouble()).toFloat() val discontinuityMagnitude = kotlin.math.abs(delta).toFloat() - + // Reduce filtering strength in high-frequency areas (preserve detail) val complexityFactor = if (complexity > 800) 0.3f else 1.0f - + // Increase filtering strength for clear discontinuities val discontinuityFactor = kotlin.math.min(2.0f, discontinuityMagnitude / 1000.0f) - + return baseStrength * complexityFactor * discontinuityFactor } - + // ENHANCED: Apply Knusperli-style corrections using linear gradient patterns fun applyBoundaryCorrection( samples: IntArray, delta: Long, adaptiveStrength: Float ): IntArray { val result = samples.clone() val correction = (delta * 724 shr 31).toInt() // Apply sqrt(2)/2 weighting like Knusperli - + // Apply linear gradient corrections across boundary for (i in 0 until 8) { val gradientWeight = kLinearGradient[i] * correction / 1024 // Scale from 10-bit fixed-point val sign = if (i < 4) 1 else -1 // Left/right side weighting - + val adjustment = (gradientWeight * sign * adaptiveStrength).toInt() result[i] = (result[i] + adjustment).coerceIn(0, 255) } - + return result } - + // ENHANCED HORIZONTAL DEBLOCKING: Using Knusperli-inspired boundary analysis for (by in 0 until blocksY) { for (bx in 1 until blocksX) { val blockEdgeX = bx * blockSize if (blockEdgeX >= width) continue - + // Process boundary in chunks for better performance val yStart = by * blockSize val yEnd = minOf((by + 1) * blockSize, height) - + for (y in yStart until yEnd step 2) { // Process 2 lines at a time if (y + 1 >= height) continue - + // Sample 8x2 pixel region across boundary for both lines val samples1 = IntArray(24) // 8 pixels × 3 channels (RGB) val samples2 = IntArray(24) - + for (i in 0 until 8) { val x = blockEdgeX - 4 + i val rgb1 = getPixelBulk(x, y) val rgb2 = getPixelBulk(x, y + 1) - + samples1[i * 3] = rgb1[0] // R - samples1[i * 3 + 1] = rgb1[1] // G + samples1[i * 3 + 1] = rgb1[1] // G samples1[i * 3 + 2] = rgb1[2] // B 
samples2[i * 3] = rgb2[0] samples2[i * 3 + 1] = rgb2[1] samples2[i * 3 + 2] = rgb2[2] } - - // Analyze each color channel separately + + // Analyse each colour channel separately for (c in 0..2) { val channelSamples1 = IntArray(8) { samples1[it * 3 + c] } val channelSamples2 = IntArray(8) { samples2[it * 3 + c] } - - val (delta1, hfPenalty1) = analyzeBoundaryDiscontinuity(channelSamples1) - val (delta2, hfPenalty2) = analyzeBoundaryDiscontinuity(channelSamples2) - - // Skip if very small discontinuity (early exit optimization) + + val (delta1, hfPenalty1) = analyseBoundaryDiscontinuity(channelSamples1) + val (delta2, hfPenalty2) = analyseBoundaryDiscontinuity(channelSamples2) + + // Skip if very small discontinuity (early exit optimisation) if (kotlin.math.abs(delta1) < 50 && kotlin.math.abs(delta2) < 50) continue - + // Calculate adaptive filtering strength val adaptiveStrength1 = calculateAdaptiveStrength(strength, hfPenalty1, delta1) val adaptiveStrength2 = calculateAdaptiveStrength(strength, hfPenalty2, delta2) - + // Apply corrections if strength is significant if (adaptiveStrength1 > 0.05f) { val corrected1 = applyBoundaryCorrection(channelSamples1, delta1, adaptiveStrength1) @@ -2412,7 +2351,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { samples1[i * 3 + c] = corrected1[i] } } - + if (adaptiveStrength2 > 0.05f) { val corrected2 = applyBoundaryCorrection(channelSamples2, delta2, adaptiveStrength2) for (i in 0 until 8) { @@ -2420,7 +2359,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } } - + // Write back corrected pixels in bulk for (i in 2..5) { // Only write middle 4 pixels to avoid artifacts val x = blockEdgeX - 4 + i @@ -2432,28 +2371,28 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } } - + // ENHANCED VERTICAL DEBLOCKING: Same approach for horizontal block boundaries for (by in 1 until blocksY) { for (bx in 0 until blocksX) { val blockEdgeY = by * blockSize if (blockEdgeY >= height) continue - + val xStart = bx * blockSize 
val xEnd = minOf((bx + 1) * blockSize, width) - + for (x in xStart until xEnd step 2) { if (x + 1 >= width) continue - + // Sample 8x2 pixel region across vertical boundary val samples1 = IntArray(24) val samples2 = IntArray(24) - + for (i in 0 until 8) { val y = blockEdgeY - 4 + i val rgb1 = getPixelBulk(x, y) val rgb2 = getPixelBulk(x + 1, y) - + samples1[i * 3] = rgb1[0] samples1[i * 3 + 1] = rgb1[1] samples1[i * 3 + 2] = rgb1[2] @@ -2461,27 +2400,27 @@ class GraphicsJSR223Delegate(private val vm: VM) { samples2[i * 3 + 1] = rgb2[1] samples2[i * 3 + 2] = rgb2[2] } - + // Same boundary analysis and correction as horizontal for (c in 0..2) { val channelSamples1 = IntArray(8) { samples1[it * 3 + c] } val channelSamples2 = IntArray(8) { samples2[it * 3 + c] } - - val (delta1, hfPenalty1) = analyzeBoundaryDiscontinuity(channelSamples1) - val (delta2, hfPenalty2) = analyzeBoundaryDiscontinuity(channelSamples2) - + + val (delta1, hfPenalty1) = analyseBoundaryDiscontinuity(channelSamples1) + val (delta2, hfPenalty2) = analyseBoundaryDiscontinuity(channelSamples2) + if (kotlin.math.abs(delta1) < 50 && kotlin.math.abs(delta2) < 50) continue - + val adaptiveStrength1 = calculateAdaptiveStrength(strength, hfPenalty1, delta1) val adaptiveStrength2 = calculateAdaptiveStrength(strength, hfPenalty2, delta2) - + if (adaptiveStrength1 > 0.05f) { val corrected1 = applyBoundaryCorrection(channelSamples1, delta1, adaptiveStrength1) for (i in 0 until 8) { samples1[i * 3 + c] = corrected1[i] } } - + if (adaptiveStrength2 > 0.05f) { val corrected2 = applyBoundaryCorrection(channelSamples2, delta2, adaptiveStrength2) for (i in 0 until 8) { @@ -2489,7 +2428,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } } - + // Write back corrected pixels for (i in 2..5) { val y = blockEdgeY - 4 + i @@ -2504,33 +2443,33 @@ class GraphicsJSR223Delegate(private val vm: VM) { } /** - * Bulk write RGB block data to VM memory + * Bulk write RGB block data to VM memory */ private fun 
bulkWriteRGB(destAddr: Long, rgbData: IntArray, width: Int, height: Int, startX: Int, startY: Int, blockWidth: Int, blockHeight: Int, addrIncVec: Int) { val (memspace, baseOffset) = vm.translateAddr(destAddr) - + if (memspace is UnsafePtr && addrIncVec == 1) { - // Optimized path for user memory with forward addressing + // Optimised path for user memory with forward addressing for (dy in 0 until blockHeight) { val y = startY + dy if (y >= height) break - + val rowStartX = kotlin.math.max(0, startX) val rowEndX = kotlin.math.min(width, startX + blockWidth) val rowPixels = rowEndX - rowStartX - + if (rowPixels > 0) { val srcRowOffset = dy * blockWidth * 3 + (rowStartX - startX) * 3 val dstRowOffset = baseOffset + (y * width + rowStartX) * 3 val rowBytes = rowPixels * 3 - + // Convert IntArray to ByteArray for this row val rowBuffer = ByteArray(rowBytes) for (i in 0 until rowBytes) { rowBuffer[i] = rgbData[srcRowOffset + i].toByte() } - + // Bulk write the row UnsafeHelper.memcpyRaw( rowBuffer, UnsafeHelper.getArrayOffset(rowBuffer), @@ -2546,7 +2485,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { if (x < width && y < height) { val rgbIdx = (dy * blockWidth + dx) * 3 val bufferOffset = (y.toLong() * width + x) * 3 - + vm.poke(destAddr + bufferOffset * addrIncVec, rgbData[rgbIdx].toByte()) vm.poke(destAddr + (bufferOffset + 1) * addrIncVec, rgbData[rgbIdx + 1].toByte()) vm.poke(destAddr + (bufferOffset + 2) * addrIncVec, rgbData[rgbIdx + 2].toByte()) @@ -2559,13 +2498,13 @@ class GraphicsJSR223Delegate(private val vm: VM) { /** * Hardware-accelerated TEV frame decoder for YCoCg-R 4:2:0 format * Decodes compressed TEV block data directly to framebuffer - * + * * @param blockDataPtr Pointer to decompressed TEV block data * @param currentRGBAddr Address of current frame RGB buffer (24-bit: R,G,B per pixel) * @param prevRGBAddr Address of previous frame RGB buffer (for motion compensation) * @param width Frame width in pixels * @param height Frame height in pixels 
- * @param quality Quantization quality level (0-7) + * @param quality Quantisation quality level (0-7) * @param frameCounter Frame counter for temporal patterns */ fun tevDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long, @@ -2581,7 +2520,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { var readPtr = blockDataPtr - // decide increment "direction" by the sign of the pointer + // decide increment "direction" by the sign of the pointer val prevAddrIncVec = if (prevRGBAddr >= 0) 1 else -1 val thisAddrIncVec = if (currentRGBAddr >= 0) 1 else -1 @@ -2594,13 +2533,13 @@ class GraphicsJSR223Delegate(private val vm: VM) { val blockModes = IntArray(blocksX * blocksY) val motionVectors = Array(blocksX * blocksY) { intArrayOf(0, 0) } val rateControlFactors = FloatArray(blocksX * blocksY) - + // Collect all blocks first var tempReadPtr = readPtr for (by in 0 until blocksY) { for (bx in 0 until blocksX) { val blockIndex = by * blocksX + bx - + // Read TEV block header to get rate control factor val headerBuffer = ByteArray(11) val (memspace, offset) = vm.translateAddr(tempReadPtr) @@ -2613,7 +2552,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { headerBuffer[i] = vm.peek(tempReadPtr + i) ?: 0 } } - + val mode = headerBuffer[0].toUint() val mvX = ((headerBuffer[1].toUint()) or ((headerBuffer[2].toUint()) shl 8)).toShort().toInt() val mvY = ((headerBuffer[3].toUint()) or ((headerBuffer[4].toUint()) shl 8)).toShort().toInt() @@ -2622,20 +2561,20 @@ class GraphicsJSR223Delegate(private val vm: VM) { ((headerBuffer[7].toUint()) shl 16) or ((headerBuffer[8].toUint()) shl 24)) tempReadPtr += 11 // Skip header - + blockModes[blockIndex] = mode.toInt() motionVectors[blockIndex] = intArrayOf(mvX, mvY) rateControlFactors[blockIndex] = rateControlFactor - + // TEV format always has 768 bytes of DCT coefficients per block (fixed size) val coeffShortArray = ShortArray(384) // 256 Y + 64 Co + 64 Cg = 384 shorts - + // Use bulk read like the original 
implementation vm.bulkPeekShort(tempReadPtr.toInt(), coeffShortArray, 768) tempReadPtr += 768 - + when (mode.toInt()) { - 0x01, 0x02 -> { // INTRA or INTER - store raw coefficients for boundary optimization + 0x01, 0x02 -> { // INTRA or INTER - store raw coefficients for boundary optimisation yBlocks[blockIndex] = coeffShortArray.sliceArray(0 until 256) coBlocks[blockIndex] = coeffShortArray.sliceArray(256 until 320) cgBlocks[blockIndex] = coeffShortArray.sliceArray(320 until 384) @@ -2644,9 +2583,9 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } } - - // PASS 2: Apply proper knusperli boundary optimization (Google's algorithm) - val (optimizedYBlocks, optimizedCoBlocks, optimizedCgBlocks) = applyKnusperliOptimization( + + // PASS 2: Apply proper knusperli boundary optimisation (Google's algorithm) + val (optimisedYBlocks, optimisedCoBlocks, optimisedCgBlocks) = tevApplyKnusperliOptimisation( yBlocks, coBlocks, cgBlocks, if (tevVersion == 3) QUANT_TABLE_Y else QUANT_TABLE_Y, if (tevVersion == 3) QUANT_TABLE_C else QUANT_TABLE_C, @@ -2654,46 +2593,46 @@ class GraphicsJSR223Delegate(private val vm: VM) { qY, qCo, qCg, rateControlFactors, blocksX, blocksY ) - - // PASS 3: Convert optimized blocks to RGB and output + + // PASS 3: Convert optimised blocks to RGB and output for (by in 0 until blocksY) { for (bx in 0 until blocksX) { val blockIndex = by * blocksX + bx val startX = bx * 16 val startY = by * 16 - + when (blockModes[blockIndex]) { 0x00 -> { // SKIP - copy from previous frame - handleSkipBlockTwoPass(startX, startY, currentRGBAddr, prevRGBAddr, width, height, thisAddrIncVec, prevAddrIncVec) + tevHandleSkipBlockTwoPass(startX, startY, currentRGBAddr, prevRGBAddr, width, height, thisAddrIncVec, prevAddrIncVec) } 0x03 -> { // MOTION - copy with motion vector val mv = motionVectors[blockIndex] - handleMotionBlockTwoPass(startX, startY, mv[0], mv[1], currentRGBAddr, prevRGBAddr, width, height, thisAddrIncVec, prevAddrIncVec, debugMotionVectors) + 
tevHandleMotionBlockTwoPass(startX, startY, mv[0], mv[1], currentRGBAddr, prevRGBAddr, width, height, thisAddrIncVec, prevAddrIncVec, debugMotionVectors) } - 0x01, 0x02 -> { // INTRA/INTER - use optimized DCT blocks - val yBlock = optimizedYBlocks[blockIndex] - val coBlock = optimizedCoBlocks[blockIndex] - val cgBlock = optimizedCgBlocks[blockIndex] - + 0x01, 0x02 -> { // INTRA/INTER - use optimised DCT blocks + val yBlock = optimisedYBlocks[blockIndex] + val coBlock = optimisedCoBlocks[blockIndex] + val cgBlock = optimisedCgBlocks[blockIndex] + if (yBlock != null && coBlock != null && cgBlock != null) { // Skip INTER motion compensation for now (debugging) // TODO: Implement proper motion compensation for two-pass mode // if (blockModes[blockIndex] == 0x02) { // val mv = motionVectors[blockIndex] - // applyMotionCompensationTwoPass(yBlock, coBlock, cgBlock, startX, startY, mv[0], mv[1], prevRGBAddr, width, height, prevAddrIncVec) + // tevApplyMotionCompensationTwoPass(yBlock, coBlock, cgBlock, startX, startY, mv[0], mv[1], prevRGBAddr, width, height, prevAddrIncVec) // } - - // Use IDCT on knusperli-optimized coefficients (coefficients are already optimally dequantized) - val yPixels = tevIdct16x16_fromOptimizedCoeffs(yBlock) - val coPixels = tevIdct8x8_fromOptimizedCoeffs(coBlock) - val cgPixels = tevIdct8x8_fromOptimizedCoeffs(cgBlock) - + + // Use IDCT on knusperli-optimised coefficients (coefficients are already optimally dequantised) + val yPixels = tevIdct16x16_fromOptimisedCoeffs(yBlock) + val coPixels = tevIdct8x8_fromOptimisedCoeffs(coBlock) + val cgPixels = tevIdct8x8_fromOptimisedCoeffs(cgBlock) + val rgbData = if (tevVersion == 3) { tevIctcpToRGB(yPixels, coPixels, cgPixels) } else { tevYcocgToRGB(yPixels, coPixels, cgPixels) } - + bulkWriteRGB(currentRGBAddr, rgbData, width, height, startX, startY, 16, 16, thisAddrIncVec) } } @@ -2730,10 +2669,10 @@ class GraphicsJSR223Delegate(private val vm: VM) { when (mode) { - 0x00 -> { // TEV_MODE_SKIP - copy 
RGB from previous frame (optimized with memcpy) + 0x00 -> { // TEV_MODE_SKIP - copy RGB from previous frame (optimised with memcpy) // Check if we can copy the entire block at once (no clipping) if (startX + 16 <= width && startY + 16 <= height) { - // Optimized case: copy entire 16x16 block with row-by-row memcpy + // Optimised case: copy entire 16x16 block with row-by-row memcpy for (dy in 0 until 16) { val srcRowOffset = ((startY + dy).toLong() * width + startX) * 3 val dstRowOffset = srcRowOffset @@ -2744,7 +2683,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { ) } } else { - // Optimized fallback using row-by-row copying for boundary blocks + // Optimised fallback using row-by-row copying for boundary blocks for (dy in 0 until 16) { val y = startY + dy if (y < height) { @@ -2771,7 +2710,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { readPtr += 768 } - 0x03 -> { // TEV_MODE_MOTION - motion compensation with RGB (optimized with memcpy) + 0x03 -> { // TEV_MODE_MOTION - motion compensation with RGB (optimised with memcpy) if (debugMotionVectors) { // Debug mode: use original pixel-by-pixel for motion vector visualization for (dy in 0 until 16) { @@ -2785,7 +2724,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { val dstPixelOffset = y.toLong() * width + x val dstRgbOffset = dstPixelOffset * 3 - // Debug: Color INTER blocks by motion vector magnitude + // Debug: Colour INTER blocks by motion vector magnitude val mvMagnitude = kotlin.math.sqrt((mvX * mvX + mvY * mvY).toDouble()).toInt() val intensity = (mvMagnitude * 8).coerceIn(0, 255) // Scale for visibility @@ -2796,7 +2735,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } } else { - // Optimized motion compensation + // Optimised motion compensation val refStartX = startX + mvX val refStartY = startY + mvY @@ -2804,7 +2743,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { if (startX + 16 <= width && startY + 16 <= height && refStartX >= 0 && refStartY >= 0 && refStartX + 16 
<= width && refStartY + 16 <= height) { - // Optimized case: copy entire 16x16 block with row-by-row memcpy + // Optimised case: copy entire 16x16 block with row-by-row memcpy for (dy in 0 until 16) { val srcRowOffset = ((refStartY + dy).toLong() * width + refStartX) * 3 val dstRowOffset = ((startY + dy).toLong() * width + startX) * 3 @@ -2864,8 +2803,8 @@ class GraphicsJSR223Delegate(private val vm: VM) { } 0x01 -> { // TEV_MODE_INTRA - Full YCoCg-R DCT decode (no motion compensation) - // Regular lossy mode: quantized int16 coefficients - // Optimized bulk reading of all DCT coefficients: Y(256×2) + Co(64×2) + Cg(64×2) = 768 bytes + // Regular lossy mode: quantised int16 coefficients + // Optimised bulk reading of all DCT coefficients: Y(256×2) + Co(64×2) + Cg(64×2) = 768 bytes val coeffShortArray = ShortArray(384) // Total coefficients: 256 + 64 + 64 = 384 shorts vm.bulkPeekShort(readPtr.toInt(), coeffShortArray, 768) readPtr += 768 @@ -2889,7 +2828,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { 0x02 -> { // TEV_MODE_INTER - Motion compensation + residual DCT // Step 1: Read residual DCT coefficients - // Optimized bulk reading of all DCT coefficients: Y(256×2) + Co(64×2) + Cg(64×2) = 768 bytes + // Optimised bulk reading of all DCT coefficients: Y(256×2) + Co(64×2) + Cg(64×2) = 768 bytes val coeffShortArray = ShortArray(384) // Total coefficients: 256 + 64 + 64 = 384 shorts vm.bulkPeekShort(readPtr.toInt(), coeffShortArray, 768) readPtr += 768 @@ -3030,7 +2969,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } } else { - // Optimized bulk write for normal operation + // Optimised bulk write for normal operation bulkWriteRGB(currentRGBAddr, finalRgb, width, height, startX, startY, 16, 16, thisAddrIncVec) } } @@ -3058,7 +2997,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } } - + // Apply enhanced deblocking filter if enabled to reduce blocking artifacts if (enableDeblocking) { tevDeblockingFilterEnhanced(currentRGBAddr, width, 
height) @@ -3099,104 +3038,104 @@ class GraphicsJSR223Delegate(private val vm: VM) { } // Helper functions for motion compensation and block handling in two-pass mode - private fun handleSkipBlockTwoPass(startX: Int, startY: Int, currentRGBAddr: Long, prevRGBAddr: Long, - width: Int, height: Int, thisAddrIncVec: Int, prevAddrIncVec: Int) { + private fun tevHandleSkipBlockTwoPass(startX: Int, startY: Int, currentRGBAddr: Long, prevRGBAddr: Long, + width: Int, height: Int, thisAddrIncVec: Int, prevAddrIncVec: Int) { // Copy 16x16 block from previous frame for (py in 0 until 16) { val y = startY + py if (y >= height) break - + for (px in 0 until 16) { val x = startX + px if (x >= width) break - + val offset = (y * width + x) * 3 val prevR = vm.peek(prevRGBAddr + offset * prevAddrIncVec) ?: 0 val prevG = vm.peek(prevRGBAddr + (offset + 1) * prevAddrIncVec) ?: 0 val prevB = vm.peek(prevRGBAddr + (offset + 2) * prevAddrIncVec) ?: 0 - + vm.poke(currentRGBAddr + offset * thisAddrIncVec, prevR) vm.poke(currentRGBAddr + (offset + 1) * thisAddrIncVec, prevG) vm.poke(currentRGBAddr + (offset + 2) * thisAddrIncVec, prevB) } } } - - private fun handleMotionBlockTwoPass(startX: Int, startY: Int, mvX: Int, mvY: Int, - currentRGBAddr: Long, prevRGBAddr: Long, - width: Int, height: Int, thisAddrIncVec: Int, prevAddrIncVec: Int, - debugMotionVectors: Boolean) { + + private fun tevHandleMotionBlockTwoPass(startX: Int, startY: Int, mvX: Int, mvY: Int, + currentRGBAddr: Long, prevRGBAddr: Long, + width: Int, height: Int, thisAddrIncVec: Int, prevAddrIncVec: Int, + debugMotionVectors: Boolean) { // Copy 16x16 block with motion compensation for (py in 0 until 16) { val y = startY + py if (y >= height) break - + for (px in 0 until 16) { val x = startX + px if (x >= width) break - + val srcX = (x + mvX).coerceIn(0, width - 1) val srcY = (y + mvY).coerceIn(0, height - 1) - + val srcOffset = (srcY * width + srcX) * 3 val dstOffset = (y * width + x) * 3 - + val r = vm.peek(prevRGBAddr + 
srcOffset * prevAddrIncVec) ?: 0 val g = vm.peek(prevRGBAddr + (srcOffset + 1) * prevAddrIncVec) ?: 0 val b = vm.peek(prevRGBAddr + (srcOffset + 2) * prevAddrIncVec) ?: 0 - + vm.poke(currentRGBAddr + dstOffset * thisAddrIncVec, r) vm.poke(currentRGBAddr + (dstOffset + 1) * thisAddrIncVec, g) vm.poke(currentRGBAddr + (dstOffset + 2) * thisAddrIncVec, b) } } } - - /*private fun applyMotionCompensationTwoPass(yBlock: ShortArray, coBlock: ShortArray, cgBlock: ShortArray, + + /*private fun tevApplyMotionCompensationTwoPass(yBlock: ShortArray, coBlock: ShortArray, cgBlock: ShortArray, startX: Int, startY: Int, mvX: Int, mvY: Int, prevRGBAddr: Long, width: Int, height: Int, prevAddrIncVec: Int) { // For INTER blocks, add residual to motion-compensated reference // This is a simplified version - full implementation would extract reference block and add residuals - + // Apply motion compensation by reading reference pixels and converting to YCoCg-R coefficients for (py in 0 until 16) { val y = startY + py if (y >= height) break - + for (px in 0 until 16) { val x = startX + px if (x >= width) break - + val srcX = (x + mvX).coerceIn(0, width - 1) val srcY = (y + mvY).coerceIn(0, height - 1) - + val srcOffset = (srcY * width + srcX) * 3 val r = vm.peek(prevRGBAddr + srcOffset * prevAddrIncVec)?.toInt() ?: 0 val g = vm.peek(prevRGBAddr + (srcOffset + 1) * prevAddrIncVec)?.toInt() ?: 0 val b = vm.peek(prevRGBAddr + (srcOffset + 2) * prevAddrIncVec)?.toInt() ?: 0 - + // Convert reference RGB to YCoCg-R and add residual val co = r - b - val tmp = b + (co / 2) + val tmp = b + (co / 2) val cg = g - tmp val refY = tmp + (cg / 2) - + val yIdx = py * 16 + px if (yIdx < yBlock.size) { yBlock[yIdx] += refY.toFloat() } - + val cIdx = (py / 2) * 8 + (px / 2) if (cIdx < coBlock.size) { - coBlock[cIdx] += co.toFloat() + coBlock[cIdx] += co.toFloat() cgBlock[cIdx] += cg.toFloat() } } } }*/ - // Proper knusperli boundary-aware DCT optimization based on Google's algorithm - private fun 
applyKnusperliOptimization( + // Proper knusperli boundary-aware DCT optimisation based on Google's algorithm + private fun tevApplyKnusperliOptimisation( yBlocks: Array, coBlocks: Array, cgBlocks: Array, quantTableY: IntArray, quantTableCo: IntArray, quantTableCg: IntArray, qY: Int, qCo: Int, qCg: Int, rateControlFactors: FloatArray, @@ -3207,19 +3146,19 @@ class GraphicsJSR223Delegate(private val vm: VM) { val kAlphaSqrt2 = intArrayOf(1024, 1448, 1448, 1448, 1448, 1448, 1448, 1448) val kHalfSqrt2 = 724 // sqrt(2)/2 in 10-bit fixed-point - // Convert to dequantized FloatArrays and apply knusperli optimization - val optimizedYBlocks = convertAndOptimize16x16Blocks(yBlocks, quantTableY, qY, rateControlFactors, blocksX, blocksY, kLinearGradient, kAlphaSqrt2, kHalfSqrt2) - val optimizedCoBlocks = convertAndOptimize8x8Blocks(coBlocks, quantTableCo, qCo, rateControlFactors, blocksX, blocksY, kLinearGradient, kAlphaSqrt2, kHalfSqrt2) - val optimizedCgBlocks = convertAndOptimize8x8Blocks(cgBlocks, quantTableCg, qCg, rateControlFactors, blocksX, blocksY, kLinearGradient, kAlphaSqrt2, kHalfSqrt2) + // Convert to dequantised FloatArrays and apply knusperli optimisation + val optimisedYBlocks = tevConvertAndOptimise16x16Blocks(yBlocks, quantTableY, qY, rateControlFactors, blocksX, blocksY, kLinearGradient, kAlphaSqrt2, kHalfSqrt2) + val optimisedCoBlocks = tevConvertAndOptimise8x8Blocks(coBlocks, quantTableCo, qCo, rateControlFactors, blocksX, blocksY, kLinearGradient, kAlphaSqrt2, kHalfSqrt2) + val optimisedCgBlocks = tevConvertAndOptimise8x8Blocks(cgBlocks, quantTableCg, qCg, rateControlFactors, blocksX, blocksY, kLinearGradient, kAlphaSqrt2, kHalfSqrt2) - return Triple(optimizedYBlocks, optimizedCoBlocks, optimizedCgBlocks) + return Triple(optimisedYBlocks, optimisedCoBlocks, optimisedCgBlocks) } - // IDCT functions for knusperli-optimized coefficients (coefficients are already dequantized) - private fun tevIdct16x16_fromOptimizedCoeffs(coeffs: FloatArray): IntArray { + // 
IDCT functions for knusperli-optimised coefficients (coefficients are already dequantised) + private fun tevIdct16x16_fromOptimisedCoeffs(coeffs: FloatArray): IntArray { val result = IntArray(256) // 16x16 - - // Apply 2D IDCT directly to optimized coefficients (fix u/v indexing) + + // Apply 2D IDCT directly to optimised coefficients (fix u/v indexing) for (y in 0 until 16) { for (x in 0 until 16) { var sum = 0.0 @@ -3238,11 +3177,11 @@ class GraphicsJSR223Delegate(private val vm: VM) { } return result } - - private fun tevIdct8x8_fromOptimizedCoeffs(coeffs: FloatArray): IntArray { + + private fun tevIdct8x8_fromOptimisedCoeffs(coeffs: FloatArray): IntArray { val result = IntArray(64) // 8x8 - - // Apply 2D IDCT directly to optimized coefficients (fix u/v indexing) + + // Apply 2D IDCT directly to optimised coefficients (fix u/v indexing) for (y in 0 until 8) { for (x in 0 until 8) { var sum = 0.0 @@ -3262,31 +3201,31 @@ class GraphicsJSR223Delegate(private val vm: VM) { } return result } - - // Convert and optimize functions for proper knusperli implementation + + // Convert and optimise functions for proper knusperli implementation // Direct 16x16 block processing for Y blocks (no subdivision needed) - private fun convertAndOptimize16x16Blocks( + private fun tevConvertAndOptimise16x16Blocks( blocks: Array, quantTable: IntArray, qScale: Int, rateControlFactors: FloatArray, blocksX: Int, blocksY: Int, kLinearGradient: IntArray, kAlphaSqrt2: IntArray, kHalfSqrt2: Int ): Array { val result = Array(blocks.size) { null } - + // Extended constants for 16x16 blocks (based on Google's 8x8 pattern) val kLinearGradient16 = intArrayOf(318, -285, 81, -32, 17, -9, 5, -2, 1, 0, 0, 0, 0, 0, 0, 0) val kAlphaSqrt2_16 = intArrayOf(1024, 1448, 1448, 1448, 1448, 1448, 1448, 1448, 1448, 1448, 1448, 1448, 1448, 1448, 1448, 1448) - - // Apply knusperli boundary optimization to 16x16 blocks - processBlocksWithKnusperli16x16(blocks, quantTable, qScale, rateControlFactors, + + // Apply 
knusperli boundary optimisation to 16x16 blocks + tevProcessBlocksWithKnusperli16x16(blocks, quantTable, qScale, rateControlFactors, blocksX, blocksY, kLinearGradient16, kAlphaSqrt2_16, kHalfSqrt2) - - // Convert optimized ShortArray blocks to FloatArray (dequantized) + + // Convert optimised ShortArray blocks to FloatArray (dequantised) for (blockIndex in 0 until blocks.size) { val block = blocks[blockIndex] if (block != null) { result[blockIndex] = FloatArray(256) // 16x16 = 256 coefficients val rateControlFactor = rateControlFactors[blockIndex] - + for (i in 0 until 256) { val coeffIdx = i.coerceIn(0, quantTable.size - 1) val quantValue = if (i == 0) 1.0f else { @@ -3296,32 +3235,32 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } } - + return result } - - // Optimized 16x16 version of Knusperli processing for Y blocks - private fun processBlocksWithKnusperli16x16( + + // Optimised 16x16 version of Knusperli processing for Y blocks + private fun tevProcessBlocksWithKnusperli16x16( blocks: Array, quantTable: IntArray, qScale: Int, rateControlFactors: FloatArray, blocksX: Int, blocksY: Int, kLinearGradient16: IntArray, kAlphaSqrt2_16: IntArray, kHalfSqrt2: Int ) { val coeffsSize = 256 // 16x16 = 256 val numBlocks = blocksX * blocksY - - // OPTIMIZATION 1: Pre-compute quantization values to avoid repeated calculations + + // OPTIMIZATION 1: Pre-compute quantisation values to avoid repeated calculations val quantValues = Array(numBlocks) { IntArray(coeffsSize) } val quantHalfValues = Array(numBlocks) { IntArray(coeffsSize) } - + for (blockIndex in 0 until numBlocks) { val block = blocks[blockIndex] if (block != null) { val rateControlFactor = rateControlFactors[blockIndex] val qualityMult = jpeg_quality_to_mult(qScale * rateControlFactor) - + quantValues[blockIndex][0] = 1 // DC is lossless - quantHalfValues[blockIndex][0] = 0 // DC has no quantization interval - + quantHalfValues[blockIndex][0] = 0 // DC has no quantisation interval + for (i in 1 until 
coeffsSize) { val coeffIdx = i.coerceIn(0, quantTable.size - 1) val quant = (quantTable[coeffIdx] * qualityMult).coerceIn(1f, 255f).toInt() @@ -3330,49 +3269,49 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } } - + // OPTIMIZATION 2: Use single-allocation arrays with block-stride access val blocksMid = Array(numBlocks) { IntArray(coeffsSize) } val blocksOff = Array(numBlocks) { LongArray(coeffsSize) } // Keep Long for accumulation - - // Step 1: Setup dequantized values and initialize adjustments (BULK OPTIMIZED) + + // Step 1: Setup dequantised values and initialize adjustments (BULK OPTIMIZED) for (blockIndex in 0 until numBlocks) { val block = blocks[blockIndex] if (block != null) { val mid = blocksMid[blockIndex] val off = blocksOff[blockIndex] val quantVals = quantValues[blockIndex] - - // OPTIMIZATION 9: Bulk dequantization using vectorized operations - bulkDequantizeCoefficients(block, mid, quantVals, coeffsSize) - + + // OPTIMIZATION 9: Bulk dequantisation using vectorized operations + tevBulkDequantiseCoefficients(block, mid, quantVals, coeffsSize) + // OPTIMIZATION 10: Bulk zero initialization of adjustments off.fill(0L) } } - + // OPTIMIZATION 7: Combined boundary analysis loops for better cache locality // Process horizontal and vertical boundaries in interleaved pattern for (by in 0 until blocksY) { for (bx in 0 until blocksX) { val currentIndex = by * blocksX + bx - + // Horizontal boundary (if not rightmost column) if (bx < blocksX - 1) { val rightIndex = currentIndex + 1 if (blocks[currentIndex] != null && blocks[rightIndex] != null) { - analyzeHorizontalBoundary16x16( - currentIndex, rightIndex, blocksMid, blocksOff, + tevAnalyseHorizontalBoundary16x16( + currentIndex, rightIndex, blocksMid, blocksOff, kLinearGradient16, kAlphaSqrt2_16 ) } } - + // Vertical boundary (if not bottom row) if (by < blocksY - 1) { val bottomIndex = currentIndex + blocksX if (blocks[currentIndex] != null && blocks[bottomIndex] != null) { - 
analyzeVerticalBoundary16x16( + tevAnalyseVerticalBoundary16x16( currentIndex, bottomIndex, blocksMid, blocksOff, kLinearGradient16, kAlphaSqrt2_16 ) @@ -3380,13 +3319,13 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } } - - // Step 4: Apply corrections and clamp to quantization intervals (BULK OPTIMIZED) + + // Step 4: Apply corrections and clamp to quantisation intervals (BULK OPTIMIZED) for (blockIndex in 0 until numBlocks) { val block = blocks[blockIndex] if (block != null) { - // OPTIMIZATION 11: Bulk apply corrections and quantization clamping - bulkApplyCorrectionsAndClamp( + // OPTIMIZATION 11: Bulk apply corrections and quantisation clamping + tevBulkApplyCorrectionsAndClamp( block, blocksMid[blockIndex], blocksOff[blockIndex], quantValues[blockIndex], quantHalfValues[blockIndex], kHalfSqrt2, coeffsSize @@ -3394,20 +3333,20 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } } - + // BULK MEMORY ACCESS HELPER FUNCTIONS FOR KNUSPERLI - + /** - * OPTIMIZATION 9: Bulk dequantization using vectorized operations - * Performs coefficient * quantization in optimized chunks + * OPTIMIZATION 9: Bulk dequantisation using vectorized operations + * Performs coefficient * quantisation in optimised chunks */ - private fun bulkDequantizeCoefficients( + private fun tevBulkDequantiseCoefficients( coeffs: ShortArray, result: IntArray, quantVals: IntArray, size: Int ) { // Process in chunks of 16 for better vectorization (CPU can process multiple values per instruction) var i = 0 val chunks = size and 0xFFFFFFF0.toInt() // Round down to nearest 16 - + // Bulk process 16 coefficients at a time for SIMD-friendly operations while (i < chunks) { // Manual loop unrolling for better performance @@ -3429,26 +3368,26 @@ class GraphicsJSR223Delegate(private val vm: VM) { result[i + 15] = coeffs[i + 15].toInt() * quantVals[i + 15] i += 16 } - + // Handle remaining coefficients while (i < size) { result[i] = coeffs[i].toInt() * quantVals[i] i++ } } - + /** - * 
OPTIMIZATION 11: Bulk apply corrections and quantization clamping + * OPTIMIZATION 11: Bulk apply corrections and quantisation clamping * Vectorized correction application with proper bounds checking */ - private fun bulkApplyCorrectionsAndClamp( + private fun tevBulkApplyCorrectionsAndClamp( block: ShortArray, mid: IntArray, off: LongArray, quantVals: IntArray, quantHalf: IntArray, kHalfSqrt2: Int, size: Int ) { var i = 0 val chunks = size and 0xFFFFFFF0.toInt() // Process in chunks of 16 - + // Bulk process corrections in chunks for better CPU pipeline utilization while (i < chunks) { // Apply corrections with sqrt(2)/2 weighting - bulk operations @@ -3460,7 +3399,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { val corr5 = ((off[i + 5] * kHalfSqrt2) shr 31).toInt() val corr6 = ((off[i + 6] * kHalfSqrt2) shr 31).toInt() val corr7 = ((off[i + 7] * kHalfSqrt2) shr 31).toInt() - + mid[i] += corr0 mid[i + 1] += corr1 mid[i + 2] += corr2 @@ -3469,8 +3408,8 @@ class GraphicsJSR223Delegate(private val vm: VM) { mid[i + 5] += corr5 mid[i + 6] += corr6 mid[i + 7] += corr7 - - // Apply quantization interval clamping - bulk operations + + // Apply quantisation interval clamping - bulk operations val orig0 = block[i].toInt() * quantVals[i] val orig1 = block[i + 1].toInt() * quantVals[i + 1] val orig2 = block[i + 2].toInt() * quantVals[i + 2] @@ -3479,7 +3418,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { val orig5 = block[i + 5].toInt() * quantVals[i + 5] val orig6 = block[i + 6].toInt() * quantVals[i + 6] val orig7 = block[i + 7].toInt() * quantVals[i + 7] - + mid[i] = mid[i].coerceIn(orig0 - quantHalf[i], orig0 + quantHalf[i]) mid[i + 1] = mid[i + 1].coerceIn(orig1 - quantHalf[i + 1], orig1 + quantHalf[i + 1]) mid[i + 2] = mid[i + 2].coerceIn(orig2 - quantHalf[i + 2], orig2 + quantHalf[i + 2]) @@ -3488,8 +3427,8 @@ class GraphicsJSR223Delegate(private val vm: VM) { mid[i + 5] = mid[i + 5].coerceIn(orig5 - quantHalf[i + 5], orig5 + quantHalf[i + 5]) mid[i + 
6] = mid[i + 6].coerceIn(orig6 - quantHalf[i + 6], orig6 + quantHalf[i + 6]) mid[i + 7] = mid[i + 7].coerceIn(orig7 - quantHalf[i + 7], orig7 + quantHalf[i + 7]) - - // Convert back to quantized coefficients - bulk operations + + // Convert back to quantised coefficients - bulk operations val quantMax = Short.MAX_VALUE.toInt() val quantMin = Short.MIN_VALUE.toInt() block[i] = (mid[i] / quantVals[i]).coerceIn(quantMin, quantMax).toShort() @@ -3500,24 +3439,24 @@ class GraphicsJSR223Delegate(private val vm: VM) { block[i + 5] = (mid[i + 5] / quantVals[i + 5]).coerceIn(quantMin, quantMax).toShort() block[i + 6] = (mid[i + 6] / quantVals[i + 6]).coerceIn(quantMin, quantMax).toShort() block[i + 7] = (mid[i + 7] / quantVals[i + 7]).coerceIn(quantMin, quantMax).toShort() - + i += 8 // Process 8 at a time for the remaining corrections } - + // Handle remaining coefficients (usually 0-15 remaining for 256-coefficient blocks) while (i < size) { mid[i] += ((off[i] * kHalfSqrt2) shr 31).toInt() - + val originalValue = block[i].toInt() * quantVals[i] mid[i] = mid[i].coerceIn(originalValue - quantHalf[i], originalValue + quantHalf[i]) - + block[i] = (mid[i] / quantVals[i]).coerceIn(Short.MIN_VALUE.toInt(), Short.MAX_VALUE.toInt()).toShort() i++ } } - - // OPTIMIZED 16x16 horizontal boundary analysis - private fun analyzeHorizontalBoundary16x16( + + // OPTIMIZED 16x16 horizontal boundary analysis + private fun tevAnalyseHorizontalBoundary16x16( leftBlockIndex: Int, rightBlockIndex: Int, blocksMid: Array, blocksOff: Array, kLinearGradient16: IntArray, kAlphaSqrt2_16: IntArray @@ -3526,13 +3465,13 @@ class GraphicsJSR223Delegate(private val vm: VM) { val rightMid = blocksMid[rightBlockIndex] val leftOff = blocksOff[leftBlockIndex] val rightOff = blocksOff[rightBlockIndex] - + // OPTIMIZATION 4: Process multiple frequencies in single loop for better cache locality for (v in 0 until 8) { // Only low-to-mid frequencies var deltaV = 0L var hfPenalty = 0L val vOffset = v * 16 - + // 
First pass: Calculate boundary discontinuity for (u in 0 until 16) { val idx = vOffset + u @@ -3540,17 +3479,17 @@ class GraphicsJSR223Delegate(private val vm: VM) { val sign = if (u and 1 != 0) -1 else 1 val gi = leftMid[idx] val gj = rightMid[idx] - + deltaV += alpha * (gj - sign * gi) hfPenalty += (u * u) * (gi * gi + gj * gj) } - - // OPTIMIZATION 8: Early exit for very small adjustments + + // OPTIMIZATION 8: Early exit for very small adjustments if (kotlin.math.abs(deltaV) < 100) continue - + // OPTIMIZATION 5: Apply high-frequency damping once per frequency band if (hfPenalty > 1600) deltaV /= 2 - + // Second pass: Apply corrections (BULK OPTIMIZED with unrolling) val correction = deltaV // Bulk apply corrections for 16 coefficients - manually unrolled for performance @@ -3588,9 +3527,9 @@ class GraphicsJSR223Delegate(private val vm: VM) { rightOff[vOffset + 15] -= correction * kLinearGradient16[15] } } - - // OPTIMIZED 16x16 vertical boundary analysis - private fun analyzeVerticalBoundary16x16( + + // OPTIMIZED 16x16 vertical boundary analysis + private fun tevAnalyseVerticalBoundary16x16( topBlockIndex: Int, bottomBlockIndex: Int, blocksMid: Array, blocksOff: Array, kLinearGradient16: IntArray, kAlphaSqrt2_16: IntArray @@ -3599,12 +3538,12 @@ class GraphicsJSR223Delegate(private val vm: VM) { val bottomMid = blocksMid[bottomBlockIndex] val topOff = blocksOff[topBlockIndex] val bottomOff = blocksOff[bottomBlockIndex] - - // OPTIMIZATION 6: Optimized vertical analysis with better cache access pattern + + // OPTIMIZATION 6: Optimised vertical analysis with better cache access pattern for (u in 0 until 16) { // Only low-to-mid frequencies var deltaU = 0L var hfPenalty = 0L - + // First pass: Calculate boundary discontinuity for (v in 0 until 16) { val idx = v * 16 + u @@ -3612,17 +3551,17 @@ class GraphicsJSR223Delegate(private val vm: VM) { val sign = if (v and 1 != 0) -1 else 1 val gi = topMid[idx] val gj = bottomMid[idx] - + deltaU += alpha * (gj - sign * 
gi) hfPenalty += (v * v) * (gi * gi + gj * gj) } - + // Early exit for very small adjustments if (kotlin.math.abs(deltaU) < 100) continue - + // Apply high-frequency damping once per frequency band if (hfPenalty > 1600) deltaU /= 2 - + // Second pass: Apply corrections (BULK OPTIMIZED vertical) val correction = deltaU // Bulk apply corrections for 16 vertical coefficients - manually unrolled @@ -3661,129 +3600,83 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } - private fun convertAndDoNothing( - blocks: Array, quantTable: IntArray, qScale: Int, rateControlFactors: FloatArray, - blocksX: Int, blocksY: Int, - kLinearGradient: IntArray, kAlphaSqrt2: IntArray, kHalfSqrt2: Int - ): Array { - val coeffsSize = 16 * 16 - val numBlocks = blocksX * blocksY - - val blocksMid = Array(numBlocks) { IntArray(coeffsSize) } - - for (blockIndex in 0 until numBlocks) { - val block = blocks[blockIndex] - if (block != null) { - val rateControlFactor = rateControlFactors[blockIndex] - for (i in 0 until coeffsSize) { - val quantIdx = i.coerceIn(0, quantTable.size - 1) - - if (i == 0) { - // DC coefficient: lossless (no quantization) - val dcValue = block[i].toInt() - blocksMid[blockIndex][i] = dcValue - } else { - // AC coefficients: use quantization intervals - val quant = (quantTable[quantIdx] * jpeg_quality_to_mult(qScale * rateControlFactor)).coerceIn(1f, 255f).toInt() - - // Standard dequantized value (midpoint) - blocksMid[blockIndex][i] = block[i].toInt() * quant - } - } - } - } - - val result = Array(blocks.size) { null } - for (blockIndex in 0 until numBlocks) { - val block = blocks[blockIndex] - if (block != null) { - result[blockIndex] = FloatArray(coeffsSize) { i -> - blocksMid[blockIndex][i].toFloat() - } - } - } - - return result - - } - - private fun convertAndOptimize8x8Blocks( + private fun tevConvertAndOptimise8x8Blocks( blocks: Array, quantTable: IntArray, qScale: Int, rateControlFactors: FloatArray, blocksX: Int, blocksY: Int, kLinearGradient: IntArray, 
kAlphaSqrt2: IntArray, kHalfSqrt2: Int ): Array { val coeffsSize = 64 val numBlocks = blocksX * blocksY - - // Step 1: Setup quantization intervals for all blocks (using integers like Google's code) + + // Step 1: Setup quantisation intervals for all blocks (using integers like Google's code) val blocksMid = Array(numBlocks) { IntArray(coeffsSize) } val blocksMin = Array(numBlocks) { IntArray(coeffsSize) } val blocksMax = Array(numBlocks) { IntArray(coeffsSize) } val blocksOff = Array(numBlocks) { LongArray(coeffsSize) } // Long for accumulation - + for (blockIndex in 0 until numBlocks) { val block = blocks[blockIndex] if (block != null) { val rateControlFactor = rateControlFactors[blockIndex] for (i in 0 until coeffsSize) { val quantIdx = i.coerceIn(0, quantTable.size - 1) - + if (i == 0) { - // DC coefficient: lossless (no quantization) + // DC coefficient: lossless (no quantisation) val dcValue = block[i].toInt() blocksMid[blockIndex][i] = dcValue blocksMin[blockIndex][i] = dcValue // No interval for DC blocksMax[blockIndex][i] = dcValue } else { - // AC coefficients: use quantization intervals + // AC coefficients: use quantisation intervals val quant = (quantTable[quantIdx] * jpeg_quality_to_mult(qScale * rateControlFactor)).coerceIn(1f, 255f).toInt() - - // Standard dequantized value (midpoint) + + // Standard dequantised value (midpoint) blocksMid[blockIndex][i] = block[i].toInt() * quant - - // Quantization interval bounds + + // Quantisation interval bounds val halfQuant = quant / 2 blocksMin[blockIndex][i] = blocksMid[blockIndex][i] - halfQuant blocksMax[blockIndex][i] = blocksMid[blockIndex][i] + halfQuant } - + // Initialize adjustment accumulator blocksOff[blockIndex][i] = 0L } } } - + // Step 2: Horizontal continuity analysis for (by in 0 until blocksY) { for (bx in 0 until blocksX - 1) { val leftBlockIndex = by * blocksX + bx val rightBlockIndex = by * blocksX + (bx + 1) - + if (blocks[leftBlockIndex] != null && blocks[rightBlockIndex] != null) { - 
analyzeHorizontalBoundary( - leftBlockIndex, rightBlockIndex, blocksMid, blocksOff, + tevAnalyseHorizontalBoundary8x8( + leftBlockIndex, rightBlockIndex, blocksMid, blocksOff, kLinearGradient, kAlphaSqrt2 ) } } } - - // Step 3: Vertical continuity analysis + + // Step 3: Vertical continuity analysis for (by in 0 until blocksY - 1) { for (bx in 0 until blocksX) { val topBlockIndex = by * blocksX + bx val bottomBlockIndex = (by + 1) * blocksX + bx - + if (blocks[topBlockIndex] != null && blocks[bottomBlockIndex] != null) { - analyzeVerticalBoundary( + tevAnalyseVerticalBoundary8x8( topBlockIndex, bottomBlockIndex, blocksMid, blocksOff, kLinearGradient, kAlphaSqrt2 ) } } } - - // Step 4: Apply corrections and return optimized dequantized coefficients + + // Step 4: Apply corrections and return optimised dequantised coefficients val result = Array(blocks.size) { null } for (blockIndex in 0 until numBlocks) { val block = blocks[blockIndex] @@ -3791,23 +3684,23 @@ class GraphicsJSR223Delegate(private val vm: VM) { result[blockIndex] = FloatArray(coeffsSize) { i -> // Apply corrections with sqrt(2)/2 weighting (Google's exact formula with right shift) blocksMid[blockIndex][i] += ((blocksOff[blockIndex][i] * kHalfSqrt2) shr 31).toInt() - - // Clamp to quantization interval bounds - val optimizedValue = blocksMid[blockIndex][i].coerceIn( - blocksMin[blockIndex][i], + + // Clamp to quantisation interval bounds + val optimisedValue = blocksMid[blockIndex][i].coerceIn( + blocksMin[blockIndex][i], blocksMax[blockIndex][i] ) - - optimizedValue.toFloat() + + optimisedValue.toFloat() } } } - + return result } // BULK OPTIMIZED 8x8 horizontal boundary analysis for chroma channels - private fun analyzeHorizontalBoundary( + private fun tevAnalyseHorizontalBoundary8x8( leftBlockIndex: Int, rightBlockIndex: Int, blocksMid: Array, blocksOff: Array, kLinearGradient: IntArray, kAlphaSqrt2: IntArray @@ -3816,13 +3709,13 @@ class GraphicsJSR223Delegate(private val vm: VM) { val rightMid = 
blocksMid[rightBlockIndex] val leftOff = blocksOff[leftBlockIndex] val rightOff = blocksOff[rightBlockIndex] - + // OPTIMIZATION 12: Process 8x8 boundaries with bulk operations (v < 4 for low-to-mid frequencies) for (v in 0 until 4) { // Only low-to-mid frequencies for 8x8 var deltaV = 0L var hfPenalty = 0L val vOffset = v * 8 - + // First pass: Calculate boundary discontinuity for (u in 0 until 8) { val idx = vOffset + u @@ -3830,17 +3723,17 @@ class GraphicsJSR223Delegate(private val vm: VM) { val sign = if (u and 1 != 0) -1 else 1 val gi = leftMid[idx] val gj = rightMid[idx] - + deltaV += alpha * (gj - sign * gi) hfPenalty += (u * u) * (gi * gi + gj * gj) } - + // Early exit for very small adjustments if (kotlin.math.abs(deltaV) < 100) continue - + // Apply high-frequency damping once per frequency band if (hfPenalty > 400) deltaV /= 2 // 8x8 threshold - + // Second pass: Apply corrections (BULK OPTIMIZED with unrolling for 8x8) val correction = deltaV // Bulk apply corrections for 8 coefficients - manually unrolled for performance @@ -3862,9 +3755,9 @@ class GraphicsJSR223Delegate(private val vm: VM) { rightOff[vOffset + 7] -= correction * kLinearGradient[7] } } - + // BULK OPTIMIZED 8x8 vertical boundary analysis for chroma channels - private fun analyzeVerticalBoundary( + private fun tevAnalyseVerticalBoundary8x8( topBlockIndex: Int, bottomBlockIndex: Int, blocksMid: Array, blocksOff: Array, kLinearGradient: IntArray, kAlphaSqrt2: IntArray @@ -3874,7 +3767,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { val topOff = blocksOff[topBlockIndex] val bottomOff = blocksOff[bottomBlockIndex] - // OPTIMIZATION 13: Optimized vertical analysis for 8x8 with better cache access pattern + // OPTIMIZATION 13: Optimised vertical analysis for 8x8 with better cache access pattern for (u in 0 until 4) { // Only low-to-mid frequencies for 8x8 var deltaU = 0L var hfPenalty = 0L @@ -3919,4 +3812,710 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } + // 
================= TAV (TSVM Advanced Video) Decoder ================= + // DWT-based video codec with ICtCp colour space support + + fun tavDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long, + width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, frameCounter: Int, + debugMotionVectors: Boolean = false, waveletFilter: Int = 1, + decompLevels: Int = 6, enableDeblocking: Boolean = true, + isLossless: Boolean = false, tavVersion: Int = 1) { + + var readPtr = blockDataPtr + + try { + val tilesX = (width + TILE_SIZE_X - 1) / TILE_SIZE_X // 280x224 tiles + val tilesY = (height + TILE_SIZE_Y - 1) / TILE_SIZE_Y + + // Process each tile + for (tileY in 0 until tilesY) { + for (tileX in 0 until tilesX) { + + // Read tile header (9 bytes: mode + mvX + mvY + rcf) + val mode = vm.peek(readPtr).toInt() and 0xFF + readPtr += 1 + val mvX = vm.peekShort(readPtr).toInt() + readPtr += 2 + val mvY = vm.peekShort(readPtr).toInt() + readPtr += 2 + val rcf = vm.peekFloat(readPtr) + readPtr += 4 + + // debug print: raw decompressed bytes + /*print("TAV Decode raw bytes (Frame $frameCounter, mode: ${arrayOf("SKIP", "INTRA", "DELTA")[mode]}): ") + for (i in 0 until 32) { + print("${vm.peek(blockDataPtr + i).toUint().toString(16).uppercase().padStart(2, '0')} ") + } + println("...")*/ + + when (mode) { + 0x00 -> { // TAV_MODE_SKIP + // Copy 280x224 tile from previous frame to current frame + tavCopyTileRGB(tileX, tileY, currentRGBAddr, prevRGBAddr, width, height) + } + 0x01 -> { // TAV_MODE_INTRA + // Decode DWT coefficients directly to RGB buffer + readPtr = tavDecodeDWTIntraTileRGB(readPtr, tileX, tileY, currentRGBAddr, + width, height, qY, qCo, qCg, rcf, + waveletFilter, decompLevels, isLossless, tavVersion) + } + 0x02 -> { // TAV_MODE_DELTA + // Coefficient delta encoding for efficient P-frames + readPtr = tavDecodeDeltaTileRGB(readPtr, tileX, tileY, currentRGBAddr, + width, height, qY, qCo, qCg, rcf, + waveletFilter, decompLevels, isLossless, tavVersion) + } + } + } + } 
+ + } catch (e: Exception) { + println("TAV decode error: ${e.message}") + } + } + + private fun tavDecodeDWTIntraTileRGB(readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long, + width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, rcf: Float, + waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long { + // Now reading padded coefficient tiles (344x288) instead of core tiles (280x224) + val paddedCoeffCount = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y + var ptr = readPtr + + // Read quantised DWT coefficients for padded tile Y, Co, Cg channels (344x288) + val quantisedY = ShortArray(paddedCoeffCount) + val quantisedCo = ShortArray(paddedCoeffCount) + val quantisedCg = ShortArray(paddedCoeffCount) + + // OPTIMIZATION: Bulk read all coefficient data (344x288 * 3 channels * 2 bytes = 594,432 bytes) + val totalCoeffBytes = paddedCoeffCount * 3 * 2L // 3 channels, 2 bytes per short + val coeffBuffer = ByteArray(totalCoeffBytes.toInt()) + UnsafeHelper.memcpyRaw(null, vm.usermem.ptr + ptr, coeffBuffer, UnsafeHelper.getArrayOffset(coeffBuffer), totalCoeffBytes) + + // Convert bulk data to coefficient arrays + var bufferOffset = 0 + for (i in 0 until paddedCoeffCount) { + quantisedY[i] = (((coeffBuffer[bufferOffset + 1].toInt() and 0xFF) shl 8) or (coeffBuffer[bufferOffset].toInt() and 0xFF)).toShort() + bufferOffset += 2 + } + for (i in 0 until paddedCoeffCount) { + quantisedCo[i] = (((coeffBuffer[bufferOffset + 1].toInt() and 0xFF) shl 8) or (coeffBuffer[bufferOffset].toInt() and 0xFF)).toShort() + bufferOffset += 2 + } + for (i in 0 until paddedCoeffCount) { + quantisedCg[i] = (((coeffBuffer[bufferOffset + 1].toInt() and 0xFF) shl 8) or (coeffBuffer[bufferOffset].toInt() and 0xFF)).toShort() + bufferOffset += 2 + } + + ptr += totalCoeffBytes.toInt() + + // Dequantise padded coefficient tiles (344x288) + val yPaddedTile = FloatArray(paddedCoeffCount) + val coPaddedTile = FloatArray(paddedCoeffCount) + val cgPaddedTile = 
FloatArray(paddedCoeffCount) + + for (i in 0 until paddedCoeffCount) { + yPaddedTile[i] = quantisedY[i] * qY * rcf + coPaddedTile[i] = quantisedCo[i] * qCo * rcf + cgPaddedTile[i] = quantisedCg[i] * qCg * rcf + } + + // Store coefficients for future delta reference (for P-frames) + val tileIdx = tileY * ((width + TILE_SIZE_X - 1) / TILE_SIZE_X) + tileX + if (tavPreviousCoeffsY == null) { + tavPreviousCoeffsY = mutableMapOf() + tavPreviousCoeffsCo = mutableMapOf() + tavPreviousCoeffsCg = mutableMapOf() + } + tavPreviousCoeffsY!![tileIdx] = yPaddedTile.clone() + tavPreviousCoeffsCo!![tileIdx] = coPaddedTile.clone() + tavPreviousCoeffsCg!![tileIdx] = cgPaddedTile.clone() + + // Apply inverse DWT on full padded tiles (344x288) + if (isLossless) { + tavApplyDWTInverseMultiLevel(yPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0) + tavApplyDWTInverseMultiLevel(coPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0) + tavApplyDWTInverseMultiLevel(cgPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0) + } else { + tavApplyDWTInverseMultiLevel(yPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter) + tavApplyDWTInverseMultiLevel(coPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter) + tavApplyDWTInverseMultiLevel(cgPaddedTile, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter) + } + + // Extract core 280x224 pixels from reconstructed padded tiles (344x288) + val yTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) + val coTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) + val cgTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) + + for (y in 0 until TILE_SIZE_Y) { + for (x in 0 until TILE_SIZE_X) { + val coreIdx = y * TILE_SIZE_X + x + val paddedIdx = (y + TAV_TILE_MARGIN) * PADDED_TILE_SIZE_X + (x + TAV_TILE_MARGIN) + + yTile[coreIdx] = yPaddedTile[paddedIdx] + coTile[coreIdx] = coPaddedTile[paddedIdx] + cgTile[coreIdx] = cgPaddedTile[paddedIdx] + } + } + + // Convert to 
RGB based on TAV version (YCoCg-R for v1, ICtCp for v2) + if (tavVersion == 2) { + tavConvertICtCpTileToRGB(tileX, tileY, yTile, coTile, cgTile, currentRGBAddr, width, height) + } else { + tavConvertYCoCgTileToRGB(tileX, tileY, yTile, coTile, cgTile, currentRGBAddr, width, height) + } + + return ptr + } + + private fun tavConvertYCoCgTileToRGB(tileX: Int, tileY: Int, yTile: FloatArray, coTile: FloatArray, cgTile: FloatArray, + rgbAddr: Long, width: Int, height: Int) { + val startX = tileX * TILE_SIZE_X + val startY = tileY * TILE_SIZE_Y + + // OPTIMIZATION: Process pixels row by row with bulk copying for better cache locality + for (y in 0 until TILE_SIZE_Y) { + val frameY = startY + y + if (frameY >= height) break + + // Calculate valid pixel range for this row + val validStartX = maxOf(0, startX) + val validEndX = minOf(width, startX + TILE_SIZE_X) + val validPixelsInRow = validEndX - validStartX + + if (validPixelsInRow > 0) { + // Create row buffer for bulk RGB data + val rowRgbBuffer = ByteArray(validPixelsInRow * 3) + var bufferIdx = 0 + + for (x in validStartX until validEndX) { + val tileIdx = y * TILE_SIZE_X + (x - startX) + + // YCoCg-R to RGB conversion (exact inverse of encoder) + val Y = yTile[tileIdx] + val Co = coTile[tileIdx] + val Cg = cgTile[tileIdx] + + // Inverse of encoder's YCoCg-R transform: + val tmp = Y - Cg / 2.0f + val g = Cg + tmp + val b = tmp - Co / 2.0f + val r = Co + b + + rowRgbBuffer[bufferIdx++] = r.toInt().coerceIn(0, 255).toByte() + rowRgbBuffer[bufferIdx++] = g.toInt().coerceIn(0, 255).toByte() + rowRgbBuffer[bufferIdx++] = b.toInt().coerceIn(0, 255).toByte() + } + + // OPTIMIZATION: Bulk copy entire row at once + val rowStartOffset = (frameY * width + validStartX) * 3L + UnsafeHelper.memcpyRaw(rowRgbBuffer, UnsafeHelper.getArrayOffset(rowRgbBuffer), + null, vm.usermem.ptr + rgbAddr + rowStartOffset, rowRgbBuffer.size.toLong()) + } + } + } + + private fun tavConvertICtCpTileToRGB(tileX: Int, tileY: Int, iTile: FloatArray, 
ctTile: FloatArray, cpTile: FloatArray, + rgbAddr: Long, width: Int, height: Int) { + val startX = tileX * TILE_SIZE_X + val startY = tileY * TILE_SIZE_Y + + // OPTIMIZATION: Process pixels row by row with bulk copying for better cache locality + for (y in 0 until TILE_SIZE_Y) { + val frameY = startY + y + if (frameY >= height) break + + // Calculate valid pixel range for this row + val validStartX = maxOf(0, startX) + val validEndX = minOf(width, startX + TILE_SIZE_X) + val validPixelsInRow = validEndX - validStartX + + if (validPixelsInRow > 0) { + // Create row buffer for bulk RGB data + val rowRgbBuffer = ByteArray(validPixelsInRow * 3) + var bufferIdx = 0 + + for (x in validStartX until validEndX) { + val tileIdx = y * TILE_SIZE_X + (x - startX) + + // ICtCp to sRGB conversion (adapted from encoder ICtCp functions) + val I = iTile[tileIdx].toDouble() / 255.0 + val Ct = (ctTile[tileIdx].toDouble() - 127.5) / 255.0 + val Cp = (cpTile[tileIdx].toDouble() - 127.5) / 255.0 + + // ICtCp -> L'M'S' (inverse matrix) + val Lp = I + 0.015718580108730416 * Ct + 0.2095810681164055 * Cp + val Mp = I - 0.015718580108730416 * Ct - 0.20958106811640548 * Cp + val Sp = I + 1.0212710798422344 * Ct - 0.6052744909924316 * Cp + + // HLG decode: L'M'S' -> linear LMS + val L = HLG_EOTF(Lp) + val M = HLG_EOTF(Mp) + val S = HLG_EOTF(Sp) + + // LMS -> linear sRGB (inverse matrix) + val rLin = 6.1723815689243215 * L -5.319534979827695 * M + 0.14699442094633924 * S + val gLin = -1.3243428148026244 * L + 2.560286104841917 * M -0.2359203727576164 * S + val bLin = -0.011819739235953752 * L -0.26473549971186555 * M + 1.2767952602537955 * S + + // Gamma encode to sRGB + val rSrgb = srgbUnlinearize(rLin) + val gSrgb = srgbUnlinearize(gLin) + val bSrgb = srgbUnlinearize(bLin) + + rowRgbBuffer[bufferIdx++] = (rSrgb * 255.0).toInt().coerceIn(0, 255).toByte() + rowRgbBuffer[bufferIdx++] = (gSrgb * 255.0).toInt().coerceIn(0, 255).toByte() + rowRgbBuffer[bufferIdx++] = (bSrgb * 
255.0).toInt().coerceIn(0, 255).toByte() + } + + // OPTIMIZATION: Bulk copy entire row at once + val rowStartOffset = (frameY * width + validStartX) * 3L + UnsafeHelper.memcpyRaw(rowRgbBuffer, UnsafeHelper.getArrayOffset(rowRgbBuffer), + null, vm.usermem.ptr + rgbAddr + rowStartOffset, rowRgbBuffer.size.toLong()) + } + } + } + + private fun tavAddYCoCgResidualToRGBTile(tileX: Int, tileY: Int, yRes: FloatArray, coRes: FloatArray, cgRes: FloatArray, + rgbAddr: Long, width: Int, height: Int) { + val startX = tileX * TILE_SIZE_X + val startY = tileY * TILE_SIZE_Y + + for (y in 0 until TILE_SIZE_Y) { + for (x in 0 until TILE_SIZE_X) { + val frameX = startX + x + val frameY = startY + y + + if (frameX < width && frameY < height) { + val tileIdx = y * TILE_SIZE_X + x + val pixelIdx = frameY * width + frameX + val rgbOffset = pixelIdx * 3L + + // Get current RGB (from motion compensation) + val curR = (vm.peek(rgbAddr + rgbOffset).toInt() and 0xFF).toFloat() + val curG = (vm.peek(rgbAddr + rgbOffset + 1).toInt() and 0xFF).toFloat() + val curB = (vm.peek(rgbAddr + rgbOffset + 2).toInt() and 0xFF).toFloat() + + // Convert current RGB back to YCoCg + val co = (curR - curB) / 2 + val tmp = curB + co + val cg = (curG - tmp) / 2 + val yPred = tmp + cg + + // Add residual + val yFinal = yPred + yRes[tileIdx] + val coFinal = co + coRes[tileIdx] + val cgFinal = cg + cgRes[tileIdx] + + // Convert back to RGB + val tmpFinal = yFinal - cgFinal + val gFinal = yFinal + cgFinal + val bFinal = tmpFinal - coFinal + val rFinal = tmpFinal + coFinal + + vm.poke(rgbAddr + rgbOffset, rFinal.toInt().coerceIn(0, 255).toByte()) + vm.poke(rgbAddr + rgbOffset + 1, gFinal.toInt().coerceIn(0, 255).toByte()) + vm.poke(rgbAddr + rgbOffset + 2, bFinal.toInt().coerceIn(0, 255).toByte()) + } + } + } + } + + // Helper functions (simplified versions of existing DWT functions) + private fun tavCopyTileRGB(tileX: Int, tileY: Int, currentRGBAddr: Long, prevRGBAddr: Long, width: Int, height: Int) { + val startX 
= tileX * TILE_SIZE_X + val startY = tileY * TILE_SIZE_Y + + // OPTIMIZATION: Copy entire rows at once for maximum performance + for (y in 0 until TILE_SIZE_Y) { + val frameY = startY + y + if (frameY >= height) break + + // Calculate valid pixel range for this row + val validStartX = maxOf(0, startX) + val validEndX = minOf(width, startX + TILE_SIZE_X) + val validPixelsInRow = validEndX - validStartX + + if (validPixelsInRow > 0) { + val rowStartOffset = (frameY * width + validStartX) * 3L + val rowByteCount = validPixelsInRow * 3L + + // OPTIMIZATION: Bulk copy entire row of RGB data in one operation + UnsafeHelper.memcpy( + vm.usermem.ptr + prevRGBAddr + rowStartOffset, + vm.usermem.ptr + currentRGBAddr + rowStartOffset, + rowByteCount + ) + } + } + } + + private fun tavDecodeDeltaTileRGB(readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long, + width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, rcf: Float, + waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long { + + val tileIdx = tileY * ((width + TILE_SIZE_X - 1) / TILE_SIZE_X) + tileX + var ptr = readPtr + + // Initialize coefficient storage if needed + if (tavPreviousCoeffsY == null) { + tavPreviousCoeffsY = mutableMapOf() + tavPreviousCoeffsCo = mutableMapOf() + tavPreviousCoeffsCg = mutableMapOf() + } + + // Coefficient count for padded tiles: 344x288 = 99,072 coefficients per channel + val coeffCount = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y + + // Read delta coefficients (same format as intra: quantised int16 -> float) + val deltaY = ShortArray(coeffCount) + val deltaCo = ShortArray(coeffCount) + val deltaCg = ShortArray(coeffCount) + + vm.bulkPeekShort(ptr.toInt(), deltaY, coeffCount * 2) + ptr += coeffCount * 2 + vm.bulkPeekShort(ptr.toInt(), deltaCo, coeffCount * 2) + ptr += coeffCount * 2 + vm.bulkPeekShort(ptr.toInt(), deltaCg, coeffCount * 2) + ptr += coeffCount * 2 + + // Get or initialize previous coefficients for this tile + val prevY = 
tavPreviousCoeffsY!![tileIdx] ?: FloatArray(coeffCount) + val prevCo = tavPreviousCoeffsCo!![tileIdx] ?: FloatArray(coeffCount) + val prevCg = tavPreviousCoeffsCg!![tileIdx] ?: FloatArray(coeffCount) + + // Reconstruct current coefficients: current = previous + delta + val currentY = FloatArray(coeffCount) + val currentCo = FloatArray(coeffCount) + val currentCg = FloatArray(coeffCount) + + for (i in 0 until coeffCount) { + currentY[i] = prevY[i] + (deltaY[i].toFloat() * qY * rcf) + currentCo[i] = prevCo[i] + (deltaCo[i].toFloat() * qCo * rcf) + currentCg[i] = prevCg[i] + (deltaCg[i].toFloat() * qCg * rcf) + } + + // Store current coefficients as previous for next frame + tavPreviousCoeffsY!![tileIdx] = currentY.clone() + tavPreviousCoeffsCo!![tileIdx] = currentCo.clone() + tavPreviousCoeffsCg!![tileIdx] = currentCg.clone() + + // Apply inverse DWT + if (isLossless) { + tavApplyDWTInverseMultiLevel(currentY, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0) + tavApplyDWTInverseMultiLevel(currentCo, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0) + tavApplyDWTInverseMultiLevel(currentCg, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, 0) + } else { + tavApplyDWTInverseMultiLevel(currentY, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter) + tavApplyDWTInverseMultiLevel(currentCo, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter) + tavApplyDWTInverseMultiLevel(currentCg, PADDED_TILE_SIZE_X, PADDED_TILE_SIZE_Y, decompLevels, waveletFilter) + } + + // Extract core 280x224 pixels and convert to RGB (same as intra) + val yTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) + val coTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) + val cgTile = FloatArray(TILE_SIZE_X * TILE_SIZE_Y) + + for (y in 0 until TILE_SIZE_Y) { + for (x in 0 until TILE_SIZE_X) { + val coreIdx = y * TILE_SIZE_X + x + val paddedIdx = (y + TAV_TILE_MARGIN) * PADDED_TILE_SIZE_X + (x + TAV_TILE_MARGIN) + + yTile[coreIdx] = currentY[paddedIdx] + 
coTile[coreIdx] = currentCo[paddedIdx] + cgTile[coreIdx] = currentCg[paddedIdx] + } + } + + // Convert to RGB based on TAV version + if (tavVersion == 2) { + tavConvertICtCpTileToRGB(tileX, tileY, yTile, coTile, cgTile, currentRGBAddr, width, height) + } else { + tavConvertYCoCgTileToRGB(tileX, tileY, yTile, coTile, cgTile, currentRGBAddr, width, height) + } + + return ptr + } + + private fun tavApplyMotionCompensationRGB(tileX: Int, tileY: Int, mvX: Int, mvY: Int, + currentRGBAddr: Long, prevRGBAddr: Long, + width: Int, height: Int) { + val startX = tileX * TILE_SIZE_X + val startY = tileY * TILE_SIZE_Y + + // Motion vectors in quarter-pixel precision + val refX = startX + (mvX / 4.0f) + val refY = startY + (mvY / 4.0f) + + for (y in 0 until TILE_SIZE_Y) { + for (x in 0 until TILE_SIZE_X) { + val currentPixelIdx = (startY + y) * width + (startX + x) + + if (currentPixelIdx >= 0 && currentPixelIdx < width * height) { + // Bilinear interpolation for sub-pixel motion vectors + val srcX = refX + x + val srcY = refY + y + + val interpolatedRGB = tavBilinearInterpolateRGB(prevRGBAddr, width, height, srcX, srcY) + + val rgbOffset = currentPixelIdx * 3L + vm.poke(currentRGBAddr + rgbOffset, interpolatedRGB[0]) + vm.poke(currentRGBAddr + rgbOffset + 1, interpolatedRGB[1]) + vm.poke(currentRGBAddr + rgbOffset + 2, interpolatedRGB[2]) + } + } + } + } + + private fun tavBilinearInterpolateRGB(rgbPtr: Long, width: Int, height: Int, x: Float, y: Float): ByteArray { + val x0 = kotlin.math.floor(x).toInt() + val y0 = kotlin.math.floor(y).toInt() + val x1 = x0 + 1 + val y1 = y0 + 1 + + if (x0 < 0 || y0 < 0 || x1 >= width || y1 >= height) { + return byteArrayOf(0, 0, 0) // Out of bounds - return black + } + + val fx = x - x0 + val fy = y - y0 + + // Get 4 corner pixels + val rgb00 = getRGBPixel(rgbPtr, y0 * width + x0) + val rgb10 = getRGBPixel(rgbPtr, y0 * width + x1) + val rgb01 = getRGBPixel(rgbPtr, y1 * width + x0) + val rgb11 = getRGBPixel(rgbPtr, y1 * width + x1) + + // 
Bilinear interpolation + val result = ByteArray(3) + for (c in 0..2) { + val interp = (1 - fx) * (1 - fy) * (rgb00[c].toInt() and 0xFF) + + fx * (1 - fy) * (rgb10[c].toInt() and 0xFF) + + (1 - fx) * fy * (rgb01[c].toInt() and 0xFF) + + fx * fy * (rgb11[c].toInt() and 0xFF) + result[c] = interp.toInt().coerceIn(0, 255).toByte() + } + + return result + } + + private fun getRGBPixel(rgbPtr: Long, pixelIdx: Int): ByteArray { + val offset = pixelIdx * 3L + return byteArrayOf( + vm.peek(rgbPtr + offset), + vm.peek(rgbPtr + offset + 1), + vm.peek(rgbPtr + offset + 2) + ) + } + + private fun tavApplyDWTInverseMultiLevel(data: FloatArray, width: Int, height: Int, levels: Int, filterType: Int) { + // Multi-level inverse DWT - reconstruct from smallest to largest (reverse of encoder) + val maxSize = kotlin.math.max(width, height) + val tempRow = FloatArray(maxSize) + val tempCol = FloatArray(maxSize) + + for (level in levels - 1 downTo 0) { + val currentWidth = width shr level + val currentHeight = height shr level + + // Handle edge cases for very small decomposition levels + if (currentWidth < 1 || currentHeight < 1) continue // Skip invalid sizes + if (currentWidth == 1 && currentHeight == 1) { + // Single DC coefficient, no DWT needed but preserve it + continue + } + + // Apply inverse DWT to current subband region - EXACT match to encoder + // The encoder does ROW transform first, then COLUMN transform + // So inverse must do COLUMN inverse first, then ROW inverse + + // Column inverse transform first (vertical) + for (x in 0 until currentWidth) { + for (y in 0 until currentHeight) { + tempCol[y] = data[y * width + x] + } + + if (filterType == 0) { + tavApplyDWT53Inverse1D(tempCol, currentHeight) + } else { + tavApplyDWT97Inverse1D(tempCol, currentHeight) + } + + for (y in 0 until currentHeight) { + data[y * width + x] = tempCol[y] + } + } + + // Row inverse transform second (horizontal) + for (y in 0 until currentHeight) { + for (x in 0 until currentWidth) { + 
tempRow[x] = data[y * width + x] + } + + if (filterType == 0) { + tavApplyDWT53Inverse1D(tempRow, currentWidth) + } else { + tavApplyDWT97Inverse1D(tempRow, currentWidth) + } + + for (x in 0 until currentWidth) { + data[y * width + x] = tempRow[x] + } + } + } + } + + // 1D lifting scheme implementations for 9/7 irreversible filter + private fun tavApplyDWT97Inverse1D(data: FloatArray, length: Int) { + if (length < 2) return + + val temp = FloatArray(length) + val half = (length + 1) / 2 // Handle odd lengths properly + + // Split into low and high frequency components (matching encoder layout) + // After forward DWT: first half = low-pass, second half = high-pass + for (i in 0 until half) { + temp[i] = data[i] // Low-pass coefficients (first half) + } + for (i in 0 until length / 2) { + if (half + i < length && half + i < data.size) { + temp[half + i] = data[half + i] // High-pass coefficients (second half) + } + } + + // 9/7 inverse lifting coefficients (original working values) + val alpha = -1.586134342f + val beta = -0.052980118f + val gamma = 0.882911076f + val delta = 0.443506852f + val K = 1.230174105f + + // JPEG2000 9/7 inverse lifting steps (corrected implementation) + // Reference order: undo scaling → undo δ → undo γ → undo β → undo α → interleave + + // Step 1: Undo scaling - s[i] /= K, d[i] *= K + for (i in 0 until half) { + temp[i] /= K // Low-pass coefficients + } + for (i in 0 until length / 2) { + if (half + i < length) { + temp[half + i] *= K // High-pass coefficients + } + } + + // Step 2: Undo δ update - s[i] -= δ * (d[i] + d[i-1]) + for (i in 0 until half) { + val d_curr = if (half + i < length) temp[half + i] else 0.0f + val d_prev = if (i > 0 && half + i - 1 < length) temp[half + i - 1] else d_curr + temp[i] -= delta * (d_curr + d_prev) + } + + // Step 3: Undo γ predict - d[i] -= γ * (s[i] + s[i+1]) + for (i in 0 until length / 2) { + if (half + i < length) { + val s_curr = temp[i] + val s_next = if (i + 1 < half) temp[i + 1] else s_curr + 
temp[half + i] -= gamma * (s_curr + s_next) + } + } + + // Step 4: Undo β update - s[i] -= β * (d[i] + d[i-1]) + for (i in 0 until half) { + val d_curr = if (half + i < length) temp[half + i] else 0.0f + val d_prev = if (i > 0 && half + i - 1 < length) temp[half + i - 1] else d_curr + temp[i] -= beta * (d_curr + d_prev) + } + + // Step 5: Undo α predict - d[i] -= α * (s[i] + s[i+1]) + for (i in 0 until length / 2) { + if (half + i < length) { + val s_curr = temp[i] + val s_next = if (i + 1 < half) temp[i + 1] else s_curr + temp[half + i] -= alpha * (s_curr + s_next) + } + } + + // Simple reconstruction (revert to working version) + for (i in 0 until length) { + if (i % 2 == 0) { + // Even positions: low-pass coefficients + data[i] = temp[i / 2] + } else { + // Odd positions: high-pass coefficients + val idx = i / 2 + if (half + idx < length) { + data[i] = temp[half + idx] + } else { + data[i] = 0.0f // Boundary case + } + } + } + } + + private fun tavApplyDWT53Inverse1D(data: FloatArray, length: Int) { + if (length < 2) return + + val temp = FloatArray(length) + val half = (length + 1) / 2 // Handle odd lengths properly + + // Split into low and high frequency components (matching encoder layout) + for (i in 0 until half) { + temp[i] = data[i] // Low-pass coefficients (first half) + } + for (i in 0 until length / 2) { + if (half + i < length && half + i < data.size) { + temp[half + i] = data[half + i] // High-pass coefficients (second half) + } + } + + // 5/3 inverse lifting (undo forward steps in reverse order) + + // Step 2: Undo update step (1/4 coefficient) - JPEG2000 symmetric extension + for (i in 0 until half) { + val leftIdx = half + i - 1 + val centerIdx = half + i + + // Symmetric extension for boundary handling + val left = when { + leftIdx >= 0 && leftIdx < length -> temp[leftIdx] + centerIdx < length && centerIdx + 1 < length -> temp[centerIdx + 1] // Mirror + centerIdx < length -> temp[centerIdx] + else -> 0.0f + } + val right = if (centerIdx < 
length) temp[centerIdx] else 0.0f + temp[i] -= 0.25f * (left + right) + } + + // Step 1: Undo predict step (1/2 coefficient) - JPEG2000 symmetric extension + for (i in 0 until length / 2) { + if (half + i < length) { + val left = temp[i] + // Symmetric extension for right boundary + val right = if (i < half - 1) temp[i + 1] else if (half > 2) temp[half - 2] else temp[half - 1] + temp[half + i] -= 0.5f * (left + right) + } + } + + // Simple reconstruction (revert to working version) + for (i in 0 until length) { + if (i % 2 == 0) { + // Even positions: low-pass coefficients + data[i] = temp[i / 2] + } else { + // Odd positions: high-pass coefficients + val idx = i / 2 + if (half + idx < length) { + data[i] = temp[half + idx] + } else { + // Symmetric extension: mirror the last available high-pass coefficient + val lastHighIdx = (length / 2) - 1 + if (lastHighIdx >= 0 && half + lastHighIdx < length) { + data[i] = temp[half + lastHighIdx] + } else { + data[i] = 0.0f + } + } + } + } + } + } \ No newline at end of file diff --git a/tsvm_core/src/net/torvald/tsvm/VM.kt b/tsvm_core/src/net/torvald/tsvm/VM.kt index eeab9fe..82e4452 100644 --- a/tsvm_core/src/net/torvald/tsvm/VM.kt +++ b/tsvm_core/src/net/torvald/tsvm/VM.kt @@ -438,13 +438,89 @@ class VM( (memspace as PeriBase).poke(offset, value) } - fun peek(addr:Long): Byte? 
{ + fun pokeShort(addr: Long, value: Short) { + val value0 = value.toByte() + val value1 = value.toInt().shr(8).toByte() + + val (memspace, offset) = translateAddr(addr) + if (memspace == null) + throw ErrorIllegalAccess(this, addr) + else if (memspace is UnsafePtr) { + if (addr >= memspace.size) + throw ErrorIllegalAccess(this, addr) + else { + memspace.set(offset+0, value0) + memspace.set(offset+1, value1) + } + } + else { + (memspace as PeriBase).poke(offset+0, value0) + (memspace as PeriBase).poke(offset+1, value1) + } + } + + fun pokeFloat(addr: Long, value: Float) { + val vi = value.toRawBits() + val value0 = vi.toByte() + val value1 = vi.shr(8).toByte() + val value2 = vi.shr(16).toByte() + val value3 = vi.shr(24).toByte() + + val (memspace, offset) = translateAddr(addr) + if (memspace == null) + throw ErrorIllegalAccess(this, addr) + else if (memspace is UnsafePtr) { + if (addr >= memspace.size) + throw ErrorIllegalAccess(this, addr) + else { + memspace.set(offset+0, value0) + memspace.set(offset+1, value1) + memspace.set(offset+2, value2) + memspace.set(offset+3, value3) + } + } + else { + (memspace as PeriBase).poke(offset+0, value0) + (memspace as PeriBase).poke(offset+1, value1) + (memspace as PeriBase).poke(offset+2, value2) + (memspace as PeriBase).poke(offset+3, value3) + } + } + + fun pokeInt(addr: Long, value: Int) { + val value0 = value.toByte() + val value1 = value.shr(8).toByte() + val value2 = value.shr(16).toByte() + val value3 = value.shr(24).toByte() + + val (memspace, offset) = translateAddr(addr) + if (memspace == null) + throw ErrorIllegalAccess(this, addr) + else if (memspace is UnsafePtr) { + if (addr >= memspace.size) + throw ErrorIllegalAccess(this, addr) + else { + memspace.set(offset+0, value0) + memspace.set(offset+1, value1) + memspace.set(offset+2, value2) + memspace.set(offset+3, value3) + } + } + else { + (memspace as PeriBase).poke(offset+0, value0) + (memspace as PeriBase).poke(offset+1, value1) + (memspace as 
PeriBase).poke(offset+2, value2) + (memspace as PeriBase).poke(offset+3, value3) + } + } + + fun peek(addr:Long): Byte { val (memspace, offset) = translateAddr(addr) // println("peek $addr -> ${offset}@${memspace?.javaClass?.canonicalName}") return if (memspace == null) - null + throw NullPointerException()//null else if (memspace is UnsafePtr) { if (addr >= memspace.size) throw ErrorIllegalAccess(this, addr) @@ -452,7 +528,76 @@ class VM( memspace.get(offset) } else - (memspace as PeriBase).peek(offset) + (memspace as PeriBase).peek(offset)!! + } + + fun peekShort(addr: Long): Short { + val (memspace, offset) = translateAddr(addr) + + return if (memspace == null) + throw NullPointerException()//null + else if (memspace is UnsafePtr) { + if (addr >= memspace.size) + throw ErrorIllegalAccess(this, addr) + else { + (memspace.get(offset+0).toUint() or + memspace.get(offset+1).toUint().shl(8)).toShort() + } + } + else { + ((memspace as PeriBase).peek(offset+0)!!.toUint() or + (memspace as PeriBase).peek(offset+1)!!.toUint().shl(8)).toShort() + } + } + + fun peekFloat(addr: Long): Float { + val (memspace, offset) = translateAddr(addr) + + return if (memspace == null) + throw NullPointerException()//null + else if (memspace is UnsafePtr) { + if (addr >= memspace.size) + throw ErrorIllegalAccess(this, addr) + else { + Float.fromBits(memspace.get(offset+0).toUint() or + memspace.get(offset+1).toUint().shl(8) or + memspace.get(offset+2).toUint().shl(16) or + memspace.get(offset+3).toUint().shl(24) + ) + } + } + else { + Float.fromBits((memspace as PeriBase).peek(offset+0)!!.toUint() or + (memspace as PeriBase).peek(offset+1)!!.toUint().shl(8) or + (memspace as PeriBase).peek(offset+2)!!.toUint().shl(16) or + (memspace as PeriBase).peek(offset+3)!!.toUint().shl(24) + ) + } + } + + fun peekInt(addr: Long): Int? 
{ + val (memspace, offset) = translateAddr(addr) + + return if (memspace == null) + throw NullPointerException()//null + else if (memspace is UnsafePtr) { + if (addr >= memspace.size) + throw ErrorIllegalAccess(this, addr) + else { + (memspace.get(offset+0).toUint() or + memspace.get(offset+1).toUint().shl(8) or + memspace.get(offset+2).toUint().shl(16) or + memspace.get(offset+3).toUint().shl(24) + ) + } + } + else { + ((memspace as PeriBase).peek(offset+0)!!.toUint() or + (memspace as PeriBase).peek(offset+1)!!.toUint().shl(8) or + (memspace as PeriBase).peek(offset+2)!!.toUint().shl(16) or + (memspace as PeriBase).peek(offset+3)!!.toUint().shl(24) + ) + } } private fun findEmptySpace(blockSize: Int): Int? { diff --git a/video_encoder/Makefile b/video_encoder/Makefile index c3d269d..e337b8e 100644 --- a/video_encoder/Makefile +++ b/video_encoder/Makefile @@ -6,16 +6,19 @@ CFLAGS = -std=c99 -Wall -Wextra -O2 -D_GNU_SOURCE LIBS = -lm -lzstd # Source files and targets -SOURCES = encoder_tev.c -TARGETS = encoder_tev +TARGETS = encoder_tev encoder_tav # Build all encoders all: $(TARGETS) # Build main encoder -encoder_tev: encoder_tev.c +tev: encoder_tev.c rm -f encoder_tev - $(CC) $(CFLAGS) -o $@ $< $(LIBS) + $(CC) $(CFLAGS) -o encoder_tev $< $(LIBS) + +tav: encoder_tav.c + rm -f encoder_tav + $(CC) $(CFLAGS) -o encoder_tav $< $(LIBS) # Default target $(TARGETS): all @@ -45,8 +48,8 @@ help: @echo "" @echo "Targets:" @echo " all - Build both encoders (default)" - @echo " encoder_tev - Build the main TEV encoder" - @echo " encoder_tev_xyb - Build the XYB color space encoder" + @echo " tev - Build the main TEV encoder" + @echo " tav - Build the advanced TAV encoder" @echo " debug - Build with debug symbols" @echo " clean - Remove build artifacts" @echo " install - Install to /usr/local/bin" @@ -54,8 +57,9 @@ help: @echo " help - Show this help" @echo "" @echo "Usage:" - @echo " make # Build both encoders" - @echo " ./encoder_tev input.mp4 -o output.tev" - @echo " 
./encoder_tev_xyb input.mp4 -o output.tev" + @echo " make # Build both encoders" + @echo " make tev # Build TEV encoder" + @echo " make tav # Build TAV encoder" + @echo " sudo make install # Install both encoders" .PHONY: all clean install check-deps help debug diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c new file mode 100644 index 0000000..2d247fa --- /dev/null +++ b/video_encoder/encoder_tav.c @@ -0,0 +1,2214 @@ +// Created by Claude on 2025-09-13. +// TAV (TSVM Advanced Video) Encoder - DWT-based compression with full resolution YCoCg-R +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef PI +#define PI 3.14159265358979323846f +#endif + +// TSVM Advanced Video (TAV) format constants +#define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" // "\x1FTSVM TAV" +// TAV version - dynamic based on colour space mode +// Version 1: YCoCg-R (default) +// Version 2: ICtCp (--ictcp flag) + +// Tile encoding modes (280x224 tiles) +#define TAV_MODE_SKIP 0x00 // Skip tile (copy from reference) +#define TAV_MODE_INTRA 0x01 // Intra DWT coding (I-frame tiles) +#define TAV_MODE_DELTA 0x02 // Coefficient delta encoding (efficient P-frames) + +// Video packet types +#define TAV_PACKET_IFRAME 0x10 // Intra frame (keyframe) +#define TAV_PACKET_PFRAME 0x11 // Predicted frame +#define TAV_PACKET_AUDIO_MP2 0x20 // MP2 audio +#define TAV_PACKET_SUBTITLE 0x30 // Subtitle packet +#define TAV_PACKET_SYNC 0xFF // Sync packet + +// DWT settings +#define TILE_SIZE_X 280 // 280x224 tiles - better compression efficiency +#define TILE_SIZE_Y 224 // Optimized for TSVM 560x448 (2×2 tiles exactly) +#define MAX_DECOMP_LEVELS 6 // Can go deeper: 280→140→70→35→17→8→4, 224→112→56→28→14→7→3 + +// Simulated overlapping tiles settings for seamless DWT processing +#define DWT_FILTER_HALF_SUPPORT 4 // For 9/7 filter (filter lengths 9,7 → L=4) +#define TILE_MARGIN_LEVELS 3 // Use 
margin for 3 levels: 4 * (2^3) = 4 * 8 = 32px +#define TILE_MARGIN (DWT_FILTER_HALF_SUPPORT * (1 << TILE_MARGIN_LEVELS)) // 4 * 8 = 32px +#define PADDED_TILE_SIZE_X (TILE_SIZE_X + 2 * TILE_MARGIN) // 280 + 64 = 344px +#define PADDED_TILE_SIZE_Y (TILE_SIZE_Y + 2 * TILE_MARGIN) // 224 + 64 = 288px + +// Wavelet filter types +#define WAVELET_5_3_REVERSIBLE 0 // Lossless capable +#define WAVELET_9_7_IRREVERSIBLE 1 // Higher compression + +// Default settings +#define DEFAULT_WIDTH 560 +#define DEFAULT_HEIGHT 448 +#define DEFAULT_FPS 30 +#define DEFAULT_QUALITY 2 +int KEYFRAME_INTERVAL = 60; + +// Audio/subtitle constants (reused from TEV) +#define MP2_DEFAULT_PACKET_SIZE 1152 +#define MAX_SUBTITLE_LENGTH 2048 + +// Subtitle structure +typedef struct subtitle_entry { + int start_frame; + int end_frame; + char *text; + struct subtitle_entry *next; +} subtitle_entry_t; + +static void generate_random_filename(char *filename) { + srand(time(NULL)); + + const char charset[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; + const int charset_size = sizeof(charset) - 1; + + // Start with the prefix + strcpy(filename, "/tmp/"); + + // Generate 32 random characters + for (int i = 0; i < 32; i++) { + filename[5 + i] = charset[rand() % charset_size]; + } + + // Add the .mp2 extension + strcpy(filename + 37, ".mp2"); + filename[41] = '\0'; // Null terminate +} + +char TEMP_AUDIO_FILE[42]; + + +// Utility macros +static inline int CLAMP(int x, int min, int max) { + return x < min ? min : (x > max ? max : x); +} +static inline float FCLAMP(float x, float min, float max) { + return x < min ? min : (x > max ? 
max : x); +} + +// MP2 audio rate table (same as TEV) +static const int MP2_RATE_TABLE[] = {128, 160, 224, 320, 384, 384}; + +// Quality level to quantisation mapping for different channels +static const int QUALITY_Y[] = {60, 42, 25, 12, 6, 2}; +static const int QUALITY_CO[] = {120, 90, 60, 30, 15, 3}; +static const int QUALITY_CG[] = {240, 180, 120, 60, 30, 5}; + +// DWT coefficient structure for each subband +typedef struct { + int16_t *coeffs; + int width, height; + int size; +} dwt_subband_t; + +// DWT tile structure +typedef struct { + dwt_subband_t *ll, *lh, *hl, *hh; // Subbands for each level + int decomp_levels; + int tile_x, tile_y; +} dwt_tile_t; + +// Motion vector structure +typedef struct { + int16_t mv_x, mv_y; // 1/4 pixel precision + float rate_control_factor; +} motion_vector_t; + +// TAV encoder structure +typedef struct { + // Input/output files + char *input_file; + char *output_file; + char *subtitle_file; + FILE *output_fp; + FILE *mp2_file; + FILE *ffmpeg_video_pipe; + + // Video parameters + int width, height; + int fps; + int output_fps; // For frame rate conversion + int total_frames; + int frame_count; + double duration; + int has_audio; + int is_ntsc_framerate; + + // Encoding parameters + int quality_level; + int quantiser_y, quantiser_co, quantiser_cg; + int wavelet_filter; + int decomp_levels; + int bitrate_mode; + int target_bitrate; + + // Flags +// int progressive; // no interlaced mode for TAV + int lossless; + int enable_rcf; + int enable_progressive_transmission; + int enable_roi; + int verbose; + int test_mode; + int ictcp_mode; // 0 = YCoCg-R (default), 1 = ICtCp colour space + int intra_only; // Force all tiles to use INTRA mode (disable delta encoding) + + // Frame buffers + uint8_t *current_frame_rgb; + uint8_t *previous_frame_rgb; + float *current_frame_y, *current_frame_co, *current_frame_cg; + float *previous_frame_y, *previous_frame_co, *previous_frame_cg; + + // Tile processing + int tiles_x, tiles_y; + dwt_tile_t 
*tiles; + motion_vector_t *motion_vectors; + + // Audio processing (expanded from TEV) + size_t audio_remaining; + uint8_t *mp2_buffer; + size_t mp2_buffer_size; + int mp2_packet_size; + int mp2_rate_index; + int target_audio_buffer_size; + double audio_frames_in_buffer; + + // Subtitle processing + subtitle_entry_t *subtitles; + subtitle_entry_t *current_subtitle; + int subtitle_visible; + + // Compression + ZSTD_CCtx *zstd_ctx; + void *compressed_buffer; + size_t compressed_buffer_size; + + // OPTIMIZATION: Pre-allocated buffers to avoid malloc/free per tile + int16_t *reusable_quantised_y; + int16_t *reusable_quantised_co; + int16_t *reusable_quantised_cg; + + // Coefficient delta storage for P-frames (previous frame's coefficients) + float *previous_coeffs_y; // Previous frame Y coefficients for all tiles + float *previous_coeffs_co; // Previous frame Co coefficients for all tiles + float *previous_coeffs_cg; // Previous frame Cg coefficients for all tiles + int previous_coeffs_allocated; // Flag to track allocation + + // Statistics + size_t total_compressed_size; + size_t total_uncompressed_size; + + // Progress tracking + struct timeval start_time; + +} tav_encoder_t; + +// Wavelet filter constants removed - using lifting scheme implementation instead + +// Function prototypes +static void show_usage(const char *program_name); +static tav_encoder_t* create_encoder(void); +static void cleanup_encoder(tav_encoder_t *enc); +static int initialize_encoder(tav_encoder_t *enc); +static void rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height); + +// Audio and subtitle processing prototypes (from TEV) +static int start_audio_conversion(tav_encoder_t *enc); +static int get_mp2_packet_size(uint8_t *header); +static int mp2_packet_size_to_rate_index(int packet_size, int is_mono); +static int process_audio(tav_encoder_t *enc, int frame_num, FILE *output); +static subtitle_entry_t* parse_subtitle_file(const char *filename, int fps); 
static subtitle_entry_t* parse_srt_file(const char *filename, int fps);
static subtitle_entry_t* parse_smi_file(const char *filename, int fps);
static int srt_time_to_frame(const char *time_str, int fps);
static int sami_ms_to_frame(int milliseconds, int fps);
static void free_subtitle_list(subtitle_entry_t *list);
static int write_subtitle_packet(FILE *output, uint32_t index, uint8_t opcode, const char *text);
static int process_subtitles(tav_encoder_t *enc, int frame_num, FILE *output);

// Show usage information: option summary plus the quality->rate/quantiser tables.
static void show_usage(const char *program_name) {
    printf("TAV DWT-based Video Encoder\n");
    printf("Usage: %s [options] -i input.mp4 -o output.mv3\n\n", program_name);
    printf("Options:\n");
    printf(" -i, --input FILE Input video file\n");
    printf(" -o, --output FILE Output video file (use '-' for stdout)\n");
    printf(" -s, --size WxH Video size (default: %dx%d)\n", DEFAULT_WIDTH, DEFAULT_HEIGHT);
    printf(" -f, --fps N Output frames per second (enables frame rate conversion)\n");
    printf(" -q, --quality N Quality level 0-5 (default: 2)\n");
    printf(" -Q, --quantiser Y,Co,Cg Quantiser levels 0-100 for each channel\n");
//    printf(" -w, --wavelet N Wavelet filter: 0=5/3 reversible, 1=9/7 irreversible (default: 1)\n");
    printf(" -b, --bitrate N Target bitrate in kbps (enables bitrate control mode)\n");
    printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n");
    printf(" -v, --verbose Verbose output\n");
    printf(" -t, --test Test mode: generate solid colour frames\n");
    printf(" --lossless Lossless mode: use 5/3 reversible wavelet\n");
//    printf(" --enable-progressive Enable progressive transmission\n");
//    printf(" --enable-roi Enable region-of-interest coding\n");
    printf(" --intra-only Disable delta encoding (force all tiles to use INTRA mode)\n");
    printf(" --ictcp Use ICtCp colour space instead of YCoCg-R (use when source is in BT.2100)\n");
    printf(" --help Show this help\n\n");

    printf("Audio Rate by Quality:\n ");
    for (int i = 0; i < sizeof(MP2_RATE_TABLE) / sizeof(int); i++) {
        printf("%d: %d kbps\t", i, MP2_RATE_TABLE[i]);
    }
    printf("\n\nQuantiser Value by Quality:\n");
    printf(" Y (Luma): ");
    for (int i = 0; i < 6; i++) {
        printf("%d: Q%d ", i, QUALITY_Y[i]);
    }
    printf("\n Co (Chroma): ");
    for (int i = 0; i < 6; i++) {
        printf("%d: Q%d ", i, QUALITY_CO[i]);
    }
    printf("\n Cg (Chroma): ");
    for (int i = 0; i < 6; i++) {
        printf("%d: Q%d ", i, QUALITY_CG[i]);
    }

    printf("\n\nFeatures:\n");
    printf(" - 112x112 DWT tiles with multi-resolution encoding\n");
    printf(" - Full resolution YCoCg-R/ICtCp colour space\n");
    printf(" - Lossless and lossy compression modes\n");

    printf("\nExamples:\n");
    printf(" %s -i input.mp4 -o output.mv3 # Default settings\n", program_name);
    printf(" %s -i input.mkv -q 3 -w 1 -d 6 -o output.mv3 # Maximum quality with 9/7 wavelet\n", program_name);
    printf(" %s -i input.avi --lossless -o output.mv3 # Lossless encoding\n", program_name);
    printf(" %s -i input.mp4 -b 800 -o output.mv3 # 800 kbps bitrate target\n", program_name);
    printf(" %s -i input.webm -S subs.srt -o output.mv3 # With subtitles\n", program_name);
}

// Create encoder instance. Returns a zero-initialised encoder with defaults
// applied, or NULL on allocation failure. Caller owns the result and must
// release it via cleanup_encoder().
static tav_encoder_t* create_encoder(void) {
    tav_encoder_t *enc = calloc(1, sizeof(tav_encoder_t));
    if (!enc) return NULL;

    // Set defaults
    enc->width = DEFAULT_WIDTH;
    enc->height = DEFAULT_HEIGHT;
    enc->fps = DEFAULT_FPS;
    enc->quality_level = DEFAULT_QUALITY;
    enc->wavelet_filter = WAVELET_9_7_IRREVERSIBLE;
    enc->decomp_levels = MAX_DECOMP_LEVELS;
    enc->quantiser_y = QUALITY_Y[DEFAULT_QUALITY];
    enc->quantiser_co = QUALITY_CO[DEFAULT_QUALITY];
    enc->quantiser_cg = QUALITY_CG[DEFAULT_QUALITY];

    return enc;
}

// Initialize encoder resources: tile grid, frame/tile buffers, ZSTD context,
// reusable quantisation scratch, and P-frame coefficient reference storage.
// Returns 0 on success, -1 on any allocation failure (cleanup is left to
// cleanup_encoder, which the caller invokes on error).
static int initialize_encoder(tav_encoder_t *enc) {
    if (!enc) return -1;

    // Calculate tile dimensions (round up so partial edge tiles are counted)
    enc->tiles_x = (enc->width + TILE_SIZE_X - 1) / TILE_SIZE_X;
    enc->tiles_y =
(enc->height + TILE_SIZE_Y - 1) / TILE_SIZE_Y;
    int num_tiles = enc->tiles_x * enc->tiles_y;

    // Allocate frame buffers
    size_t frame_size = enc->width * enc->height;
    enc->current_frame_rgb = malloc(frame_size * 3);
    enc->previous_frame_rgb = malloc(frame_size * 3);
    enc->current_frame_y = malloc(frame_size * sizeof(float));
    enc->current_frame_co = malloc(frame_size * sizeof(float));
    enc->current_frame_cg = malloc(frame_size * sizeof(float));
    enc->previous_frame_y = malloc(frame_size * sizeof(float));
    enc->previous_frame_co = malloc(frame_size * sizeof(float));
    enc->previous_frame_cg = malloc(frame_size * sizeof(float));

    // Allocate tile structures
    enc->tiles = malloc(num_tiles * sizeof(dwt_tile_t));
    enc->motion_vectors = malloc(num_tiles * sizeof(motion_vector_t));

    // Initialize motion vectors (zero motion, neutral rate-control factor)
    for (int i = 0; i < num_tiles; i++) {
        enc->motion_vectors[i].mv_x = 0;
        enc->motion_vectors[i].mv_y = 0;
        enc->motion_vectors[i].rate_control_factor = 1.0f; // Initialize to 1.0f
    }

    // Initialize ZSTD compression
    enc->zstd_ctx = ZSTD_createCCtx();
    // NOTE(review): sized from a fixed 1 MiB payload; a frame with many tiles
    // can serialize to more than 1 MiB uncompressed — verify the buffer is
    // grown (or large enough) before ZSTD_compress is called on such frames.
    enc->compressed_buffer_size = ZSTD_compressBound(1024 * 1024); // 1MB max
    enc->compressed_buffer = malloc(enc->compressed_buffer_size);

    // OPTIMIZATION: Allocate reusable quantisation buffers for padded tiles (344x288)
    const int padded_coeff_count = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y;
    enc->reusable_quantised_y = malloc(padded_coeff_count * sizeof(int16_t));
    enc->reusable_quantised_co = malloc(padded_coeff_count * sizeof(int16_t));
    enc->reusable_quantised_cg = malloc(padded_coeff_count * sizeof(int16_t));

    // Allocate coefficient delta storage for P-frames (per-tile coefficient storage)
    size_t total_coeff_size = num_tiles * padded_coeff_count * sizeof(float);
    enc->previous_coeffs_y = malloc(total_coeff_size);
    enc->previous_coeffs_co = malloc(total_coeff_size);
    enc->previous_coeffs_cg = malloc(total_coeff_size);
    enc->previous_coeffs_allocated = 0; // Will be set to 1 after first I-frame

    // Single combined check: any failed allocation aborts initialisation.
    if (!enc->current_frame_rgb || !enc->previous_frame_rgb ||
        !enc->current_frame_y || !enc->current_frame_co || !enc->current_frame_cg ||
        !enc->previous_frame_y || !enc->previous_frame_co || !enc->previous_frame_cg ||
        !enc->tiles || !enc->motion_vectors || !enc->zstd_ctx || !enc->compressed_buffer ||
        !enc->reusable_quantised_y || !enc->reusable_quantised_co || !enc->reusable_quantised_cg ||
        !enc->previous_coeffs_y || !enc->previous_coeffs_co || !enc->previous_coeffs_cg) {
        return -1;
    }

    return 0;
}

// =============================================================================
// DWT Implementation - 5/3 Reversible and 9/7 Irreversible Filters
// =============================================================================

// 1D DWT using lifting scheme for 5/3 reversible filter.
// Output layout: low-pass (approximation) coefficients in data[0..half-1],
// high-pass (detail) coefficients in data[half..length-1].
static void dwt_53_forward_1d(float *data, int length) {
    if (length < 2) return;

    float *temp = malloc(length * sizeof(float));
    int half = (length + 1) / 2; // Handle odd lengths properly

    // Predict step (high-pass): d[i] = odd - average of neighbouring evens,
    // mirroring the last even sample at the right edge.
    for (int i = 0; i < half; i++) {
        int idx = 2 * i + 1;
        if (idx < length) {
            float pred = 0.5f * (data[2 * i] + (2 * i + 2 < length ? data[2 * i + 2] : data[2 * i]));
            temp[half + i] = data[idx] - pred;
        }
    }

    // Update step (low-pass): s[i] = even + 0.25*(d[i-1] + d[i]).
    // NOTE(review): out-of-range detail terms are treated as 0 rather than
    // mirrored — the playtav decoder must use the identical convention for
    // perfect reconstruction; confirm against the inverse transform.
    for (int i = 0; i < half; i++) {
        float update = 0.25f * ((i > 0 ? temp[half + i - 1] : 0) +
                                (i < half - 1 ?
temp[half + i] : 0));
        temp[i] = data[2 * i] + update;
    }

    // Copy back (in-place result: lows then highs)
    memcpy(data, temp, length * sizeof(float));
    free(temp);
}


// 1D DWT using lifting scheme for 9/7 irreversible (CDF 9/7) filter.
// Output layout matches dwt_53_forward_1d: lows in [0..half-1], highs after.
static void dwt_97_forward_1d(float *data, int length) {
    if (length < 2) return;

    float *temp = malloc(length * sizeof(float));
    int half = (length + 1) / 2; // Handle odd lengths properly

    // Split into even/odd samples
    for (int i = 0; i < half; i++) {
        temp[i] = data[2 * i]; // Even (low)
    }
    for (int i = 0; i < length / 2; i++) {
        temp[half + i] = data[2 * i + 1]; // Odd (high)
    }

    // JPEG2000 9/7 forward lifting steps (corrected to match decoder)
    const float alpha = -1.586134342f;
    const float beta = -0.052980118f;
    const float gamma = 0.882911076f;
    const float delta = 0.443506852f;
    const float K = 1.230174105f;

    // Step 1: Predict α - d[i] += α * (s[i] + s[i+1])
    // (the last even sample is reused when s[i+1] would run off the end)
    for (int i = 0; i < length / 2; i++) {
        if (half + i < length) {
            float s_curr = temp[i];
            float s_next = (i + 1 < half) ? temp[i + 1] : s_curr;
            temp[half + i] += alpha * (s_curr + s_next);
        }
    }

    // Step 2: Update β - s[i] += β * (d[i-1] + d[i])
    // (d[-1] is mirrored to d[0]; a missing d[i] at the right edge is 0)
    for (int i = 0; i < half; i++) {
        float d_curr = (half + i < length) ? temp[half + i] : 0.0f;
        float d_prev = (i > 0 && half + i - 1 < length) ? temp[half + i - 1] : d_curr;
        temp[i] += beta * (d_prev + d_curr);
    }

    // Step 3: Predict γ - d[i] += γ * (s[i] + s[i+1])
    for (int i = 0; i < length / 2; i++) {
        if (half + i < length) {
            float s_curr = temp[i];
            float s_next = (i + 1 < half) ? temp[i + 1] : s_curr;
            temp[half + i] += gamma * (s_curr + s_next);
        }
    }

    // Step 4: Update δ - s[i] += δ * (d[i-1] + d[i])
    for (int i = 0; i < half; i++) {
        float d_curr = (half + i < length) ? temp[half + i] : 0.0f;
        float d_prev = (i > 0 && half + i - 1 < length) ?
temp[half + i - 1] : d_curr;
        temp[i] += delta * (d_prev + d_curr);
    }

    // Step 5: Scaling - s[i] *= K, d[i] /= K
    for (int i = 0; i < half; i++) {
        temp[i] *= K; // Low-pass coefficients
    }
    for (int i = 0; i < length / 2; i++) {
        if (half + i < length) {
            temp[half + i] /= K; // High-pass coefficients
        }
    }

    memcpy(data, temp, length * sizeof(float));
    free(temp);
}

// Extract padded tile with margins for seamless DWT processing (correct implementation).
// Copies a PADDED_TILE_SIZE_X x PADDED_TILE_SIZE_Y window (core tile plus
// TILE_MARGIN on each side) out of the current Y/Co/Cg planes, mirroring
// samples at the frame borders so the DWT sees no hard edge between tiles.
static void extract_padded_tile(tav_encoder_t *enc, int tile_x, int tile_y,
                                float *padded_y, float *padded_co, float *padded_cg) {
    const int core_start_x = tile_x * TILE_SIZE_X;
    const int core_start_y = tile_y * TILE_SIZE_Y;

    // OPTIMIZATION: Process row by row with bulk copying for core region
    for (int py = 0; py < PADDED_TILE_SIZE_Y; py++) {
        // Map padded row to source image row
        int src_y = core_start_y + py - TILE_MARGIN;

        // Handle vertical boundary conditions with mirroring
        if (src_y < 0) src_y = -src_y;
        else if (src_y >= enc->height) src_y = enc->height - 1 - (src_y - enc->height);
        src_y = CLAMP(src_y, 0, enc->height - 1);

        // Calculate source and destination row offsets
        const int padded_row_offset = py * PADDED_TILE_SIZE_X;
        const int src_row_offset = src_y * enc->width;

        // Check if we can do bulk copying for the core region
        int core_start_px = TILE_MARGIN;
        int core_end_px = TILE_MARGIN + TILE_SIZE_X;

        // Check if core region is entirely within frame bounds
        int core_src_start_x = core_start_x;
        int core_src_end_x = core_start_x + TILE_SIZE_X;

        if (core_src_start_x >= 0 && core_src_end_x <= enc->width) {
            // OPTIMIZATION: Bulk copy core region in one memcpy per plane
            const int src_core_offset = src_row_offset + core_src_start_x;

            memcpy(&padded_y[padded_row_offset + core_start_px],
                   &enc->current_frame_y[src_core_offset],
                   TILE_SIZE_X * sizeof(float));
            memcpy(&padded_co[padded_row_offset + core_start_px],
                   &enc->current_frame_co[src_core_offset],
                   TILE_SIZE_X * sizeof(float));
            memcpy(&padded_cg[padded_row_offset + core_start_px],
                   &enc->current_frame_cg[src_core_offset],
                   TILE_SIZE_X * sizeof(float));

            // Handle margin pixels individually (left and right margins)
            for (int px = 0; px < core_start_px; px++) {
                int src_x = core_start_x + px - TILE_MARGIN;
                if (src_x < 0) src_x = -src_x;         // mirror across left edge
                src_x = CLAMP(src_x, 0, enc->width - 1);

                int src_idx = src_row_offset + src_x;
                int padded_idx = padded_row_offset + px;

                padded_y[padded_idx] = enc->current_frame_y[src_idx];
                padded_co[padded_idx] = enc->current_frame_co[src_idx];
                padded_cg[padded_idx] = enc->current_frame_cg[src_idx];
            }

            for (int px = core_end_px; px < PADDED_TILE_SIZE_X; px++) {
                int src_x = core_start_x + px - TILE_MARGIN;
                if (src_x >= enc->width) src_x = enc->width - 1 - (src_x - enc->width); // mirror right
                src_x = CLAMP(src_x, 0, enc->width - 1);

                int src_idx = src_row_offset + src_x;
                int padded_idx = padded_row_offset + px;

                padded_y[padded_idx] = enc->current_frame_y[src_idx];
                padded_co[padded_idx] = enc->current_frame_co[src_idx];
                padded_cg[padded_idx] = enc->current_frame_cg[src_idx];
            }
        } else {
            // Fallback: process entire row pixel by pixel (for edge tiles)
            for (int px = 0; px < PADDED_TILE_SIZE_X; px++) {
                int src_x = core_start_x + px - TILE_MARGIN;

                // Handle horizontal boundary conditions with mirroring
                if (src_x < 0) src_x = -src_x;
                else if (src_x >= enc->width) src_x = enc->width - 1 - (src_x - enc->width);
                src_x = CLAMP(src_x, 0, enc->width - 1);

                int src_idx = src_row_offset + src_x;
                int padded_idx = padded_row_offset + px;

                padded_y[padded_idx] = enc->current_frame_y[src_idx];
                padded_co[padded_idx] = enc->current_frame_co[src_idx];
                padded_cg[padded_idx] = enc->current_frame_cg[src_idx];
            }
        }
    }
}


// 2D DWT forward transform for rectangular padded tile
// (separable: 1D transform over rows, then columns, per decomposition level;
// each level recurses into the LL quadrant of the previous one).
static void dwt_2d_forward_padded(float *tile_data, int levels, int
filter_type) {
    const int width = PADDED_TILE_SIZE_X;  // 344
    const int height = PADDED_TILE_SIZE_Y; // 288
    const int max_size = (width > height) ? width : height;
    float *temp_row = malloc(max_size * sizeof(float));
    float *temp_col = malloc(max_size * sizeof(float));

    for (int level = 0; level < levels; level++) {
        // Each level operates on the low-low quadrant of the previous level,
        // which lives in the top-left (current_width x current_height) region.
        int current_width = width >> level;
        int current_height = height >> level;
        if (current_width < 1 || current_height < 1) break;

        // Row transform (horizontal)
        for (int y = 0; y < current_height; y++) {
            for (int x = 0; x < current_width; x++) {
                temp_row[x] = tile_data[y * width + x];
            }

            if (filter_type == WAVELET_5_3_REVERSIBLE) {
                dwt_53_forward_1d(temp_row, current_width);
            } else {
                dwt_97_forward_1d(temp_row, current_width);
            }

            for (int x = 0; x < current_width; x++) {
                tile_data[y * width + x] = temp_row[x];
            }
        }

        // Column transform (vertical)
        for (int x = 0; x < current_width; x++) {
            for (int y = 0; y < current_height; y++) {
                temp_col[y] = tile_data[y * width + x];
            }

            if (filter_type == WAVELET_5_3_REVERSIBLE) {
                dwt_53_forward_1d(temp_col, current_height);
            } else {
                dwt_97_forward_1d(temp_col, current_height);
            }

            for (int y = 0; y < current_height; y++) {
                tile_data[y * width + x] = temp_col[y];
            }
        }
    }

    free(temp_row);
    free(temp_col);
}




// Quantisation for DWT subbands with rate control.
// Divides each coefficient by the effective quantiser (base quantiser scaled
// by the tile's rate-control factor, clamped to [1, 255]) and rounds to the
// nearest integer, saturating to int16 range.
static void quantise_dwt_coefficients(float *coeffs, int16_t *quantised, int size, int quantiser, float rcf) {
    float effective_q = quantiser * rcf;
    effective_q = FCLAMP(effective_q, 1.0f, 255.0f);

    for (int i = 0; i < size; i++) {
        float quantised_val = coeffs[i] / effective_q;
        quantised[i] = (int16_t)CLAMP((int)(quantised_val + (quantised_val >= 0 ?
0.5f : -0.5f)), -32768, 32767); + } +} + +// Serialize tile data for compression +static size_t serialize_tile_data(tav_encoder_t *enc, int tile_x, int tile_y, + const float *tile_y_data, const float *tile_co_data, const float *tile_cg_data, + const motion_vector_t *mv, uint8_t mode, uint8_t *buffer) { + size_t offset = 0; + + // Write tile header + buffer[offset++] = mode; + memcpy(buffer + offset, &mv->mv_x, sizeof(int16_t)); offset += sizeof(int16_t); + memcpy(buffer + offset, &mv->mv_y, sizeof(int16_t)); offset += sizeof(int16_t); + memcpy(buffer + offset, &mv->rate_control_factor, sizeof(float)); offset += sizeof(float); + + if (mode == TAV_MODE_SKIP) { + // No coefficient data for SKIP/MOTION modes + return offset; + } + + // Quantise and serialize DWT coefficients (full padded tile: 344x288) + const int tile_size = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y; + // OPTIMIZATION: Use pre-allocated buffers instead of malloc/free per tile + int16_t *quantised_y = enc->reusable_quantised_y; + int16_t *quantised_co = enc->reusable_quantised_co; + int16_t *quantised_cg = enc->reusable_quantised_cg; + + // Debug: check DWT coefficients before quantisation + /*if (tile_x == 0 && tile_y == 0) { + printf("Encoder Debug: Tile (0,0) - DWT Y coeffs before quantisation (first 16): "); + for (int i = 0; i < 16; i++) { + printf("%.2f ", tile_y_data[i]); + } + printf("\n"); + printf("Encoder Debug: Quantisers - Y=%d, Co=%d, Cg=%d, rcf=%.2f\n", + enc->quantiser_y, enc->quantiser_co, enc->quantiser_cg, mv->rate_control_factor); + }*/ + + if (mode == TAV_MODE_INTRA) { + // INTRA mode: quantise coefficients directly and store for future reference + quantise_dwt_coefficients((float*)tile_y_data, quantised_y, tile_size, enc->quantiser_y, mv->rate_control_factor); + quantise_dwt_coefficients((float*)tile_co_data, quantised_co, tile_size, enc->quantiser_co, mv->rate_control_factor); + quantise_dwt_coefficients((float*)tile_cg_data, quantised_cg, tile_size, enc->quantiser_cg, 
mv->rate_control_factor); + + // Store current coefficients for future delta reference + int tile_idx = tile_y * enc->tiles_x + tile_x; + float *prev_y = enc->previous_coeffs_y + (tile_idx * tile_size); + float *prev_co = enc->previous_coeffs_co + (tile_idx * tile_size); + float *prev_cg = enc->previous_coeffs_cg + (tile_idx * tile_size); + memcpy(prev_y, tile_y_data, tile_size * sizeof(float)); + memcpy(prev_co, tile_co_data, tile_size * sizeof(float)); + memcpy(prev_cg, tile_cg_data, tile_size * sizeof(float)); + + } else if (mode == TAV_MODE_DELTA) { + // DELTA mode: compute coefficient deltas and quantise them + int tile_idx = tile_y * enc->tiles_x + tile_x; + float *prev_y = enc->previous_coeffs_y + (tile_idx * tile_size); + float *prev_co = enc->previous_coeffs_co + (tile_idx * tile_size); + float *prev_cg = enc->previous_coeffs_cg + (tile_idx * tile_size); + + // Compute deltas: delta = current - previous + float *delta_y = malloc(tile_size * sizeof(float)); + float *delta_co = malloc(tile_size * sizeof(float)); + float *delta_cg = malloc(tile_size * sizeof(float)); + + for (int i = 0; i < tile_size; i++) { + delta_y[i] = tile_y_data[i] - prev_y[i]; + delta_co[i] = tile_co_data[i] - prev_co[i]; + delta_cg[i] = tile_cg_data[i] - prev_cg[i]; + } + + // Quantise the deltas + quantise_dwt_coefficients(delta_y, quantised_y, tile_size, enc->quantiser_y, mv->rate_control_factor); + quantise_dwt_coefficients(delta_co, quantised_co, tile_size, enc->quantiser_co, mv->rate_control_factor); + quantise_dwt_coefficients(delta_cg, quantised_cg, tile_size, enc->quantiser_cg, mv->rate_control_factor); + + // Reconstruct coefficients like decoder will (previous + dequantised_delta) + for (int i = 0; i < tile_size; i++) { + float dequant_delta_y = (float)quantised_y[i] * enc->quantiser_y * mv->rate_control_factor; + float dequant_delta_co = (float)quantised_co[i] * enc->quantiser_co * mv->rate_control_factor; + float dequant_delta_cg = (float)quantised_cg[i] * 
enc->quantiser_cg * mv->rate_control_factor; + + prev_y[i] = prev_y[i] + dequant_delta_y; + prev_co[i] = prev_co[i] + dequant_delta_co; + prev_cg[i] = prev_cg[i] + dequant_delta_cg; + } + + free(delta_y); + free(delta_co); + free(delta_cg); + } + + // Debug: check quantised coefficients after quantisation + /*if (tile_x == 0 && tile_y == 0) { + printf("Encoder Debug: Tile (0,0) - Quantised Y coeffs (first 16): "); + for (int i = 0; i < 16; i++) { + printf("%d ", quantised_y[i]); + } + printf("\n"); + }*/ + + // Write quantised coefficients + memcpy(buffer + offset, quantised_y, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t); + memcpy(buffer + offset, quantised_co, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t); + memcpy(buffer + offset, quantised_cg, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t); + + // OPTIMIZATION: No need to free - using pre-allocated reusable buffers + + return offset; +} + +// Compress and write frame data +static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type) { + // Calculate total uncompressed size (for padded tile coefficients: 344x288) + const size_t max_tile_size = 9 + (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y * 3 * sizeof(int16_t)); // header + 3 channels of coefficients + const size_t total_uncompressed_size = enc->tiles_x * enc->tiles_y * max_tile_size; + + // Allocate buffer for uncompressed tile data + uint8_t *uncompressed_buffer = malloc(total_uncompressed_size); + size_t uncompressed_offset = 0; + + // Serialize all tiles + for (int tile_y = 0; tile_y < enc->tiles_y; tile_y++) { + for (int tile_x = 0; tile_x < enc->tiles_x; tile_x++) { + int tile_idx = tile_y * enc->tiles_x + tile_x; + + // Determine tile mode based on frame type, coefficient availability, and intra_only flag + uint8_t mode; + int is_keyframe = (packet_type == TAV_PACKET_IFRAME); + if (is_keyframe || !enc->previous_coeffs_allocated) { + mode = TAV_MODE_INTRA; // I-frames, 
first frames, or intra-only mode always use INTRA + } else { + mode = TAV_MODE_DELTA; // P-frames use coefficient delta encoding + } + + // Extract padded tile data (344x288) with neighbour context for overlapping tiles + float tile_y_data[PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y]; + float tile_co_data[PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y]; + float tile_cg_data[PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y]; + + // Extract padded tiles using context from neighbours + extract_padded_tile(enc, tile_x, tile_y, tile_y_data, tile_co_data, tile_cg_data); + + // Debug: check input data before DWT + /*if (tile_x == 0 && tile_y == 0) { + printf("Encoder Debug: Tile (0,0) - Y data before DWT (first 16): "); + for (int i = 0; i < 16; i++) { + printf("%.2f ", tile_y_data[i]); + } + printf("\n"); + }*/ + + // Apply DWT transform to each padded channel (176x176) + dwt_2d_forward_padded(tile_y_data, enc->decomp_levels, enc->wavelet_filter); + dwt_2d_forward_padded(tile_co_data, enc->decomp_levels, enc->wavelet_filter); + dwt_2d_forward_padded(tile_cg_data, enc->decomp_levels, enc->wavelet_filter); + + // Serialize tile + size_t tile_size = serialize_tile_data(enc, tile_x, tile_y, + tile_y_data, tile_co_data, tile_cg_data, + &enc->motion_vectors[tile_idx], mode, + uncompressed_buffer + uncompressed_offset); + uncompressed_offset += tile_size; + } + } + + // Compress with zstd + size_t compressed_size = ZSTD_compress(enc->compressed_buffer, enc->compressed_buffer_size, + uncompressed_buffer, uncompressed_offset, + ZSTD_CLEVEL_DEFAULT); + + if (ZSTD_isError(compressed_size)) { + fprintf(stderr, "Error: ZSTD compression failed: %s\n", ZSTD_getErrorName(compressed_size)); + free(uncompressed_buffer); + return 0; + } + + // Write packet header and compressed data + fwrite(&packet_type, 1, 1, enc->output_fp); + uint32_t compressed_size_32 = (uint32_t)compressed_size; + fwrite(&compressed_size_32, sizeof(uint32_t), 1, enc->output_fp); + fwrite(enc->compressed_buffer, 1, compressed_size, 
enc->output_fp); + + free(uncompressed_buffer); + + enc->total_compressed_size += compressed_size; + enc->total_uncompressed_size += uncompressed_offset; + + // Mark coefficient storage as available after first I-frame + if (packet_type == TAV_PACKET_IFRAME) { + enc->previous_coeffs_allocated = 1; + } + + return compressed_size + 5; // packet type + size field + compressed data +} + +// RGB to YCoCg colour space conversion +static void rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height) { + const int total_pixels = width * height; + + // OPTIMIZATION: Process 4 pixels at a time for better cache utilization + int i = 0; + const int simd_end = (total_pixels / 4) * 4; + + // Vectorized processing for groups of 4 pixels + for (i = 0; i < simd_end; i += 4) { + // Load 4 RGB triplets (12 bytes) at once + const uint8_t *rgb_ptr = &rgb[i * 3]; + + // Process 4 pixels simultaneously with loop unrolling + for (int j = 0; j < 4; j++) { + const int idx = i + j; + const float r = rgb_ptr[j * 3 + 0]; + const float g = rgb_ptr[j * 3 + 1]; + const float b = rgb_ptr[j * 3 + 2]; + + // YCoCg-R transform (optimised with fewer temporary variables) + co[idx] = r - b; + const float tmp = b + co[idx] * 0.5f; + cg[idx] = g - tmp; + y[idx] = tmp + cg[idx] * 0.5f; + } + } + + // Handle remaining pixels (1-3 pixels) + for (; i < total_pixels; i++) { + const float r = rgb[i * 3 + 0]; + const float g = rgb[i * 3 + 1]; + const float b = rgb[i * 3 + 2]; + + co[i] = r - b; + const float tmp = b + co[i] * 0.5f; + cg[i] = g - tmp; + y[i] = tmp + cg[i] * 0.5f; + } +} + +// ---------------------- ICtCp Implementation ---------------------- + +static inline int iround(double v) { return (int)floor(v + 0.5); } + +// ---------------------- sRGB gamma helpers ---------------------- +static inline double srgb_linearize(double val) { + if (val <= 0.04045) return val / 12.92; + return pow((val + 0.055) / 1.055, 2.4); +} + +static inline double srgb_unlinearize(double val) 
{
    if (val <= 0.0031308) return 12.92 * val;
    return 1.055 * pow(val, 1.0/2.4) - 0.055;
}

// ---------------------- HLG OETF/EOTF ----------------------
// BT.2100 Hybrid Log-Gamma opto-electrical transfer: linear scene light
// E in [0,1] -> non-linear signal E'.
static inline double HLG_OETF(double E) {
    const double a = 0.17883277;
    const double b = 0.28466892; // 1 - 4*a
    const double c = 0.55991073; // 0.5 - a*ln(4*a)

    if (E <= 1.0/12.0) return sqrt(3.0 * E);
    return a * log(12.0 * E - b) + c;
}

// Inverse of HLG_OETF: non-linear signal E' -> linear light.
static inline double HLG_EOTF(double Ep) {
    const double a = 0.17883277;
    const double b = 0.28466892;
    const double c = 0.55991073;

    if (Ep <= 0.5) {
        double val = Ep * Ep / 3.0;
        return val;
    }
    double val = (exp((Ep - c) / a) + b) / 12.0;
    return val;
}

// sRGB -> LMS matrix (unused; kept for reference)
/*static const double M_RGB_TO_LMS[3][3] = {
    {0.2958564579364564, 0.6230869483219083, 0.08106989398623762},
    {0.15627390752659093, 0.727308963512872, 0.11639736914944238},
    {0.035141262332177715, 0.15657109121101628, 0.8080956851990795}
};*/
// BT.2100 -> LMS matrix (integer-derived /4096 constants from the spec)
static const double M_RGB_TO_LMS[3][3] = {
    {1688.0/4096,2146.0/4096, 262.0/4096},
    { 683.0/4096,2951.0/4096, 462.0/4096},
    {  99.0/4096, 309.0/4096,3688.0/4096}
};

// Numeric inverse of M_RGB_TO_LMS (LMS -> linear RGB)
static const double M_LMS_TO_RGB[3][3] = {
    {6.1723815689243215, -5.319534979827695, 0.14699442094633924},
    {-1.3243428148026244, 2.560286104841917, -0.2359203727576164},
    {-0.011819739235953752, -0.26473549971186555, 1.2767952602537955}
};

// ICtCp matrix (L' M' S' -> I Ct Cp). Values are the BT.2100 integer-derived /4096 constants.
static const double M_LMSPRIME_TO_ICTCP[3][3] = {
    {  2048.0/4096.0,  2048.0/4096.0,    0.0 },
    {  3625.0/4096.0, -7465.0/4096.0, 3840.0/4096.0 },
    {  9500.0/4096.0, -9212.0/4096.0, -288.0/4096.0 }
};

// Inverse matrices (I Ct Cp -> L' M' S')
static const double M_ICTCP_TO_LMSPRIME[3][3] = {
    { 1.0,  0.015718580108730416,  0.2095810681164055 },
    { 1.0, -0.015718580108730416, -0.20958106811640548 },
    { 1.0,  1.0212710798422344,  -0.6052744909924316 }
};

// ---------------------- Forward: sRGB8 -> ICtCp (doubles) ----------------------
// Converts one 8-bit sRGB pixel to ICtCp (HLG variant). Outputs are scaled to
// the 0..255 range, with the chroma axes (Ct, Cp) biased by +127.5 so they fit
// the same unsigned range as I.
void srgb8_to_ictcp_hlg(uint8_t r8, uint8_t g8, uint8_t b8,
                        double *out_I, double *out_Ct, double *out_Cp)
{
    // 1) linearize sRGB to 0..1
    double r = srgb_linearize((double)r8 / 255.0);
    double g = srgb_linearize((double)g8 / 255.0);
    double b = srgb_linearize((double)b8 / 255.0);

    // 2) linear RGB -> LMS (single 3x3 multiply)
    double L = M_RGB_TO_LMS[0][0]*r + M_RGB_TO_LMS[0][1]*g + M_RGB_TO_LMS[0][2]*b;
    double M = M_RGB_TO_LMS[1][0]*r + M_RGB_TO_LMS[1][1]*g + M_RGB_TO_LMS[1][2]*b;
    double S = M_RGB_TO_LMS[2][0]*r + M_RGB_TO_LMS[2][1]*g + M_RGB_TO_LMS[2][2]*b;

    // 3) HLG OETF
    double Lp = HLG_OETF(L);
    double Mp = HLG_OETF(M);
    double Sp = HLG_OETF(S);

    // 4) L'M'S' -> ICtCp
    double I = M_LMSPRIME_TO_ICTCP[0][0]*Lp + M_LMSPRIME_TO_ICTCP[0][1]*Mp + M_LMSPRIME_TO_ICTCP[0][2]*Sp;
    double Ct = M_LMSPRIME_TO_ICTCP[1][0]*Lp + M_LMSPRIME_TO_ICTCP[1][1]*Mp + M_LMSPRIME_TO_ICTCP[1][2]*Sp;
    double Cp = M_LMSPRIME_TO_ICTCP[2][0]*Lp + M_LMSPRIME_TO_ICTCP[2][1]*Mp + M_LMSPRIME_TO_ICTCP[2][2]*Sp;

    *out_I = FCLAMP(I * 255.f, 0.f, 255.f);
    *out_Ct = FCLAMP(Ct * 255.f + 127.5f, 0.f, 255.f);
    *out_Cp = FCLAMP(Cp * 255.f + 127.5f, 0.f, 255.f);
}

// ---------------------- Reverse: ICtCp -> sRGB8 (doubles) ----------------------
// Inverse of srgb8_to_ictcp_hlg: takes the 0..255-scaled, chroma-biased
// I/Ct/Cp values and reconstructs an 8-bit sRGB pixel.
void ictcp_hlg_to_srgb8(double I8, double Ct8, double Cp8,
                        uint8_t *r8, uint8_t *g8, uint8_t *b8)
{
    // Undo the 0..255 scaling and the +127.5 chroma bias
    double I = I8 / 255.f;
    double Ct = (Ct8 - 127.5f) / 255.f;
    double Cp = (Cp8 - 127.5f) / 255.f;

// 1) ICtCp -> L' M' S' (3x3 multiply) + double Lp = M_ICTCP_TO_LMSPRIME[0][0]*I + M_ICTCP_TO_LMSPRIME[0][1]*Ct + M_ICTCP_TO_LMSPRIME[0][2]*Cp; + double Mp = M_ICTCP_TO_LMSPRIME[1][0]*I + M_ICTCP_TO_LMSPRIME[1][1]*Ct + M_ICTCP_TO_LMSPRIME[1][2]*Cp; + double Sp = M_ICTCP_TO_LMSPRIME[2][0]*I + M_ICTCP_TO_LMSPRIME[2][1]*Ct + M_ICTCP_TO_LMSPRIME[2][2]*Cp; + + // 2) HLG decode: L' -> linear LMS + double L = HLG_EOTF(Lp); + double M = HLG_EOTF(Mp); + double S = HLG_EOTF(Sp); + + // 3) LMS -> linear sRGB (3x3 inverse) + double r_lin = M_LMS_TO_RGB[0][0]*L + M_LMS_TO_RGB[0][1]*M + M_LMS_TO_RGB[0][2]*S; + double g_lin = M_LMS_TO_RGB[1][0]*L + M_LMS_TO_RGB[1][1]*M + M_LMS_TO_RGB[1][2]*S; + double b_lin = M_LMS_TO_RGB[2][0]*L + M_LMS_TO_RGB[2][1]*M + M_LMS_TO_RGB[2][2]*S; + + // 4) gamma encode and convert to 0..255 with center-of-bin rounding + double r = srgb_unlinearize(r_lin); + double g = srgb_unlinearize(g_lin); + double b = srgb_unlinearize(b_lin); + + *r8 = (uint8_t)iround(FCLAMP(r * 255.0, 0.0, 255.0)); + *g8 = (uint8_t)iround(FCLAMP(g * 255.0, 0.0, 255.0)); + *b8 = (uint8_t)iround(FCLAMP(b * 255.0, 0.0, 255.0)); +} + +// ---------------------- Colour Space Switching Functions ---------------------- +// Wrapper functions that choose between YCoCg-R and ICtCp based on encoder mode + +static void rgb_to_colour_space(tav_encoder_t *enc, uint8_t r, uint8_t g, uint8_t b, + double *c1, double *c2, double *c3) { + if (enc->ictcp_mode) { + // Use ICtCp colour space + srgb8_to_ictcp_hlg(r, g, b, c1, c2, c3); + } else { + // Use YCoCg-R colour space (convert from existing function) + float rf = r, gf = g, bf = b; + float co = rf - bf; + float tmp = bf + co / 2; + float cg = gf - tmp; + float y = tmp + cg / 2; + *c1 = (double)y; + *c2 = (double)co; + *c3 = (double)cg; + } +} + +static void colour_space_to_rgb(tav_encoder_t *enc, double c1, double c2, double c3, + uint8_t *r, uint8_t *g, uint8_t *b) { + if (enc->ictcp_mode) { + // Use ICtCp colour space + ictcp_hlg_to_srgb8(c1, 
                           c2, c3, r, g, b);
    } else {
        // Use YCoCg-R colour space (inverse of rgb_to_ycocg)
        // Inverse lifting steps, then round-to-nearest and clamp to 8-bit.
        float y = (float)c1;
        float co = (float)c2;
        float cg = (float)c3;
        float tmp = y - cg / 2.0f;
        float g_val = cg + tmp;
        float b_val = tmp - co / 2.0f;
        float r_val = co + b_val;
        *r = (uint8_t)CLAMP((int)(r_val + 0.5f), 0, 255);
        *g = (uint8_t)CLAMP((int)(g_val + 0.5f), 0, 255);
        *b = (uint8_t)CLAMP((int)(b_val + 0.5f), 0, 255);
    }
}

// RGB to colour space conversion for full frames
// Converts a packed RGB24 frame into three planar float channels.
static void rgb_to_colour_space_frame(tav_encoder_t *enc, const uint8_t *rgb,
                                      float *c1, float *c2, float *c3, int width, int height) {
    if (enc->ictcp_mode) {
        // ICtCp mode
        for (int i = 0; i < width * height; i++) {
            double I, Ct, Cp;
            srgb8_to_ictcp_hlg(rgb[i*3], rgb[i*3+1], rgb[i*3+2], &I, &Ct, &Cp);
            c1[i] = (float)I;
            c2[i] = (float)Ct;
            c3[i] = (float)Cp;
        }
    } else {
        // Use existing YCoCg function
        rgb_to_ycocg(rgb, c1, c2, c3, width, height);
    }
}

// Write TAV file header
// Layout: 8-byte magic, version, width/height (u16), fps (u8),
// total_frames (u32), wavelet/decomp/quantiser bytes, two flag bytes,
// 7 reserved bytes. Returns 0 on success, -1 if no output file is open.
// NOTE(review): multi-byte fields are written with fwrite() in host byte
// order — the format is presumably little-endian; this breaks on a
// big-endian host. Confirm against the TAV spec/decoder.
static int write_tav_header(tav_encoder_t *enc) {
    if (!enc->output_fp) return -1;

    // Magic number
    fwrite(TAV_MAGIC, 1, 8, enc->output_fp);

    // Version (dynamic based on colour space)
    uint8_t version = enc->ictcp_mode ? 2 : 1; // Version 2 for ICtCp, 1 for YCoCg-R
    fputc(version, enc->output_fp);

    // Video parameters
    fwrite(&enc->width, sizeof(uint16_t), 1, enc->output_fp);
    fwrite(&enc->height, sizeof(uint16_t), 1, enc->output_fp);
    fputc(enc->fps, enc->output_fp);
    fwrite(&enc->total_frames, sizeof(uint32_t), 1, enc->output_fp);

    // Encoder parameters
    fputc(enc->wavelet_filter, enc->output_fp);
    fputc(enc->decomp_levels, enc->output_fp);
    fputc(enc->quantiser_y, enc->output_fp);
    fputc(enc->quantiser_co, enc->output_fp);
    fputc(enc->quantiser_cg, enc->output_fp);

    // Feature flags
    uint8_t extra_flags = 0;
    if (enc->has_audio) extra_flags |= 0x01;     // Has audio (placeholder)
    if (enc->subtitle_file) extra_flags |= 0x02; // Has subtitles
    if (enc->enable_progressive_transmission) extra_flags |= 0x04;
    if (enc->enable_roi) extra_flags |= 0x08;
    fputc(extra_flags, enc->output_fp);

    uint8_t video_flags = 0;
//    if (!enc->progressive) video_flags |= 0x01; // Interlaced
    if (enc->is_ntsc_framerate) video_flags |= 0x02; // NTSC
    if (enc->lossless) video_flags |= 0x04;          // Lossless
    fputc(video_flags, enc->output_fp);

    // Reserved bytes (7 bytes)
    for (int i = 0; i < 7; i++) {
        fputc(0, enc->output_fp);
    }

    return 0;
}

// =============================================================================
// Video Processing Pipeline (from TEV for compatibility)
// =============================================================================

// Execute command and capture output
// Runs `command` via popen() and returns its stdout as a heap-allocated,
// NUL-terminated string (caller frees), or NULL if the pipe can't be opened.
// NOTE(review): the realloc() result is assigned without a NULL check —
// on allocation failure this leaks the old buffer and dereferences NULL.
static char* execute_command(const char* command) {
    FILE* pipe = popen(command, "r");
    if (!pipe) return NULL;

    size_t buffer_size = 4096;
    char* buffer = malloc(buffer_size);
    size_t total_size = 0;
    size_t bytes_read;

    // Grow the buffer geometrically; keep one byte spare for the terminator.
    while ((bytes_read = fread(buffer + total_size, 1, buffer_size - total_size - 1, pipe)) > 0) {
        total_size += bytes_read;
        if (total_size + 1 >= buffer_size) {
            buffer_size *= 2;
            buffer = realloc(buffer, buffer_size);
        }
    }

    buffer[total_size] = '\0';
    pclose(pipe);
    return buffer;
}

// Get video metadata using ffprobe
// Fills fps / is_ntsc_framerate / duration / has_audio on `config` by running
// two ffprobe invocations in one shell command (video stream info, then a
// probe for an audio stream). Returns 1 on success, 0 on failure.
static int get_video_metadata(tav_encoder_t *config) {
    char command[1024];
    char *output;

    // Get all metadata without frame count (much faster)
    snprintf(command, sizeof(command),
        "ffprobe -v quiet "
        "-show_entries stream=r_frame_rate:format=duration "
        "-select_streams v:0 -of csv=p=0 \"%s\" 2>/dev/null; "
        "ffprobe -v quiet -select_streams a:0 -show_entries stream=index -of csv=p=0 \"%s\" 2>/dev/null",
        config->input_file, config->input_file);

    output = execute_command(command);
    if (!output) {
        fprintf(stderr, "Failed to get video metadata (ffprobe failed)\n");
        return 0;
    }

    // Parse the combined output
    char *line = strtok(output, "\n");
    int line_num = 0;
    double inputFramerate = 0;

    while (line) {
        switch (line_num) {
            case 0: // framerate (e.g., "30000/1001", "30/1")
                if (strlen(line) > 0) {
                    double num, den;
                    if (sscanf(line, "%lf/%lf", &num, &den) == 2) {
                        inputFramerate = num / den;
                        config->fps = (int)round(inputFramerate);
                        // 1001 denominator marks NTSC-style rates (29.97 etc.)
                        config->is_ntsc_framerate = (fabs(den - 1001.0) < 0.1);
                    } else {
                        config->fps = (int)round(atof(line));
                        config->is_ntsc_framerate = 0;
                    }
                    // Frame count will be determined during encoding
                    config->total_frames = 0;
                }
                break;
            case 1: // duration in seconds
                config->duration = atof(line);
                break;
        }
        line = strtok(NULL, "\n");
        line_num++;
    }

    // Check for audio (line_num > 2 means audio stream was found)
    // i.e. the second ffprobe emitted at least one stream-index line.
    config->has_audio = (line_num > 2);

    free(output);

    if (config->fps <= 0) {
        fprintf(stderr, "Invalid or missing framerate in input file\n");
        return 0;
    }

    // Set output FPS to input FPS if not specified
    if (config->output_fps == 0) {
        config->output_fps = config->fps;
    }

    // Frame count will be determined during encoding
    config->total_frames = 0;

    fprintf(stderr, "Video metadata:\n");
    fprintf(stderr, "  Frames: (will be determined during encoding)\n");
    fprintf(stderr, "  FPS: %.2f\n", inputFramerate);
    fprintf(stderr, "  Duration: %.2fs\n", config->duration);
    fprintf(stderr, "  Audio: %s\n", config->has_audio ? "Yes" : "No");
//    fprintf(stderr, "  Resolution: %dx%d (%s)\n", config->width, config->height,
//            config->progressive ? "progressive" : "interlaced");
    fprintf(stderr, "  Resolution: %dx%d\n", config->width, config->height);

    return 1;
}

// Start FFmpeg process for video conversion with frame rate support
// Spawns ffmpeg writing raw RGB24 frames (scaled/cropped to enc->width x
// enc->height) to enc->ffmpeg_video_pipe. Returns 1 on success, 0 on failure.
static int start_video_conversion(tav_encoder_t *enc) {
    char command[2048];

    // Use simple FFmpeg command like TEV encoder for reliable EOF detection
    if (enc->output_fps > 0 && enc->output_fps != enc->fps) {
        // Frame rate conversion requested
        // NOTE(review): this branch ends with "- 2>&1", which merges ffmpeg's
        // stderr into the raw RGB stdout stream read below — any warning text
        // would corrupt frame data. The non-conversion branch does not do
        // this; confirm whether "2>&1" is intentional here.
        snprintf(command, sizeof(command),
            "ffmpeg -v error -i \"%s\" -f rawvideo -pix_fmt rgb24 "
            "-vf \"fps=%d,scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" "
            "-y - 2>&1",
            enc->input_file, enc->output_fps, enc->width, enc->height, enc->width, enc->height);
    } else {
        // No frame rate conversion
        snprintf(command, sizeof(command),
            "ffmpeg -v error -i \"%s\" -f rawvideo -pix_fmt rgb24 "
            "-vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" "
            "-y -",
            enc->input_file, enc->width, enc->height, enc->width, enc->height);
    }

    if (enc->verbose) {
        printf("FFmpeg command: %s\n", command);
    }

    enc->ffmpeg_video_pipe = popen(command, "r");
    if (!enc->ffmpeg_video_pipe) {
        fprintf(stderr, "Failed to start FFmpeg video conversion\n");
        return 0;
    }

    return 1;
}

// Start audio conversion
// Transcodes the input's audio to MP2 (32 kHz stereo, libtwolame) into a
// temp file, then opens it and records its size in enc->audio_remaining.
// Returns 1 on success or when there is no audio; 0 if ffmpeg failed.
static int start_audio_conversion(tav_encoder_t *enc) {
    if (!enc->has_audio) return 1;

    char command[2048];
    snprintf(command, sizeof(command),
        "ffmpeg -v quiet -i \"%s\" -acodec libtwolame -psymodel 4 -b:a %dk -ar 32000 -ac 2 -y \"%s\" 2>/dev/null",
        enc->input_file, enc->lossless ?
        384 : MP2_RATE_TABLE[enc->quality_level], TEMP_AUDIO_FILE);

    int result = system(command);
    if (result == 0) {
        enc->mp2_file = fopen(TEMP_AUDIO_FILE, "rb");
        if (enc->mp2_file) {
            // Record total MP2 byte count so process_audio() can track progress.
            fseek(enc->mp2_file, 0, SEEK_END);
            enc->audio_remaining = ftell(enc->mp2_file);
            fseek(enc->mp2_file, 0, SEEK_SET);
        }
        return 1;
    }
    return 0;
}

// Get MP2 packet size from header (copied from TEV)
// Decodes bitrate/sample-rate/padding bits from a 4-byte MPEG audio header
// and returns the Layer II frame length: 144 * bitrate / sample_rate + padding.
// Falls back to MP2_DEFAULT_PACKET_SIZE on reserved/invalid field values.
static int get_mp2_packet_size(uint8_t *header) {
    int bitrate_index = (header[2] >> 4) & 0x0F;
    int bitrates[] = {0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384};
    if (bitrate_index >= 15) return MP2_DEFAULT_PACKET_SIZE;

    int bitrate = bitrates[bitrate_index];
    if (bitrate == 0) return MP2_DEFAULT_PACKET_SIZE; // "free" bitrate — unsupported

    int sampling_freq_index = (header[2] >> 2) & 0x03;
    int sampling_freqs[] = {44100, 48000, 32000, 0};
    int sampling_freq = sampling_freqs[sampling_freq_index];
    if (sampling_freq == 0) return MP2_DEFAULT_PACKET_SIZE; // reserved index

    int padding = (header[2] >> 1) & 0x01;
    return (144 * bitrate * 1000) / sampling_freq + padding;
}

// Convert MP2 packet size to rate index (copied from TEV)
// NOTE(review): "is_mono ? 0 : 0" yields 0 either way — the is_mono
// parameter currently has no effect; presumably a leftover from TEV.
static int mp2_packet_size_to_rate_index(int packet_size, int is_mono) {
    // Map packet size to rate index for MP2_RATE_TABLE
    if (packet_size <= 576) return is_mono ? 0 : 0; // 128k
    else if (packet_size <= 720) return 1;  // 160k
    else if (packet_size <= 1008) return 2; // 224k
    else if (packet_size <= 1440) return 3; // 320k
    else return 4;                          // 384k
}

// Convert SRT time format to frame number (copied from TEV)
// Parses "HH:MM:SS,mmm"; returns the nearest frame index, or -1 on bad input.
static int srt_time_to_frame(const char *time_str, int fps) {
    int hours, minutes, seconds, milliseconds;
    if (sscanf(time_str, "%d:%d:%d,%d", &hours, &minutes, &seconds, &milliseconds) != 4) {
        return -1;
    }

    double total_seconds = hours * 3600.0 + minutes * 60.0 + seconds + milliseconds / 1000.0;
    return (int)(total_seconds * fps + 0.5); // Round to nearest frame
}

// Convert SAMI milliseconds to frame number (copied from TEV)
static int sami_ms_to_frame(int milliseconds, int fps) {
    double seconds = milliseconds / 1000.0;
    return (int)(seconds * fps + 0.5); // Round to nearest frame
}

// Parse SubRip subtitle file (copied from TEV)
// Returns a singly-linked list of subtitle_entry_t (start/end frame + text),
// or NULL on open failure. Driven by a small state machine:
// 0 = expect index line, 1 = expect time range, 2 = collect text,
// 3 = skip a malformed entry until the next blank line.
static subtitle_entry_t* parse_srt_file(const char *filename, int fps) {
    FILE *file = fopen(filename, "r");
    if (!file) {
        fprintf(stderr, "Failed to open subtitle file: %s\n", filename);
        return NULL;
    }

    subtitle_entry_t *head = NULL;
    subtitle_entry_t *tail = NULL;
    char line[1024];
    int state = 0; // 0=index, 1=time, 2=text, 3=blank

    subtitle_entry_t *current_entry = NULL;
    char *text_buffer = NULL;
    size_t text_buffer_size = 0;

    while (fgets(line, sizeof(line), file)) {
        // Remove trailing newline (and CR for Windows-style line endings)
        size_t len = strlen(line);
        if (len > 0 && line[len-1] == '\n') {
            line[len-1] = '\0';
            len--;
        }
        if (len > 0 && line[len-1] == '\r') {
            line[len-1] = '\0';
            len--;
        }

        if (state == 0) { // Expecting subtitle index
            if (strlen(line) == 0) continue; // Skip empty lines
            // Create new subtitle entry
            current_entry = calloc(1, sizeof(subtitle_entry_t));
            if (!current_entry) break;
            state = 1;
        } else if (state == 1) { // Expecting time range
            char start_time[32], end_time[32];
            if (sscanf(line, "%31s --> %31s", start_time,
                       end_time) == 2) {
                current_entry->start_frame = srt_time_to_frame(start_time, fps);
                current_entry->end_frame = srt_time_to_frame(end_time, fps);

                if (current_entry->start_frame < 0 || current_entry->end_frame < 0) {
                    free(current_entry);
                    current_entry = NULL;
                    state = 3; // Skip to next blank line
                    continue;
                }

                // Initialize text buffer
                text_buffer_size = 256;
                text_buffer = malloc(text_buffer_size);
                if (!text_buffer) {
                    free(current_entry);
                    current_entry = NULL;
                    fprintf(stderr, "Memory allocation failed while parsing subtitles\n");
                    break;
                }
                text_buffer[0] = '\0';
                state = 2;
            } else {
                free(current_entry);
                current_entry = NULL;
                state = 3; // Skip malformed entry
            }
        } else if (state == 2) { // Collecting subtitle text
            if (strlen(line) == 0) {
                // End of subtitle text — finalize the entry and append it.
                current_entry->text = strdup(text_buffer);
                free(text_buffer);
                text_buffer = NULL;

                // Add to list
                if (!head) {
                    head = current_entry;
                    tail = current_entry;
                } else {
                    tail->next = current_entry;
                    tail = current_entry;
                }
                current_entry = NULL;
                state = 0;
            } else {
                // Append text line, growing the buffer as needed.
                size_t current_len = strlen(text_buffer);
                size_t line_len = strlen(line);
                size_t needed = current_len + line_len + 2; // +2 for newline and null

                if (needed > text_buffer_size) {
                    text_buffer_size = needed + 256;
                    char *new_buffer = realloc(text_buffer, text_buffer_size);
                    if (!new_buffer) {
                        free(text_buffer);
                        free(current_entry);
                        current_entry = NULL;
                        fprintf(stderr, "Memory reallocation failed while parsing subtitles\n");
                        break;
                    }
                    text_buffer = new_buffer;
                }

                if (current_len > 0) {
                    strcat(text_buffer, "\\n"); // Use \n as newline marker in subtitle text
                }
                strcat(text_buffer, line);
            }
        } else if (state == 3) { // Skip to next blank line
            if (strlen(line) == 0) {
                state = 0;
            }
        }
    }

    // Handle final subtitle if file doesn't end with blank line
    // NOTE(review): text_buffer is used after strdup() and freed, but tail is
    // not advanced here — harmless only because the loop has ended.
    if (current_entry && state == 2) {
        current_entry->text = strdup(text_buffer);
        if (!head) {
            head = current_entry;
        } else {
            tail->next = current_entry;
        }
        free(text_buffer);
    }

    fclose(file);
    return head;
}

// Parse SAMI subtitle file (simplified version from TEV)
// NOTE(review): this function is CORRUPTED in the source as captured — the
// HTML/SAMI tag string literals (e.g. the "<SYNC Start=" and "<P"/"</" markers
// passed to strstr), and the declarations of text_start/start_ms, appear to
// have been stripped by markup-eating extraction, leaving unbalanced string
// literals below. Reconstruct from the original TEV parse_smi_file before use;
// the code is preserved byte-for-byte here rather than guessed at.
static subtitle_entry_t* parse_smi_file(const char *filename, int fps) {
    FILE *file = fopen(filename, "r");
    if (!file) {
        fprintf(stderr, "Failed to open subtitle file: %s\n", filename);
        return NULL;
    }

    subtitle_entry_t *head = NULL;
    subtitle_entry_t *tail = NULL;
    char line[2048];

    while (fgets(line, sizeof(line), file)) {
        // Look for SYNC tags with Start= attribute
        char *sync_pos = strstr(line, "');
        if (text_start) {
            text_start++;
            char *text_end = strstr(text_start, "
");
            // NOTE(review): continuation of the corrupted parse_smi_file —
            // the strstr() terminator literal above lost its tag text during
            // extraction. Code preserved as captured; restore from TEV.
            if (text_end) {
                size_t text_len = text_end - text_start;
                if (text_len > 0 && text_len < MAX_SUBTITLE_LENGTH) {
                    subtitle_entry_t *entry = calloc(1, sizeof(subtitle_entry_t));
                    if (entry) {
                        entry->start_frame = sami_ms_to_frame(start_ms, fps);
                        entry->end_frame = entry->start_frame + fps * 3; // Default 3 second duration
                        entry->text = strndup(text_start, text_len);

                        // Add to list
                        if (!head) {
                            head = entry;
                            tail = entry;
                        } else {
                            tail->next = entry;
                            tail = entry;
                        }
                    }
                }
            }
        }
    }
    }
    }

    fclose(file);
    return head;
}

// Parse subtitle file based on extension (copied from TEV)
// Dispatches on the file extension: ".smi" -> SAMI parser, otherwise SRT.
static subtitle_entry_t* parse_subtitle_file(const char *filename, int fps) {
    if (!filename) return NULL;

    size_t len = strlen(filename);
    if (len > 4 && strcasecmp(filename + len - 4, ".smi") == 0) {
        return parse_smi_file(filename, fps);
    } else {
        return parse_srt_file(filename, fps);
    }
}

// Free subtitle list (copied from TEV)
// Walks the linked list releasing each node's text and the node itself.
static void free_subtitle_list(subtitle_entry_t *list) {
    while (list) {
        subtitle_entry_t *next = list->next;
        free(list->text);
        free(list);
        list = next;
    }
}

// Write subtitle packet (copied from TEV)
// Packet layout: type byte (TAV_PACKET_SUBTITLE), u32 payload size,
// 24-bit little-endian index, opcode byte, optional text, NUL terminator.
// Returns the total number of bytes written.
static int write_subtitle_packet(FILE *output, uint32_t index, uint8_t opcode, const char *text) {
    // Calculate packet size
    size_t text_len = text ?
                      strlen(text) : 0;
    size_t packet_size = 3 + 1 + text_len + 1; // index (3 bytes) + opcode + text + null terminator

    // Write packet type and size
    uint8_t packet_type = TAV_PACKET_SUBTITLE;
    fwrite(&packet_type, 1, 1, output);
    uint32_t size32 = (uint32_t)packet_size;
    fwrite(&size32, 4, 1, output);

    // Write subtitle data
    // 24-bit index, least-significant byte first.
    uint8_t index_bytes[3] = {
        (uint8_t)(index & 0xFF),
        (uint8_t)((index >> 8) & 0xFF),
        (uint8_t)((index >> 16) & 0xFF)
    };
    fwrite(index_bytes, 3, 1, output);
    fwrite(&opcode, 1, 1, output);

    if (text && text_len > 0) {
        fwrite(text, 1, text_len, output);
    }

    uint8_t null_terminator = 0;
    fwrite(&null_terminator, 1, 1, output);

    return 1 + 4 + packet_size; // Total bytes written
}

// Process audio for current frame (copied and adapted from TEV)
// Interleaves MP2 audio packets into the output stream, simulating the
// decoder's buffer level so packets are only emitted when the (modelled)
// buffer drops below target. Always returns 1 (audio problems are non-fatal).
static int process_audio(tav_encoder_t *enc, int frame_num, FILE *output) {
    if (!enc->has_audio || !enc->mp2_file || enc->audio_remaining <= 0) {
        return 1;
    }

    // Initialize packet size on first frame
    // Peek the first 4-byte MP2 header, then rewind.
    if (frame_num == 0) {
        uint8_t header[4];
        if (fread(header, 1, 4, enc->mp2_file) != 4) return 1;
        fseek(enc->mp2_file, 0, SEEK_SET);
        enc->mp2_packet_size = get_mp2_packet_size(header);
        int is_mono = (header[3] >> 6) == 3; // channel-mode bits: 3 = single channel
        enc->mp2_rate_index = mp2_packet_size_to_rate_index(enc->mp2_packet_size, is_mono);
        enc->target_audio_buffer_size = 4; // 4 audio packets in buffer
        enc->audio_frames_in_buffer = 0.0;
    }

    // Calculate how much audio time each frame represents (in seconds)
    double frame_audio_time = 1.0 / enc->fps;

    // Calculate how much audio time each MP2 packet represents
    // MP2 frame contains 1152 samples at 32kHz = 0.036 seconds
    #define MP2_SAMPLE_RATE 32000
    double packet_audio_time = 1152.0 / MP2_SAMPLE_RATE;

    // Estimate how many packets we consume per video frame
    double packets_per_frame = frame_audio_time / packet_audio_time;

    // Allocate MP2 buffer if needed
    if (!enc->mp2_buffer) {
        enc->mp2_buffer_size = enc->mp2_packet_size * 2; // Space for multiple packets
        enc->mp2_buffer = malloc(enc->mp2_buffer_size);
        if (!enc->mp2_buffer) {
            fprintf(stderr, "Failed to allocate audio buffer\n");
            return 1;
        }
    }

    // Audio buffering strategy: maintain target buffer level
    int packets_to_insert = 0;
    if (frame_num == 0) {
        // Prime buffer to target level initially
        packets_to_insert = enc->target_audio_buffer_size;
        enc->audio_frames_in_buffer = 0; // count starts from 0
        if (enc->verbose) {
            printf("Frame %d: Priming audio buffer with %d packets\n", frame_num, packets_to_insert);
        }
    } else {
        // Simulate buffer consumption (fractional consumption per frame)
        double old_buffer = enc->audio_frames_in_buffer;
        enc->audio_frames_in_buffer -= packets_per_frame;

        // Calculate how many packets we need to maintain target buffer level
        // Only insert when buffer drops below target, and only insert enough to restore target
        double target_level = (double)enc->target_audio_buffer_size;
        if (enc->audio_frames_in_buffer < target_level) {
            double deficit = target_level - enc->audio_frames_in_buffer;
            // Insert packets to cover the deficit, but at least maintain minimum flow
            packets_to_insert = (int)ceil(deficit);
            // Cap at reasonable maximum to prevent excessive insertion
            if (packets_to_insert > enc->target_audio_buffer_size) {
                packets_to_insert = enc->target_audio_buffer_size;
            }

            if (enc->verbose) {
                printf("Frame %d: Buffer low (%.2f->%.2f), deficit %.2f, inserting %d packets\n",
                       frame_num, old_buffer, enc->audio_frames_in_buffer, deficit, packets_to_insert);
            }
        } else if (enc->verbose && old_buffer != enc->audio_frames_in_buffer) {
            printf("Frame %d: Buffer sufficient (%.2f->%.2f), no packets\n",
                   frame_num, old_buffer, enc->audio_frames_in_buffer);
        }
    }

    // Insert the calculated number of audio packets
    for (int q = 0; q < packets_to_insert; q++) {
        size_t bytes_to_read = enc->mp2_packet_size;
        if (bytes_to_read >
            enc->audio_remaining) {
            bytes_to_read = enc->audio_remaining;
        }

        size_t bytes_read = fread(enc->mp2_buffer, 1, bytes_to_read, enc->mp2_file);
        if (bytes_read == 0) break;

        // Write TAV MP2 audio packet: type byte, u32 length, payload.
        uint8_t audio_packet_type = TAV_PACKET_AUDIO_MP2;
        uint32_t audio_len = (uint32_t)bytes_read;
        fwrite(&audio_packet_type, 1, 1, output);
        fwrite(&audio_len, 4, 1, output);
        fwrite(enc->mp2_buffer, 1, bytes_read, output);

        // Track audio bytes written
        enc->audio_remaining -= bytes_read;
        enc->audio_frames_in_buffer++;

        // NOTE(review): reassigned on every iteration of the priming loop,
        // overriding the increment above — presumably intentional per the
        // comment, but it makes the ++ above dead during frame 0.
        if (frame_num == 0) {
            enc->audio_frames_in_buffer = enc->target_audio_buffer_size / 2; // trick the buffer simulator so that it doesn't count the frame 0 priming
        }

        if (enc->verbose) {
            printf("Audio packet %d: %zu bytes (buffer: %.2f packets)\n",
                   q, bytes_read, enc->audio_frames_in_buffer);
        }
    }

    return 1;
}

// Process subtitles for current frame (copied and adapted from TEV)
// Emits SHOW (0x01) packets when a subtitle's window begins and HIDE (0x02)
// packets when it ends. Returns the number of bytes written this frame.
static int process_subtitles(tav_encoder_t *enc, int frame_num, FILE *output) {
    if (!enc->subtitles) {
        return 1; // No subtitles to process
    }

    int bytes_written = 0;

    // Check if we need to show a new subtitle
    if (!enc->subtitle_visible) {
        subtitle_entry_t *sub = enc->current_subtitle;
        if (!sub) sub = enc->subtitles; // Start from beginning if not set

        // Find next subtitle to show
        while (sub && sub->start_frame <= frame_num) {
            if (sub->end_frame > frame_num) {
                // This subtitle should be shown
                if (sub != enc->current_subtitle) {
                    enc->current_subtitle = sub;
                    enc->subtitle_visible = 1;
                    bytes_written += write_subtitle_packet(output, 0, 0x01, sub->text);
                    if (enc->verbose) {
                        printf("Frame %d: Showing subtitle: %.50s%s\n",
                               frame_num, sub->text, strlen(sub->text) > 50 ? "..." : "");
                    }
                }
                break;
            }
            sub = sub->next;
        }
    }

    // Check if we need to hide current subtitle
    if (enc->subtitle_visible && enc->current_subtitle) {
        if (frame_num >= enc->current_subtitle->end_frame) {
            enc->subtitle_visible = 0;
            bytes_written += write_subtitle_packet(output, 0, 0x02, NULL);
            if (enc->verbose) {
                printf("Frame %d: Hiding subtitle\n", frame_num);
            }
        }
    }

    return bytes_written;
}

// Detect scene changes by analysing frame differences
// Compares current vs previous RGB frame on a 2x2-subsampled grid and
// reports a scene cut when the fraction of strongly-changed pixels exceeds
// a threshold. Returns 0 when there is no frame to compare or in intra-only
// mode (where every frame is a keyframe anyway).
static int detect_scene_change(tav_encoder_t *enc) {
    if (!enc->current_frame_rgb || enc->intra_only) {
        return 0; // No current frame to compare
    }

    uint8_t *comparison_buffer = enc->previous_frame_rgb;

    long long total_diff = 0;
    int changed_pixels = 0;

    // Sample every 4th pixel for performance (still gives good detection)
    // (stepping by 2 in both x and y => 1 in 4 pixels sampled)
    for (int y = 0; y < enc->height; y += 2) {
        for (int x = 0; x < enc->width; x += 2) {
            int offset = (y * enc->width + x) * 3;

            // Calculate color difference
            int r_diff = abs(enc->current_frame_rgb[offset] - comparison_buffer[offset]);
            int g_diff = abs(enc->current_frame_rgb[offset + 1] - comparison_buffer[offset + 1]);
            int b_diff = abs(enc->current_frame_rgb[offset + 2] - comparison_buffer[offset + 2]);

            int pixel_diff = r_diff + g_diff + b_diff;
            total_diff += pixel_diff;

            // Count significantly changed pixels (threshold of 30 per channel average)
            if (pixel_diff > 90) {
                changed_pixels++;
            }
        }
    }

    // Calculate metrics for scene change detection
    int sampled_pixels = (enc->height / 2) * (enc->width / 2);
    double avg_diff = (double)total_diff / sampled_pixels;
    double changed_ratio = (double)changed_pixels / sampled_pixels;

    if (enc->verbose) {
        printf("Scene change detection: avg_diff=%.2f\tchanged_ratio=%.4f\n", avg_diff, changed_ratio);
    }

    // Scene change thresholds - adjust for interlaced mode
    // Interlaced fields have more natural differences due to temporal field separation
    double threshold = 0.30;

    return changed_ratio > threshold;
}

// Main function
// Parses CLI options, probes the input, spawns the ffmpeg video/audio
// pipelines, writes the TAV header, then runs the per-frame encode loop.
// (The loop continues beyond this chunk — only its first half is visible.)
int main(int argc, char *argv[]) {
    generate_random_filename(TEMP_AUDIO_FILE);

    printf("Initialising encoder...\n");
    tav_encoder_t *enc = create_encoder();
    if (!enc) {
        fprintf(stderr, "Error: Failed to create encoder\n");
        return 1;
    }

    // Command line option parsing (similar to TEV encoder)
    static struct option long_options[] = {
        {"input", required_argument, 0, 'i'},
        {"output", required_argument, 0, 'o'},
        {"size", required_argument, 0, 's'},
        {"fps", required_argument, 0, 'f'},
        {"quality", required_argument, 0, 'q'},
        {"quantiser", required_argument, 0, 'Q'},
        {"quantizer", required_argument, 0, 'Q'},
//        {"wavelet", required_argument, 0, 'w'},
//        {"decomp", required_argument, 0, 'd'},
        {"bitrate", required_argument, 0, 'b'},
//        {"progressive", no_argument, 0, 'p'},
        {"subtitles", required_argument, 0, 'S'},
        {"verbose", no_argument, 0, 'v'},
        {"test", no_argument, 0, 't'},
        {"lossless", no_argument, 0, 1000},
//        {"enable-progressive", no_argument, 0, 1002},
//        {"enable-roi", no_argument, 0, 1003},
        {"intra-only", no_argument, 0, 1006},
        {"ictcp", no_argument, 0, 1005},
        {"help", no_argument, 0, 1004},
        {0, 0, 0, 0}
    };

    // NOTE(review): the optstring still advertises 's:', 'w:', 'd:', 'b:' and
    // 'p', but no case handles them below — supplying -s/-b/etc. falls through
    // to default: and exits with the usage message. Presumably leftovers from
    // TEV; confirm whether --size/--bitrate are meant to work.
    int c, option_index = 0;
    while ((c = getopt_long(argc, argv, "i:o:s:f:q:Q:w:d:b:pS:vt", long_options, &option_index)) != -1) {
        switch (c) {
            case 'i':
                enc->input_file = strdup(optarg);
                break;
            case 'o':
                enc->output_file = strdup(optarg);
                break;
            case 'q':
                // Quality preset 0..5 maps to per-channel quantiser tables.
                enc->quality_level = CLAMP(atoi(optarg), 0, 5);
                enc->quantiser_y = QUALITY_Y[enc->quality_level];
                enc->quantiser_co = QUALITY_CO[enc->quality_level];
                enc->quantiser_cg = QUALITY_CG[enc->quality_level];
                break;
            case 'Q':
                // Parse quantiser values Y,Co,Cg
                if (sscanf(optarg, "%d,%d,%d", &enc->quantiser_y, &enc->quantiser_co, &enc->quantiser_cg) != 3) {
                    fprintf(stderr, "Error: Invalid quantiser format. Use Y,Co,Cg (e.g., 5,3,2)\n");
                    cleanup_encoder(enc);
                    return 1;
                }
                enc->quantiser_y = CLAMP(enc->quantiser_y, 1, 100);
                enc->quantiser_co = CLAMP(enc->quantiser_co, 1, 100);
                enc->quantiser_cg = CLAMP(enc->quantiser_cg, 1, 100);
                break;
            /*case 'w':
                enc->wavelet_filter = CLAMP(atoi(optarg), 0, 1);
                break;*/
            case 'f':
                enc->output_fps = atoi(optarg);
                enc->is_ntsc_framerate = 0;
                if (enc->output_fps <= 0) {
                    fprintf(stderr, "Invalid FPS: %d\n", enc->output_fps);
                    cleanup_encoder(enc);
                    return 1;
                }
                break;
            /*case 'd':
                enc->decomp_levels = CLAMP(atoi(optarg), 1, MAX_DECOMP_LEVELS);
                break;*/
            case 'v':
                enc->verbose = 1;
                break;
            case 't':
                enc->test_mode = 1;
                break;
            case 'S':
                enc->subtitle_file = strdup(optarg);
                break;
            case 1000: // --lossless
                enc->lossless = 1;
                enc->wavelet_filter = WAVELET_5_3_REVERSIBLE;
                break;
            case 1005: // --ictcp
                enc->ictcp_mode = 1;
                break;
            case 1006: // --intra-only
                enc->intra_only = 1;
                break;
            case 1004: // --help
                show_usage(argv[0]);
                cleanup_encoder(enc);
                return 0;
            default:
                show_usage(argv[0]);
                cleanup_encoder(enc);
                return 1;
        }
    }

    // adjust encoding parameters for ICtCp
    // (Ct/Cp are treated symmetrically, unlike Co/Cg.)
    if (enc->ictcp_mode) {
        enc->quantiser_cg = enc->quantiser_co;
    }

    if ((!enc->input_file && !enc->test_mode) || !enc->output_file) {
        fprintf(stderr, "Error: Input and output files must be specified\n");
        show_usage(argv[0]);
        cleanup_encoder(enc);
        return 1;
    }

    if (initialize_encoder(enc) != 0) {
        fprintf(stderr, "Error: Failed to initialize encoder\n");
        cleanup_encoder(enc);
        return 1;
    }

    printf("TAV Encoder - DWT-based video compression\n");
    printf("Input: %s\n", enc->input_file);
    printf("Output: %s\n", enc->output_file);
    printf("Resolution: %dx%d\n", enc->width, enc->height);
    printf("Wavelet: %s\n", enc->wavelet_filter ? "9/7 irreversible" : "5/3 reversible");
    printf("Decomposition levels: %d\n", enc->decomp_levels);
    if (enc->ictcp_mode) {
        printf("Quantiser: I=%d, Ct=%d, Cp=%d\n", enc->quantiser_y, enc->quantiser_co, enc->quantiser_cg);
    } else {
        printf("Quantiser: Y=%d, Co=%d, Cg=%d\n", enc->quantiser_y, enc->quantiser_co, enc->quantiser_cg);
    }
    printf("Colour space: %s\n", enc->ictcp_mode ? "ICtCp" : "YCoCg-R");

    // Open output file ("-" means stdout)
    if (strcmp(enc->output_file, "-") == 0) {
        enc->output_fp = stdout;
    } else {
        enc->output_fp = fopen(enc->output_file, "wb");
        if (!enc->output_fp) {
            fprintf(stderr, "Error: Cannot open output file %s\n", enc->output_file);
            cleanup_encoder(enc);
            return 1;
        }
    }

    // Start FFmpeg process for video input (using TEV-compatible filtergraphs)
    if (enc->test_mode) {
        // Test mode - generate solid colour frames
        enc->total_frames = 15; // Fixed 15 test frames like TEV
        printf("Test mode: Generating %d solid colour frames\n", enc->total_frames);
    } else {
        // Normal mode - get video metadata first
        printf("Retrieving video metadata...\n");
        if (!get_video_metadata(enc)) {
            fprintf(stderr, "Error: Failed to get video metadata\n");
            cleanup_encoder(enc);
            return 1;
        }

        // Start video preprocessing pipeline
        if (start_video_conversion(enc) != 1) {
            fprintf(stderr, "Error: Failed to start video conversion\n");
            cleanup_encoder(enc);
            return 1;
        }

        // Start audio conversion if needed
        if (enc->has_audio) {
            printf("Starting audio conversion...\n");
            if (!start_audio_conversion(enc)) {
                fprintf(stderr, "Warning: Audio conversion failed\n");
                enc->has_audio = 0;
            }
        }
    }

    // Parse subtitles if provided
    if (enc->subtitle_file) {
        printf("Parsing subtitles: %s\n", enc->subtitle_file);
        enc->subtitles = parse_subtitle_file(enc->subtitle_file, enc->fps);
        if (!enc->subtitles) {
            fprintf(stderr, "Warning: Failed to parse subtitle file\n");
        } else {
            printf("Loaded subtitles successfully\n");
        }
    }

    // Write TAV header
    if (write_tav_header(enc) != 0) {
        fprintf(stderr, "Error: Failed to write TAV header\n");
        cleanup_encoder(enc);
        return 1;
    }

    gettimeofday(&enc->start_time, NULL);

    if (enc->output_fps != enc->fps) {
        printf("Frame rate conversion enabled: %d fps output\n", enc->output_fps);
    }

    printf("Starting encoding...\n");

    // Main encoding loop - process frames until EOF or frame limit
    int frame_count = 0;
    int continue_encoding = 1;

    int count_iframe = 0;
    int count_pframe = 0;

    while (continue_encoding) {
        if (enc->test_mode) {
            // Test mode has a fixed frame count
            if (frame_count >= enc->total_frames) {
                continue_encoding = 0;
                break;
            }

            // Generate test frame with solid colours (TEV-style)
            size_t rgb_size = enc->width * enc->height * 3;
            uint8_t test_r = 0, test_g = 0, test_b = 0;
            const char* colour_name = "unknown";

            switch (frame_count) {
                case 0: test_r = 0; test_g = 0; test_b = 0; colour_name = "black"; break;
                case 1: test_r = 127; test_g = 127; test_b = 127; colour_name = "grey"; break;
                case 2: test_r = 255; test_g = 255; test_b = 255; colour_name = "white"; break;
                case 3: test_r = 127; test_g = 0; test_b = 0; colour_name = "half red"; break;
                case 4: test_r = 127; test_g = 127; test_b = 0; colour_name = "half yellow"; break;
                case 5: test_r = 0; test_g = 127; test_b = 0; colour_name = "half green"; break;
                case 6: test_r = 0; test_g = 127; test_b = 127; colour_name = "half cyan"; break;
                case 7: test_r = 0; test_g = 0; test_b = 127; colour_name = "half blue"; break;
                case 8: test_r = 127; test_g = 0; test_b = 127; colour_name = "half magenta"; break;
                case 9: test_r = 255; test_g = 0; test_b = 0; colour_name = "red"; break;
                case 10: test_r = 255; test_g = 255; test_b = 0; colour_name = "yellow"; break;
                case 11: test_r = 0; test_g = 255; test_b = 0; colour_name = "green"; break;
                case 12: test_r = 0; test_g = 255; test_b = 255; colour_name = "cyan"; break;
                case 13: test_r = 0; test_g = 0; test_b = 255; colour_name = "blue"; break;
                case 14: test_r = 255; test_g = 0; test_b = 255; colour_name = "magenta"; break;
            }

            // Fill frame with test colour
            for (size_t i = 0; i < rgb_size; i += 3) {
                enc->current_frame_rgb[i] = test_r;
                enc->current_frame_rgb[i + 1] = test_g;
                enc->current_frame_rgb[i + 2] = test_b;
            }

            printf("Frame %d: %s (%d,%d,%d)\n", frame_count, colour_name, test_r, test_g, test_b);

        } else {
            // Real video mode - read frame from FFmpeg
            // height-halving is already done on the encoder initialisation
            int frame_height = enc->height;
            size_t rgb_size = enc->width * frame_height * 3;
            size_t bytes_read = fread(enc->current_frame_rgb, 1, rgb_size, enc->ffmpeg_video_pipe);

            // Short read = end of stream (or pipe error): stop encoding.
            if (bytes_read != rgb_size) {
                if (enc->verbose) {
                    printf("Frame %d: Expected %zu bytes, got %zu bytes\n", frame_count, rgb_size, bytes_read);
                    if (feof(enc->ffmpeg_video_pipe)) {
                        printf("FFmpeg pipe reached end of file\n");
                    }
                    if (ferror(enc->ffmpeg_video_pipe)) {
                        printf("FFmpeg pipe error occurred\n");
                    }
                }
                continue_encoding = 0;
                break;
            }

            // Each frame from FFmpeg is now a single field at half height (for interlaced)
            // Frame parity: even frames (0,2,4...) = bottom fields, odd frames (1,3,5...) = top fields
        }

        // Determine frame type: keyframe on interval, scene cut, or intra-only.
        int is_scene_change = detect_scene_change(enc);
        int is_time_keyframe = (frame_count % KEYFRAME_INTERVAL) == 0;
        int is_keyframe = enc->intra_only || is_time_keyframe || is_scene_change;

        // Verbose output for keyframe decisions
        /*if (enc->verbose && is_keyframe) {
            if (is_scene_change && !is_time_keyframe) {
                printf("Frame %d: Scene change detected, inserting keyframe\n", frame_count);
            } else if (is_time_keyframe) {
                printf("Frame %d: Time-based keyframe (interval: %d)\n", frame_count, KEYFRAME_INTERVAL);
            }
        }*/

        // Debug: check RGB input data
        /*if (frame_count < 3) {
            printf("Encoder Debug: Frame %d - RGB data (first 16 bytes): ", frame_count);
            for (int i = 0; i < 16; i++) {
                printf("%d ", enc->current_frame_rgb[i]);
            }
            printf("\n");
        }*/

        // Convert RGB to colour space (YCoCg-R or ICtCp)
        rgb_to_colour_space_frame(enc, enc->current_frame_rgb,
                                  enc->current_frame_y, enc->current_frame_co, enc->current_frame_cg,
                                  enc->width, enc->height);

        // Debug: check YCoCg conversion result
        /*if (frame_count < 3) {
            printf("Encoder Debug: Frame %d - YCoCg result (first 16): ", frame_count);
            for (int i = 0; i < 16; i++) {
                printf("Y=%.1f Co=%.1f Cg=%.1f ", enc->current_frame_y[i], enc->current_frame_co[i], enc->current_frame_cg[i]);
                if (i % 4 == 3) break; // Only show first 4 pixels for readability
            }
            printf("\n");
        }*/

        // Compress and write frame packet
        uint8_t packet_type = is_keyframe ?
TAV_PACKET_IFRAME : TAV_PACKET_PFRAME; + size_t packet_size = compress_and_write_frame(enc, packet_type); + + if (packet_size == 0) { + fprintf(stderr, "Error: Failed to compress frame %d\n", frame_count); + break; + } + else { + // Process audio for this frame + process_audio(enc, frame_count, enc->output_fp); + + // Process subtitles for this frame + process_subtitles(enc, frame_count, enc->output_fp); + + // Write a sync packet only after a video frame has been coded + uint8_t sync_packet = TAV_PACKET_SYNC; + fwrite(&sync_packet, 1, 1, enc->output_fp); + + // NTSC frame duplication: emit extra sync packet for every 1000n+500 frames + if (enc->is_ntsc_framerate && (frame_count % 1000 == 500)) { + fwrite(&sync_packet, 1, 1, enc->output_fp); + printf("Frame %d: NTSC duplication - extra sync packet emitted\n", frame_count); + } + + if (is_keyframe) + count_iframe++; + else + count_pframe++; + } + + // Copy current frame to previous frame buffer + size_t float_frame_size = enc->width * enc->height * sizeof(float); + size_t rgb_frame_size = enc->width * enc->height * 3; + memcpy(enc->previous_frame_y, enc->current_frame_y, float_frame_size); + memcpy(enc->previous_frame_co, enc->current_frame_co, float_frame_size); + memcpy(enc->previous_frame_cg, enc->current_frame_cg, float_frame_size); + memcpy(enc->previous_frame_rgb, enc->current_frame_rgb, rgb_frame_size); + + frame_count++; + enc->frame_count = frame_count; + + if (enc->verbose || frame_count % 30 == 0) { + struct timeval now; + gettimeofday(&now, NULL); + double elapsed = (now.tv_sec - enc->start_time.tv_sec) + + (now.tv_usec - enc->start_time.tv_usec) / 1000000.0; + double fps = frame_count / elapsed; + printf("Encoded frame %d (%s, %.1f fps)\n", frame_count, + is_keyframe ? 
"I-frame" : "P-frame", fps); + } + } + + // Update actual frame count in encoder struct + enc->total_frames = frame_count; + + // Write final sync packet + uint8_t sync_packet = TAV_PACKET_SYNC; + fwrite(&sync_packet, 1, 1, enc->output_fp); + + // Update header with actual frame count (seek back to header position) + if (enc->output_fp != stdout) { + long current_pos = ftell(enc->output_fp); + fseek(enc->output_fp, 14, SEEK_SET); // Offset of total_frames field in TAV header + uint32_t actual_frames = frame_count; + fwrite(&actual_frames, sizeof(uint32_t), 1, enc->output_fp); + fseek(enc->output_fp, current_pos, SEEK_SET); // Restore position + if (enc->verbose) { + printf("Updated header with actual frame count: %d\n", frame_count); + } + } + + // Final statistics + struct timeval end_time; + gettimeofday(&end_time, NULL); + double total_time = (end_time.tv_sec - enc->start_time.tv_sec) + + (end_time.tv_usec - enc->start_time.tv_usec) / 1000000.0; + + printf("\nEncoding complete!\n"); + printf(" Frames encoded: %d\n", frame_count); + printf(" Framerate: %d\n", enc->output_fps); + printf(" Output size: %zu bytes\n", enc->total_compressed_size); + printf(" Encoding time: %.2fs (%.1f fps)\n", total_time, frame_count / total_time); + printf(" Frame statistics: I-Frame=%d, P-Frame=%d\n", count_iframe, count_pframe); + + + cleanup_encoder(enc); + return 0; +} + +// Cleanup encoder resources /* Releases every resource the encoder struct owns: the FFmpeg video pipe, the temporary MP2 audio file, the output stream, all frame/coefficient/quantisation buffers, the subtitle list, the zstd context, and finally the struct itself. NULL-safe. */ +static void cleanup_encoder(tav_encoder_t *enc) { + if (!enc) return; /* NULL guard — allows unconditional calls from error paths */ + + if (enc->ffmpeg_video_pipe) { + pclose(enc->ffmpeg_video_pipe); + } + if (enc->mp2_file) { + fclose(enc->mp2_file); + unlink(TEMP_AUDIO_FILE); /* remove the temp file produced by the audio conversion step */ + } + if (enc->output_fp) { + fclose(enc->output_fp); /* NOTE(review): also closes stdout when output is "-" — presumably harmless at process exit, but verify */ + } + + free(enc->input_file); + free(enc->output_file); + free(enc->subtitle_file); + free(enc->current_frame_rgb); + free(enc->previous_frame_rgb); + free(enc->current_frame_y); + free(enc->current_frame_co); + free(enc->current_frame_cg); + free(enc->previous_frame_y); + free(enc->previous_frame_co); + 
free(enc->previous_frame_cg); + free(enc->tiles); + free(enc->motion_vectors); + free(enc->compressed_buffer); + free(enc->mp2_buffer); + + // OPTIMIZATION: Free reusable quantisation buffers + free(enc->reusable_quantised_y); + free(enc->reusable_quantised_co); + free(enc->reusable_quantised_cg); + + // Free coefficient delta storage + free(enc->previous_coeffs_y); + free(enc->previous_coeffs_co); + free(enc->previous_coeffs_cg); + + // Free subtitle list + if (enc->subtitles) { + free_subtitle_list(enc->subtitles); + } + + if (enc->zstd_ctx) { + ZSTD_freeCCtx(enc->zstd_ctx); /* release the zstd compression context */ + } + + free(enc); +} \ No newline at end of file