diff --git a/assets/disk0/tvdos/bin/playtev.js b/assets/disk0/tvdos/bin/playtev.js index b5fdc0a..1400f20 100644 --- a/assets/disk0/tvdos/bin/playtev.js +++ b/assets/disk0/tvdos/bin/playtev.js @@ -7,7 +7,8 @@ const WIDTH = 560 const HEIGHT = 448 const BLOCK_SIZE = 16 // 16x16 blocks for YCoCg-R const TEV_MAGIC = [0x1F, 0x54, 0x53, 0x56, 0x4D, 0x54, 0x45, 0x56] // "\x1FTSVM TEV" -const TEV_VERSION = 2 // YCoCg-R version +const TEV_VERSION_YCOCG = 2 // YCoCg-R version +const TEV_VERSION_XYB = 3 // XYB version const SND_BASE_ADDR = audio.getBaseAddr() const pcm = require("pcm") const MP2_FRAME_SIZE = [144,216,252,288,360,432,504,576,720,864,1008,1152,1440,1728] @@ -35,11 +36,6 @@ let notifHideTimer = 0 const NOTIF_SHOWUPTIME = 3000000000 let [cy, cx] = con.getyx() -if (interactive) { - con.move(1,1) - println("Push and hold Backspace to exit") -} - let seqreadserial = require("seqread") let seqreadtape = require("seqreadtape") let seqread = undefined @@ -285,11 +281,17 @@ if (!magicMatching) { // Read header let version = seqread.readOneByte() -if (version !== TEV_VERSION) { - println(`Unsupported TEV version: ${version} (expected ${TEV_VERSION})`) +if (version !== TEV_VERSION_YCOCG && version !== TEV_VERSION_XYB) { + println(`Unsupported TEV version: ${version} (expected ${TEV_VERSION_YCOCG} for YCoCg-R or ${TEV_VERSION_XYB} for XYB)`) return 1 } +let colorSpace = (version === TEV_VERSION_XYB) ? 
"XYB" : "YCoCg-R" +if (interactive) { + con.move(1,1) + println(`Push and hold Backspace to exit | TEV Format ${version} (${colorSpace})`) +} + let width = seqread.readShort() let height = seqread.readShort() let fps = seqread.readOneByte() @@ -353,6 +355,8 @@ let biasTime = 0 const BIAS_LIGHTING_MIN = 1.0 / 16.0 let oldBgcol = [BIAS_LIGHTING_MIN, BIAS_LIGHTING_MIN, BIAS_LIGHTING_MIN] +let notifHidden = false + function getRGBfromScr(x, y) { let offset = y * WIDTH + x let rg = sys.peek(-1048577 - offset) @@ -425,18 +429,6 @@ try { } else if (packetType == TEV_PACKET_IFRAME || packetType == TEV_PACKET_PFRAME) { // Video frame packet (always includes rate control factor) let payloadLen = seqread.readInt() - - // Always read rate control factor (4 bytes, little-endian float) - let rateFactorBytes = seqread.readBytes(4) - let view = new DataView(new ArrayBuffer(4)) - for (let i = 0; i < 4; i++) { - view.setUint8(i, sys.peek(rateFactorBytes + i)) - } - let rateControlFactor = view.getFloat32(0, true) // true = little-endian - //serial.println(`rateControlFactor = ${rateControlFactor}`) - sys.free(rateFactorBytes) - payloadLen -= 4 // Subtract rate factor size from payload - let compressedPtr = seqread.readBytes(payloadLen) updateDataRateBin(payloadLen) @@ -469,10 +461,10 @@ try { continue } - // Hardware-accelerated TEV YCoCg-R decoding to RGB buffers (with rate control factor) + // Hardware-accelerated TEV decoding to RGB buffers (YCoCg-R or XYB based on version) try { let decodeStart = sys.nanoTime() - graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, height, quality, debugMotionVectors, rateControlFactor) + graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, height, quality, debugMotionVectors, version) decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0 // Convert to milliseconds // Upload RGB buffer to display framebuffer with dithering @@ -487,7 +479,7 @@ try { audioFired = true } } catch (e) { - 
serial.println(`Frame ${frameCount}: Hardware YCoCg-R decode failed: ${e}`) + serial.println(`Frame ${frameCount}: Hardware ${colorSpace} decode failed: ${e}`) } sys.free(compressedPtr) @@ -531,6 +523,12 @@ try { // Simple progress display if (interactive) { + notifHideTimer += (t2 - t1) + if (!notifHidden && notifHideTimer > (NOTIF_SHOWUPTIME + FRAME_TIME)) { + con.clear() + notifHidden = true + } + con.move(31, 1) graphics.setTextFore(161) print(`Frame: ${frameCount}/${totalFrames} (${((frameCount / akku2 * 100)|0) / 100}f) `) @@ -544,7 +542,7 @@ try { } } catch (e) { - printerrln(`TEV YCoCg-R decode error: ${e}`) + printerrln(`TEV ${colorSpace} decode error: ${e}`) errorlevel = 1 } finally { diff --git a/terranmon.txt b/terranmon.txt index 10185a0..da8b396 100644 --- a/terranmon.txt +++ b/terranmon.txt @@ -683,6 +683,8 @@ DCT-based compression, motion compensation, and efficient temporal coding. - Version 2.1: Added Rate Control Factor to all video packets (breaking change) * Enables bitrate-constrained encoding alongside quality modes * All video frames now include 4-byte rate control factor after payload size +- Version 3.0: Added support for the XYB colour space + * Increased encoding efficiency, decreased decoding performance # File Structure \x1F T S V M T E V @@ -692,16 +694,15 @@ DCT-based compression, motion compensation, and efficient temporal coding. [PACKET 2] ... 
-## Header (24 bytes) +## Header (20 bytes) uint8 Magic[8]: "\x1FTSVM TEV" - uint8 Version: 2 - uint8 Flags: bit 0 = has audio + uint8 Version: 2 or 3 uint16 Width: video width in pixels - uint16 Height: video height in pixels - uint16 FPS: frames per second + uint16 Height: video height in pixels + uint8 FPS: frames per second uint32 Total Frames: number of video frames uint8 Quality: quantization quality (0-4, higher = better) - byte[5] Reserved + uint8 Flags: bit 0 = has audio ## Packet Types 0x10: I-frame (intra-coded frame) @@ -713,7 +714,6 @@ DCT-based compression, motion compensation, and efficient temporal coding. ## Video Packet Structure uint8 Packet Type uint32 Compressed Size (size of the Gzip-compressed Block Data that follows) - float Rate Control Factor (4 bytes, little-endian) * Gzip-compressed Block Data ## Block Data (per 16x16 block) @@ -724,6 +724,7 @@ DCT-based compression, motion compensation, and efficient temporal coding. 0x03 = MOTION (motion vector only) int16 Motion Vector X ("capable of" 1/4 pixel precision, integer precision for now) int16 Motion Vector Y ("capable of" 1/4 pixel precision, integer precision for now) + float32 Rate Control Factor (4 bytes, little-endian) uint16 Coded Block Pattern (which 8x8 have non-zero coeffs) int16[256] DCT Coefficients Y int16[64] DCT Coefficients Co (subsampled by two) @@ -731,7 +732,7 @@ DCT-based compression, motion compensation, and efficient temporal coding. For SKIP and MOTION mode, DCT coefficients are filled with zero ## DCT Quantization and Rate Control -TEV uses 8 quality levels (0=lowest, 7=highest) with progressive quantization +TEV uses 5 quality levels (0=lowest, 4=highest) with progressive quantization tables optimized for perceptual quality. DC coefficients use fixed quantizer of 8, while AC coefficients are quantized according to quality tables. 
diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt index 9f4f792..fa67852 100644 --- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt @@ -2,14 +2,15 @@ package net.torvald.tsvm import com.badlogic.gdx.graphics.Pixmap import com.badlogic.gdx.math.MathUtils.* +import com.badlogic.gdx.math.MathUtils.PI +import com.badlogic.gdx.math.MathUtils.ceil +import com.badlogic.gdx.math.MathUtils.floor +import com.badlogic.gdx.math.MathUtils.round import net.torvald.UnsafeHelper import net.torvald.terrarum.modulecomputers.virtualcomputer.tvd.toUint import net.torvald.tsvm.peripheral.GraphicsAdapter import net.torvald.tsvm.peripheral.fmod -import kotlin.math.abs -import kotlin.math.cos -import kotlin.math.roundToInt -import kotlin.math.sqrt +import kotlin.math.* class GraphicsJSR223Delegate(private val vm: VM) { @@ -1605,6 +1606,138 @@ class GraphicsJSR223Delegate(private val vm: VM) { return ycocgData } + + // XYB conversion constants from JPEG XL specification + private val XYB_BIAS = 0.00379307325527544933 + private val CBRT_BIAS = 0.155954200549248620 // cbrt(XYB_BIAS) + + // RGB to LMS mixing coefficients + private val RGB_TO_LMS = arrayOf( + doubleArrayOf(0.3, 0.622, 0.078), // L coefficients + doubleArrayOf(0.23, 0.692, 0.078), // M coefficients + doubleArrayOf(0.24342268924547819, 0.20476744424496821, 0.55180986650955360) // S coefficients + ) + + // LMS to RGB inverse matrix + private val LMS_TO_RGB = arrayOf( + doubleArrayOf(11.0315669046, -9.8669439081, -0.1646229965), + doubleArrayOf(-3.2541473811, 4.4187703776, -0.1646229965), + doubleArrayOf(-3.6588512867, 2.7129230459, 1.9459282408) + ) + + // sRGB linearization functions + private fun srgbLinearise(value: Double): Double { + return if (value > 0.04045) { + Math.pow((value + 0.055) / 1.055, 2.4) + } else { + value / 12.92 + } + } + + private fun 
srgbUnlinearise(value: Double): Double { + return if (value > 0.0031308) { + 1.055 * Math.pow(value, 1.0 / 2.4) - 0.055 + } else { + value * 12.92 + } + } + + // XYB to RGB conversion for hardware decoding + fun tevXybToRGB(yBlock: IntArray, xBlock: IntArray, bBlock: IntArray): IntArray { + val rgbData = IntArray(16 * 16 * 3) // R,G,B for 16x16 pixels + + for (py in 0 until 16) { + for (px in 0 until 16) { + val yIdx = py * 16 + px + val y = yBlock[yIdx] + + // Get chroma values from subsampled 8x8 blocks (nearest neighbor upsampling) + val xbIdx = (py / 2) * 8 + (px / 2) + val x = xBlock[xbIdx] + val b = bBlock[xbIdx] + + // Dequantize from integer ranges + val yVal = (y - 128.0) / 255.0 + val xVal = x / 255.0 + val bVal = b / 255.0 + + // XYB to LMS gamma + val lgamma = xVal + yVal + val mgamma = yVal - xVal + val sgamma = bVal + + // Remove gamma correction + val lmix = (lgamma + CBRT_BIAS).pow(3.0) - XYB_BIAS + val mmix = (mgamma + CBRT_BIAS).pow(3.0) - XYB_BIAS + val smix = (sgamma + CBRT_BIAS).pow(3.0) - XYB_BIAS + + // LMS to linear RGB using inverse matrix + val rLinear = (LMS_TO_RGB[0][0] * lmix + LMS_TO_RGB[0][1] * mmix + LMS_TO_RGB[0][2] * smix).coerceIn(0.0, 1.0) + val gLinear = (LMS_TO_RGB[1][0] * lmix + LMS_TO_RGB[1][1] * mmix + LMS_TO_RGB[1][2] * smix).coerceIn(0.0, 1.0) + val bLinear = (LMS_TO_RGB[2][0] * lmix + LMS_TO_RGB[2][1] * mmix + LMS_TO_RGB[2][2] * smix).coerceIn(0.0, 1.0) + + // Convert back to sRGB gamma and 0-255 range + val r = (srgbUnlinearise(rLinear) * 255.0 + 0.5).toInt().coerceIn(0, 255) + val g = (srgbUnlinearise(gLinear) * 255.0 + 0.5).toInt().coerceIn(0, 255) + val bRgb = (srgbUnlinearise(bLinear) * 255.0 + 0.5).toInt().coerceIn(0, 255) + + // Store RGB + val baseIdx = (py * 16 + px) * 3 + rgbData[baseIdx] = r // R + rgbData[baseIdx + 1] = g // G + rgbData[baseIdx + 2] = bRgb // B + } + } + + return rgbData + } + + // RGB to XYB conversion for INTER mode residual calculation + fun tevRGBToXyb(rgbBlock: IntArray): IntArray { + val 
xybData = IntArray(16 * 16 * 3) // Y,X,B for 16x16 pixels + + for (py in 0 until 16) { + for (px in 0 until 16) { + val baseIdx = (py * 16 + px) * 3 + val r = rgbBlock[baseIdx] + val g = rgbBlock[baseIdx + 1] + val b = rgbBlock[baseIdx + 2] + + // Convert RGB to 0-1 range and linearise sRGB + val rNorm = srgbLinearise(r / 255.0) + val gNorm = srgbLinearise(g / 255.0) + val bNorm = srgbLinearise(b / 255.0) + + // RGB to LMS mixing with bias + val lmix = RGB_TO_LMS[0][0] * rNorm + RGB_TO_LMS[0][1] * gNorm + RGB_TO_LMS[0][2] * bNorm + XYB_BIAS + val mmix = RGB_TO_LMS[1][0] * rNorm + RGB_TO_LMS[1][1] * gNorm + RGB_TO_LMS[1][2] * bNorm + XYB_BIAS + val smix = RGB_TO_LMS[2][0] * rNorm + RGB_TO_LMS[2][1] * gNorm + RGB_TO_LMS[2][2] * bNorm + XYB_BIAS + + // Apply gamma correction (cube root) + val lgamma = lmix.pow(1.0 / 3.0) - CBRT_BIAS + val mgamma = mmix.pow(1.0 / 3.0) - CBRT_BIAS + val sgamma = smix.pow(1.0 / 3.0) - CBRT_BIAS + + // LMS to XYB transformation + val xVal = (lgamma - mgamma) / 2.0 + val yVal = (lgamma + mgamma) / 2.0 + val bVal = sgamma + + // Quantize to integer ranges suitable for TEV + val yQuant = (yVal * 255.0 + 128.0).toInt().coerceIn(0, 255) // Y: 0-255 (like YCoCg Y) + val xQuant = (xVal * 255.0).toInt().coerceIn(-128, 127) // X: -128 to +127 (like Co) + val bQuant = (bVal * 255.0).toInt().coerceIn(-128, 127) // B: -128 to +127 (like Cg, aggressively quantized) + + // Store XYB values + val yIdx = py * 16 + px + xybData[yIdx * 3] = yQuant // Y + xybData[yIdx * 3 + 1] = xQuant // X + xybData[yIdx * 3 + 2] = bQuant // B + } + } + + return xybData + } /** * Hardware-accelerated TEV frame decoder for YCoCg-R 4:2:0 format @@ -1620,7 +1753,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { */ fun tevDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long, width: Int, height: Int, quality: Int, debugMotionVectors: Boolean = false, - rateControlFactor: Float = 1.0f) { + tevVersion: Int = 2) { val blocksX = (width + 15) / 16 // 16x16 blocks 
now val blocksY = (height + 15) / 16 @@ -1630,21 +1763,9 @@ class GraphicsJSR223Delegate(private val vm: VM) { val quantCGmult = QUANT_MULT_CG[quality] // Apply rate control factor to quantization tables (if not ~1.0, skip optimization) - val quantTableY = if (rateControlFactor in 0.999f..1.001f) { - QUANT_TABLE_Y.map { it * quantYmult }.toIntArray() - } else { - QUANT_TABLE_Y.map { (it * quantYmult * rateControlFactor).toInt() }.toIntArray() - } - val quantTableCo = if (rateControlFactor in 0.999f..1.001f) { - QUANT_TABLE_C.map { it * quantCOmult }.toIntArray() - } else { - QUANT_TABLE_C.map { (it * quantCOmult * rateControlFactor).toInt() }.toIntArray() - } - val quantTableCg = if (rateControlFactor in 0.999f..1.001f) { - QUANT_TABLE_C.map { it * quantCGmult }.toIntArray() - } else { - QUANT_TABLE_C.map { (it * quantCGmult * rateControlFactor).toInt() }.toIntArray() - } + val quantTableY = QUANT_TABLE_Y.map { it * quantYmult }.toIntArray() + val quantTableCo = QUANT_TABLE_C.map { it * quantCOmult }.toIntArray() + val quantTableCg = QUANT_TABLE_C.map { it * quantCGmult }.toIntArray() var readPtr = blockDataPtr @@ -1664,7 +1785,11 @@ class GraphicsJSR223Delegate(private val vm: VM) { ((vm.peek(readPtr + 2)!!.toUint()) shl 8)).toShort().toInt() val mvY = ((vm.peek(readPtr + 3)!!.toUint()) or ((vm.peek(readPtr + 4)!!.toUint()) shl 8)).toShort().toInt() - readPtr += 7 // Skip CBP field + val rateControlFactor = Float.fromBits((vm.peek(readPtr + 5)!!.toUint()) or + ((vm.peek(readPtr + 6)!!.toUint()) shl 8) or + ((vm.peek(readPtr + 7)!!.toUint()) shl 16) or + ((vm.peek(readPtr + 8)!!.toUint()) shl 24)) + readPtr += 11 // Skip CBP field when (mode) { @@ -1784,8 +1909,12 @@ class GraphicsJSR223Delegate(private val vm: VM) { val coBlock = tevIdct8x8_fast(coCoeffs, quantTableCo, true) val cgBlock = tevIdct8x8_fast(cgCoeffs, quantTableCg, true) - // Convert YCoCg-R to RGB - val rgbData = tevYcocgToRGB(yBlock, coBlock, cgBlock) + // Convert to RGB (YCoCg-R for v2, XYB for v3) 
+ val rgbData = if (tevVersion == 3) { + tevXybToRGB(yBlock, coBlock, cgBlock) // XYB format (v3) + } else { + tevYcocgToRGB(yBlock, coBlock, cgBlock) // YCoCg-R format (v2) + } // Store RGB data to frame buffer (complete replacement) for (dy in 0 until 16) { @@ -1943,8 +2072,12 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } - // Step 4: Convert final YCoCg-R to RGB - val finalRgb = tevYcocgToRGB(finalY, finalCo, finalCg) + // Step 4: Convert final data to RGB (YCoCg-R for v2, XYB for v3) + val finalRgb = if (tevVersion == 3) { + tevXybToRGB(finalY, finalCo, finalCg) // XYB format (v3) + } else { + tevYcocgToRGB(finalY, finalCo, finalCg) // YCoCg-R format (v2) + } // Step 5: Store final RGB data to frame buffer for (dy in 0 until 16) { @@ -2002,71 +2135,6 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } - // YCoCg-R transform for 16x16 Y blocks and 8x8 chroma blocks (4:2:0 subsampling) - fun blockEncodeToYCoCgR16x16(blockX: Int, blockY: Int, srcPtr: Int, width: Int, height: Int): List { - val yBlock = IntArray(16 * 16) // 16x16 Y - val coBlock = IntArray(8 * 8) // 8x8 Co (subsampled) - val cgBlock = IntArray(8 * 8) // 8x8 Cg (subsampled) - val incVec = if (srcPtr >= 0) 1L else -1L - - // Process 16x16 Y block - for (py in 0 until 16) { - for (px in 0 until 16) { - val ox = blockX * 16 + px - val oy = blockY * 16 + py - if (ox < width && oy < height) { - val offset = 3 * (oy * width + ox) - val r = vm.peek(srcPtr + offset * incVec)!!.toUint() - val g = vm.peek(srcPtr + (offset + 1) * incVec)!!.toUint() - val b = vm.peek(srcPtr + (offset + 2) * incVec)!!.toUint() - - // YCoCg-R transform - val co = r - b - val tmp = b + (co / 2) - val cg = g - tmp - val y = tmp + (cg / 2) - - yBlock[py * 16 + px] = y - } - } - } - - // Process 8x8 Co/Cg blocks with 4:2:0 subsampling (average 2x2 pixels) - for (py in 0 until 8) { - for (px in 0 until 8) { - var coSum = 0 - var cgSum = 0 - var count = 0 - - // Average 2x2 block of pixels for chroma subsampling - for 
(dy in 0 until 2) { - for (dx in 0 until 2) { - val ox = blockX * 16 + px * 2 + dx - val oy = blockY * 16 + py * 2 + dy - if (ox < width && oy < height) { - val offset = 3 * (oy * width + ox) - val r = vm.peek(srcPtr + offset * incVec)!!.toUint() - val g = vm.peek(srcPtr + (offset + 1) * incVec)!!.toUint() - val b = vm.peek(srcPtr + (offset + 2) * incVec)!!.toUint() - - val co = r - b - val tmp = b + (co / 2) - val cg = g - tmp - - coSum += co - cgSum += cg - count++ - } - } - } - - if (count > 0) { - coBlock[py * 8 + px] = coSum / count - cgBlock[py * 8 + px] = cgSum / count - } - } - } - - return listOf(yBlock, coBlock, cgBlock) - } + + } \ No newline at end of file diff --git a/video_encoder/Makefile b/video_encoder/Makefile index 2e584e8..71154f0 100644 --- a/video_encoder/Makefile +++ b/video_encoder/Makefile @@ -5,26 +5,37 @@ CC = gcc CFLAGS = -std=c99 -Wall -Wextra -O2 -D_GNU_SOURCE LIBS = -lm -lz -# Source files -SOURCES = encoder_tev.c -TARGET = encoder_tev +# Source files and targets +SOURCES = encoder_tev.c encoder_tev_xyb.c +TARGETS = encoder_tev encoder_tev_xyb -# Build encoder -$(TARGET): $(SOURCES) - rm -f $(TARGET) +# Build all encoders +all: $(TARGETS) + +# Build main encoder +encoder_tev: encoder_tev.c + rm -f encoder_tev $(CC) $(CFLAGS) -o $@ $< $(LIBS) +# Build XYB encoder +encoder_tev_xyb: encoder_tev_xyb.c + rm -f encoder_tev_xyb + $(CC) $(CFLAGS) -o $@ $< $(LIBS) + +# Default target +$(TARGETS): all + # Build with debug symbols debug: CFLAGS += -g -DDEBUG -debug: $(TARGET) +debug: $(TARGETS) # Clean build artifacts clean: - rm -f $(TARGET) + rm -f $(TARGETS) # Install (copy to PATH) -install: $(TARGET) - cp $(TARGET) /usr/local/bin/ +install: $(TARGETS) + cp $(TARGETS) /usr/local/bin/ # Check for required dependencies check-deps: @@ -38,7 +49,9 @@ help: @echo "TSVM Enhanced Video (TEV) Encoder" @echo "" @echo "Targets:" - @echo " encoder_tev - Build the encoder (default)" + @echo " all - Build both encoders (default)" + @echo " encoder_tev - 
Build the main TEV encoder" + @echo " encoder_tev_xyb - Build the XYB color space encoder" @echo " debug - Build with debug symbols" @echo " clean - Remove build artifacts" @echo " install - Install to /usr/local/bin" @@ -46,7 +59,8 @@ help: @echo " help - Show this help" @echo "" @echo "Usage:" - @echo " make" + @echo " make # Build both encoders" @echo " ./encoder_tev input.mp4 -o output.tev" + @echo " ./encoder_tev_xyb input.mp4 -o output.tev" -.PHONY: clean install check-deps help debug +.PHONY: all clean install check-deps help debug diff --git a/video_encoder/encoder_tev.c b/video_encoder/encoder_tev.c index 12ba31c..e1f6bdd 100644 --- a/video_encoder/encoder_tev.c +++ b/video_encoder/encoder_tev.c @@ -95,6 +95,7 @@ int KEYFRAME_INTERVAL = 60; typedef struct __attribute__((packed)) { uint8_t mode; // Block encoding mode int16_t mv_x, mv_y; // Motion vector (1/4 pixel precision) + float rate_control_factor; // Rate control factor (4 bytes, little-endian) uint16_t cbp; // Coded block pattern (which channels have non-zero coeffs) int16_t y_coeffs[256]; // quantised Y DCT coefficients (16x16) int16_t co_coeffs[64]; // quantised Co DCT coefficients (8x8) @@ -666,6 +667,7 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke // Intra coding for keyframes block->mode = TEV_MODE_INTRA; block->mv_x = block->mv_y = 0; + block->rate_control_factor = enc->rate_control_factor; enc->blocks_intra++; } else { // Implement proper mode decision for P-frames @@ -749,6 +751,7 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke block->mode = TEV_MODE_SKIP; block->mv_x = 0; block->mv_y = 0; + block->rate_control_factor = enc->rate_control_factor; block->cbp = 0x00; // No coefficients present // Zero out DCT coefficients for consistent format memset(block->y_coeffs, 0, sizeof(block->y_coeffs)); @@ -760,6 +763,7 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke (abs(block->mv_x) > 0 || 
abs(block->mv_y) > 0)) { // Good motion prediction - use motion-only mode block->mode = TEV_MODE_MOTION; + block->rate_control_factor = enc->rate_control_factor; block->cbp = 0x00; // No coefficients present // Zero out DCT coefficients for consistent format memset(block->y_coeffs, 0, sizeof(block->y_coeffs)); @@ -772,6 +776,7 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke // Motion compensation with threshold if (motion_sad <= 1024) { block->mode = TEV_MODE_MOTION; + block->rate_control_factor = enc->rate_control_factor; block->cbp = 0x00; // No coefficients present memset(block->y_coeffs, 0, sizeof(block->y_coeffs)); memset(block->co_coeffs, 0, sizeof(block->co_coeffs)); @@ -783,10 +788,12 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke // Use INTER mode with motion vector and residuals if (abs(block->mv_x) <= 24 && abs(block->mv_y) <= 24) { block->mode = TEV_MODE_INTER; + block->rate_control_factor = enc->rate_control_factor; enc->blocks_inter++; } else { // Motion vector too large, fall back to INTRA block->mode = TEV_MODE_INTRA; + block->rate_control_factor = enc->rate_control_factor; block->mv_x = 0; block->mv_y = 0; enc->blocks_intra++; @@ -795,6 +802,7 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke } else { // No good motion prediction - use intra mode block->mode = TEV_MODE_INTRA; + block->rate_control_factor = enc->rate_control_factor; block->mv_x = 0; block->mv_y = 0; enc->blocks_intra++; @@ -1293,20 +1301,19 @@ static int encode_frame(tev_encoder_t *enc, FILE *output, int frame_num) { // Clean up frame stream deflateEnd(&frame_stream); - // Write frame packet header (always include rate control factor) + // Write frame packet header (rate control factor now per-block) uint8_t packet_type = is_keyframe ? 
TEV_PACKET_IFRAME : TEV_PACKET_PFRAME; - uint32_t payload_size = compressed_size + 4; // +4 bytes for rate control factor (always) + uint32_t payload_size = compressed_size; // Rate control factor now per-block, not per-packet fwrite(&packet_type, 1, 1, output); fwrite(&payload_size, 4, 1, output); - fwrite(&enc->rate_control_factor, 4, 1, output); // Always store rate control factor fwrite(enc->compressed_buffer, 1, compressed_size, output); if (enc->verbose) { printf("rateControlFactor=%.6f\n", enc->rate_control_factor); } - enc->total_output_bytes += 5 + 4 + compressed_size; // packet + size + rate_factor + data + enc->total_output_bytes += 5 + compressed_size; // packet + size + data (rate_factor now per-block) // Update rate control for next frame if (enc->bitrate_mode > 0) { diff --git a/video_encoder/encoder_tev_xyb.c b/video_encoder/encoder_tev_xyb.c new file mode 100644 index 0000000..b227a1c --- /dev/null +++ b/video_encoder/encoder_tev_xyb.c @@ -0,0 +1,2056 @@ +// Created by Claude on 2025-08-18. 
+// TEV (TSVM Enhanced Video) Encoder - XYB 4:2:0 16x16 Block Version +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// TSVM Enhanced Video (TEV) format constants +#define TEV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x45\x56" // "\x1FTSVM TEV" +#define TEV_VERSION 3 // Updated for XYB 4:2:0 +// version 1: 8x8 RGB +// version 2: 16x16 Y, 8x8 Co/Cg, asymetric quantisation, optional quantiser multiplier for rate control multiplier (1.0 when unused) +// version 3: 16x16 Y, 8x8 X/B (XYB color space), perceptually optimized quantisation + +// Block encoding modes (16x16 blocks) +#define TEV_MODE_SKIP 0x00 // Skip block (copy from reference) +#define TEV_MODE_INTRA 0x01 // Intra DCT coding (I-frame blocks) +#define TEV_MODE_INTER 0x02 // Inter DCT coding with motion compensation +#define TEV_MODE_MOTION 0x03 // Motion vector only (good prediction) + +// Video packet types +#define TEV_PACKET_IFRAME 0x10 // Intra frame (keyframe) +#define TEV_PACKET_PFRAME 0x11 // Predicted frame +#define TEV_PACKET_AUDIO_MP2 0x20 // MP2 audio +#define TEV_PACKET_SUBTITLE 0x30 // Subtitle packet +#define TEV_PACKET_SYNC 0xFF // Sync packet + +// Utility macros +static inline int CLAMP(int x, int min, int max) { + return x < min ? min : (x > max ? max : x); +} +static inline float FCLAMP(float x, float min, float max) { + return x < min ? min : (x > max ? 
max : x); +} + +static const int MP2_RATE_TABLE[5] = {80, 128, 192, 224, 384}; +static const int QUANT_MULT_Y[5] = {40, 10, 6, 4, 1}; +static const int QUANT_MULT_X[5] = {40, 10, 6, 4, 1}; +static const int QUANT_MULT_B[5] = {106, 22, 10, 5, 1}; // X[i] * sqrt(7 - 2i) - B channel aggressively quantized +// only leave (4, 6, 7) + +// Quality settings for quantisation (Y channel) - 16x16 tables +static const uint32_t QUANT_TABLE_Y[256] = + // Quality 7 (highest) + {2, 1, 1, 2, 3, 5, 6, 7, 6, 7, 8, 9, 10, 11, 12, 13, + 1, 1, 1, 2, 3, 6, 7, 9, 7, 9, 10, 11, 12, 13, 14, 15, + 1, 1, 2, 3, 5, 6, 7, 9, 7, 9, 10, 11, 12, 13, 14, 15, + 1, 2, 3, 4, 6, 7, 9, 10, 9, 10, 11, 12, 13, 14, 15, 16, + 2, 3, 5, 6, 7, 9, 10, 11, 10, 11, 12, 13, 14, 15, 16, 17, + 3, 4, 6, 7, 9, 10, 11, 12, 11, 12, 13, 14, 15, 16, 17, 18, + 6, 6, 7, 9, 10, 11, 12, 13, 12, 13, 14, 15, 16, 17, 18, 19, + 6, 7, 9, 10, 11, 12, 13, 14, 13, 14, 15, 16, 17, 18, 19, 20, + 6, 7, 9, 10, 11, 12, 13, 14, 13, 14, 15, 16, 17, 18, 19, 20, + 7, 9, 10, 11, 12, 13, 14, 15, 14, 15, 16, 17, 18, 19, 20, 21, + 9, 10, 11, 12, 13, 14, 15, 16, 15, 16, 17, 18, 19, 20, 21, 22, + 10, 11, 12, 13, 14, 15, 16, 17, 16, 17, 18, 19, 20, 21, 22, 23, + 11, 12, 13, 14, 15, 16, 17, 18, 17, 18, 19, 20, 21, 22, 23, 24, + 12, 13, 14, 15, 16, 17, 18, 19, 18, 19, 20, 21, 22, 23, 24, 25, + 13, 14, 15, 16, 17, 18, 19, 20, 19, 20, 21, 22, 23, 24, 25, 26, + 14, 15, 16, 17, 18, 19, 20, 21, 20, 21, 22, 23, 24, 25, 26, 27}; + +// Quality settings for quantisation (X channel - 8x8) +static const uint32_t QUANT_TABLE_C[64] = + {2, 3, 4, 6, 8, 12, 16, 20, + 3, 4, 6, 8, 12, 16, 20, 24, + 4, 6, 8, 12, 16, 20, 24, 28, + 6, 8, 12, 16, 20, 24, 28, 32, + 8, 12, 16, 20, 24, 28, 32, 36, + 12, 16, 20, 24, 28, 32, 36, 40, + 16, 20, 24, 28, 32, 36, 40, 44, + 20, 24, 28, 32, 36, 40, 44, 48}; + + +// Audio constants (reuse MP2 from existing system) +#define MP2_SAMPLE_RATE 32000 +#define MP2_DEFAULT_PACKET_SIZE 0x240 + +// Encoding parameters +#define MAX_MOTION_SEARCH 
8 +int KEYFRAME_INTERVAL = 60; +#define BLOCK_SIZE 16 // 16x16 blocks now + +// Default values +#define DEFAULT_WIDTH 560 +#define DEFAULT_HEIGHT 448 +#define TEMP_AUDIO_FILE "/tmp/tev_temp_audio.mp2" + +typedef struct __attribute__((packed)) { + uint8_t mode; // Block encoding mode + int16_t mv_x, mv_y; // Motion vector (1/4 pixel precision) + float rate_control_factor; // Rate control factor (4 bytes, little-endian) + uint16_t cbp; // Coded block pattern (which channels have non-zero coeffs) + int16_t y_coeffs[256]; // quantised Y DCT coefficients (16x16) + int16_t x_coeffs[64]; // quantised X DCT coefficients (8x8) + int16_t b_coeffs[64]; // quantised B DCT coefficients (8x8) +} tev_block_t; + +// Subtitle entry structure +typedef struct subtitle_entry { + int start_frame; + int end_frame; + char *text; + struct subtitle_entry *next; +} subtitle_entry_t; + +typedef struct { + char *input_file; + char *output_file; + char *subtitle_file; // SubRip (.srt) file path + int width; + int height; + int fps; + int output_fps; // User-specified output FPS (for frame rate conversion) + int total_frames; + double duration; + int has_audio; + int has_subtitles; + int output_to_stdout; + int quality; // 0-4, higher = better quality + int verbose; + + // Bitrate control + int target_bitrate_kbps; // Target bitrate in kbps (0 = quality mode) + int bitrate_mode; // 0 = quality, 1 = bitrate, 2 = hybrid + float rate_control_factor; // Dynamic adjustment factor + + // Frame buffers (8-bit RGB format for encoding) + uint8_t *current_rgb, *previous_rgb, *reference_rgb; + + // XYB workspace + float *y_workspace, *x_workspace, *b_workspace; + float *dct_workspace; // DCT coefficients + tev_block_t *block_data; // Encoded block data + uint8_t *compressed_buffer; // Zstd output + + // Audio handling + FILE *mp2_file; + int mp2_packet_size; + int mp2_rate_index; + size_t audio_remaining; + uint8_t *mp2_buffer; + double audio_frames_in_buffer; + int target_audio_buffer_size; + + // 
Compression context + z_stream gzip_stream; + + // FFmpeg processes + FILE *ffmpeg_video_pipe; + + // Progress tracking + struct timeval start_time; + size_t total_output_bytes; + + // Statistics + int blocks_skip, blocks_intra, blocks_inter, blocks_motion; + + // Rate control statistics + size_t frame_bits_accumulator; + size_t target_bits_per_frame; + float complexity_history[60]; // Rolling window for complexity + int complexity_history_index; + float average_complexity; + + // Subtitle handling + subtitle_entry_t *subtitle_list; + subtitle_entry_t *current_subtitle; +} tev_encoder_t; + +// XYB conversion constants from JPEG XL specification +static const double XYB_BIAS = 0.00379307325527544933; +static const double CBRT_BIAS = 0.155954200549248620; // cbrt(XYB_BIAS) + +// RGB to LMS mixing coefficients +static const double RGB_TO_LMS[3][3] = { + {0.3, 0.622, 0.078}, // L coefficients + {0.23, 0.692, 0.078}, // M coefficients + {0.24342268924547819, 0.20476744424496821, 0.55180986650955360} // S coefficients +}; + +// LMS to RGB inverse matrix +static const double LMS_TO_RGB[3][3] = { + {11.0315669046, -9.8669439081, -0.1646229965}, + {-3.2541473811, 4.4187703776, -0.1646229965}, + {-3.6588512867, 2.7129230459, 1.9459282408} +}; + +// sRGB linearization (0..1 range) +static inline double srgb_linearize(double val) { + if (val > 0.04045) { + return pow((val + 0.055) / 1.055, 2.4); + } else { + return val / 12.92; + } +} + +// sRGB unlinearization (0..1 range) +static inline double srgb_unlinearize(double val) { + if (val > 0.0031308) { + return 1.055 * pow(val, 1.0 / 2.4) - 0.055; + } else { + return val * 12.92; + } +} + +// RGB to XYB transform (JPEG XL specification with sRGB linearization) +static void rgb_to_xyb(uint8_t r, uint8_t g, uint8_t b, int *y, int *x, int *xyb_b) { + // Convert RGB to 0-1 range and linearize sRGB + double r_norm = srgb_linearize(r / 255.0); + double g_norm = srgb_linearize(g / 255.0); + double b_norm = srgb_linearize(b / 255.0); + 
+ // RGB to LMS mixing with bias + double lmix = RGB_TO_LMS[0][0] * r_norm + RGB_TO_LMS[0][1] * g_norm + RGB_TO_LMS[0][2] * b_norm + XYB_BIAS; + double mmix = RGB_TO_LMS[1][0] * r_norm + RGB_TO_LMS[1][1] * g_norm + RGB_TO_LMS[1][2] * b_norm + XYB_BIAS; + double smix = RGB_TO_LMS[2][0] * r_norm + RGB_TO_LMS[2][1] * g_norm + RGB_TO_LMS[2][2] * b_norm + XYB_BIAS; + + // Apply gamma correction (cube root) + double lgamma = cbrt(lmix) - CBRT_BIAS; + double mgamma = cbrt(mmix) - CBRT_BIAS; + double sgamma = cbrt(smix) - CBRT_BIAS; + + // LMS to XYB transformation + double x_val = (lgamma - mgamma) / 2.0; + double y_val = (lgamma + mgamma) / 2.0; + double b_val = sgamma; + + // Quantize to integer ranges suitable for TEV + *y = CLAMP((int)(y_val * 255.0 + 128.0), 0, 255); // Y: 0-255 (like YCoCg Y) + *x = CLAMP((int)(x_val * 255.0), -128, 127); // X: -128 to +127 (like Co) + *xyb_b = CLAMP((int)(b_val * 255.0), -128, 127); // B: -128 to +127 (like Cg, aggressively quantized) +} + +// XYB to RGB transform (for verification) +static void xyb_to_rgb(int y, int x, int xyb_b, uint8_t *r, uint8_t *g, uint8_t *b) { + // Dequantize from integer ranges + double y_val = (y - 128.0) / 255.0; + double x_val = x / 255.0; + double b_val = xyb_b / 255.0; + + // XYB to LMS gamma + double lgamma = x_val + y_val; + double mgamma = y_val - x_val; + double sgamma = b_val; + + // Remove gamma correction + double lmix = pow(lgamma + CBRT_BIAS, 3.0) - XYB_BIAS; + double mmix = pow(mgamma + CBRT_BIAS, 3.0) - XYB_BIAS; + double smix = pow(sgamma + CBRT_BIAS, 3.0) - XYB_BIAS; + + // LMS to linear RGB using inverse matrix + double r_linear = LMS_TO_RGB[0][0] * lmix + LMS_TO_RGB[0][1] * mmix + LMS_TO_RGB[0][2] * smix; + double g_linear = LMS_TO_RGB[1][0] * lmix + LMS_TO_RGB[1][1] * mmix + LMS_TO_RGB[1][2] * smix; + double b_linear = LMS_TO_RGB[2][0] * lmix + LMS_TO_RGB[2][1] * mmix + LMS_TO_RGB[2][2] * smix; + + // Clamp linear RGB to valid range + r_linear = FCLAMP(r_linear, 0.0, 1.0); + g_linear = 
FCLAMP(g_linear, 0.0, 1.0); + b_linear = FCLAMP(b_linear, 0.0, 1.0); + + // Convert back to sRGB gamma and 0-255 range + *r = CLAMP((int)(srgb_unlinearize(r_linear) * 255.0 + 0.5), 0, 255); + *g = CLAMP((int)(srgb_unlinearize(g_linear) * 255.0 + 0.5), 0, 255); + *b = CLAMP((int)(srgb_unlinearize(b_linear) * 255.0 + 0.5), 0, 255); +} + +// Pre-calculated cosine tables +static float dct_table_16[16][16]; // For 16x16 DCT +static float dct_table_8[8][8]; // For 8x8 DCT +static int tables_initialized = 0; + +// Initialize the pre-calculated tables +static void init_dct_tables(void) { + if (tables_initialized) return; + + // Pre-calculate cosine values for 16x16 DCT + for (int u = 0; u < 16; u++) { + for (int x = 0; x < 16; x++) { + dct_table_16[u][x] = cosf((2.0f * x + 1.0f) * u * M_PI / 32.0f); + } + } + + // Pre-calculate cosine values for 8x8 DCT + for (int u = 0; u < 8; u++) { + for (int x = 0; x < 8; x++) { + dct_table_8[u][x] = cosf((2.0f * x + 1.0f) * u * M_PI / 16.0f); + } + } + + tables_initialized = 1; +} + +// 16x16 2D DCT +// Fast separable 16x16 DCT - 8x performance improvement +static float temp_dct_16[256]; // Reusable temporary buffer + +static void dct_16x16_fast(float *input, float *output) { + init_dct_tables(); // Ensure tables are initialized + + // First pass: Process rows (16 1D DCTs) + for (int row = 0; row < 16; row++) { + for (int u = 0; u < 16; u++) { + float sum = 0.0f; + float cu = (u == 0) ? 1.0f / sqrtf(2.0f) : 1.0f; + + for (int x = 0; x < 16; x++) { + sum += input[row * 16 + x] * dct_table_16[u][x]; + } + + temp_dct_16[row * 16 + u] = 0.5f * cu * sum; + } + } + + // Second pass: Process columns (16 1D DCTs) + for (int col = 0; col < 16; col++) { + for (int v = 0; v < 16; v++) { + float sum = 0.0f; + float cv = (v == 0) ? 
1.0f / sqrtf(2.0f) : 1.0f; + + for (int y = 0; y < 16; y++) { + sum += temp_dct_16[y * 16 + col] * dct_table_16[v][y]; + } + + output[v * 16 + col] = 0.5f * cv * sum; + } + } +} + +// Legacy O(n^4) version for reference/fallback +static void dct_16x16(float *input, float *output) { + init_dct_tables(); // Ensure tables are initialized + + for (int u = 0; u < 16; u++) { + for (int v = 0; v < 16; v++) { + float sum = 0.0f; + float cu = (u == 0) ? 1.0f / sqrtf(2.0f) : 1.0f; + float cv = (v == 0) ? 1.0f / sqrtf(2.0f) : 1.0f; + + for (int x = 0; x < 16; x++) { + for (int y = 0; y < 16; y++) { + sum += input[y * 16 + x] * + dct_table_16[u][x] * + dct_table_16[v][y]; + } + } + + output[u * 16 + v] = 0.25f * cu * cv * sum; + } + } +} + +// Fast separable 8x8 DCT - 4x performance improvement +static float temp_dct_8[64]; // Reusable temporary buffer + +static void dct_8x8_fast(float *input, float *output) { + init_dct_tables(); // Ensure tables are initialized + + // First pass: Process rows (8 1D DCTs) + for (int row = 0; row < 8; row++) { + for (int u = 0; u < 8; u++) { + float sum = 0.0f; + float cu = (u == 0) ? 1.0f / sqrtf(2.0f) : 1.0f; + + for (int x = 0; x < 8; x++) { + sum += input[row * 8 + x] * dct_table_8[u][x]; + } + + temp_dct_8[row * 8 + u] = 0.5f * cu * sum; + } + } + + // Second pass: Process columns (8 1D DCTs) + for (int col = 0; col < 8; col++) { + for (int v = 0; v < 8; v++) { + float sum = 0.0f; + float cv = (v == 0) ? 1.0f / sqrtf(2.0f) : 1.0f; + + for (int y = 0; y < 8; y++) { + sum += temp_dct_8[y * 8 + col] * dct_table_8[v][y]; + } + + output[v * 8 + col] = 0.5f * cv * sum; + } + } +} + +// Legacy 8x8 2D DCT (for chroma) - O(n^4) version +static void dct_8x8(float *input, float *output) { + init_dct_tables(); // Ensure tables are initialized + + for (int u = 0; u < 8; u++) { + for (int v = 0; v < 8; v++) { + float sum = 0.0f; + float cu = (u == 0) ? 1.0f / sqrtf(2.0f) : 1.0f; + float cv = (v == 0) ? 
1.0f / sqrtf(2.0f) : 1.0f; + + for (int x = 0; x < 8; x++) { + for (int y = 0; y < 8; y++) { + sum += input[y * 8 + x] * + dct_table_8[u][x] * + dct_table_8[v][y]; + } + } + + output[u * 8 + v] = 0.25f * cu * cv * sum; + } + } +} + +// quantise DCT coefficient using quality table with rate control +static int16_t quantise_coeff(float coeff, uint32_t quant, int is_dc, int is_chroma, float rate_factor) { + if (is_dc) { + if (is_chroma) { + // Chroma DC: range -256 to +255, use lossless quantisation for testing + return (int16_t)roundf(coeff); + } else { + // Luma DC: range -128 to +127, use lossless quantisation for testing + return (int16_t)roundf(coeff); + } + } else { + // AC coefficients use quality table with rate control adjustment + float adjusted_quant = quant * rate_factor; + adjusted_quant = fmaxf(adjusted_quant, 1.0f); // Prevent division by zero + return (int16_t)roundf(coeff / adjusted_quant); + } +} + +// Extract 16x16 block from RGB frame and convert to XYB +static void extract_xyb_block(uint8_t *rgb_frame, int width, int height, + int block_x, int block_y, + float *y_block, float *x_block, float *b_block) { + int start_x = block_x * 16; + int start_y = block_y * 16; + + // Extract 16x16 Y block + for (int py = 0; py < 16; py++) { + for (int px = 0; px < 16; px++) { + int x = start_x + px; + int y = start_y + py; + + if (x < width && y < height) { + int offset = (y * width + x) * 3; + uint8_t r = rgb_frame[offset]; + uint8_t g = rgb_frame[offset + 1]; + uint8_t b_val = rgb_frame[offset + 2]; + + int y_val, x_val, b_val_xyb; + rgb_to_xyb(r, g, b_val, &y_val, &x_val, &b_val_xyb); + + y_block[py * 16 + px] = (float)y_val - 128.0f; // Center around 0 + } + } + } + + // Extract 8x8 chroma blocks with 4:2:0 subsampling (average 2x2 pixels) + for (int py = 0; py < 8; py++) { + for (int px = 0; px < 8; px++) { + int x_sum = 0, b_sum = 0, count = 0; + + // Average 2x2 block of pixels + for (int dy = 0; dy < 2; dy++) { + for (int dx = 0; dx < 2; dx++) { + int x 
= start_x + px * 2 + dx; + int y = start_y + py * 2 + dy; + + if (x < width && y < height) { + int offset = (y * width + x) * 3; + uint8_t r = rgb_frame[offset]; + uint8_t g = rgb_frame[offset + 1]; + uint8_t b_val = rgb_frame[offset + 2]; + + int y_val, x_val, b_val_xyb; + rgb_to_xyb(r, g, b_val, &y_val, &x_val, &b_val_xyb); + + x_sum += x_val; + b_sum += b_val_xyb; + count++; + } + } + } + + if (count > 0) { + // Center chroma around 0 for DCT (X/B range is -128 to +127) + x_block[py * 8 + px] = (float)(x_sum / count); + b_block[py * 8 + px] = (float)(b_sum / count); + } + } + } +} + +// Simple motion estimation (full search) for 16x16 blocks +static void estimate_motion(tev_encoder_t *enc, int block_x, int block_y, + int16_t *best_mv_x, int16_t *best_mv_y) { + int best_sad = INT_MAX; + *best_mv_x = 0; + *best_mv_y = 0; + + int start_x = block_x * 16; + int start_y = block_y * 16; + + // Diamond search pattern (much faster than full search) + static const int diamond_x[] = {0, -1, 1, 0, 0, -2, 2, 0, 0}; + static const int diamond_y[] = {0, 0, 0, -1, 1, 0, 0, -2, 2}; + + int center_x = 0, center_y = 0; + int step_size = 4; // Start with larger steps + + while (step_size >= 1) { + int improved = 0; + + for (int i = 0; i < 9; i++) { + int mv_x = center_x + diamond_x[i] * step_size; + int mv_y = center_y + diamond_y[i] * step_size; + + // Check bounds + if (mv_x < -MAX_MOTION_SEARCH || mv_x > MAX_MOTION_SEARCH || + mv_y < -MAX_MOTION_SEARCH || mv_y > MAX_MOTION_SEARCH) { + continue; + } + + int ref_x = start_x - mv_x; + int ref_y = start_y - mv_y; + + if (ref_x < 0 || ref_y < 0 || + ref_x + 16 > enc->width || ref_y + 16 > enc->height) { + continue; + } + + // Fast SAD using integer luma approximation + int sad = 0; + for (int dy = 0; dy < 16; dy += 2) { // Sample every 2nd row for speed + uint8_t *cur_row = &enc->current_rgb[((start_y + dy) * enc->width + start_x) * 3]; + uint8_t *ref_row = &enc->previous_rgb[((ref_y + dy) * enc->width + ref_x) * 3]; + + for (int dx 
= 0; dx < 16; dx += 2) { // Sample every 2nd pixel + // Fast luma approximation: (R + 2*G + B) >> 2 + int cur_luma = (cur_row[dx*3] + (cur_row[dx*3+1] << 1) + cur_row[dx*3+2]) >> 2; + int ref_luma = (ref_row[dx*3] + (ref_row[dx*3+1] << 1) + ref_row[dx*3+2]) >> 2; + sad += abs(cur_luma - ref_luma); + } + } + + if (sad < best_sad) { + best_sad = sad; + *best_mv_x = mv_x; + *best_mv_y = mv_y; + center_x = mv_x; + center_y = mv_y; + improved = 1; + } + } + + if (!improved) { + step_size >>= 1; // Reduce step size + } + } +} + +// Convert RGB block to YCoCg-R with 4:2:0 chroma subsampling +static void convert_rgb_to_xyb_block(const uint8_t *rgb_block, + uint8_t *y_block, int8_t *co_block, int8_t *cg_block) { + // Convert 16x16 RGB to Y (full resolution) + for (int py = 0; py < 16; py++) { + for (int px = 0; px < 16; px++) { + int rgb_idx = (py * 16 + px) * 3; + int r = rgb_block[rgb_idx]; + int g = rgb_block[rgb_idx + 1]; + int b = rgb_block[rgb_idx + 2]; + + // YCoCg-R transform (per specification with truncated division) + int y = (r + 2*g + b) / 4; + + y_block[py * 16 + px] = CLAMP(y, 0, 255); + } + } + + // Convert to Co and Cg with 4:2:0 subsampling (8x8) + for (int cy = 0; cy < 8; cy++) { + for (int cx = 0; cx < 8; cx++) { + // Sample 2x2 block from RGB and average for chroma + int sum_co = 0, sum_cg = 0; + + for (int dy = 0; dy < 2; dy++) { + for (int dx = 0; dx < 2; dx++) { + int py = cy * 2 + dy; + int px = cx * 2 + dx; + int rgb_idx = (py * 16 + px) * 3; + + int r = rgb_block[rgb_idx]; + int g = rgb_block[rgb_idx + 1]; + int b = rgb_block[rgb_idx + 2]; + + int co = r - b; + int tmp = b + (co / 2); + int cg = g - tmp; + + sum_co += co; + sum_cg += cg; + } + } + + // Average and store subsampled chroma + co_block[cy * 8 + cx] = CLAMP(sum_co / 4, -256, 255); + cg_block[cy * 8 + cx] = CLAMP(sum_cg / 4, -256, 255); + } + } +} + +// Extract motion-compensated YCoCg-R block from reference frame +static void extract_motion_compensated_block(const uint8_t *rgb_data, 
int width, int height, + int block_x, int block_y, int mv_x, int mv_y, + uint8_t *y_block, int8_t *co_block, int8_t *cg_block) { + // Extract 16x16 RGB block with motion compensation + uint8_t rgb_block[16 * 16 * 3]; + + for (int dy = 0; dy < 16; dy++) { + for (int dx = 0; dx < 16; dx++) { + int cur_x = block_x + dx; + int cur_y = block_y + dy; + int ref_x = cur_x + mv_x; // Revert to original motion compensation + int ref_y = cur_y + mv_y; + + int rgb_idx = (dy * 16 + dx) * 3; + + if (ref_x >= 0 && ref_y >= 0 && ref_x < width && ref_y < height) { + // Copy RGB from reference position + int ref_offset = (ref_y * width + ref_x) * 3; + rgb_block[rgb_idx] = rgb_data[ref_offset]; // R + rgb_block[rgb_idx + 1] = rgb_data[ref_offset + 1]; // G + rgb_block[rgb_idx + 2] = rgb_data[ref_offset + 2]; // B + } else { + // Out of bounds - use black + rgb_block[rgb_idx] = 0; // R + rgb_block[rgb_idx + 1] = 0; // G + rgb_block[rgb_idx + 2] = 0; // B + } + } + } + + // Convert RGB block to YCoCg-R + convert_rgb_to_xyb_block(rgb_block, y_block, co_block, cg_block); +} + +// Compute motion-compensated residual for INTER mode +static void compute_motion_residual(tev_encoder_t *enc, int block_x, int block_y, int mv_x, int mv_y) { + int start_x = block_x * 16; + int start_y = block_y * 16; + + // Extract motion-compensated reference block from previous frame + uint8_t ref_y[256]; + int8_t ref_co[64], ref_cg[64]; + extract_motion_compensated_block(enc->previous_rgb, enc->width, enc->height, + start_x, start_y, mv_x, mv_y, + ref_y, ref_co, ref_cg); + + // Compute residuals: current - motion_compensated_reference + // Current is already centered (-128 to +127), reference is 0-255, so subtract and center reference + for (int i = 0; i < 256; i++) { + float ref_y_centered = (float)ref_y[i] - 128.0f; // Center reference to match current + enc->y_workspace[i] = enc->y_workspace[i] - ref_y_centered; + } + + // Chroma residuals (already centered in both current and reference) + for (int i = 0; i 
< 64; i++) { + enc->x_workspace[i] = enc->x_workspace[i] - (float)ref_co[i]; + enc->b_workspace[i] = enc->b_workspace[i] - (float)ref_cg[i]; + } +} + +// Calculate block complexity for rate control +static float calculate_block_complexity(float *workspace, int size) { + float complexity = 0.0f; + for (int i = 1; i < size; i++) { // Skip DC component + complexity += fabsf(workspace[i]); + } + return complexity; +} + +const float EPSILON = 1.0f / 16777216.0f; +const float RATE_CONTROL_CLAMP_MAX = 64.0f; +const float RATE_CONTROL_CLAMP_MIN = 1.0f / RATE_CONTROL_CLAMP_MAX; + +// Update rate control factor based on target bitrate +static void update_rate_control(tev_encoder_t *enc, float frame_complexity, size_t frame_bits) { + if (enc->bitrate_mode == 0) { + // Quality mode - no rate control + enc->rate_control_factor = 1.0f; + return; + } + + // Update complexity history + enc->complexity_history[enc->complexity_history_index] = frame_complexity; + enc->complexity_history_index = (enc->complexity_history_index + 1) % 60; + + // Calculate rolling average complexity + float sum = 0.0f; + int count = 0; + for (int i = 0; i < 60; i++) { + if (enc->complexity_history[i] > 0.0f) { + sum += enc->complexity_history[i]; + count++; + } + } + enc->average_complexity = (count > 0) ? 
sum / count : frame_complexity; + + // Calculate rate adjustment + if (enc->target_bits_per_frame > 0 && frame_bits > 0) { + float bitrate_ratio = (float)enc->target_bits_per_frame / frame_bits; + float complexity_ratio = frame_complexity / fmaxf(enc->average_complexity, 1.0f); + + // Adaptive adjustment with damping + float adjustment = 1.0f / (bitrate_ratio * complexity_ratio); + enc->rate_control_factor = adjustment; + enc->rate_control_factor = 0.8f * enc->rate_control_factor + 0.2f * adjustment; + + // Clamp to reasonable range + enc->rate_control_factor = FCLAMP(enc->rate_control_factor, RATE_CONTROL_CLAMP_MIN, RATE_CONTROL_CLAMP_MAX); + } +} + +// Encode a 16x16 block +static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_keyframe) { + tev_block_t *block = &enc->block_data[block_y * ((enc->width + 15) / 16) + block_x]; + + // Extract YCoCg-R block + extract_xyb_block(enc->current_rgb, enc->width, enc->height, + block_x, block_y, + enc->y_workspace, enc->x_workspace, enc->b_workspace); + + if (is_keyframe) { + // Intra coding for keyframes + block->mode = TEV_MODE_INTRA; + block->mv_x = block->mv_y = 0; + block->rate_control_factor = enc->rate_control_factor; + enc->blocks_intra++; + } else { + // Implement proper mode decision for P-frames + int start_x = block_x * 16; + int start_y = block_y * 16; + + // Calculate SAD for skip mode (no motion compensation) + int skip_sad = 0; + int skip_color_diff = 0; + for (int dy = 0; dy < 16; dy++) { + for (int dx = 0; dx < 16; dx++) { + int x = start_x + dx; + int y = start_y + dy; + if (x < enc->width && y < enc->height) { + int cur_offset = (y * enc->width + x) * 3; + + // Compare current with previous frame (using YCoCg-R Luma calculation) + int cur_luma = (enc->current_rgb[cur_offset] + + 2 * enc->current_rgb[cur_offset + 1] + + enc->current_rgb[cur_offset + 2]) / 4; + int prev_luma = (enc->previous_rgb[cur_offset] + + 2 * enc->previous_rgb[cur_offset + 1] + + enc->previous_rgb[cur_offset + 
2]) / 4; + + skip_sad += abs(cur_luma - prev_luma); + + // Also check for color differences to prevent SKIP on color changes + int cur_r = enc->current_rgb[cur_offset]; + int cur_g = enc->current_rgb[cur_offset + 1]; + int cur_b = enc->current_rgb[cur_offset + 2]; + int prev_r = enc->previous_rgb[cur_offset]; + int prev_g = enc->previous_rgb[cur_offset + 1]; + int prev_b = enc->previous_rgb[cur_offset + 2]; + + skip_color_diff += abs(cur_r - prev_r) + abs(cur_g - prev_g) + abs(cur_b - prev_b); + } + } + } + + // Try motion estimation + estimate_motion(enc, block_x, block_y, &block->mv_x, &block->mv_y); + + // Calculate motion compensation SAD + int motion_sad = INT_MAX; + if (abs(block->mv_x) > 0 || abs(block->mv_y) > 0) { + motion_sad = 0; + for (int dy = 0; dy < 16; dy++) { + for (int dx = 0; dx < 16; dx++) { + int cur_x = start_x + dx; + int cur_y = start_y + dy; + int ref_x = cur_x + block->mv_x; + int ref_y = cur_y + block->mv_y; + + if (cur_x < enc->width && cur_y < enc->height && + ref_x >= 0 && ref_y >= 0 && + ref_x < enc->width && ref_y < enc->height) { + + int cur_offset = (cur_y * enc->width + cur_x) * 3; + int ref_offset = (ref_y * enc->width + ref_x) * 3; + + // use YCoCg-R Luma calculation + int cur_luma = (enc->current_rgb[cur_offset] + + 2 * enc->current_rgb[cur_offset + 1] + + enc->current_rgb[cur_offset + 2]) / 4; + int ref_luma = (enc->previous_rgb[ref_offset] + + 2 * enc->previous_rgb[ref_offset + 1] + + enc->previous_rgb[ref_offset + 2]) / 4; + + motion_sad += abs(cur_luma - ref_luma); + } else { + motion_sad += 128; // Penalty for out-of-bounds + } + } + } + } + + // Mode decision with strict thresholds for quality + // Require both low luma difference AND low color difference for SKIP + if (skip_sad <= 64 && skip_color_diff <= 192) { + // Very small difference - skip block (copy from previous frame) + block->mode = TEV_MODE_SKIP; + block->mv_x = 0; + block->mv_y = 0; + block->rate_control_factor = enc->rate_control_factor; + block->cbp = 
0x00; // No coefficients present + // Zero out DCT coefficients for consistent format + memset(block->y_coeffs, 0, sizeof(block->y_coeffs)); + memset(block->x_coeffs, 0, sizeof(block->x_coeffs)); + memset(block->b_coeffs, 0, sizeof(block->b_coeffs)); + enc->blocks_skip++; + return; // Skip DCT encoding entirely + } else if (motion_sad < skip_sad && motion_sad <= 1024 && + (abs(block->mv_x) > 0 || abs(block->mv_y) > 0)) { + // Good motion prediction - use motion-only mode + block->mode = TEV_MODE_MOTION; + block->rate_control_factor = enc->rate_control_factor; + block->cbp = 0x00; // No coefficients present + // Zero out DCT coefficients for consistent format + memset(block->y_coeffs, 0, sizeof(block->y_coeffs)); + memset(block->x_coeffs, 0, sizeof(block->x_coeffs)); + memset(block->b_coeffs, 0, sizeof(block->b_coeffs)); + enc->blocks_motion++; + return; // Skip DCT encoding, just store motion vector + // disabling INTER mode: residual DCT is crapping out no matter what I do + /*} else if (motion_sad < skip_sad && (abs(block->mv_x) > 0 || abs(block->mv_y) > 0)) { + // Motion compensation with threshold + if (motion_sad <= 1024) { + block->mode = TEV_MODE_MOTION; + block->cbp = 0x00; // No coefficients present + memset(block->y_coeffs, 0, sizeof(block->y_coeffs)); + memset(block->x_coeffs, 0, sizeof(block->x_coeffs)); + memset(block->b_coeffs, 0, sizeof(block->b_coeffs)); + enc->blocks_motion++; + return; // Skip DCT encoding, just store motion vector + } + + // Use INTER mode with motion vector and residuals + if (abs(block->mv_x) <= 24 && abs(block->mv_y) <= 24) { + block->mode = TEV_MODE_INTER; + block->rate_control_factor = enc->rate_control_factor; + enc->blocks_inter++; + } else { + // Motion vector too large, fall back to INTRA + block->mode = TEV_MODE_INTRA; + block->rate_control_factor = enc->rate_control_factor; + block->mv_x = 0; + block->mv_y = 0; + enc->blocks_intra++; + return; + }*/ + } else { + // No good motion prediction - use intra mode + 
block->mode = TEV_MODE_INTRA; + block->rate_control_factor = enc->rate_control_factor; + block->mv_x = 0; + block->mv_y = 0; + enc->blocks_intra++; + } + } + + // Apply fast DCT transform + dct_16x16_fast(enc->y_workspace, enc->dct_workspace); + + // quantise Y coefficients (luma) + const uint32_t *y_quant = QUANT_TABLE_Y; + const uint32_t qmult_y = QUANT_MULT_Y[enc->quality]; + for (int i = 0; i < 256; i++) { + block->y_coeffs[i] = quantise_coeff(enc->dct_workspace[i], y_quant[i] * qmult_y, i == 0, 0, enc->rate_control_factor); + } + + // Apply fast DCT transform to chroma + dct_8x8_fast(enc->x_workspace, enc->dct_workspace); + + // quantise Co coefficients (chroma - orange-blue) + const uint32_t *co_quant = QUANT_TABLE_C; + const uint32_t qmult_co = QUANT_MULT_X[enc->quality]; + for (int i = 0; i < 64; i++) { + block->x_coeffs[i] = quantise_coeff(enc->dct_workspace[i], co_quant[i] * qmult_co, i == 0, 1, enc->rate_control_factor); + } + + // Apply fast DCT transform to Cg + dct_8x8_fast(enc->b_workspace, enc->dct_workspace); + + // quantise Cg coefficients (chroma - green-magenta, qmult_cg is more aggressive like NTSC Q) + const uint32_t *cg_quant = QUANT_TABLE_C; + const uint32_t qmult_cg = QUANT_MULT_B[enc->quality]; + for (int i = 0; i < 64; i++) { + block->b_coeffs[i] = quantise_coeff(enc->dct_workspace[i], cg_quant[i] * qmult_cg, i == 0, 1, enc->rate_control_factor); + } + + // Set CBP (simplified - always encode all channels) + block->cbp = 0x07; // Y, Co, Cg all present +} + +// Convert SubRip time format (HH:MM:SS,mmm) to frame number +static int srt_time_to_frame(const char *time_str, int fps) { + int hours, minutes, seconds, milliseconds; + if (sscanf(time_str, "%d:%d:%d,%d", &hours, &minutes, &seconds, &milliseconds) != 4) { + return -1; + } + + double total_seconds = hours * 3600.0 + minutes * 60.0 + seconds + milliseconds / 1000.0; + return (int)(total_seconds * fps + 0.5); // Round to nearest frame +} + +// Parse SubRip subtitle file +static 
subtitle_entry_t* parse_srt_file(const char *filename, int fps) { + FILE *file = fopen(filename, "r"); + if (!file) { + fprintf(stderr, "Failed to open subtitle file: %s\n", filename); + return NULL; + } + + subtitle_entry_t *head = NULL; + subtitle_entry_t *tail = NULL; + char line[1024]; + int state = 0; // 0=index, 1=time, 2=text, 3=blank + + subtitle_entry_t *current_entry = NULL; + char *text_buffer = NULL; + size_t text_buffer_size = 0; + + while (fgets(line, sizeof(line), file)) { + // Remove trailing newline + size_t len = strlen(line); + if (len > 0 && line[len-1] == '\n') { + line[len-1] = '\0'; + len--; + } + if (len > 0 && line[len-1] == '\r') { + line[len-1] = '\0'; + len--; + } + + if (state == 0) { // Expecting subtitle index + if (strlen(line) == 0) continue; // Skip empty lines + // Create new subtitle entry + current_entry = calloc(1, sizeof(subtitle_entry_t)); + if (!current_entry) break; + state = 1; + } else if (state == 1) { // Expecting time range + char start_time[32], end_time[32]; + if (sscanf(line, "%31s --> %31s", start_time, end_time) == 2) { + current_entry->start_frame = srt_time_to_frame(start_time, fps); + current_entry->end_frame = srt_time_to_frame(end_time, fps); + + if (current_entry->start_frame < 0 || current_entry->end_frame < 0) { + free(current_entry); + current_entry = NULL; + state = 3; // Skip to next blank line + continue; + } + + // Initialize text buffer + text_buffer_size = 256; + text_buffer = malloc(text_buffer_size); + if (!text_buffer) { + free(current_entry); + current_entry = NULL; + fprintf(stderr, "Memory allocation failed while parsing subtitles\n"); + break; + } + text_buffer[0] = '\0'; + state = 2; + } else { + free(current_entry); + current_entry = NULL; + state = 3; // Skip malformed entry + } + } else if (state == 2) { // Collecting subtitle text + if (strlen(line) == 0) { + // End of subtitle text + current_entry->text = strdup(text_buffer); + free(text_buffer); + text_buffer = NULL; + + // Add to list 
+ if (!head) { + head = current_entry; + tail = current_entry; + } else { + tail->next = current_entry; + tail = current_entry; + } + current_entry = NULL; + state = 0; + } else { + // Append text line + size_t current_len = strlen(text_buffer); + size_t line_len = strlen(line); + size_t needed = current_len + line_len + 2; // +2 for newline and null + + if (needed > text_buffer_size) { + text_buffer_size = needed + 256; + char *new_buffer = realloc(text_buffer, text_buffer_size); + if (!new_buffer) { + free(text_buffer); + free(current_entry); + current_entry = NULL; + fprintf(stderr, "Memory allocation failed while parsing subtitles\n"); + break; + } + text_buffer = new_buffer; + } + + if (current_len > 0) { + strcat(text_buffer, "\n"); + } + strcat(text_buffer, line); + } + } else if (state == 3) { // Skip to next blank line + if (strlen(line) == 0) { + state = 0; + } + } + } + + // Handle final subtitle if file doesn't end with blank line + if (current_entry && text_buffer) { + current_entry->text = strdup(text_buffer); + free(text_buffer); + + if (!head) { + head = current_entry; + } else { + tail->next = current_entry; + } + } + + fclose(file); + return head; +} + +// Free subtitle list +static void free_subtitle_list(subtitle_entry_t *list) { + while (list) { + subtitle_entry_t *next = list->next; + free(list->text); + free(list); + list = next; + } +} + +// Write subtitle packet to output +static int write_subtitle_packet(FILE *output, uint32_t index, uint8_t opcode, const char *text) { + // Calculate packet size + size_t text_len = text ? 
strlen(text) : 0; + size_t packet_size = 3 + 1 + text_len + 1; // index (3 bytes) + opcode + text + null terminator + + // Write packet type and size + uint8_t packet_type = TEV_PACKET_SUBTITLE; + fwrite(&packet_type, 1, 1, output); + fwrite(&packet_size, 4, 1, output); + + // Write subtitle packet data + uint8_t index_bytes[3]; + index_bytes[0] = index & 0xFF; + index_bytes[1] = (index >> 8) & 0xFF; + index_bytes[2] = (index >> 16) & 0xFF; + fwrite(index_bytes, 1, 3, output); + + fwrite(&opcode, 1, 1, output); + + if (text && text_len > 0) { + fwrite(text, 1, text_len, output); + } + + // Write null terminator + uint8_t null_term = 0x00; + fwrite(&null_term, 1, 1, output); + + return packet_size + 5; // packet_size + packet_type + size field +} + +// Process subtitles for the current frame +static int process_subtitles(tev_encoder_t *enc, int frame_num, FILE *output) { + if (!enc->has_subtitles) return 0; + + int bytes_written = 0; + + // Check if any subtitles need to be shown at this frame + subtitle_entry_t *sub = enc->current_subtitle; + while (sub && sub->start_frame <= frame_num) { + if (sub->start_frame == frame_num) { + // Show subtitle + bytes_written += write_subtitle_packet(output, 0, 0x01, sub->text); + if (enc->verbose) { + printf("Frame %d: Showing subtitle: %.50s%s\n", + frame_num, sub->text, strlen(sub->text) > 50 ? "..." 
: ""); + } + } + + if (sub->end_frame == frame_num) { + // Hide subtitle + bytes_written += write_subtitle_packet(output, 0, 0x02, NULL); + if (enc->verbose) { + printf("Frame %d: Hiding subtitle\n", frame_num); + } + } + + // Move to next subtitle if we're past the end of current one + if (sub->end_frame <= frame_num) { + enc->current_subtitle = sub->next; + } + + sub = sub->next; + } + + return bytes_written; +} + +// Initialize encoder +static tev_encoder_t* init_encoder(void) { + tev_encoder_t *enc = calloc(1, sizeof(tev_encoder_t)); + if (!enc) return NULL; + + // set defaults + enc->quality = 2; // Default quality + enc->mp2_packet_size = 0; // Will be detected from MP2 header + enc->mp2_rate_index = 0; + enc->audio_frames_in_buffer = 0; + enc->target_audio_buffer_size = 4; + enc->width = DEFAULT_WIDTH; + enc->height = DEFAULT_HEIGHT; + enc->fps = 0; // Will be detected from input + enc->output_fps = 0; // No frame rate conversion by default + enc->verbose = 0; + enc->subtitle_file = NULL; + enc->has_subtitles = 0; + enc->subtitle_list = NULL; + enc->current_subtitle = NULL; + + // Rate control defaults + enc->target_bitrate_kbps = 0; // 0 = quality mode + enc->bitrate_mode = 0; // Quality mode by default + enc->rate_control_factor = 1.0f; // No adjustment initially + enc->frame_bits_accumulator = 0; + enc->target_bits_per_frame = 0; + enc->complexity_history_index = 0; + enc->average_complexity = 0.0f; + memset(enc->complexity_history, 0, sizeof(enc->complexity_history)); + + init_dct_tables(); + + return enc; +} + +// Allocate encoder buffers +static int alloc_encoder_buffers(tev_encoder_t *enc) { + int pixels = enc->width * enc->height; + int blocks_x = (enc->width + 15) / 16; + int blocks_y = (enc->height + 15) / 16; + int total_blocks = blocks_x * blocks_y; + + enc->current_rgb = malloc(pixels * 3); + enc->previous_rgb = malloc(pixels * 3); + enc->reference_rgb = malloc(pixels * 3); + + enc->y_workspace = malloc(16 * 16 * sizeof(float)); + 
enc->x_workspace = malloc(8 * 8 * sizeof(float)); + enc->b_workspace = malloc(8 * 8 * sizeof(float)); + enc->dct_workspace = malloc(16 * 16 * sizeof(float)); + + enc->block_data = malloc(total_blocks * sizeof(tev_block_t)); + enc->compressed_buffer = malloc(total_blocks * sizeof(tev_block_t) * 2); + enc->mp2_buffer = malloc(MP2_DEFAULT_PACKET_SIZE); + + if (!enc->current_rgb || !enc->previous_rgb || !enc->reference_rgb || + !enc->y_workspace || !enc->x_workspace || !enc->b_workspace || + !enc->dct_workspace || !enc->block_data || + !enc->compressed_buffer || !enc->mp2_buffer) { + return -1; + } + + // Initialize gzip compression stream + enc->gzip_stream.zalloc = Z_NULL; + enc->gzip_stream.zfree = Z_NULL; + enc->gzip_stream.opaque = Z_NULL; + + int gzip_init_result = deflateInit2(&enc->gzip_stream, Z_DEFAULT_COMPRESSION, + Z_DEFLATED, 15 + 16, 8, Z_DEFAULT_STRATEGY); // 15+16 for gzip format + + if (gzip_init_result != Z_OK) { + fprintf(stderr, "Failed to initialize gzip compression\n"); + return 0; + } + + // Initialize previous frame to black + memset(enc->previous_rgb, 0, pixels * 3); + + return 1; +} + +// Free encoder resources +static void free_encoder(tev_encoder_t *enc) { + if (!enc) return; + + deflateEnd(&enc->gzip_stream); + + free(enc->current_rgb); + free(enc->previous_rgb); + free(enc->reference_rgb); + free(enc->y_workspace); + free(enc->x_workspace); + free(enc->b_workspace); + free(enc->dct_workspace); + free(enc->block_data); + free(enc->compressed_buffer); + free(enc->mp2_buffer); + free(enc); +} + +// Write TEV header +static int write_tev_header(FILE *output, tev_encoder_t *enc) { + // Magic + version + fwrite(TEV_MAGIC, 1, 8, output); + uint8_t version = TEV_VERSION; + fwrite(&version, 1, 1, output); + + // Video parameters + uint16_t width = enc->width; + uint16_t height = enc->height; + uint8_t fps = enc->fps; + uint32_t total_frames = enc->total_frames; + uint8_t quality = enc->quality; + uint8_t has_audio = enc->has_audio; + + 
fwrite(&width, 2, 1, output); + fwrite(&height, 2, 1, output); + fwrite(&fps, 1, 1, output); + fwrite(&total_frames, 4, 1, output); + fwrite(&quality, 1, 1, output); + fwrite(&has_audio, 1, 1, output); + + return 0; +} + +// Detect scene changes by analyzing frame differences +static int detect_scene_change(tev_encoder_t *enc) { + if (!enc->previous_rgb || !enc->current_rgb) { + return 0; // No previous frame to compare + } + + long long total_diff = 0; + int changed_pixels = 0; + + // Sample every 4th pixel for performance (still gives good detection) + for (int y = 0; y < enc->height; y += 2) { + for (int x = 0; x < enc->width; x += 2) { + int offset = (y * enc->width + x) * 3; + + // Calculate color difference + int r_diff = abs(enc->current_rgb[offset] - enc->previous_rgb[offset]); + int g_diff = abs(enc->current_rgb[offset + 1] - enc->previous_rgb[offset + 1]); + int b_diff = abs(enc->current_rgb[offset + 2] - enc->previous_rgb[offset + 2]); + + int pixel_diff = r_diff + g_diff + b_diff; + total_diff += pixel_diff; + + // Count significantly changed pixels (threshold of 30 per channel average) + if (pixel_diff > 90) { + changed_pixels++; + } + } + } + + // Calculate metrics for scene change detection + int sampled_pixels = (enc->height / 2) * (enc->width / 2); + double avg_diff = (double)total_diff / sampled_pixels; + double changed_ratio = (double)changed_pixels / sampled_pixels; + + // Scene change thresholds: + // - High average difference (> 40) OR + // - Large percentage of changed pixels (> 30%) + return (avg_diff > 40.0) || (changed_ratio > 0.30); +} + +// Encode and write a frame +static int encode_frame(tev_encoder_t *enc, FILE *output, int frame_num) { + // Check for scene change or time-based keyframe + int is_scene_change = detect_scene_change(enc); + int is_time_keyframe = (frame_num % KEYFRAME_INTERVAL) == 0; + int is_keyframe = is_time_keyframe || is_scene_change; + + // Verbose output for keyframe decisions + if (enc->verbose && is_keyframe) { 
+ if (is_scene_change && !is_time_keyframe) { + printf("Frame %d: Scene change detected, inserting keyframe\n", frame_num); + } else if (is_time_keyframe) { + printf("Frame %d: Time-based keyframe (interval: %d)\n", frame_num, KEYFRAME_INTERVAL); + } + } + int blocks_x = (enc->width + 15) / 16; + int blocks_y = (enc->height + 15) / 16; + + // Track frame complexity for rate control + float frame_complexity = 0.0f; + size_t frame_start_bits = enc->total_output_bytes * 8; + + // Encode all blocks + for (int by = 0; by < blocks_y; by++) { + for (int bx = 0; bx < blocks_x; bx++) { + encode_block(enc, bx, by, is_keyframe); + + // Calculate complexity for rate control (if enabled) + if (enc->bitrate_mode > 0) { + tev_block_t *block = &enc->block_data[by * blocks_x + bx]; + if (block->mode == TEV_MODE_INTRA || block->mode == TEV_MODE_INTER) { + // Sum absolute values of quantised coefficients as complexity metric + for (int i = 1; i < 256; i++) frame_complexity += abs(block->y_coeffs[i]); + for (int i = 1; i < 64; i++) frame_complexity += abs(block->x_coeffs[i]); + for (int i = 1; i < 64; i++) frame_complexity += abs(block->b_coeffs[i]); + } + } + } + } + + // Compress block data using gzip (compatible with TSVM decoder) + size_t block_data_size = blocks_x * blocks_y * sizeof(tev_block_t); + + // Initialize fresh gzip stream for each frame (since Z_FINISH terminates the stream) + z_stream frame_stream; + frame_stream.zalloc = Z_NULL; + frame_stream.zfree = Z_NULL; + frame_stream.opaque = Z_NULL; + + int init_result = deflateInit2(&frame_stream, Z_DEFAULT_COMPRESSION, + Z_DEFLATED, 15 + 16, 8, Z_DEFAULT_STRATEGY); // 15+16 for gzip format + + if (init_result != Z_OK) { + fprintf(stderr, "Failed to initialize gzip compression for frame\n"); + return 0; + } + + // Set up compression stream + frame_stream.next_in = (Bytef*)enc->block_data; + frame_stream.avail_in = block_data_size; + frame_stream.next_out = (Bytef*)enc->compressed_buffer; + frame_stream.avail_out = 
block_data_size * 2; + + int result = deflate(&frame_stream, Z_FINISH); + if (result != Z_STREAM_END) { + fprintf(stderr, "Gzip compression failed: %d\n", result); + deflateEnd(&frame_stream); + return 0; + } + + size_t compressed_size = frame_stream.total_out; + + // Clean up frame stream + deflateEnd(&frame_stream); + + // Write frame packet header (rate control factor now per-block) + uint8_t packet_type = is_keyframe ? TEV_PACKET_IFRAME : TEV_PACKET_PFRAME; + uint32_t payload_size = compressed_size; // Rate control factor now per-block, not per-packet + + fwrite(&packet_type, 1, 1, output); + fwrite(&payload_size, 4, 1, output); + fwrite(enc->compressed_buffer, 1, compressed_size, output); + + if (enc->verbose) { + printf("rateControlFactor=%.6f\n", enc->rate_control_factor); + } + + enc->total_output_bytes += 5 + compressed_size; // packet + size + data (rate_factor now per-block) + + // Update rate control for next frame + if (enc->bitrate_mode > 0) { + size_t frame_bits = (enc->total_output_bytes * 8) - frame_start_bits; + update_rate_control(enc, frame_complexity, frame_bits); + } + + // Swap frame buffers for next frame + uint8_t *temp_rgb = enc->previous_rgb; + enc->previous_rgb = enc->current_rgb; + enc->current_rgb = temp_rgb; + + return 1; +} + +// Execute command and capture output +static char *execute_command(const char *command) { + FILE *pipe = popen(command, "r"); + if (!pipe) return NULL; + + char *result = malloc(4096); + if (!result) { + pclose(pipe); + return NULL; + } + + size_t len = fread(result, 1, 4095, pipe); + result[len] = '\0'; + + pclose(pipe); + return result; +} + +// Get video metadata using ffprobe +static int get_video_metadata(tev_encoder_t *enc) { + char command[1024]; + char *output; + + // Get frame count + snprintf(command, sizeof(command), + "ffprobe -v quiet -select_streams v:0 -count_frames -show_entries stream=nb_read_frames -of csv=p=0 \"%s\"", + enc->input_file); + output = execute_command(command); + if (!output) { 
+ fprintf(stderr, "Failed to get frame count\n"); + return 0; + } + enc->total_frames = atoi(output); + free(output); + + // Get original frame rate (will be converted if user specified different FPS) + snprintf(command, sizeof(command), + "ffprobe -v quiet -select_streams v:0 -show_entries stream=r_frame_rate -of csv=p=0 \"%s\"", + enc->input_file); + output = execute_command(command); + if (!output) { + fprintf(stderr, "Failed to get frame rate\n"); + return 0; + } + + int num, den; + if (sscanf(output, "%d/%d", &num, &den) == 2) { + enc->fps = (den > 0) ? (int)round((float)num/(float)den) : 30; + } else { + enc->fps = (int)round(atof(output)); + } + free(output); + + // If user specified output FPS, calculate new total frames for conversion + if (enc->output_fps > 0 && enc->output_fps != enc->fps) { + // Calculate duration and new frame count + snprintf(command, sizeof(command), + "ffprobe -v quiet -show_entries format=duration -of csv=p=0 \"%s\"", + enc->input_file); + output = execute_command(command); + if (output) { + enc->duration = atof(output); + free(output); + // Update total frames for new frame rate + enc->total_frames = (int)(enc->duration * enc->output_fps); + if (enc->verbose) { + printf("Frame rate conversion: %d fps -> %d fps\n", enc->fps, enc->output_fps); + printf("Original frames: %d, Output frames: %d\n", + (int)(enc->duration * enc->fps), enc->total_frames); + } + enc->fps = enc->output_fps; // Use output FPS for encoding + } + } + + // set keyframe interval + KEYFRAME_INTERVAL = 2 * enc->fps; + + // Calculate target bits per frame for bitrate mode + if (enc->target_bitrate_kbps > 0) { + enc->target_bits_per_frame = (enc->target_bitrate_kbps * 1000) / enc->fps; + if (enc->verbose) { + printf("Target bitrate: %d kbps (%zu bits per frame)\n", + enc->target_bitrate_kbps, enc->target_bits_per_frame); + } + } + + // Check for audio stream + snprintf(command, sizeof(command), + "ffprobe -v quiet -select_streams a:0 -show_entries stream=codec_type 
-of csv=p=0 \"%s\" 2>/dev/null", + enc->input_file); + output = execute_command(command); + enc->has_audio = (output && strstr(output, "audio")); + if (output) free(output); + + if (enc->verbose) { + fprintf(stderr, "Video metadata:\n"); + fprintf(stderr, " Frames: %d\n", enc->total_frames); + fprintf(stderr, " FPS: %d\n", enc->fps); + fprintf(stderr, " Audio: %s\n", enc->has_audio ? "Yes" : "No"); + fprintf(stderr, " Resolution: %dx%d\n", enc->width, enc->height); + } + + return (enc->total_frames > 0 && enc->fps > 0); +} + +// Start FFmpeg process for video conversion with frame rate support +static int start_video_conversion(tev_encoder_t *enc) { + char command[2048]; + + // Build FFmpeg command with potential frame rate conversion + if (enc->output_fps > 0 && enc->output_fps != enc->fps) { + // Frame rate conversion requested + snprintf(command, sizeof(command), + "ffmpeg -v quiet -i \"%s\" -f rawvideo -pix_fmt rgb24 " + "-vf \"fps=%d,scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" " + "-y - 2>&1", + enc->input_file, enc->output_fps, enc->width, enc->height, enc->width, enc->height); + } else { + // No frame rate conversion + snprintf(command, sizeof(command), + "ffmpeg -v quiet -i \"%s\" -f rawvideo -pix_fmt rgb24 " + "-vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" " + "-y -", + enc->input_file, enc->width, enc->height, enc->width, enc->height); + } + + if (enc->verbose) { + printf("FFmpeg command: %s\n", command); + } + + enc->ffmpeg_video_pipe = popen(command, "r"); + if (!enc->ffmpeg_video_pipe) { + fprintf(stderr, "Failed to start FFmpeg process\n"); + return 0; + } + + return 1; +} + +// Start audio conversion +static int start_audio_conversion(tev_encoder_t *enc) { + if (!enc->has_audio) return 1; + + char command[2048]; + snprintf(command, sizeof(command), + "ffmpeg -v quiet -i \"%s\" -acodec libtwolame -psymodel 4 -b:a %dk -ar %d -ac 2 -y \"%s\" 2>/dev/null", + enc->input_file, MP2_RATE_TABLE[enc->quality], 
MP2_SAMPLE_RATE, TEMP_AUDIO_FILE); + + int result = system(command); + if (result == 0) { + enc->mp2_file = fopen(TEMP_AUDIO_FILE, "rb"); + if (enc->mp2_file) { + fseek(enc->mp2_file, 0, SEEK_END); + enc->audio_remaining = ftell(enc->mp2_file); + fseek(enc->mp2_file, 0, SEEK_SET); + } + } + + return (result == 0); +} + +// Get MP2 packet size and rate index from header +static int get_mp2_packet_size(uint8_t *header) { + int bitrate_index = (header[2] >> 4) & 0x0F; + int bitrates[] = {0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384}; + if (bitrate_index >= 15) return MP2_DEFAULT_PACKET_SIZE; + + int bitrate = bitrates[bitrate_index]; + int padding_bit = (header[2] >> 1) & 0x01; + if (bitrate <= 0) return MP2_DEFAULT_PACKET_SIZE; + + int frame_size = (144 * bitrate * 1000) / MP2_SAMPLE_RATE + padding_bit; + return frame_size; +} + +static int mp2_packet_size_to_rate_index(int packet_size, int is_mono) { + // Map packet sizes to rate indices for TEV format + const int mp2_frame_sizes[] = {144,216,252,288,360,432,504,576,720,864,1008,1152,1440,1728}; + for (int i = 0; i < 14; i++) { + if (packet_size <= mp2_frame_sizes[i]) { + return i; + } + } + return 13; // Default to highest rate +} + +// Process audio for current frame +static int process_audio(tev_encoder_t *enc, int frame_num, FILE *output) { + if (!enc->has_audio || !enc->mp2_file || enc->audio_remaining <= 0) { + return 1; + } + + // Initialize packet size on first frame + if (enc->mp2_packet_size == 0) { + uint8_t header[4]; + if (fread(header, 1, 4, enc->mp2_file) != 4) return 1; + fseek(enc->mp2_file, 0, SEEK_SET); + + enc->mp2_packet_size = get_mp2_packet_size(header); + int is_mono = (header[3] >> 6) == 3; + enc->mp2_rate_index = mp2_packet_size_to_rate_index(enc->mp2_packet_size, is_mono); + enc->target_audio_buffer_size = 4; // 4 audio packets in buffer + } + + // Calculate how much audio time each frame represents (in seconds) + double frame_audio_time = 1.0 / enc->fps; + + // 
Calculate how much audio time each MP2 packet represents + // MP2 frame contains 1152 samples at 32kHz = 0.036 seconds + double packet_audio_time = 1152.0 / MP2_SAMPLE_RATE; + + // Estimate how many packets we consume per video frame + double packets_per_frame = frame_audio_time / packet_audio_time; + + // Audio buffering strategy: maintain target buffer level + int packets_to_insert = 0; + if (frame_num == 0) { + // Prime buffer to target level initially + packets_to_insert = enc->target_audio_buffer_size; + enc->audio_frames_in_buffer = 0; // count starts from 0 + if (enc->verbose) { + printf("Frame %d: Priming audio buffer with %d packets\n", frame_num, packets_to_insert); + } + } else { + // Simulate buffer consumption (fractional consumption per frame) + double old_buffer = enc->audio_frames_in_buffer; + enc->audio_frames_in_buffer -= packets_per_frame; + + // Calculate how many packets we need to maintain target buffer level + // Only insert when buffer drops below target, and only insert enough to restore target + double target_level = (double)enc->target_audio_buffer_size; + if (enc->audio_frames_in_buffer < target_level) { + double deficit = target_level - enc->audio_frames_in_buffer; + // Insert packets to cover the deficit, but at least maintain minimum flow + packets_to_insert = (int)ceil(deficit); + // Cap at reasonable maximum to prevent excessive insertion + if (packets_to_insert > enc->target_audio_buffer_size) { + packets_to_insert = enc->target_audio_buffer_size; + } + + if (enc->verbose) { + printf("Frame %d: Buffer low (%.2f->%.2f), deficit %.2f, inserting %d packets\n", + frame_num, old_buffer, enc->audio_frames_in_buffer, deficit, packets_to_insert); + } + } else if (enc->verbose && old_buffer != enc->audio_frames_in_buffer) { + printf("Frame %d: Buffer sufficient (%.2f->%.2f), no packets\n", + frame_num, old_buffer, enc->audio_frames_in_buffer); + } + } + + // Insert the calculated number of audio packets + for (int q = 0; q < 
packets_to_insert; q++) { + size_t bytes_to_read = enc->mp2_packet_size; + if (bytes_to_read > enc->audio_remaining) { + bytes_to_read = enc->audio_remaining; + } + + size_t bytes_read = fread(enc->mp2_buffer, 1, bytes_to_read, enc->mp2_file); + if (bytes_read == 0) break; + + // Write TEV MP2 audio packet + uint8_t audio_packet_type = TEV_PACKET_AUDIO_MP2; + uint32_t audio_len = (uint32_t)bytes_read; + fwrite(&audio_packet_type, 1, 1, output); + fwrite(&audio_len, 4, 1, output); + fwrite(enc->mp2_buffer, 1, bytes_read, output); + + // Track audio bytes written + enc->total_output_bytes += 1 + 4 + bytes_read; + enc->audio_remaining -= bytes_read; + enc->audio_frames_in_buffer++; + + if (frame_num == 0) { + enc->audio_frames_in_buffer = enc->target_audio_buffer_size / 2; // trick the buffer simulator so that it doesn't count the frame 0 priming + } + + if (enc->verbose) { + printf("Audio packet %d: %zu bytes (buffer: %.2f packets)\n", + q, bytes_read, enc->audio_frames_in_buffer); + } + } + + return 1; +} + +// Show usage information +static void show_usage(const char *program_name) { + printf("TEV YCoCg-R 4:2:0 Video Encoder with Bitrate Control\n"); + printf("Usage: %s [options] -i input.mp4 -o output.mv2\n\n", program_name); + printf("Options:\n"); + printf(" -i, --input FILE Input video file\n"); + printf(" -o, --output FILE Output video file (use '-' for stdout)\n"); + printf(" -s, --subtitles FILE SubRip (.srt) subtitle file\n"); + printf(" -w, --width N Video width (default: %d)\n", DEFAULT_WIDTH); + printf(" -h, --height N Video height (default: %d)\n", DEFAULT_HEIGHT); + printf(" -f, --fps N Output frames per second (enables frame rate conversion)\n"); + printf(" -q, --quality N Quality level 0-4 (default: 2, only decides audio rate in bitrate mode)\n"); + printf(" -b, --bitrate N Target bitrate in kbps (enables bitrate control mode; DON'T USE - NOT WORKING AS INTENDED)\n"); + printf(" -v, --verbose Verbose output\n"); + printf(" -t, --test Test mode: 
generate solid colour frames\n"); + printf(" --help Show this help\n\n"); + printf("Rate Control Modes:\n"); + printf(" Quality mode (default): Fixed quantisation based on -q parameter\n"); + printf(" Bitrate mode (-b N): Dynamic quantisation targeting N kbps average\n\n"); + printf("Audio Rate by Quality:\n"); + printf(" "); + for (int i = 0; i < sizeof(MP2_RATE_TABLE) / sizeof(int); i++) { + printf("%d: %d kbps\t", i, MP2_RATE_TABLE[i]); + } + printf("\n\n"); + printf("Features:\n"); + printf(" - YCoCg-R 4:2:0 chroma subsampling for 50%% compression improvement\n"); + printf(" - 16x16 Y blocks with 8x8 chroma for optimal DCT efficiency\n"); + printf(" - Frame rate conversion with FFmpeg temporal filtering\n"); +// printf(" - Adaptive bitrate control with complexity-based adjustment\n"); + printf("Examples:\n"); + printf(" %s -i input.mp4 -o output.mv2 # Use default setting (q=2)\n", program_name); + printf(" %s -i input.avi -f 15 -q 3 -o output.mv2 # 15fps @ q=3\n", program_name); + printf(" %s -i input.mp4 -s input.srt -o output.mv2 # With SubRip subtitles\n", program_name); +// printf(" %s -i input.mp4 -b 800 -o output.mv2 # 800 kbps bitrate target\n", program_name); +// printf(" %s -i input.avi -f 15 -b 500 -o output.mv2 # 15fps @ 500 kbps\n", program_name); +// printf(" %s --test -b 1000 -o test.mv2 # Test with 1000 kbps target\n", program_name); +} + + +// Cleanup encoder resources +static void cleanup_encoder(tev_encoder_t *enc) { + if (!enc) return; + + if (enc->ffmpeg_video_pipe) pclose(enc->ffmpeg_video_pipe); + if (enc->mp2_file) { + fclose(enc->mp2_file); + unlink(TEMP_AUDIO_FILE); // Remove temporary audio file + } + + free(enc->input_file); + free(enc->output_file); + free(enc->subtitle_file); + free_subtitle_list(enc->subtitle_list); + + free_encoder(enc); +} + +int sync_packet_count = 0; + +// Main function +int main(int argc, char *argv[]) { + tev_encoder_t *enc = init_encoder(); + if (!enc) { + fprintf(stderr, "Failed to initialize encoder\n"); + 
return 1; + } + + int test_mode = 0; + + static struct option long_options[] = { + {"input", required_argument, 0, 'i'}, + {"output", required_argument, 0, 'o'}, + {"subtitles", required_argument, 0, 's'}, + {"width", required_argument, 0, 'w'}, + {"height", required_argument, 0, 'h'}, + {"fps", required_argument, 0, 'f'}, + {"quality", required_argument, 0, 'q'}, + {"bitrate", required_argument, 0, 'b'}, + {"verbose", no_argument, 0, 'v'}, + {"test", no_argument, 0, 't'}, + {"help", no_argument, 0, '?'}, + {0, 0, 0, 0} + }; + + int option_index = 0; + int c; + + while ((c = getopt_long(argc, argv, "i:o:s:w:h:f:q:b:vt", long_options, &option_index)) != -1) { + switch (c) { + case 'i': + enc->input_file = strdup(optarg); + break; + case 'o': + enc->output_file = strdup(optarg); + enc->output_to_stdout = (strcmp(optarg, "-") == 0); + break; + case 's': + enc->subtitle_file = strdup(optarg); + break; + case 'w': + enc->width = atoi(optarg); + break; + case 'h': + enc->height = atoi(optarg); + break; + case 'f': + enc->output_fps = atoi(optarg); + if (enc->output_fps <= 0) { + fprintf(stderr, "Invalid FPS: %d\n", enc->output_fps); + cleanup_encoder(enc); + return 1; + } + break; + case 'q': + enc->quality = CLAMP(atoi(optarg), 0, 4); + break; + case 'b': + enc->target_bitrate_kbps = atoi(optarg); + if (enc->target_bitrate_kbps > 0) { + enc->bitrate_mode = 1; // Enable bitrate control + } + break; + case 'v': + enc->verbose = 1; + break; + case 't': + test_mode = 1; + break; + case 0: + if (strcmp(long_options[option_index].name, "help") == 0) { + show_usage(argv[0]); + cleanup_encoder(enc); + return 0; + } + break; + default: + show_usage(argv[0]); + cleanup_encoder(enc); + return 1; + } + } + + if (!test_mode && (!enc->input_file || !enc->output_file)) { + fprintf(stderr, "Input and output files are required (unless using --test mode)\n"); + show_usage(argv[0]); + cleanup_encoder(enc); + return 1; + } + + if (!enc->output_file) { + fprintf(stderr, "Output file is 
required\n"); + show_usage(argv[0]); + cleanup_encoder(enc); + return 1; + } + + // Handle test mode or real video + if (test_mode) { + // Test mode: generate solid colour frames + enc->fps = 1; + enc->total_frames = 15; + enc->has_audio = 0; + printf("Test mode: Generating 15 solid colour frames\n"); + } else { + // Get video metadata and start FFmpeg processes + if (!get_video_metadata(enc)) { + fprintf(stderr, "Failed to get video metadata\n"); + cleanup_encoder(enc); + return 1; + } + } + + // Load subtitle file if specified + if (enc->subtitle_file) { + enc->subtitle_list = parse_srt_file(enc->subtitle_file, enc->fps); + if (enc->subtitle_list) { + enc->has_subtitles = 1; + enc->current_subtitle = enc->subtitle_list; + if (enc->verbose) { + printf("Loaded subtitles from: %s\n", enc->subtitle_file); + } + } else { + fprintf(stderr, "Failed to parse subtitle file: %s\n", enc->subtitle_file); + // Continue without subtitles + } + } + + // Allocate buffers + if (!alloc_encoder_buffers(enc)) { + fprintf(stderr, "Failed to allocate encoder buffers\n"); + cleanup_encoder(enc); + return 1; + } + + // Start FFmpeg processes (only for real video mode) + if (!test_mode) { + // Start FFmpeg video conversion + if (!start_video_conversion(enc)) { + fprintf(stderr, "Failed to start video conversion\n"); + cleanup_encoder(enc); + return 1; + } + + // Start audio conversion (if audio present) + if (!start_audio_conversion(enc)) { + fprintf(stderr, "Warning: Audio conversion failed\n"); + enc->has_audio = 0; + } + } + + // Open output + FILE *output = enc->output_to_stdout ? 
stdout : fopen(enc->output_file, "wb"); + if (!output) { + perror("Failed to open output file"); + cleanup_encoder(enc); + return 1; + } + + // Write TEV header + write_tev_header(output, enc); + gettimeofday(&enc->start_time, NULL); + + printf("Encoding video with YCoCg-R 4:2:0 format...\n"); + if (enc->output_fps > 0) { + printf("Frame rate conversion enabled: %d fps output\n", enc->output_fps); + } + if (enc->bitrate_mode > 0) { + printf("Bitrate control enabled: targeting %d kbps\n", enc->target_bitrate_kbps); + } else { + printf("Quality mode: q=%d\n", enc->quality); + } + + // Process frames + int frame_count = 0; + while (frame_count < enc->total_frames) { + if (test_mode) { + // Generate test frame with solid colours + size_t rgb_size = enc->width * enc->height * 3; + uint8_t test_r = 0, test_g = 0, test_b = 0; + const char* colour_name = "unknown"; + + switch (frame_count) { + case 0: test_r = 0; test_g = 0; test_b = 0; colour_name = "black"; break; + case 1: test_r = 127; test_g = 127; test_b = 127; colour_name = "grey"; break; + case 2: test_r = 255; test_g = 255; test_b = 255; colour_name = "white"; break; + case 3: test_r = 127; test_g = 0; test_b = 0; colour_name = "half red"; break; + case 4: test_r = 127; test_g = 127; test_b = 0; colour_name = "half yellow"; break; + case 5: test_r = 0; test_g = 127; test_b = 0; colour_name = "half green"; break; + case 6: test_r = 0; test_g = 127; test_b = 127; colour_name = "half cyan"; break; + case 7: test_r = 0; test_g = 0; test_b = 127; colour_name = "half blue"; break; + case 8: test_r = 127; test_g = 0; test_b = 127; colour_name = "half magenta"; break; + case 9: test_r = 255; test_g = 0; test_b = 0; colour_name = "red"; break; + case 10: test_r = 255; test_g = 255; test_b = 0; colour_name = "yellow"; break; + case 11: test_r = 0; test_g = 255; test_b = 0; colour_name = "green"; break; + case 12: test_r = 0; test_g = 255; test_b = 255; colour_name = "cyan"; break; + case 13: test_r = 0; test_g = 0; test_b = 
255; colour_name = "blue"; break; + case 14: test_r = 255; test_g = 0; test_b = 255; colour_name = "magenta"; break; + } + + // Fill entire frame with solid colour + for (size_t i = 0; i < rgb_size; i += 3) { + enc->current_rgb[i] = test_r; + enc->current_rgb[i + 1] = test_g; + enc->current_rgb[i + 2] = test_b; + } + + printf("Frame %d: %s (%d,%d,%d)\n", frame_count, colour_name, test_r, test_g, test_b); + + // Test YCoCg-R conversion + int y_test, x_test, b_test; + rgb_to_xyb(test_r, test_g, test_b, &y_test, &x_test, &b_test); + printf(" XYB: Y=%d X=%d B=%d\n", y_test, x_test, b_test); + + // Test reverse conversion + uint8_t r_rev, g_rev, b_rev; + xyb_to_rgb(y_test, x_test, b_test, &r_rev, &g_rev, &b_rev); + printf(" Reverse: R=%d G=%d B=%d\n", r_rev, g_rev, b_rev); + + } else { + // Read RGB data directly from FFmpeg pipe + size_t rgb_size = enc->width * enc->height * 3; + size_t bytes_read = fread(enc->current_rgb, 1, rgb_size, enc->ffmpeg_video_pipe); + + if (bytes_read != rgb_size) { + if (enc->verbose) { + printf("Frame %d: Expected %zu bytes, got %zu bytes\n", frame_count, rgb_size, bytes_read); + if (feof(enc->ffmpeg_video_pipe)) { + printf("FFmpeg pipe reached end of file\n"); + } + if (ferror(enc->ffmpeg_video_pipe)) { + printf("FFmpeg pipe error occurred\n"); + } + } + break; // End of video or error + } + } + + // Process audio for this frame + process_audio(enc, frame_count, output); + + // Process subtitles for this frame + process_subtitles(enc, frame_count, output); + + // Encode frame + if (!encode_frame(enc, output, frame_count)) { + fprintf(stderr, "Failed to encode frame %d\n", frame_count); + break; + } + else { + // Write a sync packet only after a video is been coded + uint8_t sync_packet = TEV_PACKET_SYNC; + fwrite(&sync_packet, 1, 1, output); + sync_packet_count++; + } + + + + frame_count++; + if (enc->verbose || frame_count % 30 == 0) { + struct timeval now; + gettimeofday(&now, NULL); + double elapsed = (now.tv_sec - 
enc->start_time.tv_sec) + + (now.tv_usec - enc->start_time.tv_usec) / 1000000.0; + double fps = frame_count / elapsed; + printf("Encoded frame %d/%d (%.1f fps)\n", frame_count, enc->total_frames, fps); + } + } + + // Write final sync packet + uint8_t sync_packet = TEV_PACKET_SYNC; + fwrite(&sync_packet, 1, 1, output); + sync_packet_count++; + + if (!enc->output_to_stdout) { + fclose(output); + } + + // Final statistics + struct timeval end_time; + gettimeofday(&end_time, NULL); + double total_time = (end_time.tv_sec - enc->start_time.tv_sec) + + (end_time.tv_usec - enc->start_time.tv_usec) / 1000000.0; + + printf("\nEncoding complete!\n"); + printf(" Frames encoded: %d\n", frame_count); + printf(" - sync packets: %d\n", sync_packet_count); + printf(" Framerate: %d\n", enc->fps); + printf(" Output size: %zu bytes\n", enc->total_output_bytes); + + // Calculate achieved bitrate + double achieved_bitrate_kbps = (enc->total_output_bytes * 8.0) / 1000.0 / total_time; + printf(" Achieved bitrate: %.1f kbps", achieved_bitrate_kbps); + if (enc->bitrate_mode > 0) { + printf(" (target: %d kbps, %.1f%%)", enc->target_bitrate_kbps, + (achieved_bitrate_kbps / enc->target_bitrate_kbps) * 100.0); + } + printf("\n"); + + printf(" Encoding time: %.2fs (%.1f fps)\n", total_time, frame_count / total_time); + printf(" Block statistics: INTRA=%d, INTER=%d, MOTION=%d, SKIP=%d\n", + enc->blocks_intra, enc->blocks_inter, enc->blocks_motion, enc->blocks_skip); + + if (enc->bitrate_mode > 0) { + printf(" Rate control factor: %.3f\n", enc->rate_control_factor); + } + + cleanup_encoder(enc); + return 0; +} \ No newline at end of file diff --git a/video_encoder/xyb_conversion.c b/video_encoder/xyb_conversion.c new file mode 100644 index 0000000..84b4c3c --- /dev/null +++ b/video_encoder/xyb_conversion.c @@ -0,0 +1,200 @@ +// XYB Color Space Conversion Functions for TEV +// Based on JPEG XL XYB specification with proper sRGB linearization +// test with: +//// gcc -DXYB_TEST_MAIN -o test_xyb 
xyb_conversion.c -lm && ./test_xyb + +#include +#include +#include +#include + +#define CLAMP(x, min, max) ((x) < (min) ? (min) : ((x) > (max) ? (max) : (x))) + +// XYB conversion constants from JPEG XL specification +static const double XYB_BIAS = 0.00379307325527544933; +static const double CBRT_BIAS = 0.01558; // cbrt(XYB_BIAS) + +// RGB to LMS mixing coefficients +static const double RGB_TO_LMS[3][3] = { + {0.3, 0.622, 0.078}, // L coefficients + {0.23, 0.692, 0.078}, // M coefficients + {0.24342268924547819, 0.20476744424496821, 0.55180986650955360} // S coefficients +}; + +// LMS to RGB inverse matrix (calculated via matrix inversion) +static const double LMS_TO_RGB[3][3] = { + {11.0315669046, -9.8669439081, -0.1646229965}, + {-3.2541473811, 4.4187703776, -0.1646229965}, + {-3.6588512867, 2.7129230459, 1.9459282408} +}; + +// sRGB linearization (0..1 range) +static inline double srgb_linearize(double val) { + if (val > 0.04045) { + return pow((val + 0.055) / 1.055, 2.4); + } else { + return val / 12.92; + } +} + +// sRGB unlinearization (0..1 range) +static inline double srgb_unlinearize(double val) { + if (val > 0.0031308) { + return 1.055 * pow(val, 1.0 / 2.4) - 0.055; + } else { + return val * 12.92; + } +} + +// Fast cube root approximation for performance +static inline double fast_cbrt(double x) { + if (x < 0) return -cbrt(-x); + return cbrt(x); +} + +// RGB to XYB conversion with proper sRGB linearization +void rgb_to_xyb(uint8_t r, uint8_t g, uint8_t b, double *x, double *y, double *xyb_b) { + // Convert RGB to 0-1 range and linearize sRGB + double r_norm = srgb_linearize(r / 255.0); + double g_norm = srgb_linearize(g / 255.0); + double b_norm = srgb_linearize(b / 255.0); + + // RGB to LMS mixing with bias + double lmix = RGB_TO_LMS[0][0] * r_norm + RGB_TO_LMS[0][1] * g_norm + RGB_TO_LMS[0][2] * b_norm + XYB_BIAS; + double mmix = RGB_TO_LMS[1][0] * r_norm + RGB_TO_LMS[1][1] * g_norm + RGB_TO_LMS[1][2] * b_norm + XYB_BIAS; + double smix = 
RGB_TO_LMS[2][0] * r_norm + RGB_TO_LMS[2][1] * g_norm + RGB_TO_LMS[2][2] * b_norm + XYB_BIAS; + + // Apply gamma correction (cube root) + double lgamma = fast_cbrt(lmix) - CBRT_BIAS; + double mgamma = fast_cbrt(mmix) - CBRT_BIAS; + double sgamma = fast_cbrt(smix) - CBRT_BIAS; + + // LMS to XYB transformation + *x = (lgamma - mgamma) / 2.0; + *y = (lgamma + mgamma) / 2.0; + *xyb_b = sgamma; +} + +// XYB to RGB conversion with proper sRGB unlinearization +void xyb_to_rgb(double x, double y, double xyb_b, uint8_t *r, uint8_t *g, uint8_t *b) { + // XYB to LMS gamma + double lgamma = x + y; + double mgamma = y - x; + double sgamma = xyb_b; + + // Remove gamma correction + double lmix = pow(lgamma + CBRT_BIAS, 3.0) - XYB_BIAS; + double mmix = pow(mgamma + CBRT_BIAS, 3.0) - XYB_BIAS; + double smix = pow(sgamma + CBRT_BIAS, 3.0) - XYB_BIAS; + + // LMS to linear RGB using inverse matrix + double r_linear = LMS_TO_RGB[0][0] * lmix + LMS_TO_RGB[0][1] * mmix + LMS_TO_RGB[0][2] * smix; + double g_linear = LMS_TO_RGB[1][0] * lmix + LMS_TO_RGB[1][1] * mmix + LMS_TO_RGB[1][2] * smix; + double b_linear = LMS_TO_RGB[2][0] * lmix + LMS_TO_RGB[2][1] * mmix + LMS_TO_RGB[2][2] * smix; + + // Clamp linear RGB to valid range + r_linear = CLAMP(r_linear, 0.0, 1.0); + g_linear = CLAMP(g_linear, 0.0, 1.0); + b_linear = CLAMP(b_linear, 0.0, 1.0); + + // Convert back to sRGB gamma and 0-255 range + *r = CLAMP((int)(srgb_unlinearize(r_linear) * 255.0 + 0.5), 0, 255); + *g = CLAMP((int)(srgb_unlinearize(g_linear) * 255.0 + 0.5), 0, 255); + *b = CLAMP((int)(srgb_unlinearize(b_linear) * 255.0 + 0.5), 0, 255); +} + +// Convert RGB to XYB with integer quantization suitable for TEV format +void rgb_to_xyb_quantized(uint8_t r, uint8_t g, uint8_t b, int *x_quant, int *y_quant, int *b_quant) { + double x, y, xyb_b; + rgb_to_xyb(r, g, b, &x, &y, &xyb_b); + + // Quantize to suitable integer ranges for TEV + // Y channel: 0-255 (similar to current Y in YCoCg) + *y_quant = CLAMP((int)(y * 255.0 + 128.0), 0, 
255); + + // X channel: -128 to +127 (similar to Co range) + *x_quant = CLAMP((int)(x * 255.0), -128, 127); + + // B channel: -128 to +127 (similar to Cg, can be aggressively quantized) + *b_quant = CLAMP((int)(xyb_b * 255.0), -128, 127); +} + +// Test function to verify conversion accuracy +int test_xyb_conversion() { + printf("Testing XYB conversion accuracy with sRGB linearization...\n"); + + // Test with various RGB values + uint8_t test_colors[][3] = { + {255, 0, 0}, // Red + {0, 255, 0}, // Green + {0, 0, 255}, // Blue + {255, 255, 255}, // White + {0, 0, 0}, // Black + {128, 128, 128}, // Gray + {255, 255, 0}, // Yellow + {255, 0, 255}, // Magenta + {0, 255, 255}, // Cyan + // MacBeth chart colours converted to sRGB + {0x73,0x52,0x44}, + {0xc2,0x96,0x82}, + {0x62,0x7a,0x9d}, + {0x57,0x6c,0x43}, + {0x85,0x80,0xb1}, + {0x67,0xbd,0xaa}, + {0xd6,0x7e,0x2c}, + {0x50,0x5b,0xa6}, + {0xc1,0x5a,0x63}, + {0x5e,0x3c,0x6c}, + {0x9d,0xbc,0x40}, + {0xe0,0xa3,0x2e}, + {0x38,0x3d,0x96}, + {0x46,0x94,0x49}, + {0xaf,0x36,0x3c}, + {0xe7,0xc7,0x1f}, + {0xbb,0x56,0x95}, + {0x08,0x85,0xa1}, + {0xf3,0xf3,0xf3}, + {0xc8,0xc8,0xc8}, + {0xa0,0xa0,0xa0}, + {0x7a,0x7a,0x7a}, + {0x55,0x55,0x55}, + {0x34,0x34,0x34} + }; + + int num_tests = sizeof(test_colors) / sizeof(test_colors[0]); + int errors = 0; + + for (int i = 0; i < num_tests; i++) { + uint8_t r_orig = test_colors[i][0]; + uint8_t g_orig = test_colors[i][1]; + uint8_t b_orig = test_colors[i][2]; + + double x, y, xyb_b; + uint8_t r_conv, g_conv, b_conv; + + // Forward and reverse conversion + rgb_to_xyb(r_orig, g_orig, b_orig, &x, &y, &xyb_b); + xyb_to_rgb(x, y, xyb_b, &r_conv, &g_conv, &b_conv); + + // Check accuracy (allow small rounding errors) + int r_error = abs((int)r_orig - (int)r_conv); + int g_error = abs((int)g_orig - (int)g_conv); + int b_error = abs((int)b_orig - (int)b_conv); + + printf("RGB(%3d,%3d,%3d) -> XYB(%6.3f,%6.3f,%6.3f) -> RGB(%3d,%3d,%3d) [Error: %d,%d,%d]\n", + r_orig, g_orig, b_orig, x, y, xyb_b, 
r_conv, g_conv, b_conv, r_error, g_error, b_error); + + if (r_error > 2 || g_error > 2 || b_error > 2) { + errors++; + } + } + + printf("Test completed: %d/%d passed\n", num_tests - errors, num_tests); + return errors == 0; +} + +#ifdef XYB_TEST_MAIN +int main() { + return test_xyb_conversion() ? 0 : 1; +} +#endif \ No newline at end of file