From 41a8b578b53396d4a73454baa7ca0d3f33ab6e55 Mon Sep 17 00:00:00 2001 From: minjaesong Date: Tue, 30 Sep 2025 01:05:14 +0900 Subject: [PATCH] Apparently you can push the chroma extremely far --- assets/disk0/tvdos/bin/playtav.js | 4 +- terranmon.txt | 277 +++++++++++++++++- .../torvald/tsvm/GraphicsJSR223Delegate.kt | 39 ++- video_encoder/decoder_tav.c | 24 +- video_encoder/encoder_tav.c | 93 ++++-- 5 files changed, 387 insertions(+), 50 deletions(-) diff --git a/assets/disk0/tvdos/bin/playtav.js b/assets/disk0/tvdos/bin/playtav.js index 0be8573..199a22c 100644 --- a/assets/disk0/tvdos/bin/playtav.js +++ b/assets/disk0/tvdos/bin/playtav.js @@ -498,6 +498,8 @@ let oldBgcol = [BIAS_LIGHTING_MIN, BIAS_LIGHTING_MIN, BIAS_LIGHTING_MIN] let notifHidden = false +const QLUT = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096]; + function getRGBfromScr(x, y) { let offset = y * WIDTH + x let rg = sys.peek(-1048577 - offset) @@ -723,7 +725,7 @@ try { compressedSize, // Size of compressed data CURRENT_RGB_ADDR, PREV_RGB_ADDR, // RGB buffer pointers header.width, header.height, - header.qualityLevel, header.qualityY, header.qualityCo, header.qualityCg, + header.qualityLevel, QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg], header.channelLayout, // Channel layout for variable processing trueFrameCount, header.waveletFilter, // TAV-specific parameter diff --git a/terranmon.txt b/terranmon.txt index 617aa57..ffa2a3b 100644 --- a/terranmon.txt +++ b/terranmon.txt @@ -906,9 +906,9 @@ transmission capability, and region-of-interest coding. - 16 = DD-4 (Four-point interpolating Deslauriers-Dubuc; experimental) - 255 = Haar (demonstration purpose only) uint8 Decomposition Levels: number of DWT levels (1-6+) - uint8 Quantiser Index for Y channel (1: lossless, 255: potato) - uint8 Quantiser Index for Co channel (1: lossless, 255: potato) - uint8 Quantiser Index for Cg channel (1: lossless, 255: potato) + uint8 Quantiser Index for Y channel (uses exponential numeric system; 0: lossless, 255: potato) + uint8 Quantiser Index for Co channel (uses exponential numeric system; 0: lossless, 255: potato) + uint8 Quantiser Index for Cg channel (uses exponential numeric system; 0: lossless, 255: potato) uint8 Extra Feature Flags (must be ignored for still images) - bit 0 = has audio - bit 1 = has subtitle @@ -976,9 +976,9 @@ transmission capability, and region-of-interest coding. 0x00 = SKIP (copy from previous frame) 0x01 = INTRA (DWT-coded) 0x02 = DELTA (DWT delta) - uint8 Quantiser override Y (use 0 to disable overriding; shared with A channel) - uint8 Quantiser override Co (use 0 to disable overriding) - uint8 Quantiser override Cg (use 0 to disable overriding) + uint8 Quantiser override Y (uses exponential numeric system; stored with index bias of 1 (127->252, 255->4032); use 0 to disable overriding; shared with A channel) + uint8 Quantiser override Co (uses exponential numeric system; stored with index bias of 1 (127->252, 255->4032); use 0 to disable overriding) + uint8 Quantiser override Cg (uses exponential numeric system; stored with index bias of 1 (127->252, 255->4032); use 0 to disable overriding) - note: quantiser overrides are always present regardless of the channel layout ## Coefficient Storage Format (Significance Map Compression) @@ -1081,7 +1081,7 @@ TAV supports two colour spaces: Perceptual versions (5-6) apply HVS-optimized quantization weights per channel, while uniform versions (3-4) use consistent quantization across all subbands. -When Alpha channel is stored, they must be sRGB nonlinearised before DWT and quantisation. +The encoder expects linear alpha. ## Compression Features - Single DWT tiles vs 16x16 DCT blocks in TEV @@ -1113,6 +1113,269 @@ Uses same Simple Subtitle Format (SSF) as TEV for text overlay functionality. ## NTSC Framerate handling Unlike the TEV format, TAV encoder emits extra sync packet for every 1000th frames. Decoder can just play the video without any special treatment. +## Exponential Numeric System +This system maps [0..255] to [1..4096] + +Number|Index +------+----- +1|0 +2|1 +3|2 +4|3 +5|4 +6|5 +7|6 +8|7 +9|8 +10|9 +11|10 +12|11 +13|12 +14|13 +15|14 +16|15 +17|16 +18|17 +19|18 +20|19 +21|20 +22|21 +23|22 +24|23 +25|24 +26|25 +27|26 +28|27 +29|28 +30|29 +31|30 +32|31 +33|32 +34|33 +35|34 +36|35 +37|36 +38|37 +39|38 +40|39 +41|40 +42|41 +43|42 +44|43 +45|44 +46|45 +47|46 +48|47 +49|48 +50|49 +51|50 +52|51 +53|52 +54|53 +55|54 +56|55 +57|56 +58|57 +59|58 +60|59 +61|60 +62|61 +63|62 +64|63 +66|64 +68|65 +70|66 +72|67 +74|68 +76|69 +78|70 +80|71 +82|72 +84|73 +86|74 +88|75 +90|76 +92|77 +94|78 +96|79 +98|80 +100|81 +102|82 +104|83 +106|84 +108|85 +110|86 +112|87 +114|88 +116|89 +118|90 +120|91 +122|92 +124|93 +126|94 +128|95 +132|96 +136|97 +140|98 +144|99 +148|100 +152|101 +156|102 +160|103 +164|104 +168|105 +172|106 +176|107 +180|108 +184|109 +188|110 +192|111 +196|112 +200|113 +204|114 +208|115 +212|116 +216|117 +220|118 +224|119 +228|120 +232|121 +236|122 +240|123 +244|124 +248|125 +252|126 +256|127 +264|128 +272|129 +280|130 +288|131 +296|132 +304|133 +312|134 +320|135 +328|136 +336|137 +344|138 +352|139 +360|140 +368|141 +376|142 +384|143 +392|144 +400|145 +408|146 +416|147 +424|148 +432|149 +440|150 +448|151 +456|152 +464|153 +472|154 +480|155 +488|156 +496|157 +504|158 +512|159 +528|160 +544|161 +560|162 +576|163 +592|164 +608|165 +624|166 +640|167 +656|168 +672|169 +688|170 +704|171 +720|172 +736|173 +752|174 +768|175 +784|176 +800|177 +816|178 +832|179 +848|180 +864|181 +880|182 +896|183 +912|184 +928|185 +944|186 +960|187 +976|188 +992|189 +1008|190 +1024|191 +1056|192 +1088|193 +1120|194 +1152|195 +1184|196 +1216|197 +1248|198 +1280|199 +1312|200 +1344|201 +1376|202 +1408|203 +1440|204 +1472|205 +1504|206 +1536|207 +1568|208 +1600|209 +1632|210 +1664|211 +1696|212 +1728|213 +1760|214 +1792|215 +1824|216 +1856|217 +1888|218 +1920|219 +1952|220 +1984|221 +2016|222 +2048|223 +2112|224 +2176|225 +2240|226 +2304|227 +2368|228 +2432|229 +2496|230 +2560|231 +2624|232 +2688|233 +2752|234 +2816|235 +2880|236 +2944|237 +3008|238 +3072|239 +3136|240 +3200|241 +3264|242 +3328|243 +3392|244 +3456|245 +3520|246 +3584|247 +3648|248 +3712|249 +3776|250 +3840|251 +3904|252 +3968|253 +4032|254 +4096|255 + + -------------------------------------------------------------------------------- TSVM Universal Cue format diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt index db64b7d..4f7a326 100644 --- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt @@ -4298,6 +4298,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { private val tavDebugFrameTarget = -1 // use negative number to disable the debug print private var tavDebugCurrentFrameNumber = 0 + private val TAV_QLUT = intArrayOf(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096) // New tavDecode function that accepts compressed data and decompresses internally fun tavDecodeCompressed(compressedDataPtr: Long, compressedSize: Int, currentRGBAddr: Long, prevRGBAddr: Long, @@ -4375,11 +4376,11 @@ class GraphicsJSR223Delegate(private val vm: VM) { for (tileY in 0 until tilesY) { for (tileX in 0 until tilesX) { - // Read tile header (9 bytes: mode + mvX + mvY + rcf) + // Read tile header (4 bytes: mode + qY + qCo + qCg) val mode = vm.peek(readPtr++).toUint() - val qY = vm.peek(readPtr++).toUint().let { if (it == 0) qYGlobal else it } - val qCo = vm.peek(readPtr++).toUint().let { if (it == 0) qCoGlobal else it } - val qCg = vm.peek(readPtr++).toUint().let { if (it == 0) qCgGlobal else it } + val qY = vm.peek(readPtr++).toUint().let { if (it == 0) qYGlobal else TAV_QLUT[it - 1] } + val qCo = vm.peek(readPtr++).toUint().let { if (it == 0) qCoGlobal else TAV_QLUT[it - 1] } + val qCg = vm.peek(readPtr++).toUint().let { if (it == 0) qCgGlobal else TAV_QLUT[it - 1] } // debug print: raw decompressed bytes /*print("TAV Decode raw bytes (Frame $frameCount, mode: ${arrayOf("SKIP", "INTRA", "DELTA")[mode]}): ") @@ -4428,10 +4429,11 @@ class GraphicsJSR223Delegate(private val vm: VM) { var ptr = readPtr - // Read quantised DWT coefficients for Y, Co, Cg channels + // Read quantised DWT coefficients for Y, Co, Cg, and Alpha channels val quantisedY = ShortArray(coeffCount) val quantisedCo = ShortArray(coeffCount) val quantisedCg = ShortArray(coeffCount) + val quantisedAlpha = ShortArray(coeffCount) // First, we need to determine the size of compressed data for each channel // Read a large buffer to work with significance map format @@ -4471,7 +4473,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { } // Use variable channel layout concatenated maps format - postprocessCoefficientsVariableLayout(coeffBuffer, 0, coeffCount, channelLayout, quantisedY, quantisedCo, quantisedCg, null) + postprocessCoefficientsVariableLayout(coeffBuffer, 0, coeffCount, channelLayout, quantisedY, quantisedCo, quantisedCg, quantisedAlpha) // Calculate total size for variable channel layout format val numChannels = when (channelLayout) { @@ -4671,12 +4673,12 @@ class GraphicsJSR223Delegate(private val vm: VM) { if (isLossless) { tavApplyDWTInverseMultiLevel(yTile, tileWidth, tileHeight, decompLevels, 0, TavSharpenNormal) - tavApplyDWTInverseMultiLevel(coTile, tileWidth, tileHeight, decompLevels, 0, TavSharpenNormal) - tavApplyDWTInverseMultiLevel(cgTile, tileWidth, tileHeight, decompLevels, 0, TavSharpenNormal) + tavApplyDWTInverseMultiLevel(coTile, tileWidth, tileHeight, decompLevels, 0, TavNullFilter) + tavApplyDWTInverseMultiLevel(cgTile, tileWidth, tileHeight, decompLevels, 0, TavNullFilter) } else { tavApplyDWTInverseMultiLevel(yTile, tileWidth, tileHeight, decompLevels, waveletFilter, TavSharpenNormal) - tavApplyDWTInverseMultiLevel(coTile, tileWidth, tileHeight, decompLevels, waveletFilter, TavSharpenNormal) - tavApplyDWTInverseMultiLevel(cgTile, tileWidth, tileHeight, decompLevels, waveletFilter, TavSharpenNormal) + tavApplyDWTInverseMultiLevel(coTile, tileWidth, tileHeight, decompLevels, waveletFilter, TavNullFilter) + tavApplyDWTInverseMultiLevel(cgTile, tileWidth, tileHeight, decompLevels, waveletFilter, TavNullFilter) } // Debug: Check coefficient values after inverse DWT @@ -4706,6 +4708,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { val finalYTile: FloatArray val finalCoTile: FloatArray val finalCgTile: FloatArray + val finalAlphaTile: FloatArray if (isMonoblock) { // Monoblock mode: use full frame data directly (no padding to extract) @@ -5080,6 +5083,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { val deltaY = ShortArray(coeffCount) val deltaCo = ShortArray(coeffCount) val deltaCg = ShortArray(coeffCount) + val deltaAlpha = ShortArray(coeffCount) // Read using significance map format for deltas too val maxPossibleSize = coeffCount * 3 * 2 + (coeffCount + 7) / 8 * 3 // Worst case @@ -5117,7 +5121,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { } // Use variable channel layout concatenated maps format for deltas - postprocessCoefficientsVariableLayout(coeffBuffer, 0, coeffCount, channelLayout, deltaY, deltaCo, deltaCg, null) + postprocessCoefficientsVariableLayout(coeffBuffer, 0, coeffCount, channelLayout, deltaY, deltaCo, deltaCg, deltaAlpha) // Calculate total size for variable channel layout format (deltas) val numChannels = when (channelLayout) { @@ -5178,12 +5182,12 @@ class GraphicsJSR223Delegate(private val vm: VM) { if (isLossless) { tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, 0, TavSharpenNormal) - tavApplyDWTInverseMultiLevel(currentCo, tileWidth, tileHeight, decompLevels, 0, TavSharpenNormal) - tavApplyDWTInverseMultiLevel(currentCg, tileWidth, tileHeight, decompLevels, 0, TavSharpenNormal) + tavApplyDWTInverseMultiLevel(currentCo, tileWidth, tileHeight, decompLevels, 0, TavNullFilter) + tavApplyDWTInverseMultiLevel(currentCg, tileWidth, tileHeight, decompLevels, 0, TavNullFilter) } else { tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, waveletFilter, TavSharpenNormal) - tavApplyDWTInverseMultiLevel(currentCo, tileWidth, tileHeight, decompLevels, waveletFilter, TavSharpenNormal) - tavApplyDWTInverseMultiLevel(currentCg, tileWidth, tileHeight, decompLevels, waveletFilter, TavSharpenNormal) + tavApplyDWTInverseMultiLevel(currentCo, tileWidth, tileHeight, decompLevels, waveletFilter, TavNullFilter) + tavApplyDWTInverseMultiLevel(currentCg, tileWidth, tileHeight, decompLevels, waveletFilter, TavNullFilter) } // Debug: Check coefficient values after inverse DWT @@ -5213,6 +5217,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { val finalYTile: FloatArray val finalCoTile: FloatArray val finalCgTile: FloatArray + val finalAlphaTile: FloatArray if (isMonoblock) { // Monoblock mode: use full frame data directly (no padding to extract) @@ -5318,6 +5323,10 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } + private object TavNullFilter : TavWaveletFilter { + override fun getCoeffMultiplier(level: Int): Float = 1.0f + } + private fun tavApplyDWTInverseMultiLevel(data: FloatArray, width: Int, height: Int, levels: Int, filterType: Int, sharpenFilter: TavWaveletFilter) { // Multi-level inverse DWT - reconstruct from smallest to largest (reverse of encoder) val maxSize = kotlin.math.max(width, height) diff --git a/video_encoder/decoder_tav.c b/video_encoder/decoder_tav.c index ed5d5dd..a503869 100644 --- a/video_encoder/decoder_tav.c +++ b/video_encoder/decoder_tav.c @@ -21,11 +21,24 @@ #define TAV_PACKET_SUBTITLE 0x30 #define TAV_PACKET_SYNC 0xFF +// Channel layout constants (bit-field design) +#define CHANNEL_LAYOUT_YCOCG 0 // Y-Co-Cg (000: no alpha, has chroma, has luma) +#define CHANNEL_LAYOUT_YCOCG_A 1 // Y-Co-Cg-A (001: has alpha, has chroma, has luma) +#define CHANNEL_LAYOUT_Y_ONLY 2 // Y only (010: no alpha, no chroma, has luma) +#define CHANNEL_LAYOUT_Y_A 3 // Y-A (011: has alpha, no chroma, has luma) +#define CHANNEL_LAYOUT_COCG 4 // Co-Cg (100: no alpha, has chroma, no luma) +#define CHANNEL_LAYOUT_COCG_A 5 // Co-Cg-A (101: has alpha, has chroma, no luma) + // Utility macros static inline int CLAMP(int x, int min, int max) { return x < min ? min : (x > max ? max : x); } +// Helper function to check if alpha channel is needed for given channel layout +static inline int needs_alpha_channel(int channel_layout) { + return (channel_layout & 1) != 0; // bit 0: 1 means has alpha +} + // Decoder: reconstruct coefficients from significance map static void postprocess_coefficients(uint8_t *compressed_data, int coeff_count, int16_t *output_coeffs) { int map_bytes = (coeff_count + 7) / 8; @@ -137,8 +150,9 @@ typedef struct { } tav_decoder_t; // TAV Perceptual quantization constants (must match Kotlin decoder exactly) -static const float ANISOTROPY_MULT[] = {1.8f, 1.6f, 1.4f, 1.2f, 1.0f, 1.0f}; -static const float ANISOTROPY_BIAS[] = {0.2f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f}; +static const int QLUT[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096}; +static const float ANISOTROPY_MULT[] = {2.0f, 1.8f, 1.6f, 1.4f, 1.2f, 1.0f}; +static const float ANISOTROPY_BIAS[] = {0.4f, 0.2f, 0.1f, 0.0f, 0.0f, 0.0f}; static const float ANISOTROPY_MULT_CHROMA[] = {6.6f, 5.5f, 4.4f, 3.3f, 2.2f, 1.1f}; static const float ANISOTROPY_BIAS_CHROMA[] = {1.0f, 0.8f, 0.6f, 0.4f, 0.2f, 0.0f}; static const float FOUR_PIXEL_DETAILER = 0.88f; @@ -623,9 +637,9 @@ static int decode_frame(tav_decoder_t *decoder) { uint8_t qco_override = *ptr++; uint8_t qcg_override = *ptr++; - int qy = qy_override ? qy_override : decoder->header.quantiser_y; - int qco = qco_override ? qco_override : decoder->header.quantiser_co; - int qcg = qcg_override ? qcg_override : decoder->header.quantiser_cg; + int qy = QLUT[qy_override ? qy_override : decoder->header.quantiser_y]; + int qco = QLUT[qco_override ? qco_override : decoder->header.quantiser_co]; + int qcg = QLUT[qcg_override ? qcg_override : decoder->header.quantiser_cg]; if (mode == TAV_MODE_SKIP) { // Copy from reference frame diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index 00d611f..08ba6b7 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -89,6 +89,12 @@ static const channel_layout_config_t channel_layouts[] = { {CHANNEL_LAYOUT_COCG_A, 3, {NULL, "Co", "Cg", "A"}, 0, 1, 1, 1} // 5: Co-Cg-A }; +// Helper function to check if alpha channel is needed for given channel layout +static int needs_alpha_channel(int channel_layout) { + if (channel_layout < 0 || channel_layout >= 6) return 0; + return channel_layouts[channel_layout].has_alpha; +} + // Default settings #define DEFAULT_WIDTH 560 #define DEFAULT_HEIGHT 448 @@ -173,13 +179,14 @@ static int validate_mp2_bitrate(int bitrate) { return 0; // Invalid bitrate } +static const int QLUT[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096}; + // Quality level to quantisation mapping for different channels -static const int QUALITY_Y[] = {60, 42, 25, 12, 6, 2}; -static const int QUALITY_CO[] = {120, 90, 60, 30, 15, 3}; -static const int QUALITY_CG[] = {240, 180, 120, 60, 30, 5}; -//static const int QUALITY_Y[] = { 25, 12, 6, 3, 2, 1}; -//static const int QUALITY_CO[] = {60, 30, 15, 7, 5, 2}; -//static const int QUALITY_CG[] = {120, 60, 30, 15, 10, 4}; +// the values are indices to the QLUT +static const int QUALITY_Y[] = {59, 41, 24, 11, 5, 1}; // 60, 42, 25, 12, 6, 2 +static const int QUALITY_CO[] = {123, 108, 91, 76, 59, 29}; // 240, 180, 120, 90, 60, 30 +static const int QUALITY_CG[] = {132, 119, 100, 87, 68, 37}; // 296, 224, 148, 112, 74, 38 +static const int QUALITY_ALPHA[] = {59, 41, 24, 11, 5, 1}; // psychovisual tuning parameters static const float ANISOTROPY_MULT[] = {2.0f, 1.8f, 1.6f, 1.4f, 1.2f, 1.0f}; @@ -256,7 +263,7 @@ typedef struct { // Frame buffers - ping-pong implementation uint8_t *frame_rgb[2]; // [0] and [1] alternate between current and previous int frame_buffer_index; // 0 or 1, indicates which set is "current" - float *current_frame_y, *current_frame_co, *current_frame_cg; + float *current_frame_y, *current_frame_co, *current_frame_cg, *current_frame_alpha; // Convenience pointers (updated each frame to point to current ping-pong buffers) uint8_t *current_frame_rgb; @@ -290,11 +297,13 @@ typedef struct { int16_t *reusable_quantised_y; int16_t *reusable_quantised_co; int16_t *reusable_quantised_cg; - + int16_t *reusable_quantised_alpha; + // Coefficient delta storage for P-frames (previous frame's coefficients) - float *previous_coeffs_y; // Previous frame Y coefficients for all tiles - float *previous_coeffs_co; // Previous frame Co coefficients for all tiles - float *previous_coeffs_cg; // Previous frame Cg coefficients for all tiles + float *previous_coeffs_y; // Previous frame Y coefficients for all tiles + float *previous_coeffs_co; // Previous frame Co coefficients for all tiles + float *previous_coeffs_cg; // Previous frame Cg coefficients for all tiles + float *previous_coeffs_alpha; // Previous frame Alpha coefficients for all tiles int previous_coeffs_allocated; // Flag to track allocation // Statistics @@ -489,6 +498,7 @@ static int initialise_encoder(tav_encoder_t *enc) { enc->current_frame_y = malloc(frame_size * sizeof(float)); enc->current_frame_co = malloc(frame_size * sizeof(float)); enc->current_frame_cg = malloc(frame_size * sizeof(float)); + enc->current_frame_alpha = malloc(frame_size * sizeof(float)); // Allocate tile structures enc->tiles = malloc(num_tiles * sizeof(dwt_tile_t)); @@ -517,19 +527,21 @@ static int initialise_encoder(tav_encoder_t *enc) { enc->reusable_quantised_y = malloc(coeff_count_per_tile * sizeof(int16_t)); enc->reusable_quantised_co = malloc(coeff_count_per_tile * sizeof(int16_t)); enc->reusable_quantised_cg = malloc(coeff_count_per_tile * sizeof(int16_t)); + enc->reusable_quantised_alpha = malloc(coeff_count_per_tile * sizeof(int16_t)); // Allocate coefficient delta storage for P-frames (per-tile coefficient storage) size_t total_coeff_size = num_tiles * coeff_count_per_tile * sizeof(float); enc->previous_coeffs_y = malloc(total_coeff_size); enc->previous_coeffs_co = malloc(total_coeff_size); enc->previous_coeffs_cg = malloc(total_coeff_size); + enc->previous_coeffs_alpha = malloc(total_coeff_size); enc->previous_coeffs_allocated = 0; // Will be set to 1 after first I-frame if (!enc->frame_rgb[0] || !enc->frame_rgb[1] || - !enc->current_frame_y || !enc->current_frame_co || !enc->current_frame_cg || + !enc->current_frame_y || !enc->current_frame_co || !enc->current_frame_cg || !enc->current_frame_alpha || !enc->tiles || !enc->zstd_ctx || !enc->compressed_buffer || - !enc->reusable_quantised_y || !enc->reusable_quantised_co || !enc->reusable_quantised_cg || - !enc->previous_coeffs_y || !enc->previous_coeffs_co || !enc->previous_coeffs_cg) { + !enc->reusable_quantised_y || !enc->reusable_quantised_co || !enc->reusable_quantised_cg || !enc->reusable_quantised_alpha || + !enc->previous_coeffs_y || !enc->previous_coeffs_co || !enc->previous_coeffs_cg || !enc->previous_coeffs_alpha) { return -1; } @@ -1360,9 +1372,9 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y, buffer[offset++] = 0; // qCo override buffer[offset++] = 0; // qCg override // technically, putting this in here would create three redundant copies of the same value, but it's much easier to code this way :v - int this_frame_qY = enc->quantiser_y; - int this_frame_qCo = enc->quantiser_co; - int this_frame_qCg = enc->quantiser_cg; + int this_frame_qY = QLUT[enc->quantiser_y]; + int this_frame_qCo = QLUT[enc->quantiser_co]; + int this_frame_qCg = QLUT[enc->quantiser_cg]; if (mode == TAV_MODE_SKIP) { // No coefficient data for SKIP/MOTION modes @@ -1377,6 +1389,7 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y, int16_t *quantised_y = enc->reusable_quantised_y; int16_t *quantised_co = enc->reusable_quantised_co; int16_t *quantised_cg = enc->reusable_quantised_cg; + int16_t *quantised_alpha = enc->reusable_quantised_alpha; // Debug: check DWT coefficients before quantisation /*if (tile_x == 0 && tile_y == 0) { @@ -1881,6 +1894,36 @@ static void rgb_to_colour_space_frame(tav_encoder_t *enc, const uint8_t *rgb, } } +// RGBA to colour space conversion for full frames with alpha channel +static void rgba_to_colour_space_frame(tav_encoder_t *enc, const uint8_t *rgba, + float *c1, float *c2, float *c3, float *alpha, + int width, int height) { + const int total_pixels = width * height; + + if (enc->ictcp_mode) { + // ICtCp mode with alpha + for (int i = 0; i < total_pixels; i++) { + double I, Ct, Cp; + srgb8_to_ictcp_hlg(rgba[i*4], rgba[i*4+1], rgba[i*4+2], &I, &Ct, &Cp); + c1[i] = (float)I; + c2[i] = (float)Ct; + c3[i] = (float)Cp; + alpha[i] = (float)rgba[i*4+3] / 255.0f; // Normalize alpha to [0,1] + } + } else { + // YCoCg mode with alpha - extract RGB first, then convert + uint8_t *temp_rgb = malloc(total_pixels * 3); + for (int i = 0; i < total_pixels; i++) { + temp_rgb[i*3] = rgba[i*4]; // R + temp_rgb[i*3+1] = rgba[i*4+1]; // G + temp_rgb[i*3+2] = rgba[i*4+2]; // B + alpha[i] = (float)rgba[i*4+3] / 255.0f; // Normalize alpha to [0,1] + } + rgb_to_ycocg(temp_rgb, c1, c2, c3, width, height); + free(temp_rgb); + } +} + // Write TAV file header static int write_tav_header(tav_encoder_t *enc) { if (!enc->output_fp) return -1; @@ -2917,9 +2960,9 @@ int main(int argc, char *argv[]) { cleanup_encoder(enc); return 1; } - enc->quantiser_y = CLAMP(enc->quantiser_y, 1, 255); - enc->quantiser_co = CLAMP(enc->quantiser_co, 1, 255); - enc->quantiser_cg = CLAMP(enc->quantiser_cg, 1, 255); + enc->quantiser_y = CLAMP(enc->quantiser_y, 0, 255); + enc->quantiser_co = CLAMP(enc->quantiser_co, 0, 255); + enc->quantiser_cg = CLAMP(enc->quantiser_cg, 0, 255); break; case 'w': enc->wavelet_filter = CLAMP(atoi(optarg), 0, 255); @@ -3051,9 +3094,9 @@ int main(int argc, char *argv[]) { printf("Colour space: %s\n", enc->ictcp_mode ? "ICtCp" : "YCoCg-R"); printf("Quantisation: %s\n", enc->perceptual_tuning ? "Perceptual (HVS-optimised)" : "Uniform (legacy)"); if (enc->ictcp_mode) { - printf("Base quantiser: I=%d, Ct=%d, Cp=%d\n", enc->quantiser_y, enc->quantiser_co, enc->quantiser_cg); + printf("Base quantiser: I=%d, Ct=%d, Cp=%d\n", QLUT[enc->quantiser_y], QLUT[enc->quantiser_co], QLUT[enc->quantiser_cg]); } else { - printf("Base quantiser: Y=%d, Co=%d, Cg=%d\n", enc->quantiser_y, enc->quantiser_co, enc->quantiser_cg); + printf("Base quantiser: Y=%d, Co=%d, Cg=%d\n", QLUT[enc->quantiser_y], QLUT[enc->quantiser_co], QLUT[enc->quantiser_cg]); } if (enc->perceptual_tuning) { printf("Perceptual tuning enabled\n"); @@ -3357,6 +3400,10 @@ static void cleanup_encoder(tav_encoder_t *enc) { free(enc->subtitle_file); free(enc->frame_rgb[0]); free(enc->frame_rgb[1]); + free(enc->current_frame_y); + free(enc->current_frame_co); + free(enc->current_frame_cg); + free(enc->current_frame_alpha); free(enc->tiles); free(enc->compressed_buffer); free(enc->mp2_buffer); @@ -3365,11 +3412,13 @@ static void cleanup_encoder(tav_encoder_t *enc) { free(enc->reusable_quantised_y); free(enc->reusable_quantised_co); free(enc->reusable_quantised_cg); + free(enc->reusable_quantised_alpha); // Free coefficient delta storage free(enc->previous_coeffs_y); free(enc->previous_coeffs_co); free(enc->previous_coeffs_cg); + free(enc->previous_coeffs_alpha); // Free subtitle list if (enc->subtitles) {