Apparently you can push the chroma extremely far

This commit is contained in:
minjaesong
2025-09-30 01:05:14 +09:00
parent 836e69a40b
commit 41a8b578b5
5 changed files with 387 additions and 50 deletions

View File

@@ -498,6 +498,8 @@ let oldBgcol = [BIAS_LIGHTING_MIN, BIAS_LIGHTING_MIN, BIAS_LIGHTING_MIN]
let notifHidden = false let notifHidden = false
// Quantiser lookup table: maps an 8-bit quantiser index (0..255) to the actual
// quantiser value (1..4096). The step between entries grows by band:
// 1 up to 64, 2 up to 128, 4 up to 256, 8 up to 512, 16 up to 1024,
// 32 up to 2048 and 64 up to 4096.
const QLUT = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096];
function getRGBfromScr(x, y) { function getRGBfromScr(x, y) {
let offset = y * WIDTH + x let offset = y * WIDTH + x
let rg = sys.peek(-1048577 - offset) let rg = sys.peek(-1048577 - offset)
@@ -723,7 +725,7 @@ try {
compressedSize, // Size of compressed data compressedSize, // Size of compressed data
CURRENT_RGB_ADDR, PREV_RGB_ADDR, // RGB buffer pointers CURRENT_RGB_ADDR, PREV_RGB_ADDR, // RGB buffer pointers
header.width, header.height, header.width, header.height,
header.qualityLevel, header.qualityY, header.qualityCo, header.qualityCg, header.qualityLevel, QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
header.channelLayout, // Channel layout for variable processing header.channelLayout, // Channel layout for variable processing
trueFrameCount, trueFrameCount,
header.waveletFilter, // TAV-specific parameter header.waveletFilter, // TAV-specific parameter

View File

@@ -906,9 +906,9 @@ transmission capability, and region-of-interest coding.
- 16 = DD-4 (Four-point interpolating Deslauriers-Dubuc; experimental) - 16 = DD-4 (Four-point interpolating Deslauriers-Dubuc; experimental)
- 255 = Haar (demonstration purpose only) - 255 = Haar (demonstration purpose only)
uint8 Decomposition Levels: number of DWT levels (1-6+) uint8 Decomposition Levels: number of DWT levels (1-6+)
uint8 Quantiser Index for Y channel (1: lossless, 255: potato) uint8 Quantiser Index for Y channel (uses exponential numeric system; 0: lossless, 255: potato)
uint8 Quantiser Index for Co channel (1: lossless, 255: potato) uint8 Quantiser Index for Co channel (uses exponential numeric system; 0: lossless, 255: potato)
uint8 Quantiser Index for Cg channel (1: lossless, 255: potato) uint8 Quantiser Index for Cg channel (uses exponential numeric system; 0: lossless, 255: potato)
uint8 Extra Feature Flags (must be ignored for still images) uint8 Extra Feature Flags (must be ignored for still images)
- bit 0 = has audio - bit 0 = has audio
- bit 1 = has subtitle - bit 1 = has subtitle
@@ -976,9 +976,9 @@ transmission capability, and region-of-interest coding.
0x00 = SKIP (copy from previous frame) 0x00 = SKIP (copy from previous frame)
0x01 = INTRA (DWT-coded) 0x01 = INTRA (DWT-coded)
0x02 = DELTA (DWT delta) 0x02 = DELTA (DWT delta)
uint8 Quantiser override Y (use 0 to disable overriding; shared with A channel) uint8 Quantiser override Y (uses exponential numeric system; stored with index bias of 1 (127->252, 255->4032); use 0 to disable overriding; shared with A channel)
uint8 Quantiser override Co (use 0 to disable overriding) uint8 Quantiser override Co (uses exponential numeric system; stored with index bias of 1 (127->252, 255->4032); use 0 to disable overriding)
uint8 Quantiser override Cg (use 0 to disable overriding) uint8 Quantiser override Cg (uses exponential numeric system; stored with index bias of 1 (127->252, 255->4032); use 0 to disable overriding)
- note: quantiser overrides are always present regardless of the channel layout - note: quantiser overrides are always present regardless of the channel layout
## Coefficient Storage Format (Significance Map Compression) ## Coefficient Storage Format (Significance Map Compression)
@@ -1081,7 +1081,7 @@ TAV supports two colour spaces:
Perceptual versions (5-6) apply HVS-optimized quantization weights per channel, Perceptual versions (5-6) apply HVS-optimized quantization weights per channel,
while uniform versions (3-4) use consistent quantization across all subbands. while uniform versions (3-4) use consistent quantization across all subbands.
When Alpha channel is stored, they must be sRGB nonlinearised before DWT and quantisation. The encoder expects linear alpha.
## Compression Features ## Compression Features
- Single DWT tiles vs 16x16 DCT blocks in TEV - Single DWT tiles vs 16x16 DCT blocks in TEV
@@ -1113,6 +1113,269 @@ Uses same Simple Subtitle Format (SSF) as TEV for text overlay functionality.
## NTSC Framerate handling ## NTSC Framerate handling
Unlike the TEV format, TAV encoder emits extra sync packet for every 1000th frames. Decoder can just play the video without any special treatment. Unlike the TEV format, TAV encoder emits extra sync packet for every 1000th frames. Decoder can just play the video without any special treatment.
## Exponential Numeric System
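This system maps an 8-bit index [0..255] to a number in [1..4096]. The step between consecutive numbers grows with the number: 1 up to 64, 2 up to 128, 4 up to 256, 8 up to 512, 16 up to 1024, 32 up to 2048 and 64 up to 4096.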
Number|Index
------+-----
1|0
2|1
3|2
4|3
5|4
6|5
7|6
8|7
9|8
10|9
11|10
12|11
13|12
14|13
15|14
16|15
17|16
18|17
19|18
20|19
21|20
22|21
23|22
24|23
25|24
26|25
27|26
28|27
29|28
30|29
31|30
32|31
33|32
34|33
35|34
36|35
37|36
38|37
39|38
40|39
41|40
42|41
43|42
44|43
45|44
46|45
47|46
48|47
49|48
50|49
51|50
52|51
53|52
54|53
55|54
56|55
57|56
58|57
59|58
60|59
61|60
62|61
63|62
64|63
66|64
68|65
70|66
72|67
74|68
76|69
78|70
80|71
82|72
84|73
86|74
88|75
90|76
92|77
94|78
96|79
98|80
100|81
102|82
104|83
106|84
108|85
110|86
112|87
114|88
116|89
118|90
120|91
122|92
124|93
126|94
128|95
132|96
136|97
140|98
144|99
148|100
152|101
156|102
160|103
164|104
168|105
172|106
176|107
180|108
184|109
188|110
192|111
196|112
200|113
204|114
208|115
212|116
216|117
220|118
224|119
228|120
232|121
236|122
240|123
244|124
248|125
252|126
256|127
264|128
272|129
280|130
288|131
296|132
304|133
312|134
320|135
328|136
336|137
344|138
352|139
360|140
368|141
376|142
384|143
392|144
400|145
408|146
416|147
424|148
432|149
440|150
448|151
456|152
464|153
472|154
480|155
488|156
496|157
504|158
512|159
528|160
544|161
560|162
576|163
592|164
608|165
624|166
640|167
656|168
672|169
688|170
704|171
720|172
736|173
752|174
768|175
784|176
800|177
816|178
832|179
848|180
864|181
880|182
896|183
912|184
928|185
944|186
960|187
976|188
992|189
1008|190
1024|191
1056|192
1088|193
1120|194
1152|195
1184|196
1216|197
1248|198
1280|199
1312|200
1344|201
1376|202
1408|203
1440|204
1472|205
1504|206
1536|207
1568|208
1600|209
1632|210
1664|211
1696|212
1728|213
1760|214
1792|215
1824|216
1856|217
1888|218
1920|219
1952|220
1984|221
2016|222
2048|223
2112|224
2176|225
2240|226
2304|227
2368|228
2432|229
2496|230
2560|231
2624|232
2688|233
2752|234
2816|235
2880|236
2944|237
3008|238
3072|239
3136|240
3200|241
3264|242
3328|243
3392|244
3456|245
3520|246
3584|247
3648|248
3712|249
3776|250
3840|251
3904|252
3968|253
4032|254
4096|255
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
TSVM Universal Cue format TSVM Universal Cue format

View File

@@ -4298,6 +4298,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
private val tavDebugFrameTarget = -1 // use negative number to disable the debug print private val tavDebugFrameTarget = -1 // use negative number to disable the debug print
private var tavDebugCurrentFrameNumber = 0 private var tavDebugCurrentFrameNumber = 0
// Quantiser lookup table: maps an 8-bit quantiser index (0..255) to the actual
// quantiser value (1..4096). The step between entries grows by band:
// 1 up to 64, 2 up to 128, 4 up to 256, 8 up to 512, 16 up to 1024,
// 32 up to 2048 and 64 up to 4096.
private val TAV_QLUT = intArrayOf(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096)
// New tavDecode function that accepts compressed data and decompresses internally // New tavDecode function that accepts compressed data and decompresses internally
fun tavDecodeCompressed(compressedDataPtr: Long, compressedSize: Int, currentRGBAddr: Long, prevRGBAddr: Long, fun tavDecodeCompressed(compressedDataPtr: Long, compressedSize: Int, currentRGBAddr: Long, prevRGBAddr: Long,
@@ -4375,11 +4376,11 @@ class GraphicsJSR223Delegate(private val vm: VM) {
for (tileY in 0 until tilesY) { for (tileY in 0 until tilesY) {
for (tileX in 0 until tilesX) { for (tileX in 0 until tilesX) {
// Read tile header (9 bytes: mode + mvX + mvY + rcf) // Read tile header (4 bytes: mode + qY + qCo + qCg)
val mode = vm.peek(readPtr++).toUint() val mode = vm.peek(readPtr++).toUint()
val qY = vm.peek(readPtr++).toUint().let { if (it == 0) qYGlobal else it } val qY = vm.peek(readPtr++).toUint().let { if (it == 0) qYGlobal else TAV_QLUT[it - 1] }
val qCo = vm.peek(readPtr++).toUint().let { if (it == 0) qCoGlobal else it } val qCo = vm.peek(readPtr++).toUint().let { if (it == 0) qCoGlobal else TAV_QLUT[it - 1] }
val qCg = vm.peek(readPtr++).toUint().let { if (it == 0) qCgGlobal else it } val qCg = vm.peek(readPtr++).toUint().let { if (it == 0) qCgGlobal else TAV_QLUT[it - 1] }
// debug print: raw decompressed bytes // debug print: raw decompressed bytes
/*print("TAV Decode raw bytes (Frame $frameCount, mode: ${arrayOf("SKIP", "INTRA", "DELTA")[mode]}): ") /*print("TAV Decode raw bytes (Frame $frameCount, mode: ${arrayOf("SKIP", "INTRA", "DELTA")[mode]}): ")
@@ -4428,10 +4429,11 @@ class GraphicsJSR223Delegate(private val vm: VM) {
var ptr = readPtr var ptr = readPtr
// Read quantised DWT coefficients for Y, Co, Cg channels // Read quantised DWT coefficients for Y, Co, Cg, and Alpha channels
val quantisedY = ShortArray(coeffCount) val quantisedY = ShortArray(coeffCount)
val quantisedCo = ShortArray(coeffCount) val quantisedCo = ShortArray(coeffCount)
val quantisedCg = ShortArray(coeffCount) val quantisedCg = ShortArray(coeffCount)
val quantisedAlpha = ShortArray(coeffCount)
// First, we need to determine the size of compressed data for each channel // First, we need to determine the size of compressed data for each channel
// Read a large buffer to work with significance map format // Read a large buffer to work with significance map format
@@ -4471,7 +4473,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
} }
// Use variable channel layout concatenated maps format // Use variable channel layout concatenated maps format
postprocessCoefficientsVariableLayout(coeffBuffer, 0, coeffCount, channelLayout, quantisedY, quantisedCo, quantisedCg, null) postprocessCoefficientsVariableLayout(coeffBuffer, 0, coeffCount, channelLayout, quantisedY, quantisedCo, quantisedCg, quantisedAlpha)
// Calculate total size for variable channel layout format // Calculate total size for variable channel layout format
val numChannels = when (channelLayout) { val numChannels = when (channelLayout) {
@@ -4671,12 +4673,12 @@ class GraphicsJSR223Delegate(private val vm: VM) {
if (isLossless) { if (isLossless) {
tavApplyDWTInverseMultiLevel(yTile, tileWidth, tileHeight, decompLevels, 0, TavSharpenNormal) tavApplyDWTInverseMultiLevel(yTile, tileWidth, tileHeight, decompLevels, 0, TavSharpenNormal)
tavApplyDWTInverseMultiLevel(coTile, tileWidth, tileHeight, decompLevels, 0, TavSharpenNormal) tavApplyDWTInverseMultiLevel(coTile, tileWidth, tileHeight, decompLevels, 0, TavNullFilter)
tavApplyDWTInverseMultiLevel(cgTile, tileWidth, tileHeight, decompLevels, 0, TavSharpenNormal) tavApplyDWTInverseMultiLevel(cgTile, tileWidth, tileHeight, decompLevels, 0, TavNullFilter)
} else { } else {
tavApplyDWTInverseMultiLevel(yTile, tileWidth, tileHeight, decompLevels, waveletFilter, TavSharpenNormal) tavApplyDWTInverseMultiLevel(yTile, tileWidth, tileHeight, decompLevels, waveletFilter, TavSharpenNormal)
tavApplyDWTInverseMultiLevel(coTile, tileWidth, tileHeight, decompLevels, waveletFilter, TavSharpenNormal) tavApplyDWTInverseMultiLevel(coTile, tileWidth, tileHeight, decompLevels, waveletFilter, TavNullFilter)
tavApplyDWTInverseMultiLevel(cgTile, tileWidth, tileHeight, decompLevels, waveletFilter, TavSharpenNormal) tavApplyDWTInverseMultiLevel(cgTile, tileWidth, tileHeight, decompLevels, waveletFilter, TavNullFilter)
} }
// Debug: Check coefficient values after inverse DWT // Debug: Check coefficient values after inverse DWT
@@ -4706,6 +4708,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val finalYTile: FloatArray val finalYTile: FloatArray
val finalCoTile: FloatArray val finalCoTile: FloatArray
val finalCgTile: FloatArray val finalCgTile: FloatArray
val finalAlphaTile: FloatArray
if (isMonoblock) { if (isMonoblock) {
// Monoblock mode: use full frame data directly (no padding to extract) // Monoblock mode: use full frame data directly (no padding to extract)
@@ -5080,6 +5083,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val deltaY = ShortArray(coeffCount) val deltaY = ShortArray(coeffCount)
val deltaCo = ShortArray(coeffCount) val deltaCo = ShortArray(coeffCount)
val deltaCg = ShortArray(coeffCount) val deltaCg = ShortArray(coeffCount)
val deltaAlpha = ShortArray(coeffCount)
// Read using significance map format for deltas too // Read using significance map format for deltas too
val maxPossibleSize = coeffCount * 3 * 2 + (coeffCount + 7) / 8 * 3 // Worst case val maxPossibleSize = coeffCount * 3 * 2 + (coeffCount + 7) / 8 * 3 // Worst case
@@ -5117,7 +5121,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
} }
// Use variable channel layout concatenated maps format for deltas // Use variable channel layout concatenated maps format for deltas
postprocessCoefficientsVariableLayout(coeffBuffer, 0, coeffCount, channelLayout, deltaY, deltaCo, deltaCg, null) postprocessCoefficientsVariableLayout(coeffBuffer, 0, coeffCount, channelLayout, deltaY, deltaCo, deltaCg, deltaAlpha)
// Calculate total size for variable channel layout format (deltas) // Calculate total size for variable channel layout format (deltas)
val numChannels = when (channelLayout) { val numChannels = when (channelLayout) {
@@ -5178,12 +5182,12 @@ class GraphicsJSR223Delegate(private val vm: VM) {
if (isLossless) { if (isLossless) {
tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, 0, TavSharpenNormal) tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, 0, TavSharpenNormal)
tavApplyDWTInverseMultiLevel(currentCo, tileWidth, tileHeight, decompLevels, 0, TavSharpenNormal) tavApplyDWTInverseMultiLevel(currentCo, tileWidth, tileHeight, decompLevels, 0, TavNullFilter)
tavApplyDWTInverseMultiLevel(currentCg, tileWidth, tileHeight, decompLevels, 0, TavSharpenNormal) tavApplyDWTInverseMultiLevel(currentCg, tileWidth, tileHeight, decompLevels, 0, TavNullFilter)
} else { } else {
tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, waveletFilter, TavSharpenNormal) tavApplyDWTInverseMultiLevel(currentY, tileWidth, tileHeight, decompLevels, waveletFilter, TavSharpenNormal)
tavApplyDWTInverseMultiLevel(currentCo, tileWidth, tileHeight, decompLevels, waveletFilter, TavSharpenNormal) tavApplyDWTInverseMultiLevel(currentCo, tileWidth, tileHeight, decompLevels, waveletFilter, TavNullFilter)
tavApplyDWTInverseMultiLevel(currentCg, tileWidth, tileHeight, decompLevels, waveletFilter, TavSharpenNormal) tavApplyDWTInverseMultiLevel(currentCg, tileWidth, tileHeight, decompLevels, waveletFilter, TavNullFilter)
} }
// Debug: Check coefficient values after inverse DWT // Debug: Check coefficient values after inverse DWT
@@ -5213,6 +5217,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val finalYTile: FloatArray val finalYTile: FloatArray
val finalCoTile: FloatArray val finalCoTile: FloatArray
val finalCgTile: FloatArray val finalCgTile: FloatArray
val finalAlphaTile: FloatArray
if (isMonoblock) { if (isMonoblock) {
// Monoblock mode: use full frame data directly (no padding to extract) // Monoblock mode: use full frame data directly (no padding to extract)
@@ -5318,6 +5323,10 @@ class GraphicsJSR223Delegate(private val vm: VM) {
} }
} }
// Pass-through wavelet filter: every decomposition level gets a coefficient
// multiplier of exactly 1.0, so coefficients are left unscaled.
private object TavNullFilter : TavWaveletFilter {
    override fun getCoeffMultiplier(level: Int): Float {
        return 1.0f
    }
}
private fun tavApplyDWTInverseMultiLevel(data: FloatArray, width: Int, height: Int, levels: Int, filterType: Int, sharpenFilter: TavWaveletFilter) { private fun tavApplyDWTInverseMultiLevel(data: FloatArray, width: Int, height: Int, levels: Int, filterType: Int, sharpenFilter: TavWaveletFilter) {
// Multi-level inverse DWT - reconstruct from smallest to largest (reverse of encoder) // Multi-level inverse DWT - reconstruct from smallest to largest (reverse of encoder)
val maxSize = kotlin.math.max(width, height) val maxSize = kotlin.math.max(width, height)

View File

@@ -21,11 +21,24 @@
#define TAV_PACKET_SUBTITLE 0x30 #define TAV_PACKET_SUBTITLE 0x30
#define TAV_PACKET_SYNC 0xFF #define TAV_PACKET_SYNC 0xFF
// Channel layout constants (bit-field design).
// Each value is a 3-bit field, as spelled out in the per-constant notes below:
//   bit 0 set   -> layout has an alpha channel
//   bit 1 set   -> layout has NO chroma (Co/Cg) channels
//   bit 2 set   -> layout has NO luma (Y) channel
#define CHANNEL_LAYOUT_YCOCG 0 // Y-Co-Cg (000: no alpha, has chroma, has luma)
#define CHANNEL_LAYOUT_YCOCG_A 1 // Y-Co-Cg-A (001: has alpha, has chroma, has luma)
#define CHANNEL_LAYOUT_Y_ONLY 2 // Y only (010: no alpha, no chroma, has luma)
#define CHANNEL_LAYOUT_Y_A 3 // Y-A (011: has alpha, no chroma, has luma)
#define CHANNEL_LAYOUT_COCG 4 // Co-Cg (100: no alpha, has chroma, no luma)
#define CHANNEL_LAYOUT_COCG_A 5 // Co-Cg-A (101: has alpha, has chroma, no luma)
// Utility macros // Utility macros
static inline int CLAMP(int x, int min, int max) { static inline int CLAMP(int x, int min, int max) {
return x < min ? min : (x > max ? max : x); return x < min ? min : (x > max ? max : x);
} }
// Reports whether a channel layout carries an alpha plane.
// Only bit 0 of the layout value is inspected; a set bit means alpha is present.
// Returns 1 when alpha is present, 0 otherwise.
static inline int needs_alpha_channel(int channel_layout) {
    const int alpha_bit = channel_layout & 1;
    return alpha_bit ? 1 : 0;
}
// Decoder: reconstruct coefficients from significance map // Decoder: reconstruct coefficients from significance map
static void postprocess_coefficients(uint8_t *compressed_data, int coeff_count, int16_t *output_coeffs) { static void postprocess_coefficients(uint8_t *compressed_data, int coeff_count, int16_t *output_coeffs) {
int map_bytes = (coeff_count + 7) / 8; int map_bytes = (coeff_count + 7) / 8;
@@ -137,8 +150,9 @@ typedef struct {
} tav_decoder_t; } tav_decoder_t;
// TAV Perceptual quantization constants (must match Kotlin decoder exactly) // TAV Perceptual quantization constants (must match Kotlin decoder exactly)
static const float ANISOTROPY_MULT[] = {1.8f, 1.6f, 1.4f, 1.2f, 1.0f, 1.0f}; static const int QLUT[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096};
static const float ANISOTROPY_BIAS[] = {0.2f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f}; static const float ANISOTROPY_MULT[] = {2.0f, 1.8f, 1.6f, 1.4f, 1.2f, 1.0f};
static const float ANISOTROPY_BIAS[] = {0.4f, 0.2f, 0.1f, 0.0f, 0.0f, 0.0f};
static const float ANISOTROPY_MULT_CHROMA[] = {6.6f, 5.5f, 4.4f, 3.3f, 2.2f, 1.1f}; static const float ANISOTROPY_MULT_CHROMA[] = {6.6f, 5.5f, 4.4f, 3.3f, 2.2f, 1.1f};
static const float ANISOTROPY_BIAS_CHROMA[] = {1.0f, 0.8f, 0.6f, 0.4f, 0.2f, 0.0f}; static const float ANISOTROPY_BIAS_CHROMA[] = {1.0f, 0.8f, 0.6f, 0.4f, 0.2f, 0.0f};
static const float FOUR_PIXEL_DETAILER = 0.88f; static const float FOUR_PIXEL_DETAILER = 0.88f;
@@ -623,9 +637,9 @@ static int decode_frame(tav_decoder_t *decoder) {
uint8_t qco_override = *ptr++; uint8_t qco_override = *ptr++;
uint8_t qcg_override = *ptr++; uint8_t qcg_override = *ptr++;
int qy = qy_override ? qy_override : decoder->header.quantiser_y; int qy = QLUT[qy_override ? qy_override : decoder->header.quantiser_y];
int qco = qco_override ? qco_override : decoder->header.quantiser_co; int qco = QLUT[qco_override ? qco_override : decoder->header.quantiser_co];
int qcg = qcg_override ? qcg_override : decoder->header.quantiser_cg; int qcg = QLUT[qcg_override ? qcg_override : decoder->header.quantiser_cg];
if (mode == TAV_MODE_SKIP) { if (mode == TAV_MODE_SKIP) {
// Copy from reference frame // Copy from reference frame

View File

@@ -89,6 +89,12 @@ static const channel_layout_config_t channel_layouts[] = {
{CHANNEL_LAYOUT_COCG_A, 3, {NULL, "Co", "Cg", "A"}, 0, 1, 1, 1} // 5: Co-Cg-A {CHANNEL_LAYOUT_COCG_A, 3, {NULL, "Co", "Cg", "A"}, 0, 1, 1, 1} // 5: Co-Cg-A
}; };
// Helper function to check if alpha channel is needed for given channel layout.
// The layout value is used as an index into channel_layouts[]; values outside
// the table are treated as "no alpha" (returns 0). Otherwise returns the
// has_alpha field of the matching table entry.
static int needs_alpha_channel(int channel_layout) {
    // Derive the bound from the table itself so it cannot drift out of sync
    // when layouts are added or removed.
    const int layout_count = (int)(sizeof(channel_layouts) / sizeof(channel_layouts[0]));
    if (channel_layout < 0 || channel_layout >= layout_count) return 0;
    return channel_layouts[channel_layout].has_alpha;
}
// Default settings // Default settings
#define DEFAULT_WIDTH 560 #define DEFAULT_WIDTH 560
#define DEFAULT_HEIGHT 448 #define DEFAULT_HEIGHT 448
@@ -173,13 +179,14 @@ static int validate_mp2_bitrate(int bitrate) {
return 0; // Invalid bitrate return 0; // Invalid bitrate
} }
// Quantiser lookup table: maps an 8-bit quantiser index (0..255) to the actual
// quantiser value (1..4096). The step between entries grows by band:
// 1 up to 64, 2 up to 128, 4 up to 256, 8 up to 512, 16 up to 1024,
// 32 up to 2048 and 64 up to 4096.
static const int QLUT[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096};
// Quality level to quantisation mapping for different channels // Quality level to quantisation mapping for different channels
static const int QUALITY_Y[] = {60, 42, 25, 12, 6, 2}; // the values are indices to the QLUT
static const int QUALITY_CO[] = {120, 90, 60, 30, 15, 3}; static const int QUALITY_Y[] = {59, 41, 24, 11, 5, 1}; // 60, 42, 25, 12, 6, 2
static const int QUALITY_CG[] = {240, 180, 120, 60, 30, 5}; static const int QUALITY_CO[] = {123, 108, 91, 76, 59, 29}; // 240, 180, 120, 90, 60, 30
//static const int QUALITY_Y[] = { 25, 12, 6, 3, 2, 1}; static const int QUALITY_CG[] = {132, 119, 100, 87, 68, 37}; // 296, 224, 148, 112, 74, 38
//static const int QUALITY_CO[] = {60, 30, 15, 7, 5, 2}; static const int QUALITY_ALPHA[] = {59, 41, 24, 11, 5, 1};
//static const int QUALITY_CG[] = {120, 60, 30, 15, 10, 4};
// psychovisual tuning parameters // psychovisual tuning parameters
static const float ANISOTROPY_MULT[] = {2.0f, 1.8f, 1.6f, 1.4f, 1.2f, 1.0f}; static const float ANISOTROPY_MULT[] = {2.0f, 1.8f, 1.6f, 1.4f, 1.2f, 1.0f};
@@ -256,7 +263,7 @@ typedef struct {
// Frame buffers - ping-pong implementation // Frame buffers - ping-pong implementation
uint8_t *frame_rgb[2]; // [0] and [1] alternate between current and previous uint8_t *frame_rgb[2]; // [0] and [1] alternate between current and previous
int frame_buffer_index; // 0 or 1, indicates which set is "current" int frame_buffer_index; // 0 or 1, indicates which set is "current"
float *current_frame_y, *current_frame_co, *current_frame_cg; float *current_frame_y, *current_frame_co, *current_frame_cg, *current_frame_alpha;
// Convenience pointers (updated each frame to point to current ping-pong buffers) // Convenience pointers (updated each frame to point to current ping-pong buffers)
uint8_t *current_frame_rgb; uint8_t *current_frame_rgb;
@@ -290,11 +297,13 @@ typedef struct {
int16_t *reusable_quantised_y; int16_t *reusable_quantised_y;
int16_t *reusable_quantised_co; int16_t *reusable_quantised_co;
int16_t *reusable_quantised_cg; int16_t *reusable_quantised_cg;
int16_t *reusable_quantised_alpha;
// Coefficient delta storage for P-frames (previous frame's coefficients) // Coefficient delta storage for P-frames (previous frame's coefficients)
float *previous_coeffs_y; // Previous frame Y coefficients for all tiles float *previous_coeffs_y; // Previous frame Y coefficients for all tiles
float *previous_coeffs_co; // Previous frame Co coefficients for all tiles float *previous_coeffs_co; // Previous frame Co coefficients for all tiles
float *previous_coeffs_cg; // Previous frame Cg coefficients for all tiles float *previous_coeffs_cg; // Previous frame Cg coefficients for all tiles
float *previous_coeffs_alpha; // Previous frame Alpha coefficients for all tiles
int previous_coeffs_allocated; // Flag to track allocation int previous_coeffs_allocated; // Flag to track allocation
// Statistics // Statistics
@@ -489,6 +498,7 @@ static int initialise_encoder(tav_encoder_t *enc) {
enc->current_frame_y = malloc(frame_size * sizeof(float)); enc->current_frame_y = malloc(frame_size * sizeof(float));
enc->current_frame_co = malloc(frame_size * sizeof(float)); enc->current_frame_co = malloc(frame_size * sizeof(float));
enc->current_frame_cg = malloc(frame_size * sizeof(float)); enc->current_frame_cg = malloc(frame_size * sizeof(float));
enc->current_frame_alpha = malloc(frame_size * sizeof(float));
// Allocate tile structures // Allocate tile structures
enc->tiles = malloc(num_tiles * sizeof(dwt_tile_t)); enc->tiles = malloc(num_tiles * sizeof(dwt_tile_t));
@@ -517,19 +527,21 @@ static int initialise_encoder(tav_encoder_t *enc) {
enc->reusable_quantised_y = malloc(coeff_count_per_tile * sizeof(int16_t)); enc->reusable_quantised_y = malloc(coeff_count_per_tile * sizeof(int16_t));
enc->reusable_quantised_co = malloc(coeff_count_per_tile * sizeof(int16_t)); enc->reusable_quantised_co = malloc(coeff_count_per_tile * sizeof(int16_t));
enc->reusable_quantised_cg = malloc(coeff_count_per_tile * sizeof(int16_t)); enc->reusable_quantised_cg = malloc(coeff_count_per_tile * sizeof(int16_t));
enc->reusable_quantised_alpha = malloc(coeff_count_per_tile * sizeof(int16_t));
// Allocate coefficient delta storage for P-frames (per-tile coefficient storage) // Allocate coefficient delta storage for P-frames (per-tile coefficient storage)
size_t total_coeff_size = num_tiles * coeff_count_per_tile * sizeof(float); size_t total_coeff_size = num_tiles * coeff_count_per_tile * sizeof(float);
enc->previous_coeffs_y = malloc(total_coeff_size); enc->previous_coeffs_y = malloc(total_coeff_size);
enc->previous_coeffs_co = malloc(total_coeff_size); enc->previous_coeffs_co = malloc(total_coeff_size);
enc->previous_coeffs_cg = malloc(total_coeff_size); enc->previous_coeffs_cg = malloc(total_coeff_size);
enc->previous_coeffs_alpha = malloc(total_coeff_size);
enc->previous_coeffs_allocated = 0; // Will be set to 1 after first I-frame enc->previous_coeffs_allocated = 0; // Will be set to 1 after first I-frame
if (!enc->frame_rgb[0] || !enc->frame_rgb[1] || if (!enc->frame_rgb[0] || !enc->frame_rgb[1] ||
!enc->current_frame_y || !enc->current_frame_co || !enc->current_frame_cg || !enc->current_frame_y || !enc->current_frame_co || !enc->current_frame_cg || !enc->current_frame_alpha ||
!enc->tiles || !enc->zstd_ctx || !enc->compressed_buffer || !enc->tiles || !enc->zstd_ctx || !enc->compressed_buffer ||
!enc->reusable_quantised_y || !enc->reusable_quantised_co || !enc->reusable_quantised_cg || !enc->reusable_quantised_y || !enc->reusable_quantised_co || !enc->reusable_quantised_cg || !enc->reusable_quantised_alpha ||
!enc->previous_coeffs_y || !enc->previous_coeffs_co || !enc->previous_coeffs_cg) { !enc->previous_coeffs_y || !enc->previous_coeffs_co || !enc->previous_coeffs_cg || !enc->previous_coeffs_alpha) {
return -1; return -1;
} }
@@ -1360,9 +1372,9 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
buffer[offset++] = 0; // qCo override buffer[offset++] = 0; // qCo override
buffer[offset++] = 0; // qCg override buffer[offset++] = 0; // qCg override
// technically, putting this in here would create three redundant copies of the same value, but it's much easier to code this way :v // technically, putting this in here would create three redundant copies of the same value, but it's much easier to code this way :v
int this_frame_qY = enc->quantiser_y; int this_frame_qY = QLUT[enc->quantiser_y];
int this_frame_qCo = enc->quantiser_co; int this_frame_qCo = QLUT[enc->quantiser_co];
int this_frame_qCg = enc->quantiser_cg; int this_frame_qCg = QLUT[enc->quantiser_cg];
if (mode == TAV_MODE_SKIP) { if (mode == TAV_MODE_SKIP) {
// No coefficient data for SKIP/MOTION modes // No coefficient data for SKIP/MOTION modes
@@ -1377,6 +1389,7 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
int16_t *quantised_y = enc->reusable_quantised_y; int16_t *quantised_y = enc->reusable_quantised_y;
int16_t *quantised_co = enc->reusable_quantised_co; int16_t *quantised_co = enc->reusable_quantised_co;
int16_t *quantised_cg = enc->reusable_quantised_cg; int16_t *quantised_cg = enc->reusable_quantised_cg;
int16_t *quantised_alpha = enc->reusable_quantised_alpha;
// Debug: check DWT coefficients before quantisation // Debug: check DWT coefficients before quantisation
/*if (tile_x == 0 && tile_y == 0) { /*if (tile_x == 0 && tile_y == 0) {
@@ -1881,6 +1894,36 @@ static void rgb_to_colour_space_frame(tav_encoder_t *enc, const uint8_t *rgb,
} }
} }
// RGBA to colour space conversion for full frames with alpha channel.
//
//   enc     : encoder state; only enc->ictcp_mode is read here
//   rgba    : interleaved 8-bit R,G,B,A input, width*height pixels
//   c1..c3  : output colour planes, one float per pixel (I/Ct/Cp when
//             enc->ictcp_mode is set, otherwise the output of rgb_to_ycocg())
//   alpha   : output alpha plane, scaled from [0,255] to [0,1]
//
// Non-positive dimensions are a no-op. If the temporary RGB buffer needed by
// the YCoCg path cannot be allocated, an error is printed to stderr, alpha is
// still written, and c1..c3 are left untouched.
static void rgba_to_colour_space_frame(tav_encoder_t *enc, const uint8_t *rgba,
                                       float *c1, float *c2, float *c3, float *alpha,
                                       int width, int height) {
    if (width <= 0 || height <= 0) return;

    // size_t arithmetic so that large frames cannot overflow int
    const size_t total_pixels = (size_t)width * (size_t)height;

    // Alpha does not depend on the colour space: normalise it to [0,1] up front
    for (size_t i = 0; i < total_pixels; i++) {
        alpha[i] = (float)rgba[i*4+3] / 255.0f;
    }

    if (enc->ictcp_mode) {
        // ICtCp mode: convert pixel by pixel straight from the RGBA input
        for (size_t i = 0; i < total_pixels; i++) {
            double I, Ct, Cp;
            srgb8_to_ictcp_hlg(rgba[i*4], rgba[i*4+1], rgba[i*4+2], &I, &Ct, &Cp);
            c1[i] = (float)I;
            c2[i] = (float)Ct;
            c3[i] = (float)Cp;
        }
        return;
    }

    // YCoCg mode: rgb_to_ycocg() takes packed RGB, so strip the alpha bytes
    // into a temporary buffer first
    uint8_t *temp_rgb = malloc(total_pixels * 3);
    if (!temp_rgb) {
        fprintf(stderr, "rgba_to_colour_space_frame: out of memory for temporary RGB buffer\n");
        return;
    }
    for (size_t i = 0; i < total_pixels; i++) {
        temp_rgb[i*3]   = rgba[i*4];     // R
        temp_rgb[i*3+1] = rgba[i*4+1];   // G
        temp_rgb[i*3+2] = rgba[i*4+2];   // B
    }
    rgb_to_ycocg(temp_rgb, c1, c2, c3, width, height);
    free(temp_rgb);
}
// Write TAV file header // Write TAV file header
static int write_tav_header(tav_encoder_t *enc) { static int write_tav_header(tav_encoder_t *enc) {
if (!enc->output_fp) return -1; if (!enc->output_fp) return -1;
@@ -2917,9 +2960,9 @@ int main(int argc, char *argv[]) {
cleanup_encoder(enc); cleanup_encoder(enc);
return 1; return 1;
} }
enc->quantiser_y = CLAMP(enc->quantiser_y, 1, 255); enc->quantiser_y = CLAMP(enc->quantiser_y, 0, 255);
enc->quantiser_co = CLAMP(enc->quantiser_co, 1, 255); enc->quantiser_co = CLAMP(enc->quantiser_co, 0, 255);
enc->quantiser_cg = CLAMP(enc->quantiser_cg, 1, 255); enc->quantiser_cg = CLAMP(enc->quantiser_cg, 0, 255);
break; break;
case 'w': case 'w':
enc->wavelet_filter = CLAMP(atoi(optarg), 0, 255); enc->wavelet_filter = CLAMP(atoi(optarg), 0, 255);
@@ -3051,9 +3094,9 @@ int main(int argc, char *argv[]) {
printf("Colour space: %s\n", enc->ictcp_mode ? "ICtCp" : "YCoCg-R"); printf("Colour space: %s\n", enc->ictcp_mode ? "ICtCp" : "YCoCg-R");
printf("Quantisation: %s\n", enc->perceptual_tuning ? "Perceptual (HVS-optimised)" : "Uniform (legacy)"); printf("Quantisation: %s\n", enc->perceptual_tuning ? "Perceptual (HVS-optimised)" : "Uniform (legacy)");
if (enc->ictcp_mode) { if (enc->ictcp_mode) {
printf("Base quantiser: I=%d, Ct=%d, Cp=%d\n", enc->quantiser_y, enc->quantiser_co, enc->quantiser_cg); printf("Base quantiser: I=%d, Ct=%d, Cp=%d\n", QLUT[enc->quantiser_y], QLUT[enc->quantiser_co], QLUT[enc->quantiser_cg]);
} else { } else {
printf("Base quantiser: Y=%d, Co=%d, Cg=%d\n", enc->quantiser_y, enc->quantiser_co, enc->quantiser_cg); printf("Base quantiser: Y=%d, Co=%d, Cg=%d\n", QLUT[enc->quantiser_y], QLUT[enc->quantiser_co], QLUT[enc->quantiser_cg]);
} }
if (enc->perceptual_tuning) { if (enc->perceptual_tuning) {
printf("Perceptual tuning enabled\n"); printf("Perceptual tuning enabled\n");
@@ -3357,6 +3400,10 @@ static void cleanup_encoder(tav_encoder_t *enc) {
free(enc->subtitle_file); free(enc->subtitle_file);
free(enc->frame_rgb[0]); free(enc->frame_rgb[0]);
free(enc->frame_rgb[1]); free(enc->frame_rgb[1]);
free(enc->current_frame_y);
free(enc->current_frame_co);
free(enc->current_frame_cg);
free(enc->current_frame_alpha);
free(enc->tiles); free(enc->tiles);
free(enc->compressed_buffer); free(enc->compressed_buffer);
free(enc->mp2_buffer); free(enc->mp2_buffer);
@@ -3365,11 +3412,13 @@ static void cleanup_encoder(tav_encoder_t *enc) {
free(enc->reusable_quantised_y); free(enc->reusable_quantised_y);
free(enc->reusable_quantised_co); free(enc->reusable_quantised_co);
free(enc->reusable_quantised_cg); free(enc->reusable_quantised_cg);
free(enc->reusable_quantised_alpha);
// Free coefficient delta storage // Free coefficient delta storage
free(enc->previous_coeffs_y); free(enc->previous_coeffs_y);
free(enc->previous_coeffs_co); free(enc->previous_coeffs_co);
free(enc->previous_coeffs_cg); free(enc->previous_coeffs_cg);
free(enc->previous_coeffs_alpha);
// Free subtitle list // Free subtitle list
if (enc->subtitles) { if (enc->subtitles) {