TAV: channel-concatenated coeffs preprocessing

2026-06-08 06:14:04 +09:00 · 2025-09-29 14:42:52 +09:00
parent 5012ca4085
commit 1d3d218238
5 changed files with 339 additions and 81 deletions
--- a/video_encoder/decoder_tav.c
+++ b/video_encoder/decoder_tav.c
@@ -47,6 +47,57 @@ static void postprocess_coefficients(uint8_t *compressed_data, int coeff_count,
    }
 }

+// Decoder: reconstruct the Y/Co/Cg coefficient planes from the concatenated format.
+// Layout: [Y_map][Co_map][Cg_map][Y_vals][Co_vals][Cg_vals]
+// Each map is (coeff_count + 7) / 8 bytes; bit (i % 8) of byte (i / 8) flags coefficient i
+// as non-zero. Each *_vals section stores that channel's non-zero int16_t values in index
+// order, native byte order. Every output array must hold coeff_count entries.
+// Values are copied with memcpy: they start at byte offset 3 * map_bytes, which is odd
+// whenever map_bytes is odd, so dereferencing them as int16_t * would be misaligned.
+// NOTE(review): no buffer length is passed in, so reads cannot be bounds-checked here.
+static void postprocess_coefficients_concatenated(uint8_t *compressed_data, int coeff_count,
+                                                 int16_t *output_y, int16_t *output_co, int16_t *output_cg) {
+    if (coeff_count <= 0) return;  // nothing to decode; keeps the size arithmetic non-negative
+    const size_t map_bytes = ((size_t)coeff_count + 7) / 8;
+
+    const uint8_t *y_map = compressed_data;
+    const uint8_t *co_map = y_map + map_bytes;
+    const uint8_t *cg_map = co_map + map_bytes;
+
+    // Count Y and Co non-zeros: they determine where the Co and Cg value sections begin
+    size_t y_nonzeros = 0, co_nonzeros = 0;
+    for (int i = 0; i < coeff_count; i++) {
+        const unsigned mask = 1u << (i % 8);
+        if (y_map[i / 8] & mask) y_nonzeros++;
+        if (co_map[i / 8] & mask) co_nonzeros++;
+    }
+
+    // Byte cursors into each channel's value section
+    const uint8_t *y_src = cg_map + map_bytes;
+    const uint8_t *co_src = y_src + y_nonzeros * sizeof(int16_t);
+    const uint8_t *cg_src = co_src + co_nonzeros * sizeof(int16_t);
+
+    memset(output_y, 0, (size_t)coeff_count * sizeof(int16_t));  // unflagged coefficients stay zero
+    memset(output_co, 0, (size_t)coeff_count * sizeof(int16_t));
+    memset(output_cg, 0, (size_t)coeff_count * sizeof(int16_t));
+
+    for (int i = 0; i < coeff_count; i++) {
+        const unsigned mask = 1u << (i % 8);
+        if (y_map[i / 8] & mask) {
+            memcpy(&output_y[i], y_src, sizeof(int16_t));
+            y_src += sizeof(int16_t);
+        }
+        if (co_map[i / 8] & mask) {
+            memcpy(&output_co[i], co_src, sizeof(int16_t));
+            co_src += sizeof(int16_t);
+        }
+        if (cg_map[i / 8] & mask) {
+            memcpy(&output_cg[i], cg_src, sizeof(int16_t));
+            cg_src += sizeof(int16_t);
+        }
+    }
+}
+
 // TAV header structure (32 bytes)
 typedef struct {
    uint8_t magic[8];
@@ -588,37 +639,25 @@ static int decode_frame(tav_decoder_t *decoder) {
        int16_t *quantized_co = malloc(coeff_count * sizeof(int16_t));
        int16_t *quantized_cg = malloc(coeff_count * sizeof(int16_t));

-        // Postprocess coefficients from significance map format
-        // First find where each channel's data starts by reading the preprocessing output
-        size_t y_map_bytes = (coeff_count + 7) / 8;
+        // Use concatenated maps format: [Y_map][Co_map][Cg_map][Y_vals][Co_vals][Cg_vals]
+        postprocess_coefficients_concatenated(coeff_ptr, coeff_count, quantized_y, quantized_co, quantized_cg);

-        // Count non-zeros in Y significance map to find Y data size
-        int y_nonzeros = 0;
-        for (int i = 0; i < y_map_bytes; i++) {
-            uint8_t byte = coeff_ptr[i];
-            for (int bit = 0; bit < 8 && i*8+bit < coeff_count; bit++) {
-                if (byte & (1 << bit)) y_nonzeros++;
-            }
+        // Calculate total processed data size for concatenated format
+        int map_bytes = (coeff_count + 7) / 8;
+        int y_nonzeros = 0, co_nonzeros = 0, cg_nonzeros = 0;
+
+        // Count non-zeros in each channel's significance map
+        for (int i = 0; i < coeff_count; i++) {
+            int byte_idx = i / 8;
+            int bit_idx = i % 8;
+
+            if (coeff_ptr[byte_idx] & (1 << bit_idx)) y_nonzeros++;                    // Y map
+            if (coeff_ptr[map_bytes + byte_idx] & (1 << bit_idx)) co_nonzeros++;      // Co map
+            if (coeff_ptr[map_bytes * 2 + byte_idx] & (1 << bit_idx)) cg_nonzeros++; // Cg map
        }
-        size_t y_data_size = y_map_bytes + y_nonzeros * sizeof(int16_t);

-        // Count non-zeros in Co significance map
-        uint8_t *co_ptr = coeff_ptr + y_data_size;
-        int co_nonzeros = 0;
-        for (int i = 0; i < y_map_bytes; i++) {
-            uint8_t byte = co_ptr[i];
-            for (int bit = 0; bit < 8 && i*8+bit < coeff_count; bit++) {
-                if (byte & (1 << bit)) co_nonzeros++;
-            }
-        }
-        size_t co_data_size = y_map_bytes + co_nonzeros * sizeof(int16_t);
-
-        uint8_t *cg_ptr = co_ptr + co_data_size;
-
-        // Decompress each channel
-        postprocess_coefficients(coeff_ptr, coeff_count, quantized_y);
-        postprocess_coefficients(co_ptr, coeff_count, quantized_co);
-        postprocess_coefficients(cg_ptr, coeff_count, quantized_cg);
+        // Total size consumed: 3 maps + all non-zero values
+        size_t total_processed_size = map_bytes * 3 + (y_nonzeros + co_nonzeros + cg_nonzeros) * sizeof(int16_t);

        // Apply dequantization (perceptual for version 5, uniform for earlier versions)
        const int is_perceptual = (decoder->header.version == 5);
--- a/video_encoder/encoder_tav.c
+++ b/video_encoder/encoder_tav.c
@@ -990,6 +990,57 @@ static size_t preprocess_coefficients(int16_t *coeffs, int coeff_count, uint8_t
    return map_bytes + (nonzero_count * sizeof(int16_t));
 }

+// Encoder: pack three coefficient planes using concatenated significance maps.
+// Layout: [Y_map][Co_map][Cg_map][Y_vals][Co_vals][Cg_vals]
+// Each map is (coeff_count + 7) / 8 bytes; bit (i % 8) of byte (i / 8) is set when
+// coefficient i is non-zero. Each *_vals section holds that channel's non-zero int16_t
+// values in index order, native byte order. Returns the number of bytes written.
+// Values are stored with memcpy: they start at byte offset 3 * map_bytes, which is odd
+// whenever map_bytes is odd, so writing through an int16_t * would be misaligned.
+// NOTE(review): output_buffer capacity is not passed in; the caller must reserve enough.
+static size_t preprocess_coefficients_concatenated(int16_t *coeffs_y, int16_t *coeffs_co, int16_t *coeffs_cg,
+                                                   int coeff_count, uint8_t *output_buffer) {
+    if (coeff_count <= 0) return 0;  // empty input: nothing is written
+    const size_t map_bytes = ((size_t)coeff_count + 7) / 8;
+
+    // Y and Co non-zero counts fix where the Co and Cg value sections begin
+    size_t nonzero_y = 0, nonzero_co = 0;
+    for (int i = 0; i < coeff_count; i++) {
+        if (coeffs_y[i] != 0) nonzero_y++;
+        if (coeffs_co[i] != 0) nonzero_co++;
+    }
+
+    uint8_t *y_map = output_buffer;
+    uint8_t *co_map = y_map + map_bytes;
+    uint8_t *cg_map = co_map + map_bytes;
+    uint8_t *y_dst = cg_map + map_bytes;
+    uint8_t *co_dst = y_dst + nonzero_y * sizeof(int16_t);
+    uint8_t *cg_dst = co_dst + nonzero_co * sizeof(int16_t);
+
+    memset(output_buffer, 0, map_bytes * 3);  // the three maps are contiguous
+
+    for (int i = 0; i < coeff_count; i++) {
+        const uint8_t mask = (uint8_t)(1u << (i % 8));
+        if (coeffs_y[i] != 0) {
+            y_map[i / 8] |= mask;
+            memcpy(y_dst, &coeffs_y[i], sizeof(int16_t));
+            y_dst += sizeof(int16_t);
+        }
+        if (coeffs_co[i] != 0) {
+            co_map[i / 8] |= mask;
+            memcpy(co_dst, &coeffs_co[i], sizeof(int16_t));
+            co_dst += sizeof(int16_t);
+        }
+        if (coeffs_cg[i] != 0) {
+            cg_map[i / 8] |= mask;
+            memcpy(cg_dst, &coeffs_cg[i], sizeof(int16_t));
+            cg_dst += sizeof(int16_t);
+        }
+    }
+
+    return (size_t)(cg_dst - output_buffer);  // cg_dst is one past the last value written
+}
+
 // Quantisation for DWT subbands with rate control
 static void quantise_dwt_coefficients(float *coeffs, int16_t *quantised, int size, int quantiser) {
    float effective_q = quantiser;
@@ -1311,15 +1362,10 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
        printf("\n");
    }*/

-    // Preprocess and write quantised coefficients using significance mapping for better compression
-    size_t y_compressed_size = preprocess_coefficients(quantised_y, tile_size, buffer + offset);
-    offset += y_compressed_size;
-
-    size_t co_compressed_size = preprocess_coefficients(quantised_co, tile_size, buffer + offset);
-    offset += co_compressed_size;
-
-    size_t cg_compressed_size = preprocess_coefficients(quantised_cg, tile_size, buffer + offset);
-    offset += cg_compressed_size;
+    // Preprocess and write quantised coefficients using concatenated significance maps for optimal compression
+    size_t total_compressed_size = preprocess_coefficients_concatenated(quantised_y, quantised_co, quantised_cg,
+                                                                        tile_size, buffer + offset);
+    offset += total_compressed_size;

    // DEBUG: Dump raw DWT coefficients for frame ~60 when it's an intra-frame
    if (!debugDumpMade && enc->frame_count >= makeDebugDump - 1 && enc->frame_count <= makeDebugDump + 2 &&