TAV: improved compression using some coefficient preprocessing

2026-06-08 22:34:03 +09:00 · 2025-09-29 01:17:53 +09:00
parent 01278815c7
commit 66909537a0
5 changed files with 280 additions and 47 deletions
--- a/video_encoder/decoder_tav.c
+++ b/video_encoder/decoder_tav.c
@@ -26,6 +26,27 @@ static inline int CLAMP(int x, int min, int max) {
    return x < min ? min : (x > max ? max : x);
 }

+// Decoder: rebuild coeff_count coefficients from a packed block laid out as
+// [significance bitmap, LSB-first][non-zero values, 16-bit little-endian].
+// compressed_data may sit at any byte offset, so the values are assembled
+// byte-by-byte instead of being read through a (misaligned) int16_t pointer.
+static void postprocess_coefficients(uint8_t *compressed_data, int coeff_count, int16_t *output_coeffs) {
+    if (coeff_count <= 0) return;
+    const uint8_t *sig_map = compressed_data;
+    const uint8_t *values = compressed_data + (coeff_count + 7) / 8;
+
+    // Zero everything first; only flagged positions are overwritten below.
+    memset(output_coeffs, 0, (size_t)coeff_count * sizeof(int16_t));
+
+    for (int i = 0; i < coeff_count; i++) {
+        if (sig_map[i / 8] & (1u << (i % 8))) {
+            // Little-endian, the byte order the pre-significance-map reader used.
+            output_coeffs[i] = (int16_t)(uint16_t)(values[0] | (values[1] << 8));
+            values += 2;
+        }
+    }
+}
+
 // TAV header structure (32 bytes)
 typedef struct {
    uint8_t magic[8];
@@ -558,27 +579,46 @@ static int decode_frame(tav_decoder_t *decoder) {
        // Copy from reference frame
        memcpy(decoder->current_frame_rgb, decoder->reference_frame_rgb, decoder->frame_size * 3);
    } else {
-        // Read coefficients in TSVM order: all Y, then all Co, then all Cg
+        // Read coefficients with significance map postprocessing
        int coeff_count = decoder->frame_size;
        uint8_t *coeff_ptr = ptr;

-        // Read coefficients into temporary arrays
+        // Allocate arrays for decompressed coefficients
        int16_t *quantized_y = malloc(coeff_count * sizeof(int16_t));
        int16_t *quantized_co = malloc(coeff_count * sizeof(int16_t));
        int16_t *quantized_cg = malloc(coeff_count * sizeof(int16_t));

-        for (int i = 0; i < coeff_count; i++) {
-            quantized_y[i] = (int16_t)((coeff_ptr[1] << 8) | coeff_ptr[0]);
-            coeff_ptr += 2;
+        // Postprocess coefficients from significance map format
+        // First find where each channel's data starts by reading the preprocessing output
+        size_t y_map_bytes = (coeff_count + 7) / 8;
+
+        // Count non-zeros in Y significance map to find Y data size
+        int y_nonzeros = 0;
+        for (int i = 0; i < y_map_bytes; i++) {
+            uint8_t byte = coeff_ptr[i];
+            for (int bit = 0; bit < 8 && i*8+bit < coeff_count; bit++) {
+                if (byte & (1 << bit)) y_nonzeros++;
+            }
        }
-        for (int i = 0; i < coeff_count; i++) {
-            quantized_co[i] = (int16_t)((coeff_ptr[1] << 8) | coeff_ptr[0]);
-            coeff_ptr += 2;
-        }
-        for (int i = 0; i < coeff_count; i++) {
-            quantized_cg[i] = (int16_t)((coeff_ptr[1] << 8) | coeff_ptr[0]);
-            coeff_ptr += 2;
+        size_t y_data_size = y_map_bytes + y_nonzeros * sizeof(int16_t);
+
+        // Count non-zeros in Co significance map
+        uint8_t *co_ptr = coeff_ptr + y_data_size;
+        int co_nonzeros = 0;
+        for (int i = 0; i < y_map_bytes; i++) {
+            uint8_t byte = co_ptr[i];
+            for (int bit = 0; bit < 8 && i*8+bit < coeff_count; bit++) {
+                if (byte & (1 << bit)) co_nonzeros++;
+            }
        }
+        size_t co_data_size = y_map_bytes + co_nonzeros * sizeof(int16_t);
+
+        uint8_t *cg_ptr = co_ptr + co_data_size;
+
+        // Decompress each channel
+        postprocess_coefficients(coeff_ptr, coeff_count, quantized_y);
+        postprocess_coefficients(co_ptr, coeff_count, quantized_co);
+        postprocess_coefficients(cg_ptr, coeff_count, quantized_cg);

        // Apply dequantization (perceptual for version 5, uniform for earlier versions)
        const int is_perceptual = (decoder->header.version == 5);
--- a/video_encoder/encoder_tav.c
+++ b/video_encoder/encoder_tav.c
@@ -74,6 +74,9 @@ int KEYFRAME_INTERVAL = 2; // refresh often because deltas in DWT are more visib
 #define MP2_DEFAULT_PACKET_SIZE 1152
 #define MAX_SUBTITLE_LENGTH 2048

+const int makeDebugDump = -100; // frame number around which coefficients are dumped; presumably negative = never matches, i.e. disabled
+int debugDumpMade = 0; // one-shot latch: set to 1 once the debug dump has been written
+
 // Subtitle structure
 typedef struct subtitle_entry {
    int start_frame;
@@ -954,6 +957,38 @@ static void dwt_2d_forward_flexible(float *tile_data, int width, int height, int
    free(temp_col);
 }

+// Encoder: pack coeff_count quantised coefficients into output_buffer as
+//   [significance bitmap: 1 bit per coefficient, LSB-first, (coeff_count+7)/8 bytes]
+//   [every non-zero coefficient, in index order, as 16-bit little-endian]
+// Returns the number of bytes written. The caller must provide room for the
+// worst case of (coeff_count + 7) / 8 + 2 * coeff_count bytes.
+static size_t preprocess_coefficients(int16_t *coeffs, int coeff_count, uint8_t *output_buffer) {
+    if (coeff_count <= 0) {
+        return 0;
+    }
+
+    size_t map_bytes = ((size_t)coeff_count + 7) / 8;
+    uint8_t *sig_map = output_buffer;
+    // Values start right after the bitmap, which can be an odd address (and the
+    // caller appends channels back-to-back), so they are stored byte-by-byte
+    // rather than through an int16_t pointer that might be misaligned.
+    uint8_t *value_out = output_buffer + map_bytes;
+
+    memset(sig_map, 0, map_bytes);
+
+    for (int i = 0; i < coeff_count; i++) {
+        if (coeffs[i] == 0) {
+            continue;
+        }
+        sig_map[i / 8] |= (uint8_t)(1u << (i % 8));
+
+        uint16_t raw = (uint16_t)coeffs[i];
+        *value_out++ = (uint8_t)(raw & 0xFF);
+        *value_out++ = (uint8_t)(raw >> 8);
+    }
+
+    return (size_t)(value_out - output_buffer);
+}

 // Quantisation for DWT subbands with rate control
 static void quantise_dwt_coefficients(float *coeffs, int16_t *quantised, int size, int quantiser) {
@@ -1276,10 +1311,56 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
        printf("\n");
    }*/

-    // Write quantised coefficients (both uniform and perceptual use same linear layout)
-    memcpy(buffer + offset, quantised_y, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t);
-    memcpy(buffer + offset, quantised_co, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t);
-    memcpy(buffer + offset, quantised_cg, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t);
+    // Preprocess and write quantised coefficients using significance mapping for better compression
+    size_t y_compressed_size = preprocess_coefficients(quantised_y, tile_size, buffer + offset);
+    offset += y_compressed_size;
+
+    size_t co_compressed_size = preprocess_coefficients(quantised_co, tile_size, buffer + offset);
+    offset += co_compressed_size;
+
+    size_t cg_compressed_size = preprocess_coefficients(quantised_cg, tile_size, buffer + offset);
+    offset += cg_compressed_size;
+
+    // DEBUG: One-shot dump of the quantised Y/Co/Cg coefficients for an intra-frame near frame number makeDebugDump
+    if (!debugDumpMade && enc->frame_count >= makeDebugDump - 1 && enc->frame_count <= makeDebugDump + 2 &&
+        (mode == TAV_MODE_INTRA)) {
+
+        char filename[256];
+        size_t data_size = tile_size * sizeof(int16_t);
+
+        // Dump Y channel coefficients
+        snprintf(filename, sizeof(filename), "frame_%03d.tavframe.y.bin", enc->frame_count);
+        FILE *debug_fp = fopen(filename, "wb");
+        if (debug_fp) {
+            fwrite(quantised_y, 1, data_size, debug_fp);
+            fclose(debug_fp);
+            printf("DEBUG: Dumped Y coefficients to %s (%zu bytes)\n", filename, data_size);
+        }
+
+        // Dump Co channel coefficients
+        snprintf(filename, sizeof(filename), "frame_%03d.tavframe.co.bin", enc->frame_count);
+        debug_fp = fopen(filename, "wb");
+        if (debug_fp) {
+            fwrite(quantised_co, 1, data_size, debug_fp);
+            fclose(debug_fp);
+            printf("DEBUG: Dumped Co coefficients to %s (%zu bytes)\n", filename, data_size);
+        }
+
+        // Dump Cg channel coefficients
+        snprintf(filename, sizeof(filename), "frame_%03d.tavframe.cg.bin", enc->frame_count);
+        debug_fp = fopen(filename, "wb");
+        if (debug_fp) {
+            fwrite(quantised_cg, 1, data_size, debug_fp);
+            fclose(debug_fp);
+            printf("DEBUG: Dumped Cg coefficients to %s (%zu bytes)\n", filename, data_size);
+        }
+
+        printf("DEBUG: Frame %d - Dumped all %zu coefficient bytes per channel (total: %zu bytes)\n",
+               enc->frame_count, data_size, data_size * 3);
+
+        debugDumpMade = 1;
+    }
+

    // OPTIMISATION: No need to free - using pre-allocated reusable buffers