tav: grain synthesis on the spec

2026-06-06 13:38:30 +09:00 · 2025-10-08 23:47:54 +09:00
parent 17b5063ef0
commit 1a072f6a0c
4 changed files with 212 additions and 61 deletions
--- a/video_encoder/encoder_tav.c
+++ b/video_encoder/encoder_tav.c
@@ -281,6 +281,7 @@ typedef struct tav_encoder_s {
    int perceptual_tuning; // 1 = perceptual quantisation (default), 0 = uniform quantisation
    int channel_layout;   // Channel layout: 0=Y-Co-Cg, 1=Y-only, 2=Y-Co-Cg-A, 3=Y-A, 4=Co-Cg
    int progressive_mode;  // 0 = interlaced (default), 1 = progressive
+    int grain_synthesis;   // 1 = enable grain synthesis (default), 0 = disable

    // Frame buffers - ping-pong implementation
    uint8_t *frame_rgb[2];      // [0] and [1] alternate between current and previous
@@ -616,6 +617,21 @@ static void free_subtitle_list(subtitle_entry_t *list);
 static int write_subtitle_packet(FILE *output, uint32_t index, uint8_t opcode, const char *text);
 static int process_subtitles(tav_encoder_t *enc, int frame_num, FILE *output);

+// Film grain synthesis: rng_hash is a stateless 32-bit bit mixer (xor-shift and multiply rounds) that returns the mixed x
+static uint32_t rng_hash(uint32_t x) {
+    x ^= x >> 16;       // fold the upper 16 bits into the lower 16
+    x *= 0x7feb352d;    // multiply round; the result is stored back into a 32-bit unsigned value
+    x ^= x >> 15;
+    x *= 0x846ca68b;    // NOTE(review): shifts and constants look like the "lowbias32" mixer from hash-prospector - confirm provenance
+    x ^= x >> 16;
+    return x;
+}
+
+static uint32_t grain_synthesis_rng(uint32_t frame, uint32_t band, uint32_t x, uint32_t y) { // pure function of its arguments: same (frame, band, x, y) gives the same 32-bit value
+    uint32_t key = frame * 0x9e3779b9u ^ band * 0x7f4a7c15u ^ (y << 16) ^ x; // '*' binds tighter than '^': (frame*C1) ^ (band*C2) ^ (y<<16) ^ x; x and y stay in separate 16-bit halves only while both are < 65536
+    return rng_hash(key); // run the combined key through the rng_hash mixer
+}
+
 // Show usage information
 static void show_usage(const char *program_name) {
    int qtsize = sizeof(MP2_RATE_TABLE) / sizeof(int);
@@ -647,6 +663,7 @@ static void show_usage(const char *program_name) {
    printf("  --dump-frame N          Dump quantised coefficients for frame N (creates .bin files)\n");
    printf("  --wavelet N             Wavelet filter: 0=LGT 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar (default: 1)\n");
    printf("  --zstd-level N          Zstd compression level 1-22 (default: %d, higher = better compression but slower)\n", DEFAULT_ZSTD_LEVEL);
+    printf("  --no-grain-synthesis    Disable grain synthesis (enabled by default)\n");
    printf("  --help                  Show this help\n\n");

    printf("Audio Rate by Quality:\n  ");
@@ -710,6 +727,7 @@ static tav_encoder_t* create_encoder(void) {
    enc->encode_limit = 0;  // Default: no frame limit
    enc->zstd_level = DEFAULT_ZSTD_LEVEL;  // Default Zstd compression level
    enc->progressive_mode = 1;  // Default to progressive mode
+    enc->grain_synthesis = 0;  // Default: disable grain synthesis (only do it on the decoder)

    return enc;
 }
@@ -1142,6 +1160,67 @@ static void extract_padded_tile(tav_encoder_t *enc, int tile_x, int tile_y,
    }
 }

+// ==============================================================================
+// Grain Synthesis Functions
+// ==============================================================================
+
+// Forward declaration for perceptual weight function (in this section it is referenced only from the commented-out block in apply_grain_synthesis_encoder)
+static float get_perceptual_weight(tav_encoder_t *enc, int level0, int subband_type, int is_chroma, int max_levels);
+
+// Map a 32-bit random value to noise with a triangular distribution (assuming the two 16-bit halves are independent and uniform)
+// Returns value in range [-1.0, 1.0]
+static float grain_triangular_noise(uint32_t rng_val) {
+    // Split rng_val into its low and high 16-bit halves and scale each to [0, 1]
+    float u1 = (rng_val & 0xFFFF) / 65535.0f;
+    float u2 = ((rng_val >> 16) & 0xFFFF) / 65535.0f;
+
+    // The sum lies in [0, 2]; subtracting 1 recentres it on 0, giving [-1, 1] (a sum, not an average)
+    return (u1 + u2) - 1.0f;
+}
+
+// Apply grain synthesis to DWT coefficients (encoder adds noise): perturbs every coefficient outside level 0 in place
+static void apply_grain_synthesis_encoder(tav_encoder_t *enc, float *coeffs, int width, int height,   // coeffs: indexed as y * width + x, so it must hold width * height floats
+                                         int decomp_levels, uint32_t frame_num,                       // frame_num: part of the noise key, so the pattern differs per frame
+                                         int quantiser, int is_chroma) {                               // quantiser: drives the noise amplitude
+    // This function does not check the channel itself; the caller chooses which plane to pass in.
+    // Noise amplitude = FCLAMP(quantiser, 0, 32) * 0.25, the same for every coefficient.
+    // enc and is_chroma are only used by the commented-out perceptual-weight block below.
+    for (int y = 0; y < height; y++) {
+        for (int x = 0; x < width; x++) {
+            int idx = y * width + x;
+
+            // Classify the coefficient; level 0 is treated as the LL band and left untouched
+            int level = get_subband_level_2d(x, y, width, height, decomp_levels);
+            int subband_type = get_subband_type_2d(x, y, width, height, decomp_levels);
+            if (level == 0) {
+                continue; // Skip LL band
+            }
+
+            // Disabled alternative: amplitude of half the quantiser, optionally scaled by the perceptual weight
+            /*int subband_type = get_subband_type_2d(x, y, width, height, decomp_levels);
+
+            // Calculate noise amplitude based on perceptual tuning mode
+            float noise_amplitude;
+            if (enc->perceptual_tuning) {
+                // Perceptual mode: scale by perceptual weight
+                float perceptual_weight = get_perceptual_weight(enc, level, subband_type, is_chroma, decomp_levels);
+                noise_amplitude = (quantiser * perceptual_weight) * 0.5f;
+            } else {
+                // Uniform mode: use global quantiser
+                noise_amplitude = quantiser * 0.5f;
+            }*/
+            float noise_amplitude = FCLAMP(quantiser, 0.0f, 32.0f) * 0.25f; // FCLAMP is defined elsewhere; presumably clamps quantiser into [0, 32] - confirm
+
+            // Generate deterministic noise; the band key combines level and subband type with a constant offset
+            uint32_t rng_val = grain_synthesis_rng(frame_num, level + subband_type * 31 + 16777219, x, y);
+            float noise = grain_triangular_noise(rng_val);
+
+            // Add noise (in [-1, 1]) scaled by the amplitude to the coefficient
+            coeffs[idx] += noise * noise_amplitude;
+        }
+    }
+}
+

 // 2D DWT forward transform for rectangular padded tile (344x288)
 static void dwt_2d_forward_padded(float *tile_data, int levels, int filter_type) {
@@ -2002,6 +2081,21 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
                printf("\n");
            }*/

+            // Apply grain synthesis to Y channel (after DWT, before quantization)
+            if (enc->grain_synthesis && mode != TAV_MODE_SKIP) {
+                // Get the quantiser value that will be used for this frame
+                int qY_value = enc->bitrate_mode ? quantiser_float_to_int_dithered(enc) : enc->quantiser_y;
+                int actual_qY = QLUT[qY_value];
+
+                // Determine dimensions based on mode
+                int gs_width = enc->monoblock ? enc->width : PADDED_TILE_SIZE_X;
+                int gs_height = enc->monoblock ? enc->height : PADDED_TILE_SIZE_Y;
+
+                // Apply grain synthesis to Y channel only (is_chroma = 0)
+                apply_grain_synthesis_encoder(enc, tile_y_data, gs_width, gs_height,
+                                             enc->decomp_levels, enc->frame_count, actual_qY, 0);
+            }
+
            // Serialise tile
            size_t tile_size = serialise_tile_data(enc, tile_x, tile_y,
                                                   tile_y_data, tile_co_data, tile_cg_data,
@@ -3555,6 +3649,7 @@ int main(int argc, char *argv[]) {
        {"zstd-level", required_argument, 0, 1014},
        {"interlace", no_argument, 0, 1015},
        {"interlaced", no_argument, 0, 1015},
+//        {"no-grain-synthesis", no_argument, 0, 1016},
        {"help", no_argument, 0, '?'},
        {0, 0, 0, 0}
    };
@@ -3704,6 +3799,9 @@ int main(int argc, char *argv[]) {
            case 1015: // --interlaced
                enc->progressive_mode = 0;
                break;
+            case 1016: // --no-grain-synthesis
+                enc->grain_synthesis = 0;
+                break;
            case 'a':
                int bitrate = atoi(optarg);
                int valid_bitrate = validate_mp2_bitrate(bitrate);