complexity calculation now considers chroma

2026-06-07 05:54:06 +09:00 · 2025-09-09 19:02:15 +09:00
parent 9f7a4ef2e7
commit 5a5ac8ef74
1 changed files with 88 additions and 12 deletions
--- a/video_encoder/encoder_tev.c
+++ b/video_encoder/encoder_tev.c
@@ -427,7 +427,79 @@ static void extract_ycocgr_block(uint8_t *rgb_frame, int width, int height,
    }
 }

-// Calculate block complexity based on spatial activity
+// Spatial activity of one square channel block: block holds block_size * block_size floats in row-major order; returns the sum of absolute differences between adjacent samples (callers in this file pass BLOCK_SIZE or HALF_BLOCK_SIZE)
+static float calculate_spatial_activity(const float *block, int block_size) {
+    float activity = 0.0f;
+    
+    // Accumulate |sample - neighbor| over every horizontally and vertically adjacent pair; each pair is counted exactly once
+    for (int y = 0; y < block_size; y++) {
+        for (int x = 0; x < block_size; x++) {
+            float pixel = block[y * block_size + x];
+            
+            // Right neighbor: skipped in the last column, which has none
+            if (x < block_size - 1) {
+                activity += fabsf(pixel - block[y * block_size + (x + 1)]);
+            }
+            
+            // Bottom neighbor: skipped in the last row, which has none
+            if (y < block_size - 1) {
+                activity += fabsf(pixel - block[(y + 1) * block_size + x]);
+            }
+        }
+    }
+    
+    return activity;
+}
+
+// Population variance of one square channel block: the mean squared deviation from the mean over all block_size * block_size samples (divides by N, not N - 1)
+static float calculate_variance(const float *block, int block_size) {
+    int total_pixels = block_size * block_size;
+    
+    // First pass: arithmetic mean of all samples
+    float mean = 0.0f;
+    for (int i = 0; i < total_pixels; i++) {
+        mean += block[i];
+    }
+    mean /= total_pixels;
+    
+    // Second pass: average of the squared deviations from that mean
+    float variance = 0.0f;
+    for (int i = 0; i < total_pixels; i++) {
+        float diff = block[i] - mean;
+        variance += diff * diff;
+    }
+    variance /= total_pixels;
+    
+    return variance;
+}
+
+// Chroma-aware block complexity: for each of the Y, Co and Cg blocks computes spatial activity + 10 * sqrt(variance), then returns luma + 1.2 * Co + 1.2 * Cg (the 1.2 is written as 4.0 * 0.3 below)
+static float calculate_block_complexity_enhanced(const float *y_block, const float *co_block, const float *cg_block) {
+    // Luma complexity over BLOCK_SIZE x BLOCK_SIZE samples (presumably 16x16; BLOCK_SIZE is defined outside this hunk)
+    float luma_activity = calculate_spatial_activity(y_block, BLOCK_SIZE);
+    float luma_variance = calculate_variance(y_block, BLOCK_SIZE);
+    float luma_complexity = luma_activity + sqrtf(luma_variance) * 10.0f;
+    
+    // Chroma complexity over HALF_BLOCK_SIZE x HALF_BLOCK_SIZE samples (presumably 8x8); same formula as luma, weighted when combined below
+    float co_activity = calculate_spatial_activity(co_block, HALF_BLOCK_SIZE);
+    float co_variance = calculate_variance(co_block, HALF_BLOCK_SIZE);
+    float co_complexity = co_activity + sqrtf(co_variance) * 10.0f;
+    
+    float cg_activity = calculate_spatial_activity(cg_block, HALF_BLOCK_SIZE);
+    float cg_variance = calculate_variance(cg_block, HALF_BLOCK_SIZE);
+    float cg_complexity = cg_activity + sqrtf(cg_variance) * 10.0f;
+    
+    // Combine the three per-plane complexities into a single score
+    // Luma is added at full weight; each chroma plane is scaled by 0.3 so it is secondary but still significant enough to matter
+    // The extra factor of 4 compensates for the smaller chroma block (64 vs 256 samples, assuming 8x8 vs 16x16)
+    float total_complexity = luma_complexity + 
+                           (co_complexity * 4.0f * 0.3f) + 
+                           (cg_complexity * 4.0f * 0.3f);
+    
+    return total_complexity;
+}
+
+// Legacy luma-only complexity calculation, kept for compatibility (it does not call the enhanced version)
 static float calculate_block_complexity(const float *y_block) {
    float complexity = 0.0f;
    
@@ -475,14 +547,15 @@ static float complexity_to_rate_factor(float complexity) {
        return 0.7f; // Reduce detail for flat blocks (saves bits, minimal perceptual loss)
    }
    
-    // Parameters derived from statistical analysis of 10 video samples:
-    // - Most content has median complexity around 500-3000
-    // - Heavy concentration at low complexity, wide spread at high complexity
+    // Parameters recalibrated for chroma-aware complexity calculation:
+    // - Median complexity now ~1400-3700 (increased due to chroma contribution)
+    // - High complexity threshold ~10000-15000 (91st percentile)
+    // - Maximum values up to ~22800 (vs ~17000 in luma-only version)
    
-    const float median_complexity = 2400.0f;  // Target for rate_factor = 1.0
-    const float high_complexity = 8500.0f;    // ~91st percentile threshold
+    const float median_complexity = 4447.0f;  // Target for rate_factor ≈ 1.0. e^8.4
+    const float high_complexity = 12088.0f;   // ~91st percentile threshold. e^9.4
    
-    // Logarithmic preprocessing to handle wide dynamic range (0 to 17000+)
+    // Logarithmic preprocessing to handle wide dynamic range (0 to 23000+)
    float log_complexity = logf(complexity + 1.0f);
    float log_median = logf(median_complexity + 1.0f);
    float log_high = logf(high_complexity + 1.0f);
@@ -495,7 +568,9 @@ static float complexity_to_rate_factor(float complexity) {
    float rate_factor = 0.7f + 0.9f * sigmoid; // Range: 0.7 to 1.6
    
    // Clamp to prevent extreme coefficient amplification/reduction
-    return FCLAMP(rate_factor, 0.6f, 1.8f);
+    return FCLAMP(rate_factor, 0.7f, 1.6f);
+
+    // See also: https://www.desmos.com/calculator/awwjztvv3o
 }

 // Add complexity value to statistics collection
@@ -862,7 +937,7 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
            block->mv_x = 0;
            block->mv_y = 0;
            // Even skip blocks benefit from complexity analysis for consistency
-            float block_complexity = calculate_block_complexity(enc->y_workspace);
+            float block_complexity = calculate_block_complexity_enhanced(enc->y_workspace, enc->co_workspace, enc->cg_workspace);
            add_complexity_value(enc, block_complexity);
            block->rate_control_factor = complexity_to_rate_factor(block_complexity);
            block->cbp = 0x00;  // No coefficients present
@@ -877,7 +952,7 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
            // Good motion prediction - use motion-only mode
            block->mode = TEV_MODE_MOTION;
            // Analyze complexity for motion blocks too
-            float block_complexity = calculate_block_complexity(enc->y_workspace);
+            float block_complexity = calculate_block_complexity_enhanced(enc->y_workspace, enc->co_workspace, enc->cg_workspace);
            add_complexity_value(enc, block_complexity);
            block->rate_control_factor = complexity_to_rate_factor(block_complexity);
            block->cbp = 0x00;  // No coefficients present
@@ -922,7 +997,8 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
    }

    // Calculate block complexity BEFORE DCT transform for adaptive rate control
-    float block_complexity = calculate_block_complexity(enc->y_workspace);
+    // Use enhanced complexity calculation that includes chroma information
+    float block_complexity = calculate_block_complexity_enhanced(enc->y_workspace, enc->co_workspace, enc->cg_workspace);
    add_complexity_value(enc, block_complexity);
    block->rate_control_factor = complexity_to_rate_factor(block_complexity);

@@ -2439,7 +2515,7 @@ int main(int argc, char *argv[]) {
    gettimeofday(&enc->start_time, NULL);

    printf("Encoding video with YCoCg-R 4:2:0 format...\n");
-    if (enc->output_fps > 0) {
+    if (enc->output_fps != enc->fps) {
        printf("Frame rate conversion enabled: %d fps output\n", enc->output_fps);
    }
    if (enc->bitrate_mode > 0) {