adaptive quality control

2026-06-06 05:28:31 +09:00 · 2025-08-28 00:10:17 +09:00
parent 6f7e407a1c
commit d43a2f66d0
2 changed files with 84 additions and 113 deletions
--- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
+++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
@@ -1512,42 +1512,6 @@ class GraphicsJSR223Delegate(private val vm: VM) {
        
        return result
    }
-    
-    private fun tevIdct16x16(coeffs: IntArray, quantTable: IntArray): IntArray {
-        val dctCoeffs = Array(16) { FloatArray(16) }
-        val result = IntArray(256)  // 16x16 = 256
-        
-        // Convert integer coefficients to 2D array and dequantize
-        for (u in 0 until 16) {
-            for (v in 0 until 16) {
-                val idx = u * 16 + v
-                val coeff = coeffs[idx]
-                if (idx == 0) {
-                    // DC coefficient for luma: lossless quantization (no scaling)
-                    dctCoeffs[u][v] = coeff.toFloat()
-                } else {
-                    // AC coefficients: use quantization table
-                    dctCoeffs[u][v] = (coeff * quantTable[idx]).toFloat()
-                }
-            }
-        }
-        
-        // Apply 2D inverse DCT
-        for (x in 0 until 16) {
-            for (y in 0 until 16) {
-                var sum = 0f
-                for (u in 0 until 16) {
-                    for (v in 0 until 16) {
-                        sum += dctBasis16[u][x] * dctBasis16[v][y] * dctCoeffs[u][v]
-                    }
-                }
-                val pixel = (sum + 128).coerceIn(0f, 255f)
-                result[y * 16 + x] = pixel.toInt()
-            }
-        }
-        
-        return result
-    }

    // YCoCg-R to RGB conversion with 4:2:0 chroma upsampling
    fun tevYcocgToRGB(yBlock: IntArray, coBlock: IntArray, cgBlock: IntArray): IntArray {
--- a/video_encoder/encoder_tev.c
+++ b/video_encoder/encoder_tev.c
@@ -48,8 +48,8 @@ static const int MP2_RATE_TABLE[] = {64, 96, 128, 192, 256};
 // from dataset of three videos with Q0..Q95: (real life video, low res pixel art, high res pixel art)
 //  5  25  50  75  90  Claude Opus 4.1 (with data analysis)
 // 10  25  45  65  85  ChatGPT-5 (without data analysis)
-static const int QUALITY_Y[] =  {8, 24, 48, 70, 88};
-static const int QUALITY_CO[] = {8, 24, 48, 70, 88};
+static const int QUALITY_Y[] =  {5, 18, 45, 65, 85};
+static const int QUALITY_CO[] = {5, 18, 45, 65, 85};

 static float jpeg_quality_to_mult(int q) {
    return ((q < 50) ? 5000.f / q : 200.f - 2*q) / 100.f;
@@ -356,7 +356,7 @@ static void dct_8x8(float *input, float *output) {
 }

 // quantise DCT coefficient using quality table with rate control
-static int16_t quantise_coeff(float coeff, float quant, int is_dc, int is_chroma, float rate_factor) {
+static int16_t quantise_coeff(float coeff, float quant, int is_dc, int is_chroma) {
    if (is_dc) {
        if (is_chroma) {
            // Chroma DC: range -256 to +255, use lossless quantisation for testing
@@ -366,10 +366,9 @@ static int16_t quantise_coeff(float coeff, float quant, int is_dc, int is_chroma
            return (int16_t)roundf(coeff);
        }
    } else {
-        // AC coefficients use quality table with rate control adjustment
-        float adjusted_quant = quant * rate_factor;
-        adjusted_quant = fmaxf(adjusted_quant, 1.0f); // Prevent division by zero
-        return (int16_t)roundf(coeff / adjusted_quant);
+        // AC coefficients use quality table (rate control factor applied to quant table before calling)
+        float safe_quant = fmaxf(quant, 1.0f); // Prevent division by zero
+        return (int16_t)roundf(coeff / safe_quant);
    }
 }

@@ -436,6 +435,53 @@ static void extract_ycocgr_block(uint8_t *rgb_frame, int width, int height,
    }
 }

+// Calculate block complexity based on spatial activity
+static float calculate_block_complexity(const float *y_block) {
+    float complexity = 0.0f;
+    
+    // Method 1: Sum of absolute differences with neighbors (spatial activity)
+    for (int y = 0; y < 16; y++) {
+        for (int x = 0; x < 16; x++) {
+            float pixel = y_block[y * 16 + x];
+            
+            // Compare with right neighbor
+            if (x < 15) {
+                complexity += fabsf(pixel - y_block[y * 16 + (x + 1)]);
+            }
+            
+            // Compare with bottom neighbor
+            if (y < 15) {
+                complexity += fabsf(pixel - y_block[(y + 1) * 16 + x]);
+            }
+        }
+    }
+    
+    // Method 2: Add variance contribution
+    float mean = 0.0f;
+    for (int i = 0; i < 256; i++) {
+        mean += y_block[i];
+    }
+    mean /= 256.0f;
+    
+    float variance = 0.0f;
+    for (int i = 0; i < 256; i++) {
+        float diff = y_block[i] - mean;
+        variance += diff * diff;
+    }
+    variance /= 256.0f;
+    
+    // Combine spatial activity and variance
+    return complexity + sqrtf(variance) * 10.0f;
+}
+
+// Map complexity to rate control factor (pure per-block, no global factor)
+static float complexity_to_rate_factor(float complexity) {
+    const float P = 18.f;
+    const float e = -0.5f;
+    float factor = P * powf(FCLAMP(complexity, 1.f, 16777216.f), e);
+    return FCLAMP(factor, 0.5f, P); // the "auto quality" thing can be excessively permissive
+}
+
 // Simple motion estimation (full search) for 16x16 blocks
 static void estimate_motion(tev_encoder_t *enc, int block_x, int block_y,
                           int16_t *best_mv_x, int16_t *best_mv_y) {
@@ -616,55 +662,7 @@ static void compute_motion_residual(tev_encoder_t *enc, int block_x, int block_y
 }

 // Calculate block complexity for rate control
-static float calculate_block_complexity(float *workspace, int size) {
-    float complexity = 0.0f;
-    for (int i = 1; i < size; i++) { // Skip DC component
-        complexity += fabsf(workspace[i]);
-    }
-    return complexity;
-}

-const float EPSILON = 1.0f / 16777216.0f;
-const float RATE_CONTROL_CLAMP_MAX = 64.0f;
-const float RATE_CONTROL_CLAMP_MIN = 1.0f / RATE_CONTROL_CLAMP_MAX;
-
-// Update rate control factor based on target bitrate
-static void update_rate_control(tev_encoder_t *enc, float frame_complexity, size_t frame_bits) {
-    if (enc->bitrate_mode == 0) {
-        // Quality mode - no rate control
-        enc->rate_control_factor = 1.0f;
-        return;
-    }
-
-    // Update complexity history
-    enc->complexity_history[enc->complexity_history_index] = frame_complexity;
-    enc->complexity_history_index = (enc->complexity_history_index + 1) % 60;
-
-    // Calculate rolling average complexity
-    float sum = 0.0f;
-    int count = 0;
-    for (int i = 0; i < 60; i++) {
-        if (enc->complexity_history[i] > 0.0f) {
-            sum += enc->complexity_history[i];
-            count++;
-        }
-    }
-    enc->average_complexity = (count > 0) ? sum / count : frame_complexity;
-
-    // Calculate rate adjustment
-    if (enc->target_bits_per_frame > 0 && frame_bits > 0) {
-        float bitrate_ratio = (float)enc->target_bits_per_frame / frame_bits;
-        float complexity_ratio = frame_complexity / fmaxf(enc->average_complexity, 1.0f);
-
-        // Adaptive adjustment with damping
-        float adjustment = 1.0f / (bitrate_ratio * complexity_ratio);
-        enc->rate_control_factor = adjustment;
-        enc->rate_control_factor = 0.8f * enc->rate_control_factor + 0.2f * adjustment;
-
-        // Clamp to reasonable range
-        enc->rate_control_factor = FCLAMP(enc->rate_control_factor, RATE_CONTROL_CLAMP_MIN, RATE_CONTROL_CLAMP_MAX);
-    }
-}

 // Encode a 16x16 block
 static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_keyframe) {
@@ -763,7 +761,9 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
            block->mode = TEV_MODE_SKIP;
            block->mv_x = 0;
            block->mv_y = 0;
-            block->rate_control_factor = enc->rate_control_factor;
+            // Even skip blocks benefit from complexity analysis for consistency
+            float block_complexity = calculate_block_complexity(enc->y_workspace);
+            block->rate_control_factor = complexity_to_rate_factor(block_complexity);
            block->cbp = 0x00;  // No coefficients present
            // Zero out DCT coefficients for consistent format
            memset(block->y_coeffs, 0, sizeof(block->y_coeffs));
@@ -775,7 +775,9 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
                   (abs(block->mv_x) > 0 || abs(block->mv_y) > 0)) {
            // Good motion prediction - use motion-only mode
            block->mode = TEV_MODE_MOTION;
-            block->rate_control_factor = enc->rate_control_factor;
+            // Analyze complexity for motion blocks too
+            float block_complexity = calculate_block_complexity(enc->y_workspace);
+            block->rate_control_factor = complexity_to_rate_factor(block_complexity);
            block->cbp = 0x00;  // No coefficients present
            // Zero out DCT coefficients for consistent format
            memset(block->y_coeffs, 0, sizeof(block->y_coeffs));
@@ -814,41 +816,50 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
        } else {
            // No good motion prediction - use intra mode
            block->mode = TEV_MODE_INTRA;
-            block->rate_control_factor = enc->rate_control_factor;
            block->mv_x = 0;
            block->mv_y = 0;
            enc->blocks_intra++;
        }
    }

+    // Calculate block complexity BEFORE DCT transform for adaptive rate control
+    float block_complexity = calculate_block_complexity(enc->y_workspace);
+    block->rate_control_factor = complexity_to_rate_factor(block_complexity);
+
    // Apply fast DCT transform
    dct_16x16_fast(enc->y_workspace, enc->dct_workspace);

-    // quantise Y coefficients (luma)
+    // quantise Y coefficients (luma) using per-block rate control
    const uint32_t *y_quant = QUANT_TABLE_Y;
    const float qmult_y = jpeg_quality_to_mult(enc->qualityY);
    for (int i = 0; i < 256; i++) {
-        block->y_coeffs[i] = quantise_coeff(enc->dct_workspace[i], FCLAMP(y_quant[i] * qmult_y, 1.f, 255.f), i == 0, 0, enc->rate_control_factor);
+        // Apply rate control factor to quantization table (like decoder does)
+        float effective_quant = y_quant[i] * qmult_y * block->rate_control_factor;
+        block->y_coeffs[i] = quantise_coeff(enc->dct_workspace[i], FCLAMP(effective_quant, 1.f, 255.f), i == 0, 0);
    }

    // Apply fast DCT transform to chroma
    dct_8x8_fast(enc->co_workspace, enc->dct_workspace);

-    // quantise Co coefficients (chroma - orange-blue)
+    // quantise Co coefficients (chroma - orange-blue) using per-block rate control
    const uint32_t *co_quant = QUANT_TABLE_C;
    const float qmult_co = jpeg_quality_to_mult(enc->qualityCo);
    for (int i = 0; i < 64; i++) {
-        block->co_coeffs[i] = quantise_coeff(enc->dct_workspace[i], FCLAMP(co_quant[i] * qmult_co, 1.f, 255.f), i == 0, 1, enc->rate_control_factor);
+        // Apply rate control factor to quantization table (like decoder does)
+        float effective_quant = co_quant[i] * qmult_co * block->rate_control_factor;
+        block->co_coeffs[i] = quantise_coeff(enc->dct_workspace[i], FCLAMP(effective_quant, 1.f, 255.f), i == 0, 1);
    }

    // Apply fast DCT transform to Cg
    dct_8x8_fast(enc->cg_workspace, enc->dct_workspace);

-    // quantise Cg coefficients (chroma - green-magenta, qmult_cg is more aggressive like NTSC Q)
+    // quantise Cg coefficients (chroma - green-magenta, qmult_cg is more aggressive like NTSC Q) using per-block rate control
    const uint32_t *cg_quant = QUANT_TABLE_C;
    const float qmult_cg = jpeg_quality_to_mult(enc->qualityCg);
    for (int i = 0; i < 64; i++) {
-        block->cg_coeffs[i] = quantise_coeff(enc->dct_workspace[i], FCLAMP(cg_quant[i] * qmult_cg, 1.f, 255.f), i == 0, 1, enc->rate_control_factor);
+        // Apply rate control factor to quantization table (like decoder does)
+        float effective_quant = cg_quant[i] * qmult_cg * block->rate_control_factor;
+        block->cg_coeffs[i] = quantise_coeff(enc->dct_workspace[i], FCLAMP(effective_quant, 1.f, 255.f), i == 0, 1);
    }

    // Set CBP (simplified - always encode all channels)
@@ -1099,7 +1110,7 @@ static tev_encoder_t* init_encoder(void) {
    // Rate control defaults
    enc->target_bitrate_kbps = 0;    // 0 = quality mode
    enc->bitrate_mode = 0;           // Quality mode by default
-    enc->rate_control_factor = 1.0f; // No adjustment initially
+    // No global rate control factor needed - per-block complexity-based control only
    enc->frame_bits_accumulator = 0;
    enc->target_bits_per_frame = 0;
    enc->complexity_history_index = 0;
@@ -1331,16 +1342,12 @@ static int encode_frame(tev_encoder_t *enc, FILE *output, int frame_num) {
    fwrite(enc->compressed_buffer, 1, compressed_size, output);

    if (enc->verbose) {
-        printf("rateControlFactor=%.6f\n", enc->rate_control_factor);
+        printf("perBlockComplexityBasedRateControl=enabled\n");
    }

    enc->total_output_bytes += 5 + compressed_size; // packet + size + data (rate_factor now per-block)

-    // Update rate control for next frame
-    if (enc->bitrate_mode > 0) {
-        size_t frame_bits = (enc->total_output_bytes * 8) - frame_start_bits;
-        update_rate_control(enc, frame_complexity, frame_bits);
-    }
+    // No global rate control needed - per-block complexity-based control only

    // Swap frame buffers for next frame
    uint8_t *temp_rgb = enc->previous_rgb;
@@ -1648,13 +1655,13 @@ static void show_usage(const char *program_name) {
    printf("  -f, --fps N          Output frames per second (enables frame rate conversion)\n");
    printf("  -q, --quality N      Quality level 0-4 (default: 2, only decides audio rate in bitrate mode and quantiser mode)\n");
    printf("  -Q, --quantiser N    Quantiser level 0-100 (100: lossless, 0: potato)\n");
-    printf("  -b, --bitrate N      Target bitrate in kbps (enables bitrate control mode; DON'T USE - NOT WORKING AS INTENDED)\n");
+//    printf("  -b, --bitrate N      Target bitrate in kbps (enables bitrate control mode; DON'T USE - NOT WORKING AS INTENDED)\n");
    printf("  -v, --verbose        Verbose output\n");
    printf("  -t, --test           Test mode: generate solid colour frames\n");
    printf("  --help               Show this help\n\n");
-    printf("Rate Control Modes:\n");
-    printf("  Quality mode (default): Fixed quantisation based on -q parameter\n");
-    printf("  Bitrate mode (-b N):    Dynamic quantisation targeting N kbps average\n\n");
+//    printf("Rate Control Modes:\n");
+//    printf("  Quality mode (default): Fixed quantisation based on -q parameter\n");
+//    printf("  Bitrate mode (-b N):    Dynamic quantisation targeting N kbps average\n\n");
    printf("Audio Rate by Quality:\n");
    printf("  ");
    for (int i = 0; i < sizeof(MP2_RATE_TABLE) / sizeof(int); i++) {
@@ -1670,7 +1677,7 @@ static void show_usage(const char *program_name) {
    printf("  - YCoCg-R 4:2:0 chroma subsampling for 50%% compression improvement\n");
    printf("  - 16x16 Y blocks with 8x8 chroma for optimal DCT efficiency\n");
    printf("  - Frame rate conversion with FFmpeg temporal filtering\n");
-//    printf("  - Adaptive bitrate control with complexity-based adjustment\n");
+    printf("  - Adaptive quality control with complexity-based adjustment\n");
    printf("Examples:\n");
    printf("  %s -i input.mp4 -o output.mv2                 # Use default setting (q=2)\n", program_name);
    printf("  %s -i input.avi -f 15 -q 3 -o output.mv2      # 15fps @ q=3\n", program_name);
@@ -2016,7 +2023,7 @@ int main(int argc, char *argv[]) {
           enc->blocks_intra, enc->blocks_inter, enc->blocks_motion, enc->blocks_skip);
    
    if (enc->bitrate_mode > 0) {
-        printf("  Rate control factor: %.3f\n", enc->rate_control_factor);
+        printf("  Per-block complexity-based rate control: enabled\n");
    }
    
    cleanup_encoder(enc);