mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-06-06 05:28:31 +09:00
complexity calculation now considers chroma
This commit is contained in:
@@ -427,7 +427,79 @@ static void extract_ycocgr_block(uint8_t *rgb_frame, int width, int height,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate block complexity based on spatial activity
|
// Calculate spatial activity for any channel (16x16 or 8x8)
|
||||||
|
static float calculate_spatial_activity(const float *block, int block_size) {
|
||||||
|
float activity = 0.0f;
|
||||||
|
|
||||||
|
// Sum of absolute differences with neighbors (spatial activity)
|
||||||
|
for (int y = 0; y < block_size; y++) {
|
||||||
|
for (int x = 0; x < block_size; x++) {
|
||||||
|
float pixel = block[y * block_size + x];
|
||||||
|
|
||||||
|
// Compare with right neighbor
|
||||||
|
if (x < block_size - 1) {
|
||||||
|
activity += fabsf(pixel - block[y * block_size + (x + 1)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compare with bottom neighbor
|
||||||
|
if (y < block_size - 1) {
|
||||||
|
activity += fabsf(pixel - block[(y + 1) * block_size + x]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return activity;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate variance for any channel
|
||||||
|
static float calculate_variance(const float *block, int block_size) {
|
||||||
|
int total_pixels = block_size * block_size;
|
||||||
|
|
||||||
|
// Calculate mean
|
||||||
|
float mean = 0.0f;
|
||||||
|
for (int i = 0; i < total_pixels; i++) {
|
||||||
|
mean += block[i];
|
||||||
|
}
|
||||||
|
mean /= total_pixels;
|
||||||
|
|
||||||
|
// Calculate variance
|
||||||
|
float variance = 0.0f;
|
||||||
|
for (int i = 0; i < total_pixels; i++) {
|
||||||
|
float diff = block[i] - mean;
|
||||||
|
variance += diff * diff;
|
||||||
|
}
|
||||||
|
variance /= total_pixels;
|
||||||
|
|
||||||
|
return variance;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Enhanced block complexity calculation including chroma information
|
||||||
|
static float calculate_block_complexity_enhanced(const float *y_block, const float *co_block, const float *cg_block) {
|
||||||
|
// Luma complexity (16x16)
|
||||||
|
float luma_activity = calculate_spatial_activity(y_block, BLOCK_SIZE);
|
||||||
|
float luma_variance = calculate_variance(y_block, BLOCK_SIZE);
|
||||||
|
float luma_complexity = luma_activity + sqrtf(luma_variance) * 10.0f;
|
||||||
|
|
||||||
|
// Chroma complexity (8x8 blocks, but weighted appropriately)
|
||||||
|
float co_activity = calculate_spatial_activity(co_block, HALF_BLOCK_SIZE);
|
||||||
|
float co_variance = calculate_variance(co_block, HALF_BLOCK_SIZE);
|
||||||
|
float co_complexity = co_activity + sqrtf(co_variance) * 10.0f;
|
||||||
|
|
||||||
|
float cg_activity = calculate_spatial_activity(cg_block, HALF_BLOCK_SIZE);
|
||||||
|
float cg_variance = calculate_variance(cg_block, HALF_BLOCK_SIZE);
|
||||||
|
float cg_complexity = cg_activity + sqrtf(cg_variance) * 10.0f;
|
||||||
|
|
||||||
|
// Combine complexities with appropriate weighting
|
||||||
|
// Luma gets primary weight, chroma gets secondary weight but significant enough to matter
|
||||||
|
// Scale chroma by 4 to account for 8x8 vs 16x16 size difference (64 vs 256 pixels)
|
||||||
|
float total_complexity = luma_complexity +
|
||||||
|
(co_complexity * 4.0f * 0.3f) +
|
||||||
|
(cg_complexity * 4.0f * 0.3f);
|
||||||
|
|
||||||
|
return total_complexity;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Legacy function for compatibility - calls enhanced version
|
||||||
static float calculate_block_complexity(const float *y_block) {
|
static float calculate_block_complexity(const float *y_block) {
|
||||||
float complexity = 0.0f;
|
float complexity = 0.0f;
|
||||||
|
|
||||||
@@ -475,14 +547,15 @@ static float complexity_to_rate_factor(float complexity) {
|
|||||||
return 0.7f; // Reduce detail for flat blocks (saves bits, minimal perceptual loss)
|
return 0.7f; // Reduce detail for flat blocks (saves bits, minimal perceptual loss)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parameters derived from statistical analysis of 10 video samples:
|
// Parameters recalibrated for chroma-aware complexity calculation:
|
||||||
// - Most content has median complexity around 500-3000
|
// - Median complexity now ~1400-3700 (increased due to chroma contribution)
|
||||||
// - Heavy concentration at low complexity, wide spread at high complexity
|
// - High complexity threshold ~10000-15000 (91st percentile)
|
||||||
|
// - Maximum values up to ~22800 (vs ~17000 in luma-only version)
|
||||||
|
|
||||||
const float median_complexity = 2400.0f; // Target for rate_factor = 1.0
|
const float median_complexity = 4447.0f; // Target for rate_factor ≈ 1.0. e^8.4
|
||||||
const float high_complexity = 8500.0f; // ~91st percentile threshold
|
const float high_complexity = 12088.0f; // ~91st percentile threshold. e^9.4
|
||||||
|
|
||||||
// Logarithmic preprocessing to handle wide dynamic range (0 to 17000+)
|
// Logarithmic preprocessing to handle wide dynamic range (0 to 23000+)
|
||||||
float log_complexity = logf(complexity + 1.0f);
|
float log_complexity = logf(complexity + 1.0f);
|
||||||
float log_median = logf(median_complexity + 1.0f);
|
float log_median = logf(median_complexity + 1.0f);
|
||||||
float log_high = logf(high_complexity + 1.0f);
|
float log_high = logf(high_complexity + 1.0f);
|
||||||
@@ -495,7 +568,9 @@ static float complexity_to_rate_factor(float complexity) {
|
|||||||
float rate_factor = 0.7f + 0.9f * sigmoid; // Range: 0.7 to 1.6
|
float rate_factor = 0.7f + 0.9f * sigmoid; // Range: 0.7 to 1.6
|
||||||
|
|
||||||
// Clamp to prevent extreme coefficient amplification/reduction
|
// Clamp to prevent extreme coefficient amplification/reduction
|
||||||
return FCLAMP(rate_factor, 0.6f, 1.8f);
|
return FCLAMP(rate_factor, 0.7f, 1.6f);
|
||||||
|
|
||||||
|
// See also: https://www.desmos.com/calculator/awwjztvv3o
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add complexity value to statistics collection
|
// Add complexity value to statistics collection
|
||||||
@@ -862,7 +937,7 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
|
|||||||
block->mv_x = 0;
|
block->mv_x = 0;
|
||||||
block->mv_y = 0;
|
block->mv_y = 0;
|
||||||
// Even skip blocks benefit from complexity analysis for consistency
|
// Even skip blocks benefit from complexity analysis for consistency
|
||||||
float block_complexity = calculate_block_complexity(enc->y_workspace);
|
float block_complexity = calculate_block_complexity_enhanced(enc->y_workspace, enc->co_workspace, enc->cg_workspace);
|
||||||
add_complexity_value(enc, block_complexity);
|
add_complexity_value(enc, block_complexity);
|
||||||
block->rate_control_factor = complexity_to_rate_factor(block_complexity);
|
block->rate_control_factor = complexity_to_rate_factor(block_complexity);
|
||||||
block->cbp = 0x00; // No coefficients present
|
block->cbp = 0x00; // No coefficients present
|
||||||
@@ -877,7 +952,7 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
|
|||||||
// Good motion prediction - use motion-only mode
|
// Good motion prediction - use motion-only mode
|
||||||
block->mode = TEV_MODE_MOTION;
|
block->mode = TEV_MODE_MOTION;
|
||||||
// Analyze complexity for motion blocks too
|
// Analyze complexity for motion blocks too
|
||||||
float block_complexity = calculate_block_complexity(enc->y_workspace);
|
float block_complexity = calculate_block_complexity_enhanced(enc->y_workspace, enc->co_workspace, enc->cg_workspace);
|
||||||
add_complexity_value(enc, block_complexity);
|
add_complexity_value(enc, block_complexity);
|
||||||
block->rate_control_factor = complexity_to_rate_factor(block_complexity);
|
block->rate_control_factor = complexity_to_rate_factor(block_complexity);
|
||||||
block->cbp = 0x00; // No coefficients present
|
block->cbp = 0x00; // No coefficients present
|
||||||
@@ -922,7 +997,8 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Calculate block complexity BEFORE DCT transform for adaptive rate control
|
// Calculate block complexity BEFORE DCT transform for adaptive rate control
|
||||||
float block_complexity = calculate_block_complexity(enc->y_workspace);
|
// Use enhanced complexity calculation that includes chroma information
|
||||||
|
float block_complexity = calculate_block_complexity_enhanced(enc->y_workspace, enc->co_workspace, enc->cg_workspace);
|
||||||
add_complexity_value(enc, block_complexity);
|
add_complexity_value(enc, block_complexity);
|
||||||
block->rate_control_factor = complexity_to_rate_factor(block_complexity);
|
block->rate_control_factor = complexity_to_rate_factor(block_complexity);
|
||||||
|
|
||||||
@@ -2439,7 +2515,7 @@ int main(int argc, char *argv[]) {
|
|||||||
gettimeofday(&enc->start_time, NULL);
|
gettimeofday(&enc->start_time, NULL);
|
||||||
|
|
||||||
printf("Encoding video with YCoCg-R 4:2:0 format...\n");
|
printf("Encoding video with YCoCg-R 4:2:0 format...\n");
|
||||||
if (enc->output_fps > 0) {
|
if (enc->output_fps != enc->fps) {
|
||||||
printf("Frame rate conversion enabled: %d fps output\n", enc->output_fps);
|
printf("Frame rate conversion enabled: %d fps output\n", enc->output_fps);
|
||||||
}
|
}
|
||||||
if (enc->bitrate_mode > 0) {
|
if (enc->bitrate_mode > 0) {
|
||||||
|
|||||||
Reference in New Issue
Block a user