bitrate mode wip

This commit is contained in:
minjaesong
2025-09-30 16:06:58 +09:00
parent 7f22ec8cc7
commit 0f784eb741
4 changed files with 287 additions and 22 deletions

View File

@@ -647,6 +647,7 @@ function tryReadNextTAVHeader() {
try {
let t1 = sys.nanoTime()
let totalFilesProcessed = 0
let decoderDbgInfo = {}
while (!stopPlay && seqread.getReadCount() < FILE_LENGTH) {
@@ -719,7 +720,7 @@ try {
// Call new TAV hardware decoder that handles Zstd decompression internally
// Note: No longer using JS gzip.decompFromTo - Kotlin handles Zstd natively
graphics.tavDecodeCompressed(
decoderDbgInfo = graphics.tavDecodeCompressed(
compressedPtr, // Pass compressed data directly
compressedSize, // Size of compressed data
CURRENT_RGB_ADDR, PREV_RGB_ADDR, // RGB buffer pointers
@@ -817,9 +818,9 @@ try {
con.move(31, 1)
con.color_pair(253, 0)
if (currentFileIndex > 1) {
print(`File ${currentFileIndex}: ${frameCount}/${header.totalFrames} (${((frameCount / akku2 * 100)|0) / 100}f) `)
print(`File ${currentFileIndex}: ${frameCount}/${header.totalFrames} (qY=${decoderDbgInfo.qY}, ${((frameCount / akku2 * 100)|0) / 100}f) `)
} else {
print(`Frame: ${frameCount}/${header.totalFrames} (${((frameCount / akku2 * 100)|0) / 100}f) `)
print(`Frame: ${frameCount}/${header.totalFrames} (qY=${decoderDbgInfo.qY}, ${((frameCount / akku2 * 100)|0) / 100}f) `)
}
con.move(32, 1)
con.color_pair(253, 0)

View File

@@ -917,7 +917,8 @@ transmission capability, and region-of-interest coding.
uint8 Video Flags
- bit 0 = unused
- bit 1 = is NTSC framerate
- bit 2 = is lossless mode (shorthand for `-q 5 -Q1,1,1 -w 0`)
- bit 2 = is lossless mode
(shorthand for `-Q1,1,1 -w 0 --intra-only --no-perceptual-tuning --arate 384`)
- bit 3 = has region-of-interest coding (for still images only)
uint8 Encoder quality level (stored with bias of 1 (q0=1); used to derive anisotropy value)
uint8 Channel layout (bit-field: bit 0=has alpha, bit 1=has chroma inverted, bit 2=has luma inverted)

View File

@@ -4303,7 +4303,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// New tavDecode function that accepts compressed data and decompresses internally
fun tavDecodeCompressed(compressedDataPtr: Long, compressedSize: Int, currentRGBAddr: Long, prevRGBAddr: Long,
width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int,
frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1) {
frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1): HashMap<String, Any> {
// Read compressed data from VM memory into byte array
val compressedData = ByteArray(compressedSize)
@@ -4311,7 +4311,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
compressedData[i] = vm.peek(compressedDataPtr + i)!!.toByte()
}
try {
return try {
// Decompress using Zstd
val bais = ByteArrayInputStream(compressedData)
val zis = ZstdInputStream(bais)
@@ -4349,7 +4349,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// Original tavDecode function for backward compatibility (now handles decompressed data)
fun tavDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long,
width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int,
frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1) {
frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1): HashMap<String, Any> {
val dbgOut = HashMap<String, Any>()
tavDebugCurrentFrameNumber = frameCount
@@ -4382,6 +4384,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val qCo = vm.peek(readPtr++).toUint().let { if (it == 0) qCoGlobal else TAV_QLUT[it - 1] }
val qCg = vm.peek(readPtr++).toUint().let { if (it == 0) qCgGlobal else TAV_QLUT[it - 1] }
dbgOut["qY"] = qY
// debug print: raw decompressed bytes
/*print("TAV Decode raw bytes (Frame $frameCount, mode: ${arrayOf("SKIP", "INTRA", "DELTA")[mode]}): ")
for (i in 0 until 32) {
@@ -4413,6 +4417,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
} catch (e: Exception) {
println("TAV decode error: ${e.message}")
}
return dbgOut
}
private fun tavDecodeDWTIntraTileRGB(qIndex: Int, qYGlobal: Int, channelLayout: Int, readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long,

View File

@@ -139,7 +139,6 @@ static void generate_random_filename(char *filename) {
char TEMP_AUDIO_FILE[42];
// Utility macros
static inline int CLAMP(int x, int min, int max) {
return x < min ? min : (x > max ? max : x);
@@ -219,7 +218,7 @@ typedef struct {
} dwt_subband_info_t;
// TAV encoder structure
typedef struct {
typedef struct tav_encoder_s {
// Input/output files
char *input_file;
char *output_file;
@@ -245,6 +244,17 @@ typedef struct {
int decomp_levels;
int bitrate_mode;
int target_bitrate;
// Bitrate control (PID controller)
size_t *video_rate_bin; // Rolling window of compressed sizes
int video_rate_bin_size; // Current number of entries in bin
int video_rate_bin_capacity; // Maximum capacity (fps)
float pid_integral; // PID integral term
float pid_prev_error; // PID previous error for derivative
float pid_filtered_derivative; // Low-pass filtered derivative for smoothing
int adjusted_quantiser_y; // PID-adjusted qY value for current frame
size_t prev_frame_size; // Previous frame compressed size for scene change detection
int scene_change_cooldown; // Frames to wait after scene change before responding
// Flags
// int progressive; // no interlaced mode for TAV
@@ -318,6 +328,185 @@ typedef struct {
// Wavelet filter constants removed - using lifting scheme implementation instead
// Bitrate control functions
static void update_video_rate_bin(tav_encoder_t *enc, size_t compressed_size) {
if (!enc->bitrate_mode) return;
if (enc->video_rate_bin_size < enc->video_rate_bin_capacity) {
enc->video_rate_bin[enc->video_rate_bin_size++] = compressed_size;
} else {
// Shift old entries out
memmove(enc->video_rate_bin, enc->video_rate_bin + 1,
(enc->video_rate_bin_capacity - 1) * sizeof(size_t));
enc->video_rate_bin[enc->video_rate_bin_capacity - 1] = compressed_size;
}
}
static float get_video_rate_kbps(tav_encoder_t *enc) {
if (!enc->bitrate_mode || enc->video_rate_bin_size == 0) return 0.0f;
size_t base_rate = 0;
for (int i = 0; i < enc->video_rate_bin_size; i++) {
base_rate += enc->video_rate_bin[i];
}
float mult = (float)enc->output_fps / enc->video_rate_bin_size;
return (base_rate * mult / 1024.0f) * 8.0f; // Convert to kbps
}
// PID controller parameters - heavily damped to prevent oscillation
#define PID_KP 0.15f // Proportional gain - very conservative
#define PID_KI 0.01f // Integral gain - slow convergence
#define PID_KD 0.6f // Derivative gain - heavy damping to resist changes
#define MAX_QY_CHANGE 1 // Maximum quantiser change per frame - very conservative
#define DERIVATIVE_FILTER 0.75f // Heavy low-pass filter for derivative
#define INTEGRAL_DEADBAND 0.10f // Don't accumulate integral within ±10% of target
static void adjust_quantiser_for_bitrate(tav_encoder_t *enc) {
if (!enc->bitrate_mode) {
// Not in bitrate mode, use base quantiser
enc->adjusted_quantiser_y = enc->quantiser_y;
return;
}
// Need at least a few frames to measure bitrate
if (enc->video_rate_bin_size < (enc->video_rate_bin_capacity / 2)) {
// Not enough data yet, use base quantiser
enc->adjusted_quantiser_y = enc->quantiser_y;
return;
}
float current_bitrate = get_video_rate_kbps(enc);
float target_bitrate = (float)enc->target_bitrate;
// Calculate error (positive = over target, negative = under target)
float error = current_bitrate - target_bitrate;
// Calculate error percentage for adaptive scaling
float error_percent = fabsf(error) / target_bitrate;
// Detect scene changes by looking at sudden bitrate jumps
// Scene changes cause temporary spikes that shouldn't trigger aggressive corrections
float derivative_abs = fabsf(error - enc->pid_prev_error);
float derivative_threshold = target_bitrate * 0.4f; // 40% jump = scene change
if (derivative_abs > derivative_threshold && enc->scene_change_cooldown == 0) {
// Scene change detected - start cooldown
enc->scene_change_cooldown = 5; // Wait 5 frames before responding aggressively
}
// Reduce responsiveness during scene change cooldown
float response_factor = (enc->scene_change_cooldown > 0) ? 0.3f : 1.0f;
if (enc->scene_change_cooldown > 0) {
enc->scene_change_cooldown--;
}
// PID calculations with scene change damping
float proportional = error * response_factor;
// Conditional integration: only accumulate when error is outside deadband
// This prevents windup when close to target
// Also don't accumulate during scene change cooldown to prevent overreaction
if (error_percent > INTEGRAL_DEADBAND && enc->scene_change_cooldown == 0) {
enc->pid_integral += error;
} else {
// Decay integral when within deadband or during scene changes
enc->pid_integral *= 0.95f;
}
float derivative = error - enc->pid_prev_error;
enc->pid_prev_error = error;
// Apply low-pass filter to derivative to reduce noise from scene changes
// This smooths out sudden spikes and prevents oscillation
enc->pid_filtered_derivative = (DERIVATIVE_FILTER * enc->pid_filtered_derivative) +
((1.0f - DERIVATIVE_FILTER) * derivative);
// Clamp integral to prevent windup (tighter bounds)
enc->pid_integral = FCLAMP(enc->pid_integral, -200.0f, 200.0f);
// Calculate adjustment using filtered derivative for smoother response
float pid_output = (PID_KP * proportional) + (PID_KI * enc->pid_integral) +
(PID_KD * enc->pid_filtered_derivative);
// Adaptive scaling based on error magnitude and current quantiser position
// At low quantisers (0-10), QLUT is exponential and small changes cause huge bitrate swings
float scale_factor = 50.0f; // Base: ~50 kbps error = 1 quantiser step
int max_change = MAX_QY_CHANGE;
if (enc->adjusted_quantiser_y < 5) {
// Extreme lossless (qY 0-4) - be very conservative but still responsive
// At qY=0, QLUT[0]=1, which is essentially lossless and bitrate is huge
// Use fixed scale factor to ensure controller can actually respond
scale_factor = 150.0f; // ~150 kbps error = 1 step
max_change = 1;
} else if (enc->adjusted_quantiser_y < 15) {
// Very near lossless (qY 5-14) - very conservative
scale_factor = 300.0f; // ~300 kbps error = 1 step
max_change = 1;
} else if (enc->adjusted_quantiser_y < 30) {
// Near lossless range (qY 15-29) - be conservative
scale_factor = 120.0f; // ~120 kbps error = 1 step
max_change = 2;
} else if (error_percent > 0.5f) {
// Large error - be more aggressive
scale_factor = 30.0f;
}
int adjustment = (int)(pid_output / scale_factor);
// Limit maximum change per frame to prevent wild swings (adaptive limit)
adjustment = CLAMP(adjustment, -max_change, max_change);
// Apply logarithmic scaling to adjustment based on current qY
// At low qY (0-10), QLUT is exponential so we need much smaller steps
// At high qY (40+), bitrate changes are small so we can take larger steps
// This makes it "hard to reach towards 1, easy to reach towards large value"
float log_scale = 1.0f;
int current_qy = enc->adjusted_quantiser_y;
// Only apply log scaling when moving deeper into low qY region
// If we're at low qY and want to move up (increase qY), use faster response
int wants_to_increase = (adjustment > 0);
if (current_qy < 10 && !wants_to_increase) {
// Moving down into very near lossless - be very careful
log_scale = 0.15f + (current_qy / 10.0f) * 0.35f; // 0.15 at qY=0, 0.5 at qY=10
} else if (current_qy < 10 && wants_to_increase) {
// Escaping from very low qY - allow faster movement
log_scale = 0.8f; // Much faster escape from qY < 10
} else if (current_qy < 20) {
// Near lossless - small adjustments
log_scale = 0.5f + ((current_qy - 10) / 10.0f) * 0.3f; // 0.5 at qY=10, 0.8 at qY=20
} else if (current_qy < 40) {
// Moderate quality - normal adjustments
log_scale = 0.8f + ((current_qy - 20) / 20.0f) * 0.2f; // 0.8 at qY=20, 1.0 at qY=40
}
// else: qY >= 40, use full scale (1.0)
adjustment = (int)(adjustment * log_scale);
// Ensure at least ±1 step if PID wants to move strongly (prevent sticking)
if (adjustment == 0 && fabsf(pid_output / scale_factor) > 0.8f) {
adjustment = (pid_output > 0) ? 1 : -1;
}
// Calculate adjusted quantiser (but don't modify base quantiser)
int new_quantiser_y = enc->adjusted_quantiser_y + adjustment;
// Avoid extremely low qY values where QLUT is exponential and causes wild swings
// For 5000 kbps target, qY < 3 is usually too low and causes oscillation
int min_qy = (target_bitrate >= 8000) ? 0 : (target_bitrate >= 4000) ? 3 : 5;
new_quantiser_y = CLAMP(new_quantiser_y, min_qy, 254); // Max index is 254
if (enc->verbose) {
printf("Bitrate control: %.1f kbps (target: %.1f kbps) -> qY %d->%d (adj: %d, err: %.1f%%)\n",
current_bitrate, target_bitrate, enc->adjusted_quantiser_y, new_quantiser_y, adjustment, error_percent * 100);
}
enc->adjusted_quantiser_y = new_quantiser_y;
}
// Swap ping-pong frame buffers (eliminates need for memcpy)
static void swap_frame_buffers(tav_encoder_t *enc) {
// Flip the buffer index
@@ -388,7 +577,7 @@ static void show_usage(const char *program_name) {
printf(" -q, --quality N Quality level 0-5 (default: 2)\n");
printf(" -Q, --quantiser Y,Co,Cg Quantiser levels 1-255 for each channel (1: lossless, 255: potato)\n");
printf(" -w, --wavelet N Wavelet filter: 0=5/3 reversible, 1=9/7 irreversible, 2=DD-4 (default: 1)\n");
// printf(" -b, --bitrate N Target bitrate in kbps (enables bitrate control mode)\n");
printf(" -b, --bitrate N Target bitrate in kbps (enables bitrate control mode)\n");
printf(" -c, --channel-layout N Channel layout: 0=Y-Co-Cg, 1=Y-Co-Cg-A, 2=Y-only, 3=Y-A, 4=Co-Cg, 5=Co-Cg-A (default: 0)\n");
printf(" --arate N MP2 audio bitrate in kbps (overrides quality-based audio rate)\n");
printf(" Valid values: 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384\n");
@@ -436,7 +625,7 @@ static void show_usage(const char *program_name) {
printf(" %s -i input.mp4 -o output.mv3 # Default settings\n", program_name);
printf(" %s -i input.mkv -q 4 -o output.mv3 # At maximum quality\n", program_name);
printf(" %s -i input.avi --lossless -o output.mv3 # Lossless encoding\n", program_name);
// printf(" %s -i input.mp4 -b 800 -o output.mv3 # 800 kbps bitrate target\n", program_name);
printf(" %s -i input.mp4 -b 6000 -o output.mv3 # 6000 kbps bitrate target\n", program_name);
printf(" %s -i input.webm -S subs.srt -o output.mv3 # With subtitles\n", program_name);
}
@@ -537,6 +726,23 @@ static int initialise_encoder(tav_encoder_t *enc) {
enc->previous_coeffs_alpha = malloc(total_coeff_size);
enc->previous_coeffs_allocated = 0; // Will be set to 1 after first I-frame
// Initialize bitrate control if in bitrate mode
if (enc->bitrate_mode) {
enc->video_rate_bin_capacity = enc->output_fps > 0 ? enc->output_fps : enc->fps;
enc->video_rate_bin = calloc(enc->video_rate_bin_capacity, sizeof(size_t));
enc->video_rate_bin_size = 0;
enc->pid_integral = 0.0f;
enc->pid_prev_error = 0.0f;
enc->adjusted_quantiser_y = enc->quantiser_y; // Start with base quantiser
if (!enc->video_rate_bin) {
return -1;
}
printf("Bitrate control enabled: target = %d kbps, initial quality = %d\n",
enc->target_bitrate, enc->quality_level);
}
if (!enc->frame_rgb[0] || !enc->frame_rgb[1] ||
!enc->current_frame_y || !enc->current_frame_co || !enc->current_frame_cg || !enc->current_frame_alpha ||
!enc->tiles || !enc->zstd_ctx || !enc->compressed_buffer ||
@@ -1367,12 +1573,14 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
// Write tile header
buffer[offset++] = mode;
// TODO calculate frame complexity and create quantiser overrides
buffer[offset++] = 0; // qY override
buffer[offset++] = 0; // qCo override
buffer[offset++] = 0; // qCg override
// technically, putting this in here would create three redundant copies of the same value, but it's much easier to code this way :v
int this_frame_qY = QLUT[enc->quantiser_y];
// Use adjusted quantiser from bitrate control, or base quantiser if not in bitrate mode
int qY_override = enc->bitrate_mode ? enc->adjusted_quantiser_y : enc->quantiser_y;
buffer[offset++] = (!enc->bitrate_mode) ? 0 : qY_override + 1; // qY override; must be stored with bias of 1
buffer[offset++] = 0; // qCo override, currently unused
buffer[offset++] = 0; // qCg override, currently unused
int this_frame_qY = QLUT[qY_override];
int this_frame_qCo = QLUT[enc->quantiser_co];
int this_frame_qCg = QLUT[enc->quantiser_cg];
@@ -2967,6 +3175,35 @@ int main(int argc, char *argv[]) {
case 'w':
enc->wavelet_filter = CLAMP(atoi(optarg), 0, 255);
break;
case 'b': {
int bitrate = atoi(optarg);
if (bitrate <= 0) {
fprintf(stderr, "Error: Invalid target bitrate: %d\n", bitrate);
cleanup_encoder(enc);
return 1;
}
enc->bitrate_mode = 1;
enc->target_bitrate = bitrate;
// Choose initial q-index based on target bitrate
if (bitrate >= 64000) {
enc->quality_level = 5;
} else if (bitrate >= 32000) {
enc->quality_level = 4;
} else if (bitrate >= 16000) {
enc->quality_level = 3;
} else if (bitrate >= 8000) {
enc->quality_level = 2;
} else if (bitrate >= 4000) {
enc->quality_level = 1;
} else {
enc->quality_level = 0;
}
enc->quantiser_y = QUALITY_Y[enc->quality_level];
enc->quantiser_co = QUALITY_CO[enc->quality_level];
enc->quantiser_cg = QUALITY_CG[enc->quality_level];
break;
}
case 'c': {
int layout = atoi(optarg);
if (layout < 0 || layout > 5) {
@@ -3067,6 +3304,15 @@ int main(int argc, char *argv[]) {
}
}
if (enc->lossless) {
enc->perceptual_tuning = 0;
enc->quantiser_y = 0; // will be resolved to 1
enc->quantiser_co = 0; // ditto
enc->quantiser_cg = 0; // do.
enc->intra_only = 1;
enc->audio_bitrate = 384;
}
if ((!enc->input_file && !enc->test_mode) || !enc->output_file) {
fprintf(stderr, "Error: Input and output files must be specified\n");
show_usage(argv[0]);
@@ -3301,6 +3547,13 @@ int main(int argc, char *argv[]) {
break;
}
else {
// Update bitrate tracking with compressed video packet size
if (enc->bitrate_mode) {
// packet_size includes packet header (5 bytes), subtract to get just video data
size_t video_data_size = packet_size > 5 ? packet_size - 5 : 0;
update_video_rate_bin(enc, video_data_size);
adjust_quantiser_for_bitrate(enc);
}
// Process audio for this frame
process_audio(enc, true_frame_count, enc->output_fp);
@@ -3342,8 +3595,9 @@ int main(int argc, char *argv[]) {
double elapsed = (now.tv_sec - enc->start_time.tv_sec) +
(now.tv_usec - enc->start_time.tv_usec) / 1000000.0;
double fps = frame_count / elapsed;
printf("Encoded frame %d (%s, %.1f fps)\n", frame_count,
is_keyframe ? "I-frame" : "P-frame", fps);
printf("Encoded frame %d (%s, %.1f fps, qY=%d)\n", frame_count,
is_keyframe ? "I-frame" : "P-frame", fps, QLUT[enc->bitrate_mode ? enc->adjusted_quantiser_y : enc->quantiser_y]);
}
}
@@ -3419,15 +3673,18 @@ static void cleanup_encoder(tav_encoder_t *enc) {
free(enc->previous_coeffs_co);
free(enc->previous_coeffs_cg);
free(enc->previous_coeffs_alpha);
// Free bitrate control structures
free(enc->video_rate_bin);
// Free subtitle list
if (enc->subtitles) {
free_subtitle_list(enc->subtitles);
}
if (enc->zstd_ctx) {
ZSTD_freeCCtx(enc->zstd_ctx);
}
free(enc);
}