rate control not quite working but committing anyway due to format change

This commit is contained in:
minjaesong
2025-08-24 13:35:08 +09:00
parent b1857e355a
commit cb07ab1f3b
4 changed files with 409 additions and 219 deletions

View File

@@ -201,8 +201,20 @@ try {
sys.memcpy(CURRENT_RGB_ADDR, PREV_RGB_ADDR, FRAME_PIXELS * 3) sys.memcpy(CURRENT_RGB_ADDR, PREV_RGB_ADDR, FRAME_PIXELS * 3)
} else if (packetType == TEV_PACKET_IFRAME || packetType == TEV_PACKET_PFRAME) { } else if (packetType == TEV_PACKET_IFRAME || packetType == TEV_PACKET_PFRAME) {
// Video frame packet // Video frame packet (always includes rate control factor)
let payloadLen = seqread.readInt() let payloadLen = seqread.readInt()
// Always read rate control factor (4 bytes, little-endian float)
let rateFactorBytes = seqread.readBytes(4)
let view = new DataView(new ArrayBuffer(4))
for (let i = 0; i < 4; i++) {
view.setUint8(i, sys.peek(rateFactorBytes + i))
}
let rateControlFactor = view.getFloat32(0, true) // true = little-endian
//serial.println(`rateControlFactor = ${rateControlFactor}`)
sys.free(rateFactorBytes)
payloadLen -= 4 // Subtract rate factor size from payload
let compressedPtr = seqread.readBytes(payloadLen) let compressedPtr = seqread.readBytes(payloadLen)
updateDataRateBin(payloadLen) updateDataRateBin(payloadLen)
@@ -232,9 +244,9 @@ try {
continue continue
} }
// Hardware-accelerated TEV YCoCg-R decoding to RGB buffers // Hardware-accelerated TEV YCoCg-R decoding to RGB buffers (with rate control factor)
try { try {
graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, height, quality, debugMotionVectors) graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, height, quality, debugMotionVectors, rateControlFactor)
// Upload RGB buffer to display framebuffer with dithering // Upload RGB buffer to display framebuffer with dithering
graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, DISPLAY_RG_ADDR, DISPLAY_BA_ADDR, graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, DISPLAY_RG_ADDR, DISPLAY_BA_ADDR,

View File

@@ -678,6 +678,12 @@ Created by Claude on 2025-08-17
TEV is a modern video codec optimized for TSVM's 4096-color hardware, featuring TEV is a modern video codec optimized for TSVM's 4096-color hardware, featuring
DCT-based compression, motion compensation, and efficient temporal coding. DCT-based compression, motion compensation, and efficient temporal coding.
## Version History
- Version 2.0: YCoCg-R 4:2:0 with 16x16/8x8 DCT blocks
- Version 2.1: Added Rate Control Factor to all video packets (breaking change)
* Enables bitrate-constrained encoding alongside quality modes
* All video frames now include 4-byte rate control factor after payload size
# File Structure # File Structure
\x1F T S V M T E V \x1F T S V M T E V
[HEADER] [HEADER]
@@ -694,7 +700,7 @@ DCT-based compression, motion compensation, and efficient temporal coding.
uint16 Height: video height in pixels uint16 Height: video height in pixels
uint16 FPS: frames per second uint16 FPS: frames per second
uint32 Total Frames: number of video frames uint32 Total Frames: number of video frames
uint8 Quality: quantization quality (0-7, higher = better) uint8 Quality: quantization quality (0-4, higher = better)
byte[5] Reserved byte[5] Reserved
## Packet Types ## Packet Types
@@ -705,7 +711,8 @@ DCT-based compression, motion compensation, and efficient temporal coding.
## Video Packet Structure ## Video Packet Structure
uint8 Packet Type uint8 Packet Type
uint32 Compressed Size uint32 Compressed Size (includes rate control factor size)
float Rate Control Factor (4 bytes, little-endian)
* Gzip-compressed Block Data * Gzip-compressed Block Data
## Block Data (per 16x16 block) ## Block Data (per 16x16 block)
@@ -722,11 +729,19 @@ DCT-based compression, motion compensation, and efficient temporal coding.
int16[64] DCT Coefficients Cg (subsampled by two, aggressively quantised) int16[64] DCT Coefficients Cg (subsampled by two, aggressively quantised)
For SKIP and MOTION mode, DCT coefficients are filled with zero For SKIP and MOTION mode, DCT coefficients are filled with zero
## DCT Quantization ## DCT Quantization and Rate Control
TEV uses 8 quality levels (0=lowest, 7=highest) with progressive quantization TEV uses 8 quality levels (0=lowest, 7=highest) with progressive quantization
tables optimized for perceptual quality. DC coefficients use fixed quantizer tables optimized for perceptual quality. DC coefficients use fixed quantizer
of 8, while AC coefficients are quantized according to quality tables. of 8, while AC coefficients are quantized according to quality tables.
### Rate Control Factor
Each video frame includes a Rate Control Factor that modifies quantization:
- Quality mode: Factor = 1.0 (fixed quantization based on quality level)
- Bitrate mode: Factor varies per frame based on content complexity and target bitrate
- Encoder: quantized_coeff = dct_coeff / (base_quant * rate_factor)
- Decoder: dequantized_coeff = quantized_coeff * (base_quant * rate_factor)
- Optimization: When factor ≈ 1.0 (0.999-1.001), decoder uses original tables
## Motion Compensation ## Motion Compensation
- Search range: ±8 pixels - Search range: ±8 pixels
- Sub-pixel precision: 1/4 pixel (again, integer precision for now) - Sub-pixel precision: 1/4 pixel (again, integer precision for now)

View File

@@ -1567,7 +1567,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
* @param frameCounter Frame counter for temporal patterns * @param frameCounter Frame counter for temporal patterns
*/ */
fun tevDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long, fun tevDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long,
width: Int, height: Int, quality: Int, debugMotionVectors: Boolean = false) { width: Int, height: Int, quality: Int, debugMotionVectors: Boolean = false,
rateControlFactor: Float = 1.0f) {
val blocksX = (width + 15) / 16 // 16x16 blocks now val blocksX = (width + 15) / 16 // 16x16 blocks now
val blocksY = (height + 15) / 16 val blocksY = (height + 15) / 16
@@ -1576,9 +1577,22 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val quantCOmult = QUANT_MULT_CO[quality] val quantCOmult = QUANT_MULT_CO[quality]
val quantCGmult = QUANT_MULT_CG[quality] val quantCGmult = QUANT_MULT_CG[quality]
val quantTableY = QUANT_TABLE_Y.map { it * quantYmult }.toIntArray() // Apply rate control factor to quantization tables (if not ~1.0, skip optimization)
val quantTableCo = QUANT_TABLE_C.map { it * quantCOmult }.toIntArray() val quantTableY = if (rateControlFactor in 0.999f..1.001f) {
val quantTableCg = QUANT_TABLE_C.map { it * quantCGmult }.toIntArray() QUANT_TABLE_Y.map { it * quantYmult }.toIntArray()
} else {
QUANT_TABLE_Y.map { (it * quantYmult * rateControlFactor).toInt() }.toIntArray()
}
val quantTableCo = if (rateControlFactor in 0.999f..1.001f) {
QUANT_TABLE_C.map { it * quantCOmult }.toIntArray()
} else {
QUANT_TABLE_C.map { (it * quantCOmult * rateControlFactor).toInt() }.toIntArray()
}
val quantTableCg = if (rateControlFactor in 0.999f..1.001f) {
QUANT_TABLE_C.map { it * quantCGmult }.toIntArray()
} else {
QUANT_TABLE_C.map { (it * quantCGmult * rateControlFactor).toInt() }.toIntArray()
}
var readPtr = blockDataPtr var readPtr = blockDataPtr

View File

@@ -16,7 +16,7 @@
#define TEV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x45\x56" // "\x1FTSVM TEV" #define TEV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x45\x56" // "\x1FTSVM TEV"
#define TEV_VERSION 2 // Updated for YCoCg-R 4:2:0 #define TEV_VERSION 2 // Updated for YCoCg-R 4:2:0
// version 1: 8x8 RGB // version 1: 8x8 RGB
// version 2: 16x16 Y, 8x8 Co/Cg, asymetric quantisation (current winner) // version 2: 16x16 Y, 8x8 Co/Cg, asymetric quantisation, optional quantiser multiplier for rate control multiplier (1.0 when unused) {current winner}
// version 3: version 2 + internal 6-bit processing (discarded due to higher noise floor) // version 3: version 2 + internal 6-bit processing (discarded due to higher noise floor)
// Block encoding modes (16x16 blocks) // Block encoding modes (16x16 blocks)
@@ -35,6 +35,9 @@
static inline int CLAMP(int x, int min, int max) { static inline int CLAMP(int x, int min, int max) {
return x < min ? min : (x > max ? max : x); return x < min ? min : (x > max ? max : x);
} }
// Limit a float to the closed range [min, max].
// The lower bound is tested first, so it wins if the bounds are given inverted.
static inline float FCLAMP(float x, float min, float max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
static const int MP2_RATE_TABLE[5] = {64, 112, 160, 224, 384}; static const int MP2_RATE_TABLE[5] = {64, 112, 160, 224, 384};
static const int QUANT_MULT_Y[5] = {40, 10, 6, 4, 1}; static const int QUANT_MULT_Y[5] = {40, 10, 6, 4, 1};
@@ -42,7 +45,7 @@ static const int QUANT_MULT_CO[5] = {40, 10, 6, 4, 1};
static const int QUANT_MULT_CG[5] = {106, 22, 10, 5, 1}; // CO[i] * sqrt(7 - 2i) static const int QUANT_MULT_CG[5] = {106, 22, 10, 5, 1}; // CO[i] * sqrt(7 - 2i)
// only leave (4, 6, 7) // only leave (4, 6, 7)
// Quality settings for quantization (Y channel) - 16x16 tables // Quality settings for quantisation (Y channel) - 16x16 tables
static const uint32_t QUANT_TABLE_Y[256] = static const uint32_t QUANT_TABLE_Y[256] =
// Quality 7 (highest) // Quality 7 (highest)
{2, 1, 1, 2, 3, 5, 6, 7, 6, 7, 8, 9, 10, 11, 12, 13, {2, 1, 1, 2, 3, 5, 6, 7, 6, 7, 8, 9, 10, 11, 12, 13,
@@ -62,7 +65,7 @@ static const uint32_t QUANT_TABLE_Y[256] =
13, 14, 15, 16, 17, 18, 19, 20, 19, 20, 21, 22, 23, 24, 25, 26, 13, 14, 15, 16, 17, 18, 19, 20, 19, 20, 21, 22, 23, 24, 25, 26,
14, 15, 16, 17, 18, 19, 20, 21, 20, 21, 22, 23, 24, 25, 26, 27}; 14, 15, 16, 17, 18, 19, 20, 21, 20, 21, 22, 23, 24, 25, 26, 27};
// Quality settings for quantization (Co channel - 8x8) // Quality settings for quantisation (Co channel - 8x8)
static const uint32_t QUANT_TABLE_C[64] = static const uint32_t QUANT_TABLE_C[64] =
{2, 3, 4, 6, 8, 12, 16, 20, {2, 3, 4, 6, 8, 12, 16, 20,
3, 4, 6, 8, 12, 16, 20, 24, 3, 4, 6, 8, 12, 16, 20, 24,
@@ -92,9 +95,9 @@ typedef struct __attribute__((packed)) {
uint8_t mode; // Block encoding mode uint8_t mode; // Block encoding mode
int16_t mv_x, mv_y; // Motion vector (1/4 pixel precision) int16_t mv_x, mv_y; // Motion vector (1/4 pixel precision)
uint16_t cbp; // Coded block pattern (which channels have non-zero coeffs) uint16_t cbp; // Coded block pattern (which channels have non-zero coeffs)
int16_t y_coeffs[256]; // Quantized Y DCT coefficients (16x16) int16_t y_coeffs[256]; // quantised Y DCT coefficients (16x16)
int16_t co_coeffs[64]; // Quantized Co DCT coefficients (8x8) int16_t co_coeffs[64]; // quantised Co DCT coefficients (8x8)
int16_t cg_coeffs[64]; // Quantized Cg DCT coefficients (8x8) int16_t cg_coeffs[64]; // quantised Cg DCT coefficients (8x8)
} tev_block_t; } tev_block_t;
typedef struct { typedef struct {
@@ -111,6 +114,11 @@ typedef struct {
int quality; // 0-4, higher = better quality int quality; // 0-4, higher = better quality
int verbose; int verbose;
// Bitrate control
int target_bitrate_kbps; // Target bitrate in kbps (0 = quality mode)
int bitrate_mode; // 0 = quality, 1 = bitrate, 2 = hybrid
float rate_control_factor; // Dynamic adjustment factor
// Frame buffers (8-bit RGB format for encoding) // Frame buffers (8-bit RGB format for encoding)
uint8_t *current_rgb, *previous_rgb, *reference_rgb; uint8_t *current_rgb, *previous_rgb, *reference_rgb;
@@ -141,6 +149,13 @@ typedef struct {
// Statistics // Statistics
int blocks_skip, blocks_intra, blocks_inter, blocks_motion; int blocks_skip, blocks_intra, blocks_inter, blocks_motion;
// Rate control statistics
size_t frame_bits_accumulator;
size_t target_bits_per_frame;
float complexity_history[60]; // Rolling window for complexity
int complexity_history_index;
float average_complexity;
} tev_encoder_t; } tev_encoder_t;
// RGB to YCoCg-R transform (per YCoCg-R specification with truncated division) // RGB to YCoCg-R transform (per YCoCg-R specification with truncated division)
@@ -312,19 +327,21 @@ static void dct_8x8(float *input, float *output) {
} }
} }
// Quantize DCT coefficient using quality table // quantise DCT coefficient using quality table with rate control
static int16_t quantize_coeff(float coeff, uint32_t quant, int is_dc, int is_chroma) { static int16_t quantise_coeff(float coeff, uint32_t quant, int is_dc, int is_chroma, float rate_factor) {
if (is_dc) { if (is_dc) {
if (is_chroma) { if (is_chroma) {
// Chroma DC: range -256 to +255, use lossless quantization for testing // Chroma DC: range -256 to +255, use lossless quantisation for testing
return (int16_t)roundf(coeff); return (int16_t)roundf(coeff);
} else { } else {
// Luma DC: range -128 to +127, use lossless quantization for testing // Luma DC: range -128 to +127, use lossless quantisation for testing
return (int16_t)roundf(coeff); return (int16_t)roundf(coeff);
} }
} else { } else {
// AC coefficients use quality table // AC coefficients use quality table with rate control adjustment
return (int16_t)roundf(coeff / quant); float adjusted_quant = quant * rate_factor;
adjusted_quant = fmaxf(adjusted_quant, 1.0f); // Prevent division by zero
return (int16_t)roundf(coeff / adjusted_quant);
} }
} }
@@ -570,6 +587,57 @@ static void compute_motion_residual(tev_encoder_t *enc, int block_x, int block_y
} }
} }
// Block complexity metric for rate control: the sum of the absolute values of
// workspace[1 .. size-1]. Element 0 (the DC term) is deliberately left out.
// Returns 0 when size is 1 or less.
static float calculate_block_complexity(float *workspace, int size) {
    float total = 0.0f;
    int idx = 1; // start past the DC component
    while (idx < size) {
        total += fabsf(workspace[idx]);
        idx++;
    }
    return total;
}
// Smallest step considered significant: 2^-24.
const float EPSILON = 1.0f / 16777216.0f;
// Bounds applied to the rate control factor after every update.
const float RATE_CONTROL_CLAMP_MAX = 64.0f;
// Reciprocal of RATE_CONTROL_CLAMP_MAX. Spelled as a literal because ISO C
// requires a constant expression in a file-scope initialiser, and another
// const-qualified object does not qualify; keep in sync with the line above.
const float RATE_CONTROL_CLAMP_MIN = 1.0f / 64.0f;
// Update rate control factor based on target bitrate.
//
// enc              encoder state; reads bitrate_mode, target_bits_per_frame and
//                  rate_control_factor, and writes complexity_history,
//                  complexity_history_index, average_complexity and
//                  rate_control_factor.
// frame_complexity complexity metric of the frame that was just encoded.
// frame_bits       number of bits that frame produced.
//
// In quality mode (bitrate_mode == 0) the factor is forced to 1.0. Otherwise
// the frame is recorded in the rolling complexity window and the factor is
// nudged towards the value that would have hit the per-frame bit budget.
static void update_rate_control(tev_encoder_t *enc, float frame_complexity, size_t frame_bits) {
    if (enc->bitrate_mode == 0) {
        // Quality mode - no rate control
        enc->rate_control_factor = 1.0f;
        return;
    }

    // Record this frame in the rolling complexity window
    const int history_len =
        (int)(sizeof(enc->complexity_history) / sizeof(enc->complexity_history[0]));
    enc->complexity_history[enc->complexity_history_index] = frame_complexity;
    enc->complexity_history_index = (enc->complexity_history_index + 1) % history_len;

    // Rolling average over slots holding a positive sample; zero-valued slots
    // (never written, or frames with no coefficients) are left out.
    float sum = 0.0f;
    int count = 0;
    for (int i = 0; i < history_len; i++) {
        if (enc->complexity_history[i] > 0.0f) {
            sum += enc->complexity_history[i];
            count++;
        }
    }
    enc->average_complexity = (count > 0) ? sum / count : frame_complexity;

    // Nothing to steer towards without a bit budget or a measured frame size
    if (enc->target_bits_per_frame == 0 || frame_bits == 0) {
        return;
    }

    // A frame with no measurable complexity (or a NaN metric) carries no
    // information about how the quantiser should move, and would otherwise
    // turn the adjustment below into a division by zero. Hold the factor.
    if (!(frame_complexity > 0.0f)) {
        return;
    }

    float bitrate_ratio = (float)enc->target_bits_per_frame / (float)frame_bits;
    float complexity_ratio = frame_complexity / fmaxf(enc->average_complexity, 1.0f);

    // > 1 when the frame overshot its budget, < 1 when it undershot, scaled
    // down for frames that are more complex than the recent average.
    float adjustment = 1.0f / (bitrate_ratio * complexity_ratio);

    // The measured frame was produced with the current factor, so the
    // correction is applied relative to it; this lets the loop settle where
    // frame size matches the budget rather than where factor == overshoot.
    float target_factor = enc->rate_control_factor * adjustment;

    // Damping: move only 20% of the way towards the target each frame.
    // (Previously the factor was overwritten with the new value before this
    // blend, which reduced the blend to a no-op.)
    float damped = 0.8f * enc->rate_control_factor + 0.2f * target_factor;

    // Clamp to reasonable range
    enc->rate_control_factor = FCLAMP(damped, RATE_CONTROL_CLAMP_MIN, RATE_CONTROL_CLAMP_MAX);
}
// Encode a 16x16 block // Encode a 16x16 block
static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_keyframe) { static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_keyframe) {
tev_block_t *block = &enc->block_data[block_y * ((enc->width + 15) / 16) + block_x]; tev_block_t *block = &enc->block_data[block_y * ((enc->width + 15) / 16) + block_x];
@@ -709,31 +777,31 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
// Apply fast DCT transform // Apply fast DCT transform
dct_16x16_fast(enc->y_workspace, enc->dct_workspace); dct_16x16_fast(enc->y_workspace, enc->dct_workspace);
// Quantize Y coefficients (luma) // quantise Y coefficients (luma)
const uint32_t *y_quant = QUANT_TABLE_Y; const uint32_t *y_quant = QUANT_TABLE_Y;
const uint32_t qmult_y = QUANT_MULT_Y[enc->quality]; const uint32_t qmult_y = QUANT_MULT_Y[enc->quality];
for (int i = 0; i < 256; i++) { for (int i = 0; i < 256; i++) {
block->y_coeffs[i] = quantize_coeff(enc->dct_workspace[i], y_quant[i] * qmult_y, i == 0, 0); block->y_coeffs[i] = quantise_coeff(enc->dct_workspace[i], y_quant[i] * qmult_y, i == 0, 0, enc->rate_control_factor);
} }
// Apply fast DCT transform to chroma // Apply fast DCT transform to chroma
dct_8x8_fast(enc->co_workspace, enc->dct_workspace); dct_8x8_fast(enc->co_workspace, enc->dct_workspace);
// Quantize Co coefficients (chroma - orange-blue) // quantise Co coefficients (chroma - orange-blue)
const uint32_t *co_quant = QUANT_TABLE_C; const uint32_t *co_quant = QUANT_TABLE_C;
const uint32_t qmult_co = QUANT_MULT_CO[enc->quality]; const uint32_t qmult_co = QUANT_MULT_CO[enc->quality];
for (int i = 0; i < 64; i++) { for (int i = 0; i < 64; i++) {
block->co_coeffs[i] = quantize_coeff(enc->dct_workspace[i], co_quant[i] * qmult_co, i == 0, 1); block->co_coeffs[i] = quantise_coeff(enc->dct_workspace[i], co_quant[i] * qmult_co, i == 0, 1, enc->rate_control_factor);
} }
// Apply fast DCT transform to Cg // Apply fast DCT transform to Cg
dct_8x8_fast(enc->cg_workspace, enc->dct_workspace); dct_8x8_fast(enc->cg_workspace, enc->dct_workspace);
// Quantize Cg coefficients (chroma - green-magenta, qmult_cg is more aggressive like NTSC Q) // quantise Cg coefficients (chroma - green-magenta, qmult_cg is more aggressive like NTSC Q)
const uint32_t *cg_quant = QUANT_TABLE_C; const uint32_t *cg_quant = QUANT_TABLE_C;
const uint32_t qmult_cg = QUANT_MULT_CG[enc->quality]; const uint32_t qmult_cg = QUANT_MULT_CG[enc->quality];
for (int i = 0; i < 64; i++) { for (int i = 0; i < 64; i++) {
block->cg_coeffs[i] = quantize_coeff(enc->dct_workspace[i], cg_quant[i] * qmult_cg, i == 0, 1); block->cg_coeffs[i] = quantise_coeff(enc->dct_workspace[i], cg_quant[i] * qmult_cg, i == 0, 1, enc->rate_control_factor);
} }
// Set CBP (simplified - always encode all channels) // Set CBP (simplified - always encode all channels)
@@ -757,6 +825,16 @@ static tev_encoder_t* init_encoder(void) {
enc->output_fps = 0; // No frame rate conversion by default enc->output_fps = 0; // No frame rate conversion by default
enc->verbose = 0; enc->verbose = 0;
// Rate control defaults
enc->target_bitrate_kbps = 0; // 0 = quality mode
enc->bitrate_mode = 0; // Quality mode by default
enc->rate_control_factor = 1.0f; // No adjustment initially
enc->frame_bits_accumulator = 0;
enc->target_bits_per_frame = 0;
enc->complexity_history_index = 0;
enc->average_complexity = 0.0f;
memset(enc->complexity_history, 0, sizeof(enc->complexity_history));
init_dct_tables(); init_dct_tables();
return enc; return enc;
@@ -858,10 +936,25 @@ static int encode_frame(tev_encoder_t *enc, FILE *output, int frame_num) {
int blocks_x = (enc->width + 15) / 16; int blocks_x = (enc->width + 15) / 16;
int blocks_y = (enc->height + 15) / 16; int blocks_y = (enc->height + 15) / 16;
// Track frame complexity for rate control
float frame_complexity = 0.0f;
size_t frame_start_bits = enc->total_output_bytes * 8;
// Encode all blocks // Encode all blocks
for (int by = 0; by < blocks_y; by++) { for (int by = 0; by < blocks_y; by++) {
for (int bx = 0; bx < blocks_x; bx++) { for (int bx = 0; bx < blocks_x; bx++) {
encode_block(enc, bx, by, is_keyframe); encode_block(enc, bx, by, is_keyframe);
// Calculate complexity for rate control (if enabled)
if (enc->bitrate_mode > 0) {
tev_block_t *block = &enc->block_data[by * blocks_x + bx];
if (block->mode == TEV_MODE_INTRA || block->mode == TEV_MODE_INTER) {
// Sum absolute values of quantised coefficients as complexity metric
for (int i = 1; i < 256; i++) frame_complexity += abs(block->y_coeffs[i]);
for (int i = 1; i < 64; i++) frame_complexity += abs(block->co_coeffs[i]);
for (int i = 1; i < 64; i++) frame_complexity += abs(block->cg_coeffs[i]);
}
}
} }
} }
@@ -900,15 +993,26 @@ static int encode_frame(tev_encoder_t *enc, FILE *output, int frame_num) {
// Clean up frame stream // Clean up frame stream
deflateEnd(&frame_stream); deflateEnd(&frame_stream);
// Write frame packet header // Write frame packet header (always include rate control factor)
uint8_t packet_type = is_keyframe ? TEV_PACKET_IFRAME : TEV_PACKET_PFRAME; uint8_t packet_type = is_keyframe ? TEV_PACKET_IFRAME : TEV_PACKET_PFRAME;
uint32_t payload_size = compressed_size; uint32_t payload_size = compressed_size + 4; // +4 bytes for rate control factor (always)
fwrite(&packet_type, 1, 1, output); fwrite(&packet_type, 1, 1, output);
fwrite(&payload_size, 4, 1, output); fwrite(&payload_size, 4, 1, output);
fwrite(&enc->rate_control_factor, 4, 1, output); // Always store rate control factor
fwrite(enc->compressed_buffer, 1, compressed_size, output); fwrite(enc->compressed_buffer, 1, compressed_size, output);
enc->total_output_bytes += 5 + compressed_size; if (enc->verbose) {
printf("rateControlFactor=%.6f\n", enc->rate_control_factor);
}
enc->total_output_bytes += 5 + 4 + compressed_size; // packet + size + rate_factor + data
// Update rate control for next frame
if (enc->bitrate_mode > 0) {
size_t frame_bits = (enc->total_output_bytes * 8) - frame_start_bits;
update_rate_control(enc, frame_complexity, frame_bits);
}
// Swap frame buffers for next frame // Swap frame buffers for next frame
uint8_t *temp_rgb = enc->previous_rgb; uint8_t *temp_rgb = enc->previous_rgb;
@@ -990,6 +1094,15 @@ static int get_video_metadata(tev_encoder_t *enc) {
// set keyframe interval // set keyframe interval
KEYFRAME_INTERVAL = 2 * enc->fps; KEYFRAME_INTERVAL = 2 * enc->fps;
// Calculate target bits per frame for bitrate mode
if (enc->target_bitrate_kbps > 0) {
enc->target_bits_per_frame = (enc->target_bitrate_kbps * 1000) / enc->fps;
if (enc->verbose) {
printf("Target bitrate: %d kbps (%zu bits per frame)\n",
enc->target_bitrate_kbps, enc->target_bits_per_frame);
}
}
// Check for audio stream // Check for audio stream
snprintf(command, sizeof(command), snprintf(command, sizeof(command),
"ffprobe -v quiet -select_streams a:0 -show_entries stream=codec_type -of csv=p=0 \"%s\" 2>/dev/null", "ffprobe -v quiet -select_streams a:0 -show_entries stream=codec_type -of csv=p=0 \"%s\" 2>/dev/null",
@@ -1167,7 +1280,7 @@ static int process_audio(tev_encoder_t *enc, int frame_num, FILE *output) {
// Show usage information // Show usage information
static void show_usage(const char *program_name) { static void show_usage(const char *program_name) {
printf("TEV YCoCg-R 4:2:0 Video Encoder\n"); printf("TEV YCoCg-R 4:2:0 Video Encoder with Bitrate Control\n");
printf("Usage: %s [options] -i input.mp4 -o output.tev\n\n", program_name); printf("Usage: %s [options] -i input.mp4 -o output.tev\n\n", program_name);
printf("Options:\n"); printf("Options:\n");
printf(" -i, --input FILE Input video file\n"); printf(" -i, --input FILE Input video file\n");
@@ -1175,18 +1288,30 @@ static void show_usage(const char *program_name) {
printf(" -w, --width N Video width (default: %d)\n", DEFAULT_WIDTH); printf(" -w, --width N Video width (default: %d)\n", DEFAULT_WIDTH);
printf(" -h, --height N Video height (default: %d)\n", DEFAULT_HEIGHT); printf(" -h, --height N Video height (default: %d)\n", DEFAULT_HEIGHT);
printf(" -f, --fps N Output frames per second (enables frame rate conversion)\n"); printf(" -f, --fps N Output frames per second (enables frame rate conversion)\n");
printf(" -q, --quality N Quality level 0-4 (default: 2)\n"); printf(" -q, --quality N Quality level 0-4 (default: 2, only decides audio quality in bitrate mode)\n");
printf(" -b, --bitrate N Target bitrate in kbps (enables bitrate control mode; DON'T USE - NOT WORKING AS INTENDED)\n");
printf(" -v, --verbose Verbose output\n"); printf(" -v, --verbose Verbose output\n");
printf(" -t, --test Test mode: generate solid color frames\n"); printf(" -t, --test Test mode: generate solid colour frames\n");
printf(" --help Show this help\n\n"); printf(" --help Show this help\n\n");
printf("Rate Control Modes:\n");
printf(" Quality mode (default): Fixed quantisation based on -q parameter\n");
printf(" Bitrate mode (-b N): Dynamic quantisation targeting N kbps average\n\n");
printf("Audio rate by quality:\n");
printf(" ");
for (int i = 0; i < sizeof(MP2_RATE_TABLE) / sizeof(int); i++) {
printf("%d: %d kbps\t", i, MP2_RATE_TABLE[i]);
}
printf("\n\n");
printf("Features:\n"); printf("Features:\n");
printf(" - YCoCg-R 4:2:0 chroma subsampling for 50%% compression improvement\n"); printf(" - YCoCg-R 4:2:0 chroma subsampling for 50%% compression improvement\n");
printf(" - 16x16 Y blocks with 8x8 chroma for optimal DCT efficiency\n"); printf(" - 16x16 Y blocks with 8x8 chroma for optimal DCT efficiency\n");
printf(" - Frame rate conversion with FFmpeg temporal filtering\n"); printf(" - Frame rate conversion with FFmpeg temporal filtering\n");
printf(" - Adaptive bitrate control with complexity-based adjustment\n");
printf("Examples:\n"); printf("Examples:\n");
printf(" %s -i input.mp4 -o output.tev\n", program_name); printf(" %s -i input.mp4 -o output.tev # Quality mode (q=2)\n", program_name);
printf(" %s -i input.avi -f 15 -q 7 -o output.tev # Convert 25fps to 15fps\n", program_name); printf(" %s -i input.mp4 -b 800 -o output.tev # 800 kbps bitrate target\n", program_name);
printf(" %s --test -o test.tev # Generate solid color test frames\n", program_name); printf(" %s -i input.avi -f 15 -b 500 -o output.tev # 15fps @ 500 kbps\n", program_name);
printf(" %s --test -b 1000 -o test.tev # Test with 1000 kbps target\n", program_name);
} }
@@ -1222,6 +1347,7 @@ int main(int argc, char *argv[]) {
{"height", required_argument, 0, 'h'}, {"height", required_argument, 0, 'h'},
{"fps", required_argument, 0, 'f'}, {"fps", required_argument, 0, 'f'},
{"quality", required_argument, 0, 'q'}, {"quality", required_argument, 0, 'q'},
{"bitrate", required_argument, 0, 'b'},
{"verbose", no_argument, 0, 'v'}, {"verbose", no_argument, 0, 'v'},
{"test", no_argument, 0, 't'}, {"test", no_argument, 0, 't'},
{"help", no_argument, 0, '?'}, {"help", no_argument, 0, '?'},
@@ -1231,7 +1357,7 @@ int main(int argc, char *argv[]) {
int option_index = 0; int option_index = 0;
int c; int c;
while ((c = getopt_long(argc, argv, "i:o:w:h:f:q:vt", long_options, &option_index)) != -1) { while ((c = getopt_long(argc, argv, "i:o:w:h:f:q:b:vt", long_options, &option_index)) != -1) {
switch (c) { switch (c) {
case 'i': case 'i':
enc->input_file = strdup(optarg); enc->input_file = strdup(optarg);
@@ -1255,9 +1381,13 @@ int main(int argc, char *argv[]) {
} }
break; break;
case 'q': case 'q':
enc->quality = atoi(optarg); enc->quality = CLAMP(atoi(optarg), 0, 4);
if (enc->quality < 0) enc->quality = 0; break;
if (enc->quality > 7) enc->quality = 7; case 'b':
enc->target_bitrate_kbps = atoi(optarg);
if (enc->target_bitrate_kbps > 0) {
enc->bitrate_mode = 1; // Enable bitrate control
}
break; break;
case 'v': case 'v':
enc->verbose = 1; enc->verbose = 1;
@@ -1295,11 +1425,11 @@ int main(int argc, char *argv[]) {
// Handle test mode or real video // Handle test mode or real video
if (test_mode) { if (test_mode) {
// Test mode: generate solid color frames // Test mode: generate solid colour frames
enc->fps = 1; enc->fps = 1;
enc->total_frames = 15; enc->total_frames = 15;
enc->has_audio = 0; enc->has_audio = 0;
printf("Test mode: Generating 15 solid color frames\n"); printf("Test mode: Generating 15 solid colour frames\n");
} else { } else {
// Get video metadata and start FFmpeg processes // Get video metadata and start FFmpeg processes
if (!get_video_metadata(enc)) { if (!get_video_metadata(enc)) {
@@ -1348,42 +1478,47 @@ int main(int argc, char *argv[]) {
if (enc->output_fps > 0) { if (enc->output_fps > 0) {
printf("Frame rate conversion enabled: %d fps output\n", enc->output_fps); printf("Frame rate conversion enabled: %d fps output\n", enc->output_fps);
} }
if (enc->bitrate_mode > 0) {
printf("Bitrate control enabled: targeting %d kbps\n", enc->target_bitrate_kbps);
} else {
printf("Quality mode: q=%d\n", enc->quality);
}
// Process frames // Process frames
int frame_count = 0; int frame_count = 0;
while (frame_count < enc->total_frames) { while (frame_count < enc->total_frames) {
if (test_mode) { if (test_mode) {
// Generate test frame with solid colors // Generate test frame with solid colours
size_t rgb_size = enc->width * enc->height * 3; size_t rgb_size = enc->width * enc->height * 3;
uint8_t test_r = 0, test_g = 0, test_b = 0; uint8_t test_r = 0, test_g = 0, test_b = 0;
const char* color_name = "unknown"; const char* colour_name = "unknown";
switch (frame_count) { switch (frame_count) {
case 0: test_r = 0; test_g = 0; test_b = 0; color_name = "black"; break; case 0: test_r = 0; test_g = 0; test_b = 0; colour_name = "black"; break;
case 1: test_r = 127; test_g = 127; test_b = 127; color_name = "grey"; break; case 1: test_r = 127; test_g = 127; test_b = 127; colour_name = "grey"; break;
case 2: test_r = 255; test_g = 255; test_b = 255; color_name = "white"; break; case 2: test_r = 255; test_g = 255; test_b = 255; colour_name = "white"; break;
case 3: test_r = 127; test_g = 0; test_b = 0; color_name = "half red"; break; case 3: test_r = 127; test_g = 0; test_b = 0; colour_name = "half red"; break;
case 4: test_r = 127; test_g = 127; test_b = 0; color_name = "half yellow"; break; case 4: test_r = 127; test_g = 127; test_b = 0; colour_name = "half yellow"; break;
case 5: test_r = 0; test_g = 127; test_b = 0; color_name = "half green"; break; case 5: test_r = 0; test_g = 127; test_b = 0; colour_name = "half green"; break;
case 6: test_r = 0; test_g = 127; test_b = 127; color_name = "half cyan"; break; case 6: test_r = 0; test_g = 127; test_b = 127; colour_name = "half cyan"; break;
case 7: test_r = 0; test_g = 0; test_b = 127; color_name = "half blue"; break; case 7: test_r = 0; test_g = 0; test_b = 127; colour_name = "half blue"; break;
case 8: test_r = 127; test_g = 0; test_b = 127; color_name = "half magenta"; break; case 8: test_r = 127; test_g = 0; test_b = 127; colour_name = "half magenta"; break;
case 9: test_r = 255; test_g = 0; test_b = 0; color_name = "red"; break; case 9: test_r = 255; test_g = 0; test_b = 0; colour_name = "red"; break;
case 10: test_r = 255; test_g = 255; test_b = 0; color_name = "yellow"; break; case 10: test_r = 255; test_g = 255; test_b = 0; colour_name = "yellow"; break;
case 11: test_r = 0; test_g = 255; test_b = 0; color_name = "green"; break; case 11: test_r = 0; test_g = 255; test_b = 0; colour_name = "green"; break;
case 12: test_r = 0; test_g = 255; test_b = 255; color_name = "cyan"; break; case 12: test_r = 0; test_g = 255; test_b = 255; colour_name = "cyan"; break;
case 13: test_r = 0; test_g = 0; test_b = 255; color_name = "blue"; break; case 13: test_r = 0; test_g = 0; test_b = 255; colour_name = "blue"; break;
case 14: test_r = 255; test_g = 0; test_b = 255; color_name = "magenta"; break; case 14: test_r = 255; test_g = 0; test_b = 255; colour_name = "magenta"; break;
} }
// Fill entire frame with solid color // Fill entire frame with solid colour
for (size_t i = 0; i < rgb_size; i += 3) { for (size_t i = 0; i < rgb_size; i += 3) {
enc->current_rgb[i] = test_r; enc->current_rgb[i] = test_r;
enc->current_rgb[i + 1] = test_g; enc->current_rgb[i + 1] = test_g;
enc->current_rgb[i + 2] = test_b; enc->current_rgb[i + 2] = test_b;
} }
printf("Frame %d: %s (%d,%d,%d)\n", frame_count, color_name, test_r, test_g, test_b); printf("Frame %d: %s (%d,%d,%d)\n", frame_count, colour_name, test_r, test_g, test_b);
// Test YCoCg-R conversion // Test YCoCg-R conversion
int y_test, co_test, cg_test; int y_test, co_test, cg_test;
@@ -1462,10 +1597,24 @@ int main(int argc, char *argv[]) {
printf(" - sync packets: %d\n", sync_packet_count); printf(" - sync packets: %d\n", sync_packet_count);
printf(" Framerate: %d\n", enc->fps); printf(" Framerate: %d\n", enc->fps);
printf(" Output size: %zu bytes\n", enc->total_output_bytes); printf(" Output size: %zu bytes\n", enc->total_output_bytes);
// Calculate achieved bitrate
double achieved_bitrate_kbps = (enc->total_output_bytes * 8.0) / 1000.0 / total_time;
printf(" Achieved bitrate: %.1f kbps", achieved_bitrate_kbps);
if (enc->bitrate_mode > 0) {
printf(" (target: %d kbps, %.1f%%)", enc->target_bitrate_kbps,
(achieved_bitrate_kbps / enc->target_bitrate_kbps) * 100.0);
}
printf("\n");
printf(" Encoding time: %.2fs (%.1f fps)\n", total_time, frame_count / total_time); printf(" Encoding time: %.2fs (%.1f fps)\n", total_time, frame_count / total_time);
printf(" Block statistics: INTRA=%d, INTER=%d, MOTION=%d, SKIP=%d\n", printf(" Block statistics: INTRA=%d, INTER=%d, MOTION=%d, SKIP=%d\n",
enc->blocks_intra, enc->blocks_inter, enc->blocks_motion, enc->blocks_skip); enc->blocks_intra, enc->blocks_inter, enc->blocks_motion, enc->blocks_skip);
if (enc->bitrate_mode > 0) {
printf(" Rate control factor: %.3f\n", enc->rate_control_factor);
}
cleanup_encoder(enc); cleanup_encoder(enc);
return 0; return 0;
} }