various encoder bug fixes

This commit is contained in:
minjaesong
2025-09-13 00:39:12 +09:00
parent 1f5f72733a
commit 198e951102
4 changed files with 553 additions and 79 deletions

View File

@@ -14,6 +14,58 @@
#include <sys/time.h>
#include <time.h>
// Float16 conversion functions (adapted from Float16.kt)
/*
 * Convert an IEEE 754 binary32 value to binary16 bits.
 *
 * Rounding adds half of the discarded range (0x1000 for normal values) to the
 * magnitude and truncates, i.e. ties round away from zero.
 *
 * - Finite magnitudes whose bits are >= 0x47800000 become +/-Inf.
 * - Magnitudes below that which would only reach it through rounding
 *   saturate to the largest finite half (0x7bff).
 * - Inf stays Inf; NaN keeps the top 10 payload bits. A NaN whose payload
 *   sits entirely in the low 13 mantissa bits is therefore emitted as Inf.
 * - Magnitudes too small for a half subnormal become signed zero.
 *
 * @param fval value to convert
 * @return the binary16 bit pattern (sign in bit 15)
 */
static inline uint16_t float_to_float16(float fval) {
    // Pun through a union: reading fval via a uint32_t* cast breaks strict aliasing.
    union { float f; uint32_t u; } pun;
    pun.f = fval;

    const uint32_t fbits = pun.u;
    const uint32_t magnitude = fbits & 0x7fffffffu;
    const uint16_t sign = (uint16_t)((fbits >> 16) & 0x8000u);  // sign only
    const uint32_t rounded = magnitude + 0x1000u;               // rounded value

    if (rounded >= 0x47800000u) {                // might be or become NaN/Inf
        if (magnitude >= 0x47800000u) {          // is or must become NaN/Inf
            if (rounded < 0x7f800000u) {         // was a value but too large
                return (uint16_t)(sign | 0x7c00u);              // make it +/-Inf
            }
            return (uint16_t)(sign | 0x7c00u |                  // remains +/-Inf or NaN
                              ((fbits & 0x007fffffu) >> 13));   // keep NaN (and Inf) bits
        }
        return (uint16_t)(sign | 0x7bffu);       // unrounded not quite Inf
    }

    if (rounded >= 0x38800000u) {                // remains a normalized value
        return (uint16_t)(sign | ((rounded - 0x38000000u) >> 13));  // exp - 127 + 15
    }

    if (rounded < 0x33000000u) {                 // too small for subnormal
        return sign;                             // becomes +/-0
    }

    const uint32_t exp = magnitude >> 23;        // biased float exponent, 101..112 here
    if (exp < 102u) {
        // Only the +0x1000 rounding carried this value past 0x33000000; the
        // round term below would need a shift count of -1, which is undefined
        // in C. The reference (masked shift counts) yields signed zero here.
        return sign;
    }

    return (uint16_t)(sign |
                      ((((fbits & 0x7fffffu) | 0x800000u) +   // add the implicit leading bit
                        (0x800000u >> (exp - 102u)))          // round depending on cut off
                       >> (126u - exp)));                     // scale down into the subnormal range
}
/*
 * Convert IEEE 754 binary16 bits to a binary32 value.
 *
 * Subnormal halves are renormalized; Inf/NaN map to Inf/NaN with the payload
 * shifted into the top mantissa bits; +/-0 is preserved.
 *
 * Quirk kept from the reference implementation: a normalized half with a zero
 * mantissa and an exponent field above 1 is returned with the low 10 float
 * mantissa bits set (0x3ff), so e.g. 0x3c00 decodes to slightly more than 1.0.
 *
 * @param hbits binary16 bit pattern (sign in bit 15)
 * @return the decoded float
 */
static inline float float16_to_float(uint16_t hbits) {
    // Widen before shifting: (hbits & 0x8000) << 16 in plain int shifts into
    // the sign bit, which is undefined for signed operands.
    const uint32_t sign = (uint32_t)(hbits & 0x8000u) << 16;
    uint32_t mant = hbits & 0x03ffu;             // 10 bits mantissa
    uint32_t exp = hbits & 0x7c00u;              // 5 bits exponent (still in place)

    // Pun through a union instead of a float* cast (strict aliasing).
    union { uint32_t u; float f; } pun;

    if (exp == 0x7c00u) {                        // NaN/Inf
        exp = 0x3fc00u;                          // -> NaN/Inf
    } else if (exp != 0) {                       // normalized value
        exp += 0x1c000u;                         // exp - 15 + 127
        if (mant == 0 && exp > 0x1c400u) {       // smooth transition
            pun.u = sign | (exp << 13) | 0x3ffu;
            return pun.f;
        }
    } else if (mant != 0) {                      // exp == 0 -> subnormal
        exp = 0x1c400u;                          // make it normal
        do {
            mant <<= 1;                          // mantissa * 2
            exp -= 0x400u;                       // decrease exp by 1
        } while ((mant & 0x400u) == 0);          // until the leading bit appears
        mant &= 0x3ffu;                          // discard the leading bit
    }                                            // else +/-0 -> +/-0

    pun.u = sign | ((exp | mant) << 13);
    return pun.f;
}
// TSVM Enhanced Video (TEV) format constants
#define TEV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x45\x56" // 8 bytes: 0x1F followed by ASCII "TSVMTEV" (no separator)
#define TEV_VERSION 2 // Updated for YCoCg-R 4:2:0
@@ -103,7 +155,7 @@ static const uint32_t QUANT_TABLE_C[HALF_BLOCK_SIZE_SQR] =
// Audio constants (reuse MP2 from existing system)
#define MP2_SAMPLE_RATE 32000
#define MP2_DEFAULT_PACKET_SIZE 0x240
#define MP2_DEFAULT_PACKET_SIZE 1728
// Default values
#define DEFAULT_WIDTH 560
@@ -140,6 +192,17 @@ typedef struct __attribute__((packed)) {
int16_t cg_coeffs[HALF_BLOCK_SIZE_SQR]; // quantised Cg DCT coefficients (8x8)
} tev_block_t;
// In-memory block record for lossless mode. Coefficients are held as float32
// here; serialize_lossless_blocks() narrows them to float16 for output.
typedef struct __attribute__((packed)) {
    uint8_t  mode;                            // block encoding mode
    int16_t  mv_x;                            // motion vector X (1/4 pixel precision)
    int16_t  mv_y;                            // motion vector Y (1/4 pixel precision)
    float    rate_control_factor;             // always 1.0f in lossless mode
    uint16_t cbp;                             // coded block pattern (channels with non-zero coeffs)
    float    y_coeffs[BLOCK_SIZE_SQR];        // unquantised Y DCT coefficients (16x16)
    float    co_coeffs[HALF_BLOCK_SIZE_SQR];  // unquantised Co DCT coefficients (8x8)
    float    cg_coeffs[HALF_BLOCK_SIZE_SQR];  // unquantised Cg DCT coefficients (8x8)
} tev_lossless_block_t;
// Subtitle entry structure
typedef struct subtitle_entry {
int start_frame;
@@ -168,6 +231,8 @@ typedef struct {
int qualityCo;
int qualityCg;
int verbose;
int disable_rcf; // 0 = rcf enabled, 1 = disabled
int lossless_mode; // 0 = lossy (default), 1 = lossless mode
// Bitrate control
int target_bitrate_kbps; // Target bitrate in kbps (0 = quality mode)
@@ -216,10 +281,9 @@ typedef struct {
// Subtitle handling
subtitle_entry_t *subtitle_list;
subtitle_entry_t *current_subtitle;
// Complexity statistics collection
int stats_mode; // 0 = disabled, 1 = enabled
int disable_rcf; // 0 = rcf enabled, 1 = disabled
float *complexity_values; // Array to store all complexity values
int complexity_count; // Current count of complexity values
int complexity_capacity; // Capacity of complexity_values array
@@ -1041,6 +1105,107 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
block->cbp = 0x07; // Y, Co, Cg all present
}
// Encode one 16x16 block for lossless mode: run the forward DCT on the Y, Co
// and Cg planes of the block and store the raw float coefficients, skipping
// quantisation entirely.
//
// enc          encoder state (source frame, workspaces, block storage)
// block_x/y    block coordinates, in units of 16x16 blocks
// is_keyframe  currently has no effect: every block is coded INTRA
static void encode_block_lossless(tev_encoder_t *enc, int block_x, int block_y, int is_keyframe) {
    const int blocks_per_row = (enc->width + 15) / 16;
    const int block_index = block_y * blocks_per_row + block_x;

    // NOTE(review): this pointer is formed with block_data's own element stride
    // and then reinterpreted. If block_data is an array of tev_block_t sized with
    // sizeof(tev_block_t), lossless records (which hold float coefficients) would
    // overlap their neighbours and run past the allocation - confirm against the
    // declaration and the allocator.
    tev_lossless_block_t *out = (tev_lossless_block_t*)&enc->block_data[block_index];

    // Pull the YCoCg-R samples for this block into the workspaces
    extract_ycocgr_block(enc->current_rgb, enc->width, enc->height,
                         block_x, block_y,
                         enc->y_workspace, enc->co_workspace, enc->cg_workspace);

    // Keyframes and non-keyframes take the same path for now: INTRA with a
    // zero motion vector. No mode decision is made in lossless mode yet.
    (void)is_keyframe;
    out->mode = TEV_MODE_INTRA;
    out->mv_y = 0;
    out->mv_x = 0;
    enc->blocks_intra += 1;

    // Rate control does not apply to lossless blocks
    out->rate_control_factor = 1.0f;

    // Luma (16x16): transform, then copy coefficients verbatim
    dct_16x16_fast(enc->y_workspace, enc->dct_workspace);
    for (int k = 0; k < BLOCK_SIZE_SQR; k++) {
        out->y_coeffs[k] = enc->dct_workspace[k];
    }

    // Chroma Co (8x8)
    dct_8x8_fast(enc->co_workspace, enc->dct_workspace);
    for (int k = 0; k < HALF_BLOCK_SIZE_SQR; k++) {
        out->co_coeffs[k] = enc->dct_workspace[k];
    }

    // Chroma Cg (8x8)
    dct_8x8_fast(enc->cg_workspace, enc->dct_workspace);
    for (int k = 0; k < HALF_BLOCK_SIZE_SQR; k++) {
        out->cg_coeffs[k] = enc->dct_workspace[k];
    }

    // All three channels are always marked present
    out->cbp = 0x07;
}
// On-disk form of a lossless block: same header fields as the in-memory
// record, with every coefficient stored as a float16 bit pattern.
typedef struct __attribute__((packed)) {
    uint8_t  mode;                            // block encoding mode
    int16_t  mv_x;                            // motion vector X
    int16_t  mv_y;                            // motion vector Y
    float    rate_control_factor;             // always 1.0f in lossless mode
    uint16_t cbp;                             // coded block pattern
    uint16_t y_coeffs[BLOCK_SIZE_SQR];        // float16 Y coefficients
    uint16_t co_coeffs[HALF_BLOCK_SIZE_SQR];  // float16 Co coefficients
    uint16_t cg_coeffs[HALF_BLOCK_SIZE_SQR];  // float16 Cg coefficients
} tev_serialized_lossless_block_t;
// Repack every lossless block of the frame into its serialized form.
// Header fields are copied as-is; each float32 coefficient is clamped to the
// finite float16 range and converted with float_to_float16(). In verbose mode
// a warning is printed for every coefficient that had to be clamped.
//
// enc                encoder state; blocks are read from enc->block_data
// blocks_x/blocks_y  frame size in blocks
// serialized_blocks  destination array of blocks_x * blocks_y records
static void serialize_lossless_blocks(tev_encoder_t *enc, int blocks_x, int blocks_y,
                                      tev_serialized_lossless_block_t *serialized_blocks) {
    // Largest finite magnitude representable in float16
    const float half_limit = 65504.0f;

    for (int row = 0; row < blocks_y; row++) {
        for (int col = 0; col < blocks_x; col++) {
            const int index = row * blocks_x + col;
            tev_lossless_block_t *in = (tev_lossless_block_t*)&enc->block_data[index];
            tev_serialized_lossless_block_t *out = &serialized_blocks[index];

            // Header fields carry over unchanged
            out->mode = in->mode;
            out->mv_x = in->mv_x;
            out->mv_y = in->mv_y;
            out->rate_control_factor = in->rate_control_factor;
            out->cbp = in->cbp;

            // Luma coefficients
            for (int k = 0; k < BLOCK_SIZE_SQR; k++) {
                const float raw = in->y_coeffs[k];
                const float limited = FCLAMP(raw, -half_limit, half_limit);
                out->y_coeffs[k] = float_to_float16(limited);
                if (enc->verbose && fabsf(raw) > half_limit) {
                    printf("WARNING: Y coefficient %d clamped: %f -> %f\n", k, raw, limited);
                }
            }

            // Chroma coefficients; Co and Cg share an index range, so they are
            // handled pairwise and their warnings stay interleaved per index
            for (int k = 0; k < HALF_BLOCK_SIZE_SQR; k++) {
                const float raw_co = in->co_coeffs[k];
                const float raw_cg = in->cg_coeffs[k];
                const float limited_co = FCLAMP(raw_co, -half_limit, half_limit);
                const float limited_cg = FCLAMP(raw_cg, -half_limit, half_limit);

                out->co_coeffs[k] = float_to_float16(limited_co);
                out->cg_coeffs[k] = float_to_float16(limited_cg);

                if (enc->verbose && fabsf(raw_co) > half_limit) {
                    printf("WARNING: Co coefficient %d clamped: %f -> %f\n", k, raw_co, limited_co);
                }
                if (enc->verbose && fabsf(raw_cg) > half_limit) {
                    printf("WARNING: Cg coefficient %d clamped: %f -> %f\n", k, raw_cg, limited_cg);
                }
            }
        }
    }
}
// Convert SubRip time format (HH:MM:SS,mmm) to frame number
static int srt_time_to_frame(const char *time_str, int fps) {
int hours, minutes, seconds, milliseconds;
@@ -1182,7 +1347,7 @@ static subtitle_entry_t* parse_srt_file(const char *filename, int fps) {
}
}
fclose(file);
//fclose(file); // why uncommenting it errors out with "Fatal error: glibc detected an invalid stdio handle"?
return head;
}
@@ -1613,6 +1778,7 @@ static tev_encoder_t* init_encoder(void) {
enc->output_fps = 0; // No frame rate conversion by default
enc->is_ntsc_framerate = 0; // Will be detected from input
enc->verbose = 0;
enc->disable_rcf = 1;
enc->subtitle_file = NULL;
enc->has_subtitles = 0;
enc->subtitle_list = NULL;
@@ -1655,7 +1821,16 @@ static int alloc_encoder_buffers(tev_encoder_t *enc) {
enc->dct_workspace = malloc(16 * 16 * sizeof(float));
enc->block_data = malloc(total_blocks * sizeof(tev_block_t));
enc->compressed_buffer = malloc(total_blocks * sizeof(tev_block_t) * 2);
// Allocate compression buffer large enough for both regular and lossless modes
size_t max_block_size = sizeof(tev_block_t) > sizeof(tev_serialized_lossless_block_t) ?
sizeof(tev_block_t) : sizeof(tev_serialized_lossless_block_t);
size_t compressed_buffer_size = total_blocks * max_block_size * 2;
enc->compressed_buffer = malloc(compressed_buffer_size);
if (enc->verbose) {
printf("Allocated compressed buffer: %zu bytes for %d blocks (max_block_size: %zu)\n",
compressed_buffer_size, total_blocks, max_block_size);
}
enc->mp2_buffer = malloc(MP2_DEFAULT_PACKET_SIZE);
if (!enc->current_rgb || !enc->previous_rgb || !enc->reference_rgb ||
@@ -1726,7 +1901,7 @@ static int write_tev_header(FILE *output, tev_encoder_t *enc) {
uint8_t qualityCo = enc->qualityCo;
uint8_t qualityCg = enc->qualityCg;
uint8_t flags = (enc->has_audio) | (enc->has_subtitles << 1);
uint8_t video_flags = (enc->progressive_mode ? 0 : 1) | (enc->is_ntsc_framerate ? 2 : 0); // bit 0 = is_interlaced, bit 1 = is_ntsc_framerate
uint8_t video_flags = (enc->progressive_mode ? 0 : 1) | (enc->is_ntsc_framerate ? 2 : 0) | (enc->lossless_mode ? 4 : 0); // bit 0 = is_interlaced, bit 1 = is_ntsc_framerate, bit 2 = is_lossless
uint8_t reserved = 0;
fwrite(&width, 2, 1, output);
@@ -1833,7 +2008,11 @@ static int encode_frame(tev_encoder_t *enc, FILE *output, int frame_num, int fie
// Encode all blocks
for (int by = 0; by < blocks_y; by++) {
for (int bx = 0; bx < blocks_x; bx++) {
encode_block(enc, bx, by, is_keyframe);
if (enc->lossless_mode) {
encode_block_lossless(enc, bx, by, is_keyframe);
} else {
encode_block(enc, bx, by, is_keyframe);
}
// Calculate complexity for rate control (if enabled)
if (enc->bitrate_mode > 0) {
@@ -1849,13 +2028,34 @@ static int encode_frame(tev_encoder_t *enc, FILE *output, int frame_num, int fie
}
// Compress block data using Zstd (compatible with TSVM decoder)
size_t block_data_size = blocks_x * blocks_y * sizeof(tev_block_t);
// Compress using Zstd with controlled memory usage
size_t compressed_size = ZSTD_compressCCtx(enc->zstd_context,
enc->compressed_buffer, block_data_size * 2,
enc->block_data, block_data_size,
ZSTD_COMPRESSON_LEVEL);
size_t compressed_size;
if (enc->lossless_mode) {
// Lossless mode: serialize blocks with float16 coefficients
size_t serialized_block_data_size = blocks_x * blocks_y * sizeof(tev_serialized_lossless_block_t);
tev_serialized_lossless_block_t *serialized_blocks = malloc(serialized_block_data_size);
if (!serialized_blocks) {
fprintf(stderr, "Failed to allocate memory for serialized lossless blocks\n");
return -1;
}
serialize_lossless_blocks(enc, blocks_x, blocks_y, serialized_blocks);
// Use the pre-allocated buffer size instead of calculating dynamically
size_t output_buffer_size = blocks_x * blocks_y * sizeof(tev_serialized_lossless_block_t) * 2;
compressed_size = ZSTD_compressCCtx(enc->zstd_context,
enc->compressed_buffer, output_buffer_size,
serialized_blocks, serialized_block_data_size,
ZSTD_COMPRESSON_LEVEL);
free(serialized_blocks);
} else {
// Regular mode: use regular block data
size_t block_data_size = blocks_x * blocks_y * sizeof(tev_block_t);
compressed_size = ZSTD_compressCCtx(enc->zstd_context,
enc->compressed_buffer, block_data_size * 2,
enc->block_data, block_data_size,
ZSTD_COMPRESSON_LEVEL);
}
if (ZSTD_isError(compressed_size)) {
fprintf(stderr, "Zstd compression failed: %s\n", ZSTD_getErrorName(compressed_size));
@@ -2088,7 +2288,7 @@ static int start_audio_conversion(tev_encoder_t *enc) {
char command[2048];
snprintf(command, sizeof(command),
"ffmpeg -v quiet -i \"%s\" -acodec libtwolame -psymodel 4 -b:a %dk -ar %d -ac 2 -y \"%s\" 2>/dev/null",
enc->input_file, MP2_RATE_TABLE[enc->qualityIndex], MP2_SAMPLE_RATE, TEMP_AUDIO_FILE);
enc->input_file, enc->lossless_mode ? 384 : MP2_RATE_TABLE[enc->qualityIndex], MP2_SAMPLE_RATE, TEMP_AUDIO_FILE);
int result = system(command);
if (result == 0) {
@@ -2236,15 +2436,16 @@ static void show_usage(const char *program_name) {
printf(" -o, --output FILE Output video file (use '-' for stdout)\n");
printf(" -s, --size WxH Video size (default: %dx%d)\n", DEFAULT_WIDTH, DEFAULT_HEIGHT);
printf(" -f, --fps N Output frames per second (enables frame rate conversion)\n");
printf(" -q, --quality N Quality level 0-4 (default: 2, only decides audio rate in quantiser mode)\n");
printf(" -q, --quality N Quality level 0-4 (default: 2, only decides audio rate in quantiser/lossless mode)\n");
printf(" -Q, --quantiser N Quantiser level 0-100 (100: lossless, 0: potato)\n");
// printf(" -b, --bitrate N Target bitrate in kbps (enables bitrate control mode; DON'T USE - NOT WORKING AS INTENDED)\n");
printf(" -p, --progressive Use progressive scan (default: interlaced)\n");
printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n");
printf(" -v, --verbose Verbose output\n");
printf(" -t, --test Test mode: generate solid colour frames\n");
printf(" --lossless Lossless mode: store coefficients as float16 (no quantisation, implies -p, 384k audio)\n");
printf(" --enable-rcf Enable per-block rate control (experimental)\n");
printf(" --enable-encode-stats Collect and report block complexity statistics\n");
printf(" --disable-rcf Disable per-block rate control\n");
printf(" --help Show this help\n\n");
// printf("Rate Control Modes:\n");
// printf(" Quality mode (default): Fixed quantisation based on -q parameter\n");
@@ -2334,7 +2535,8 @@ int main(int argc, char *argv[]) {
{"verbose", no_argument, 0, 'v'},
{"test", no_argument, 0, 't'},
{"enable-encode-stats", no_argument, 0, 1000},
{"disable-rcf", no_argument, 0, 1100},
{"enable-rcf", no_argument, 0, 1100},
{"lossless", no_argument, 0, 1200},
{"help", no_argument, 0, '?'},
{0, 0, 0, 0}
};
@@ -2403,11 +2605,14 @@ int main(int argc, char *argv[]) {
case 't':
test_mode = 1;
break;
case 1000: // --enable-encode-stats
case 1000: // --enable-encode-stats
enc->stats_mode = 1;
break;
case 1100: // --disable-rcf
enc->disable_rcf = 1;
case 1100: // --enable-rcf
enc->disable_rcf = 0;
break;
case 1200: // --lossless
enc->lossless_mode = 1;
break;
case 0:
if (strcmp(long_options[option_index].name, "help") == 0) {
@@ -2419,7 +2624,7 @@ int main(int argc, char *argv[]) {
case 'Q':
enc->qualityY = CLAMP(atoi(optarg), 0, 100);
enc->qualityCo = enc->qualityY;
enc->qualityCg = (enc->qualityY == 100) ? enc->qualityY : enc->qualityCo >> 2;
enc->qualityCg = (enc->qualityY == 100) ? enc->qualityY : enc->qualityCo >> 1;
break;
default:
show_usage(argv[0]);
@@ -2428,6 +2633,19 @@ int main(int argc, char *argv[]) {
}
}
// Lossless mode validation and adjustments
if (enc->lossless_mode) {
// In lossless mode, disable rate control and set quality to maximum
enc->bitrate_mode = 0;
enc->disable_rcf = 1;
enc->progressive_mode = 1;
enc->qualityIndex = 5;
enc->qualityY = enc->qualityCo = enc->qualityCg = 255; // Use 255 as a redundant lossless marker
if (enc->verbose) {
printf("Lossless mode enabled: Rate control disabled, quality set to maximum, enabling progressive scan\n");
}
}
// halve the internal representation of frame height
if (!enc->progressive_mode) {
enc->height /= 2;