diff --git a/assets/disk0/tvdos/bin/playtav.js b/assets/disk0/tvdos/bin/playtav.js index c796b82..83d29e0 100644 --- a/assets/disk0/tvdos/bin/playtav.js +++ b/assets/disk0/tvdos/bin/playtav.js @@ -389,7 +389,7 @@ function setBiasLighting() { function updateDataRateBin(rate) { videoRateBin.push(rate) - if (videoRateBin.length > header.fps) { + if (videoRateBin.length > 10) { videoRateBin.shift() } } diff --git a/terranmon.txt b/terranmon.txt index 2002089..98231da 100644 --- a/terranmon.txt +++ b/terranmon.txt @@ -919,7 +919,7 @@ transmission capability, and region-of-interest coding. - bit 0 = reserved - bit 1 = is NTSC framerate - bit 2 = is lossless mode - (shorthand for `-Q1,1,1 -w 0 --intra-only --no-perceptual-tuning --arate 384`) + (shorthand for `-q 6 -Q0,0,0 -w 0 --intra-only --no-perceptual-tuning --arate 384`) - bit 3 = has region-of-interest coding (for still images only) uint8 Encoder quality level (stored with bias of 1 (q0=1); used to derive anisotropy value) uint8 Channel layout (bit-field: bit 0=has alpha, bit 1=has chroma inverted, bit 2=has luma inverted) diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index c753dac..b14617a 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -98,9 +98,8 @@ static int needs_alpha_channel(int channel_layout) { #define DEFAULT_WIDTH 560 #define DEFAULT_HEIGHT 448 #define DEFAULT_FPS 30 -#define DEFAULT_QUALITY 2 +#define DEFAULT_QUALITY 3 int KEYFRAME_INTERVAL = 2; // refresh often because deltas in DWT are more visible than DCT -#define ZSTD_COMPRESSON_LEVEL 15 // Audio/subtitle constants (reused from TEV) #define MP2_DEFAULT_PACKET_SIZE 1152 @@ -162,7 +161,7 @@ static int calculate_max_decomp_levels(int width, int height) { } // MP2 audio rate table (same as TEV) -static const int MP2_RATE_TABLE[] = {128, 160, 224, 320, 384, 384}; +static const int MP2_RATE_TABLE[] = {96, 128, 160, 224, 320, 384, 384}; // Valid MP2 bitrates as per MPEG-1 Layer II specification static const int MP2_VALID_BITRATES[] = {32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384}; @@ -181,14 +180,14 @@ static const int QLUT[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21 // Quality level to quantisation mapping for different channels // the values are indices to the QLUT -static const int QUALITY_Y[] = {79, 47, 23, 11, 5, 2}; // 96, 48, 24, 12, 6, 3 -static const int QUALITY_CO[] = {123, 108, 91, 76, 59, 29}; // 240, 180, 120, 90, 60, 30 -static const int QUALITY_CG[] = {148, 133, 113, 99, 76, 39}; // 424, 304, 200, 144, 90, 40 -static const int QUALITY_ALPHA[] = {79, 47, 23, 11, 5, 2}; // 96, 48, 24, 12, 6, 3 +static const int QUALITY_Y[] = {79, 47, 23, 11, 5, 2, 0}; // 96, 48, 24, 12, 6, 3, 1 +static const int QUALITY_CO[] = {123, 108, 91, 76, 59, 29, 4}; // 240, 180, 120, 90, 60, 30, 5 +static const int QUALITY_CG[] = {148, 133, 113, 99, 76, 39, 7}; // 424, 304, 200, 144, 90, 40, 8 +static const int QUALITY_ALPHA[] = {79, 47, 23, 11, 5, 2, 0}; // 96, 48, 24, 12, 6, 3, 1 -// Dead-zone quantization thresholds per quality level +// Dead-zone quantisation thresholds per quality level // Higher values = more aggressive (more coefficients set to zero) -static const float DEAD_ZONE_THRESHOLD[] = {2.0f, 1.8f, 1.6f, 1.4f, 1.2f, 1.0f}; +static const float DEAD_ZONE_THRESHOLD[] = {2.0f, 1.8f, 1.6f, 1.4f, 1.2f, 1.0f, 0.0f}; // Dead-zone scaling factors for different subband levels #define DEAD_ZONE_FINEST_SCALE 1.0f // Full dead-zone for finest level (level 6) @@ -196,11 +195,11 @@ static const float DEAD_ZONE_THRESHOLD[] = {2.0f, 1.8f, 1.6f, 1.4f, 1.2f, 1.0f}; // Coarser levels (0-4) use 0.0f (no dead-zone) to preserve structural information // psychovisual tuning parameters -static const float ANISOTROPY_MULT[] = {2.0f, 1.8f, 1.6f, 1.4f, 1.2f, 1.0f}; -static const float ANISOTROPY_BIAS[] = {0.4f, 0.2f, 0.1f, 0.0f, 0.0f, 0.0f}; +static const float ANISOTROPY_MULT[] = {2.0f, 1.8f, 1.6f, 1.4f, 1.2f, 1.0f, 1.0f}; +static const float ANISOTROPY_BIAS[] = {0.4f, 0.2f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f}; -static const float ANISOTROPY_MULT_CHROMA[] = {6.6f, 5.5f, 4.4f, 3.3f, 2.2f, 1.1f}; -static const float ANISOTROPY_BIAS_CHROMA[] = {1.0f, 0.8f, 0.6f, 0.4f, 0.2f, 0.0f}; +static const float ANISOTROPY_MULT_CHROMA[] = {6.6f, 5.5f, 4.4f, 3.3f, 2.2f, 1.1f, 1.0f}; +static const float ANISOTROPY_BIAS_CHROMA[] = {1.0f, 0.8f, 0.6f, 0.4f, 0.2f, 0.0f, 0.0f}; // DWT coefficient structure for each subband typedef struct { @@ -252,7 +251,7 @@ typedef struct tav_encoder_s { int quantiser_y, quantiser_co, quantiser_cg; int wavelet_filter; int decomp_levels; - float dead_zone_threshold; // Dead-zone quantization threshold (0 = disabled) + float dead_zone_threshold; // Dead-zone quantisation threshold (0 = disabled) int bitrate_mode; int target_bitrate; @@ -314,6 +313,7 @@ typedef struct tav_encoder_s { ZSTD_CCtx *zstd_ctx; void *compressed_buffer; size_t compressed_buffer_size; + int zstd_level; // Zstd compression level (default: 15) // OPTIMISATION: Pre-allocated buffers to avoid malloc/free per tile int16_t *reusable_quantised_y; @@ -601,6 +601,8 @@ static int process_subtitles(tav_encoder_t *enc, int frame_num, FILE *output); // Show usage information static void show_usage(const char *program_name) { + int qtsize = sizeof(MP2_RATE_TABLE) / sizeof(int); + printf("TAV DWT-based Video Encoder\n"); printf("Usage: %s [options] -i input.mp4 -o output.mv3\n\n", program_name); printf("Options:\n"); @@ -608,42 +610,43 @@ static void show_usage(const char *program_name) { printf(" -o, --output FILE Output video file (use '-' for stdout)\n"); printf(" -s, --size WxH Video size (default: %dx%d)\n", DEFAULT_WIDTH, DEFAULT_HEIGHT); printf(" -f, --fps N Output frames per second (enables frame rate conversion)\n"); - printf(" -q, --quality N Quality level 0-5 (default: 2)\n"); - printf(" -Q, --quantiser Y,Co,Cg Quantiser levels 1-255 for each channel (1: lossless, 255: potato)\n"); + printf(" -q, --quality N Quality level 0-5 (default: 3)\n"); + printf(" -Q, --quantiser Y,Co,Cg Quantiser levels 0-255 for each channel (0: lossless, 255: potato)\n"); printf(" -b, --bitrate N Target bitrate in kbps (enables bitrate control mode)\n"); printf(" -c, --channel-layout N Channel layout: 0=Y-Co-Cg, 1=Y-Co-Cg-A, 2=Y-only, 3=Y-A, 4=Co-Cg, 5=Co-Cg-A (default: 0)\n"); printf(" -a, --arate N MP2 audio bitrate in kbps (overrides quality-based audio rate)\n"); printf(" Valid values: 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384\n"); printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n"); - printf(" --fontrom-lo FILE Low font ROM file (max 1920 bytes) for internationalized subtitles\n"); - printf(" --fontrom-hi FILE High font ROM file (max 1920 bytes) for internationalized subtitles\n"); + printf(" --fontrom-lo FILE Low font ROM file for internationalised subtitles\n"); + printf(" --fontrom-hi FILE High font ROM file for internationalised subtitles\n"); printf(" -v, --verbose Verbose output\n"); printf(" -t, --test Test mode: generate solid colour frames\n"); - printf(" --lossless Lossless mode: use 5/3 reversible wavelet\n"); - printf(" --intra-only Disable delta encoding (less noisy picture at the cost of larger file)\n"); + printf(" --lossless Lossless mode (-q %d -Q1,1,1 -w 0 --intra-only --no-perceptual-tuning --no-dead-zone --arate 384)\n", qtsize); + printf(" --intra-only Disable delta encoding\n"); printf(" --ictcp Use ICtCp colour space instead of YCoCg-R (use when source is in BT.2100)\n"); printf(" --no-perceptual-tuning Disable perceptual quantisation\n"); - printf(" --no-dead-zone Disable dead-zone quantization (for comparison/testing)\n"); + printf(" --no-dead-zone Disable dead-zone quantisation (for comparison/testing)\n"); printf(" --encode-limit N Encode only first N frames (useful for testing/analysis)\n"); printf(" --dump-frame N Dump quantised coefficients for frame N (creates .bin files)\n"); - printf(" --wavelet N Wavelet filter: 0=CDF 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar (default: 1)\n"); + printf(" --wavelet N Wavelet filter: 0=LGT 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar (default: 1)\n"); + printf(" --zstd-level N Zstd compression level 1-22 (default: 19, higher = better compression but slower)\n"); printf(" --help Show this help\n\n"); printf("Audio Rate by Quality:\n "); - for (int i = 0; i < sizeof(MP2_RATE_TABLE) / sizeof(int); i++) { + for (int i = 0; i < qtsize; i++) { printf("%d: %d kbps\t", i, MP2_RATE_TABLE[i]); } printf("\n\nQuantiser Value by Quality:\n"); printf(" Y - "); - for (int i = 0; i < 6; i++) { + for (int i = 0; i < qtsize; i++) { printf("%d: Q %d%s(→%d) \t", i, QUALITY_Y[i], QUALITY_Y[i] < 10 ? " " : QUALITY_Y[i] < 100 ? " " : "", QLUT[QUALITY_Y[i]]); } printf("\n Co - "); - for (int i = 0; i < 6; i++) { + for (int i = 0; i < qtsize; i++) { printf("%d: Q %d%s(→%d) \t", i, QUALITY_CO[i], QUALITY_CO[i] < 10 ? " " : QUALITY_CO[i] < 100 ? " " : "", QLUT[QUALITY_CO[i]]); } printf("\n Cg - "); - for (int i = 0; i < 6; i++) { + for (int i = 0; i < qtsize; i++) { printf("%d: Q %d%s(→%d) \t", i, QUALITY_CG[i], QUALITY_CG[i] < 10 ? " " : QUALITY_CG[i] < 100 ? " " : "", QLUT[QUALITY_CG[i]]); } printf("\n\nVideo Size Keywords:"); @@ -657,7 +660,6 @@ static void show_usage(const char *program_name) { printf(" - Perceptual quantisation optimised for human visual system (default)\n"); printf(" - Full resolution YCoCg-R/ICtCp colour space\n"); printf(" - Lossless and lossy compression modes\n"); - printf(" - Versions 5/6: Perceptual quantisation, Versions 3/4: Uniform quantisation\n"); printf("\nExamples:\n"); printf(" %s -i input.mp4 -o output.mv3 # Default settings\n", program_name); @@ -689,6 +691,7 @@ static tav_encoder_t* create_encoder(void) { enc->channel_layout = CHANNEL_LAYOUT_YCOCG; // Default to Y-Co-Cg enc->audio_bitrate = 0; // 0 = use quality table enc->encode_limit = 0; // Default: no frame limit + enc->zstd_level = 19; // Default Zstd compression level return enc; } @@ -763,7 +766,7 @@ static int initialise_encoder(tav_encoder_t *enc) { enc->previous_coeffs_alpha = malloc(total_coeff_size); enc->previous_coeffs_allocated = 0; // Will be set to 1 after first I-frame - // Initialize bitrate control if in bitrate mode + // Initialise bitrate control if in bitrate mode if (enc->bitrate_mode) { enc->video_rate_bin_capacity = enc->output_fps > 0 ? enc->output_fps : enc->fps; enc->video_rate_bin = calloc(enc->video_rate_bin_capacity, sizeof(size_t)); @@ -1403,8 +1406,8 @@ static void quantise_dwt_coefficients(float *coeffs, int16_t *quantised, int siz for (int i = 0; i < size; i++) { float quantised_val = coeffs[i] / effective_q; - // Apply dead-zone quantization ONLY to luma channel and finest subbands - // Chroma channels skip dead-zone (already heavily quantized, avoid color banding) + // Apply dead-zone quantisation ONLY to luma channel and finest subbands + // Chroma channels skip dead-zone (already heavily quantised, avoid colour banding) if (dead_zone_threshold > 0.0f && !is_chroma) { int level = get_subband_level(i, width, height, decomp_levels); float level_threshold = 0.0f; @@ -1610,8 +1613,8 @@ static void quantise_dwt_coefficients_perceptual_per_coeff(tav_encoder_t *enc, float effective_q = effective_base_q * weight; float quantised_val = coeffs[i] / effective_q; - // Apply dead-zone quantization ONLY to luma channel and finest subbands - // Chroma channels skip dead-zone (already heavily quantized, avoid color banding) + // Apply dead-zone quantisation ONLY to luma channel and finest subbands + // Chroma channels skip dead-zone (already heavily quantised, avoid colour banding) if (enc->dead_zone_threshold > 0.0f && !is_chroma) { int level = get_subband_level(i, width, height, decomp_levels); float level_threshold = 0.0f; @@ -1967,7 +1970,7 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type) // Compress with zstd size_t compressed_size = ZSTD_compress(enc->compressed_buffer, enc->compressed_buffer_size, - uncompressed_buffer, uncompressed_offset, ZSTD_COMPRESSON_LEVEL); + uncompressed_buffer, uncompressed_offset, enc->zstd_level); if (ZSTD_isError(compressed_size)) { fprintf(stderr, "Error: ZSTD compression failed: %s\n", ZSTD_getErrorName(compressed_size)); @@ -2241,7 +2244,7 @@ static void rgba_to_colour_space_frame(tav_encoder_t *enc, const uint8_t *rgba, c1[i] = (float)I; c2[i] = (float)Ct; c3[i] = (float)Cp; - alpha[i] = (float)rgba[i*4+3] / 255.0f; // Normalize alpha to [0,1] + alpha[i] = (float)rgba[i*4+3] / 255.0f; // Normalise alpha to [0,1] } } else { // YCoCg mode with alpha - extract RGB first, then convert @@ -2250,7 +2253,7 @@ static void rgba_to_colour_space_frame(tav_encoder_t *enc, const uint8_t *rgba, temp_rgb[i*3] = rgba[i*4]; // R temp_rgb[i*3+1] = rgba[i*4+1]; // G temp_rgb[i*3+2] = rgba[i*4+2]; // B - alpha[i] = (float)rgba[i*4+3] / 255.0f; // Normalize alpha to [0,1] + alpha[i] = (float)rgba[i*4+3] / 255.0f; // Normalise alpha to [0,1] } rgb_to_ycocg(temp_rgb, c1, c2, c3, width, height); free(temp_rgb); @@ -3277,7 +3280,7 @@ static int detect_scene_change(tav_encoder_t *enc) { for (int x = 0; x < enc->width; x += 2) { int offset = (y * enc->width + x) * 3; - // Calculate color difference + // Calculate colour difference int r_diff = abs(enc->current_frame_rgb[offset] - comparison_buffer[offset]); int g_diff = abs(enc->current_frame_rgb[offset + 1] - comparison_buffer[offset + 1]); int b_diff = abs(enc->current_frame_rgb[offset + 2] - comparison_buffer[offset + 2]); @@ -3345,6 +3348,7 @@ int main(int argc, char *argv[]) { {"dump-frame", required_argument, 0, 1009}, {"fontrom-lo", required_argument, 0, 1011}, {"fontrom-hi", required_argument, 0, 1012}, + {"zstd-level", required_argument, 0, 1014}, {"help", no_argument, 0, '?'}, {0, 0, 0, 0} }; @@ -3366,7 +3370,7 @@ int main(int argc, char *argv[]) { } break; case 'q': - enc->quality_level = CLAMP(atoi(optarg), 0, 5); + enc->quality_level = CLAMP(atoi(optarg), 0, 6); enc->quantiser_y = QUALITY_Y[enc->quality_level]; enc->quantiser_co = QUALITY_CO[enc->quality_level]; enc->quantiser_cg = QUALITY_CG[enc->quality_level]; @@ -3397,7 +3401,9 @@ int main(int argc, char *argv[]) { enc->target_bitrate = bitrate; // Choose initial q-index based on target bitrate - if (bitrate >= 32000) { + if (bitrate >= 64000) { + enc->quality_level = 6; + } else if (bitrate >= 32000) { enc->quality_level = 5; } else if (bitrate >= 16000) { enc->quality_level = 4; @@ -3481,6 +3487,14 @@ int main(int argc, char *argv[]) { case 1012: // --fontrom-hi enc->fontrom_hi_file = strdup(optarg); break; + case 1014: // --zstd-level + enc->zstd_level = atoi(optarg); + if (enc->zstd_level < 1 || enc->zstd_level > 22) { + fprintf(stderr, "Error: Zstd compression level must be between 1 and 22 (got %d)\n", enc->zstd_level); + cleanup_encoder(enc); + return 1; + } + break; case 'a': int bitrate = atoi(optarg); int valid_bitrate = validate_mp2_bitrate(bitrate); @@ -3522,14 +3536,24 @@ int main(int argc, char *argv[]) { } if (enc->lossless) { + enc->quality_level = sizeof(MP2_RATE_TABLE) / sizeof(int); // use maximum quality table to disable anisotropy enc->perceptual_tuning = 0; enc->quantiser_y = 0; // will be resolved to 1 enc->quantiser_co = 0; // ditto enc->quantiser_cg = 0; // do. enc->intra_only = 1; + enc->dead_zone_threshold = 0.0f; enc->audio_bitrate = 384; } + // if user made `-q 6 -Q0,0,0 -w 0 --intra-only --no-perceptual-tuning --arate 384` manually, mark the video as lossless + int qtsize = sizeof(MP2_RATE_TABLE) / sizeof(int); + if (enc->quality_level == qtsize && enc->quantiser_y == 0 && enc->quantiser_co == 0 && enc->quantiser_cg == 0 && + enc->perceptual_tuning == 0 && enc->intra_only == 1 && enc->dead_zone_threshold == 0.0f && enc->audio_bitrate == 384 + ) { + enc->lossless = 1; + } + if ((!enc->input_file && !enc->test_mode) || !enc->output_file) { fprintf(stderr, "Error: Input and output files must be specified\n"); show_usage(argv[0]);