TAV: preset implementation

This commit is contained in:
minjaesong
2025-11-24 17:40:45 +09:00
parent 6132012e74
commit 08bb33bf27
6 changed files with 152 additions and 66 deletions

View File

@@ -17,7 +17,7 @@
#include "decoder_tad.h" // Shared TAD decoder library
#include "tav_avx512.h" // AVX-512 SIMD optimisations
#define DECODER_VENDOR_STRING "Decoder-TAV 20251124 (avx512)"
#define DECODER_VENDOR_STRING "Decoder-TAV 20251124 (avx512,presets)"
// TAV format constants
#define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56"
@@ -95,7 +95,8 @@ typedef struct {
uint8_t encoder_quality;
uint8_t channel_layout;
uint8_t entropy_coder;
uint8_t reserved[2];
uint8_t encoder_preset; // Byte 28: bit 0 = sports, bit 1 = anime
uint8_t reserved;
uint8_t device_orientation;
uint8_t file_role;
} __attribute__((packed)) tav_header_t;
@@ -394,10 +395,20 @@ static inline float tav_grain_triangular_noise(uint32_t rng_val) {
return (u1 + u2) - 1.0f;
}
// Remove grain synthesis from DWT coefficients (decoder subtracts noise)
// Apply grain synthesis from DWT coefficients (decoder subtracts noise)
// This must be called AFTER dequantisation but BEFORE inverse DWT
static void remove_grain_synthesis_decoder(float *coeffs, int width, int height,
int decomp_levels, int frame_num, int q_y_global) {
static void apply_grain_synthesis(float *coeffs, int width, int height,
int decomp_levels, int frame_num, int q_y_global, uint8_t encoder_preset, int no_grain_synthesis) {
// Command-line override: disable grain synthesis
if (no_grain_synthesis) {
return; // Skip grain synthesis entirely
}
// Anime preset: completely disable grain synthesis
if (encoder_preset & 0x02) {
return; // Skip grain synthesis entirely
}
dwt_subband_info_t subbands[32];
const int subband_count = calculate_subband_layout(width, height, decomp_levels, subbands);
@@ -412,7 +423,7 @@ static void remove_grain_synthesis_decoder(float *coeffs, int width, int height,
// Calculate band index for RNG (matches Kotlin: level + subbandType * 31 + 16777619)
uint32_t band = subband->level + subband->subband_type * 31 + 16777619;
// Remove noise from each coefficient in this subband
// Apply noise from each coefficient in this subband
for (int i = 0; i < subband->coeff_count; i++) {
const int idx = subband->coeff_start + i;
if (idx < width * height) {
@@ -1226,14 +1237,14 @@ static int get_temporal_subband_level(int frame_idx, int num_frames, int tempora
}
// Calculate temporal quantiser scale for a given temporal subband level
static float get_temporal_quantiser_scale(int temporal_level) {
static float get_temporal_quantiser_scale(uint8_t encoder_preset, int temporal_level) {
// Uses exponential scaling: 2^(BETA × level^KAPPA)
// With BETA=0.6, KAPPA=1.14:
// - Level 0 (tLL): 2^0.0 = 1.00
// - Level 1 (tH): 2^0.68 = 1.61
// - Level 2 (tHH): 2^1.29 = 2.45
const float BETA = 0.6f; // Temporal scaling exponent
const float KAPPA = 1.14f;
const float BETA = (encoder_preset & 0x01) ? 0.0f : 0.6f;
const float KAPPA = (encoder_preset & 0x01) ? 1.0f : 1.14f;
return powf(2.0f, BETA * powf(temporal_level, KAPPA));
}
@@ -1812,6 +1823,7 @@ typedef struct {
int frame_size;
int is_monoblock; // True if version 3-6 (single tile mode)
int temporal_motion_coder; // Temporal wavelet: 0=Haar, 1=CDF 5/3 (extracted from version)
int no_grain_synthesis; // Command-line flag: disable grain synthesis
// Screen masking (letterbox/pillarbox) - array of geometry changes
screen_mask_entry_t *screen_masks;
@@ -2023,10 +2035,11 @@ static int extract_audio_to_wav(const char *input_file, const char *wav_file, in
// Decoder Initialisation and Cleanup
//=============================================================================
static tav_decoder_t* tav_decoder_init(const char *input_file, const char *output_file, const char *audio_file) {
static tav_decoder_t* tav_decoder_init(const char *input_file, const char *output_file, const char *audio_file, int no_grain_synthesis) {
tav_decoder_t *decoder = calloc(1, sizeof(tav_decoder_t));
if (!decoder) return NULL;
decoder->no_grain_synthesis = no_grain_synthesis;
decoder->input_fp = fopen(input_file, "rb");
if (!decoder->input_fp) {
free(decoder);
@@ -2511,8 +2524,9 @@ static int decode_i_or_p_frame(tav_decoder_t *decoder, uint8_t packet_type, uint
// Remove grain synthesis from Y channel (must happen after dequantisation, before inverse DWT)
// Phase 2: Use decoding dimensions and temporary buffer
remove_grain_synthesis_decoder(temp_dwt_y, decoder->decoding_width, decoder->decoding_height,
decoder->header.decomp_levels, decoder->frame_count, decoder->header.quantiser_y);
apply_grain_synthesis(temp_dwt_y, decoder->decoding_width, decoder->decoding_height,
decoder->header.decomp_levels, decoder->frame_count, decoder->header.quantiser_y,
decoder->header.encoder_preset, decoder->no_grain_synthesis);
// Debug: Check LL band AFTER grain removal
// if (decoder->frame_count == 32) {
@@ -2712,10 +2726,11 @@ static void print_usage(const char *prog) {
printf("Version: %s\n\n", DECODER_VENDOR_STRING);
printf("Usage: %s -i input.tav -o output.mkv\n\n", prog);
printf("Options:\n");
printf(" -i <file> Input TAV file\n");
printf(" -o <file> Output MKV file (optional, auto-generated from input)\n");
printf(" -v Verbose output\n");
printf(" -h, --help Show this help\n\n");
printf(" -i <file> Input TAV file\n");
printf(" -o <file> Output MKV file (optional, auto-generated from input)\n");
printf(" -v Verbose output\n");
printf(" --no-grain-synthesis Disable grain synthesis (override encoder preset)\n");
printf(" -h, --help Show this help\n\n");
printf("Supported features (matches TSVM decoder):\n");
printf(" - I-frames and P-frames (delta mode)\n");
printf(" - GOP unified 3D DWT (temporal compression)\n");
@@ -2740,9 +2755,11 @@ int main(int argc, char *argv[]) {
char *input_file = NULL;
char *output_file = NULL;
int verbose = 0;
int no_grain_synthesis = 0;
static struct option long_options[] = {
{"help", no_argument, 0, 'h'},
{"no-grain-synthesis", no_argument, 0, 1000},
{0, 0, 0, 0}
};
@@ -2761,6 +2778,12 @@ int main(int argc, char *argv[]) {
case 'h':
print_usage(argv[0]);
return 0;
case 1000: // --no-grain-synthesis
no_grain_synthesis = 1;
if (verbose) {
printf("Grain synthesis disabled\n");
}
break;
default:
print_usage(argv[0]);
return 1;
@@ -2819,7 +2842,7 @@ int main(int argc, char *argv[]) {
}
// Pass 2: Decode video with audio file
tav_decoder_t *decoder = tav_decoder_init(input_file, output_file, temp_audio_file);
tav_decoder_t *decoder = tav_decoder_init(input_file, output_file, temp_audio_file, no_grain_synthesis);
if (!decoder) {
fprintf(stderr, "Failed to initialise decoder\n");
unlink(temp_audio_file); // Clean up temp file
@@ -3126,7 +3149,7 @@ int main(int argc, char *argv[]) {
// EZBC mode with perceptual quantisation: coefficients are normalised
// Need to dequantise using perceptual weights (same as twobit-map mode)
const int temporal_level = get_temporal_subband_level(t, gop_size, temporal_levels);
const float temporal_scale = get_temporal_quantiser_scale(temporal_level);
const float temporal_scale = get_temporal_quantiser_scale(decoder->header.encoder_preset, temporal_level);
// FIX: Use QLUT to convert header quantiser indices to actual values
const float base_q_y = roundf(QLUT[decoder->header.quantiser_y] * temporal_scale);
@@ -3160,7 +3183,7 @@ int main(int argc, char *argv[]) {
} else if (!is_ezbc) {
// Normal mode: multiply by quantiser
const int temporal_level = get_temporal_subband_level(t, gop_size, temporal_levels);
const float temporal_scale = get_temporal_quantiser_scale(temporal_level);
const float temporal_scale = get_temporal_quantiser_scale(decoder->header.encoder_preset, temporal_level);
// CRITICAL: Must ROUND temporal quantiser to match encoder's roundf() behavior
// FIX: Use QLUT to convert header quantiser indices to actual values
@@ -3206,9 +3229,10 @@ int main(int argc, char *argv[]) {
// Phase 2: Use GOP dimensions (may be cropped) for grain removal
for (int t = 0; t < gop_size; t++) {
remove_grain_synthesis_decoder(gop_y[t], gop_width, gop_height,
apply_grain_synthesis(gop_y[t], gop_width, gop_height,
decoder->header.decomp_levels, decoder->frame_count + t,
decoder->header.quantiser_y);
decoder->header.quantiser_y, decoder->header.encoder_preset,
decoder->no_grain_synthesis);
}
// Apply inverse 3D DWT (spatial + temporal)