mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-07 19:51:51 +09:00
TAV: base code for adding psychovisual model
This commit is contained in:
@@ -22,12 +22,14 @@
|
||||
|
||||
// TSVM Advanced Video (TAV) format constants
|
||||
#define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" // "\x1FTSVM TAV"
|
||||
// TAV version - dynamic based on colour space mode
|
||||
// Version 3: YCoCg-R monoblock (default)
|
||||
// Version 4: ICtCp monoblock (--ictcp flag)
|
||||
// Legacy versions (4-tile mode, code preserved but not accessible):
|
||||
// Version 1: YCoCg-R 4-tile
|
||||
// Version 2: ICtCp 4-tile
|
||||
// TAV version - dynamic based on colour space and perceptual tuning
|
||||
// Version 5: YCoCg-R monoblock with perceptual quantization (default)
|
||||
// Version 6: ICtCp monoblock with perceptual quantization (--ictcp flag)
|
||||
// Legacy versions (uniform quantization):
|
||||
// Version 3: YCoCg-R monoblock uniform (--no-perceptual-tuning)
|
||||
// Version 4: ICtCp monoblock uniform (--ictcp --no-perceptual-tuning)
|
||||
// Version 1: YCoCg-R 4-tile (legacy, code preserved but not accessible)
|
||||
// Version 2: ICtCp 4-tile (legacy, code preserved but not accessible)
|
||||
|
||||
// Tile encoding modes (280x224 tiles)
|
||||
#define TAV_MODE_SKIP 0x00 // Skip tile (copy from reference)
|
||||
@@ -142,6 +144,9 @@ static int validate_mp2_bitrate(int bitrate) {
|
||||
static const int QUALITY_Y[] = {60, 42, 25, 12, 6, 2};
|
||||
static const int QUALITY_CO[] = {120, 90, 60, 30, 15, 3};
|
||||
static const int QUALITY_CG[] = {240, 180, 120, 60, 30, 5};
|
||||
//static const int QUALITY_Y[] = { 25, 12, 6, 3, 2, 1};
|
||||
//static const int QUALITY_CO[] = {60, 30, 15, 7, 5, 2};
|
||||
//static const int QUALITY_CG[] = {120, 60, 30, 15, 10, 4};
|
||||
|
||||
// DWT coefficient structure for each subband
|
||||
typedef struct {
|
||||
@@ -157,6 +162,15 @@ typedef struct {
|
||||
int tile_x, tile_y;
|
||||
} dwt_tile_t;
|
||||
|
||||
// DWT subband information for perceptual quantization
|
||||
typedef struct {
|
||||
int level; // Decomposition level (1 to enc->decomp_levels)
|
||||
int subband_type; // 0=LL, 1=LH, 2=HL, 3=HH
|
||||
int coeff_start; // Starting index in linear coefficient array
|
||||
int coeff_count; // Number of coefficients in this subband
|
||||
float perceptual_weight; // Quantization multiplier for this subband
|
||||
} dwt_subband_info_t;
|
||||
|
||||
// TAV encoder structure
|
||||
typedef struct {
|
||||
// Input/output files
|
||||
@@ -196,6 +210,7 @@ typedef struct {
|
||||
int ictcp_mode; // 0 = YCoCg-R (default), 1 = ICtCp colour space
|
||||
int intra_only; // Force all tiles to use INTRA mode (disable delta encoding)
|
||||
int monoblock; // Single DWT tile mode (encode entire frame as one tile)
|
||||
int perceptual_tuning; // 1 = perceptual quantization (default), 0 = uniform quantization
|
||||
|
||||
// Frame buffers - ping-pong implementation
|
||||
uint8_t *frame_rgb[2]; // [0] and [1] alternate between current and previous
|
||||
@@ -247,6 +262,7 @@ typedef struct {
|
||||
|
||||
// Progress tracking
|
||||
struct timeval start_time;
|
||||
int encode_limit; // Maximum number of frames to encode (0 = no limit)
|
||||
|
||||
} tav_encoder_t;
|
||||
|
||||
@@ -331,6 +347,8 @@ static void show_usage(const char *program_name) {
|
||||
printf(" --lossless Lossless mode: use 5/3 reversible wavelet\n");
|
||||
printf(" --delta Enable delta encoding (improved compression but noisy picture)\n");
|
||||
printf(" --ictcp Use ICtCp colour space instead of YCoCg-R (use when source is in BT.2100)\n");
|
||||
printf(" --no-perceptual-tuning Disable perceptual quantization (uniform quantization like versions 3/4)\n");
|
||||
printf(" --encode-limit N Encode only first N frames (useful for testing/analysis)\n");
|
||||
printf(" --help Show this help\n\n");
|
||||
|
||||
printf("Audio Rate by Quality:\n ");
|
||||
@@ -358,8 +376,10 @@ static void show_usage(const char *program_name) {
|
||||
printf("\n\n");
|
||||
printf("Features:\n");
|
||||
printf(" - Single DWT tile (monoblock) encoding for optimal quality\n");
|
||||
printf(" - Perceptual quantization optimized for human visual system (default)\n");
|
||||
printf(" - Full resolution YCoCg-R/ICtCp colour space\n");
|
||||
printf(" - Lossless and lossy compression modes\n");
|
||||
printf(" - Versions 5/6: Perceptual quantization, Versions 3/4: Uniform quantization\n");
|
||||
|
||||
printf("\nExamples:\n");
|
||||
printf(" %s -i input.mp4 -o output.mv3 # Default settings\n", program_name);
|
||||
@@ -386,7 +406,9 @@ static tav_encoder_t* create_encoder(void) {
|
||||
enc->quantiser_cg = QUALITY_CG[DEFAULT_QUALITY];
|
||||
enc->intra_only = 1;
|
||||
enc->monoblock = 1; // Default to monoblock mode
|
||||
enc->perceptual_tuning = 1; // Default to perceptual quantization (versions 5/6)
|
||||
enc->audio_bitrate = 0; // 0 = use quality table
|
||||
enc->encode_limit = 0; // Default: no frame limit
|
||||
|
||||
return enc;
|
||||
}
|
||||
@@ -775,6 +797,143 @@ static void quantise_dwt_coefficients(float *coeffs, int16_t *quantised, int siz
|
||||
}
|
||||
}
|
||||
|
||||
// Get perceptual weight for specific subband - Data-driven model based on coefficient variance analysis
|
||||
static float get_perceptual_weight(int level, int subband_type, int is_chroma, int max_levels) {
|
||||
// TEMPORARY: Test with uniform weights to verify linear layout works correctly
|
||||
return 1.0f;
|
||||
|
||||
if (!is_chroma) {
|
||||
// Luma strategy based on statistical variance analysis from real video data
|
||||
if (subband_type == 0) { // LL
|
||||
// LL6 has extremely high variance (Range=8026.7) but contains most image energy
|
||||
// Moderate quantization appropriate due to high variance tolerance
|
||||
return 1.1f;
|
||||
} else if (subband_type == 1) { // LH (horizontal detail)
|
||||
// Data-driven weights based on observed coefficient patterns
|
||||
if (level >= 6) return 0.7f; // LH6: significant coefficients (Range=243.1)
|
||||
else if (level == 5) return 0.8f; // LH5: moderate coefficients (Range=264.3)
|
||||
else if (level == 4) return 1.0f; // LH4: small coefficients (Range=50.8)
|
||||
else if (level == 3) return 1.4f; // LH3: sparse but large outliers (Range=11909.1)
|
||||
else if (level == 2) return 1.6f; // LH2: fewer coefficients (Range=6720.2)
|
||||
else return 1.9f; // LH1: smallest detail (Range=1606.3)
|
||||
} else if (subband_type == 2) { // HL (vertical detail)
|
||||
// Similar pattern to LH but slightly different variance
|
||||
if (level >= 6) return 0.8f; // HL6: moderate coefficients (Range=181.6)
|
||||
else if (level == 5) return 0.9f; // HL5: small coefficients (Range=80.4)
|
||||
else if (level == 4) return 1.2f; // HL4: surprising large outliers (Range=9737.9)
|
||||
else if (level == 3) return 1.3f; // HL3: very large outliers (Range=13698.2)
|
||||
else if (level == 2) return 1.5f; // HL2: moderate range (Range=2099.4)
|
||||
else return 1.8f; // HL1: small coefficients (Range=851.1)
|
||||
} else { // HH (diagonal detail)
|
||||
// HH bands generally have lower energy but important for texture
|
||||
if (level >= 6) return 1.0f; // HH6: some significant coefficients (Range=95.8)
|
||||
else if (level == 5) return 1.1f; // HH5: small coefficients (Range=75.9)
|
||||
else if (level == 4) return 1.3f; // HH4: moderate range (Range=89.8)
|
||||
else if (level == 3) return 1.5f; // HH3: large outliers (Range=11611.2)
|
||||
else if (level == 2) return 1.8f; // HH2: moderate range (Range=2499.2)
|
||||
else return 2.1f; // HH1: smallest coefficients (Range=761.6)
|
||||
}
|
||||
} else {
|
||||
// Chroma strategy - apply 0.85x reduction to luma weights for color preservation
|
||||
float luma_weight = get_perceptual_weight(level, subband_type, 0, max_levels);
|
||||
return luma_weight * 0.85f;
|
||||
}
|
||||
}
|
||||
|
||||
// Determine perceptual weight for coefficient at linear position (matches actual DWT layout)
|
||||
static float get_perceptual_weight_for_position(int linear_idx, int width, int height, int decomp_levels, int is_chroma) {
|
||||
// For now, return uniform weight while we figure out the actual DWT layout
|
||||
// TODO: Map linear_idx to correct DWT subband and return appropriate weight
|
||||
return 1.0f;
|
||||
}
|
||||
|
||||
// Apply perceptual quantization per-coefficient (same loop as uniform but with spatial weights)
|
||||
static void quantise_dwt_coefficients_perceptual_per_coeff(float *coeffs, int16_t *quantised, int size,
|
||||
int base_quantizer, int width, int height,
|
||||
int decomp_levels, int is_chroma, int frame_count) {
|
||||
// EXACTLY the same approach as uniform quantization but apply weight per coefficient
|
||||
float effective_base_q = base_quantizer;
|
||||
effective_base_q = FCLAMP(effective_base_q, 1.0f, 255.0f);
|
||||
|
||||
// Debug coefficient analysis
|
||||
if (frame_count == 1 || frame_count == 120) {
|
||||
int nonzero = 0;
|
||||
for (int i = 0; i < size; i++) {
|
||||
// Apply perceptual weight based on coefficient's position in DWT layout
|
||||
float weight = get_perceptual_weight_for_position(i, width, height, decomp_levels, is_chroma);
|
||||
float effective_q = effective_base_q * weight;
|
||||
float quantised_val = coeffs[i] / effective_q;
|
||||
quantised[i] = (int16_t)CLAMP((int)(quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f)), -32768, 32767);
|
||||
if (quantised[i] != 0) nonzero++;
|
||||
}
|
||||
printf("DEBUG: Frame 120 - %s channel: %d/%d nonzero coeffs after perceptual per-coeff quantization\n",
|
||||
is_chroma ? "Chroma" : "Luma", nonzero, size);
|
||||
} else {
|
||||
// Normal quantization loop
|
||||
for (int i = 0; i < size; i++) {
|
||||
// Apply perceptual weight based on coefficient's position in DWT layout
|
||||
float weight = get_perceptual_weight_for_position(i, width, height, decomp_levels, is_chroma);
|
||||
float effective_q = effective_base_q * weight;
|
||||
float quantised_val = coeffs[i] / effective_q;
|
||||
quantised[i] = (int16_t)CLAMP((int)(quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f)), -32768, 32767);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Convert 2D spatial DWT layout to linear subband layout (for decoder compatibility)
|
||||
static void convert_2d_to_linear_layout(const int16_t *spatial_2d, int16_t *linear_subbands,
|
||||
int width, int height, int decomp_levels) {
|
||||
int linear_offset = 0;
|
||||
|
||||
// First: LL subband (top-left corner at finest decomposition level)
|
||||
int ll_width = width >> decomp_levels;
|
||||
int ll_height = height >> decomp_levels;
|
||||
for (int y = 0; y < ll_height; y++) {
|
||||
for (int x = 0; x < ll_width; x++) {
|
||||
int spatial_idx = y * width + x;
|
||||
linear_subbands[linear_offset++] = spatial_2d[spatial_idx];
|
||||
}
|
||||
}
|
||||
|
||||
// Then: LH, HL, HH subbands for each level from max down to 1
|
||||
for (int level = decomp_levels; level >= 1; level--) {
|
||||
int level_width = width >> (decomp_levels - level + 1);
|
||||
int level_height = height >> (decomp_levels - level + 1);
|
||||
|
||||
// LH subband (top-right quadrant)
|
||||
for (int y = 0; y < level_height; y++) {
|
||||
for (int x = level_width; x < level_width * 2; x++) {
|
||||
if (y < height && x < width) {
|
||||
int spatial_idx = y * width + x;
|
||||
linear_subbands[linear_offset++] = spatial_2d[spatial_idx];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// HL subband (bottom-left quadrant)
|
||||
for (int y = level_height; y < level_height * 2; y++) {
|
||||
for (int x = 0; x < level_width; x++) {
|
||||
if (y < height && x < width) {
|
||||
int spatial_idx = y * width + x;
|
||||
linear_subbands[linear_offset++] = spatial_2d[spatial_idx];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// HH subband (bottom-right quadrant)
|
||||
for (int y = level_height; y < level_height * 2; y++) {
|
||||
for (int x = level_width; x < level_width * 2; x++) {
|
||||
if (y < height && x < width) {
|
||||
int spatial_idx = y * width + x;
|
||||
linear_subbands[linear_offset++] = spatial_2d[spatial_idx];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Serialise tile data for compression
|
||||
static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
|
||||
const float *tile_y_data, const float *tile_co_data, const float *tile_cg_data,
|
||||
@@ -820,9 +979,17 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
|
||||
|
||||
if (mode == TAV_MODE_INTRA) {
|
||||
// INTRA mode: quantise coefficients directly and store for future reference
|
||||
quantise_dwt_coefficients((float*)tile_y_data, quantised_y, tile_size, this_frame_qY);
|
||||
quantise_dwt_coefficients((float*)tile_co_data, quantised_co, tile_size, this_frame_qCo);
|
||||
quantise_dwt_coefficients((float*)tile_cg_data, quantised_cg, tile_size, this_frame_qCg);
|
||||
if (enc->perceptual_tuning) {
|
||||
// Perceptual quantization: EXACTLY like uniform but with per-coefficient weights
|
||||
quantise_dwt_coefficients_perceptual_per_coeff((float*)tile_y_data, quantised_y, tile_size, this_frame_qY, enc->width, enc->height, enc->decomp_levels, 0, enc->frame_count);
|
||||
quantise_dwt_coefficients_perceptual_per_coeff((float*)tile_co_data, quantised_co, tile_size, this_frame_qCo, enc->width, enc->height, enc->decomp_levels, 1, enc->frame_count);
|
||||
quantise_dwt_coefficients_perceptual_per_coeff((float*)tile_cg_data, quantised_cg, tile_size, this_frame_qCg, enc->width, enc->height, enc->decomp_levels, 1, enc->frame_count);
|
||||
} else {
|
||||
// Legacy uniform quantization
|
||||
quantise_dwt_coefficients((float*)tile_y_data, quantised_y, tile_size, this_frame_qY);
|
||||
quantise_dwt_coefficients((float*)tile_co_data, quantised_co, tile_size, this_frame_qCo);
|
||||
quantise_dwt_coefficients((float*)tile_cg_data, quantised_cg, tile_size, this_frame_qCg);
|
||||
}
|
||||
|
||||
// Store current coefficients for future delta reference
|
||||
int tile_idx = tile_y * enc->tiles_x + tile_x;
|
||||
@@ -851,20 +1018,121 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
|
||||
delta_cg[i] = tile_cg_data[i] - prev_cg[i];
|
||||
}
|
||||
|
||||
// Quantise the deltas
|
||||
quantise_dwt_coefficients(delta_y, quantised_y, tile_size, this_frame_qY);
|
||||
quantise_dwt_coefficients(delta_co, quantised_co, tile_size, this_frame_qCo);
|
||||
quantise_dwt_coefficients(delta_cg, quantised_cg, tile_size, this_frame_qCg);
|
||||
// Quantise the deltas with per-coefficient perceptual quantization
|
||||
if (enc->perceptual_tuning) {
|
||||
quantise_dwt_coefficients_perceptual_per_coeff(delta_y, quantised_y, tile_size, this_frame_qY, enc->width, enc->height, enc->decomp_levels, 0, 0);
|
||||
quantise_dwt_coefficients_perceptual_per_coeff(delta_co, quantised_co, tile_size, this_frame_qCo, enc->width, enc->height, enc->decomp_levels, 1, 0);
|
||||
quantise_dwt_coefficients_perceptual_per_coeff(delta_cg, quantised_cg, tile_size, this_frame_qCg, enc->width, enc->height, enc->decomp_levels, 1, 0);
|
||||
} else {
|
||||
// Legacy uniform delta quantization
|
||||
quantise_dwt_coefficients(delta_y, quantised_y, tile_size, this_frame_qY);
|
||||
quantise_dwt_coefficients(delta_co, quantised_co, tile_size, this_frame_qCo);
|
||||
quantise_dwt_coefficients(delta_cg, quantised_cg, tile_size, this_frame_qCg);
|
||||
}
|
||||
|
||||
// Reconstruct coefficients like decoder will (previous + dequantised_delta)
|
||||
for (int i = 0; i < tile_size; i++) {
|
||||
float dequant_delta_y = (float)quantised_y[i] * this_frame_qY;
|
||||
float dequant_delta_co = (float)quantised_co[i] * this_frame_qCo;
|
||||
float dequant_delta_cg = (float)quantised_cg[i] * this_frame_qCg;
|
||||
|
||||
prev_y[i] = prev_y[i] + dequant_delta_y;
|
||||
prev_co[i] = prev_co[i] + dequant_delta_co;
|
||||
prev_cg[i] = prev_cg[i] + dequant_delta_cg;
|
||||
if (enc->perceptual_tuning) {
|
||||
// Apply 2D perceptual dequantization using same logic as quantization
|
||||
|
||||
// First, apply uniform dequantization baseline
|
||||
for (int i = 0; i < tile_size; i++) {
|
||||
prev_y[i] = prev_y[i] + ((float)quantised_y[i] * (float)this_frame_qY);
|
||||
prev_co[i] = prev_co[i] + ((float)quantised_co[i] * (float)this_frame_qCo);
|
||||
prev_cg[i] = prev_cg[i] + ((float)quantised_cg[i] * (float)this_frame_qCg);
|
||||
}
|
||||
|
||||
// Then apply perceptual correction by re-dequantizing specific subbands
|
||||
for (int level = 1; level <= enc->decomp_levels; level++) {
|
||||
int level_width = enc->width >> (enc->decomp_levels - level + 1);
|
||||
int level_height = enc->height >> (enc->decomp_levels - level + 1);
|
||||
|
||||
// Skip if subband is too small
|
||||
if (level_width < 1 || level_height < 1) continue;
|
||||
|
||||
// Get perceptual weights for this level
|
||||
float lh_weight_y = get_perceptual_weight(level, 1, 0, enc->decomp_levels);
|
||||
float hl_weight_y = get_perceptual_weight(level, 2, 0, enc->decomp_levels);
|
||||
float hh_weight_y = get_perceptual_weight(level, 3, 0, enc->decomp_levels);
|
||||
float lh_weight_co = get_perceptual_weight(level, 1, 1, enc->decomp_levels);
|
||||
float hl_weight_co = get_perceptual_weight(level, 2, 1, enc->decomp_levels);
|
||||
float hh_weight_co = get_perceptual_weight(level, 3, 1, enc->decomp_levels);
|
||||
|
||||
// Correct LH subband (top-right quadrant)
|
||||
for (int y = 0; y < level_height; y++) {
|
||||
for (int x = level_width; x < level_width * 2; x++) {
|
||||
if (y < enc->height && x < enc->width) {
|
||||
int idx = y * enc->width + x;
|
||||
// Remove uniform dequantization and apply perceptual
|
||||
prev_y[idx] -= ((float)quantised_y[idx] * (float)this_frame_qY);
|
||||
prev_y[idx] += ((float)quantised_y[idx] * ((float)this_frame_qY * lh_weight_y));
|
||||
prev_co[idx] -= ((float)quantised_co[idx] * (float)this_frame_qCo);
|
||||
prev_co[idx] += ((float)quantised_co[idx] * ((float)this_frame_qCo * lh_weight_co));
|
||||
prev_cg[idx] -= ((float)quantised_cg[idx] * (float)this_frame_qCg);
|
||||
prev_cg[idx] += ((float)quantised_cg[idx] * ((float)this_frame_qCg * lh_weight_co));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Correct HL subband (bottom-left quadrant)
|
||||
for (int y = level_height; y < level_height * 2; y++) {
|
||||
for (int x = 0; x < level_width; x++) {
|
||||
if (y < enc->height && x < enc->width) {
|
||||
int idx = y * enc->width + x;
|
||||
prev_y[idx] -= ((float)quantised_y[idx] * (float)this_frame_qY);
|
||||
prev_y[idx] += ((float)quantised_y[idx] * ((float)this_frame_qY * hl_weight_y));
|
||||
prev_co[idx] -= ((float)quantised_co[idx] * (float)this_frame_qCo);
|
||||
prev_co[idx] += ((float)quantised_co[idx] * ((float)this_frame_qCo * hl_weight_co));
|
||||
prev_cg[idx] -= ((float)quantised_cg[idx] * (float)this_frame_qCg);
|
||||
prev_cg[idx] += ((float)quantised_cg[idx] * ((float)this_frame_qCg * hl_weight_co));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Correct HH subband (bottom-right quadrant)
|
||||
for (int y = level_height; y < level_height * 2; y++) {
|
||||
for (int x = level_width; x < level_width * 2; x++) {
|
||||
if (y < enc->height && x < enc->width) {
|
||||
int idx = y * enc->width + x;
|
||||
prev_y[idx] -= ((float)quantised_y[idx] * (float)this_frame_qY);
|
||||
prev_y[idx] += ((float)quantised_y[idx] * ((float)this_frame_qY * hh_weight_y));
|
||||
prev_co[idx] -= ((float)quantised_co[idx] * (float)this_frame_qCo);
|
||||
prev_co[idx] += ((float)quantised_co[idx] * ((float)this_frame_qCo * hh_weight_co));
|
||||
prev_cg[idx] -= ((float)quantised_cg[idx] * (float)this_frame_qCg);
|
||||
prev_cg[idx] += ((float)quantised_cg[idx] * ((float)this_frame_qCg * hh_weight_co));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Finally, correct LL subband (top-left corner at finest level)
|
||||
int ll_width = enc->width >> enc->decomp_levels;
|
||||
int ll_height = enc->height >> enc->decomp_levels;
|
||||
float ll_weight_y = get_perceptual_weight(enc->decomp_levels, 0, 0, enc->decomp_levels);
|
||||
float ll_weight_co = get_perceptual_weight(enc->decomp_levels, 0, 1, enc->decomp_levels);
|
||||
for (int y = 0; y < ll_height; y++) {
|
||||
for (int x = 0; x < ll_width; x++) {
|
||||
if (y < enc->height && x < enc->width) {
|
||||
int idx = y * enc->width + x;
|
||||
prev_y[idx] -= ((float)quantised_y[idx] * (float)this_frame_qY);
|
||||
prev_y[idx] += ((float)quantised_y[idx] * ((float)this_frame_qY * ll_weight_y));
|
||||
prev_co[idx] -= ((float)quantised_co[idx] * (float)this_frame_qCo);
|
||||
prev_co[idx] += ((float)quantised_co[idx] * ((float)this_frame_qCo * ll_weight_co));
|
||||
prev_cg[idx] -= ((float)quantised_cg[idx] * (float)this_frame_qCg);
|
||||
prev_cg[idx] += ((float)quantised_cg[idx] * ((float)this_frame_qCg * ll_weight_co));
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Legacy uniform dequantization
|
||||
for (int i = 0; i < tile_size; i++) {
|
||||
float dequant_delta_y = (float)quantised_y[i] * this_frame_qY;
|
||||
float dequant_delta_co = (float)quantised_co[i] * this_frame_qCo;
|
||||
float dequant_delta_cg = (float)quantised_cg[i] * this_frame_qCg;
|
||||
|
||||
prev_y[i] = prev_y[i] + dequant_delta_y;
|
||||
prev_co[i] = prev_co[i] + dequant_delta_co;
|
||||
prev_cg[i] = prev_cg[i] + dequant_delta_cg;
|
||||
}
|
||||
}
|
||||
|
||||
free(delta_y);
|
||||
@@ -881,7 +1149,7 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
|
||||
printf("\n");
|
||||
}*/
|
||||
|
||||
// Write quantised coefficients
|
||||
// Write quantised coefficients (both uniform and perceptual use same linear layout)
|
||||
memcpy(buffer + offset, quantised_y, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t);
|
||||
memcpy(buffer + offset, quantised_co, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t);
|
||||
memcpy(buffer + offset, quantised_cg, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t);
|
||||
@@ -950,6 +1218,19 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
|
||||
printf("\n");
|
||||
}*/
|
||||
|
||||
// Debug: Check Y data before DWT transform
|
||||
if (enc->frame_count == 120 && enc->verbose) {
|
||||
float max_y_before = 0.0f;
|
||||
int nonzero_before = 0;
|
||||
int total_pixels = enc->monoblock ? (enc->width * enc->height) : (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y);
|
||||
for (int i = 0; i < total_pixels; i++) {
|
||||
float abs_val = fabsf(tile_y_data[i]);
|
||||
if (abs_val > max_y_before) max_y_before = abs_val;
|
||||
if (abs_val > 0.1f) nonzero_before++;
|
||||
}
|
||||
printf("DEBUG: Y data before DWT: max=%.2f, nonzero=%d/%d\n", max_y_before, nonzero_before, total_pixels);
|
||||
}
|
||||
|
||||
// Apply DWT transform to each channel
|
||||
if (enc->monoblock) {
|
||||
// Monoblock mode: transform entire frame
|
||||
@@ -962,6 +1243,16 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
|
||||
dwt_2d_forward_padded(tile_co_data, enc->decomp_levels, enc->wavelet_filter);
|
||||
dwt_2d_forward_padded(tile_cg_data, enc->decomp_levels, enc->wavelet_filter);
|
||||
}
|
||||
|
||||
// Debug: Check Y data after DWT transform for high-frequency content
|
||||
if (enc->frame_count == 120 && enc->verbose) {
|
||||
printf("DEBUG: Y data after DWT (some high-freq samples): ");
|
||||
int sample_indices[] = {47034, 47035, 47036, 47037, 47038}; // HH1 start + some samples
|
||||
for (int i = 0; i < 5; i++) {
|
||||
printf("%.3f ", tile_y_data[sample_indices[i]]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
// Serialise tile
|
||||
size_t tile_size = serialise_tile_data(enc, tile_x, tile_y,
|
||||
@@ -1245,12 +1536,16 @@ static int write_tav_header(tav_encoder_t *enc) {
|
||||
// Magic number
|
||||
fwrite(TAV_MAGIC, 1, 8, enc->output_fp);
|
||||
|
||||
// Version (dynamic based on colour space and monoblock mode)
|
||||
// Version (dynamic based on colour space, monoblock mode, and perceptual tuning)
|
||||
uint8_t version;
|
||||
if (enc->monoblock) {
|
||||
version = enc->ictcp_mode ? 4 : 3; // Version 4 for ICtCp monoblock, 3 for YCoCg-R monoblock
|
||||
if (enc->perceptual_tuning) {
|
||||
version = enc->ictcp_mode ? 6 : 5; // Version 6 for ICtCp perceptual, 5 for YCoCg-R perceptual
|
||||
} else {
|
||||
version = enc->ictcp_mode ? 4 : 3; // Version 4 for ICtCp uniform, 3 for YCoCg-R uniform
|
||||
}
|
||||
} else {
|
||||
version = enc->ictcp_mode ? 2 : 1; // Version 2 for ICtCp, 1 for YCoCg-R
|
||||
version = enc->ictcp_mode ? 2 : 1; // Legacy 4-tile versions
|
||||
}
|
||||
fputc(version, enc->output_fp);
|
||||
|
||||
@@ -2231,6 +2526,8 @@ int main(int argc, char *argv[]) {
|
||||
{"lossless", no_argument, 0, 1000},
|
||||
{"delta", no_argument, 0, 1006},
|
||||
{"ictcp", no_argument, 0, 1005},
|
||||
{"no-perceptual-tuning", no_argument, 0, 1007},
|
||||
{"encode-limit", required_argument, 0, 1008},
|
||||
{"help", no_argument, 0, '?'},
|
||||
{0, 0, 0, 0}
|
||||
};
|
||||
@@ -2301,6 +2598,17 @@ int main(int argc, char *argv[]) {
|
||||
case 1006: // --intra-only
|
||||
enc->intra_only = 0;
|
||||
break;
|
||||
case 1007: // --no-perceptual-tuning
|
||||
enc->perceptual_tuning = 0;
|
||||
break;
|
||||
case 1008: // --encode-limit
|
||||
enc->encode_limit = atoi(optarg);
|
||||
if (enc->encode_limit < 0) {
|
||||
fprintf(stderr, "Error: Invalid encode limit: %d\n", enc->encode_limit);
|
||||
cleanup_encoder(enc);
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
case 1400: // --arate
|
||||
{
|
||||
int bitrate = atoi(optarg);
|
||||
@@ -2353,10 +2661,19 @@ int main(int argc, char *argv[]) {
|
||||
printf("Wavelet: %s\n", enc->wavelet_filter ? "9/7 irreversible" : "5/3 reversible");
|
||||
printf("Decomposition levels: %d\n", enc->decomp_levels);
|
||||
printf("Colour space: %s\n", enc->ictcp_mode ? "ICtCp" : "YCoCg-R");
|
||||
printf("Quantization: %s\n", enc->perceptual_tuning ? "Perceptual (HVS-optimized)" : "Uniform (legacy)");
|
||||
if (enc->ictcp_mode) {
|
||||
printf("Quantiser: I=%d, Ct=%d, Cp=%d\n", enc->quantiser_y, enc->quantiser_co, enc->quantiser_cg);
|
||||
printf("Base quantiser: I=%d, Ct=%d, Cp=%d\n", enc->quantiser_y, enc->quantiser_co, enc->quantiser_cg);
|
||||
} else {
|
||||
printf("Quantiser: Y=%d, Co=%d, Cg=%d\n", enc->quantiser_y, enc->quantiser_co, enc->quantiser_cg);
|
||||
printf("Base quantiser: Y=%d, Co=%d, Cg=%d\n", enc->quantiser_y, enc->quantiser_co, enc->quantiser_cg);
|
||||
}
|
||||
if (enc->perceptual_tuning) {
|
||||
printf("Perceptual weights: LL=%.1fx, LH/HL=%.1f-%.1fx, HH=%.1f-%.1fx (varies by level)\n",
|
||||
get_perceptual_weight(enc->decomp_levels, 0, 0, enc->decomp_levels),
|
||||
get_perceptual_weight(enc->decomp_levels, 1, 0, enc->decomp_levels),
|
||||
get_perceptual_weight(1, 1, 0, enc->decomp_levels),
|
||||
get_perceptual_weight(enc->decomp_levels, 3, 0, enc->decomp_levels),
|
||||
get_perceptual_weight(1, 3, 0, enc->decomp_levels));
|
||||
}
|
||||
|
||||
// Open output file
|
||||
@@ -2436,6 +2753,13 @@ int main(int argc, char *argv[]) {
|
||||
int count_pframe = 0;
|
||||
|
||||
while (continue_encoding) {
|
||||
// Check encode limit if specified
|
||||
if (enc->encode_limit > 0 && frame_count >= enc->encode_limit) {
|
||||
printf("Reached encode limit of %d frames, finalizing...\n", enc->encode_limit);
|
||||
continue_encoding = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
if (enc->test_mode) {
|
||||
// Test mode has a fixed frame count
|
||||
if (frame_count >= enc->total_frames) {
|
||||
|
||||
Reference in New Issue
Block a user