mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-07 19:51:51 +09:00
TAD: psychoacoustically optimised quantisation
This commit is contained in:
@@ -955,7 +955,7 @@ transmission capability, and region-of-interest coding.
|
|||||||
- 7 = Mirrored, Clockwise 270 deg
|
- 7 = Mirrored, Clockwise 270 deg
|
||||||
uint8 File Role
|
uint8 File Role
|
||||||
- 0 = generic
|
- 0 = generic
|
||||||
- 1 = this file is header-only, and a UCF payload will follow (used by seekable movie file)
|
- 1 = this file is header-only, and a UCF payload will follow (used by movie file with chapters)
|
||||||
When a header-only file contains video packets, they should be presented as an Intro Movie
|
When a header-only file contains video packets, they should be presented as an Intro Movie
|
||||||
before the user-interactable selector (served by the UCF payload)
|
before the user-interactable selector (served by the UCF payload)
|
||||||
|
|
||||||
@@ -1256,7 +1256,7 @@ TAV decoder requires new GraphicsJSR223Delegate functions:
|
|||||||
- tavQuantise(): Multi-band quantisation
|
- tavQuantise(): Multi-band quantisation
|
||||||
|
|
||||||
## Audio Support
|
## Audio Support
|
||||||
Reuses existing MP2 audio infrastructure from TEV/MOV formats for compatibility.
|
MP2 frames, raw PCMu8, and TAD formats are supported.
|
||||||
|
|
||||||
## Subtitle Support
|
## Subtitle Support
|
||||||
Uses same Simple Subtitle Format (SSF) as TEV for text overlay functionality.
|
Uses same Simple Subtitle Format (SSF) as TEV for text overlay functionality.
|
||||||
|
|||||||
@@ -19,20 +19,32 @@
|
|||||||
static const float TAD32_COEFF_SCALARS[] = {64.0f, 45.255f, 32.0f, 22.627f, 16.0f, 11.314f, 8.0f, 5.657f, 4.0f, 2.828f};
|
static const float TAD32_COEFF_SCALARS[] = {64.0f, 45.255f, 32.0f, 22.627f, 16.0f, 11.314f, 8.0f, 5.657f, 4.0f, 2.828f};
|
||||||
|
|
||||||
// Base quantiser weight table (10 subbands: LL + 9 H bands)
|
// Base quantiser weight table (10 subbands: LL + 9 H bands)
|
||||||
// Linearly spaced from 1.0 (LL) to 2.0 (H9)
|
// These weights are multiplied by quantiser_scale during dequantization (must match the encoder's quantization weights)
|
||||||
// These weights are multiplied by quantiser_scale during dequantization
|
static const float BASE_QUANTISER_WEIGHTS[2][10] = {
|
||||||
static const float BASE_QUANTISER_WEIGHTS[] = {
|
{ // mid channel
|
||||||
1.0f, // LL (L9) - finest preservation
|
4.0f, // LL (L9) DC
|
||||||
1.0f, // H (L9)
|
2.0f, // H (L9) 31.25 hz
|
||||||
1.0f, // H (L8)
|
1.8f, // H (L8) 62.5 hz
|
||||||
1.0f, // H (L7)
|
1.6f, // H (L7) 125 hz
|
||||||
1.0f, // H (L6)
|
1.4f, // H (L6) 250 hz
|
||||||
1.1f, // H (L5)
|
1.2f, // H (L5) 500 hz
|
||||||
1.2f, // H (L4)
|
1.0f, // H (L4) 1 khz
|
||||||
1.3f, // H (L3)
|
1.0f, // H (L3) 2 khz
|
||||||
1.4f, // H (L2)
|
1.3f, // H (L2) 4 khz
|
||||||
1.5f // H (L1) - coarsest quantization
|
1.8f // H (L1) 8 khz
|
||||||
};
|
},
|
||||||
|
{ // side channel
|
||||||
|
6.0f, // LL (L9) DC
|
||||||
|
5.0f, // H (L9) 31.25 hz
|
||||||
|
2.6f, // H (L8) 62.5 hz
|
||||||
|
2.4f, // H (L7) 125 hz
|
||||||
|
1.8f, // H (L6) 250 hz
|
||||||
|
1.3f, // H (L5) 500 hz
|
||||||
|
1.0f, // H (L4) 1 khz
|
||||||
|
1.0f, // H (L3) 2 khz
|
||||||
|
1.6f, // H (L2) 4 khz
|
||||||
|
3.2f // H (L1) 8 khz
|
||||||
|
}};
|
||||||
|
|
||||||
#define TAD_DEFAULT_CHUNK_SIZE 32768
|
#define TAD_DEFAULT_CHUNK_SIZE 32768
|
||||||
#define TAD_MIN_CHUNK_SIZE 1024
|
#define TAD_MIN_CHUNK_SIZE 1024
|
||||||
@@ -90,7 +102,7 @@ static void spectral_interpolate_band(float *c, size_t len, float Q, float lower
|
|||||||
if (len < 4) return;
|
if (len < 4) return;
|
||||||
|
|
||||||
uint32_t seed = 0x9E3779B9u ^ (uint32_t)len ^ (uint32_t)(Q * 65536.0f);
|
uint32_t seed = 0x9E3779B9u ^ (uint32_t)len ^ (uint32_t)(Q * 65536.0f);
|
||||||
const float dither_amp = 0.05f * Q; // Very light dither (~-60 dBFS)
|
const float dither_amp = 0.02f * Q; // Very light dither
|
||||||
|
|
||||||
// Just add ultra-light TPDF dither to reduce quantization grain
|
// Just add ultra-light TPDF dither to reduce quantization grain
|
||||||
// No aggressive hole filling or AR prediction that might create artifacts
|
// No aggressive hole filling or AR prediction that might create artifacts
|
||||||
@@ -468,7 +480,7 @@ static float lambda_decompanding(int8_t quant_val, int max_index) {
|
|||||||
return sign * abs_val;
|
return sign * abs_val;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void dequantize_dwt_coefficients(const int8_t *quantized, float *coeffs, size_t count, int chunk_size, int dwt_levels, int max_index, float quantiser_scale) {
|
static void dequantize_dwt_coefficients(int channel, const int8_t *quantized, float *coeffs, size_t count, int chunk_size, int dwt_levels, int max_index, float quantiser_scale) {
|
||||||
|
|
||||||
// Calculate sideband boundaries dynamically
|
// Calculate sideband boundaries dynamically
|
||||||
int first_band_size = chunk_size >> dwt_levels;
|
int first_band_size = chunk_size >> dwt_levels;
|
||||||
@@ -494,7 +506,7 @@ static void dequantize_dwt_coefficients(const int8_t *quantized, float *coeffs,
|
|||||||
float normalized_val = lambda_decompanding(quantized[i], max_index);
|
float normalized_val = lambda_decompanding(quantized[i], max_index);
|
||||||
|
|
||||||
// Denormalize using the subband scalar and apply base weight + quantiser scaling
|
// Denormalize using the subband scalar and apply base weight + quantiser scaling
|
||||||
float weight = BASE_QUANTISER_WEIGHTS[sideband] * quantiser_scale;
|
float weight = BASE_QUANTISER_WEIGHTS[channel][sideband] * quantiser_scale;
|
||||||
coeffs[i] = normalized_val * TAD32_COEFF_SCALARS[sideband] * weight;
|
coeffs[i] = normalized_val * TAD32_COEFF_SCALARS[sideband] * weight;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -509,7 +521,7 @@ static void dequantize_dwt_coefficients(const int8_t *quantized, float *coeffs,
|
|||||||
size_t band_len = band_end - band_start;
|
size_t band_len = band_end - band_start;
|
||||||
|
|
||||||
// Calculate quantization step Q for this band
|
// Calculate quantization step Q for this band
|
||||||
float weight = BASE_QUANTISER_WEIGHTS[band] * quantiser_scale;
|
float weight = BASE_QUANTISER_WEIGHTS[channel][band] * quantiser_scale;
|
||||||
float scalar = TAD32_COEFF_SCALARS[band] * weight;
|
float scalar = TAD32_COEFF_SCALARS[band] * weight;
|
||||||
float Q = scalar / max_index;
|
float Q = scalar / max_index;
|
||||||
|
|
||||||
@@ -585,8 +597,8 @@ static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_
|
|||||||
// Dequantize with quantiser scaling and spectral interpolation
|
// Dequantize with quantiser scaling and spectral interpolation
|
||||||
// Use quantiser_scale = 1.0f for baseline (must match encoder)
|
// Use quantiser_scale = 1.0f for baseline (must match encoder)
|
||||||
float quantiser_scale = 1.0f;
|
float quantiser_scale = 1.0f;
|
||||||
dequantize_dwt_coefficients(quant_mid, dwt_mid, sample_count, sample_count, dwt_levels, max_index, quantiser_scale);
|
dequantize_dwt_coefficients(0, quant_mid, dwt_mid, sample_count, sample_count, dwt_levels, max_index, quantiser_scale);
|
||||||
dequantize_dwt_coefficients(quant_side, dwt_side, sample_count, sample_count, dwt_levels, max_index, quantiser_scale);
|
dequantize_dwt_coefficients(1, quant_side, dwt_side, sample_count, sample_count, dwt_levels, max_index, quantiser_scale);
|
||||||
|
|
||||||
// Inverse DWT
|
// Inverse DWT
|
||||||
dwt_inverse_multilevel(dwt_mid, sample_count, dwt_levels);
|
dwt_inverse_multilevel(dwt_mid, sample_count, dwt_levels);
|
||||||
|
|||||||
@@ -19,25 +19,32 @@
|
|||||||
static const float TAD32_COEFF_SCALARS[] = {64.0f, 45.255f, 32.0f, 22.627f, 16.0f, 11.314f, 8.0f, 5.657f, 4.0f, 2.828f};
|
static const float TAD32_COEFF_SCALARS[] = {64.0f, 45.255f, 32.0f, 22.627f, 16.0f, 11.314f, 8.0f, 5.657f, 4.0f, 2.828f};
|
||||||
|
|
||||||
// Base quantiser weight table (10 subbands: LL + 9 H bands)
|
// Base quantiser weight table (10 subbands: LL + 9 H bands)
|
||||||
// Linearly spaced from 1.0 (LL) to 2.0 (H9)
|
|
||||||
// These weights are multiplied by quantiser_scale during quantization
|
// These weights are multiplied by quantiser_scale during quantization
|
||||||
static const float BASE_QUANTISER_WEIGHTS[] = {
|
static const float BASE_QUANTISER_WEIGHTS[2][10] = {
|
||||||
1.0f, // LL (L9) - finest preservation
|
{ // mid channel
|
||||||
1.0f, // H (L9)
|
4.0f, // LL (L9) DC
|
||||||
1.0f, // H (L8)
|
2.0f, // H (L9) 31.25 hz
|
||||||
1.0f, // H (L7)
|
1.8f, // H (L8) 62.5 hz
|
||||||
1.0f, // H (L6)
|
1.6f, // H (L7) 125 hz
|
||||||
1.1f, // H (L5)
|
1.4f, // H (L6) 250 hz
|
||||||
1.2f, // H (L4)
|
1.2f, // H (L5) 500 hz
|
||||||
1.3f, // H (L3)
|
1.0f, // H (L4) 1 khz
|
||||||
1.4f, // H (L2)
|
1.0f, // H (L3) 2 khz
|
||||||
1.5f // H (L1) - coarsest quantization
|
1.3f, // H (L2) 4 khz
|
||||||
};
|
1.8f // H (L1) 8 khz
|
||||||
|
},
|
||||||
// Forward declarations for internal functions
|
{ // side channel
|
||||||
static void dwt_dd4_forward_1d(float *data, int length);
|
6.0f, // LL (L9) DC
|
||||||
static void dwt_forward_multilevel(float *data, int length, int levels);
|
5.0f, // H (L9) 31.25 hz
|
||||||
static void quantize_dwt_coefficients(const float *coeffs, int8_t *quantized, size_t count, int apply_deadzone, int chunk_size, int dwt_levels, int quant_bits, int *current_subband_index, float quantiser_scale);
|
2.6f, // H (L8) 62.5 hz
|
||||||
|
2.4f, // H (L7) 125 hz
|
||||||
|
1.8f, // H (L6) 250 hz
|
||||||
|
1.3f, // H (L5) 500 hz
|
||||||
|
1.0f, // H (L4) 1 khz
|
||||||
|
1.0f, // H (L3) 2 khz
|
||||||
|
1.6f, // H (L2) 4 khz
|
||||||
|
3.2f // H (L1) 8 khz
|
||||||
|
}};
|
||||||
|
|
||||||
static inline float FCLAMP(float x, float min, float max) {
|
static inline float FCLAMP(float x, float min, float max) {
|
||||||
return x < min ? min : (x > max ? max : x);
|
return x < min ? min : (x > max ? max : x);
|
||||||
@@ -279,7 +286,7 @@ static int8_t lambda_companding(float val, int max_index) {
|
|||||||
return (int8_t)(sign * index);
|
return (int8_t)(sign * index);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void quantize_dwt_coefficients(const float *coeffs, int8_t *quantized, size_t count, int apply_deadzone, int chunk_size, int dwt_levels, int max_index, int *current_subband_index, float quantiser_scale) {
|
static void quantize_dwt_coefficients(int channel, const float *coeffs, int8_t *quantized, size_t count, int apply_deadzone, int chunk_size, int dwt_levels, int max_index, int *current_subband_index, float quantiser_scale) {
|
||||||
int first_band_size = chunk_size >> dwt_levels;
|
int first_band_size = chunk_size >> dwt_levels;
|
||||||
|
|
||||||
int *sideband_starts = malloc((dwt_levels + 2) * sizeof(int));
|
int *sideband_starts = malloc((dwt_levels + 2) * sizeof(int));
|
||||||
@@ -304,7 +311,7 @@ static void quantize_dwt_coefficients(const float *coeffs, int8_t *quantized, si
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Apply base weight and quantiser scaling
|
// Apply base weight and quantiser scaling
|
||||||
float weight = BASE_QUANTISER_WEIGHTS[sideband] * quantiser_scale;
|
float weight = BASE_QUANTISER_WEIGHTS[channel][sideband] * quantiser_scale;
|
||||||
float val = (coeffs[i] / (TAD32_COEFF_SCALARS[sideband] * weight)); // val is normalised to [-1,1]
|
float val = (coeffs[i] / (TAD32_COEFF_SCALARS[sideband] * weight)); // val is normalised to [-1,1]
|
||||||
int8_t quant_val = lambda_companding(val, max_index);
|
int8_t quant_val = lambda_companding(val, max_index);
|
||||||
|
|
||||||
@@ -779,8 +786,8 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Step 4: Quantize with frequency-dependent weights and quantiser scaling
|
// Step 4: Quantize with frequency-dependent weights and quantiser scaling
|
||||||
quantize_dwt_coefficients(dwt_mid, quant_mid, num_samples, 1, num_samples, dwt_levels, max_index, NULL, quantiser_scale);
|
quantize_dwt_coefficients(0, dwt_mid, quant_mid, num_samples, 1, num_samples, dwt_levels, max_index, NULL, quantiser_scale);
|
||||||
quantize_dwt_coefficients(dwt_side, quant_side, num_samples, 1, num_samples, dwt_levels, max_index, NULL, quantiser_scale);
|
quantize_dwt_coefficients(1, dwt_side, quant_side, num_samples, 1, num_samples, dwt_levels, max_index, NULL, quantiser_scale);
|
||||||
|
|
||||||
// Step 4.5: Accumulate quantized coefficient statistics if enabled
|
// Step 4.5: Accumulate quantized coefficient statistics if enabled
|
||||||
if (stats_enabled) {
|
if (stats_enabled) {
|
||||||
|
|||||||
@@ -12,7 +12,7 @@
|
|||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include "encoder_tad.h"
|
#include "encoder_tad.h"
|
||||||
|
|
||||||
#define ENCODER_VENDOR_STRING "Encoder-TAD32 (PCM32f version) 20251026"
|
#define ENCODER_VENDOR_STRING "Encoder-TAD32 (PCM32f version) 20251107"
|
||||||
|
|
||||||
// TAD32 format constants
|
// TAD32 format constants
|
||||||
#define TAD32_DEFAULT_CHUNK_SIZE 32768 // Default: power of 2 for optimal performance (2^15)
|
#define TAD32_DEFAULT_CHUNK_SIZE 32768 // Default: power of 2 for optimal performance (2^15)
|
||||||
@@ -55,9 +55,6 @@ static void print_usage(const char *prog_name) {
|
|||||||
printf(" 3 = good quality (max_index=47) [DEFAULT]\n");
|
printf(" 3 = good quality (max_index=47) [DEFAULT]\n");
|
||||||
printf(" 4 = high quality (max_index=56)\n");
|
printf(" 4 = high quality (max_index=56)\n");
|
||||||
printf(" 5 = very high quality/largest (max_index=89)\n");
|
printf(" 5 = very high quality/largest (max_index=89)\n");
|
||||||
printf(" -s <scale> Quantiser scaling factor (default: 1.0, range: 0.5-4.0)\n");
|
|
||||||
printf(" Higher = more aggressive quantization, smaller files\n");
|
|
||||||
printf(" 2.0 = quantize 2x coarser than baseline\n");
|
|
||||||
printf(" -v Verbose output\n");
|
printf(" -v Verbose output\n");
|
||||||
printf(" -h, --help Show this help\n");
|
printf(" -h, --help Show this help\n");
|
||||||
printf("\nVersion: %s\n", ENCODER_VENDOR_STRING);
|
printf("\nVersion: %s\n", ENCODER_VENDOR_STRING);
|
||||||
|
|||||||
Reference in New Issue
Block a user