mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-07 11:51:49 +09:00
TAD: better bit allocation using statistics
This commit is contained in:
@@ -1068,9 +1068,12 @@ transmission capability, and region-of-interest coding.
|
||||
|
||||
## TAD Packet Structure
|
||||
uint8 Packet type (0x24)
|
||||
<header for decoding packet>
|
||||
uint16 Sample Count
|
||||
uint32 Compressed Size + 6
|
||||
uint32 Compressed Size + 7
|
||||
<header for decoding TAD chunk>
|
||||
uint16 Sample Count
|
||||
uint8 Quantiser Bits
|
||||
uint32 Compressed Size
|
||||
* Zstd-compressed TAD
|
||||
|
||||
|
||||
@@ -256,8 +256,8 @@ static void ms_correlate(const float *mid, const float *side, float *left, float
|
||||
// Decode M/S → L/R
|
||||
float m = mid[i];
|
||||
float s = side[i];
|
||||
left[i] = FCLAMP((m + s) * 1.7321f, -1.0f, 1.0f);
|
||||
right[i] = FCLAMP((m - s) * 1.7321f, -1.0f, 1.0f);
|
||||
left[i] = FCLAMP((m + s), -1.0f, 1.0f);
|
||||
right[i] = FCLAMP((m - s), -1.0f, 1.0f);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -320,38 +320,44 @@ static void pcm32f_to_pcm8(const float *fleft, const float *fright, uint8_t *lef
|
||||
// Dequantization (inverse of quantization)
|
||||
//=============================================================================
|
||||
|
||||
static void get_quantization_weights(int quality, int dwt_levels, float *weights) {
|
||||
const float base_weights[16][16] = {
|
||||
/* 0*/{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
|
||||
/* 1*/{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
|
||||
/* 2*/{1.0f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
|
||||
/* 3*/{0.2f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
|
||||
/* 4*/{0.2f, 0.8f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
|
||||
/* 5*/{0.2f, 0.8f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
|
||||
/* 6*/{0.2f, 0.2f, 0.8f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
|
||||
/* 7*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
|
||||
/* 8*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
|
||||
/* 9*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
|
||||
/*10*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
|
||||
/*11*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
|
||||
/*12*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
|
||||
/*13*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f},
|
||||
/*14*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f},
|
||||
/*15*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f}
|
||||
};
|
||||
|
||||
float quality_scale = 1.0f * (1.0f + FCLAMP((5 - quality) * 0.5f, 0.0f, 1000.0f));
|
||||
#define LAMBDA_FIXED 5.8f
|
||||
|
||||
for (int i = 0; i < dwt_levels; i++) {
|
||||
weights[i] = 1.0f;//base_weights[dwt_levels][i] * quality_scale;
|
||||
// Lambda-based decompanding decoder (inverse of Laplacian CDF-based encoder)
|
||||
// Converts quantized index back to normalized float in [-1, 1]
|
||||
static float lambda_decompanding(int16_t quant_val, int quant_bits) {
|
||||
// Handle zero
|
||||
if (quant_val == 0) {
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
int sign = (quant_val < 0) ? -1 : 1;
|
||||
int abs_index = abs(quant_val);
|
||||
|
||||
// Maximum index for the given quant_bits
|
||||
int max_index = (1 << (quant_bits - 1)) - 1;
|
||||
|
||||
// Clamp to valid range
|
||||
if (abs_index > max_index) abs_index = max_index;
|
||||
|
||||
// Map index back to normalized CDF [0, 1]
|
||||
float normalized_cdf = (float)abs_index / max_index;
|
||||
|
||||
// Map from [0, 1] back to [0.5, 1.0] (CDF range for positive half)
|
||||
float cdf = 0.5f + normalized_cdf * 0.5f;
|
||||
|
||||
// Inverse Laplacian CDF for x >= 0: x = -(1/λ) * ln(2*(1-F))
|
||||
// For F in [0.5, 1.0]: x = -(1/λ) * ln(2*(1-F))
|
||||
float abs_val = -(1.0f / LAMBDA_FIXED) * logf(2.0f * (1.0f - cdf));
|
||||
|
||||
// Clamp to [0, 1]
|
||||
if (abs_val > 1.0f) abs_val = 1.0f;
|
||||
if (abs_val < 0.0f) abs_val = 0.0f;
|
||||
|
||||
return sign * abs_val;
|
||||
}
|
||||
|
||||
#define QUANT_STEPS 8.0f // 64 -> [-64..64] -> 7 bits for LL
|
||||
|
||||
static void dequantize_dwt_coefficients(const int16_t *quantized, float *coeffs, size_t count, int quality, int chunk_size, int dwt_levels) {
|
||||
float weights[16];
|
||||
get_quantization_weights(quality, dwt_levels, weights);
|
||||
static void dequantize_dwt_coefficients(const int16_t *quantized, float *coeffs, size_t count, int chunk_size, int dwt_levels, int quant_bits) {
|
||||
|
||||
// Calculate sideband boundaries dynamically
|
||||
int first_band_size = chunk_size >> dwt_levels;
|
||||
@@ -372,12 +378,11 @@ static void dequantize_dwt_coefficients(const int16_t *quantized, float *coeffs,
|
||||
}
|
||||
}
|
||||
|
||||
// Map (dwt_levels+1) sidebands to dwt_levels weights
|
||||
int weight_idx = (sideband == 0) ? 0 : sideband - 1;
|
||||
if (weight_idx >= dwt_levels) weight_idx = dwt_levels - 1;
|
||||
// Decode using lambda companding
|
||||
float normalized_val = lambda_decompanding(quantized[i], quant_bits);
|
||||
|
||||
float weight = weights[weight_idx];
|
||||
coeffs[i] = ((float)quantized[i] * TAD32_COEFF_SCALARS[sideband]) / (QUANT_STEPS * weight);
|
||||
// Denormalize using the subband scalar
|
||||
coeffs[i] = normalized_val * TAD32_COEFF_SCALARS[sideband];
|
||||
}
|
||||
|
||||
free(sideband_starts);
|
||||
@@ -423,12 +428,16 @@ static size_t decode_sigmap_2bit(const uint8_t *input, int16_t *values, size_t c
|
||||
//=============================================================================
|
||||
|
||||
static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_stereo,
|
||||
int quality, size_t *bytes_consumed, size_t *samples_decoded) {
|
||||
size_t *bytes_consumed, size_t *samples_decoded) {
|
||||
const uint8_t *read_ptr = input;
|
||||
|
||||
// Read chunk header
|
||||
uint16_t sample_count = *((const uint16_t*)read_ptr);
|
||||
read_ptr += sizeof(uint16_t);
|
||||
|
||||
uint8_t quant_bits = *read_ptr;
|
||||
read_ptr += sizeof(uint8_t);
|
||||
|
||||
uint32_t payload_size = *((const uint32_t*)read_ptr);
|
||||
read_ptr += sizeof(uint32_t);
|
||||
|
||||
@@ -479,8 +488,8 @@ static int decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_
|
||||
side_bytes = decode_sigmap_2bit(payload_ptr + mid_bytes, quant_side, sample_count);
|
||||
|
||||
// Dequantize
|
||||
dequantize_dwt_coefficients(quant_mid, dwt_mid, sample_count, quality, sample_count, dwt_levels);
|
||||
dequantize_dwt_coefficients(quant_side, dwt_side, sample_count, quality, sample_count, dwt_levels);
|
||||
dequantize_dwt_coefficients(quant_mid, dwt_mid, sample_count, sample_count, dwt_levels, quant_bits);
|
||||
dequantize_dwt_coefficients(quant_side, dwt_side, sample_count, sample_count, dwt_levels, quant_bits);
|
||||
|
||||
// Inverse DWT
|
||||
dwt_haar_inverse_multilevel(dwt_mid, sample_count, dwt_levels);
|
||||
@@ -520,7 +529,6 @@ static void print_usage(const char *prog_name) {
|
||||
printf("Options:\n");
|
||||
printf(" -i <file> Input TAD file\n");
|
||||
printf(" -o <file> Output PCMu8 file (raw 8-bit unsigned stereo @ 32kHz)\n");
|
||||
printf(" -q <0-5> Quality level used during encoding (default: 3)\n");
|
||||
printf(" -v Verbose output\n");
|
||||
printf(" -h, --help Show this help\n");
|
||||
printf("\nVersion: %s\n", DECODER_VENDOR_STRING);
|
||||
@@ -531,11 +539,10 @@ static void print_usage(const char *prog_name) {
|
||||
int main(int argc, char *argv[]) {
|
||||
char *input_file = NULL;
|
||||
char *output_file = NULL;
|
||||
int quality = 3; // Must match encoder quality
|
||||
int verbose = 0;
|
||||
|
||||
int opt;
|
||||
while ((opt = getopt(argc, argv, "i:o:q:vh")) != -1) {
|
||||
while ((opt = getopt(argc, argv, "i:o:vh")) != -1) {
|
||||
switch (opt) {
|
||||
case 'i':
|
||||
input_file = optarg;
|
||||
@@ -543,14 +550,6 @@ int main(int argc, char *argv[]) {
|
||||
case 'o':
|
||||
output_file = optarg;
|
||||
break;
|
||||
case 'q':
|
||||
quality = atoi(optarg);
|
||||
if (quality < TAD_QUALITY_MIN || quality > TAD_QUALITY_MAX) {
|
||||
fprintf(stderr, "Error: Quality must be between %d and %d\n",
|
||||
TAD_QUALITY_MIN, TAD_QUALITY_MAX);
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
case 'v':
|
||||
verbose = 1;
|
||||
break;
|
||||
@@ -573,7 +572,6 @@ int main(int argc, char *argv[]) {
|
||||
printf("%s\n", DECODER_VENDOR_STRING);
|
||||
printf("Input: %s\n", input_file);
|
||||
printf("Output: %s\n", output_file);
|
||||
printf("Quality: %d\n", quality);
|
||||
}
|
||||
|
||||
// Open input file
|
||||
@@ -611,7 +609,7 @@ int main(int argc, char *argv[]) {
|
||||
while (offset < input_size) {
|
||||
size_t bytes_consumed, samples_decoded;
|
||||
int result = decode_chunk(input_data + offset, input_size - offset,
|
||||
chunk_output, quality, &bytes_consumed, &samples_decoded);
|
||||
chunk_output, &bytes_consumed, &samples_decoded);
|
||||
|
||||
if (result != 0) {
|
||||
fprintf(stderr, "Error: Chunk decoding failed at offset %zu\n", offset);
|
||||
|
||||
@@ -22,9 +22,8 @@ static const float TAD32_COEFF_SCALARS[] = {64.0f, 45.255f, 32.0f, 22.627f, 16.0
|
||||
static void dwt_dd4_forward_1d(float *data, int length);
|
||||
static void dwt_dd4_forward_multilevel(float *data, int length, int levels);
|
||||
static void ms_decorrelate_16(const float *left, const float *right, float *mid, float *side, size_t count);
|
||||
static void get_quantization_weights(int quality, int dwt_levels, float *weights);
|
||||
static int get_deadzone_threshold(int quality);
|
||||
static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, size_t count, int quality, int apply_deadzone, int chunk_size, int dwt_levels, int *current_subband_index);
|
||||
static void get_quantization_weights(int dwt_levels, float *weights);
|
||||
static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, size_t count, int apply_deadzone, int chunk_size, int dwt_levels, int quant_bits, int *current_subband_index);
|
||||
static size_t encode_sigmap_2bit(const int16_t *values, size_t count, uint8_t *output);
|
||||
|
||||
static inline float FCLAMP(float x, float min, float max) {
|
||||
@@ -220,45 +219,44 @@ static void compress_gamma(float *left, float *right, size_t count) {
|
||||
// Quantization with Frequency-Dependent Weighting
|
||||
//=============================================================================
|
||||
|
||||
static void get_quantization_weights(int quality, int dwt_levels, float *weights) {
|
||||
const float base_weights[16][16] = {
|
||||
/* 0*/{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
|
||||
/* 1*/{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
|
||||
/* 2*/{1.0f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
|
||||
/* 3*/{0.2f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
|
||||
/* 4*/{0.2f, 0.8f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
|
||||
/* 5*/{0.2f, 0.8f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
|
||||
/* 6*/{0.2f, 0.2f, 0.8f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
|
||||
/* 7*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
|
||||
/* 8*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
|
||||
/* 9*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
|
||||
/*10*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
|
||||
/*11*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
|
||||
/*12*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f},
|
||||
/*13*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f},
|
||||
/*14*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f},
|
||||
/*15*/{0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f}
|
||||
};
|
||||
#define LAMBDA_FIXED 5.8f
|
||||
|
||||
float quality_scale = 1.0f * (1.0f + FCLAMP((5 - quality) * 0.5f, 0.0f, 1000.0f));
|
||||
|
||||
for (int i = 0; i < dwt_levels; i++) {
|
||||
weights[i] = 1.0f;//base_weights[dwt_levels][i] * quality_scale;
|
||||
// Lambda-based companding encoder (based on Laplacian distribution CDF)
|
||||
// val must be normalised to [-1,1]
|
||||
// Returns quantized index in range [-(2^quant_bits-1), +(2^quant_bits-1)]
|
||||
static int16_t lambda_companding(float val, int quant_bits) {
|
||||
// Handle zero
|
||||
if (fabsf(val) < 1e-9f) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int sign = (val < 0) ? -1 : 1;
|
||||
float abs_val = fabsf(val);
|
||||
|
||||
// Clamp to [0, 1]
|
||||
if (abs_val > 1.0f) abs_val = 1.0f;
|
||||
|
||||
// Maximum index for the given quant_bits
|
||||
int max_index = (1 << (quant_bits - 1)) - 1;
|
||||
|
||||
// Laplacian CDF for x >= 0: F(x) = 1 - 0.5 * exp(-λ*x)
|
||||
// Map to [0.5, 1.0] range (half of CDF for positive values)
|
||||
float cdf = 1.0f - 0.5f * expf(-LAMBDA_FIXED * abs_val);
|
||||
|
||||
// Map CDF from [0.5, 1.0] to [0, 1] for positive half
|
||||
float normalized_cdf = (cdf - 0.5f) * 2.0f;
|
||||
|
||||
// Quantize to index
|
||||
int index = (int)roundf(normalized_cdf * max_index);
|
||||
|
||||
// Clamp index to valid range [0, max_index]
|
||||
if (index < 0) index = 0;
|
||||
if (index > max_index) index = max_index;
|
||||
|
||||
return (int16_t)(sign * index);
|
||||
}
|
||||
|
||||
#define QUANT_STEPS 512.0f // 64 -> [-64..64] -> 7 bits for LL
|
||||
|
||||
/**
 * Look up the dead-zone threshold for a quality level.
 *
 * @param quality Quality level; the table covers Q0 to Q5. Values outside
 *                that range are clamped to the nearest table entry instead
 *                of indexing past the array.
 * @return Dead-zone threshold for the (clamped) quality level.
 */
static int get_deadzone_threshold(int quality) {
    static const int thresholds[] = {0, 0, 0, 0, 0, 0}; // Q0 to Q5
    const int last = (int)(sizeof thresholds / sizeof thresholds[0]) - 1;

    if (quality < 0) {
        quality = 0;
    } else if (quality > last) {
        quality = last;
    }

    return thresholds[quality];
}
|
||||
|
||||
static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, size_t count, int quality, int apply_deadzone, int chunk_size, int dwt_levels, int *current_subband_index) {
|
||||
float weights[16];
|
||||
get_quantization_weights(quality, dwt_levels, weights);
|
||||
int deadzone = apply_deadzone ? get_deadzone_threshold(quality) : 0;
|
||||
|
||||
static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, size_t count, int apply_deadzone, int chunk_size, int dwt_levels, int quant_bits, int *current_subband_index) {
|
||||
int first_band_size = chunk_size >> dwt_levels;
|
||||
|
||||
int *sideband_starts = malloc((dwt_levels + 2) * sizeof(int));
|
||||
@@ -282,19 +280,8 @@ static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, s
|
||||
current_subband_index[i] = sideband;
|
||||
}
|
||||
|
||||
int weight_idx = (sideband == 0) ? 0 : sideband - 1;
|
||||
if (weight_idx >= dwt_levels) weight_idx = dwt_levels - 1;
|
||||
|
||||
float weight = weights[weight_idx];
|
||||
float val = (coeffs[i] / TAD32_COEFF_SCALARS[sideband]) * (QUANT_STEPS * weight);
|
||||
// (coeffs[i] / TAD32_COEFF_SCALARS[sideband]) normalises coeffs to -1..1
|
||||
int16_t quant_val = (int16_t)roundf(val);
|
||||
|
||||
if (apply_deadzone && sideband >= dwt_levels - 1) {
|
||||
if (quant_val > -deadzone && quant_val < deadzone) {
|
||||
quant_val = 0;
|
||||
}
|
||||
}
|
||||
float val = (coeffs[i] / (TAD32_COEFF_SCALARS[sideband])); // val is normalised to [-1,1]
|
||||
int16_t quant_val = lambda_companding(val, quant_bits);
|
||||
|
||||
quantized[i] = quant_val;
|
||||
}
|
||||
@@ -302,25 +289,44 @@ static void quantize_dwt_coefficients(const float *coeffs, int16_t *quantized, s
|
||||
free(sideband_starts);
|
||||
}
|
||||
|
||||
// idea 1: power-of-two companding
|
||||
// for quant step 8:
|
||||
// Q -> Float
|
||||
// 0 -> 0
|
||||
// 1 -> 1/128
|
||||
// 2 -> 1/64
|
||||
// 3 -> 1/32
|
||||
// 4 -> 1/16
|
||||
// 5 -> 1/8
|
||||
// 6 -> 1/4
|
||||
// 7 -> 1/2
|
||||
// 8 -> 1/1
|
||||
// for -1 to -8, just invert the sign
|
||||
|
||||
|
||||
//=============================================================================
|
||||
// Significance Map Encoding
|
||||
//=============================================================================
|
||||
|
||||
/**
 * Encode quantized values with a 2-bit significance map.
 *
 * Output layout:
 *   - map:    ceil(count * 2 / 8) bytes, four 2-bit codes per byte, the first
 *             value of each group in the least significant bits.
 *             00 = 0, 01 = +1, 10 = -1, 11 = "other".
 *   - values: one int16_t (host byte order) for every "other" code, in input
 *             order, stored directly after the map.
 *
 * @param values Input quantized values.
 * @param count  Number of entries in values.
 * @param output Destination buffer; must hold the map plus up to
 *               count * sizeof(int16_t) bytes of escaped values.
 * @return Number of bytes written to output.
 */
static size_t encode_sigmap_2bit(const int16_t *values, size_t count, uint8_t *output) {
    const size_t map_bytes = (count * 2 + 7) / 8;
    uint8_t *map = output;
    uint8_t *value_out = output + map_bytes;
    size_t other_count = 0;

    memset(map, 0, map_bytes);

    for (size_t i = 0; i < count; i++) {
        const int16_t val = values[i];
        unsigned code;

        if (val == 0) {
            code = 0; // 00
        } else if (val == 1) {
            code = 1; // 01
        } else if (val == -1) {
            code = 2; // 10
        } else {
            code = 3; // 11
            /* The value area starts at an arbitrary byte offset (map_bytes may
             * be odd), so store through memcpy rather than an int16_t pointer
             * to avoid a misaligned access. */
            memcpy(value_out + other_count * sizeof val, &val, sizeof val);
            other_count++;
        }

        /* Codes are 2 bits wide and start on even bit positions, so a code
         * never straddles a byte boundary. */
        map[i / 4] |= (uint8_t)(code << ((i % 4) * 2));
    }

    return map_bytes + other_count * sizeof(int16_t);
}
|
||||
|
||||
//=============================================================================
|
||||
// Coefficient Statistics
|
||||
//=============================================================================
|
||||
@@ -339,6 +345,7 @@ typedef struct {
|
||||
float median;
|
||||
float q3;
|
||||
float max;
|
||||
float lambda; // Laplacian distribution parameter (1/b, where b is scale)
|
||||
} CoeffStats;
|
||||
|
||||
typedef struct {
|
||||
@@ -410,6 +417,7 @@ static void accumulate_coefficients(const float *coeffs, int dwt_levels, int chu
|
||||
static void calculate_coeff_stats(const float *coeffs, size_t count, CoeffStats *stats) {
|
||||
if (count == 0) {
|
||||
stats->min = stats->q1 = stats->median = stats->q3 = stats->max = 0.0f;
|
||||
stats->lambda = 0.0f;
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -425,6 +433,16 @@ static void calculate_coeff_stats(const float *coeffs, size_t count, CoeffStats
|
||||
stats->q3 = sorted[(3 * count) / 4];
|
||||
|
||||
free(sorted);
|
||||
|
||||
// Estimate Laplacian distribution parameter λ = 1/b
|
||||
// For Laplacian centered at μ=0, MLE gives: b = mean(|x|)
|
||||
// Therefore: λ = 1/b = 1/mean(|x|)
|
||||
double sum_abs = 0.0;
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
sum_abs += fabs(coeffs[i]);
|
||||
}
|
||||
double mean_abs = sum_abs / count;
|
||||
stats->lambda = (mean_abs > 1e-9) ? (1.0f / mean_abs) : 0.0f;
|
||||
}
|
||||
|
||||
#define HISTOGRAM_BINS 40
|
||||
@@ -492,9 +510,9 @@ void tad32_print_statistics(void) {
|
||||
|
||||
// Print Mid channel statistics
|
||||
fprintf(stderr, "\nMid Channel:\n");
|
||||
fprintf(stderr, "%-12s %10s %10s %10s %10s %10s %10s\n",
|
||||
"Subband", "Samples", "Min", "Q1", "Median", "Q3", "Max");
|
||||
fprintf(stderr, "--------------------------------------------------------------------------------\n");
|
||||
fprintf(stderr, "%-12s %10s %10s %10s %10s %10s %10s %10s\n",
|
||||
"Subband", "Samples", "Min", "Q1", "Median", "Q3", "Max", "Lambda");
|
||||
fprintf(stderr, "----------------------------------------------------------------------------------------\n");
|
||||
|
||||
for (int s = 0; s < num_subbands; s++) {
|
||||
CoeffStats stats;
|
||||
@@ -507,9 +525,9 @@ void tad32_print_statistics(void) {
|
||||
snprintf(band_name, sizeof(band_name), "H (L%d)", stats_dwt_levels - s + 1);
|
||||
}
|
||||
|
||||
fprintf(stderr, "%-12s %10zu %10.3f %10.3f %10.3f %10.3f %10.3f\n",
|
||||
fprintf(stderr, "%-12s %10zu %10.3f %10.3f %10.3f %10.3f %10.3f %10.3f\n",
|
||||
band_name, mid_accumulators[s].count,
|
||||
stats.min, stats.q1, stats.median, stats.q3, stats.max);
|
||||
stats.min, stats.q1, stats.median, stats.q3, stats.max, stats.lambda);
|
||||
}
|
||||
|
||||
// Print Mid channel histograms
|
||||
@@ -526,9 +544,9 @@ void tad32_print_statistics(void) {
|
||||
|
||||
// Print Side channel statistics
|
||||
fprintf(stderr, "\nSide Channel:\n");
|
||||
fprintf(stderr, "%-12s %10s %10s %10s %10s %10s %10s\n",
|
||||
"Subband", "Samples", "Min", "Q1", "Median", "Q3", "Max");
|
||||
fprintf(stderr, "--------------------------------------------------------------------------------\n");
|
||||
fprintf(stderr, "%-12s %10s %10s %10s %10s %10s %10s %10s\n",
|
||||
"Subband", "Samples", "Min", "Q1", "Median", "Q3", "Max", "Lambda");
|
||||
fprintf(stderr, "----------------------------------------------------------------------------------------\n");
|
||||
|
||||
for (int s = 0; s < num_subbands; s++) {
|
||||
CoeffStats stats;
|
||||
@@ -541,9 +559,9 @@ void tad32_print_statistics(void) {
|
||||
snprintf(band_name, sizeof(band_name), "H (L%d)", stats_dwt_levels - s + 1);
|
||||
}
|
||||
|
||||
fprintf(stderr, "%-12s %10zu %10.3f %10.3f %10.3f %10.3f %10.3f\n",
|
||||
fprintf(stderr, "%-12s %10zu %10.3f %10.3f %10.3f %10.3f %10.3f %10.3f\n",
|
||||
band_name, side_accumulators[s].count,
|
||||
stats.min, stats.q1, stats.median, stats.q3, stats.max);
|
||||
stats.min, stats.q1, stats.median, stats.q3, stats.max, stats.lambda);
|
||||
}
|
||||
|
||||
// Print Side channel histograms
|
||||
@@ -576,46 +594,12 @@ void tad32_free_statistics(void) {
|
||||
stats_initialized = 0;
|
||||
}
|
||||
|
||||
/**
 * Encode quantized values with a 2-bit significance map.
 *
 * Output layout:
 *   - map:    ceil(count * 2 / 8) bytes, four 2-bit codes per byte, the first
 *             value of each group in the least significant bits.
 *             00 = 0, 01 = +1, 10 = -1, 11 = "other".
 *   - values: one int16_t (host byte order) for every "other" code, in input
 *             order, stored directly after the map.
 *
 * @param values Input quantized values.
 * @param count  Number of entries in values.
 * @param output Destination buffer; must hold the map plus up to
 *               count * sizeof(int16_t) bytes of escaped values.
 * @return Number of bytes written to output.
 */
static size_t encode_sigmap_2bit(const int16_t *values, size_t count, uint8_t *output) {
    const size_t map_bytes = (count * 2 + 7) / 8;
    uint8_t *map = output;
    uint8_t *value_out = output + map_bytes;
    size_t other_count = 0;

    memset(map, 0, map_bytes);

    for (size_t i = 0; i < count; i++) {
        const int16_t val = values[i];
        unsigned code;

        if (val == 0) {
            code = 0; // 00
        } else if (val == 1) {
            code = 1; // 01
        } else if (val == -1) {
            code = 2; // 10
        } else {
            code = 3; // 11
            /* The value area starts at an arbitrary byte offset (map_bytes may
             * be odd), so store through memcpy rather than an int16_t pointer
             * to avoid a misaligned access. */
            memcpy(value_out + other_count * sizeof val, &val, sizeof val);
            other_count++;
        }

        /* Codes are 2 bits wide and start on even bit positions, so a code
         * never straddles a byte boundary. */
        map[i / 4] |= (uint8_t)(code << ((i % 4) * 2));
    }

    return map_bytes + other_count * sizeof(int16_t);
}
|
||||
|
||||
//=============================================================================
|
||||
// Public API: Chunk Encoding
|
||||
//=============================================================================
|
||||
|
||||
size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples, int quality,
|
||||
int use_zstd, uint8_t *output) {
|
||||
size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
|
||||
int quant_bits, int use_zstd, uint8_t *output) {
|
||||
// Calculate DWT levels from chunk size
|
||||
int dwt_levels = calculate_dwt_levels(num_samples);
|
||||
if (dwt_levels < 0) {
|
||||
@@ -670,8 +654,8 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples, int qua
|
||||
}
|
||||
|
||||
// Step 4: Quantize with frequency-dependent weights and dead zone
|
||||
quantize_dwt_coefficients(dwt_mid, quant_mid, num_samples, quality, 1, num_samples, dwt_levels, NULL);
|
||||
quantize_dwt_coefficients(dwt_side, quant_side, num_samples, quality, 1, num_samples, dwt_levels, NULL);
|
||||
quantize_dwt_coefficients(dwt_mid, quant_mid, num_samples, 1, num_samples, dwt_levels, quant_bits, NULL);
|
||||
quantize_dwt_coefficients(dwt_side, quant_side, num_samples, 1, num_samples, dwt_levels, quant_bits, NULL);
|
||||
|
||||
// Step 5: Encode with 2-bit significance map (32-bit version)
|
||||
uint8_t *temp_buffer = malloc(num_samples * 4 * sizeof(int32_t));
|
||||
@@ -683,9 +667,13 @@ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples, int qua
|
||||
// Step 6: Optional Zstd compression
|
||||
uint8_t *write_ptr = output;
|
||||
|
||||
// Write chunk header
|
||||
*((uint16_t*)write_ptr) = (uint16_t)num_samples;
|
||||
write_ptr += sizeof(uint16_t);
|
||||
|
||||
*write_ptr = (uint8_t)quant_bits;
|
||||
write_ptr += sizeof(uint8_t);
|
||||
|
||||
uint32_t *payload_size_ptr = (uint32_t*)write_ptr;
|
||||
write_ptr += sizeof(uint32_t);
|
||||
|
||||
|
||||
@@ -24,18 +24,19 @@
|
||||
*
|
||||
* @param pcm32_stereo Input PCM32fLE stereo samples (interleaved L,R)
|
||||
* @param num_samples Number of samples per channel (min 1024)
|
||||
* @param quality Quality level 0-5 (0=lowest, 5=highest)
|
||||
* @param quant_bits Quantization bits 4-12 (default: 7)
|
||||
* @param use_zstd 1=enable Zstd compression, 0=disable
|
||||
* @param output Output buffer (must be large enough)
|
||||
* @return Number of bytes written to output, or 0 on error
|
||||
*
|
||||
* Output format:
|
||||
* uint16 sample_count (samples per channel)
|
||||
* uint8 quant_bits (quantization bits used)
|
||||
* uint32 payload_size (bytes in payload)
|
||||
* * payload (encoded M/S data, optionally Zstd-compressed)
|
||||
*/
|
||||
size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples, int quality,
|
||||
int use_zstd, uint8_t *output);
|
||||
size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
|
||||
int quant_bits, int use_zstd, uint8_t *output);
|
||||
|
||||
/**
|
||||
* Print accumulated coefficient statistics
|
||||
|
||||
@@ -47,7 +47,8 @@ static void print_usage(const char *prog_name) {
|
||||
printf("Options:\n");
|
||||
printf(" -i <file> Input audio file (any format supported by FFmpeg)\n");
|
||||
printf(" -o <file> Output TAD32 file\n");
|
||||
printf(" -q <0-5> Quality level (default: %d, higher = better quality)\n", TAD32_QUALITY_DEFAULT);
|
||||
printf(" -q <bits> Quantization bits (default: 7, range: 4-8)\n");
|
||||
printf(" Higher = more precision, larger files\n");
|
||||
printf(" --no-zstd Disable Zstd compression\n");
|
||||
printf(" -v Verbose output\n");
|
||||
printf(" -h, --help Show this help\n");
|
||||
@@ -62,6 +63,7 @@ int main(int argc, char *argv[]) {
|
||||
char *input_file = NULL;
|
||||
char *output_file = NULL;
|
||||
int quality = TAD32_QUALITY_DEFAULT;
|
||||
int quant_bits = 7; // Default QUANT_BITS
|
||||
int use_zstd = 1;
|
||||
int verbose = 0;
|
||||
|
||||
@@ -83,10 +85,9 @@ int main(int argc, char *argv[]) {
|
||||
output_file = optarg;
|
||||
break;
|
||||
case 'q':
|
||||
quality = atoi(optarg);
|
||||
if (quality < TAD32_QUALITY_MIN || quality > TAD32_QUALITY_MAX) {
|
||||
fprintf(stderr, "Error: Quality must be between %d and %d\n",
|
||||
TAD32_QUALITY_MIN, TAD32_QUALITY_MAX);
|
||||
quant_bits = atoi(optarg);
|
||||
if (quant_bits < 4 || quant_bits > 8) {
|
||||
fprintf(stderr, "Error: Quantization bits must be between 4 and 8\n");
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
@@ -115,7 +116,6 @@ int main(int argc, char *argv[]) {
|
||||
printf("%s\n", ENCODER_VENDOR_STRING);
|
||||
printf("Input: %s\n", input_file);
|
||||
printf("Output: %s\n", output_file);
|
||||
printf("Quality: %d\n", quality);
|
||||
printf("Significance map: 2-bit\n");
|
||||
printf("Zstd compression: %s\n", use_zstd ? "enabled" : "disabled");
|
||||
}
|
||||
@@ -242,8 +242,8 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
|
||||
// Encode chunk using linked tad32_encode_chunk() from encoder_tad32.c
|
||||
size_t encoded_size = tad32_encode_chunk(chunk_buffer, TAD32_DEFAULT_CHUNK_SIZE, quality,
|
||||
use_zstd, output_buffer);
|
||||
size_t encoded_size = tad32_encode_chunk(chunk_buffer, TAD32_DEFAULT_CHUNK_SIZE,
|
||||
quant_bits, use_zstd, output_buffer);
|
||||
|
||||
if (encoded_size == 0) {
|
||||
fprintf(stderr, "Error: Chunk encoding failed at chunk %zu\n", chunk_idx);
|
||||
@@ -291,7 +291,7 @@ int main(int argc, char *argv[]) {
|
||||
compression_ratio, (total_output_size * 100.0) / pcmu8_size);
|
||||
|
||||
if (compression_ratio < 1.8) {
|
||||
printf("Warning: Compression ratio below 2:1 target. Try higher quality or different settings.\n");
|
||||
printf("Warning: Compression ratio below 2:1 target. Try lower quantisation bits or different settings.\n");
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
Reference in New Issue
Block a user