diff --git a/assets/disk0/tvdos/bin/playtav.js b/assets/disk0/tvdos/bin/playtav.js index bbdc914..775a44a 100644 --- a/assets/disk0/tvdos/bin/playtav.js +++ b/assets/disk0/tvdos/bin/playtav.js @@ -1348,7 +1348,7 @@ try { } else if (packetType === TAV_PACKET_AUDIO_TAD) { let sampleLen = seqread.readShort() - let payloadLen = seqread.readInt() // compressed size + 6 + let payloadLen = seqread.readInt() // compressed size + 7 if (!tadInitialised) { tadInitialised = true diff --git a/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt b/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt index f172ec4..9b051cb 100644 --- a/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt +++ b/tsvm_core/src/net/torvald/tsvm/peripheral/AudioAdapter.kt @@ -13,6 +13,7 @@ import net.torvald.tsvm.VM import net.torvald.tsvm.getHashStr import net.torvald.tsvm.toInt import java.io.ByteArrayInputStream +import kotlin.math.pow import kotlin.math.roundToInt private class RenderRunnable(val playhead: AudioAdapter.Playhead) : Runnable { @@ -134,8 +135,27 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) { internal var tadQuality = 2 // Quality level used during encoding (0-5) @Volatile private var tadBusy = false - // TAD decoder constants - private val TAD_COEFF_SCALAR = 1024.0f + // TAD decoder constants - Coefficient scalars for each subband (matching C decoder) + // Index 0 = LL band, Index 1-9 = H bands (L9 to L1) + private val TAD32_COEFF_SCALARS = floatArrayOf( + 64.0f, 45.255f, 32.0f, 22.627f, 16.0f, 11.314f, 8.0f, 5.657f, 4.0f, 2.828f + ) + + // Base quantiser weight table (10 subbands: LL + 9 H bands) + private val BASE_QUANTISER_WEIGHTS = floatArrayOf( + 1.0f, // LL (L9) - finest preservation + 1.0f, // H (L9) + 1.0f, // H (L8) + 1.0f, // H (L7) + 1.0f, // H (L6) + 1.1f, // H (L5) + 1.2f, // H (L4) + 1.3f, // H (L3) + 1.4f, // H (L2) + 1.5f // H (L1) - coarsest quantization + ) + + private val LAMBDA_FIXED = 6.0f // Dither state for noise shaping (2 channels, 2 history samples each) private val ditherError = Array(2) { FloatArray(2) } @@ -350,24 +370,84 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) { return frand01() - frand01() } - // M/S stereo correlation with noise-shaped dithering (matches C implementation) - private fun msCorrelate(mid: FloatArray, side: FloatArray, sampleCount: Int) { + // Lambda-based decompanding decoder (inverse of Laplacian CDF-based encoder) + // Converts quantized index back to normalized float in [-1, 1] + private fun lambdaDecompanding(quantVal: Short, maxIndex: Int): Float { + // Handle zero + if (quantVal == 0.toShort()) { + return 0.0f + } + + val sign = if (quantVal < 0) -1 else 1 + var absIndex = kotlin.math.abs(quantVal.toInt()) + + // Clamp to valid range + if (absIndex > maxIndex) absIndex = maxIndex + + // Map index back to normalized CDF [0, 1] + val normalizedCdf = absIndex.toFloat() / maxIndex + + // Map from [0, 1] back to [0.5, 1.0] (CDF range for positive half) + val cdf = 0.5f + normalizedCdf * 0.5f + + // Inverse Laplacian CDF for x >= 0: x = -(1/λ) * ln(2*(1-F)) + // For F in [0.5, 1.0]: x = -(1/λ) * ln(2*(1-F)) + var absVal = -(1.0f / LAMBDA_FIXED) * kotlin.math.ln(2.0f * (1.0f - cdf)) + + // Clamp to [0, 1] + absVal = absVal.coerceIn(0.0f, 1.0f) + + return sign * absVal + } + + private fun signum(x: Float): Float { + return when { + x > 0.0f -> 1.0f + x < 0.0f -> -1.0f + else -> 0.0f + } + } + + // Gamma expansion (inverse of gamma compression) + private fun expandGamma(left: FloatArray, right: FloatArray, count: Int) { + for (i in 0 until count) { + // decode(y) = sign(y) * |y|^(1/γ) where γ=0.5 + val x = left[i] + val a = kotlin.math.abs(x) + left[i] = signum(x) * a.pow(1.4142f) + + val y = right[i] + val b = kotlin.math.abs(y) + right[i] = signum(y) * b.pow(1.4142f) + } + } + + // M/S stereo correlation (no dithering - that's now in spectral interpolation) + private fun msCorrelate(mid: FloatArray, side: FloatArray, left: FloatArray, right: FloatArray, sampleCount: Int) { + for (i in 0 until sampleCount) { + // Decode M/S → L/R + val m = mid[i] + val s = side[i] + left[i] = (m + s).coerceIn(-1.0f, 1.0f) + right[i] = (m - s).coerceIn(-1.0f, 1.0f) + } + } + + // PCM32f to PCM8 conversion with noise-shaped dithering + private fun pcm32fToPcm8(fleft: FloatArray, fright: FloatArray, sampleCount: Int) { val b1 = 1.5f // 1st feedback coefficient val b2 = -0.75f // 2nd feedback coefficient val scale = 127.5f val bias = 128 - for (i in 0 until sampleCount) { - // Decode M/S → L/R - val m = mid[i] - val s = side[i] - val l = (m + s).coerceIn(-1.0f, 1.0f) - val r = (m - s).coerceIn(-1.0f, 1.0f) + // Reduced dither amplitude to coordinate with coefficient-domain dithering + val ditherScale = 0.2f // Reduced from 0.5 + for (i in 0 until sampleCount) { // --- LEFT channel --- val feedbackL = b1 * ditherError[0][0] + b2 * ditherError[0][1] - val ditherL = 0.5f * tpdf1() // ±0.5 LSB TPDF - val shapedL = (l + feedbackL + ditherL / scale).coerceIn(-1.0f, 1.0f) + val ditherL = ditherScale * tpdf1() // Reduced TPDF dither + val shapedL = (fleft[i] + feedbackL + ditherL / scale).coerceIn(-1.0f, 1.0f) val qL = (shapedL * scale).roundToInt().coerceIn(-128, 127) tadDecodedBin[i * 2L] = (qL + bias).toByte() @@ -378,8 +458,8 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) { // --- RIGHT channel --- val feedbackR = b1 * ditherError[1][0] + b2 * ditherError[1][1] - val ditherR = 0.5f * tpdf1() - val shapedR = (r + feedbackR + ditherR / scale).coerceIn(-1.0f, 1.0f) + val ditherR = ditherScale * tpdf1() + val shapedR = (fright[i] + feedbackR + ditherR / scale).coerceIn(-1.0f, 1.0f) val qR = (shapedR * scale).roundToInt().coerceIn(-128, 127) tadDecodedBin[i * 2L + 1] = (qR + bias).toByte() @@ -400,6 +480,7 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) { (tadInputBin[offset++].toInt() and 0xFF) or ((tadInputBin[offset++].toInt() and 0xFF) shl 8) ) + val maxIndex = tadInputBin[offset++].toInt() and 0xFF val payloadSize = ( (tadInputBin[offset++].toInt() and 0xFF) or ((tadInputBin[offset++].toInt() and 0xFF) shl 8) or @@ -436,16 +517,23 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) { // Dequantize to Float32 val dwtMid = FloatArray(sampleCount) val dwtSide = FloatArray(sampleCount) - dequantizeDwtCoefficients(quantMid, dwtMid, sampleCount, tadQuality, dwtLevels) - dequantizeDwtCoefficients(quantSide, dwtSide, sampleCount, tadQuality, dwtLevels) + dequantizeDwtCoefficients(quantMid, dwtMid, sampleCount, maxIndex, dwtLevels) + dequantizeDwtCoefficients(quantSide, dwtSide, sampleCount, maxIndex, dwtLevels) - // Inverse DWT (produces Float32 samples in range [-1.0, 1.0]) - dwtDD4InverseMultilevel(dwtMid, sampleCount, dwtLevels) - dwtDD4InverseMultilevel(dwtSide, sampleCount, dwtLevels) + // Inverse DWT using CDF 9/7 wavelet (produces Float32 samples in range [-1.0, 1.0]) + dwt97InverseMultilevel(dwtMid, sampleCount, dwtLevels) + dwt97InverseMultilevel(dwtSide, sampleCount, dwtLevels) - // M/S to L/R correlation with noise-shaped dithering - // Output is PCMu8 stereo written directly to tadDecodedBin - msCorrelate(dwtMid, dwtSide, sampleCount) + // M/S to L/R correlation + val pcm32Left = FloatArray(sampleCount) + val pcm32Right = FloatArray(sampleCount) + msCorrelate(dwtMid, dwtSide, pcm32Left, pcm32Right, sampleCount) + + // Expand dynamic range (gamma expansion) + expandGamma(pcm32Left, pcm32Right, sampleCount) + + // Dither to 8-bit PCMu8 + pcm32fToPcm8(pcm32Left, pcm32Right, sampleCount) } catch (e: Exception) { e.printStackTrace() @@ -491,49 +579,58 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) { } private fun calculateDwtLevels(chunkSize: Int): Int { - if (chunkSize < 1024) { - throw IllegalArgumentException("Chunk size $chunkSize is below minimum 1024") - } - - var levels = 0 - var size = chunkSize - while (size > 1) { - size = size shr 1 - levels++ - } - return levels - 2 // Maximum decomposition leaves 4-sample approximation + // Hard-coded to 9 levels to match C decoder + return 9 } - private fun getQuantizationWeights(quality: Int, dwtLevels: Int): FloatArray { - // Extended base weights to support up to 16 DWT levels - val baseWeights = arrayOf( - /* 0*/floatArrayOf(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f), - /* 1*/floatArrayOf(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f), - /* 2*/floatArrayOf(1.0f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f), - /* 3*/floatArrayOf(0.2f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f), - /* 4*/floatArrayOf(0.2f, 0.8f, 1.0f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f), - /* 5*/floatArrayOf(0.2f, 0.8f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f), - /* 6*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f), - /* 7*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f), - /* 8*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f), - /* 9*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f), - /*10*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f), - /*11*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f), - /*12*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f), - /*13*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f, 1.5f), - /*14*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f, 1.5f), - /*15*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f, 1.5f), - /*16*/floatArrayOf(0.2f, 0.2f, 0.8f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.25f, 1.5f) - ) - // Updated quality scale to match C implementation - val qualityScale = 4.0f + ((3 - quality) * 0.5f).coerceIn(0.0f, 1000.0f) - return FloatArray(dwtLevels) { i -> (baseWeights[dwtLevels][i.coerceIn(0, 15)] * qualityScale).coerceAtLeast(1.0f) } + // Compute RMS energy of a coefficient band + private fun computeBandRms(c: FloatArray, start: Int, len: Int): Float { + if (len == 0) return 0.0f + var sumsq = 0.0 + for (i in 0 until len) { + val v = c[start + i].toDouble() + sumsq += v * v + } + return kotlin.math.sqrt((sumsq / len)).toFloat() } - private fun dequantizeDwtCoefficients(quantized: ShortArray, coeffs: FloatArray, count: Int, quality: Int, dwtLevels: Int) { - val weights = getQuantizationWeights(quality, dwtLevels) + // Fast PRNG for light dithering (xorshift32) + private var xorshift32State = 0x9E3779B9u - // Calculate sideband boundaries dynamically based on chunk size and DWT levels + private fun xorshift32(): UInt { + var x = xorshift32State + x = x xor (x shl 13) + x = x xor (x shr 17) + x = x xor (x shl 5) + xorshift32State = x + return x + } + + private fun urand(): Float { + return (xorshift32() and 0xFFFFFFu).toFloat() / 16777216.0f + } + + private fun tpdf(): Float { + return urand() - urand() + } + + // Simplified spectral reconstruction for wavelet coefficients + // Conservative approach: only add light dither to reduce quantization grain + private fun spectralInterpolateBand(c: FloatArray, start: Int, len: Int, Q: Float, lowerBandRms: Float) { + if (len < 4) return + + xorshift32State = 0x9E3779B9u xor len.toUInt() xor (Q * 65536.0f).toUInt() + val ditherAmp = 0.05f * Q // Very light dither (~-60 dBFS) + + // Just add ultra-light TPDF dither to reduce quantization grain + for (i in 0 until len) { + c[start + i] += tpdf() * ditherAmp + } + } + + private fun dequantizeDwtCoefficients(quantized: ShortArray, coeffs: FloatArray, count: Int, + maxIndex: Int, dwtLevels: Int) { + // Calculate sideband boundaries dynamically val firstBandSize = count shr dwtLevels val sidebandStarts = IntArray(dwtLevels + 2) sidebandStarts[0] = 0 @@ -542,63 +639,131 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) { sidebandStarts[i] = sidebandStarts[i - 1] + (firstBandSize shl (i - 2)) } + // Step 1: Dequantize all coefficients using lambda decompanding + val quantiserScale = 1.0f for (i in 0 until count) { var sideband = dwtLevels - for (s in 0 until dwtLevels + 1) { + for (s in 0..dwtLevels) { if (i < sidebandStarts[s + 1]) { sideband = s break } } - val weightIdx = if (sideband == 0) 0 else sideband - 1 - val weight = weights[weightIdx.coerceIn(0, dwtLevels - 1)] - // Updated to match C implementation: divide by TAD_COEFF_SCALAR - coeffs[i] = quantized[i].toFloat() * weight / TAD_COEFF_SCALAR + // Decode using lambda companding + val normalizedVal = lambdaDecompanding(quantized[i], maxIndex) + + // Denormalize using the subband scalar and apply base weight + quantiser scaling + val weight = BASE_QUANTISER_WEIGHTS[sideband] * quantiserScale + coeffs[i] = normalizedVal * TAD32_COEFF_SCALARS[sideband] * weight + } + + // Step 2: Apply spectral interpolation per band + // Process bands from high to low frequency (dwtLevels down to 0) + var prevBandRms = 0.0f + + for (band in dwtLevels downTo 0) { + val bandStart = sidebandStarts[band] + val bandEnd = sidebandStarts[band + 1] + val bandLen = bandEnd - bandStart + + // Calculate quantization step Q for this band + val weight = BASE_QUANTISER_WEIGHTS[band] * quantiserScale + val scalar = TAD32_COEFF_SCALARS[band] * weight + val Q = scalar / maxIndex + + // Apply spectral interpolation to this band + spectralInterpolateBand(coeffs, bandStart, bandLen, Q, prevBandRms) + + // Compute RMS for this band to use as reference for next (lower frequency) band + prevBandRms = computeBandRms(coeffs, bandStart, bandLen) } } - private fun dwtDD4Inverse1d(data: FloatArray, length: Int) { + // 9/7 inverse DWT (CDF 9/7 wavelet - matches C implementation) + private fun dwt97Inverse1d(data: FloatArray, length: Int) { if (length < 2) return val temp = FloatArray(length) val half = (length + 1) / 2 - // Split into low and high parts + // Split into low and high frequency components (matching TSVM layout) for (i in 0 until half) { - temp[i] = data[i] // Even (low-pass) + temp[i] = data[i] // Low-pass coefficients (first half) } for (i in 0 until length / 2) { - temp[half + i] = data[half + i] // Odd (high-pass) + if (half + i < length) { + temp[half + i] = data[half + i] // High-pass coefficients (second half) + } } - // Undo update step: s[i] -= 0.25 * (d[i-1] + d[i]) + // 9/7 inverse lifting coefficients from TSVM + val alpha = -1.586134342f + val beta = -0.052980118f + val gamma = 0.882911076f + val delta = 0.443506852f + val K = 1.230174105f + + // Step 1: Undo scaling for (i in 0 until half) { - val dCurr = if (i < length / 2) temp[half + i] else 0.0f - val dPrev = if (i > 0 && i - 1 < length / 2) temp[half + i - 1] else 0.0f - temp[i] -= 0.25f * (dPrev + dCurr) + temp[i] /= K // Low-pass coefficients } - - // Undo prediction step: d[i] += P(s[i-1], s[i], s[i+1], s[i+2]) for (i in 0 until length / 2) { - val sM1 = if (i > 0) temp[i - 1] else temp[0] // mirror boundary - val s0 = temp[i] - val s1 = if (i + 1 < half) temp[i + 1] else temp[half - 1] - val s2 = if (i + 2 < half) temp[i + 2] else if (half > 1) temp[half - 2] else temp[half - 1] - - val prediction = (-1.0f/16.0f)*sM1 + (9.0f/16.0f)*s0 + (9.0f/16.0f)*s1 + (-1.0f/16.0f)*s2 - temp[half + i] += prediction + if (half + i < length) { + temp[half + i] *= K // High-pass coefficients + } } - // Merge evens and odds back + // Step 2: Undo δ update for (i in 0 until half) { - data[2 * i] = temp[i] - if (2 * i + 1 < length) - data[2 * i + 1] = temp[half + i] + val dCurr = if (half + i < length) temp[half + i] else 0.0f + val dPrev = if (i > 0 && half + i - 1 < length) temp[half + i - 1] else dCurr + temp[i] -= delta * (dCurr + dPrev) + } + + // Step 3: Undo γ predict + for (i in 0 until length / 2) { + if (half + i < length) { + val sCurr = temp[i] + val sNext = if (i + 1 < half) temp[i + 1] else sCurr + temp[half + i] -= gamma * (sCurr + sNext) + } + } + + // Step 4: Undo β update + for (i in 0 until half) { + val dCurr = if (half + i < length) temp[half + i] else 0.0f + val dPrev = if (i > 0 && half + i - 1 < length) temp[half + i - 1] else dCurr + temp[i] -= beta * (dCurr + dPrev) + } + + // Step 5: Undo α predict + for (i in 0 until length / 2) { + if (half + i < length) { + val sCurr = temp[i] + val sNext = if (i + 1 < half) temp[i + 1] else sCurr + temp[half + i] -= alpha * (sCurr + sNext) + } + } + + // Reconstruction - interleave low and high pass + for (i in 0 until length) { + if (i % 2 == 0) { + // Even positions: low-pass coefficients + data[i] = temp[i / 2] + } else { + // Odd positions: high-pass coefficients + val idx = i / 2 + if (half + idx < length) { + data[i] = temp[half + idx] + } else { + data[i] = 0.0f + } + } } } - private fun dwtDD4InverseMultilevel(data: FloatArray, length: Int, levels: Int) { + private fun dwt97InverseMultilevel(data: FloatArray, length: Int, levels: Int) { // Calculate the length at the deepest level var currentLength = length for (level in 0 until levels) { @@ -609,7 +774,7 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) { for (level in levels - 1 downTo 0) { currentLength *= 2 // MULTIPLY FIRST if (currentLength > length) currentLength = length - dwtDD4Inverse1d(data, currentLength) // THEN apply inverse + dwt97Inverse1d(data, currentLength) // THEN apply inverse } } diff --git a/video_encoder/encoder_tad.h b/video_encoder/encoder_tad.h index e421c64..6985e0e 100644 --- a/video_encoder/encoder_tad.h +++ b/video_encoder/encoder_tad.h @@ -19,14 +19,28 @@ #define TAD32_QUALITY_DEFAULT 3 #define TAD32_ZSTD_LEVEL 15 +/** + * Convert quality level (0-5) to max_index for quantization + * Quality 0 = very low quality, small file (max_index=7, 3-bit) + * Quality 1 = low quality (max_index=15, 4-bit) + * Quality 2 = medium quality (max_index=31, 5-bit) + * Quality 3 = good quality (max_index=63, 6-bit) [DEFAULT] + * Quality 4 = high quality (max_index=127, 7-bit) + * Quality 5 = very high quality (max_index=255, 8-bit) + */ +static inline int tad32_quality_to_max_index(int quality) { + static const int quality_map[6] = {31, 35, 39, 47, 56, 89}; + if (quality < 0) quality = 0; + if (quality > 5) quality = 5; + return quality_map[quality]; +} + /** * Encode audio chunk with TAD32 codec (PCM32f version) * * @param pcm32_stereo Input PCM32fLE stereo samples (interleaved L,R) * @param num_samples Number of samples per channel (min 1024) - * @param quant_bits Quantization bits 4-12 (default: 7) - * @param use_zstd 1=enable Zstd compression, 0=disable - * @param use_twobitmap 1=enable twobitmap encoding, 0=raw int8_t storage + * @param max_index Maximum quantization index (7=3bit, 15=4bit, 31=5bit, 63=6bit, 127=7bit) * @param quantiser_scale Quantiser scaling factor (1.0=baseline, 2.0=2x coarser quantization) * Higher values = more aggressive quantization = smaller files * @param output Output buffer (must be large enough) @@ -34,12 +48,12 @@ * * Output format: * uint16 sample_count (samples per channel) - * uint8 quant_bits (quantization bits used) + * uint8 max_index (maximum quantization index) * uint32 payload_size (bytes in payload) - * * payload (encoded M/S data, optionally Zstd-compressed) + * * payload (encoded M/S data, Zstd-compressed with 2-bit twobitmap) */ size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples, - int quant_bits, + int max_index, float quantiser_scale, uint8_t *output); /** diff --git a/video_encoder/encoder_tad_standalone.c b/video_encoder/encoder_tad_standalone.c index 1bf7ba5..1d1b4eb 100644 --- a/video_encoder/encoder_tad_standalone.c +++ b/video_encoder/encoder_tad_standalone.c @@ -48,13 +48,11 @@ static void print_usage(const char *prog_name) { printf("Options:\n"); printf(" -i Input audio file (any format supported by FFmpeg)\n"); printf(" -o Output TAD32 file (optional, auto-generated as input.qN.tad)\n"); - printf(" -q Quantization bits (default: 7, range: 4-8)\n"); + printf(" -q Positive side quantization steps (default: 47, range: up to 127)\n"); printf(" Higher = more precision, larger files\n"); printf(" -s Quantiser scaling factor (default: 1.0, range: 0.5-4.0)\n"); printf(" Higher = more aggressive quantization, smaller files\n"); printf(" 2.0 = quantize 2x coarser than baseline\n"); - printf(" --no-zstd Disable Zstd compression\n"); - printf(" --no-twobitmap Disable twobitmap encoding (use raw int8_t storage)\n"); printf(" -v Verbose output\n"); printf(" -h, --help Show this help\n"); printf("\nVersion: %s\n", ENCODER_VENDOR_STRING); @@ -67,7 +65,7 @@ int main(int argc, char *argv[]) { char *input_file = NULL; char *output_file = NULL; - int max_index = 7; // Default QUANT_BITS + int max_index = 47; // Default QUANT_BITS float quantiser_scale = 1.0f; // Default quantiser scaling int verbose = 0; diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index 2584aa5..f5a7a23 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -18,7 +18,7 @@ #include #include -#define ENCODER_VENDOR_STRING "Encoder-TAV 20251024 (3d-dwt,tad)" +#define ENCODER_VENDOR_STRING "Encoder-TAV 20251030 (3d-dwt,tad)" // TSVM Advanced Video (TAV) format constants #define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56" // "\x1FTSVM TAV" @@ -8996,11 +8996,15 @@ static int write_tad_packet_samples(tav_encoder_t *enc, FILE *output, int sample if (tad_quality > TAD32_QUALITY_MAX) tad_quality = TAD32_QUALITY_MAX; if (tad_quality < TAD32_QUALITY_MIN) tad_quality = TAD32_QUALITY_MIN; + // Convert quality (0-5) to max_index for quantization + int max_index = tad32_quality_to_max_index(tad_quality); + float quantiser_scale = 1.0f; // Baseline quantizer scaling + // Allocate output buffer (generous size for TAD chunk) size_t max_output_size = samples_to_read * 4 * sizeof(int16_t) + 1024; uint8_t *tad_output = malloc(max_output_size); - size_t tad_encoded_size = tad32_encode_chunk(pcm32_buffer, samples_to_read, tad_quality, 1, tad_output); + size_t tad_encoded_size = tad32_encode_chunk(pcm32_buffer, samples_to_read, max_index, quantiser_scale, tad_output); if (tad_encoded_size == 0) { fprintf(stderr, "Error: TAD32 encoding failed\n"); @@ -9009,10 +9013,12 @@ static int write_tad_packet_samples(tav_encoder_t *enc, FILE *output, int sample return 0; } - // Parse TAD chunk format: [sample_count][payload_size][payload] + // Parse TAD chunk format: [sample_count][quantisation index][payload_size][payload] uint8_t *read_ptr = tad_output; uint16_t sample_count = *((uint16_t*)read_ptr); read_ptr += sizeof(uint16_t); + uint8_t quant_size = *((uint8_t*)read_ptr); + read_ptr += sizeof(uint8_t); uint32_t tad_payload_size = *((uint32_t*)read_ptr); read_ptr += sizeof(uint32_t); uint8_t *tad_payload = read_ptr; @@ -9022,10 +9028,11 @@ static int write_tad_packet_samples(tav_encoder_t *enc, FILE *output, int sample fwrite(&packet_type, 1, 1, output); uint32_t tav_payload_size = (uint32_t)tad_payload_size; - uint32_t tav_payload_size_plus_6 = (uint32_t)tad_payload_size + 6; + uint32_t tav_payload_size_plus_6 = (uint32_t)tad_payload_size + 7; fwrite(&sample_count, sizeof(uint16_t), 1, output); fwrite(&tav_payload_size_plus_6, sizeof(uint32_t), 1, output); fwrite(&sample_count, sizeof(uint16_t), 1, output); + fwrite(&quant_size, sizeof(uint8_t), 1, output); fwrite(&tav_payload_size, sizeof(uint32_t), 1, output); fwrite(tad_payload, 1, tad_payload_size, output); @@ -10579,10 +10586,12 @@ int main(int argc, char *argv[]) { break; case 1027: // --pcm8-audio enc->pcm8_audio = 1; + enc->tad_audio = 0; printf("8-bit PCM audio mode enabled (packet 0x21)\n"); break; case 1028: // --tad-audio enc->tad_audio = 1; + enc->pcm8_audio = 0; printf("TAD audio mode enabled (packet 0x24, quality follows -q)\n"); break; case 1050: // --single-pass @@ -10659,7 +10668,7 @@ int main(int argc, char *argv[]) { // if temporal-dwt is used, and user did not select suitable audio codec, force PCMu8 (or TAD when it's production-ready) if (enc->enable_temporal_dwt && !enc->pcm8_audio && !enc->tad_audio) { - enc->pcm8_audio = 1; // TODO replace with tad_audio when it's production-ready + enc->tad_audio = 1; } if ((!enc->input_file && !enc->test_mode) || !enc->output_file) { diff --git a/video_encoder/tav_inspector.c b/video_encoder/tav_inspector.c index f8be00e..2b49907 100644 --- a/video_encoder/tav_inspector.c +++ b/video_encoder/tav_inspector.c @@ -738,14 +738,23 @@ static const char* VERDESC[] = {"null", "YCoCg tiled, uniform", "ICtCp tiled, un case TAV_PACKET_AUDIO_TAD: { stats.audio_count++; stats.audio_tad_count++; - // Read payload_size + 2 - uint32_t payload_size_plus_6; - if (fread(&payload_size_plus_6, sizeof(uint32_t), 1, fp) != 1) break; + + // Read sample count + uint16_t sample_count0; + if (fread(&sample_count0, sizeof(uint16_t), 1, fp) != 1) break; + + // Read payload_size + 7 + uint32_t payload_size_plus_7; + if (fread(&payload_size_plus_7, sizeof(uint32_t), 1, fp) != 1) break; // Read sample count uint16_t sample_count; if (fread(&sample_count, sizeof(uint16_t), 1, fp) != 1) break; + // Read quantiser index + uint8_t quantiser; + if (fread(&quantiser, sizeof(uint8_t), 1, fp) != 1) break; + // Read compressed size uint32_t compressed_size; if (fread(&compressed_size, sizeof(uint32_t), 1, fp) != 1) break; @@ -754,8 +763,8 @@ static const char* VERDESC[] = {"null", "YCoCg tiled, uniform", "ICtCp tiled, un stats.audio_tad_bytes += compressed_size; if (!opts.summary_only && display) { - printf(" - samples=%u, size=%u bytes (zstd compressed TAD32)", - sample_count, compressed_size); + printf(" - samples=%u, size=%u bytes, quantiser=%u steps (index %u)", + sample_count, compressed_size, quantiser * 2 + 1, quantiser); } // Skip compressed data