From ad232d1c8458f703e580396c4a98644b31cca125 Mon Sep 17 00:00:00 2001
From: minjaesong
Date: Tue, 7 Oct 2025 03:55:56 +0900
Subject: [PATCH] TAV: twobitmap for better compression

---
Review notes (this region is ignored by git am / git apply):
 * Removed the "byte boundary crossing" branches from getTwoBitCode() in the
   Kotlin decoder and from the map-packing loop in the C encoder. Every code
   is 2 bits wide and starts at bit offset (i * 2) % 8, which is always
   0, 2, 4 or 6, so the `== 7` test could never be true and a code can never
   straddle two bytes. The encoded bitstream is unchanged.
 * Hunk line counts and the diffstat below were recomputed for the shorter
   additions; the blob ids on the "index" lines were left as they were.
 * Line breaks and indentation were restored from a whitespace-collapsed copy,
   so context whitespace may need adjusting before this applies cleanly.

 assets/disk0/tvdos/bin/playtav.js                 |   3 +-
 .../torvald/tsvm/GraphicsJSR223Delegate.kt        | 122 ++++++++-----
 video_encoder/encoder_tav.c                       | 169 ++++++------------
 3 files changed, 130 insertions(+), 164 deletions(-)

diff --git a/assets/disk0/tvdos/bin/playtav.js b/assets/disk0/tvdos/bin/playtav.js
index 83d29e0..c0793a1 100644
--- a/assets/disk0/tvdos/bin/playtav.js
+++ b/assets/disk0/tvdos/bin/playtav.js
@@ -99,8 +99,7 @@ audio.setPcmMode(0)
 audio.setMasterVolume(0, 255)

 // set colour zero as half-opaque black
-graphics.setPalette(0, 0, 0, 0, 9)
-
+graphics.setPalette(0, 0, 0, 0, 7)


 function processSubtitlePacket(packetSize) {
diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
index 53414dc..d0c6bdb 100644
--- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
+++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
@@ -4051,10 +4051,12 @@ class GraphicsJSR223Delegate(private val vm: VM) {
     }

     // Variable channel layout postprocessing for concatenated maps
+    // Significance Map v2.1 (twobit-map): 2 bits per coefficient
+    // 00=zero, 01=+1, 10=-1, 11=other (stored as int16)
     private fun postprocessCoefficientsVariableLayout(compressedData: ByteArray, compressedOffset: Int,
                                                       coeffCount: Int, channelLayout: Int,
                                                       outputY: ShortArray?, outputCo: ShortArray?, outputCg: ShortArray?, outputAlpha: ShortArray?) {
-        val mapBytes = (coeffCount + 7) / 8
+        val mapBytes = (coeffCount * 2 + 7) / 8 // 2 bits per coefficient

         // Determine active channels based on layout (bit-field design)
         val hasY = channelLayout and 4 == 0 // bit 2 inverted: 0 means has luma
@@ -4077,28 +4079,37 @@ class GraphicsJSR223Delegate(private val vm: VM) {
         val cgMapOffset = if (hasCg) { val offset = mapOffset; mapOffset += mapBytes; offset } else -1
         val alphaMapOffset = if (hasAlpha) { val offset = mapOffset; mapOffset += mapBytes; offset } else -1

-        // Count non-zeros for each active channel
-        var yNonZeros = 0
-        var coNonZeros = 0
-        var cgNonZeros = 0
-        var alphaNonZeros = 0
+        // Helper function to extract 2-bit code
+        fun getTwoBitCode(mapStart: Int, coeffIdx: Int): Int {
+            val bitPos = coeffIdx * 2
+            val byteIdx = bitPos / 8
+            val bitOffset = bitPos % 8

-        for (i in 0 until coeffCount) {
-            val byteIdx = i / 8
-            val bitIdx = i % 8
+            val byte0 = compressedData[mapStart + byteIdx].toInt() and 0xFF

-            if (hasY && yMapOffset >= 0 && (compressedData[yMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) yNonZeros++
-            if (hasCo && coMapOffset >= 0 && (compressedData[coMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) coNonZeros++
-            if (hasCg && cgMapOffset >= 0 && (compressedData[cgMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) cgNonZeros++
-            if (hasAlpha && alphaMapOffset >= 0 && (compressedData[alphaMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) alphaNonZeros++
+            // bitOffset is always 0, 2, 4 or 6, so a code never straddles a byte boundary
+            return (byte0 shr bitOffset) and 0x03
         }

-        // Calculate value array offsets
+        // Count "other" values (code 11) for each active channel
+        var yOthers = 0
+        var coOthers = 0
+        var cgOthers = 0
+        var alphaOthers = 0
+
+        for (i in 0 until coeffCount) {
+            if (hasY && yMapOffset >= 0 && getTwoBitCode(yMapOffset, i) == 3) yOthers++
+            if (hasCo && coMapOffset >= 0 && getTwoBitCode(coMapOffset, i) == 3) coOthers++
+            if (hasCg && cgMapOffset >= 0 && getTwoBitCode(cgMapOffset, i) == 3) cgOthers++
+            if (hasAlpha && alphaMapOffset >= 0 && getTwoBitCode(alphaMapOffset, i) == 3) alphaOthers++
+        }
+
+        // Calculate value array offsets (only for "other" values)
         var valueOffset = mapOffset
-        val yValuesOffset = if (hasY) { val offset = valueOffset; valueOffset += yNonZeros * 2; offset } else -1
-        val coValuesOffset = if (hasCo) { val offset = valueOffset; valueOffset += coNonZeros * 2; offset } else -1
-        val cgValuesOffset = if (hasCg) { val offset = valueOffset; valueOffset += cgNonZeros * 2; offset } else -1
-        val alphaValuesOffset = if (hasAlpha) { val offset = valueOffset; valueOffset += alphaNonZeros * 2; offset } else -1
+        val yValuesOffset = if (hasY) { val offset = valueOffset; valueOffset += yOthers * 2; offset } else -1
+        val coValuesOffset = if (hasCo) { val offset = valueOffset; valueOffset += coOthers * 2; offset } else -1
+        val cgValuesOffset = if (hasCg) { val offset = valueOffset; valueOffset += cgOthers * 2; offset } else -1
+        val alphaValuesOffset = if (hasAlpha) { val offset = valueOffset; valueOffset += alphaOthers * 2; offset } else -1

         // Reconstruct coefficients
         var yValueIdx = 0
@@ -4107,43 +4118,64 @@ class GraphicsJSR223Delegate(private val vm: VM) {
         var alphaValueIdx = 0

         for (i in 0 until coeffCount) {
-            val byteIdx = i / 8
-            val bitIdx = i % 8
-
             // Y channel
-            if (hasY && yMapOffset >= 0 && outputY != null &&
-                (compressedData[yMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) {
-                val valuePos = yValuesOffset + yValueIdx * 2
-                outputY[i] = (((compressedData[valuePos + 1].toInt() and 0xFF) shl 8) or
-                              (compressedData[valuePos].toInt() and 0xFF)).toShort()
-                yValueIdx++
+            if (hasY && yMapOffset >= 0 && outputY != null) {
+                when (getTwoBitCode(yMapOffset, i)) {
+                    0 -> outputY[i] = 0 // 00 = zero
+                    1 -> outputY[i] = 1 // 01 = +1
+                    2 -> outputY[i] = -1 // 10 = -1
+                    3 -> { // 11 = other (read int16)
+                        val valuePos = yValuesOffset + yValueIdx * 2
+                        outputY[i] = (((compressedData[valuePos + 1].toInt() and 0xFF) shl 8) or
+                                      (compressedData[valuePos].toInt() and 0xFF)).toShort()
+                        yValueIdx++
+                    }
+                }
             }

             // Co channel
-            if (hasCo && coMapOffset >= 0 && outputCo != null &&
-                (compressedData[coMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) {
-                val valuePos = coValuesOffset + coValueIdx * 2
-                outputCo[i] = (((compressedData[valuePos + 1].toInt() and 0xFF) shl 8) or
-                               (compressedData[valuePos].toInt() and 0xFF)).toShort()
-                coValueIdx++
+            if (hasCo && coMapOffset >= 0 && outputCo != null) {
+                when (getTwoBitCode(coMapOffset, i)) {
+                    0 -> outputCo[i] = 0
+                    1 -> outputCo[i] = 1
+                    2 -> outputCo[i] = -1
+                    3 -> {
+                        val valuePos = coValuesOffset + coValueIdx * 2
+                        outputCo[i] = (((compressedData[valuePos + 1].toInt() and 0xFF) shl 8) or
+                                       (compressedData[valuePos].toInt() and 0xFF)).toShort()
+                        coValueIdx++
+                    }
+                }
             }

             // Cg channel
-            if (hasCg && cgMapOffset >= 0 && outputCg != null &&
-                (compressedData[cgMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) {
-                val valuePos = cgValuesOffset + cgValueIdx * 2
-                outputCg[i] = (((compressedData[valuePos + 1].toInt() and 0xFF) shl 8) or
-                               (compressedData[valuePos].toInt() and 0xFF)).toShort()
-                cgValueIdx++
+            if (hasCg && cgMapOffset >= 0 && outputCg != null) {
+                when (getTwoBitCode(cgMapOffset, i)) {
+                    0 -> outputCg[i] = 0
+                    1 -> outputCg[i] = 1
+                    2 -> outputCg[i] = -1
+                    3 -> {
+                        val valuePos = cgValuesOffset + cgValueIdx * 2
+                        outputCg[i] = (((compressedData[valuePos + 1].toInt() and 0xFF) shl 8) or
+                                       (compressedData[valuePos].toInt() and 0xFF)).toShort()
+                        cgValueIdx++
+                    }
+                }
             }

             // Alpha channel
-            if (hasAlpha && alphaMapOffset >= 0 && outputAlpha != null &&
-                (compressedData[alphaMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) {
-                val valuePos = alphaValuesOffset + alphaValueIdx * 2
-                outputAlpha[i] = (((compressedData[valuePos + 1].toInt() and 0xFF) shl 8) or
-                                  (compressedData[valuePos].toInt() and 0xFF)).toShort()
-                alphaValueIdx++
+            if (hasAlpha && alphaMapOffset >= 0 && outputAlpha != null) {
+                when (getTwoBitCode(alphaMapOffset, i)) {
+                    0 -> outputAlpha[i] = 0
+                    1 -> outputAlpha[i] = 1
+                    2 -> outputAlpha[i] = -1
+                    3 -> {
+                        val valuePos = alphaValuesOffset + alphaValueIdx * 2
+                        outputAlpha[i] = (((compressedData[valuePos + 1].toInt() and 0xFF) shl 8) or
+                                          (compressedData[valuePos].toInt() and 0xFF)).toShort()
+                        alphaValueIdx++
+                    }
+                }
             }
         }
     }
diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c
index 66371a0..8ed21c4 100644
--- a/video_encoder/encoder_tav.c
+++ b/video_encoder/encoder_tav.c
@@ -1247,104 +1247,34 @@ static void dwt_2d_forward_flexible(float *tile_data, int width, int height, int
     free(temp_col);
 }

-// Preprocess coefficients using significance map for better compression
-static size_t preprocess_coefficients(int16_t *coeffs, int coeff_count, uint8_t *output_buffer) {
-    // Count non-zero coefficients
-    int nonzero_count = 0;
-    for (int i = 0; i < coeff_count; i++) {
-        if (coeffs[i] != 0) nonzero_count++;
-    }
-
-    // Create significance map (1 bit per coefficient, packed into bytes)
-    int map_bytes = (coeff_count + 7) / 8; // Round up to nearest byte
-    uint8_t *sig_map = output_buffer;
-    int16_t *values = (int16_t *)(output_buffer + map_bytes);
-
-    // Clear significance map
-    memset(sig_map, 0, map_bytes);
-
-    // Fill significance map and extract non-zero values
-    int value_idx = 0;
-    for (int i = 0; i < coeff_count; i++) {
-        if (coeffs[i] != 0) {
-            // Set bit in significance map
-            int byte_idx = i / 8;
-            int bit_idx = i % 8;
-            sig_map[byte_idx] |= (1 << bit_idx);
-
-            // Store the value
-            values[value_idx++] = coeffs[i];
-        }
-    }
-
-    return map_bytes + (nonzero_count * sizeof(int16_t));
-}
-
-// Preprocess coefficients using concatenated significance maps for optimal cross-channel compression
-static size_t preprocess_coefficients_concatenated(int16_t *coeffs_y, int16_t *coeffs_co, int16_t *coeffs_cg,
-                                                   int coeff_count, uint8_t *output_buffer) {
-    int map_bytes = (coeff_count + 7) / 8;
-
-    // Count non-zeros per channel
-    int nonzero_y = 0, nonzero_co = 0, nonzero_cg = 0;
-    for (int i = 0; i < coeff_count; i++) {
-        if (coeffs_y[i] != 0) nonzero_y++;
-        if (coeffs_co[i] != 0) nonzero_co++;
-        if (coeffs_cg[i] != 0) nonzero_cg++;
-    }
-
-    // Layout: [Y_map][Co_map][Cg_map][Y_vals][Co_vals][Cg_vals]
-    uint8_t *y_map = output_buffer;
-    uint8_t *co_map = output_buffer + map_bytes;
-    uint8_t *cg_map = output_buffer + map_bytes * 2;
-    int16_t *y_values = (int16_t *)(output_buffer + map_bytes * 3);
-    int16_t *co_values = y_values + nonzero_y;
-    int16_t *cg_values = co_values + nonzero_co;
-
-    // Clear significance maps
-    memset(y_map, 0, map_bytes);
-    memset(co_map, 0, map_bytes);
-    memset(cg_map, 0, map_bytes);
-
-    // Fill significance maps and extract values
-    int y_idx = 0, co_idx = 0, cg_idx = 0;
-    for (int i = 0; i < coeff_count; i++) {
-        int byte_idx = i / 8;
-        int bit_idx = i % 8;
-
-        if (coeffs_y[i] != 0) {
-            y_map[byte_idx] |= (1 << bit_idx);
-            y_values[y_idx++] = coeffs_y[i];
-        }
-
-        if (coeffs_co[i] != 0) {
-            co_map[byte_idx] |= (1 << bit_idx);
-            co_values[co_idx++] = coeffs_co[i];
-        }
-
-        if (coeffs_cg[i] != 0) {
-            cg_map[byte_idx] |= (1 << bit_idx);
-            cg_values[cg_idx++] = coeffs_cg[i];
-        }
-    }
-
-    return map_bytes * 3 + (nonzero_y + nonzero_co + nonzero_cg) * sizeof(int16_t);
-}
-
 // Variable channel layout preprocessing for concatenated maps
+// Significance Map v2.1 (twobit-map): 2 bits per coefficient
+// 00=zero, 01=+1, 10=-1, 11=other (stored as int16)
 static size_t preprocess_coefficients_variable_layout(int16_t *coeffs_y, int16_t *coeffs_co, int16_t *coeffs_cg, int16_t *coeffs_alpha,
                                                      int coeff_count, int channel_layout, uint8_t *output_buffer) {
     const channel_layout_config_t *config = &channel_layouts[channel_layout];
-    int map_bytes = (coeff_count + 7) / 8;
+    int map_bytes = (coeff_count * 2 + 7) / 8; // 2 bits per coefficient
     int total_maps = config->num_channels;

-    // Count non-zeros per active channel
-    int nonzero_counts[4] = {0}; // Y, Co, Cg, Alpha
+    // Count "other" values (not 0, +1, or -1) per active channel
+    int other_counts[4] = {0}; // Y, Co, Cg, Alpha
     for (int i = 0; i < coeff_count; i++) {
-        if (config->has_y && coeffs_y && coeffs_y[i] != 0) nonzero_counts[0]++;
-        if (config->has_co && coeffs_co && coeffs_co[i] != 0) nonzero_counts[1]++;
-        if (config->has_cg && coeffs_cg && coeffs_cg[i] != 0) nonzero_counts[2]++;
-        if (config->has_alpha && coeffs_alpha && coeffs_alpha[i] != 0) nonzero_counts[3]++;
+        if (config->has_y && coeffs_y) {
+            int16_t val = coeffs_y[i];
+            if (val != 0 && val != 1 && val != -1) other_counts[0]++;
+        }
+        if (config->has_co && coeffs_co) {
+            int16_t val = coeffs_co[i];
+            if (val != 0 && val != 1 && val != -1) other_counts[1]++;
+        }
+        if (config->has_cg && coeffs_cg) {
+            int16_t val = coeffs_cg[i];
+            if (val != 0 && val != 1 && val != -1) other_counts[2]++;
+        }
+        if (config->has_alpha && coeffs_alpha) {
+            int16_t val = coeffs_alpha[i];
+            if (val != 0 && val != 1 && val != -1) other_counts[3]++;
+        }
     }

     // Layout maps in order based on channel layout
@@ -1355,48 +1285,53 @@ static size_t preprocess_coefficients_variable_layout(int16_t *coeffs_y, int16_t
     if (config->has_cg) maps[2] = output_buffer + map_bytes * map_idx++;
     if (config->has_alpha) maps[3] = output_buffer + map_bytes * map_idx++;

-    // Calculate value array positions
+    // Calculate value array positions (only for "other" values)
     int16_t *values[4];
     int16_t *value_start = (int16_t *)(output_buffer + map_bytes * total_maps);
     int value_offset = 0;
-    if (config->has_y) { values[0] = value_start + value_offset; value_offset += nonzero_counts[0]; }
-    if (config->has_co) { values[1] = value_start + value_offset; value_offset += nonzero_counts[1]; }
-    if (config->has_cg) { values[2] = value_start + value_offset; value_offset += nonzero_counts[2]; }
-    if (config->has_alpha) { values[3] = value_start + value_offset; value_offset += nonzero_counts[3]; }
+    if (config->has_y) { values[0] = value_start + value_offset; value_offset += other_counts[0]; }
+    if (config->has_co) { values[1] = value_start + value_offset; value_offset += other_counts[1]; }
+    if (config->has_cg) { values[2] = value_start + value_offset; value_offset += other_counts[2]; }
+    if (config->has_alpha) { values[3] = value_start + value_offset; value_offset += other_counts[3]; }

     // Clear significance maps
     memset(output_buffer, 0, map_bytes * total_maps);

-    // Fill significance maps and extract values
+    // Fill twobit-maps and extract "other" values
     int value_indices[4] = {0};
+    int16_t *channel_coeffs[4] = {coeffs_y, coeffs_co, coeffs_cg, coeffs_alpha};
+    int channel_active[4] = {config->has_y, config->has_co, config->has_cg, config->has_alpha};
+
     for (int i = 0; i < coeff_count; i++) {
-        int byte_idx = i / 8;
-        int bit_idx = i % 8;
+        for (int ch = 0; ch < 4; ch++) {
+            if (!channel_active[ch] || !channel_coeffs[ch]) continue;

-        if (config->has_y && coeffs_y && coeffs_y[i] != 0) {
-            maps[0][byte_idx] |= (1 << bit_idx);
-            values[0][value_indices[0]++] = coeffs_y[i];
-        }
+            int16_t val = channel_coeffs[ch][i];
+            uint8_t code;

-        if (config->has_co && coeffs_co && coeffs_co[i] != 0) {
-            maps[1][byte_idx] |= (1 << bit_idx);
-            values[1][value_indices[1]++] = coeffs_co[i];
-        }
+            if (val == 0) {
+                code = 0; // 00
+            } else if (val == 1) {
+                code = 1; // 01
+            } else if (val == -1) {
+                code = 2; // 10
+            } else {
+                code = 3; // 11
+                values[ch][value_indices[ch]++] = val;
+            }

-        if (config->has_cg && coeffs_cg && coeffs_cg[i] != 0) {
-            maps[2][byte_idx] |= (1 << bit_idx);
-            values[2][value_indices[2]++] = coeffs_cg[i];
-        }
+            // Store 2-bit code; bit_offset is always 0, 2, 4 or 6, so it never straddles a byte
+            size_t bit_pos = i * 2;
+            size_t byte_idx = bit_pos / 8;
+            size_t bit_offset = bit_pos % 8;

-        if (config->has_alpha && coeffs_alpha && coeffs_alpha[i] != 0) {
-            maps[3][byte_idx] |= (1 << bit_idx);
-            values[3][value_indices[3]++] = coeffs_alpha[i];
+            maps[ch][byte_idx] |= (code << bit_offset);
         }
     }

-    // Return total size: maps + all non-zero values
-    int total_nonzeros = nonzero_counts[0] + nonzero_counts[1] + nonzero_counts[2] + nonzero_counts[3];
-    return map_bytes * total_maps + total_nonzeros * sizeof(int16_t);
+    // Return total size: maps + all "other" values
+    int total_others = other_counts[0] + other_counts[1] + other_counts[2] + other_counts[3];
+    return map_bytes * total_maps + total_others * sizeof(int16_t);
 }

 // Quantisation for DWT subbands with rate control