diff --git a/CLAUDE.md b/CLAUDE.md index 6e99451..493435e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -245,10 +245,11 @@ Original: [coeff_array] → [concatenated_significance_maps + nonzero_values] Concatenated Maps Layout: [Y_map][Co_map][Cg_map][Y_vals][Co_vals][Cg_vals] (channel layout 0) -[Y_map][Y_vals] (channel layout 1) -[Y_map][Co_map][Cg_map][A_map][Y_vals][Co_vals][Cg_vals][A_map] (channel layout 2) -[Y_map][A_map][Y_vals][A_map] (channel layout 3) -[Co_map][Cg_map][Co_vals][Cg_map] (channel layout 4) +[Y_map][Co_map][Cg_map][A_map][Y_vals][Co_vals][Cg_vals][A_vals] (channel layout 1) +[Y_map][Y_vals] (channel layout 2) +[Y_map][A_map][Y_vals][A_vals] (channel layout 3) +[Co_map][Cg_map][Co_vals][Cg_vals] (channel layout 4) +[Co_map][Cg_map][A_map][Co_vals][Cg_vals][A_vals] (channel layout 5) (replace Y->I, Co->Ct, Cg->Cp for ICtCp colour space) diff --git a/assets/disk0/tvdos/bin/playtav.js b/assets/disk0/tvdos/bin/playtav.js index 04ea151..0be8573 100644 --- a/assets/disk0/tvdos/bin/playtav.js +++ b/assets/disk0/tvdos/bin/playtav.js @@ -25,6 +25,7 @@ const TAV_PACKET_IFRAME = 0x10 const TAV_PACKET_PFRAME = 0x11 const TAV_PACKET_AUDIO_MP2 = 0x20 const TAV_PACKET_SUBTITLE = 0x30 +const TAV_PACKET_SYNC_NTSC = 0xFE const TAV_PACKET_SYNC = 0xFF const TAV_FILE_HEADER_FIRST = 0x1F @@ -378,6 +379,7 @@ let header = { extraFlags: 0, videoFlags: 0, qualityLevel: 0, + channelLayout: 0, fileRole: 0 } @@ -414,10 +416,11 @@ header.qualityCg = seqread.readOneByte() header.extraFlags = seqread.readOneByte() header.videoFlags = seqread.readOneByte() header.qualityLevel = seqread.readOneByte() // the decoder expects biased value +header.channelLayout = seqread.readOneByte() header.fileRole = seqread.readOneByte() // Skip reserved bytes -seqread.skip(5) +seqread.skip(4) if (header.version < 1 || header.version > 6) { printerrln(`Error: Unsupported TAV version ${header.version}`) @@ -425,6 +428,19 @@ if (header.version < 1 || header.version > 6) { return } +// Helper function 
to decode channel layout name +function getChannelLayoutName(layout) { + switch (layout) { + case 0: return "Y-Co-Cg" + case 1: return "Y-Co-Cg-A" + case 2: return "Y-only" + case 3: return "Y-A" + case 4: return "Co-Cg" + case 5: return "Co-Cg-A" + default: return `Unknown (${layout})` + } +} + const hasAudio = (header.extraFlags & 0x01) !== 0 const hasSubtitles = (header.extraFlags & 0x02) !== 0 const progressiveTransmission = (header.extraFlags & 0x04) !== 0 @@ -446,6 +462,7 @@ console.log(`Total frames: ${header.totalFrames}`) console.log(`Wavelet filter: ${header.waveletFilter === WAVELET_5_3_REVERSIBLE ? "5/3 reversible" : header.waveletFilter === WAVELET_9_7_IRREVERSIBLE ? "9/7 irreversible" : header.waveletFilter === WAVELET_BIORTHOGONAL_13_7 ? "Biorthogonal 13/7" : header.waveletFilter === WAVELET_DD4 ? "DD-4" : header.waveletFilter === WAVELET_HAAR ? "Haar" : "unknown"}`) console.log(`Decomposition levels: ${header.decompLevels}`) console.log(`Quality: Y=${header.qualityY}, Co=${header.qualityCo}, Cg=${header.qualityCg}`) +console.log(`Channel layout: ${getChannelLayoutName(header.channelLayout)}`) console.log(`Tiles: ${tilesX}x${tilesY} (${numTiles} total)`) console.log(`Colour space: ${header.version === 2 ? "ICtCp" : "YCoCg-R"}`) console.log(`Features: ${hasAudio ? "Audio " : ""}${hasSubtitles ? "Subtitles " : ""}${progressiveTransmission ? "Progressive " : ""}${roiCoding ? 
"ROI " : ""}`) @@ -599,13 +616,16 @@ function tryReadNextTAVHeader() { qualityCg: seqread.readOneByte(), extraFlags: seqread.readOneByte(), videoFlags: seqread.readOneByte(), - reserved: new Array(7) + qualityLevel: seqread.readOneByte(), + channelLayout: seqread.readOneByte(), + fileRole: seqread.readOneByte(), + reserved: new Array(4) } serial.println("File header: " + JSON.stringify(newHeader)) // Skip reserved bytes - for (let i = 0; i < 7; i++) { + for (let i = 0; i < 4; i++) { seqread.readOneByte() } @@ -670,10 +690,13 @@ try { break } - if (packetType === TAV_PACKET_SYNC) { + if (packetType === TAV_PACKET_SYNC || packetType == TAV_PACKET_SYNC_NTSC) { // Sync packet - no additional data akku -= FRAME_TIME - frameCount++ + if (packetType == TAV_PACKET_SYNC) { + frameCount++ + } + trueFrameCount++ // Swap ping-pong buffers instead of expensive memcpy (752KB copy eliminated!) @@ -701,7 +724,8 @@ try { CURRENT_RGB_ADDR, PREV_RGB_ADDR, // RGB buffer pointers header.width, header.height, header.qualityLevel, header.qualityY, header.qualityCo, header.qualityCg, - frameCount, + header.channelLayout, // Channel layout for variable processing + trueFrameCount, header.waveletFilter, // TAV-specific parameter header.decompLevels, // TAV-specific parameter isLossless, @@ -713,7 +737,7 @@ try { // Upload RGB buffer to display framebuffer (like TEV) let uploadStart = sys.nanoTime() - graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, header.width, header.height, frameCount, false) + graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, header.width, header.height, trueFrameCount, false) uploadTime = (sys.nanoTime() - uploadStart) / 1000000.0 // Defer audio playback until a first frame is sent diff --git a/terranmon.txt b/terranmon.txt index aaffb18..617aa57 100644 --- a/terranmon.txt +++ b/terranmon.txt @@ -920,12 +920,14 @@ transmission capability, and region-of-interest coding. 
- bit 2 = is lossless mode (shorthand for `-q 5 -Q1,1,1 -w 0`) - bit 3 = has region-of-interest coding (for still images only) uint8 Encoder quality level (stored with bias of 1 (q0=1); used to derive anisotropy value) - uint8 Channel layout - - 0 = Y-Co-Cg/I-Ct-Cp - - 1 = Y/I only - - 2 = Y-Co-Cg-A/I-Ct-Cp-A - - 3 = Y-A/I-A - - 4 = Co-Cg/Ct-Cp + uint8 Channel layout (bit-field: bit 0=has alpha, bit 1=has chroma inverted, bit 2=has luma inverted) + - 0 = Y-Co-Cg/I-Ct-Cp (000: no alpha, has chroma, has luma) + - 1 = Y-Co-Cg-A/I-Ct-Cp-A (001: has alpha, has chroma, has luma) + - 2 = Y/I only (010: no alpha, no chroma, has luma) + - 3 = Y-A/I-A (011: has alpha, no chroma, has luma) + - 4 = Co-Cg/Ct-Cp (100: no alpha, has chroma, no luma) + - 5 = Co-Cg-A/Ct-Cp-A (101: has alpha, has chroma, no luma) + - 6-7 = Reserved/invalid (would indicate no luma and no chroma) uint8 Reserved[4]: fill with zeros uint8 File Role - 0 = generic diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt index c95c9a9..1b2cf77 100644 --- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt @@ -3943,71 +3943,111 @@ class GraphicsJSR223Delegate(private val vm: VM) { } // Postprocess coefficients from concatenated significance maps format (current - optimal) - private fun postprocessCoefficientsConcatenated(compressedData: ByteArray, compressedOffset: Int, coeffCount: Int, - outputY: ShortArray, outputCo: ShortArray, outputCg: ShortArray) { + // Channel layout constants (bit-field design) + companion object { + const val CHANNEL_LAYOUT_YCOCG = 0 // Y-Co-Cg (000: no alpha, has chroma, has luma) + const val CHANNEL_LAYOUT_YCOCG_A = 1 // Y-Co-Cg-A (001: has alpha, has chroma, has luma) + const val CHANNEL_LAYOUT_Y_ONLY = 2 // Y only (010: no alpha, no chroma, has luma) + const val CHANNEL_LAYOUT_Y_A = 3 // Y-A (011: has alpha, no chroma, has luma) + const 
val CHANNEL_LAYOUT_COCG = 4 // Co-Cg (100: no alpha, has chroma, no luma) + const val CHANNEL_LAYOUT_COCG_A = 5 // Co-Cg-A (101: has alpha, has chroma, no luma) + } + + // Variable channel layout postprocessing for concatenated maps + private fun postprocessCoefficientsVariableLayout(compressedData: ByteArray, compressedOffset: Int, coeffCount: Int, + channelLayout: Int, outputY: ShortArray?, outputCo: ShortArray?, + outputCg: ShortArray?, outputAlpha: ShortArray?) { val mapBytes = (coeffCount + 7) / 8 + // Determine active channels based on layout (bit-field design) + val hasY = channelLayout and 4 == 0 // bit 2 inverted: 0 means has luma + val hasCo = channelLayout and 2 == 0 // bit 1 inverted: 0 means has chroma + val hasCg = channelLayout and 2 == 0 // bit 1 inverted: 0 means has chroma (same as Co) + val hasAlpha = channelLayout and 1 != 0 // bit 0: 1 means has alpha + // Clear output arrays - outputY.fill(0) - outputCo.fill(0) - outputCg.fill(0) + outputY?.fill(0) + outputCo?.fill(0) + outputCg?.fill(0) + outputAlpha?.fill(0) - // Extract significance maps: [Y_map][Co_map][Cg_map][Y_vals][Co_vals][Cg_vals] - val yMapOffset = compressedOffset - val coMapOffset = compressedOffset + mapBytes - val cgMapOffset = compressedOffset + mapBytes * 2 + var mapOffset = compressedOffset + var mapIndex = 0 - // Count non-zeros in each channel to determine value array boundaries + // Map offsets for active channels + val yMapOffset = if (hasY) { val offset = mapOffset; mapOffset += mapBytes; offset } else -1 + val coMapOffset = if (hasCo) { val offset = mapOffset; mapOffset += mapBytes; offset } else -1 + val cgMapOffset = if (hasCg) { val offset = mapOffset; mapOffset += mapBytes; offset } else -1 + val alphaMapOffset = if (hasAlpha) { val offset = mapOffset; mapOffset += mapBytes; offset } else -1 + + // Count non-zeros for each active channel var yNonZeros = 0 var coNonZeros = 0 var cgNonZeros = 0 + var alphaNonZeros = 0 for (i in 0 until coeffCount) { val byteIdx = i / 
8 val bitIdx = i % 8 - if ((compressedData[yMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) yNonZeros++ - if ((compressedData[coMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) coNonZeros++ - if ((compressedData[cgMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) cgNonZeros++ + if (hasY && yMapOffset >= 0 && (compressedData[yMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) yNonZeros++ + if (hasCo && coMapOffset >= 0 && (compressedData[coMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) coNonZeros++ + if (hasCg && cgMapOffset >= 0 && (compressedData[cgMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) cgNonZeros++ + if (hasAlpha && alphaMapOffset >= 0 && (compressedData[alphaMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) alphaNonZeros++ } // Calculate value array offsets - val yValuesOffset = compressedOffset + mapBytes * 3 - val coValuesOffset = yValuesOffset + yNonZeros * 2 - val cgValuesOffset = coValuesOffset + coNonZeros * 2 + var valueOffset = mapOffset + val yValuesOffset = if (hasY) { val offset = valueOffset; valueOffset += yNonZeros * 2; offset } else -1 + val coValuesOffset = if (hasCo) { val offset = valueOffset; valueOffset += coNonZeros * 2; offset } else -1 + val cgValuesOffset = if (hasCg) { val offset = valueOffset; valueOffset += cgNonZeros * 2; offset } else -1 + val alphaValuesOffset = if (hasAlpha) { val offset = valueOffset; valueOffset += alphaNonZeros * 2; offset } else -1 - // Extract coefficients using significance maps + // Reconstruct coefficients var yValueIdx = 0 var coValueIdx = 0 var cgValueIdx = 0 + var alphaValueIdx = 0 for (i in 0 until coeffCount) { val byteIdx = i / 8 val bitIdx = i % 8 // Y channel - if ((compressedData[yMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) { - val valueOffset = yValuesOffset + yValueIdx * 2 - outputY[i] = (((compressedData[valueOffset + 1].toInt() and 0xFF) shl 8) or - 
(compressedData[valueOffset].toInt() and 0xFF)).toShort() + if (hasY && yMapOffset >= 0 && outputY != null && + (compressedData[yMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) { + val valuePos = yValuesOffset + yValueIdx * 2 + outputY[i] = (((compressedData[valuePos + 1].toInt() and 0xFF) shl 8) or + (compressedData[valuePos].toInt() and 0xFF)).toShort() yValueIdx++ } // Co channel - if ((compressedData[coMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) { - val valueOffset = coValuesOffset + coValueIdx * 2 - outputCo[i] = (((compressedData[valueOffset + 1].toInt() and 0xFF) shl 8) or - (compressedData[valueOffset].toInt() and 0xFF)).toShort() + if (hasCo && coMapOffset >= 0 && outputCo != null && + (compressedData[coMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) { + val valuePos = coValuesOffset + coValueIdx * 2 + outputCo[i] = (((compressedData[valuePos + 1].toInt() and 0xFF) shl 8) or + (compressedData[valuePos].toInt() and 0xFF)).toShort() coValueIdx++ } // Cg channel - if ((compressedData[cgMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) { - val valueOffset = cgValuesOffset + cgValueIdx * 2 - outputCg[i] = (((compressedData[valueOffset + 1].toInt() and 0xFF) shl 8) or - (compressedData[valueOffset].toInt() and 0xFF)).toShort() + if (hasCg && cgMapOffset >= 0 && outputCg != null && + (compressedData[cgMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) { + val valuePos = cgValuesOffset + cgValueIdx * 2 + outputCg[i] = (((compressedData[valuePos + 1].toInt() and 0xFF) shl 8) or + (compressedData[valuePos].toInt() and 0xFF)).toShort() cgValueIdx++ } + + // Alpha channel + if (hasAlpha && alphaMapOffset >= 0 && outputAlpha != null && + (compressedData[alphaMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) { + val valuePos = alphaValuesOffset + alphaValueIdx * 2 + outputAlpha[i] = (((compressedData[valuePos + 1].toInt() and 0xFF) shl 8) or + (compressedData[valuePos].toInt() 
and 0xFF)).toShort() + alphaValueIdx++ + } } } @@ -4261,8 +4301,8 @@ class GraphicsJSR223Delegate(private val vm: VM) { // New tavDecode function that accepts compressed data and decompresses internally fun tavDecodeCompressed(compressedDataPtr: Long, compressedSize: Int, currentRGBAddr: Long, prevRGBAddr: Long, - width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, frameCount: Int, - waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1) { + width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int, + frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1) { // Read compressed data from VM memory into byte array val compressedData = ByteArray(compressedSize) @@ -4291,8 +4331,8 @@ class GraphicsJSR223Delegate(private val vm: VM) { // Call the existing tavDecode function with decompressed data tavDecode(decompressedBuffer.toLong(), currentRGBAddr, prevRGBAddr, - width, height, qIndex, qYGlobal, qCoGlobal, qCgGlobal, frameCount, - waveletFilter, decompLevels, isLossless, tavVersion) + width, height, qIndex, qYGlobal, qCoGlobal, qCgGlobal, channelLayout, + frameCount, waveletFilter, decompLevels, isLossless, tavVersion) } finally { // Clean up allocated buffer @@ -4307,8 +4347,8 @@ class GraphicsJSR223Delegate(private val vm: VM) { // Original tavDecode function for backward compatibility (now handles decompressed data) fun tavDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long, - width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, frameCount: Int, - waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1) { + width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int, + frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, 
tavVersion: Int = 1) { tavDebugCurrentFrameNumber = frameCount @@ -4355,13 +4395,13 @@ class GraphicsJSR223Delegate(private val vm: VM) { } 0x01 -> { // TAV_MODE_INTRA // Decode DWT coefficients directly to RGB buffer - readPtr = tavDecodeDWTIntraTileRGB(qIndex, qYGlobal, readPtr, tileX, tileY, currentRGBAddr, + readPtr = tavDecodeDWTIntraTileRGB(qIndex, qYGlobal, channelLayout, readPtr, tileX, tileY, currentRGBAddr, width, height, qY, qCo, qCg, waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock) } 0x02 -> { // TAV_MODE_DELTA // Coefficient delta encoding for efficient P-frames - readPtr = tavDecodeDeltaTileRGB(readPtr, tileX, tileY, currentRGBAddr, + readPtr = tavDecodeDeltaTileRGB(readPtr, channelLayout, tileX, tileY, currentRGBAddr, width, height, qY, qCo, qCg, waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock) } @@ -4374,7 +4414,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } - private fun tavDecodeDWTIntraTileRGB(qIndex: Int, qYGlobal: Int, readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long, + private fun tavDecodeDWTIntraTileRGB(qIndex: Int, qYGlobal: Int, channelLayout: Int, readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long, width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int, isMonoblock: Boolean = false): Long { // Determine coefficient count based on mode @@ -4430,15 +4470,26 @@ class GraphicsJSR223Delegate(private val vm: VM) { return count } - // Use concatenated maps format: [Y_map][Co_map][Cg_map][Y_vals][Co_vals][Cg_vals] - postprocessCoefficientsConcatenated(coeffBuffer, 0, coeffCount, quantisedY, quantisedCo, quantisedCg) + // Use variable channel layout concatenated maps format + postprocessCoefficientsVariableLayout(coeffBuffer, 0, coeffCount, channelLayout, quantisedY, quantisedCo, quantisedCg, null) - // Calculate total size for concatenated format - val totalMapSize = mapBytes * 3 - val yNonZeros = 
countNonZerosInMapConcatenated(0, mapBytes) - val coNonZeros = countNonZerosInMapConcatenated(mapBytes, mapBytes) - val cgNonZeros = countNonZerosInMapConcatenated(mapBytes * 2, mapBytes) - val totalValueSize = (yNonZeros + coNonZeros + cgNonZeros) * 2 + // Calculate total size for variable channel layout format + val numChannels = when (channelLayout) { + CHANNEL_LAYOUT_YCOCG -> 3 // Y-Co-Cg + CHANNEL_LAYOUT_YCOCG_A -> 4 // Y-Co-Cg-A + CHANNEL_LAYOUT_Y_ONLY -> 1 // Y only + CHANNEL_LAYOUT_Y_A -> 2 // Y-A + CHANNEL_LAYOUT_COCG -> 2 // Co-Cg + CHANNEL_LAYOUT_COCG_A -> 3 // Co-Cg-A + else -> 3 // fallback to Y-Co-Cg + } + + val totalMapSize = mapBytes * numChannels + var totalNonZeros = 0 + for (ch in 0 until numChannels) { + totalNonZeros += countNonZerosInMapConcatenated(mapBytes * ch, mapBytes) + } + val totalValueSize = totalNonZeros * 2 ptr += (totalMapSize + totalValueSize) @@ -4993,7 +5044,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { return 1.0f - (1.0f / (0.5f * qualityLevel * qualityLevel + 1.0f)) * (level - 4.0f) } - private fun tavDecodeDeltaTileRGB(readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long, + private fun tavDecodeDeltaTileRGB(readPtr: Long, channelLayout: Int, tileX: Int, tileY: Int, currentRGBAddr: Long, width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int, isMonoblock: Boolean = false): Long { @@ -5060,15 +5111,26 @@ class GraphicsJSR223Delegate(private val vm: VM) { return count } - // Use concatenated maps format for deltas: [Y_map][Co_map][Cg_map][Y_vals][Co_vals][Cg_vals] - postprocessCoefficientsConcatenated(coeffBuffer, 0, coeffCount, deltaY, deltaCo, deltaCg) + // Use variable channel layout concatenated maps format for deltas + postprocessCoefficientsVariableLayout(coeffBuffer, 0, coeffCount, channelLayout, deltaY, deltaCo, deltaCg, null) - // Calculate total size for concatenated format - val totalMapSize = mapBytes * 3 - val yNonZeros = 
countNonZerosInMapConcatenated(0, mapBytes) - val coNonZeros = countNonZerosInMapConcatenated(mapBytes, mapBytes) - val cgNonZeros = countNonZerosInMapConcatenated(mapBytes * 2, mapBytes) - val totalValueSize = (yNonZeros + coNonZeros + cgNonZeros) * 2 + // Calculate total size for variable channel layout format (deltas) + val numChannels = when (channelLayout) { + CHANNEL_LAYOUT_YCOCG -> 3 // Y-Co-Cg + CHANNEL_LAYOUT_YCOCG_A -> 4 // Y-Co-Cg-A + CHANNEL_LAYOUT_Y_ONLY -> 1 // Y only + CHANNEL_LAYOUT_Y_A -> 2 // Y-A + CHANNEL_LAYOUT_COCG -> 2 // Co-Cg + CHANNEL_LAYOUT_COCG_A -> 3 // Co-Cg-A + else -> 3 // fallback to Y-Co-Cg + } + + val totalMapSize = mapBytes * numChannels + var totalNonZeros = 0 + for (ch in 0 until numChannels) { + totalNonZeros += countNonZerosInMapConcatenated(mapBytes * ch, mapBytes) + } + val totalValueSize = totalNonZeros * 2 ptr += (totalMapSize + totalValueSize) diff --git a/video_encoder/decoder_tav.c b/video_encoder/decoder_tav.c index f6c0c09..ed5d5dd 100644 --- a/video_encoder/decoder_tav.c +++ b/video_encoder/decoder_tav.c @@ -114,8 +114,9 @@ typedef struct { uint8_t extra_flags; uint8_t video_flags; uint8_t encoder_quality; + uint8_t channel_layout; uint8_t file_role; - uint8_t reserved[5]; + uint8_t reserved[4]; } __attribute__((packed)) tav_header_t; // Decoder state diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index 2b5e4e3..00d611f 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -41,6 +41,7 @@ #define TAV_PACKET_PFRAME 0x11 // Predicted frame #define TAV_PACKET_AUDIO_MP2 0x20 // MP2 audio #define TAV_PACKET_SUBTITLE 0x30 // Subtitle packet +#define TAV_PACKET_SYNC_NTSC 0xFE // NTSC Sync packet #define TAV_PACKET_SYNC 0xFF // Sync packet // DWT settings @@ -62,6 +63,32 @@ #define WAVELET_DD4 16 // Four-point interpolating Deslauriers-Dubuc (DD-4) #define WAVELET_HAAR 255 // Haar wavelet (simplest wavelet transform) +// Channel layout definitions (bit-field design) +// Bit 0: 
has alpha, Bit 1: has chroma (inverted), Bit 2: has luma (inverted) +#define CHANNEL_LAYOUT_YCOCG 0 // Y-Co-Cg/I-Ct-Cp (000: no alpha, has chroma, has luma) +#define CHANNEL_LAYOUT_YCOCG_A 1 // Y-Co-Cg-A/I-Ct-Cp-A (001: has alpha, has chroma, has luma) +#define CHANNEL_LAYOUT_Y_ONLY 2 // Y/I only (010: no alpha, no chroma, has luma) +#define CHANNEL_LAYOUT_Y_A 3 // Y-A/I-A (011: has alpha, no chroma, has luma) +#define CHANNEL_LAYOUT_COCG 4 // Co-Cg/Ct-Cp (100: no alpha, has chroma, no luma) +#define CHANNEL_LAYOUT_COCG_A 5 // Co-Cg-A/Ct-Cp-A (101: has alpha, has chroma, no luma) + +// Channel layout configuration structure +typedef struct { + int layout_id; + int num_channels; + const char* channels[4]; // channel names for display + int has_y, has_co, has_cg, has_alpha; +} channel_layout_config_t; + +static const channel_layout_config_t channel_layouts[] = { + {CHANNEL_LAYOUT_YCOCG, 3, {"Y", "Co", "Cg", NULL}, 1, 1, 1, 0}, // 0: Y-Co-Cg + {CHANNEL_LAYOUT_YCOCG_A, 4, {"Y", "Co", "Cg", "A"}, 1, 1, 1, 1}, // 1: Y-Co-Cg-A + {CHANNEL_LAYOUT_Y_ONLY, 1, {"Y", NULL, NULL, NULL}, 1, 0, 0, 0}, // 2: Y only + {CHANNEL_LAYOUT_Y_A, 2, {"Y", NULL, NULL, "A"}, 1, 0, 0, 1}, // 3: Y-A + {CHANNEL_LAYOUT_COCG, 2, {NULL, "Co", "Cg", NULL}, 0, 1, 1, 0}, // 4: Co-Cg + {CHANNEL_LAYOUT_COCG_A, 3, {NULL, "Co", "Cg", "A"}, 0, 1, 1, 1} // 5: Co-Cg-A +}; + // Default settings #define DEFAULT_WIDTH 560 #define DEFAULT_HEIGHT 448 @@ -224,6 +251,7 @@ typedef struct { int intra_only; // Force all tiles to use INTRA mode (disable delta encoding) int monoblock; // Single DWT tile mode (encode entire frame as one tile) int perceptual_tuning; // 1 = perceptual quantisation (default), 0 = uniform quantisation + int channel_layout; // Channel layout: 0=Y-Co-Cg, 1=Y-Co-Cg-A, 2=Y-only, 3=Y-A, 4=Co-Cg, 5=Co-Cg-A // Frame buffers - ping-pong implementation uint8_t *frame_rgb[2]; // [0] and [1] alternate between current and previous @@ -352,6 +380,7 @@ static void show_usage(const char *program_name) { printf(" -Q, 
--quantiser Y,Co,Cg Quantiser levels 1-255 for each channel (1: lossless, 255: potato)\n"); printf(" -w, --wavelet N Wavelet filter: 0=5/3 reversible, 1=9/7 irreversible, 2=DD-4 (default: 1)\n"); // printf(" -b, --bitrate N Target bitrate in kbps (enables bitrate control mode)\n"); + printf(" -c, --channel-layout N Channel layout: 0=Y-Co-Cg, 1=Y-Co-Cg-A, 2=Y-only, 3=Y-A, 4=Co-Cg, 5=Co-Cg-A (default: 0)\n"); printf(" --arate N MP2 audio bitrate in kbps (overrides quality-based audio rate)\n"); printf(" Valid values: 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384\n"); printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n"); @@ -420,6 +449,7 @@ static tav_encoder_t* create_encoder(void) { enc->intra_only = 0; enc->monoblock = 1; // Default to monoblock mode enc->perceptual_tuning = 1; // Default to perceptual quantisation (versions 5/6) + enc->channel_layout = CHANNEL_LAYOUT_YCOCG; // Default to Y-Co-Cg enc->audio_bitrate = 0; // 0 = use quality table enc->encode_limit = 0; // Default: no frame limit @@ -1041,6 +1071,74 @@ static size_t preprocess_coefficients_concatenated(int16_t *coeffs_y, int16_t *c return map_bytes * 3 + (nonzero_y + nonzero_co + nonzero_cg) * sizeof(int16_t); } +// Variable channel layout preprocessing for concatenated maps +static size_t preprocess_coefficients_variable_layout(int16_t *coeffs_y, int16_t *coeffs_co, int16_t *coeffs_cg, int16_t *coeffs_alpha, + int coeff_count, int channel_layout, uint8_t *output_buffer) { + const channel_layout_config_t *config = &channel_layouts[channel_layout]; + int map_bytes = (coeff_count + 7) / 8; + int total_maps = config->num_channels; + + // Count non-zeros per active channel + int nonzero_counts[4] = {0}; // Y, Co, Cg, Alpha + for (int i = 0; i < coeff_count; i++) { + if (config->has_y && coeffs_y && coeffs_y[i] != 0) nonzero_counts[0]++; + if (config->has_co && coeffs_co && coeffs_co[i] != 0) nonzero_counts[1]++; + if (config->has_cg && coeffs_cg && coeffs_cg[i] != 
0) nonzero_counts[2]++; + if (config->has_alpha && coeffs_alpha && coeffs_alpha[i] != 0) nonzero_counts[3]++; + } + + // Layout maps in order based on channel layout + uint8_t *maps[4]; + int map_idx = 0; + if (config->has_y) maps[0] = output_buffer + map_bytes * map_idx++; + if (config->has_co) maps[1] = output_buffer + map_bytes * map_idx++; + if (config->has_cg) maps[2] = output_buffer + map_bytes * map_idx++; + if (config->has_alpha) maps[3] = output_buffer + map_bytes * map_idx++; + + // Calculate value array positions + int16_t *values[4]; + int16_t *value_start = (int16_t *)(output_buffer + map_bytes * total_maps); + int value_offset = 0; + if (config->has_y) { values[0] = value_start + value_offset; value_offset += nonzero_counts[0]; } + if (config->has_co) { values[1] = value_start + value_offset; value_offset += nonzero_counts[1]; } + if (config->has_cg) { values[2] = value_start + value_offset; value_offset += nonzero_counts[2]; } + if (config->has_alpha) { values[3] = value_start + value_offset; value_offset += nonzero_counts[3]; } + + // Clear significance maps + memset(output_buffer, 0, map_bytes * total_maps); + + // Fill significance maps and extract values + int value_indices[4] = {0}; + for (int i = 0; i < coeff_count; i++) { + int byte_idx = i / 8; + int bit_idx = i % 8; + + if (config->has_y && coeffs_y && coeffs_y[i] != 0) { + maps[0][byte_idx] |= (1 << bit_idx); + values[0][value_indices[0]++] = coeffs_y[i]; + } + + if (config->has_co && coeffs_co && coeffs_co[i] != 0) { + maps[1][byte_idx] |= (1 << bit_idx); + values[1][value_indices[1]++] = coeffs_co[i]; + } + + if (config->has_cg && coeffs_cg && coeffs_cg[i] != 0) { + maps[2][byte_idx] |= (1 << bit_idx); + values[2][value_indices[2]++] = coeffs_cg[i]; + } + + if (config->has_alpha && coeffs_alpha && coeffs_alpha[i] != 0) { + maps[3][byte_idx] |= (1 << bit_idx); + values[3][value_indices[3]++] = coeffs_alpha[i]; + } + } + + // Return total size: maps + all non-zero values + int 
total_nonzeros = nonzero_counts[0] + nonzero_counts[1] + nonzero_counts[2] + nonzero_counts[3]; + return map_bytes * total_maps + total_nonzeros * sizeof(int16_t); +} + // Quantisation for DWT subbands with rate control static void quantise_dwt_coefficients(float *coeffs, int16_t *quantised, int size, int quantiser) { float effective_q = quantiser; @@ -1362,9 +1460,9 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y, printf("\n"); }*/ - // Preprocess and write quantised coefficients using concatenated significance maps for optimal compression - size_t total_compressed_size = preprocess_coefficients_concatenated(quantised_y, quantised_co, quantised_cg, - tile_size, buffer + offset); + // Preprocess and write quantised coefficients using variable channel layout concatenated significance maps + size_t total_compressed_size = preprocess_coefficients_variable_layout(quantised_y, quantised_co, quantised_cg, NULL, + tile_size, enc->channel_layout, buffer + offset); offset += total_compressed_size; // DEBUG: Dump raw DWT coefficients for frame ~60 when it's an intra-frame @@ -1831,9 +1929,10 @@ static int write_tav_header(tav_encoder_t *enc) { fputc(video_flags, enc->output_fp); fputc(enc->quality_level+1, enc->output_fp); + fputc(enc->channel_layout, enc->output_fp); - // Reserved bytes (6 bytes) - for (int i = 0; i < 6; i++) { + // Reserved bytes (5 bytes - one used for channel layout) + for (int i = 0; i < 5; i++) { fputc(0, enc->output_fp); } @@ -2773,6 +2872,7 @@ int main(int argc, char *argv[]) { {"quantiser", required_argument, 0, 'Q'}, {"quantiser", required_argument, 0, 'Q'}, {"wavelet", required_argument, 0, 'w'}, + {"channel-layout", required_argument, 0, 'c'}, {"bitrate", required_argument, 0, 'b'}, {"arate", required_argument, 0, 1400}, {"subtitle", required_argument, 0, 'S'}, @@ -2789,7 +2889,7 @@ int main(int argc, char *argv[]) { }; int c, option_index = 0; - while ((c = getopt_long(argc, argv, "i:o:s:f:q:Q:w:d:b:pS:vt", 
long_options, &option_index)) != -1) { + while ((c = getopt_long(argc, argv, "i:o:s:f:q:Q:w:c:d:b:pS:vt", long_options, &option_index)) != -1) { switch (c) { case 'i': enc->input_file = strdup(optarg); @@ -2824,6 +2924,21 @@ int main(int argc, char *argv[]) { case 'w': enc->wavelet_filter = CLAMP(atoi(optarg), 0, 255); break; + case 'c': { + int layout = atoi(optarg); + if (layout < 0 || layout > 5) { + fprintf(stderr, "Error: Invalid channel layout %d. Valid range: 0-5\n", layout); + cleanup_encoder(enc); + return 1; + } + enc->channel_layout = layout; + if (enc->verbose) { + printf("Channel layout set to %d (%s)\n", enc->channel_layout, + channel_layouts[enc->channel_layout].channels[0] ? + channel_layouts[enc->channel_layout].channels[0] : "unknown"); + } + break; + } case 'f': enc->output_fps = atoi(optarg); if (enc->output_fps <= 0) { @@ -3160,7 +3275,8 @@ int main(int argc, char *argv[]) { process_audio(enc, true_frame_count, enc->output_fp); process_subtitles(enc, true_frame_count, enc->output_fp); - fwrite(&sync_packet, 1, 1, enc->output_fp); + uint8_t sync_packet_ntsc = TAV_PACKET_SYNC_NTSC; + fwrite(&sync_packet_ntsc, 1, 1, enc->output_fp); printf("Frame %d: NTSC duplication - extra sync packet emitted with audio/subtitle sync\n", frame_count); }