TAV: channel layouts

This commit is contained in:
minjaesong
2025-09-29 16:34:08 +09:00
parent 907cc37b01
commit 7608e7433a
6 changed files with 286 additions and 80 deletions

View File

@@ -245,10 +245,11 @@ Original: [coeff_array] → [concatenated_significance_maps + nonzero_values]
Concatenated Maps Layout:
[Y_map][Co_map][Cg_map][Y_vals][Co_vals][Cg_vals] (channel layout 0)
[Y_map][Y_vals] (channel layout 1)
[Y_map][Co_map][Cg_map][A_map][Y_vals][Co_vals][Cg_vals][A_map] (channel layout 2)
[Y_map][A_map][Y_vals][A_map] (channel layout 3)
[Co_map][Cg_map][Co_vals][Cg_map] (channel layout 4)
[Y_map][Co_map][Cg_map][A_map][Y_vals][Co_vals][Cg_vals][A_vals] (channel layout 1)
[Y_map][Y_vals] (channel layout 2)
[Y_map][A_map][Y_vals][A_vals] (channel layout 3)
[Co_map][Cg_map][Co_vals][Cg_vals] (channel layout 4)
[Co_map][Cg_map][A_map][Co_vals][Cg_vals][A_vals] (channel layout 5)
(replace Y->I, Co->Ct, Cg->Cp for ICtCp colour space)

View File

@@ -25,6 +25,7 @@ const TAV_PACKET_IFRAME = 0x10
const TAV_PACKET_PFRAME = 0x11
const TAV_PACKET_AUDIO_MP2 = 0x20
const TAV_PACKET_SUBTITLE = 0x30
const TAV_PACKET_SYNC_NTSC = 0xFE
const TAV_PACKET_SYNC = 0xFF
const TAV_FILE_HEADER_FIRST = 0x1F
@@ -378,6 +379,7 @@ let header = {
extraFlags: 0,
videoFlags: 0,
qualityLevel: 0,
channelLayout: 0,
fileRole: 0
}
@@ -414,10 +416,11 @@ header.qualityCg = seqread.readOneByte()
header.extraFlags = seqread.readOneByte()
header.videoFlags = seqread.readOneByte()
header.qualityLevel = seqread.readOneByte() // the decoder expects biased value
header.channelLayout = seqread.readOneByte()
header.fileRole = seqread.readOneByte()
// Skip reserved bytes
seqread.skip(5)
seqread.skip(4)
if (header.version < 1 || header.version > 6) {
printerrln(`Error: Unsupported TAV version ${header.version}`)
@@ -425,6 +428,19 @@ if (header.version < 1 || header.version > 6) {
return
}
// Translate the channel-layout ID stored in the TAV header into a display name.
// Any value that is not one of the known numeric IDs (0-5) is reported as
// "Unknown (<value>)".
function getChannelLayoutName(layout) {
    // Map lookup keeps the switch's strict matching: the string "1" is not the ID 1
    const layoutNames = new Map([
        [0, "Y-Co-Cg"],
        [1, "Y-Co-Cg-A"],
        [2, "Y-only"],
        [3, "Y-A"],
        [4, "Co-Cg"],
        [5, "Co-Cg-A"]
    ])
    if (layoutNames.has(layout)) {
        return layoutNames.get(layout)
    }
    return `Unknown (${layout})`
}
const hasAudio = (header.extraFlags & 0x01) !== 0
const hasSubtitles = (header.extraFlags & 0x02) !== 0
const progressiveTransmission = (header.extraFlags & 0x04) !== 0
@@ -446,6 +462,7 @@ console.log(`Total frames: ${header.totalFrames}`)
console.log(`Wavelet filter: ${header.waveletFilter === WAVELET_5_3_REVERSIBLE ? "5/3 reversible" : header.waveletFilter === WAVELET_9_7_IRREVERSIBLE ? "9/7 irreversible" : header.waveletFilter === WAVELET_BIORTHOGONAL_13_7 ? "Biorthogonal 13/7" : header.waveletFilter === WAVELET_DD4 ? "DD-4" : header.waveletFilter === WAVELET_HAAR ? "Haar" : "unknown"}`)
console.log(`Decomposition levels: ${header.decompLevels}`)
console.log(`Quality: Y=${header.qualityY}, Co=${header.qualityCo}, Cg=${header.qualityCg}`)
console.log(`Channel layout: ${getChannelLayoutName(header.channelLayout)}`)
console.log(`Tiles: ${tilesX}x${tilesY} (${numTiles} total)`)
console.log(`Colour space: ${header.version === 2 ? "ICtCp" : "YCoCg-R"}`)
console.log(`Features: ${hasAudio ? "Audio " : ""}${hasSubtitles ? "Subtitles " : ""}${progressiveTransmission ? "Progressive " : ""}${roiCoding ? "ROI " : ""}`)
@@ -599,13 +616,16 @@ function tryReadNextTAVHeader() {
qualityCg: seqread.readOneByte(),
extraFlags: seqread.readOneByte(),
videoFlags: seqread.readOneByte(),
reserved: new Array(7)
qualityLevel: seqread.readOneByte(),
channelLayout: seqread.readOneByte(),
fileRole: seqread.readOneByte(),
reserved: new Array(4)
}
serial.println("File header: " + JSON.stringify(newHeader))
// Skip reserved bytes
for (let i = 0; i < 7; i++) {
for (let i = 0; i < 4; i++) {
seqread.readOneByte()
}
@@ -670,10 +690,13 @@ try {
break
}
if (packetType === TAV_PACKET_SYNC) {
if (packetType === TAV_PACKET_SYNC || packetType == TAV_PACKET_SYNC_NTSC) {
// Sync packet - no additional data
akku -= FRAME_TIME
frameCount++
if (packetType == TAV_PACKET_SYNC) {
frameCount++
}
trueFrameCount++
// Swap ping-pong buffers instead of expensive memcpy (752KB copy eliminated!)
@@ -701,7 +724,8 @@ try {
CURRENT_RGB_ADDR, PREV_RGB_ADDR, // RGB buffer pointers
header.width, header.height,
header.qualityLevel, header.qualityY, header.qualityCo, header.qualityCg,
frameCount,
header.channelLayout, // Channel layout for variable processing
trueFrameCount,
header.waveletFilter, // TAV-specific parameter
header.decompLevels, // TAV-specific parameter
isLossless,
@@ -713,7 +737,7 @@ try {
// Upload RGB buffer to display framebuffer (like TEV)
let uploadStart = sys.nanoTime()
graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, header.width, header.height, frameCount, false)
graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, header.width, header.height, trueFrameCount, false)
uploadTime = (sys.nanoTime() - uploadStart) / 1000000.0
// Defer audio playback until a first frame is sent

View File

@@ -920,12 +920,14 @@ transmission capability, and region-of-interest coding.
- bit 2 = is lossless mode (shorthand for `-q 5 -Q1,1,1 -w 0`)
- bit 3 = has region-of-interest coding (for still images only)
uint8 Encoder quality level (stored with bias of 1 (q0=1); used to derive anisotropy value)
uint8 Channel layout
- 0 = Y-Co-Cg/I-Ct-Cp
- 1 = Y/I only
- 2 = Y-Co-Cg-A/I-Ct-Cp-A
- 3 = Y-A/I-A
- 4 = Co-Cg/Ct-Cp
uint8 Channel layout (bit-field: bit 0 set = has alpha, bit 1 set = chroma absent, bit 2 set = luma absent; bits 1 and 2 are inverted so that 0 means full Y-Co-Cg)
- 0 = Y-Co-Cg/I-Ct-Cp (000: no alpha, has chroma, has luma)
- 1 = Y-Co-Cg-A/I-Ct-Cp-A (001: has alpha, has chroma, has luma)
- 2 = Y/I only (010: no alpha, no chroma, has luma)
- 3 = Y-A/I-A (011: has alpha, no chroma, has luma)
- 4 = Co-Cg/Ct-Cp (100: no alpha, has chroma, no luma)
- 5 = Co-Cg-A/Ct-Cp-A (101: has alpha, has chroma, no luma)
- 6-7 = Reserved/invalid (would indicate no luma and no chroma)
uint8 Reserved[4]: fill with zeros
uint8 File Role
- 0 = generic

View File

@@ -3943,71 +3943,111 @@ class GraphicsJSR223Delegate(private val vm: VM) {
}
// Postprocess coefficients from concatenated significance maps format (current - optimal)
private fun postprocessCoefficientsConcatenated(compressedData: ByteArray, compressedOffset: Int, coeffCount: Int,
outputY: ShortArray, outputCo: ShortArray, outputCg: ShortArray) {
// Channel layout IDs, written as the 3-bit field they encode:
//   bit 0 set -> alpha present
//   bit 1 set -> chroma (Co/Cg) absent
//   bit 2 set -> luma (Y) absent
companion object {
    const val CHANNEL_LAYOUT_YCOCG = 0b000   // 0: Y-Co-Cg
    const val CHANNEL_LAYOUT_YCOCG_A = 0b001 // 1: Y-Co-Cg-A
    const val CHANNEL_LAYOUT_Y_ONLY = 0b010  // 2: Y only
    const val CHANNEL_LAYOUT_Y_A = 0b011     // 3: Y-A
    const val CHANNEL_LAYOUT_COCG = 0b100    // 4: Co-Cg
    const val CHANNEL_LAYOUT_COCG_A = 0b101  // 5: Co-Cg-A
}
// Variable channel layout postprocessing for concatenated maps
private fun postprocessCoefficientsVariableLayout(compressedData: ByteArray, compressedOffset: Int, coeffCount: Int,
channelLayout: Int, outputY: ShortArray?, outputCo: ShortArray?,
outputCg: ShortArray?, outputAlpha: ShortArray?) {
val mapBytes = (coeffCount + 7) / 8
// Determine active channels based on layout (bit-field design)
val hasY = channelLayout and 4 == 0 // bit 2 inverted: 0 means has luma
val hasCo = channelLayout and 2 == 0 // bit 1 inverted: 0 means has chroma
val hasCg = channelLayout and 2 == 0 // bit 1 inverted: 0 means has chroma (same as Co)
val hasAlpha = channelLayout and 1 != 0 // bit 0: 1 means has alpha
// Clear output arrays
outputY.fill(0)
outputCo.fill(0)
outputCg.fill(0)
outputY?.fill(0)
outputCo?.fill(0)
outputCg?.fill(0)
outputAlpha?.fill(0)
// Extract significance maps: [Y_map][Co_map][Cg_map][Y_vals][Co_vals][Cg_vals]
val yMapOffset = compressedOffset
val coMapOffset = compressedOffset + mapBytes
val cgMapOffset = compressedOffset + mapBytes * 2
var mapOffset = compressedOffset
var mapIndex = 0
// Count non-zeros in each channel to determine value array boundaries
// Map offsets for active channels
val yMapOffset = if (hasY) { val offset = mapOffset; mapOffset += mapBytes; offset } else -1
val coMapOffset = if (hasCo) { val offset = mapOffset; mapOffset += mapBytes; offset } else -1
val cgMapOffset = if (hasCg) { val offset = mapOffset; mapOffset += mapBytes; offset } else -1
val alphaMapOffset = if (hasAlpha) { val offset = mapOffset; mapOffset += mapBytes; offset } else -1
// Count non-zeros for each active channel
var yNonZeros = 0
var coNonZeros = 0
var cgNonZeros = 0
var alphaNonZeros = 0
for (i in 0 until coeffCount) {
val byteIdx = i / 8
val bitIdx = i % 8
if ((compressedData[yMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) yNonZeros++
if ((compressedData[coMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) coNonZeros++
if ((compressedData[cgMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) cgNonZeros++
if (hasY && yMapOffset >= 0 && (compressedData[yMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) yNonZeros++
if (hasCo && coMapOffset >= 0 && (compressedData[coMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) coNonZeros++
if (hasCg && cgMapOffset >= 0 && (compressedData[cgMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) cgNonZeros++
if (hasAlpha && alphaMapOffset >= 0 && (compressedData[alphaMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) alphaNonZeros++
}
// Calculate value array offsets
val yValuesOffset = compressedOffset + mapBytes * 3
val coValuesOffset = yValuesOffset + yNonZeros * 2
val cgValuesOffset = coValuesOffset + coNonZeros * 2
var valueOffset = mapOffset
val yValuesOffset = if (hasY) { val offset = valueOffset; valueOffset += yNonZeros * 2; offset } else -1
val coValuesOffset = if (hasCo) { val offset = valueOffset; valueOffset += coNonZeros * 2; offset } else -1
val cgValuesOffset = if (hasCg) { val offset = valueOffset; valueOffset += cgNonZeros * 2; offset } else -1
val alphaValuesOffset = if (hasAlpha) { val offset = valueOffset; valueOffset += alphaNonZeros * 2; offset } else -1
// Extract coefficients using significance maps
// Reconstruct coefficients
var yValueIdx = 0
var coValueIdx = 0
var cgValueIdx = 0
var alphaValueIdx = 0
for (i in 0 until coeffCount) {
val byteIdx = i / 8
val bitIdx = i % 8
// Y channel
if ((compressedData[yMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) {
val valueOffset = yValuesOffset + yValueIdx * 2
outputY[i] = (((compressedData[valueOffset + 1].toInt() and 0xFF) shl 8) or
(compressedData[valueOffset].toInt() and 0xFF)).toShort()
if (hasY && yMapOffset >= 0 && outputY != null &&
(compressedData[yMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) {
val valuePos = yValuesOffset + yValueIdx * 2
outputY[i] = (((compressedData[valuePos + 1].toInt() and 0xFF) shl 8) or
(compressedData[valuePos].toInt() and 0xFF)).toShort()
yValueIdx++
}
// Co channel
if ((compressedData[coMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) {
val valueOffset = coValuesOffset + coValueIdx * 2
outputCo[i] = (((compressedData[valueOffset + 1].toInt() and 0xFF) shl 8) or
(compressedData[valueOffset].toInt() and 0xFF)).toShort()
if (hasCo && coMapOffset >= 0 && outputCo != null &&
(compressedData[coMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) {
val valuePos = coValuesOffset + coValueIdx * 2
outputCo[i] = (((compressedData[valuePos + 1].toInt() and 0xFF) shl 8) or
(compressedData[valuePos].toInt() and 0xFF)).toShort()
coValueIdx++
}
// Cg channel
if ((compressedData[cgMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) {
val valueOffset = cgValuesOffset + cgValueIdx * 2
outputCg[i] = (((compressedData[valueOffset + 1].toInt() and 0xFF) shl 8) or
(compressedData[valueOffset].toInt() and 0xFF)).toShort()
if (hasCg && cgMapOffset >= 0 && outputCg != null &&
(compressedData[cgMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) {
val valuePos = cgValuesOffset + cgValueIdx * 2
outputCg[i] = (((compressedData[valuePos + 1].toInt() and 0xFF) shl 8) or
(compressedData[valuePos].toInt() and 0xFF)).toShort()
cgValueIdx++
}
// Alpha channel
if (hasAlpha && alphaMapOffset >= 0 && outputAlpha != null &&
(compressedData[alphaMapOffset + byteIdx].toInt() and 0xFF) and (1 shl bitIdx) != 0) {
val valuePos = alphaValuesOffset + alphaValueIdx * 2
outputAlpha[i] = (((compressedData[valuePos + 1].toInt() and 0xFF) shl 8) or
(compressedData[valuePos].toInt() and 0xFF)).toShort()
alphaValueIdx++
}
}
}
@@ -4261,8 +4301,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// New tavDecode function that accepts compressed data and decompresses internally
fun tavDecodeCompressed(compressedDataPtr: Long, compressedSize: Int, currentRGBAddr: Long, prevRGBAddr: Long,
width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, frameCount: Int,
waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1) {
width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int,
frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1) {
// Read compressed data from VM memory into byte array
val compressedData = ByteArray(compressedSize)
@@ -4291,8 +4331,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// Call the existing tavDecode function with decompressed data
tavDecode(decompressedBuffer.toLong(), currentRGBAddr, prevRGBAddr,
width, height, qIndex, qYGlobal, qCoGlobal, qCgGlobal, frameCount,
waveletFilter, decompLevels, isLossless, tavVersion)
width, height, qIndex, qYGlobal, qCoGlobal, qCgGlobal, channelLayout,
frameCount, waveletFilter, decompLevels, isLossless, tavVersion)
} finally {
// Clean up allocated buffer
@@ -4307,8 +4347,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// Original tavDecode function for backward compatibility (now handles decompressed data)
fun tavDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long,
width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, frameCount: Int,
waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1) {
width: Int, height: Int, qIndex: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, channelLayout: Int,
frameCount: Int, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1) {
tavDebugCurrentFrameNumber = frameCount
@@ -4355,13 +4395,13 @@ class GraphicsJSR223Delegate(private val vm: VM) {
}
0x01 -> { // TAV_MODE_INTRA
// Decode DWT coefficients directly to RGB buffer
readPtr = tavDecodeDWTIntraTileRGB(qIndex, qYGlobal, readPtr, tileX, tileY, currentRGBAddr,
readPtr = tavDecodeDWTIntraTileRGB(qIndex, qYGlobal, channelLayout, readPtr, tileX, tileY, currentRGBAddr,
width, height, qY, qCo, qCg,
waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock)
}
0x02 -> { // TAV_MODE_DELTA
// Coefficient delta encoding for efficient P-frames
readPtr = tavDecodeDeltaTileRGB(readPtr, tileX, tileY, currentRGBAddr,
readPtr = tavDecodeDeltaTileRGB(readPtr, channelLayout, tileX, tileY, currentRGBAddr,
width, height, qY, qCo, qCg,
waveletFilter, decompLevels, isLossless, tavVersion, isMonoblock)
}
@@ -4374,7 +4414,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
}
}
private fun tavDecodeDWTIntraTileRGB(qIndex: Int, qYGlobal: Int, readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long,
private fun tavDecodeDWTIntraTileRGB(qIndex: Int, qYGlobal: Int, channelLayout: Int, readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long,
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int,
waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int, isMonoblock: Boolean = false): Long {
// Determine coefficient count based on mode
@@ -4430,15 +4470,26 @@ class GraphicsJSR223Delegate(private val vm: VM) {
return count
}
// Use concatenated maps format: [Y_map][Co_map][Cg_map][Y_vals][Co_vals][Cg_vals]
postprocessCoefficientsConcatenated(coeffBuffer, 0, coeffCount, quantisedY, quantisedCo, quantisedCg)
// Use variable channel layout concatenated maps format
postprocessCoefficientsVariableLayout(coeffBuffer, 0, coeffCount, channelLayout, quantisedY, quantisedCo, quantisedCg, null)
// Calculate total size for concatenated format
val totalMapSize = mapBytes * 3
val yNonZeros = countNonZerosInMapConcatenated(0, mapBytes)
val coNonZeros = countNonZerosInMapConcatenated(mapBytes, mapBytes)
val cgNonZeros = countNonZerosInMapConcatenated(mapBytes * 2, mapBytes)
val totalValueSize = (yNonZeros + coNonZeros + cgNonZeros) * 2
// Calculate total size for variable channel layout format
val numChannels = when (channelLayout) {
CHANNEL_LAYOUT_YCOCG -> 3 // Y-Co-Cg
CHANNEL_LAYOUT_YCOCG_A -> 4 // Y-Co-Cg-A
CHANNEL_LAYOUT_Y_ONLY -> 1 // Y only
CHANNEL_LAYOUT_Y_A -> 2 // Y-A
CHANNEL_LAYOUT_COCG -> 2 // Co-Cg
CHANNEL_LAYOUT_COCG_A -> 3 // Co-Cg-A
else -> 3 // fallback to Y-Co-Cg
}
val totalMapSize = mapBytes * numChannels
var totalNonZeros = 0
for (ch in 0 until numChannels) {
totalNonZeros += countNonZerosInMapConcatenated(mapBytes * ch, mapBytes)
}
val totalValueSize = totalNonZeros * 2
ptr += (totalMapSize + totalValueSize)
@@ -4993,7 +5044,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
return 1.0f - (1.0f / (0.5f * qualityLevel * qualityLevel + 1.0f)) * (level - 4.0f)
}
private fun tavDecodeDeltaTileRGB(readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long,
private fun tavDecodeDeltaTileRGB(readPtr: Long, channelLayout: Int, tileX: Int, tileY: Int, currentRGBAddr: Long,
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int,
waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int, isMonoblock: Boolean = false): Long {
@@ -5060,15 +5111,26 @@ class GraphicsJSR223Delegate(private val vm: VM) {
return count
}
// Use concatenated maps format for deltas: [Y_map][Co_map][Cg_map][Y_vals][Co_vals][Cg_vals]
postprocessCoefficientsConcatenated(coeffBuffer, 0, coeffCount, deltaY, deltaCo, deltaCg)
// Use variable channel layout concatenated maps format for deltas
postprocessCoefficientsVariableLayout(coeffBuffer, 0, coeffCount, channelLayout, deltaY, deltaCo, deltaCg, null)
// Calculate total size for concatenated format
val totalMapSize = mapBytes * 3
val yNonZeros = countNonZerosInMapConcatenated(0, mapBytes)
val coNonZeros = countNonZerosInMapConcatenated(mapBytes, mapBytes)
val cgNonZeros = countNonZerosInMapConcatenated(mapBytes * 2, mapBytes)
val totalValueSize = (yNonZeros + coNonZeros + cgNonZeros) * 2
// Calculate total size for variable channel layout format (deltas)
val numChannels = when (channelLayout) {
CHANNEL_LAYOUT_YCOCG -> 3 // Y-Co-Cg
CHANNEL_LAYOUT_YCOCG_A -> 4 // Y-Co-Cg-A
CHANNEL_LAYOUT_Y_ONLY -> 1 // Y only
CHANNEL_LAYOUT_Y_A -> 2 // Y-A
CHANNEL_LAYOUT_COCG -> 2 // Co-Cg
CHANNEL_LAYOUT_COCG_A -> 3 // Co-Cg-A
else -> 3 // fallback to Y-Co-Cg
}
val totalMapSize = mapBytes * numChannels
var totalNonZeros = 0
for (ch in 0 until numChannels) {
totalNonZeros += countNonZerosInMapConcatenated(mapBytes * ch, mapBytes)
}
val totalValueSize = totalNonZeros * 2
ptr += (totalMapSize + totalValueSize)

View File

@@ -114,8 +114,9 @@ typedef struct {
uint8_t extra_flags;
uint8_t video_flags;
uint8_t encoder_quality;
uint8_t channel_layout;
uint8_t file_role;
uint8_t reserved[5];
uint8_t reserved[4];
} __attribute__((packed)) tav_header_t;
// Decoder state

View File

@@ -41,6 +41,7 @@
#define TAV_PACKET_PFRAME 0x11 // Predicted frame
#define TAV_PACKET_AUDIO_MP2 0x20 // MP2 audio
#define TAV_PACKET_SUBTITLE 0x30 // Subtitle packet
#define TAV_PACKET_SYNC_NTSC 0xFE // NTSC Sync packet
#define TAV_PACKET_SYNC 0xFF // Sync packet
// DWT settings
@@ -62,6 +63,32 @@
#define WAVELET_DD4 16 // Four-point interpolating Deslauriers-Dubuc (DD-4)
#define WAVELET_HAAR 255 // Haar wavelet (simplest wavelet transform)
// Channel layout definitions (bit-field design)
// Bit 0 set: alpha present. Bit 1 set: chroma ABSENT. Bit 2 set: luma ABSENT.
// Bits 1 and 2 are inverted so that the value 0 means the full Y-Co-Cg layout.
// Values 6 and 7 (neither luma nor chroma) are not defined here.
#define CHANNEL_LAYOUT_YCOCG 0 // Y-Co-Cg/I-Ct-Cp (000: no alpha, has chroma, has luma)
#define CHANNEL_LAYOUT_YCOCG_A 1 // Y-Co-Cg-A/I-Ct-Cp-A (001: has alpha, has chroma, has luma)
#define CHANNEL_LAYOUT_Y_ONLY 2 // Y/I only (010: no alpha, no chroma, has luma)
#define CHANNEL_LAYOUT_Y_A 3 // Y-A/I-A (011: has alpha, no chroma, has luma)
#define CHANNEL_LAYOUT_COCG 4 // Co-Cg/Ct-Cp (100: no alpha, has chroma, no luma)
#define CHANNEL_LAYOUT_COCG_A 5 // Co-Cg-A/Ct-Cp-A (101: has alpha, has chroma, no luma)
// Describes one channel layout: its ID, how many planes it carries, and which ones
typedef struct {
    int layout_id;           // CHANNEL_LAYOUT_* value this entry describes
    int num_channels;        // number of planes carried by this layout
    const char* channels[4]; // channel names for display
    int has_y;               // non-zero when the luma plane is part of the layout
    int has_co;              // non-zero when the Co plane is part of the layout
    int has_cg;              // non-zero when the Cg plane is part of the layout
    int has_alpha;           // non-zero when the alpha plane is part of the layout
} channel_layout_config_t;
static const channel_layout_config_t channel_layouts[] = {
{CHANNEL_LAYOUT_YCOCG, 3, {"Y", "Co", "Cg", NULL}, 1, 1, 1, 0}, // 0: Y-Co-Cg
{CHANNEL_LAYOUT_YCOCG_A, 4, {"Y", "Co", "Cg", "A"}, 1, 1, 1, 1}, // 1: Y-Co-Cg-A
{CHANNEL_LAYOUT_Y_ONLY, 1, {"Y", NULL, NULL, NULL}, 1, 0, 0, 0}, // 2: Y only
{CHANNEL_LAYOUT_Y_A, 2, {"Y", NULL, NULL, "A"}, 1, 0, 0, 1}, // 3: Y-A
{CHANNEL_LAYOUT_COCG, 2, {NULL, "Co", "Cg", NULL}, 0, 1, 1, 0}, // 4: Co-Cg
{CHANNEL_LAYOUT_COCG_A, 3, {NULL, "Co", "Cg", "A"}, 0, 1, 1, 1} // 5: Co-Cg-A
};
// Default settings
#define DEFAULT_WIDTH 560
#define DEFAULT_HEIGHT 448
@@ -224,6 +251,7 @@ typedef struct {
int intra_only; // Force all tiles to use INTRA mode (disable delta encoding)
int monoblock; // Single DWT tile mode (encode entire frame as one tile)
int perceptual_tuning; // 1 = perceptual quantisation (default), 0 = uniform quantisation
int channel_layout; // Channel layout (CHANNEL_LAYOUT_*): 0=Y-Co-Cg, 1=Y-Co-Cg-A, 2=Y-only, 3=Y-A, 4=Co-Cg, 5=Co-Cg-A
// Frame buffers - ping-pong implementation
uint8_t *frame_rgb[2]; // [0] and [1] alternate between current and previous
@@ -352,6 +380,7 @@ static void show_usage(const char *program_name) {
printf(" -Q, --quantiser Y,Co,Cg Quantiser levels 1-255 for each channel (1: lossless, 255: potato)\n");
printf(" -w, --wavelet N Wavelet filter: 0=5/3 reversible, 1=9/7 irreversible, 2=DD-4 (default: 1)\n");
// printf(" -b, --bitrate N Target bitrate in kbps (enables bitrate control mode)\n");
printf(" -c, --channel-layout N Channel layout: 0=Y-Co-Cg, 1=Y-Co-Cg-A, 2=Y-only, 3=Y-A, 4=Co-Cg, 5=Co-Cg-A (default: 0)\n");
printf(" --arate N MP2 audio bitrate in kbps (overrides quality-based audio rate)\n");
printf(" Valid values: 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384\n");
printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n");
@@ -420,6 +449,7 @@ static tav_encoder_t* create_encoder(void) {
enc->intra_only = 0;
enc->monoblock = 1; // Default to monoblock mode
enc->perceptual_tuning = 1; // Default to perceptual quantisation (versions 5/6)
enc->channel_layout = CHANNEL_LAYOUT_YCOCG; // Default to Y-Co-Cg
enc->audio_bitrate = 0; // 0 = use quality table
enc->encode_limit = 0; // Default: no frame limit
@@ -1041,6 +1071,74 @@ static size_t preprocess_coefficients_concatenated(int16_t *coeffs_y, int16_t *c
return map_bytes * 3 + (nonzero_y + nonzero_co + nonzero_cg) * sizeof(int16_t);
}
// Variable channel layout preprocessing for concatenated maps
static size_t preprocess_coefficients_variable_layout(int16_t *coeffs_y, int16_t *coeffs_co, int16_t *coeffs_cg, int16_t *coeffs_alpha,
int coeff_count, int channel_layout, uint8_t *output_buffer) {
const channel_layout_config_t *config = &channel_layouts[channel_layout];
int map_bytes = (coeff_count + 7) / 8;
int total_maps = config->num_channels;
// Count non-zeros per active channel
int nonzero_counts[4] = {0}; // Y, Co, Cg, Alpha
for (int i = 0; i < coeff_count; i++) {
if (config->has_y && coeffs_y && coeffs_y[i] != 0) nonzero_counts[0]++;
if (config->has_co && coeffs_co && coeffs_co[i] != 0) nonzero_counts[1]++;
if (config->has_cg && coeffs_cg && coeffs_cg[i] != 0) nonzero_counts[2]++;
if (config->has_alpha && coeffs_alpha && coeffs_alpha[i] != 0) nonzero_counts[3]++;
}
// Layout maps in order based on channel layout
uint8_t *maps[4];
int map_idx = 0;
if (config->has_y) maps[0] = output_buffer + map_bytes * map_idx++;
if (config->has_co) maps[1] = output_buffer + map_bytes * map_idx++;
if (config->has_cg) maps[2] = output_buffer + map_bytes * map_idx++;
if (config->has_alpha) maps[3] = output_buffer + map_bytes * map_idx++;
// Calculate value array positions
int16_t *values[4];
int16_t *value_start = (int16_t *)(output_buffer + map_bytes * total_maps);
int value_offset = 0;
if (config->has_y) { values[0] = value_start + value_offset; value_offset += nonzero_counts[0]; }
if (config->has_co) { values[1] = value_start + value_offset; value_offset += nonzero_counts[1]; }
if (config->has_cg) { values[2] = value_start + value_offset; value_offset += nonzero_counts[2]; }
if (config->has_alpha) { values[3] = value_start + value_offset; value_offset += nonzero_counts[3]; }
// Clear significance maps
memset(output_buffer, 0, map_bytes * total_maps);
// Fill significance maps and extract values
int value_indices[4] = {0};
for (int i = 0; i < coeff_count; i++) {
int byte_idx = i / 8;
int bit_idx = i % 8;
if (config->has_y && coeffs_y && coeffs_y[i] != 0) {
maps[0][byte_idx] |= (1 << bit_idx);
values[0][value_indices[0]++] = coeffs_y[i];
}
if (config->has_co && coeffs_co && coeffs_co[i] != 0) {
maps[1][byte_idx] |= (1 << bit_idx);
values[1][value_indices[1]++] = coeffs_co[i];
}
if (config->has_cg && coeffs_cg && coeffs_cg[i] != 0) {
maps[2][byte_idx] |= (1 << bit_idx);
values[2][value_indices[2]++] = coeffs_cg[i];
}
if (config->has_alpha && coeffs_alpha && coeffs_alpha[i] != 0) {
maps[3][byte_idx] |= (1 << bit_idx);
values[3][value_indices[3]++] = coeffs_alpha[i];
}
}
// Return total size: maps + all non-zero values
int total_nonzeros = nonzero_counts[0] + nonzero_counts[1] + nonzero_counts[2] + nonzero_counts[3];
return map_bytes * total_maps + total_nonzeros * sizeof(int16_t);
}
// Quantisation for DWT subbands with rate control
static void quantise_dwt_coefficients(float *coeffs, int16_t *quantised, int size, int quantiser) {
float effective_q = quantiser;
@@ -1362,9 +1460,9 @@ static size_t serialise_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
printf("\n");
}*/
// Preprocess and write quantised coefficients using concatenated significance maps for optimal compression
size_t total_compressed_size = preprocess_coefficients_concatenated(quantised_y, quantised_co, quantised_cg,
tile_size, buffer + offset);
// Preprocess and write quantised coefficients using variable channel layout concatenated significance maps
size_t total_compressed_size = preprocess_coefficients_variable_layout(quantised_y, quantised_co, quantised_cg, NULL,
tile_size, enc->channel_layout, buffer + offset);
offset += total_compressed_size;
// DEBUG: Dump raw DWT coefficients for frame ~60 when it's an intra-frame
@@ -1831,9 +1929,10 @@ static int write_tav_header(tav_encoder_t *enc) {
fputc(video_flags, enc->output_fp);
fputc(enc->quality_level+1, enc->output_fp);
fputc(enc->channel_layout, enc->output_fp);
// Reserved bytes (6 bytes)
for (int i = 0; i < 6; i++) {
// Reserved bytes (5 bytes - one used for channel layout)
for (int i = 0; i < 5; i++) {
fputc(0, enc->output_fp);
}
@@ -2773,6 +2872,7 @@ int main(int argc, char *argv[]) {
{"quantiser", required_argument, 0, 'Q'},
{"quantiser", required_argument, 0, 'Q'},
{"wavelet", required_argument, 0, 'w'},
{"channel-layout", required_argument, 0, 'c'},
{"bitrate", required_argument, 0, 'b'},
{"arate", required_argument, 0, 1400},
{"subtitle", required_argument, 0, 'S'},
@@ -2789,7 +2889,7 @@ int main(int argc, char *argv[]) {
};
int c, option_index = 0;
while ((c = getopt_long(argc, argv, "i:o:s:f:q:Q:w:d:b:pS:vt", long_options, &option_index)) != -1) {
while ((c = getopt_long(argc, argv, "i:o:s:f:q:Q:w:c:d:b:pS:vt", long_options, &option_index)) != -1) {
switch (c) {
case 'i':
enc->input_file = strdup(optarg);
@@ -2824,6 +2924,21 @@ int main(int argc, char *argv[]) {
case 'w':
enc->wavelet_filter = CLAMP(atoi(optarg), 0, 255);
break;
case 'c': {
    // Parse strictly: atoi() would silently turn non-numeric input into layout 0.
    // Out-of-range input makes strtol() return LONG_MIN/LONG_MAX, which the range check rejects.
    char *layout_end = NULL;
    long layout = strtol(optarg, &layout_end, 10);
    if (layout_end == optarg || *layout_end != '\0' || layout < 0 || layout > 5) {
        fprintf(stderr, "Error: Invalid channel layout '%s'. Valid range: 0-5\n", optarg);
        cleanup_encoder(enc);
        return 1;
    }
    enc->channel_layout = (int)layout;
    if (enc->verbose) {
        // Show the whole layout (e.g. "Co-Cg-A") by joining the names of the channels
        // it contains; the first slot alone is NULL for the layouts without luma.
        const channel_layout_config_t *layout_cfg = &channel_layouts[enc->channel_layout];
        int printed_any = 0;
        printf("Channel layout set to %d (", enc->channel_layout);
        for (int ch = 0; ch < 4; ch++) {
            if (layout_cfg->channels[ch]) {
                printf("%s%s", printed_any ? "-" : "", layout_cfg->channels[ch]);
                printed_any = 1;
            }
        }
        printf(")\n");
    }
    break;
}
case 'f':
enc->output_fps = atoi(optarg);
if (enc->output_fps <= 0) {
@@ -3160,7 +3275,8 @@ int main(int argc, char *argv[]) {
process_audio(enc, true_frame_count, enc->output_fp);
process_subtitles(enc, true_frame_count, enc->output_fp);
fwrite(&sync_packet, 1, 1, enc->output_fp);
uint8_t sync_packet_ntsc = TAV_PACKET_SYNC_NTSC;
fwrite(&sync_packet_ntsc, 1, 1, enc->output_fp);
printf("Frame %d: NTSC duplication - extra sync packet emitted with audio/subtitle sync\n", frame_count);
}