112x112 blocks for TAV, which greatly improves the encoding speed

This commit is contained in:
minjaesong
2025-09-15 19:08:46 +09:00
parent 1343dd10cf
commit 113c01b851
4 changed files with 816 additions and 83 deletions

View File

@@ -8,7 +8,7 @@
const WIDTH = 560 const WIDTH = 560
const HEIGHT = 448 const HEIGHT = 448
const TILE_SIZE = 64 // 64x64 tiles for DWT (vs 16x16 blocks in TEV) const TILE_SIZE = 112 // 112x112 tiles for DWT (perfect fit for TSVM 560x448 resolution)
const TAV_MAGIC = [0x1F, 0x54, 0x53, 0x56, 0x4D, 0x54, 0x41, 0x56] // "\x1FTSVM TAV" const TAV_MAGIC = [0x1F, 0x54, 0x53, 0x56, 0x4D, 0x54, 0x41, 0x56] // "\x1FTSVM TAV"
const TAV_VERSION = 1 // Initial DWT version const TAV_VERSION = 1 // Initial DWT version
const SND_BASE_ADDR = audio.getBaseAddr() const SND_BASE_ADDR = audio.getBaseAddr()
@@ -99,6 +99,275 @@ graphics.clearPixels2(0)
// Initialize audio // Initialize audio
audio.resetParams(0) audio.resetParams(0)
audio.purgeQueue(0) audio.purgeQueue(0)
audio.setPcmMode(0)
audio.setMasterVolume(0, 255)
// Subtitle display functions
// Blank out the subtitle region at the bottom of the text layer.
// Rows 29..32 (the last four rows of the 80x32 text mode) are overwritten
// with spaces using the transparent colour pair (255, 255); the console
// colours that were active on entry are restored afterwards.
function clearSubtitleArea() {
    const FIRST_ROW = 29
    const LAST_ROW = 32
    const COLUMNS = 80

    // remember the caller's colours so they can be put back at the end
    const savedFore = con.get_color_fore()
    const savedBack = con.get_color_back()

    con.color_pair(255, 255) // transparent to clear

    let row = FIRST_ROW
    while (row <= LAST_ROW) {
        con.move(row, 1)
        // one space per column, printed individually
        let remaining = COLUMNS
        while (remaining > 0) {
            print(" ")
            remaining -= 1
        }
        row += 1
    }

    con.color_pair(savedFore, savedBack)
}
// Return the number of characters of `line` that will actually be shown,
// i.e. its length with the formatting tags <b>, <i>, </b> and </i>
// (matched case-insensitively) left out. Any other '<' is an ordinary
// character and is counted.
function getVisualLength(line) {
    const OPENING_TAGS = ['<b>', '<i>']
    const CLOSING_TAGS = ['</b>', '</i>']

    let shown = 0
    let pos = 0
    while (pos < line.length) {
        // Near the end of the string these slices are shorter than a tag,
        // so they can never compare equal to one.
        const next3 = line.slice(pos, pos + 3).toLowerCase()
        const next4 = line.slice(pos, pos + 4).toLowerCase()

        if (OPENING_TAGS.includes(next3)) {
            pos += 3 // skip opening tag
            continue
        }
        if (CLOSING_TAGS.includes(next4)) {
            pos += 4 // skip closing tag
            continue
        }

        // anything else occupies one cell
        shown += 1
        pos += 1
    }
    return shown
}
// Print one subtitle line at the current cursor position, interpreting the
// formatting tags <b>, <i>, </b> and </i> (case-insensitive) as colour
// switches instead of printing them.
//
// Normal text is yellow (231); text inside <b> and/or <i> is white (254).
// Bold and italic are tracked separately, so closing an inner tag does not
// drop the highlight while an outer tag is still open
// (e.g. "<b>x<i>y</i>z</b>" keeps "z" white).
//
// The line is framed by a padding block character on each side (0xDE before,
// 0xDD after), and the colour pair is left at yellow-on-black on return.
//
// line: the (already trimmed) text of a single subtitle line
function displayFormattedLine(line) {
    const COLOR_NORMAL = 231
    const COLOR_EMPHASIS = 254

    let bold = false
    let italic = false
    let i = 0

    // insert initial padding block
    con.color_pair(0, 255)
    con.prnch(0xDE)
    con.color_pair(COLOR_NORMAL, 0)

    while (i < line.length) {
        // Slices taken near the end of the line are shorter than a tag and
        // therefore never match one.
        const next3 = line.substring(i, i + 3).toLowerCase()
        const next4 = line.substring(i, i + 4).toLowerCase()

        let tagLength = 0
        if (next3 === '<b>') {
            bold = true
            tagLength = 3
        } else if (next3 === '<i>') {
            italic = true
            tagLength = 3
        } else if (next4 === '</b>') {
            bold = false
            tagLength = 4
        } else if (next4 === '</i>') {
            italic = false
            tagLength = 4
        }

        if (tagLength > 0) {
            // Re-evaluate the colour after every tag: white while any
            // emphasis is active, yellow otherwise.
            con.color_pair((bold || italic) ? COLOR_EMPHASIS : COLOR_NORMAL, 0)
            i += tagLength
        } else {
            // Regular character (including a '<' that starts no known tag)
            print(line[i])
            i++
        }
    }

    // insert final padding block
    con.color_pair(0, 255)
    con.prnch(0xDD)
    con.color_pair(COLOR_NORMAL, 0)
}
// Draw a (possibly multi-line) subtitle on the 80x32 text layer.
//
// text:     subtitle text; lines are separated by '\n' and may contain
//           <b>/<i> formatting tags. An empty/missing text just clears the
//           subtitle area.
// position: placement code (default 0)
//             0 bottom center   1 bottom left    2 center left
//             3 top left        4 top center     5 top right
//             6 center right    7 bottom right   8 dead center
//           Unknown codes are treated as bottom center.
//
// The console colours active on entry are restored before returning.
function displaySubtitle(text, position = 0) {
    if (!text || text.length === 0) {
        clearSubtitleArea()
        return
    }

    // Set subtitle colors: yellow (231) on black (0)
    const oldFgColor = con.get_color_fore()
    const oldBgColor = con.get_color_back()
    con.color_pair(231, 0)

    // Lines are trimmed once, up front, so that the width used for centering
    // is measured on exactly the text that gets printed.
    const lines = text.split('\n').map(s => s.trim())

    // Widest line, not counting formatting tags. Computed numerically: the
    // default Array.prototype.sort() compares as strings and would rank
    // 10 below 8.
    const longestLineLength = lines.reduce(
        (longest, s) => Math.max(longest, getVisualLength(s)), 0)

    // Vertical placement
    let startRow
    switch (position) {
        case 2: // center left
        case 6: // center right
        case 8: // dead center
            startRow = 16 - Math.floor(lines.length / 2)
            break
        case 3: // top left
        case 4: // top center
        case 5: // top right
            startRow = 2
            break
        case 0: // bottom center
        case 1: // bottom left
        case 7: // bottom right
        default: // Default to bottom center
            startRow = 32 - lines.length
    }

    // Display each line
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i]
        if (line.length === 0) continue // blank lines still occupy their row

        // keep the row inside the 32-row text area
        const row = Math.min(32, Math.max(1, startRow + i))

        // Horizontal placement
        let startCol
        switch (position) {
            case 1: // bottom left
            case 2: // center left
            case 3: // top left
                startCol = 1
                break
            case 5: // top right
            case 6: // center right
            case 7: // bottom right
                startCol = Math.max(1, 78 - getVisualLength(line) - 2)
                break
            case 0: // bottom center
            case 4: // top center
            case 8: // dead center
            default:
                // All lines share the column of the widest one; the extra 2
                // accounts for the padding blocks drawn around each line.
                startCol = Math.max(1, Math.floor((80 - longestLineLength - 2) / 2) + 1)
                break
        }

        con.move(row, startCol)
        // Parse and display line with formatting tag support
        displayFormattedLine(line)
    }

    con.color_pair(oldFgColor, oldBgColor)
}
// Read and execute one subtitle (SSF) packet from the sequential reader.
//
// Packet layout as parsed here:
//   uint24 index (little-endian) + uint8 opcode + opcode-specific arguments
//
// packetSize: total size of the packet in bytes, including the 3 index bytes
//             and the opcode byte.
//
// Exactly `packetSize` bytes are consumed from `seqread` (or the 4 header
// bytes if packetSize is smaller than that), whatever the opcode and however
// malformed its arguments are. Leaving argument bytes unread would make the
// caller interpret them as the next packet header.
//
// Side effects: updates the player's subtitleText / subtitleVisible /
// subtitlePosition state and redraws or clears the subtitle area.
function processSubtitlePacket(packetSize) {
    // Read 24-bit index (little-endian)
    const indexByte0 = seqread.readOneByte()
    const indexByte1 = seqread.readOneByte()
    const indexByte2 = seqread.readOneByte()
    const index = indexByte0 | (indexByte1 << 8) | (indexByte2 << 16)

    const opcode = seqread.readOneByte()

    // Argument bytes that follow the 4-byte header; never negative
    const remainingBytes = Math.max(0, packetSize - 4)
    // How many of those argument bytes the opcode handler has read so far
    let consumed = 0

    switch (opcode) {
        case SSF_OP_SHOW: {
            // Text followed by a null terminator
            if (remainingBytes > 0) {
                const textBytes = seqread.readBytes(remainingBytes)
                consumed = remainingBytes

                // Convert bytes to string, stopping at null terminator.
                // Each byte becomes one character code.
                let textStr = ""
                for (let i = 0; i < remainingBytes - 1; i++) { // -1 for null terminator
                    const byte = sys.peek(textBytes + i)
                    if (byte === 0) break
                    textStr += String.fromCharCode(byte)
                }
                sys.free(textBytes)

                // A payload of only the terminator carries no text; the
                // current subtitle state is left untouched in that case.
                if (remainingBytes > 1) {
                    subtitleText = textStr
                    subtitleVisible = true
                    displaySubtitle(subtitleText, subtitlePosition)
                }
            }
            break
        }

        case SSF_OP_HIDE: {
            subtitleVisible = false
            subtitleText = ""
            clearSubtitleArea()
            break
        }

        case SSF_OP_MOVE: {
            if (remainingBytes >= 2) { // Need at least 1 byte for position + 1 null terminator
                const newPosition = seqread.readOneByte()
                seqread.readOneByte() // Read null terminator
                consumed = 2

                // 0..8 are the placements displaySubtitle() implements
                // (8 = dead center)
                if (newPosition >= 0 && newPosition <= 8) {
                    subtitlePosition = newPosition

                    // Re-display current subtitle at new position if visible
                    if (subtitleVisible && subtitleText.length > 0) {
                        clearSubtitleArea()
                        displaySubtitle(subtitleText, subtitlePosition)
                    }
                }
            }
            break
        }

        case SSF_OP_UPLOAD_LOW_FONT:
        case SSF_OP_UPLOAD_HIGH_FONT: {
            // Font upload - read payload length and font data
            if (remainingBytes >= 3) { // uint16 length + at least 1 byte data
                const payloadLen = seqread.readShort()
                consumed = 2

                if (remainingBytes >= payloadLen + 2) {
                    const fontData = seqread.readBytes(payloadLen)
                    consumed += payloadLen

                    // upload font data (at most 1920 bytes), written to
                    // descending addresses
                    // NOTE(review): -1300607 is presumably the font upload
                    // window and -1299460 its command register (18 = low
                    // half, 19 = high half) - confirm against the hardware
                    // memory map
                    for (let i = 0; i < Math.min(payloadLen, 1920); i++) sys.poke(-1300607 - i, sys.peek(fontData + i))
                    sys.poke(-1299460, (opcode === SSF_OP_UPLOAD_LOW_FONT) ? 18 : 19)

                    sys.free(fontData)
                }
            }
            break
        }

        case SSF_OP_NOP:
        default: {
            // Nothing to execute; arguments are discarded below
            if (interactive && opcode !== SSF_OP_NOP) {
                serial.println(`[SUBTITLE UNKNOWN] Index: ${index}, Opcode: 0x${opcode.toString(16).padStart(2, '0')}`)
            }
            break
        }
    }

    // Discard whatever the handler above did not read so the stream stays
    // aligned on the next packet.
    const leftover = remainingBytes - consumed
    if (leftover > 0) {
        const skipBytes = seqread.readBytes(leftover)
        sys.free(skipBytes)
    }
}
// TAV header structure (32 bytes vs TEV's 24 bytes) // TAV header structure (32 bytes vs TEV's 24 bytes)
let header = { let header = {
@@ -172,7 +441,7 @@ const isNTSC = (header.videoFlags & 0x02) !== 0
const isLossless = (header.videoFlags & 0x04) !== 0 const isLossless = (header.videoFlags & 0x04) !== 0
const multiResolution = (header.videoFlags & 0x08) !== 0 const multiResolution = (header.videoFlags & 0x08) !== 0
// Calculate tile dimensions (64x64 vs TEV's 16x16 blocks) // Calculate tile dimensions (112x112 vs TEV's 16x16 blocks)
const tilesX = Math.ceil(header.width / TILE_SIZE) const tilesX = Math.ceil(header.width / TILE_SIZE)
const tilesY = Math.ceil(header.height / TILE_SIZE) const tilesY = Math.ceil(header.height / TILE_SIZE)
const numTiles = tilesX * tilesY const numTiles = tilesX * tilesY
@@ -210,6 +479,9 @@ let audioBufferBytesLastFrame = 0
let frame_cnt = 0 let frame_cnt = 0
let frametime = 1000000000.0 / header.fps let frametime = 1000000000.0 / header.fps
let nextFrameTime = 0 let nextFrameTime = 0
let mp2Initialised = false
let audioFired = false
// Performance tracking variables (from TEV) // Performance tracking variables (from TEV)
let decompressTime = 0 let decompressTime = 0
@@ -374,6 +646,21 @@ try {
console.log(`Frame ${frameCount}: Duplicating previous frame`) console.log(`Frame ${frameCount}: Duplicating previous frame`)
} }
// Defer audio playback until a first frame is sent
if (isInterlaced) {
// fire audio after frame 1
if (!audioFired && frameCount > 0) {
audio.play(0)
audioFired = true
}
}
else {
// fire audio after frame 0
if (!audioFired) {
audio.play(0)
audioFired = true
}
}
} catch (e) { } catch (e) {
console.log(`Frame ${frameCount}: decode failed: ${e}`) console.log(`Frame ${frameCount}: decode failed: ${e}`)
} }
@@ -390,38 +677,23 @@ try {
console.log(`Frame ${frameCount}: Decompress=${decompressTime.toFixed(1)}ms, Decode=${decodeTime.toFixed(1)}ms, Upload=${uploadTime.toFixed(1)}ms, Bias=${biasTime.toFixed(1)}ms, Total=${totalTime.toFixed(1)}ms`) console.log(`Frame ${frameCount}: Decompress=${decompressTime.toFixed(1)}ms, Decode=${decodeTime.toFixed(1)}ms, Upload=${uploadTime.toFixed(1)}ms, Bias=${biasTime.toFixed(1)}ms, Total=${totalTime.toFixed(1)}ms`)
} }
} else if (packetType === TAV_PACKET_AUDIO_MP2 && hasAudio) { } else if (packetType === TAV_PACKET_AUDIO_MP2) {
// Audio packet - same as TEV // MP2 Audio packet
let audioPtr = seqread.readBytes(compressedSize) let audioLen = seqread.readInt()
// Send to audio hardware if (!mp2Initialised) {
for (let i = 0; i < compressedSize; i++) { mp2Initialised = true
vm.poke(SND_BASE_ADDR + audioBufferBytesLastFrame + i, sys.peek(audioPtr + i)) audio.mp2Init()
} }
audioBufferBytesLastFrame += compressedSize
sys.free(audioPtr)
} else if (packetType === TAV_PACKET_SUBTITLE && hasSubtitles) { seqread.readBytes(audioLen, SND_BASE_ADDR - 2368)
audio.mp2Decode()
audio.mp2UploadDecoded(0)
} else if (packetType === TAV_PACKET_SUBTITLE) {
// Subtitle packet - same format as TEV // Subtitle packet - same format as TEV
let subtitlePtr = seqread.readBytes(compressedSize) let packetSize = seqread.readInt()
processSubtitlePacket(packetSize)
// Process subtitle (simplified)
if (compressedSize >= 4) {
const index = (sys.peek(subtitlePtr) << 16) | (sys.peek(subtitlePtr + 1) << 8) | sys.peek(subtitlePtr + 2)
const opcode = sys.peek(subtitlePtr + 3)
if (opcode === SSF_OP_SHOW && compressedSize > 4) {
let text = ""
for (let i = 4; i < compressedSize && sys.peek(subtitlePtr + i) !== 0; i++) {
text += String.fromCharCode(sys.peek(subtitlePtr + i))
}
subtitleText = text
subtitleVisible = true
} else if (opcode === SSF_OP_HIDE) {
subtitleVisible = false
}
}
sys.free(subtitlePtr)
} else if (packetType == 0x00) { } else if (packetType == 0x00) {
// Silently discard, faulty subtitle creation can cause this as 0x00 is used as an argument terminator // Silently discard, faulty subtitle creation can cause this as 0x00 is used as an argument terminator
} else { } else {
@@ -463,14 +735,13 @@ finally {
sys.free(RGB_BUFFER_A) sys.free(RGB_BUFFER_A)
sys.free(RGB_BUFFER_B) sys.free(RGB_BUFFER_B)
graphics.setGraphicsMode(0) // Return to text mode
con.curs_set(1) con.curs_set(1)
con.clear() con.clear()
if (errorlevel === 0) { if (errorlevel === 0) {
console.log(`Playback completed: ${frameCount} frames`) console.log(`Playback completed: ${frameCount} frames`)
} else { } else {
console.log(`Playbook failed with error ${errorlevel}`) console.log(`Playback failed with error ${errorlevel}`)
} }
} }

View File

@@ -807,6 +807,7 @@ transmission capability, and region-of-interest coding.
- Version 1.0: Initial DWT-based implementation with 5/3 reversible filter - Version 1.0: Initial DWT-based implementation with 5/3 reversible filter
- Version 1.1: Added 9/7 irreversible filter for higher compression - Version 1.1: Added 9/7 irreversible filter for higher compression
- Version 1.2: Multi-resolution pyramid encoding with up to 4 decomposition levels - Version 1.2: Multi-resolution pyramid encoding with up to 4 decomposition levels
- Version 1.3: Optimized 112x112 tiles for TSVM resolution with up to 6 decomposition levels
# File Structure # File Structure
\x1F T S V M T A V \x1F T S V M T A V
@@ -852,7 +853,7 @@ transmission capability, and region-of-interest coding.
uint32 Compressed Size uint32 Compressed Size
* Zstd-compressed Block Data * Zstd-compressed Block Data
## Block Data (per 64x64 tile) ## Block Data (per 112x112 tile)
uint8 Mode: encoding mode uint8 Mode: encoding mode
0x00 = SKIP (copy from previous frame) 0x00 = SKIP (copy from previous frame)
0x01 = INTRA (DWT-coded, no prediction) 0x01 = INTRA (DWT-coded, no prediction)
@@ -885,10 +886,12 @@ transmission capability, and region-of-interest coding.
* Provides better energy compaction than 5/3 but lossy reconstruction * Provides better energy compaction than 5/3 but lossy reconstruction
### Decomposition Levels ### Decomposition Levels
- Level 1: 64x64 → 32x32 (LL) + 3×32x32 subbands (LH,HL,HH) - Level 1: 112x112 → 56x56 (LL) + 3×56x56 subbands (LH,HL,HH)
- Level 2: 32x32 → 16x16 (LL) + 3×16x16 subbands - Level 2: 56x56 → 28x28 (LL) + 3×28x28 subbands
- Level 3: 16x16 → 8x8 (LL) + 3×8x8 subbands - Level 3: 28x28 → 14x14 (LL) + 3×14x14 subbands
- Level 4: 8x8 → 4x4 (LL) + 3×4x4 subbands - Level 4: 14x14 → 7x7 (LL) + 3×7x7 subbands
- Level 5: 7x7 → 3x3 (LL) + 3×3x3 subbands
- Level 6: 3x3 → 1x1 (LL) + 3×1x1 subbands (maximum)
### Quantization Strategy ### Quantization Strategy
TAV uses different quantization steps for each subband based on human visual TAV uses different quantization steps for each subband based on human visual
@@ -904,9 +907,11 @@ When enabled, coefficients are transmitted in order of visual importance:
3. Higher frequency subbands for refinement 3. Higher frequency subbands for refinement
## Motion Compensation ## Motion Compensation
- Search range: ±16 pixels (larger than TEV due to 64x64 tiles) - Search range: ±28 pixels (optimized for 112x112 tiles)
- Sub-pixel precision: 1/4 pixel with bilinear interpolation - Sub-pixel precision: 1/4 pixel with bilinear interpolation
- Tile size: 64x64 pixels (4x larger than TEV blocks) - Tile size: 112x112 pixels (perfect fit for TSVM 560x448 resolution)
* Exactly 5×4 = 20 tiles per frame (560÷112 = 5, 448÷112 = 4)
* No partial tiles needed - optimal for processing efficiency
- Uses Sum of Absolute Differences (SAD) for motion estimation - Uses Sum of Absolute Differences (SAD) for motion estimation
- Overlapped block motion compensation (OBMC) for smooth boundaries - Overlapped block motion compensation (OBMC) for smooth boundaries
@@ -917,7 +922,7 @@ TAV operates in YCoCg-R colour space with full resolution channels:
- Cg: Green-Magenta chroma (full resolution, very aggressive quantization by default) - Cg: Green-Magenta chroma (full resolution, very aggressive quantization by default)
## Compression Features ## Compression Features
- 64x64 DWT tiles vs 16x16 DCT blocks in TEV - 112x112 DWT tiles vs 16x16 DCT blocks in TEV
- Multi-resolution representation enables scalable decoding - Multi-resolution representation enables scalable decoding
- Better frequency localization than DCT - Better frequency localization than DCT
- Reduced blocking artifacts due to overlapping basis functions - Reduced blocking artifacts due to overlapping basis functions

View File

@@ -3930,8 +3930,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
var readPtr = blockDataPtr var readPtr = blockDataPtr
try { try {
val tilesX = (width + 63) / 64 // 64x64 tiles val tilesX = (width + 111) / 112 // 112x112 tiles
val tilesY = (height + 63) / 64 val tilesY = (height + 111) / 112
// Process each tile // Process each tile
for (tileY in 0 until tilesY) { for (tileY in 0 until tilesY) {
@@ -3949,8 +3949,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
when (mode) { when (mode) {
0x00 -> { // TAV_MODE_SKIP 0x00 -> { // TAV_MODE_SKIP
// Copy 64x64 tile from previous frame to current frame // Copy 112x112 tile from previous frame to current frame
copyTile64x64RGB(tileX, tileY, currentRGBAddr, prevRGBAddr, width, height) copyTile112x112RGB(tileX, tileY, currentRGBAddr, prevRGBAddr, width, height)
} }
0x01 -> { // TAV_MODE_INTRA 0x01 -> { // TAV_MODE_INTRA
// Decode DWT coefficients directly to RGB buffer // Decode DWT coefficients directly to RGB buffer
@@ -3967,8 +3967,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
} }
0x03 -> { // TAV_MODE_MOTION 0x03 -> { // TAV_MODE_MOTION
// Motion compensation only (no residual) // Motion compensation only (no residual)
applyMotionCompensation64x64RGB(tileX, tileY, mvX, mvY, applyMotionCompensation112x112RGB(tileX, tileY, mvX, mvY,
currentRGBAddr, prevRGBAddr, width, height) currentRGBAddr, prevRGBAddr, width, height)
} }
} }
} }
@@ -3982,7 +3982,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
private fun decodeDWTIntraTileRGB(readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long, private fun decodeDWTIntraTileRGB(readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long,
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, rcf: Float, width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, rcf: Float,
waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long { waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long {
val tileSize = 64 val tileSize = 112
val coeffCount = tileSize * tileSize val coeffCount = tileSize * tileSize
var ptr = readPtr var ptr = readPtr
@@ -4043,7 +4043,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
private fun convertYCoCgTileToRGB(tileX: Int, tileY: Int, yTile: FloatArray, coTile: FloatArray, cgTile: FloatArray, private fun convertYCoCgTileToRGB(tileX: Int, tileY: Int, yTile: FloatArray, coTile: FloatArray, cgTile: FloatArray,
rgbAddr: Long, width: Int, height: Int) { rgbAddr: Long, width: Int, height: Int) {
val tileSize = 64 val tileSize = 112
val startX = tileX * tileSize val startX = tileX * tileSize
val startY = tileY * tileSize val startY = tileY * tileSize
@@ -4078,7 +4078,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
private fun convertICtCpTileToRGB(tileX: Int, tileY: Int, iTile: FloatArray, ctTile: FloatArray, cpTile: FloatArray, private fun convertICtCpTileToRGB(tileX: Int, tileY: Int, iTile: FloatArray, ctTile: FloatArray, cpTile: FloatArray,
rgbAddr: Long, width: Int, height: Int) { rgbAddr: Long, width: Int, height: Int) {
val tileSize = 64 val tileSize = 112
val startX = tileX * tileSize val startX = tileX * tileSize
val startY = tileY * tileSize val startY = tileY * tileSize
@@ -4127,7 +4127,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
private fun addYCoCgResidualToRGBTile(tileX: Int, tileY: Int, yRes: FloatArray, coRes: FloatArray, cgRes: FloatArray, private fun addYCoCgResidualToRGBTile(tileX: Int, tileY: Int, yRes: FloatArray, coRes: FloatArray, cgRes: FloatArray,
rgbAddr: Long, width: Int, height: Int) { rgbAddr: Long, width: Int, height: Int) {
val tileSize = 64 val tileSize = 112
val startX = tileX * tileSize val startX = tileX * tileSize
val startY = tileY * tileSize val startY = tileY * tileSize
@@ -4172,8 +4172,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
} }
// Helper functions (simplified versions of existing DWT functions) // Helper functions (simplified versions of existing DWT functions)
private fun copyTile64x64RGB(tileX: Int, tileY: Int, currentRGBAddr: Long, prevRGBAddr: Long, width: Int, height: Int) { private fun copyTile112x112RGB(tileX: Int, tileY: Int, currentRGBAddr: Long, prevRGBAddr: Long, width: Int, height: Int) {
val tileSize = 64 val tileSize = 112
val startX = tileX * tileSize val startX = tileX * tileSize
val startY = tileY * tileSize val startY = tileY * tileSize
@@ -4205,17 +4205,17 @@ class GraphicsJSR223Delegate(private val vm: VM) {
waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long { waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long {
// Step 1: Apply motion compensation // Step 1: Apply motion compensation
applyMotionCompensation64x64RGB(tileX, tileY, mvX, mvY, currentRGBAddr, prevRGBAddr, width, height) applyMotionCompensation112x112RGB(tileX, tileY, mvX, mvY, currentRGBAddr, prevRGBAddr, width, height)
// Step 2: Add DWT residual (same as intra but add to existing pixels) // Step 2: Add DWT residual (same as intra but add to existing pixels)
return decodeDWTIntraTileRGB(readPtr, tileX, tileY, currentRGBAddr, width, height, qY, qCo, qCg, rcf, return decodeDWTIntraTileRGB(readPtr, tileX, tileY, currentRGBAddr, width, height, qY, qCo, qCg, rcf,
waveletFilter, decompLevels, isLossless, tavVersion) waveletFilter, decompLevels, isLossless, tavVersion)
} }
private fun applyMotionCompensation64x64RGB(tileX: Int, tileY: Int, mvX: Int, mvY: Int, private fun applyMotionCompensation112x112RGB(tileX: Int, tileY: Int, mvX: Int, mvY: Int,
currentRGBAddr: Long, prevRGBAddr: Long, currentRGBAddr: Long, prevRGBAddr: Long,
width: Int, height: Int) { width: Int, height: Int) {
val tileSize = 64 val tileSize = 112
val startX = tileX * tileSize val startX = tileX * tileSize
val startY = tileY * tileSize val startY = tileY * tileSize

View File

@@ -73,7 +73,7 @@ static inline float float16_to_float(uint16_t hbits) {
// Version 1: YCoCg-R (default) // Version 1: YCoCg-R (default)
// Version 2: ICtCp (--ictcp flag) // Version 2: ICtCp (--ictcp flag)
// Tile encoding modes (64x64 tiles) // Tile encoding modes (112x112 tiles)
#define TAV_MODE_SKIP 0x00 // Skip tile (copy from reference) #define TAV_MODE_SKIP 0x00 // Skip tile (copy from reference)
#define TAV_MODE_INTRA 0x01 // Intra DWT coding (I-frame tiles) #define TAV_MODE_INTRA 0x01 // Intra DWT coding (I-frame tiles)
#define TAV_MODE_INTER 0x02 // Inter DWT coding with motion compensation #define TAV_MODE_INTER 0x02 // Inter DWT coding with motion compensation
@@ -87,9 +87,9 @@ static inline float float16_to_float(uint16_t hbits) {
#define TAV_PACKET_SYNC 0xFF // Sync packet #define TAV_PACKET_SYNC 0xFF // Sync packet
// DWT settings // DWT settings
#define TILE_SIZE 64 #define TILE_SIZE 112 // 112x112 tiles - perfect fit for TSVM 560x448 (GCD = 112)
#define MAX_DECOMP_LEVELS 4 #define MAX_DECOMP_LEVELS 6 // Can go deeper: 112→56→28→14→7→3→1
#define DEFAULT_DECOMP_LEVELS 3 #define DEFAULT_DECOMP_LEVELS 4 // Increased default for better compression
// Wavelet filter types // Wavelet filter types
#define WAVELET_5_3_REVERSIBLE 0 // Lossless capable #define WAVELET_5_3_REVERSIBLE 0 // Lossless capable
@@ -101,6 +101,18 @@ static inline float float16_to_float(uint16_t hbits) {
#define DEFAULT_FPS 30 #define DEFAULT_FPS 30
#define DEFAULT_QUALITY 2 #define DEFAULT_QUALITY 2
// Audio/subtitle constants (reused from TEV)
#define MP2_DEFAULT_PACKET_SIZE 1152
#define MAX_SUBTITLE_LENGTH 2048
// Subtitle structure
typedef struct subtitle_entry {
int start_frame;
int end_frame;
char *text;
struct subtitle_entry *next;
} subtitle_entry_t;
static void generate_random_filename(char *filename) { static void generate_random_filename(char *filename) {
srand(time(NULL)); srand(time(NULL));
@@ -208,8 +220,18 @@ typedef struct {
dwt_tile_t *tiles; dwt_tile_t *tiles;
motion_vector_t *motion_vectors; motion_vector_t *motion_vectors;
// Audio processing // Audio processing (expanded from TEV)
size_t audio_remaining; size_t audio_remaining;
uint8_t *mp2_buffer;
size_t mp2_buffer_size;
int mp2_packet_size;
int mp2_rate_index;
int target_audio_buffer_size;
// Subtitle processing
subtitle_entry_t *subtitles;
subtitle_entry_t *current_subtitle;
int subtitle_visible;
// Compression // Compression
ZSTD_CCtx *zstd_ctx; ZSTD_CCtx *zstd_ctx;
@@ -245,13 +267,27 @@ static void rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg, int
static void dwt_2d_forward(float *tile_data, int levels, int filter_type); static void dwt_2d_forward(float *tile_data, int levels, int filter_type);
static void dwt_2d_inverse(dwt_tile_t *tile, float *output, int filter_type); static void dwt_2d_inverse(dwt_tile_t *tile, float *output, int filter_type);
static void quantize_subbands(dwt_tile_t *tile, int q_y, int q_co, int q_cg, float rcf); static void quantize_subbands(dwt_tile_t *tile, int q_y, int q_co, int q_cg, float rcf);
static int estimate_motion_64x64(const float *current, const float *reference, static int estimate_motion_112x112(const float *current, const float *reference,
int width, int height, int tile_x, int tile_y, int width, int height, int tile_x, int tile_y,
motion_vector_t *mv); motion_vector_t *mv);
static size_t compress_tile_data(tav_encoder_t *enc, const dwt_tile_t *tiles, static size_t compress_tile_data(tav_encoder_t *enc, const dwt_tile_t *tiles,
const motion_vector_t *mvs, int num_tiles, const motion_vector_t *mvs, int num_tiles,
uint8_t packet_type); uint8_t packet_type);
// Audio and subtitle processing prototypes (from TEV)
static int start_audio_conversion(tav_encoder_t *enc);
static int get_mp2_packet_size(uint8_t *header);
static int mp2_packet_size_to_rate_index(int packet_size, int is_mono);
static int process_audio(tav_encoder_t *enc, int frame_num, FILE *output);
static subtitle_entry_t* parse_subtitle_file(const char *filename, int fps);
static subtitle_entry_t* parse_srt_file(const char *filename, int fps);
static subtitle_entry_t* parse_smi_file(const char *filename, int fps);
static int srt_time_to_frame(const char *time_str, int fps);
static int sami_ms_to_frame(int milliseconds, int fps);
static void free_subtitle_list(subtitle_entry_t *list);
static int write_subtitle_packet(FILE *output, uint32_t index, uint8_t opcode, const char *text);
static int process_subtitles(tav_encoder_t *enc, int frame_num, FILE *output);
// Show usage information // Show usage information
static void show_usage(const char *program_name) { static void show_usage(const char *program_name) {
printf("TAV DWT-based Video Encoder\n"); printf("TAV DWT-based Video Encoder\n");
@@ -264,7 +300,7 @@ static void show_usage(const char *program_name) {
printf(" -q, --quality N Quality level 0-5 (default: 2)\n"); printf(" -q, --quality N Quality level 0-5 (default: 2)\n");
printf(" -Q, --quantizer Y,Co,Cg Quantizer levels 0-100 for each channel\n"); printf(" -Q, --quantizer Y,Co,Cg Quantizer levels 0-100 for each channel\n");
printf(" -w, --wavelet N Wavelet filter: 0=5/3 reversible, 1=9/7 irreversible (default: 1)\n"); printf(" -w, --wavelet N Wavelet filter: 0=5/3 reversible, 1=9/7 irreversible (default: 1)\n");
printf(" -d, --decomp N Decomposition levels 1-4 (default: 3)\n"); printf(" -d, --decomp N Decomposition levels 1-6 (default: 4)\n");
printf(" -b, --bitrate N Target bitrate in kbps (enables bitrate control mode)\n"); printf(" -b, --bitrate N Target bitrate in kbps (enables bitrate control mode)\n");
printf(" -p, --progressive Use progressive scan (default: interlaced)\n"); printf(" -p, --progressive Use progressive scan (default: interlaced)\n");
printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n"); printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n");
@@ -296,7 +332,7 @@ static void show_usage(const char *program_name) {
} }
printf("\n\nFeatures:\n"); printf("\n\nFeatures:\n");
printf(" - 64x64 DWT tiles with multi-resolution encoding\n"); printf(" - 112x112 DWT tiles with multi-resolution encoding\n");
printf(" - Full resolution YCoCg-R color space\n"); printf(" - Full resolution YCoCg-R color space\n");
printf(" - Progressive transmission and ROI coding\n"); printf(" - Progressive transmission and ROI coding\n");
printf(" - Motion compensation with ±16 pixel search range\n"); printf(" - Motion compensation with ±16 pixel search range\n");
@@ -304,7 +340,7 @@ static void show_usage(const char *program_name) {
printf("\nExamples:\n"); printf("\nExamples:\n");
printf(" %s -i input.mp4 -o output.mv3 # Default settings\n", program_name); printf(" %s -i input.mp4 -o output.mv3 # Default settings\n", program_name);
printf(" %s -i input.mkv -q 3 -w 1 -d 4 -o output.mv3 # High quality with 9/7 wavelet\n", program_name); printf(" %s -i input.mkv -q 3 -w 1 -d 6 -o output.mv3 # Maximum quality with 9/7 wavelet\n", program_name);
printf(" %s -i input.avi --lossless -o output.mv3 # Lossless encoding\n", program_name); printf(" %s -i input.avi --lossless -o output.mv3 # Lossless encoding\n", program_name);
printf(" %s -i input.mp4 -b 800 -o output.mv3 # 800 kbps bitrate target\n", program_name); printf(" %s -i input.mp4 -b 800 -o output.mv3 # 800 kbps bitrate target\n", program_name);
printf(" %s -i input.webm -S subs.srt -o output.mv3 # With subtitles\n", program_name); printf(" %s -i input.webm -S subs.srt -o output.mv3 # With subtitles\n", program_name);
@@ -487,9 +523,9 @@ static void dwt_97_forward_1d(float *data, int length) {
free(temp); free(temp);
} }
// 2D DWT forward transform for 64x64 tile // 2D DWT forward transform for 112x112 tile
static void dwt_2d_forward(float *tile_data, int levels, int filter_type) { static void dwt_2d_forward(float *tile_data, int levels, int filter_type) {
const int size = 64; const int size = TILE_SIZE;
float *temp_row = malloc(size * sizeof(float)); float *temp_row = malloc(size * sizeof(float));
float *temp_col = malloc(size * sizeof(float)); float *temp_col = malloc(size * sizeof(float));
@@ -565,7 +601,7 @@ static size_t serialize_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
} }
// Quantize and serialize DWT coefficients // Quantize and serialize DWT coefficients
const int tile_size = 64 * 64; const int tile_size = TILE_SIZE * TILE_SIZE;
int16_t *quantized_y = malloc(tile_size * sizeof(int16_t)); int16_t *quantized_y = malloc(tile_size * sizeof(int16_t));
int16_t *quantized_co = malloc(tile_size * sizeof(int16_t)); int16_t *quantized_co = malloc(tile_size * sizeof(int16_t));
int16_t *quantized_cg = malloc(tile_size * sizeof(int16_t)); int16_t *quantized_cg = malloc(tile_size * sizeof(int16_t));
@@ -609,7 +645,7 @@ static size_t serialize_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
// Compress and write frame data // Compress and write frame data
static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type) { static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type) {
// Calculate total uncompressed size // Calculate total uncompressed size
const size_t max_tile_size = 9 + (64 * 64 * 3 * sizeof(int16_t)); // header + 3 channels of coefficients const size_t max_tile_size = 9 + (TILE_SIZE * TILE_SIZE * 3 * sizeof(int16_t)); // header + 3 channels of coefficients
const size_t total_uncompressed_size = enc->tiles_x * enc->tiles_y * max_tile_size; const size_t total_uncompressed_size = enc->tiles_x * enc->tiles_y * max_tile_size;
// Allocate buffer for uncompressed tile data // Allocate buffer for uncompressed tile data
@@ -625,17 +661,17 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
uint8_t mode = TAV_MODE_INTRA; // For now, all tiles are INTRA uint8_t mode = TAV_MODE_INTRA; // For now, all tiles are INTRA
// Extract tile data (already processed) // Extract tile data (already processed)
float tile_y_data[64 * 64]; float tile_y_data[TILE_SIZE * TILE_SIZE];
float tile_co_data[64 * 64]; float tile_co_data[TILE_SIZE * TILE_SIZE];
float tile_cg_data[64 * 64]; float tile_cg_data[TILE_SIZE * TILE_SIZE];
// Extract tile data from frame buffers // Extract tile data from frame buffers
for (int y = 0; y < 64; y++) { for (int y = 0; y < TILE_SIZE; y++) {
for (int x = 0; x < 64; x++) { for (int x = 0; x < TILE_SIZE; x++) {
int src_x = tile_x * 64 + x; int src_x = tile_x * TILE_SIZE + x;
int src_y = tile_y * 64 + y; int src_y = tile_y * TILE_SIZE + y;
int src_idx = src_y * enc->width + src_x; int src_idx = src_y * enc->width + src_x;
int tile_idx_local = y * 64 + x; int tile_idx_local = y * TILE_SIZE + x;
if (src_x < enc->width && src_y < enc->height) { if (src_x < enc->width && src_y < enc->height) {
tile_y_data[tile_idx_local] = enc->current_frame_y[src_idx]; tile_y_data[tile_idx_local] = enc->current_frame_y[src_idx];
@@ -698,12 +734,12 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
return compressed_size + 5; // packet type + size field + compressed data return compressed_size + 5; // packet type + size field + compressed data
} }
// Motion estimation for 64x64 tiles using SAD // Motion estimation for 112x112 tiles using SAD
static int estimate_motion_64x64(const float *current, const float *reference, static int estimate_motion_112x112(const float *current, const float *reference,
int width, int height, int tile_x, int tile_y, int width, int height, int tile_x, int tile_y,
motion_vector_t *mv) { motion_vector_t *mv) {
const int tile_size = 64; const int tile_size = TILE_SIZE;
const int search_range = 16; // ±16 pixels const int search_range = 28; // ±28 pixels (increased proportionally: 16 * 112/64 = 28)
const int start_x = tile_x * tile_size; const int start_x = tile_x * tile_size;
const int start_y = tile_y * tile_size; const int start_y = tile_y * tile_size;
@@ -1131,6 +1167,7 @@ static int start_video_conversion(tav_encoder_t *enc) {
// Start audio conversion // Start audio conversion
static int start_audio_conversion(tav_encoder_t *enc) { static int start_audio_conversion(tav_encoder_t *enc) {
return 1;
if (!enc->has_audio) return 1; if (!enc->has_audio) return 1;
char command[2048]; char command[2048];
@@ -1151,6 +1188,400 @@ static int start_audio_conversion(tav_encoder_t *enc) {
return 0; return 0;
} }
// Work out the byte size of one MP2 packet from its header (copied from TEV).
// Only header[2] is read: bits 7..4 select the bitrate, bits 3..2 select the
// sampling frequency and bit 1 is the padding flag.
// Falls back to MP2_DEFAULT_PACKET_SIZE whenever a lookup yields no usable value.
static int get_mp2_packet_size(uint8_t *header) {
    static const int kbps_table[15] = {0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384};
    static const int hz_table[4] = {44100, 48000, 32000, 0};

    const uint8_t flags = header[2];

    // Index 15 lies outside the 15-entry table
    const int kbps_idx = (flags >> 4) & 0x0F;
    if (kbps_idx >= 15) {
        return MP2_DEFAULT_PACKET_SIZE;
    }

    const int kbps = kbps_table[kbps_idx];
    if (kbps == 0) {
        return MP2_DEFAULT_PACKET_SIZE;
    }

    const int hz = hz_table[(flags >> 2) & 0x03];
    if (hz == 0) {
        return MP2_DEFAULT_PACKET_SIZE;
    }

    const int pad = (flags >> 1) & 0x01;
    return (144 * kbps * 1000) / hz + pad;
}
// Translate an MP2 packet size into an index for MP2_RATE_TABLE (copied from TEV).
// Returns the first bucket whose upper bound is not exceeded:
//   <=576 -> 0 (128k), <=720 -> 1 (160k), <=1008 -> 2 (224k), <=1440 -> 3 (320k), else 4 (384k)
// is_mono is accepted for interface compatibility; it does not influence the result.
static int mp2_packet_size_to_rate_index(int packet_size, int is_mono) {
    static const int upper_bounds[4] = {576, 720, 1008, 1440};

    (void)is_mono;  // mono and stereo map to the same index

    for (int idx = 0; idx < 4; idx++) {
        if (packet_size <= upper_bounds[idx]) {
            return idx;
        }
    }
    return 4;
}
// Turn an SRT timestamp of the form "HH:MM:SS,mmm" into a frame number (copied from TEV).
// Returns -1 when the string does not yield all four numeric fields;
// otherwise the elapsed time multiplied by fps, rounded to the nearest frame.
static int srt_time_to_frame(const char *time_str, int fps) {
    int hh, mm, ss, ms;

    const int fields = sscanf(time_str, "%d:%d:%d,%d", &hh, &mm, &ss, &ms);
    if (fields != 4) {
        return -1;
    }

    const double elapsed = hh * 3600.0 + mm * 60.0 + ss + ms / 1000.0;
    return (int)(elapsed * fps + 0.5);  // +0.5 rounds to the nearest frame
}
// Map a SAMI timestamp in milliseconds onto a frame number (copied from TEV).
// The result is the elapsed time in seconds multiplied by fps, rounded to the nearest frame.
static int sami_ms_to_frame(int milliseconds, int fps) {
    const double elapsed = milliseconds / 1000.0;
    const double frame_pos = elapsed * fps + 0.5;  // +0.5 rounds to the nearest frame
    return (int)frame_pos;
}
// Parse SubRip subtitle file (copied from TEV)
//
// Reads `filename` line by line with a small state machine:
//   0 = expecting the cue index (any non-empty line starts a new entry)
//   1 = expecting the "start --> end" time range
//   2 = collecting text lines until a blank line
//   3 = skipping a malformed cue until the next blank line
//
// Multiple text lines of one cue are joined with the two-character marker
// backslash + 'n' (not a real newline).
//
// Returns the head of a singly linked list of entries (to be released with
// free_subtitle_list), or NULL when the file cannot be opened or contains no
// usable cue. On an allocation failure parsing stops and the entries gathered
// so far are returned.
static subtitle_entry_t* parse_srt_file(const char *filename, int fps) {
    FILE *file = fopen(filename, "r");
    if (!file) {
        fprintf(stderr, "Failed to open subtitle file: %s\n", filename);
        return NULL;
    }

    subtitle_entry_t *head = NULL;
    subtitle_entry_t *tail = NULL;

    char line[1024];
    int state = 0;  // 0=index, 1=time, 2=text, 3=skip to blank line
    subtitle_entry_t *current_entry = NULL;
    char *text_buffer = NULL;
    size_t text_buffer_size = 0;

    while (fgets(line, sizeof(line), file)) {
        // Remove trailing newline (and the carriage return of CRLF files)
        size_t len = strlen(line);
        if (len > 0 && line[len-1] == '\n') {
            line[len-1] = '\0';
            len--;
        }
        if (len > 0 && line[len-1] == '\r') {
            line[len-1] = '\0';
            len--;
        }

        if (state == 0) {  // Expecting subtitle index
            if (len == 0) continue;  // Skip empty lines

            // Create new subtitle entry; the index value itself is not used
            current_entry = calloc(1, sizeof(subtitle_entry_t));
            if (!current_entry) break;
            state = 1;

        } else if (state == 1) {  // Expecting time range
            char start_time[32], end_time[32];
            if (sscanf(line, "%31s --> %31s", start_time, end_time) == 2) {
                current_entry->start_frame = srt_time_to_frame(start_time, fps);
                current_entry->end_frame = srt_time_to_frame(end_time, fps);

                if (current_entry->start_frame < 0 || current_entry->end_frame < 0) {
                    free(current_entry);
                    current_entry = NULL;
                    state = 3;  // Skip to next blank line
                    continue;
                }

                // Initialize text buffer
                text_buffer_size = 256;
                text_buffer = malloc(text_buffer_size);
                if (!text_buffer) {
                    free(current_entry);
                    current_entry = NULL;
                    fprintf(stderr, "Memory allocation failed while parsing subtitles\n");
                    break;
                }
                text_buffer[0] = '\0';
                state = 2;
            } else {
                free(current_entry);
                current_entry = NULL;
                state = 3;  // Skip malformed entry
            }

        } else if (state == 2) {  // Collecting subtitle text
            if (len == 0) {
                // End of subtitle text
                current_entry->text = strdup(text_buffer);
                free(text_buffer);
                text_buffer = NULL;

                // Never link an entry without text: consumers call strlen() on it
                if (!current_entry->text) {
                    free(current_entry);
                    current_entry = NULL;
                    fprintf(stderr, "Memory allocation failed while parsing subtitles\n");
                    break;
                }

                // Add to list
                if (!head) {
                    head = current_entry;
                    tail = current_entry;
                } else {
                    tail->next = current_entry;
                    tail = current_entry;
                }
                current_entry = NULL;
                state = 0;
            } else {
                // Append text line
                size_t current_len = strlen(text_buffer);
                // The line separator is the TWO characters '\' and 'n', and it is
                // only inserted between lines; the final +1 is the terminating NUL.
                size_t separator_len = (current_len > 0) ? 2 : 0;
                size_t needed = current_len + separator_len + len + 1;

                if (needed > text_buffer_size) {
                    text_buffer_size = needed + 256;
                    char *new_buffer = realloc(text_buffer, text_buffer_size);
                    if (!new_buffer) {
                        free(text_buffer);
                        text_buffer = NULL;
                        free(current_entry);
                        current_entry = NULL;
                        fprintf(stderr, "Memory reallocation failed while parsing subtitles\n");
                        break;
                    }
                    text_buffer = new_buffer;
                }

                if (current_len > 0) {
                    strcat(text_buffer, "\\n");  // Use \n as newline marker in subtitle text
                }
                strcat(text_buffer, line);
            }

        } else if (state == 3) {  // Skip to next blank line
            if (len == 0) {
                state = 0;
            }
        }
    }

    if (current_entry && state == 2) {
        // Handle final subtitle if file doesn't end with blank line
        current_entry->text = strdup(text_buffer);
        if (current_entry->text) {
            if (!head) {
                head = current_entry;
            } else {
                tail->next = current_entry;
            }
        } else {
            free(current_entry);
            fprintf(stderr, "Memory allocation failed while parsing subtitles\n");
        }
    } else {
        // File ended right after a cue index: the half-built entry is dropped
        free(current_entry);
    }
    free(text_buffer);  // NULL unless a cue was still being collected

    fclose(file);
    return head;
}
// Parse SAMI subtitle file (simplified version from TEV)
//
// Scans the file one line at a time. A line produces an entry only when it
// contains, in this order: a "<SYNC" tag, a "Start=<int>" attribute, a "<P" tag,
// the '>' closing that tag, and a "</P>" terminator. The text between '>' and
// "</P>" must be non-empty and shorter than MAX_SUBTITLE_LENGTH.
// Every entry gets a default duration of three seconds (fps * 3 frames).
//
// Returns the head of the resulting linked list, or NULL when the file cannot
// be opened or no line matched.
static subtitle_entry_t* parse_smi_file(const char *filename, int fps) {
    FILE *fp = fopen(filename, "r");
    if (fp == NULL) {
        fprintf(stderr, "Failed to open subtitle file: %s\n", filename);
        return NULL;
    }

    subtitle_entry_t *first = NULL;
    subtitle_entry_t *last = NULL;
    char buf[2048];

    while (fgets(buf, sizeof(buf), fp) != NULL) {
        // SYNC tag carrying a Start= attribute
        char *sync_tag = strstr(buf, "<SYNC");
        if (sync_tag == NULL) continue;

        char *start_attr = strstr(sync_tag, "Start=");
        if (start_attr == NULL) continue;

        int start_ms;
        if (sscanf(start_attr, "Start=%d", &start_ms) != 1) continue;

        // P tag holding the subtitle text
        char *p_tag = strstr(sync_tag, "<P");
        if (p_tag == NULL) continue;

        char *body = strchr(p_tag, '>');
        if (body == NULL) continue;
        body++;  // step past '>'

        char *body_end = strstr(body, "</P>");
        if (body_end == NULL) continue;

        size_t body_len = body_end - body;
        if (body_len == 0 || body_len >= MAX_SUBTITLE_LENGTH) continue;

        subtitle_entry_t *node = calloc(1, sizeof(subtitle_entry_t));
        if (node == NULL) continue;

        node->start_frame = sami_ms_to_frame(start_ms, fps);
        node->end_frame = node->start_frame + fps * 3;  // Default 3 second duration
        node->text = strndup(body, body_len);

        // Append to the tail of the list
        if (first == NULL) {
            first = node;
        } else {
            last->next = node;
        }
        last = node;
    }

    fclose(fp);
    return first;
}
// Pick the subtitle parser from the file extension (copied from TEV).
// A name longer than four characters ending in ".smi" (case-insensitive) goes to
// the SAMI parser; everything else is handed to the SubRip parser.
// Returns NULL when filename is NULL, otherwise whatever the chosen parser returns.
static subtitle_entry_t* parse_subtitle_file(const char *filename, int fps) {
    if (filename == NULL) {
        return NULL;
    }

    const size_t name_len = strlen(filename);
    const int is_sami = name_len > 4 && strcasecmp(filename + name_len - 4, ".smi") == 0;

    return is_sami ? parse_smi_file(filename, fps)
                   : parse_srt_file(filename, fps);
}
// Release every entry of a subtitle list together with its text (copied from TEV).
// Passing NULL is harmless: the loop body never runs.
static void free_subtitle_list(subtitle_entry_t *list) {
    subtitle_entry_t *node = list;

    while (node != NULL) {
        subtitle_entry_t *doomed = node;
        node = node->next;  // advance before the current entry is released

        free(doomed->text);
        free(doomed);
    }
}
// Emit one subtitle packet to `output` (copied from TEV).
//
// Bytes written, in order:
//   1 byte   packet type (TAV_PACKET_SUBTITLE)
//   4 bytes  payload size as a uint32_t in host byte order
//   3 bytes  low 24 bits of `index`, least significant byte first
//   1 byte   opcode
//   N bytes  text (omitted when text is NULL or empty)
//   1 byte   NUL terminator
//
// Returns the total number of bytes the packet occupies (5 + payload size).
static int write_subtitle_packet(FILE *output, uint32_t index, uint8_t opcode, const char *text) {
    const size_t text_len = (text != NULL) ? strlen(text) : 0;
    const size_t payload_len = 3 + 1 + text_len + 1;  // index + opcode + text + NUL

    // Packet type followed by the size field
    uint8_t header[5];
    const uint32_t size_field = (uint32_t)payload_len;
    header[0] = TAV_PACKET_SUBTITLE;
    memcpy(&header[1], &size_field, sizeof(size_field));  // keeps the in-memory byte order of the uint32_t
    fwrite(header, 1, sizeof(header), output);

    // 24-bit index and the opcode
    const uint8_t prefix[4] = {
        (uint8_t)(index & 0xFF),
        (uint8_t)((index >> 8) & 0xFF),
        (uint8_t)((index >> 16) & 0xFF),
        opcode
    };
    fwrite(prefix, 1, sizeof(prefix), output);

    if (text_len > 0) {
        fwrite(text, 1, text_len, output);
    }

    const uint8_t terminator = 0;
    fwrite(&terminator, 1, 1, output);

    return (int)(1 + 4 + payload_len);  // Total bytes written
}
// Read the next slice of MP2 data and emit it as an audio packet for this frame
// (copied and adapted from TEV).
//
// On frame 0 the first four bytes of the MP2 file are inspected (the file is then
// rewound) to set the packet size and rate index, and the buffer target is set to
// four audio packets. The read buffer is allocated lazily on first use.
//
// Always returns 1, including when there is nothing to do, when reading yields no
// data and when the buffer cannot be allocated.
static int process_audio(tav_encoder_t *enc, int frame_num, FILE *output) {
    if (!enc->has_audio || !enc->mp2_file || enc->audio_remaining <= 0) {
        return 1;
    }

    // Initialize packet size on first frame
    if (frame_num == 0) {
        uint8_t probe[4];
        if (fread(probe, 1, 4, enc->mp2_file) != 4) return 1;
        fseek(enc->mp2_file, 0, SEEK_SET);  // the probed bytes are audio data as well

        enc->mp2_packet_size = get_mp2_packet_size(probe);
        int is_mono = (probe[3] >> 6) == 3;
        enc->mp2_rate_index = mp2_packet_size_to_rate_index(enc->mp2_packet_size, is_mono);
        enc->target_audio_buffer_size = 4;  // 4 audio packets in buffer
    }

    // Calculate how much audio we need for this frame
    double frame_duration = 1.0 / enc->fps;
    double samples_per_frame = 32000.0 * frame_duration;  // 32kHz sample rate
    int wanted_samples = (int)(samples_per_frame * enc->target_audio_buffer_size);
    int wanted_bytes = (wanted_samples * enc->mp2_packet_size) / 1152;  // 1152 samples per MP2 frame

    // Lazily create the read buffer with twice the per-frame target
    if (!enc->mp2_buffer) {
        enc->mp2_buffer_size = wanted_bytes * 2;  // Extra buffer space
        enc->mp2_buffer = malloc(enc->mp2_buffer_size);
        if (!enc->mp2_buffer) {
            fprintf(stderr, "Failed to allocate audio buffer\n");
            return 1;
        }
    }

    // Clamp the request to what is left in the file and to the buffer capacity
    size_t request = wanted_bytes;
    if (request > enc->audio_remaining) {
        request = enc->audio_remaining;
    }
    if (request > enc->mp2_buffer_size) {
        request = enc->mp2_buffer_size;
    }

    size_t got = fread(enc->mp2_buffer, 1, request, enc->mp2_file);
    if (got == 0) {
        return 1;  // No more audio
    }

    // Write audio packet: type byte, 32-bit length, then the payload
    uint8_t audio_packet_type = TAV_PACKET_AUDIO_MP2;
    uint32_t audio_len = (uint32_t)got;
    fwrite(&audio_packet_type, 1, 1, output);
    fwrite(&audio_len, 4, 1, output);
    fwrite(enc->mp2_buffer, 1, got, output);

    // Track audio bytes written
    enc->audio_remaining -= got;

    if (enc->verbose) {
        printf("Frame %d: Audio packet %zu bytes (remaining: %zu)\n",
               frame_num, got, enc->audio_remaining);
    }

    return 1;
}
// Emit subtitle show/hide packets that fall on the given frame
// (copied and adapted from TEV).
//
// While no subtitle is visible, the list is scanned from the last shown entry
// (or from the list head) for an entry with start_frame <= frame_num < end_frame;
// such an entry is shown with opcode 0x01 unless it is the entry already recorded
// as current. While one is visible, it is hidden with opcode 0x02 once frame_num
// reaches its end_frame.
//
// Returns 1 when the encoder has no subtitles; otherwise the number of bytes
// written to `output` for this frame (possibly 0).
static int process_subtitles(tav_encoder_t *enc, int frame_num, FILE *output) {
    if (!enc->subtitles) {
        return 1;  // No subtitles to process
    }

    int written = 0;

    // Check if we need to show a new subtitle
    if (!enc->subtitle_visible) {
        // Resume from the last shown entry, or start from the beginning if not set
        subtitle_entry_t *cursor = enc->current_subtitle ? enc->current_subtitle : enc->subtitles;

        for (; cursor && cursor->start_frame <= frame_num; cursor = cursor->next) {
            if (cursor->end_frame <= frame_num) {
                continue;  // already over, try the next one
            }

            // This subtitle should be shown
            if (cursor != enc->current_subtitle) {
                enc->current_subtitle = cursor;
                enc->subtitle_visible = 1;
                written += write_subtitle_packet(output, 0, 0x01, cursor->text);
                if (enc->verbose) {
                    printf("Frame %d: Showing subtitle: %.50s%s\n",
                           frame_num, cursor->text, strlen(cursor->text) > 50 ? "..." : "");
                }
            }
            break;
        }
    }

    // Check if we need to hide current subtitle
    if (enc->subtitle_visible && enc->current_subtitle &&
        frame_num >= enc->current_subtitle->end_frame) {
        enc->subtitle_visible = 0;
        written += write_subtitle_packet(output, 0, 0x02, NULL);
        if (enc->verbose) {
            printf("Frame %d: Hiding subtitle\n", frame_num);
        }
    }

    return written;
}
// Main function // Main function
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
generate_random_filename(TEMP_AUDIO_FILE); generate_random_filename(TEMP_AUDIO_FILE);
@@ -1231,6 +1662,9 @@ int main(int argc, char *argv[]) {
case 't': case 't':
enc->test_mode = 1; enc->test_mode = 1;
break; break;
case 'S':
enc->subtitle_file = strdup(optarg);
break;
case 1000: // --lossless case 1000: // --lossless
enc->lossless = 1; enc->lossless = 1;
enc->wavelet_filter = WAVELET_5_3_REVERSIBLE; enc->wavelet_filter = WAVELET_5_3_REVERSIBLE;
@@ -1317,6 +1751,17 @@ int main(int argc, char *argv[]) {
} }
} }
// Parse subtitles if provided
if (enc->subtitle_file) {
printf("Parsing subtitles: %s\n", enc->subtitle_file);
enc->subtitles = parse_subtitle_file(enc->subtitle_file, enc->fps);
if (!enc->subtitles) {
fprintf(stderr, "Warning: Failed to parse subtitle file\n");
} else {
printf("Loaded subtitles successfully\n");
}
}
// Write TAV header // Write TAV header
if (write_tav_header(enc) != 0) { if (write_tav_header(enc) != 0) {
fprintf(stderr, "Error: Failed to write TAV header\n"); fprintf(stderr, "Error: Failed to write TAV header\n");
@@ -1430,7 +1875,7 @@ int main(int argc, char *argv[]) {
int tile_y = tile_idx / enc->tiles_x; int tile_y = tile_idx / enc->tiles_x;
if (!is_keyframe && frame_count > 0) { if (!is_keyframe && frame_count > 0) {
estimate_motion_64x64(enc->current_frame_y, enc->previous_frame_y, estimate_motion_112x112(enc->current_frame_y, enc->previous_frame_y,
enc->width, enc->height, tile_x, tile_y, enc->width, enc->height, tile_x, tile_y,
&enc->motion_vectors[tile_idx]); &enc->motion_vectors[tile_idx]);
} else { } else {
@@ -1449,6 +1894,12 @@ int main(int argc, char *argv[]) {
break; break;
} }
else { else {
// Process audio for this frame
process_audio(enc, frame_count, enc->output_fp);
// Process subtitles for this frame
process_subtitles(enc, frame_count, enc->output_fp);
// Write a sync packet only after a video is been coded // Write a sync packet only after a video is been coded
uint8_t sync_packet = TAV_PACKET_SYNC; uint8_t sync_packet = TAV_PACKET_SYNC;
fwrite(&sync_packet, 1, 1, enc->output_fp); fwrite(&sync_packet, 1, 1, enc->output_fp);
@@ -1526,6 +1977,12 @@ static void cleanup_encoder(tav_encoder_t *enc) {
free(enc->tiles); free(enc->tiles);
free(enc->motion_vectors); free(enc->motion_vectors);
free(enc->compressed_buffer); free(enc->compressed_buffer);
free(enc->mp2_buffer);
// Free subtitle list
if (enc->subtitles) {
free_subtitle_list(enc->subtitles);
}
if (enc->zstd_ctx) { if (enc->zstd_ctx) {
ZSTD_freeCCtx(enc->zstd_ctx); ZSTD_freeCCtx(enc->zstd_ctx);