mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-07 19:51:51 +09:00
112x112 blocks for TAV, which greatly improves the encoding speed
This commit is contained in:
@@ -8,7 +8,7 @@
|
||||
|
||||
const WIDTH = 560
|
||||
const HEIGHT = 448
|
||||
const TILE_SIZE = 64 // 64x64 tiles for DWT (vs 16x16 blocks in TEV)
|
||||
const TILE_SIZE = 112 // 112x112 tiles for DWT (perfect fit for TSVM 560x448 resolution)
|
||||
const TAV_MAGIC = [0x1F, 0x54, 0x53, 0x56, 0x4D, 0x54, 0x41, 0x56] // "\x1FTSVM TAV"
|
||||
const TAV_VERSION = 1 // Initial DWT version
|
||||
const SND_BASE_ADDR = audio.getBaseAddr()
|
||||
@@ -99,6 +99,275 @@ graphics.clearPixels2(0)
|
||||
// Initialize audio
|
||||
audio.resetParams(0)
|
||||
audio.purgeQueue(0)
|
||||
audio.setPcmMode(0)
|
||||
audio.setMasterVolume(0, 255)
|
||||
|
||||
// Subtitle display functions
|
||||
/**
 * Blanks the subtitle region at the bottom of the text layer.
 *
 * Rows 29 through 32 are overwritten, 80 columns each, with spaces printed in
 * colour pair (255, 255). The console colours that were active on entry are
 * restored before returning.
 */
function clearSubtitleArea() {
    const FIRST_ROW = 29
    const LAST_ROW = 32
    const COLUMNS = 80

    // Remember the caller's colours so they can be put back afterwards
    const savedFore = con.get_color_fore()
    const savedBack = con.get_color_back()

    con.color_pair(255, 255) // transparent to clear

    let row = FIRST_ROW
    while (row <= LAST_ROW) {
        con.move(row, 1)

        let remaining = COLUMNS
        while (remaining > 0) {
            print(" ")
            remaining--
        }

        row++
    }

    con.color_pair(savedFore, savedBack)
}
|
||||
|
||||
/**
 * Counts the characters of a subtitle line that are actually drawn.
 *
 * The formatting tags <b>, <i>, </b> and </i> (matched case-insensitively)
 * occupy no screen cells and are excluded; every other character, including a
 * '<' that does not start one of those tags, counts as one.
 *
 * @param {string} line - one subtitle line, possibly containing tags
 * @returns {number} number of visible characters
 */
function getVisualLength(line) {
    // Tags are matched left to right and never overlap, so deleting them and
    // measuring what remains gives the same count as a character-by-character scan.
    const withoutTags = line.replace(/<\/?[bi]>/gi, "")
    return withoutTags.length
}
|
||||
|
||||
/**
 * Prints one subtitle line at the current cursor position, honouring the
 * <b>/<i> formatting tags.
 *
 * Plain text is drawn in yellow (231) and text inside <b> or <i> in white
 * (254), both on background 0. The line is framed by a leading 0xDE and a
 * trailing 0xDD padding glyph drawn in colour pair (0, 255).
 *
 * Tags may nest: the colour only returns to yellow once every opened tag has
 * been closed, so "<b><i>x</i>y</b>" keeps "y" white. A closing tag with no
 * matching opener simply selects yellow.
 *
 * @param {string} line - subtitle line, possibly containing formatting tags
 */
function displayFormattedLine(line) {
    let i = 0
    let openTags = 0 // how many <b>/<i> tags are currently open

    // insert initial padding block
    con.color_pair(0, 255)
    con.prnch(0xDE)
    con.color_pair(231, 0)

    while (i < line.length) {
        if (line[i] === '<') {
            // A slice that runs off the end of the line is shorter than the
            // tag and can never compare equal, so no explicit bounds check is needed.
            const three = line.substring(i, i + 3).toLowerCase()
            if (three === '<b>' || three === '<i>') {
                openTags++
                con.color_pair(254, 0) // Switch to white for formatted text
                i += 3
                continue
            }

            const four = line.substring(i, i + 4).toLowerCase()
            if (four === '</b>' || four === '</i>') {
                if (openTags > 0) openTags--
                // Only fall back to yellow when no enclosing tag remains open
                if (openTags === 0) con.color_pair(231, 0)
                i += 4
                continue
            }
        }

        // Regular character (or a '<' that is not a formatting tag)
        print(line[i])
        i++
    }

    // insert final padding block
    con.color_pair(0, 255)
    con.prnch(0xDD)
    con.color_pair(231, 0)
}
|
||||
|
||||
/**
 * Draws a (possibly multi-line) subtitle on the text layer.
 *
 * An empty or missing text just clears the subtitle area. Otherwise the text
 * is split on '\n', each line is trimmed, and non-empty lines are drawn with
 * displayFormattedLine(). Console colours active on entry are restored on exit.
 *
 * Position codes handled here:
 *   0 bottom centre (default)   1 bottom left    2 centre left
 *   3 top left                  4 top centre     5 top right
 *   6 centre right              7 bottom right   8 dead centre
 *
 * Centred layouts use one common column derived from the longest visible
 * line, so all lines share a left edge; right-aligned layouts compute the
 * column per line.
 *
 * @param {string} text - subtitle text; lines separated by '\n'
 * @param {number} [position=0] - position code, see table above
 */
function displaySubtitle(text, position = 0) {
    if (!text || text.length === 0) {
        clearSubtitleArea()
        return
    }

    // Set subtitle colors: yellow (231) on black (0)
    const oldFgColor = con.get_color_fore()
    const oldBgColor = con.get_color_back()
    con.color_pair(231, 0)

    // Split text into lines; trimmed up front so measuring and drawing agree
    const lines = text.split('\n').map(s => s.trim())

    // Longest visible width, tags excluded. Taken with a numeric max: a
    // default sort() compares as strings and would rank 9 above 11.
    const longestLineLength = lines.reduce(
        (longest, s) => Math.max(longest, getVisualLength(s)), 0)

    // Calculate the first row based on the position setting
    let startRow
    switch (position) {
        case 2: // center left
        case 6: // center right
        case 8: // dead center
            startRow = 16 - Math.floor(lines.length / 2)
            break
        case 3: // top left
        case 4: // top center
        case 5: // top right
            startRow = 2
            break
        case 0: // bottom center
        case 1: // bottom left
        case 7: // bottom right
        default:
            startRow = 32 - lines.length // Default to bottom
    }

    // Display each line
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i]
        if (line.length === 0) continue

        // Keep the row on screen (rows 1..32)
        const row = Math.min(32, Math.max(1, startRow + i))

        // Calculate column based on alignment
        let startCol
        switch (position) {
            case 1: // bottom left
            case 2: // center left
            case 3: // top left
                startCol = 1
                break
            case 5: // top right
            case 6: // center right
            case 7: // bottom right
                startCol = Math.max(1, 78 - getVisualLength(line) - 2)
                break
            case 0: // bottom center
            case 4: // top center
            case 8: // dead center
            default:
                startCol = Math.max(1, Math.floor((80 - longestLineLength - 2) / 2) + 1)
                break
        }

        con.move(row, startCol)

        // Parse and display line with formatting tag support
        displayFormattedLine(line)
    }

    con.color_pair(oldFgColor, oldBgColor)
}
|
||||
|
||||
/**
 * Reads and executes one SSF subtitle packet from the sequential reader.
 *
 * Packet layout: uint24 index (little-endian) + uint8 opcode + opcode-specific
 * arguments, `packetSize` bytes in total. Exactly `packetSize` bytes are
 * always consumed, whatever the opcode or how malformed the arguments are, so
 * the caller's packet stream stays aligned (argument terminators and unused
 * trailing bytes are skipped rather than left for the caller to trip over).
 *
 * Opcodes:
 *   SSF_OP_SHOW              - NUL-terminated text; stored and displayed
 *   SSF_OP_HIDE              - clears the current subtitle
 *   SSF_OP_MOVE              - uint8 position (0..8) + terminator; redraws
 *   SSF_OP_UPLOAD_LOW_FONT /
 *   SSF_OP_UPLOAD_HIGH_FONT  - uint16 length + font data, copied to the font area
 *   SSF_OP_NOP / unknown     - payload skipped (unknown ones logged if interactive)
 *
 * Updates the file-level subtitleText / subtitleVisible / subtitlePosition state.
 *
 * @param {number} packetSize - total size of the packet in bytes
 */
function processSubtitlePacket(packetSize) {
    // Read 24-bit index (little-endian)
    const indexByte0 = seqread.readOneByte()
    const indexByte1 = seqread.readOneByte()
    const indexByte2 = seqread.readOneByte()
    const index = indexByte0 | (indexByte1 << 8) | (indexByte2 << 16)

    const opcode = seqread.readOneByte()

    // Bytes of this packet not yet consumed (3 index bytes + 1 opcode byte read so far)
    let remainingBytes = packetSize - 4

    switch (opcode) {
        case SSF_OP_SHOW: {
            if (remainingBytes > 1) {
                const textBytes = seqread.readBytes(remainingBytes)
                let textStr = ""

                // Convert bytes to string, stopping at null terminator
                for (let i = 0; i < remainingBytes - 1; i++) { // -1 for null terminator
                    const byte = sys.peek(textBytes + i)
                    if (byte === 0) break
                    textStr += String.fromCharCode(byte)
                }

                sys.free(textBytes)
                remainingBytes = 0

                subtitleText = textStr
                subtitleVisible = true
                displaySubtitle(subtitleText, subtitlePosition)
            }
            break
        }

        case SSF_OP_HIDE: {
            subtitleVisible = false
            subtitleText = ""
            clearSubtitleArea()
            break
        }

        case SSF_OP_MOVE: {
            if (remainingBytes >= 2) { // Need at least 1 byte for position + 1 null terminator
                const newPosition = seqread.readOneByte()
                seqread.readOneByte() // Read null terminator
                remainingBytes -= 2

                // 0..8 are the position codes displaySubtitle() implements
                if (newPosition >= 0 && newPosition <= 8) {
                    subtitlePosition = newPosition

                    // Re-display current subtitle at new position if visible
                    if (subtitleVisible && subtitleText.length > 0) {
                        clearSubtitleArea()
                        displaySubtitle(subtitleText, subtitlePosition)
                    }
                }
            }
            break
        }

        case SSF_OP_UPLOAD_LOW_FONT:
        case SSF_OP_UPLOAD_HIGH_FONT: {
            // Font upload - read payload length and font data
            if (remainingBytes >= 3) { // uint16 length + at least 1 byte data
                const payloadLen = seqread.readShort()
                remainingBytes -= 2

                if (remainingBytes >= payloadLen) {
                    const fontData = seqread.readBytes(payloadLen)
                    remainingBytes -= payloadLen

                    // upload font data (at most 1920 bytes), then issue the
                    // low/high font command
                    for (let i = 0; i < Math.min(payloadLen, 1920); i++) sys.poke(-1300607 - i, sys.peek(fontData + i))
                    sys.poke(-1299460, (opcode === SSF_OP_UPLOAD_LOW_FONT) ? 18 : 19)

                    sys.free(fontData)
                }
            }
            break
        }

        case SSF_OP_NOP:
        default: {
            if (interactive && opcode !== SSF_OP_NOP) {
                serial.println(`[SUBTITLE UNKNOWN] Index: ${index}, Opcode: 0x${opcode.toString(16).padStart(2, '0')}`)
            }
            break
        }
    }

    // Discard whatever the opcode handler did not consume so the next read
    // starts at the following packet
    if (remainingBytes > 0) {
        const skipBytes = seqread.readBytes(remainingBytes)
        sys.free(skipBytes)
    }
}
|
||||
|
||||
|
||||
// TAV header structure (32 bytes vs TEV's 24 bytes)
|
||||
let header = {
|
||||
@@ -172,7 +441,7 @@ const isNTSC = (header.videoFlags & 0x02) !== 0
|
||||
const isLossless = (header.videoFlags & 0x04) !== 0
|
||||
const multiResolution = (header.videoFlags & 0x08) !== 0
|
||||
|
||||
// Calculate tile dimensions (64x64 vs TEV's 16x16 blocks)
|
||||
// Calculate tile dimensions (112x112 vs TEV's 16x16 blocks)
|
||||
const tilesX = Math.ceil(header.width / TILE_SIZE)
|
||||
const tilesY = Math.ceil(header.height / TILE_SIZE)
|
||||
const numTiles = tilesX * tilesY
|
||||
@@ -210,6 +479,9 @@ let audioBufferBytesLastFrame = 0
|
||||
let frame_cnt = 0
|
||||
let frametime = 1000000000.0 / header.fps
|
||||
let nextFrameTime = 0
|
||||
let mp2Initialised = false
|
||||
let audioFired = false
|
||||
|
||||
|
||||
// Performance tracking variables (from TEV)
|
||||
let decompressTime = 0
|
||||
@@ -374,6 +646,21 @@ try {
|
||||
console.log(`Frame ${frameCount}: Duplicating previous frame`)
|
||||
}
|
||||
|
||||
// Defer audio playback until a first frame is sent
|
||||
if (isInterlaced) {
|
||||
// fire audio after frame 1
|
||||
if (!audioFired && frameCount > 0) {
|
||||
audio.play(0)
|
||||
audioFired = true
|
||||
}
|
||||
}
|
||||
else {
|
||||
// fire audio after frame 0
|
||||
if (!audioFired) {
|
||||
audio.play(0)
|
||||
audioFired = true
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
console.log(`Frame ${frameCount}: decode failed: ${e}`)
|
||||
}
|
||||
@@ -390,38 +677,23 @@ try {
|
||||
console.log(`Frame ${frameCount}: Decompress=${decompressTime.toFixed(1)}ms, Decode=${decodeTime.toFixed(1)}ms, Upload=${uploadTime.toFixed(1)}ms, Bias=${biasTime.toFixed(1)}ms, Total=${totalTime.toFixed(1)}ms`)
|
||||
}
|
||||
|
||||
} else if (packetType === TAV_PACKET_AUDIO_MP2 && hasAudio) {
|
||||
// Audio packet - same as TEV
|
||||
let audioPtr = seqread.readBytes(compressedSize)
|
||||
} else if (packetType === TAV_PACKET_AUDIO_MP2) {
|
||||
// MP2 Audio packet
|
||||
let audioLen = seqread.readInt()
|
||||
|
||||
// Send to audio hardware
|
||||
for (let i = 0; i < compressedSize; i++) {
|
||||
vm.poke(SND_BASE_ADDR + audioBufferBytesLastFrame + i, sys.peek(audioPtr + i))
|
||||
if (!mp2Initialised) {
|
||||
mp2Initialised = true
|
||||
audio.mp2Init()
|
||||
}
|
||||
audioBufferBytesLastFrame += compressedSize
|
||||
sys.free(audioPtr)
|
||||
|
||||
} else if (packetType === TAV_PACKET_SUBTITLE && hasSubtitles) {
|
||||
seqread.readBytes(audioLen, SND_BASE_ADDR - 2368)
|
||||
audio.mp2Decode()
|
||||
audio.mp2UploadDecoded(0)
|
||||
|
||||
} else if (packetType === TAV_PACKET_SUBTITLE) {
|
||||
// Subtitle packet - same format as TEV
|
||||
let subtitlePtr = seqread.readBytes(compressedSize)
|
||||
|
||||
// Process subtitle (simplified)
|
||||
if (compressedSize >= 4) {
|
||||
const index = (sys.peek(subtitlePtr) << 16) | (sys.peek(subtitlePtr + 1) << 8) | sys.peek(subtitlePtr + 2)
|
||||
const opcode = sys.peek(subtitlePtr + 3)
|
||||
|
||||
if (opcode === SSF_OP_SHOW && compressedSize > 4) {
|
||||
let text = ""
|
||||
for (let i = 4; i < compressedSize && sys.peek(subtitlePtr + i) !== 0; i++) {
|
||||
text += String.fromCharCode(sys.peek(subtitlePtr + i))
|
||||
}
|
||||
subtitleText = text
|
||||
subtitleVisible = true
|
||||
} else if (opcode === SSF_OP_HIDE) {
|
||||
subtitleVisible = false
|
||||
}
|
||||
}
|
||||
sys.free(subtitlePtr)
|
||||
let packetSize = seqread.readInt()
|
||||
processSubtitlePacket(packetSize)
|
||||
} else if (packetType == 0x00) {
|
||||
// Silently discard, faulty subtitle creation can cause this as 0x00 is used as an argument terminator
|
||||
} else {
|
||||
@@ -463,14 +735,13 @@ finally {
|
||||
sys.free(RGB_BUFFER_A)
|
||||
sys.free(RGB_BUFFER_B)
|
||||
|
||||
graphics.setGraphicsMode(0) // Return to text mode
|
||||
con.curs_set(1)
|
||||
con.clear()
|
||||
|
||||
if (errorlevel === 0) {
|
||||
console.log(`Playback completed: ${frameCount} frames`)
|
||||
} else {
|
||||
console.log(`Playbook failed with error ${errorlevel}`)
|
||||
console.log(`Playback failed with error ${errorlevel}`)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -807,6 +807,7 @@ transmission capability, and region-of-interest coding.
|
||||
- Version 1.0: Initial DWT-based implementation with 5/3 reversible filter
|
||||
- Version 1.1: Added 9/7 irreversible filter for higher compression
|
||||
- Version 1.2: Multi-resolution pyramid encoding with up to 4 decomposition levels
|
||||
- Version 1.3: Optimized 112x112 tiles for TSVM resolution with up to 6 decomposition levels
|
||||
|
||||
# File Structure
|
||||
\x1F T S V M T A V
|
||||
@@ -852,7 +853,7 @@ transmission capability, and region-of-interest coding.
|
||||
uint32 Compressed Size
|
||||
* Zstd-compressed Block Data
|
||||
|
||||
## Block Data (per 64x64 tile)
|
||||
## Block Data (per 112x112 tile)
|
||||
uint8 Mode: encoding mode
|
||||
0x00 = SKIP (copy from previous frame)
|
||||
0x01 = INTRA (DWT-coded, no prediction)
|
||||
@@ -885,10 +886,12 @@ transmission capability, and region-of-interest coding.
|
||||
* Provides better energy compaction than 5/3 but lossy reconstruction
|
||||
|
||||
### Decomposition Levels
|
||||
- Level 1: 64x64 → 32x32 (LL) + 3×32x32 subbands (LH,HL,HH)
|
||||
- Level 2: 32x32 → 16x16 (LL) + 3×16x16 subbands
|
||||
- Level 3: 16x16 → 8x8 (LL) + 3×8x8 subbands
|
||||
- Level 4: 8x8 → 4x4 (LL) + 3×4x4 subbands
|
||||
- Level 1: 112x112 → 56x56 (LL) + 3×56x56 subbands (LH,HL,HH)
|
||||
- Level 2: 56x56 → 28x28 (LL) + 3×28x28 subbands
|
||||
- Level 3: 28x28 → 14x14 (LL) + 3×14x14 subbands
|
||||
- Level 4: 14x14 → 7x7 (LL) + 3×7x7 subbands
|
||||
- Level 5: 7x7 → 3x3 (LL) + 3×3x3 subbands
|
||||
- Level 6: 3x3 → 1x1 (LL) + 3×1x1 subbands (maximum)
|
||||
|
||||
### Quantization Strategy
|
||||
TAV uses different quantization steps for each subband based on human visual
|
||||
@@ -904,9 +907,11 @@ When enabled, coefficients are transmitted in order of visual importance:
|
||||
3. Higher frequency subbands for refinement
|
||||
|
||||
## Motion Compensation
|
||||
- Search range: ±16 pixels (larger than TEV due to 64x64 tiles)
|
||||
- Search range: ±28 pixels (optimized for 112x112 tiles)
|
||||
- Sub-pixel precision: 1/4 pixel with bilinear interpolation
|
||||
- Tile size: 64x64 pixels (4x larger than TEV blocks)
|
||||
- Tile size: 112x112 pixels (perfect fit for TSVM 560x448 resolution)
|
||||
* Exactly 5×4 = 20 tiles per frame (560÷112 = 5, 448÷112 = 4)
|
||||
* No partial tiles needed - optimal for processing efficiency
|
||||
- Uses Sum of Absolute Differences (SAD) for motion estimation
|
||||
- Overlapped block motion compensation (OBMC) for smooth boundaries
|
||||
|
||||
@@ -917,7 +922,7 @@ TAV operates in YCoCg-R colour space with full resolution channels:
|
||||
- Cg: Green-Magenta chroma (full resolution, very aggressive quantization by default)
|
||||
|
||||
## Compression Features
|
||||
- 64x64 DWT tiles vs 16x16 DCT blocks in TEV
|
||||
- 112x112 DWT tiles vs 16x16 DCT blocks in TEV
|
||||
- Multi-resolution representation enables scalable decoding
|
||||
- Better frequency localization than DCT
|
||||
- Reduced blocking artifacts due to overlapping basis functions
|
||||
|
||||
@@ -3930,8 +3930,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
var readPtr = blockDataPtr
|
||||
|
||||
try {
|
||||
val tilesX = (width + 63) / 64 // 64x64 tiles
|
||||
val tilesY = (height + 63) / 64
|
||||
val tilesX = (width + 111) / 112 // 112x112 tiles
|
||||
val tilesY = (height + 111) / 112
|
||||
|
||||
// Process each tile
|
||||
for (tileY in 0 until tilesY) {
|
||||
@@ -3949,8 +3949,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
|
||||
when (mode) {
|
||||
0x00 -> { // TAV_MODE_SKIP
|
||||
// Copy 64x64 tile from previous frame to current frame
|
||||
copyTile64x64RGB(tileX, tileY, currentRGBAddr, prevRGBAddr, width, height)
|
||||
// Copy 112x112 tile from previous frame to current frame
|
||||
copyTile112x112RGB(tileX, tileY, currentRGBAddr, prevRGBAddr, width, height)
|
||||
}
|
||||
0x01 -> { // TAV_MODE_INTRA
|
||||
// Decode DWT coefficients directly to RGB buffer
|
||||
@@ -3967,8 +3967,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
}
|
||||
0x03 -> { // TAV_MODE_MOTION
|
||||
// Motion compensation only (no residual)
|
||||
applyMotionCompensation64x64RGB(tileX, tileY, mvX, mvY,
|
||||
currentRGBAddr, prevRGBAddr, width, height)
|
||||
applyMotionCompensation112x112RGB(tileX, tileY, mvX, mvY,
|
||||
currentRGBAddr, prevRGBAddr, width, height)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3982,7 +3982,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
private fun decodeDWTIntraTileRGB(readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long,
|
||||
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, rcf: Float,
|
||||
waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long {
|
||||
val tileSize = 64
|
||||
val tileSize = 112
|
||||
val coeffCount = tileSize * tileSize
|
||||
var ptr = readPtr
|
||||
|
||||
@@ -4043,7 +4043,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
|
||||
private fun convertYCoCgTileToRGB(tileX: Int, tileY: Int, yTile: FloatArray, coTile: FloatArray, cgTile: FloatArray,
|
||||
rgbAddr: Long, width: Int, height: Int) {
|
||||
val tileSize = 64
|
||||
val tileSize = 112
|
||||
val startX = tileX * tileSize
|
||||
val startY = tileY * tileSize
|
||||
|
||||
@@ -4078,7 +4078,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
|
||||
private fun convertICtCpTileToRGB(tileX: Int, tileY: Int, iTile: FloatArray, ctTile: FloatArray, cpTile: FloatArray,
|
||||
rgbAddr: Long, width: Int, height: Int) {
|
||||
val tileSize = 64
|
||||
val tileSize = 112
|
||||
val startX = tileX * tileSize
|
||||
val startY = tileY * tileSize
|
||||
|
||||
@@ -4127,7 +4127,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
|
||||
private fun addYCoCgResidualToRGBTile(tileX: Int, tileY: Int, yRes: FloatArray, coRes: FloatArray, cgRes: FloatArray,
|
||||
rgbAddr: Long, width: Int, height: Int) {
|
||||
val tileSize = 64
|
||||
val tileSize = 112
|
||||
val startX = tileX * tileSize
|
||||
val startY = tileY * tileSize
|
||||
|
||||
@@ -4172,8 +4172,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
}
|
||||
|
||||
// Helper functions (simplified versions of existing DWT functions)
|
||||
private fun copyTile64x64RGB(tileX: Int, tileY: Int, currentRGBAddr: Long, prevRGBAddr: Long, width: Int, height: Int) {
|
||||
val tileSize = 64
|
||||
private fun copyTile112x112RGB(tileX: Int, tileY: Int, currentRGBAddr: Long, prevRGBAddr: Long, width: Int, height: Int) {
|
||||
val tileSize = 112
|
||||
val startX = tileX * tileSize
|
||||
val startY = tileY * tileSize
|
||||
|
||||
@@ -4205,17 +4205,17 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long {
|
||||
|
||||
// Step 1: Apply motion compensation
|
||||
applyMotionCompensation64x64RGB(tileX, tileY, mvX, mvY, currentRGBAddr, prevRGBAddr, width, height)
|
||||
applyMotionCompensation112x112RGB(tileX, tileY, mvX, mvY, currentRGBAddr, prevRGBAddr, width, height)
|
||||
|
||||
// Step 2: Add DWT residual (same as intra but add to existing pixels)
|
||||
return decodeDWTIntraTileRGB(readPtr, tileX, tileY, currentRGBAddr, width, height, qY, qCo, qCg, rcf,
|
||||
waveletFilter, decompLevels, isLossless, tavVersion)
|
||||
}
|
||||
|
||||
private fun applyMotionCompensation64x64RGB(tileX: Int, tileY: Int, mvX: Int, mvY: Int,
|
||||
private fun applyMotionCompensation112x112RGB(tileX: Int, tileY: Int, mvX: Int, mvY: Int,
|
||||
currentRGBAddr: Long, prevRGBAddr: Long,
|
||||
width: Int, height: Int) {
|
||||
val tileSize = 64
|
||||
val tileSize = 112
|
||||
val startX = tileX * tileSize
|
||||
val startY = tileY * tileSize
|
||||
|
||||
|
||||
@@ -73,7 +73,7 @@ static inline float float16_to_float(uint16_t hbits) {
|
||||
// Version 1: YCoCg-R (default)
|
||||
// Version 2: ICtCp (--ictcp flag)
|
||||
|
||||
// Tile encoding modes (64x64 tiles)
|
||||
// Tile encoding modes (112x112 tiles)
|
||||
#define TAV_MODE_SKIP 0x00 // Skip tile (copy from reference)
|
||||
#define TAV_MODE_INTRA 0x01 // Intra DWT coding (I-frame tiles)
|
||||
#define TAV_MODE_INTER 0x02 // Inter DWT coding with motion compensation
|
||||
@@ -87,9 +87,9 @@ static inline float float16_to_float(uint16_t hbits) {
|
||||
#define TAV_PACKET_SYNC 0xFF // Sync packet
|
||||
|
||||
// DWT settings
|
||||
#define TILE_SIZE 64
|
||||
#define MAX_DECOMP_LEVELS 4
|
||||
#define DEFAULT_DECOMP_LEVELS 3
|
||||
#define TILE_SIZE 112 // 112x112 tiles - perfect fit for TSVM 560x448 (GCD = 112)
|
||||
#define MAX_DECOMP_LEVELS 6 // Can go deeper: 112→56→28→14→7→3→1
|
||||
#define DEFAULT_DECOMP_LEVELS 4 // Increased default for better compression
|
||||
|
||||
// Wavelet filter types
|
||||
#define WAVELET_5_3_REVERSIBLE 0 // Lossless capable
|
||||
@@ -101,6 +101,18 @@ static inline float float16_to_float(uint16_t hbits) {
|
||||
#define DEFAULT_FPS 30
|
||||
#define DEFAULT_QUALITY 2
|
||||
|
||||
// Audio/subtitle constants (reused from TEV)
|
||||
#define MP2_DEFAULT_PACKET_SIZE 1152
|
||||
#define MAX_SUBTITLE_LENGTH 2048
|
||||
|
||||
// Subtitle structure
|
||||
// One node of a singly linked list of subtitle cues.
struct subtitle_entry {
    int start_frame;               // frame number at which the cue begins (presumably inclusive - confirm against the parser)
    int end_frame;                 // frame number at which the cue ends
    char *text;                    // cue text; ownership/allocation is decided by the code that builds the list
    struct subtitle_entry *next;   // following cue, or NULL for the last one (assumed list terminator)
};
typedef struct subtitle_entry subtitle_entry_t;
|
||||
|
||||
static void generate_random_filename(char *filename) {
|
||||
srand(time(NULL));
|
||||
|
||||
@@ -208,8 +220,18 @@ typedef struct {
|
||||
dwt_tile_t *tiles;
|
||||
motion_vector_t *motion_vectors;
|
||||
|
||||
// Audio processing
|
||||
// Audio processing (expanded from TEV)
|
||||
size_t audio_remaining;
|
||||
uint8_t *mp2_buffer;
|
||||
size_t mp2_buffer_size;
|
||||
int mp2_packet_size;
|
||||
int mp2_rate_index;
|
||||
int target_audio_buffer_size;
|
||||
|
||||
// Subtitle processing
|
||||
subtitle_entry_t *subtitles;
|
||||
subtitle_entry_t *current_subtitle;
|
||||
int subtitle_visible;
|
||||
|
||||
// Compression
|
||||
ZSTD_CCtx *zstd_ctx;
|
||||
@@ -245,13 +267,27 @@ static void rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg, int
|
||||
static void dwt_2d_forward(float *tile_data, int levels, int filter_type);
|
||||
static void dwt_2d_inverse(dwt_tile_t *tile, float *output, int filter_type);
|
||||
static void quantize_subbands(dwt_tile_t *tile, int q_y, int q_co, int q_cg, float rcf);
|
||||
static int estimate_motion_64x64(const float *current, const float *reference,
|
||||
int width, int height, int tile_x, int tile_y,
|
||||
motion_vector_t *mv);
|
||||
static int estimate_motion_112x112(const float *current, const float *reference,
|
||||
int width, int height, int tile_x, int tile_y,
|
||||
motion_vector_t *mv);
|
||||
static size_t compress_tile_data(tav_encoder_t *enc, const dwt_tile_t *tiles,
|
||||
const motion_vector_t *mvs, int num_tiles,
|
||||
uint8_t packet_type);
|
||||
|
||||
// Audio and subtitle processing prototypes (from TEV)
|
||||
static int start_audio_conversion(tav_encoder_t *enc);
|
||||
static int get_mp2_packet_size(uint8_t *header);
|
||||
static int mp2_packet_size_to_rate_index(int packet_size, int is_mono);
|
||||
static int process_audio(tav_encoder_t *enc, int frame_num, FILE *output);
|
||||
static subtitle_entry_t* parse_subtitle_file(const char *filename, int fps);
|
||||
static subtitle_entry_t* parse_srt_file(const char *filename, int fps);
|
||||
static subtitle_entry_t* parse_smi_file(const char *filename, int fps);
|
||||
static int srt_time_to_frame(const char *time_str, int fps);
|
||||
static int sami_ms_to_frame(int milliseconds, int fps);
|
||||
static void free_subtitle_list(subtitle_entry_t *list);
|
||||
static int write_subtitle_packet(FILE *output, uint32_t index, uint8_t opcode, const char *text);
|
||||
static int process_subtitles(tav_encoder_t *enc, int frame_num, FILE *output);
|
||||
|
||||
// Show usage information
|
||||
static void show_usage(const char *program_name) {
|
||||
printf("TAV DWT-based Video Encoder\n");
|
||||
@@ -264,7 +300,7 @@ static void show_usage(const char *program_name) {
|
||||
printf(" -q, --quality N Quality level 0-5 (default: 2)\n");
|
||||
printf(" -Q, --quantizer Y,Co,Cg Quantizer levels 0-100 for each channel\n");
|
||||
printf(" -w, --wavelet N Wavelet filter: 0=5/3 reversible, 1=9/7 irreversible (default: 1)\n");
|
||||
printf(" -d, --decomp N Decomposition levels 1-4 (default: 3)\n");
|
||||
printf(" -d, --decomp N Decomposition levels 1-6 (default: 4)\n");
|
||||
printf(" -b, --bitrate N Target bitrate in kbps (enables bitrate control mode)\n");
|
||||
printf(" -p, --progressive Use progressive scan (default: interlaced)\n");
|
||||
printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n");
|
||||
@@ -296,7 +332,7 @@ static void show_usage(const char *program_name) {
|
||||
}
|
||||
|
||||
printf("\n\nFeatures:\n");
|
||||
printf(" - 64x64 DWT tiles with multi-resolution encoding\n");
|
||||
printf(" - 112x112 DWT tiles with multi-resolution encoding\n");
|
||||
printf(" - Full resolution YCoCg-R color space\n");
|
||||
printf(" - Progressive transmission and ROI coding\n");
|
||||
printf(" - Motion compensation with ±16 pixel search range\n");
|
||||
@@ -304,7 +340,7 @@ static void show_usage(const char *program_name) {
|
||||
|
||||
printf("\nExamples:\n");
|
||||
printf(" %s -i input.mp4 -o output.mv3 # Default settings\n", program_name);
|
||||
printf(" %s -i input.mkv -q 3 -w 1 -d 4 -o output.mv3 # High quality with 9/7 wavelet\n", program_name);
|
||||
printf(" %s -i input.mkv -q 3 -w 1 -d 6 -o output.mv3 # Maximum quality with 9/7 wavelet\n", program_name);
|
||||
printf(" %s -i input.avi --lossless -o output.mv3 # Lossless encoding\n", program_name);
|
||||
printf(" %s -i input.mp4 -b 800 -o output.mv3 # 800 kbps bitrate target\n", program_name);
|
||||
printf(" %s -i input.webm -S subs.srt -o output.mv3 # With subtitles\n", program_name);
|
||||
@@ -487,9 +523,9 @@ static void dwt_97_forward_1d(float *data, int length) {
|
||||
free(temp);
|
||||
}
|
||||
|
||||
// 2D DWT forward transform for 64x64 tile
|
||||
// 2D DWT forward transform for 112x112 tile
|
||||
static void dwt_2d_forward(float *tile_data, int levels, int filter_type) {
|
||||
const int size = 64;
|
||||
const int size = TILE_SIZE;
|
||||
float *temp_row = malloc(size * sizeof(float));
|
||||
float *temp_col = malloc(size * sizeof(float));
|
||||
|
||||
@@ -565,7 +601,7 @@ static size_t serialize_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
|
||||
}
|
||||
|
||||
// Quantize and serialize DWT coefficients
|
||||
const int tile_size = 64 * 64;
|
||||
const int tile_size = TILE_SIZE * TILE_SIZE;
|
||||
int16_t *quantized_y = malloc(tile_size * sizeof(int16_t));
|
||||
int16_t *quantized_co = malloc(tile_size * sizeof(int16_t));
|
||||
int16_t *quantized_cg = malloc(tile_size * sizeof(int16_t));
|
||||
@@ -609,7 +645,7 @@ static size_t serialize_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
|
||||
// Compress and write frame data
|
||||
static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type) {
|
||||
// Calculate total uncompressed size
|
||||
const size_t max_tile_size = 9 + (64 * 64 * 3 * sizeof(int16_t)); // header + 3 channels of coefficients
|
||||
const size_t max_tile_size = 9 + (TILE_SIZE * TILE_SIZE * 3 * sizeof(int16_t)); // header + 3 channels of coefficients
|
||||
const size_t total_uncompressed_size = enc->tiles_x * enc->tiles_y * max_tile_size;
|
||||
|
||||
// Allocate buffer for uncompressed tile data
|
||||
@@ -625,17 +661,17 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
|
||||
uint8_t mode = TAV_MODE_INTRA; // For now, all tiles are INTRA
|
||||
|
||||
// Extract tile data (already processed)
|
||||
float tile_y_data[64 * 64];
|
||||
float tile_co_data[64 * 64];
|
||||
float tile_cg_data[64 * 64];
|
||||
float tile_y_data[TILE_SIZE * TILE_SIZE];
|
||||
float tile_co_data[TILE_SIZE * TILE_SIZE];
|
||||
float tile_cg_data[TILE_SIZE * TILE_SIZE];
|
||||
|
||||
// Extract tile data from frame buffers
|
||||
for (int y = 0; y < 64; y++) {
|
||||
for (int x = 0; x < 64; x++) {
|
||||
int src_x = tile_x * 64 + x;
|
||||
int src_y = tile_y * 64 + y;
|
||||
for (int y = 0; y < TILE_SIZE; y++) {
|
||||
for (int x = 0; x < TILE_SIZE; x++) {
|
||||
int src_x = tile_x * TILE_SIZE + x;
|
||||
int src_y = tile_y * TILE_SIZE + y;
|
||||
int src_idx = src_y * enc->width + src_x;
|
||||
int tile_idx_local = y * 64 + x;
|
||||
int tile_idx_local = y * TILE_SIZE + x;
|
||||
|
||||
if (src_x < enc->width && src_y < enc->height) {
|
||||
tile_y_data[tile_idx_local] = enc->current_frame_y[src_idx];
|
||||
@@ -698,12 +734,12 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
|
||||
return compressed_size + 5; // packet type + size field + compressed data
|
||||
}
|
||||
|
||||
// Motion estimation for 64x64 tiles using SAD
|
||||
static int estimate_motion_64x64(const float *current, const float *reference,
|
||||
// Motion estimation for 112x112 tiles using SAD
|
||||
static int estimate_motion_112x112(const float *current, const float *reference,
|
||||
int width, int height, int tile_x, int tile_y,
|
||||
motion_vector_t *mv) {
|
||||
const int tile_size = 64;
|
||||
const int search_range = 16; // ±16 pixels
|
||||
const int tile_size = TILE_SIZE;
|
||||
const int search_range = 28; // ±28 pixels (increased proportionally: 16 * 112/64 = 28)
|
||||
const int start_x = tile_x * tile_size;
|
||||
const int start_y = tile_y * tile_size;
|
||||
|
||||
@@ -1131,6 +1167,7 @@ static int start_video_conversion(tav_encoder_t *enc) {
|
||||
|
||||
// Start audio conversion
|
||||
static int start_audio_conversion(tav_encoder_t *enc) {
|
||||
return 1;
|
||||
if (!enc->has_audio) return 1;
|
||||
|
||||
char command[2048];
|
||||
@@ -1151,6 +1188,400 @@ static int start_audio_conversion(tav_encoder_t *enc) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Get MP2 packet size from header (copied from TEV)
|
||||
static int get_mp2_packet_size(uint8_t *header) {
|
||||
int bitrate_index = (header[2] >> 4) & 0x0F;
|
||||
int bitrates[] = {0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384};
|
||||
if (bitrate_index >= 15) return MP2_DEFAULT_PACKET_SIZE;
|
||||
|
||||
int bitrate = bitrates[bitrate_index];
|
||||
if (bitrate == 0) return MP2_DEFAULT_PACKET_SIZE;
|
||||
|
||||
int sampling_freq_index = (header[2] >> 2) & 0x03;
|
||||
int sampling_freqs[] = {44100, 48000, 32000, 0};
|
||||
int sampling_freq = sampling_freqs[sampling_freq_index];
|
||||
if (sampling_freq == 0) return MP2_DEFAULT_PACKET_SIZE;
|
||||
|
||||
int padding = (header[2] >> 1) & 0x01;
|
||||
return (144 * bitrate * 1000) / sampling_freq + padding;
|
||||
}
|
||||
|
||||
// Map an MP2 packet size (bytes) to a rate index for MP2_RATE_TABLE (copied from TEV).
//
// The index is the position of the first upper bound that packet_size does
// not exceed:
//   <= 576 -> 0 (128k), <= 720 -> 1 (160k), <= 1008 -> 2 (224k),
//   <= 1440 -> 3 (320k), anything larger -> 4 (384k).
//
// is_mono is accepted for interface compatibility but does not influence the
// result (mono and stereo both map to index 0 in the lowest bucket).
static int mp2_packet_size_to_rate_index(int packet_size, int is_mono) {
    static const int upper_bounds[] = {576, 720, 1008, 1440};
    const int bound_count = (int)(sizeof(upper_bounds) / sizeof(upper_bounds[0]));

    (void)is_mono;

    for (int idx = 0; idx < bound_count; idx++) {
        if (packet_size <= upper_bounds[idx]) {
            return idx;
        }
    }
    return bound_count;
}
|
||||
|
||||
// Convert an SRT timestamp ("HH:MM:SS,mmm") to a frame number (copied from TEV).
//
// Returns the frame index rounded to the nearest frame, or -1 when the string
// does not contain all four numeric fields in the expected layout.
static int srt_time_to_frame(const char *time_str, int fps) {
    int hh, mm, ss, ms;

    const int fields = sscanf(time_str, "%d:%d:%d,%d", &hh, &mm, &ss, &ms);
    if (fields != 4) {
        return -1;
    }

    const double total_seconds = hh * 3600.0 + mm * 60.0 + ss + ms / 1000.0;

    // Adding 0.5 before truncation rounds to the nearest frame
    return (int)(total_seconds * fps + 0.5);
}
|
||||
|
||||
// Convert a SAMI timestamp in milliseconds to a frame number (copied from TEV).
// The result is rounded to the nearest frame.
static int sami_ms_to_frame(int milliseconds, int fps) {
    const double elapsed_seconds = milliseconds / 1000.0;
    const double exact_frame = elapsed_seconds * fps;
    return (int)(exact_frame + 0.5);
}
|
||||
|
||||
// Parse SubRip subtitle file (copied from TEV)
|
||||
static subtitle_entry_t* parse_srt_file(const char *filename, int fps) {
|
||||
FILE *file = fopen(filename, "r");
|
||||
if (!file) {
|
||||
fprintf(stderr, "Failed to open subtitle file: %s\n", filename);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
subtitle_entry_t *head = NULL;
|
||||
subtitle_entry_t *tail = NULL;
|
||||
char line[1024];
|
||||
int state = 0; // 0=index, 1=time, 2=text, 3=blank
|
||||
|
||||
subtitle_entry_t *current_entry = NULL;
|
||||
char *text_buffer = NULL;
|
||||
size_t text_buffer_size = 0;
|
||||
|
||||
while (fgets(line, sizeof(line), file)) {
|
||||
// Remove trailing newline
|
||||
size_t len = strlen(line);
|
||||
if (len > 0 && line[len-1] == '\n') {
|
||||
line[len-1] = '\0';
|
||||
len--;
|
||||
}
|
||||
if (len > 0 && line[len-1] == '\r') {
|
||||
line[len-1] = '\0';
|
||||
len--;
|
||||
}
|
||||
|
||||
if (state == 0) { // Expecting subtitle index
|
||||
if (strlen(line) == 0) continue; // Skip empty lines
|
||||
// Create new subtitle entry
|
||||
current_entry = calloc(1, sizeof(subtitle_entry_t));
|
||||
if (!current_entry) break;
|
||||
state = 1;
|
||||
} else if (state == 1) { // Expecting time range
|
||||
char start_time[32], end_time[32];
|
||||
if (sscanf(line, "%31s --> %31s", start_time, end_time) == 2) {
|
||||
current_entry->start_frame = srt_time_to_frame(start_time, fps);
|
||||
current_entry->end_frame = srt_time_to_frame(end_time, fps);
|
||||
|
||||
if (current_entry->start_frame < 0 || current_entry->end_frame < 0) {
|
||||
free(current_entry);
|
||||
current_entry = NULL;
|
||||
state = 3; // Skip to next blank line
|
||||
continue;
|
||||
}
|
||||
|
||||
// Initialize text buffer
|
||||
text_buffer_size = 256;
|
||||
text_buffer = malloc(text_buffer_size);
|
||||
if (!text_buffer) {
|
||||
free(current_entry);
|
||||
current_entry = NULL;
|
||||
fprintf(stderr, "Memory allocation failed while parsing subtitles\n");
|
||||
break;
|
||||
}
|
||||
text_buffer[0] = '\0';
|
||||
state = 2;
|
||||
} else {
|
||||
free(current_entry);
|
||||
current_entry = NULL;
|
||||
state = 3; // Skip malformed entry
|
||||
}
|
||||
} else if (state == 2) { // Collecting subtitle text
|
||||
if (strlen(line) == 0) {
|
||||
// End of subtitle text
|
||||
current_entry->text = strdup(text_buffer);
|
||||
free(text_buffer);
|
||||
text_buffer = NULL;
|
||||
|
||||
// Add to list
|
||||
if (!head) {
|
||||
head = current_entry;
|
||||
tail = current_entry;
|
||||
} else {
|
||||
tail->next = current_entry;
|
||||
tail = current_entry;
|
||||
}
|
||||
current_entry = NULL;
|
||||
state = 0;
|
||||
} else {
|
||||
// Append text line
|
||||
size_t current_len = strlen(text_buffer);
|
||||
size_t line_len = strlen(line);
|
||||
size_t needed = current_len + line_len + 2; // +2 for newline and null
|
||||
|
||||
if (needed > text_buffer_size) {
|
||||
text_buffer_size = needed + 256;
|
||||
char *new_buffer = realloc(text_buffer, text_buffer_size);
|
||||
if (!new_buffer) {
|
||||
free(text_buffer);
|
||||
free(current_entry);
|
||||
current_entry = NULL;
|
||||
fprintf(stderr, "Memory reallocation failed while parsing subtitles\n");
|
||||
break;
|
||||
}
|
||||
text_buffer = new_buffer;
|
||||
}
|
||||
|
||||
if (current_len > 0) {
|
||||
strcat(text_buffer, "\\n"); // Use \n as newline marker in subtitle text
|
||||
}
|
||||
strcat(text_buffer, line);
|
||||
}
|
||||
} else if (state == 3) { // Skip to next blank line
|
||||
if (strlen(line) == 0) {
|
||||
state = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Handle final subtitle if file doesn't end with blank line
|
||||
if (current_entry && state == 2) {
|
||||
current_entry->text = strdup(text_buffer);
|
||||
if (!head) {
|
||||
head = current_entry;
|
||||
} else {
|
||||
tail->next = current_entry;
|
||||
}
|
||||
free(text_buffer);
|
||||
}
|
||||
|
||||
fclose(file);
|
||||
return head;
|
||||
}
|
||||
|
||||
// Parse SAMI subtitle file (simplified version from TEV)
|
||||
static subtitle_entry_t* parse_smi_file(const char *filename, int fps) {
|
||||
FILE *file = fopen(filename, "r");
|
||||
if (!file) {
|
||||
fprintf(stderr, "Failed to open subtitle file: %s\n", filename);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
subtitle_entry_t *head = NULL;
|
||||
subtitle_entry_t *tail = NULL;
|
||||
char line[2048];
|
||||
|
||||
while (fgets(line, sizeof(line), file)) {
|
||||
// Look for SYNC tags with Start= attribute
|
||||
char *sync_pos = strstr(line, "<SYNC");
|
||||
if (sync_pos) {
|
||||
char *start_pos = strstr(sync_pos, "Start=");
|
||||
if (start_pos) {
|
||||
int start_ms;
|
||||
if (sscanf(start_pos, "Start=%d", &start_ms) == 1) {
|
||||
// Look for P tag with subtitle text
|
||||
char *p_start = strstr(sync_pos, "<P");
|
||||
if (p_start) {
|
||||
char *text_start = strchr(p_start, '>');
|
||||
if (text_start) {
|
||||
text_start++;
|
||||
char *text_end = strstr(text_start, "</P>");
|
||||
if (text_end) {
|
||||
size_t text_len = text_end - text_start;
|
||||
if (text_len > 0 && text_len < MAX_SUBTITLE_LENGTH) {
|
||||
subtitle_entry_t *entry = calloc(1, sizeof(subtitle_entry_t));
|
||||
if (entry) {
|
||||
entry->start_frame = sami_ms_to_frame(start_ms, fps);
|
||||
entry->end_frame = entry->start_frame + fps * 3; // Default 3 second duration
|
||||
entry->text = strndup(text_start, text_len);
|
||||
|
||||
// Add to list
|
||||
if (!head) {
|
||||
head = entry;
|
||||
tail = entry;
|
||||
} else {
|
||||
tail->next = entry;
|
||||
tail = entry;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fclose(file);
|
||||
return head;
|
||||
}
|
||||
|
||||
// Parse subtitle file based on extension (copied from TEV)
|
||||
static subtitle_entry_t* parse_subtitle_file(const char *filename, int fps) {
|
||||
if (!filename) return NULL;
|
||||
|
||||
size_t len = strlen(filename);
|
||||
if (len > 4 && strcasecmp(filename + len - 4, ".smi") == 0) {
|
||||
return parse_smi_file(filename, fps);
|
||||
} else {
|
||||
return parse_srt_file(filename, fps);
|
||||
}
|
||||
}
|
||||
|
||||
// Free subtitle list (copied from TEV)
|
||||
static void free_subtitle_list(subtitle_entry_t *list) {
|
||||
while (list) {
|
||||
subtitle_entry_t *next = list->next;
|
||||
free(list->text);
|
||||
free(list);
|
||||
list = next;
|
||||
}
|
||||
}
|
||||
|
||||
// Write subtitle packet (copied from TEV)
|
||||
static int write_subtitle_packet(FILE *output, uint32_t index, uint8_t opcode, const char *text) {
|
||||
// Calculate packet size
|
||||
size_t text_len = text ? strlen(text) : 0;
|
||||
size_t packet_size = 3 + 1 + text_len + 1; // index (3 bytes) + opcode + text + null terminator
|
||||
|
||||
// Write packet type and size
|
||||
uint8_t packet_type = TAV_PACKET_SUBTITLE;
|
||||
fwrite(&packet_type, 1, 1, output);
|
||||
uint32_t size32 = (uint32_t)packet_size;
|
||||
fwrite(&size32, 4, 1, output);
|
||||
|
||||
// Write subtitle data
|
||||
uint8_t index_bytes[3] = {
|
||||
(uint8_t)(index & 0xFF),
|
||||
(uint8_t)((index >> 8) & 0xFF),
|
||||
(uint8_t)((index >> 16) & 0xFF)
|
||||
};
|
||||
fwrite(index_bytes, 3, 1, output);
|
||||
fwrite(&opcode, 1, 1, output);
|
||||
|
||||
if (text && text_len > 0) {
|
||||
fwrite(text, 1, text_len, output);
|
||||
}
|
||||
|
||||
uint8_t null_terminator = 0;
|
||||
fwrite(&null_terminator, 1, 1, output);
|
||||
|
||||
return 1 + 4 + packet_size; // Total bytes written
|
||||
}
|
||||
|
||||
// Process audio for current frame (copied and adapted from TEV)
|
||||
static int process_audio(tav_encoder_t *enc, int frame_num, FILE *output) {
|
||||
if (!enc->has_audio || !enc->mp2_file || enc->audio_remaining <= 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Initialize packet size on first frame
|
||||
if (frame_num == 0) {
|
||||
uint8_t header[4];
|
||||
if (fread(header, 1, 4, enc->mp2_file) != 4) return 1;
|
||||
fseek(enc->mp2_file, 0, SEEK_SET);
|
||||
enc->mp2_packet_size = get_mp2_packet_size(header);
|
||||
int is_mono = (header[3] >> 6) == 3;
|
||||
enc->mp2_rate_index = mp2_packet_size_to_rate_index(enc->mp2_packet_size, is_mono);
|
||||
enc->target_audio_buffer_size = 4; // 4 audio packets in buffer
|
||||
}
|
||||
|
||||
// Calculate how much audio we need for this frame
|
||||
double frame_duration = 1.0 / enc->fps;
|
||||
double samples_per_frame = 32000.0 * frame_duration; // 32kHz sample rate
|
||||
int target_buffer_samples = (int)(samples_per_frame * enc->target_audio_buffer_size);
|
||||
int target_buffer_bytes = (target_buffer_samples * enc->mp2_packet_size) / 1152; // 1152 samples per MP2 frame
|
||||
|
||||
if (!enc->mp2_buffer) {
|
||||
enc->mp2_buffer_size = target_buffer_bytes * 2; // Extra buffer space
|
||||
enc->mp2_buffer = malloc(enc->mp2_buffer_size);
|
||||
if (!enc->mp2_buffer) {
|
||||
fprintf(stderr, "Failed to allocate audio buffer\n");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Read audio data
|
||||
size_t bytes_to_read = target_buffer_bytes;
|
||||
if (bytes_to_read > enc->audio_remaining) {
|
||||
bytes_to_read = enc->audio_remaining;
|
||||
}
|
||||
if (bytes_to_read > enc->mp2_buffer_size) {
|
||||
bytes_to_read = enc->mp2_buffer_size;
|
||||
}
|
||||
|
||||
size_t bytes_read = fread(enc->mp2_buffer, 1, bytes_to_read, enc->mp2_file);
|
||||
if (bytes_read == 0) {
|
||||
return 1; // No more audio
|
||||
}
|
||||
|
||||
// Write audio packet
|
||||
uint8_t audio_packet_type = TAV_PACKET_AUDIO_MP2;
|
||||
uint32_t audio_len = (uint32_t)bytes_read;
|
||||
|
||||
fwrite(&audio_packet_type, 1, 1, output);
|
||||
fwrite(&audio_len, 4, 1, output);
|
||||
fwrite(enc->mp2_buffer, 1, bytes_read, output);
|
||||
|
||||
// Track audio bytes written
|
||||
enc->audio_remaining -= bytes_read;
|
||||
|
||||
if (enc->verbose) {
|
||||
printf("Frame %d: Audio packet %zu bytes (remaining: %zu)\n",
|
||||
frame_num, bytes_read, enc->audio_remaining);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Process subtitles for current frame (copied and adapted from TEV)
|
||||
static int process_subtitles(tav_encoder_t *enc, int frame_num, FILE *output) {
|
||||
if (!enc->subtitles) {
|
||||
return 1; // No subtitles to process
|
||||
}
|
||||
|
||||
int bytes_written = 0;
|
||||
|
||||
// Check if we need to show a new subtitle
|
||||
if (!enc->subtitle_visible) {
|
||||
subtitle_entry_t *sub = enc->current_subtitle;
|
||||
if (!sub) sub = enc->subtitles; // Start from beginning if not set
|
||||
|
||||
// Find next subtitle to show
|
||||
while (sub && sub->start_frame <= frame_num) {
|
||||
if (sub->end_frame > frame_num) {
|
||||
// This subtitle should be shown
|
||||
if (sub != enc->current_subtitle) {
|
||||
enc->current_subtitle = sub;
|
||||
enc->subtitle_visible = 1;
|
||||
bytes_written += write_subtitle_packet(output, 0, 0x01, sub->text);
|
||||
if (enc->verbose) {
|
||||
printf("Frame %d: Showing subtitle: %.50s%s\n",
|
||||
frame_num, sub->text, strlen(sub->text) > 50 ? "..." : "");
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
sub = sub->next;
|
||||
}
|
||||
}
|
||||
|
||||
// Check if we need to hide current subtitle
|
||||
if (enc->subtitle_visible && enc->current_subtitle) {
|
||||
if (frame_num >= enc->current_subtitle->end_frame) {
|
||||
enc->subtitle_visible = 0;
|
||||
bytes_written += write_subtitle_packet(output, 0, 0x02, NULL);
|
||||
if (enc->verbose) {
|
||||
printf("Frame %d: Hiding subtitle\n", frame_num);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return bytes_written;
|
||||
}
|
||||
|
||||
// Main function
|
||||
int main(int argc, char *argv[]) {
|
||||
generate_random_filename(TEMP_AUDIO_FILE);
|
||||
@@ -1231,6 +1662,9 @@ int main(int argc, char *argv[]) {
|
||||
case 't':
|
||||
enc->test_mode = 1;
|
||||
break;
|
||||
case 'S':
|
||||
enc->subtitle_file = strdup(optarg);
|
||||
break;
|
||||
case 1000: // --lossless
|
||||
enc->lossless = 1;
|
||||
enc->wavelet_filter = WAVELET_5_3_REVERSIBLE;
|
||||
@@ -1317,6 +1751,17 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
|
||||
// Parse subtitles if provided
|
||||
if (enc->subtitle_file) {
|
||||
printf("Parsing subtitles: %s\n", enc->subtitle_file);
|
||||
enc->subtitles = parse_subtitle_file(enc->subtitle_file, enc->fps);
|
||||
if (!enc->subtitles) {
|
||||
fprintf(stderr, "Warning: Failed to parse subtitle file\n");
|
||||
} else {
|
||||
printf("Loaded subtitles successfully\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Write TAV header
|
||||
if (write_tav_header(enc) != 0) {
|
||||
fprintf(stderr, "Error: Failed to write TAV header\n");
|
||||
@@ -1430,7 +1875,7 @@ int main(int argc, char *argv[]) {
|
||||
int tile_y = tile_idx / enc->tiles_x;
|
||||
|
||||
if (!is_keyframe && frame_count > 0) {
|
||||
estimate_motion_64x64(enc->current_frame_y, enc->previous_frame_y,
|
||||
estimate_motion_112x112(enc->current_frame_y, enc->previous_frame_y,
|
||||
enc->width, enc->height, tile_x, tile_y,
|
||||
&enc->motion_vectors[tile_idx]);
|
||||
} else {
|
||||
@@ -1449,6 +1894,12 @@ int main(int argc, char *argv[]) {
|
||||
break;
|
||||
}
|
||||
else {
|
||||
// Process audio for this frame
|
||||
process_audio(enc, frame_count, enc->output_fp);
|
||||
|
||||
// Process subtitles for this frame
|
||||
process_subtitles(enc, frame_count, enc->output_fp);
|
||||
|
||||
// Write a sync packet only after a video frame has been coded
|
||||
uint8_t sync_packet = TAV_PACKET_SYNC;
|
||||
fwrite(&sync_packet, 1, 1, enc->output_fp);
|
||||
@@ -1526,6 +1977,12 @@ static void cleanup_encoder(tav_encoder_t *enc) {
|
||||
free(enc->tiles);
|
||||
free(enc->motion_vectors);
|
||||
free(enc->compressed_buffer);
|
||||
free(enc->mp2_buffer);
|
||||
|
||||
// Free subtitle list
|
||||
if (enc->subtitles) {
|
||||
free_subtitle_list(enc->subtitles);
|
||||
}
|
||||
|
||||
if (enc->zstd_ctx) {
|
||||
ZSTD_freeCCtx(enc->zstd_ctx);
|
||||
|
||||
Reference in New Issue
Block a user