subtitle wip

This commit is contained in:
minjaesong
2025-08-26 00:35:53 +09:00
parent 3820b2a2db
commit 6d982a9786
4 changed files with 596 additions and 59 deletions

View File

@@ -67,6 +67,203 @@ audio.purgeQueue(0)
audio.setPcmMode(0) audio.setPcmMode(0)
audio.setMasterVolume(0, 255) audio.setMasterVolume(0, 255)
// Subtitle display functions
// Blanks the strip of the text layer used for bottom-anchored subtitles.
// The text mode is 80 columns by 32 rows; rows 29..32 are overwritten with
// spaces drawn in colour pair (255, 255), after which the colours that were
// active on entry are put back.
function clearSubtitleArea() {
    const FIRST_ROW = 29
    const LAST_ROW = 32
    const COLUMNS = 80

    // Remember the caller's colours so they can be restored afterwards
    const savedFore = con.get_color_fore()
    const savedBack = con.get_color_back()

    con.color_pair(255, 255) // transparent to clear

    let row = FIRST_ROW
    while (row <= LAST_ROW) {
        con.move(row, 1)
        // One space per cell, left to right across the whole row
        let cellsLeft = COLUMNS
        while (cellsLeft > 0) {
            print(" ")
            cellsLeft -= 1
        }
        row += 1
    }

    con.color_pair(savedFore, savedBack)
}
// Draws `text` on the 80x32 text layer at one of the SSF "nonant" positions.
//
// text     - subtitle string; '\n' separates lines. An empty/missing text
//            only clears the subtitle area and returns.
// position - 0 bottom centre (default), 1 bottom left, 2 centre left,
//            3 top left, 4 top centre, 5 top right, 6 centre right,
//            7 bottom right, 8 centre. Any other value is treated as
//            bottom centre.
//
// Each line is trimmed, blank lines are skipped (but still occupy a row),
// and rows are clamped to 1..32. The console colours active on entry are
// restored on exit, even if drawing throws.
function displaySubtitle(text, position = 0) {
    if (!text || text.length === 0) {
        clearSubtitleArea()
        return
    }

    const SCREEN_ROWS = 32
    const SCREEN_COLS = 80
    const lines = text.split('\n')

    // Vertical anchor: the row on which the first line is drawn
    let startRow
    switch (position) {
        case 3: // top left
        case 4: // top centre
        case 5: // top right
            startRow = 2
            break
        case 2: // centre left
        case 6: // centre right
        case 8: // centre
            startRow = SCREEN_ROWS / 2 - Math.floor(lines.length / 2)
            break
        default: // bottom left / centre / right, and unknown positions
            startRow = SCREEN_ROWS - lines.length + 1
    }

    // Subtitle colours: foreground 230 (yellow) on background 0 (black)
    const oldFgColor = con.get_color_fore()
    const oldBgColor = con.get_color_back()
    con.color_pair(230, 0)

    try {
        for (let i = 0; i < lines.length; i++) {
            const line = lines[i].trim()
            if (line.length === 0) continue

            // Keep the row on screen even when there are too many lines
            const row = Math.min(SCREEN_ROWS, Math.max(1, startRow + i))

            // Horizontal alignment is computed per line from its own length
            let startCol
            switch (position) {
                case 1: // bottom left
                case 2: // centre left
                case 3: // top left
                    startCol = 2
                    break
                case 5: // top right
                case 6: // centre right
                case 7: // bottom right
                    startCol = Math.max(1, SCREEN_COLS - line.length)
                    break
                default: // centred columns (0, 4, 8 and unknown positions)
                    startCol = Math.max(1, Math.floor((SCREEN_COLS - line.length) / 2) + 1)
            }

            con.move(row, startCol)
            print(line)
        }
    } finally {
        con.color_pair(oldFgColor, oldBgColor)
    }
}
// Reads one SSF subtitle packet from `seqread` and applies it.
//
// packetSize - number of payload bytes that follow the packet's size field.
//
// Layout: uint24 index (little-endian) + uint8 opcode + opcode arguments,
// where the arguments are terminated by a 0x00 byte.
//
// Exactly `packetSize` bytes are always consumed, whatever the opcode and
// however malformed the arguments are, so that the caller's packet loop
// stays aligned on the next packet header.
//
// Side effects: updates subtitleText / subtitleVisible / subtitlePosition,
// redraws or clears the subtitle, and for the font opcodes pokes the
// uploaded bytes into the display adapter.
function processSubtitlePacket(packetSize) {
    const HEADER_SIZE = 4 // 3 bytes for index + 1 byte for opcode

    // Reads and throws away `count` bytes of the stream
    const discard = (count) => {
        if (count > 0) {
            sys.free(seqread.readBytes(count))
        }
    }

    // A packet too short to hold the header cannot be interpreted; skip it
    if (packetSize < HEADER_SIZE) {
        discard(packetSize)
        return
    }

    // Read 24-bit index (little-endian)
    const indexByte0 = seqread.readOneByte()
    const indexByte1 = seqread.readOneByte()
    const indexByte2 = seqread.readOneByte()
    const index = indexByte0 | (indexByte1 << 8) | (indexByte2 << 16)

    const opcode = seqread.readOneByte()

    // Bytes of this packet not yet read; decremented as arguments are consumed
    let remainingBytes = packetSize - HEADER_SIZE

    switch (opcode) {
        case SSF_OP_SHOW: {
            // Text followed by a null terminator; a lone terminator means no text
            if (remainingBytes > 1) {
                const textLength = remainingBytes - 1 // last byte is the terminator
                const textBytes = seqread.readBytes(remainingBytes)
                remainingBytes = 0

                // NOTE(review): each byte becomes one character, so multi-byte
                // UTF-8 sequences are passed through as separate code units -
                // confirm this is what print() expects.
                let textStr = ""
                for (let i = 0; i < textLength; i++) {
                    const byte = sys.peek(textBytes + i)
                    if (byte === 0) break
                    textStr += String.fromCharCode(byte)
                }
                sys.free(textBytes)

                subtitleText = textStr
                subtitleVisible = true
                // Wipe the previous cue first so a shorter text does not leave
                // remnants of a longer one behind
                clearSubtitleArea()
                displaySubtitle(subtitleText, subtitlePosition)
            }
            break
        }

        case SSF_OP_HIDE: {
            subtitleVisible = false
            subtitleText = ""
            clearSubtitleArea()
            break
        }

        case SSF_OP_MOVE: {
            if (remainingBytes >= 2) { // 1 byte for position + 1 null terminator
                const newPosition = seqread.readOneByte()
                seqread.readOneByte() // null terminator
                remainingBytes -= 2

                // NOTE(review): the format description also lists 0x08 (centre);
                // only 0..7 are accepted here.
                if (newPosition >= 0 && newPosition <= 7) {
                    subtitlePosition = newPosition

                    // Re-display current subtitle at new position if visible
                    if (subtitleVisible && subtitleText.length > 0) {
                        clearSubtitleArea()
                        displaySubtitle(subtitleText, subtitlePosition)
                    }
                }
            }
            break
        }

        case SSF_OP_UPLOAD_LOW_FONT:
        case SSF_OP_UPLOAD_HIGH_FONT: {
            // uint16 payload length followed by the font bytes
            if (remainingBytes >= 3) {
                const payloadLen = seqread.readShort()
                remainingBytes -= 2

                // Only upload when the declared payload actually fits in the packet
                if (payloadLen > 0 && remainingBytes >= payloadLen) {
                    const fontData = seqread.readBytes(payloadLen)
                    remainingBytes -= payloadLen

                    // Copy at most 1920 bytes to descending addresses starting at
                    // -1300607, then write 18 (low font) or 19 (high font) to -1299460
                    const uploadLen = Math.min(payloadLen, 1920)
                    for (let i = 0; i < uploadLen; i++) {
                        sys.poke(-1300607 - i, sys.peek(fontData + i))
                    }
                    sys.poke(-1299460, (opcode == SSF_OP_UPLOAD_LOW_FONT) ? 18 : 19)
                    sys.free(fontData)
                }
            }
            break
        }

        case SSF_OP_NOP:
        default: {
            if (interactive && opcode !== SSF_OP_NOP) {
                serial.println(`[SUBTITLE UNKNOWN] Index: ${index}, Opcode: 0x${opcode.toString(16).padStart(2, '0')}`)
            }
            break
        }
    }

    // Whatever the opcode left unread (terminators, malformed or unknown
    // arguments) is skipped here to keep the stream in sync
    discard(remainingBytes)
}
// Check magic number // Check magic number
let magic = seqread.readBytes(8) let magic = seqread.readBytes(8)
let magicMatching = true let magicMatching = true
@@ -124,18 +321,17 @@ const DISPLAY_BA_ADDR = -1310721 // Main graphics BA plane (displayed)
// RGB frame buffers (24-bit: R,G,B per pixel) // RGB frame buffers (24-bit: R,G,B per pixel)
const FRAME_SIZE = 560*448*3 // Total frame size = 752,640 bytes const FRAME_SIZE = 560*448*3 // Total frame size = 752,640 bytes
// Allocate frame buffers - malloc works correctly, addresses are start addresses // Ping-pong frame buffers to eliminate memcpy overhead
const CURRENT_RGB_ADDR = sys.malloc(FRAME_SIZE) const RGB_BUFFER_A = sys.malloc(FRAME_SIZE)
const PREV_RGB_ADDR = sys.malloc(FRAME_SIZE) const RGB_BUFFER_B = sys.malloc(FRAME_SIZE)
// Ping-pong buffer pointers (swap instead of copy)
// Working memory for blocks (minimal allocation) let CURRENT_RGB_ADDR = RGB_BUFFER_A
let ycocgWorkspace = sys.malloc(BLOCK_SIZE * BLOCK_SIZE * 3) // Y+Co+Cg workspace let PREV_RGB_ADDR = RGB_BUFFER_B
let dctWorkspace = sys.malloc(BLOCK_SIZE * BLOCK_SIZE * 4) // DCT coefficients (floats)
// Initialize RGB frame buffers to black (0,0,0) // Initialize RGB frame buffers to black (0,0,0)
sys.memset(CURRENT_RGB_ADDR, 0, FRAME_PIXELS * 3) sys.memset(RGB_BUFFER_A, 0, FRAME_PIXELS * 3)
sys.memset(PREV_RGB_ADDR, 0, FRAME_PIXELS * 3) sys.memset(RGB_BUFFER_B, 0, FRAME_PIXELS * 3)
// Initialize display framebuffer to black // Initialize display framebuffer to black
sys.memset(DISPLAY_RG_ADDR, 0, FRAME_PIXELS) // Black in RG plane sys.memset(DISPLAY_RG_ADDR, 0, FRAME_PIXELS) // Black in RG plane
@@ -148,31 +344,15 @@ let akku2 = 0.0
let mp2Initialised = false let mp2Initialised = false
let audioFired = false let audioFired = false
// Performance tracking variables
let decompressTime = 0
let decodeTime = 0
let uploadTime = 0
let biasTime = 0
const BIAS_LIGHTING_MIN = 1.0 / 16.0 const BIAS_LIGHTING_MIN = 1.0 / 16.0
let oldBgcol = [BIAS_LIGHTING_MIN, BIAS_LIGHTING_MIN, BIAS_LIGHTING_MIN] let oldBgcol = [BIAS_LIGHTING_MIN, BIAS_LIGHTING_MIN, BIAS_LIGHTING_MIN]
// 4x4 Bayer dithering matrix
const BAYER_MATRIX = [
[ 0, 8, 2,10],
[12, 4,14, 6],
[ 3,11, 1, 9],
[15, 7,13, 5]
]
// Apply Bayer dithering to reduce banding when quantizing to 4-bit
function ditherValue(value, x, y) {
// Get the dither threshold for this pixel position
const threshold = BAYER_MATRIX[y & 3][x & 3]
// Scale threshold from 0-15 to 0-15.9375 (16 steps over 16 values)
const scaledThreshold = threshold / 16.0
// Add dither and quantize to 4-bit (0-15)
const dithered = value + scaledThreshold
return Math.max(0, Math.min(15, Math.floor(dithered * 15 / 255)))
}
function getRGBfromScr(x, y) { function getRGBfromScr(x, y) {
let offset = y * WIDTH + x let offset = y * WIDTH + x
let rg = sys.peek(-1048577 - offset) let rg = sys.peek(-1048577 - offset)
@@ -237,9 +417,10 @@ try {
// Sync packet - frame complete // Sync packet - frame complete
frameCount++ frameCount++
// Copy current RGB frame to previous frame buffer for next frame reference // Swap ping-pong buffers instead of expensive memcpy (752KB copy eliminated!)
// memcpy(source, destination, length) - so CURRENT (source) -> PREV (destination) let temp = CURRENT_RGB_ADDR
sys.memcpy(CURRENT_RGB_ADDR, PREV_RGB_ADDR, FRAME_PIXELS * 3) CURRENT_RGB_ADDR = PREV_RGB_ADDR
PREV_RGB_ADDR = temp
} else if (packetType == TEV_PACKET_IFRAME || packetType == TEV_PACKET_PFRAME) { } else if (packetType == TEV_PACKET_IFRAME || packetType == TEV_PACKET_PFRAME) {
// Video frame packet (always includes rate control factor) // Video frame packet (always includes rate control factor)
@@ -275,11 +456,14 @@ try {
let decompressedSize = Math.max(payloadLen * 4, blocksX * blocksY * tevBlockSize) // More efficient sizing let decompressedSize = Math.max(payloadLen * 4, blocksX * blocksY * tevBlockSize) // More efficient sizing
let actualSize let actualSize
let decompressStart = sys.nanoTime()
try { try {
// Use gzip decompression (only compression format supported in TSVM JS) // Use gzip decompression (only compression format supported in TSVM JS)
actualSize = gzip.decompFromTo(compressedPtr, payloadLen, blockDataPtr) actualSize = gzip.decompFromTo(compressedPtr, payloadLen, blockDataPtr)
decompressTime = (sys.nanoTime() - decompressStart) / 1000000.0 // Convert to milliseconds
} catch (e) { } catch (e) {
// Decompression failed - skip this frame // Decompression failed - skip this frame
decompressTime = (sys.nanoTime() - decompressStart) / 1000000.0 // Still measure time
serial.println(`Frame ${frameCount}: Gzip decompression failed, skipping (compressed size: ${payloadLen}, error: ${e})`) serial.println(`Frame ${frameCount}: Gzip decompression failed, skipping (compressed size: ${payloadLen}, error: ${e})`)
sys.free(compressedPtr) sys.free(compressedPtr)
continue continue
@@ -287,11 +471,15 @@ try {
// Hardware-accelerated TEV YCoCg-R decoding to RGB buffers (with rate control factor) // Hardware-accelerated TEV YCoCg-R decoding to RGB buffers (with rate control factor)
try { try {
let decodeStart = sys.nanoTime()
graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, height, quality, debugMotionVectors, rateControlFactor) graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, height, quality, debugMotionVectors, rateControlFactor)
decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0 // Convert to milliseconds
// Upload RGB buffer to display framebuffer with dithering // Upload RGB buffer to display framebuffer with dithering
graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, DISPLAY_RG_ADDR, DISPLAY_BA_ADDR, let uploadStart = sys.nanoTime()
width, height, frameCount) graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, width, height, frameCount)
uploadTime = (sys.nanoTime() - uploadStart) / 1000000.0 // Convert to milliseconds
// Defer audio playback until a first frame is sent // Defer audio playback until a first frame is sent
if (!audioFired) { if (!audioFired) {
@@ -304,7 +492,15 @@ try {
sys.free(compressedPtr) sys.free(compressedPtr)
let biasStart = sys.nanoTime()
setBiasLighting() setBiasLighting()
biasTime = (sys.nanoTime() - biasStart) / 1000000.0 // Convert to milliseconds
// Log performance data every 60 frames (and also frame 0 for debugging)
if (frameCount % 60 == 0 || frameCount == 0) {
let totalTime = decompressTime + decodeTime + uploadTime + biasTime
serial.println(`Frame ${frameCount}: Decompress=${decompressTime.toFixed(1)}ms, Decode=${decodeTime.toFixed(1)}ms, Upload=${uploadTime.toFixed(1)}ms, Bias=${biasTime.toFixed(1)}ms, Total=${totalTime.toFixed(1)}ms`)
}
} else if (packetType == TEV_PACKET_AUDIO_MP2) { } else if (packetType == TEV_PACKET_AUDIO_MP2) {
// MP2 Audio packet // MP2 Audio packet
@@ -319,6 +515,10 @@ try {
audio.mp2Decode() audio.mp2Decode()
audio.mp2UploadDecoded(0) audio.mp2UploadDecoded(0)
} else if (packetType == TEV_PACKET_SUBTITLE) {
// Subtitle packet - NEW!
let packetSize = seqread.readInt()
processSubtitlePacket(packetSize)
} else { } else {
println(`Unknown packet type: 0x${packetType.toString(16)}`) println(`Unknown packet type: 0x${packetType.toString(16)}`)
break break
@@ -349,11 +549,9 @@ catch (e) {
} }
finally { finally {
// Cleanup working memory (graphics memory is automatically managed) // Cleanup working memory (graphics memory is automatically managed)
sys.free(ycocgWorkspace)
sys.free(dctWorkspace)
sys.free(blockDataPtr) sys.free(blockDataPtr)
if (CURRENT_RGB_ADDR > 0) sys.free(CURRENT_RGB_ADDR) if (RGB_BUFFER_A > 0) sys.free(RGB_BUFFER_A)
if (PREV_RGB_ADDR > 0) sys.free(PREV_RGB_ADDR) if (RGB_BUFFER_B > 0) sys.free(RGB_BUFFER_B)
audio.stop(0) audio.stop(0)
audio.purgeQueue(0) audio.purgeQueue(0)

View File

@@ -707,6 +707,7 @@ DCT-based compression, motion compensation, and efficient temporal coding.
0x10: I-frame (intra-coded frame) 0x10: I-frame (intra-coded frame)
0x11: P-frame (predicted frame) 0x11: P-frame (predicted frame)
0x20: MP2 audio packet 0x20: MP2 audio packet
0x30: Subtitle in "Simple" format
0xFF: sync packet 0xFF: sync packet
## Video Packet Structure ## Video Packet Structure
@@ -767,6 +768,22 @@ to larger block sizes and hardware acceleration.
Reuses existing MP2 audio infrastructure from TSVM MOV format for seamless Reuses existing MP2 audio infrastructure from TSVM MOV format for seamless
compatibility with existing audio processing pipeline. compatibility with existing audio processing pipeline.
## Simple Subtitle Format
SSF is a simple subtitle format that uses the text buffer to display text.
The format is designed to be compatible with SubRip and SAMI (without markups).
### SSF Packet Structure
uint24 index (used to specify target subtitle object)
uint8 opcode
0x00 = <argument terminator>, is NOP when used here
0x01 = show (arguments: UTF-8 text)
0x02 = hide (arguments: none)
0x10 = move to different nonant (arguments: 0x00-bottom centre; 0x01-bottom left; 0x02-centre left; 0x03-top left; 0x04-top centre; 0x05-top right; 0x06-centre right; 0x07-bottom right; 0x08-centre)
0x30 = upload to low font rom (arguments: uint16 payload length, var bytes)
0x31 = upload to high font rom (arguments: uint16 payload length, var bytes)
note: changing the font ROM changes the appearance of every subtitle currently being displayed
* arguments separated AND terminated by 0x00
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
Sound Adapter Sound Adapter

View File

@@ -18,6 +18,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
private val idct16TempBuffer = FloatArray(256) // For 16x16 IDCT private val idct16TempBuffer = FloatArray(256) // For 16x16 IDCT
private val idct16SeparableBuffer = FloatArray(256) // For separable 16x16 IDCT private val idct16SeparableBuffer = FloatArray(256) // For separable 16x16 IDCT
private fun getFirstGPU(): GraphicsAdapter? { private fun getFirstGPU(): GraphicsAdapter? {
return vm.findPeribyType(VM.PERITYPE_GPU_AND_TERM)?.peripheral as? GraphicsAdapter return vm.findPeribyType(VM.PERITYPE_GPU_AND_TERM)?.peripheral as? GraphicsAdapter
} }
@@ -483,6 +484,33 @@ class GraphicsJSR223Delegate(private val vm: VM) {
) )
).map{ it.map { (it.toFloat() + 0.5f) / 16f }.toFloatArray() } ).map{ it.map { (it.toFloat() + 0.5f) / 16f }.toFloatArray() }
private val bayerKernels2 = arrayOf(
intArrayOf(
0,8,2,10,
12,4,14,6,
3,11,1,9,
15,7,13,5,
),
intArrayOf(
8,2,10,0,
4,14,6,12,
11,1,9,3,
7,13,5,15,
),
intArrayOf(
7,13,5,15,
8,2,10,0,
4,14,6,12,
11,1,9,3,
),
intArrayOf(
15,7,13,5,
0,8,2,10,
12,4,14,6,
3,11,1,9,
)
)
/** /**
* This method always assume that you're using the default palette * This method always assume that you're using the default palette
* *
@@ -1307,22 +1335,35 @@ class GraphicsJSR223Delegate(private val vm: VM) {
/** /**
* Upload RGB frame buffer to graphics framebuffer with dithering * Upload RGB frame buffer to graphics framebuffer with dithering
* @param rgbAddr Source RGB buffer (24-bit: R,G,B bytes) * @param rgbAddr Source RGB buffer (24-bit: R,G,B bytes)
* @param rgPlaneAddr Destination RG framebuffer
* @param baPlaneAddr Destination BA framebuffer
* @param width Frame width * @param width Frame width
* @param height Frame height * @param height Frame height
*/ */
fun uploadRGBToFramebuffer(rgbAddr: Long, rgPlaneAddr: Long, baPlaneAddr: Long, width: Int, height: Int, frameCounter: Int) { fun uploadRGBToFramebuffer(rgbAddr: Long, width: Int, height: Int, frameCounter: Int) {
val rgAddrIncVec = if (rgPlaneAddr >= 0) 1 else -1 val gpu = (vm.peripheralTable[1].peripheral as GraphicsAdapter)
val baAddrIncVec = if (baPlaneAddr >= 0) 1 else -1
val rgbAddrIncVec = if (rgbAddr >= 0) 1 else -1 val rgbAddrIncVec = if (rgbAddr >= 0) 1 else -1
for (y in 0 until height) { val totalPixels = width * height
for (x in 0 until width) {
val pixelOffset = y.toLong() * width + x
val rgbOffset = pixelOffset * 3 * rgbAddrIncVec
// Read RGB values // Process in 8KB chunks to balance memory usage and performance
val chunkSize = 8192
val rgChunk = ByteArray(chunkSize)
val baChunk = ByteArray(chunkSize)
var pixelsProcessed = 0
while (pixelsProcessed < totalPixels) {
val pixelsInChunk = kotlin.math.min(chunkSize, totalPixels - pixelsProcessed)
// Batch process chunk of pixels
for (i in 0 until pixelsInChunk) {
val pixelIndex = pixelsProcessed + i
val y = pixelIndex / width
val x = pixelIndex % width
val rgbOffset = (pixelIndex.toLong() * 3) * rgbAddrIncVec
// Read RGB values (3 peek operations per pixel - still the bottleneck)
val r = vm.peek(rgbAddr + rgbOffset)!!.toUint() val r = vm.peek(rgbAddr + rgbOffset)!!.toUint()
val g = vm.peek(rgbAddr + rgbOffset + rgbAddrIncVec)!!.toUint() val g = vm.peek(rgbAddr + rgbOffset + rgbAddrIncVec)!!.toUint()
val b = vm.peek(rgbAddr + rgbOffset + rgbAddrIncVec * 2)!!.toUint() val b = vm.peek(rgbAddr + rgbOffset + rgbAddrIncVec * 2)!!.toUint()
@@ -1332,14 +1373,24 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val g4 = ditherValue(g, x, y, frameCounter) val g4 = ditherValue(g, x, y, frameCounter)
val b4 = ditherValue(b, x, y, frameCounter) val b4 = ditherValue(b, x, y, frameCounter)
// Pack into 4096-color format // Pack and store in chunk buffers
val rgValue = (r4 shl 4) or g4 // R in MSB, G in LSB rgChunk[i] = ((r4 shl 4) or g4).toByte()
val baValue = (b4 shl 4) or 15 // B in MSB, A=15 (opaque) in LSB baChunk[i] = ((b4 shl 4) or 15).toByte()
// Write to framebuffer
vm.poke(rgPlaneAddr + pixelOffset * rgAddrIncVec, rgValue.toByte())
vm.poke(baPlaneAddr + pixelOffset * baAddrIncVec, baValue.toByte())
} }
// Batch write entire chunk to framebuffer
val pixelOffset = (pixelsProcessed).toLong()
gpu.let {
UnsafeHelper.memcpyRaw(
rgChunk, UnsafeHelper.getArrayOffset(rgChunk),
null, it.framebuffer.ptr + pixelOffset, pixelsInChunk.toLong())
UnsafeHelper.memcpyRaw(
baChunk, UnsafeHelper.getArrayOffset(baChunk),
null, it.framebuffer2!!.ptr + pixelOffset, pixelsInChunk.toLong())
}
pixelsProcessed += pixelsInChunk
} }
} }
@@ -1356,6 +1407,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
return round(15f * q) return round(15f * q)
} }
val dctBasis8 = Array(8) { u -> val dctBasis8 = Array(8) { u ->
FloatArray(8) { x -> FloatArray(8) { x ->
val cu = if (u == 0) 1.0 / sqrt(2.0) else 1.0 val cu = if (u == 0) 1.0 / sqrt(2.0) else 1.0

View File

@@ -29,6 +29,7 @@
#define TEV_PACKET_IFRAME 0x10 // Intra frame (keyframe) #define TEV_PACKET_IFRAME 0x10 // Intra frame (keyframe)
#define TEV_PACKET_PFRAME 0x11 // Predicted frame #define TEV_PACKET_PFRAME 0x11 // Predicted frame
#define TEV_PACKET_AUDIO_MP2 0x20 // MP2 audio #define TEV_PACKET_AUDIO_MP2 0x20 // MP2 audio
#define TEV_PACKET_SUBTITLE 0x30 // Subtitle packet
#define TEV_PACKET_SYNC 0xFF // Sync packet #define TEV_PACKET_SYNC 0xFF // Sync packet
// Utility macros // Utility macros
@@ -100,9 +101,18 @@ typedef struct __attribute__((packed)) {
int16_t cg_coeffs[64]; // quantised Cg DCT coefficients (8x8) int16_t cg_coeffs[64]; // quantised Cg DCT coefficients (8x8)
} tev_block_t; } tev_block_t;
// Subtitle entry structure
// One cue parsed from a SubRip file, stored as a node of a singly linked list
// built by parse_srt_file() and released by free_subtitle_list().
typedef struct subtitle_entry {
int start_frame; // frame number on which the "show" packet is emitted
int end_frame; // frame number on which the "hide" packet is emitted
char *text; // heap-allocated cue text, lines joined with '\n'; freed with the node
struct subtitle_entry *next; // next cue in file order, NULL at the end of the list
} subtitle_entry_t;
typedef struct { typedef struct {
char *input_file; char *input_file;
char *output_file; char *output_file;
char *subtitle_file; // SubRip (.srt) file path
int width; int width;
int height; int height;
int fps; int fps;
@@ -110,6 +120,7 @@ typedef struct {
int total_frames; int total_frames;
double duration; double duration;
int has_audio; int has_audio;
int has_subtitles;
int output_to_stdout; int output_to_stdout;
int quality; // 0-4, higher = better quality int quality; // 0-4, higher = better quality
int verbose; int verbose;
@@ -156,6 +167,10 @@ typedef struct {
float complexity_history[60]; // Rolling window for complexity float complexity_history[60]; // Rolling window for complexity
int complexity_history_index; int complexity_history_index;
float average_complexity; float average_complexity;
// Subtitle handling
subtitle_entry_t *subtitle_list;
subtitle_entry_t *current_subtitle;
} tev_encoder_t; } tev_encoder_t;
// RGB to YCoCg-R transform (per YCoCg-R specification with truncated division) // RGB to YCoCg-R transform (per YCoCg-R specification with truncated division)
@@ -820,6 +835,223 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
block->cbp = 0x07; // Y, Co, Cg all present block->cbp = 0x07; // Y, Co, Cg all present
} }
// Translate a SubRip timestamp ("HH:MM:SS,mmm") into a frame index at `fps`
// frames per second, rounding to the nearest frame.
// Returns -1 when the string does not provide all four numeric fields.
static int srt_time_to_frame(const char *time_str, int fps) {
    int field[4]; // hours, minutes, seconds, milliseconds

    int parsed = sscanf(time_str, "%d:%d:%d,%d",
                        &field[0], &field[1], &field[2], &field[3]);
    if (parsed == 4) {
        double total_seconds = field[0] * 3600.0 + field[1] * 60.0 + field[2] + field[3] / 1000.0;
        return (int)(total_seconds * fps + 0.5);
    }

    return -1;
}
// Link `entry` onto the end of the list described by *head / *tail.
static void srt_append_entry(subtitle_entry_t **head, subtitle_entry_t **tail,
                             subtitle_entry_t *entry) {
    entry->next = NULL;
    if (*head == NULL) {
        *head = entry;
    } else {
        (*tail)->next = entry;
    }
    *tail = entry;
}

// Parse SubRip subtitle file
//
// filename - path of the .srt file to read
// fps      - frame rate used to convert cue timestamps to frame numbers
//
// Returns the head of a newly allocated list of cues in file order (release
// it with free_subtitle_list()), or NULL when the file cannot be opened or
// contains no usable cue. Cues with a malformed time line are skipped. On an
// allocation failure parsing stops and the cues collected so far are returned.
static subtitle_entry_t* parse_srt_file(const char *filename, int fps) {
    FILE *file = fopen(filename, "r");
    if (!file) {
        fprintf(stderr, "Failed to open subtitle file: %s\n", filename);
        return NULL;
    }

    subtitle_entry_t *head = NULL;
    subtitle_entry_t *tail = NULL;
    subtitle_entry_t *current_entry = NULL; // cue being assembled, not yet in the list
    char *text_buffer = NULL;               // growing text of current_entry
    size_t text_buffer_size = 0;
    char line[1024];
    int state = 0; // 0=index, 1=time, 2=text, 3=skip until blank line

    while (fgets(line, sizeof(line), file)) {
        // Remove trailing newline (LF or CRLF)
        size_t len = strlen(line);
        if (len > 0 && line[len-1] == '\n') line[--len] = '\0';
        if (len > 0 && line[len-1] == '\r') line[--len] = '\0';

        if (state == 0) { // Expecting subtitle index
            if (len == 0) continue; // Skip empty lines between cues

            // The index value itself is not used; it only marks a new cue
            current_entry = calloc(1, sizeof(subtitle_entry_t));
            if (!current_entry) {
                fprintf(stderr, "Memory allocation failed while parsing subtitles\n");
                break;
            }
            state = 1;
        } else if (state == 1) { // Expecting time range
            char start_time[32], end_time[32];
            int valid = sscanf(line, "%31s --> %31s", start_time, end_time) == 2;
            if (valid) {
                current_entry->start_frame = srt_time_to_frame(start_time, fps);
                current_entry->end_frame = srt_time_to_frame(end_time, fps);
                valid = current_entry->start_frame >= 0 && current_entry->end_frame >= 0;
            }
            if (!valid) {
                free(current_entry);
                current_entry = NULL;
                // A blank line here already ends the broken cue; otherwise
                // skip its text up to the next blank line
                state = (len == 0) ? 0 : 3;
                continue;
            }

            // Initialize text buffer
            text_buffer_size = 256;
            text_buffer = malloc(text_buffer_size);
            if (!text_buffer) {
                free(current_entry);
                current_entry = NULL;
                fprintf(stderr, "Memory allocation failed while parsing subtitles\n");
                break;
            }
            text_buffer[0] = '\0';
            state = 2;
        } else if (state == 2) { // Collecting subtitle text
            if (len == 0) {
                // Blank line ends the cue; the entry takes ownership of the buffer
                current_entry->text = text_buffer;
                text_buffer = NULL;
                srt_append_entry(&head, &tail, current_entry);
                current_entry = NULL;
                state = 0;
            } else {
                // Append text line, separated from the previous one by '\n'
                size_t current_len = strlen(text_buffer);
                size_t needed = current_len + len + 2; // +2 for newline and null

                if (needed > text_buffer_size) {
                    text_buffer_size = needed + 256;
                    char *new_buffer = realloc(text_buffer, text_buffer_size);
                    if (!new_buffer) {
                        free(text_buffer);
                        text_buffer = NULL;
                        free(current_entry);
                        current_entry = NULL;
                        fprintf(stderr, "Memory allocation failed while parsing subtitles\n");
                        break;
                    }
                    text_buffer = new_buffer;
                }

                if (current_len > 0) {
                    text_buffer[current_len++] = '\n';
                }
                memcpy(text_buffer + current_len, line, len + 1); // includes the terminator
            }
        } else { // state 3: skip to next blank line
            if (len == 0) {
                state = 0;
            }
        }
    }

    // Handle a cue left open when the file does not end with a blank line
    if (current_entry) {
        if (text_buffer) {
            current_entry->text = text_buffer;
            text_buffer = NULL;
            srt_append_entry(&head, &tail, current_entry);
        } else {
            // File ended right after an index line: nothing usable to keep
            free(current_entry);
        }
        current_entry = NULL;
    }
    free(text_buffer);

    fclose(file);
    return head;
}
// Release every node of a subtitle list together with its text.
// Passing NULL is a no-op.
static void free_subtitle_list(subtitle_entry_t *list) {
    for (subtitle_entry_t *node = list; node != NULL; ) {
        // Grab the successor before the node itself is released
        subtitle_entry_t *following = node->next;
        free(node->text);
        free(node);
        node = following;
    }
}
// Write subtitle packet to output
//
// output - stream the packet is appended to
// index  - subtitle object index; only the low 24 bits are stored
// opcode - SSF opcode byte
// text   - opcode argument written before the terminating 0x00, may be NULL
//
// Stream layout: packet type (1 byte), payload size (uint32), index (uint24),
// opcode (1 byte), text bytes, 0x00 terminator. Multi-byte fields are written
// byte by byte in little-endian order so the output does not depend on the
// host's endianness or on the width of size_t.
//
// Returns the total number of bytes making up the packet, header included.
static int write_subtitle_packet(FILE *output, uint32_t index, uint8_t opcode, const char *text) {
    size_t text_len = text ? strlen(text) : 0;
    // index (3 bytes) + opcode + text + null terminator
    uint32_t packet_size = (uint32_t)(3 + 1 + text_len + 1);

    uint8_t header[9];
    header[0] = TEV_PACKET_SUBTITLE;
    header[1] = packet_size & 0xFF;
    header[2] = (packet_size >> 8) & 0xFF;
    header[3] = (packet_size >> 16) & 0xFF;
    header[4] = (packet_size >> 24) & 0xFF;
    header[5] = index & 0xFF;
    header[6] = (index >> 8) & 0xFF;
    header[7] = (index >> 16) & 0xFF;
    header[8] = opcode;
    fwrite(header, 1, sizeof(header), output);

    if (text_len > 0) {
        fwrite(text, 1, text_len, output);
    }

    // Arguments are always terminated by 0x00, even when there is no text
    uint8_t null_term = 0x00;
    fwrite(&null_term, 1, 1, output);

    return (int)(packet_size + 5); // packet_size + packet_type + size field
}
// Process subtitles for the current frame
//
// Emits a "show" packet (opcode 0x01) for every cue starting on `frame_num`
// and a "hide" packet (opcode 0x02) for every cue ending on it, all on
// subtitle index 0. Scanning starts at enc->current_subtitle and stops at the
// first cue that starts after `frame_num`, so the list is expected to be
// ordered by start frame.
//
// Returns the number of bytes written to `output` (0 when the encoder has no
// subtitles).
static int process_subtitles(tev_encoder_t *enc, int frame_num, FILE *output) {
    if (!enc->has_subtitles) return 0;

    int bytes_written = 0;

    for (subtitle_entry_t *sub = enc->current_subtitle;
         sub && sub->start_frame <= frame_num;
         sub = sub->next) {
        if (sub->start_frame == frame_num) {
            // Show subtitle; a cue without text is sent as an empty string
            const char *text = sub->text ? sub->text : "";
            bytes_written += write_subtitle_packet(output, 0, 0x01, text);
            if (enc->verbose) {
                printf("Frame %d: Showing subtitle: %.50s%s\n",
                       frame_num, text, strlen(text) > 50 ? "..." : "");
            }
        }

        if (sub->end_frame == frame_num) {
            // Hide subtitle
            bytes_written += write_subtitle_packet(output, 0, 0x02, NULL);
            if (enc->verbose) {
                printf("Frame %d: Hiding subtitle\n", frame_num);
            }
        }
    }

    // Retire cues from the head of the list only once they have both started
    // and ended. A later cue finishing first must not drop an earlier cue
    // that is still waiting for its hide packet.
    while (enc->current_subtitle &&
           enc->current_subtitle->start_frame <= frame_num &&
           enc->current_subtitle->end_frame <= frame_num) {
        enc->current_subtitle = enc->current_subtitle->next;
    }

    return bytes_written;
}
// Initialize encoder // Initialize encoder
static tev_encoder_t* init_encoder(void) { static tev_encoder_t* init_encoder(void) {
tev_encoder_t *enc = calloc(1, sizeof(tev_encoder_t)); tev_encoder_t *enc = calloc(1, sizeof(tev_encoder_t));
@@ -836,6 +1068,10 @@ static tev_encoder_t* init_encoder(void) {
enc->fps = 0; // Will be detected from input enc->fps = 0; // Will be detected from input
enc->output_fps = 0; // No frame rate conversion by default enc->output_fps = 0; // No frame rate conversion by default
enc->verbose = 0; enc->verbose = 0;
enc->subtitle_file = NULL;
enc->has_subtitles = 0;
enc->subtitle_list = NULL;
enc->current_subtitle = NULL;
// Rate control defaults // Rate control defaults
enc->target_bitrate_kbps = 0; // 0 = quality mode enc->target_bitrate_kbps = 0; // 0 = quality mode
@@ -1092,6 +1328,11 @@ static char *execute_command(const char *command) {
if (!pipe) return NULL; if (!pipe) return NULL;
char *result = malloc(4096); char *result = malloc(4096);
if (!result) {
pclose(pipe);
return NULL;
}
size_t len = fread(result, 1, 4095, pipe); size_t len = fread(result, 1, 4095, pipe);
result[len] = '\0'; result[len] = '\0';
@@ -1197,7 +1438,7 @@ static int start_video_conversion(tev_encoder_t *enc) {
"ffmpeg -v quiet -i \"%s\" -f rawvideo -pix_fmt rgb24 " "ffmpeg -v quiet -i \"%s\" -f rawvideo -pix_fmt rgb24 "
"-vf \"fps=%d,scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" " "-vf \"fps=%d,scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" "
"-y - 2>&1", "-y - 2>&1",
enc->input_file, enc->width, enc->height, enc->width, enc->height, enc->output_fps); enc->input_file, enc->output_fps, enc->width, enc->height, enc->width, enc->height);
} else { } else {
// No frame rate conversion // No frame rate conversion
snprintf(command, sizeof(command), snprintf(command, sizeof(command),
@@ -1373,6 +1614,7 @@ static void show_usage(const char *program_name) {
printf("Options:\n"); printf("Options:\n");
printf(" -i, --input FILE Input video file\n"); printf(" -i, --input FILE Input video file\n");
printf(" -o, --output FILE Output video file (use '-' for stdout)\n"); printf(" -o, --output FILE Output video file (use '-' for stdout)\n");
printf(" -s, --subtitles FILE SubRip (.srt) subtitle file\n");
printf(" -w, --width N Video width (default: %d)\n", DEFAULT_WIDTH); printf(" -w, --width N Video width (default: %d)\n", DEFAULT_WIDTH);
printf(" -h, --height N Video height (default: %d)\n", DEFAULT_HEIGHT); printf(" -h, --height N Video height (default: %d)\n", DEFAULT_HEIGHT);
printf(" -f, --fps N Output frames per second (enables frame rate conversion)\n"); printf(" -f, --fps N Output frames per second (enables frame rate conversion)\n");
@@ -1398,6 +1640,7 @@ static void show_usage(const char *program_name) {
printf("Examples:\n"); printf("Examples:\n");
printf(" %s -i input.mp4 -o output.mv2 # Use default setting (q=2)\n", program_name); printf(" %s -i input.mp4 -o output.mv2 # Use default setting (q=2)\n", program_name);
printf(" %s -i input.avi -f 15 -q 3 -o output.mv2 # 15fps @ q=3\n", program_name); printf(" %s -i input.avi -f 15 -q 3 -o output.mv2 # 15fps @ q=3\n", program_name);
printf(" %s -i input.mp4 -s input.srt -o output.mv2 # With SubRip subtitles\n", program_name);
// printf(" %s -i input.mp4 -b 800 -o output.mv2 # 800 kbps bitrate target\n", program_name); // printf(" %s -i input.mp4 -b 800 -o output.mv2 # 800 kbps bitrate target\n", program_name);
// printf(" %s -i input.avi -f 15 -b 500 -o output.mv2 # 15fps @ 500 kbps\n", program_name); // printf(" %s -i input.avi -f 15 -b 500 -o output.mv2 # 15fps @ 500 kbps\n", program_name);
// printf(" %s --test -b 1000 -o test.mv2 # Test with 1000 kbps target\n", program_name); // printf(" %s --test -b 1000 -o test.mv2 # Test with 1000 kbps target\n", program_name);
@@ -1414,6 +1657,11 @@ static void cleanup_encoder(tev_encoder_t *enc) {
unlink(TEMP_AUDIO_FILE); // Remove temporary audio file unlink(TEMP_AUDIO_FILE); // Remove temporary audio file
} }
free(enc->input_file);
free(enc->output_file);
free(enc->subtitle_file);
free_subtitle_list(enc->subtitle_list);
free_encoder(enc); free_encoder(enc);
} }
@@ -1432,6 +1680,7 @@ int main(int argc, char *argv[]) {
static struct option long_options[] = { static struct option long_options[] = {
{"input", required_argument, 0, 'i'}, {"input", required_argument, 0, 'i'},
{"output", required_argument, 0, 'o'}, {"output", required_argument, 0, 'o'},
{"subtitles", required_argument, 0, 's'},
{"width", required_argument, 0, 'w'}, {"width", required_argument, 0, 'w'},
{"height", required_argument, 0, 'h'}, {"height", required_argument, 0, 'h'},
{"fps", required_argument, 0, 'f'}, {"fps", required_argument, 0, 'f'},
@@ -1446,7 +1695,7 @@ int main(int argc, char *argv[]) {
int option_index = 0; int option_index = 0;
int c; int c;
while ((c = getopt_long(argc, argv, "i:o:w:h:f:q:b:vt", long_options, &option_index)) != -1) { while ((c = getopt_long(argc, argv, "i:o:s:w:h:f:q:b:vt", long_options, &option_index)) != -1) {
switch (c) { switch (c) {
case 'i': case 'i':
enc->input_file = strdup(optarg); enc->input_file = strdup(optarg);
@@ -1455,6 +1704,9 @@ int main(int argc, char *argv[]) {
enc->output_file = strdup(optarg); enc->output_file = strdup(optarg);
enc->output_to_stdout = (strcmp(optarg, "-") == 0); enc->output_to_stdout = (strcmp(optarg, "-") == 0);
break; break;
case 's':
enc->subtitle_file = strdup(optarg);
break;
case 'w': case 'w':
enc->width = atoi(optarg); enc->width = atoi(optarg);
break; break;
@@ -1528,6 +1780,21 @@ int main(int argc, char *argv[]) {
} }
} }
// Load subtitle file if specified
if (enc->subtitle_file) {
enc->subtitle_list = parse_srt_file(enc->subtitle_file, enc->fps);
if (enc->subtitle_list) {
enc->has_subtitles = 1;
enc->current_subtitle = enc->subtitle_list;
if (enc->verbose) {
printf("Loaded subtitles from: %s\n", enc->subtitle_file);
}
} else {
fprintf(stderr, "Failed to parse subtitle file: %s\n", enc->subtitle_file);
// Continue without subtitles
}
}
// Allocate buffers // Allocate buffers
if (!alloc_encoder_buffers(enc)) { if (!alloc_encoder_buffers(enc)) {
fprintf(stderr, "Failed to allocate encoder buffers\n"); fprintf(stderr, "Failed to allocate encoder buffers\n");
@@ -1641,6 +1908,9 @@ int main(int argc, char *argv[]) {
// Process audio for this frame // Process audio for this frame
process_audio(enc, frame_count, output); process_audio(enc, frame_count, output);
// Process subtitles for this frame
process_subtitles(enc, frame_count, output);
// Encode frame // Encode frame
if (!encode_frame(enc, output, frame_count)) { if (!encode_frame(enc, output, frame_count)) {
fprintf(stderr, "Failed to encode frame %d\n", frame_count); fprintf(stderr, "Failed to encode frame %d\n", frame_count);