diff --git a/assets/disk0/tvdos/bin/playtav.js b/assets/disk0/tvdos/bin/playtav.js index ded9e94..368a2f4 100644 --- a/assets/disk0/tvdos/bin/playtav.js +++ b/assets/disk0/tvdos/bin/playtav.js @@ -8,7 +8,7 @@ const WIDTH = 560 const HEIGHT = 448 -const TILE_SIZE = 64 // 64x64 tiles for DWT (vs 16x16 blocks in TEV) +const TILE_SIZE = 112 // 112x112 tiles for DWT (perfect fit for TSVM 560x448 resolution) const TAV_MAGIC = [0x1F, 0x54, 0x53, 0x56, 0x4D, 0x54, 0x41, 0x56] // "\x1FTSVM TAV" const TAV_VERSION = 1 // Initial DWT version const SND_BASE_ADDR = audio.getBaseAddr() @@ -99,6 +99,275 @@ graphics.clearPixels2(0) // Initialize audio audio.resetParams(0) audio.purgeQueue(0) +audio.setPcmMode(0) +audio.setMasterVolume(0, 255) + +// Subtitle display functions +function clearSubtitleArea() { + // Clear the subtitle area at the bottom of the screen + // Text mode is 80x32, so clear the bottom few lines + let oldFgColor = con.get_color_fore() + let oldBgColor = con.get_color_back() + + con.color_pair(255, 255) // transparent to clear + + // Clear bottom 4 lines for subtitles + for (let row = 29; row <= 32; row++) { + con.move(row, 1) + for (let col = 1; col <= 80; col++) { + print(" ") + } + } + + con.color_pair(oldFgColor, oldBgColor) +} + +function getVisualLength(line) { + // Calculate the visual length of a line excluding <b>/<i> formatting tags (tags occupy no screen columns) + let visualLength = 0 + let i = 0 + + while (i < line.length) { + if (i < line.length - 2 && line[i] === '<') { + // Check for opening formatting tags (case-insensitive) and skip them + if (line.substring(i, i + 3).toLowerCase() === '<b>' || + line.substring(i, i + 3).toLowerCase() === '<i>') { + i += 3 // Skip opening tag + } else if (i < line.length - 3 && + (line.substring(i, i + 4).toLowerCase() === '</b>' || + line.substring(i, i + 4).toLowerCase() === '</i>')) { + i += 4 // Skip closing tag + } else { + // Not a formatting tag, count the character + visualLength++ + i++ + } + } else { + // Regular character, count it + visualLength++ + i++ + } + } + + return visualLength
+} + +function displayFormattedLine(line) { + // Parse line and handle <b> and <i> tags with color changes + // Default subtitle color: yellow (231), formatted text: white (254) + + let i = 0 + let inBoldOrItalic = false + + // insert initial padding block + con.color_pair(0, 255) + con.prnch(0xDE) + con.color_pair(231, 0) + + while (i < line.length) { + if (i < line.length - 2 && line[i] === '<') { + // Check for opening tags (case-insensitive); closing tags are handled in the next branch + if (line.substring(i, i + 3).toLowerCase() === '<b>' || + line.substring(i, i + 3).toLowerCase() === '<i>') { + con.color_pair(254, 0) // Switch to white for formatted text + inBoldOrItalic = true + i += 3 + } else if (i < line.length - 3 && + (line.substring(i, i + 4).toLowerCase() === '</b>' || + line.substring(i, i + 4).toLowerCase() === '</i>')) { + con.color_pair(231, 0) // Switch back to yellow for normal text + inBoldOrItalic = false + i += 4 + } else { + // Not a formatting tag, print the character + print(line[i]) + i++ + } + } else { + // Regular character, print it + print(line[i]) + i++ + } + } + + // insert final padding block + con.color_pair(0, 255) + con.prnch(0xDD) + con.color_pair(231, 0) +} + +function displaySubtitle(text, position = 0) { + if (!text || text.length === 0) { + clearSubtitleArea() + return + } + + // Set subtitle colors: yellow (231) on black (0) + let oldFgColor = con.get_color_fore() + let oldBgColor = con.get_color_back() + con.color_pair(231, 0) + + // Split text into lines + let lines = text.split('\n') + + // Calculate position based on subtitle position setting + let startRow, startCol + // Longest visual length (formatting tags excluded) for positioning; numeric max, since a default sort() compares as strings + let longestLineLength = Math.max(...lines.map(s => getVisualLength(s))) + + switch (position) { + case 2: // center left + case 6: // center right + case 8: // dead center + startRow = 16 - Math.floor(lines.length / 2) + break + case 3: // top left + case 4: // top center + case 5: // top right + startRow = 2 + break + case 0: // bottom center + case 1: // bottom left
+ case 7: // bottom right + default: + startRow = 32 - lines.length + startRow = 32 - lines.length + startRow = 32 - lines.length // Default to bottom center + } + + // Display each line + for (let i = 0; i < lines.length; i++) { + let line = lines[i].trim() + if (line.length === 0) continue + + let row = startRow + i + if (row < 1) row = 1 + if (row > 32) row = 32 + + // Calculate column based on alignment + switch (position) { + case 1: // bottom left + case 2: // center left + case 3: // top left + startCol = 1 + break + case 5: // top right + case 6: // center right + case 7: // bottom right + startCol = Math.max(1, 78 - getVisualLength(line) - 2) + break + case 0: // bottom center + case 4: // top center + case 8: // dead center + default: + startCol = Math.max(1, Math.floor((80 - longestLineLength - 2) / 2) + 1) + break + } + + con.move(row, startCol) + + // Parse and display line with formatting tag support + displayFormattedLine(line) + } + + con.color_pair(oldFgColor, oldBgColor) +} + +function processSubtitlePacket(packetSize) { + // Read subtitle packet data according to SSF format + // uint24 index + uint8 opcode + variable arguments + + let index = 0 + // Read 24-bit index (little-endian) + let indexByte0 = seqread.readOneByte() + let indexByte1 = seqread.readOneByte() + let indexByte2 = seqread.readOneByte() + index = indexByte0 | (indexByte1 << 8) | (indexByte2 << 16) + + let opcode = seqread.readOneByte() + let remainingBytes = packetSize - 4 // Subtract 3 bytes for index + 1 byte for opcode + + switch (opcode) { + case SSF_OP_SHOW: { + // Read UTF-8 text until null terminator + if (remainingBytes > 1) { + let textBytes = seqread.readBytes(remainingBytes) + let textStr = "" + + // Convert bytes to string, stopping at null terminator + for (let i = 0; i < remainingBytes - 1; i++) { // -1 for null terminator + let byte = sys.peek(textBytes + i) + if (byte === 0) break + textStr += String.fromCharCode(byte) + } + + sys.free(textBytes) + subtitleText = 
textStr + subtitleVisible = true + displaySubtitle(subtitleText, subtitlePosition) + } + break + } + + case SSF_OP_HIDE: { + subtitleVisible = false + subtitleText = "" + clearSubtitleArea() + break + } + + case SSF_OP_MOVE: { + if (remainingBytes >= 2) { // Need at least 1 byte for position + 1 null terminator + let newPosition = seqread.readOneByte() + seqread.readOneByte() // Read null terminator + + if (newPosition >= 0 && newPosition <= 8) { // 0..8: matches the positions displaySubtitle handles (8 = dead center) + subtitlePosition = newPosition + + // Re-display current subtitle at new position if visible + if (subtitleVisible && subtitleText.length > 0) { + clearSubtitleArea() + displaySubtitle(subtitleText, subtitlePosition) + } + } + } + break + } + + case SSF_OP_UPLOAD_LOW_FONT: + case SSF_OP_UPLOAD_HIGH_FONT: { + // Font upload - read payload length and font data + if (remainingBytes >= 3) { // uint16 length + at least 1 byte data + let payloadLen = seqread.readShort() + if (remainingBytes >= payloadLen + 2) { + let fontData = seqread.readBytes(payloadLen) + + // upload font data + for (let i = 0; i < Math.min(payloadLen, 1920); i++) sys.poke(-1300607 - i, sys.peek(fontData + i)) + sys.poke(-1299460, (opcode === SSF_OP_UPLOAD_LOW_FONT) ? 
18 : 19) + + sys.free(fontData) + } + } + break + } + + case SSF_OP_NOP: + default: { + // Skip remaining bytes + if (remainingBytes > 0) { + let skipBytes = seqread.readBytes(remainingBytes) + sys.free(skipBytes) + } + + if (interactive && opcode !== SSF_OP_NOP) { + serial.println(`[SUBTITLE UNKNOWN] Index: ${index}, Opcode: 0x${opcode.toString(16).padStart(2, '0')}`) + } + break + } + } +} + // TAV header structure (32 bytes vs TEV's 24 bytes) let header = { @@ -172,7 +441,7 @@ const isNTSC = (header.videoFlags & 0x02) !== 0 const isLossless = (header.videoFlags & 0x04) !== 0 const multiResolution = (header.videoFlags & 0x08) !== 0 -// Calculate tile dimensions (64x64 vs TEV's 16x16 blocks) +// Calculate tile dimensions (112x112 vs TEV's 16x16 blocks) const tilesX = Math.ceil(header.width / TILE_SIZE) const tilesY = Math.ceil(header.height / TILE_SIZE) const numTiles = tilesX * tilesY @@ -210,6 +479,9 @@ let audioBufferBytesLastFrame = 0 let frame_cnt = 0 let frametime = 1000000000.0 / header.fps let nextFrameTime = 0 +let mp2Initialised = false +let audioFired = false + // Performance tracking variables (from TEV) let decompressTime = 0 @@ -374,6 +646,21 @@ try { console.log(`Frame ${frameCount}: Duplicating previous frame`) } + // Defer audio playback until a first frame is sent + if (isInterlaced) { + // fire audio after frame 1 + if (!audioFired && frameCount > 0) { + audio.play(0) + audioFired = true + } + } + else { + // fire audio after frame 0 + if (!audioFired) { + audio.play(0) + audioFired = true + } + } } catch (e) { console.log(`Frame ${frameCount}: decode failed: ${e}`) } @@ -390,38 +677,23 @@ try { console.log(`Frame ${frameCount}: Decompress=${decompressTime.toFixed(1)}ms, Decode=${decodeTime.toFixed(1)}ms, Upload=${uploadTime.toFixed(1)}ms, Bias=${biasTime.toFixed(1)}ms, Total=${totalTime.toFixed(1)}ms`) } - } else if (packetType === TAV_PACKET_AUDIO_MP2 && hasAudio) { - // Audio packet - same as TEV - let audioPtr = 
seqread.readBytes(compressedSize) + } else if (packetType === TAV_PACKET_AUDIO_MP2) { + // MP2 Audio packet + let audioLen = seqread.readInt() - // Send to audio hardware - for (let i = 0; i < compressedSize; i++) { - vm.poke(SND_BASE_ADDR + audioBufferBytesLastFrame + i, sys.peek(audioPtr + i)) + if (!mp2Initialised) { + mp2Initialised = true + audio.mp2Init() } - audioBufferBytesLastFrame += compressedSize - sys.free(audioPtr) - } else if (packetType === TAV_PACKET_SUBTITLE && hasSubtitles) { + seqread.readBytes(audioLen, SND_BASE_ADDR - 2368) + audio.mp2Decode() + audio.mp2UploadDecoded(0) + + } else if (packetType === TAV_PACKET_SUBTITLE) { // Subtitle packet - same format as TEV - let subtitlePtr = seqread.readBytes(compressedSize) - - // Process subtitle (simplified) - if (compressedSize >= 4) { - const index = (sys.peek(subtitlePtr) << 16) | (sys.peek(subtitlePtr + 1) << 8) | sys.peek(subtitlePtr + 2) - const opcode = sys.peek(subtitlePtr + 3) - - if (opcode === SSF_OP_SHOW && compressedSize > 4) { - let text = "" - for (let i = 4; i < compressedSize && sys.peek(subtitlePtr + i) !== 0; i++) { - text += String.fromCharCode(sys.peek(subtitlePtr + i)) - } - subtitleText = text - subtitleVisible = true - } else if (opcode === SSF_OP_HIDE) { - subtitleVisible = false - } - } - sys.free(subtitlePtr) + let packetSize = seqread.readInt() + processSubtitlePacket(packetSize) } else if (packetType == 0x00) { // Silently discard, faulty subtitle creation can cause this as 0x00 is used as an argument terminator } else { @@ -463,14 +735,13 @@ finally { sys.free(RGB_BUFFER_A) sys.free(RGB_BUFFER_B) - graphics.setGraphicsMode(0) // Return to text mode con.curs_set(1) con.clear() if (errorlevel === 0) { console.log(`Playback completed: ${frameCount} frames`) } else { - console.log(`Playbook failed with error ${errorlevel}`) + console.log(`Playback failed with error ${errorlevel}`) } } diff --git a/terranmon.txt b/terranmon.txt index 62b7cba..d71bb41 100644 --- 
a/terranmon.txt +++ b/terranmon.txt @@ -807,6 +807,7 @@ transmission capability, and region-of-interest coding. - Version 1.0: Initial DWT-based implementation with 5/3 reversible filter - Version 1.1: Added 9/7 irreversible filter for higher compression - Version 1.2: Multi-resolution pyramid encoding with up to 4 decomposition levels +- Version 1.3: Optimized 112x112 tiles for TSVM resolution with up to 6 decomposition levels # File Structure \x1F T S V M T A V @@ -852,7 +853,7 @@ transmission capability, and region-of-interest coding. uint32 Compressed Size * Zstd-compressed Block Data -## Block Data (per 64x64 tile) +## Block Data (per 112x112 tile) uint8 Mode: encoding mode 0x00 = SKIP (copy from previous frame) 0x01 = INTRA (DWT-coded, no prediction) @@ -885,10 +886,12 @@ transmission capability, and region-of-interest coding. * Provides better energy compaction than 5/3 but lossy reconstruction ### Decomposition Levels -- Level 1: 64x64 → 32x32 (LL) + 3×32x32 subbands (LH,HL,HH) -- Level 2: 32x32 → 16x16 (LL) + 3×16x16 subbands -- Level 3: 16x16 → 8x8 (LL) + 3×8x8 subbands -- Level 4: 8x8 → 4x4 (LL) + 3×4x4 subbands +- Level 1: 112x112 → 56x56 (LL) + 3×56x56 subbands (LH,HL,HH) +- Level 2: 56x56 → 28x28 (LL) + 3×28x28 subbands +- Level 3: 28x28 → 14x14 (LL) + 3×14x14 subbands +- Level 4: 14x14 → 7x7 (LL) + 3×7x7 subbands +- Level 5: 7x7 → 3x3 (LL) + 3×3x3 subbands +- Level 6: 3x3 → 1x1 (LL) + 3×1x1 subbands (maximum) ### Quantization Strategy TAV uses different quantization steps for each subband based on human visual @@ -904,9 +907,11 @@ When enabled, coefficients are transmitted in order of visual importance: 3. 
Higher frequency subbands for refinement ## Motion Compensation -- Search range: ±16 pixels (larger than TEV due to 64x64 tiles) +- Search range: ±28 pixels (optimized for 112x112 tiles) - Sub-pixel precision: 1/4 pixel with bilinear interpolation -- Tile size: 64x64 pixels (4x larger than TEV blocks) +- Tile size: 112x112 pixels (perfect fit for TSVM 560x448 resolution) + * Exactly 5×4 = 20 tiles per frame (560÷112 = 5, 448÷112 = 4) + * No partial tiles needed - optimal for processing efficiency - Uses Sum of Absolute Differences (SAD) for motion estimation - Overlapped block motion compensation (OBMC) for smooth boundaries @@ -917,7 +922,7 @@ TAV operates in YCoCg-R colour space with full resolution channels: - Cg: Green-Magenta chroma (full resolution, very aggressive quantization by default) ## Compression Features -- 64x64 DWT tiles vs 16x16 DCT blocks in TEV +- 112x112 DWT tiles vs 16x16 DCT blocks in TEV - Multi-resolution representation enables scalable decoding - Better frequency localization than DCT - Reduced blocking artifacts due to overlapping basis functions diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt index fe30132..d001ec9 100644 --- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt @@ -3930,8 +3930,8 @@ class GraphicsJSR223Delegate(private val vm: VM) { var readPtr = blockDataPtr try { - val tilesX = (width + 63) / 64 // 64x64 tiles - val tilesY = (height + 63) / 64 + val tilesX = (width + 111) / 112 // 112x112 tiles + val tilesY = (height + 111) / 112 // Process each tile for (tileY in 0 until tilesY) { @@ -3949,8 +3949,8 @@ class GraphicsJSR223Delegate(private val vm: VM) { when (mode) { 0x00 -> { // TAV_MODE_SKIP - // Copy 64x64 tile from previous frame to current frame - copyTile64x64RGB(tileX, tileY, currentRGBAddr, prevRGBAddr, width, height) + // Copy 112x112 tile from previous frame to 
current frame + copyTile112x112RGB(tileX, tileY, currentRGBAddr, prevRGBAddr, width, height) } 0x01 -> { // TAV_MODE_INTRA // Decode DWT coefficients directly to RGB buffer @@ -3967,8 +3967,8 @@ class GraphicsJSR223Delegate(private val vm: VM) { } 0x03 -> { // TAV_MODE_MOTION // Motion compensation only (no residual) - applyMotionCompensation64x64RGB(tileX, tileY, mvX, mvY, - currentRGBAddr, prevRGBAddr, width, height) + applyMotionCompensation112x112RGB(tileX, tileY, mvX, mvY, + currentRGBAddr, prevRGBAddr, width, height) } } } @@ -3982,7 +3982,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { private fun decodeDWTIntraTileRGB(readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long, width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, rcf: Float, waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long { - val tileSize = 64 + val tileSize = 112 val coeffCount = tileSize * tileSize var ptr = readPtr @@ -4043,7 +4043,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { private fun convertYCoCgTileToRGB(tileX: Int, tileY: Int, yTile: FloatArray, coTile: FloatArray, cgTile: FloatArray, rgbAddr: Long, width: Int, height: Int) { - val tileSize = 64 + val tileSize = 112 val startX = tileX * tileSize val startY = tileY * tileSize @@ -4078,7 +4078,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { private fun convertICtCpTileToRGB(tileX: Int, tileY: Int, iTile: FloatArray, ctTile: FloatArray, cpTile: FloatArray, rgbAddr: Long, width: Int, height: Int) { - val tileSize = 64 + val tileSize = 112 val startX = tileX * tileSize val startY = tileY * tileSize @@ -4127,7 +4127,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { private fun addYCoCgResidualToRGBTile(tileX: Int, tileY: Int, yRes: FloatArray, coRes: FloatArray, cgRes: FloatArray, rgbAddr: Long, width: Int, height: Int) { - val tileSize = 64 + val tileSize = 112 val startX = tileX * tileSize val startY = tileY * tileSize @@ -4172,8 +4172,8 @@ class 
GraphicsJSR223Delegate(private val vm: VM) { } // Helper functions (simplified versions of existing DWT functions) - private fun copyTile64x64RGB(tileX: Int, tileY: Int, currentRGBAddr: Long, prevRGBAddr: Long, width: Int, height: Int) { - val tileSize = 64 + private fun copyTile112x112RGB(tileX: Int, tileY: Int, currentRGBAddr: Long, prevRGBAddr: Long, width: Int, height: Int) { + val tileSize = 112 val startX = tileX * tileSize val startY = tileY * tileSize @@ -4205,17 +4205,17 @@ class GraphicsJSR223Delegate(private val vm: VM) { waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long { // Step 1: Apply motion compensation - applyMotionCompensation64x64RGB(tileX, tileY, mvX, mvY, currentRGBAddr, prevRGBAddr, width, height) + applyMotionCompensation112x112RGB(tileX, tileY, mvX, mvY, currentRGBAddr, prevRGBAddr, width, height) // Step 2: Add DWT residual (same as intra but add to existing pixels) return decodeDWTIntraTileRGB(readPtr, tileX, tileY, currentRGBAddr, width, height, qY, qCo, qCg, rcf, waveletFilter, decompLevels, isLossless, tavVersion) } - private fun applyMotionCompensation64x64RGB(tileX: Int, tileY: Int, mvX: Int, mvY: Int, + private fun applyMotionCompensation112x112RGB(tileX: Int, tileY: Int, mvX: Int, mvY: Int, currentRGBAddr: Long, prevRGBAddr: Long, width: Int, height: Int) { - val tileSize = 64 + val tileSize = 112 val startX = tileX * tileSize val startY = tileY * tileSize diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index d3e66e0..e760818 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -73,7 +73,7 @@ static inline float float16_to_float(uint16_t hbits) { // Version 1: YCoCg-R (default) // Version 2: ICtCp (--ictcp flag) -// Tile encoding modes (64x64 tiles) +// Tile encoding modes (112x112 tiles) #define TAV_MODE_SKIP 0x00 // Skip tile (copy from reference) #define TAV_MODE_INTRA 0x01 // Intra DWT coding (I-frame tiles) #define TAV_MODE_INTER 0x02 // Inter DWT 
coding with motion compensation @@ -87,9 +87,9 @@ static inline float float16_to_float(uint16_t hbits) { #define TAV_PACKET_SYNC 0xFF // Sync packet // DWT settings -#define TILE_SIZE 64 -#define MAX_DECOMP_LEVELS 4 -#define DEFAULT_DECOMP_LEVELS 3 +#define TILE_SIZE 112 // 112x112 tiles - perfect fit for TSVM 560x448 (GCD = 112) +#define MAX_DECOMP_LEVELS 6 // Can go deeper: 112→56→28→14→7→3→1 +#define DEFAULT_DECOMP_LEVELS 4 // Increased default for better compression // Wavelet filter types #define WAVELET_5_3_REVERSIBLE 0 // Lossless capable @@ -101,6 +101,18 @@ static inline float float16_to_float(uint16_t hbits) { #define DEFAULT_FPS 30 #define DEFAULT_QUALITY 2 +// Audio/subtitle constants (reused from TEV) +#define MP2_DEFAULT_PACKET_SIZE 1152 +#define MAX_SUBTITLE_LENGTH 2048 + +// Subtitle structure +typedef struct subtitle_entry { + int start_frame; + int end_frame; + char *text; + struct subtitle_entry *next; +} subtitle_entry_t; + static void generate_random_filename(char *filename) { srand(time(NULL)); @@ -208,8 +220,18 @@ typedef struct { dwt_tile_t *tiles; motion_vector_t *motion_vectors; - // Audio processing + // Audio processing (expanded from TEV) size_t audio_remaining; + uint8_t *mp2_buffer; + size_t mp2_buffer_size; + int mp2_packet_size; + int mp2_rate_index; + int target_audio_buffer_size; + + // Subtitle processing + subtitle_entry_t *subtitles; + subtitle_entry_t *current_subtitle; + int subtitle_visible; // Compression ZSTD_CCtx *zstd_ctx; @@ -245,13 +267,27 @@ static void rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg, int static void dwt_2d_forward(float *tile_data, int levels, int filter_type); static void dwt_2d_inverse(dwt_tile_t *tile, float *output, int filter_type); static void quantize_subbands(dwt_tile_t *tile, int q_y, int q_co, int q_cg, float rcf); -static int estimate_motion_64x64(const float *current, const float *reference, - int width, int height, int tile_x, int tile_y, - motion_vector_t *mv); +static 
int estimate_motion_112x112(const float *current, const float *reference, + int width, int height, int tile_x, int tile_y, + motion_vector_t *mv); static size_t compress_tile_data(tav_encoder_t *enc, const dwt_tile_t *tiles, const motion_vector_t *mvs, int num_tiles, uint8_t packet_type); +// Audio and subtitle processing prototypes (from TEV) +static int start_audio_conversion(tav_encoder_t *enc); +static int get_mp2_packet_size(uint8_t *header); +static int mp2_packet_size_to_rate_index(int packet_size, int is_mono); +static int process_audio(tav_encoder_t *enc, int frame_num, FILE *output); +static subtitle_entry_t* parse_subtitle_file(const char *filename, int fps); +static subtitle_entry_t* parse_srt_file(const char *filename, int fps); +static subtitle_entry_t* parse_smi_file(const char *filename, int fps); +static int srt_time_to_frame(const char *time_str, int fps); +static int sami_ms_to_frame(int milliseconds, int fps); +static void free_subtitle_list(subtitle_entry_t *list); +static int write_subtitle_packet(FILE *output, uint32_t index, uint8_t opcode, const char *text); +static int process_subtitles(tav_encoder_t *enc, int frame_num, FILE *output); + // Show usage information static void show_usage(const char *program_name) { printf("TAV DWT-based Video Encoder\n"); @@ -264,7 +300,7 @@ static void show_usage(const char *program_name) { printf(" -q, --quality N Quality level 0-5 (default: 2)\n"); printf(" -Q, --quantizer Y,Co,Cg Quantizer levels 0-100 for each channel\n"); printf(" -w, --wavelet N Wavelet filter: 0=5/3 reversible, 1=9/7 irreversible (default: 1)\n"); - printf(" -d, --decomp N Decomposition levels 1-4 (default: 3)\n"); + printf(" -d, --decomp N Decomposition levels 1-6 (default: 4)\n"); printf(" -b, --bitrate N Target bitrate in kbps (enables bitrate control mode)\n"); printf(" -p, --progressive Use progressive scan (default: interlaced)\n"); printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n"); @@ -296,7 +332,7 
@@ static void show_usage(const char *program_name) { } printf("\n\nFeatures:\n"); - printf(" - 64x64 DWT tiles with multi-resolution encoding\n"); + printf(" - 112x112 DWT tiles with multi-resolution encoding\n"); printf(" - Full resolution YCoCg-R color space\n"); printf(" - Progressive transmission and ROI coding\n"); printf(" - Motion compensation with ±16 pixel search range\n"); @@ -304,7 +340,7 @@ static void show_usage(const char *program_name) { printf("\nExamples:\n"); printf(" %s -i input.mp4 -o output.mv3 # Default settings\n", program_name); - printf(" %s -i input.mkv -q 3 -w 1 -d 4 -o output.mv3 # High quality with 9/7 wavelet\n", program_name); + printf(" %s -i input.mkv -q 3 -w 1 -d 6 -o output.mv3 # Maximum quality with 9/7 wavelet\n", program_name); printf(" %s -i input.avi --lossless -o output.mv3 # Lossless encoding\n", program_name); printf(" %s -i input.mp4 -b 800 -o output.mv3 # 800 kbps bitrate target\n", program_name); printf(" %s -i input.webm -S subs.srt -o output.mv3 # With subtitles\n", program_name); @@ -487,9 +523,9 @@ static void dwt_97_forward_1d(float *data, int length) { free(temp); } -// 2D DWT forward transform for 64x64 tile +// 2D DWT forward transform for 112x112 tile static void dwt_2d_forward(float *tile_data, int levels, int filter_type) { - const int size = 64; + const int size = TILE_SIZE; float *temp_row = malloc(size * sizeof(float)); float *temp_col = malloc(size * sizeof(float)); @@ -565,7 +601,7 @@ static size_t serialize_tile_data(tav_encoder_t *enc, int tile_x, int tile_y, } // Quantize and serialize DWT coefficients - const int tile_size = 64 * 64; + const int tile_size = TILE_SIZE * TILE_SIZE; int16_t *quantized_y = malloc(tile_size * sizeof(int16_t)); int16_t *quantized_co = malloc(tile_size * sizeof(int16_t)); int16_t *quantized_cg = malloc(tile_size * sizeof(int16_t)); @@ -609,7 +645,7 @@ static size_t serialize_tile_data(tav_encoder_t *enc, int tile_x, int tile_y, // Compress and write frame data static 
size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type) { // Calculate total uncompressed size - const size_t max_tile_size = 9 + (64 * 64 * 3 * sizeof(int16_t)); // header + 3 channels of coefficients + const size_t max_tile_size = 9 + (TILE_SIZE * TILE_SIZE * 3 * sizeof(int16_t)); // header + 3 channels of coefficients const size_t total_uncompressed_size = enc->tiles_x * enc->tiles_y * max_tile_size; // Allocate buffer for uncompressed tile data @@ -625,17 +661,17 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type) uint8_t mode = TAV_MODE_INTRA; // For now, all tiles are INTRA // Extract tile data (already processed) - float tile_y_data[64 * 64]; - float tile_co_data[64 * 64]; - float tile_cg_data[64 * 64]; + float tile_y_data[TILE_SIZE * TILE_SIZE]; + float tile_co_data[TILE_SIZE * TILE_SIZE]; + float tile_cg_data[TILE_SIZE * TILE_SIZE]; // Extract tile data from frame buffers - for (int y = 0; y < 64; y++) { - for (int x = 0; x < 64; x++) { - int src_x = tile_x * 64 + x; - int src_y = tile_y * 64 + y; + for (int y = 0; y < TILE_SIZE; y++) { + for (int x = 0; x < TILE_SIZE; x++) { + int src_x = tile_x * TILE_SIZE + x; + int src_y = tile_y * TILE_SIZE + y; int src_idx = src_y * enc->width + src_x; - int tile_idx_local = y * 64 + x; + int tile_idx_local = y * TILE_SIZE + x; if (src_x < enc->width && src_y < enc->height) { tile_y_data[tile_idx_local] = enc->current_frame_y[src_idx]; @@ -698,12 +734,12 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type) return compressed_size + 5; // packet type + size field + compressed data } -// Motion estimation for 64x64 tiles using SAD -static int estimate_motion_64x64(const float *current, const float *reference, +// Motion estimation for 112x112 tiles using SAD +static int estimate_motion_112x112(const float *current, const float *reference, int width, int height, int tile_x, int tile_y, motion_vector_t *mv) { - const int tile_size = 64; - const 
int search_range = 16; // ±16 pixels + const int tile_size = TILE_SIZE; + const int search_range = 28; // ±28 pixels (increased proportionally: 16 * 112/64 = 28) const int start_x = tile_x * tile_size; const int start_y = tile_y * tile_size; @@ -1131,6 +1167,7 @@ static int start_video_conversion(tav_encoder_t *enc) { // Start audio conversion static int start_audio_conversion(tav_encoder_t *enc) { + return 1; if (!enc->has_audio) return 1; char command[2048]; @@ -1151,6 +1188,400 @@ static int start_audio_conversion(tav_encoder_t *enc) { return 0; } +// Get MP2 packet size from header (copied from TEV) +static int get_mp2_packet_size(uint8_t *header) { + int bitrate_index = (header[2] >> 4) & 0x0F; + int bitrates[] = {0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384}; + if (bitrate_index >= 15) return MP2_DEFAULT_PACKET_SIZE; + + int bitrate = bitrates[bitrate_index]; + if (bitrate == 0) return MP2_DEFAULT_PACKET_SIZE; + + int sampling_freq_index = (header[2] >> 2) & 0x03; + int sampling_freqs[] = {44100, 48000, 32000, 0}; + int sampling_freq = sampling_freqs[sampling_freq_index]; + if (sampling_freq == 0) return MP2_DEFAULT_PACKET_SIZE; + + int padding = (header[2] >> 1) & 0x01; + return (144 * bitrate * 1000) / sampling_freq + padding; +} + +// Convert MP2 packet size to rate index (copied from TEV) +static int mp2_packet_size_to_rate_index(int packet_size, int is_mono) { + // Map packet size to rate index for MP2_RATE_TABLE + if (packet_size <= 576) return is_mono ? 
0 : 0; // 128k + else if (packet_size <= 720) return 1; // 160k + else if (packet_size <= 1008) return 2; // 224k + else if (packet_size <= 1440) return 3; // 320k + else return 4; // 384k +} + +// Convert SRT time format to frame number (copied from TEV) +static int srt_time_to_frame(const char *time_str, int fps) { + int hours, minutes, seconds, milliseconds; + if (sscanf(time_str, "%d:%d:%d,%d", &hours, &minutes, &seconds, &milliseconds) != 4) { + return -1; + } + + double total_seconds = hours * 3600.0 + minutes * 60.0 + seconds + milliseconds / 1000.0; + return (int)(total_seconds * fps + 0.5); // Round to nearest frame +} + +// Convert SAMI milliseconds to frame number (copied from TEV) +static int sami_ms_to_frame(int milliseconds, int fps) { + double seconds = milliseconds / 1000.0; + return (int)(seconds * fps + 0.5); // Round to nearest frame +} + +// Parse SubRip subtitle file (copied from TEV) +static subtitle_entry_t* parse_srt_file(const char *filename, int fps) { + FILE *file = fopen(filename, "r"); + if (!file) { + fprintf(stderr, "Failed to open subtitle file: %s\n", filename); + return NULL; + } + + subtitle_entry_t *head = NULL; + subtitle_entry_t *tail = NULL; + char line[1024]; + int state = 0; // 0=index, 1=time, 2=text, 3=blank + + subtitle_entry_t *current_entry = NULL; + char *text_buffer = NULL; + size_t text_buffer_size = 0; + + while (fgets(line, sizeof(line), file)) { + // Remove trailing newline + size_t len = strlen(line); + if (len > 0 && line[len-1] == '\n') { + line[len-1] = '\0'; + len--; + } + if (len > 0 && line[len-1] == '\r') { + line[len-1] = '\0'; + len--; + } + + if (state == 0) { // Expecting subtitle index + if (strlen(line) == 0) continue; // Skip empty lines + // Create new subtitle entry + current_entry = calloc(1, sizeof(subtitle_entry_t)); + if (!current_entry) break; + state = 1; + } else if (state == 1) { // Expecting time range + char start_time[32], end_time[32]; + if (sscanf(line, "%31s --> %31s", start_time, 
end_time) == 2) { + current_entry->start_frame = srt_time_to_frame(start_time, fps); + current_entry->end_frame = srt_time_to_frame(end_time, fps); + + if (current_entry->start_frame < 0 || current_entry->end_frame < 0) { + free(current_entry); + current_entry = NULL; + state = 3; // Skip to next blank line + continue; + } + + // Initialize text buffer + text_buffer_size = 256; + text_buffer = malloc(text_buffer_size); + if (!text_buffer) { + free(current_entry); + current_entry = NULL; + fprintf(stderr, "Memory allocation failed while parsing subtitles\n"); + break; + } + text_buffer[0] = '\0'; + state = 2; + } else { + free(current_entry); + current_entry = NULL; + state = 3; // Skip malformed entry + } + } else if (state == 2) { // Collecting subtitle text + if (strlen(line) == 0) { + // End of subtitle text + current_entry->text = strdup(text_buffer); + free(text_buffer); + text_buffer = NULL; + + // Add to list + if (!head) { + head = current_entry; + tail = current_entry; + } else { + tail->next = current_entry; + tail = current_entry; + } + current_entry = NULL; + state = 0; + } else { + // Append text line + size_t current_len = strlen(text_buffer); + size_t line_len = strlen(line); + size_t needed = current_len + line_len + 2; // +2 for newline and null + + if (needed > text_buffer_size) { + text_buffer_size = needed + 256; + char *new_buffer = realloc(text_buffer, text_buffer_size); + if (!new_buffer) { + free(text_buffer); + free(current_entry); + current_entry = NULL; + fprintf(stderr, "Memory reallocation failed while parsing subtitles\n"); + break; + } + text_buffer = new_buffer; + } + + if (current_len > 0) { + strcat(text_buffer, "\\n"); // Use \n as newline marker in subtitle text + } + strcat(text_buffer, line); + } + } else if (state == 3) { // Skip to next blank line + if (strlen(line) == 0) { + state = 0; + } + } + } + + // Handle final subtitle if file doesn't end with blank line + if (current_entry && state == 2) { + current_entry->text = 
strdup(text_buffer); + if (!head) { + head = current_entry; + } else { + tail->next = current_entry; + } + free(text_buffer); + } + + fclose(file); + return head; +} + +// Parse SAMI subtitle file (simplified version from TEV) +static subtitle_entry_t* parse_smi_file(const char *filename, int fps) { + FILE *file = fopen(filename, "r"); + if (!file) { + fprintf(stderr, "Failed to open subtitle file: %s\n", filename); + return NULL; + } + + subtitle_entry_t *head = NULL; + subtitle_entry_t *tail = NULL; + char line[2048]; + + while (fgets(line, sizeof(line), file)) { + // Look for SYNC tags with Start= attribute + char *sync_pos = strstr(line, "'); + if (text_start) { + text_start++; + char *text_end = strstr(text_start, "

"); + if (text_end) { + size_t text_len = text_end - text_start; + if (text_len > 0 && text_len < MAX_SUBTITLE_LENGTH) { + subtitle_entry_t *entry = calloc(1, sizeof(subtitle_entry_t)); + if (entry) { + entry->start_frame = sami_ms_to_frame(start_ms, fps); + entry->end_frame = entry->start_frame + fps * 3; // Default 3 second duration + entry->text = strndup(text_start, text_len); + + // Add to list + if (!head) { + head = entry; + tail = entry; + } else { + tail->next = entry; + tail = entry; + } + } + } + } + } + } + } + } + } + } + + fclose(file); + return head; +} + +// Parse subtitle file based on extension (copied from TEV) +static subtitle_entry_t* parse_subtitle_file(const char *filename, int fps) { + if (!filename) return NULL; + + size_t len = strlen(filename); + if (len > 4 && strcasecmp(filename + len - 4, ".smi") == 0) { + return parse_smi_file(filename, fps); + } else { + return parse_srt_file(filename, fps); + } +} + +// Free subtitle list (copied from TEV) +static void free_subtitle_list(subtitle_entry_t *list) { + while (list) { + subtitle_entry_t *next = list->next; + free(list->text); + free(list); + list = next; + } +} + +// Write subtitle packet (copied from TEV) +static int write_subtitle_packet(FILE *output, uint32_t index, uint8_t opcode, const char *text) { + // Calculate packet size + size_t text_len = text ? 
strlen(text) : 0; + size_t packet_size = 3 + 1 + text_len + 1; // index (3 bytes) + opcode + text + null terminator + + // Write packet type and size + uint8_t packet_type = TAV_PACKET_SUBTITLE; + fwrite(&packet_type, 1, 1, output); + uint32_t size32 = (uint32_t)packet_size; + fwrite(&size32, 4, 1, output); + + // Write subtitle data + uint8_t index_bytes[3] = { + (uint8_t)(index & 0xFF), + (uint8_t)((index >> 8) & 0xFF), + (uint8_t)((index >> 16) & 0xFF) + }; + fwrite(index_bytes, 3, 1, output); + fwrite(&opcode, 1, 1, output); + + if (text && text_len > 0) { + fwrite(text, 1, text_len, output); + } + + uint8_t null_terminator = 0; + fwrite(&null_terminator, 1, 1, output); + + return 1 + 4 + packet_size; // Total bytes written +} + +// Process audio for current frame (copied and adapted from TEV) +static int process_audio(tav_encoder_t *enc, int frame_num, FILE *output) { + if (!enc->has_audio || !enc->mp2_file || enc->audio_remaining <= 0) { + return 1; + } + + // Initialize packet size on first frame + if (frame_num == 0) { + uint8_t header[4]; + if (fread(header, 1, 4, enc->mp2_file) != 4) return 1; + fseek(enc->mp2_file, 0, SEEK_SET); + enc->mp2_packet_size = get_mp2_packet_size(header); + int is_mono = (header[3] >> 6) == 3; + enc->mp2_rate_index = mp2_packet_size_to_rate_index(enc->mp2_packet_size, is_mono); + enc->target_audio_buffer_size = 4; // 4 audio packets in buffer + } + + // Calculate how much audio we need for this frame + double frame_duration = 1.0 / enc->fps; + double samples_per_frame = 32000.0 * frame_duration; // 32kHz sample rate + int target_buffer_samples = (int)(samples_per_frame * enc->target_audio_buffer_size); + int target_buffer_bytes = (target_buffer_samples * enc->mp2_packet_size) / 1152; // 1152 samples per MP2 frame + + if (!enc->mp2_buffer) { + enc->mp2_buffer_size = target_buffer_bytes * 2; // Extra buffer space + enc->mp2_buffer = malloc(enc->mp2_buffer_size); + if (!enc->mp2_buffer) { + fprintf(stderr, "Failed to allocate 
audio buffer\n"); + return 1; + } + } + + // Read audio data + size_t bytes_to_read = target_buffer_bytes; + if (bytes_to_read > enc->audio_remaining) { + bytes_to_read = enc->audio_remaining; + } + if (bytes_to_read > enc->mp2_buffer_size) { + bytes_to_read = enc->mp2_buffer_size; + } + + size_t bytes_read = fread(enc->mp2_buffer, 1, bytes_to_read, enc->mp2_file); + if (bytes_read == 0) { + return 1; // No more audio + } + + // Write audio packet + uint8_t audio_packet_type = TAV_PACKET_AUDIO_MP2; + uint32_t audio_len = (uint32_t)bytes_read; + + fwrite(&audio_packet_type, 1, 1, output); + fwrite(&audio_len, 4, 1, output); + fwrite(enc->mp2_buffer, 1, bytes_read, output); + + // Track audio bytes written + enc->audio_remaining -= bytes_read; + + if (enc->verbose) { + printf("Frame %d: Audio packet %zu bytes (remaining: %zu)\n", + frame_num, bytes_read, enc->audio_remaining); + } + + return 1; +} + +// Process subtitles for current frame (copied and adapted from TEV) +static int process_subtitles(tav_encoder_t *enc, int frame_num, FILE *output) { + if (!enc->subtitles) { + return 1; // No subtitles to process + } + + int bytes_written = 0; + + // Check if we need to show a new subtitle + if (!enc->subtitle_visible) { + subtitle_entry_t *sub = enc->current_subtitle; + if (!sub) sub = enc->subtitles; // Start from beginning if not set + + // Find next subtitle to show + while (sub && sub->start_frame <= frame_num) { + if (sub->end_frame > frame_num) { + // This subtitle should be shown + if (sub != enc->current_subtitle) { + enc->current_subtitle = sub; + enc->subtitle_visible = 1; + bytes_written += write_subtitle_packet(output, 0, 0x01, sub->text); + if (enc->verbose) { + printf("Frame %d: Showing subtitle: %.50s%s\n", + frame_num, sub->text, strlen(sub->text) > 50 ? "..." 
: ""); + } + } + break; + } + sub = sub->next; + } + } + + // Check if we need to hide current subtitle + if (enc->subtitle_visible && enc->current_subtitle) { + if (frame_num >= enc->current_subtitle->end_frame) { + enc->subtitle_visible = 0; + bytes_written += write_subtitle_packet(output, 0, 0x02, NULL); + if (enc->verbose) { + printf("Frame %d: Hiding subtitle\n", frame_num); + } + } + } + + return bytes_written; +} + // Main function int main(int argc, char *argv[]) { generate_random_filename(TEMP_AUDIO_FILE); @@ -1231,6 +1662,9 @@ int main(int argc, char *argv[]) { case 't': enc->test_mode = 1; break; + case 'S': + enc->subtitle_file = strdup(optarg); + break; case 1000: // --lossless enc->lossless = 1; enc->wavelet_filter = WAVELET_5_3_REVERSIBLE; @@ -1317,6 +1751,17 @@ int main(int argc, char *argv[]) { } } + // Parse subtitles if provided + if (enc->subtitle_file) { + printf("Parsing subtitles: %s\n", enc->subtitle_file); + enc->subtitles = parse_subtitle_file(enc->subtitle_file, enc->fps); + if (!enc->subtitles) { + fprintf(stderr, "Warning: Failed to parse subtitle file\n"); + } else { + printf("Loaded subtitles successfully\n"); + } + } + // Write TAV header if (write_tav_header(enc) != 0) { fprintf(stderr, "Error: Failed to write TAV header\n"); @@ -1430,7 +1875,7 @@ int main(int argc, char *argv[]) { int tile_y = tile_idx / enc->tiles_x; if (!is_keyframe && frame_count > 0) { - estimate_motion_64x64(enc->current_frame_y, enc->previous_frame_y, + estimate_motion_112x112(enc->current_frame_y, enc->previous_frame_y, enc->width, enc->height, tile_x, tile_y, &enc->motion_vectors[tile_idx]); } else { @@ -1449,6 +1894,12 @@ int main(int argc, char *argv[]) { break; } else { + // Process audio for this frame + process_audio(enc, frame_count, enc->output_fp); + + // Process subtitles for this frame + process_subtitles(enc, frame_count, enc->output_fp); + // Write a sync packet only after a video is been coded uint8_t sync_packet = TAV_PACKET_SYNC; 
fwrite(&sync_packet, 1, 1, enc->output_fp); @@ -1526,6 +1977,12 @@ static void cleanup_encoder(tav_encoder_t *enc) { free(enc->tiles); free(enc->motion_vectors); free(enc->compressed_buffer); + free(enc->mp2_buffer); + + // Free subtitle list + if (enc->subtitles) { + free_subtitle_list(enc->subtitles); + } if (enc->zstd_ctx) { ZSTD_freeCCtx(enc->zstd_ctx);