MP2 decoding

2026-06-06 05:28:31 +09:00 · 2025-08-23 00:54:32 +09:00
parent 3c135e48e0
commit 6f6d10cc1b
2 changed files with 248 additions and 90 deletions
--- a/assets/disk0/tvdos/bin/playtev.js
+++ b/assets/disk0/tvdos/bin/playtev.js
@@ -8,6 +8,9 @@ const HEIGHT = 448
 const BLOCK_SIZE = 16  // 16x16 blocks for YCoCg-R
 const TEV_MAGIC = [0x1F, 0x54, 0x53, 0x56, 0x4D, 0x54, 0x45, 0x56] // "\x1FTSVM TEV"
 const TEV_VERSION = 2  // YCoCg-R version
+const SND_BASE_ADDR = audio.getBaseAddr()
+const pcm = require("pcm")
+const MP2_FRAME_SIZE = [144,216,252,288,360,432,504,576,720,864,1008,1152,1440,1728]

 // Block encoding modes
 const TEV_MODE_SKIP = 0x00
@@ -58,6 +61,12 @@ graphics.setGraphicsMode(4) // 4096-color mode
 graphics.clearPixels(0)
 graphics.clearPixels2(0)

+// Initialize audio
+audio.resetParams(0)
+audio.purgeQueue(0)
+audio.setPcmMode(0)
+audio.setMasterVolume(0, 255)
+
 // Check magic number
 let magic = seqread.readBytes(8)
 let magicMatching = true
@@ -135,6 +144,9 @@ sys.memset(DISPLAY_BA_ADDR, 15, FRAME_PIXELS) // Black with alpha=15 (opaque) in
 let frameCount = 0
 let stopPlay = false
 let akku = FRAME_TIME
+let akku2 = 0.0
+let mp2Initialised = false
+let audioFired = false

 // 4x4 Bayer dithering matrix
 const BAYER_MATRIX = [
@@ -158,13 +170,13 @@ function ditherValue(value, x, y) {
    return Math.max(0, Math.min(15, Math.floor(dithered * 15 / 255)))
 }

+let blockDataPtr = sys.malloc(560 * 448 * 3)
+
 // Main decoding loop - simplified for performance
 try {
    let t1 = sys.nanoTime()
    while (!stopPlay && seqread.getReadCount() < FILE_LENGTH && frameCount < totalFrames) {

-        if (akku >= FRAME_TIME) {
-
        // Handle interactive controls
        if (interactive) {
            sys.poke(-40, 1)
@@ -173,87 +185,100 @@ try {
                break
            }
        }
-            
-        // Read packet (1 byte: type)
-        let packetType = seqread.readOneByte()

-        if (packetType == 0xFF) { // Sync packet
-            // Sync packet - frame complete
-            frameCount++
+        if (akku >= FRAME_TIME) {
+            // Read packet (1 byte: type)
+            let packetType = seqread.readOneByte()

-            // Copy current RGB frame to previous frame buffer for next frame reference
-            // memcpy(source, destination, length) - so CURRENT (source) -> PREV (destination)
-            sys.memcpy(CURRENT_RGB_ADDR, PREV_RGB_ADDR, FRAME_PIXELS * 3)
+            if (packetType == 0xFF) { // Sync packet
+                akku -= FRAME_TIME
+
+                // Sync packet - frame complete
+                frameCount++
+
+                // Copy current RGB frame to previous frame buffer for next frame reference
+                // memcpy(source, destination, length) - so CURRENT (source) -> PREV (destination)
+                sys.memcpy(CURRENT_RGB_ADDR, PREV_RGB_ADDR, FRAME_PIXELS * 3)
+
+            } else if (packetType == TEV_PACKET_IFRAME || packetType == TEV_PACKET_PFRAME) {
+                // Video frame packet
+                let payloadLen = seqread.readInt()
+                let compressedPtr = seqread.readBytes(payloadLen)
+                updateDataRateBin(payloadLen)
+
+
+                // Basic sanity check on compressed data
+                if (payloadLen <= 0 || payloadLen > 1000000) {
+                    serial.println(`Frame ${frameCount}: Invalid payload length: ${payloadLen}`)
+                    sys.free(compressedPtr)
+                    continue
+                }
+
+                // Decompress using gzip
+                // Optimized buffer size calculation for TEV YCoCg-R blocks
+                let blocksX = (width + 15) >> 4  // 16x16 blocks
+                let blocksY = (height + 15) >> 4
+                let tevBlockSize = 1 + 4 + 2 + (256 * 2) + (64 * 2) + (64 * 2) // mode + mv + cbp + Y(16x16) + Co(8x8) + Cg(8x8)
+                let decompressedSize = Math.max(payloadLen * 4, blocksX * blocksY * tevBlockSize) // More efficient sizing
+
+                let actualSize
+                try {
+                    // Use gzip decompression (only compression format supported in TSVM JS)
+                    actualSize = gzip.decompFromTo(compressedPtr, payloadLen, blockDataPtr)
+                } catch (e) {
+                    // Decompression failed - skip this frame
+                    serial.println(`Frame ${frameCount}: Gzip decompression failed, skipping (compressed size: ${payloadLen}, error: ${e})`)
+                    sys.free(compressedPtr)
+                    continue
+                }
+
+                // Hardware-accelerated TEV YCoCg-R decoding to RGB buffers
+                try {
+                    graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, height, quality, debugMotionVectors)
+
+                    // Upload RGB buffer to display framebuffer with dithering
+                    graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, DISPLAY_RG_ADDR, DISPLAY_BA_ADDR,
+                                                  width, height, frameCount)
+
+                    // Defer audio playback until the first video frame has been uploaded to the framebuffer
+                    if (!audioFired) {
+                        audio.play(0)
+                        audioFired = true
+                    }
+                } catch (e) {
+                    serial.println(`Frame ${frameCount}: Hardware YCoCg-R decode failed: ${e}`)
+                }

-        } else if (packetType == TEV_PACKET_IFRAME || packetType == TEV_PACKET_PFRAME) {
-            // Video frame packet
-            let payloadLen = seqread.readInt()
-            let compressedPtr = seqread.readBytes(payloadLen)
-            updateDataRateBin(payloadLen)
-            
-            
-            // Basic sanity check on compressed data
-            if (payloadLen <= 0 || payloadLen > 1000000) {
-                serial.println(`Frame ${frameCount}: Invalid payload length: ${payloadLen}`)
                sys.free(compressedPtr)
-                continue
+
+            } else if (packetType == TEV_PACKET_AUDIO_MP2) {
+                // MP2 Audio packet
+                let audioLen = seqread.readInt()
+
+                if (!mp2Initialised) {
+                    mp2Initialised = true
+                    audio.mp2Init()
+                }
+
+                seqread.readBytes(audioLen, SND_BASE_ADDR - 2368)
+                audio.mp2Decode()
+                audio.mp2UploadDecoded(0)
+
+            } else {
+                println(`Unknown packet type: 0x${packetType.toString(16)}`)
+                break
            }
-
-            // Decompress using gzip
-            // Optimized buffer size calculation for TEV YCoCg-R blocks
-            let blocksX = (width + 15) >> 4  // 16x16 blocks
-            let blocksY = (height + 15) >> 4
-            let tevBlockSize = 1 + 4 + 2 + (256 * 2) + (64 * 2) + (64 * 2) // mode + mv + cbp + Y(16x16) + Co(8x8) + Cg(8x8)
-            let decompressedSize = Math.max(payloadLen * 4, blocksX * blocksY * tevBlockSize) // More efficient sizing
-            let blockDataPtr = sys.malloc(decompressedSize)
-
-            let actualSize
-            try {
-                // Use gzip decompression (only compression format supported in TSVM JS)
-                actualSize = gzip.decompFromTo(compressedPtr, payloadLen, blockDataPtr)
-            } catch (e) {
-                // Decompression failed - skip this frame
-                serial.println(`Frame ${frameCount}: Gzip decompression failed, skipping (compressed size: ${payloadLen}, error: ${e})`)
-                sys.free(blockDataPtr)
-                sys.free(compressedPtr)
-                continue
-            }
-            
-            // Hardware-accelerated TEV YCoCg-R decoding to RGB buffers
-            try {
-                graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, height, quality, debugMotionVectors)
-
-                // Upload RGB buffer to display framebuffer with dithering
-                graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, DISPLAY_RG_ADDR, DISPLAY_BA_ADDR,
-                                              width, height, frameCount)
-            } catch (e) {
-                serial.println(`Frame ${frameCount}: Hardware YCoCg-R decode failed: ${e}`)
-            }
-
-            sys.free(blockDataPtr)
-            sys.free(compressedPtr)
-
-        } else if (packetType == TEV_PACKET_AUDIO_MP2) {
-            // Audio packet - skip for now
-            let audioLen = seqread.readInt()
-            seqread.skip(audioLen)
-
-        } else {
-            println(`Unknown packet type: 0x${packetType.toString(16)}`)
-            break
        }
-        }
-
-        sys.sleep(1)

        let t2 = sys.nanoTime()
        akku += (t2 - t1) / 1000000000.0
+        akku2 += (t2 - t1) / 1000000000.0

        // Simple progress display
        if (interactive) {
            con.move(31, 1)
            graphics.setTextFore(161)
-            print(`Frame: ${frameCount}/${totalFrames} (${Math.round(frameCount * 100 / totalFrames)}%) YCoCg-R`)
+            print(`Frame: ${frameCount}/${totalFrames} (${((frameCount / akku2 * 100)|0) / 100}f)         `)
            con.move(32, 1)
            graphics.setTextFore(161)
            print(`VRate: ${(getVideoRate() / 1024 * 8)|0} kbps                               `)
@@ -262,14 +287,16 @@ try {

        t1 = t2
    }
-
-} catch (e) {
+}
+catch (e) {
    printerrln(`TEV YCoCg-R decode error: ${e}`)
    errorlevel = 1
-} finally {
+}
+finally {
    // Cleanup working memory (graphics memory is automatically managed)
    sys.free(ycocgWorkspace)
    sys.free(dctWorkspace)
+    sys.free(blockDataPtr)
    if (CURRENT_RGB_ADDR > 0) sys.free(CURRENT_RGB_ADDR)
    if (PREV_RGB_ADDR > 0) sys.free(PREV_RGB_ADDR)

--- a/video_encoder/encoder_tev.c
+++ b/video_encoder/encoder_tev.c
@@ -376,8 +376,11 @@ typedef struct {
    // Audio handling
    FILE *mp2_file;
    int mp2_packet_size;
+    int mp2_rate_index;
    size_t audio_remaining;
    uint8_t *mp2_buffer;
+    int audio_frames_in_buffer;
+    int target_audio_buffer_size;
    
    // Compression context
    z_stream gzip_stream;
@@ -993,7 +996,10 @@ static tev_encoder_t* init_encoder(void) {
    if (!enc) return NULL;
    
    enc->quality = 4;  // Default quality
-    enc->mp2_packet_size = MP2_DEFAULT_PACKET_SIZE;
+    enc->mp2_packet_size = 0; // Will be detected from MP2 header
+    enc->mp2_rate_index = 0;
+    enc->audio_frames_in_buffer = 0;
+    enc->target_audio_buffer_size = 4;

    init_dct_tables();

@@ -1106,24 +1112,37 @@ static int encode_frame(tev_encoder_t *enc, FILE *output, int frame_num) {
    // Compress block data using gzip (compatible with TSVM decoder)
    size_t block_data_size = blocks_x * blocks_y * sizeof(tev_block_t);
    
-    // Reset compression stream
-    enc->gzip_stream.next_in = (Bytef*)enc->block_data;
-    enc->gzip_stream.avail_in = block_data_size;
-    enc->gzip_stream.next_out = (Bytef*)enc->compressed_buffer;
-    enc->gzip_stream.avail_out = block_data_size * 2;
+    // Initialize fresh gzip stream for each frame (since Z_FINISH terminates the stream)
+    z_stream frame_stream;
+    frame_stream.zalloc = Z_NULL;
+    frame_stream.zfree = Z_NULL;
+    frame_stream.opaque = Z_NULL;
    
-    if (deflateReset(&enc->gzip_stream) != Z_OK) {
-        fprintf(stderr, "Gzip deflateReset failed\n");
+    int init_result = deflateInit2(&frame_stream, Z_DEFAULT_COMPRESSION, 
+                                   Z_DEFLATED, 15 + 16, 8, Z_DEFAULT_STRATEGY); // 15+16 for gzip format
+    
+    if (init_result != Z_OK) {
+        fprintf(stderr, "Failed to initialize gzip compression for frame\n");
        return 0;
    }
    
-    int result = deflate(&enc->gzip_stream, Z_FINISH);
+    // Set up compression stream
+    frame_stream.next_in = (Bytef*)enc->block_data;
+    frame_stream.avail_in = block_data_size;
+    frame_stream.next_out = (Bytef*)enc->compressed_buffer;
+    frame_stream.avail_out = block_data_size * 2;
+    
+    int result = deflate(&frame_stream, Z_FINISH);
    if (result != Z_STREAM_END) {
        fprintf(stderr, "Gzip compression failed: %d\n", result);
+        deflateEnd(&frame_stream);
        return 0;
    }
    
-    size_t compressed_size = enc->gzip_stream.total_out;
+    size_t compressed_size = frame_stream.total_out;
+    
+    // Clean up frame stream
+    deflateEnd(&frame_stream);
    
    // Write frame packet header
    uint8_t packet_type = is_keyframe ? TEV_PACKET_IFRAME : TEV_PACKET_PFRAME;
@@ -1185,7 +1204,7 @@ static int get_video_metadata(tev_encoder_t *enc) {
    
    int num, den;
    if (sscanf(output, "%d/%d", &num, &den) == 2) {
-        enc->fps = (den > 0) ? (num / den) : 30;
+        enc->fps = (den > 0) ? (int)round((float)num/(float)den) : 30;
    } else {
        enc->fps = (int)round(atof(output));
    }
@@ -1242,14 +1261,14 @@ static int start_video_conversion(tev_encoder_t *enc) {
    if (enc->output_fps > 0 && enc->output_fps != enc->fps) {
        // Frame rate conversion requested
        snprintf(command, sizeof(command),
-            "ffmpeg -i \"%s\" -f rawvideo -pix_fmt rgb24 "
+            "ffmpeg -v quiet -i \"%s\" -f rawvideo -pix_fmt rgb24 "
            "-vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d,fps=%d\" "
            "-y - 2>&1",
            enc->input_file, enc->width, enc->height, enc->width, enc->height, enc->output_fps);
    } else {
        // No frame rate conversion
        snprintf(command, sizeof(command),
-            "ffmpeg -i \"%s\" -f rawvideo -pix_fmt rgb24 "
+            "ffmpeg -v quiet -i \"%s\" -f rawvideo -pix_fmt rgb24 "
            "-vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" "
            "-y -",
            enc->input_file, enc->width, enc->height, enc->width, enc->height);
@@ -1274,7 +1293,7 @@ static int start_audio_conversion(tev_encoder_t *enc) {
    
    char command[2048];
    snprintf(command, sizeof(command),
-        "ffmpeg -i \"%s\" -acodec libtwolame -psymodel 4 -b:a 192k -ar %d -ac 2 -y \"%s\" 2>/dev/null",
+        "ffmpeg -v quiet -i \"%s\" -acodec libtwolame -psymodel 4 -b:a 192k -ar %d -ac 2 -y \"%s\" 2>/dev/null",
        enc->input_file, MP2_SAMPLE_RATE, TEMP_AUDIO_FILE);
    
    int result = system(command);
@@ -1290,6 +1309,106 @@ static int start_audio_conversion(tev_encoder_t *enc) {
    return (result == 0);
 }

+// Compute the MP2 frame (packet) size in bytes from the bitrate index and padding bit of a frame header
+static int get_mp2_packet_size(uint8_t *header) {
+    int bitrate_index = (header[2] >> 4) & 0x0F;
+    int bitrates[] = {0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384};
+    if (bitrate_index >= 15) return MP2_DEFAULT_PACKET_SIZE;
+    
+    int bitrate = bitrates[bitrate_index];
+    int padding_bit = (header[2] >> 1) & 0x01;
+    if (bitrate <= 0) return MP2_DEFAULT_PACKET_SIZE;
+    
+    int frame_size = (144 * bitrate * 1000) / MP2_SAMPLE_RATE + padding_bit;
+    return frame_size;
+}
+
+static int mp2_packet_size_to_rate_index(int packet_size, int is_mono) {
+    // Map a packet size to the index of the first TEV frame-size table entry that is >= packet_size
+    const int mp2_frame_sizes[] = {144,216,252,288,360,432,504,576,720,864,1008,1152,1440,1728};
+    for (int i = 0; i < 14; i++) {
+        if (packet_size <= mp2_frame_sizes[i]) {
+            return i;
+        }
+    }
+    return 13; // Default to highest rate
+}
+
+// Process audio for current frame
+static int process_audio(tev_encoder_t *enc, int frame_num, FILE *output) {
+    if (!enc->has_audio || !enc->mp2_file || enc->audio_remaining <= 0) {
+        return 1;
+    }
+    
+    // Initialize packet size on first frame
+    if (enc->mp2_packet_size == 0) {
+        uint8_t header[4];
+        if (fread(header, 1, 4, enc->mp2_file) != 4) return 1;
+        fseek(enc->mp2_file, 0, SEEK_SET);
+        
+        enc->mp2_packet_size = get_mp2_packet_size(header);
+        int is_mono = (header[3] >> 6) == 3;
+        enc->mp2_rate_index = mp2_packet_size_to_rate_index(enc->mp2_packet_size, is_mono);
+        enc->target_audio_buffer_size = 4; // 4 audio packets in buffer
+    }
+    
+    // Calculate how much audio time each frame represents (in seconds)
+    double frame_audio_time = 1.0 / enc->fps;
+    
+    // Calculate how much audio time each MP2 packet represents
+    // MP2 frame contains 1152 samples at 32kHz = 0.036 seconds
+    double packet_audio_time = 1152.0 / MP2_SAMPLE_RATE;
+    
+    // Estimate how many packets we consume per video frame
+    double packets_per_frame = frame_audio_time / packet_audio_time;
+    
+    // Only insert audio when the simulated buffer has dropped to 2 packets or fewer
+    // Initialize with 2 packets on first frame to prime the buffer
+    int packets_to_insert = 0;
+    if (frame_num == 0) {
+        packets_to_insert = 2;
+        enc->audio_frames_in_buffer = 2;
+    } else {
+        // Simulate buffer consumption (packets consumed per frame)
+        enc->audio_frames_in_buffer -= (int)ceil(packets_per_frame);
+        
+        // Only insert packets when buffer gets low (≤ 2 frames)
+        if (enc->audio_frames_in_buffer <= 2) {
+            packets_to_insert = enc->target_audio_buffer_size - enc->audio_frames_in_buffer;
+            packets_to_insert = (packets_to_insert > 0) ? packets_to_insert : 1;
+        }
+    }
+    
+    // Insert the calculated number of audio packets
+    for (int q = 0; q < packets_to_insert; q++) {
+        size_t bytes_to_read = enc->mp2_packet_size;
+        if (bytes_to_read > enc->audio_remaining) {
+            bytes_to_read = enc->audio_remaining;
+        }
+        
+        size_t bytes_read = fread(enc->mp2_buffer, 1, bytes_to_read, enc->mp2_file);
+        if (bytes_read == 0) break;
+        
+        // Write TEV MP2 audio packet
+        uint8_t audio_packet_type = TEV_PACKET_AUDIO_MP2;
+        uint32_t audio_len = (uint32_t)bytes_read;
+        fwrite(&audio_packet_type, 1, 1, output);
+        fwrite(&audio_len, 4, 1, output);
+        fwrite(enc->mp2_buffer, 1, bytes_read, output);
+        
+        // Track audio bytes written
+        enc->total_output_bytes += 1 + 4 + bytes_read;
+        enc->audio_remaining -= bytes_read;
+        enc->audio_frames_in_buffer++;
+        
+        if (enc->verbose) {
+            printf("Audio packet %d: %zu bytes\n", q, bytes_read);
+        }
+    }
+    
+    return 1;
+}
+
 // Show usage information
 static void show_usage(const char *program_name) {
    printf("TEV YCoCg-R 4:2:0 Video Encoder\n");
@@ -1329,6 +1448,8 @@ static void cleanup_encoder(tev_encoder_t *enc) {
    free_encoder(enc);
 }

+int sync_packet_count = 0;
+
 // Main function
 int main(int argc, char *argv[]) {
    tev_encoder_t *enc = init_encoder();
@@ -1544,16 +1665,23 @@ int main(int argc, char *argv[]) {
                break; // End of video or error
            }
        }
-        
+
+        // Process audio for this frame
+        process_audio(enc, frame_count, output);
+
        // Encode frame
        if (!encode_frame(enc, output, frame_count)) {
            fprintf(stderr, "Failed to encode frame %d\n", frame_count);
            break;
        }
+        else {
+            // Write a sync packet only after a video frame has been encoded
+            uint8_t sync_packet = TEV_PACKET_SYNC;
+            fwrite(&sync_packet, 1, 1, output);
+            sync_packet_count++;
+        }
+

-        // Write a sync packet
-        uint8_t sync_packet = TEV_PACKET_SYNC;
-        fwrite(&sync_packet, 1, 1, output);

        frame_count++;
        if (enc->verbose || frame_count % 30 == 0) {
@@ -1569,6 +1697,7 @@ int main(int argc, char *argv[]) {
    // Write final sync packet
    uint8_t sync_packet = TEV_PACKET_SYNC;
    fwrite(&sync_packet, 1, 1, output);
+    sync_packet_count++;

    if (!enc->output_to_stdout) {
        fclose(output);
@@ -1582,6 +1711,8 @@ int main(int argc, char *argv[]) {
    
    printf("\nEncoding complete!\n");
    printf("  Frames encoded: %d\n", frame_count);
+    printf("  - sync packets: %d\n", sync_packet_count);
+    printf("  Framerate: %d\n", enc->fps);
    printf("  Output size: %zu bytes\n", enc->total_output_bytes);
    printf("  Encoding time: %.2fs (%.1f fps)\n", total_time, frame_count / total_time);
    printf("  Block statistics: INTRA=%d, INTER=%d, MOTION=%d, SKIP=%d\n",