MP2 decoding

This commit is contained in:
minjaesong
2025-08-23 00:54:32 +09:00
parent 3c135e48e0
commit 6f6d10cc1b
2 changed files with 248 additions and 90 deletions

View File

@@ -8,6 +8,9 @@ const HEIGHT = 448
const BLOCK_SIZE = 16 // 16x16 blocks for YCoCg-R
const TEV_MAGIC = [0x1F, 0x54, 0x53, 0x56, 0x4D, 0x54, 0x45, 0x56] // "\x1FTSVM TEV"
const TEV_VERSION = 2 // YCoCg-R version
const SND_BASE_ADDR = audio.getBaseAddr()
const pcm = require("pcm")
const MP2_FRAME_SIZE = [144,216,252,288,360,432,504,576,720,864,1008,1152,1440,1728]
// Block encoding modes
const TEV_MODE_SKIP = 0x00
@@ -58,6 +61,12 @@ graphics.setGraphicsMode(4) // 4096-color mode
graphics.clearPixels(0)
graphics.clearPixels2(0)
// Initialize audio
audio.resetParams(0)
audio.purgeQueue(0)
audio.setPcmMode(0)
audio.setMasterVolume(0, 255)
// Check magic number
let magic = seqread.readBytes(8)
let magicMatching = true
@@ -135,6 +144,9 @@ sys.memset(DISPLAY_BA_ADDR, 15, FRAME_PIXELS) // Black with alpha=15 (opaque) in
let frameCount = 0
let stopPlay = false
let akku = FRAME_TIME
let akku2 = 0.0
let mp2Initialised = false
let audioFired = false
// 4x4 Bayer dithering matrix
const BAYER_MATRIX = [
@@ -158,13 +170,13 @@ function ditherValue(value, x, y) {
return Math.max(0, Math.min(15, Math.floor(dithered * 15 / 255)))
}
let blockDataPtr = sys.malloc(560 * 448 * 3)
// Main decoding loop - simplified for performance
try {
let t1 = sys.nanoTime()
while (!stopPlay && seqread.getReadCount() < FILE_LENGTH && frameCount < totalFrames) {
if (akku >= FRAME_TIME) {
// Handle interactive controls
if (interactive) {
sys.poke(-40, 1)
@@ -173,87 +185,100 @@ try {
break
}
}
// Read packet (1 byte: type)
let packetType = seqread.readOneByte()
if (packetType == 0xFF) { // Sync packet
// Sync packet - frame complete
frameCount++
if (akku >= FRAME_TIME) {
// Read packet (1 byte: type)
let packetType = seqread.readOneByte()
// Copy current RGB frame to previous frame buffer for next frame reference
// memcpy(source, destination, length) - so CURRENT (source) -> PREV (destination)
sys.memcpy(CURRENT_RGB_ADDR, PREV_RGB_ADDR, FRAME_PIXELS * 3)
if (packetType == 0xFF) { // Sync packet
akku -= FRAME_TIME
// Sync packet - frame complete
frameCount++
// Copy current RGB frame to previous frame buffer for next frame reference
// memcpy(source, destination, length) - so CURRENT (source) -> PREV (destination)
sys.memcpy(CURRENT_RGB_ADDR, PREV_RGB_ADDR, FRAME_PIXELS * 3)
} else if (packetType == TEV_PACKET_IFRAME || packetType == TEV_PACKET_PFRAME) {
// Video frame packet
let payloadLen = seqread.readInt()
let compressedPtr = seqread.readBytes(payloadLen)
updateDataRateBin(payloadLen)
// Basic sanity check on compressed data
if (payloadLen <= 0 || payloadLen > 1000000) {
serial.println(`Frame ${frameCount}: Invalid payload length: ${payloadLen}`)
sys.free(compressedPtr)
continue
}
// Decompress using gzip
// Optimized buffer size calculation for TEV YCoCg-R blocks
let blocksX = (width + 15) >> 4 // 16x16 blocks
let blocksY = (height + 15) >> 4
let tevBlockSize = 1 + 4 + 2 + (256 * 2) + (64 * 2) + (64 * 2) // mode + mv + cbp + Y(16x16) + Co(8x8) + Cg(8x8)
let decompressedSize = Math.max(payloadLen * 4, blocksX * blocksY * tevBlockSize) // More efficient sizing
let actualSize
try {
// Use gzip decompression (only compression format supported in TSVM JS)
actualSize = gzip.decompFromTo(compressedPtr, payloadLen, blockDataPtr)
} catch (e) {
// Decompression failed - skip this frame
serial.println(`Frame ${frameCount}: Gzip decompression failed, skipping (compressed size: ${payloadLen}, error: ${e})`)
sys.free(compressedPtr)
continue
}
// Hardware-accelerated TEV YCoCg-R decoding to RGB buffers
try {
graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, height, quality, debugMotionVectors)
// Upload RGB buffer to display framebuffer with dithering
graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, DISPLAY_RG_ADDR, DISPLAY_BA_ADDR,
width, height, frameCount)
// Defer audio playback until a first frame is sent
if (!audioFired) {
audio.play(0)
audioFired = true
}
} catch (e) {
serial.println(`Frame ${frameCount}: Hardware YCoCg-R decode failed: ${e}`)
}
} else if (packetType == TEV_PACKET_IFRAME || packetType == TEV_PACKET_PFRAME) {
// Video frame packet
let payloadLen = seqread.readInt()
let compressedPtr = seqread.readBytes(payloadLen)
updateDataRateBin(payloadLen)
// Basic sanity check on compressed data
if (payloadLen <= 0 || payloadLen > 1000000) {
serial.println(`Frame ${frameCount}: Invalid payload length: ${payloadLen}`)
sys.free(compressedPtr)
continue
} else if (packetType == TEV_PACKET_AUDIO_MP2) {
// MP2 Audio packet
let audioLen = seqread.readInt()
if (!mp2Initialised) {
mp2Initialised = true
audio.mp2Init()
}
seqread.readBytes(audioLen, SND_BASE_ADDR - 2368)
audio.mp2Decode()
audio.mp2UploadDecoded(0)
} else {
println(`Unknown packet type: 0x${packetType.toString(16)}`)
break
}
// Decompress using gzip
// Optimized buffer size calculation for TEV YCoCg-R blocks
let blocksX = (width + 15) >> 4 // 16x16 blocks
let blocksY = (height + 15) >> 4
let tevBlockSize = 1 + 4 + 2 + (256 * 2) + (64 * 2) + (64 * 2) // mode + mv + cbp + Y(16x16) + Co(8x8) + Cg(8x8)
let decompressedSize = Math.max(payloadLen * 4, blocksX * blocksY * tevBlockSize) // More efficient sizing
let blockDataPtr = sys.malloc(decompressedSize)
let actualSize
try {
// Use gzip decompression (only compression format supported in TSVM JS)
actualSize = gzip.decompFromTo(compressedPtr, payloadLen, blockDataPtr)
} catch (e) {
// Decompression failed - skip this frame
serial.println(`Frame ${frameCount}: Gzip decompression failed, skipping (compressed size: ${payloadLen}, error: ${e})`)
sys.free(blockDataPtr)
sys.free(compressedPtr)
continue
}
// Hardware-accelerated TEV YCoCg-R decoding to RGB buffers
try {
graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, height, quality, debugMotionVectors)
// Upload RGB buffer to display framebuffer with dithering
graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, DISPLAY_RG_ADDR, DISPLAY_BA_ADDR,
width, height, frameCount)
} catch (e) {
serial.println(`Frame ${frameCount}: Hardware YCoCg-R decode failed: ${e}`)
}
sys.free(blockDataPtr)
sys.free(compressedPtr)
} else if (packetType == TEV_PACKET_AUDIO_MP2) {
// Audio packet - skip for now
let audioLen = seqread.readInt()
seqread.skip(audioLen)
} else {
println(`Unknown packet type: 0x${packetType.toString(16)}`)
break
}
}
sys.sleep(1)
let t2 = sys.nanoTime()
akku += (t2 - t1) / 1000000000.0
akku2 += (t2 - t1) / 1000000000.0
// Simple progress display
if (interactive) {
con.move(31, 1)
graphics.setTextFore(161)
print(`Frame: ${frameCount}/${totalFrames} (${Math.round(frameCount * 100 / totalFrames)}%) YCoCg-R`)
print(`Frame: ${frameCount}/${totalFrames} (${((frameCount / akku2 * 100)|0) / 100}f) `)
con.move(32, 1)
graphics.setTextFore(161)
print(`VRate: ${(getVideoRate() / 1024 * 8)|0} kbps `)
@@ -262,14 +287,16 @@ try {
t1 = t2
}
} catch (e) {
}
catch (e) {
printerrln(`TEV YCoCg-R decode error: ${e}`)
errorlevel = 1
} finally {
}
finally {
// Cleanup working memory (graphics memory is automatically managed)
sys.free(ycocgWorkspace)
sys.free(dctWorkspace)
sys.free(blockDataPtr)
if (CURRENT_RGB_ADDR > 0) sys.free(CURRENT_RGB_ADDR)
if (PREV_RGB_ADDR > 0) sys.free(PREV_RGB_ADDR)

View File

@@ -376,8 +376,11 @@ typedef struct {
// Audio handling
FILE *mp2_file;
int mp2_packet_size;
int mp2_rate_index;
size_t audio_remaining;
uint8_t *mp2_buffer;
int audio_frames_in_buffer;
int target_audio_buffer_size;
// Compression context
z_stream gzip_stream;
@@ -993,7 +996,10 @@ static tev_encoder_t* init_encoder(void) {
if (!enc) return NULL;
enc->quality = 4; // Default quality
enc->mp2_packet_size = MP2_DEFAULT_PACKET_SIZE;
enc->mp2_packet_size = 0; // Will be detected from MP2 header
enc->mp2_rate_index = 0;
enc->audio_frames_in_buffer = 0;
enc->target_audio_buffer_size = 4;
init_dct_tables();
@@ -1106,24 +1112,37 @@ static int encode_frame(tev_encoder_t *enc, FILE *output, int frame_num) {
// Compress block data using gzip (compatible with TSVM decoder)
size_t block_data_size = blocks_x * blocks_y * sizeof(tev_block_t);
// Reset compression stream
enc->gzip_stream.next_in = (Bytef*)enc->block_data;
enc->gzip_stream.avail_in = block_data_size;
enc->gzip_stream.next_out = (Bytef*)enc->compressed_buffer;
enc->gzip_stream.avail_out = block_data_size * 2;
// Initialize fresh gzip stream for each frame (since Z_FINISH terminates the stream)
z_stream frame_stream;
frame_stream.zalloc = Z_NULL;
frame_stream.zfree = Z_NULL;
frame_stream.opaque = Z_NULL;
if (deflateReset(&enc->gzip_stream) != Z_OK) {
fprintf(stderr, "Gzip deflateReset failed\n");
int init_result = deflateInit2(&frame_stream, Z_DEFAULT_COMPRESSION,
Z_DEFLATED, 15 + 16, 8, Z_DEFAULT_STRATEGY); // 15+16 for gzip format
if (init_result != Z_OK) {
fprintf(stderr, "Failed to initialize gzip compression for frame\n");
return 0;
}
int result = deflate(&enc->gzip_stream, Z_FINISH);
// Set up compression stream
frame_stream.next_in = (Bytef*)enc->block_data;
frame_stream.avail_in = block_data_size;
frame_stream.next_out = (Bytef*)enc->compressed_buffer;
frame_stream.avail_out = block_data_size * 2;
int result = deflate(&frame_stream, Z_FINISH);
if (result != Z_STREAM_END) {
fprintf(stderr, "Gzip compression failed: %d\n", result);
deflateEnd(&frame_stream);
return 0;
}
size_t compressed_size = enc->gzip_stream.total_out;
size_t compressed_size = frame_stream.total_out;
// Clean up frame stream
deflateEnd(&frame_stream);
// Write frame packet header
uint8_t packet_type = is_keyframe ? TEV_PACKET_IFRAME : TEV_PACKET_PFRAME;
@@ -1185,7 +1204,7 @@ static int get_video_metadata(tev_encoder_t *enc) {
int num, den;
if (sscanf(output, "%d/%d", &num, &den) == 2) {
enc->fps = (den > 0) ? (num / den) : 30;
enc->fps = (den > 0) ? (int)round((float)num/(float)den) : 30;
} else {
enc->fps = (int)round(atof(output));
}
@@ -1242,14 +1261,14 @@ static int start_video_conversion(tev_encoder_t *enc) {
if (enc->output_fps > 0 && enc->output_fps != enc->fps) {
// Frame rate conversion requested
snprintf(command, sizeof(command),
"ffmpeg -i \"%s\" -f rawvideo -pix_fmt rgb24 "
"ffmpeg -v quiet -i \"%s\" -f rawvideo -pix_fmt rgb24 "
"-vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d,fps=%d\" "
"-y - 2>&1",
enc->input_file, enc->width, enc->height, enc->width, enc->height, enc->output_fps);
} else {
// No frame rate conversion
snprintf(command, sizeof(command),
"ffmpeg -i \"%s\" -f rawvideo -pix_fmt rgb24 "
"ffmpeg -v quiet -i \"%s\" -f rawvideo -pix_fmt rgb24 "
"-vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" "
"-y -",
enc->input_file, enc->width, enc->height, enc->width, enc->height);
@@ -1274,7 +1293,7 @@ static int start_audio_conversion(tev_encoder_t *enc) {
char command[2048];
snprintf(command, sizeof(command),
"ffmpeg -i \"%s\" -acodec libtwolame -psymodel 4 -b:a 192k -ar %d -ac 2 -y \"%s\" 2>/dev/null",
"ffmpeg -v quiet -i \"%s\" -acodec libtwolame -psymodel 4 -b:a 192k -ar %d -ac 2 -y \"%s\" 2>/dev/null",
enc->input_file, MP2_SAMPLE_RATE, TEMP_AUDIO_FILE);
int result = system(command);
@@ -1290,6 +1309,106 @@ static int start_audio_conversion(tev_encoder_t *enc) {
return (result == 0);
}
// Compute the size in bytes of one MP2 frame from its frame header.
//
// header: frame header bytes; only header[2] is read. Its high nibble is used
//         as the bitrate index and bit 1 as the padding flag.
//
// Returns (144 * bitrate * 1000) / MP2_SAMPLE_RATE plus the padding flag.
// Falls back to MP2_DEFAULT_PACKET_SIZE when the bitrate index is 15 or when
// the table entry for the index is 0. The sample rate is always taken from
// MP2_SAMPLE_RATE; the sample-rate bits of the header are not consulted.
static int get_mp2_packet_size(uint8_t *header) {
    // Bitrate table indexed by the 4-bit bitrate index (scaled by 1000 below)
    static const int bitrate_table[15] = {
        0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384
    };

    const int rate_byte = header[2];
    const int table_index = (rate_byte >> 4) & 0x0F;
    const int has_padding = (rate_byte >> 1) & 0x01;

    // Index 15 has no table entry
    if (table_index > 14) {
        return MP2_DEFAULT_PACKET_SIZE;
    }

    const int selected_bitrate = bitrate_table[table_index];
    if (selected_bitrate <= 0) {
        return MP2_DEFAULT_PACKET_SIZE;
    }

    return (144 * selected_bitrate * 1000) / MP2_SAMPLE_RATE + has_padding;
}
// Map an MP2 packet size in bytes to a TEV rate index.
//
// packet_size: size of one MP2 packet in bytes.
// is_mono:     accepted for interface compatibility; not used by the lookup.
//
// Returns the index of the first table entry that is >= packet_size, or 13
// (the last entry) when packet_size exceeds every entry in the table.
static int mp2_packet_size_to_rate_index(int packet_size, int is_mono) {
    static const int size_table[] = {
        144, 216, 252, 288, 360, 432, 504, 576, 720, 864, 1008, 1152, 1440, 1728
    };
    const int entry_count = (int)(sizeof(size_table) / sizeof(size_table[0]));
    int slot = 0;

    (void)is_mono; // intentionally unused

    // Advance past every entry that is too small to hold the packet
    while (slot < entry_count && packet_size > size_table[slot]) {
        slot++;
    }

    // Ran off the end: default to the highest rate
    return (slot < entry_count) ? slot : entry_count - 1;
}
// Process audio for current frame
//
// Writes zero or more TEV MP2 audio packets to `output` ahead of video frame
// `frame_num`, keeping a simulated playback buffer topped up to
// enc->target_audio_buffer_size packets.
//
// Parameters:
//   enc       - encoder state. Reads has_audio, mp2_file, fps, mp2_buffer and
//               verbose; updates mp2_packet_size, mp2_rate_index,
//               target_audio_buffer_size, audio_frames_in_buffer,
//               audio_remaining and total_output_bytes.
//   frame_num - zero-based index of the video frame about to be written; the
//               pacing maths assumes it advances by one per call.
//   output    - stream that receives the audio packets.
//
// Returns 1 in every case (including "no audio" and "nothing to write").
//
// Pacing notes:
//   * The simulated buffer counts only packets actually written. The priming
//     step therefore starts from 0 and lets the write loop do the counting;
//     presetting it to 2 and then counting the writes as well would report
//     4 buffered packets when only 2 exist.
//   * Consumption is derived exactly from frame_num with integer arithmetic:
//     consumed(frame) = frame * MP2_SAMPLE_RATE / (1152 * fps). Rounding the
//     per-frame consumption up instead makes the muxer insert audio faster
//     than it is played, so the audio drifts ahead of the video.
static int process_audio(tev_encoder_t *enc, int frame_num, FILE *output) {
    if (!enc->has_audio || !enc->mp2_file || enc->audio_remaining <= 0) {
        return 1;
    }

    // Initialize packet size on first use by peeking at the first MP2 header
    if (enc->mp2_packet_size == 0) {
        uint8_t header[4];
        if (fread(header, 1, 4, enc->mp2_file) != 4) return 1;
        fseek(enc->mp2_file, 0, SEEK_SET); // rewind so the header is emitted with the first packet

        enc->mp2_packet_size = get_mp2_packet_size(header);
        int is_mono = (header[3] >> 6) == 3;
        enc->mp2_rate_index = mp2_packet_size_to_rate_index(enc->mp2_packet_size, is_mono);
        enc->target_audio_buffer_size = 4; // 4 audio packets in buffer
    }

    // Pacing needs a positive frame rate; without one there is no safe schedule
    if (enc->fps <= 0) {
        return 1;
    }

    int packets_to_insert = 0;
    if (frame_num == 0) {
        // Prime the buffer with 2 packets; the write loop below counts them
        packets_to_insert = 2;
        enc->audio_frames_in_buffer = 0;
    } else {
        // Each MP2 packet carries 1152 samples. Whole packets played back by
        // the start of frame k: floor(k * MP2_SAMPLE_RATE / (1152 * fps)).
        // The difference between consecutive frames is what this frame
        // consumes, so fractional consumption accumulates without drift.
        const long long samples_per_frame_unit = 1152LL * (long long)enc->fps;
        const long long consumed_before =
            ((long long)(frame_num - 1) * (long long)MP2_SAMPLE_RATE) / samples_per_frame_unit;
        const long long consumed_now =
            ((long long)frame_num * (long long)MP2_SAMPLE_RATE) / samples_per_frame_unit;

        enc->audio_frames_in_buffer -= (int)(consumed_now - consumed_before);

        // Only insert packets when the buffer gets low (<= 2 packets)
        if (enc->audio_frames_in_buffer <= 2) {
            packets_to_insert = enc->target_audio_buffer_size - enc->audio_frames_in_buffer;
            packets_to_insert = (packets_to_insert > 0) ? packets_to_insert : 1;
        }
    }

    // Insert the calculated number of audio packets
    for (int q = 0; q < packets_to_insert; q++) {
        size_t bytes_to_read = enc->mp2_packet_size;
        if (bytes_to_read > enc->audio_remaining) {
            bytes_to_read = enc->audio_remaining;
        }

        size_t bytes_read = fread(enc->mp2_buffer, 1, bytes_to_read, enc->mp2_file);
        if (bytes_read == 0) break; // end of audio data

        // Write TEV MP2 audio packet: type (1 byte), length (4 bytes), payload
        uint8_t audio_packet_type = TEV_PACKET_AUDIO_MP2;
        uint32_t audio_len = (uint32_t)bytes_read;
        fwrite(&audio_packet_type, 1, 1, output);
        fwrite(&audio_len, 4, 1, output);
        fwrite(enc->mp2_buffer, 1, bytes_read, output);

        // Track audio bytes written and the simulated buffer fill
        enc->total_output_bytes += 1 + 4 + bytes_read;
        enc->audio_remaining -= bytes_read;
        enc->audio_frames_in_buffer++;

        if (enc->verbose) {
            printf("Audio packet %d: %zu bytes\n", q, bytes_read);
        }
    }

    return 1;
}
// Show usage information
static void show_usage(const char *program_name) {
printf("TEV YCoCg-R 4:2:0 Video Encoder\n");
@@ -1329,6 +1448,8 @@ static void cleanup_encoder(tev_encoder_t *enc) {
free_encoder(enc);
}
int sync_packet_count = 0;
// Main function
int main(int argc, char *argv[]) {
tev_encoder_t *enc = init_encoder();
@@ -1544,16 +1665,23 @@ int main(int argc, char *argv[]) {
break; // End of video or error
}
}
// Process audio for this frame
process_audio(enc, frame_count, output);
// Encode frame
if (!encode_frame(enc, output, frame_count)) {
fprintf(stderr, "Failed to encode frame %d\n", frame_count);
break;
}
else {
// Write a sync packet only after a video frame has been coded
uint8_t sync_packet = TEV_PACKET_SYNC;
fwrite(&sync_packet, 1, 1, output);
sync_packet_count++;
}
// Write a sync packet
uint8_t sync_packet = TEV_PACKET_SYNC;
fwrite(&sync_packet, 1, 1, output);
frame_count++;
if (enc->verbose || frame_count % 30 == 0) {
@@ -1569,6 +1697,7 @@ int main(int argc, char *argv[]) {
// Write final sync packet
uint8_t sync_packet = TEV_PACKET_SYNC;
fwrite(&sync_packet, 1, 1, output);
sync_packet_count++;
if (!enc->output_to_stdout) {
fclose(output);
@@ -1582,6 +1711,8 @@ int main(int argc, char *argv[]) {
printf("\nEncoding complete!\n");
printf(" Frames encoded: %d\n", frame_count);
printf(" - sync packets: %d\n", sync_packet_count);
printf(" Framerate: %d\n", enc->fps);
printf(" Output size: %zu bytes\n", enc->total_output_bytes);
printf(" Encoding time: %.2fs (%.1f fps)\n", total_time, frame_count / total_time);
printf(" Block statistics: INTRA=%d, INTER=%d, MOTION=%d, SKIP=%d\n",