TAV: pcm8 audio

This commit is contained in:
minjaesong
2025-10-22 10:05:54 +09:00
parent 758b134abd
commit 4265891093
15 changed files with 298 additions and 47 deletions

View File

@@ -1,7 +1,7 @@
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// High Speed Disk Peripheral Adapter (HSDPA) Driver for TVDOS // High Speed Disk Peripheral Adapter (HSDPA) Driver for TVDOS
// This driver treats each disk from HSDPA as a single large file // This driver treats each disk from HSDPA as a single large file
// Created by Claude on 2025-08-16 // Created by CuriousTorvald and Claude on 2025-08-16
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Add TAPE device names to reserved names // Add TAPE device names to reserved names

View File

@@ -1,4 +1,4 @@
// Created by Claude on 2025-09-13. // Created by CuriousTorvald and Claude on 2025-09-13.
// TSVM Advanced Video (TAV) Format Decoder - DWT-based compression // TSVM Advanced Video (TAV) Format Decoder - DWT-based compression
// Adapted from the working playtev.js decoder // Adapted from the working playtev.js decoder
// Usage: playtav moviefile.tav [options] // Usage: playtav moviefile.tav [options]
@@ -1279,10 +1279,18 @@ try {
let zstdLen = seqread.readInt() let zstdLen = seqread.readInt()
let zstdPtr = sys.malloc(zstdLen) let zstdPtr = sys.malloc(zstdLen)
seqread.readBytes(zstdLen, zstdPtr) seqread.readBytes(zstdLen, zstdPtr)
let pcmLen = gzip.decompFromTo(zstdPtr, zstdLen, SND_BASE_ADDR - 65536) // serial.println(`PCM8 audio (${zstdLen} -> ????)`)
let pcmPtr = sys.malloc(65536) //SND_BASE_ADDR - 65536
let pcmLen = gzip.decompFromTo(zstdPtr, zstdLen, pcmPtr) // <- segfaults!
if (pcmLen > 65536) throw Error(`PCM data too long -- got ${pcmLen} bytes`) if (pcmLen > 65536) throw Error(`PCM data too long -- got ${pcmLen} bytes`)
audio.putPcmDataByPtr(pcmPtr, pcmLen, 0)
audio.setSampleUploadLength(0, pcmLen) audio.setSampleUploadLength(0, pcmLen)
audio.startSampleUpload(0) audio.startSampleUpload(0)
sys.free(zstdPtr)
sys.free(pcmPtr)
} }
else if (packetType === TAV_PACKET_SUBTITLE) { else if (packetType === TAV_PACKET_SUBTITLE) {
// Subtitle packet - same format as TEV // Subtitle packet - same format as TEV

View File

@@ -1,4 +1,4 @@
// Created by Claude on 2025-08-18. // Created by CuriousTorvald and Claude on 2025-08-18.
// TSVM Enhanced Video (TEV) Format Decoder - YCoCg-R 4:2:0 Version // TSVM Enhanced Video (TEV) Format Decoder - YCoCg-R 4:2:0 Version
// Usage: playtev moviefile.tev [options] // Usage: playtev moviefile.tev [options]
// Options: -i (interactive), -debug-mv (show motion vector debug visualization) // Options: -i (interactive), -debug-mv (show motion vector debug visualization)

View File

@@ -1,5 +1,5 @@
// TSVM Universal Cue Format (UCF) Player // TSVM Universal Cue Format (UCF) Player
// Created by Claude on 2025-09-22 // Created by CuriousTorvald and Claude on 2025-09-22
// Usage: playucf cuefile.ucf [options] // Usage: playucf cuefile.ucf [options]
// Options: -i (interactive mode) // Options: -i (interactive mode)

View File

@@ -674,7 +674,7 @@ Delta block format:
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
TSVM Enhanced Video (TEV) Format TSVM Enhanced Video (TEV) Format
Created by Claude on 2025-08-17 Created by CuriousTorvald and Claude on 2025-08-17
TEV is a modern video codec optimized for TSVM's 4096-color hardware, featuring TEV is a modern video codec optimized for TSVM's 4096-color hardware, featuring
DCT-based compression, motion compensation, and efficient temporal coding. DCT-based compression, motion compensation, and efficient temporal coding.
@@ -877,7 +877,7 @@ When KSF is interleaved with MP2 audio, the payload must be inserted in-between
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
TSVM Advanced Video (TAV) Format TSVM Advanced Video (TAV) Format
Created by Claude on 2025-09-13 Created by CuriousTorvald and Claude on 2025-09-13
TAV is a next-generation video codec for TSVM utilizing Discrete Wavelet Transform (DWT) TAV is a next-generation video codec for TSVM utilizing Discrete Wavelet Transform (DWT)
similar to JPEG2000, providing superior compression efficiency and scalability compared similar to JPEG2000, providing superior compression efficiency and scalability compared

View File

@@ -23,7 +23,7 @@ class CompressorDelegate(private val vm: VM) {
val bytes = comp(inbytes) val bytes = comp(inbytes)
vm.getDev(output.toLong(), bytes.size.toLong(), true).let { vm.getDev(output.toLong(), bytes.size.toLong(), true).let {
if (it != null) { if (it != null) {
val bytesReversed = bytes.reversedArray() // copy over reversed bytes starting from the end of the destination val bytesReversed = bytes.reversedArray() // backward addressing: copy over reversed bytes starting from the end of the destination
UnsafeHelper.memcpyRaw(bytesReversed, UnsafeHelper.getArrayOffset(bytesReversed), null, output.toLong() - bytes.size, bytes.size.toLong()) UnsafeHelper.memcpyRaw(bytesReversed, UnsafeHelper.getArrayOffset(bytesReversed), null, output.toLong() - bytes.size, bytes.size.toLong())
} }
else { else {
@@ -39,7 +39,7 @@ class CompressorDelegate(private val vm: VM) {
val bytes = comp(str) val bytes = comp(str)
vm.getDev(output.toLong(), bytes.size.toLong(), true).let { vm.getDev(output.toLong(), bytes.size.toLong(), true).let {
if (it != null) { if (it != null) {
val bytesReversed = bytes.reversedArray() // copy over reversed bytes starting from the end of the destination val bytesReversed = bytes.reversedArray() // backward addressing: copy over reversed bytes starting from the end of the destination
UnsafeHelper.memcpyRaw(bytesReversed, UnsafeHelper.getArrayOffset(bytesReversed), null, output.toLong() - bytes.size, bytes.size.toLong()) UnsafeHelper.memcpyRaw(bytesReversed, UnsafeHelper.getArrayOffset(bytesReversed), null, output.toLong() - bytes.size, bytes.size.toLong())
} }
else { else {
@@ -55,7 +55,7 @@ class CompressorDelegate(private val vm: VM) {
val bytes = comp(ba) val bytes = comp(ba)
vm.getDev(output.toLong(), bytes.size.toLong(), true).let { vm.getDev(output.toLong(), bytes.size.toLong(), true).let {
if (it != null) { if (it != null) {
val bytesReversed = bytes.reversedArray() // copy over reversed bytes starting from the end of the destination val bytesReversed = bytes.reversedArray() // backward addressing: copy over reversed bytes starting from the end of the destination
UnsafeHelper.memcpyRaw(bytesReversed, UnsafeHelper.getArrayOffset(bytesReversed), null, output.toLong() - bytes.size, bytes.size.toLong()) UnsafeHelper.memcpyRaw(bytesReversed, UnsafeHelper.getArrayOffset(bytesReversed), null, output.toLong() - bytes.size, bytes.size.toLong())
} }
else { else {
@@ -75,7 +75,7 @@ class CompressorDelegate(private val vm: VM) {
val bytes = decomp(str) val bytes = decomp(str)
vm.getDev(pointer.toLong(), bytes.size.toLong(), true).let { vm.getDev(pointer.toLong(), bytes.size.toLong(), true).let {
if (it != null) { if (it != null) {
val bytesReversed = bytes.reversedArray() // copy over reversed bytes starting from the end of the destination val bytesReversed = bytes.reversedArray() // backward addressing: copy over reversed bytes starting from the end of the destination
UnsafeHelper.memcpyRaw(bytesReversed, UnsafeHelper.getArrayOffset(bytesReversed), null, pointer.toLong() - bytes.size, bytes.size.toLong()) UnsafeHelper.memcpyRaw(bytesReversed, UnsafeHelper.getArrayOffset(bytesReversed), null, pointer.toLong() - bytes.size, bytes.size.toLong())
} }
else { else {
@@ -91,7 +91,7 @@ class CompressorDelegate(private val vm: VM) {
val bytes = decomp(ba) val bytes = decomp(ba)
vm.getDev(pointer.toLong(), bytes.size.toLong(), true).let { vm.getDev(pointer.toLong(), bytes.size.toLong(), true).let {
if (it != null) { if (it != null) {
val bytesReversed = bytes.reversedArray() // copy over reversed bytes starting from the end of the destination val bytesReversed = bytes.reversedArray() // backward addressing: copy over reversed bytes starting from the end of the destination
UnsafeHelper.memcpyRaw(bytesReversed, UnsafeHelper.getArrayOffset(bytesReversed), null, pointer.toLong() - bytes.size, bytes.size.toLong()) UnsafeHelper.memcpyRaw(bytesReversed, UnsafeHelper.getArrayOffset(bytesReversed), null, pointer.toLong() - bytes.size, bytes.size.toLong())
} }
else { else {
@@ -110,15 +110,15 @@ class CompressorDelegate(private val vm: VM) {
val inbytes = ByteArray(len) { vm.peek(input.toLong() + it)!! } val inbytes = ByteArray(len) { vm.peek(input.toLong() + it)!! }
val bytes = decomp(inbytes) val bytes = decomp(inbytes)
vm.getDev(output.toLong(), bytes.size.toLong(), true).let { vm.getDev(output.toLong(), bytes.size.toLong(), true).let {
if (it != null) { // if (it != null) {
val bytesReversed = bytes.reversedArray() // copy over reversed bytes starting from the end of the destination // val bytesReversed = bytes.reversedArray() // backward addressing: copy over reversed bytes starting from the end of the destination
UnsafeHelper.memcpyRaw(bytesReversed, UnsafeHelper.getArrayOffset(bytesReversed), null, output.toLong() - bytes.size, bytes.size.toLong()) // UnsafeHelper.memcpyRaw(bytesReversed, UnsafeHelper.getArrayOffset(bytesReversed), null, output.toLong() - bytes.size, bytes.size.toLong())
} // }
else { // else {
bytes.forEachIndexed { index, byte -> bytes.forEachIndexed { index, byte ->
vm.poke(output.toLong() + index, byte) vm.poke(output.toLong() + index, byte)
} }
} // }
} }
return bytes.size return bytes.size
} }

View File

@@ -1348,7 +1348,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
private val END = 0xFF.toByte() private val END = 0xFF.toByte()
// TEV (TSVM Enhanced Video) format support // TEV (TSVM Enhanced Video) format support
// Created by Claude on 2025-08-17 // Created by CuriousTorvald and Claude on 2025-08-17
// Reusable working arrays to reduce allocation overhead // Reusable working arrays to reduce allocation overhead
private val tevIdct8TempBuffer = FloatArray(64) private val tevIdct8TempBuffer = FloatArray(64)

View File

@@ -12,7 +12,7 @@ import java.nio.channels.FileChannel
* A testing version of HSDPA that uses actual files on the host computer as disk sources. * A testing version of HSDPA that uses actual files on the host computer as disk sources.
* Each disk corresponds to a single file on the host filesystem. * Each disk corresponds to a single file on the host filesystem.
* *
* Created by Claude on 2025-08-16. * Created by CuriousTorvald and Claude on 2025-08-16.
*/ */
class HostFileHSDPA : HSDPA { class HostFileHSDPA : HSDPA {

View File

@@ -1,4 +1,4 @@
# Created by Claude on 2025-08-17. # Created by CuriousTorvald and Claude on 2025-08-17.
# Makefile for TSVM Enhanced Video (TEV) encoder # Makefile for TSVM Enhanced Video (TEV) encoder
CC = gcc CC = gcc

View File

@@ -1,4 +1,4 @@
// Created by Claude on 2025-09-13. // Created by CuriousTorvald and Claude on 2025-09-13.
// TAV (TSVM Advanced Video) Encoder - DWT-based compression with full resolution YCoCg-R // TAV (TSVM Advanced Video) Encoder - DWT-based compression with full resolution YCoCg-R
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
@@ -54,6 +54,7 @@
#define TAV_PACKET_PFRAME_ADAPTIVE 0x16 // P-frame with adaptive quad-tree block partitioning #define TAV_PACKET_PFRAME_ADAPTIVE 0x16 // P-frame with adaptive quad-tree block partitioning
#define TAV_PACKET_BFRAME_ADAPTIVE 0x17 // B-frame with adaptive quad-tree block partitioning (bidirectional prediction) #define TAV_PACKET_BFRAME_ADAPTIVE 0x17 // B-frame with adaptive quad-tree block partitioning (bidirectional prediction)
#define TAV_PACKET_AUDIO_MP2 0x20 // MP2 audio #define TAV_PACKET_AUDIO_MP2 0x20 // MP2 audio
#define TAV_PACKET_AUDIO_PCM8 0x21 // 8-bit PCM audio (zstd compressed)
#define TAV_PACKET_SUBTITLE 0x30 // Subtitle packet #define TAV_PACKET_SUBTITLE 0x30 // Subtitle packet
#define TAV_PACKET_AUDIO_TRACK 0x40 // Separate audio track (full MP2 file) #define TAV_PACKET_AUDIO_TRACK 0x40 // Separate audio track (full MP2 file)
#define TAV_PACKET_EXTENDED_HDR 0xEF // Extended header packet #define TAV_PACKET_EXTENDED_HDR 0xEF // Extended header packet
@@ -118,6 +119,7 @@ static int needs_alpha_channel(int channel_layout) {
#define DEFAULT_FPS 30 #define DEFAULT_FPS 30
#define DEFAULT_QUALITY 3 #define DEFAULT_QUALITY 3
#define DEFAULT_ZSTD_LEVEL 3 #define DEFAULT_ZSTD_LEVEL 3
#define DEFAULT_PCM_ZSTD_LEVEL 3
#define TEMPORAL_GOP_SIZE 20 #define TEMPORAL_GOP_SIZE 20
#define TEMPORAL_DECOMP_LEVEL 2 #define TEMPORAL_DECOMP_LEVEL 2
#define MOTION_THRESHOLD 24.0f // Flush if motion exceeds 24 pixels in any direction #define MOTION_THRESHOLD 24.0f // Flush if motion exceeds 24 pixels in any direction
@@ -159,6 +161,7 @@ static void generate_random_filename(char *filename) {
} }
char TEMP_AUDIO_FILE[42]; char TEMP_AUDIO_FILE[42];
char TEMP_PCM_FILE[42];
// Utility macros // Utility macros
static inline int CLAMP(int x, int min, int max) { static inline int CLAMP(int x, int min, int max) {
@@ -1694,7 +1697,8 @@ typedef struct tav_encoder_s {
FILE *output_fp; FILE *output_fp;
FILE *mp2_file; FILE *mp2_file;
FILE *ffmpeg_video_pipe; FILE *ffmpeg_video_pipe;
FILE *pcm_file; // PCM16LE audio file for PCM8 mode
// Video parameters // Video parameters
int width, height; int width, height;
int fps; int fps;
@@ -1744,6 +1748,7 @@ typedef struct tav_encoder_s {
int use_delta_encoding; int use_delta_encoding;
int delta_haar_levels; // Number of Haar DWT levels to apply to delta coefficients (0 = disabled) int delta_haar_levels; // Number of Haar DWT levels to apply to delta coefficients (0 = disabled)
int separate_audio_track; // 1 = write entire MP2 file as packet 0x40 after header, 0 = interleave audio (default) int separate_audio_track; // 1 = write entire MP2 file as packet 0x40 after header, 0 = interleave audio (default)
int pcm8_audio; // 1 = use 8-bit PCM audio (packet 0x21), 0 = use MP2 (default)
// Frame buffers - ping-pong implementation // Frame buffers - ping-pong implementation
uint8_t *frame_rgb[2]; // [0] and [1] alternate between current and previous uint8_t *frame_rgb[2]; // [0] and [1] alternate between current and previous
@@ -1846,6 +1851,12 @@ typedef struct tav_encoder_s {
int audio_bitrate; // Custom audio bitrate (0 = use quality table) int audio_bitrate; // Custom audio bitrate (0 = use quality table)
int target_audio_buffer_size; int target_audio_buffer_size;
double audio_frames_in_buffer; double audio_frames_in_buffer;
// PCM8 audio processing
int samples_per_frame; // Number of stereo samples per video frame
int16_t *pcm16_buffer; // Buffer for reading PCM16LE data
uint8_t *pcm8_buffer; // Buffer for converted PCM8 data
int16_t dither_error[2]; // Dithering error for stereo channels [L, R]
// Subtitle processing // Subtitle processing
subtitle_entry_t *subtitles; subtitle_entry_t *subtitles;
@@ -2256,6 +2267,7 @@ static void show_usage(const char *program_name) {
printf(" -a, --arate N MP2 audio bitrate in kbps (overrides quality-based audio rate)\n"); printf(" -a, --arate N MP2 audio bitrate in kbps (overrides quality-based audio rate)\n");
printf(" Valid values: 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384\n"); printf(" Valid values: 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384\n");
printf(" --separate-audio-track Write entire MP2 file as single packet 0x40 (instead of interleaved)\n"); printf(" --separate-audio-track Write entire MP2 file as single packet 0x40 (instead of interleaved)\n");
printf(" --pcm8-audio Use 8-bit PCM audio (packet 0x21, zstd compressed, per-frame packets)\n");
printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n"); printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n");
printf(" --fontrom-lo FILE Low font ROM file for internationalised subtitles\n"); printf(" --fontrom-lo FILE Low font ROM file for internationalised subtitles\n");
printf(" --fontrom-hi FILE High font ROM file for internationalised subtitles\n"); printf(" --fontrom-hi FILE High font ROM file for internationalised subtitles\n");
@@ -2344,6 +2356,7 @@ static tav_encoder_t* create_encoder(void) {
enc->use_delta_encoding = 0; enc->use_delta_encoding = 0;
enc->delta_haar_levels = TEMPORAL_DECOMP_LEVEL; enc->delta_haar_levels = TEMPORAL_DECOMP_LEVEL;
enc->separate_audio_track = 0; // Default: interleave audio packets enc->separate_audio_track = 0; // Default: interleave audio packets
enc->pcm8_audio = 0; // Default: use MP2 audio
// GOP / temporal DWT settings // GOP / temporal DWT settings
enc->enable_temporal_dwt = 1; // Mutually exclusive with use_delta_encoding enc->enable_temporal_dwt = 1; // Mutually exclusive with use_delta_encoding
@@ -7957,28 +7970,61 @@ static int start_audio_conversion(tav_encoder_t *enc) {
if (!enc->has_audio) return 1; if (!enc->has_audio) return 1;
char command[2048]; char command[2048];
int bitrate;
if (enc->audio_bitrate > 0) {
bitrate = enc->audio_bitrate;
} else {
bitrate = enc->lossless ? 384 : MP2_RATE_TABLE[enc->quality_level];
}
printf(" Audio format: MP2 %dkbps (via libtwolame)\n", bitrate);
snprintf(command, sizeof(command),
"ffmpeg -v quiet -i \"%s\" -acodec libtwolame -psymodel 4 -b:a %dk -ar 32000 -ac 2 -y \"%s\" 2>/dev/null",
enc->input_file, bitrate, TEMP_AUDIO_FILE);
int result = system(command); if (enc->pcm8_audio) {
if (result == 0) { // Extract PCM16LE for PCM8 mode
enc->mp2_file = fopen(TEMP_AUDIO_FILE, "rb"); printf(" Audio format: PCM16LE 32kHz stereo (will be converted to 8-bit)\n");
if (enc->mp2_file) { snprintf(command, sizeof(command),
fseek(enc->mp2_file, 0, SEEK_END); "ffmpeg -v quiet -i \"%s\" -f s16le -acodec pcm_s16le -ar %d -ac 2 -y \"%s\" 2>/dev/null",
enc->audio_remaining = ftell(enc->mp2_file); enc->input_file, TSVM_AUDIO_SAMPLE_RATE, TEMP_PCM_FILE);
fseek(enc->mp2_file, 0, SEEK_SET);
int result = system(command);
if (result == 0) {
enc->pcm_file = fopen(TEMP_PCM_FILE, "rb");
if (enc->pcm_file) {
fseek(enc->pcm_file, 0, SEEK_END);
enc->audio_remaining = ftell(enc->pcm_file);
fseek(enc->pcm_file, 0, SEEK_SET);
// Calculate samples per frame: ceil(sample_rate / fps)
enc->samples_per_frame = (TSVM_AUDIO_SAMPLE_RATE + enc->output_fps - 1) / enc->output_fps;
// Initialize dithering error
enc->dither_error[0] = 0;
enc->dither_error[1] = 0;
if (enc->verbose) {
printf(" PCM8: %d samples per frame\n", enc->samples_per_frame);
}
}
return 1;
} }
return 1; return 0;
} else {
// Extract MP2 for normal mode
int bitrate;
if (enc->audio_bitrate > 0) {
bitrate = enc->audio_bitrate;
} else {
bitrate = enc->lossless ? 384 : MP2_RATE_TABLE[enc->quality_level];
}
printf(" Audio format: MP2 %dkbps (via libtwolame)\n", bitrate);
snprintf(command, sizeof(command),
"ffmpeg -v quiet -i \"%s\" -acodec libtwolame -psymodel 4 -b:a %dk -ar %d -ac 2 -y \"%s\" 2>/dev/null",
enc->input_file, bitrate, TSVM_AUDIO_SAMPLE_RATE, TEMP_AUDIO_FILE);
int result = system(command);
if (result == 0) {
enc->mp2_file = fopen(TEMP_AUDIO_FILE, "rb");
if (enc->mp2_file) {
fseek(enc->mp2_file, 0, SEEK_END);
enc->audio_remaining = ftell(enc->mp2_file);
fseek(enc->mp2_file, 0, SEEK_SET);
}
return 1;
}
return 0;
} }
return 0;
} }
// Get MP2 packet size from header (copied from TEV) // Get MP2 packet size from header (copied from TEV)
@@ -8599,6 +8645,36 @@ static long write_extended_header(tav_encoder_t *enc) {
return endt_offset + 4 + 1; // 4 bytes for "ENDT", 1 byte for type return endt_offset + 4 + 1; // 4 bytes for "ENDT", 1 byte for type
} }
// Convert PCM16LE to unsigned 8-bit PCM with error-diffusion dithering
//
// pcm16       interleaved stereo input (L, R, L, R, ...), num_samples pairs
// pcm8        interleaved stereo output, must hold num_samples * 2 bytes
// num_samples number of stereo sample pairs to convert
//
// The quantization error of each channel is carried in enc->dither_error[ch]
// and added to that channel's next sample, so the state persists across calls.
static void convert_pcm16_to_pcm8_dithered(tav_encoder_t *enc, const int16_t *pcm16, uint8_t *pcm8, int num_samples) {
    for (int i = 0; i < num_samples; i++) {
        for (int ch = 0; ch < 2; ch++) {  // Stereo: L and R
            int idx = i * 2 + ch;

            // Convert signed 16-bit [-32768, 32767] to the unsigned range [0, 65535]
            int32_t sample = (int32_t)pcm16[idx] + 32768;

            // Add the error carried over from the previous sample of this channel
            sample += enc->dither_error[ch];

            // Quantize to 8-bit (divide by 256)
            int32_t quantized = sample >> 8;

            // Clamp to [0, 255]
            if (quantized < 0) quantized = 0;
            if (quantized > 255) quantized = 255;

            // Store 8-bit value
            pcm8[idx] = (uint8_t)quantized;

            // Quantization error for the next sample: original - (quantized * 256).
            // Without clipping this is always within [0, 255]. When the output
            // clips, the residual cannot be represented by later samples either,
            // so it would otherwise grow on every clipped sample and eventually
            // overflow the int16_t accumulator. Bound it to under one output step.
            int32_t error = sample - (quantized << 8);
            if (error > 255) error = 255;
            if (error < -255) error = -255;
            enc->dither_error[ch] = (int16_t)error;
        }
    }
}
// Write separate audio track packet (0x40) - entire MP2 file in one packet // Write separate audio track packet (0x40) - entire MP2 file in one packet
static int write_separate_audio_track(tav_encoder_t *enc, FILE *output) { static int write_separate_audio_track(tav_encoder_t *enc, FILE *output) {
if (!enc->has_audio || !enc->mp2_file) { if (!enc->has_audio || !enc->mp2_file) {
@@ -8651,6 +8727,97 @@ static int write_separate_audio_track(tav_encoder_t *enc, FILE *output) {
return 1; return 1;
} }
// Write PCM8 audio packet (0x21) with specified sample count
//
// Reads up to samples_to_read stereo sample pairs of PCM16LE from enc->pcm_file,
// converts them to dithered 8-bit PCM, compresses the result with zstd and
// appends one packet to `output`:
//   [0x21][uint32 compressed_size][compressed_data]
// The size field is written with fwrite() in host byte order.
// NOTE(review): the container presumably expects little-endian -- confirm
// before building on a big-endian host.
//
// Returns 1 when a packet was written; 0 when there is no audio left to read,
// the request is empty, or an allocation/compression/write error occurred.
static int write_pcm8_packet_samples(tav_encoder_t *enc, FILE *output, int samples_to_read) {
    if (!enc->pcm_file || enc->audio_remaining <= 0 || samples_to_read <= 0) {
        return 0;
    }

    // The working buffers below are sized for this many stereo samples; never
    // read more than that in one packet, whatever the caller asked for.
    const int max_samples = 32768;  // Maximum samples per packet
    if (samples_to_read > max_samples) {
        samples_to_read = max_samples;
    }

    size_t bytes_to_read = (size_t)samples_to_read * 2 * sizeof(int16_t);  // Stereo PCM16LE

    // Don't read more than what's available
    if (bytes_to_read > (size_t)enc->audio_remaining) {
        bytes_to_read = (size_t)enc->audio_remaining;
        samples_to_read = (int)(bytes_to_read / (2 * sizeof(int16_t)));
    }

    if (samples_to_read == 0) {
        return 0;
    }

    // Allocate buffers if needed (kept on the encoder and reused across calls)
    if (!enc->pcm16_buffer) {
        enc->pcm16_buffer = malloc((size_t)max_samples * 2 * sizeof(int16_t));
    }
    if (!enc->pcm8_buffer) {
        enc->pcm8_buffer = malloc((size_t)max_samples * 2);
    }
    if (!enc->pcm16_buffer || !enc->pcm8_buffer) {
        fprintf(stderr, "Error: Failed to allocate PCM8 audio buffers\n");
        return 0;
    }

    // Read PCM16LE data
    size_t bytes_read = fread(enc->pcm16_buffer, 1, bytes_to_read, enc->pcm_file);
    if (bytes_read == 0) {
        return 0;
    }

    int samples_read = (int)(bytes_read / (2 * sizeof(int16_t)));
    if (samples_read == 0) {
        // Fewer bytes than one stereo sample came back: account for them so the
        // remaining-byte counter stays in sync, but do not emit an empty packet.
        enc->audio_remaining -= bytes_read;
        return 0;
    }

    // Convert to PCM8 with dithering
    convert_pcm16_to_pcm8_dithered(enc, enc->pcm16_buffer, enc->pcm8_buffer, samples_read);

    // Compress with zstd, using the higher of the PCM default level and the
    // encoder's configured level
    size_t pcm8_size = (size_t)samples_read * 2;  // Stereo
    size_t max_compressed_size = ZSTD_compressBound(pcm8_size);
    uint8_t *compressed_buffer = malloc(max_compressed_size);
    if (!compressed_buffer) {
        fprintf(stderr, "Error: Failed to allocate compression buffer for PCM8 audio\n");
        enc->audio_remaining -= bytes_read;  // the input has already been consumed
        return 0;
    }

    size_t compressed_size = ZSTD_compress(compressed_buffer, max_compressed_size,
                                           enc->pcm8_buffer, pcm8_size,
                                           (DEFAULT_PCM_ZSTD_LEVEL > enc->zstd_level) ? DEFAULT_PCM_ZSTD_LEVEL : enc->zstd_level);

    if (ZSTD_isError(compressed_size)) {
        fprintf(stderr, "Error: Zstd compression failed for PCM8 audio\n");
        free(compressed_buffer);
        return 0;
    }

    // Write packet: [0x21][uint32 compressed_size][compressed_data]
    uint8_t packet_type = TAV_PACKET_AUDIO_PCM8;
    uint32_t compressed_size_32 = (uint32_t)compressed_size;
    int write_ok = fwrite(&packet_type, 1, 1, output) == 1
                && fwrite(&compressed_size_32, sizeof(uint32_t), 1, output) == 1
                && fwrite(compressed_buffer, 1, compressed_size, output) == compressed_size;

    // Cleanup
    free(compressed_buffer);

    // Update audio remaining
    enc->audio_remaining -= bytes_read;

    if (!write_ok) {
        fprintf(stderr, "Error: Failed to write PCM8 audio packet\n");
        return 0;
    }

    if (enc->verbose) {
        printf("PCM8 packet: %d samples, %zu bytes raw, %zu bytes compressed\n",
               samples_read, pcm8_size, compressed_size);

        // Debug: Show first few samples
        if (samples_read > 0) {
            printf("  First samples (PCM16→PCM8): ");
            for (int i = 0; i < 4 && i < samples_read; i++) {
                printf("[%d,%d]→[%d,%d] ",
                       enc->pcm16_buffer[i*2], enc->pcm16_buffer[i*2+1],
                       enc->pcm8_buffer[i*2], enc->pcm8_buffer[i*2+1]);
            }
            printf("\n");
        }
    }

    return 1;
}
// Emit a single PCM8 audio packet (0x21) sized to enc->samples_per_frame,
// i.e. the audio belonging to one video frame. Returns whatever
// write_pcm8_packet_samples() returns.
static int write_pcm8_packet(tav_encoder_t *enc, FILE *output) {
    const int frame_samples = enc->samples_per_frame;
    return write_pcm8_packet_samples(enc, output, frame_samples);
}
// Process audio for current frame (copied and adapted from TEV) // Process audio for current frame (copied and adapted from TEV)
static int process_audio(tav_encoder_t *enc, int frame_num, FILE *output) { static int process_audio(tav_encoder_t *enc, int frame_num, FILE *output) {
// Skip if separate audio track mode is enabled // Skip if separate audio track mode is enabled
@@ -8658,6 +8825,16 @@ static int process_audio(tav_encoder_t *enc, int frame_num, FILE *output) {
return 1; return 1;
} }
// Handle PCM8 mode
if (enc->pcm8_audio) {
if (!enc->has_audio || !enc->pcm_file) {
return 1;
}
// Write one PCM8 packet per frame
return write_pcm8_packet(enc, output);
}
// Handle MP2 mode
if (!enc->has_audio || !enc->mp2_file || enc->audio_remaining <= 0) { if (!enc->has_audio || !enc->mp2_file || enc->audio_remaining <= 0) {
return 1; return 1;
} }
@@ -8764,6 +8941,41 @@ static int process_audio_for_gop(tav_encoder_t *enc, int *frame_numbers, int num
return 1; return 1;
} }
// Handle PCM8 mode: emit mega packet(s) evenly divided if exceeding 32768 samples
if (enc->pcm8_audio) {
if (!enc->has_audio || !enc->pcm_file || num_frames == 0) {
return 1;
}
// Calculate total samples for this GOP
int total_samples = num_frames * enc->samples_per_frame;
int max_samples_per_packet = 32768; // Architectural limit
// Calculate how many packets we need
int num_packets = (total_samples + max_samples_per_packet - 1) / max_samples_per_packet;
// Divide samples evenly across packets
int samples_per_packet = total_samples / num_packets;
int remainder = total_samples % num_packets;
if (enc->verbose) {
printf("PCM8 GOP: %d frames, %d total samples, %d packets (%d samples/packet)\n",
num_frames, total_samples, num_packets, samples_per_packet);
}
// Emit evenly-divided packets
for (int i = 0; i < num_packets; i++) {
// Distribute remainder across first packets
int samples_this_packet = samples_per_packet + (i < remainder ? 1 : 0);
if (!write_pcm8_packet_samples(enc, output, samples_this_packet)) {
break; // No more audio data
}
}
return 1;
}
// Handle MP2 mode
if (!enc->has_audio || !enc->mp2_file || enc->audio_remaining <= 0 || num_frames == 0) { if (!enc->has_audio || !enc->mp2_file || enc->audio_remaining <= 0 || num_frames == 0) {
return 1; return 1;
} }
@@ -9094,6 +9306,9 @@ static int detect_still_frame_dwt(tav_encoder_t *enc) {
// Main function // Main function
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
generate_random_filename(TEMP_AUDIO_FILE); generate_random_filename(TEMP_AUDIO_FILE);
generate_random_filename(TEMP_PCM_FILE);
// Change extension to .pcm
strcpy(TEMP_PCM_FILE + 37, ".pcm");
printf("Initialising encoder...\n"); printf("Initialising encoder...\n");
tav_encoder_t *enc = create_encoder(); tav_encoder_t *enc = create_encoder();
@@ -9148,6 +9363,7 @@ int main(int argc, char *argv[]) {
{"gop-size", required_argument, 0, 1024}, {"gop-size", required_argument, 0, 1024},
{"ezbc", no_argument, 0, 1025}, {"ezbc", no_argument, 0, 1025},
{"separate-audio-track", no_argument, 0, 1026}, {"separate-audio-track", no_argument, 0, 1026},
{"pcm8-audio", no_argument, 0, 1027},
{"help", no_argument, 0, '?'}, {"help", no_argument, 0, '?'},
{0, 0, 0, 0} {0, 0, 0, 0}
}; };
@@ -9361,6 +9577,10 @@ int main(int argc, char *argv[]) {
enc->separate_audio_track = 1; enc->separate_audio_track = 1;
printf("Separate audio track mode enabled (packet 0x40)\n"); printf("Separate audio track mode enabled (packet 0x40)\n");
break; break;
case 1027: // --pcm8-audio
enc->pcm8_audio = 1;
printf("8-bit PCM audio mode enabled (packet 0x21)\n");
break;
case 'a': case 'a':
int bitrate = atoi(optarg); int bitrate = atoi(optarg);
int valid_bitrate = validate_mp2_bitrate(bitrate); int valid_bitrate = validate_mp2_bitrate(bitrate);
@@ -10095,10 +10315,18 @@ static void cleanup_encoder(tav_encoder_t *enc) {
fclose(enc->mp2_file); fclose(enc->mp2_file);
unlink(TEMP_AUDIO_FILE); unlink(TEMP_AUDIO_FILE);
} }
if (enc->pcm_file) {
fclose(enc->pcm_file);
unlink(TEMP_PCM_FILE);
}
if (enc->output_fp) { if (enc->output_fp) {
fclose(enc->output_fp); fclose(enc->output_fp);
} }
// Free PCM8 buffers
free(enc->pcm16_buffer);
free(enc->pcm8_buffer);
free(enc->input_file); free(enc->input_file);
free(enc->output_file); free(enc->output_file);
free(enc->subtitle_file); free(enc->subtitle_file);

View File

@@ -1,4 +1,4 @@
// Created by Claude on 2025-10-17 // Created by CuriousTorvald and Claude on 2025-10-17
// MPEG-style bidirectional block motion compensation for TAV encoder // MPEG-style bidirectional block motion compensation for TAV encoder
// Simplified: Single-level diamond search, variable blocks, overlaps, sub-pixel refinement // Simplified: Single-level diamond search, variable blocks, overlaps, sub-pixel refinement

View File

@@ -1,4 +1,4 @@
// Created by Claude on 2025-08-18. // Created by CuriousTorvald and Claude on 2025-08-18.
// TEV (TSVM Enhanced Video) Encoder - YCoCg-R/ICtCp 4:2:0 16x16 Block Version // TEV (TSVM Enhanced Video) Encoder - YCoCg-R/ICtCp 4:2:0 16x16 Block Version
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>

View File

@@ -1,4 +1,4 @@
// Created by Claude on 2025-08-17. // Created by CuriousTorvald and Claude on 2025-08-17.
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <stdint.h> #include <stdint.h>

View File

@@ -1,4 +1,4 @@
// Created by Claude on 2025-08-18. // Created by CuriousTorvald and Claude on 2025-08-18.
// TEV (TSVM Enhanced Video) Encoder - XYB 4:2:0 16x16 Block Version // TEV (TSVM Enhanced Video) Encoder - XYB 4:2:0 16x16 Block Version
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>

View File

@@ -1,6 +1,6 @@
// TAV Packet Inspector - Comprehensive packet analysis tool for TAV files // TAV Packet Inspector - Comprehensive packet analysis tool for TAV files
// to compile: gcc -o tav_inspector tav_inspector.c -lzstd -lm // to compile: gcc -o tav_inspector tav_inspector.c -lzstd -lm
// Created by Claude on 2025-10-14 // Created by CuriousTorvald and Claude on 2025-10-14
#include <stdio.h> #include <stdio.h>
#include <stdint.h> #include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
@@ -24,6 +24,7 @@
#define TAV_PACKET_PFRAME_ADAPTIVE 0x16 // P-frame with adaptive quad-tree block partitioning #define TAV_PACKET_PFRAME_ADAPTIVE 0x16 // P-frame with adaptive quad-tree block partitioning
#define TAV_PACKET_BFRAME_ADAPTIVE 0x17 // B-frame with adaptive quad-tree block partitioning (bidirectional prediction) #define TAV_PACKET_BFRAME_ADAPTIVE 0x17 // B-frame with adaptive quad-tree block partitioning (bidirectional prediction)
#define TAV_PACKET_AUDIO_MP2 0x20 #define TAV_PACKET_AUDIO_MP2 0x20
#define TAV_PACKET_AUDIO_PCM8 0x21
#define TAV_PACKET_SUBTITLE 0x30 #define TAV_PACKET_SUBTITLE 0x30
#define TAV_PACKET_SUBTITLE_KAR 0x31 #define TAV_PACKET_SUBTITLE_KAR 0x31
#define TAV_PACKET_AUDIO_TRACK 0x40 #define TAV_PACKET_AUDIO_TRACK 0x40
@@ -107,6 +108,7 @@ const char* get_packet_type_name(uint8_t type) {
case TAV_PACKET_PFRAME_ADAPTIVE: return "P-FRAME (quadtree)"; case TAV_PACKET_PFRAME_ADAPTIVE: return "P-FRAME (quadtree)";
case TAV_PACKET_BFRAME_ADAPTIVE: return "B-FRAME (quadtree)"; case TAV_PACKET_BFRAME_ADAPTIVE: return "B-FRAME (quadtree)";
case TAV_PACKET_AUDIO_MP2: return "AUDIO MP2"; case TAV_PACKET_AUDIO_MP2: return "AUDIO MP2";
case TAV_PACKET_AUDIO_PCM8: return "AUDIO PCM8 (zstd)";
case TAV_PACKET_SUBTITLE: return "SUBTITLE (Simple)"; case TAV_PACKET_SUBTITLE: return "SUBTITLE (Simple)";
case TAV_PACKET_SUBTITLE_KAR: return "SUBTITLE (Karaoke)"; case TAV_PACKET_SUBTITLE_KAR: return "SUBTITLE (Karaoke)";
case TAV_PACKET_AUDIO_TRACK: return "AUDIO TRACK (Separate MP2)"; case TAV_PACKET_AUDIO_TRACK: return "AUDIO TRACK (Separate MP2)";
@@ -631,6 +633,19 @@ int main(int argc, char *argv[]) {
break; break;
} }
case TAV_PACKET_AUDIO_PCM8: {
stats.audio_count++;
uint32_t size;
if (fread(&size, sizeof(uint32_t), 1, fp) != 1) break;
stats.total_audio_bytes += size;
if (!opts.summary_only && display) {
printf(" - size=%u bytes (zstd compressed)", size);
}
fseek(fp, size, SEEK_CUR);
break;
}
case TAV_PACKET_AUDIO_TRACK: { case TAV_PACKET_AUDIO_TRACK: {
stats.audio_count++; stats.audio_count++;
uint32_t size; uint32_t size;