final encoder code cleanup

This commit is contained in:
minjaesong
2025-09-17 00:55:23 +09:00
parent 9ca575eee4
commit 89e8fc39ce
7 changed files with 68 additions and 115 deletions

View File

@@ -1,4 +1,4 @@
// usage: playmov moviefile.mov [/i]
// usage: playmv1 moviefile.mv1 [-i]
const SND_BASE_ADDR = audio.getBaseAddr()
const interactive = exec_args[2] && exec_args[2].toLowerCase() == "-i"
const WIDTH = 560

View File

@@ -557,7 +557,7 @@ let stopPlay = false
let akku = FRAME_TIME
let akku2 = 0.0
let blockDataPtr = sys.malloc(2377764)
let blockDataPtr = sys.malloc(2377744)
// Playback loop - properly adapted from TEV
try {
@@ -621,10 +621,8 @@ try {
header.width, header.height,
header.qualityY, header.qualityCo, header.qualityCg,
frameCount,
debugMotionVectors,
header.waveletFilter, // TAV-specific parameter
header.decompLevels, // TAV-specific parameter
enableDeblocking,
isLossless,
header.version // TAV version for colour space detection
)

View File

@@ -28,11 +28,11 @@ const COL_HL_EXT = {
"pcm": 32,
"mp3": 33,
"mp2": 34,
"mov": 213,
"mv2": 214,
"mv3": 214,
"mv1": 213,
"mv2": 213,
"mv3": 213,
"ipf1": 190,
"ipf2": 191,
"ipf2": 190,
"txt": 223,
"md": 223,
"log": 223
@@ -43,9 +43,9 @@ const EXEC_FUNS = {
"adpcm": (f) => _G.shell.execute(`playwav "${f}" -i`),
"mp3": (f) => _G.shell.execute(`playmp3 "${f}" -i`),
"mp2": (f) => _G.shell.execute(`playmp2 "${f}" -i`),
"mov": (f) => _G.shell.execute(`playmov "${f}" -i`),
"mv1": (f) => _G.shell.execute(`playmv1 "${f}" -i`),
"mv2": (f) => _G.shell.execute(`playtev "${f}" -i`),
"mv3": (f) => _G.shell.execute(`playtev "${f}" -i`),
"mv3": (f) => _G.shell.execute(`playtav "${f}" -i`),
"pcm": (f) => _G.shell.execute(`playpcm "${f}" -i`),
"ipf1": (f) => _G.shell.execute(`decodeipf "${f}" -i`),
"ipf2": (f) => _G.shell.execute(`decodeipf "${f}" -i`),

View File

@@ -854,16 +854,16 @@ transmission capability, and region-of-interest coding.
uint32 Compressed Size
* Zstd-compressed Block Data
## Block Data (per 112x112 tile)
## Block Data (per 280x224 tile)
uint8 Mode: encoding mode
0x00 = SKIP (copy from previous frame)
0x01 = INTRA (DWT-coded, no prediction)
0x02 = DELTA (DWT coefficient deltas added to the previous frame's coefficients)
0x03 = MOTION (motion vector only, no residual)
int16 Motion Vector X (1/4 pixel precision)
int16 Motion Vector Y (1/4 pixel precision)
float32 Rate Control Factor (4 bytes, little-endian)
uint8 Quantiser override Y (use 0 to disable overriding)
uint8 Quantiser override Co (use 0 to disable overriding)
uint8 Quantiser override Cg (use 0 to disable overriding)
## DWT Coefficient Structure (per tile)
For each decomposition level L (from highest to lowest):
uint16 LL_size: size of LL subband coefficients
@@ -886,14 +886,6 @@ transmission capability, and region-of-interest coding.
* Analysis: Daubechies 9/7 coefficients optimized for image compression
* Provides better energy compaction than 5/3 but lossy reconstruction
### Decomposition Levels
- Level 1: 112x112 → 56x56 (LL) + 3×56x56 subbands (LH,HL,HH)
- Level 2: 56x56 → 28x28 (LL) + 3×28x28 subbands
- Level 3: 28x28 → 14x14 (LL) + 3×14x14 subbands
- Level 4: 14x14 → 7x7 (LL) + 3×7x7 subbands
- Level 5: 7x7 → 3x3 (LL) + 3×3x3 subbands
- Level 6: 3x3 → 1x1 (LL) + 3×1x1 subbands (maximum)
### Quantization Strategy
TAV uses different quantization steps for each subband based on human visual
system sensitivity:
@@ -901,21 +893,6 @@ system sensitivity:
- LH/HL subbands: Medium quantization (diagonal details less critical)
- HH subbands: Coarse quantization (high frequency noise can be discarded)
### Progressive Transmission
When enabled, coefficients are transmitted in order of visual importance:
1. LL subband of highest decomposition level (thumbnail)
2. Lower frequency subbands first
3. Higher frequency subbands for refinement
## Motion Compensation
- Search range: ±28 pixels (optimized for 112x112 tiles)
- Sub-pixel precision: 1/4 pixel with bilinear interpolation
- Tile size: 112x112 pixels (perfect fit for TSVM 560x448 resolution)
* Exactly 5×4 = 20 tiles per frame (560÷112 = 5, 448÷112 = 4)
* No partial tiles needed - optimal for processing efficiency
- Uses Sum of Absolute Differences (SAD) for motion estimation
- Overlapped block motion compensation (OBMC) for smooth boundaries
## Colour Space
TAV operates in YCoCg-R colour space with full resolution channels:
- Y: Luma channel (full resolution, fine quantization)
@@ -923,12 +900,10 @@ TAV operates in YCoCg-R colour space with full resolution channels:
- Cg: Green-Magenta chroma (full resolution, very aggressive quantization by default)
## Compression Features
- 112x112 DWT tiles vs 16x16 DCT blocks in TEV
- 280x224 DWT tiles vs 16x16 DCT blocks in TEV
- Multi-resolution representation enables scalable decoding
- Better frequency localization than DCT
- Reduced blocking artifacts due to overlapping basis functions
- Region-of-Interest (ROI) coding for selective quality enhancement
- Progressive transmission for bandwidth adaptation
## Performance Comparison
Expected improvements over TEV:

View File

@@ -3816,10 +3816,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// DWT-based video codec with ICtCp colour space support
fun tavDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long,
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, frameCounter: Int,
debugMotionVectors: Boolean = false, waveletFilter: Int = 1,
decompLevels: Int = 6, enableDeblocking: Boolean = true,
isLossless: Boolean = false, tavVersion: Int = 1) {
width: Int, height: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, frameCounter: Int,
waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1) {
var readPtr = blockDataPtr
@@ -3832,14 +3830,10 @@ class GraphicsJSR223Delegate(private val vm: VM) {
for (tileX in 0 until tilesX) {
// Read tile header (4 bytes: mode + qY/qCo/qCg quantiser overrides; an override of 0 means "use the global quantiser")
val mode = vm.peek(readPtr).toInt() and 0xFF
readPtr += 1
val mvX = vm.peekShort(readPtr).toInt()
readPtr += 2
val mvY = vm.peekShort(readPtr).toInt()
readPtr += 2
val rcf = vm.peekFloat(readPtr)
readPtr += 4
val mode = vm.peek(readPtr++).toUint()
val qY = vm.peek(readPtr++).toUint().let { if (it == 0) qYGlobal else it }
val qCo = vm.peek(readPtr++).toUint().let { if (it == 0) qCoGlobal else it }
val qCg = vm.peek(readPtr++).toUint().let { if (it == 0) qCgGlobal else it }
// debug print: raw decompressed bytes
/*print("TAV Decode raw bytes (Frame $frameCounter, mode: ${arrayOf("SKIP", "INTRA", "DELTA")[mode]}): ")
@@ -3856,13 +3850,13 @@ class GraphicsJSR223Delegate(private val vm: VM) {
0x01 -> { // TAV_MODE_INTRA
// Decode DWT coefficients directly to RGB buffer
readPtr = tavDecodeDWTIntraTileRGB(readPtr, tileX, tileY, currentRGBAddr,
width, height, qY, qCo, qCg, rcf,
width, height, qY, qCo, qCg,
waveletFilter, decompLevels, isLossless, tavVersion)
}
0x02 -> { // TAV_MODE_DELTA
// Coefficient delta encoding for efficient P-frames
readPtr = tavDecodeDeltaTileRGB(readPtr, tileX, tileY, currentRGBAddr,
width, height, qY, qCo, qCg, rcf,
width, height, qY, qCo, qCg,
waveletFilter, decompLevels, isLossless, tavVersion)
}
}
@@ -3875,7 +3869,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
}
private fun tavDecodeDWTIntraTileRGB(readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long,
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, rcf: Float,
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int,
waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long {
// Now reading padded coefficient tiles (344x288) instead of core tiles (280x224)
val paddedCoeffCount = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y
@@ -3914,9 +3908,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val cgPaddedTile = FloatArray(paddedCoeffCount)
for (i in 0 until paddedCoeffCount) {
yPaddedTile[i] = quantisedY[i] * qY * rcf
coPaddedTile[i] = quantisedCo[i] * qCo * rcf
cgPaddedTile[i] = quantisedCg[i] * qCg * rcf
yPaddedTile[i] = quantisedY[i] * qY.toFloat()
coPaddedTile[i] = quantisedCo[i] * qCo.toFloat()
cgPaddedTile[i] = quantisedCg[i] * qCg.toFloat()
}
// Store coefficients for future delta reference (for P-frames)
@@ -4150,7 +4144,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
}
private fun tavDecodeDeltaTileRGB(readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long,
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, rcf: Float,
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int,
waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long {
val tileIdx = tileY * ((width + TILE_SIZE_X - 1) / TILE_SIZE_X) + tileX
@@ -4189,9 +4183,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val currentCg = FloatArray(coeffCount)
for (i in 0 until coeffCount) {
currentY[i] = prevY[i] + (deltaY[i].toFloat() * qY * rcf)
currentCo[i] = prevCo[i] + (deltaCo[i].toFloat() * qCo * rcf)
currentCg[i] = prevCg[i] + (deltaCg[i].toFloat() * qCg * rcf)
currentY[i] = prevY[i] + (deltaY[i].toFloat() * qY)
currentCo[i] = prevCo[i] + (deltaCo[i].toFloat() * qCo)
currentCg[i] = prevCg[i] + (deltaCg[i].toFloat() * qCg)
}
// Store current coefficients as previous for next frame

View File

@@ -491,8 +491,8 @@ vec4 grading(vec4 col0, vec4 args) {
return pow(rgb, power);
}
const vec4 gradLow = vec4(0.05, 0.05, 0.05, 0.8);
const vec4 gradHigh = vec4(0.2, 0.2, 0.2, 1.0);
const vec4 gradLow = vec4(0.02, 0.02, 0.02, 1.0);
const vec4 gradHigh = vec4(0.12, 0.12, 0.12, 1.0);
const float SQRT_2 = 1.4142135623730950488;
vec4 getRadialGrad(vec2 uv0) {

View File

@@ -125,12 +125,6 @@ typedef struct {
int tile_x, tile_y;
} dwt_tile_t;
// Motion vector structure
typedef struct {
int16_t mv_x, mv_y; // 1/4 pixel precision
float rate_control_factor;
} motion_vector_t;
// TAV encoder structure
typedef struct {
// Input/output files
@@ -179,8 +173,7 @@ typedef struct {
// Tile processing
int tiles_x, tiles_y;
dwt_tile_t *tiles;
motion_vector_t *motion_vectors;
// Audio processing (expanded from TEV)
size_t audio_remaining;
uint8_t *mp2_buffer;
@@ -260,7 +253,7 @@ static void show_usage(const char *program_name) {
printf(" -v, --verbose Verbose output\n");
printf(" -t, --test Test mode: generate solid colour frames\n");
printf(" --lossless Lossless mode: use 5/3 reversible wavelet\n");
printf(" --intra-only Disable delta encoding (improves quality but larger file)\n");
printf(" --delta-code Enable delta encoding (improved compression but noisy picture)\n");
printf(" --ictcp Use ICtCp colour space instead of YCoCg-R (use when source is in BT.2100)\n");
printf(" --help Show this help\n\n");
@@ -269,21 +262,21 @@ static void show_usage(const char *program_name) {
printf("%d: %d kbps\t", i, MP2_RATE_TABLE[i]);
}
printf("\n\nQuantiser Value by Quality:\n");
printf(" Y (Luma): ");
printf(" Y (Luma): ");
for (int i = 0; i < 6; i++) {
printf("%d: Q%d ", i, QUALITY_Y[i]);
printf("%d: Q %d \t", i, QUALITY_Y[i]);
}
printf("\n Co (Chroma): ");
for (int i = 0; i < 6; i++) {
printf("%d: Q%d ", i, QUALITY_CO[i]);
printf("%d: Q %d \t", i, QUALITY_CO[i]);
}
printf("\n Cg (Chroma): ");
for (int i = 0; i < 6; i++) {
printf("%d: Q%d ", i, QUALITY_CG[i]);
printf("%d: Q %d \t", i, QUALITY_CG[i]);
}
printf("\n\nFeatures:\n");
printf(" - 112x112 DWT tiles with multi-resolution encoding\n");
printf(" - 280x224 DWT tiles with multi-resolution encoding\n");
printf(" - Full resolution YCoCg-R/ICtCp colour space\n");
printf(" - Lossless and lossy compression modes\n");
@@ -310,6 +303,7 @@ static tav_encoder_t* create_encoder(void) {
enc->quantiser_y = QUALITY_Y[DEFAULT_QUALITY];
enc->quantiser_co = QUALITY_CO[DEFAULT_QUALITY];
enc->quantiser_cg = QUALITY_CG[DEFAULT_QUALITY];
enc->intra_only = 1;
return enc;
}
@@ -336,15 +330,7 @@ static int initialize_encoder(tav_encoder_t *enc) {
// Allocate tile structures
enc->tiles = malloc(num_tiles * sizeof(dwt_tile_t));
enc->motion_vectors = malloc(num_tiles * sizeof(motion_vector_t));
// Initialize motion vectors
for (int i = 0; i < num_tiles; i++) {
enc->motion_vectors[i].mv_x = 0;
enc->motion_vectors[i].mv_y = 0;
enc->motion_vectors[i].rate_control_factor = 1.0f; // Initialize to 1.0f
}
// Initialize ZSTD compression
enc->zstd_ctx = ZSTD_createCCtx();
enc->compressed_buffer_size = ZSTD_compressBound(1024 * 1024); // 1MB max
@@ -366,7 +352,7 @@ static int initialize_encoder(tav_encoder_t *enc) {
if (!enc->current_frame_rgb || !enc->previous_frame_rgb ||
!enc->current_frame_y || !enc->current_frame_co || !enc->current_frame_cg ||
!enc->previous_frame_y || !enc->previous_frame_co || !enc->previous_frame_cg ||
!enc->tiles || !enc->motion_vectors || !enc->zstd_ctx || !enc->compressed_buffer ||
!enc->tiles || !enc->zstd_ctx || !enc->compressed_buffer ||
!enc->reusable_quantised_y || !enc->reusable_quantised_co || !enc->reusable_quantised_cg ||
!enc->previous_coeffs_y || !enc->previous_coeffs_co || !enc->previous_coeffs_cg) {
return -1;
@@ -622,8 +608,8 @@ static void dwt_2d_forward_padded(float *tile_data, int levels, int filter_type)
// Quantisation for DWT subbands (quantiser step clamped to 1..255)
static void quantise_dwt_coefficients(float *coeffs, int16_t *quantised, int size, int quantiser, float rcf) {
float effective_q = quantiser * rcf;
static void quantise_dwt_coefficients(float *coeffs, int16_t *quantised, int size, int quantiser) {
float effective_q = quantiser;
effective_q = FCLAMP(effective_q, 1.0f, 255.0f);
for (int i = 0; i < size; i++) {
@@ -635,15 +621,17 @@ static void quantise_dwt_coefficients(float *coeffs, int16_t *quantised, int siz
// Serialize tile data for compression
static size_t serialize_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
const float *tile_y_data, const float *tile_co_data, const float *tile_cg_data,
const motion_vector_t *mv, uint8_t mode, uint8_t *buffer) {
uint8_t mode, uint8_t *buffer) {
size_t offset = 0;
// Write tile header
buffer[offset++] = mode;
memcpy(buffer + offset, &mv->mv_x, sizeof(int16_t)); offset += sizeof(int16_t);
memcpy(buffer + offset, &mv->mv_y, sizeof(int16_t)); offset += sizeof(int16_t);
memcpy(buffer + offset, &mv->rate_control_factor, sizeof(float)); offset += sizeof(float);
// TODO calculate frame complexity and create quantiser overrides
buffer[offset++] = 0; // qY override
buffer[offset++] = 0; // qCo override
buffer[offset++] = 0; // qCg override
if (mode == TAV_MODE_SKIP) {
// No coefficient data for SKIP/MOTION modes
return offset;
@@ -664,14 +652,14 @@ static size_t serialize_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
}
printf("\n");
printf("Encoder Debug: Quantisers - Y=%d, Co=%d, Cg=%d\n",
enc->quantiser_y, enc->quantiser_co, enc->quantiser_cg, mv->rate_control_factor);
enc->quantiser_y, enc->quantiser_co, enc->quantiser_cg);
}*/
if (mode == TAV_MODE_INTRA) {
// INTRA mode: quantise coefficients directly and store for future reference
quantise_dwt_coefficients((float*)tile_y_data, quantised_y, tile_size, enc->quantiser_y, mv->rate_control_factor);
quantise_dwt_coefficients((float*)tile_co_data, quantised_co, tile_size, enc->quantiser_co, mv->rate_control_factor);
quantise_dwt_coefficients((float*)tile_cg_data, quantised_cg, tile_size, enc->quantiser_cg, mv->rate_control_factor);
quantise_dwt_coefficients((float*)tile_y_data, quantised_y, tile_size, enc->quantiser_y);
quantise_dwt_coefficients((float*)tile_co_data, quantised_co, tile_size, enc->quantiser_co);
quantise_dwt_coefficients((float*)tile_cg_data, quantised_cg, tile_size, enc->quantiser_cg);
// Store current coefficients for future delta reference
int tile_idx = tile_y * enc->tiles_x + tile_x;
@@ -701,15 +689,15 @@ static size_t serialize_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
}
// Quantise the deltas
quantise_dwt_coefficients(delta_y, quantised_y, tile_size, enc->quantiser_y, mv->rate_control_factor);
quantise_dwt_coefficients(delta_co, quantised_co, tile_size, enc->quantiser_co, mv->rate_control_factor);
quantise_dwt_coefficients(delta_cg, quantised_cg, tile_size, enc->quantiser_cg, mv->rate_control_factor);
quantise_dwt_coefficients(delta_y, quantised_y, tile_size, enc->quantiser_y);
quantise_dwt_coefficients(delta_co, quantised_co, tile_size, enc->quantiser_co);
quantise_dwt_coefficients(delta_cg, quantised_cg, tile_size, enc->quantiser_cg);
// Reconstruct coefficients like decoder will (previous + dequantised_delta)
for (int i = 0; i < tile_size; i++) {
float dequant_delta_y = (float)quantised_y[i] * enc->quantiser_y * mv->rate_control_factor;
float dequant_delta_co = (float)quantised_co[i] * enc->quantiser_co * mv->rate_control_factor;
float dequant_delta_cg = (float)quantised_cg[i] * enc->quantiser_cg * mv->rate_control_factor;
float dequant_delta_y = (float)quantised_y[i] * enc->quantiser_y;
float dequant_delta_co = (float)quantised_co[i] * enc->quantiser_co;
float dequant_delta_cg = (float)quantised_cg[i] * enc->quantiser_cg;
prev_y[i] = prev_y[i] + dequant_delta_y;
prev_co[i] = prev_co[i] + dequant_delta_co;
@@ -743,7 +731,7 @@ static size_t serialize_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
// Compress and write frame data
static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type) {
// Calculate total uncompressed size (for padded tile coefficients: 344x288)
const size_t max_tile_size = 9 + (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y * 3 * sizeof(int16_t)); // header + 3 channels of coefficients
const size_t max_tile_size = 4 + (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y * 3 * sizeof(int16_t)); // header + 3 channels of coefficients
const size_t total_uncompressed_size = enc->tiles_x * enc->tiles_y * max_tile_size;
// Allocate buffer for uncompressed tile data
@@ -789,8 +777,7 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
// Serialize tile
size_t tile_size = serialize_tile_data(enc, tile_x, tile_y,
tile_y_data, tile_co_data, tile_cg_data,
&enc->motion_vectors[tile_idx], mode,
uncompressed_buffer + uncompressed_offset);
mode, uncompressed_buffer + uncompressed_offset);
uncompressed_offset += tile_size;
}
}
@@ -1781,7 +1768,7 @@ int main(int argc, char *argv[]) {
{"lossless", no_argument, 0, 1000},
// {"enable-progressive", no_argument, 0, 1002},
// {"enable-roi", no_argument, 0, 1003},
{"intra-only", no_argument, 0, 1006},
{"delta-code", no_argument, 0, 1006},
{"ictcp", no_argument, 0, 1005},
{"help", no_argument, 0, 1004},
{0, 0, 0, 0}
@@ -1809,9 +1796,9 @@ int main(int argc, char *argv[]) {
cleanup_encoder(enc);
return 1;
}
enc->quantiser_y = CLAMP(enc->quantiser_y, 1, 100);
enc->quantiser_co = CLAMP(enc->quantiser_co, 1, 100);
enc->quantiser_cg = CLAMP(enc->quantiser_cg, 1, 100);
enc->quantiser_y = CLAMP(enc->quantiser_y, 1, 255);
enc->quantiser_co = CLAMP(enc->quantiser_co, 1, 255);
enc->quantiser_cg = CLAMP(enc->quantiser_cg, 1, 255);
break;
/*case 'w':
enc->wavelet_filter = CLAMP(atoi(optarg), 0, 1);
@@ -1845,7 +1832,7 @@ int main(int argc, char *argv[]) {
enc->ictcp_mode = 1;
break;
case 1006: // --delta-code
enc->intra_only = 1;
enc->intra_only = 0;
break;
case 1004: // --help
show_usage(argv[0]);
@@ -2185,7 +2172,6 @@ static void cleanup_encoder(tav_encoder_t *enc) {
free(enc->previous_frame_co);
free(enc->previous_frame_cg);
free(enc->tiles);
free(enc->motion_vectors);
free(enc->compressed_buffer);
free(enc->mp2_buffer);