final encoder code cleanup

This commit is contained in:
minjaesong
2025-09-17 00:55:23 +09:00
parent 9ca575eee4
commit 89e8fc39ce
7 changed files with 68 additions and 115 deletions

View File

@@ -1,4 +1,4 @@
// usage: playmov moviefile.mov [/i] // usage: playmv1 moviefile.mv1 [-i]
const SND_BASE_ADDR = audio.getBaseAddr() const SND_BASE_ADDR = audio.getBaseAddr()
const interactive = exec_args[2] && exec_args[2].toLowerCase() == "-i" const interactive = exec_args[2] && exec_args[2].toLowerCase() == "-i"
const WIDTH = 560 const WIDTH = 560

View File

@@ -557,7 +557,7 @@ let stopPlay = false
let akku = FRAME_TIME let akku = FRAME_TIME
let akku2 = 0.0 let akku2 = 0.0
let blockDataPtr = sys.malloc(2377764) let blockDataPtr = sys.malloc(2377744)
// Playback loop - properly adapted from TEV // Playback loop - properly adapted from TEV
try { try {
@@ -621,10 +621,8 @@ try {
header.width, header.height, header.width, header.height,
header.qualityY, header.qualityCo, header.qualityCg, header.qualityY, header.qualityCo, header.qualityCg,
frameCount, frameCount,
debugMotionVectors,
header.waveletFilter, // TAV-specific parameter header.waveletFilter, // TAV-specific parameter
header.decompLevels, // TAV-specific parameter header.decompLevels, // TAV-specific parameter
enableDeblocking,
isLossless, isLossless,
header.version // TAV version for colour space detection header.version // TAV version for colour space detection
) )

View File

@@ -28,11 +28,11 @@ const COL_HL_EXT = {
"pcm": 32, "pcm": 32,
"mp3": 33, "mp3": 33,
"mp2": 34, "mp2": 34,
"mov": 213, "mv1": 213,
"mv2": 214, "mv2": 213,
"mv3": 214, "mv3": 213,
"ipf1": 190, "ipf1": 190,
"ipf2": 191, "ipf2": 190,
"txt": 223, "txt": 223,
"md": 223, "md": 223,
"log": 223 "log": 223
@@ -43,9 +43,9 @@ const EXEC_FUNS = {
"adpcm": (f) => _G.shell.execute(`playwav "${f}" -i`), "adpcm": (f) => _G.shell.execute(`playwav "${f}" -i`),
"mp3": (f) => _G.shell.execute(`playmp3 "${f}" -i`), "mp3": (f) => _G.shell.execute(`playmp3 "${f}" -i`),
"mp2": (f) => _G.shell.execute(`playmp2 "${f}" -i`), "mp2": (f) => _G.shell.execute(`playmp2 "${f}" -i`),
"mov": (f) => _G.shell.execute(`playmov "${f}" -i`), "mv1": (f) => _G.shell.execute(`playmv1 "${f}" -i`),
"mv2": (f) => _G.shell.execute(`playtev "${f}" -i`), "mv2": (f) => _G.shell.execute(`playtev "${f}" -i`),
"mv3": (f) => _G.shell.execute(`playtev "${f}" -i`), "mv3": (f) => _G.shell.execute(`playtav "${f}" -i`),
"pcm": (f) => _G.shell.execute(`playpcm "${f}" -i`), "pcm": (f) => _G.shell.execute(`playpcm "${f}" -i`),
"ipf1": (f) => _G.shell.execute(`decodeipf "${f}" -i`), "ipf1": (f) => _G.shell.execute(`decodeipf "${f}" -i`),
"ipf2": (f) => _G.shell.execute(`decodeipf "${f}" -i`), "ipf2": (f) => _G.shell.execute(`decodeipf "${f}" -i`),

View File

@@ -854,16 +854,16 @@ transmission capability, and region-of-interest coding.
uint32 Compressed Size uint32 Compressed Size
* Zstd-compressed Block Data * Zstd-compressed Block Data
## Block Data (per 112x112 tile) ## Block Data (per 280x224 tile)
uint8 Mode: encoding mode uint8 Mode: encoding mode
0x00 = SKIP (copy from previous frame) 0x00 = SKIP (copy from previous frame)
0x01 = INTRA (DWT-coded, no prediction) 0x01 = INTRA (DWT-coded, no prediction)
0x02 = INTER (DWT-coded with motion compensation) 0x02 = INTER (DWT-coded with motion compensation)
0x03 = MOTION (motion vector only, no residual) 0x03 = MOTION (motion vector only, no residual)
int16 Motion Vector X (1/4 pixel precision) uint8 Quantiser override Y (use 0 to disable overriding)
int16 Motion Vector Y (1/4 pixel precision) uint8 Quantiser override Co (use 0 to disable overriding)
float32 Rate Control Factor (4 bytes, little-endian) uint8 Quantiser override Cg (use 0 to disable overriding)
## DWT Coefficient Structure (per tile) ## DWT Coefficient Structure (per tile)
For each decomposition level L (from highest to lowest): For each decomposition level L (from highest to lowest):
uint16 LL_size: size of LL subband coefficients uint16 LL_size: size of LL subband coefficients
@@ -886,14 +886,6 @@ transmission capability, and region-of-interest coding.
* Analysis: Daubechies 9/7 coefficients optimized for image compression * Analysis: Daubechies 9/7 coefficients optimized for image compression
* Provides better energy compaction than 5/3 but lossy reconstruction * Provides better energy compaction than 5/3 but lossy reconstruction
### Decomposition Levels
- Level 1: 112x112 → 56x56 (LL) + 3×56x56 subbands (LH,HL,HH)
- Level 2: 56x56 → 28x28 (LL) + 3×28x28 subbands
- Level 3: 28x28 → 14x14 (LL) + 3×14x14 subbands
- Level 4: 14x14 → 7x7 (LL) + 3×7x7 subbands
- Level 5: 7x7 → 3x3 (LL) + 3×3x3 subbands
- Level 6: 3x3 → 1x1 (LL) + 3×1x1 subbands (maximum)
### Quantization Strategy ### Quantization Strategy
TAV uses different quantization steps for each subband based on human visual TAV uses different quantization steps for each subband based on human visual
system sensitivity: system sensitivity:
@@ -901,21 +893,6 @@ system sensitivity:
- LH/HL subbands: Medium quantization (diagonal details less critical) - LH/HL subbands: Medium quantization (diagonal details less critical)
- HH subbands: Coarse quantization (high frequency noise can be discarded) - HH subbands: Coarse quantization (high frequency noise can be discarded)
### Progressive Transmission
When enabled, coefficients are transmitted in order of visual importance:
1. LL subband of highest decomposition level (thumbnail)
2. Lower frequency subbands first
3. Higher frequency subbands for refinement
## Motion Compensation
- Search range: ±28 pixels (optimized for 112x112 tiles)
- Sub-pixel precision: 1/4 pixel with bilinear interpolation
- Tile size: 112x112 pixels (perfect fit for TSVM 560x448 resolution)
* Exactly 5×4 = 20 tiles per frame (560÷112 = 5, 448÷112 = 4)
* No partial tiles needed - optimal for processing efficiency
- Uses Sum of Absolute Differences (SAD) for motion estimation
- Overlapped block motion compensation (OBMC) for smooth boundaries
## Colour Space ## Colour Space
TAV operates in YCoCg-R colour space with full resolution channels: TAV operates in YCoCg-R colour space with full resolution channels:
- Y: Luma channel (full resolution, fine quantization) - Y: Luma channel (full resolution, fine quantization)
@@ -923,12 +900,10 @@ TAV operates in YCoCg-R colour space with full resolution channels:
- Cg: Green-Magenta chroma (full resolution, very aggressive quantization by default) - Cg: Green-Magenta chroma (full resolution, very aggressive quantization by default)
## Compression Features ## Compression Features
- 112x112 DWT tiles vs 16x16 DCT blocks in TEV - 280x224 DWT tiles vs 16x16 DCT blocks in TEV
- Multi-resolution representation enables scalable decoding - Multi-resolution representation enables scalable decoding
- Better frequency localization than DCT - Better frequency localization than DCT
- Reduced blocking artifacts due to overlapping basis functions - Reduced blocking artifacts due to overlapping basis functions
- Region-of-Interest (ROI) coding for selective quality enhancement
- Progressive transmission for bandwidth adaptation
## Performance Comparison ## Performance Comparison
Expected improvements over TEV: Expected improvements over TEV:

View File

@@ -3816,10 +3816,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// DWT-based video codec with ICtCp colour space support // DWT-based video codec with ICtCp colour space support
fun tavDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long, fun tavDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long,
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, frameCounter: Int, width: Int, height: Int, qYGlobal: Int, qCoGlobal: Int, qCgGlobal: Int, frameCounter: Int,
debugMotionVectors: Boolean = false, waveletFilter: Int = 1, waveletFilter: Int = 1, decompLevels: Int = 6, isLossless: Boolean = false, tavVersion: Int = 1) {
decompLevels: Int = 6, enableDeblocking: Boolean = true,
isLossless: Boolean = false, tavVersion: Int = 1) {
var readPtr = blockDataPtr var readPtr = blockDataPtr
@@ -3832,14 +3830,10 @@ class GraphicsJSR223Delegate(private val vm: VM) {
for (tileX in 0 until tilesX) { for (tileX in 0 until tilesX) {
// Read tile header (9 bytes: mode + mvX + mvY + rcf) // Read tile header (4 bytes: mode + qY/qCo/qCg quantiser overrides; an override of 0 means "use the global quantiser")
val mode = vm.peek(readPtr).toInt() and 0xFF val mode = vm.peek(readPtr++).toUint()
readPtr += 1 val qY = vm.peek(readPtr++).toUint().let { if (it == 0) qYGlobal else it }
val mvX = vm.peekShort(readPtr).toInt() val qCo = vm.peek(readPtr++).toUint().let { if (it == 0) qCoGlobal else it }
readPtr += 2 val qCg = vm.peek(readPtr++).toUint().let { if (it == 0) qCgGlobal else it }
val mvY = vm.peekShort(readPtr).toInt()
readPtr += 2
val rcf = vm.peekFloat(readPtr)
readPtr += 4
// debug print: raw decompressed bytes // debug print: raw decompressed bytes
/*print("TAV Decode raw bytes (Frame $frameCounter, mode: ${arrayOf("SKIP", "INTRA", "DELTA")[mode]}): ") /*print("TAV Decode raw bytes (Frame $frameCounter, mode: ${arrayOf("SKIP", "INTRA", "DELTA")[mode]}): ")
@@ -3856,13 +3850,13 @@ class GraphicsJSR223Delegate(private val vm: VM) {
0x01 -> { // TAV_MODE_INTRA 0x01 -> { // TAV_MODE_INTRA
// Decode DWT coefficients directly to RGB buffer // Decode DWT coefficients directly to RGB buffer
readPtr = tavDecodeDWTIntraTileRGB(readPtr, tileX, tileY, currentRGBAddr, readPtr = tavDecodeDWTIntraTileRGB(readPtr, tileX, tileY, currentRGBAddr,
width, height, qY, qCo, qCg, rcf, width, height, qY, qCo, qCg,
waveletFilter, decompLevels, isLossless, tavVersion) waveletFilter, decompLevels, isLossless, tavVersion)
} }
0x02 -> { // TAV_MODE_DELTA 0x02 -> { // TAV_MODE_DELTA
// Coefficient delta encoding for efficient P-frames // Coefficient delta encoding for efficient P-frames
readPtr = tavDecodeDeltaTileRGB(readPtr, tileX, tileY, currentRGBAddr, readPtr = tavDecodeDeltaTileRGB(readPtr, tileX, tileY, currentRGBAddr,
width, height, qY, qCo, qCg, rcf, width, height, qY, qCo, qCg,
waveletFilter, decompLevels, isLossless, tavVersion) waveletFilter, decompLevels, isLossless, tavVersion)
} }
} }
@@ -3875,7 +3869,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
} }
private fun tavDecodeDWTIntraTileRGB(readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long, private fun tavDecodeDWTIntraTileRGB(readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long,
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, rcf: Float, width: Int, height: Int, qY: Int, qCo: Int, qCg: Int,
waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long { waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long {
// Now reading padded coefficient tiles (344x288) instead of core tiles (280x224) // Now reading padded coefficient tiles (344x288) instead of core tiles (280x224)
val paddedCoeffCount = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y val paddedCoeffCount = PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y
@@ -3914,9 +3908,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val cgPaddedTile = FloatArray(paddedCoeffCount) val cgPaddedTile = FloatArray(paddedCoeffCount)
for (i in 0 until paddedCoeffCount) { for (i in 0 until paddedCoeffCount) {
yPaddedTile[i] = quantisedY[i] * qY * rcf yPaddedTile[i] = quantisedY[i] * qY.toFloat()
coPaddedTile[i] = quantisedCo[i] * qCo * rcf coPaddedTile[i] = quantisedCo[i] * qCo.toFloat()
cgPaddedTile[i] = quantisedCg[i] * qCg * rcf cgPaddedTile[i] = quantisedCg[i] * qCg.toFloat()
} }
// Store coefficients for future delta reference (for P-frames) // Store coefficients for future delta reference (for P-frames)
@@ -4150,7 +4144,7 @@ class GraphicsJSR223Delegate(private val vm: VM) {
} }
private fun tavDecodeDeltaTileRGB(readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long, private fun tavDecodeDeltaTileRGB(readPtr: Long, tileX: Int, tileY: Int, currentRGBAddr: Long,
width: Int, height: Int, qY: Int, qCo: Int, qCg: Int, rcf: Float, width: Int, height: Int, qY: Int, qCo: Int, qCg: Int,
waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long { waveletFilter: Int, decompLevels: Int, isLossless: Boolean, tavVersion: Int): Long {
val tileIdx = tileY * ((width + TILE_SIZE_X - 1) / TILE_SIZE_X) + tileX val tileIdx = tileY * ((width + TILE_SIZE_X - 1) / TILE_SIZE_X) + tileX
@@ -4189,9 +4183,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val currentCg = FloatArray(coeffCount) val currentCg = FloatArray(coeffCount)
for (i in 0 until coeffCount) { for (i in 0 until coeffCount) {
currentY[i] = prevY[i] + (deltaY[i].toFloat() * qY * rcf) currentY[i] = prevY[i] + (deltaY[i].toFloat() * qY)
currentCo[i] = prevCo[i] + (deltaCo[i].toFloat() * qCo * rcf) currentCo[i] = prevCo[i] + (deltaCo[i].toFloat() * qCo)
currentCg[i] = prevCg[i] + (deltaCg[i].toFloat() * qCg * rcf) currentCg[i] = prevCg[i] + (deltaCg[i].toFloat() * qCg)
} }
// Store current coefficients as previous for next frame // Store current coefficients as previous for next frame

View File

@@ -491,8 +491,8 @@ vec4 grading(vec4 col0, vec4 args) {
return pow(rgb, power); return pow(rgb, power);
} }
const vec4 gradLow = vec4(0.05, 0.05, 0.05, 0.8); const vec4 gradLow = vec4(0.02, 0.02, 0.02, 1.0);
const vec4 gradHigh = vec4(0.2, 0.2, 0.2, 1.0); const vec4 gradHigh = vec4(0.12, 0.12, 0.12, 1.0);
const float SQRT_2 = 1.4142135623730950488; const float SQRT_2 = 1.4142135623730950488;
vec4 getRadialGrad(vec2 uv0) { vec4 getRadialGrad(vec2 uv0) {

View File

@@ -125,12 +125,6 @@ typedef struct {
int tile_x, tile_y; int tile_x, tile_y;
} dwt_tile_t; } dwt_tile_t;
// Motion vector structure
typedef struct {
int16_t mv_x, mv_y; // 1/4 pixel precision
float rate_control_factor;
} motion_vector_t;
// TAV encoder structure // TAV encoder structure
typedef struct { typedef struct {
// Input/output files // Input/output files
@@ -179,8 +173,7 @@ typedef struct {
// Tile processing // Tile processing
int tiles_x, tiles_y; int tiles_x, tiles_y;
dwt_tile_t *tiles; dwt_tile_t *tiles;
motion_vector_t *motion_vectors;
// Audio processing (expanded from TEV) // Audio processing (expanded from TEV)
size_t audio_remaining; size_t audio_remaining;
uint8_t *mp2_buffer; uint8_t *mp2_buffer;
@@ -260,7 +253,7 @@ static void show_usage(const char *program_name) {
printf(" -v, --verbose Verbose output\n"); printf(" -v, --verbose Verbose output\n");
printf(" -t, --test Test mode: generate solid colour frames\n"); printf(" -t, --test Test mode: generate solid colour frames\n");
printf(" --lossless Lossless mode: use 5/3 reversible wavelet\n"); printf(" --lossless Lossless mode: use 5/3 reversible wavelet\n");
printf(" --intra-only Disable delta encoding (improves quality but larger file)\n"); printf(" --delta-code Enable delta encoding (improved compression but noisy picture)\n");
printf(" --ictcp Use ICtCp colour space instead of YCoCg-R (use when source is in BT.2100)\n"); printf(" --ictcp Use ICtCp colour space instead of YCoCg-R (use when source is in BT.2100)\n");
printf(" --help Show this help\n\n"); printf(" --help Show this help\n\n");
@@ -269,21 +262,21 @@ static void show_usage(const char *program_name) {
printf("%d: %d kbps\t", i, MP2_RATE_TABLE[i]); printf("%d: %d kbps\t", i, MP2_RATE_TABLE[i]);
} }
printf("\n\nQuantiser Value by Quality:\n"); printf("\n\nQuantiser Value by Quality:\n");
printf(" Y (Luma): "); printf(" Y (Luma): ");
for (int i = 0; i < 6; i++) { for (int i = 0; i < 6; i++) {
printf("%d: Q%d ", i, QUALITY_Y[i]); printf("%d: Q %d \t", i, QUALITY_Y[i]);
} }
printf("\n Co (Chroma): "); printf("\n Co (Chroma): ");
for (int i = 0; i < 6; i++) { for (int i = 0; i < 6; i++) {
printf("%d: Q%d ", i, QUALITY_CO[i]); printf("%d: Q %d \t", i, QUALITY_CO[i]);
} }
printf("\n Cg (Chroma): "); printf("\n Cg (Chroma): ");
for (int i = 0; i < 6; i++) { for (int i = 0; i < 6; i++) {
printf("%d: Q%d ", i, QUALITY_CG[i]); printf("%d: Q %d \t", i, QUALITY_CG[i]);
} }
printf("\n\nFeatures:\n"); printf("\n\nFeatures:\n");
printf(" - 112x112 DWT tiles with multi-resolution encoding\n"); printf(" - 280x224 DWT tiles with multi-resolution encoding\n");
printf(" - Full resolution YCoCg-R/ICtCp colour space\n"); printf(" - Full resolution YCoCg-R/ICtCp colour space\n");
printf(" - Lossless and lossy compression modes\n"); printf(" - Lossless and lossy compression modes\n");
@@ -310,6 +303,7 @@ static tav_encoder_t* create_encoder(void) {
enc->quantiser_y = QUALITY_Y[DEFAULT_QUALITY]; enc->quantiser_y = QUALITY_Y[DEFAULT_QUALITY];
enc->quantiser_co = QUALITY_CO[DEFAULT_QUALITY]; enc->quantiser_co = QUALITY_CO[DEFAULT_QUALITY];
enc->quantiser_cg = QUALITY_CG[DEFAULT_QUALITY]; enc->quantiser_cg = QUALITY_CG[DEFAULT_QUALITY];
enc->intra_only = 1;
return enc; return enc;
} }
@@ -336,15 +330,7 @@ static int initialize_encoder(tav_encoder_t *enc) {
// Allocate tile structures // Allocate tile structures
enc->tiles = malloc(num_tiles * sizeof(dwt_tile_t)); enc->tiles = malloc(num_tiles * sizeof(dwt_tile_t));
enc->motion_vectors = malloc(num_tiles * sizeof(motion_vector_t));
// Initialize motion vectors
for (int i = 0; i < num_tiles; i++) {
enc->motion_vectors[i].mv_x = 0;
enc->motion_vectors[i].mv_y = 0;
enc->motion_vectors[i].rate_control_factor = 1.0f; // Initialize to 1.0f
}
// Initialize ZSTD compression // Initialize ZSTD compression
enc->zstd_ctx = ZSTD_createCCtx(); enc->zstd_ctx = ZSTD_createCCtx();
enc->compressed_buffer_size = ZSTD_compressBound(1024 * 1024); // 1MB max enc->compressed_buffer_size = ZSTD_compressBound(1024 * 1024); // 1MB max
@@ -366,7 +352,7 @@ static int initialize_encoder(tav_encoder_t *enc) {
if (!enc->current_frame_rgb || !enc->previous_frame_rgb || if (!enc->current_frame_rgb || !enc->previous_frame_rgb ||
!enc->current_frame_y || !enc->current_frame_co || !enc->current_frame_cg || !enc->current_frame_y || !enc->current_frame_co || !enc->current_frame_cg ||
!enc->previous_frame_y || !enc->previous_frame_co || !enc->previous_frame_cg || !enc->previous_frame_y || !enc->previous_frame_co || !enc->previous_frame_cg ||
!enc->tiles || !enc->motion_vectors || !enc->zstd_ctx || !enc->compressed_buffer || !enc->tiles || !enc->zstd_ctx || !enc->compressed_buffer ||
!enc->reusable_quantised_y || !enc->reusable_quantised_co || !enc->reusable_quantised_cg || !enc->reusable_quantised_y || !enc->reusable_quantised_co || !enc->reusable_quantised_cg ||
!enc->previous_coeffs_y || !enc->previous_coeffs_co || !enc->previous_coeffs_cg) { !enc->previous_coeffs_y || !enc->previous_coeffs_co || !enc->previous_coeffs_cg) {
return -1; return -1;
@@ -622,8 +608,8 @@ static void dwt_2d_forward_padded(float *tile_data, int levels, int filter_type)
// Quantisation for DWT subbands with rate control // Quantisation for DWT subbands (effective quantiser clamped to 1..255)
static void quantise_dwt_coefficients(float *coeffs, int16_t *quantised, int size, int quantiser, float rcf) { static void quantise_dwt_coefficients(float *coeffs, int16_t *quantised, int size, int quantiser) {
float effective_q = quantiser * rcf; float effective_q = quantiser;
effective_q = FCLAMP(effective_q, 1.0f, 255.0f); effective_q = FCLAMP(effective_q, 1.0f, 255.0f);
for (int i = 0; i < size; i++) { for (int i = 0; i < size; i++) {
@@ -635,15 +621,17 @@ static void quantise_dwt_coefficients(float *coeffs, int16_t *quantised, int siz
// Serialize tile data for compression // Serialize tile data for compression
static size_t serialize_tile_data(tav_encoder_t *enc, int tile_x, int tile_y, static size_t serialize_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
const float *tile_y_data, const float *tile_co_data, const float *tile_cg_data, const float *tile_y_data, const float *tile_co_data, const float *tile_cg_data,
const motion_vector_t *mv, uint8_t mode, uint8_t *buffer) { uint8_t mode, uint8_t *buffer) {
size_t offset = 0; size_t offset = 0;
// Write tile header // Write tile header
buffer[offset++] = mode; buffer[offset++] = mode;
memcpy(buffer + offset, &mv->mv_x, sizeof(int16_t)); offset += sizeof(int16_t);
memcpy(buffer + offset, &mv->mv_y, sizeof(int16_t)); offset += sizeof(int16_t); // TODO calculate frame complexity and create quantiser overrides
memcpy(buffer + offset, &mv->rate_control_factor, sizeof(float)); offset += sizeof(float); buffer[offset++] = 0; // qY override
buffer[offset++] = 0; // qCo override
buffer[offset++] = 0; // qCg override
if (mode == TAV_MODE_SKIP) { if (mode == TAV_MODE_SKIP) {
// No coefficient data for SKIP/MOTION modes // No coefficient data for SKIP/MOTION modes
return offset; return offset;
@@ -664,14 +652,14 @@ static size_t serialize_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
} }
printf("\n"); printf("\n");
printf("Encoder Debug: Quantisers - Y=%d, Co=%d, Cg=%d, rcf=%.2f\n", printf("Encoder Debug: Quantisers - Y=%d, Co=%d, Cg=%d\n",
enc->quantiser_y, enc->quantiser_co, enc->quantiser_cg, mv->rate_control_factor); enc->quantiser_y, enc->quantiser_co, enc->quantiser_cg);
}*/ }*/
if (mode == TAV_MODE_INTRA) { if (mode == TAV_MODE_INTRA) {
// INTRA mode: quantise coefficients directly and store for future reference // INTRA mode: quantise coefficients directly and store for future reference
quantise_dwt_coefficients((float*)tile_y_data, quantised_y, tile_size, enc->quantiser_y, mv->rate_control_factor); quantise_dwt_coefficients((float*)tile_y_data, quantised_y, tile_size, enc->quantiser_y);
quantise_dwt_coefficients((float*)tile_co_data, quantised_co, tile_size, enc->quantiser_co, mv->rate_control_factor); quantise_dwt_coefficients((float*)tile_co_data, quantised_co, tile_size, enc->quantiser_co);
quantise_dwt_coefficients((float*)tile_cg_data, quantised_cg, tile_size, enc->quantiser_cg, mv->rate_control_factor); quantise_dwt_coefficients((float*)tile_cg_data, quantised_cg, tile_size, enc->quantiser_cg);
// Store current coefficients for future delta reference // Store current coefficients for future delta reference
int tile_idx = tile_y * enc->tiles_x + tile_x; int tile_idx = tile_y * enc->tiles_x + tile_x;
@@ -701,15 +689,15 @@ static size_t serialize_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
} }
// Quantise the deltas // Quantise the deltas
quantise_dwt_coefficients(delta_y, quantised_y, tile_size, enc->quantiser_y, mv->rate_control_factor); quantise_dwt_coefficients(delta_y, quantised_y, tile_size, enc->quantiser_y);
quantise_dwt_coefficients(delta_co, quantised_co, tile_size, enc->quantiser_co, mv->rate_control_factor); quantise_dwt_coefficients(delta_co, quantised_co, tile_size, enc->quantiser_co);
quantise_dwt_coefficients(delta_cg, quantised_cg, tile_size, enc->quantiser_cg, mv->rate_control_factor); quantise_dwt_coefficients(delta_cg, quantised_cg, tile_size, enc->quantiser_cg);
// Reconstruct coefficients like decoder will (previous + dequantised_delta) // Reconstruct coefficients like decoder will (previous + dequantised_delta)
for (int i = 0; i < tile_size; i++) { for (int i = 0; i < tile_size; i++) {
float dequant_delta_y = (float)quantised_y[i] * enc->quantiser_y * mv->rate_control_factor; float dequant_delta_y = (float)quantised_y[i] * enc->quantiser_y;
float dequant_delta_co = (float)quantised_co[i] * enc->quantiser_co * mv->rate_control_factor; float dequant_delta_co = (float)quantised_co[i] * enc->quantiser_co;
float dequant_delta_cg = (float)quantised_cg[i] * enc->quantiser_cg * mv->rate_control_factor; float dequant_delta_cg = (float)quantised_cg[i] * enc->quantiser_cg;
prev_y[i] = prev_y[i] + dequant_delta_y; prev_y[i] = prev_y[i] + dequant_delta_y;
prev_co[i] = prev_co[i] + dequant_delta_co; prev_co[i] = prev_co[i] + dequant_delta_co;
@@ -743,7 +731,7 @@ static size_t serialize_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
// Compress and write frame data // Compress and write frame data
static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type) { static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type) {
// Calculate total uncompressed size (for padded tile coefficients: 344x288) // Calculate total uncompressed size (for padded tile coefficients: 344x288)
const size_t max_tile_size = 9 + (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y * 3 * sizeof(int16_t)); // header + 3 channels of coefficients const size_t max_tile_size = 4 + (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y * 3 * sizeof(int16_t)); // header + 3 channels of coefficients
const size_t total_uncompressed_size = enc->tiles_x * enc->tiles_y * max_tile_size; const size_t total_uncompressed_size = enc->tiles_x * enc->tiles_y * max_tile_size;
// Allocate buffer for uncompressed tile data // Allocate buffer for uncompressed tile data
@@ -789,8 +777,7 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
// Serialize tile // Serialize tile
size_t tile_size = serialize_tile_data(enc, tile_x, tile_y, size_t tile_size = serialize_tile_data(enc, tile_x, tile_y,
tile_y_data, tile_co_data, tile_cg_data, tile_y_data, tile_co_data, tile_cg_data,
&enc->motion_vectors[tile_idx], mode, mode, uncompressed_buffer + uncompressed_offset);
uncompressed_buffer + uncompressed_offset);
uncompressed_offset += tile_size; uncompressed_offset += tile_size;
} }
} }
@@ -1781,7 +1768,7 @@ int main(int argc, char *argv[]) {
{"lossless", no_argument, 0, 1000}, {"lossless", no_argument, 0, 1000},
// {"enable-progressive", no_argument, 0, 1002}, // {"enable-progressive", no_argument, 0, 1002},
// {"enable-roi", no_argument, 0, 1003}, // {"enable-roi", no_argument, 0, 1003},
{"intra-only", no_argument, 0, 1006}, {"delta-code", no_argument, 0, 1006},
{"ictcp", no_argument, 0, 1005}, {"ictcp", no_argument, 0, 1005},
{"help", no_argument, 0, 1004}, {"help", no_argument, 0, 1004},
{0, 0, 0, 0} {0, 0, 0, 0}
@@ -1809,9 +1796,9 @@ int main(int argc, char *argv[]) {
cleanup_encoder(enc); cleanup_encoder(enc);
return 1; return 1;
} }
enc->quantiser_y = CLAMP(enc->quantiser_y, 1, 100); enc->quantiser_y = CLAMP(enc->quantiser_y, 1, 255);
enc->quantiser_co = CLAMP(enc->quantiser_co, 1, 100); enc->quantiser_co = CLAMP(enc->quantiser_co, 1, 255);
enc->quantiser_cg = CLAMP(enc->quantiser_cg, 1, 100); enc->quantiser_cg = CLAMP(enc->quantiser_cg, 1, 255);
break; break;
/*case 'w': /*case 'w':
enc->wavelet_filter = CLAMP(atoi(optarg), 0, 1); enc->wavelet_filter = CLAMP(atoi(optarg), 0, 1);
@@ -1845,7 +1832,7 @@ int main(int argc, char *argv[]) {
enc->ictcp_mode = 1; enc->ictcp_mode = 1;
break; break;
case 1006: // --intra-only case 1006: // --delta-code
enc->intra_only = 1; enc->intra_only = 0;
break; break;
case 1004: // --help case 1004: // --help
show_usage(argv[0]); show_usage(argv[0]);
@@ -2185,7 +2172,6 @@ static void cleanup_encoder(tav_encoder_t *enc) {
free(enc->previous_frame_co); free(enc->previous_frame_co);
free(enc->previous_frame_cg); free(enc->previous_frame_cg);
free(enc->tiles); free(enc->tiles);
free(enc->motion_vectors);
free(enc->compressed_buffer); free(enc->compressed_buffer);
free(enc->mp2_buffer); free(enc->mp2_buffer);