first working version

This commit is contained in:
minjaesong
2025-09-15 12:56:42 +09:00
parent d446a4e2f5
commit 9f901681a6
2 changed files with 163 additions and 49 deletions

View File

@@ -4183,6 +4183,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
// Debug: check quantized values before dequantization // Debug: check quantized values before dequantization
if (tileX == 0 && tileY == 0 && frameCounter < 3) { if (tileX == 0 && tileY == 0 && frameCounter < 3) {
println("TAV Debug: Tile (0,0) frame $frameCounter - readPtr=0x${readPtr.toString(16)}")
println("TAV Debug: First 32 bytes at readPtr: ${(0 until 32).map { "0x%02x".format(vm.peek(readPtr + it).toInt() and 0xFF) }.joinToString(" ")}")
println("TAV Debug: Tile (0,0) frame $frameCounter - Quantized Y coeffs (first 64):") println("TAV Debug: Tile (0,0) frame $frameCounter - Quantized Y coeffs (first 64):")
for (i in 0 until 8) { for (i in 0 until 8) {
for (j in 0 until 8) { for (j in 0 until 8) {
@@ -4190,6 +4192,24 @@ class GraphicsJSR223Delegate(private val vm: VM) {
} }
println() println()
} }
// Check how many non-zero coefficients we have
var nonZeroCount = 0
for (i in 0 until coeffCount) {
if (quantizedY[i] != 0.toShort()) nonZeroCount++
}
println("TAV Debug: Non-zero Y coefficients: $nonZeroCount out of $coeffCount")
// Show all non-zero coefficients with their positions
println("TAV Debug: All non-zero Y coefficients:")
for (i in 0 until coeffCount) {
if (quantizedY[i] != 0.toShort()) {
val row = i / 64
val col = i % 64
println(" Y[$row,$col] = ${quantizedY[i]}")
}
}
println("qY=$qY, qCo=$qCo, qCg=$qCg, rcf=$rcf") println("qY=$qY, qCo=$qCo, qCg=$qCg, rcf=$rcf")
} }
@@ -4199,22 +4219,19 @@ class GraphicsJSR223Delegate(private val vm: VM) {
cgTile[i] = quantizedCg[i] * qCg * rcf cgTile[i] = quantizedCg[i] * qCg * rcf
} }
// Debug: compare expected vs actual DC values
if (tileX == 0 && tileY == 0 && frameCounter < 3) {
val expectedDC = 195 * 5 * 1.0f // quantized_dc * qY * rcf
val actualDC = yTile[0]
println("TAV Debug: DC comparison - quantized=${quantizedY[0]}, expected_dc=$expectedDC, actual_dc=$actualDC")
println("TAV Debug: Dequantized Y[0-15]: ${yTile.sliceArray(0..15).joinToString { "%.1f".format(it) }}")
}
// Apply inverse DWT using 9/7 irreversible filter with 3 decomposition levels // Apply inverse DWT using 9/7 irreversible filter with 3 decomposition levels
applyDWTInverseMultiLevel(yTile, tileSize, tileSize, 3, 1) applyDWTInverseMultiLevel(yTile, tileSize, tileSize, 3, 1)
applyDWTInverseMultiLevel(coTile, tileSize, tileSize, 3, 1) applyDWTInverseMultiLevel(coTile, tileSize, tileSize, 3, 1)
applyDWTInverseMultiLevel(cgTile, tileSize, tileSize, 3, 1) applyDWTInverseMultiLevel(cgTile, tileSize, tileSize, 3, 1)
// DEBUG: Try replacing with reasonable test values to verify the rest of pipeline works
if (tileX == 0 && tileY == 0 && frameCounter < 3) {
println("TAV Debug: Before test override - Y[0-7]: ${yTile.sliceArray(0..7).joinToString { "%.1f".format(it) }}")
// Set reasonable test values
for (i in 0 until coeffCount) {
yTile[i] = 128.0f + (i % 32) * 2.0f // Reasonable Y values around middle gray
coTile[i] = (i % 16 - 8) * 4.0f // Small chroma values
cgTile[i] = (i % 16 - 8) * 4.0f // Small chroma values
}
println("TAV Debug: After test override - Y[0-7]: ${yTile.sliceArray(0..7).joinToString { "%.1f".format(it) }}")
}
// Debug: check if we get reasonable values after DWT // Debug: check if we get reasonable values after DWT
if (tileX == 0 && tileY == 0 && frameCounter < 3) { if (tileX == 0 && tileY == 0 && frameCounter < 3) {
@@ -4371,15 +4388,17 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val tileIdx = y * tileSize + x val tileIdx = y * tileSize + x
val pixelIdx = frameY * width + frameX val pixelIdx = frameY * width + frameX
// YCoCg-R to RGB conversion // YCoCg-R to RGB conversion (exact inverse of encoder)
val Y = yTile[tileIdx] val Y = yTile[tileIdx]
val Co = coTile[tileIdx] val Co = coTile[tileIdx]
val Cg = cgTile[tileIdx] val Cg = cgTile[tileIdx]
val tmp = Y - Cg // Inverse of encoder's YCoCg-R transform:
val g = Y + Cg // Forward: Co = r - b; tmp = b + Co/2; Cg = g - tmp; Y = tmp + Cg/2
val b = tmp - Co val tmp = Y - Cg / 2.0f
val r = tmp + Co val g = Cg + tmp
val b = tmp - Co / 2.0f
val r = Co + b
val rgbOffset = pixelIdx * 3L val rgbOffset = pixelIdx * 3L
vm.poke(rgbAddr + rgbOffset, r.toInt().coerceIn(0, 255).toByte()) vm.poke(rgbAddr + rgbOffset, r.toInt().coerceIn(0, 255).toByte())
@@ -4813,16 +4832,20 @@ class GraphicsJSR223Delegate(private val vm: VM) {
val currentSize = size shr level val currentSize = size shr level
if (currentSize < 2) break if (currentSize < 2) break
// Column transform (reverse order from encoder) // Apply inverse DWT to current subband region - EXACT match to encoder
// The encoder does ROW transform first, then COLUMN transform
// So inverse must do COLUMN inverse first, then ROW inverse
// Column inverse transform first
for (x in 0 until currentSize) { for (x in 0 until currentSize) {
for (y in 0 until currentSize) { for (y in 0 until currentSize) {
tempCol[y] = data[y * size + x] tempCol[y] = data[y * size + x]
} }
if (filterType == 0) { if (filterType == 0) {
applyLift53InverseVertical(tempCol, currentSize) applyDWT53Inverse1D(tempCol, currentSize)
} else { } else {
applyLift97InverseVertical(tempCol, currentSize) applyDWT97Inverse1D(tempCol, currentSize)
} }
for (y in 0 until currentSize) { for (y in 0 until currentSize) {
@@ -4830,16 +4853,16 @@ class GraphicsJSR223Delegate(private val vm: VM) {
} }
} }
// Row transform (reverse order from encoder) // Row inverse transform second
for (y in 0 until currentSize) { for (y in 0 until currentSize) {
for (x in 0 until currentSize) { for (x in 0 until currentSize) {
tempRow[x] = data[y * size + x] tempRow[x] = data[y * size + x]
} }
if (filterType == 0) { if (filterType == 0) {
applyLift53InverseHorizontal(tempRow, currentSize) applyDWT53Inverse1D(tempRow, currentSize)
} else { } else {
applyLift97InverseHorizontal(tempRow, currentSize) applyDWT97Inverse1D(tempRow, currentSize)
} }
for (x in 0 until currentSize) { for (x in 0 until currentSize) {
@@ -4876,6 +4899,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
} }
} }
/**
 * Unimplemented placeholder: every call throws [NotImplementedError] via [TODO].
 *
 * @param row ignored; the function throws before reading it.
 * @param width ignored; the function throws before reading it.
 */
private fun applyLift97InverseHorizontal(row: FloatArray, width: Int): Unit = TODO()
/**
 * Unimplemented placeholder: every call throws [NotImplementedError] via [TODO].
 *
 * @param col ignored; the function throws before reading it.
 * @param height ignored; the function throws before reading it.
 */
private fun applyLift97InverseVertical(col: FloatArray, height: Int): Unit = TODO()
// 1D lifting scheme implementations for 5/3 filter // 1D lifting scheme implementations for 5/3 filter
private fun applyLift53InverseHorizontal(data: FloatArray, length: Int) { private fun applyLift53InverseHorizontal(data: FloatArray, length: Int) {
if (length < 2) return if (length < 2) return
@@ -4925,38 +4951,35 @@ class GraphicsJSR223Delegate(private val vm: VM) {
} }
// 1D lifting scheme implementations for 9/7 irreversible filter // 1D lifting scheme implementations for 9/7 irreversible filter
private fun applyLift97InverseHorizontal(data: FloatArray, length: Int) { private fun applyDWT97Inverse1D(data: FloatArray, length: Int) {
if (length < 2) return if (length < 2) return
val temp = FloatArray(length) val temp = FloatArray(length)
val half = (length + 1) / 2 val half = length / 2
// Separate even and odd samples (inverse interleaving) // Split into low and high frequency components (matching encoder layout)
// After forward DWT: first half = low-pass, second half = high-pass
for (i in 0 until half) { for (i in 0 until half) {
temp[i] = data[2 * i] // Even samples (low-pass) temp[i] = data[i] // Low-pass coefficients (first half)
} temp[half + i] = data[half + i] // High-pass coefficients (second half)
for (i in 0 until length / 2) {
temp[half + i] = data[2 * i + 1] // Odd samples (high-pass)
} }
// 9/7 inverse lifting coefficients (must match encoder exactly) // 9/7 inverse lifting coefficients (exactly matching encoder)
val alpha = -1.586134342f // Inverse lifting coefficient val alpha = -1.586134342f
val beta = -0.052980118f // Inverse lifting coefficient (match encoder) val beta = -0.052980118f
val gamma = 0.882911076f // Inverse lifting coefficient (match encoder) val gamma = 0.882911076f
val delta = 0.443506852f // Inverse lifting coefficient (match encoder) val delta = 0.443506852f
val K = 1.230174105f // Scaling factor (match encoder) val K = 1.230174105f
val invK = 1.0f / K
// Inverse lifting steps for 9/7 filter (undo forward steps in reverse order) // Inverse lifting steps (undo forward steps in reverse order)
// Step 5: Undo scaling
// Step 5: Undo scaling (reverse of encoder's final step)
for (i in 0 until half) { for (i in 0 until half) {
temp[i] /= K // Undo temp[i] *= K temp[i] /= K // Undo temp[i] *= K
}
for (i in 0 until length / 2) {
temp[half + i] *= K // Undo temp[half + i] /= K temp[half + i] *= K // Undo temp[half + i] /= K
} }
// Step 4: Undo update step (delta) // Step 4: Undo update step (delta)
for (i in 0 until half) { for (i in 0 until half) {
val left = if (i > 0) temp[half + i - 1] else temp[half + i] val left = if (i > 0) temp[half + i - 1] else temp[half + i]
val right = if (i < half - 1) temp[half + i + 1] else temp[half + i] val right = if (i < half - 1) temp[half + i + 1] else temp[half + i]
@@ -4984,18 +5007,50 @@ class GraphicsJSR223Delegate(private val vm: VM) {
temp[half + i] -= alpha * (left + right) temp[half + i] -= alpha * (left + right)
} }
// Interleave back // Merge back (inverse of encoder's split)
for (i in 0 until half) { for (i in 0 until half) {
data[2 * i] = temp[i] data[2 * i] = temp[i] // Even positions get low-pass
} if (2 * i + 1 < length) {
for (i in 0 until length / 2) { data[2 * i + 1] = temp[half + i] // Odd positions get high-pass
data[2 * i + 1] = temp[half + i] }
} }
} }
/**
 * Runs one level of the inverse 5/3 lifting transform on [data], in place.
 *
 * Input layout: the first `length / 2` entries of [data] are read as low-pass
 * coefficients and the following `length / 2` entries as high-pass coefficients.
 * Output layout: low-pass results are written to even indices and high-pass
 * results to odd indices.
 *
 * Does nothing when [length] is below 2. When [length] is odd, the element at
 * index `length - 1` is neither read nor written.
 *
 * NOTE(review): the predict undo below subtracts `0.5f * (left + right)` and the
 * update undo treats the last pair's right neighbour as 0. Whether both mirror
 * the encoder's forward 5/3 steps cannot be confirmed from this function alone;
 * verify against the encoder.
 *
 * @param data buffer holding the coefficients; overwritten with the result.
 * @param length number of leading entries of [data] that take part in the transform.
 */
private fun applyDWT53Inverse1D(data: FloatArray, length: Int) {
    if (length < 2) return

    val pairs = length / 2
    val lastPair = pairs - 1

    // Copy both bands out first so the interleaved write-back cannot clobber unread input.
    val low = FloatArray(pairs) { data[it] }
    val high = FloatArray(pairs) { data[pairs + it] }

    // Undo the update step (1/4 coefficient); out-of-range neighbours count as 0.
    for (k in 0..lastPair) {
        val before = if (k > 0) high[k - 1] else 0.0f
        val here = if (k < lastPair) high[k] else 0.0f
        low[k] -= 0.25f * (before + here)
    }

    // Undo the predict step (1/2 coefficient); the last pair reuses its own low value.
    for (k in 0..lastPair) {
        val next = if (k < lastPair) low[k + 1] else low[k]
        high[k] -= 0.5f * (low[k] + next)
    }

    // Interleave: 2 * k + 1 is always < length because k < length / 2.
    for (k in 0..lastPair) {
        data[2 * k] = low[k]
        data[2 * k + 1] = high[k]
    }
}

View File

@@ -566,10 +566,30 @@ static size_t serialize_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
int16_t *quantized_co = malloc(tile_size * sizeof(int16_t)); int16_t *quantized_co = malloc(tile_size * sizeof(int16_t));
int16_t *quantized_cg = malloc(tile_size * sizeof(int16_t)); int16_t *quantized_cg = malloc(tile_size * sizeof(int16_t));
// Debug: check DWT coefficients before quantization
if (tile_x == 0 && tile_y == 0) {
printf("Encoder Debug: Tile (0,0) - DWT Y coeffs before quantization (first 16): ");
for (int i = 0; i < 16; i++) {
printf("%.2f ", tile_y_data[i]);
}
printf("\n");
printf("Encoder Debug: Quantizers - Y=%d, Co=%d, Cg=%d, rcf=%.2f\n",
enc->quantizer_y, enc->quantizer_co, enc->quantizer_cg, mv->rate_control_factor);
}
quantize_dwt_coefficients((float*)tile_y_data, quantized_y, tile_size, enc->quantizer_y, mv->rate_control_factor); quantize_dwt_coefficients((float*)tile_y_data, quantized_y, tile_size, enc->quantizer_y, mv->rate_control_factor);
quantize_dwt_coefficients((float*)tile_co_data, quantized_co, tile_size, enc->quantizer_co, mv->rate_control_factor); quantize_dwt_coefficients((float*)tile_co_data, quantized_co, tile_size, enc->quantizer_co, mv->rate_control_factor);
quantize_dwt_coefficients((float*)tile_cg_data, quantized_cg, tile_size, enc->quantizer_cg, mv->rate_control_factor); quantize_dwt_coefficients((float*)tile_cg_data, quantized_cg, tile_size, enc->quantizer_cg, mv->rate_control_factor);
// Debug: check quantized coefficients after quantization
if (tile_x == 0 && tile_y == 0) {
printf("Encoder Debug: Tile (0,0) - Quantized Y coeffs (first 16): ");
for (int i = 0; i < 16; i++) {
printf("%d ", quantized_y[i]);
}
printf("\n");
}
// Write quantized coefficients // Write quantized coefficients
memcpy(buffer + offset, quantized_y, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t); memcpy(buffer + offset, quantized_y, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t);
memcpy(buffer + offset, quantized_co, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t); memcpy(buffer + offset, quantized_co, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t);
@@ -626,6 +646,15 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
} }
} }
// Debug: check input data before DWT
if (tile_x == 0 && tile_y == 0) {
printf("Encoder Debug: Tile (0,0) - Y data before DWT (first 16): ");
for (int i = 0; i < 16; i++) {
printf("%.2f ", tile_y_data[i]);
}
printf("\n");
}
// Apply DWT transform to each channel // Apply DWT transform to each channel
dwt_2d_forward(tile_y_data, enc->decomp_levels, enc->wavelet_filter); dwt_2d_forward(tile_y_data, enc->decomp_levels, enc->wavelet_filter);
dwt_2d_forward(tile_co_data, enc->decomp_levels, enc->wavelet_filter); dwt_2d_forward(tile_co_data, enc->decomp_levels, enc->wavelet_filter);
@@ -981,6 +1010,17 @@ int main(int argc, char *argv[]) {
enc->quantizer_co = QUALITY_CO[enc->quality_level]; enc->quantizer_co = QUALITY_CO[enc->quality_level];
enc->quantizer_cg = QUALITY_CG[enc->quality_level]; enc->quantizer_cg = QUALITY_CG[enc->quality_level];
break; break;
case 'Q':
// Parse quantizer values Y,Co,Cg
if (sscanf(optarg, "%d,%d,%d", &enc->quantizer_y, &enc->quantizer_co, &enc->quantizer_cg) != 3) {
fprintf(stderr, "Error: Invalid quantizer format. Use Y,Co,Cg (e.g., 5,3,2)\n");
cleanup_encoder(enc);
return 1;
}
enc->quantizer_y = CLAMP(enc->quantizer_y, 1, 100);
enc->quantizer_co = CLAMP(enc->quantizer_co, 1, 100);
enc->quantizer_cg = CLAMP(enc->quantizer_cg, 1, 100);
break;
case 'w': case 'w':
enc->wavelet_filter = CLAMP(atoi(optarg), 0, 1); enc->wavelet_filter = CLAMP(atoi(optarg), 0, 1);
break; break;
@@ -1163,10 +1203,29 @@ int main(int argc, char *argv[]) {
// Determine frame type // Determine frame type
int is_keyframe = 1;//(frame_count % keyframe_interval == 0); int is_keyframe = 1;//(frame_count % keyframe_interval == 0);
// Debug: check RGB input data
if (frame_count < 3) {
printf("Encoder Debug: Frame %d - RGB data (first 16 bytes): ", frame_count);
for (int i = 0; i < 16; i++) {
printf("%d ", enc->current_frame_rgb[i]);
}
printf("\n");
}
// Convert RGB to YCoCg // Convert RGB to YCoCg
rgb_to_ycocg(enc->current_frame_rgb, rgb_to_ycocg(enc->current_frame_rgb,
enc->current_frame_y, enc->current_frame_co, enc->current_frame_cg, enc->current_frame_y, enc->current_frame_co, enc->current_frame_cg,
enc->width, enc->height); enc->width, enc->height);
// Debug: check YCoCg conversion result
if (frame_count < 3) {
printf("Encoder Debug: Frame %d - YCoCg result (first 16): ", frame_count);
for (int i = 0; i < 16; i++) {
printf("Y=%.1f Co=%.1f Cg=%.1f ", enc->current_frame_y[i], enc->current_frame_co[i], enc->current_frame_cg[i]);
if (i % 4 == 3) break; // Only show first 4 pixels for readability
}
printf("\n");
}
// Process motion vectors for P-frames // Process motion vectors for P-frames
int num_tiles = enc->tiles_x * enc->tiles_y; int num_tiles = enc->tiles_x * enc->tiles_y;