From 9f901681a60a286b1bdfd9d89653364f8d540fca Mon Sep 17 00:00:00 2001 From: minjaesong Date: Mon, 15 Sep 2025 12:56:42 +0900 Subject: [PATCH] first working version --- .../torvald/tsvm/GraphicsJSR223Delegate.kt | 153 ++++++++++++------ video_encoder/encoder_tav.c | 59 +++++++ 2 files changed, 163 insertions(+), 49 deletions(-) diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt index 279ed19..ad6d078 100644 --- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt @@ -4183,6 +4183,8 @@ class GraphicsJSR223Delegate(private val vm: VM) { // Debug: check quantized values before dequantization if (tileX == 0 && tileY == 0 && frameCounter < 3) { + println("TAV Debug: Tile (0,0) frame $frameCounter - readPtr=0x${readPtr.toString(16)}") + println("TAV Debug: First 32 bytes at readPtr: ${(0 until 32).map { "0x%02x".format(vm.peek(readPtr + it).toInt() and 0xFF) }.joinToString(" ")}") println("TAV Debug: Tile (0,0) frame $frameCounter - Quantized Y coeffs (first 64):") for (i in 0 until 8) { for (j in 0 until 8) { @@ -4190,6 +4192,24 @@ class GraphicsJSR223Delegate(private val vm: VM) { } println() } + + // Check how many non-zero coefficients we have + var nonZeroCount = 0 + for (i in 0 until coeffCount) { + if (quantizedY[i] != 0.toShort()) nonZeroCount++ + } + println("TAV Debug: Non-zero Y coefficients: $nonZeroCount out of $coeffCount") + + // Show all non-zero coefficients with their positions + println("TAV Debug: All non-zero Y coefficients:") + for (i in 0 until coeffCount) { + if (quantizedY[i] != 0.toShort()) { + val row = i / 64 + val col = i % 64 + println(" Y[$row,$col] = ${quantizedY[i]}") + } + } + println("qY=$qY, qCo=$qCo, qCg=$qCg, rcf=$rcf") } @@ -4199,22 +4219,19 @@ class GraphicsJSR223Delegate(private val vm: VM) { cgTile[i] = quantizedCg[i] * qCg * rcf } + // Debug: compare expected vs actual DC values + if (tileX == 0 && tileY == 0 && frameCounter < 3) { + val expectedDC = 195 * 5 * 1.0f // quantized_dc * qY * rcf + val actualDC = yTile[0] + println("TAV Debug: DC comparison - quantized=${quantizedY[0]}, expected_dc=$expectedDC, actual_dc=$actualDC") + println("TAV Debug: Dequantized Y[0-15]: ${yTile.sliceArray(0..15).joinToString { "%.1f".format(it) }}") + } + // Apply inverse DWT using 9/7 irreversible filter with 3 decomposition levels applyDWTInverseMultiLevel(yTile, tileSize, tileSize, 3, 1) applyDWTInverseMultiLevel(coTile, tileSize, tileSize, 3, 1) applyDWTInverseMultiLevel(cgTile, tileSize, tileSize, 3, 1) - // DEBUG: Try replacing with reasonable test values to verify the rest of pipeline works - if (tileX == 0 && tileY == 0 && frameCounter < 3) { - println("TAV Debug: Before test override - Y[0-7]: ${yTile.sliceArray(0..7).joinToString { "%.1f".format(it) }}") - // Set reasonable test values - for (i in 0 until coeffCount) { - yTile[i] = 128.0f + (i % 32) * 2.0f // Reasonable Y values around middle gray - coTile[i] = (i % 16 - 8) * 4.0f // Small chroma values - cgTile[i] = (i % 16 - 8) * 4.0f // Small chroma values - } - println("TAV Debug: After test override - Y[0-7]: ${yTile.sliceArray(0..7).joinToString { "%.1f".format(it) }}") - } // Debug: check if we get reasonable values after DWT if (tileX == 0 && tileY == 0 && frameCounter < 3) { @@ -4371,15 +4388,17 @@ class GraphicsJSR223Delegate(private val vm: VM) { val tileIdx = y * tileSize + x val pixelIdx = frameY * width + frameX - // YCoCg-R to RGB conversion + // YCoCg-R to RGB conversion (exact inverse of encoder) val Y = yTile[tileIdx] val Co = coTile[tileIdx] val Cg = cgTile[tileIdx] - val tmp = Y - Cg - val g = Y + Cg - val b = tmp - Co - val r = tmp + Co + // Inverse of encoder's YCoCg-R transform: + // Forward: Co = r - b; tmp = b + Co/2; Cg = g - tmp; Y = tmp + Cg/2 + val tmp = Y - Cg / 2.0f + val g = Cg + tmp + val b = tmp - Co / 2.0f + val r = Co + b val rgbOffset = pixelIdx * 3L vm.poke(rgbAddr + rgbOffset, r.toInt().coerceIn(0, 255).toByte()) @@ -4813,16 +4832,20 @@ class GraphicsJSR223Delegate(private val vm: VM) { val currentSize = size shr level if (currentSize < 2) break - // Column transform (reverse order from encoder) + // Apply inverse DWT to current subband region - EXACT match to encoder + // The encoder does ROW transform first, then COLUMN transform + // So inverse must do COLUMN inverse first, then ROW inverse + + // Column inverse transform first for (x in 0 until currentSize) { for (y in 0 until currentSize) { tempCol[y] = data[y * size + x] } if (filterType == 0) { - applyLift53InverseVertical(tempCol, currentSize) + applyDWT53Inverse1D(tempCol, currentSize) } else { - applyLift97InverseVertical(tempCol, currentSize) + applyDWT97Inverse1D(tempCol, currentSize) } for (y in 0 until currentSize) { @@ -4830,16 +4853,16 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } - // Row transform (reverse order from encoder) + // Row inverse transform second for (y in 0 until currentSize) { for (x in 0 until currentSize) { tempRow[x] = data[y * size + x] } if (filterType == 0) { - applyLift53InverseHorizontal(tempRow, currentSize) + applyDWT53Inverse1D(tempRow, currentSize) } else { - applyLift97InverseHorizontal(tempRow, currentSize) + applyDWT97Inverse1D(tempRow, currentSize) } for (x in 0 until currentSize) { @@ -4876,6 +4899,9 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } + private fun applyLift97InverseHorizontal(row: FloatArray, width: Int) { TODO() } + private fun applyLift97InverseVertical(col: FloatArray, height: Int) { TODO() } + // 1D lifting scheme implementations for 5/3 filter private fun applyLift53InverseHorizontal(data: FloatArray, length: Int) { if (length < 2) return @@ -4925,38 +4951,35 @@ class GraphicsJSR223Delegate(private val vm: VM) { } // 1D lifting scheme implementations for 9/7 irreversible filter - private fun applyLift97InverseHorizontal(data: FloatArray, length: Int) { + private fun applyDWT97Inverse1D(data: FloatArray, length: Int) { if (length < 2) return val temp = FloatArray(length) - val half = (length + 1) / 2 + val half = length / 2 - // Separate even and odd samples (inverse interleaving) + // Split into low and high frequency components (matching encoder layout) + // After forward DWT: first half = low-pass, second half = high-pass for (i in 0 until half) { - temp[i] = data[2 * i] // Even samples (low-pass) - } - for (i in 0 until length / 2) { - temp[half + i] = data[2 * i + 1] // Odd samples (high-pass) + temp[i] = data[i] // Low-pass coefficients (first half) + temp[half + i] = data[half + i] // High-pass coefficients (second half) } - // 9/7 inverse lifting coefficients (must match encoder exactly) - val alpha = -1.586134342f // Inverse lifting coefficient - val beta = -0.052980118f // Inverse lifting coefficient (match encoder) - val gamma = 0.882911076f // Inverse lifting coefficient (match encoder) - val delta = 0.443506852f // Inverse lifting coefficient (match encoder) - val K = 1.230174105f // Scaling factor (match encoder) - val invK = 1.0f / K + // 9/7 inverse lifting coefficients (exactly matching encoder) + val alpha = -1.586134342f + val beta = -0.052980118f + val gamma = 0.882911076f + val delta = 0.443506852f + val K = 1.230174105f - // Inverse lifting steps for 9/7 filter (undo forward steps in reverse order) - // Step 5: Undo scaling + // Inverse lifting steps (undo forward steps in reverse order) + + // Step 5: Undo scaling (reverse of encoder's final step) for (i in 0 until half) { temp[i] /= K // Undo temp[i] *= K - } - for (i in 0 until length / 2) { temp[half + i] *= K // Undo temp[half + i] /= K } - // Step 4: Undo update step (delta) + // Step 4: Undo update step (delta) for (i in 0 until half) { val left = if (i > 0) temp[half + i - 1] else temp[half + i] val right = if (i < half - 1) temp[half + i + 1] else temp[half + i] @@ -4984,18 +5007,50 @@ class GraphicsJSR223Delegate(private val vm: VM) { temp[half + i] -= alpha * (left + right) } - // Interleave back + // Merge back (inverse of encoder's split) for (i in 0 until half) { - data[2 * i] = temp[i] - } - for (i in 0 until length / 2) { - data[2 * i + 1] = temp[half + i] + data[2 * i] = temp[i] // Even positions get low-pass + if (2 * i + 1 < length) { + data[2 * i + 1] = temp[half + i] // Odd positions get high-pass + } } } - private fun applyLift97InverseVertical(data: FloatArray, length: Int) { - // Same as horizontal but for vertical direction - applyLift97InverseHorizontal(data, length) + private fun applyDWT53Inverse1D(data: FloatArray, length: Int) { + if (length < 2) return + + val temp = FloatArray(length) + val half = length / 2 + + // Split into low and high frequency components (matching encoder layout) + for (i in 0 until half) { + temp[i] = data[i] // Low-pass coefficients (first half) + temp[half + i] = data[half + i] // High-pass coefficients (second half) + } + + // 5/3 inverse lifting (undo forward steps in reverse order) + + // Step 2: Undo update step (1/4 coefficient) + for (i in 0 until half) { + val left = if (i > 0) temp[half + i - 1] else 0.0f + val right = if (i < half - 1) temp[half + i] else 0.0f + temp[i] -= 0.25f * (left + right) + } + + // Step 1: Undo predict step (1/2 coefficient) + for (i in 0 until half) { + val left = temp[i] + val right = if (i < half - 1) temp[i + 1] else temp[i] + temp[half + i] -= 0.5f * (left + right) + } + + // Merge back (inverse of encoder's split) + for (i in 0 until half) { + data[2 * i] = temp[i] // Even positions get low-pass + if (2 * i + 1 < length) { + data[2 * i + 1] = temp[half + i] // Odd positions get high-pass + } + } } diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index d14d6bc..2953055 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -566,10 +566,30 @@ static size_t serialize_tile_data(tav_encoder_t *enc, int tile_x, int tile_y, int16_t *quantized_co = malloc(tile_size * sizeof(int16_t)); int16_t *quantized_cg = malloc(tile_size * sizeof(int16_t)); + // Debug: check DWT coefficients before quantization + if (tile_x == 0 && tile_y == 0) { + printf("Encoder Debug: Tile (0,0) - DWT Y coeffs before quantization (first 16): "); + for (int i = 0; i < 16; i++) { + printf("%.2f ", tile_y_data[i]); + } + printf("\n"); + printf("Encoder Debug: Quantizers - Y=%d, Co=%d, Cg=%d, rcf=%.2f\n", + enc->quantizer_y, enc->quantizer_co, enc->quantizer_cg, mv->rate_control_factor); + } + quantize_dwt_coefficients((float*)tile_y_data, quantized_y, tile_size, enc->quantizer_y, mv->rate_control_factor); quantize_dwt_coefficients((float*)tile_co_data, quantized_co, tile_size, enc->quantizer_co, mv->rate_control_factor); quantize_dwt_coefficients((float*)tile_cg_data, quantized_cg, tile_size, enc->quantizer_cg, mv->rate_control_factor); + // Debug: check quantized coefficients after quantization + if (tile_x == 0 && tile_y == 0) { + printf("Encoder Debug: Tile (0,0) - Quantized Y coeffs (first 16): "); + for (int i = 0; i < 16; i++) { + printf("%d ", quantized_y[i]); + } + printf("\n"); + } + // Write quantized coefficients memcpy(buffer + offset, quantized_y, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t); memcpy(buffer + offset, quantized_co, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t); @@ -626,6 +646,15 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type) } } + // Debug: check input data before DWT + if (tile_x == 0 && tile_y == 0) { + printf("Encoder Debug: Tile (0,0) - Y data before DWT (first 16): "); + for (int i = 0; i < 16; i++) { + printf("%.2f ", tile_y_data[i]); + } + printf("\n"); + } + // Apply DWT transform to each channel dwt_2d_forward(tile_y_data, enc->decomp_levels, enc->wavelet_filter); dwt_2d_forward(tile_co_data, enc->decomp_levels, enc->wavelet_filter); @@ -981,6 +1010,17 @@ int main(int argc, char *argv[]) { enc->quantizer_co = QUALITY_CO[enc->quality_level]; enc->quantizer_cg = QUALITY_CG[enc->quality_level]; break; + case 'Q': + // Parse quantizer values Y,Co,Cg + if (sscanf(optarg, "%d,%d,%d", &enc->quantizer_y, &enc->quantizer_co, &enc->quantizer_cg) != 3) { + fprintf(stderr, "Error: Invalid quantizer format. Use Y,Co,Cg (e.g., 5,3,2)\n"); + cleanup_encoder(enc); + return 1; + } + enc->quantizer_y = CLAMP(enc->quantizer_y, 1, 100); + enc->quantizer_co = CLAMP(enc->quantizer_co, 1, 100); + enc->quantizer_cg = CLAMP(enc->quantizer_cg, 1, 100); + break; case 'w': enc->wavelet_filter = CLAMP(atoi(optarg), 0, 1); break; @@ -1163,10 +1203,29 @@ int main(int argc, char *argv[]) { // Determine frame type int is_keyframe = 1;//(frame_count % keyframe_interval == 0); + // Debug: check RGB input data + if (frame_count < 3) { + printf("Encoder Debug: Frame %d - RGB data (first 16 bytes): ", frame_count); + for (int i = 0; i < 16; i++) { + printf("%d ", enc->current_frame_rgb[i]); + } + printf("\n"); + } + // Convert RGB to YCoCg rgb_to_ycocg(enc->current_frame_rgb, enc->current_frame_y, enc->current_frame_co, enc->current_frame_cg, enc->width, enc->height); + + // Debug: check YCoCg conversion result + if (frame_count < 3) { + printf("Encoder Debug: Frame %d - YCoCg result (first 16): ", frame_count); + for (int i = 0; i < 16; i++) { + printf("Y=%.1f Co=%.1f Cg=%.1f ", enc->current_frame_y[i], enc->current_frame_co[i], enc->current_frame_cg[i]); + if (i % 4 == 3) break; // Only show first 4 pixels for readability + } + printf("\n"); + } // Process motion vectors for P-frames int num_tiles = enc->tiles_x * enc->tiles_y;