first working version

2026-06-06 13:38:30 +09:00 · 2025-09-15 12:56:42 +09:00
parent d446a4e2f5
commit 9f901681a6
2 changed files with 163 additions and 49 deletions
--- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
+++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt
@@ -4183,6 +4183,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
        
        // Debug: check quantized values before dequantization
        if (tileX == 0 && tileY == 0 && frameCounter < 3) {
+            println("TAV Debug: Tile (0,0) frame $frameCounter - readPtr=0x${readPtr.toString(16)}")
+            println("TAV Debug: First 32 bytes at readPtr: ${(0 until 32).map { "0x%02x".format(vm.peek(readPtr + it).toInt() and 0xFF) }.joinToString(" ")}")
            println("TAV Debug: Tile (0,0) frame $frameCounter - Quantized Y coeffs (first 64):")
            for (i in 0 until 8) {
                for (j in 0 until 8) {
@@ -4190,6 +4192,24 @@ class GraphicsJSR223Delegate(private val vm: VM) {
                }
                println()
            }
+            
+            // Check how many non-zero coefficients we have
+            var nonZeroCount = 0
+            for (i in 0 until coeffCount) {
+                if (quantizedY[i] != 0.toShort()) nonZeroCount++
+            }
+            println("TAV Debug: Non-zero Y coefficients: $nonZeroCount out of $coeffCount")
+            
+            // Show all non-zero coefficients with their positions
+            println("TAV Debug: All non-zero Y coefficients:")
+            for (i in 0 until coeffCount) {
+                if (quantizedY[i] != 0.toShort()) {
+                    val row = i / 64
+                    val col = i % 64
+                    println("  Y[$row,$col] = ${quantizedY[i]}")
+                }
+            }
+            
            println("qY=$qY, qCo=$qCo, qCg=$qCg, rcf=$rcf")
        }
        
@@ -4199,22 +4219,19 @@ class GraphicsJSR223Delegate(private val vm: VM) {
            cgTile[i] = quantizedCg[i] * qCg * rcf
        }
        
+        // Debug: compare expected vs actual DC values
+        if (tileX == 0 && tileY == 0 && frameCounter < 3) {
+            val expectedDC = 195 * 5 * 1.0f  // quantized_dc * qY * rcf
+            val actualDC = yTile[0] 
+            println("TAV Debug: DC comparison - quantized=${quantizedY[0]}, expected_dc=$expectedDC, actual_dc=$actualDC")
+            println("TAV Debug: Dequantized Y[0-15]: ${yTile.sliceArray(0..15).joinToString { "%.1f".format(it) }}")
+        }
+        
        // Apply inverse DWT using 9/7 irreversible filter with 3 decomposition levels
        applyDWTInverseMultiLevel(yTile, tileSize, tileSize, 3, 1)
        applyDWTInverseMultiLevel(coTile, tileSize, tileSize, 3, 1)
        applyDWTInverseMultiLevel(cgTile, tileSize, tileSize, 3, 1)
        
-        // DEBUG: Try replacing with reasonable test values to verify the rest of pipeline works
-        if (tileX == 0 && tileY == 0 && frameCounter < 3) {
-            println("TAV Debug: Before test override - Y[0-7]: ${yTile.sliceArray(0..7).joinToString { "%.1f".format(it) }}")
-            // Set reasonable test values
-            for (i in 0 until coeffCount) {
-                yTile[i] = 128.0f + (i % 32) * 2.0f  // Reasonable Y values around middle gray
-                coTile[i] = (i % 16 - 8) * 4.0f      // Small chroma values  
-                cgTile[i] = (i % 16 - 8) * 4.0f      // Small chroma values
-            }
-            println("TAV Debug: After test override - Y[0-7]: ${yTile.sliceArray(0..7).joinToString { "%.1f".format(it) }}")
-        }
        
        // Debug: check if we get reasonable values after DWT
        if (tileX == 0 && tileY == 0 && frameCounter < 3) {
@@ -4371,15 +4388,17 @@ class GraphicsJSR223Delegate(private val vm: VM) {
                    val tileIdx = y * tileSize + x
                    val pixelIdx = frameY * width + frameX
                    
-                    // YCoCg-R to RGB conversion
+                    // YCoCg-R to RGB conversion (exact inverse of encoder)
                    val Y = yTile[tileIdx]
                    val Co = coTile[tileIdx] 
                    val Cg = cgTile[tileIdx]
                    
-                    val tmp = Y - Cg
-                    val g = Y + Cg
-                    val b = tmp - Co
-                    val r = tmp + Co
+                    // Inverse of encoder's YCoCg-R transform:
+                    // Forward: Co = r - b; tmp = b + Co/2; Cg = g - tmp; Y = tmp + Cg/2
+                    val tmp = Y - Cg / 2.0f
+                    val g = Cg + tmp
+                    val b = tmp - Co / 2.0f
+                    val r = Co + b
                    
                    val rgbOffset = pixelIdx * 3L
                    vm.poke(rgbAddr + rgbOffset, r.toInt().coerceIn(0, 255).toByte())
@@ -4813,16 +4832,20 @@ class GraphicsJSR223Delegate(private val vm: VM) {
            val currentSize = size shr level
            if (currentSize < 2) break
            
-            // Column transform (reverse order from encoder)
+            // Apply inverse DWT to current subband region - EXACT match to encoder
+            // The encoder does ROW transform first, then COLUMN transform
+            // So inverse must do COLUMN inverse first, then ROW inverse
+            
+            // Column inverse transform first
            for (x in 0 until currentSize) {
                for (y in 0 until currentSize) {
                    tempCol[y] = data[y * size + x]
                }
                
                if (filterType == 0) {
-                    applyLift53InverseVertical(tempCol, currentSize)
+                    applyDWT53Inverse1D(tempCol, currentSize)
                } else {
-                    applyLift97InverseVertical(tempCol, currentSize)
+                    applyDWT97Inverse1D(tempCol, currentSize)
                }
                
                for (y in 0 until currentSize) {
@@ -4830,16 +4853,16 @@ class GraphicsJSR223Delegate(private val vm: VM) {
                }
            }
            
-            // Row transform (reverse order from encoder)
+            // Row inverse transform second  
            for (y in 0 until currentSize) {
                for (x in 0 until currentSize) {
                    tempRow[x] = data[y * size + x]
                }
                
                if (filterType == 0) {
-                    applyLift53InverseHorizontal(tempRow, currentSize)
+                    applyDWT53Inverse1D(tempRow, currentSize)
                } else {
-                    applyLift97InverseHorizontal(tempRow, currentSize)
+                    applyDWT97Inverse1D(tempRow, currentSize)
                }
                
                for (x in 0 until currentSize) {
@@ -4876,6 +4899,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
        }
    }

+    private fun applyLift97InverseHorizontal(row: FloatArray, width: Int) { TODO() } // Stub: TODO() throws NotImplementedError if called; the inverse DWT call sites visible in this change use applyDWT97Inverse1D instead
+    private fun applyLift97InverseVertical(col: FloatArray, height: Int) { TODO() } // Stub: TODO() throws NotImplementedError if called; the inverse DWT call sites visible in this change use applyDWT97Inverse1D instead
+
    // 1D lifting scheme implementations for 5/3 filter
    private fun applyLift53InverseHorizontal(data: FloatArray, length: Int) {
        if (length < 2) return
@@ -4925,38 +4951,35 @@ class GraphicsJSR223Delegate(private val vm: VM) {
    }

    // 1D lifting scheme implementations for 9/7 irreversible filter
-    private fun applyLift97InverseHorizontal(data: FloatArray, length: Int) {
+    private fun applyDWT97Inverse1D(data: FloatArray, length: Int) {
        if (length < 2) return

        val temp = FloatArray(length)
-        val half = (length + 1) / 2
+        val half = length / 2

-        // Separate even and odd samples (inverse interleaving)
+        // Split into low and high frequency components (matching encoder layout)
+        // After forward DWT: first half = low-pass, second half = high-pass
        for (i in 0 until half) {
-            temp[i] = data[2 * i] // Even samples (low-pass)
-        }
-        for (i in 0 until length / 2) {
-            temp[half + i] = data[2 * i + 1] // Odd samples (high-pass)
+            temp[i] = data[i]              // Low-pass coefficients (first half)
+            temp[half + i] = data[half + i] // High-pass coefficients (second half)
        }

-        // 9/7 inverse lifting coefficients (must match encoder exactly)
-        val alpha = -1.586134342f   // Inverse lifting coefficient
-        val beta = -0.052980118f    // Inverse lifting coefficient (match encoder)  
-        val gamma = 0.882911076f    // Inverse lifting coefficient (match encoder)
-        val delta = 0.443506852f    // Inverse lifting coefficient (match encoder)
-        val K = 1.230174105f        // Scaling factor (match encoder)
-        val invK = 1.0f / K
+        // 9/7 inverse lifting coefficients (exactly matching encoder)
+        val alpha = -1.586134342f
+        val beta = -0.052980118f  
+        val gamma = 0.882911076f
+        val delta = 0.443506852f
+        val K = 1.230174105f

-        // Inverse lifting steps for 9/7 filter (undo forward steps in reverse order)
-        // Step 5: Undo scaling
+        // Inverse lifting steps (undo forward steps in reverse order)
+        
+        // Step 5: Undo scaling (reverse of encoder's final step)
        for (i in 0 until half) {
            temp[i] /= K  // Undo temp[i] *= K
-        }
-        for (i in 0 until length / 2) {
            temp[half + i] *= K  // Undo temp[half + i] /= K
        }

-        // Step 4: Undo update step (delta)
+        // Step 4: Undo update step (delta) 
        for (i in 0 until half) {
            val left = if (i > 0) temp[half + i - 1] else temp[half + i]
            val right = if (i < half - 1) temp[half + i + 1] else temp[half + i]
@@ -4984,18 +5007,50 @@ class GraphicsJSR223Delegate(private val vm: VM) {
            temp[half + i] -= alpha * (left + right)
        }

-        // Interleave back
+        // Merge back (inverse of encoder's split)
        for (i in 0 until half) {
-            data[2 * i] = temp[i]
-        }
-        for (i in 0 until length / 2) {
-            data[2 * i + 1] = temp[half + i]
+            data[2 * i] = temp[i]           // Even positions get low-pass
+            if (2 * i + 1 < length) {
+                data[2 * i + 1] = temp[half + i] // Odd positions get high-pass
+            }
        }
    }

-    private fun applyLift97InverseVertical(data: FloatArray, length: Int) {
-        // Same as horizontal but for vertical direction
-        applyLift97InverseHorizontal(data, length)
+    private fun applyDWT53Inverse1D(data: FloatArray, length: Int) { // In-place 1D inverse 5/3 lifting over the first `length` entries of `data`
+        if (length < 2) return // Fewer than two samples: data is left untouched
+        // Scratch buffer for the two bands; `half` uses integer division, so for an odd `length` data[length - 1] is neither read nor rewritten
+        val temp = FloatArray(length)
+        val half = length / 2
+
+        // Copy both bands into temp unchanged: data[0 until half] is treated as low-pass, data[half until 2 * half] as high-pass (stated to match the encoder layout; the encoder's forward 5/3 routine is not visible here)
+        for (i in 0 until half) {
+            temp[i] = data[i]              // Low-pass coefficients (first half)
+            temp[half + i] = data[half + i] // High-pass coefficients (second half)
+        }
+
+        // 5/3 inverse lifting (undo forward steps in reverse order)
+        // Both steps below operate on temp only; data is not written until the final merge loop
+        // Step 2: Undo update step (1/4 coefficient); neighbours outside the band contribute 0
+        for (i in 0 until half) {
+            val left = if (i > 0) temp[half + i - 1] else 0.0f
+            val right = if (i < half - 1) temp[half + i] else 0.0f // NOTE(review): reads the high-pass sample at index i (not i + 1 as the 9/7 routine does) and drops it entirely for the last index; confirm this mirrors the encoder's forward update
+            temp[i] -= 0.25f * (left + right)
+        }
+
+        // Step 1: Undo predict step (1/2 coefficient), using the low-pass values already restored by step 2; the last index reuses temp[i] as its right neighbour
+        for (i in 0 until half) {
+            val left = temp[i]
+            val right = if (i < half - 1) temp[i + 1] else temp[i]
+            temp[half + i] -= 0.5f * (left + right) // NOTE(review): subtracts the prediction; confirm the sign against the encoder's forward predict step
+        }
+
+        // Merge back (inverse of encoder's split): interleave the two bands into data
+        for (i in 0 until half) {
+            data[2 * i] = temp[i]           // Even positions get low-pass
+            if (2 * i + 1 < length) { // Always true for i < half (2 * i + 1 <= 2 * half - 1 < length); acts only as a defensive bound
+                data[2 * i + 1] = temp[half + i] // Odd positions get high-pass
+            }
+        }
    }


--- a/video_encoder/encoder_tav.c
+++ b/video_encoder/encoder_tav.c
@@ -566,10 +566,30 @@ static size_t serialize_tile_data(tav_encoder_t *enc, int tile_x, int tile_y,
    int16_t *quantized_co = malloc(tile_size * sizeof(int16_t));
    int16_t *quantized_cg = malloc(tile_size * sizeof(int16_t));
    
+    // Debug: check DWT coefficients before quantization
+    if (tile_x == 0 && tile_y == 0) {
+        printf("Encoder Debug: Tile (0,0) - DWT Y coeffs before quantization (first 16): ");
+        for (int i = 0; i < 16; i++) {
+            printf("%.2f ", tile_y_data[i]);
+        }
+        printf("\n");
+        printf("Encoder Debug: Quantizers - Y=%d, Co=%d, Cg=%d, rcf=%.2f\n", 
+               enc->quantizer_y, enc->quantizer_co, enc->quantizer_cg, mv->rate_control_factor);
+    }
+    
    quantize_dwt_coefficients((float*)tile_y_data, quantized_y, tile_size, enc->quantizer_y, mv->rate_control_factor);
    quantize_dwt_coefficients((float*)tile_co_data, quantized_co, tile_size, enc->quantizer_co, mv->rate_control_factor);
    quantize_dwt_coefficients((float*)tile_cg_data, quantized_cg, tile_size, enc->quantizer_cg, mv->rate_control_factor);
    
+    // Debug: check quantized coefficients after quantization
+    if (tile_x == 0 && tile_y == 0) {
+        printf("Encoder Debug: Tile (0,0) - Quantized Y coeffs (first 16): ");
+        for (int i = 0; i < 16; i++) {
+            printf("%d ", quantized_y[i]);
+        }
+        printf("\n");
+    }
+    
    // Write quantized coefficients
    memcpy(buffer + offset, quantized_y, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t);
    memcpy(buffer + offset, quantized_co, tile_size * sizeof(int16_t)); offset += tile_size * sizeof(int16_t);
@@ -626,6 +646,15 @@ static size_t compress_and_write_frame(tav_encoder_t *enc, uint8_t packet_type)
                }
            }
            
+            // Debug: check input data before DWT
+            if (tile_x == 0 && tile_y == 0) {
+                printf("Encoder Debug: Tile (0,0) - Y data before DWT (first 16): ");
+                for (int i = 0; i < 16; i++) {
+                    printf("%.2f ", tile_y_data[i]);
+                }
+                printf("\n");
+            }
+            
            // Apply DWT transform to each channel
            dwt_2d_forward(tile_y_data, enc->decomp_levels, enc->wavelet_filter);
            dwt_2d_forward(tile_co_data, enc->decomp_levels, enc->wavelet_filter);
@@ -981,6 +1010,17 @@ int main(int argc, char *argv[]) {
                enc->quantizer_co = QUALITY_CO[enc->quality_level];
                enc->quantizer_cg = QUALITY_CG[enc->quality_level];
                break;
+            case 'Q':
+                // Parse quantizer values Y,Co,Cg
+                if (sscanf(optarg, "%d,%d,%d", &enc->quantizer_y, &enc->quantizer_co, &enc->quantizer_cg) != 3) {
+                    fprintf(stderr, "Error: Invalid quantizer format. Use Y,Co,Cg (e.g., 5,3,2)\n");
+                    cleanup_encoder(enc);
+                    return 1;
+                }
+                enc->quantizer_y = CLAMP(enc->quantizer_y, 1, 100);
+                enc->quantizer_co = CLAMP(enc->quantizer_co, 1, 100);
+                enc->quantizer_cg = CLAMP(enc->quantizer_cg, 1, 100);
+                break;
            case 'w':
                enc->wavelet_filter = CLAMP(atoi(optarg), 0, 1);
                break;
@@ -1163,10 +1203,29 @@ int main(int argc, char *argv[]) {
        // Determine frame type
        int is_keyframe = 1;//(frame_count % keyframe_interval == 0);
        
+        // Debug: check RGB input data
+        if (frame_count < 3) {
+            printf("Encoder Debug: Frame %d - RGB data (first 16 bytes): ", frame_count);
+            for (int i = 0; i < 16; i++) {
+                printf("%d ", enc->current_frame_rgb[i]);
+            }
+            printf("\n");
+        }
+        
        // Convert RGB to YCoCg
        rgb_to_ycocg(enc->current_frame_rgb, 
                     enc->current_frame_y, enc->current_frame_co, enc->current_frame_cg,
                     enc->width, enc->height);
+                     
+        // Debug: check YCoCg conversion result
+        if (frame_count < 3) {
+            printf("Encoder Debug: Frame %d - YCoCg result (first 16): ", frame_count);
+            for (int i = 0; i < 16; i++) {
+                printf("Y=%.1f Co=%.1f Cg=%.1f ", enc->current_frame_y[i], enc->current_frame_co[i], enc->current_frame_cg[i]);
+                if (i % 4 == 3) break; // Only show first 4 pixels for readability
+            }
+            printf("\n");
+        }
        
        // Process motion vectors for P-frames
        int num_tiles = enc->tiles_x * enc->tiles_y;