From ff39a00431a068ea7126bef776f32ddf8f4aa133 Mon Sep 17 00:00:00 2001 From: minjaesong Date: Thu, 21 Aug 2025 22:54:01 +0900 Subject: [PATCH] tev encoder finalising without residual dct --- .../torvald/tsvm/GraphicsJSR223Delegate.kt | 81 +++++++++++++------ video_encoder/encoder_tev.c | 27 ++++--- 2 files changed, 72 insertions(+), 36 deletions(-) diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt index 9b10c7c..7b46053 100644 --- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt @@ -1557,6 +1557,13 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } + val dctBasis8_2 = Array(8) { u -> + FloatArray(8) { x -> + val cu = if (u == 0) 1.0 / sqrt(2.0) else 1.0 + (0.25 * cu * cos((2.0 * x + 1.0) * u * PI / 16.0)).toFloat() + } + } + /** * Perform IDCT on a single channel with integer coefficients */ @@ -1597,6 +1604,46 @@ class GraphicsJSR223Delegate(private val vm: VM) { return result } + /** + * Perform IDCT on a single channel with integer coefficients + */ + private fun tevIdct8x8_2(coeffs: IntArray, quantTable: IntArray): IntArray { + val dctCoeffs = Array(8) { FloatArray(8) } + val result = IntArray(64) + + // Convert integer coefficients to 2D array and dequantize + for (u in 0 until 8) { + for (v in 0 until 8) { + val idx = u * 8 + v + val coeff = coeffs[idx] + if (idx == 0) { + // DC coefficient for chroma: lossless quantization (no scaling) + dctCoeffs[u][v] = coeff.toFloat() + } else { + // AC coefficients: use quantization table + dctCoeffs[u][v] = (coeff * quantTable[idx]).toFloat() + } + } + } + + // Apply 2D inverse DCT + for (x in 0 until 8) { + for (y in 0 until 8) { + var sum = 0f + for (u in 0 until 8) { + for (v in 0 until 8) { + sum += dctBasis8_2[u][x] * dctBasis8_2[v][y] * dctCoeffs[u][v] + } + } + // Chroma residuals should be in reasonable range (±255 max) + val pixel = sum.coerceIn(-256f, 255f) + result[y * 8 + x] = pixel.toInt() + } + } + + return result + } + val dctBasis16 = Array(16) { u -> FloatArray(16) { x -> val cu = if (u == 0) 1.0 / sqrt(2.0) else 1.0 @@ -1912,8 +1959,8 @@ class GraphicsJSR223Delegate(private val vm: VM) { // Step 2: Decode residual DCT val yResidual = tevIdct16x16(yCoeffs, quantTableY) - val coResidual = tevIdct8x8(coCoeffs, quantTableC) - val cgResidual = tevIdct8x8(cgCoeffs, quantTableC) + val coResidual = tevIdct8x8_2(coCoeffs, quantTableC) + val cgResidual = tevIdct8x8_2(cgCoeffs, quantTableC) // Step 3: Build motion-compensated YCoCg-R block and add residuals val finalY = IntArray(256) @@ -1927,14 +1974,6 @@ class GraphicsJSR223Delegate(private val vm: VM) { val y = startY + dy val refX = x + mvX // Revert to original motion compensation val refY = y + mvY - - // DEBUG: Log motion compensation coordinates for red trails - if (x == 168 && y == 236) { - println("INTER MV DEBUG (red): x=$x y=$y refX=$refX refY=$refY mvX=$mvX mvY=$mvY") - } - if (x == 342 && y == 232) { - println("INTER MV DEBUG (magenta): x=$x y=$y refX=$refX refY=$refY mvX=$mvX mvY=$mvY") - } val pixelIdx = dy * 16 + dx if (x < width && y < height) { @@ -1951,19 +1990,15 @@ class GraphicsJSR223Delegate(private val vm: VM) { // Convert motion-compensated RGB to Y only - val co = mcR - mcB - val tmp = mcB + (co / 2) - val cg = mcG - tmp - val yVal = tmp + (cg / 2) - - mcY = yVal // Keep full 0-255 range for prediction + mcY = (mcR + 2*mcG + mcB) / 4 // Keep full 0-255 range for prediction } else { - // Out of bounds reference - use neutral gray (128) - mcY = 128 + // Out of bounds reference - use black + mcY = 0 } - // Add Y residual: prediction + (IDCT_output - 128 - encoder's_+128_bias) - val residual = yResidual[pixelIdx] - 128 - 128 // Remove both IDCT bias and encoder's +128 + // Add Y residual: prediction + (IDCT_output - 128) + // IDCT adds +128 bias, encoder already accounts for prediction centering + val residual = yResidual[pixelIdx] - 128 // Remove only IDCT bias finalY[pixelIdx] = (mcY + residual).coerceIn(0, 255) } } @@ -2023,9 +2058,9 @@ class GraphicsJSR223Delegate(private val vm: VM) { mcCg = 0 } - // Add chroma residuals with clamping to prevent overflow artifacts - finalCo[chromaIdx] = (mcCo + coResidual[chromaIdx]).coerceIn(-256, 255) - finalCg[chromaIdx] = (mcCg + cgResidual[chromaIdx]).coerceIn(-256, 255) + // Add chroma residuals + finalCo[chromaIdx] = (mcCo + (coResidual[chromaIdx])).coerceIn(-256, 255) + finalCg[chromaIdx] = (mcCg + (cgResidual[chromaIdx])).coerceIn(-256, 255) } } } diff --git a/video_encoder/encoder_tev.c b/video_encoder/encoder_tev.c index 4275365..a6903ed 100644 --- a/video_encoder/encoder_tev.c +++ b/video_encoder/encoder_tev.c @@ -254,8 +254,8 @@ static const uint8_t QUANT_TABLES_C[8][64] = { #define MP2_DEFAULT_PACKET_SIZE 0x240 // Encoding parameters -#define MAX_MOTION_SEARCH 32 -#define KEYFRAME_INTERVAL 120 +#define MAX_MOTION_SEARCH 8 +int KEYFRAME_INTERVAL = 60; #define BLOCK_SIZE 16 // 16x16 blocks now // Default values @@ -521,13 +521,13 @@ static void estimate_motion(tev_encoder_t *enc, int block_x, int block_y, int cur_offset = ((start_y + dy) * enc->width + (start_x + dx)) * 3; int ref_offset = ((ref_y + dy) * enc->width + (ref_x + dx)) * 3; - // Compare luminance (approximate as average of RGB) + // Compare luminance using YCoCg-R luma equation int cur_luma = (enc->current_rgb[cur_offset] + - enc->current_rgb[cur_offset + 1] + - enc->current_rgb[cur_offset + 2]) / 3; + 2 * enc->current_rgb[cur_offset + 1] + + enc->current_rgb[cur_offset + 2]) / 4; int ref_luma = (enc->previous_rgb[ref_offset] + - enc->previous_rgb[ref_offset + 1] + - enc->previous_rgb[ref_offset + 2]) / 3; + 2 * enc->previous_rgb[ref_offset + 1] + + enc->previous_rgb[ref_offset + 2]) / 4; sad += abs(cur_luma - ref_luma); } @@ -554,10 +554,7 @@ static void convert_rgb_to_ycocgr_block(const uint8_t *rgb_block, int b = rgb_block[rgb_idx + 2]; // YCoCg-R transform (per specification with truncated division) - int co = r - b; - int tmp = b + (co / 2); - int cg = g - tmp; - int y = tmp + (cg / 2); + int y = (r + 2*g + b) / 4; y_block[py * 16 + px] = CLAMP(y, 0, 255); } @@ -758,7 +755,8 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke memset(block->cg_coeffs, 0, sizeof(block->cg_coeffs)); enc->blocks_motion++; return; // Skip DCT encoding, just store motion vector - } else if (motion_sad < skip_sad && (abs(block->mv_x) > 0 || abs(block->mv_y) > 0)) { + // disabling INTER mode: residual DCT is crapping out no matter what I do + /*} else if (motion_sad < skip_sad && (abs(block->mv_x) > 0 || abs(block->mv_y) > 0)) { // Motion compensation with threshold if (motion_sad <= 1024) { block->mode = TEV_MODE_MOTION; @@ -781,7 +779,7 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke block->mv_y = 0; enc->blocks_intra++; return; - } + }*/ } else { // No good motion prediction - use intra mode block->mode = TEV_MODE_INTRA; @@ -1045,6 +1043,9 @@ static int get_video_metadata(tev_encoder_t *enc) { enc->fps = enc->output_fps; // Use output FPS for encoding } } + + // set keyframe interval + KEYFRAME_INTERVAL = 2 * enc->fps; // Check for audio stream snprintf(command, sizeof(command),