mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-06-09 22:54:03 +09:00
tev encoder finalising without residual dct
This commit is contained in:
@@ -1557,6 +1557,13 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 8x8 lookup table of cosine basis values used by tevIdct8x8_2.
// Entry [u][x] = 0.25 * c(u) * cos((2x + 1) * u * PI / 16), where c(0) = 1/sqrt(2) and c(u) = 1 otherwise.
// The value is computed in Double and narrowed to Float when stored.
// NOTE(review): the per-axis factor here is 0.25, so a product of two entries carries 1/16 * c(u) * c(v);
// confirm this matches the scaling of the encoder's forward transform.
val dctBasis8_2 = Array(8) { u ->
|
||||||
|
FloatArray(8) { x ->
|
||||||
|
// Only the u == 0 row gets the 1/sqrt(2) weight
val cu = if (u == 0) 1.0 / sqrt(2.0) else 1.0
|
||||||
|
(0.25 * cu * cos((2.0 * x + 1.0) * u * PI / 16.0)).toFloat()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Perform IDCT on a single channel with integer coefficients
|
* Perform IDCT on a single channel with integer coefficients
|
||||||
*/
|
*/
|
||||||
@@ -1597,6 +1604,46 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Perform an 8x8 inverse DCT on a single channel with integer coefficients,
* using the [dctBasis8_2] lookup table.
*
* The first coefficient (index 0) is used as-is; every other coefficient is
* multiplied by the matching [quantTable] entry before the transform.
* No offset is added to the transform output; each value is clamped to
* [-256, 255] and then converted to Int (fractional part discarded).
*
* @param coeffs coefficients; indices 0..63 are read, addressed as `u * 8 + v`
* @param quantTable quantization factors; indices 1..63 are read (index 0 is not used)
* @return a new 64-element array, written at `y * 8 + x`
|
||||||
|
*/
|
||||||
|
private fun tevIdct8x8_2(coeffs: IntArray, quantTable: IntArray): IntArray {
|
||||||
|
val dctCoeffs = Array(8) { FloatArray(8) }
|
||||||
|
val result = IntArray(64)
|
||||||
|
|
||||||
|
// Convert the flat integer coefficients to a 2D float array, dequantizing as we go
|
||||||
|
for (u in 0 until 8) {
|
||||||
|
for (v in 0 until 8) {
|
||||||
|
val idx = u * 8 + v
|
||||||
|
val coeff = coeffs[idx]
|
||||||
|
if (idx == 0) {
|
||||||
|
// DC coefficient for chroma: lossless quantization (no scaling), quantTable[0] is ignored
|
||||||
|
dctCoeffs[u][v] = coeff.toFloat()
|
||||||
|
} else {
|
||||||
|
// AC coefficients: scale by the quantization table (integer multiply, then widened to Float)
|
||||||
|
dctCoeffs[u][v] = (coeff * quantTable[idx]).toFloat()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apply 2D inverse DCT: each output sample is the sum over all 64 coefficients
// weighted by the product of two basis-table entries.
// NOTE(review): u is the row of the coefficient layout (idx = u * 8 + v) but is paired with x,
// which is the column of the output layout (y * 8 + x); confirm this matches the encoder's layout.
|
||||||
|
for (x in 0 until 8) {
|
||||||
|
for (y in 0 until 8) {
|
||||||
|
var sum = 0f
|
||||||
|
for (u in 0 until 8) {
|
||||||
|
for (v in 0 until 8) {
|
||||||
|
sum += dctBasis8_2[u][x] * dctBasis8_2[v][y] * dctCoeffs[u][v]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Chroma residuals should be in reasonable range: clamp to [-256, 255]
|
||||||
|
val pixel = sum.coerceIn(-256f, 255f)
|
||||||
|
// toInt() drops the fractional part (rounds toward zero, not to nearest)
result[y * 8 + x] = pixel.toInt()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
val dctBasis16 = Array(16) { u ->
|
val dctBasis16 = Array(16) { u ->
|
||||||
FloatArray(16) { x ->
|
FloatArray(16) { x ->
|
||||||
val cu = if (u == 0) 1.0 / sqrt(2.0) else 1.0
|
val cu = if (u == 0) 1.0 / sqrt(2.0) else 1.0
|
||||||
@@ -1912,8 +1959,8 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
|
|
||||||
// Step 2: Decode residual DCT
|
// Step 2: Decode residual DCT
|
||||||
val yResidual = tevIdct16x16(yCoeffs, quantTableY)
|
val yResidual = tevIdct16x16(yCoeffs, quantTableY)
|
||||||
val coResidual = tevIdct8x8(coCoeffs, quantTableC)
|
val coResidual = tevIdct8x8_2(coCoeffs, quantTableC)
|
||||||
val cgResidual = tevIdct8x8(cgCoeffs, quantTableC)
|
val cgResidual = tevIdct8x8_2(cgCoeffs, quantTableC)
|
||||||
|
|
||||||
// Step 3: Build motion-compensated YCoCg-R block and add residuals
|
// Step 3: Build motion-compensated YCoCg-R block and add residuals
|
||||||
val finalY = IntArray(256)
|
val finalY = IntArray(256)
|
||||||
@@ -1927,14 +1974,6 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
val y = startY + dy
|
val y = startY + dy
|
||||||
val refX = x + mvX // Revert to original motion compensation
|
val refX = x + mvX // Revert to original motion compensation
|
||||||
val refY = y + mvY
|
val refY = y + mvY
|
||||||
|
|
||||||
// DEBUG: Log motion compensation coordinates for red trails
|
|
||||||
if (x == 168 && y == 236) {
|
|
||||||
println("INTER MV DEBUG (red): x=$x y=$y refX=$refX refY=$refY mvX=$mvX mvY=$mvY")
|
|
||||||
}
|
|
||||||
if (x == 342 && y == 232) {
|
|
||||||
println("INTER MV DEBUG (magenta): x=$x y=$y refX=$refX refY=$refY mvX=$mvX mvY=$mvY")
|
|
||||||
}
|
|
||||||
val pixelIdx = dy * 16 + dx
|
val pixelIdx = dy * 16 + dx
|
||||||
|
|
||||||
if (x < width && y < height) {
|
if (x < width && y < height) {
|
||||||
@@ -1951,19 +1990,15 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
|
|
||||||
|
|
||||||
// Convert motion-compensated RGB to Y only
|
// Convert motion-compensated RGB to Y only
|
||||||
val co = mcR - mcB
|
mcY = (mcR + 2*mcG + mcB) / 4 // Keep full 0-255 range for prediction
|
||||||
val tmp = mcB + (co / 2)
|
|
||||||
val cg = mcG - tmp
|
|
||||||
val yVal = tmp + (cg / 2)
|
|
||||||
|
|
||||||
mcY = yVal // Keep full 0-255 range for prediction
|
|
||||||
} else {
|
} else {
|
||||||
// Out of bounds reference - use neutral gray (128)
|
// Out of bounds reference - use black
|
||||||
mcY = 128
|
mcY = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add Y residual: prediction + (IDCT_output - 128 - encoder's_+128_bias)
|
// Add Y residual: prediction + (IDCT_output - 128)
|
||||||
val residual = yResidual[pixelIdx] - 128 - 128 // Remove both IDCT bias and encoder's +128
|
// IDCT adds +128 bias, encoder already accounts for prediction centering
|
||||||
|
val residual = yResidual[pixelIdx] - 128 // Remove only IDCT bias
|
||||||
finalY[pixelIdx] = (mcY + residual).coerceIn(0, 255)
|
finalY[pixelIdx] = (mcY + residual).coerceIn(0, 255)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2023,9 +2058,9 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
|||||||
mcCg = 0
|
mcCg = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add chroma residuals with clamping to prevent overflow artifacts
|
// Add chroma residuals
|
||||||
finalCo[chromaIdx] = (mcCo + coResidual[chromaIdx]).coerceIn(-256, 255)
|
finalCo[chromaIdx] = (mcCo + (coResidual[chromaIdx])).coerceIn(-256, 255)
|
||||||
finalCg[chromaIdx] = (mcCg + cgResidual[chromaIdx]).coerceIn(-256, 255)
|
finalCg[chromaIdx] = (mcCg + (cgResidual[chromaIdx])).coerceIn(-256, 255)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -254,8 +254,8 @@ static const uint8_t QUANT_TABLES_C[8][64] = {
|
|||||||
#define MP2_DEFAULT_PACKET_SIZE 0x240
|
#define MP2_DEFAULT_PACKET_SIZE 0x240
|
||||||
|
|
||||||
// Encoding parameters
|
// Encoding parameters
|
||||||
#define MAX_MOTION_SEARCH 32
|
#define MAX_MOTION_SEARCH 8
|
||||||
#define KEYFRAME_INTERVAL 120
|
int KEYFRAME_INTERVAL = 60;
|
||||||
#define BLOCK_SIZE 16 // 16x16 blocks now
|
#define BLOCK_SIZE 16 // 16x16 blocks now
|
||||||
|
|
||||||
// Default values
|
// Default values
|
||||||
@@ -521,13 +521,13 @@ static void estimate_motion(tev_encoder_t *enc, int block_x, int block_y,
|
|||||||
int cur_offset = ((start_y + dy) * enc->width + (start_x + dx)) * 3;
|
int cur_offset = ((start_y + dy) * enc->width + (start_x + dx)) * 3;
|
||||||
int ref_offset = ((ref_y + dy) * enc->width + (ref_x + dx)) * 3;
|
int ref_offset = ((ref_y + dy) * enc->width + (ref_x + dx)) * 3;
|
||||||
|
|
||||||
// Compare luminance (approximate as average of RGB)
|
// Compare luminance using YCoCg-R luma equation
|
||||||
int cur_luma = (enc->current_rgb[cur_offset] +
|
int cur_luma = (enc->current_rgb[cur_offset] +
|
||||||
enc->current_rgb[cur_offset + 1] +
|
2 * enc->current_rgb[cur_offset + 1] +
|
||||||
enc->current_rgb[cur_offset + 2]) / 3;
|
enc->current_rgb[cur_offset + 2]) / 4;
|
||||||
int ref_luma = (enc->previous_rgb[ref_offset] +
|
int ref_luma = (enc->previous_rgb[ref_offset] +
|
||||||
enc->previous_rgb[ref_offset + 1] +
|
2 * enc->previous_rgb[ref_offset + 1] +
|
||||||
enc->previous_rgb[ref_offset + 2]) / 3;
|
enc->previous_rgb[ref_offset + 2]) / 4;
|
||||||
|
|
||||||
sad += abs(cur_luma - ref_luma);
|
sad += abs(cur_luma - ref_luma);
|
||||||
}
|
}
|
||||||
@@ -554,10 +554,7 @@ static void convert_rgb_to_ycocgr_block(const uint8_t *rgb_block,
|
|||||||
int b = rgb_block[rgb_idx + 2];
|
int b = rgb_block[rgb_idx + 2];
|
||||||
|
|
||||||
// YCoCg-R transform (per specification with truncated division)
|
// YCoCg-R transform (per specification with truncated division)
|
||||||
int co = r - b;
|
int y = (r + 2*g + b) / 4;
|
||||||
int tmp = b + (co / 2);
|
|
||||||
int cg = g - tmp;
|
|
||||||
int y = tmp + (cg / 2);
|
|
||||||
|
|
||||||
y_block[py * 16 + px] = CLAMP(y, 0, 255);
|
y_block[py * 16 + px] = CLAMP(y, 0, 255);
|
||||||
}
|
}
|
||||||
@@ -758,7 +755,8 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
|
|||||||
memset(block->cg_coeffs, 0, sizeof(block->cg_coeffs));
|
memset(block->cg_coeffs, 0, sizeof(block->cg_coeffs));
|
||||||
enc->blocks_motion++;
|
enc->blocks_motion++;
|
||||||
return; // Skip DCT encoding, just store motion vector
|
return; // Skip DCT encoding, just store motion vector
|
||||||
} else if (motion_sad < skip_sad && (abs(block->mv_x) > 0 || abs(block->mv_y) > 0)) {
|
// disabling INTER mode: residual DCT is crapping out no matter what I do
|
||||||
|
/*} else if (motion_sad < skip_sad && (abs(block->mv_x) > 0 || abs(block->mv_y) > 0)) {
|
||||||
// Motion compensation with threshold
|
// Motion compensation with threshold
|
||||||
if (motion_sad <= 1024) {
|
if (motion_sad <= 1024) {
|
||||||
block->mode = TEV_MODE_MOTION;
|
block->mode = TEV_MODE_MOTION;
|
||||||
@@ -781,7 +779,7 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
|
|||||||
block->mv_y = 0;
|
block->mv_y = 0;
|
||||||
enc->blocks_intra++;
|
enc->blocks_intra++;
|
||||||
return;
|
return;
|
||||||
}
|
}*/
|
||||||
} else {
|
} else {
|
||||||
// No good motion prediction - use intra mode
|
// No good motion prediction - use intra mode
|
||||||
block->mode = TEV_MODE_INTRA;
|
block->mode = TEV_MODE_INTRA;
|
||||||
@@ -1045,6 +1043,9 @@ static int get_video_metadata(tev_encoder_t *enc) {
|
|||||||
enc->fps = enc->output_fps; // Use output FPS for encoding
|
enc->fps = enc->output_fps; // Use output FPS for encoding
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// set keyframe interval
|
||||||
|
KEYFRAME_INTERVAL = 2 * enc->fps;
|
||||||
|
|
||||||
// Check for audio stream
|
// Check for audio stream
|
||||||
snprintf(command, sizeof(command),
|
snprintf(command, sizeof(command),
|
||||||
|
|||||||
Reference in New Issue
Block a user