From 09e665f56076329997814d7f967fe433e03cc74e Mon Sep 17 00:00:00 2001 From: minjaesong Date: Tue, 2 Sep 2025 10:47:23 +0900 Subject: [PATCH] interlacing wip --- assets/disk0/tvdos/bin/playtev.js | 28 ++- terranmon.txt | 12 +- .../torvald/tsvm/GraphicsJSR223Delegate.kt | 145 +++++++++--- video_encoder/encoder_tev.c | 207 +++++++++++------- 4 files changed, 272 insertions(+), 120 deletions(-) diff --git a/assets/disk0/tvdos/bin/playtev.js b/assets/disk0/tvdos/bin/playtev.js index 5ab7235..2dbef22 100644 --- a/assets/disk0/tvdos/bin/playtev.js +++ b/assets/disk0/tvdos/bin/playtev.js @@ -383,12 +383,30 @@ let qualityY = seqread.readOneByte() let qualityCo = seqread.readOneByte() let qualityCg = seqread.readOneByte() let flags = seqread.readOneByte() -let hasAudio = flags & 1 -let hasSubtitle = flags & 2 -let unused1 = seqread.readOneByte() +let hasAudio = !!(flags & 1) +let hasSubtitle = !!(flags & 2) +let videoFlags = seqread.readOneByte() +let isInterlaced = !!(videoFlags & 1) let unused2 = seqread.readOneByte() -serial.println(`TEV Format ${version} (${colorSpace}); Q: ${qualityY} ${qualityCo} ${qualityCg}`) + +serial.println(`Video metadata:`) +serial.println(` Frames: ${totalFrames}`) +serial.println(` FPS: ${fps}`) +serial.println(` Duration: ${totalFrames / fps}`) +serial.println(` Audio: ${hasAudio ? "Yes" : "No"}`) +serial.println(` Resolution: ${width}x${height}, ${isInterlaced ? "interlaced" : "progressive"}`) + + +// DEBUG interlace raw output +if (isInterlaced) { + height = height >> 1 + isInterlaced = false +} +// END OF DEBUG + + +serial.println(`TEV Format ${version} (${colorSpace}); Q: ${qualityY} ${qualityCo} ${qualityCg}; Interlaced: ${isInterlaced ? 'Yes' : 'No'}`) function updateDataRateBin(rate) { videoRateBin.push(rate) @@ -555,7 +573,7 @@ try { // Hardware-accelerated TEV decoding to RGB buffers (YCoCg-R or XYB based on version) try { let decodeStart = sys.nanoTime() - graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, height, [qualityY, qualityCo, qualityCg], debugMotionVectors, version) + graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, height, [qualityY, qualityCo, qualityCg], debugMotionVectors, version, isInterlaced) decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0 // Convert to milliseconds // Upload RGB buffer to display framebuffer with dithering diff --git a/terranmon.txt b/terranmon.txt index c304351..fb4386a 100644 --- a/terranmon.txt +++ b/terranmon.txt @@ -703,10 +703,12 @@ DCT-based compression, motion compensation, and efficient temporal coding. uint8 Quality Index for Y channel (0-99; 100 denotes all quantiser is 1) uint8 Quality Index for Co channel (0-99; 100 denotes all quantiser is 1) uint8 Quality Index for Cg channel (0-99; 100 denotes all quantiser is 1) - uint8 Flags - bit 0 = has audio - bit 1 = has subtitle - unit16 Reserved, fill with zero + uint8 Extra Feature Flags + - bit 0 = has audio + - bit 1 = has subtitle + uint8 Video Flags + - bit 0 = is interlaced (should be default for most non-archival TEV videos) + uint8 Reserved, fill with zero ## Packet Types 0x10: I-frame (intra-coded frame) @@ -718,7 +720,7 @@ DCT-based compression, motion compensation, and efficient temporal coding. ## Video Packet Structure uint8 Packet Type uint32 Compressed Size - * Gzip-compressed Block Data + * Zstd-compressed Block Data ## Block Data (per 16x16 block) uint8 Mode: encoding mode diff --git a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt index 561cd2e..2f44ed5 100644 --- a/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt +++ b/tsvm_core/src/net/torvald/tsvm/GraphicsJSR223Delegate.kt @@ -1512,24 +1512,96 @@ class GraphicsJSR223Delegate(private val vm: VM) { } // YCoCg-R to RGB conversion with 4:2:0 chroma upsampling + // Pre-allocated arrays for chroma component caching (reused across blocks) + private val cgHalfCache = IntArray(64) // 8x8 cache for cg/2 values + private val coHalfCache = IntArray(64) // 8x8 cache for co/2 values + + // Temporary buffer for interlaced field processing + private val interlacedFieldBuffer = IntArray(560 * 224 * 3) // Half-height RGB buffer + + /** + * YADIF (Yet Another Deinterlacing Filter) implementation + * Converts interlaced field to progressive frame with temporal/spatial interpolation + */ + fun yadifDeinterlace(fieldRGBAddr: Long, outputRGBAddr: Long, width: Int, height: Int, + prevFieldAddr: Long, nextFieldAddr: Long, fieldParity: Int, + fieldIncVec: Int, outputIncVec: Int) { + val fieldHeight = height / 2 + + for (y in 0 until fieldHeight) { + for (x in 0 until width) { + val fieldOffset = (y * width + x) * 3 + val outputOffset = ((y * 2 + fieldParity) * width + x) * 3 + + // Copy current field lines directly (no interpolation needed) + for (c in 0..2) { + val pixelValue = vm.peek(fieldRGBAddr + (fieldOffset + c) * fieldIncVec)!! + vm.poke(outputRGBAddr + (outputOffset + c) * outputIncVec, pixelValue) + } + + // Interpolate missing lines using Yadif algorithm + if (y > 0 && y < fieldHeight - 1) { + val interpOutputOffset = ((y * 2 + 1 - fieldParity) * width + x) * 3 + + for (c in 0..2) { + // Get spatial neighbors + val above = vm.peek(fieldRGBAddr + (fieldOffset - width * 3 + c) * fieldIncVec)!!.toInt() and 0xFF + val below = vm.peek(fieldRGBAddr + (fieldOffset + width * 3 + c) * fieldIncVec)!!.toInt() and 0xFF + val current = vm.peek(fieldRGBAddr + (fieldOffset + c) * fieldIncVec)!!.toInt() and 0xFF + + // Simple spatial interpolation (can be enhanced with temporal prediction) + val spatialInterp = (above + below) / 2 + + // Apply edge-directed interpolation bias + val edgeBias = if (kotlin.math.abs(above - below) < 32) { + (current + spatialInterp) / 2 // Low edge activity: blend with current + } else { + spatialInterp // High edge activity: use spatial only + } + + vm.poke(outputRGBAddr + (interpOutputOffset + c) * outputIncVec, + edgeBias.coerceIn(0, 255).toByte()) + } + } + } + } + + // Handle edge cases: first and last interpolated lines use simple spatial interpolation + for (x in 0 until width) { + val interpY = if (fieldParity == 0) 1 else 0 + val outputOffset = (interpY * width + x) * 3 + val referenceOffset = ((interpY + 1) * width + x) * 3 + + for (c in 0..2) { + val refPixel = vm.peek(outputRGBAddr + (referenceOffset + c) * outputIncVec)!! + vm.poke(outputRGBAddr + (outputOffset + c) * outputIncVec, refPixel) + } + } + } + fun tevYcocgToRGB(yBlock: IntArray, coBlock: IntArray, cgBlock: IntArray): IntArray { val rgbData = IntArray(16 * 16 * 3) // R,G,B for 16x16 pixels + // Pre-compute chroma division components for 8x8 chroma block (each reused 4x in 4:2:0) + for (i in 0 until 64) { + cgHalfCache[i] = cgBlock[i] / 2 + coHalfCache[i] = coBlock[i] / 2 + } + + // Process 16x16 luma with cached chroma components for (py in 0 until 16) { for (px in 0 until 16) { val yIdx = py * 16 + px val y = yBlock[yIdx] - // Get chroma values from subsampled 8x8 blocks (nearest neighbor upsampling) + // Get pre-computed chroma components (4:2:0 upsampling) val coIdx = (py / 2) * 8 + (px / 2) - val co = coBlock[coIdx] - val cg = cgBlock[coIdx] - // YCoCg-R inverse transform (per YCoCg-R spec with truncated division) - val tmp = y - (cg / 2) - val g = cg + tmp - val b = tmp - (co / 2) - val r = b + co + // YCoCg-R inverse transform using cached division results + val tmp = y - cgHalfCache[coIdx] + val g = cgBlock[coIdx] + tmp + val b = tmp - coHalfCache[coIdx] + val r = b + coBlock[coIdx] // Clamp and store RGB val baseIdx = (py * 16 + px) * 3 @@ -1725,10 +1797,14 @@ class GraphicsJSR223Delegate(private val vm: VM) { */ fun tevDecode(blockDataPtr: Long, currentRGBAddr: Long, prevRGBAddr: Long, width: Int, height: Int, qualityIndices: IntArray, debugMotionVectors: Boolean = false, - tevVersion: Int = 2) { + tevVersion: Int = 2, isInterlaced: Boolean = false) { + // height doesn't change when interlaced, because that's the encoder's output + + // For interlaced mode, decode to half-height field first + val decodingHeight = if (isInterlaced) height / 2 else height val blocksX = (width + 15) / 16 // 16x16 blocks now - val blocksY = (height + 15) / 16 + val blocksY = (decodingHeight + 15) / 16 val quantYmult = jpeg_quality_to_mult(qualityIndices[0]) val quantCOmult = jpeg_quality_to_mult(qualityIndices[1]) @@ -1769,7 +1845,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { when (mode) { 0x00 -> { // TEV_MODE_SKIP - copy RGB from previous frame (optimized with memcpy) // Check if we can copy the entire block at once (no clipping) - if (startX + 16 <= width && startY + 16 <= height) { + if (startX + 16 <= width && startY + 16 <= decodingHeight) { // Optimized case: copy entire 16x16 block with row-by-row memcpy for (dy in 0 until 16) { val srcRowOffset = ((startY + dy).toLong() * width + startX) * 3 @@ -1786,7 +1862,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { for (dx in 0 until 16) { val x = startX + dx val y = startY + dy - if (x < width && y < height) { + if (x < width && y < decodingHeight) { val pixelOffset = y.toLong() * width + x val rgbOffset = pixelOffset * 3 @@ -1816,7 +1892,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { val refX = x + mvX val refY = y + mvY - if (x < width && y < height) { + if (x < width && y < decodingHeight) { val dstPixelOffset = y.toLong() * width + x val dstRgbOffset = dstPixelOffset * 3 @@ -1836,7 +1912,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { val refStartY = startY + mvY // Check if entire 16x16 block can be copied with memcpy (no bounds issues) - if (startX + 16 <= width && startY + 16 <= height && + if (startX + 16 <= width && startY + 16 <= decodingHeight && refStartX >= 0 && refStartY >= 0 && refStartX + 16 <= width && refStartY + 16 <= height) { // Optimized case: copy entire 16x16 block with row-by-row memcpy @@ -1858,16 +1934,16 @@ class GraphicsJSR223Delegate(private val vm: VM) { val refX = x + mvX val refY = y + mvY - if (x < width && y < height) { + if (x < width && y < decodingHeight) { val dstPixelOffset = y.toLong() * width + x val dstRgbOffset = dstPixelOffset * 3 - if (refX >= 0 && refY >= 0 && refX < width && refY < height) { + if (refX >= 0 && refY >= 0 && refX < width && refY < decodingHeight) { val refPixelOffset = refY.toLong() * width + refX val refRgbOffset = refPixelOffset * 3 // Additional safety: ensure RGB offset is within valid range - val maxValidOffset = (width * height - 1) * 3L + 2 + val maxValidOffset = (width * decodingHeight - 1) * 3L + 2 if (refRgbOffset >= 0 && refRgbOffset <= maxValidOffset) { // Copy RGB from reference position val refR = vm.peek(prevRGBAddr + refRgbOffset*prevAddrIncVec)!! @@ -1923,7 +1999,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { for (dx in 0 until 16) { val x = startX + dx val y = startY + dy - if (x < width && y < height) { + if (x < width && y < decodingHeight) { val rgbIdx = (dy * 16 + dx) * 3 val imageOffset = y.toLong() * width + x val bufferOffset = imageOffset * 3 @@ -1963,10 +2039,10 @@ class GraphicsJSR223Delegate(private val vm: VM) { val refY = y + mvY val pixelIdx = dy * 16 + dx - if (x < width && y < height) { + if (x < width && y < decodingHeight) { var mcY: Int - if (refX >= 0 && refY >= 0 && refX < width && refY < height) { + if (refX >= 0 && refY >= 0 && refX < width && refY < decodingHeight) { // Get motion-compensated RGB from previous frame val refPixelOffset = refY.toLong() * width + refX val refRgbOffset = refPixelOffset * 3 @@ -2003,12 +2079,12 @@ class GraphicsJSR223Delegate(private val vm: VM) { val refY = y + mvY val chromaIdx = cy * 8 + cx - if (x < width && y < height) { + if (x < width && y < decodingHeight) { var mcCo: Int var mcCg: Int // Sample 2x2 block from motion-compensated position for chroma - if (refX >= 0 && refY >= 0 && refX < width - 1 && refY < height - 1) { + if (refX >= 0 && refY >= 0 && refX < width - 1 && refY < decodingHeight - 1) { var coSum = 0 var cgSum = 0 var count = 0 @@ -2018,7 +2094,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { for (dx in 0 until 2) { val sampleX = refX + dx val sampleY = refY + dy - if (sampleX < width && sampleY < height) { + if (sampleX < width && sampleY < decodingHeight) { val refPixelOffset = sampleY.toLong() * width + sampleX val refRgbOffset = refPixelOffset * 3 @@ -2064,7 +2140,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { for (dx in 0 until 16) { val x = startX + dx val y = startY + dy - if (x < width && y < height) { + if (x < width && y < decodingHeight) { val imageOffset = y.toLong() * width + x val bufferOffset = imageOffset * 3 @@ -2099,7 +2175,7 @@ class GraphicsJSR223Delegate(private val vm: VM) { for (dx in 0 until 16) { val x = startX + dx val y = startY + dy - if (x < width && y < height) { + if (x < width && y < decodingHeight) { val imageOffset = y.toLong() * width + x val bufferOffset = imageOffset * 3 @@ -2113,6 +2189,25 @@ class GraphicsJSR223Delegate(private val vm: VM) { } } } + + // Apply Yadif deinterlacing if this is an interlaced frame + /*if (isInterlaced) { + // Decode produced a field at half-height, now deinterlace to full progressive frame + val tempFieldBuffer = vm.malloc(width * decodingHeight * 3) + + // Copy the decoded field to temporary buffer + vm.memcpy(currentRGBAddr.toInt(), tempFieldBuffer.toInt(), width * decodingHeight * 3) + + // Apply Yadif deinterlacing: field -> progressive frame + yadifDeinterlace( + tempFieldBuffer.toLong(), currentRGBAddr, width, height, + prevRGBAddr, prevRGBAddr, // TODO: Implement proper temporal prediction + 0, // Field parity (0=even field first) + thisAddrIncVec, thisAddrIncVec + ) + + vm.free(tempFieldBuffer.toInt()) + }*/ } diff --git a/video_encoder/encoder_tev.c b/video_encoder/encoder_tev.c index 2767e81..f3f588f 100644 --- a/video_encoder/encoder_tev.c +++ b/video_encoder/encoder_tev.c @@ -161,6 +161,7 @@ typedef struct { int has_audio; int has_subtitles; int output_to_stdout; + int progressive_mode; // 0 = interlaced (default), 1 = progressive int qualityIndex; // -q option int qualityY; int qualityCo; @@ -174,6 +175,7 @@ typedef struct { // Frame buffers (8-bit RGB format for encoding) uint8_t *current_rgb, *previous_rgb, *reference_rgb; + uint8_t *previous_even_field; // Previous even field buffer for interlaced scene change detection // YCoCg workspace float *y_workspace, *co_workspace, *cg_workspace; @@ -303,29 +305,6 @@ static void dct_16x16_fast(float *input, float *output) { } } -// Legacy O(n^4) version for reference/fallback -static void dct_16x16(float *input, float *output) { - init_dct_tables(); // Ensure tables are initialized - - for (int u = 0; u < 16; u++) { - for (int v = 0; v < 16; v++) { - float sum = 0.0f; - float cu = (u == 0) ? 1.0f / sqrtf(2.0f) : 1.0f; - float cv = (v == 0) ? 1.0f / sqrtf(2.0f) : 1.0f; - - for (int x = 0; x < 16; x++) { - for (int y = 0; y < 16; y++) { - sum += input[y * 16 + x] * - dct_table_16[u][x] * - dct_table_16[v][y]; - } - } - - output[u * 16 + v] = 0.25f * cu * cv * sum; - } - } -} - // Fast separable 8x8 DCT - 4x performance improvement static float temp_dct_8[HALF_BLOCK_SIZE_SQR]; // Reusable temporary buffer @@ -361,29 +340,6 @@ static void dct_8x8_fast(float *input, float *output) { } } -// Legacy 8x8 2D DCT (for chroma) - O(n^4) version -static void dct_8x8(float *input, float *output) { - init_dct_tables(); // Ensure tables are initialized - - for (int u = 0; u < 8; u++) { - for (int v = 0; v < 8; v++) { - float sum = 0.0f; - float cu = (u == 0) ? 1.0f / sqrtf(2.0f) : 1.0f; - float cv = (v == 0) ? 1.0f / sqrtf(2.0f) : 1.0f; - - for (int x = 0; x < 8; x++) { - for (int y = 0; y < 8; y++) { - sum += input[y * 8 + x] * - dct_table_8[u][x] * - dct_table_8[v][y]; - } - } - - output[u * 8 + v] = 0.25f * cu * cv * sum; - } - } -} - // quantise DCT coefficient using quality table with rate control static int16_t quantise_coeff(float coeff, float quant, int is_dc, int is_chroma) { if (is_dc) { @@ -1485,14 +1441,19 @@ static tev_encoder_t* init_encoder(void) { // Allocate encoder buffers static int alloc_encoder_buffers(tev_encoder_t *enc) { - int pixels = enc->width * enc->height; + // In interlaced mode, FFmpeg separatefields outputs field frames at half height + // In progressive mode, we work with full height frames + int encoding_pixels = enc->width * enc->height; + int blocks_x = (enc->width + 15) / 16; int blocks_y = (enc->height + 15) / 16; int total_blocks = blocks_x * blocks_y; - enc->current_rgb = malloc(pixels * 3); - enc->previous_rgb = malloc(pixels * 3); - enc->reference_rgb = malloc(pixels * 3); + // Allocate buffers for encoding (FFmpeg provides frames at the correct resolution) + enc->current_rgb = malloc(encoding_pixels * 3); // Current frame buffer from FFmpeg + enc->previous_rgb = malloc(encoding_pixels * 3); // Previous frame buffer for motion estimation + enc->reference_rgb = malloc(encoding_pixels * 3); // Reference frame buffer + enc->previous_even_field = malloc(encoding_pixels * 3); // Previous even field for interlaced scene change enc->y_workspace = malloc(16 * 16 * sizeof(float)); enc->co_workspace = malloc(8 * 8 * sizeof(float)); @@ -1504,6 +1465,7 @@ static int alloc_encoder_buffers(tev_encoder_t *enc) { enc->mp2_buffer = malloc(MP2_DEFAULT_PACKET_SIZE); if (!enc->current_rgb || !enc->previous_rgb || !enc->reference_rgb || + !enc->previous_even_field || !enc->y_workspace || !enc->co_workspace || !enc->cg_workspace || !enc->dct_workspace || !enc->block_data || !enc->compressed_buffer || !enc->mp2_buffer) { @@ -1523,7 +1485,8 @@ static int alloc_encoder_buffers(tev_encoder_t *enc) { ZSTD_CCtx_setParameter(enc->zstd_context, ZSTD_c_hashLog, 16); // Initialize previous frame to black - memset(enc->previous_rgb, 0, pixels * 3); + memset(enc->previous_rgb, 0, encoding_pixels * 3); + memset(enc->previous_even_field, 0, encoding_pixels * 3); return 1; } @@ -1540,6 +1503,7 @@ static void free_encoder(tev_encoder_t *enc) { if (enc->current_rgb) { free(enc->current_rgb); enc->current_rgb = NULL; } if (enc->previous_rgb) { free(enc->previous_rgb); enc->previous_rgb = NULL; } if (enc->reference_rgb) { free(enc->reference_rgb); enc->reference_rgb = NULL; } + if (enc->previous_even_field) { free(enc->previous_even_field); enc->previous_even_field = NULL; } if (enc->y_workspace) { free(enc->y_workspace); enc->y_workspace = NULL; } if (enc->co_workspace) { free(enc->co_workspace); enc->co_workspace = NULL; } if (enc->cg_workspace) { free(enc->cg_workspace); enc->cg_workspace = NULL; } @@ -1551,6 +1515,7 @@ static void free_encoder(tev_encoder_t *enc) { } // Write TEV header + static int write_tev_header(FILE *output, tev_encoder_t *enc) { // Magic + version fwrite(TEV_MAGIC, 1, 8, output); @@ -1559,14 +1524,15 @@ static int write_tev_header(FILE *output, tev_encoder_t *enc) { // Video parameters uint16_t width = enc->width; - uint16_t height = enc->height; + uint16_t height = enc->progressive_mode ? enc->height : enc->height * 2; uint8_t fps = enc->fps; uint32_t total_frames = enc->total_frames; uint8_t qualityY = enc->qualityY; uint8_t qualityCo = enc->qualityCo; uint8_t qualityCg = enc->qualityCg; uint8_t flags = (enc->has_audio) | (enc->has_subtitles << 1); - uint16_t unused = 0; + uint8_t video_flags = enc->progressive_mode ? 0 : 1; // bit 0 = is_interlaced (inverted from progressive) + uint8_t reserved = 0; fwrite(&width, 2, 1, output); fwrite(&height, 2, 1, output); @@ -1576,29 +1542,46 @@ static int write_tev_header(FILE *output, tev_encoder_t *enc) { fwrite(&qualityCo, 1, 1, output); fwrite(&qualityCg, 1, 1, output); fwrite(&flags, 1, 1, output); - fwrite(&unused, 2, 1, output); + fwrite(&video_flags, 1, 1, output); + fwrite(&reserved, 1, 1, output); return 0; } // Detect scene changes by analyzing frame differences -static int detect_scene_change(tev_encoder_t *enc) { - if (!enc->previous_rgb || !enc->current_rgb) { - return 0; // No previous frame to compare +static int detect_scene_change(tev_encoder_t *enc, int field_parity) { + if (!enc->current_rgb) { + return 0; // No current frame to compare + } + + // In interlaced mode, use previous even field for comparison + uint8_t *comparison_buffer = enc->previous_rgb; + if (!enc->progressive_mode && field_parity == 0) { + // Interlaced even field: compare to previous even field + if (!enc->previous_even_field) { + return 0; // No previous even field to compare + } + comparison_buffer = enc->previous_even_field; + } else { + // Progressive mode: use regular previous_rgb + if (!enc->previous_rgb) { + return 0; // No previous frame to compare + } + comparison_buffer = enc->previous_rgb; } long long total_diff = 0; int changed_pixels = 0; - + // Sample every 4th pixel for performance (still gives good detection) for (int y = 0; y < enc->height; y += 2) { for (int x = 0; x < enc->width; x += 2) { int offset = (y * enc->width + x) * 3; // Calculate color difference - int r_diff = abs(enc->current_rgb[offset] - enc->previous_rgb[offset]); - int g_diff = abs(enc->current_rgb[offset + 1] - enc->previous_rgb[offset + 1]); - int b_diff = abs(enc->current_rgb[offset + 2] - enc->previous_rgb[offset + 2]); + int r_diff = abs(enc->current_rgb[offset] - comparison_buffer[offset]); + int g_diff = abs(enc->current_rgb[offset + 1] - comparison_buffer[offset + 1]); + int b_diff = abs(enc->current_rgb[offset + 2] - comparison_buffer[offset + 2]); int pixel_diff = r_diff + g_diff + b_diff; total_diff += pixel_diff; @@ -1614,17 +1597,26 @@ static int detect_scene_change(tev_encoder_t *enc) { int sampled_pixels = (enc->height / 2) * (enc->width / 2); double avg_diff = (double)total_diff / sampled_pixels; double changed_ratio = (double)changed_pixels / sampled_pixels; + + if (enc->verbose) { + printf("Scene change detection: avg_diff=%.2f\tchanged_ratio=%.4f\n", avg_diff, changed_ratio); + } + + // Scene change thresholds - adjust for interlaced mode + // Interlaced fields have more natural differences due to temporal field separation + double threshold = 0.30; - // Scene change thresholds: - // - High average difference (> 40) OR - // - Large percentage of changed pixels (> 30%) - return (avg_diff > 40.0) || (changed_ratio > 0.30); + return changed_ratio > threshold; } // Encode and write a frame -static int encode_frame(tev_encoder_t *enc, FILE *output, int frame_num) { - // Check for scene change or time-based keyframe - int is_scene_change = detect_scene_change(enc); +static int encode_frame(tev_encoder_t *enc, FILE *output, int frame_num, int field_parity) { + // In interlaced mode, only do scene change detection for even fields (field_parity = 0) + // to avoid false scene changes between fields of the same frame + int is_scene_change = 0; + if (enc->progressive_mode || field_parity == 0) { + is_scene_change = detect_scene_change(enc, field_parity); + } int is_time_keyframe = (frame_num % KEYFRAME_INTERVAL) == 0; int is_keyframe = is_time_keyframe || is_scene_change; @@ -1692,6 +1684,13 @@ static int encode_frame(tev_encoder_t *enc, FILE *output, int frame_num) { // No global rate control needed - per-block complexity-based control only // Swap frame buffers for next frame + if (!enc->progressive_mode && field_parity == 0) { + // Interlaced even field: save to previous_even_field for scene change detection + size_t field_size = enc->width * enc->height * 3; + memcpy(enc->previous_even_field, enc->current_rgb, field_size); + } + + // Normal buffer swap for motion estimation uint8_t *temp_rgb = enc->previous_rgb; enc->previous_rgb = enc->current_rgb; enc->current_rgb = temp_rgb; @@ -1782,7 +1781,8 @@ static int get_video_metadata(tev_encoder_t *config) { fprintf(stderr, " FPS: %d\n", config->fps); fprintf(stderr, " Duration: %.2fs\n", config->duration); fprintf(stderr, " Audio: %s\n", config->has_audio ? "Yes" : "No"); - fprintf(stderr, " Resolution: %dx%d\n", config->width, config->height); + fprintf(stderr, " Resolution: %dx%d (%s)\n", config->width, config->height, + config->progressive_mode ? "progressive" : "interlaced"); return (config->total_frames > 0 && config->fps > 0); } @@ -1792,20 +1792,39 @@ static int start_video_conversion(tev_encoder_t *enc) { char command[2048]; // Build FFmpeg command with potential frame rate conversion - if (enc->output_fps > 0 && enc->output_fps != enc->fps) { - // Frame rate conversion requested - snprintf(command, sizeof(command), - "ffmpeg -v quiet -i \"%s\" -f rawvideo -pix_fmt rgb24 " - "-vf \"fps=%d,scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" " - "-y - 2>&1", - enc->input_file, enc->output_fps, enc->width, enc->height, enc->width, enc->height); + if (enc->progressive_mode) { + if (enc->output_fps > 0 && enc->output_fps != enc->fps) { + // Frame rate conversion requested + snprintf(command, sizeof(command), + "ffmpeg -v error -i \"%s\" -f rawvideo -pix_fmt rgb24 " + "-vf \"fps=%d,scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" " + "-y - 2>&1", + enc->input_file, enc->output_fps, enc->width, enc->height, enc->width, enc->height); + } else { + // No frame rate conversion + snprintf(command, sizeof(command), + "ffmpeg -v error -i \"%s\" -f rawvideo -pix_fmt rgb24 " + "-vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" " + "-y -", + enc->input_file, enc->width, enc->height, enc->width, enc->height); + } + // let FFmpeg handle the interlacing } else { - // No frame rate conversion - snprintf(command, sizeof(command), - "ffmpeg -v quiet -i \"%s\" -f rawvideo -pix_fmt rgb24 " - "-vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" " - "-y -", - enc->input_file, enc->width, enc->height, enc->width, enc->height); + if (enc->output_fps > 0 && enc->output_fps != enc->fps) { + // Frame rate conversion requested + snprintf(command, sizeof(command), + "ffmpeg -v error -i \"%s\" -f rawvideo -pix_fmt rgb24 " + "-vf \"fps=%d,scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d,tinterlace=interleave_top,separatefields\" " + "-y - 2>&1", + enc->input_file, enc->output_fps, enc->width, enc->height * 2, enc->width, enc->height * 2); + } else { + // No frame rate conversion + snprintf(command, sizeof(command), + "ffmpeg -v error -i \"%s\" -f rawvideo -pix_fmt rgb24 " + "-vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d,tinterlace=interleave_top,separatefields\" " + "-y -", + enc->input_file, enc->width, enc->height * 2, enc->width, enc->height * 2); + } } if (enc->verbose) { @@ -1981,6 +2000,7 @@ static void show_usage(const char *program_name) { printf(" -q, --quality N Quality level 0-4 (default: 2, only decides audio rate in bitrate mode and quantiser mode)\n"); printf(" -Q, --quantiser N Quantiser level 0-100 (100: lossless, 0: potato)\n"); // printf(" -b, --bitrate N Target bitrate in kbps (enables bitrate control mode; DON'T USE - NOT WORKING AS INTENDED)\n"); + printf(" -p, --progressive Use progressive scan (default: interlaced)\n"); printf(" -v, --verbose Verbose output\n"); printf(" -t, --test Test mode: generate solid colour frames\n"); printf(" --help Show this help\n\n"); @@ -2062,6 +2082,7 @@ int main(int argc, char *argv[]) { {"quantiser", required_argument, 0, 'Q'}, {"quantizer", required_argument, 0, 'Q'}, {"bitrate", required_argument, 0, 'b'}, + {"progressive", no_argument, 0, 'p'}, {"verbose", no_argument, 0, 'v'}, {"test", no_argument, 0, 't'}, {"help", no_argument, 0, '?'}, @@ -2071,7 +2092,7 @@ int main(int argc, char *argv[]) { int option_index = 0; int c; - while ((c = getopt_long(argc, argv, "i:o:s:w:h:f:q:b:Q:vt", long_options, &option_index)) != -1) { + while ((c = getopt_long(argc, argv, "i:o:s:w:h:f:q:b:Q:pvt", long_options, &option_index)) != -1) { switch (c) { case 'i': enc->input_file = strdup(optarg); @@ -2109,6 +2130,9 @@ int main(int argc, char *argv[]) { enc->bitrate_mode = 1; // Enable bitrate control } break; + case 'p': + enc->progressive_mode = 1; + break; case 'v': enc->verbose = 1; break; @@ -2134,6 +2158,11 @@ int main(int argc, char *argv[]) { } } + // halve the internal representation of frame height + if (!enc->progressive_mode) { + enc->height /= 2; + } + if (!test_mode && (!enc->input_file || !enc->output_file)) { fprintf(stderr, "Input and output files are required (unless using --test mode)\n"); show_usage(argv[0]); @@ -2278,7 +2307,9 @@ int main(int argc, char *argv[]) { } else { // Read RGB data directly from FFmpeg pipe - size_t rgb_size = enc->width * enc->height * 3; + // height-halving is already done on the encoder initialisation + int frame_height = enc->height; + size_t rgb_size = enc->width * frame_height * 3; size_t bytes_read = fread(enc->current_rgb, 1, rgb_size, enc->ffmpeg_video_pipe); if (bytes_read != rgb_size) { @@ -2293,6 +2324,10 @@ int main(int argc, char *argv[]) { } break; // End of video or error } + + // In interlaced mode, FFmpeg separatefields filter already provides field-separated frames + // Each frame from FFmpeg is now a single field at half height + // Frame parity: even frames (0,2,4...) = bottom fields, odd frames (1,3,5...) = top fields } // Process audio for this frame @@ -2302,7 +2337,9 @@ int main(int argc, char *argv[]) { process_subtitles(enc, frame_count, output); // Encode frame - if (!encode_frame(enc, output, frame_count)) { + // Pass field parity for interlaced mode, -1 for progressive mode + int frame_field_parity = enc->progressive_mode ? -1 : (frame_count % 2); + if (!encode_frame(enc, output, frame_count, frame_field_parity)) { fprintf(stderr, "Failed to encode frame %d\n", frame_count); break; }