monoblock TAV

2026-06-06 05:28:31 +09:00 · 2025-09-18 00:07:03 +09:00
parent f4b03b55b6
commit 35f1a0c2f2
3 changed files with 44 additions and 40 deletions
--- a/terranmon.txt
+++ b/terranmon.txt
@@ -806,12 +806,6 @@ similar to JPEG2000, providing superior compression efficiency and scalability c
 to DCT-based codecs like TEV. Features include multi-resolution encoding, progressive
 transmission capability, and region-of-interest coding.

-## Version History
- Version 1.0: Initial DWT-based implementation with 5/3 reversible filter
- Version 1.1: Added 9/7 irreversible filter for higher compression
- Version 1.2: Multi-resolution pyramid encoding with up to 4 decomposition levels
- Version 1.3: Optimized 112x112 tiles for TSVM resolution with up to 6 decomposition levels
-
 # File Structure
 \x1F T S V M T A V
 [HEADER]
--- a/video_encoder/encoder_tav.c
+++ b/video_encoder/encoder_tav.c
@@ -184,12 +184,15 @@ typedef struct {
    int intra_only;       // Force all tiles to use INTRA mode (disable delta encoding)
    int monoblock;        // Single DWT tile mode (encode entire frame as one tile)
    
-    // Frame buffers
+    // Frame buffers - ping-pong implementation
+    uint8_t *frame_rgb[2];      // [0] and [1] alternate between current and previous
+    int frame_buffer_index;     // 0 or 1, indicates which set is "current"
+    float *current_frame_y, *current_frame_co, *current_frame_cg;
+
+    // Convenience pointers (updated each frame to point to current ping-pong buffers)
    uint8_t *current_frame_rgb;
    uint8_t *previous_frame_rgb;
-    float *current_frame_y, *current_frame_co, *current_frame_cg;
-    float *previous_frame_y, *previous_frame_co, *previous_frame_cg;
-    
+
    // Tile processing
    int tiles_x, tiles_y;
    dwt_tile_t *tiles;
@@ -235,6 +238,16 @@ typedef struct {

 // Wavelet filter constants removed - using lifting scheme implementation instead

+// Swap ping-pong frame buffers (eliminates need for memcpy)
+static void swap_frame_buffers(tav_encoder_t *enc) {
+    // Flip the buffer index
+    enc->frame_buffer_index = 1 - enc->frame_buffer_index;
+
+    // Update convenience pointers to point to the new current/previous buffers
+    enc->current_frame_rgb = enc->frame_rgb[enc->frame_buffer_index];
+    enc->previous_frame_rgb = enc->frame_rgb[1 - enc->frame_buffer_index];
+}
+
 // Parse resolution string like "1024x768" with keyword recognition
 static int parse_resolution(const char *res_str, int *width, int *height) {
    if (!res_str) return 0;
@@ -293,14 +306,14 @@ static void show_usage(const char *program_name) {
    printf("  -s, --size WxH          Video size (default: %dx%d)\n", DEFAULT_WIDTH, DEFAULT_HEIGHT);
    printf("  -f, --fps N             Output frames per second (enables frame rate conversion)\n");
    printf("  -q, --quality N         Quality level 0-5 (default: 2)\n");
-    printf("  -Q, --quantiser Y,Co,Cg Quantiser levels 0-100 for each channel\n");
+    printf("  -Q, --quantiser Y,Co,Cg Quantiser levels 0-255 for each channel (1: lossless, 255: potato)\n");
 //    printf("  -w, --wavelet N         Wavelet filter: 0=5/3 reversible, 1=9/7 irreversible (default: 1)\n");
 //    printf("  -b, --bitrate N         Target bitrate in kbps (enables bitrate control mode)\n");
    printf("  -S, --subtitles FILE    SubRip (.srt) or SAMI (.smi) subtitle file\n");
    printf("  -v, --verbose           Verbose output\n");
    printf("  -t, --test              Test mode: generate solid colour frames\n");
    printf("  --lossless              Lossless mode: use 5/3 reversible wavelet\n");
-    printf("  --delta-code            Enable delta encoding (improved compression but noisy picture)\n");
+    printf("  --delta                 Enable delta encoding (improved compression but noisy picture)\n");
    printf("  --ictcp                 Use ICtCp colour space instead of YCoCg-R (use when source is in BT.2100)\n");
    printf("  --help                  Show this help\n\n");
    
@@ -321,15 +334,20 @@ static void show_usage(const char *program_name) {
    for (int i = 0; i < 6; i++) {
        printf("%d: Q %d  \t", i, QUALITY_CG[i]);
    }
-    
-    printf("\n\nFeatures:\n");
+    printf("\n\nVideo Size Keywords:");
+    printf("\n  -s cif: equal to 352x288");
+    printf("\n  -s qcif: equal to 176x144");
+    printf("\n  -s half: equal to %dx%d", DEFAULT_WIDTH >> 1, DEFAULT_HEIGHT >> 1);
+    printf("\n  -s default: equal to %dx%d", DEFAULT_WIDTH, DEFAULT_HEIGHT);
+    printf("\n\n");
+    printf("Features:\n");
    printf("  - Single DWT tile (monoblock) encoding for optimal quality\n");
    printf("  - Full resolution YCoCg-R/ICtCp colour space\n");
    printf("  - Lossless and lossy compression modes\n");
    
    printf("\nExamples:\n");
    printf("  %s -i input.mp4 -o output.mv3               # Default settings\n", program_name);
-    printf("  %s -i input.mkv -q 4 -w 1 -o output.mv3     # Maximum quality with 9/7 wavelet\n", program_name);
+    printf("  %s -i input.mkv -q 4 -o output.mv3          # At maximum quality\n", program_name);
    printf("  %s -i input.avi --lossless -o output.mv3    # Lossless encoding\n", program_name);
 //    printf("  %s -i input.mp4 -b 800 -o output.mv3        # 800 kbps bitrate target\n", program_name);
    printf("  %s -i input.webm -S subs.srt -o output.mv3  # With subtitles\n", program_name);
@@ -377,16 +395,18 @@ static int initialize_encoder(tav_encoder_t *enc) {
    }
    int num_tiles = enc->tiles_x * enc->tiles_y;
    
-    // Allocate frame buffers
+    // Allocate ping-pong frame buffers
    size_t frame_size = enc->width * enc->height;
-    enc->current_frame_rgb = malloc(frame_size * 3);
-    enc->previous_frame_rgb = malloc(frame_size * 3);
+    enc->frame_rgb[0] = malloc(frame_size * 3);
+    enc->frame_rgb[1] = malloc(frame_size * 3);
+
+    // Initialize ping-pong buffer index and convenience pointers
+    enc->frame_buffer_index = 0;
+    enc->current_frame_rgb = enc->frame_rgb[0];
+    enc->previous_frame_rgb = enc->frame_rgb[1];
    enc->current_frame_y = malloc(frame_size * sizeof(float));
    enc->current_frame_co = malloc(frame_size * sizeof(float));
    enc->current_frame_cg = malloc(frame_size * sizeof(float));
-    enc->previous_frame_y = malloc(frame_size * sizeof(float));
-    enc->previous_frame_co = malloc(frame_size * sizeof(float));
-    enc->previous_frame_cg = malloc(frame_size * sizeof(float));
    
    // Allocate tile structures
    enc->tiles = malloc(num_tiles * sizeof(dwt_tile_t));
@@ -423,9 +443,8 @@ static int initialize_encoder(tav_encoder_t *enc) {
    enc->previous_coeffs_cg = malloc(total_coeff_size);
    enc->previous_coeffs_allocated = 0; // Will be set to 1 after first I-frame
    
-    if (!enc->current_frame_rgb || !enc->previous_frame_rgb || 
+    if (!enc->frame_rgb[0] || !enc->frame_rgb[1] ||
        !enc->current_frame_y || !enc->current_frame_co || !enc->current_frame_cg ||
-        !enc->previous_frame_y || !enc->previous_frame_co || !enc->previous_frame_cg ||
        !enc->tiles || !enc->zstd_ctx || !enc->compressed_buffer ||
        !enc->reusable_quantised_y || !enc->reusable_quantised_co || !enc->reusable_quantised_cg ||
        !enc->previous_coeffs_y || !enc->previous_coeffs_co || !enc->previous_coeffs_cg) {
@@ -2180,13 +2199,14 @@ int main(int argc, char *argv[]) {
        {"quantizer", required_argument, 0, 'Q'},
 //        {"wavelet", required_argument, 0, 'w'},
        {"bitrate", required_argument, 0, 'b'},
+        {"subtitle", required_argument, 0, 'S'},
        {"subtitles", required_argument, 0, 'S'},
        {"verbose", no_argument, 0, 'v'},
        {"test", no_argument, 0, 't'},
        {"lossless", no_argument, 0, 1000},
-        {"delta-code", no_argument, 0, 1006},
+        {"delta", no_argument, 0, 1006},
        {"ictcp", no_argument, 0, 1005},
-        {"help", no_argument, 0, 1004},
+        {"help", no_argument, 0, '?'},
        {0, 0, 0, 0}
    };
    
@@ -2509,13 +2529,8 @@ int main(int argc, char *argv[]) {
                count_pframe++;
        }
        
-        // Copy current frame to previous frame buffer
-        size_t float_frame_size = enc->width * enc->height * sizeof(float);
-        size_t rgb_frame_size = enc->width * enc->height * 3;
-        memcpy(enc->previous_frame_y, enc->current_frame_y, float_frame_size);
-        memcpy(enc->previous_frame_co, enc->current_frame_co, float_frame_size);
-        memcpy(enc->previous_frame_cg, enc->current_frame_cg, float_frame_size);
-        memcpy(enc->previous_frame_rgb, enc->current_frame_rgb, rgb_frame_size);
+        // Swap ping-pong buffers (eliminates memcpy operations)
+        swap_frame_buffers(enc);
        
        frame_count++;
        enc->frame_count = frame_count;
@@ -2586,14 +2601,8 @@ static void cleanup_encoder(tav_encoder_t *enc) {
    free(enc->input_file);
    free(enc->output_file);
    free(enc->subtitle_file);
-    free(enc->current_frame_rgb);
-    free(enc->previous_frame_rgb);
-    free(enc->current_frame_y);
-    free(enc->current_frame_co);
-    free(enc->current_frame_cg);
-    free(enc->previous_frame_y);
-    free(enc->previous_frame_co);
-    free(enc->previous_frame_cg);
+    free(enc->frame_rgb[0]);
+    free(enc->frame_rgb[1]);
    free(enc->tiles);
    free(enc->compressed_buffer);
    free(enc->mp2_buffer);
--- a/video_encoder/encoder_tev.c
+++ b/video_encoder/encoder_tev.c
@@ -2626,6 +2626,7 @@ int main(int argc, char *argv[]) {
        {"input", required_argument, 0, 'i'},
        {"output", required_argument, 0, 'o'},
        {"size", required_argument, 0, 's'},
+        {"subtitle", required_argument, 0, 'S'},
        {"subtitles", required_argument, 0, 'S'},
        {"fps", required_argument, 0, 'f'},
        {"quality", required_argument, 0, 'q'},