tavenc: fixed malformed intra frame packets

2026-03-07 11:51:49 +09:00 · 2025-12-09 17:36:09 +09:00
parent 50092aef60
commit f1d1e36164
1 changed files with 16 additions and 217 deletions
--- a/video_encoder/lib/libtavenc/tav_encoder_lib.c
+++ b/video_encoder/lib/libtavenc/tav_encoder_lib.c
@@ -31,6 +31,8 @@
 #define ENCODER_VERSION "TAV Encoder Library v1.0"
 #define MAX_ERROR_MESSAGE 256
 #define GOP_SIZE_MAX 24
 // GOP status values
 #define GOP_STATUS_EMPTY      0
 #define GOP_STATUS_FILLING    1
@@ -530,9 +532,6 @@ tav_encoder_context_t *tav_encoder_create(const tav_encoder_params_t *params) {
        return NULL;
    }
    // TODO: Initialize thread pool if multi-threaded
    // (Thread pool implementation deferred - requires extracting worker logic)
    if (ctx->verbose) {
        printf("%s created:\n", ENCODER_VERSION);
        printf("  Resolution: %dx%d @ %d/%d fps\n",
@@ -618,9 +617,9 @@ int tav_encoder_validate_context(tav_encoder_context_t *ctx) {
    if (!ctx) return 0;
    // Basic sanity checks
-    if (ctx->width < 16 || ctx->width > 8192) return 0;
+    if (ctx->width < 16 || ctx->width > 16777215) return 0;
-    if (ctx->height < 16 || ctx->height > 8192) return 0;
+    if (ctx->height < 16 || ctx->height > 16777215) return 0;
-    if (ctx->gop_size < 1 || ctx->gop_size > 48) return 0;
+    if (ctx->gop_size < 1 || ctx->gop_size > GOP_SIZE_MAX) return 0;
    return 1;
 }
@@ -655,217 +654,10 @@ void tav_encoder_get_stats(tav_encoder_context_t *ctx, tav_encoder_stats_t *stat
    }
 }
 // =============================================================================
 // Frame Encoding - DEPRECATED, use tav_encoder_encode_gop() instead
 // =============================================================================
 /*
 * tav_encoder_encode_frame() is deprecated and will be removed.
 * Use tav_encoder_encode_gop() which works for both single-threaded and
 * multi-threaded modes. The CLI should buffer frames and call encode_gop()
 * when a full GOP is ready.
 */
 #if 0  // DEPRECATED - kept for reference, will be deleted
 int tav_encoder_encode_frame(tav_encoder_context_t *ctx,
                              const uint8_t *rgb_frame,
                              int64_t frame_pts,
                              tav_encoder_packet_t **packet) {
    if (!ctx || !rgb_frame || !packet) {
        if (ctx) {
            snprintf(ctx->error_message, MAX_ERROR_MESSAGE, "Invalid parameters");
        }
        return -1;
    }
    *packet = NULL;  // No packet until GOP is complete
    // Single-threaded implementation: buffer frames until GOP full
    if (ctx->num_threads == 0) {
        // Copy RGB frame to GOP buffer
        size_t frame_size = ctx->width * ctx->height * 3;
        memcpy(ctx->gop_rgb_frames[ctx->gop_frame_count], rgb_frame, frame_size);
        ctx->gop_frame_pts[ctx->gop_frame_count] = frame_pts;
        ctx->gop_frame_count++;
        // Check if GOP is full
        if (ctx->gop_frame_count >= ctx->gop_size) {
            // Create temporary GOP slot
            gop_slot_t slot = {0};
            slot.rgb_frames = ctx->gop_rgb_frames;
            slot.num_frames = ctx->gop_frame_count;
            slot.frame_numbers = tav_calloc(ctx->gop_frame_count, sizeof(int));
            for (int i = 0; i < ctx->gop_frame_count; i++) {
                slot.frame_numbers[i] = (int)(ctx->frames_encoded + i);
            }
            slot.width = ctx->width;
            slot.height = ctx->height;
            // Encode GOP
            int result;
            if (ctx->enable_temporal_dwt) {
                result = encode_gop_unified(ctx, &slot);
            } else {
                result = encode_gop_intra_only(ctx, &slot);
            }
            free(slot.frame_numbers);
            if (result < 0) {
                // Error message already set by encoding function
                return -1;
            }
            // Extract packets from slot
            if (slot.num_packets > 0) {
                *packet = &slot.packets[0];
            }
            // Update statistics
            ctx->frames_encoded += ctx->gop_frame_count;
            ctx->gops_encoded++;
            ctx->video_bytes += slot.packets[0].size;
            ctx->total_bytes += slot.packets[0].size;
            // Reset GOP buffer
            ctx->gop_frame_count = 0;
            return 1;  // Packet ready
        }
        return 0;  // Buffering, no packet yet
    }
    // Multi-threaded implementation
    // TODO: Submit frame to thread pool
    snprintf(ctx->error_message, MAX_ERROR_MESSAGE,
             "Multi-threaded encoding not yet implemented");
    return -1;
 }
 #endif  // DEPRECATED
 // =============================================================================
 // Flush Encoder - DEPRECATED, CLI handles partial GOPs directly
 // =============================================================================
 /*
 * tav_encoder_flush() is deprecated and will be removed.
 * The CLI should track remaining frames and call tav_encoder_encode_gop()
 * directly for partial GOPs at the end of encoding.
 */
 #if 0  // DEPRECATED - kept for reference, will be deleted
 int tav_encoder_flush(tav_encoder_context_t *ctx,
                      tav_encoder_packet_t **packet) {
    if (!ctx || !packet) {
        if (ctx) {
            snprintf(ctx->error_message, MAX_ERROR_MESSAGE, "Invalid parameters");
        }
        return -1;
    }
    *packet = NULL;
    // Encode any remaining frames in GOP buffer
    if (ctx->num_threads == 0 && ctx->gop_frame_count > 0) {
        // Create temporary GOP slot for partial GOP
        gop_slot_t slot = {0};
        slot.rgb_frames = ctx->gop_rgb_frames;
        slot.num_frames = ctx->gop_frame_count;
        slot.frame_numbers = tav_calloc(ctx->gop_frame_count, sizeof(int));
        for (int i = 0; i < ctx->gop_frame_count; i++) {
            slot.frame_numbers[i] = (int)(ctx->frames_encoded + i);
        }
        slot.width = ctx->width;
        slot.height = ctx->height;
        int result;
        // For partial GOPs: use unified mode if temporal DWT enabled and >1 frame,
        // otherwise encode as I-frames one at a time
        if (ctx->enable_temporal_dwt && ctx->gop_frame_count > 1) {
            result = encode_gop_unified(ctx, &slot);
        } else if (ctx->gop_frame_count == 1) {
            result = encode_gop_intra_only(ctx, &slot);
        } else {
            // Encode each frame separately as I-frame
            // TODO: This is inefficient - should encode them in a batch
            // For now, just encode the first frame
            gop_slot_t single_slot = {0};
            single_slot.rgb_frames = malloc(sizeof(uint8_t*));
            single_slot.rgb_frames[0] = ctx->gop_rgb_frames[0];
            single_slot.num_frames = 1;
            single_slot.frame_numbers = malloc(sizeof(int));
            single_slot.frame_numbers[0] = (int)ctx->frames_encoded;
            single_slot.width = ctx->width;
            single_slot.height = ctx->height;
            result = encode_gop_intra_only(ctx, &single_slot);
            if (result == 0 && single_slot.num_packets > 0) {
                // Copy packet pointer
                slot.packets = single_slot.packets;
                slot.num_packets = single_slot.num_packets;
                // Don't free single_slot.packets - we transferred ownership
            }
            free(single_slot.rgb_frames);
            free(single_slot.frame_numbers);
            // Mark only 1 frame as encoded (we'll call flush again for others)
            ctx->gop_frame_count--;
            // Shift remaining frames down
            for (int i = 0; i < ctx->gop_frame_count; i++) {
                ctx->gop_rgb_frames[i] = ctx->gop_rgb_frames[i+1];
            }
        }
        free(slot.frame_numbers);
        if (result < 0) {
            // Error message already set by encoding function
            return -1;
        }
        // Extract packets from slot
        if (slot.num_packets > 0) {
            *packet = slot.packets;  // Transfer ownership to caller
        }
        // Update statistics (only for frames actually encoded)
        int frames_in_packet = (ctx->enable_temporal_dwt || ctx->gop_frame_count == 1)
                              ? slot.num_frames : 1;
        ctx->frames_encoded += frames_in_packet;
        ctx->gops_encoded++;
        if (slot.num_packets > 0) {
            ctx->video_bytes += slot.packets[0].size;
            ctx->total_bytes += slot.packets[0].size;
        }
        // Reset GOP buffer if we encoded everything
        if (!ctx->enable_temporal_dwt && ctx->gop_frame_count > 0) {
            // Still have frames to encode - return 1 to continue flushing
            return 1;
        }
        ctx->gop_frame_count = 0;
        return 1;  // Packet ready
    }
    // Multi-threaded: wait for all pending GOPs to complete
    if (ctx->num_threads > 0) {
        // TODO: Flush thread pool
        snprintf(ctx->error_message, MAX_ERROR_MESSAGE,
                 "Multi-threaded flush not yet implemented");
        return -1;
    }
    return 0;  // No more packets
 }
 #endif  // DEPRECATED
 void tav_encoder_free_packet(tav_encoder_packet_t *packet) {
    if (!packet) return;
@@ -891,9 +683,9 @@ int tav_encoder_encode_gop(tav_encoder_context_t *ctx,
        return -1;
    }
-    if (num_frames < 1 || num_frames > 24) {
+    if (num_frames < 1 || num_frames > GOP_SIZE_MAX) {
        snprintf(ctx->error_message, MAX_ERROR_MESSAGE,
-                 "Invalid GOP size: %d (must be 1-24)", num_frames);
+                 "Invalid GOP size: %d (must be 1-%d)", num_frames, GOP_SIZE_MAX);
        return -1;
    }
@@ -1406,6 +1198,12 @@ static int encode_gop_intra_only(tav_encoder_context_t *ctx, gop_slot_t *slot) {
        int16_t *quant_co = tav_calloc(num_pixels, sizeof(int16_t));
        int16_t *quant_cg = tav_calloc(num_pixels, sizeof(int16_t));
        // Write tile header: [mode(1)][qY_override(1)][qCo_override(1)][qCg_override(1)]
        preprocess_buffer[preprocess_offset++] = 0x01;  // TAV_MODE_INTRA
        preprocess_buffer[preprocess_offset++] = 0;     // qY override (0 = use header)
        preprocess_buffer[preprocess_offset++] = 0;     // qCo override
        preprocess_buffer[preprocess_offset++] = 0;     // qCg override
        // Apply 2D DWT to full frame
        tav_dwt_2d_forward(frame_y, width, height, ctx->decomp_levels, ctx->wavelet_type);
        tav_dwt_2d_forward(frame_co, width, height, ctx->decomp_levels, ctx->wavelet_type);
@@ -1432,11 +1230,12 @@ static int encode_gop_intra_only(tav_encoder_context_t *ctx, gop_slot_t *slot) {
        }
        // EZBC encode
-        preprocess_offset = preprocess_coefficients_ezbc(
+        size_t tile_size = preprocess_coefficients_ezbc(
            quant_y, quant_co, quant_cg, NULL,
            num_pixels, width, height, ctx->channel_layout,
-            preprocess_buffer
+            preprocess_buffer + preprocess_offset
        );
        preprocess_offset += tile_size;
        free(quant_y); free(quant_co); free(quant_cg);