diff --git a/video_encoder/Makefile b/video_encoder/Makefile index 1c95f69..f115b3d 100644 --- a/video_encoder/Makefile +++ b/video_encoder/Makefile @@ -27,7 +27,8 @@ LIBTAVENC_OBJ = lib/libtavenc/tav_encoder_lib.o \ lib/libtavenc/tav_encoder_dwt.o \ lib/libtavenc/tav_encoder_quantize.o \ lib/libtavenc/tav_encoder_ezbc.o \ - lib/libtavenc/tav_encoder_utils.o + lib/libtavenc/tav_encoder_utils.o \ + lib/libtavenc/tav_encoder_tile.o # libtavdec - TAV decoder library LIBTAVDEC_OBJ = lib/libtavdec/tav_video_decoder.o diff --git a/video_encoder/include/tav_encoder_lib.h b/video_encoder/include/tav_encoder_lib.h index 1ec11e9..57fb0fb 100644 --- a/video_encoder/include/tav_encoder_lib.h +++ b/video_encoder/include/tav_encoder_lib.h @@ -153,38 +153,12 @@ int tav_encoder_validate_context(tav_encoder_context_t *ctx); // Video Encoding // ============================================================================= -/** - * Encode a single RGB24 frame. - * - * Frames are buffered internally until a GOP is full, then encoded and returned. - * For GOP encoding: returns NULL until GOP is complete. - * For intra-only: returns packet immediately. - * - * Thread-safety: NOT thread-safe. Caller must serialize calls to encode_frame(). - * - * @param ctx Encoder context - * @param rgb_frame RGB24 frame data (planar: [R...][G...][B...]), width×height×3 bytes - * @param frame_pts Presentation timestamp (frame number or time) - * @param packet Output packet pointer (NULL if GOP not yet complete) - * @return 1 if packet ready, 0 if buffering for GOP, -1 on error +/* + * DEPRECATED: tav_encoder_encode_frame() and tav_encoder_flush() have been + * removed. Use tav_encoder_encode_gop() instead, which works for both + * single-threaded and multi-threaded modes. The CLI should buffer frames + * and call encode_gop() when a full GOP is ready. 
*/ -int tav_encoder_encode_frame(tav_encoder_context_t *ctx, - const uint8_t *rgb_frame, - int64_t frame_pts, - tav_encoder_packet_t **packet); - -/** - * Flush encoder and encode any remaining buffered frames. - * - * Call at end of encoding to output final GOP (even if not full). - * Returns packets one at a time through repeated calls. - * - * @param ctx Encoder context - * @param packet Output packet pointer (NULL when no more packets) - * @return 1 if packet ready, 0 if no more packets, -1 on error - */ -int tav_encoder_flush(tav_encoder_context_t *ctx, - tav_encoder_packet_t **packet); /** * Encode a complete GOP (Group of Pictures) directly. diff --git a/video_encoder/lib/libtavenc/tav_encoder_lib.c b/video_encoder/lib/libtavenc/tav_encoder_lib.c index 521f477..1320d8c 100644 --- a/video_encoder/lib/libtavenc/tav_encoder_lib.c +++ b/video_encoder/lib/libtavenc/tav_encoder_lib.c @@ -13,6 +13,7 @@ #include "tav_encoder_quantize.h" #include "tav_encoder_ezbc.h" #include "tav_encoder_utils.h" +#include "tav_encoder_tile.h" #include "encoder_tad.h" #include @@ -390,26 +391,26 @@ tav_encoder_context_t *tav_encoder_create(const tav_encoder_params_t *params) { // Auto mode: use monoblock for <= D1 PAL, tiled for larger if (ctx->width > TAV_MONOBLOCK_MAX_WIDTH || ctx->height > TAV_MONOBLOCK_MAX_HEIGHT) { ctx->monoblock = 0; - if (ctx->verbose) { +// if (ctx->verbose) { printf("Auto-selected Padded Tiling mode: %dx%d exceeds D1 PAL threshold (%dx%d)\n", ctx->width, ctx->height, TAV_MONOBLOCK_MAX_WIDTH, TAV_MONOBLOCK_MAX_HEIGHT); - } +// } } else { ctx->monoblock = 1; - if (ctx->verbose) { +// if (ctx->verbose) { printf("Auto-selected Monoblock mode: %dx%d within D1 PAL threshold\n", ctx->width, ctx->height); - } +// } } } else if (ctx->monoblock == 0) { - if (ctx->verbose) { +// if (ctx->verbose) { printf("Forced Padded Tiling mode (--tiled)\n"); - } +// } } else { // monoblock == 1: force monoblock even for large dimensions - if (ctx->verbose) { +// if (ctx->verbose) 
{ printf("Forced Monoblock mode (--monoblock)\n"); - } +// } } // Calculate tile dimensions based on monoblock setting @@ -421,10 +422,10 @@ tav_encoder_context_t *tav_encoder_create(const tav_encoder_params_t *params) { // Padded Tiling mode: multiple tiles of TILE_SIZE_X × TILE_SIZE_Y ctx->tiles_x = (ctx->width + TAV_TILE_SIZE_X - 1) / TAV_TILE_SIZE_X; ctx->tiles_y = (ctx->height + TAV_TILE_SIZE_Y - 1) / TAV_TILE_SIZE_Y; - if (ctx->verbose) { +// if (ctx->verbose) { printf("Padded Tiling mode: %dx%d tiles (%d total)\n", ctx->tiles_x, ctx->tiles_y, ctx->tiles_x * ctx->tiles_y); - } +// } } // Calculate decomp levels if auto (0) @@ -655,9 +656,17 @@ void tav_encoder_get_stats(tav_encoder_context_t *ctx, tav_encoder_stats_t *stat } // ============================================================================= -// Frame Encoding (Single-threaded implementation for now) +// Frame Encoding - DEPRECATED, use tav_encoder_encode_gop() instead // ============================================================================= +/* + * tav_encoder_encode_frame() is deprecated and will be removed. + * Use tav_encoder_encode_gop() which works for both single-threaded and + * multi-threaded modes. The CLI should buffer frames and call encode_gop() + * when a full GOP is ready. + */ + +#if 0 // DEPRECATED - kept for reference, will be deleted int tav_encoder_encode_frame(tav_encoder_context_t *ctx, const uint8_t *rgb_frame, int64_t frame_pts, @@ -733,11 +742,19 @@ int tav_encoder_encode_frame(tav_encoder_context_t *ctx, "Multi-threaded encoding not yet implemented"); return -1; } +#endif // DEPRECATED // ============================================================================= -// Flush Encoder +// Flush Encoder - DEPRECATED, CLI handles partial GOPs directly // ============================================================================= +/* + * tav_encoder_flush() is deprecated and will be removed. 
+ * The CLI should track remaining frames and call tav_encoder_encode_gop() + * directly for partial GOPs at the end of encoding. + */ + +#if 0 // DEPRECATED - kept for reference, will be deleted int tav_encoder_flush(tav_encoder_context_t *ctx, tav_encoder_packet_t **packet) { if (!ctx || !packet) { @@ -847,6 +864,7 @@ int tav_encoder_flush(tav_encoder_context_t *ctx, return 0; // No more packets } +#endif // DEPRECATED void tav_encoder_free_packet(tav_encoder_packet_t *packet) { if (!packet) return; @@ -1359,72 +1377,167 @@ static int encode_gop_intra_only(tav_encoder_context_t *ctx, gop_slot_t *slot) { return -1; } - // Allocate work buffers for single frame - float *work_y = tav_calloc(num_pixels, sizeof(float)); - float *work_co = tav_calloc(num_pixels, sizeof(float)); - float *work_cg = tav_calloc(num_pixels, sizeof(float)); - int16_t *quant_y = tav_calloc(num_pixels, sizeof(int16_t)); - int16_t *quant_co = tav_calloc(num_pixels, sizeof(int16_t)); - int16_t *quant_cg = tav_calloc(num_pixels, sizeof(int16_t)); + // Step 1: RGB to YCoCg-R (or ICtCp) for full frame + float *frame_y = tav_calloc(num_pixels, sizeof(float)); + float *frame_co = tav_calloc(num_pixels, sizeof(float)); + float *frame_cg = tav_calloc(num_pixels, sizeof(float)); - // Step 1: RGB to YCoCg-R (or ICtCp) - rgb_to_colour_space_frame(ctx, slot->rgb_frames[0], work_y, work_co, work_cg, width, height); + rgb_to_colour_space_frame(ctx, slot->rgb_frames[0], frame_y, frame_co, frame_cg, width, height); - // Step 2: Apply 2D DWT - tav_dwt_2d_forward(work_y, width, height, ctx->decomp_levels, ctx->wavelet_type); - tav_dwt_2d_forward(work_co, width, height, ctx->decomp_levels, ctx->wavelet_type); - tav_dwt_2d_forward(work_cg, width, height, ctx->decomp_levels, ctx->wavelet_type); - - // Step 3: Quantize coefficients - // ctx->quantiser_y/co/cg contain QLUT indices, lookup actual quantiser values + // Get quantiser values from QLUT indices int base_quantiser_y = QLUT[ctx->quantiser_y]; int 
base_quantiser_co = QLUT[ctx->quantiser_co]; int base_quantiser_cg = QLUT[ctx->quantiser_cg]; - if (ctx->perceptual_tuning) { - tav_quantise_perceptual(ctx->compat_enc, work_y, quant_y, num_pixels, - base_quantiser_y, (float)ctx->dead_zone_threshold, width, height, ctx->decomp_levels, 0, 0); - tav_quantise_perceptual(ctx->compat_enc, work_co, quant_co, num_pixels, - base_quantiser_co, (float)ctx->dead_zone_threshold, width, height, ctx->decomp_levels, 1, 0); - tav_quantise_perceptual(ctx->compat_enc, work_cg, quant_cg, num_pixels, - base_quantiser_cg, (float)ctx->dead_zone_threshold, width, height, ctx->decomp_levels, 1, 0); + // Allocate preprocess buffer for all tiles + // For tiled mode: num_tiles * (4-byte header + max_tile_coeff_size * 3 * sizeof(int16_t)) + // For monoblock: just the frame + const int tile_coeff_count = ctx->monoblock ? num_pixels : (TAV_PADDED_TILE_SIZE_X * TAV_PADDED_TILE_SIZE_Y); + const int num_tiles = ctx->tiles_x * ctx->tiles_y; + size_t preprocess_capacity = num_tiles * (4 + tile_coeff_count * 3 * sizeof(int16_t) * 2); // Conservative with EZBC overhead + uint8_t *preprocess_buffer = tav_malloc(preprocess_capacity); + size_t preprocess_offset = 0; + + if (ctx->monoblock) { + // ====================================================================== + // Monoblock mode: process entire frame as single tile + // ====================================================================== + int16_t *quant_y = tav_calloc(num_pixels, sizeof(int16_t)); + int16_t *quant_co = tav_calloc(num_pixels, sizeof(int16_t)); + int16_t *quant_cg = tav_calloc(num_pixels, sizeof(int16_t)); + + // Apply 2D DWT to full frame + tav_dwt_2d_forward(frame_y, width, height, ctx->decomp_levels, ctx->wavelet_type); + tav_dwt_2d_forward(frame_co, width, height, ctx->decomp_levels, ctx->wavelet_type); + tav_dwt_2d_forward(frame_cg, width, height, ctx->decomp_levels, ctx->wavelet_type); + + // Quantize + if (ctx->perceptual_tuning) { + 
tav_quantise_perceptual(ctx->compat_enc, frame_y, quant_y, num_pixels, + base_quantiser_y, (float)ctx->dead_zone_threshold, width, height, ctx->decomp_levels, 0, 0); + tav_quantise_perceptual(ctx->compat_enc, frame_co, quant_co, num_pixels, + base_quantiser_co, (float)ctx->dead_zone_threshold, width, height, ctx->decomp_levels, 1, 0); + tav_quantise_perceptual(ctx->compat_enc, frame_cg, quant_cg, num_pixels, + base_quantiser_cg, (float)ctx->dead_zone_threshold, width, height, ctx->decomp_levels, 1, 0); + } else { + tav_quantise_uniform(frame_y, quant_y, num_pixels, base_quantiser_y, + (float)ctx->dead_zone_threshold, width, height, + ctx->decomp_levels, 0); + tav_quantise_uniform(frame_co, quant_co, num_pixels, base_quantiser_co, + (float)ctx->dead_zone_threshold, width, height, + ctx->decomp_levels, 1); + tav_quantise_uniform(frame_cg, quant_cg, num_pixels, base_quantiser_cg, + (float)ctx->dead_zone_threshold, width, height, + ctx->decomp_levels, 1); + } + + // EZBC encode + preprocess_offset = preprocess_coefficients_ezbc( + quant_y, quant_co, quant_cg, NULL, + num_pixels, width, height, ctx->channel_layout, + preprocess_buffer + ); + + free(quant_y); free(quant_co); free(quant_cg); + } else { - tav_quantise_uniform(work_y, quant_y, num_pixels, base_quantiser_y, - (float)ctx->dead_zone_threshold, width, height, - ctx->decomp_levels, 0); - tav_quantise_uniform(work_co, quant_co, num_pixels, base_quantiser_co, - (float)ctx->dead_zone_threshold, width, height, - ctx->decomp_levels, 1); - tav_quantise_uniform(work_cg, quant_cg, num_pixels, base_quantiser_cg, - (float)ctx->dead_zone_threshold, width, height, - ctx->decomp_levels, 1); + // ====================================================================== + // Tiled mode: process each tile independently + // ====================================================================== + const int padded_pixels = TAV_PADDED_TILE_SIZE_X * TAV_PADDED_TILE_SIZE_Y; + + // Allocate reusable tile buffers + float *tile_y = 
tav_calloc(padded_pixels, sizeof(float)); + float *tile_co = tav_calloc(padded_pixels, sizeof(float)); + float *tile_cg = tav_calloc(padded_pixels, sizeof(float)); + int16_t *quant_y = tav_calloc(padded_pixels, sizeof(int16_t)); + int16_t *quant_co = tav_calloc(padded_pixels, sizeof(int16_t)); + int16_t *quant_cg = tav_calloc(padded_pixels, sizeof(int16_t)); + + for (int tile_y_idx = 0; tile_y_idx < ctx->tiles_y; tile_y_idx++) { + for (int tile_x_idx = 0; tile_x_idx < ctx->tiles_x; tile_x_idx++) { + // Write tile header: [mode(1)][qY_override(1)][qCo_override(1)][qCg_override(1)] + preprocess_buffer[preprocess_offset++] = 0x01; // TAV_MODE_INTRA + preprocess_buffer[preprocess_offset++] = 0; // qY override (0 = use header) + preprocess_buffer[preprocess_offset++] = 0; // qCo override + preprocess_buffer[preprocess_offset++] = 0; // qCg override + + // Extract padded tile from full frame + tav_extract_padded_tile(frame_y, frame_co, frame_cg, + width, height, + tile_x_idx, tile_y_idx, + tile_y, tile_co, tile_cg); + + // Apply 2D DWT to padded tile + tav_dwt_2d_forward_padded_tile(tile_y, ctx->decomp_levels, ctx->wavelet_type); + tav_dwt_2d_forward_padded_tile(tile_co, ctx->decomp_levels, ctx->wavelet_type); + tav_dwt_2d_forward_padded_tile(tile_cg, ctx->decomp_levels, ctx->wavelet_type); + + // Quantize tile coefficients + if (ctx->perceptual_tuning) { + tav_quantise_perceptual(ctx->compat_enc, tile_y, quant_y, padded_pixels, + base_quantiser_y, (float)ctx->dead_zone_threshold, + TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y, + ctx->decomp_levels, 0, 0); + tav_quantise_perceptual(ctx->compat_enc, tile_co, quant_co, padded_pixels, + base_quantiser_co, (float)ctx->dead_zone_threshold, + TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y, + ctx->decomp_levels, 1, 0); + tav_quantise_perceptual(ctx->compat_enc, tile_cg, quant_cg, padded_pixels, + base_quantiser_cg, (float)ctx->dead_zone_threshold, + TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y, + ctx->decomp_levels, 1, 0); 
+ } else { + tav_quantise_uniform(tile_y, quant_y, padded_pixels, base_quantiser_y, + (float)ctx->dead_zone_threshold, + TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y, + ctx->decomp_levels, 0); + tav_quantise_uniform(tile_co, quant_co, padded_pixels, base_quantiser_co, + (float)ctx->dead_zone_threshold, + TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y, + ctx->decomp_levels, 1); + tav_quantise_uniform(tile_cg, quant_cg, padded_pixels, base_quantiser_cg, + (float)ctx->dead_zone_threshold, + TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y, + ctx->decomp_levels, 1); + } + + // EZBC encode tile + size_t tile_size = preprocess_coefficients_ezbc( + quant_y, quant_co, quant_cg, NULL, + padded_pixels, TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y, + ctx->channel_layout, + preprocess_buffer + preprocess_offset + ); + preprocess_offset += tile_size; + + // Clear tile buffers for next iteration + memset(tile_y, 0, padded_pixels * sizeof(float)); + memset(tile_co, 0, padded_pixels * sizeof(float)); + memset(tile_cg, 0, padded_pixels * sizeof(float)); + } + } + + free(tile_y); free(tile_co); free(tile_cg); + free(quant_y); free(quant_co); free(quant_cg); } - // Step 4: Preprocess coefficients - size_t preprocess_capacity = num_pixels * 3 * sizeof(int16_t) + 65536; // Conservative - uint8_t *preprocess_buffer = tav_malloc(preprocess_capacity); + // Free full-frame YCoCg buffers + free(frame_y); free(frame_co); free(frame_cg); - // Use EZBC preprocessing (Twobitmap is deprecated) - size_t preprocessed_size = preprocess_coefficients_ezbc( - quant_y, quant_co, quant_cg, NULL, - num_pixels, width, height, ctx->channel_layout, - preprocess_buffer - ); - - // Step 5: Zstd compress - size_t compressed_bound = ZSTD_compressBound(preprocessed_size); + // Step 5: Zstd compress all tile data + size_t compressed_bound = ZSTD_compressBound(preprocess_offset); uint8_t *compression_buffer = tav_malloc(compressed_bound); size_t compressed_size = ZSTD_compress( compression_buffer, 
compressed_bound, - preprocess_buffer, preprocessed_size, + preprocess_buffer, preprocess_offset, ctx->zstd_level ); + free(preprocess_buffer); + if (ZSTD_isError(compressed_size)) { - free(work_y); free(work_co); free(work_cg); - free(quant_y); free(quant_co); free(quant_cg); - free(preprocess_buffer); free(compression_buffer); snprintf(slot->error_message, MAX_ERROR_MESSAGE, "Zstd compression failed: %s", ZSTD_getErrorName(compressed_size)); @@ -1452,10 +1565,6 @@ static int encode_gop_intra_only(tav_encoder_context_t *ctx, gop_slot_t *slot) { slot->packets = pkt; slot->num_packets = 1; - // Cleanup - free(work_y); free(work_co); free(work_cg); - free(quant_y); free(quant_co); free(quant_cg); - free(preprocess_buffer); free(compression_buffer); return 0; // Success diff --git a/video_encoder/src/encoder_tav.c b/video_encoder/src/encoder_tav.c index 3fa7180..4ea9bd5 100644 --- a/video_encoder/src/encoder_tav.c +++ b/video_encoder/src/encoder_tav.c @@ -91,6 +91,11 @@ typedef struct { size_t total_bytes; time_t start_time; + // GOP frame buffer (for tav_encoder_encode_gop()) + uint8_t **gop_frames; // Array of frame pointers [gop_size] + int gop_frame_count; // Number of frames in current GOP + int *gop_frame_numbers; // Frame numbers for timecodes [gop_size] + // CLI options int verbose; int encode_limit; // Max frames to encode (0=all) @@ -254,7 +259,7 @@ static int get_video_info(const char *input_file, int *width, int *height, static FILE* open_ffmpeg_pipe(const char *input_file, int width, int height) { char cmd[MAX_PATH * 2]; snprintf(cmd, sizeof(cmd), - "ffmpeg -i \"%s\" -f rawvideo -pix_fmt rgb24 -vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" -", + "ffmpeg -hide_banner -v quiet -i \"%s\" -f rawvideo -pix_fmt rgb24 -vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" -", input_file, width, height, width, height); FILE *fp = popen(cmd, "r"); @@ -305,12 +310,22 @@ static int write_tav_header(FILE *fp, const 
tav_encoder_params_t *params, int ha // 6 = ICtCp monoblock perceptual // Add 8 if using CDF 5/3 temporal wavelet uint8_t version; - if (params->perceptual_tuning) { - // Monoblock perceptual: version 5 (YCoCg-R) or 6 (ICtCp) - version = params->channel_layout ? 6 : 5; + if (params->monoblock) { + if (params->perceptual_tuning) { + // Monoblock perceptual: version 5 (YCoCg-R) or 6 (ICtCp) + version = params->channel_layout ? 6 : 5; + } else { + // Monoblock uniform: version 3 (YCoCg-R) or 4 (ICtCp) + version = params->channel_layout ? 4 : 3; + } } else { - // Monoblock uniform: version 3 (YCoCg-R) or 4 (ICtCp) - version = params->channel_layout ? 4 : 3; + if (params->perceptual_tuning) { + // Tiled perceptual: version 7 (YCoCg-R) or 8 (ICtCp) + version = params->channel_layout ? 8 : 7; + } else { + // Tiled uniform: version 1 (YCoCg-R) or 2 (ICtCp) + version = params->channel_layout ? 2 : 1; + } } // Add 8 if using CDF 5/3 temporal wavelet if (params->enable_temporal_dwt && params->temporal_wavelet == 0) { @@ -468,7 +483,7 @@ static int write_gop_sync_packet(FILE *fp, int frame_count) { static int extract_audio_to_file(const char *input_file, const char *output_file) { char cmd[MAX_PATH * 2]; snprintf(cmd, sizeof(cmd), - "ffmpeg -v quiet -i \"%s\" -f f32le -acodec pcm_f32le -ar %d -ac 2 " + "ffmpeg -hide_banner -v quiet -i \"%s\" -f f32le -acodec pcm_f32le -ar %d -ac 2 " "-af \"aresample=resampler=soxr:precision=28:cutoff=0.99:dither_scale=0,highpass=f=16\" " "-y \"%s\" 2>/dev/null", input_file, AUDIO_SAMPLE_RATE, output_file); @@ -952,11 +967,51 @@ static int encode_video(cli_context_t *cli) { } } - // Allocate frame buffer + // Allocate GOP frame buffer for tav_encoder_encode_gop() size_t frame_size = cli->enc_params.width * cli->enc_params.height * 3; + int gop_size = cli->enc_params.gop_size; + + // In intra-only mode, encode each frame immediately (GOP size = 1) + if (!cli->enc_params.enable_temporal_dwt) { + gop_size = 1; + } + + cli->gop_frames = 
malloc(gop_size * sizeof(uint8_t*)); + cli->gop_frame_numbers = malloc(gop_size * sizeof(int)); + cli->gop_frame_count = 0; + + if (!cli->gop_frames || !cli->gop_frame_numbers) { + fprintf(stderr, "Error: Failed to allocate GOP frame buffer\n"); + tav_encoder_free(ctx); + pclose(cli->ffmpeg_pipe); + return -1; + } + + for (int i = 0; i < gop_size; i++) { + cli->gop_frames[i] = malloc(frame_size); + if (!cli->gop_frames[i]) { + fprintf(stderr, "Error: Failed to allocate GOP frame %d\n", i); + for (int j = 0; j < i; j++) free(cli->gop_frames[j]); + free(cli->gop_frames); + free(cli->gop_frame_numbers); + tav_encoder_free(ctx); + pclose(cli->ffmpeg_pipe); + return -1; + } + } + + if (cli->verbose) { + printf(" GOP frame buffer: %d frames x %zu bytes = %zu KB\n", + gop_size, frame_size, (gop_size * frame_size) / 1024); + } + + // Temporary frame buffer for reading from FFmpeg uint8_t *rgb_frame = malloc(frame_size); if (!rgb_frame) { fprintf(stderr, "Error: Failed to allocate frame buffer\n"); + for (int i = 0; i < gop_size; i++) free(cli->gop_frames[i]); + free(cli->gop_frames); + free(cli->gop_frame_numbers); tav_encoder_free(ctx); pclose(cli->ffmpeg_pipe); return -1; @@ -981,12 +1036,12 @@ static int encode_video(cli_context_t *cli) { write_fontrom_packet(cli->output_fp, cli->fontrom_high, FONTROM_OPCODE_HIGH, cli->verbose); } - // Encoding loop + // Encoding loop using tav_encoder_encode_gop() printf("Encoding frames...\n"); cli->start_time = time(NULL); - int64_t frame_pts = 0; tav_encoder_packet_t *packet = NULL; + int encoding_error = 0; while (1) { // Check encode limit @@ -1000,9 +1055,15 @@ static int encode_video(cli_context_t *cli) { break; // EOF } else if (result < 0) { fprintf(stderr, "Error reading frame\n"); + encoding_error = 1; break; } + // Copy frame to GOP buffer + memcpy(cli->gop_frames[cli->gop_frame_count], rgb_frame, frame_size); + cli->gop_frame_numbers[cli->gop_frame_count] = (int)cli->frame_count; + cli->gop_frame_count++; + // Accumulate 
audio samples for this frame (will write when GOP completes) if (cli->has_audio && cli->audio_buffer && cli->gop_audio_buffer) { size_t samples_read = read_audio_samples(cli, cli->audio_buffer, cli->samples_per_frame); @@ -1015,40 +1076,55 @@ static int encode_video(cli_context_t *cli) { } } - // Encode frame - result = tav_encoder_encode_frame(ctx, rgb_frame, frame_pts, &packet); + cli->frame_count++; - if (result < 0) { - fprintf(stderr, "Error: %s\n", tav_encoder_get_error(ctx)); - break; - } + // Check if GOP is full + if (cli->gop_frame_count >= gop_size) { + // Encode complete GOP + result = tav_encoder_encode_gop(ctx, + (const uint8_t**)cli->gop_frames, + cli->gop_frame_count, + cli->gop_frame_numbers, + &packet); - if (result > 0 && packet) { - // GOP is complete - write in correct order: TIMECODE, AUDIO, VIDEO, GOP_SYNC - - // 1. Write timecode before GOP - write_timecode_packet(cli->output_fp, cli->frame_count - (cli->frame_count % cli->enc_params.gop_size), - cli->enc_params.fps_num, cli->enc_params.fps_den); - - // 2. Write accumulated audio for this GOP as single TAD packet - if (cli->has_audio && cli->gop_audio_samples > 0) { - write_audio_packet(cli->output_fp, cli, cli->gop_audio_buffer, cli->gop_audio_samples); - cli->gop_audio_samples = 0; // Reset for next GOP + if (result < 0) { + fprintf(stderr, "Error: %s\n", tav_encoder_get_error(ctx)); + encoding_error = 1; + break; } - // 3. Write video GOP packet - write_tav_packet(cli->output_fp, packet); - cli->total_bytes += packet->size; - cli->gop_count++; + if (packet) { + // GOP is complete - write in correct order: TIMECODE, AUDIO, VIDEO, GOP_SYNC - // 4. Write GOP_SYNC after GOP packets (0x12 = GOP unified) - if (packet->packet_type == TAV_PACKET_GOP_UNIFIED) { - // Extract GOP size from packet (byte 1) - int gop_size = packet->data[1]; - write_gop_sync_packet(cli->output_fp, gop_size); + // 1. 
Write timecode before GOP (use first frame number in GOP) + write_timecode_packet(cli->output_fp, cli->gop_frame_numbers[0], + cli->enc_params.fps_num, cli->enc_params.fps_den); + + // 2. Write accumulated audio for this GOP as single TAD packet + if (cli->has_audio && cli->gop_audio_samples > 0) { + write_audio_packet(cli->output_fp, cli, cli->gop_audio_buffer, cli->gop_audio_samples); + cli->gop_audio_samples = 0; // Reset for next GOP + } + + // 3. Write video GOP packet + write_tav_packet(cli->output_fp, packet); + cli->total_bytes += packet->size; + cli->gop_count++; + + // 4. Write GOP_SYNC after GOP packets + if (packet->packet_type == TAV_PACKET_GOP_UNIFIED) { + int frames_in_gop = packet->data[1]; + write_gop_sync_packet(cli->output_fp, frames_in_gop); + } else if (packet->packet_type == TAV_PACKET_IFRAME) { + write_gop_sync_packet(cli->output_fp, 1); + } + + tav_encoder_free_packet(packet); + packet = NULL; } - tav_encoder_free_packet(packet); + // Reset GOP buffer + cli->gop_frame_count = 0; // Progress if (cli->verbose || cli->frame_count % 60 == 0) { @@ -1065,21 +1141,27 @@ static int encode_video(cli_context_t *cli) { fflush(stdout); } } - - cli->frame_count++; - frame_pts++; } printf("\n"); - // Flush encoder - printf("Flushing encoder...\n"); - while (tav_encoder_flush(ctx, &packet) > 0) { - if (packet) { + // Encode remaining frames in GOP buffer (partial GOP) + if (!encoding_error && cli->gop_frame_count > 0) { + printf("Encoding final partial GOP (%d frames)...\n", cli->gop_frame_count); + + int result = tav_encoder_encode_gop(ctx, + (const uint8_t**)cli->gop_frames, + cli->gop_frame_count, + cli->gop_frame_numbers, + &packet); + + if (result < 0) { + fprintf(stderr, "Error encoding final GOP: %s\n", tav_encoder_get_error(ctx)); + } else if (packet) { // Write remaining packets in correct order: TIMECODE, AUDIO, VIDEO, GOP_SYNC // 1. 
Write timecode - write_timecode_packet(cli->output_fp, cli->frame_count - (cli->frame_count % cli->enc_params.gop_size), + write_timecode_packet(cli->output_fp, cli->gop_frame_numbers[0], cli->enc_params.fps_num, cli->enc_params.fps_den); // 2. Write any remaining accumulated audio for this GOP @@ -1095,8 +1177,8 @@ static int encode_video(cli_context_t *cli) { // 4. Write GOP_SYNC after GOP packets if (packet->packet_type == TAV_PACKET_GOP_UNIFIED) { - int gop_size = packet->data[1]; - write_gop_sync_packet(cli->output_fp, gop_size); + int frames_in_gop = packet->data[1]; + write_gop_sync_packet(cli->output_fp, frames_in_gop); } else if (packet->packet_type == TAV_PACKET_IFRAME) { write_gop_sync_packet(cli->output_fp, 1); } @@ -1113,6 +1195,19 @@ static int encode_video(cli_context_t *cli) { tav_encoder_free(ctx); pclose(cli->ffmpeg_pipe); + // Cleanup GOP frame buffer + if (cli->gop_frames) { + for (int i = 0; i < gop_size; i++) { + free(cli->gop_frames[i]); + } + free(cli->gop_frames); + cli->gop_frames = NULL; + } + if (cli->gop_frame_numbers) { + free(cli->gop_frame_numbers); + cli->gop_frame_numbers = NULL; + } + // Cleanup audio resources if (cli->audio_buffer) { free(cli->audio_buffer);