diff --git a/video_encoder/src/decoder_tav_dt.c b/video_encoder/src/decoder_tav_dt.c index 0a85621..6a5acde 100644 --- a/video_encoder/src/decoder_tav_dt.c +++ b/video_encoder/src/decoder_tav_dt.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "tav_video_decoder.h" #include "decoder_tad.h" @@ -64,6 +65,53 @@ static const int QUALITY_CO[] = {123, 108, 91, 76, 59, 29}; static const int QUALITY_CG[] = {148, 133, 113, 99, 76, 39}; #define MAX_PATH 4096 +#define MAX_DECODE_THREADS 16 + +// ============================================================================= +// Multithreading Structures +// ============================================================================= + +#define DECODE_SLOT_EMPTY 0 +#define DECODE_SLOT_PENDING 1 +#define DECODE_SLOT_DONE 2 + +// GOP decode job structure +typedef struct { + // Input + uint8_t *compressed_data; // Raw GOP data to decode + size_t compressed_size; + int gop_size; // Number of frames in this GOP + int job_id; // Sequential job ID for ordering output + + // Output + uint8_t **rgb_frames; // Decoded RGB24 frames [gop_size] + int frames_allocated; // How many frames are allocated + int decode_result; // 0 = success, -1 = error + + // Status + volatile int status; +} gop_decode_job_t; + +/** + * Get number of available CPUs. + */ +static int get_available_cpus(void) { +#ifdef _SC_NPROCESSORS_ONLN + long nproc = sysconf(_SC_NPROCESSORS_ONLN); + if (nproc > 0) { + return (int)nproc; + } +#endif + return 1; // Fallback to single core +} + +/** + * Get default thread count (cap at 8) + */ +static int get_default_thread_count(void) { + int available = get_available_cpus(); + return available < 8 ? 
available : 8; +} // ============================================================================= // CRC-32 @@ -138,6 +186,23 @@ typedef struct { // Options int verbose; int dump_mode; // Just dump packets, don't decode + + // Multithreading + int num_threads; + int num_slots; + gop_decode_job_t *slots; + tav_video_context_t **worker_video_ctx; // Per-thread decoder contexts + pthread_t *worker_threads; + pthread_mutex_t mutex; + pthread_cond_t cond_job_available; + pthread_cond_t cond_slot_free; + volatile int threads_should_exit; + volatile int next_write_slot; // Next slot to write to output + volatile int jobs_submitted; + volatile int jobs_completed; + + // Timing + time_t start_time; } dt_decoder_t; // ============================================================================= @@ -151,6 +216,8 @@ static void print_usage(const char *program) { printf(" -i, --input FILE Input TAV-DT file\n"); printf(" -o, --output FILE Output video file (FFV1/MKV)\n"); printf("\nOptions:\n"); + printf(" -t, --threads N Number of decoder threads (default: min(8, available CPUs))\n"); + printf(" 0 or 1 = single-threaded, 2-16 = multithreaded\n"); printf(" --dump Dump packet info without decoding\n"); printf(" -v, --verbose Verbose output\n"); printf(" --help Show this help\n"); @@ -354,6 +421,12 @@ static int decode_audio_subpacket(dt_decoder_t *dec, const uint8_t *data, size_t // Calculate RS payload size size_t rs_total = rs_block_count * RS_BLOCK_SIZE; + // Handle empty audio packet (no samples in this GOP) + if (compressed_size == 0 || rs_block_count == 0 || sample_count == 0) { + *consumed = offset; + return 0; // Successfully processed empty audio packet + } + if (offset + rs_total > data_len) { if (dec->verbose) { fprintf(stderr, "Warning: Audio packet truncated\n"); @@ -386,8 +459,13 @@ static int decode_audio_subpacket(dt_decoder_t *dec, const uint8_t *data, size_t // [sample_count(2)][max_index(1)][payload_size(4)][zstd_data] // No need to rebuild the header - pass 
it directly to the TAD decoder
 
-    // Decode TAD to PCMu8
-    uint8_t *pcmu8_output = malloc(sample_count * 2);
+    // Read the actual sample count from the TAD chunk header (not the wrapper header)
+    // The wrapper header sample_count might be incorrect or 0 in some cases
+    uint16_t tad_chunk_sample_count;
+    memcpy(&tad_chunk_sample_count, decoded_payload, 2);
+
+    // Decode TAD to PCMu8 - allocate based on TAD chunk's sample count
+    uint8_t *pcmu8_output = malloc(tad_chunk_sample_count * 2);
     if (!pcmu8_output) {
         free(rs_data);
         free(decoded_payload);
@@ -717,6 +795,303 @@ static int spawn_ffmpeg(dt_decoder_t *dec) {
     return 0;
 }
 
+// =============================================================================
+// Multithreading Support
+// =============================================================================
+
+// Slot state held while a worker owns a slot and is still decoding it. It is
+// deliberately distinct from DECODE_SLOT_DONE so that a slot is never visible
+// as finished before its frames have actually been produced.
+// NOTE(review): assumes the main loop only tests slots for DECODE_SLOT_EMPTY /
+// DECODE_SLOT_DONE and treats any other value as busy - confirm.
+#ifndef DECODE_SLOT_DECODING
+#define DECODE_SLOT_DECODING 3
+#endif
+
+// Start-up argument for one worker. Allocated by init_decoder_threads() and
+// freed by the worker itself as soon as it has copied what it needs.
+typedef struct {
+    dt_decoder_t *dec;
+    int thread_idx;  // Index into dec->worker_video_ctx
+} decoder_worker_arg_t;
+
+// Worker thread function - decodes GOPs in parallel.
+// arg is a heap-allocated decoder_worker_arg_t owned by this thread.
+// Runs until dec->threads_should_exit is set; always returns NULL.
+static void *decoder_worker_thread(void *arg) {
+    decoder_worker_arg_t *warg = (decoder_worker_arg_t *)arg;
+    dt_decoder_t *dec = warg->dec;
+    tav_video_context_t *my_video_ctx = dec->worker_video_ctx[warg->thread_idx];
+    free(warg);
+
+    for (;;) {
+        int slot_idx = -1;
+
+        // Claim the first pending slot, sleeping until one appears or we are
+        // told to exit. Once the exit flag is set no new work is picked up.
+        pthread_mutex_lock(&dec->mutex);
+        while (!dec->threads_should_exit) {
+            for (int i = 0; i < dec->num_slots; i++) {
+                if (dec->slots[i].status == DECODE_SLOT_PENDING &&
+                    dec->slots[i].compressed_data != NULL) {
+                    dec->slots[i].status = DECODE_SLOT_DECODING;
+                    slot_idx = i;
+                    break;
+                }
+            }
+            if (slot_idx >= 0) {
+                break;
+            }
+            pthread_cond_wait(&dec->cond_job_available, &dec->mutex);
+        }
+        pthread_mutex_unlock(&dec->mutex);
+
+        if (slot_idx < 0) {
+            break;  // Exit requested and nothing was claimed
+        }
+
+        gop_decode_job_t *job = &dec->slots[slot_idx];
+
+        // Decode outside the lock using this thread's private decoder context
+        job->decode_result = tav_video_decode_gop(
+            my_video_ctx,
+            job->compressed_data,
+            job->compressed_size,
+            job->gop_size,
+            job->rgb_frames
+        );
+
+        // Free compressed data
+        free(job->compressed_data);
+        job->compressed_data = NULL;
+
+        // Publish the result and wake anyone waiting on cond_slot_free
+        pthread_mutex_lock(&dec->mutex);
+        job->status = DECODE_SLOT_DONE;
+        dec->jobs_completed++;
+        pthread_cond_broadcast(&dec->cond_slot_free);
+        pthread_mutex_unlock(&dec->mutex);
+    }
+
+    return NULL;
+}
+
+// Release the per-thread decoder contexts and every slot buffer. Tolerates a
+// partially initialised decoder; must only run once no worker is alive.
+static void free_decoder_thread_buffers(dt_decoder_t *dec) {
+    if (dec->worker_video_ctx) {
+        for (int i = 0; i < dec->num_threads; i++) {
+            if (dec->worker_video_ctx[i]) {
+                tav_video_free(dec->worker_video_ctx[i]);
+            }
+        }
+        free(dec->worker_video_ctx);
+        dec->worker_video_ctx = NULL;
+    }
+
+    if (dec->slots) {
+        for (int i = 0; i < dec->num_slots; i++) {
+            if (dec->slots[i].rgb_frames) {
+                for (int f = 0; f < dec->slots[i].frames_allocated; f++) {
+                    free(dec->slots[i].rgb_frames[f]);
+                }
+                free(dec->slots[i].rgb_frames);
+            }
+            free(dec->slots[i].compressed_data);
+        }
+        free(dec->slots);
+        dec->slots = NULL;
+    }
+}
+
+// Set up slots, per-thread decoder contexts, synchronisation objects and the
+// worker threads. Returns 0 on success (or immediately when num_threads <= 0,
+// i.e. single-threaded mode) and -1 on failure. On failure everything acquired
+// so far is released and num_threads/num_slots are reset to 0, so a later
+// cleanup_decoder_threads() call is a no-op.
+static int init_decoder_threads(dt_decoder_t *dec) {
+    if (dec->num_threads <= 0) {
+        return 0;  // Single-threaded mode
+    }
+
+    // Limit threads
+    if (dec->num_threads > MAX_DECODE_THREADS) {
+        dec->num_threads = MAX_DECODE_THREADS;
+    }
+
+    const int max_gop_size = 16;  // TAV-DT uses fixed 16-frame GOPs
+    int internal_height = dec->is_interlaced ? dec->height / 2 : dec->height;
+    // Widen before multiplying so large frames cannot overflow int
+    size_t frame_size = (size_t)dec->width * (size_t)internal_height * 3;
+    int sync_ready = 0;       // Number of sync primitives initialised so far
+    int threads_started = 0;  // Number of workers that must be joined on failure
+
+    tav_video_params_t video_params = {
+        .width = dec->width,
+        .height = internal_height,
+        .decomp_levels = DT_SPATIAL_LEVELS,
+        .temporal_levels = DT_TEMPORAL_LEVELS,
+        .wavelet_filter = 1,      // CDF 9/7
+        .temporal_wavelet = 255,  // Haar
+        .entropy_coder = 1,       // EZBC
+        .channel_layout = 0,      // YCoCg-R
+        .perceptual_tuning = 1,
+        .quantiser_y = QUALITY_Y[dec->quality_index],
+        .quantiser_co = QUALITY_CO[dec->quality_index],
+        .quantiser_cg = QUALITY_CG[dec->quality_index],
+        .encoder_preset = 0x01,   // Sports
+        .monoblock = 1
+    };
+
+    // Number of slots = threads + 2 for pipelining
+    dec->num_slots = dec->num_threads + 2;
+
+    // Allocate slots (zeroed, so members not yet allocated are NULL on unwind)
+    dec->slots = calloc(dec->num_slots, sizeof(gop_decode_job_t));
+    if (!dec->slots) {
+        fprintf(stderr, "Error: Failed to allocate decode slots\n");
+        goto fail;
+    }
+
+    // Allocate frame buffers for each slot
+    for (int i = 0; i < dec->num_slots; i++) {
+        dec->slots[i].status = DECODE_SLOT_EMPTY;
+        dec->slots[i].job_id = -1;
+        dec->slots[i].rgb_frames = calloc(max_gop_size, sizeof(uint8_t *));
+        if (!dec->slots[i].rgb_frames) {
+            fprintf(stderr, "Error: Failed to allocate frame pointers for slot %d\n", i);
+            goto fail;
+        }
+        dec->slots[i].frames_allocated = max_gop_size;
+        for (int f = 0; f < max_gop_size; f++) {
+            dec->slots[i].rgb_frames[f] = malloc(frame_size);
+            if (!dec->slots[i].rgb_frames[f]) {
+                fprintf(stderr, "Error: Failed to allocate frame buffer for slot %d\n", i);
+                goto fail;
+            }
+        }
+    }
+
+    // Create per-thread video decoder contexts
+    dec->worker_video_ctx = calloc(dec->num_threads, sizeof(tav_video_context_t *));
+    if (!dec->worker_video_ctx) {
+        fprintf(stderr, "Error: Failed to allocate worker video contexts\n");
+        goto fail;
+    }
+    for (int i = 0; i < dec->num_threads; i++) {
+        dec->worker_video_ctx[i] = tav_video_create(&video_params);
+        if (!dec->worker_video_ctx[i]) {
+            fprintf(stderr, "Error: Failed to create video context for thread %d\n", i);
+            goto fail;
+        }
+    }
+
+    // Initialize synchronization primitives
+    if (pthread_mutex_init(&dec->mutex, NULL) != 0) {
+        fprintf(stderr, "Error: Failed to initialize decoder mutex\n");
+        goto fail;
+    }
+    sync_ready = 1;
+    if (pthread_cond_init(&dec->cond_job_available, NULL) != 0) {
+        fprintf(stderr, "Error: Failed to initialize cond_job_available\n");
+        goto fail;
+    }
+    sync_ready = 2;
+    if (pthread_cond_init(&dec->cond_slot_free, NULL) != 0) {
+        fprintf(stderr, "Error: Failed to initialize cond_slot_free\n");
+        goto fail;
+    }
+    sync_ready = 3;
+
+    dec->threads_should_exit = 0;
+    dec->next_write_slot = 0;
+    dec->jobs_submitted = 0;
+    dec->jobs_completed = 0;
+
+    // Create worker threads
+    dec->worker_threads = malloc(dec->num_threads * sizeof(pthread_t));
+    if (!dec->worker_threads) {
+        fprintf(stderr, "Error: Failed to allocate worker threads\n");
+        goto fail;
+    }
+
+    for (int i = 0; i < dec->num_threads; i++) {
+        // Hand each worker its index explicitly; looking it up via
+        // pthread_self() would race with pthread_create() storing the ID.
+        decoder_worker_arg_t *warg = malloc(sizeof *warg);
+        if (!warg) {
+            fprintf(stderr, "Error: Failed to allocate worker argument %d\n", i);
+            goto fail;
+        }
+        warg->dec = dec;
+        warg->thread_idx = i;
+        if (pthread_create(&dec->worker_threads[i], NULL, decoder_worker_thread, warg) != 0) {
+            fprintf(stderr, "Error: Failed to create worker thread %d\n", i);
+            free(warg);
+            goto fail;
+        }
+        threads_started++;
+    }
+
+    if (dec->verbose) {
+        printf("Initialized %d decoder worker threads with %d slots\n",
+               dec->num_threads, dec->num_slots);
+    }
+
+    return 0;
+
+fail:
+    // Stop and join whatever workers already exist before freeing their state
+    if (threads_started > 0) {
+        pthread_mutex_lock(&dec->mutex);
+        dec->threads_should_exit = 1;
+        pthread_cond_broadcast(&dec->cond_job_available);
+        pthread_mutex_unlock(&dec->mutex);
+        for (int i = 0; i < threads_started; i++) {
+            pthread_join(dec->worker_threads[i], NULL);
+        }
+    }
+    free(dec->worker_threads);
+    dec->worker_threads = NULL;
+
+    if (sync_ready >= 3) pthread_cond_destroy(&dec->cond_slot_free);
+    if (sync_ready >= 2) pthread_cond_destroy(&dec->cond_job_available);
+    if (sync_ready >= 1) pthread_mutex_destroy(&dec->mutex);
+
+    free_decoder_thread_buffers(dec);
+    dec->num_threads = 0;
+    dec->num_slots = 0;
+    return -1;
+}
+
+// Stop the workers and release everything init_decoder_threads() created.
+// Does nothing in single-threaded mode (num_threads <= 0) or when no worker
+// threads were ever created.
+static void cleanup_decoder_threads(dt_decoder_t *dec) {
+    if (dec->num_threads <= 0 || !dec->worker_threads) return;
+
+    // Signal threads to exit
+    pthread_mutex_lock(&dec->mutex);
+    dec->threads_should_exit = 1;
+    pthread_cond_broadcast(&dec->cond_job_available);
+    pthread_mutex_unlock(&dec->mutex);
+
+    // Wait for threads to finish
+    for (int i = 0; i < dec->num_threads; i++) {
+        pthread_join(dec->worker_threads[i], NULL);
+    }
+    free(dec->worker_threads);
+    dec->worker_threads = NULL;
+
+    // Free per-thread video contexts and slots
+    free_decoder_thread_buffers(dec);
+
+    // Destroy sync primitives
+    pthread_mutex_destroy(&dec->mutex);
+    pthread_cond_destroy(&dec->cond_job_available);
+    pthread_cond_destroy(&dec->cond_slot_free);
+}
+
 // =============================================================================
 // Main Decoding 
Loop // ============================================================================= @@ -860,6 +1163,9 @@ int main(int argc, char **argv) { dt_decoder_t dec; memset(&dec, 0, sizeof(dec)); + // Default thread count + dec.num_threads = get_default_thread_count(); + // Initialize FEC libraries rs_init(); ldpc_init(); @@ -867,6 +1173,7 @@ int main(int argc, char **argv) { static struct option long_options[] = { {"input", required_argument, 0, 'i'}, {"output", required_argument, 0, 'o'}, + {"threads", required_argument, 0, 't'}, {"dump", no_argument, 0, 'd'}, {"verbose", no_argument, 0, 'v'}, {"help", no_argument, 0, 'h'}, @@ -874,7 +1181,7 @@ int main(int argc, char **argv) { }; int opt; - while ((opt = getopt_long(argc, argv, "i:o:dvh", long_options, NULL)) != -1) { + while ((opt = getopt_long(argc, argv, "i:o:t:dvh", long_options, NULL)) != -1) { switch (opt) { case 'i': dec.input_file = optarg; @@ -882,6 +1189,17 @@ int main(int argc, char **argv) { case 'o': dec.output_file = optarg; break; + case 't': { + int threads = atoi(optarg); + if (threads < 0) { + fprintf(stderr, "Error: Thread count must be positive\n"); + return 1; + } + // Both 0 and 1 mean single-threaded (use value 0 internally) + dec.num_threads = (threads <= 1) ? 
0 : threads; + if (dec.num_threads > MAX_DECODE_THREADS) dec.num_threads = MAX_DECODE_THREADS; + break; + } case 'd': dec.dump_mode = 1; break; diff --git a/video_encoder/src/encoder_tav_dt.c b/video_encoder/src/encoder_tav_dt.c index 07c64ce..4bb57ba 100644 --- a/video_encoder/src/encoder_tav_dt.c +++ b/video_encoder/src/encoder_tav_dt.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "tav_encoder_lib.h" #include "encoder_tad.h" @@ -67,6 +68,60 @@ static const int QUALITY_CG[] = {148, 133, 113, 99, 76, 39}; // Audio samples per GOP (32kHz / framerate * gop_size) #define AUDIO_SAMPLE_RATE 32000 +// ============================================================================= +// Multithreading Structures +// ============================================================================= + +#define GOP_SLOT_EMPTY 0 +#define GOP_SLOT_READY 1 +#define GOP_SLOT_ENCODING 2 +#define GOP_SLOT_COMPLETE 3 + +typedef struct { + // Input frames (copied from main thread) + uint8_t **rgb_frames; // Frame data pointers [gop_size] + int *frame_numbers; // Frame number array [gop_size] + int num_frames; // Actual number of frames in this GOP + int gop_index; // Sequential GOP index for ordering output + + // Audio samples for this GOP + float *audio_samples; // Interleaved stereo samples + size_t audio_sample_count; + + // Output + tav_encoder_packet_t *packet; // Encoded video packet + uint8_t *tad_output; // Encoded audio data + size_t tad_size; // Encoded audio size + int success; // 1 if encoding succeeded + + // Encoder params (copy for thread safety) + tav_encoder_params_t params; + + // Slot status + volatile int status; +} gop_job_t; + +/** + * Get number of available CPUs. 
+ */ +static int get_available_cpus(void) { +#ifdef _SC_NPROCESSORS_ONLN + long nproc = sysconf(_SC_NPROCESSORS_ONLN); + if (nproc > 0) { + return (int)nproc; + } +#endif + return 1; // Fallback to single core +} + +/** + * Get default thread count (cap at 8) + */ +static int get_default_thread_count(void) { + int available = get_available_cpus(); + return available < 8 ? available : 8; +} + // ============================================================================= // CRC-32 // ============================================================================= @@ -142,6 +197,18 @@ typedef struct { // Options int verbose; int encode_limit; + + // Multithreading + int num_threads; // 0 = single-threaded, 1+ = num worker threads + gop_job_t *gop_jobs; // Array of GOP job slots [num_threads] + pthread_t *worker_threads; // Array of worker thread handles [num_threads] + pthread_mutex_t job_mutex; // Mutex for job slot access + pthread_cond_t job_ready; // Signal when a job slot is ready for encoding + pthread_cond_t job_complete; // Signal when a job slot is complete + volatile int shutdown_workers; // 1 when workers should exit + + // Encoder params (template for worker threads) + tav_encoder_params_t enc_params; } dt_encoder_t; // ============================================================================= @@ -160,6 +227,8 @@ static void print_usage(const char *program) { printf(" --pal Force PAL format (720x576)\n"); printf(" --interlaced Interlaced output\n"); printf(" --encode-limit N Encode only N frames (for testing)\n"); + printf(" -t, --threads N Parallel encoding threads (default: min(8, available CPUs))\n"); + printf(" 0 or 1 = single-threaded, 2-16 = multithreaded\n"); printf(" -v, --verbose Verbose output\n"); printf(" -h, --help Show this help\n"); } @@ -411,94 +480,236 @@ static FILE *spawn_ffmpeg_audio(dt_encoder_t *enc, pid_t *pid) { return fdopen(pipefd[0], "rb"); } +// ============================================================================= +// 
Multithreading Support
+// =============================================================================
+
+/**
+ * Worker thread context - passed to worker_thread_main.
+ * Allocated by init_threading(); the worker frees it when it exits.
+ */
+typedef struct {
+    dt_encoder_t *enc;
+    int thread_id;
+} worker_context_t;
+
+/**
+ * Worker thread main function.
+ * Continuously picks up jobs from the job pool and encodes them.
+ *
+ * arg is a heap-allocated worker_context_t that this thread frees on exit.
+ * A slot is claimed by moving it from GOP_SLOT_READY to GOP_SLOT_ENCODING
+ * under job_mutex, encoded without the lock held, then published as
+ * GOP_SLOT_COMPLETE with a broadcast on job_complete. The loop ends once
+ * shutdown_workers is set. Always returns NULL.
+ */
+static void *worker_thread_main(void *arg) {
+    worker_context_t *wctx = (worker_context_t *)arg;
+    dt_encoder_t *enc = wctx->enc;
+    (void)wctx->thread_id;  // Unused but kept for debugging
+
+    pthread_mutex_lock(&enc->job_mutex);
+    while (!enc->shutdown_workers) {
+        // Look for a job slot that is ready to encode
+        int found_job = -1;
+        for (int i = 0; i < enc->num_threads; i++) {
+            if (enc->gop_jobs[i].status == GOP_SLOT_READY) {
+                enc->gop_jobs[i].status = GOP_SLOT_ENCODING;
+                found_job = i;
+                break;
+            }
+        }
+
+        if (found_job < 0) {
+            // No job found, wait for signal
+            pthread_cond_wait(&enc->job_ready, &enc->job_mutex);
+            continue;
+        }
+
+        // The slot is ours now; do the expensive work without the lock
+        pthread_mutex_unlock(&enc->job_mutex);
+        gop_job_t *job = &enc->gop_jobs[found_job];
+
+        // Create thread-local encoder context
+        tav_encoder_context_t *ctx = tav_encoder_create(&job->params);
+        if (!ctx) {
+            fprintf(stderr, "Failed to create encoder for GOP %d\n", job->gop_index);
+            job->success = 0;
+        } else {
+            // Encode video GOP
+            int result = tav_encoder_encode_gop(ctx,
+                                                (const uint8_t **)job->rgb_frames,
+                                                job->num_frames, job->frame_numbers,
+                                                &job->packet);
+            job->success = (result >= 0 && job->packet != NULL);
+
+            // Encode audio
+            if (job->success && job->audio_sample_count > 0) {
+                int max_index = tad32_quality_to_max_index(enc->quality_index);
+                job->tad_size = tad32_encode_chunk(job->audio_samples, job->audio_sample_count,
+                                                   max_index, 1.0f, job->tad_output);
+            }
+
+            tav_encoder_free(ctx);
+        }
+
+        // Mark job as complete; the lock stays held for the next scan
+        pthread_mutex_lock(&enc->job_mutex);
+        job->status = GOP_SLOT_COMPLETE;
+        pthread_cond_broadcast(&enc->job_complete);
+    }
+    pthread_mutex_unlock(&enc->job_mutex);
+
+    free(wctx);
+    return NULL;
+}
+
+/**
+ * Initialize multithreading resources.
+ * Returns 0 on success, -1 on failure.
+ *
+ * Does nothing (and returns 0) when num_threads <= 0. On failure every
+ * resource acquired so far is released, already-started workers are stopped
+ * and joined, gop_jobs/worker_threads are reset to NULL and num_threads is
+ * reset to 0 so that a later shutdown_threading() call is a no-op.
+ */
+static int init_threading(dt_encoder_t *enc) {
+    if (enc->num_threads <= 0) {
+        return 0;  // Single-threaded mode
+    }
+
+    int threads_started = 0;  // Workers that must be joined on failure
+
+    // Known state for the unwind path, whatever the caller left in here
+    enc->gop_jobs = NULL;
+    enc->worker_threads = NULL;
+
+    // Initialize mutex and condition variables
+    if (pthread_mutex_init(&enc->job_mutex, NULL) != 0) {
+        fprintf(stderr, "Error: Failed to initialize job mutex\n");
+        goto fail;
+    }
+    if (pthread_cond_init(&enc->job_ready, NULL) != 0) {
+        fprintf(stderr, "Error: Failed to initialize job_ready cond\n");
+        goto fail_mutex;
+    }
+    if (pthread_cond_init(&enc->job_complete, NULL) != 0) {
+        fprintf(stderr, "Error: Failed to initialize job_complete cond\n");
+        goto fail_ready;
+    }
+
+    // Allocate job slots (one per thread)
+    enc->gop_jobs = calloc(enc->num_threads, sizeof(gop_job_t));
+    if (!enc->gop_jobs) {
+        fprintf(stderr, "Error: Failed to allocate job slots\n");
+        goto fail_all;
+    }
+
+    // Allocate worker thread handles
+    enc->worker_threads = malloc(enc->num_threads * sizeof(pthread_t));
+    if (!enc->worker_threads) {
+        fprintf(stderr, "Error: Failed to allocate thread handles\n");
+        goto fail_all;
+    }
+
+    // Start worker threads
+    enc->shutdown_workers = 0;
+    for (int i = 0; i < enc->num_threads; i++) {
+        worker_context_t *wctx = malloc(sizeof(worker_context_t));
+        if (!wctx) {
+            fprintf(stderr, "Error: Failed to allocate worker context\n");
+            goto fail_all;
+        }
+        wctx->enc = enc;
+        wctx->thread_id = i;
+
+        if (pthread_create(&enc->worker_threads[i], NULL, worker_thread_main, wctx) != 0) {
+            fprintf(stderr, "Error: Failed to create worker thread %d\n", i);
+            free(wctx);
+            goto fail_all;
+        }
+        threads_started++;
+    }
+
+    printf("Started %d worker threads for parallel GOP encoding\n", enc->num_threads);
+    return 0;
+
+fail_all:
+    if (threads_started > 0) {
+        // The flag must change under job_mutex: a worker that has tested it
+        // but not yet reached pthread_cond_wait() would otherwise miss the
+        // broadcast and the join below would never return.
+        pthread_mutex_lock(&enc->job_mutex);
+        enc->shutdown_workers = 1;
+        pthread_cond_broadcast(&enc->job_ready);
+        pthread_mutex_unlock(&enc->job_mutex);
+        for (int j = 0; j < threads_started; j++) {
+            pthread_join(enc->worker_threads[j], NULL);
+        }
+    }
+    free(enc->worker_threads);
+    enc->worker_threads = NULL;
+    free(enc->gop_jobs);
+    enc->gop_jobs = NULL;
+    pthread_cond_destroy(&enc->job_complete);
+fail_ready:
+    pthread_cond_destroy(&enc->job_ready);
+fail_mutex:
+    pthread_mutex_destroy(&enc->job_mutex);
+fail:
+    enc->num_threads = 0;
+    return -1;
+}
+
+/**
+ * Shutdown multithreading resources.
+ */
+static void shutdown_threading(dt_encoder_t *enc) {
+    // Nothing to do in single-threaded mode, or when no worker handles exist
+    // (threading was never set up) - joining through a NULL array would crash.
+    if (enc->num_threads <= 0 || !enc->worker_threads) {
+        return;
+    }
+
+    // Signal workers to shutdown (flag changes under the mutex so that no
+    // worker can miss the broadcast)
+    pthread_mutex_lock(&enc->job_mutex);
+    enc->shutdown_workers = 1;
+    pthread_cond_broadcast(&enc->job_ready);
+    pthread_mutex_unlock(&enc->job_mutex);
+
+    // Wait for all workers to finish
+    for (int i = 0; i < enc->num_threads; i++) {
+        pthread_join(enc->worker_threads[i], NULL);
+    }
+
+    // Free job slots (and any remaining resources)
+    // NOTE(review): only leftover packets and the slot array are released
+    // here; the per-slot frame/audio/TAD buffers are not - confirm the caller
+    // frees them.
+    if (enc->gop_jobs) {
+        for (int i = 0; i < enc->num_threads; i++) {
+            if (enc->gop_jobs[i].packet) {
+                tav_encoder_free_packet(enc->gop_jobs[i].packet);
+            }
+        }
+        free(enc->gop_jobs);
+        enc->gop_jobs = NULL;
+    }
+
+    free(enc->worker_threads);
+    enc->worker_threads = NULL;
+
+    pthread_cond_destroy(&enc->job_complete);
+    pthread_cond_destroy(&enc->job_ready);
+    pthread_mutex_destroy(&enc->job_mutex);
+}
+
 // =============================================================================
 // Main Encoding Loop
 // =============================================================================
 
-static int run_encoder(dt_encoder_t *enc) {
-    // Open output file
-    enc->output_fp = fopen(enc->output_file, "wb");
-    if (!enc->output_fp) {
-        fprintf(stderr, "Error: Cannot create output file: %s\n", enc->output_file);
-        return -1;
-    }
-
-    // Set up video encoder
-    tav_encoder_params_t params;
-    tav_encoder_params_init(&params, enc->width, enc->height);
-    params.fps_num = enc->fps_num;
-    params.fps_den = enc->fps_den;
-    params.wavelet_type = 1;  // CDF 9/7
-    params.temporal_wavelet = 255;  // Haar
-    params.decomp_levels = DT_SPATIAL_LEVELS;
-    params.temporal_levels = DT_TEMPORAL_LEVELS;
-    params.enable_temporal_dwt = 1;
-    params.gop_size = DT_GOP_SIZE;
-    params.quality_level = enc->quality_index;
-    params.quantiser_y = QUALITY_Y[enc->quality_index];
-    params.quantiser_co = QUALITY_CO[enc->quality_index];
-    params.quantiser_cg = QUALITY_CG[enc->quality_index];
-    params.entropy_coder = 1;  // EZBC
-    params.encoder_preset = 0x01;  // Sports mode
-    params.monoblock = 1;  // Force monoblock
-    params.verbose = enc->verbose;
-
-    enc->video_ctx = tav_encoder_create(&params);
-    if (!enc->video_ctx) {
-        fprintf(stderr, "Error: Cannot create video encoder\n");
-        fclose(enc->output_fp);
-        return -1;
-    }
-
-    printf("Forced Monoblock mode (--monoblock)\n");
-
-    // Get actual parameters (may have been adjusted)
-    tav_encoder_get_params(enc->video_ctx, &params);
-
-    if (enc->verbose) {
-        printf("Auto-selected Haar temporal wavelet with sports mode (resolution: %dx%d = %d pixels, quantiser_y = %d)\n",
-               enc->width, enc->height, enc->width * enc->height, params.quantiser_y);
-    }
-
-    // Spawn FFmpeg for video
-    pid_t video_pid;
-    FILE *video_pipe = spawn_ffmpeg_video(enc, &video_pid);
-    if (!video_pipe) {
-        tav_encoder_free(enc->video_ctx);
-        fclose(enc->output_fp);
-        return -1;
-    }
-
-    // Spawn FFmpeg for audio
-    pid_t audio_pid;
-    FILE *audio_pipe = spawn_ffmpeg_audio(enc, &audio_pid);
-    if (!audio_pipe) {
-        fclose(video_pipe);
-        waitpid(video_pid, NULL, 0);
-        tav_encoder_free(enc->video_ctx);
-        fclose(enc->output_fp);
-        return -1;
-    }
-
-    // Allocate frame buffers
+// Single-threaded encoding loop
+static int run_encoder_st(dt_encoder_t *enc, FILE *video_pipe, FILE *audio_pipe,
+                          pid_t video_pid __attribute__((unused)),
+                          pid_t audio_pid __attribute__((unused))) {
     size_t frame_size = enc->width * enc->height * 3;
-    enc->gop_frames = malloc(DT_GOP_SIZE * sizeof(uint8_t *));
-    for (int i = 0; i < DT_GOP_SIZE; i++) {
-        enc->gop_frames[i] = malloc(frame_size);
-    }
-
-    // Audio buffer (enough for one GOP worth of audio)
     double gop_duration = (double)DT_GOP_SIZE * enc->fps_den / enc->fps_num;
     size_t audio_samples_per_gop = (size_t)(AUDIO_SAMPLE_RATE * gop_duration) + 1024;
-    enc->audio_buffer = malloc(audio_samples_per_gop * 2 * sizeof(float));
-    enc->audio_buffer_capacity = audio_samples_per_gop;
-    enc->audio_buffer_samples = 0;
 
     // TAD output buffer
-    size_t tad_buffer_size = 
audio_samples_per_gop * 2; // Conservative estimate + size_t tad_buffer_size = audio_samples_per_gop * 2; uint8_t *tad_output = malloc(tad_buffer_size); - // Encoding loop enc->frame_number = 0; enc->gop_frame_count = 0; enc->current_timecode_ns = 0; @@ -506,26 +717,13 @@ static int run_encoder(dt_encoder_t *enc) { clock_t start_time = clock(); while (1) { - // Check encode limit if (enc->encode_limit > 0 && enc->frame_number >= enc->encode_limit) { break; } - // Read video frame size_t bytes_read = fread(enc->gop_frames[enc->gop_frame_count], 1, frame_size, video_pipe); if (bytes_read < frame_size) { - if (enc->verbose) { - fprintf(stderr, "Video read incomplete: got %zu/%zu bytes, frame %d, eof=%d, error=%d\n", - bytes_read, frame_size, enc->frame_number, feof(video_pipe), ferror(video_pipe)); - fprintf(stderr, "Audio buffer status: %zu/%zu samples\n", - enc->audio_buffer_samples, enc->audio_buffer_capacity); - // Try to read more audio to see if pipe is blocked - float test_audio[16]; - size_t test_read = fread(test_audio, sizeof(float), 16, audio_pipe); - fprintf(stderr, "Test audio read: %zu floats, eof=%d, error=%d\n", - test_read, feof(audio_pipe), ferror(audio_pipe)); - } - break; // End of video + break; } enc->gop_frame_count++; @@ -536,8 +734,6 @@ static int run_encoder(dt_encoder_t *enc) { size_t audio_samples_per_frame = (size_t)(AUDIO_SAMPLE_RATE * frame_duration); size_t audio_bytes = audio_samples_per_frame * 2 * sizeof(float); - // Always read audio to prevent pipe from filling up and blocking FFmpeg - // Expand buffer if needed if (enc->audio_buffer_samples + audio_samples_per_frame > enc->audio_buffer_capacity) { size_t new_capacity = enc->audio_buffer_capacity * 2; float *new_buffer = realloc(enc->audio_buffer, new_capacity * 2 * sizeof(float)); @@ -553,7 +749,6 @@ static int run_encoder(dt_encoder_t *enc) { // Encode GOP when full if (enc->gop_frame_count >= DT_GOP_SIZE) { - // Encode video GOP tav_encoder_packet_t *video_packet = NULL; int 
frame_numbers[DT_GOP_SIZE]; for (int i = 0; i < DT_GOP_SIZE; i++) { @@ -569,36 +764,28 @@ static int run_encoder(dt_encoder_t *enc) { break; } - // Encode audio int max_index = tad32_quality_to_max_index(enc->quality_index); size_t tad_size = tad32_encode_chunk(enc->audio_buffer, enc->audio_buffer_samples, max_index, 1.0f, tad_output); - // Write packet write_packet(enc, enc->current_timecode_ns, tad_output, tad_size, video_packet->data, video_packet->size, DT_GOP_SIZE, (uint16_t)enc->audio_buffer_samples, max_index); - // Update timecode enc->current_timecode_ns += (uint64_t)(gop_duration * 1e9); enc->frames_encoded += DT_GOP_SIZE; - - // Reset buffers enc->gop_frame_count = 0; enc->audio_buffer_samples = 0; tav_encoder_free_packet(video_packet); - // Display progress (similar to reference TAV encoder) + // Display progress clock_t now = clock(); double elapsed = (double)(now - start_time) / CLOCKS_PER_SEC; double fps = elapsed > 0 ? (double)enc->frame_number / elapsed : 0.0; - - // Calculate bitrate: output_size_bits / duration_seconds / 1000 double duration = (double)enc->frame_number * enc->fps_den / enc->fps_num; double bitrate = duration > 0 ? 
(ftell(enc->output_fp) * 8.0) / duration / 1000.0 : 0.0; - long gop_count = enc->frame_number / DT_GOP_SIZE; size_t total_kb = ftell(enc->output_fp) / 1024; @@ -633,24 +820,416 @@ static int run_encoder(dt_encoder_t *enc) { enc->frames_encoded += enc->gop_frame_count; tav_encoder_free_packet(video_packet); } - free(frame_numbers); } + free(tad_output); + return 0; +} + +// Multithreaded encoding loop +static int run_encoder_mt(dt_encoder_t *enc, FILE *video_pipe, FILE *audio_pipe, + pid_t video_pid __attribute__((unused)), + pid_t audio_pid __attribute__((unused))) { + size_t frame_size = enc->width * enc->height * 3; + double gop_duration = (double)DT_GOP_SIZE * enc->fps_den / enc->fps_num; + // Calculate audio buffer size with generous padding to handle FFmpeg's audio delivery + // FFmpeg may deliver all audio for a GOP in the first read, so we need space for: + // 1. The expected GOP audio: AUDIO_SAMPLE_RATE * gop_duration + // 2. Worst-case per-frame variations: DT_GOP_SIZE * samples_per_frame + size_t expected_samples = (size_t)(AUDIO_SAMPLE_RATE * gop_duration); + size_t samples_per_frame = (size_t)(AUDIO_SAMPLE_RATE * enc->fps_den / enc->fps_num) + 1; + size_t audio_samples_per_gop = expected_samples + (DT_GOP_SIZE * samples_per_frame); + size_t tad_buffer_size = audio_samples_per_gop * 2; + + // Initialize threading + if (init_threading(enc) < 0) { + return -1; + } + + // Allocate per-slot frame buffers and audio buffers + for (int slot = 0; slot < enc->num_threads; slot++) { + enc->gop_jobs[slot].rgb_frames = malloc(DT_GOP_SIZE * sizeof(uint8_t*)); + enc->gop_jobs[slot].frame_numbers = malloc(DT_GOP_SIZE * sizeof(int)); + enc->gop_jobs[slot].audio_samples = malloc(audio_samples_per_gop * 2 * sizeof(float)); + enc->gop_jobs[slot].tad_output = malloc(tad_buffer_size); + + if (!enc->gop_jobs[slot].rgb_frames || !enc->gop_jobs[slot].frame_numbers || + !enc->gop_jobs[slot].audio_samples || !enc->gop_jobs[slot].tad_output) { + fprintf(stderr, "Error: Failed to 
allocate job slot %d buffers\n", slot); + shutdown_threading(enc); + return -1; + } + + for (int f = 0; f < DT_GOP_SIZE; f++) { + enc->gop_jobs[slot].rgb_frames[f] = malloc(frame_size); + if (!enc->gop_jobs[slot].rgb_frames[f]) { + fprintf(stderr, "Error: Failed to allocate frame buffer for slot %d\n", slot); + shutdown_threading(enc); + return -1; + } + } + + // Copy encoder params for thread safety + enc->gop_jobs[slot].params = enc->enc_params; + enc->gop_jobs[slot].status = GOP_SLOT_EMPTY; + enc->gop_jobs[slot].num_frames = 0; + enc->gop_jobs[slot].audio_sample_count = 0; + enc->gop_jobs[slot].tad_size = 0; + enc->gop_jobs[slot].packet = NULL; + enc->gop_jobs[slot].success = 0; + } + + printf("Encoding frames with %d threads...\n", enc->num_threads); + clock_t start_time = clock(); + + int current_slot = 0; + int next_gop_to_write = 0; + int current_gop_index = 0; + int frames_in_current_gop = 0; + int encoding_error = 0; + int eof_reached = 0; + enc->current_timecode_ns = 0; + + while (!encoding_error && !eof_reached) { + // Step 1: Try to write any completed GOPs in order + pthread_mutex_lock(&enc->job_mutex); + while (!encoding_error) { + int found = -1; + for (int i = 0; i < enc->num_threads; i++) { + if (enc->gop_jobs[i].status == GOP_SLOT_COMPLETE && + enc->gop_jobs[i].gop_index == next_gop_to_write) { + found = i; + break; + } + } + + if (found < 0) break; + + gop_job_t *job = &enc->gop_jobs[found]; + pthread_mutex_unlock(&enc->job_mutex); + + // Write this GOP + if (job->success && job->packet) { + int max_index = tad32_quality_to_max_index(enc->quality_index); + write_packet(enc, enc->current_timecode_ns, + job->tad_output, job->tad_size, + job->packet->data, job->packet->size, + job->num_frames, (uint16_t)job->audio_sample_count, max_index); + + enc->current_timecode_ns += (uint64_t)(gop_duration * 1e9); + enc->frames_encoded += job->num_frames; + + tav_encoder_free_packet(job->packet); + job->packet = NULL; + + // Display progress + clock_t now = 
clock(); + double elapsed = (double)(now - start_time) / CLOCKS_PER_SEC; + double fps = elapsed > 0 ? (double)enc->frames_encoded / elapsed : 0.0; + double duration = (double)enc->frames_encoded * enc->fps_den / enc->fps_num; + double bitrate = duration > 0 ? (ftell(enc->output_fp) * 8.0) / duration / 1000.0 : 0.0; + long gop_count = enc->frames_encoded / DT_GOP_SIZE; + size_t total_kb = ftell(enc->output_fp) / 1024; + + printf("\rFrame %lu | GOPs: %ld | %.1f fps | %.1f kbps | %zu KB ", + enc->frames_encoded, gop_count, fps, bitrate, total_kb); + fflush(stdout); + } + + pthread_mutex_lock(&enc->job_mutex); + job->status = GOP_SLOT_EMPTY; + job->num_frames = 0; + job->audio_sample_count = 0; + job->tad_size = 0; + next_gop_to_write++; + } + pthread_mutex_unlock(&enc->job_mutex); + + if (encoding_error || eof_reached) break; + + // Step 2: Fill current slot with frames + gop_job_t *slot = &enc->gop_jobs[current_slot]; + + // Wait for slot to be empty + pthread_mutex_lock(&enc->job_mutex); + while (slot->status != GOP_SLOT_EMPTY && !enc->shutdown_workers) { + // While waiting, check if we can write any completed GOPs + int wrote_something = 0; + for (int i = 0; i < enc->num_threads; i++) { + if (enc->gop_jobs[i].status == GOP_SLOT_COMPLETE && + enc->gop_jobs[i].gop_index == next_gop_to_write) { + gop_job_t *job = &enc->gop_jobs[i]; + pthread_mutex_unlock(&enc->job_mutex); + + if (job->success && job->packet) { + int max_index = tad32_quality_to_max_index(enc->quality_index); + write_packet(enc, enc->current_timecode_ns, + job->tad_output, job->tad_size, + job->packet->data, job->packet->size, + job->num_frames, (uint16_t)job->audio_sample_count, max_index); + + enc->current_timecode_ns += (uint64_t)(gop_duration * 1e9); + enc->frames_encoded += job->num_frames; + + tav_encoder_free_packet(job->packet); + job->packet = NULL; + } + + pthread_mutex_lock(&enc->job_mutex); + job->status = GOP_SLOT_EMPTY; + job->num_frames = 0; + job->audio_sample_count = 0; + job->tad_size 
= 0; + next_gop_to_write++; + wrote_something = 1; + break; + } + } + if (!wrote_something) { + pthread_cond_wait(&enc->job_complete, &enc->job_mutex); + } + } + pthread_mutex_unlock(&enc->job_mutex); + + // Reset audio accumulator only when starting a fresh GOP + if (frames_in_current_gop == 0) { + slot->audio_sample_count = 0; + } + + // Read frames into the slot + while (frames_in_current_gop < DT_GOP_SIZE && !eof_reached) { + if (enc->encode_limit > 0 && enc->frame_number >= enc->encode_limit) { + eof_reached = 1; + break; + } + + size_t bytes_read = fread(slot->rgb_frames[frames_in_current_gop], 1, frame_size, video_pipe); + if (bytes_read < frame_size) { + eof_reached = 1; + break; + } + + slot->frame_numbers[frames_in_current_gop] = enc->frame_number; + enc->frame_number++; + frames_in_current_gop++; + + // Read corresponding audio - read whatever is available up to buffer capacity + // Note: FFmpeg may buffer audio, so the first read might get multiple frames worth + size_t audio_buffer_capacity_samples = audio_samples_per_gop; + size_t audio_space_remaining = audio_buffer_capacity_samples - slot->audio_sample_count; + + if (audio_space_remaining > 0) { + // Read up to the remaining buffer space + size_t max_read_bytes = audio_space_remaining * 2 * sizeof(float); + size_t audio_read = fread(slot->audio_samples + slot->audio_sample_count * 2, + 1, max_read_bytes, audio_pipe); + slot->audio_sample_count += audio_read / (2 * sizeof(float)); + } + + // Submit GOP when full + if (frames_in_current_gop >= DT_GOP_SIZE) { + slot->num_frames = frames_in_current_gop; + slot->gop_index = current_gop_index; + + pthread_mutex_lock(&enc->job_mutex); + slot->status = GOP_SLOT_READY; + pthread_cond_broadcast(&enc->job_ready); + pthread_mutex_unlock(&enc->job_mutex); + + current_slot = (current_slot + 1) % enc->num_threads; + current_gop_index++; + frames_in_current_gop = 0; + break; // Exit frame-reading loop to wait for next available slot + } + } + } + + // Submit any 
partial GOP at EOF + if (frames_in_current_gop > 0) { + gop_job_t *slot = &enc->gop_jobs[current_slot]; + slot->num_frames = frames_in_current_gop; + slot->gop_index = current_gop_index; + + pthread_mutex_lock(&enc->job_mutex); + slot->status = GOP_SLOT_READY; + pthread_cond_broadcast(&enc->job_ready); + pthread_mutex_unlock(&enc->job_mutex); + + current_gop_index++; + } + + // Wait for all remaining GOPs to complete and write them + while (!encoding_error && next_gop_to_write < current_gop_index) { + pthread_mutex_lock(&enc->job_mutex); + + int found = -1; + while (found < 0 && !encoding_error) { + for (int i = 0; i < enc->num_threads; i++) { + if (enc->gop_jobs[i].status == GOP_SLOT_COMPLETE && + enc->gop_jobs[i].gop_index == next_gop_to_write) { + found = i; + break; + } + } + if (found < 0) { + pthread_cond_wait(&enc->job_complete, &enc->job_mutex); + } + } + + if (found >= 0) { + gop_job_t *job = &enc->gop_jobs[found]; + pthread_mutex_unlock(&enc->job_mutex); + + if (job->success && job->packet) { + int max_index = tad32_quality_to_max_index(enc->quality_index); + write_packet(enc, enc->current_timecode_ns, + job->tad_output, job->tad_size, + job->packet->data, job->packet->size, + job->num_frames, (uint16_t)job->audio_sample_count, max_index); + + enc->current_timecode_ns += (uint64_t)(gop_duration * 1e9); + enc->frames_encoded += job->num_frames; + + tav_encoder_free_packet(job->packet); + job->packet = NULL; + } + + pthread_mutex_lock(&enc->job_mutex); + job->status = GOP_SLOT_EMPTY; + job->num_frames = 0; + job->audio_sample_count = 0; + job->tad_size = 0; + next_gop_to_write++; + pthread_mutex_unlock(&enc->job_mutex); + } else { + pthread_mutex_unlock(&enc->job_mutex); + } + } + + // Free per-slot buffers before shutdown + for (int slot = 0; slot < enc->num_threads; slot++) { + if (enc->gop_jobs[slot].rgb_frames) { + for (int f = 0; f < DT_GOP_SIZE; f++) { + free(enc->gop_jobs[slot].rgb_frames[f]); + } + free(enc->gop_jobs[slot].rgb_frames); + } + 
free(enc->gop_jobs[slot].frame_numbers); + free(enc->gop_jobs[slot].audio_samples); + free(enc->gop_jobs[slot].tad_output); + } + + shutdown_threading(enc); + + return encoding_error ? -1 : 0; +} + +static int run_encoder(dt_encoder_t *enc) { + // Open output file + enc->output_fp = fopen(enc->output_file, "wb"); + if (!enc->output_fp) { + fprintf(stderr, "Error: Cannot create output file: %s\n", enc->output_file); + return -1; + } + + // Set up video encoder params + tav_encoder_params_init(&enc->enc_params, enc->width, enc->height); + enc->enc_params.fps_num = enc->fps_num; + enc->enc_params.fps_den = enc->fps_den; + enc->enc_params.wavelet_type = 1; // CDF 9/7 + enc->enc_params.temporal_wavelet = 255; // Haar + enc->enc_params.decomp_levels = DT_SPATIAL_LEVELS; + enc->enc_params.temporal_levels = DT_TEMPORAL_LEVELS; + enc->enc_params.enable_temporal_dwt = 1; + enc->enc_params.gop_size = DT_GOP_SIZE; + enc->enc_params.quality_level = enc->quality_index; + enc->enc_params.quantiser_y = QUALITY_Y[enc->quality_index]; + enc->enc_params.quantiser_co = QUALITY_CO[enc->quality_index]; + enc->enc_params.quantiser_cg = QUALITY_CG[enc->quality_index]; + enc->enc_params.entropy_coder = 1; // EZBC + enc->enc_params.encoder_preset = 0x01; // Sports mode + enc->enc_params.monoblock = 1; // Force monoblock + enc->enc_params.verbose = enc->verbose; + + // For single-threaded mode, create a context to validate params + enc->video_ctx = tav_encoder_create(&enc->enc_params); + if (!enc->video_ctx) { + fprintf(stderr, "Error: Cannot create video encoder\n"); + fclose(enc->output_fp); + return -1; + } + + printf("Forced Monoblock mode (--monoblock)\n"); + + // Get actual parameters (may have been adjusted) + tav_encoder_get_params(enc->video_ctx, &enc->enc_params); + + if (enc->verbose) { + printf("Auto-selected Haar temporal wavelet with sports mode (resolution: %dx%d = %d pixels, quantiser_y = %d)\n", + enc->width, enc->height, enc->width * enc->height, 
enc->enc_params.quantiser_y); + } + + // Spawn FFmpeg for video + pid_t video_pid; + FILE *video_pipe = spawn_ffmpeg_video(enc, &video_pid); + if (!video_pipe) { + tav_encoder_free(enc->video_ctx); + fclose(enc->output_fp); + return -1; + } + + // Spawn FFmpeg for audio + pid_t audio_pid; + FILE *audio_pipe = spawn_ffmpeg_audio(enc, &audio_pid); + if (!audio_pipe) { + fclose(video_pipe); + waitpid(video_pid, NULL, 0); + tav_encoder_free(enc->video_ctx); + fclose(enc->output_fp); + return -1; + } + + // Allocate frame buffers for single-threaded mode + size_t frame_size = enc->width * enc->height * 3; + enc->gop_frames = malloc(DT_GOP_SIZE * sizeof(uint8_t *)); + for (int i = 0; i < DT_GOP_SIZE; i++) { + enc->gop_frames[i] = malloc(frame_size); + } + + // Audio buffer (enough for one GOP worth of audio) + double gop_duration = (double)DT_GOP_SIZE * enc->fps_den / enc->fps_num; + size_t audio_samples_per_gop = (size_t)(AUDIO_SAMPLE_RATE * gop_duration) + 1024; + enc->audio_buffer = malloc(audio_samples_per_gop * 2 * sizeof(float)); + enc->audio_buffer_capacity = audio_samples_per_gop; + enc->audio_buffer_samples = 0; + + clock_t start_time = clock(); + + // Run encoding + if (enc->num_threads > 0) { + printf("Multithreaded mode: %d threads\n", enc->num_threads); + run_encoder_mt(enc, video_pipe, audio_pipe, video_pid, audio_pid); + } else { + printf("Single-threaded mode\n"); + run_encoder_st(enc, video_pipe, audio_pipe, video_pid, audio_pid); + } + clock_t end_time = clock(); double elapsed = (double)(end_time - start_time) / CLOCKS_PER_SEC; // Print statistics - printf("\nEncoding complete:\n"); + printf("\nEncoding complete%s:\n", enc->num_threads > 0 ? 
" (multithreaded)" : ""); printf(" Frames: %lu\n", enc->frames_encoded); printf(" GOPs: %lu\n", enc->packets_written); printf(" Output size: %lu bytes (%.2f MB)\n", enc->bytes_written, enc->bytes_written / 1048576.0); printf(" Encoding speed: %.1f fps\n", enc->frames_encoded / elapsed); - printf(" Bitrate: %.1f kbps\n", - enc->bytes_written * 8.0 / (enc->frames_encoded * enc->fps_den / enc->fps_num) / 1000.0); + if (enc->frames_encoded > 0) { + printf(" Bitrate: %.1f kbps\n", + enc->bytes_written * 8.0 / (enc->frames_encoded * enc->fps_den / enc->fps_num) / 1000.0); + } // Cleanup - free(tad_output); free(enc->audio_buffer); for (int i = 0; i < DT_GOP_SIZE; i++) { free(enc->gop_frames[i]); @@ -684,6 +1263,7 @@ int main(int argc, char **argv) { enc.quality_index = 3; enc.is_pal = 0; enc.is_interlaced = 0; + enc.num_threads = get_default_thread_count(); // Default: min(8, available CPUs) // Initialize FEC libraries rs_init(); @@ -693,6 +1273,7 @@ int main(int argc, char **argv) { {"input", required_argument, 0, 'i'}, {"output", required_argument, 0, 'o'}, {"quality", required_argument, 0, 'q'}, + {"threads", required_argument, 0, 't'}, {"ntsc", no_argument, 0, 'N'}, {"pal", no_argument, 0, 'P'}, {"interlaced", no_argument, 0, 'I'}, @@ -703,7 +1284,7 @@ int main(int argc, char **argv) { }; int opt; - while ((opt = getopt_long(argc, argv, "i:o:q:vhNPI", long_options, NULL)) != -1) { + while ((opt = getopt_long(argc, argv, "i:o:q:t:vhNPI", long_options, NULL)) != -1) { switch (opt) { case 'i': enc.input_file = optarg; @@ -716,6 +1297,17 @@ int main(int argc, char **argv) { if (enc.quality_index < 0) enc.quality_index = 0; if (enc.quality_index > 5) enc.quality_index = 5; break; + case 't': { + int threads = atoi(optarg); + if (threads < 0) { + fprintf(stderr, "Error: Thread count must be positive\n"); + return 1; + } + // Both 0 and 1 mean single-threaded (use value 0 internally) + enc.num_threads = (threads <= 1) ? 
0 : threads; + if (enc.num_threads > 16) enc.num_threads = 16; // Cap at 16 + break; + } case 'N': enc.is_pal = 0; enc.height = DT_HEIGHT_NTSC; @@ -774,6 +1366,8 @@ int main(int argc, char **argv) { printf(" Framerate: %d/%d\n", enc.fps_num, enc.fps_den); printf(" Quality: %d\n", enc.quality_index); printf(" GOP size: %d\n", DT_GOP_SIZE); + printf(" Threads: %d%s\n", enc.num_threads > 0 ? enc.num_threads : 1, + enc.num_threads > 0 ? " (multithreaded)" : " (single-threaded)"); printf(" Header sizes: main=%dB tad=%dB tav=%dB (after LDPC)\n", DT_MAIN_HEADER_SIZE * 2, DT_TAD_HEADER_SIZE * 2, DT_TAV_HEADER_SIZE * 2);