mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-07 19:51:51 +09:00
TAV-DT multithreaded encoding
This commit is contained in:
@@ -30,6 +30,7 @@
|
|||||||
#include <sys/wait.h>
|
#include <sys/wait.h>
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
#include <pthread.h>
|
||||||
|
|
||||||
#include "tav_video_decoder.h"
|
#include "tav_video_decoder.h"
|
||||||
#include "decoder_tad.h"
|
#include "decoder_tad.h"
|
||||||
@@ -64,6 +65,53 @@ static const int QUALITY_CO[] = {123, 108, 91, 76, 59, 29};
|
|||||||
static const int QUALITY_CG[] = {148, 133, 113, 99, 76, 39};
|
static const int QUALITY_CG[] = {148, 133, 113, 99, 76, 39};
|
||||||
|
|
||||||
#define MAX_PATH 4096
|
#define MAX_PATH 4096
|
||||||
|
#define MAX_DECODE_THREADS 16
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Multithreading Structures
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
#define DECODE_SLOT_EMPTY 0
|
||||||
|
#define DECODE_SLOT_PENDING 1
|
||||||
|
#define DECODE_SLOT_DONE 2
|
||||||
|
|
||||||
|
// GOP decode job structure
|
||||||
|
/*
 * One GOP decode job. Each instance is a slot in the decoder's job table:
 * the submitter fills in the input fields, a worker thread decodes into
 * rgb_frames, and `status` tracks where the slot is in that hand-off.
 */
typedef struct {
    /* ---- Input ---- */
    uint8_t *compressed_data;   /* Raw GOP data to decode; the worker frees it after decoding */
    size_t compressed_size;     /* Size of compressed_data in bytes */
    int gop_size;               /* Number of frames in this GOP */
    int job_id;                 /* Sequential job ID for ordering output */

    /* ---- Output ---- */
    uint8_t **rgb_frames;       /* Decoded RGB24 frames [gop_size] */
    int frames_allocated;       /* How many entries of rgb_frames are allocated */
    int decode_result;          /* 0 = success, -1 = error */

    /* ---- Status ---- */
    volatile int status;        /* One of the DECODE_SLOT_* values */
} gop_decode_job_t;
|
||||||
|
|
||||||
|
/**
 * Query how many processors are currently online.
 *
 * Uses sysconf(_SC_NPROCESSORS_ONLN) when that name is defined.
 *
 * @return The reported processor count, or 1 when the query is unavailable
 *         or does not yield a positive value.
 */
static int get_available_cpus(void) {
    long online = 0;
#ifdef _SC_NPROCESSORS_ONLN
    online = sysconf(_SC_NPROCESSORS_ONLN);
#endif
    // Anything non-positive (error or unsupported) falls back to a single core
    return (online > 0) ? (int)online : 1;
}
|
||||||
|
|
||||||
|
/**
 * Pick the default number of threads: the available CPU count, capped at 8.
 *
 * @return min(get_available_cpus(), 8)
 */
static int get_default_thread_count(void) {
    const int cap = 8;
    int cpus = get_available_cpus();
    if (cpus > cap) {
        return cap;
    }
    return cpus;
}
|
||||||
|
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
// CRC-32
|
// CRC-32
|
||||||
@@ -138,6 +186,23 @@ typedef struct {
|
|||||||
// Options
|
// Options
|
||||||
int verbose;
|
int verbose;
|
||||||
int dump_mode; // Just dump packets, don't decode
|
int dump_mode; // Just dump packets, don't decode
|
||||||
|
|
||||||
|
// Multithreading
|
||||||
|
int num_threads;
|
||||||
|
int num_slots;
|
||||||
|
gop_decode_job_t *slots;
|
||||||
|
tav_video_context_t **worker_video_ctx; // Per-thread decoder contexts
|
||||||
|
pthread_t *worker_threads;
|
||||||
|
pthread_mutex_t mutex;
|
||||||
|
pthread_cond_t cond_job_available;
|
||||||
|
pthread_cond_t cond_slot_free;
|
||||||
|
volatile int threads_should_exit;
|
||||||
|
volatile int next_write_slot; // Next slot to write to output
|
||||||
|
volatile int jobs_submitted;
|
||||||
|
volatile int jobs_completed;
|
||||||
|
|
||||||
|
// Timing
|
||||||
|
time_t start_time;
|
||||||
} dt_decoder_t;
|
} dt_decoder_t;
|
||||||
|
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
@@ -151,6 +216,8 @@ static void print_usage(const char *program) {
|
|||||||
printf(" -i, --input FILE Input TAV-DT file\n");
|
printf(" -i, --input FILE Input TAV-DT file\n");
|
||||||
printf(" -o, --output FILE Output video file (FFV1/MKV)\n");
|
printf(" -o, --output FILE Output video file (FFV1/MKV)\n");
|
||||||
printf("\nOptions:\n");
|
printf("\nOptions:\n");
|
||||||
|
printf(" -t, --threads N Number of decoder threads (default: min(8, available CPUs))\n");
|
||||||
|
printf(" 0 or 1 = single-threaded, 2-16 = multithreaded\n");
|
||||||
printf(" --dump Dump packet info without decoding\n");
|
printf(" --dump Dump packet info without decoding\n");
|
||||||
printf(" -v, --verbose Verbose output\n");
|
printf(" -v, --verbose Verbose output\n");
|
||||||
printf(" --help Show this help\n");
|
printf(" --help Show this help\n");
|
||||||
@@ -354,6 +421,12 @@ static int decode_audio_subpacket(dt_decoder_t *dec, const uint8_t *data, size_t
|
|||||||
// Calculate RS payload size
|
// Calculate RS payload size
|
||||||
size_t rs_total = rs_block_count * RS_BLOCK_SIZE;
|
size_t rs_total = rs_block_count * RS_BLOCK_SIZE;
|
||||||
|
|
||||||
|
// Handle empty audio packet (no samples in this GOP)
|
||||||
|
if (compressed_size == 0 || rs_block_count == 0 || sample_count == 0) {
|
||||||
|
*consumed = offset;
|
||||||
|
return 0; // Successfully processed empty audio packet
|
||||||
|
}
|
||||||
|
|
||||||
if (offset + rs_total > data_len) {
|
if (offset + rs_total > data_len) {
|
||||||
if (dec->verbose) {
|
if (dec->verbose) {
|
||||||
fprintf(stderr, "Warning: Audio packet truncated\n");
|
fprintf(stderr, "Warning: Audio packet truncated\n");
|
||||||
@@ -386,8 +459,13 @@ static int decode_audio_subpacket(dt_decoder_t *dec, const uint8_t *data, size_t
|
|||||||
// [sample_count(2)][max_index(1)][payload_size(4)][zstd_data]
|
// [sample_count(2)][max_index(1)][payload_size(4)][zstd_data]
|
||||||
// No need to rebuild the header - pass it directly to the TAD decoder
|
// No need to rebuild the header - pass it directly to the TAD decoder
|
||||||
|
|
||||||
// Decode TAD to PCMu8
|
// Read the actual sample count from the TAD chunk header (not the wrapper header)
|
||||||
uint8_t *pcmu8_output = malloc(sample_count * 2);
|
// The wrapper header sample_count might be incorrect or 0 in some cases
|
||||||
|
uint16_t tad_chunk_sample_count;
|
||||||
|
memcpy(&tad_chunk_sample_count, decoded_payload, 2);
|
||||||
|
|
||||||
|
// Decode TAD to PCMu8 - allocate based on TAD chunk's sample count
|
||||||
|
uint8_t *pcmu8_output = malloc(tad_chunk_sample_count * 2);
|
||||||
if (!pcmu8_output) {
|
if (!pcmu8_output) {
|
||||||
free(rs_data);
|
free(rs_data);
|
||||||
free(decoded_payload);
|
free(decoded_payload);
|
||||||
@@ -717,6 +795,231 @@ static int spawn_ffmpeg(dt_decoder_t *dec) {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Multithreading Support
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
// Worker thread function - decodes GOPs in parallel
|
||||||
|
static void *decoder_worker_thread(void *arg) {
|
||||||
|
dt_decoder_t *dec = (dt_decoder_t *)arg;
|
||||||
|
|
||||||
|
// Get thread index by finding our thread ID in the array
|
||||||
|
int thread_idx = -1;
|
||||||
|
pthread_t self = pthread_self();
|
||||||
|
for (int i = 0; i < dec->num_threads; i++) {
|
||||||
|
if (pthread_equal(dec->worker_threads[i], self)) {
|
||||||
|
thread_idx = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (thread_idx < 0) thread_idx = 0; // Fallback
|
||||||
|
|
||||||
|
tav_video_context_t *my_video_ctx = dec->worker_video_ctx[thread_idx];
|
||||||
|
|
||||||
|
while (1) {
|
||||||
|
pthread_mutex_lock(&dec->mutex);
|
||||||
|
|
||||||
|
// Find a pending slot to work on
|
||||||
|
int slot_idx = -1;
|
||||||
|
while (slot_idx < 0 && !dec->threads_should_exit) {
|
||||||
|
for (int i = 0; i < dec->num_slots; i++) {
|
||||||
|
if (dec->slots[i].status == DECODE_SLOT_PENDING &&
|
||||||
|
dec->slots[i].compressed_data != NULL) {
|
||||||
|
dec->slots[i].status = DECODE_SLOT_DONE; // Claim it temporarily
|
||||||
|
slot_idx = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (slot_idx < 0 && !dec->threads_should_exit) {
|
||||||
|
pthread_cond_wait(&dec->cond_job_available, &dec->mutex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dec->threads_should_exit && slot_idx < 0) {
|
||||||
|
pthread_mutex_unlock(&dec->mutex);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
pthread_mutex_unlock(&dec->mutex);
|
||||||
|
|
||||||
|
if (slot_idx < 0) continue;
|
||||||
|
|
||||||
|
gop_decode_job_t *job = &dec->slots[slot_idx];
|
||||||
|
|
||||||
|
// Decode GOP using our thread's decoder context
|
||||||
|
job->decode_result = tav_video_decode_gop(
|
||||||
|
my_video_ctx,
|
||||||
|
job->compressed_data,
|
||||||
|
job->compressed_size,
|
||||||
|
job->gop_size,
|
||||||
|
job->rgb_frames
|
||||||
|
);
|
||||||
|
|
||||||
|
// Free compressed data
|
||||||
|
free(job->compressed_data);
|
||||||
|
job->compressed_data = NULL;
|
||||||
|
|
||||||
|
// Mark as done
|
||||||
|
pthread_mutex_lock(&dec->mutex);
|
||||||
|
job->status = DECODE_SLOT_DONE;
|
||||||
|
dec->jobs_completed++;
|
||||||
|
pthread_cond_broadcast(&dec->cond_slot_free);
|
||||||
|
pthread_mutex_unlock(&dec->mutex);
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Set up the multithreaded decode pipeline.
 *
 * Allocates num_threads + 2 job slots (each with 16 preallocated RGB frame
 * buffers), one video decoder context per worker, the mutex/condition
 * variables, and finally starts the worker threads.
 *
 * dec->num_threads is clamped to MAX_DECODE_THREADS. A value <= 0 means
 * single-threaded mode and nothing is allocated.
 *
 * On failure everything built so far is torn down again (already-started
 * workers are stopped and joined), and dec->num_threads / dec->num_slots are
 * reset to 0 so that a later cleanup_decoder_threads() call is a no-op.
 *
 * @return 0 on success (or single-threaded mode), -1 on failure.
 */
static int init_decoder_threads(dt_decoder_t *dec) {
    if (dec->num_threads <= 0) {
        return 0;  // Single-threaded mode
    }

    // Limit threads
    if (dec->num_threads > MAX_DECODE_THREADS) {
        dec->num_threads = MAX_DECODE_THREADS;
    }

    const int max_gop_size = 16;  // TAV-DT uses fixed 16-frame GOPs
    const int internal_height = dec->is_interlaced ? dec->height / 2 : dec->height;
    // Widen before multiplying so large frames cannot overflow int arithmetic
    const size_t frame_size = (size_t)dec->width * (size_t)internal_height * 3;

    tav_video_params_t video_params = {
        .width = dec->width,
        .height = internal_height,
        .decomp_levels = DT_SPATIAL_LEVELS,
        .temporal_levels = DT_TEMPORAL_LEVELS,
        .wavelet_filter = 1,      // CDF 9/7
        .temporal_wavelet = 255,  // Haar
        .entropy_coder = 1,       // EZBC
        .channel_layout = 0,      // YCoCg-R
        .perceptual_tuning = 1,
        .quantiser_y = QUALITY_Y[dec->quality_index],
        .quantiser_co = QUALITY_CO[dec->quality_index],
        .quantiser_cg = QUALITY_CG[dec->quality_index],
        .encoder_preset = 0x01,   // Sports
        .monoblock = 1
    };

    // Track how far setup got so the failure path only undoes what exists
    int threads_started = 0;
    int mutex_ready = 0;
    int cond_job_ready = 0;
    int cond_slot_ready = 0;

    dec->worker_video_ctx = NULL;
    dec->worker_threads = NULL;

    // Number of slots = threads + 2 for pipelining
    dec->num_slots = dec->num_threads + 2;

    // Allocate slots (zeroed, so unset pointers are NULL)
    dec->slots = calloc((size_t)dec->num_slots, sizeof(gop_decode_job_t));
    if (!dec->slots) {
        fprintf(stderr, "Error: Failed to allocate decode slots\n");
        goto fail;
    }

    // Allocate frame buffers for each slot
    for (int i = 0; i < dec->num_slots; i++) {
        gop_decode_job_t *slot = &dec->slots[i];

        slot->rgb_frames = calloc((size_t)max_gop_size, sizeof(uint8_t *));
        if (!slot->rgb_frames) {
            fprintf(stderr, "Error: Failed to allocate frame pointers for slot %d\n", i);
            goto fail;
        }
        // Record the count up front: the pointer array is zeroed, so a partial
        // fill can still be freed entry by entry.
        slot->frames_allocated = max_gop_size;

        for (int f = 0; f < max_gop_size; f++) {
            slot->rgb_frames[f] = malloc(frame_size);
            if (!slot->rgb_frames[f]) {
                fprintf(stderr, "Error: Failed to allocate frame buffer for slot %d\n", i);
                goto fail;
            }
        }
        slot->status = DECODE_SLOT_EMPTY;
        slot->job_id = -1;
    }

    // Create per-thread video decoder contexts
    dec->worker_video_ctx = calloc((size_t)dec->num_threads, sizeof(tav_video_context_t *));
    if (!dec->worker_video_ctx) {
        fprintf(stderr, "Error: Failed to allocate worker video contexts\n");
        goto fail;
    }

    for (int i = 0; i < dec->num_threads; i++) {
        dec->worker_video_ctx[i] = tav_video_create(&video_params);
        if (!dec->worker_video_ctx[i]) {
            fprintf(stderr, "Error: Failed to create video context for thread %d\n", i);
            goto fail;
        }
    }

    // Initialize synchronization primitives
    if (pthread_mutex_init(&dec->mutex, NULL) != 0) {
        fprintf(stderr, "Error: Failed to initialize decoder mutex\n");
        goto fail;
    }
    mutex_ready = 1;
    if (pthread_cond_init(&dec->cond_job_available, NULL) != 0) {
        fprintf(stderr, "Error: Failed to initialize job condition variable\n");
        goto fail;
    }
    cond_job_ready = 1;
    if (pthread_cond_init(&dec->cond_slot_free, NULL) != 0) {
        fprintf(stderr, "Error: Failed to initialize slot condition variable\n");
        goto fail;
    }
    cond_slot_ready = 1;

    dec->threads_should_exit = 0;
    dec->next_write_slot = 0;
    dec->jobs_submitted = 0;
    dec->jobs_completed = 0;

    // Create worker threads
    dec->worker_threads = calloc((size_t)dec->num_threads, sizeof(pthread_t));
    if (!dec->worker_threads) {
        fprintf(stderr, "Error: Failed to allocate worker threads\n");
        goto fail;
    }

    for (int i = 0; i < dec->num_threads; i++) {
        if (pthread_create(&dec->worker_threads[i], NULL, decoder_worker_thread, dec) != 0) {
            fprintf(stderr, "Error: Failed to create worker thread %d\n", i);
            goto fail;
        }
        threads_started++;
    }

    if (dec->verbose) {
        printf("Initialized %d decoder worker threads with %d slots\n",
               dec->num_threads, dec->num_slots);
    }

    return 0;

fail:
    // Stop and reap any workers that are already running
    if (threads_started > 0) {
        pthread_mutex_lock(&dec->mutex);
        dec->threads_should_exit = 1;
        pthread_cond_broadcast(&dec->cond_job_available);
        pthread_mutex_unlock(&dec->mutex);
        for (int i = 0; i < threads_started; i++) {
            pthread_join(dec->worker_threads[i], NULL);
        }
    }
    free(dec->worker_threads);
    dec->worker_threads = NULL;

    if (cond_slot_ready) pthread_cond_destroy(&dec->cond_slot_free);
    if (cond_job_ready) pthread_cond_destroy(&dec->cond_job_available);
    if (mutex_ready) pthread_mutex_destroy(&dec->mutex);

    if (dec->worker_video_ctx) {
        for (int i = 0; i < dec->num_threads; i++) {
            if (dec->worker_video_ctx[i]) {
                tav_video_free(dec->worker_video_ctx[i]);
            }
        }
        free(dec->worker_video_ctx);
        dec->worker_video_ctx = NULL;
    }

    if (dec->slots) {
        for (int i = 0; i < dec->num_slots; i++) {
            if (dec->slots[i].rgb_frames) {
                for (int f = 0; f < dec->slots[i].frames_allocated; f++) {
                    free(dec->slots[i].rgb_frames[f]);
                }
                free(dec->slots[i].rgb_frames);
            }
        }
        free(dec->slots);
        dec->slots = NULL;
    }

    // Leave the decoder in "no threading" state so later cleanup is a no-op
    dec->num_slots = 0;
    dec->num_threads = 0;
    return -1;
}
|
||||||
|
|
||||||
|
/**
 * Tear down the multithreaded decode pipeline.
 *
 * Sets threads_should_exit, wakes every worker, joins them, then frees the
 * per-thread video contexts, the job slots (frame buffers and any compressed
 * data still attached) and destroys the mutex and condition variables.
 * Does nothing when dec->num_threads <= 0.
 *
 * Each array is checked for NULL before use, so an array that was never
 * allocated is skipped instead of dereferenced.
 * NOTE(review): this still assumes the mutex/condition variables were
 * initialised and that every entry of a non-NULL worker_threads array is a
 * started thread - confirm against the init failure paths.
 */
static void cleanup_decoder_threads(dt_decoder_t *dec) {
    if (dec->num_threads <= 0) return;

    // Signal threads to exit
    pthread_mutex_lock(&dec->mutex);
    dec->threads_should_exit = 1;
    pthread_cond_broadcast(&dec->cond_job_available);
    pthread_mutex_unlock(&dec->mutex);

    // Wait for threads to finish
    if (dec->worker_threads) {
        for (int i = 0; i < dec->num_threads; i++) {
            pthread_join(dec->worker_threads[i], NULL);
        }
        free(dec->worker_threads);
        dec->worker_threads = NULL;
    }

    // Free per-thread video contexts
    if (dec->worker_video_ctx) {
        for (int i = 0; i < dec->num_threads; i++) {
            if (dec->worker_video_ctx[i]) {
                tav_video_free(dec->worker_video_ctx[i]);
            }
        }
        free(dec->worker_video_ctx);
        dec->worker_video_ctx = NULL;
    }

    // Free slots
    if (dec->slots) {
        for (int i = 0; i < dec->num_slots; i++) {
            gop_decode_job_t *slot = &dec->slots[i];
            if (slot->rgb_frames) {
                for (int f = 0; f < slot->frames_allocated; f++) {
                    free(slot->rgb_frames[f]);
                }
                free(slot->rgb_frames);
                slot->rgb_frames = NULL;
            }
            free(slot->compressed_data);  // free(NULL) is a no-op
            slot->compressed_data = NULL;
        }
        free(dec->slots);
        dec->slots = NULL;
    }

    // Destroy sync primitives
    pthread_mutex_destroy(&dec->mutex);
    pthread_cond_destroy(&dec->cond_job_available);
    pthread_cond_destroy(&dec->cond_slot_free);
}
|
||||||
|
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
// Main Decoding Loop
|
// Main Decoding Loop
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
@@ -860,6 +1163,9 @@ int main(int argc, char **argv) {
|
|||||||
dt_decoder_t dec;
|
dt_decoder_t dec;
|
||||||
memset(&dec, 0, sizeof(dec));
|
memset(&dec, 0, sizeof(dec));
|
||||||
|
|
||||||
|
// Default thread count
|
||||||
|
dec.num_threads = get_default_thread_count();
|
||||||
|
|
||||||
// Initialize FEC libraries
|
// Initialize FEC libraries
|
||||||
rs_init();
|
rs_init();
|
||||||
ldpc_init();
|
ldpc_init();
|
||||||
@@ -867,6 +1173,7 @@ int main(int argc, char **argv) {
|
|||||||
static struct option long_options[] = {
|
static struct option long_options[] = {
|
||||||
{"input", required_argument, 0, 'i'},
|
{"input", required_argument, 0, 'i'},
|
||||||
{"output", required_argument, 0, 'o'},
|
{"output", required_argument, 0, 'o'},
|
||||||
|
{"threads", required_argument, 0, 't'},
|
||||||
{"dump", no_argument, 0, 'd'},
|
{"dump", no_argument, 0, 'd'},
|
||||||
{"verbose", no_argument, 0, 'v'},
|
{"verbose", no_argument, 0, 'v'},
|
||||||
{"help", no_argument, 0, 'h'},
|
{"help", no_argument, 0, 'h'},
|
||||||
@@ -874,7 +1181,7 @@ int main(int argc, char **argv) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
int opt;
|
int opt;
|
||||||
while ((opt = getopt_long(argc, argv, "i:o:dvh", long_options, NULL)) != -1) {
|
while ((opt = getopt_long(argc, argv, "i:o:t:dvh", long_options, NULL)) != -1) {
|
||||||
switch (opt) {
|
switch (opt) {
|
||||||
case 'i':
|
case 'i':
|
||||||
dec.input_file = optarg;
|
dec.input_file = optarg;
|
||||||
@@ -882,6 +1189,17 @@ int main(int argc, char **argv) {
|
|||||||
case 'o':
|
case 'o':
|
||||||
dec.output_file = optarg;
|
dec.output_file = optarg;
|
||||||
break;
|
break;
|
||||||
|
case 't': {
|
||||||
|
int threads = atoi(optarg);
|
||||||
|
if (threads < 0) {
|
||||||
|
fprintf(stderr, "Error: Thread count must be positive\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
// Both 0 and 1 mean single-threaded (use value 0 internally)
|
||||||
|
dec.num_threads = (threads <= 1) ? 0 : threads;
|
||||||
|
if (dec.num_threads > MAX_DECODE_THREADS) dec.num_threads = MAX_DECODE_THREADS;
|
||||||
|
break;
|
||||||
|
}
|
||||||
case 'd':
|
case 'd':
|
||||||
dec.dump_mode = 1;
|
dec.dump_mode = 1;
|
||||||
break;
|
break;
|
||||||
|
|||||||
@@ -30,6 +30,7 @@
|
|||||||
#include <sys/wait.h>
|
#include <sys/wait.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
#include <pthread.h>
|
||||||
|
|
||||||
#include "tav_encoder_lib.h"
|
#include "tav_encoder_lib.h"
|
||||||
#include "encoder_tad.h"
|
#include "encoder_tad.h"
|
||||||
@@ -67,6 +68,60 @@ static const int QUALITY_CG[] = {148, 133, 113, 99, 76, 39};
|
|||||||
// Audio samples per GOP (32kHz / framerate * gop_size)
|
// Audio samples per GOP (32kHz / framerate * gop_size)
|
||||||
#define AUDIO_SAMPLE_RATE 32000
|
#define AUDIO_SAMPLE_RATE 32000
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Multithreading Structures
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
#define GOP_SLOT_EMPTY 0
|
||||||
|
#define GOP_SLOT_READY 1
|
||||||
|
#define GOP_SLOT_ENCODING 2
|
||||||
|
#define GOP_SLOT_COMPLETE 3
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
// Input frames (copied from main thread)
|
||||||
|
uint8_t **rgb_frames; // Frame data pointers [gop_size]
|
||||||
|
int *frame_numbers; // Frame number array [gop_size]
|
||||||
|
int num_frames; // Actual number of frames in this GOP
|
||||||
|
int gop_index; // Sequential GOP index for ordering output
|
||||||
|
|
||||||
|
// Audio samples for this GOP
|
||||||
|
float *audio_samples; // Interleaved stereo samples
|
||||||
|
size_t audio_sample_count;
|
||||||
|
|
||||||
|
// Output
|
||||||
|
tav_encoder_packet_t *packet; // Encoded video packet
|
||||||
|
uint8_t *tad_output; // Encoded audio data
|
||||||
|
size_t tad_size; // Encoded audio size
|
||||||
|
int success; // 1 if encoding succeeded
|
||||||
|
|
||||||
|
// Encoder params (copy for thread safety)
|
||||||
|
tav_encoder_params_t params;
|
||||||
|
|
||||||
|
// Slot status
|
||||||
|
volatile int status;
|
||||||
|
} gop_job_t;
|
||||||
|
|
||||||
|
/**
 * Query how many processors are currently online.
 *
 * Uses sysconf(_SC_NPROCESSORS_ONLN) when that name is defined.
 *
 * @return The reported processor count, or 1 when the query is unavailable
 *         or does not yield a positive value.
 */
static int get_available_cpus(void) {
    long online = 0;
#ifdef _SC_NPROCESSORS_ONLN
    online = sysconf(_SC_NPROCESSORS_ONLN);
#endif
    // Anything non-positive (error or unsupported) falls back to a single core
    return (online > 0) ? (int)online : 1;
}
|
||||||
|
|
||||||
|
/**
 * Pick the default number of threads: the available CPU count, capped at 8.
 *
 * @return min(get_available_cpus(), 8)
 */
static int get_default_thread_count(void) {
    const int cap = 8;
    int cpus = get_available_cpus();
    if (cpus > cap) {
        return cap;
    }
    return cpus;
}
|
||||||
|
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
// CRC-32
|
// CRC-32
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
@@ -142,6 +197,18 @@ typedef struct {
|
|||||||
// Options
|
// Options
|
||||||
int verbose;
|
int verbose;
|
||||||
int encode_limit;
|
int encode_limit;
|
||||||
|
|
||||||
|
// Multithreading
|
||||||
|
int num_threads; // 0 = single-threaded, 1+ = num worker threads
|
||||||
|
gop_job_t *gop_jobs; // Array of GOP job slots [num_threads]
|
||||||
|
pthread_t *worker_threads; // Array of worker thread handles [num_threads]
|
||||||
|
pthread_mutex_t job_mutex; // Mutex for job slot access
|
||||||
|
pthread_cond_t job_ready; // Signal when a job slot is ready for encoding
|
||||||
|
pthread_cond_t job_complete; // Signal when a job slot is complete
|
||||||
|
volatile int shutdown_workers; // 1 when workers should exit
|
||||||
|
|
||||||
|
// Encoder params (template for worker threads)
|
||||||
|
tav_encoder_params_t enc_params;
|
||||||
} dt_encoder_t;
|
} dt_encoder_t;
|
||||||
|
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
@@ -160,6 +227,8 @@ static void print_usage(const char *program) {
|
|||||||
printf(" --pal Force PAL format (720x576)\n");
|
printf(" --pal Force PAL format (720x576)\n");
|
||||||
printf(" --interlaced Interlaced output\n");
|
printf(" --interlaced Interlaced output\n");
|
||||||
printf(" --encode-limit N Encode only N frames (for testing)\n");
|
printf(" --encode-limit N Encode only N frames (for testing)\n");
|
||||||
|
printf(" -t, --threads N Parallel encoding threads (default: min(8, available CPUs))\n");
|
||||||
|
printf(" 0 or 1 = single-threaded, 2-16 = multithreaded\n");
|
||||||
printf(" -v, --verbose Verbose output\n");
|
printf(" -v, --verbose Verbose output\n");
|
||||||
printf(" -h, --help Show this help\n");
|
printf(" -h, --help Show this help\n");
|
||||||
}
|
}
|
||||||
@@ -411,94 +480,236 @@ static FILE *spawn_ffmpeg_audio(dt_encoder_t *enc, pid_t *pid) {
|
|||||||
return fdopen(pipefd[0], "rb");
|
return fdopen(pipefd[0], "rb");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Multithreading Support
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Worker thread context - passed to worker_thread_main.
|
||||||
|
*/
|
||||||
|
typedef struct {
|
||||||
|
dt_encoder_t *enc;
|
||||||
|
int thread_id;
|
||||||
|
} worker_context_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Worker thread main function.
|
||||||
|
* Continuously picks up jobs from the job pool and encodes them.
|
||||||
|
*/
|
||||||
|
static void *worker_thread_main(void *arg) {
|
||||||
|
worker_context_t *wctx = (worker_context_t *)arg;
|
||||||
|
dt_encoder_t *enc = wctx->enc;
|
||||||
|
(void)wctx->thread_id; // Unused but kept for debugging
|
||||||
|
|
||||||
|
while (1) {
|
||||||
|
pthread_mutex_lock(&enc->job_mutex);
|
||||||
|
|
||||||
|
// Wait for a job or shutdown signal
|
||||||
|
while (!enc->shutdown_workers) {
|
||||||
|
// Look for a job slot that is ready to encode
|
||||||
|
int found_job = -1;
|
||||||
|
for (int i = 0; i < enc->num_threads; i++) {
|
||||||
|
if (enc->gop_jobs[i].status == GOP_SLOT_READY) {
|
||||||
|
enc->gop_jobs[i].status = GOP_SLOT_ENCODING;
|
||||||
|
found_job = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (found_job >= 0) {
|
||||||
|
pthread_mutex_unlock(&enc->job_mutex);
|
||||||
|
|
||||||
|
// Encode this GOP
|
||||||
|
gop_job_t *job = &enc->gop_jobs[found_job];
|
||||||
|
|
||||||
|
// Create thread-local encoder context
|
||||||
|
tav_encoder_context_t *ctx = tav_encoder_create(&job->params);
|
||||||
|
if (!ctx) {
|
||||||
|
fprintf(stderr, "Failed to create encoder for GOP %d\n", job->gop_index);
|
||||||
|
job->success = 0;
|
||||||
|
} else {
|
||||||
|
// Encode video GOP
|
||||||
|
int result = tav_encoder_encode_gop(ctx,
|
||||||
|
(const uint8_t **)job->rgb_frames,
|
||||||
|
job->num_frames, job->frame_numbers,
|
||||||
|
&job->packet);
|
||||||
|
job->success = (result >= 0 && job->packet != NULL);
|
||||||
|
|
||||||
|
// Encode audio
|
||||||
|
if (job->success && job->audio_sample_count > 0) {
|
||||||
|
int max_index = tad32_quality_to_max_index(enc->quality_index);
|
||||||
|
job->tad_size = tad32_encode_chunk(job->audio_samples, job->audio_sample_count,
|
||||||
|
max_index, 1.0f, job->tad_output);
|
||||||
|
}
|
||||||
|
|
||||||
|
tav_encoder_free(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mark job as complete (reacquire lock for next iteration)
|
||||||
|
pthread_mutex_lock(&enc->job_mutex);
|
||||||
|
job->status = GOP_SLOT_COMPLETE;
|
||||||
|
pthread_cond_broadcast(&enc->job_complete);
|
||||||
|
// Keep lock held for next iteration of inner while loop
|
||||||
|
continue; // Look for more jobs
|
||||||
|
}
|
||||||
|
|
||||||
|
// No job found, wait for signal
|
||||||
|
pthread_cond_wait(&enc->job_ready, &enc->job_mutex);
|
||||||
|
}
|
||||||
|
|
||||||
|
pthread_mutex_unlock(&enc->job_mutex);
|
||||||
|
break; // Shutdown
|
||||||
|
}
|
||||||
|
|
||||||
|
free(wctx);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize multithreading resources.
|
||||||
|
* Returns 0 on success, -1 on failure.
|
||||||
|
*/
|
||||||
|
static int init_threading(dt_encoder_t *enc) {
|
||||||
|
if (enc->num_threads <= 0) {
|
||||||
|
return 0; // Single-threaded mode
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initialize mutex and condition variables
|
||||||
|
if (pthread_mutex_init(&enc->job_mutex, NULL) != 0) {
|
||||||
|
fprintf(stderr, "Error: Failed to initialize job mutex\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (pthread_cond_init(&enc->job_ready, NULL) != 0) {
|
||||||
|
fprintf(stderr, "Error: Failed to initialize job_ready cond\n");
|
||||||
|
pthread_mutex_destroy(&enc->job_mutex);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (pthread_cond_init(&enc->job_complete, NULL) != 0) {
|
||||||
|
fprintf(stderr, "Error: Failed to initialize job_complete cond\n");
|
||||||
|
pthread_cond_destroy(&enc->job_ready);
|
||||||
|
pthread_mutex_destroy(&enc->job_mutex);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocate job slots (one per thread)
|
||||||
|
enc->gop_jobs = calloc(enc->num_threads, sizeof(gop_job_t));
|
||||||
|
if (!enc->gop_jobs) {
|
||||||
|
fprintf(stderr, "Error: Failed to allocate job slots\n");
|
||||||
|
pthread_cond_destroy(&enc->job_complete);
|
||||||
|
pthread_cond_destroy(&enc->job_ready);
|
||||||
|
pthread_mutex_destroy(&enc->job_mutex);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocate worker thread handles
|
||||||
|
enc->worker_threads = malloc(enc->num_threads * sizeof(pthread_t));
|
||||||
|
if (!enc->worker_threads) {
|
||||||
|
fprintf(stderr, "Error: Failed to allocate thread handles\n");
|
||||||
|
free(enc->gop_jobs);
|
||||||
|
pthread_cond_destroy(&enc->job_complete);
|
||||||
|
pthread_cond_destroy(&enc->job_ready);
|
||||||
|
pthread_mutex_destroy(&enc->job_mutex);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start worker threads
|
||||||
|
enc->shutdown_workers = 0;
|
||||||
|
for (int i = 0; i < enc->num_threads; i++) {
|
||||||
|
worker_context_t *wctx = malloc(sizeof(worker_context_t));
|
||||||
|
if (!wctx) {
|
||||||
|
fprintf(stderr, "Error: Failed to allocate worker context\n");
|
||||||
|
enc->shutdown_workers = 1;
|
||||||
|
pthread_cond_broadcast(&enc->job_ready);
|
||||||
|
for (int j = 0; j < i; j++) {
|
||||||
|
pthread_join(enc->worker_threads[j], NULL);
|
||||||
|
}
|
||||||
|
free(enc->worker_threads);
|
||||||
|
free(enc->gop_jobs);
|
||||||
|
pthread_cond_destroy(&enc->job_complete);
|
||||||
|
pthread_cond_destroy(&enc->job_ready);
|
||||||
|
pthread_mutex_destroy(&enc->job_mutex);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
wctx->enc = enc;
|
||||||
|
wctx->thread_id = i;
|
||||||
|
|
||||||
|
if (pthread_create(&enc->worker_threads[i], NULL, worker_thread_main, wctx) != 0) {
|
||||||
|
fprintf(stderr, "Error: Failed to create worker thread %d\n", i);
|
||||||
|
free(wctx);
|
||||||
|
enc->shutdown_workers = 1;
|
||||||
|
pthread_cond_broadcast(&enc->job_ready);
|
||||||
|
for (int j = 0; j < i; j++) {
|
||||||
|
pthread_join(enc->worker_threads[j], NULL);
|
||||||
|
}
|
||||||
|
free(enc->worker_threads);
|
||||||
|
free(enc->gop_jobs);
|
||||||
|
pthread_cond_destroy(&enc->job_complete);
|
||||||
|
pthread_cond_destroy(&enc->job_ready);
|
||||||
|
pthread_mutex_destroy(&enc->job_mutex);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Started %d worker threads for parallel GOP encoding\n", enc->num_threads);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Shutdown multithreading resources.
|
||||||
|
*/
|
||||||
|
static void shutdown_threading(dt_encoder_t *enc) {
|
||||||
|
if (enc->num_threads <= 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Signal workers to shutdown
|
||||||
|
pthread_mutex_lock(&enc->job_mutex);
|
||||||
|
enc->shutdown_workers = 1;
|
||||||
|
pthread_cond_broadcast(&enc->job_ready);
|
||||||
|
pthread_mutex_unlock(&enc->job_mutex);
|
||||||
|
|
||||||
|
// Wait for all workers to finish
|
||||||
|
for (int i = 0; i < enc->num_threads; i++) {
|
||||||
|
pthread_join(enc->worker_threads[i], NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Free job slots (and any remaining resources)
|
||||||
|
if (enc->gop_jobs) {
|
||||||
|
for (int i = 0; i < enc->num_threads; i++) {
|
||||||
|
if (enc->gop_jobs[i].packet) {
|
||||||
|
tav_encoder_free_packet(enc->gop_jobs[i].packet);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(enc->gop_jobs);
|
||||||
|
enc->gop_jobs = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (enc->worker_threads) {
|
||||||
|
free(enc->worker_threads);
|
||||||
|
enc->worker_threads = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
pthread_cond_destroy(&enc->job_complete);
|
||||||
|
pthread_cond_destroy(&enc->job_ready);
|
||||||
|
pthread_mutex_destroy(&enc->job_mutex);
|
||||||
|
}
|
||||||
|
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
// Main Encoding Loop
|
// Main Encoding Loop
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
|
|
||||||
static int run_encoder(dt_encoder_t *enc) {
|
// Single-threaded encoding loop
|
||||||
// Open output file
|
static int run_encoder_st(dt_encoder_t *enc, FILE *video_pipe, FILE *audio_pipe,
|
||||||
enc->output_fp = fopen(enc->output_file, "wb");
|
pid_t video_pid __attribute__((unused)),
|
||||||
if (!enc->output_fp) {
|
pid_t audio_pid __attribute__((unused))) {
|
||||||
fprintf(stderr, "Error: Cannot create output file: %s\n", enc->output_file);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set up video encoder
|
|
||||||
tav_encoder_params_t params;
|
|
||||||
tav_encoder_params_init(&params, enc->width, enc->height);
|
|
||||||
params.fps_num = enc->fps_num;
|
|
||||||
params.fps_den = enc->fps_den;
|
|
||||||
params.wavelet_type = 1; // CDF 9/7
|
|
||||||
params.temporal_wavelet = 255; // Haar
|
|
||||||
params.decomp_levels = DT_SPATIAL_LEVELS;
|
|
||||||
params.temporal_levels = DT_TEMPORAL_LEVELS;
|
|
||||||
params.enable_temporal_dwt = 1;
|
|
||||||
params.gop_size = DT_GOP_SIZE;
|
|
||||||
params.quality_level = enc->quality_index;
|
|
||||||
params.quantiser_y = QUALITY_Y[enc->quality_index];
|
|
||||||
params.quantiser_co = QUALITY_CO[enc->quality_index];
|
|
||||||
params.quantiser_cg = QUALITY_CG[enc->quality_index];
|
|
||||||
params.entropy_coder = 1; // EZBC
|
|
||||||
params.encoder_preset = 0x01; // Sports mode
|
|
||||||
params.monoblock = 1; // Force monoblock
|
|
||||||
params.verbose = enc->verbose;
|
|
||||||
|
|
||||||
enc->video_ctx = tav_encoder_create(&params);
|
|
||||||
if (!enc->video_ctx) {
|
|
||||||
fprintf(stderr, "Error: Cannot create video encoder\n");
|
|
||||||
fclose(enc->output_fp);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
printf("Forced Monoblock mode (--monoblock)\n");
|
|
||||||
|
|
||||||
// Get actual parameters (may have been adjusted)
|
|
||||||
tav_encoder_get_params(enc->video_ctx, &params);
|
|
||||||
|
|
||||||
if (enc->verbose) {
|
|
||||||
printf("Auto-selected Haar temporal wavelet with sports mode (resolution: %dx%d = %d pixels, quantiser_y = %d)\n",
|
|
||||||
enc->width, enc->height, enc->width * enc->height, params.quantiser_y);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Spawn FFmpeg for video
|
|
||||||
pid_t video_pid;
|
|
||||||
FILE *video_pipe = spawn_ffmpeg_video(enc, &video_pid);
|
|
||||||
if (!video_pipe) {
|
|
||||||
tav_encoder_free(enc->video_ctx);
|
|
||||||
fclose(enc->output_fp);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Spawn FFmpeg for audio
|
|
||||||
pid_t audio_pid;
|
|
||||||
FILE *audio_pipe = spawn_ffmpeg_audio(enc, &audio_pid);
|
|
||||||
if (!audio_pipe) {
|
|
||||||
fclose(video_pipe);
|
|
||||||
waitpid(video_pid, NULL, 0);
|
|
||||||
tav_encoder_free(enc->video_ctx);
|
|
||||||
fclose(enc->output_fp);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Allocate frame buffers
|
|
||||||
size_t frame_size = enc->width * enc->height * 3;
|
size_t frame_size = enc->width * enc->height * 3;
|
||||||
enc->gop_frames = malloc(DT_GOP_SIZE * sizeof(uint8_t *));
|
|
||||||
for (int i = 0; i < DT_GOP_SIZE; i++) {
|
|
||||||
enc->gop_frames[i] = malloc(frame_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Audio buffer (enough for one GOP worth of audio)
|
|
||||||
double gop_duration = (double)DT_GOP_SIZE * enc->fps_den / enc->fps_num;
|
double gop_duration = (double)DT_GOP_SIZE * enc->fps_den / enc->fps_num;
|
||||||
size_t audio_samples_per_gop = (size_t)(AUDIO_SAMPLE_RATE * gop_duration) + 1024;
|
size_t audio_samples_per_gop = (size_t)(AUDIO_SAMPLE_RATE * gop_duration) + 1024;
|
||||||
enc->audio_buffer = malloc(audio_samples_per_gop * 2 * sizeof(float));
|
|
||||||
enc->audio_buffer_capacity = audio_samples_per_gop;
|
|
||||||
enc->audio_buffer_samples = 0;
|
|
||||||
|
|
||||||
// TAD output buffer
|
// TAD output buffer
|
||||||
size_t tad_buffer_size = audio_samples_per_gop * 2; // Conservative estimate
|
size_t tad_buffer_size = audio_samples_per_gop * 2;
|
||||||
uint8_t *tad_output = malloc(tad_buffer_size);
|
uint8_t *tad_output = malloc(tad_buffer_size);
|
||||||
|
|
||||||
// Encoding loop
|
|
||||||
enc->frame_number = 0;
|
enc->frame_number = 0;
|
||||||
enc->gop_frame_count = 0;
|
enc->gop_frame_count = 0;
|
||||||
enc->current_timecode_ns = 0;
|
enc->current_timecode_ns = 0;
|
||||||
@@ -506,26 +717,13 @@ static int run_encoder(dt_encoder_t *enc) {
|
|||||||
clock_t start_time = clock();
|
clock_t start_time = clock();
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
// Check encode limit
|
|
||||||
if (enc->encode_limit > 0 && enc->frame_number >= enc->encode_limit) {
|
if (enc->encode_limit > 0 && enc->frame_number >= enc->encode_limit) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read video frame
|
|
||||||
size_t bytes_read = fread(enc->gop_frames[enc->gop_frame_count], 1, frame_size, video_pipe);
|
size_t bytes_read = fread(enc->gop_frames[enc->gop_frame_count], 1, frame_size, video_pipe);
|
||||||
if (bytes_read < frame_size) {
|
if (bytes_read < frame_size) {
|
||||||
if (enc->verbose) {
|
break;
|
||||||
fprintf(stderr, "Video read incomplete: got %zu/%zu bytes, frame %d, eof=%d, error=%d\n",
|
|
||||||
bytes_read, frame_size, enc->frame_number, feof(video_pipe), ferror(video_pipe));
|
|
||||||
fprintf(stderr, "Audio buffer status: %zu/%zu samples\n",
|
|
||||||
enc->audio_buffer_samples, enc->audio_buffer_capacity);
|
|
||||||
// Try to read more audio to see if pipe is blocked
|
|
||||||
float test_audio[16];
|
|
||||||
size_t test_read = fread(test_audio, sizeof(float), 16, audio_pipe);
|
|
||||||
fprintf(stderr, "Test audio read: %zu floats, eof=%d, error=%d\n",
|
|
||||||
test_read, feof(audio_pipe), ferror(audio_pipe));
|
|
||||||
}
|
|
||||||
break; // End of video
|
|
||||||
}
|
}
|
||||||
|
|
||||||
enc->gop_frame_count++;
|
enc->gop_frame_count++;
|
||||||
@@ -536,8 +734,6 @@ static int run_encoder(dt_encoder_t *enc) {
|
|||||||
size_t audio_samples_per_frame = (size_t)(AUDIO_SAMPLE_RATE * frame_duration);
|
size_t audio_samples_per_frame = (size_t)(AUDIO_SAMPLE_RATE * frame_duration);
|
||||||
size_t audio_bytes = audio_samples_per_frame * 2 * sizeof(float);
|
size_t audio_bytes = audio_samples_per_frame * 2 * sizeof(float);
|
||||||
|
|
||||||
// Always read audio to prevent pipe from filling up and blocking FFmpeg
|
|
||||||
// Expand buffer if needed
|
|
||||||
if (enc->audio_buffer_samples + audio_samples_per_frame > enc->audio_buffer_capacity) {
|
if (enc->audio_buffer_samples + audio_samples_per_frame > enc->audio_buffer_capacity) {
|
||||||
size_t new_capacity = enc->audio_buffer_capacity * 2;
|
size_t new_capacity = enc->audio_buffer_capacity * 2;
|
||||||
float *new_buffer = realloc(enc->audio_buffer, new_capacity * 2 * sizeof(float));
|
float *new_buffer = realloc(enc->audio_buffer, new_capacity * 2 * sizeof(float));
|
||||||
@@ -553,7 +749,6 @@ static int run_encoder(dt_encoder_t *enc) {
|
|||||||
|
|
||||||
// Encode GOP when full
|
// Encode GOP when full
|
||||||
if (enc->gop_frame_count >= DT_GOP_SIZE) {
|
if (enc->gop_frame_count >= DT_GOP_SIZE) {
|
||||||
// Encode video GOP
|
|
||||||
tav_encoder_packet_t *video_packet = NULL;
|
tav_encoder_packet_t *video_packet = NULL;
|
||||||
int frame_numbers[DT_GOP_SIZE];
|
int frame_numbers[DT_GOP_SIZE];
|
||||||
for (int i = 0; i < DT_GOP_SIZE; i++) {
|
for (int i = 0; i < DT_GOP_SIZE; i++) {
|
||||||
@@ -569,36 +764,28 @@ static int run_encoder(dt_encoder_t *enc) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Encode audio
|
|
||||||
int max_index = tad32_quality_to_max_index(enc->quality_index);
|
int max_index = tad32_quality_to_max_index(enc->quality_index);
|
||||||
size_t tad_size = tad32_encode_chunk(enc->audio_buffer, enc->audio_buffer_samples,
|
size_t tad_size = tad32_encode_chunk(enc->audio_buffer, enc->audio_buffer_samples,
|
||||||
max_index, 1.0f, tad_output);
|
max_index, 1.0f, tad_output);
|
||||||
|
|
||||||
// Write packet
|
|
||||||
write_packet(enc, enc->current_timecode_ns,
|
write_packet(enc, enc->current_timecode_ns,
|
||||||
tad_output, tad_size,
|
tad_output, tad_size,
|
||||||
video_packet->data, video_packet->size,
|
video_packet->data, video_packet->size,
|
||||||
DT_GOP_SIZE, (uint16_t)enc->audio_buffer_samples, max_index);
|
DT_GOP_SIZE, (uint16_t)enc->audio_buffer_samples, max_index);
|
||||||
|
|
||||||
// Update timecode
|
|
||||||
enc->current_timecode_ns += (uint64_t)(gop_duration * 1e9);
|
enc->current_timecode_ns += (uint64_t)(gop_duration * 1e9);
|
||||||
enc->frames_encoded += DT_GOP_SIZE;
|
enc->frames_encoded += DT_GOP_SIZE;
|
||||||
|
|
||||||
// Reset buffers
|
|
||||||
enc->gop_frame_count = 0;
|
enc->gop_frame_count = 0;
|
||||||
enc->audio_buffer_samples = 0;
|
enc->audio_buffer_samples = 0;
|
||||||
|
|
||||||
tav_encoder_free_packet(video_packet);
|
tav_encoder_free_packet(video_packet);
|
||||||
|
|
||||||
// Display progress (similar to reference TAV encoder)
|
// Display progress
|
||||||
clock_t now = clock();
|
clock_t now = clock();
|
||||||
double elapsed = (double)(now - start_time) / CLOCKS_PER_SEC;
|
double elapsed = (double)(now - start_time) / CLOCKS_PER_SEC;
|
||||||
double fps = elapsed > 0 ? (double)enc->frame_number / elapsed : 0.0;
|
double fps = elapsed > 0 ? (double)enc->frame_number / elapsed : 0.0;
|
||||||
|
|
||||||
// Calculate bitrate: output_size_bits / duration_seconds / 1000
|
|
||||||
double duration = (double)enc->frame_number * enc->fps_den / enc->fps_num;
|
double duration = (double)enc->frame_number * enc->fps_den / enc->fps_num;
|
||||||
double bitrate = duration > 0 ? (ftell(enc->output_fp) * 8.0) / duration / 1000.0 : 0.0;
|
double bitrate = duration > 0 ? (ftell(enc->output_fp) * 8.0) / duration / 1000.0 : 0.0;
|
||||||
|
|
||||||
long gop_count = enc->frame_number / DT_GOP_SIZE;
|
long gop_count = enc->frame_number / DT_GOP_SIZE;
|
||||||
size_t total_kb = ftell(enc->output_fp) / 1024;
|
size_t total_kb = ftell(enc->output_fp) / 1024;
|
||||||
|
|
||||||
@@ -633,24 +820,416 @@ static int run_encoder(dt_encoder_t *enc) {
|
|||||||
enc->frames_encoded += enc->gop_frame_count;
|
enc->frames_encoded += enc->gop_frame_count;
|
||||||
tav_encoder_free_packet(video_packet);
|
tav_encoder_free_packet(video_packet);
|
||||||
}
|
}
|
||||||
|
|
||||||
free(frame_numbers);
|
free(frame_numbers);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
free(tad_output);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Multithreaded encoding loop
|
||||||
|
static int run_encoder_mt(dt_encoder_t *enc, FILE *video_pipe, FILE *audio_pipe,
|
||||||
|
pid_t video_pid __attribute__((unused)),
|
||||||
|
pid_t audio_pid __attribute__((unused))) {
|
||||||
|
size_t frame_size = enc->width * enc->height * 3;
|
||||||
|
double gop_duration = (double)DT_GOP_SIZE * enc->fps_den / enc->fps_num;
|
||||||
|
// Calculate audio buffer size with generous padding to handle FFmpeg's audio delivery
|
||||||
|
// FFmpeg may deliver all audio for a GOP in the first read, so we need space for:
|
||||||
|
// 1. The expected GOP audio: AUDIO_SAMPLE_RATE * gop_duration
|
||||||
|
// 2. Worst-case per-frame variations: DT_GOP_SIZE * samples_per_frame
|
||||||
|
size_t expected_samples = (size_t)(AUDIO_SAMPLE_RATE * gop_duration);
|
||||||
|
size_t samples_per_frame = (size_t)(AUDIO_SAMPLE_RATE * enc->fps_den / enc->fps_num) + 1;
|
||||||
|
size_t audio_samples_per_gop = expected_samples + (DT_GOP_SIZE * samples_per_frame);
|
||||||
|
size_t tad_buffer_size = audio_samples_per_gop * 2;
|
||||||
|
|
||||||
|
// Initialize threading
|
||||||
|
if (init_threading(enc) < 0) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocate per-slot frame buffers and audio buffers
|
||||||
|
for (int slot = 0; slot < enc->num_threads; slot++) {
|
||||||
|
enc->gop_jobs[slot].rgb_frames = malloc(DT_GOP_SIZE * sizeof(uint8_t*));
|
||||||
|
enc->gop_jobs[slot].frame_numbers = malloc(DT_GOP_SIZE * sizeof(int));
|
||||||
|
enc->gop_jobs[slot].audio_samples = malloc(audio_samples_per_gop * 2 * sizeof(float));
|
||||||
|
enc->gop_jobs[slot].tad_output = malloc(tad_buffer_size);
|
||||||
|
|
||||||
|
if (!enc->gop_jobs[slot].rgb_frames || !enc->gop_jobs[slot].frame_numbers ||
|
||||||
|
!enc->gop_jobs[slot].audio_samples || !enc->gop_jobs[slot].tad_output) {
|
||||||
|
fprintf(stderr, "Error: Failed to allocate job slot %d buffers\n", slot);
|
||||||
|
shutdown_threading(enc);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int f = 0; f < DT_GOP_SIZE; f++) {
|
||||||
|
enc->gop_jobs[slot].rgb_frames[f] = malloc(frame_size);
|
||||||
|
if (!enc->gop_jobs[slot].rgb_frames[f]) {
|
||||||
|
fprintf(stderr, "Error: Failed to allocate frame buffer for slot %d\n", slot);
|
||||||
|
shutdown_threading(enc);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy encoder params for thread safety
|
||||||
|
enc->gop_jobs[slot].params = enc->enc_params;
|
||||||
|
enc->gop_jobs[slot].status = GOP_SLOT_EMPTY;
|
||||||
|
enc->gop_jobs[slot].num_frames = 0;
|
||||||
|
enc->gop_jobs[slot].audio_sample_count = 0;
|
||||||
|
enc->gop_jobs[slot].tad_size = 0;
|
||||||
|
enc->gop_jobs[slot].packet = NULL;
|
||||||
|
enc->gop_jobs[slot].success = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Encoding frames with %d threads...\n", enc->num_threads);
|
||||||
|
clock_t start_time = clock();
|
||||||
|
|
||||||
|
int current_slot = 0;
|
||||||
|
int next_gop_to_write = 0;
|
||||||
|
int current_gop_index = 0;
|
||||||
|
int frames_in_current_gop = 0;
|
||||||
|
int encoding_error = 0;
|
||||||
|
int eof_reached = 0;
|
||||||
|
enc->current_timecode_ns = 0;
|
||||||
|
|
||||||
|
while (!encoding_error && !eof_reached) {
|
||||||
|
// Step 1: Try to write any completed GOPs in order
|
||||||
|
pthread_mutex_lock(&enc->job_mutex);
|
||||||
|
while (!encoding_error) {
|
||||||
|
int found = -1;
|
||||||
|
for (int i = 0; i < enc->num_threads; i++) {
|
||||||
|
if (enc->gop_jobs[i].status == GOP_SLOT_COMPLETE &&
|
||||||
|
enc->gop_jobs[i].gop_index == next_gop_to_write) {
|
||||||
|
found = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (found < 0) break;
|
||||||
|
|
||||||
|
gop_job_t *job = &enc->gop_jobs[found];
|
||||||
|
pthread_mutex_unlock(&enc->job_mutex);
|
||||||
|
|
||||||
|
// Write this GOP
|
||||||
|
if (job->success && job->packet) {
|
||||||
|
int max_index = tad32_quality_to_max_index(enc->quality_index);
|
||||||
|
write_packet(enc, enc->current_timecode_ns,
|
||||||
|
job->tad_output, job->tad_size,
|
||||||
|
job->packet->data, job->packet->size,
|
||||||
|
job->num_frames, (uint16_t)job->audio_sample_count, max_index);
|
||||||
|
|
||||||
|
enc->current_timecode_ns += (uint64_t)(gop_duration * 1e9);
|
||||||
|
enc->frames_encoded += job->num_frames;
|
||||||
|
|
||||||
|
tav_encoder_free_packet(job->packet);
|
||||||
|
job->packet = NULL;
|
||||||
|
|
||||||
|
// Display progress
|
||||||
|
clock_t now = clock();
|
||||||
|
double elapsed = (double)(now - start_time) / CLOCKS_PER_SEC;
|
||||||
|
double fps = elapsed > 0 ? (double)enc->frames_encoded / elapsed : 0.0;
|
||||||
|
double duration = (double)enc->frames_encoded * enc->fps_den / enc->fps_num;
|
||||||
|
double bitrate = duration > 0 ? (ftell(enc->output_fp) * 8.0) / duration / 1000.0 : 0.0;
|
||||||
|
long gop_count = enc->frames_encoded / DT_GOP_SIZE;
|
||||||
|
size_t total_kb = ftell(enc->output_fp) / 1024;
|
||||||
|
|
||||||
|
printf("\rFrame %lu | GOPs: %ld | %.1f fps | %.1f kbps | %zu KB ",
|
||||||
|
enc->frames_encoded, gop_count, fps, bitrate, total_kb);
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
|
|
||||||
|
pthread_mutex_lock(&enc->job_mutex);
|
||||||
|
job->status = GOP_SLOT_EMPTY;
|
||||||
|
job->num_frames = 0;
|
||||||
|
job->audio_sample_count = 0;
|
||||||
|
job->tad_size = 0;
|
||||||
|
next_gop_to_write++;
|
||||||
|
}
|
||||||
|
pthread_mutex_unlock(&enc->job_mutex);
|
||||||
|
|
||||||
|
if (encoding_error || eof_reached) break;
|
||||||
|
|
||||||
|
// Step 2: Fill current slot with frames
|
||||||
|
gop_job_t *slot = &enc->gop_jobs[current_slot];
|
||||||
|
|
||||||
|
// Wait for slot to be empty
|
||||||
|
pthread_mutex_lock(&enc->job_mutex);
|
||||||
|
while (slot->status != GOP_SLOT_EMPTY && !enc->shutdown_workers) {
|
||||||
|
// While waiting, check if we can write any completed GOPs
|
||||||
|
int wrote_something = 0;
|
||||||
|
for (int i = 0; i < enc->num_threads; i++) {
|
||||||
|
if (enc->gop_jobs[i].status == GOP_SLOT_COMPLETE &&
|
||||||
|
enc->gop_jobs[i].gop_index == next_gop_to_write) {
|
||||||
|
gop_job_t *job = &enc->gop_jobs[i];
|
||||||
|
pthread_mutex_unlock(&enc->job_mutex);
|
||||||
|
|
||||||
|
if (job->success && job->packet) {
|
||||||
|
int max_index = tad32_quality_to_max_index(enc->quality_index);
|
||||||
|
write_packet(enc, enc->current_timecode_ns,
|
||||||
|
job->tad_output, job->tad_size,
|
||||||
|
job->packet->data, job->packet->size,
|
||||||
|
job->num_frames, (uint16_t)job->audio_sample_count, max_index);
|
||||||
|
|
||||||
|
enc->current_timecode_ns += (uint64_t)(gop_duration * 1e9);
|
||||||
|
enc->frames_encoded += job->num_frames;
|
||||||
|
|
||||||
|
tav_encoder_free_packet(job->packet);
|
||||||
|
job->packet = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
pthread_mutex_lock(&enc->job_mutex);
|
||||||
|
job->status = GOP_SLOT_EMPTY;
|
||||||
|
job->num_frames = 0;
|
||||||
|
job->audio_sample_count = 0;
|
||||||
|
job->tad_size = 0;
|
||||||
|
next_gop_to_write++;
|
||||||
|
wrote_something = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!wrote_something) {
|
||||||
|
pthread_cond_wait(&enc->job_complete, &enc->job_mutex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pthread_mutex_unlock(&enc->job_mutex);
|
||||||
|
|
||||||
|
// Reset audio accumulator only when starting a fresh GOP
|
||||||
|
if (frames_in_current_gop == 0) {
|
||||||
|
slot->audio_sample_count = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read frames into the slot
|
||||||
|
while (frames_in_current_gop < DT_GOP_SIZE && !eof_reached) {
|
||||||
|
if (enc->encode_limit > 0 && enc->frame_number >= enc->encode_limit) {
|
||||||
|
eof_reached = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t bytes_read = fread(slot->rgb_frames[frames_in_current_gop], 1, frame_size, video_pipe);
|
||||||
|
if (bytes_read < frame_size) {
|
||||||
|
eof_reached = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
slot->frame_numbers[frames_in_current_gop] = enc->frame_number;
|
||||||
|
enc->frame_number++;
|
||||||
|
frames_in_current_gop++;
|
||||||
|
|
||||||
|
// Read corresponding audio - read whatever is available up to buffer capacity
|
||||||
|
// Note: FFmpeg may buffer audio, so the first read might get multiple frames worth
|
||||||
|
size_t audio_buffer_capacity_samples = audio_samples_per_gop;
|
||||||
|
size_t audio_space_remaining = audio_buffer_capacity_samples - slot->audio_sample_count;
|
||||||
|
|
||||||
|
if (audio_space_remaining > 0) {
|
||||||
|
// Read up to the remaining buffer space
|
||||||
|
size_t max_read_bytes = audio_space_remaining * 2 * sizeof(float);
|
||||||
|
size_t audio_read = fread(slot->audio_samples + slot->audio_sample_count * 2,
|
||||||
|
1, max_read_bytes, audio_pipe);
|
||||||
|
slot->audio_sample_count += audio_read / (2 * sizeof(float));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Submit GOP when full
|
||||||
|
if (frames_in_current_gop >= DT_GOP_SIZE) {
|
||||||
|
slot->num_frames = frames_in_current_gop;
|
||||||
|
slot->gop_index = current_gop_index;
|
||||||
|
|
||||||
|
pthread_mutex_lock(&enc->job_mutex);
|
||||||
|
slot->status = GOP_SLOT_READY;
|
||||||
|
pthread_cond_broadcast(&enc->job_ready);
|
||||||
|
pthread_mutex_unlock(&enc->job_mutex);
|
||||||
|
|
||||||
|
current_slot = (current_slot + 1) % enc->num_threads;
|
||||||
|
current_gop_index++;
|
||||||
|
frames_in_current_gop = 0;
|
||||||
|
break; // Exit frame-reading loop to wait for next available slot
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Submit any partial GOP at EOF
|
||||||
|
if (frames_in_current_gop > 0) {
|
||||||
|
gop_job_t *slot = &enc->gop_jobs[current_slot];
|
||||||
|
slot->num_frames = frames_in_current_gop;
|
||||||
|
slot->gop_index = current_gop_index;
|
||||||
|
|
||||||
|
pthread_mutex_lock(&enc->job_mutex);
|
||||||
|
slot->status = GOP_SLOT_READY;
|
||||||
|
pthread_cond_broadcast(&enc->job_ready);
|
||||||
|
pthread_mutex_unlock(&enc->job_mutex);
|
||||||
|
|
||||||
|
current_gop_index++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for all remaining GOPs to complete and write them
|
||||||
|
while (!encoding_error && next_gop_to_write < current_gop_index) {
|
||||||
|
pthread_mutex_lock(&enc->job_mutex);
|
||||||
|
|
||||||
|
int found = -1;
|
||||||
|
while (found < 0 && !encoding_error) {
|
||||||
|
for (int i = 0; i < enc->num_threads; i++) {
|
||||||
|
if (enc->gop_jobs[i].status == GOP_SLOT_COMPLETE &&
|
||||||
|
enc->gop_jobs[i].gop_index == next_gop_to_write) {
|
||||||
|
found = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (found < 0) {
|
||||||
|
pthread_cond_wait(&enc->job_complete, &enc->job_mutex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (found >= 0) {
|
||||||
|
gop_job_t *job = &enc->gop_jobs[found];
|
||||||
|
pthread_mutex_unlock(&enc->job_mutex);
|
||||||
|
|
||||||
|
if (job->success && job->packet) {
|
||||||
|
int max_index = tad32_quality_to_max_index(enc->quality_index);
|
||||||
|
write_packet(enc, enc->current_timecode_ns,
|
||||||
|
job->tad_output, job->tad_size,
|
||||||
|
job->packet->data, job->packet->size,
|
||||||
|
job->num_frames, (uint16_t)job->audio_sample_count, max_index);
|
||||||
|
|
||||||
|
enc->current_timecode_ns += (uint64_t)(gop_duration * 1e9);
|
||||||
|
enc->frames_encoded += job->num_frames;
|
||||||
|
|
||||||
|
tav_encoder_free_packet(job->packet);
|
||||||
|
job->packet = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
pthread_mutex_lock(&enc->job_mutex);
|
||||||
|
job->status = GOP_SLOT_EMPTY;
|
||||||
|
job->num_frames = 0;
|
||||||
|
job->audio_sample_count = 0;
|
||||||
|
job->tad_size = 0;
|
||||||
|
next_gop_to_write++;
|
||||||
|
pthread_mutex_unlock(&enc->job_mutex);
|
||||||
|
} else {
|
||||||
|
pthread_mutex_unlock(&enc->job_mutex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Free per-slot buffers before shutdown
|
||||||
|
for (int slot = 0; slot < enc->num_threads; slot++) {
|
||||||
|
if (enc->gop_jobs[slot].rgb_frames) {
|
||||||
|
for (int f = 0; f < DT_GOP_SIZE; f++) {
|
||||||
|
free(enc->gop_jobs[slot].rgb_frames[f]);
|
||||||
|
}
|
||||||
|
free(enc->gop_jobs[slot].rgb_frames);
|
||||||
|
}
|
||||||
|
free(enc->gop_jobs[slot].frame_numbers);
|
||||||
|
free(enc->gop_jobs[slot].audio_samples);
|
||||||
|
free(enc->gop_jobs[slot].tad_output);
|
||||||
|
}
|
||||||
|
|
||||||
|
shutdown_threading(enc);
|
||||||
|
|
||||||
|
return encoding_error ? -1 : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int run_encoder(dt_encoder_t *enc) {
|
||||||
|
// Open output file
|
||||||
|
enc->output_fp = fopen(enc->output_file, "wb");
|
||||||
|
if (!enc->output_fp) {
|
||||||
|
fprintf(stderr, "Error: Cannot create output file: %s\n", enc->output_file);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set up video encoder params
|
||||||
|
tav_encoder_params_init(&enc->enc_params, enc->width, enc->height);
|
||||||
|
enc->enc_params.fps_num = enc->fps_num;
|
||||||
|
enc->enc_params.fps_den = enc->fps_den;
|
||||||
|
enc->enc_params.wavelet_type = 1; // CDF 9/7
|
||||||
|
enc->enc_params.temporal_wavelet = 255; // Haar
|
||||||
|
enc->enc_params.decomp_levels = DT_SPATIAL_LEVELS;
|
||||||
|
enc->enc_params.temporal_levels = DT_TEMPORAL_LEVELS;
|
||||||
|
enc->enc_params.enable_temporal_dwt = 1;
|
||||||
|
enc->enc_params.gop_size = DT_GOP_SIZE;
|
||||||
|
enc->enc_params.quality_level = enc->quality_index;
|
||||||
|
enc->enc_params.quantiser_y = QUALITY_Y[enc->quality_index];
|
||||||
|
enc->enc_params.quantiser_co = QUALITY_CO[enc->quality_index];
|
||||||
|
enc->enc_params.quantiser_cg = QUALITY_CG[enc->quality_index];
|
||||||
|
enc->enc_params.entropy_coder = 1; // EZBC
|
||||||
|
enc->enc_params.encoder_preset = 0x01; // Sports mode
|
||||||
|
enc->enc_params.monoblock = 1; // Force monoblock
|
||||||
|
enc->enc_params.verbose = enc->verbose;
|
||||||
|
|
||||||
|
// For single-threaded mode, create a context to validate params
|
||||||
|
enc->video_ctx = tav_encoder_create(&enc->enc_params);
|
||||||
|
if (!enc->video_ctx) {
|
||||||
|
fprintf(stderr, "Error: Cannot create video encoder\n");
|
||||||
|
fclose(enc->output_fp);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Forced Monoblock mode (--monoblock)\n");
|
||||||
|
|
||||||
|
// Get actual parameters (may have been adjusted)
|
||||||
|
tav_encoder_get_params(enc->video_ctx, &enc->enc_params);
|
||||||
|
|
||||||
|
if (enc->verbose) {
|
||||||
|
printf("Auto-selected Haar temporal wavelet with sports mode (resolution: %dx%d = %d pixels, quantiser_y = %d)\n",
|
||||||
|
enc->width, enc->height, enc->width * enc->height, enc->enc_params.quantiser_y);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Spawn FFmpeg for video
|
||||||
|
pid_t video_pid;
|
||||||
|
FILE *video_pipe = spawn_ffmpeg_video(enc, &video_pid);
|
||||||
|
if (!video_pipe) {
|
||||||
|
tav_encoder_free(enc->video_ctx);
|
||||||
|
fclose(enc->output_fp);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Spawn FFmpeg for audio
|
||||||
|
pid_t audio_pid;
|
||||||
|
FILE *audio_pipe = spawn_ffmpeg_audio(enc, &audio_pid);
|
||||||
|
if (!audio_pipe) {
|
||||||
|
fclose(video_pipe);
|
||||||
|
waitpid(video_pid, NULL, 0);
|
||||||
|
tav_encoder_free(enc->video_ctx);
|
||||||
|
fclose(enc->output_fp);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocate frame buffers for single-threaded mode
|
||||||
|
size_t frame_size = enc->width * enc->height * 3;
|
||||||
|
enc->gop_frames = malloc(DT_GOP_SIZE * sizeof(uint8_t *));
|
||||||
|
for (int i = 0; i < DT_GOP_SIZE; i++) {
|
||||||
|
enc->gop_frames[i] = malloc(frame_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Audio buffer (enough for one GOP worth of audio)
|
||||||
|
double gop_duration = (double)DT_GOP_SIZE * enc->fps_den / enc->fps_num;
|
||||||
|
size_t audio_samples_per_gop = (size_t)(AUDIO_SAMPLE_RATE * gop_duration) + 1024;
|
||||||
|
enc->audio_buffer = malloc(audio_samples_per_gop * 2 * sizeof(float));
|
||||||
|
enc->audio_buffer_capacity = audio_samples_per_gop;
|
||||||
|
enc->audio_buffer_samples = 0;
|
||||||
|
|
||||||
|
clock_t start_time = clock();
|
||||||
|
|
||||||
|
// Run encoding
|
||||||
|
if (enc->num_threads > 0) {
|
||||||
|
printf("Multithreaded mode: %d threads\n", enc->num_threads);
|
||||||
|
run_encoder_mt(enc, video_pipe, audio_pipe, video_pid, audio_pid);
|
||||||
|
} else {
|
||||||
|
printf("Single-threaded mode\n");
|
||||||
|
run_encoder_st(enc, video_pipe, audio_pipe, video_pid, audio_pid);
|
||||||
|
}
|
||||||
|
|
||||||
clock_t end_time = clock();
|
clock_t end_time = clock();
|
||||||
double elapsed = (double)(end_time - start_time) / CLOCKS_PER_SEC;
|
double elapsed = (double)(end_time - start_time) / CLOCKS_PER_SEC;
|
||||||
|
|
||||||
// Print statistics
|
// Print statistics
|
||||||
printf("\nEncoding complete:\n");
|
printf("\nEncoding complete%s:\n", enc->num_threads > 0 ? " (multithreaded)" : "");
|
||||||
printf(" Frames: %lu\n", enc->frames_encoded);
|
printf(" Frames: %lu\n", enc->frames_encoded);
|
||||||
printf(" GOPs: %lu\n", enc->packets_written);
|
printf(" GOPs: %lu\n", enc->packets_written);
|
||||||
printf(" Output size: %lu bytes (%.2f MB)\n", enc->bytes_written, enc->bytes_written / 1048576.0);
|
printf(" Output size: %lu bytes (%.2f MB)\n", enc->bytes_written, enc->bytes_written / 1048576.0);
|
||||||
printf(" Encoding speed: %.1f fps\n", enc->frames_encoded / elapsed);
|
printf(" Encoding speed: %.1f fps\n", enc->frames_encoded / elapsed);
|
||||||
printf(" Bitrate: %.1f kbps\n",
|
if (enc->frames_encoded > 0) {
|
||||||
enc->bytes_written * 8.0 / (enc->frames_encoded * enc->fps_den / enc->fps_num) / 1000.0);
|
printf(" Bitrate: %.1f kbps\n",
|
||||||
|
enc->bytes_written * 8.0 / (enc->frames_encoded * enc->fps_den / enc->fps_num) / 1000.0);
|
||||||
|
}
|
||||||
|
|
||||||
// Cleanup
|
// Cleanup
|
||||||
free(tad_output);
|
|
||||||
free(enc->audio_buffer);
|
free(enc->audio_buffer);
|
||||||
for (int i = 0; i < DT_GOP_SIZE; i++) {
|
for (int i = 0; i < DT_GOP_SIZE; i++) {
|
||||||
free(enc->gop_frames[i]);
|
free(enc->gop_frames[i]);
|
||||||
@@ -684,6 +1263,7 @@ int main(int argc, char **argv) {
|
|||||||
enc.quality_index = 3;
|
enc.quality_index = 3;
|
||||||
enc.is_pal = 0;
|
enc.is_pal = 0;
|
||||||
enc.is_interlaced = 0;
|
enc.is_interlaced = 0;
|
||||||
|
enc.num_threads = get_default_thread_count(); // Default: min(8, available CPUs)
|
||||||
|
|
||||||
// Initialize FEC libraries
|
// Initialize FEC libraries
|
||||||
rs_init();
|
rs_init();
|
||||||
@@ -693,6 +1273,7 @@ int main(int argc, char **argv) {
|
|||||||
{"input", required_argument, 0, 'i'},
|
{"input", required_argument, 0, 'i'},
|
||||||
{"output", required_argument, 0, 'o'},
|
{"output", required_argument, 0, 'o'},
|
||||||
{"quality", required_argument, 0, 'q'},
|
{"quality", required_argument, 0, 'q'},
|
||||||
|
{"threads", required_argument, 0, 't'},
|
||||||
{"ntsc", no_argument, 0, 'N'},
|
{"ntsc", no_argument, 0, 'N'},
|
||||||
{"pal", no_argument, 0, 'P'},
|
{"pal", no_argument, 0, 'P'},
|
||||||
{"interlaced", no_argument, 0, 'I'},
|
{"interlaced", no_argument, 0, 'I'},
|
||||||
@@ -703,7 +1284,7 @@ int main(int argc, char **argv) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
int opt;
|
int opt;
|
||||||
while ((opt = getopt_long(argc, argv, "i:o:q:vhNPI", long_options, NULL)) != -1) {
|
while ((opt = getopt_long(argc, argv, "i:o:q:t:vhNPI", long_options, NULL)) != -1) {
|
||||||
switch (opt) {
|
switch (opt) {
|
||||||
case 'i':
|
case 'i':
|
||||||
enc.input_file = optarg;
|
enc.input_file = optarg;
|
||||||
@@ -716,6 +1297,17 @@ int main(int argc, char **argv) {
|
|||||||
if (enc.quality_index < 0) enc.quality_index = 0;
|
if (enc.quality_index < 0) enc.quality_index = 0;
|
||||||
if (enc.quality_index > 5) enc.quality_index = 5;
|
if (enc.quality_index > 5) enc.quality_index = 5;
|
||||||
break;
|
break;
|
||||||
|
case 't': {
|
||||||
|
int threads = atoi(optarg);
|
||||||
|
if (threads < 0) {
|
||||||
|
fprintf(stderr, "Error: Thread count must be positive\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
// Both 0 and 1 mean single-threaded (use value 0 internally)
|
||||||
|
enc.num_threads = (threads <= 1) ? 0 : threads;
|
||||||
|
if (enc.num_threads > 16) enc.num_threads = 16; // Cap at 16
|
||||||
|
break;
|
||||||
|
}
|
||||||
case 'N':
|
case 'N':
|
||||||
enc.is_pal = 0;
|
enc.is_pal = 0;
|
||||||
enc.height = DT_HEIGHT_NTSC;
|
enc.height = DT_HEIGHT_NTSC;
|
||||||
@@ -774,6 +1366,8 @@ int main(int argc, char **argv) {
|
|||||||
printf(" Framerate: %d/%d\n", enc.fps_num, enc.fps_den);
|
printf(" Framerate: %d/%d\n", enc.fps_num, enc.fps_den);
|
||||||
printf(" Quality: %d\n", enc.quality_index);
|
printf(" Quality: %d\n", enc.quality_index);
|
||||||
printf(" GOP size: %d\n", DT_GOP_SIZE);
|
printf(" GOP size: %d\n", DT_GOP_SIZE);
|
||||||
|
printf(" Threads: %d%s\n", enc.num_threads > 0 ? enc.num_threads : 1,
|
||||||
|
enc.num_threads > 0 ? " (multithreaded)" : " (single-threaded)");
|
||||||
printf(" Header sizes: main=%dB tad=%dB tav=%dB (after LDPC)\n",
|
printf(" Header sizes: main=%dB tad=%dB tav=%dB (after LDPC)\n",
|
||||||
DT_MAIN_HEADER_SIZE * 2, DT_TAD_HEADER_SIZE * 2, DT_TAV_HEADER_SIZE * 2);
|
DT_MAIN_HEADER_SIZE * 2, DT_TAD_HEADER_SIZE * 2, DT_TAV_HEADER_SIZE * 2);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user