mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-06-06 13:38:30 +09:00
3797 lines
140 KiB
C
3797 lines
140 KiB
C
/**
 * TAV Encoder CLI - Reference Implementation using libtavenc
 *
 * Complete reference encoder with all features from the original encoder:
 * - Full command-line argument support
 * - All encoder presets (sports, anime)
 * - Scene change detection (two-pass encoding)
 * - Multi-threading support
 * - FFmpeg integration for frame reading
 * - TAV file format writing with all packet types
 * - TAD audio encoding integration
 * - Subtitle and font ROM support
 *
 * This is the official CLI implementation using libtavenc library.
 * Reduced from 14,000 lines to ~1,600 lines while preserving all features.
 *
 * Created by CuriousTorvald and Claude on 2025-12-03-04.
 */
|
||
|
||
#include <stdio.h>
|
||
#include <stdlib.h>
|
||
#include <stdint.h>
|
||
#include <string.h>
|
||
#include <getopt.h>
|
||
#include <time.h>
|
||
#include <unistd.h>
|
||
#include <sys/stat.h>
|
||
#include <pthread.h>
|
||
#include <math.h>
|
||
#include <float.h>
|
||
#include <limits.h>
|
||
|
||
#include "tav_encoder_lib.h"
|
||
#include "encoder_tad.h"
|
||
|
||
// =============================================================================
|
||
// Multithreading Structures
|
||
// =============================================================================
|
||
|
||
#define GOP_SLOT_EMPTY 0
|
||
#define GOP_SLOT_READY 1
|
||
#define GOP_SLOT_ENCODING 2
|
||
#define GOP_SLOT_COMPLETE 3
|
||
|
||
typedef struct gop_job {
|
||
// Slot state
|
||
volatile int status;
|
||
|
||
// Input data (owned by job)
|
||
uint8_t **rgb_frames; // Array of frame pointers [num_frames]
|
||
int num_frames; // Frames in this GOP
|
||
int *frame_numbers; // Frame indices for timecodes
|
||
int gop_index; // Sequential GOP number
|
||
|
||
// Audio data (owned by job)
|
||
float *audio_samples; // Stereo PCM32f for this GOP
|
||
size_t num_audio_samples; // Samples per channel
|
||
|
||
// Output data (filled by worker, owned by job)
|
||
tav_encoder_packet_t *packet; // Encoded video packet
|
||
int success; // 1 if encoding succeeded
|
||
|
||
// Encoder params (copy for thread safety)
|
||
tav_encoder_params_t params;
|
||
} gop_job_t;
|
||
|
||
// =============================================================================
|
||
// Constants and Globals
|
||
// =============================================================================
|
||
|
||
// File magics (8 bytes each, written without the terminating NUL)
#define TAV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x56"  // "\x1FTSVMTAV"
#define TAP_MAGIC "\x1F\x54\x53\x56\x4D\x54\x41\x50"  // "\x1FTSVMTAP" (still picture)

#define MAX_PATH 4096
#define TEMP_AUDIO_FILE_SIZE 42
#define TEMP_PCM_FILE_SIZE 42
#define AUDIO_SAMPLE_RATE 32000         // TAD audio sample rate
#define MAX_SUBTITLE_LENGTH 2048

#define TAV_PACKET_SUBTITLE_TC 0x31     // Subtitle packet with timecode (SSF-TC format)
#define TAV_PACKET_SSF 0x30             // SSF packet (for font ROM)
#define TAV_PACKET_EXTENDED_HDR 0xEF    // Extended header packet
#define FONTROM_OPCODE_LOW 0x80         // Low font ROM opcode
#define FONTROM_OPCODE_HIGH 0x81        // High font ROM opcode
#define MAX_FONTROM_SIZE 1920           // Max font ROM size in bytes

#define DEFAULT_WIDTH 560               // TSVM default
#define DEFAULT_HEIGHT 448              // TSVM default

// Quality level to quantiser mapping (must match library tables)
static const int QUALITY_Y[] = {79, 47, 23, 11, 5, 2};  // Quality levels 0-5
static const int QUALITY_CO[] = {123, 108, 91, 76, 59, 29};
static const int QUALITY_CG[] = {148, 133, 113, 99, 76, 39};
// NOTE(review): this table has seven entries while the quantiser tables have
// six; presumably the last entry (0.0) serves an extra level - confirm.
static const float DEAD_ZONE_THRESHOLD[] = {1.5f, 1.5f, 1.2f, 1.1f, 0.8f, 0.6f, 0.0f};

// Temporary file paths; buffers are sized by the TEMP_*_FILE_SIZE macros.
static char TEMP_AUDIO_FILE[TEMP_AUDIO_FILE_SIZE];
static char TEMP_PCM_FILE[TEMP_PCM_FILE_SIZE];
|
||
|
||
// =============================================================================
|
||
// Two-Pass Scene Change Detection Constants
|
||
// =============================================================================
|
||
|
||
// Fixed analysis resolution for scene change detection (performance-independent of source size)
#define ANALYSIS_WIDTH 128
#define ANALYSIS_HEIGHT 128
#define ANALYSIS_DWT_LEVELS 3               // 3-level Haar DWT for analysis

// Adaptive threshold parameters
#define ANALYSIS_MOVING_WINDOW 30           // Moving average window (30 frames = ~1 second at 30fps)
#define ANALYSIS_STDDEV_MULTIPLIER 1.4      // Standard deviation multiplier for adaptive threshold
#define ANALYSIS_LL_DIFF_MIN_THRESHOLD 1.5  // Minimum absolute threshold for LL_diff
#define ANALYSIS_HB_RATIO_THRESHOLD 0.4     // Highband energy ratio threshold
#define ANALYSIS_HB_ENERGY_MULTIPLIER 1.4   // Energy spike multiplier (1.4x mean to trigger)
#define ANALYSIS_FADE_THRESHOLD 50.0        // Brightness change threshold over 5 frames

// GOP size constraints for two-pass mode
#define ANALYSIS_GOP_MIN_SIZE 10            // Minimum GOP size for two-pass mode
#define ANALYSIS_GOP_MAX_SIZE 24            // Maximum GOP size for two-pass mode
|
||
|
||
// =============================================================================
|
||
// Two-Pass Scene Change Detection Structures
|
||
// =============================================================================
|
||
|
||
// Frame analysis metrics for two-pass scene change detection
typedef struct frame_analysis {
    int frame_number;

    // Wavelet-based metrics (3-level Haar on fixed-size analysis buffer)
    double ll_diff;             // L1 distance between consecutive LL bands
    double ll_mean;             // Mean brightness (LL band average)
    double ll_variance;         // Contrast estimate (LL band variance)

    double highband_energy;     // Sum of absolute values in LH/HL/HH bands
    double total_energy;        // Total energy (all bands)
    double highband_ratio;      // highband_energy / total_energy

    // Per-band entropies (Shannon entropy of coefficient magnitudes)
    // (currently disabled)
    // double entropy_ll;
    // double entropy_lh[ANALYSIS_DWT_LEVELS];
    // double entropy_hl[ANALYSIS_DWT_LEVELS];
    // double entropy_hh[ANALYSIS_DWT_LEVELS];

    // Texture change indicators
    double zero_crossing_rate;  // Zero crossing rate in highbands

    // Detection results
    int is_scene_change;        // Final scene change flag
    double scene_change_score;  // Composite score for debugging
} frame_analysis_t;
|
||
|
||
// GOP boundary list for two-pass encoding (singly linked via `next`)
typedef struct gop_boundary {
    int start_frame;
    int end_frame;
    int num_frames;
    struct gop_boundary *next;
} gop_boundary_t;
|
||
|
||
// =============================================================================
|
||
// Subtitle Structures
|
||
// =============================================================================
|
||
|
||
// One subtitle cue; cues are chained into a singly linked list via `next`.
typedef struct subtitle_entry {
    int start_frame;
    int end_frame;
    uint64_t start_time_ns;     // Start time in nanoseconds
    uint64_t end_time_ns;       // End time in nanoseconds
    char *text;
    struct subtitle_entry *next;
} subtitle_entry_t;
|
||
|
||
// =============================================================================
|
||
// CLI Context
|
||
// =============================================================================
|
||
|
||
typedef struct {
|
||
// Input/output
|
||
char *input_file;
|
||
char *output_file;
|
||
FILE *output_fp;
|
||
|
||
// Video parameters (from library params)
|
||
tav_encoder_params_t enc_params;
|
||
|
||
// FFmpeg subprocess
|
||
FILE *ffmpeg_pipe;
|
||
int original_width, original_height;
|
||
int original_fps_num, original_fps_den;
|
||
|
||
// Encoding state
|
||
int64_t frame_count;
|
||
int64_t gop_count;
|
||
size_t total_bytes;
|
||
time_t start_time;
|
||
|
||
// GOP frame buffer (for tav_encoder_encode_gop())
|
||
uint8_t **gop_frames; // Array of frame pointers [gop_size]
|
||
int gop_frame_count; // Number of frames in current GOP
|
||
int *gop_frame_numbers; // Frame numbers for timecodes [gop_size]
|
||
|
||
// CLI options
|
||
int verbose;
|
||
int encode_limit; // Max frames to encode (0=all)
|
||
char *subtitle_file;
|
||
char *fontrom_low;
|
||
char *fontrom_high;
|
||
int separate_audio_track;
|
||
int use_native_audio; // PCM8 instead of TAD
|
||
int interlaced; // Interlaced mode (half-height internally, full height in header)
|
||
int header_height; // Height to write to header (may differ from enc_params.height when interlaced)
|
||
|
||
// Framerate conversion
|
||
int target_fps_num; // Target output framerate numerator (0 = no conversion)
|
||
int target_fps_den; // Target output framerate denominator
|
||
|
||
// Audio encoding
|
||
int has_audio;
|
||
int audio_quality; // TAD quality level (0-5)
|
||
FILE *pcm_file; // Extracted PCM32f audio file
|
||
float *audio_buffer; // Audio sample buffer (per-frame)
|
||
size_t audio_buffer_size; // Buffer size in samples per channel
|
||
int samples_per_frame; // Audio samples per video frame
|
||
size_t audio_remaining; // Remaining bytes in PCM file
|
||
float *gop_audio_buffer; // GOP audio accumulation buffer
|
||
size_t gop_audio_samples; // Accumulated audio samples for current GOP
|
||
|
||
// Subtitle processing
|
||
subtitle_entry_t *subtitles;
|
||
|
||
// Extended Header support
|
||
char *ffmpeg_version; // FFmpeg version string (first line of "ffmpeg -version")
|
||
uint64_t creation_time_us; // Creation time in microseconds since UNIX Epoch (UTC)
|
||
long extended_header_offset; // File offset for updating ENDT value at end
|
||
int suppress_xhdr; // If 1, don't write Extended Header
|
||
|
||
// Multithreading
|
||
int num_threads; // 0 = single-threaded, 1+ = num worker threads
|
||
gop_job_t *gop_jobs; // Array of GOP job slots [num_threads]
|
||
pthread_t *worker_threads; // Array of worker thread handles [num_threads]
|
||
pthread_mutex_t job_mutex; // Mutex for job slot access
|
||
pthread_cond_t job_ready; // Signal when a job slot is ready for encoding
|
||
pthread_cond_t job_complete; // Signal when a job slot is complete
|
||
volatile int shutdown_workers; // 1 when workers should exit
|
||
|
||
// Still image (TAP) mode
|
||
int is_still_image; // 1 if input is a still image (outputs TAP format)
|
||
|
||
// Two-pass scene change detection
|
||
int two_pass_mode; // 1 = two-pass enabled, 0 = disabled
|
||
frame_analysis_t *frame_analyses; // Array of frame analyses from first pass
|
||
int frame_analyses_count; // Number of frames analysed
|
||
int frame_analyses_capacity; // Allocated capacity
|
||
gop_boundary_t *gop_boundaries; // Linked list of GOP boundaries
|
||
gop_boundary_t *current_gop_boundary; // Current GOP being encoded
|
||
|
||
} cli_context_t;
|
||
|
||
// =============================================================================
|
||
// Utility Functions
|
||
// =============================================================================
|
||
|
||
/**
 * Fill `filename` with "/tmp/" followed by 32 random alphanumeric characters.
 *
 * The buffer must hold at least 38 bytes (5 + 32 + terminating NUL).
 * The C library PRNG is seeded from the clock on first use, so the names are
 * not unpredictable and no file is created by this function.
 */
static void generate_random_filename(char *filename) {
    static const char prefix[] = "/tmp/";
    static const char charset[] =
        "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
    enum { RANDOM_CHARS = 32 };
    static int seeded = 0;

    if (!seeded) {
        srand((unsigned int)time(NULL));
        seeded = 1;
    }

    const size_t prefix_len = sizeof(prefix) - 1;
    const int charset_size = (int)(sizeof(charset) - 1);

    memcpy(filename, prefix, prefix_len);
    for (size_t i = 0; i < RANDOM_CHARS; i++) {
        filename[prefix_len + i] = charset[rand() % charset_size];
    }
    filename[prefix_len + RANDOM_CHARS] = '\0';
}
|
||
|
||
/**
 * Execute a shell command via popen() and capture its standard output.
 *
 * @param command  Command line passed to the shell.
 * @return NUL-terminated, heap-allocated copy of the output that the caller
 *         must free(), or NULL if the command could not be started, memory
 *         ran out, or reading the pipe failed.
 */
static char *execute_command(const char *command) {
    FILE *pipe = popen(command, "r");
    if (!pipe) return NULL;

    size_t capacity = 4096;
    size_t length = 0;
    char *buffer = malloc(capacity);
    if (!buffer) {
        pclose(pipe);
        return NULL;
    }

    for (;;) {
        // Always keep one byte free for the terminating NUL.
        size_t got = fread(buffer + length, 1, capacity - length - 1, pipe);
        if (got == 0) break;
        length += got;

        if (length + 1 >= capacity) {
            // Buffer is full: double it, keeping the old block valid on failure.
            if (capacity > SIZE_MAX / 2) {
                free(buffer);
                pclose(pipe);
                return NULL;
            }
            capacity *= 2;
            char *grown = realloc(buffer, capacity);
            if (!grown) {
                free(buffer);
                pclose(pipe);
                return NULL;
            }
            buffer = grown;
        }
    }

    // A zero-length read means either EOF or a read error; report the latter.
    if (ferror(pipe)) {
        free(buffer);
        pclose(pipe);
        return NULL;
    }

    buffer[length] = '\0';
    pclose(pipe);
    return buffer;
}
|
||
|
||
/**
 * Get FFmpeg version string (first line of "ffmpeg -version").
 *
 * Trailing newline / carriage-return characters are stripped.
 *
 * @return Heap-allocated string that the caller must free(), or NULL if the
 *         command output could not be captured.
 */
static char *get_ffmpeg_version(void) {
    char *output = execute_command("ffmpeg -version 2>&1 | head -1");
    if (!output) return NULL;

    // Trim trailing newline/carriage return
    size_t len = strlen(output);
    while (len > 0 && (output[len - 1] == '\n' || output[len - 1] == '\r')) {
        output[--len] = '\0';
    }

    return output;  // Caller must free
}
|
||
|
||
/**
 * Get number of available CPU cores.
 *
 * @return The number of online processors reported by sysconf(), or 1 when
 *         that query is unavailable or fails.
 */
static int get_available_cpus(void) {
#ifdef _SC_NPROCESSORS_ONLN
    long nproc = sysconf(_SC_NPROCESSORS_ONLN);
    if (nproc > 0) {
        return (int)nproc;
    }
#endif
    return 1;  // Fallback to single core
}
|
||
|
||
/**
 * Get default thread count: the number of available CPUs, capped at 8.
 */
static int get_default_thread_count(void) {
    const int max_default_threads = 8;
    int available = get_available_cpus();
    return available < max_default_threads ? available : max_default_threads;
}
|
||
|
||
static void print_usage(const char *program) {
|
||
printf("TAV Encoder - TSVM Advanced Video Codec (Reference Implementation)\n");
|
||
printf("\nUsage: %s -i input.mp4 -o output.tav [options]\n\n", program);
|
||
printf("Required:\n");
|
||
printf(" -i, --input FILE Input video file\n");
|
||
printf(" -o, --output FILE Output TAV file\n");
|
||
printf("\nVideo Options:\n");
|
||
printf(" -s, --size WxH Frame size (using %dx%d if omitted)\n", DEFAULT_WIDTH, DEFAULT_HEIGHT);
|
||
printf(" -f, --fps NUM/DEN Output Framerate (e.g., 60/1, 30000/1001)\n");
|
||
printf(" -q, --quality N Quality level 0-5 (default: 3)\n");
|
||
printf(" -Q, --quantiser Y,Co,Cg Custom quantisers (advanced)\n");
|
||
printf(" -w, --wavelet N Spatial wavelet: 0=5/3, 1=9/7 (default), 2=13/7, 16=DD-4, 255=Haar\n");
|
||
printf(" --temporal-wavelet N Temporal wavelet: 0=Haar (default), 1=CDF 5/3\n");
|
||
printf(" -c, --colour-space N Colour space: 0=YCoCg-R (default), 1=ICtCp\n");
|
||
printf(" --decomp-levels N Spatial DWT levels (0=auto, default: 6)\n");
|
||
// printf(" --temporal-levels N Temporal DWT levels (0=auto, default: 2)\n");
|
||
printf("\nGOP Options:\n");
|
||
printf(" --temporal-dwt Enable 3D DWT GOP encoding (default)\n");
|
||
printf(" --intra-only Disable temporal compression (I-frames only)\n");
|
||
printf(" --gop-size N GOP size 8/16/24 (default: 24)\n");
|
||
// printf(" --single-pass Disable scene change detection\n");
|
||
printf("\nPerformance:\n");
|
||
printf(" -t, --threads N Parallel encoding threads (default: min(8, available CPUs))\n");
|
||
printf(" 0 or 1 = single-threaded, 2-16 = multithreaded\n");
|
||
printf(" Each thread encodes one GOP independently\n");
|
||
// printf("\nTiling:\n");
|
||
// printf(" --monoblock Force single-tile mode (auto-disabled for > %dx%d)\n",
|
||
// TAV_MONOBLOCK_MAX_WIDTH, TAV_MONOBLOCK_MAX_HEIGHT);
|
||
// printf(" --tiled Force multi-tile mode (Padded Tiling)\n");
|
||
printf("\nCompression:\n");
|
||
printf(" --zstd-level N Zstd level 3-22 (default: 7)\n");
|
||
printf(" --no-perceptual-tuning Disable HVS perceptual quantization\n");
|
||
printf(" --no-dead-zone Disable dead-zone quantization\n");
|
||
printf(" --dead-zone-threshold N Dead-zone threshold. Defaults by quality level:\n");
|
||
printf(" 0=1.5, 1=1.5, 2=1.2, 3=1.1, 4=0.8, 5=0.6\n");
|
||
printf("\nEncoder Presets:\n");
|
||
printf(" --preset-sports Sports mode (finer temporal quantization)\n");
|
||
printf(" --preset-anime Anime mode (disable grain)\n");
|
||
printf("\nAudio:\n");
|
||
printf(" --tad-audio Use TAD audio codec (default)\n");
|
||
printf(" --pcm8-audio Use TSVM-native PCM8 audio\n");
|
||
printf(" --audio-quality N TAD audio quality 0-5 (default: matches video -q)\n");
|
||
printf(" --no-audio Disable audio encoding\n");
|
||
printf(" --separate-audio-track Multiplex audio as separate track\n");
|
||
printf("\nMisc:\n");
|
||
printf(" --encode-limit N Encode only first N frames\n");
|
||
printf(" --subtitle FILE Add subtitle track (.srt)\n");
|
||
printf(" --fontrom-low FILE Font ROM for low ASCII (.chr)\n");
|
||
printf(" --fontrom-high FILE Font ROM for high ASCII (.chr)\n");
|
||
printf(" --suppress-xhdr Suppress Extended Header packet (enabled by default)\n");
|
||
printf(" --interlaced Enable interlaced video mode (half-height encoding)\n");
|
||
printf(" -v, --verbose Verbose output\n");
|
||
printf(" --help Show this help\n");
|
||
printf("\nExamples:\n");
|
||
printf(" # Basic encoding\n");
|
||
printf(" %s -i video.mp4 -o out.tav -q 3\n\n", program);
|
||
printf(" # High quality with CDF 5/3 wavelet\n");
|
||
printf(" %s -i video.mp4 -o out.tav -q 5 -w 0\n\n", program);
|
||
printf(" # Sports mode with larger GOP\n");
|
||
printf(" %s -i video.mp4 -o out.tav --preset-sports --gop-size 24\n\n", program);
|
||
printf(" # Advanced: separate quantiser per channel\n");
|
||
printf(" %s -i video.mp4 -o out.tav -Q 3,5,6\n\n", program);
|
||
printf(" # Multithreaded encoding with 4 threads\n");
|
||
printf(" %s -i video.mp4 -o out.tav -t 4 -q 3\n", program);
|
||
}
|
||
|
||
// =============================================================================
|
||
// FFmpeg Integration
|
||
// =============================================================================
|
||
|
||
/**
|
||
* Probe video file to get resolution and framerate using FFmpeg.
|
||
*/
|
||
static int get_video_info(const char *input_file, int *width, int *height,
|
||
int *fps_num, int *fps_den) {
|
||
char cmd[MAX_PATH * 2];
|
||
snprintf(cmd, sizeof(cmd),
|
||
"ffprobe -v error -select_streams v:0 "
|
||
"-show_entries stream=width,height,r_frame_rate "
|
||
"-of default=noprint_wrappers=1:nokey=1 \"%s\"",
|
||
input_file);
|
||
|
||
FILE *fp = popen(cmd, "r");
|
||
if (!fp) {
|
||
fprintf(stderr, "Error: Failed to run ffprobe\n");
|
||
return -1;
|
||
}
|
||
|
||
if (fscanf(fp, "%d\n%d\n", width, height) != 2) {
|
||
fprintf(stderr, "Error: Failed to parse video dimensions\n");
|
||
pclose(fp);
|
||
return -1;
|
||
}
|
||
|
||
char fps_str[64];
|
||
if (fgets(fps_str, sizeof(fps_str), fp) == NULL) {
|
||
fprintf(stderr, "Error: Failed to parse framerate\n");
|
||
pclose(fp);
|
||
return -1;
|
||
}
|
||
|
||
// Parse framerate (format: "num/den" or "num")
|
||
if (sscanf(fps_str, "%d/%d", fps_num, fps_den) != 2) {
|
||
if (sscanf(fps_str, "%d", fps_num) == 1) {
|
||
*fps_den = 1;
|
||
} else {
|
||
fprintf(stderr, "Error: Failed to parse framerate: %s\n", fps_str);
|
||
pclose(fp);
|
||
return -1;
|
||
}
|
||
}
|
||
|
||
pclose(fp);
|
||
return 0;
|
||
}
|
||
|
||
/**
|
||
* Check if input file is a still image (not a video).
|
||
* Uses FFmpeg to check if the input has a video stream with frames.
|
||
* Returns 1 if still image, 0 if video, -1 on error.
|
||
*/
|
||
static int is_input_still_image(const char *input_file) {
|
||
char cmd[MAX_PATH * 2];
|
||
|
||
// Check for common image extensions first (quick path)
|
||
const char *ext = strrchr(input_file, '.');
|
||
if (ext) {
|
||
const char *image_exts[] = {
|
||
".png", ".jpg", ".jpeg", ".bmp", ".tga", ".gif", ".tiff", ".tif",
|
||
".webp", ".ppm", ".pgm", ".pbm", ".pnm", ".exr", ".hdr",
|
||
".PNG", ".JPG", ".JPEG", ".BMP", ".TGA", ".GIF", ".TIFF", ".TIF",
|
||
".WEBP", ".PPM", ".PGM", ".PBM", ".PNM", ".EXR", ".HDR",
|
||
NULL
|
||
};
|
||
for (int i = 0; image_exts[i]; i++) {
|
||
if (strcmp(ext, image_exts[i]) == 0) {
|
||
return 1; // Known image extension
|
||
}
|
||
}
|
||
|
||
if (strcmp(ext, ".webm") == 0 || strcmp(ext, ".WEBM") == 0) {
|
||
return 0; // Known video extension
|
||
}
|
||
}
|
||
|
||
// Use ffprobe to check if it's a single-frame input
|
||
// For still images, nb_frames will be "1" or "N/A" and duration will be very short or N/A
|
||
snprintf(cmd, sizeof(cmd),
|
||
"ffprobe -v error -select_streams v:0 "
|
||
"-show_entries stream=nb_frames,duration "
|
||
"-of default=noprint_wrappers=1:nokey=1 \"%s\" 2>/dev/null",
|
||
input_file);
|
||
|
||
FILE *fp = popen(cmd, "r");
|
||
if (!fp) {
|
||
return -1;
|
||
}
|
||
|
||
char nb_frames_str[64] = {0};
|
||
char duration_str[64] = {0};
|
||
|
||
if (fgets(nb_frames_str, sizeof(nb_frames_str), fp) != NULL) {
|
||
fgets(duration_str, sizeof(duration_str), fp);
|
||
}
|
||
pclose(fp);
|
||
|
||
// Check if nb_frames is exactly "1" or "N/A"
|
||
// Also check if duration is very short (< 0.1 seconds) or N/A
|
||
if (nb_frames_str[0]) {
|
||
// Remove trailing newline
|
||
char *nl = strchr(nb_frames_str, '\n');
|
||
if (nl) *nl = '\0';
|
||
nl = strchr(duration_str, '\n');
|
||
if (nl) *nl = '\0';
|
||
|
||
// Still image if nb_frames is "1" or "N/A"
|
||
if (strcmp(nb_frames_str, "1") == 0 ||
|
||
strcmp(nb_frames_str, "N/A") == 0) {
|
||
return 1;
|
||
}
|
||
|
||
// Also check for very short duration (might be a single frame)
|
||
if (duration_str[0] && strcmp(duration_str, "N/A") != 0) {
|
||
double duration = atof(duration_str);
|
||
if (duration > 0 && duration < 0.1) {
|
||
return 1; // Very short, likely a single frame
|
||
}
|
||
}
|
||
}
|
||
|
||
return 0; // Assume video
|
||
}
|
||
|
||
/**
|
||
* Open FFmpeg pipe for reading RGB24 frames.
|
||
*
|
||
* When interlaced=1:
|
||
* - full_height is the full display height (written to header)
|
||
* - FFmpeg outputs half-height frames via tinterlace+separatefields
|
||
* - Filtergraph: scale/crop to full size, then tinterlace weave halves
|
||
* framerate, then separatefields restores framerate at half height
|
||
*
|
||
* Framerate conversion:
|
||
* - If target_fps > source_fps: uses minterpolate for motion interpolation
|
||
* - If target_fps < source_fps: uses fps filter for frame dropping
|
||
* - If target_fps == source_fps: no fps filter applied
|
||
*/
|
||
static FILE* open_ffmpeg_pipe(const char *input_file, int width, int height,
|
||
int interlaced, int full_height,
|
||
int target_fps_num, int target_fps_den,
|
||
int source_fps_num, int source_fps_den) {
|
||
char cmd[MAX_PATH * 2];
|
||
char fps_filter[128] = "";
|
||
|
||
// Build fps filter string if conversion is requested (applied first)
|
||
if (target_fps_num > 0 && target_fps_den > 0 &&
|
||
source_fps_num > 0 && source_fps_den > 0) {
|
||
// Compare framerates: target/1 vs source/1 -> target * source_den vs source * target_den
|
||
double target_rate = (double)target_fps_num / (double)source_fps_den;
|
||
double source_rate = (double)source_fps_num / (double)target_fps_den;
|
||
|
||
if (target_rate > source_rate) {
|
||
// Upsampling: use motion interpolation
|
||
snprintf(fps_filter, sizeof(fps_filter), "minterpolate=fps=%d/%d,",
|
||
target_fps_num, target_fps_den);
|
||
} else if (target_rate < source_rate) {
|
||
// Downsampling: use fps filter
|
||
snprintf(fps_filter, sizeof(fps_filter), "fps=%d/%d,",
|
||
target_fps_num, target_fps_den);
|
||
}
|
||
// If equal, fps_filter remains empty (no conversion needed)
|
||
}
|
||
|
||
if (interlaced) {
|
||
// Interlaced mode filtergraph:
|
||
// 1. fps filter (if conversion requested) - applied first
|
||
// 2. scale and crop to full size (width x full_height)
|
||
// 3. tinterlace interleave_top:cvlpf - weave fields, halves framerate
|
||
// 4. separatefields - separate into half-height frames, doubles framerate back
|
||
// Final output: width x (full_height/2) at target framerate
|
||
snprintf(cmd, sizeof(cmd),
|
||
"ffmpeg -hide_banner -v quiet -i \"%s\" -f rawvideo -pix_fmt rgb24 -vf "
|
||
"\"%sscale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d,"
|
||
"tinterlace=interleave_top:cvlpf,separatefields\" -",
|
||
input_file, fps_filter, width, full_height, width, full_height);
|
||
} else {
|
||
// Progressive mode - optional fps conversion, then scale and crop
|
||
snprintf(cmd, sizeof(cmd),
|
||
"ffmpeg -hide_banner -v quiet -i \"%s\" -f rawvideo -pix_fmt rgb24 -vf "
|
||
"\"%sscale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" -",
|
||
input_file, fps_filter, width, height, width, height);
|
||
}
|
||
|
||
FILE *fp = popen(cmd, "r");
|
||
if (!fp) {
|
||
fprintf(stderr, "Error: Failed to start FFmpeg\n");
|
||
return NULL;
|
||
}
|
||
|
||
return fp;
|
||
}
|
||
|
||
/**
 * Read one RGB24 frame from FFmpeg pipe.
 *
 * @param fp          Stream to read from.
 * @param rgb_frame   Destination buffer of at least frame_size bytes.
 * @param frame_size  Number of bytes in one frame.
 * @return 1 on success, 0 on EOF (nothing read), -1 on a read error or an
 *         incomplete frame (a warning is printed for the latter).
 */
static int read_rgb_frame(FILE *fp, uint8_t *rgb_frame, size_t frame_size) {
    size_t bytes_read = fread(rgb_frame, 1, frame_size, fp);

    if (bytes_read == frame_size && bytes_read != 0) {
        return 1;
    }

    if (bytes_read == 0) {
        return feof(fp) ? 0 : -1;  // EOF or error
    }

    // Stream ended (or failed) in the middle of a frame.
    fprintf(stderr, "Warning: Incomplete frame read (%zu/%zu bytes)\n",
            bytes_read, frame_size);
    return -1;
}
|
||
|
||
// =============================================================================
|
||
// TAV File Format Writing
|
||
// =============================================================================
|
||
|
||
/**
|
||
* Write TAV/TAP file header.
|
||
*
|
||
* When interlaced mode is enabled:
|
||
* - header_height should be the full display height (e.g., 448)
|
||
* - params->height is the internal encoding height (e.g., 224)
|
||
* - video_flags bit 0 is set to indicate interlaced
|
||
*
|
||
* When is_still_image is set:
|
||
* - Writes TAP magic instead of TAV
|
||
* - FPS is set to 0
|
||
* - Total frames is set to 0xFFFFFFFF
|
||
*/
|
||
static int write_tav_header(FILE *fp, const tav_encoder_params_t *params,
|
||
int has_audio, int has_subtitles,
|
||
int interlaced, int header_height,
|
||
int is_still_image) {
|
||
// Magic (8 bytes: \x1FTSVMTAV or \x1FTSVMTAP)
|
||
if (is_still_image) {
|
||
fwrite(TAP_MAGIC, 1, 8, fp);
|
||
} else {
|
||
fwrite(TAV_MAGIC, 1, 8, fp);
|
||
}
|
||
|
||
// Version (1 byte) - calculate based on params
|
||
// Version encoding (monoblock mode always used):
|
||
// 3 = YCoCg-R monoblock uniform
|
||
// 4 = ICtCp monoblock uniform
|
||
// 5 = YCoCg-R monoblock perceptual
|
||
// 6 = ICtCp monoblock perceptual
|
||
// Add 8 if using CDF 5/3 temporal wavelet
|
||
uint8_t version;
|
||
if (params->monoblock) {
|
||
if (params->perceptual_tuning) {
|
||
// Monoblock perceptual: version 5 (YCoCg-R) or 6 (ICtCp)
|
||
version = params->channel_layout ? 6 : 5;
|
||
} else {
|
||
// Monoblock uniform: version 3 (YCoCg-R) or 4 (ICtCp)
|
||
version = params->channel_layout ? 4 : 3;
|
||
}
|
||
} else {
|
||
if (params->perceptual_tuning) {
|
||
// Tiled perceptual: version 7 (YCoCg-R) or 8 (ICtCp)
|
||
version = params->channel_layout ? 7 : 8;
|
||
} else {
|
||
// Tiled uniform: version 1 (YCoCg-R) or 2 (ICtCp)
|
||
version = params->channel_layout ? 1 : 2;
|
||
}
|
||
}
|
||
// Add 8 if using CDF 5/3 temporal wavelet
|
||
if (params->enable_temporal_dwt && params->temporal_wavelet == 0) {
|
||
version += 8;
|
||
}
|
||
fputc(version, fp);
|
||
|
||
// Width (uint16_t, 2 bytes)
|
||
// Write 0 if width exceeds 65535 (extended dimensions will be in XDIM)
|
||
uint16_t width = (params->width > 65535) ? 0 : (uint16_t)params->width;
|
||
fwrite(&width, sizeof(uint16_t), 1, fp);
|
||
|
||
// Height (uint16_t, 2 bytes)
|
||
// For interlaced mode, write the full display height (header_height)
|
||
// For progressive mode, write params->height
|
||
// Write 0 if height exceeds 65535 (extended dimensions will be in XDIM)
|
||
int actual_height = interlaced ? header_height : params->height;
|
||
uint16_t height = (actual_height > 65535) ? 0 : (uint16_t)actual_height;
|
||
fwrite(&height, sizeof(uint16_t), 1, fp);
|
||
|
||
// FPS (uint8_t, 1 byte)
|
||
// - 0x00 for still images
|
||
// - 0xFF if fps_num > 254 or fps_den is not 1 or 1001 (use XFPS extended header)
|
||
// - otherwise fps_num
|
||
uint8_t fps;
|
||
if (is_still_image) {
|
||
fps = 0;
|
||
} else if (params->fps_num > 254 ||
|
||
(params->fps_den != 1 && params->fps_den != 1001)) {
|
||
fps = 0xFF; // Extended framerate in XFPS
|
||
} else {
|
||
fps = (uint8_t)params->fps_num;
|
||
}
|
||
fputc(fps, fp);
|
||
|
||
// Total frames (uint32_t, 4 bytes)
|
||
// For still images: 0xFFFFFFFF
|
||
// For video: 0 (will be updated later)
|
||
uint32_t total_frames = is_still_image ? 0xFFFFFFFF : 0;
|
||
fwrite(&total_frames, sizeof(uint32_t), 1, fp);
|
||
|
||
// Wavelet filter (uint8_t, 1 byte)
|
||
fputc((uint8_t)params->wavelet_type, fp);
|
||
|
||
// Decomp levels (uint8_t, 1 byte)
|
||
fputc((uint8_t)params->decomp_levels, fp);
|
||
|
||
// Quantisers (3 bytes: Y, Co, Cg)
|
||
fputc((uint8_t)params->quantiser_y, fp);
|
||
fputc((uint8_t)params->quantiser_co, fp);
|
||
fputc((uint8_t)params->quantiser_cg, fp);
|
||
|
||
// Extra flags (uint8_t, 1 byte)
|
||
uint8_t extra_flags = 0;
|
||
if (has_audio) extra_flags |= 0x01; // Bit 0: has audio
|
||
if (has_subtitles) extra_flags |= 0x02; // Bit 1: has subtitles
|
||
fputc(extra_flags, fp);
|
||
|
||
// Video flags (uint8_t, 1 byte)
|
||
// Bit 0 = interlaced, Bit 1 = NTSC framerate, Bit 2 = lossless, etc.
|
||
uint8_t video_flags = 0;
|
||
if (interlaced) video_flags |= 0x01; // Bit 0: interlaced
|
||
fputc(video_flags, fp);
|
||
|
||
// Quality level (uint8_t, 1 byte)
|
||
uint8_t quality_level = params->quality_level + 1;
|
||
fputc(quality_level, fp);
|
||
|
||
// Channel layout (uint8_t, 1 byte)
|
||
fputc((uint8_t)params->channel_layout, fp);
|
||
|
||
// Entropy coder (uint8_t, 1 byte): 0=Twobitmap, 1=EZBC
|
||
fputc((uint8_t)params->entropy_coder, fp);
|
||
|
||
// Encoder preset (uint8_t, 1 byte)
|
||
fputc((uint8_t)params->encoder_preset, fp);
|
||
|
||
// Reserved (uint8_t, 1 byte)
|
||
fputc(0, fp);
|
||
|
||
// Device orientation (uint8_t, 1 byte)
|
||
fputc(0, fp);
|
||
|
||
// File role (uint8_t, 1 byte)
|
||
fputc(0, fp);
|
||
|
||
return 0;
|
||
}
|
||
|
||
/**
|
||
* Write Extended Header packet (0xEF) with metadata.
|
||
* Returns the file offset of the ENDT value for later update, or -1 on error.
|
||
*/
|
||
static long write_extended_header(cli_context_t *cli, int width, int height) {
|
||
FILE *fp = cli->output_fp;
|
||
|
||
// Write packet type (0xEF)
|
||
uint8_t packet_type = TAV_PACKET_EXTENDED_HDR;
|
||
if (fwrite(&packet_type, 1, 1, fp) != 1) return -1;
|
||
|
||
// Count key-value pairs: BGNT, ENDT, CDAT, VNDR, optionally FMPG, XDIM, XFPS
|
||
int has_xdim = (width > 65535 || height > 65535);
|
||
int has_xfps = (cli->enc_params.fps_num > 254 ||
|
||
(cli->enc_params.fps_den != 1 && cli->enc_params.fps_den != 1001));
|
||
uint16_t num_pairs = 4; // BGNT, ENDT, CDAT, VNDR
|
||
if (cli->ffmpeg_version) num_pairs++; // FMPG
|
||
if (has_xdim) num_pairs++; // XDIM
|
||
if (has_xfps) num_pairs++; // XFPS
|
||
if (fwrite(&num_pairs, sizeof(uint16_t), 1, fp) != 1) return -1;
|
||
|
||
// Helper macros for writing key-value pairs
|
||
#define WRITE_KV_UINT64(key_str, value) do { \
|
||
if (fwrite(key_str, 1, 4, fp) != 4) return -1; \
|
||
uint8_t value_type = 0x04; /* Uint64 */ \
|
||
if (fwrite(&value_type, 1, 1, fp) != 1) return -1; \
|
||
uint64_t val = (value); \
|
||
if (fwrite(&val, sizeof(uint64_t), 1, fp) != 1) return -1; \
|
||
} while(0)
|
||
|
||
#define WRITE_KV_BYTES(key_str, data, len) do { \
|
||
if (fwrite(key_str, 1, 4, fp) != 4) return -1; \
|
||
uint8_t value_type = 0x10; /* Bytes */ \
|
||
if (fwrite(&value_type, 1, 1, fp) != 1) return -1; \
|
||
uint16_t length = (len); \
|
||
if (fwrite(&length, sizeof(uint16_t), 1, fp) != 1) return -1; \
|
||
if (fwrite((data), 1, (len), fp) != (len)) return -1; \
|
||
} while(0)
|
||
|
||
// BGNT: Video begin time (0 nanoseconds for frame 0)
|
||
WRITE_KV_UINT64("BGNT", 0ULL);
|
||
|
||
// ENDT: Video end time (placeholder, will be updated at end)
|
||
// Save the file offset of the ENDT value (after key + type byte)
|
||
long endt_offset = ftell(fp) + 4 + 1; // 4 bytes for "ENDT", 1 byte for type
|
||
WRITE_KV_UINT64("ENDT", 0ULL);
|
||
|
||
// CDAT: Creation time in microseconds since UNIX Epoch (UTC)
|
||
WRITE_KV_UINT64("CDAT", cli->creation_time_us);
|
||
|
||
// VNDR: Encoder name and version
|
||
const char *vendor_str = "Encoder-TAV 20260121 (reference)";
|
||
WRITE_KV_BYTES("VNDR", vendor_str, strlen(vendor_str));
|
||
|
||
// FMPG: FFmpeg version (if available)
|
||
if (cli->ffmpeg_version) {
|
||
WRITE_KV_BYTES("FMPG", cli->ffmpeg_version, strlen(cli->ffmpeg_version));
|
||
}
|
||
|
||
// XDIM: Extended dimensions (if width or height exceeds 65535)
|
||
if (has_xdim) {
|
||
char xdim_str[32];
|
||
snprintf(xdim_str, sizeof(xdim_str), "%d,%d", width, height);
|
||
WRITE_KV_BYTES("XDIM", xdim_str, strlen(xdim_str));
|
||
}
|
||
|
||
// XFPS: Extended framerate (if fps_num > 254 or fps_den is not 1 or 1001)
|
||
if (has_xfps) {
|
||
char xfps_str[32];
|
||
snprintf(xfps_str, sizeof(xfps_str), "%d/%d",
|
||
cli->enc_params.fps_num, cli->enc_params.fps_den);
|
||
WRITE_KV_BYTES("XFPS", xfps_str, strlen(xfps_str));
|
||
}
|
||
|
||
#undef WRITE_KV_UINT64
|
||
#undef WRITE_KV_BYTES
|
||
|
||
return endt_offset;
|
||
}
|
||
|
||
/**
 * Patch the ENDT value inside an already-written Extended Header.
 *
 * Overwrites the uint64_t stored at endt_offset with end_time_ns and then
 * puts the stream position back where it was on entry.
 *
 * Returns 0 on success, -1 if the offset is negative (no Extended Header
 * was written) or any positioning/write call fails.
 */
static int update_extended_header_endt(FILE *fp, long endt_offset, uint64_t end_time_ns) {
    // A negative offset means there is nothing to patch
    if (endt_offset < 0) {
        return -1;
    }

    // Remember where to resume, then jump to the ENDT value
    const long resume_pos = ftell(fp);
    if (resume_pos < 0 || fseek(fp, endt_offset, SEEK_SET) != 0) {
        return -1;
    }

    // The position is restored whether or not the write went through
    const int value_written = (fwrite(&end_time_ns, sizeof(uint64_t), 1, fp) == 1);
    const int position_restored = (fseek(fp, resume_pos, SEEK_SET) == 0);

    return (value_written && position_restored) ? 0 : -1;
}
|
||
|
||
/**
 * Update total frames in header.
 *
 * Seeks back to offset 14, overwrites the uint32_t total_frames field and
 * restores the stream position that was current on entry.
 *
 * Returns 0 on success, -1 if any positioning call or the write itself fails.
 */
static int update_total_frames(FILE *fp, uint32_t total_frames) {
    // Field offset: magic(8) + version(1) + width(2) + height(2) + fps(1)
    const long total_frames_offset = 14;

    long current_pos = ftell(fp);
    if (current_pos < 0) {
        return -1;
    }

    if (fseek(fp, total_frames_offset, SEEK_SET) != 0) {
        return -1;
    }

    // Record the outcome but still try to restore the position afterwards,
    // so a failed patch does not also leave the stream pointing into the header
    const int written = (fwrite(&total_frames, sizeof(uint32_t), 1, fp) == 1);

    if (fseek(fp, current_pos, SEEK_SET) != 0) {
        return -1;
    }

    return written ? 0 : -1;
}
|
||
|
||
/**
|
||
* Write TAV packet to file.
|
||
*/
|
||
static int write_tav_packet(FILE *fp, const tav_encoder_packet_t *packet) {
|
||
if (!packet || !packet->data) {
|
||
return -1;
|
||
}
|
||
|
||
// Packet is already formatted: [type(1)][size(4)][data(N)]
|
||
// Or: [type(1)][gop_size(1)][size(4)][data(N)] for GOP packets
|
||
size_t written = fwrite(packet->data, 1, packet->size, fp);
|
||
|
||
if (written != packet->size) {
|
||
fprintf(stderr, "Error: Failed to write packet (%zu/%zu bytes)\n",
|
||
written, packet->size);
|
||
return -1;
|
||
}
|
||
|
||
return 0;
|
||
}
|
||
|
||
/**
|
||
* Write timecode packet.
|
||
* Format: [type(1)][timecode_ns(8)] where timecode_ns is uint64_t in nanoseconds
|
||
*/
|
||
static int write_timecode_packet(FILE *fp, int64_t frame_number, int fps_num, int fps_den) {
|
||
uint8_t packet[9];
|
||
packet[0] = TAV_PACKET_TIMECODE;
|
||
|
||
// Convert frame number to nanoseconds
|
||
// timecode_ns = (frame_number * fps_den * 1000000000) / fps_num
|
||
uint64_t timecode_ns = ((uint64_t)frame_number * (uint64_t)fps_den * 1000000000ULL) / (uint64_t)fps_num;
|
||
memcpy(packet + 1, &timecode_ns, 8);
|
||
|
||
fwrite(packet, 1, 9, fp);
|
||
return 0;
|
||
}
|
||
|
||
/**
|
||
* Write GOP sync packet.
|
||
* Format: [type(1)][frame_count(1)]
|
||
*/
|
||
static int write_gop_sync_packet(FILE *fp, int frame_count) {
|
||
uint8_t packet[2];
|
||
packet[0] = TAV_PACKET_GOP_SYNC;
|
||
packet[1] = (uint8_t)frame_count;
|
||
|
||
fwrite(packet, 1, 2, fp);
|
||
return 0;
|
||
}
|
||
|
||
/**
|
||
* Write sync packet (0xFF) for intra-only mode.
|
||
* Format: [type(1)] (no payload)
|
||
*/
|
||
static int write_sync_packet(FILE *fp) {
|
||
uint8_t packet = TAV_PACKET_SYNC;
|
||
fwrite(&packet, 1, 1, fp);
|
||
return 0;
|
||
}
|
||
|
||
// =============================================================================
|
||
// Audio Encoding Functions
|
||
// =============================================================================
|
||
|
||
/**
|
||
* Extract audio from video file to PCM32f stereo at 32kHz.
|
||
* Uses FFmpeg with high-quality resampling and highpass filter.
|
||
*/
|
||
static int extract_audio_to_file(const char *input_file, const char *output_file) {
|
||
char cmd[MAX_PATH * 2];
|
||
snprintf(cmd, sizeof(cmd),
|
||
"ffmpeg -hide_banner -v quiet -i \"%s\" -f f32le -acodec pcm_f32le -ar %d -ac 2 "
|
||
"-af \"aresample=resampler=soxr:precision=28:cutoff=0.99:dither_scale=0,highpass=f=16\" "
|
||
"-y \"%s\" 2>/dev/null",
|
||
input_file, AUDIO_SAMPLE_RATE, output_file);
|
||
|
||
int result = system(cmd);
|
||
if (result != 0) {
|
||
fprintf(stderr, "Warning: FFmpeg audio extraction failed\n");
|
||
return 0;
|
||
}
|
||
|
||
// Check if output file exists and has content
|
||
struct stat st;
|
||
if (stat(output_file, &st) != 0 || st.st_size == 0) {
|
||
return 0;
|
||
}
|
||
|
||
return 1;
|
||
}
|
||
|
||
/**
|
||
* Read audio samples for one frame from PCM file.
|
||
* Returns number of samples actually read.
|
||
*/
|
||
static size_t read_audio_samples(cli_context_t *cli, float *buffer, size_t samples_to_read) {
|
||
if (!cli->pcm_file || cli->audio_remaining == 0) {
|
||
return 0;
|
||
}
|
||
|
||
// Calculate bytes to read (stereo float32)
|
||
size_t bytes_to_read = samples_to_read * 2 * sizeof(float);
|
||
if (bytes_to_read > cli->audio_remaining) {
|
||
bytes_to_read = cli->audio_remaining;
|
||
samples_to_read = bytes_to_read / (2 * sizeof(float));
|
||
}
|
||
|
||
size_t bytes_read = fread(buffer, 1, bytes_to_read, cli->pcm_file);
|
||
cli->audio_remaining -= bytes_read;
|
||
|
||
return bytes_read / (2 * sizeof(float));
|
||
}
|
||
|
||
/**
|
||
* Encode and write TAD audio packet.
|
||
* Format per terranmon.txt:
|
||
* uint8 Packet Type (0x24)
|
||
* <header for decoding packet>
|
||
* uint16 Sample Count
|
||
* uint32 Compressed Size + 7
|
||
* <header for decoding TAD chunk>
|
||
* uint16 Sample Count
|
||
* uint8 Quantiser Bits
|
||
* uint32 Compressed Size
|
||
* * Zstd-compressed TAD
|
||
*/
|
||
static int write_audio_packet(FILE *fp, cli_context_t *cli, float *pcm_samples, size_t num_samples) {
|
||
if (num_samples == 0) {
|
||
return 0;
|
||
}
|
||
|
||
// Allocate buffer for TAD-encoded data
|
||
size_t max_output_size = num_samples * 4 * sizeof(float) + 1024;
|
||
uint8_t *tad_buffer = malloc(max_output_size);
|
||
if (!tad_buffer) {
|
||
fprintf(stderr, "Error: Cannot allocate TAD buffer\n");
|
||
return -1;
|
||
}
|
||
|
||
// Encode with TAD (returns: sample_count(2) + max_index(1) + payload_size(4) + payload)
|
||
int max_index = tad32_quality_to_max_index(cli->audio_quality);
|
||
size_t tad_chunk_size = tad32_encode_chunk(pcm_samples, num_samples, max_index, 1.0f,
|
||
cli->enc_params.zstd_level, tad_buffer);
|
||
|
||
if (tad_chunk_size == 0) {
|
||
fprintf(stderr, "Error: TAD encoding failed\n");
|
||
free(tad_buffer);
|
||
return -1;
|
||
}
|
||
|
||
// Extract TAD chunk header
|
||
uint16_t sample_count;
|
||
uint8_t quantiser_bits;
|
||
uint32_t compressed_size;
|
||
memcpy(&sample_count, tad_buffer, 2);
|
||
memcpy(&quantiser_bits, tad_buffer + 2, 1);
|
||
memcpy(&compressed_size, tad_buffer + 3, 4);
|
||
|
||
// Write TAV packet header
|
||
fputc(TAV_PACKET_AUDIO_TAD, fp); // Packet type (0x24)
|
||
fwrite(&sample_count, 2, 1, fp); // Sample count
|
||
uint32_t packet_payload_size = compressed_size + 7; // TAD chunk size
|
||
fwrite(&packet_payload_size, 4, 1, fp); // Compressed size + 7
|
||
|
||
// Write TAD chunk (sample_count, quantiser_bits, compressed_size, payload)
|
||
fwrite(tad_buffer, 1, tad_chunk_size, fp);
|
||
|
||
free(tad_buffer);
|
||
return 1 + 2 + 4 + tad_chunk_size; // Total bytes written
|
||
}
|
||
|
||
// =============================================================================
|
||
// Subtitle Functions
|
||
// =============================================================================
|
||
|
||
/**
 * Convert SRT timestamp to nanoseconds.
 * Format: "HH:MM:SS,mmm" (e.g., "00:01:23,456"); a '.' is also accepted
 * in place of the ',' before the milliseconds.
 *
 * Returns the timestamp in nanoseconds, or 0 if the string does not match
 * the format or any field is negative.
 */
static uint64_t srt_time_to_ns(const char *time_str) {
    int hours = 0, minutes = 0, seconds = 0, milliseconds = 0;
    char separator = '\0';

    if (sscanf(time_str, "%d:%d:%d%c%d",
               &hours, &minutes, &seconds, &separator, &milliseconds) != 5) {
        return 0;
    }
    if (separator != ',' && separator != '.') {
        return 0;
    }
    // %d accepts a leading minus sign; a negative field would wrap to a
    // huge value once converted to uint64_t
    if (hours < 0 || minutes < 0 || seconds < 0 || milliseconds < 0) {
        return 0;
    }

    uint64_t total_ns = 0;
    total_ns += (uint64_t)hours * 3600ULL * 1000000000ULL;
    total_ns += (uint64_t)minutes * 60ULL * 1000000000ULL;
    total_ns += (uint64_t)seconds * 1000000000ULL;
    total_ns += (uint64_t)milliseconds * 1000000ULL;

    return total_ns;
}
|
||
|
||
/**
|
||
* Parse SRT subtitle file.
|
||
* Returns linked list of subtitle entries, or NULL on error.
|
||
*/
|
||
static subtitle_entry_t* parse_srt_file(const char *filename) {
|
||
FILE *file = fopen(filename, "r");
|
||
if (!file) {
|
||
fprintf(stderr, "Failed to open subtitle file: %s\n", filename);
|
||
return NULL;
|
||
}
|
||
|
||
subtitle_entry_t *head = NULL;
|
||
subtitle_entry_t *tail = NULL;
|
||
char line[1024];
|
||
int state = 0; // 0=index, 1=time, 2=text, 3=blank
|
||
|
||
subtitle_entry_t *current_entry = NULL;
|
||
char *text_buffer = NULL;
|
||
size_t text_buffer_size = 0;
|
||
|
||
while (fgets(line, sizeof(line), file)) {
|
||
// Remove trailing newline/carriage return
|
||
size_t len = strlen(line);
|
||
while (len > 0 && (line[len-1] == '\n' || line[len-1] == '\r')) {
|
||
line[--len] = '\0';
|
||
}
|
||
|
||
if (state == 0) { // Expecting subtitle index
|
||
if (strlen(line) == 0) continue; // Skip empty lines
|
||
current_entry = calloc(1, sizeof(subtitle_entry_t));
|
||
if (!current_entry) break;
|
||
state = 1;
|
||
} else if (state == 1) { // Expecting time range
|
||
char start_time[32], end_time[32];
|
||
if (sscanf(line, "%31s --> %31s", start_time, end_time) == 2) {
|
||
current_entry->start_time_ns = srt_time_to_ns(start_time);
|
||
current_entry->end_time_ns = srt_time_to_ns(end_time);
|
||
|
||
if (current_entry->start_time_ns == 0 && current_entry->end_time_ns == 0) {
|
||
free(current_entry);
|
||
current_entry = NULL;
|
||
state = 3; // Skip to next blank line
|
||
continue;
|
||
}
|
||
|
||
// Initialize text buffer
|
||
text_buffer_size = 256;
|
||
text_buffer = malloc(text_buffer_size);
|
||
if (!text_buffer) {
|
||
free(current_entry);
|
||
current_entry = NULL;
|
||
break;
|
||
}
|
||
text_buffer[0] = '\0';
|
||
state = 2;
|
||
} else {
|
||
free(current_entry);
|
||
current_entry = NULL;
|
||
state = 3; // Skip malformed entry
|
||
}
|
||
} else if (state == 2) { // Collecting subtitle text
|
||
if (strlen(line) == 0) {
|
||
// End of subtitle text
|
||
current_entry->text = strdup(text_buffer);
|
||
free(text_buffer);
|
||
text_buffer = NULL;
|
||
|
||
// Add to list
|
||
if (!head) {
|
||
head = current_entry;
|
||
tail = current_entry;
|
||
} else {
|
||
tail->next = current_entry;
|
||
tail = current_entry;
|
||
}
|
||
current_entry = NULL;
|
||
state = 0;
|
||
} else {
|
||
// Append text line
|
||
size_t current_len = strlen(text_buffer);
|
||
size_t line_len = strlen(line);
|
||
size_t needed = current_len + line_len + 2; // +2 for newline and null
|
||
|
||
if (needed > text_buffer_size) {
|
||
text_buffer_size = needed + 256;
|
||
char *new_buffer = realloc(text_buffer, text_buffer_size);
|
||
if (!new_buffer) {
|
||
free(text_buffer);
|
||
free(current_entry);
|
||
current_entry = NULL;
|
||
break;
|
||
}
|
||
text_buffer = new_buffer;
|
||
}
|
||
|
||
if (current_len > 0) {
|
||
strcat(text_buffer, "\n");
|
||
}
|
||
strcat(text_buffer, line);
|
||
}
|
||
} else if (state == 3) { // Skipping to next blank line
|
||
if (strlen(line) == 0) {
|
||
state = 0;
|
||
}
|
||
}
|
||
}
|
||
|
||
// Handle last subtitle if file ended while collecting text
|
||
if (state == 2 && current_entry && text_buffer) {
|
||
current_entry->text = strdup(text_buffer);
|
||
free(text_buffer);
|
||
text_buffer = NULL;
|
||
|
||
// Add to list
|
||
if (!head) {
|
||
head = current_entry;
|
||
tail = current_entry;
|
||
} else {
|
||
tail->next = current_entry;
|
||
tail = current_entry;
|
||
}
|
||
current_entry = NULL;
|
||
} else if (current_entry) {
|
||
// Cleanup any incomplete entry
|
||
free(current_entry);
|
||
if (text_buffer) free(text_buffer);
|
||
}
|
||
|
||
fclose(file);
|
||
return head;
|
||
}
|
||
|
||
/**
|
||
* Free subtitle list.
|
||
*/
|
||
static void free_subtitle_list(subtitle_entry_t *list) {
|
||
while (list) {
|
||
subtitle_entry_t *next = list->next;
|
||
free(list->text);
|
||
free(list);
|
||
list = next;
|
||
}
|
||
}
|
||
|
||
// =============================================================================
|
||
// Two-Pass Scene Change Detection Functions
|
||
// =============================================================================
|
||
|
||
// 1D Haar forward transform (in-place)
//
// Output layout: ceil(length / 2) low-pass values followed by
// floor(length / 2) high-pass values. Each input pair (a, b) contributes
// (a + b) / 2 to the low band and (a - b) / 2 to the high band. For an odd
// length the unpaired last sample is carried over unchanged as the final
// low-pass value, so the low band has the same size as the next entry of
// the round(n / 2) division series used by the 2D transform.
//
// Lengths below 2 are left untouched. If the scratch buffer cannot be
// allocated the data is also left untouched.
static void haar_forward_1d(float *data, int length) {
    if (length < 2) return;

    const int pairs = length / 2;
    const int low_count = length - pairs;  // ceil(length / 2)

    float *temp = malloc((size_t)length * sizeof(float));
    if (!temp) return;

    for (int i = 0; i < pairs; i++) {
        float a = data[2 * i];
        float b = data[2 * i + 1];
        temp[i] = (a + b) * 0.5f;              // Low-pass (average)
        temp[low_count + i] = (a - b) * 0.5f;  // High-pass (difference)
    }

    if (length % 2 != 0) {
        // Unpaired trailing sample: keep it in the low band
        temp[pairs] = data[length - 1];
    }

    memcpy(data, temp, (size_t)length * sizeof(float));
    free(temp);
}
|
||
|
||
// 2D Haar forward transform for analysis (works on ANALYSIS_WIDTH x ANALYSIS_HEIGHT buffer)
//
// data is a row-major width x height buffer, transformed in place. Each
// level runs the 1D transform over every row and then every column of the
// current top-left region; the region then shrinks to ceil(w / 2) x
// ceil(h / 2), which equals the round(w / 2.0f) division series for positive
// sizes. Decomposition stops early once either side drops below 2.
//
// Does nothing for a NULL buffer, non-positive dimensions or levels, or if
// the scratch row cannot be allocated.
static void analysis_haar_2d_forward(float *data, int width, int height, int levels) {
    if (!data || width < 1 || height < 1 || levels < 1) return;

    // One scratch line long enough for either a row or a column
    const int longest_side = width > height ? width : height;
    float *temp = malloc((size_t)longest_side * sizeof(float));
    if (!temp) return;

    int current_width = width;
    int current_height = height;

    for (int level = 0; level < levels; level++) {
        if (current_width < 2 || current_height < 2) break;

        // Horizontal pass
        for (int y = 0; y < current_height; y++) {
            float *row = data + (size_t)y * width;
            memcpy(temp, row, (size_t)current_width * sizeof(float));
            haar_forward_1d(temp, current_width);
            memcpy(row, temp, (size_t)current_width * sizeof(float));
        }

        // Vertical pass
        for (int x = 0; x < current_width; x++) {
            for (int y = 0; y < current_height; y++) {
                temp[y] = data[(size_t)y * width + x];
            }
            haar_forward_1d(temp, current_height);
            for (int y = 0; y < current_height; y++) {
                data[(size_t)y * width + x] = temp[y];
            }
        }

        // Next level works on the low-pass quadrant only
        current_width = (current_width + 1) / 2;
        current_height = (current_height + 1) / 2;
    }

    free(temp);
}
|
||
|
||
// Bilinear resize RGB frame to fixed 128x128 grayscale analysis buffer
|
||
static float* resize_frame_to_analysis(const uint8_t *rgb_frame, int src_width, int src_height) {
|
||
float *gray = malloc(ANALYSIS_WIDTH * ANALYSIS_HEIGHT * sizeof(float));
|
||
|
||
float x_ratio = (float)(src_width - 1) / (ANALYSIS_WIDTH - 1);
|
||
float y_ratio = (float)(src_height - 1) / (ANALYSIS_HEIGHT - 1);
|
||
|
||
for (int y = 0; y < ANALYSIS_HEIGHT; y++) {
|
||
for (int x = 0; x < ANALYSIS_WIDTH; x++) {
|
||
float src_x = x * x_ratio;
|
||
float src_y = y * y_ratio;
|
||
|
||
int x0 = (int)src_x;
|
||
int y0 = (int)src_y;
|
||
int x1 = x0 + 1 < src_width ? x0 + 1 : x0;
|
||
int y1 = y0 + 1 < src_height ? y0 + 1 : y0;
|
||
|
||
float x_frac = src_x - x0;
|
||
float y_frac = src_y - y0;
|
||
|
||
// Get grayscale values at four corners
|
||
int idx00 = (y0 * src_width + x0) * 3;
|
||
int idx01 = (y0 * src_width + x1) * 3;
|
||
int idx10 = (y1 * src_width + x0) * 3;
|
||
int idx11 = (y1 * src_width + x1) * 3;
|
||
|
||
float g00 = 0.299f * rgb_frame[idx00] + 0.587f * rgb_frame[idx00 + 1] + 0.114f * rgb_frame[idx00 + 2];
|
||
float g01 = 0.299f * rgb_frame[idx01] + 0.587f * rgb_frame[idx01 + 1] + 0.114f * rgb_frame[idx01 + 2];
|
||
float g10 = 0.299f * rgb_frame[idx10] + 0.587f * rgb_frame[idx10 + 1] + 0.114f * rgb_frame[idx10 + 2];
|
||
float g11 = 0.299f * rgb_frame[idx11] + 0.587f * rgb_frame[idx11 + 1] + 0.114f * rgb_frame[idx11 + 2];
|
||
|
||
// Bilinear interpolation
|
||
float top = g00 * (1 - x_frac) + g01 * x_frac;
|
||
float bottom = g10 * (1 - x_frac) + g11 * x_frac;
|
||
gray[y * ANALYSIS_WIDTH + x] = top * (1 - y_frac) + bottom * y_frac;
|
||
}
|
||
}
|
||
|
||
return gray;
|
||
}
|
||
|
||
// Calculate Shannon entropy of coefficient magnitudes
|
||
/*static double calculate_shannon_entropy(const float *coeffs, int count) {
|
||
if (count == 0) return 0.0;
|
||
|
||
// Build histogram of coefficient magnitudes (use 256 bins)
|
||
#define HIST_BINS 256
|
||
int histogram[HIST_BINS] = {0};
|
||
|
||
// Find min/max for normalisation
|
||
float min_val = FLT_MAX, max_val = -FLT_MAX;
|
||
for (int i = 0; i < count; i++) {
|
||
float abs_val = fabsf(coeffs[i]);
|
||
if (abs_val < min_val) min_val = abs_val;
|
||
if (abs_val > max_val) max_val = abs_val;
|
||
}
|
||
|
||
// Avoid division by zero
|
||
float range = max_val - min_val;
|
||
if (range < 1e-6) return 0.0;
|
||
|
||
// Build histogram
|
||
for (int i = 0; i < count; i++) {
|
||
float abs_val = fabsf(coeffs[i]);
|
||
int bin = (int)((abs_val - min_val) / range * (HIST_BINS - 1));
|
||
bin = bin < 0 ? 0 : (bin >= HIST_BINS ? HIST_BINS - 1 : bin);
|
||
histogram[bin]++;
|
||
}
|
||
|
||
// Calculate entropy: H = -sum(p_i * log2(p_i))
|
||
double entropy = 0.0;
|
||
for (int i = 0; i < HIST_BINS; i++) {
|
||
if (histogram[i] > 0) {
|
||
double p = (double)histogram[i] / count;
|
||
entropy -= p * log2(p);
|
||
}
|
||
}
|
||
|
||
return entropy;
|
||
#undef HIST_BINS
|
||
}*/
|
||
|
||
// Extract subband from DWT coefficients (helper for entropy calculation)
|
||
/*static void extract_subband(const float *dwt_data, int width, int height, int level,
|
||
int band, float *output, int *out_count) {
|
||
// band: 0=LL, 1=LH, 2=HL, 3=HH
|
||
// For level L, subbands are in top-left quadrant of size (width>>L, height>>L)
|
||
|
||
// Generate division series
|
||
int widths[10]; widths[0] = width;
|
||
int heights[10]; heights[0] = height;
|
||
|
||
for (int i = 1; i < 10; i++) {
|
||
widths[i] = (int)roundf(widths[i - 1] / 2.0f);
|
||
heights[i] = (int)roundf(heights[i - 1] / 2.0f);
|
||
}
|
||
|
||
int level_width = widths[level];
|
||
int level_height = heights[level];
|
||
int half_width = level_width / 2;
|
||
int half_height = level_height / 2;
|
||
|
||
if (half_width < 1 || half_height < 1) {
|
||
*out_count = 0;
|
||
return;
|
||
}
|
||
|
||
int count = 0;
|
||
int offset_x = (band & 1) ? half_width : 0; // LH, HH have x offset
|
||
int offset_y = (band & 2) ? half_height : 0; // HL, HH have y offset
|
||
|
||
for (int y = 0; y < half_height; y++) {
|
||
for (int x = 0; x < half_width; x++) {
|
||
int src_x = offset_x + x;
|
||
int src_y = offset_y + y;
|
||
output[count++] = dwt_data[src_y * width + src_x];
|
||
}
|
||
}
|
||
|
||
*out_count = count;
|
||
}*/
|
||
|
||
// Compute comprehensive frame analysis metrics
|
||
static void compute_frame_metrics(const float *dwt_current, const float *dwt_previous,
|
||
frame_analysis_t *metrics) {
|
||
int width = ANALYSIS_WIDTH;
|
||
int height = ANALYSIS_HEIGHT;
|
||
int num_pixels = width * height;
|
||
int levels = ANALYSIS_DWT_LEVELS;
|
||
|
||
// Generate division series
|
||
int widths[levels + 1]; widths[0] = width;
|
||
int heights[levels + 1]; heights[0] = height;
|
||
|
||
for (int i = 1; i <= levels; i++) {
|
||
widths[i] = (int)roundf(widths[i - 1] / 2.0f);
|
||
heights[i] = (int)roundf(heights[i - 1] / 2.0f);
|
||
}
|
||
|
||
// Initialise metrics
|
||
memset(metrics, 0, sizeof(frame_analysis_t));
|
||
|
||
// Extract LL band (approximation coefficients)
|
||
int ll_width = widths[levels];
|
||
int ll_height = heights[levels];
|
||
int ll_count = ll_width * ll_height;
|
||
|
||
if (ll_count <= 0) return;
|
||
|
||
// Metric 1: LL band statistics (mean, variance)
|
||
double ll_sum = 0.0, ll_sum_sq = 0.0;
|
||
for (int i = 0; i < ll_count; i++) {
|
||
float val = dwt_current[i];
|
||
ll_sum += val;
|
||
ll_sum_sq += val * val;
|
||
}
|
||
metrics->ll_mean = ll_sum / ll_count;
|
||
double ll_var = (ll_sum_sq / ll_count) - (metrics->ll_mean * metrics->ll_mean);
|
||
metrics->ll_variance = ll_var > 0 ? ll_var : 0;
|
||
|
||
// Metric 2: LL_diff (L1 distance between consecutive frames)
|
||
if (dwt_previous) {
|
||
double diff_sum = 0.0;
|
||
for (int i = 0; i < ll_count; i++) {
|
||
diff_sum += fabs(dwt_current[i] - dwt_previous[i]);
|
||
}
|
||
metrics->ll_diff = diff_sum / ll_count;
|
||
}
|
||
|
||
// Metric 3: Highband energy and ratio
|
||
double total_energy = 0.0, highband_energy = 0.0;
|
||
for (int i = 0; i < num_pixels; i++) {
|
||
float abs_val = fabsf(dwt_current[i]);
|
||
total_energy += abs_val;
|
||
if (i >= ll_count) { // All coefficients except LL band
|
||
highband_energy += abs_val;
|
||
}
|
||
}
|
||
metrics->total_energy = total_energy;
|
||
metrics->highband_energy = highband_energy;
|
||
metrics->highband_ratio = total_energy > 0 ? (highband_energy / total_energy) : 0;
|
||
|
||
// Metric 4: Per-band entropies
|
||
/*float *subband_buffer = malloc(num_pixels * sizeof(float));
|
||
int subband_count;
|
||
|
||
// LL band entropy
|
||
extract_subband(dwt_current, width, height, levels, 0, subband_buffer, &subband_count);
|
||
metrics->entropy_ll = calculate_shannon_entropy(subband_buffer, subband_count);
|
||
|
||
// High-frequency bands entropy (LH, HL, HH for each level)
|
||
for (int level = 0; level < levels && level < ANALYSIS_DWT_LEVELS; level++) {
|
||
// LH band
|
||
extract_subband(dwt_current, width, height, level, 1, subband_buffer, &subband_count);
|
||
metrics->entropy_lh[level] = calculate_shannon_entropy(subband_buffer, subband_count);
|
||
|
||
// HL band
|
||
extract_subband(dwt_current, width, height, level, 2, subband_buffer, &subband_count);
|
||
metrics->entropy_hl[level] = calculate_shannon_entropy(subband_buffer, subband_count);
|
||
|
||
// HH band
|
||
extract_subband(dwt_current, width, height, level, 3, subband_buffer, &subband_count);
|
||
metrics->entropy_hh[level] = calculate_shannon_entropy(subband_buffer, subband_count);
|
||
}*/
|
||
|
||
// Metric 5: Zero crossing rate in highbands (texture change indicator)
|
||
int zero_crossings = 0;
|
||
int highband_coeffs = num_pixels - ll_count;
|
||
if (highband_coeffs > 1) {
|
||
for (int i = ll_count; i < num_pixels - 1; i++) {
|
||
if ((dwt_current[i] > 0 && dwt_current[i + 1] < 0) ||
|
||
(dwt_current[i] < 0 && dwt_current[i + 1] > 0)) {
|
||
zero_crossings++;
|
||
}
|
||
}
|
||
metrics->zero_crossing_rate = (double)zero_crossings / highband_coeffs;
|
||
}
|
||
|
||
//free(subband_buffer);
|
||
}
|
||
|
||
// Hybrid scene change detector with adaptive thresholds
|
||
// Returns 1 if scene change detected, 0 otherwise
|
||
static int detect_scene_change_wavelet(int frame_number,
|
||
const frame_analysis_t *metrics_history,
|
||
int history_count,
|
||
const frame_analysis_t *current_metrics,
|
||
int verbose) {
|
||
if (history_count < 2) return 0; // Need history for adaptive thresholds
|
||
|
||
// Calculate moving statistics for LL_diff (mean and stddev)
|
||
int window_size = history_count < ANALYSIS_MOVING_WINDOW ? history_count : ANALYSIS_MOVING_WINDOW;
|
||
int start_idx = history_count - window_size;
|
||
|
||
double ll_diff_sum = 0.0, ll_diff_sum_sq = 0.0;
|
||
for (int i = start_idx; i < history_count; i++) {
|
||
double val = metrics_history[i].ll_diff;
|
||
ll_diff_sum += val;
|
||
ll_diff_sum_sq += val * val;
|
||
}
|
||
|
||
double ll_diff_mean = ll_diff_sum / window_size;
|
||
double ll_diff_variance = (ll_diff_sum_sq / window_size) - (ll_diff_mean * ll_diff_mean);
|
||
double ll_diff_stddev = ll_diff_variance > 0 ? sqrt(ll_diff_variance) : 0;
|
||
|
||
// Adaptive threshold: mean + k*stddev (with minimum absolute threshold)
|
||
double ll_diff_threshold = ll_diff_mean + ANALYSIS_STDDEV_MULTIPLIER * ll_diff_stddev;
|
||
if (ll_diff_threshold < ANALYSIS_LL_DIFF_MIN_THRESHOLD) {
|
||
ll_diff_threshold = ANALYSIS_LL_DIFF_MIN_THRESHOLD;
|
||
}
|
||
|
||
// Detection rule 1: Hard cut or fast fade (LL_diff spike)
|
||
// Normalise LL_diff by LL_mean to handle exposure/lighting changes
|
||
double normalised_ll_diff = current_metrics->ll_mean > 1.0 ?
|
||
current_metrics->ll_diff / current_metrics->ll_mean : current_metrics->ll_diff;
|
||
double normalised_threshold = current_metrics->ll_mean > 1.0 ?
|
||
ll_diff_threshold / current_metrics->ll_mean : ll_diff_threshold;
|
||
|
||
if (normalised_ll_diff > normalised_threshold) {
|
||
if (verbose) {
|
||
printf(" Scene change detected frame %d: Normalised LL_diff=%.4f > threshold=%.4f (raw: %.2f > %.2f)\n",
|
||
frame_number + 1, normalised_ll_diff, normalised_threshold,
|
||
current_metrics->ll_diff, ll_diff_threshold);
|
||
}
|
||
return 1;
|
||
}
|
||
|
||
// Detection rule 2: Structural change (high-frequency energy spike)
|
||
double hb_ratio_threshold = ANALYSIS_HB_RATIO_THRESHOLD;
|
||
|
||
// Calculate average highband energy from history
|
||
double hb_energy_sum = 0.0;
|
||
for (int i = start_idx; i < history_count; i++) {
|
||
hb_energy_sum += metrics_history[i].highband_energy;
|
||
}
|
||
double hb_energy_mean = hb_energy_sum / window_size;
|
||
double hb_energy_threshold = hb_energy_mean * ANALYSIS_HB_ENERGY_MULTIPLIER;
|
||
|
||
// Check if highband spike is detected
|
||
if (current_metrics->highband_ratio > hb_ratio_threshold &&
|
||
current_metrics->highband_energy > hb_energy_threshold) {
|
||
|
||
// Calculate confidence: how much does it exceed threshold?
|
||
double ratio_confidence = current_metrics->highband_ratio / hb_ratio_threshold;
|
||
double energy_confidence = current_metrics->highband_energy / hb_energy_threshold;
|
||
double min_confidence = ratio_confidence < energy_confidence ? ratio_confidence : energy_confidence;
|
||
|
||
// High confidence (>1.3x threshold): Skip persistence check (likely hard cut)
|
||
if (min_confidence > 1.3) {
|
||
if (verbose) {
|
||
printf(" Scene change detected frame %d: HB_ratio=%.3f > %.3f AND HB_energy=%.1f > %.1f (high confidence: %.2fx)\n",
|
||
frame_number + 1, current_metrics->highband_ratio, hb_ratio_threshold,
|
||
current_metrics->highband_energy, hb_energy_threshold, min_confidence);
|
||
}
|
||
return 1;
|
||
}
|
||
|
||
// Borderline detection: Check persistence to avoid single-frame flashes
|
||
if (history_count >= 1) {
|
||
const frame_analysis_t *prev_metrics = &metrics_history[history_count - 1];
|
||
if (prev_metrics->highband_ratio > hb_ratio_threshold * 0.6 ||
|
||
prev_metrics->highband_energy > hb_energy_threshold * 0.6) {
|
||
if (verbose) {
|
||
printf(" Scene change detected frame %d: HB_ratio=%.3f > %.3f AND HB_energy=%.1f > %.1f (persistent)\n",
|
||
frame_number + 1, current_metrics->highband_ratio, hb_ratio_threshold,
|
||
current_metrics->highband_energy, hb_energy_threshold);
|
||
}
|
||
return 1;
|
||
}
|
||
}
|
||
}
|
||
|
||
// Detection rule 3: Gradual transition (slow LL_mean change over several frames)
|
||
// Check if LL_mean changed significantly over last 5 frames
|
||
if (history_count >= 5) {
|
||
double ll_mean_5_frames_ago = metrics_history[history_count - 5].ll_mean;
|
||
double ll_mean_change = fabs(current_metrics->ll_mean - ll_mean_5_frames_ago);
|
||
|
||
if (ll_mean_change > ANALYSIS_FADE_THRESHOLD) {
|
||
if (verbose) {
|
||
printf(" Scene change detected frame %d: Gradual fade - LL_mean change=%.2f over 5 frames (threshold=%.1f)\n",
|
||
frame_number + 1, ll_mean_change, ANALYSIS_FADE_THRESHOLD);
|
||
}
|
||
return 1;
|
||
}
|
||
}
|
||
|
||
return 0; // No scene change detected
|
||
}
|
||
|
||
// Split a scene into evenly-sized GOPs
|
||
// Returns linked list of GOP boundaries for the scene
|
||
static gop_boundary_t* split_scene_into_gops(int scene_start, int scene_end,
|
||
int min_gop_size, int max_gop_size,
|
||
gop_boundary_t **tail_ptr, int verbose) {
|
||
int scene_length = scene_end - scene_start + 1;
|
||
|
||
if (scene_length < min_gop_size) {
|
||
// Scene too short, make it a single GOP
|
||
gop_boundary_t *boundary = malloc(sizeof(gop_boundary_t));
|
||
boundary->start_frame = scene_start;
|
||
boundary->end_frame = scene_end;
|
||
boundary->num_frames = scene_length;
|
||
boundary->next = NULL;
|
||
*tail_ptr = boundary;
|
||
return boundary;
|
||
}
|
||
|
||
// Calculate optimal number of GOPs for this scene
|
||
int num_gops = (scene_length + max_gop_size - 1) / max_gop_size; // ceil(scene_length / max_gop_size)
|
||
|
||
// Make sure each GOP is at least min_gop_size
|
||
if (scene_length / num_gops < min_gop_size) {
|
||
num_gops = scene_length / min_gop_size;
|
||
}
|
||
|
||
if (num_gops < 1) num_gops = 1;
|
||
|
||
// Calculate base GOP size and remainder for even distribution
|
||
int base_gop_size = scene_length / num_gops;
|
||
int remainder = scene_length % num_gops;
|
||
|
||
gop_boundary_t *head = NULL;
|
||
gop_boundary_t *tail = NULL;
|
||
int current_frame = scene_start;
|
||
|
||
for (int i = 0; i < num_gops; i++) {
|
||
// Distribute remainder frames evenly across GOPs
|
||
int gop_size = base_gop_size + (i < remainder ? 1 : 0);
|
||
|
||
gop_boundary_t *boundary = malloc(sizeof(gop_boundary_t));
|
||
boundary->start_frame = current_frame;
|
||
boundary->end_frame = current_frame + gop_size - 1;
|
||
boundary->num_frames = gop_size;
|
||
boundary->next = NULL;
|
||
|
||
if (tail) {
|
||
tail->next = boundary;
|
||
tail = boundary;
|
||
} else {
|
||
head = tail = boundary;
|
||
}
|
||
|
||
if (verbose) {
|
||
printf(" GOP: frames %d-%d (length %d)\n",
|
||
boundary->start_frame, boundary->end_frame, boundary->num_frames);
|
||
}
|
||
|
||
current_frame += gop_size;
|
||
}
|
||
|
||
*tail_ptr = tail;
|
||
return head;
|
||
}
|
||
|
||
// Build GOP boundaries from frame analysis data
|
||
// First detects scene boundaries, then splits each scene into evenly-sized GOPs
|
||
static gop_boundary_t* build_gop_boundaries(const frame_analysis_t *analyses, int num_frames,
|
||
int min_gop_size, int max_gop_size, int verbose) {
|
||
if (num_frames < min_gop_size) return NULL;
|
||
|
||
// Step 1: Detect scene boundaries (actual hard cuts only)
|
||
int *scene_boundaries = malloc((num_frames + 1) * sizeof(int));
|
||
int num_scenes = 0;
|
||
scene_boundaries[num_scenes++] = 0; // First scene starts at frame 0
|
||
|
||
for (int i = 1; i < num_frames; i++) {
|
||
if (analyses[i].is_scene_change) {
|
||
scene_boundaries[num_scenes++] = i;
|
||
if (verbose) {
|
||
printf(" Scene boundary candidate at frame %d\n", i);
|
||
}
|
||
}
|
||
}
|
||
scene_boundaries[num_scenes++] = num_frames; // End of last scene
|
||
|
||
// Step 1.5: Merge tiny scenes (< min_gop_size) with adjacent scenes
|
||
// This prevents false positives from creating 1-frame GOPs
|
||
int *merged_boundaries = malloc((num_scenes + 1) * sizeof(int));
|
||
int num_merged = 0;
|
||
merged_boundaries[num_merged++] = scene_boundaries[0]; // Always keep first boundary
|
||
|
||
for (int s = 1; s < num_scenes; s++) {
|
||
int scene_length = scene_boundaries[s] - scene_boundaries[s - 1];
|
||
|
||
// If this scene is too short, skip this boundary (merge with next scene)
|
||
if (scene_length >= min_gop_size || s == num_scenes - 1) {
|
||
merged_boundaries[num_merged++] = scene_boundaries[s];
|
||
} else if (verbose) {
|
||
printf(" Merging tiny scene at frame %d (length %d)\n",
|
||
scene_boundaries[s - 1], scene_length);
|
||
}
|
||
}
|
||
|
||
// Replace original boundaries with merged ones
|
||
free(scene_boundaries);
|
||
scene_boundaries = merged_boundaries;
|
||
num_scenes = num_merged;
|
||
|
||
if (verbose) {
|
||
printf(" After merging: %d scenes\n", num_scenes - 1);
|
||
}
|
||
|
||
// Step 2: Split each scene into evenly-sized GOPs
|
||
gop_boundary_t *head = NULL;
|
||
gop_boundary_t *tail = NULL;
|
||
|
||
for (int s = 0; s < num_scenes - 1; s++) {
|
||
int scene_start = scene_boundaries[s];
|
||
int scene_end = scene_boundaries[s + 1] - 1;
|
||
int scene_length = scene_end - scene_start + 1;
|
||
|
||
if (verbose) {
|
||
printf(" Scene %d: frames %d-%d (length %d)\n",
|
||
s + 1, scene_start, scene_end, scene_length);
|
||
}
|
||
|
||
// Split scene into evenly-sized GOPs
|
||
gop_boundary_t *scene_tail = NULL;
|
||
gop_boundary_t *scene_gops = split_scene_into_gops(scene_start, scene_end,
|
||
min_gop_size, max_gop_size,
|
||
&scene_tail, verbose);
|
||
|
||
// Link to main GOP list
|
||
if (head == NULL) {
|
||
head = scene_gops;
|
||
tail = scene_tail;
|
||
} else {
|
||
tail->next = scene_gops;
|
||
tail = scene_tail;
|
||
}
|
||
}
|
||
|
||
free(scene_boundaries);
|
||
return head;
|
||
}
|
||
|
||
// Free GOP boundary list
|
||
static void free_gop_boundaries(gop_boundary_t *head) {
|
||
while (head) {
|
||
gop_boundary_t *next = head->next;
|
||
free(head);
|
||
head = next;
|
||
}
|
||
}
|
||
|
||
// First pass: Analyse all frames and build GOP boundaries
|
||
// Returns 0 on success, -1 on error
|
||
static int two_pass_first_pass(cli_context_t *cli) {
|
||
printf("=== Two-Pass Encoding: First Pass (Scene Analysis) ===\n");
|
||
printf(" Using fixed 128x128 analysis resolution for all video sizes\n");
|
||
|
||
// Allocate analysis array (estimate: 10000 frames max for in-memory storage)
|
||
cli->frame_analyses_capacity = 10000;
|
||
cli->frame_analyses = malloc(cli->frame_analyses_capacity * sizeof(frame_analysis_t));
|
||
cli->frame_analyses_count = 0;
|
||
|
||
if (!cli->frame_analyses) {
|
||
fprintf(stderr, "Error: Failed to allocate frame analysis buffer\n");
|
||
return -1;
|
||
}
|
||
|
||
// Open FFmpeg pipe for first pass
|
||
char ffmpeg_cmd[MAX_PATH * 2];
|
||
if (cli->interlaced) {
|
||
snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd),
|
||
"ffmpeg -loglevel error -i \"%s\" -f rawvideo -pix_fmt rgb24 "
|
||
"-vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d,"
|
||
"tinterlace=interleave_top:cvlpf,separatefields\" -",
|
||
cli->input_file, cli->enc_params.width, cli->header_height,
|
||
cli->enc_params.width, cli->header_height);
|
||
} else {
|
||
snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd),
|
||
"ffmpeg -loglevel error -i \"%s\" -f rawvideo -pix_fmt rgb24 "
|
||
"-vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" -",
|
||
cli->input_file, cli->enc_params.width, cli->enc_params.height,
|
||
cli->enc_params.width, cli->enc_params.height);
|
||
}
|
||
|
||
FILE *ffmpeg_pipe = popen(ffmpeg_cmd, "r");
|
||
if (!ffmpeg_pipe) {
|
||
fprintf(stderr, "Error: Failed to open FFmpeg pipe for first pass\n");
|
||
free(cli->frame_analyses);
|
||
cli->frame_analyses = NULL;
|
||
return -1;
|
||
}
|
||
|
||
size_t frame_rgb_size = cli->enc_params.width * cli->enc_params.height * 3;
|
||
uint8_t *frame_rgb = malloc(frame_rgb_size);
|
||
float *prev_dwt = NULL;
|
||
|
||
int frame_num = 0;
|
||
size_t bytes_read;
|
||
while ((bytes_read = fread(frame_rgb, 1, frame_rgb_size, ffmpeg_pipe)) == frame_rgb_size) {
|
||
// Honor encode limit BEFORE processing
|
||
if (cli->encode_limit > 0 && frame_num >= cli->encode_limit) {
|
||
break;
|
||
}
|
||
|
||
// Resize to fixed 128x128 grayscale
|
||
float *gray = resize_frame_to_analysis(frame_rgb, cli->enc_params.width, cli->enc_params.height);
|
||
|
||
// Apply 3-level Haar DWT
|
||
analysis_haar_2d_forward(gray, ANALYSIS_WIDTH, ANALYSIS_HEIGHT, ANALYSIS_DWT_LEVELS);
|
||
|
||
// Compute metrics
|
||
frame_analysis_t metrics;
|
||
compute_frame_metrics(gray, prev_dwt, &metrics);
|
||
|
||
// Set frame number AFTER compute_frame_metrics (which does memset)
|
||
metrics.frame_number = frame_num;
|
||
|
||
// Detect scene change using hybrid detector
|
||
if (frame_num > 0) {
|
||
metrics.is_scene_change = detect_scene_change_wavelet(
|
||
frame_num,
|
||
cli->frame_analyses,
|
||
cli->frame_analyses_count,
|
||
&metrics,
|
||
cli->verbose
|
||
);
|
||
} else {
|
||
metrics.is_scene_change = 0; // First frame is always start of first GOP
|
||
}
|
||
|
||
// Store analysis
|
||
if (cli->frame_analyses_count >= cli->frame_analyses_capacity) {
|
||
// Expand array
|
||
cli->frame_analyses_capacity *= 2;
|
||
cli->frame_analyses = realloc(cli->frame_analyses,
|
||
cli->frame_analyses_capacity * sizeof(frame_analysis_t));
|
||
if (!cli->frame_analyses) {
|
||
fprintf(stderr, "Error: Failed to reallocate analysis buffer\n");
|
||
free(gray);
|
||
if (prev_dwt) free(prev_dwt);
|
||
free(frame_rgb);
|
||
pclose(ffmpeg_pipe);
|
||
return -1;
|
||
}
|
||
}
|
||
|
||
cli->frame_analyses[cli->frame_analyses_count++] = metrics;
|
||
|
||
// Update previous DWT
|
||
if (prev_dwt) free(prev_dwt);
|
||
prev_dwt = gray;
|
||
|
||
frame_num++;
|
||
|
||
if (frame_num % 100 == 0) {
|
||
printf(" Analysed %d frames...\r", frame_num);
|
||
fflush(stdout);
|
||
}
|
||
}
|
||
|
||
printf("\n Analysed %d frames total\n", frame_num);
|
||
|
||
free(frame_rgb);
|
||
if (prev_dwt) free(prev_dwt);
|
||
pclose(ffmpeg_pipe);
|
||
|
||
// Build GOP boundaries
|
||
printf(" Building GOP boundaries...\n");
|
||
cli->gop_boundaries = build_gop_boundaries(
|
||
cli->frame_analyses,
|
||
cli->frame_analyses_count,
|
||
ANALYSIS_GOP_MIN_SIZE,
|
||
ANALYSIS_GOP_MAX_SIZE,
|
||
cli->verbose
|
||
);
|
||
|
||
// Count and print GOP statistics
|
||
int num_gops = 0;
|
||
int total_gop_frames = 0;
|
||
int min_gop = INT_MAX, max_gop = 0;
|
||
gop_boundary_t *gop = cli->gop_boundaries;
|
||
while (gop) {
|
||
num_gops++;
|
||
total_gop_frames += gop->num_frames;
|
||
if (gop->num_frames < min_gop) min_gop = gop->num_frames;
|
||
if (gop->num_frames > max_gop) max_gop = gop->num_frames;
|
||
gop = gop->next;
|
||
}
|
||
|
||
printf(" GOP Statistics:\n");
|
||
printf(" Total GOPs: %d\n", num_gops);
|
||
if (num_gops > 0) {
|
||
printf(" Average GOP size: %.1f frames\n", (double)total_gop_frames / num_gops);
|
||
printf(" Min GOP size: %d frames\n", min_gop);
|
||
printf(" Max GOP size: %d frames\n", max_gop);
|
||
}
|
||
|
||
printf("=== First Pass Complete ===\n\n");
|
||
|
||
return 0;
|
||
}
|
||
|
||
/**
|
||
* Write subtitle packet in SSF-TC format.
|
||
* Packet structure:
|
||
* uint8 Packet Type (0x31)
|
||
* uint32 Packet Size
|
||
* uint24 Subtitle Index (little-endian, always 0 for now)
|
||
* uint64 Timecode (nanoseconds, little-endian)
|
||
* uint8 Opcode (0x01=show, 0x02=hide)
|
||
* char[] Text (null-terminated, empty for hide)
|
||
*/
|
||
static int write_subtitle_packet(FILE *fp, uint64_t timecode_ns, uint8_t opcode, const char *text) {
|
||
// Calculate packet size: index (3) + timecode (8) + opcode (1) + text + null
|
||
size_t text_len = text ? strlen(text) : 0;
|
||
size_t packet_size = 3 + 8 + 1 + text_len + 1;
|
||
|
||
// Write packet type and size
|
||
fputc(TAV_PACKET_SUBTITLE_TC, fp);
|
||
uint32_t size32 = (uint32_t)packet_size;
|
||
fwrite(&size32, 4, 1, fp);
|
||
|
||
// Write subtitle index (24-bit, little-endian) - always 0
|
||
uint8_t index_bytes[3] = {0, 0, 0};
|
||
fwrite(index_bytes, 3, 1, fp);
|
||
|
||
// Write timecode (64-bit, little-endian)
|
||
uint8_t timecode_bytes[8];
|
||
for (int i = 0; i < 8; i++) {
|
||
timecode_bytes[i] = (timecode_ns >> (i * 8)) & 0xFF;
|
||
}
|
||
fwrite(timecode_bytes, 8, 1, fp);
|
||
|
||
// Write opcode
|
||
fputc(opcode, fp);
|
||
|
||
// Write text if present
|
||
if (text && text_len > 0) {
|
||
fwrite(text, 1, text_len, fp);
|
||
}
|
||
|
||
// Write null terminator
|
||
fputc(0, fp);
|
||
|
||
return 1 + 4 + (int)packet_size; // Total bytes written
|
||
}
|
||
|
||
/**
|
||
* Write all subtitles upfront in SSF-TC format.
|
||
* Each subtitle generates two packets: show and hide events.
|
||
*/
|
||
static int write_all_subtitles(FILE *fp, subtitle_entry_t *subtitles, int verbose) {
|
||
if (!subtitles) return 0;
|
||
|
||
int bytes_written = 0;
|
||
int subtitle_count = 0;
|
||
|
||
subtitle_entry_t *sub = subtitles;
|
||
while (sub) {
|
||
// Write show subtitle event (opcode 0x01)
|
||
bytes_written += write_subtitle_packet(fp, sub->start_time_ns, 0x01, sub->text);
|
||
|
||
// Write hide subtitle event (opcode 0x02)
|
||
bytes_written += write_subtitle_packet(fp, sub->end_time_ns, 0x02, NULL);
|
||
|
||
subtitle_count++;
|
||
if (verbose) {
|
||
printf(" Subtitle %d: show at %.3fs, hide at %.3fs: %.50s%s\n",
|
||
subtitle_count,
|
||
sub->start_time_ns / 1000000000.0,
|
||
sub->end_time_ns / 1000000000.0,
|
||
sub->text, strlen(sub->text) > 50 ? "..." : "");
|
||
}
|
||
|
||
sub = sub->next;
|
||
}
|
||
|
||
if (verbose && subtitle_count > 0) {
|
||
printf("Wrote %d SSF-TC subtitle events (%d bytes)\n", subtitle_count * 2, bytes_written);
|
||
}
|
||
|
||
return bytes_written;
|
||
}
|
||
|
||
// =============================================================================
|
||
// Font ROM Functions
|
||
// =============================================================================
|
||
|
||
/**
|
||
* Write font ROM packet in SSF format.
|
||
* Packet structure:
|
||
* uint8 Packet Type (0x30 - SSF)
|
||
* uint32 Packet Size
|
||
* uint24 Index (3 bytes, always 0 for font ROM)
|
||
* uint8 Opcode (0x80=low font ROM, 0x81=high font ROM)
|
||
* uint16 Payload Length
|
||
* uint8[] Font data (up to 1920 bytes)
|
||
* uint8 Terminator (0x00)
|
||
*/
|
||
static int write_fontrom_packet(FILE *fp, const char *filename, uint8_t opcode, int verbose) {
|
||
if (!filename || !fp) return 0;
|
||
|
||
FILE *rom_file = fopen(filename, "rb");
|
||
if (!rom_file) {
|
||
fprintf(stderr, "Warning: Could not open font ROM file: %s\n", filename);
|
||
return -1;
|
||
}
|
||
|
||
// Get file size
|
||
fseek(rom_file, 0, SEEK_END);
|
||
long file_size = ftell(rom_file);
|
||
fseek(rom_file, 0, SEEK_SET);
|
||
|
||
if (file_size > MAX_FONTROM_SIZE) {
|
||
fprintf(stderr, "Warning: Font ROM file too large (max %d bytes): %s\n", MAX_FONTROM_SIZE, filename);
|
||
fclose(rom_file);
|
||
return -1;
|
||
}
|
||
|
||
// Read font data
|
||
uint8_t *font_data = malloc(file_size);
|
||
if (!font_data) {
|
||
fprintf(stderr, "Error: Could not allocate memory for font ROM\n");
|
||
fclose(rom_file);
|
||
return -1;
|
||
}
|
||
|
||
size_t bytes_read = fread(font_data, 1, file_size, rom_file);
|
||
fclose(rom_file);
|
||
|
||
if (bytes_read != (size_t)file_size) {
|
||
fprintf(stderr, "Warning: Could not read entire font ROM file: %s\n", filename);
|
||
free(font_data);
|
||
return -1;
|
||
}
|
||
|
||
// Calculate packet size: index(3) + opcode(1) + length(2) + data + terminator(1)
|
||
uint32_t packet_size = 3 + 1 + 2 + file_size + 1;
|
||
|
||
// Write packet type (0x30 - SSF)
|
||
fputc(TAV_PACKET_SSF, fp);
|
||
|
||
// Write packet size (uint32, little-endian)
|
||
fputc(packet_size & 0xFF, fp);
|
||
fputc((packet_size >> 8) & 0xFF, fp);
|
||
fputc((packet_size >> 16) & 0xFF, fp);
|
||
fputc((packet_size >> 24) & 0xFF, fp);
|
||
|
||
// Write index (3 bytes, always 0 for font ROM)
|
||
fputc(0, fp);
|
||
fputc(0, fp);
|
||
fputc(0, fp);
|
||
|
||
// Write opcode
|
||
fputc(opcode, fp);
|
||
|
||
// Write payload length (uint16, little-endian)
|
||
uint16_t payload_len = (uint16_t)file_size;
|
||
fputc(payload_len & 0xFF, fp);
|
||
fputc((payload_len >> 8) & 0xFF, fp);
|
||
|
||
// Write font data
|
||
fwrite(font_data, 1, file_size, fp);
|
||
|
||
// Write terminator
|
||
fputc(0x00, fp);
|
||
|
||
free(font_data);
|
||
|
||
if (verbose) {
|
||
printf(" Font ROM uploaded: %s (%ld bytes, opcode 0x%02X)\n", filename, file_size, opcode);
|
||
}
|
||
|
||
return 1 + 4 + (int)packet_size; // Total bytes written
|
||
}
|
||
|
||
// =============================================================================
|
||
// Worker Thread Functions
|
||
// =============================================================================
|
||
|
||
/**
|
||
* Worker thread context - passed to worker_thread_main.
|
||
*/
|
||
typedef struct {
|
||
cli_context_t *cli;
|
||
int thread_id;
|
||
} worker_context_t;
|
||
|
||
/**
|
||
* Worker thread main function.
|
||
* Continuously picks up jobs from the job pool and encodes them.
|
||
*/
|
||
static void *worker_thread_main(void *arg) {
|
||
worker_context_t *wctx = (worker_context_t *)arg;
|
||
cli_context_t *cli = wctx->cli;
|
||
(void)wctx->thread_id; // Unused but kept for debugging
|
||
|
||
while (1) {
|
||
pthread_mutex_lock(&cli->job_mutex);
|
||
|
||
// Wait for a job or shutdown signal
|
||
while (!cli->shutdown_workers) {
|
||
// Look for a job slot that is ready to encode
|
||
int found_job = -1;
|
||
for (int i = 0; i < cli->num_threads; i++) {
|
||
if (cli->gop_jobs[i].status == GOP_SLOT_READY) {
|
||
cli->gop_jobs[i].status = GOP_SLOT_ENCODING;
|
||
found_job = i;
|
||
break;
|
||
}
|
||
}
|
||
|
||
if (found_job >= 0) {
|
||
pthread_mutex_unlock(&cli->job_mutex);
|
||
|
||
// Encode this GOP
|
||
gop_job_t *job = &cli->gop_jobs[found_job];
|
||
|
||
// Create thread-local encoder context
|
||
tav_encoder_context_t *ctx = tav_encoder_create(&job->params);
|
||
if (!ctx) {
|
||
fprintf(stderr, "Failed to create encoder for GOP %d\n", job->gop_index);
|
||
job->success = 0;
|
||
} else {
|
||
// Encode GOP
|
||
int result = tav_encoder_encode_gop(ctx,
|
||
(const uint8_t **)job->rgb_frames,
|
||
job->num_frames,
|
||
job->frame_numbers,
|
||
&job->packet);
|
||
job->success = (result == 1 && job->packet != NULL);
|
||
tav_encoder_free(ctx);
|
||
}
|
||
|
||
// Mark job as complete (reacquire lock for next iteration)
|
||
pthread_mutex_lock(&cli->job_mutex);
|
||
job->status = GOP_SLOT_COMPLETE;
|
||
pthread_cond_broadcast(&cli->job_complete);
|
||
// Keep lock held for next iteration of inner while loop
|
||
continue; // Look for more jobs
|
||
}
|
||
|
||
// No job found, wait for signal
|
||
pthread_cond_wait(&cli->job_ready, &cli->job_mutex);
|
||
}
|
||
|
||
pthread_mutex_unlock(&cli->job_mutex);
|
||
break; // Shutdown
|
||
}
|
||
|
||
free(wctx);
|
||
return NULL;
|
||
}
|
||
|
||
/**
|
||
* Initialize multithreading resources.
|
||
* Returns 0 on success, -1 on failure.
|
||
*/
|
||
static int init_threading(cli_context_t *cli) {
|
||
if (cli->num_threads <= 0) {
|
||
return 0; // Single-threaded mode
|
||
}
|
||
|
||
// Initialize mutex and condition variables
|
||
if (pthread_mutex_init(&cli->job_mutex, NULL) != 0) {
|
||
fprintf(stderr, "Error: Failed to initialize job mutex\n");
|
||
return -1;
|
||
}
|
||
if (pthread_cond_init(&cli->job_ready, NULL) != 0) {
|
||
fprintf(stderr, "Error: Failed to initialize job_ready cond\n");
|
||
pthread_mutex_destroy(&cli->job_mutex);
|
||
return -1;
|
||
}
|
||
if (pthread_cond_init(&cli->job_complete, NULL) != 0) {
|
||
fprintf(stderr, "Error: Failed to initialize job_complete cond\n");
|
||
pthread_cond_destroy(&cli->job_ready);
|
||
pthread_mutex_destroy(&cli->job_mutex);
|
||
return -1;
|
||
}
|
||
|
||
// Allocate job slots (one per thread)
|
||
cli->gop_jobs = calloc(cli->num_threads, sizeof(gop_job_t));
|
||
if (!cli->gop_jobs) {
|
||
fprintf(stderr, "Error: Failed to allocate job slots\n");
|
||
pthread_cond_destroy(&cli->job_complete);
|
||
pthread_cond_destroy(&cli->job_ready);
|
||
pthread_mutex_destroy(&cli->job_mutex);
|
||
return -1;
|
||
}
|
||
|
||
// Allocate worker thread handles
|
||
cli->worker_threads = malloc(cli->num_threads * sizeof(pthread_t));
|
||
if (!cli->worker_threads) {
|
||
fprintf(stderr, "Error: Failed to allocate thread handles\n");
|
||
free(cli->gop_jobs);
|
||
pthread_cond_destroy(&cli->job_complete);
|
||
pthread_cond_destroy(&cli->job_ready);
|
||
pthread_mutex_destroy(&cli->job_mutex);
|
||
return -1;
|
||
}
|
||
|
||
// Start worker threads
|
||
cli->shutdown_workers = 0;
|
||
for (int i = 0; i < cli->num_threads; i++) {
|
||
worker_context_t *wctx = malloc(sizeof(worker_context_t));
|
||
if (!wctx) {
|
||
fprintf(stderr, "Error: Failed to allocate worker context\n");
|
||
cli->shutdown_workers = 1;
|
||
pthread_cond_broadcast(&cli->job_ready);
|
||
for (int j = 0; j < i; j++) {
|
||
pthread_join(cli->worker_threads[j], NULL);
|
||
}
|
||
free(cli->worker_threads);
|
||
free(cli->gop_jobs);
|
||
pthread_cond_destroy(&cli->job_complete);
|
||
pthread_cond_destroy(&cli->job_ready);
|
||
pthread_mutex_destroy(&cli->job_mutex);
|
||
return -1;
|
||
}
|
||
wctx->cli = cli;
|
||
wctx->thread_id = i;
|
||
|
||
if (pthread_create(&cli->worker_threads[i], NULL, worker_thread_main, wctx) != 0) {
|
||
fprintf(stderr, "Error: Failed to create worker thread %d\n", i);
|
||
free(wctx);
|
||
cli->shutdown_workers = 1;
|
||
pthread_cond_broadcast(&cli->job_ready);
|
||
for (int j = 0; j < i; j++) {
|
||
pthread_join(cli->worker_threads[j], NULL);
|
||
}
|
||
free(cli->worker_threads);
|
||
free(cli->gop_jobs);
|
||
pthread_cond_destroy(&cli->job_complete);
|
||
pthread_cond_destroy(&cli->job_ready);
|
||
pthread_mutex_destroy(&cli->job_mutex);
|
||
return -1;
|
||
}
|
||
}
|
||
|
||
printf("Started %d worker threads for parallel GOP encoding\n", cli->num_threads);
|
||
return 0;
|
||
}
|
||
|
||
/**
|
||
* Shutdown multithreading resources.
|
||
*/
|
||
static void shutdown_threading(cli_context_t *cli) {
|
||
if (cli->num_threads <= 0) {
|
||
return;
|
||
}
|
||
|
||
// Signal workers to shutdown
|
||
pthread_mutex_lock(&cli->job_mutex);
|
||
cli->shutdown_workers = 1;
|
||
pthread_cond_broadcast(&cli->job_ready);
|
||
pthread_mutex_unlock(&cli->job_mutex);
|
||
|
||
// Wait for all workers to finish
|
||
for (int i = 0; i < cli->num_threads; i++) {
|
||
pthread_join(cli->worker_threads[i], NULL);
|
||
}
|
||
|
||
// Free job slots (and any remaining resources)
|
||
if (cli->gop_jobs) {
|
||
for (int i = 0; i < cli->num_threads; i++) {
|
||
if (cli->gop_jobs[i].packet) {
|
||
tav_encoder_free_packet(cli->gop_jobs[i].packet);
|
||
}
|
||
// Note: rgb_frames should already be freed by now
|
||
}
|
||
free(cli->gop_jobs);
|
||
cli->gop_jobs = NULL;
|
||
}
|
||
|
||
if (cli->worker_threads) {
|
||
free(cli->worker_threads);
|
||
cli->worker_threads = NULL;
|
||
}
|
||
|
||
pthread_cond_destroy(&cli->job_complete);
|
||
pthread_cond_destroy(&cli->job_ready);
|
||
pthread_mutex_destroy(&cli->job_mutex);
|
||
}
|
||
|
||
// =============================================================================
|
||
// Multithreaded Encoding Loop
|
||
// =============================================================================
|
||
|
||
/**
|
||
* Multithreaded video encoding function.
|
||
* Uses worker threads to encode GOPs in parallel.
|
||
*/
|
||
static int encode_video_mt(cli_context_t *cli) {
|
||
printf("Opening FFmpeg pipe...\n");
|
||
cli->ffmpeg_pipe = open_ffmpeg_pipe(cli->input_file,
|
||
cli->enc_params.width,
|
||
cli->enc_params.height,
|
||
cli->interlaced,
|
||
cli->header_height,
|
||
cli->target_fps_num,
|
||
cli->target_fps_den,
|
||
cli->original_fps_num,
|
||
cli->original_fps_den);
|
||
if (!cli->ffmpeg_pipe) {
|
||
return -1;
|
||
}
|
||
|
||
// Create temporary encoder to get calculated params (decomp_levels, etc.)
|
||
printf("Creating encoder context...\n");
|
||
tav_encoder_context_t *ctx = tav_encoder_create(&cli->enc_params);
|
||
if (!ctx) {
|
||
fprintf(stderr, "Error: %s\n", "Failed to create encoder");
|
||
pclose(cli->ffmpeg_pipe);
|
||
return -1;
|
||
}
|
||
tav_encoder_get_params(ctx, &cli->enc_params);
|
||
tav_encoder_free(ctx);
|
||
ctx = NULL;
|
||
|
||
// Initialize threading
|
||
if (init_threading(cli) < 0) {
|
||
pclose(cli->ffmpeg_pipe);
|
||
return -1;
|
||
}
|
||
|
||
// Allocate per-job frame buffers
|
||
size_t frame_size = cli->enc_params.width * cli->enc_params.height * 3;
|
||
int gop_size = cli->enc_params.gop_size;
|
||
if (!cli->enc_params.enable_temporal_dwt) {
|
||
gop_size = 1;
|
||
}
|
||
|
||
// In two-pass mode, use max GOP size for buffer since GOPs have variable sizes
|
||
int buffer_gop_size = cli->two_pass_mode ? ANALYSIS_GOP_MAX_SIZE : gop_size;
|
||
|
||
// Allocate frame buffers for each job slot
|
||
for (int slot = 0; slot < cli->num_threads; slot++) {
|
||
cli->gop_jobs[slot].rgb_frames = malloc(buffer_gop_size * sizeof(uint8_t*));
|
||
cli->gop_jobs[slot].frame_numbers = malloc(buffer_gop_size * sizeof(int));
|
||
if (!cli->gop_jobs[slot].rgb_frames || !cli->gop_jobs[slot].frame_numbers) {
|
||
fprintf(stderr, "Error: Failed to allocate job slot %d buffers\n", slot);
|
||
shutdown_threading(cli);
|
||
pclose(cli->ffmpeg_pipe);
|
||
return -1;
|
||
}
|
||
for (int f = 0; f < buffer_gop_size; f++) {
|
||
cli->gop_jobs[slot].rgb_frames[f] = malloc(frame_size);
|
||
if (!cli->gop_jobs[slot].rgb_frames[f]) {
|
||
fprintf(stderr, "Error: Failed to allocate frame buffer for slot %d\n", slot);
|
||
shutdown_threading(cli);
|
||
pclose(cli->ffmpeg_pipe);
|
||
return -1;
|
||
}
|
||
}
|
||
// Copy encoder params for thread safety
|
||
cli->gop_jobs[slot].params = cli->enc_params;
|
||
cli->gop_jobs[slot].status = GOP_SLOT_EMPTY;
|
||
cli->gop_jobs[slot].num_frames = 0;
|
||
}
|
||
|
||
// Allocate audio buffers if needed
|
||
if (cli->has_audio) {
|
||
size_t max_gop_audio = buffer_gop_size * cli->samples_per_frame * 2;
|
||
cli->gop_audio_buffer = malloc(max_gop_audio * sizeof(float));
|
||
cli->gop_audio_samples = 0;
|
||
if (!cli->gop_audio_buffer) {
|
||
fprintf(stderr, "Error: Failed to allocate GOP audio buffer\n");
|
||
shutdown_threading(cli);
|
||
pclose(cli->ffmpeg_pipe);
|
||
return -1;
|
||
}
|
||
|
||
// Allocate per-job audio buffers
|
||
for (int slot = 0; slot < cli->num_threads; slot++) {
|
||
cli->gop_jobs[slot].audio_samples = malloc(max_gop_audio * sizeof(float));
|
||
if (!cli->gop_jobs[slot].audio_samples) {
|
||
fprintf(stderr, "Error: Failed to allocate audio buffer for slot %d\n", slot);
|
||
shutdown_threading(cli);
|
||
pclose(cli->ffmpeg_pipe);
|
||
return -1;
|
||
}
|
||
}
|
||
}
|
||
|
||
// Temporary frame buffer for reading
|
||
uint8_t *rgb_frame = malloc(frame_size);
|
||
if (!rgb_frame) {
|
||
fprintf(stderr, "Error: Failed to allocate frame buffer\n");
|
||
shutdown_threading(cli);
|
||
pclose(cli->ffmpeg_pipe);
|
||
return -1;
|
||
}
|
||
|
||
// Write TAV/TAP header
|
||
write_tav_header(cli->output_fp, &cli->enc_params, cli->has_audio, cli->subtitles != NULL,
|
||
cli->interlaced, cli->header_height, cli->is_still_image);
|
||
|
||
// Write Extended Header (unless suppressed)
|
||
// For interlaced mode, use header_height for XDIM if needed
|
||
int xhdr_height = cli->interlaced ? cli->header_height : cli->enc_params.height;
|
||
if (!cli->suppress_xhdr) {
|
||
cli->extended_header_offset = write_extended_header(cli, cli->enc_params.width, xhdr_height);
|
||
if (cli->extended_header_offset < 0) {
|
||
fprintf(stderr, "Warning: Failed to write Extended Header\n");
|
||
}
|
||
}
|
||
|
||
// Write subtitles upfront
|
||
if (cli->subtitles) {
|
||
printf("Writing subtitles...\n");
|
||
write_all_subtitles(cli->output_fp, cli->subtitles, cli->verbose);
|
||
}
|
||
|
||
// Write font ROMs if provided
|
||
if (cli->fontrom_low) {
|
||
printf("Uploading low font ROM...\n");
|
||
write_fontrom_packet(cli->output_fp, cli->fontrom_low, FONTROM_OPCODE_LOW, cli->verbose);
|
||
}
|
||
if (cli->fontrom_high) {
|
||
printf("Uploading high font ROM...\n");
|
||
write_fontrom_packet(cli->output_fp, cli->fontrom_high, FONTROM_OPCODE_HIGH, cli->verbose);
|
||
}
|
||
|
||
printf("Encoding frames with %d threads...\n", cli->num_threads);
|
||
cli->start_time = time(NULL);
|
||
|
||
int current_slot = 0; // Slot being filled
|
||
int next_gop_to_write = 0; // GOP index that should be written next
|
||
int current_gop_index = 0; // Current GOP index being assembled
|
||
int frames_in_current_gop = 0; // Frames accumulated in current slot
|
||
int encoding_error = 0;
|
||
int eof_reached = 0;
|
||
|
||
while (!encoding_error) {
|
||
// Step 1: Try to write any completed GOPs in order
|
||
pthread_mutex_lock(&cli->job_mutex);
|
||
while (!encoding_error) {
|
||
// Find the slot with the next GOP to write
|
||
int found = -1;
|
||
for (int i = 0; i < cli->num_threads; i++) {
|
||
if (cli->gop_jobs[i].status == GOP_SLOT_COMPLETE &&
|
||
cli->gop_jobs[i].gop_index == next_gop_to_write) {
|
||
found = i;
|
||
break;
|
||
}
|
||
}
|
||
|
||
if (found < 0) break; // No complete GOP ready to write
|
||
|
||
gop_job_t *job = &cli->gop_jobs[found];
|
||
pthread_mutex_unlock(&cli->job_mutex);
|
||
|
||
// Write this GOP
|
||
if (job->success && job->packet) {
|
||
// Write TIMECODE
|
||
write_timecode_packet(cli->output_fp, job->frame_numbers[0],
|
||
cli->enc_params.fps_num, cli->enc_params.fps_den);
|
||
|
||
// Write AUDIO for this GOP
|
||
if (cli->has_audio && job->num_audio_samples > 0) {
|
||
write_audio_packet(cli->output_fp, cli, job->audio_samples, job->num_audio_samples);
|
||
}
|
||
|
||
// Write VIDEO packet
|
||
write_tav_packet(cli->output_fp, job->packet);
|
||
cli->total_bytes += job->packet->size;
|
||
cli->gop_count++;
|
||
|
||
// Write sync packet
|
||
if (job->packet->packet_type == TAV_PACKET_GOP_UNIFIED) {
|
||
// For 3D-DWT mode, write GOP_SYNC (0xFC) with frame count
|
||
int frames_in_gop = job->packet->data[1];
|
||
write_gop_sync_packet(cli->output_fp, frames_in_gop);
|
||
} else if (job->packet->packet_type == TAV_PACKET_IFRAME) {
|
||
// For intra-only mode, write SYNC (0xFF) with no payload
|
||
write_sync_packet(cli->output_fp);
|
||
}
|
||
|
||
tav_encoder_free_packet(job->packet);
|
||
job->packet = NULL;
|
||
} else {
|
||
fprintf(stderr, "Error: GOP %d encoding failed\n", job->gop_index);
|
||
encoding_error = 1;
|
||
}
|
||
|
||
// Mark slot as empty
|
||
pthread_mutex_lock(&cli->job_mutex);
|
||
job->status = GOP_SLOT_EMPTY;
|
||
job->num_frames = 0;
|
||
next_gop_to_write++;
|
||
|
||
// Progress
|
||
if (cli->verbose || cli->frame_count % 60 == 0) {
|
||
time_t elapsed = time(NULL) - cli->start_time;
|
||
double fps = elapsed > 0 ? (double)cli->frame_count / elapsed : 0.0;
|
||
double bitrate = elapsed > 0 ?
|
||
(cli->total_bytes * 8.0) / (cli->frame_count / ((double)cli->enc_params.fps_num / cli->enc_params.fps_den)) / 1000.0 : 0.0;
|
||
|
||
printf("\rFrame %ld | GOPs: %ld | %.1f fps | %.1f kbps | %zu KB ",
|
||
cli->frame_count, cli->gop_count, fps, bitrate,
|
||
cli->total_bytes / 1024);
|
||
fflush(stdout);
|
||
}
|
||
}
|
||
pthread_mutex_unlock(&cli->job_mutex);
|
||
|
||
if (encoding_error || eof_reached) break;
|
||
|
||
// Step 2: Fill current GOP slot
|
||
gop_job_t *slot = &cli->gop_jobs[current_slot];
|
||
|
||
// Wait for slot to be empty (writing completed GOPs along the way)
|
||
pthread_mutex_lock(&cli->job_mutex);
|
||
while (slot->status != GOP_SLOT_EMPTY && !cli->shutdown_workers) {
|
||
// While waiting, check if we can write any completed GOPs
|
||
int wrote_something = 0;
|
||
for (int i = 0; i < cli->num_threads; i++) {
|
||
if (cli->gop_jobs[i].status == GOP_SLOT_COMPLETE &&
|
||
cli->gop_jobs[i].gop_index == next_gop_to_write) {
|
||
gop_job_t *job = &cli->gop_jobs[i];
|
||
pthread_mutex_unlock(&cli->job_mutex);
|
||
|
||
// Write this GOP
|
||
if (job->success && job->packet) {
|
||
write_timecode_packet(cli->output_fp, job->frame_numbers[0],
|
||
cli->enc_params.fps_num, cli->enc_params.fps_den);
|
||
if (cli->has_audio && job->num_audio_samples > 0) {
|
||
write_audio_packet(cli->output_fp, cli, job->audio_samples, job->num_audio_samples);
|
||
}
|
||
write_tav_packet(cli->output_fp, job->packet);
|
||
cli->total_bytes += job->packet->size;
|
||
cli->gop_count++;
|
||
|
||
if (job->packet->packet_type == TAV_PACKET_GOP_UNIFIED) {
|
||
write_gop_sync_packet(cli->output_fp, job->packet->data[1]);
|
||
} else if (job->packet->packet_type == TAV_PACKET_IFRAME) {
|
||
write_sync_packet(cli->output_fp);
|
||
}
|
||
|
||
tav_encoder_free_packet(job->packet);
|
||
job->packet = NULL;
|
||
|
||
// Progress
|
||
time_t elapsed = time(NULL) - cli->start_time;
|
||
double fps = elapsed > 0 ? (double)cli->frame_count / elapsed : 0.0;
|
||
printf("\rFrame %ld | GOPs: %ld | %.1f fps | %zu KB ",
|
||
cli->frame_count, cli->gop_count, fps, cli->total_bytes / 1024);
|
||
fflush(stdout);
|
||
}
|
||
|
||
pthread_mutex_lock(&cli->job_mutex);
|
||
job->status = GOP_SLOT_EMPTY;
|
||
job->num_frames = 0;
|
||
next_gop_to_write++;
|
||
wrote_something = 1;
|
||
break;
|
||
}
|
||
}
|
||
if (!wrote_something) {
|
||
pthread_cond_wait(&cli->job_complete, &cli->job_mutex);
|
||
}
|
||
}
|
||
pthread_mutex_unlock(&cli->job_mutex);
|
||
|
||
// Reset audio accumulator only when starting a fresh GOP
|
||
if (frames_in_current_gop == 0) {
|
||
slot->num_audio_samples = 0;
|
||
}
|
||
|
||
// Read frame from FFmpeg
|
||
if (cli->encode_limit > 0 && cli->frame_count >= cli->encode_limit) {
|
||
eof_reached = 1;
|
||
} else {
|
||
int result = read_rgb_frame(cli->ffmpeg_pipe, rgb_frame, frame_size);
|
||
if (result == 0) {
|
||
eof_reached = 1;
|
||
} else if (result < 0) {
|
||
fprintf(stderr, "Error reading frame\n");
|
||
encoding_error = 1;
|
||
} else {
|
||
// Copy frame to slot buffer
|
||
memcpy(slot->rgb_frames[frames_in_current_gop], rgb_frame, frame_size);
|
||
slot->frame_numbers[frames_in_current_gop] = (int)cli->frame_count;
|
||
frames_in_current_gop++;
|
||
cli->frame_count++;
|
||
|
||
// Accumulate audio
|
||
if (cli->has_audio && cli->audio_buffer) {
|
||
size_t samples_read = read_audio_samples(cli, cli->audio_buffer, cli->samples_per_frame);
|
||
if (samples_read > 0) {
|
||
memcpy(slot->audio_samples + slot->num_audio_samples * 2,
|
||
cli->audio_buffer,
|
||
samples_read * 2 * sizeof(float));
|
||
slot->num_audio_samples += samples_read;
|
||
}
|
||
}
|
||
|
||
// Determine current GOP size for two-pass mode
|
||
int current_gop_size = gop_size;
|
||
if (cli->two_pass_mode && cli->current_gop_boundary) {
|
||
current_gop_size = cli->current_gop_boundary->num_frames;
|
||
}
|
||
|
||
// Check if GOP is complete
|
||
if (frames_in_current_gop >= current_gop_size) {
|
||
slot->num_frames = frames_in_current_gop;
|
||
slot->gop_index = current_gop_index;
|
||
|
||
// Submit GOP to worker threads
|
||
pthread_mutex_lock(&cli->job_mutex);
|
||
slot->status = GOP_SLOT_READY;
|
||
pthread_cond_broadcast(&cli->job_ready);
|
||
pthread_mutex_unlock(&cli->job_mutex);
|
||
|
||
// Advance to next GOP boundary (two-pass mode)
|
||
if (cli->two_pass_mode && cli->current_gop_boundary) {
|
||
cli->current_gop_boundary = cli->current_gop_boundary->next;
|
||
}
|
||
|
||
// Move to next slot
|
||
current_slot = (current_slot + 1) % cli->num_threads;
|
||
current_gop_index++;
|
||
frames_in_current_gop = 0;
|
||
|
||
// Note: audio reset moved to after we confirm slot is empty
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// Handle partial GOP at end
|
||
if (!encoding_error && frames_in_current_gop > 0) {
|
||
printf("\nEncoding final partial GOP (%d frames)...\n", frames_in_current_gop);
|
||
|
||
gop_job_t *slot = &cli->gop_jobs[current_slot];
|
||
slot->num_frames = frames_in_current_gop;
|
||
slot->gop_index = current_gop_index;
|
||
|
||
pthread_mutex_lock(&cli->job_mutex);
|
||
slot->status = GOP_SLOT_READY;
|
||
pthread_cond_broadcast(&cli->job_ready);
|
||
pthread_mutex_unlock(&cli->job_mutex);
|
||
|
||
current_gop_index++;
|
||
}
|
||
|
||
// Wait for all remaining GOPs to complete and write them
|
||
while (!encoding_error && next_gop_to_write < current_gop_index) {
|
||
pthread_mutex_lock(&cli->job_mutex);
|
||
|
||
// Find slot with next GOP to write
|
||
int found = -1;
|
||
while (found < 0 && !encoding_error) {
|
||
for (int i = 0; i < cli->num_threads; i++) {
|
||
if (cli->gop_jobs[i].status == GOP_SLOT_COMPLETE &&
|
||
cli->gop_jobs[i].gop_index == next_gop_to_write) {
|
||
found = i;
|
||
break;
|
||
}
|
||
}
|
||
if (found < 0) {
|
||
pthread_cond_wait(&cli->job_complete, &cli->job_mutex);
|
||
}
|
||
}
|
||
|
||
if (found >= 0) {
|
||
gop_job_t *job = &cli->gop_jobs[found];
|
||
pthread_mutex_unlock(&cli->job_mutex);
|
||
|
||
if (job->success && job->packet) {
|
||
write_timecode_packet(cli->output_fp, job->frame_numbers[0],
|
||
cli->enc_params.fps_num, cli->enc_params.fps_den);
|
||
if (cli->has_audio && job->num_audio_samples > 0) {
|
||
write_audio_packet(cli->output_fp, cli, job->audio_samples, job->num_audio_samples);
|
||
}
|
||
write_tav_packet(cli->output_fp, job->packet);
|
||
cli->total_bytes += job->packet->size;
|
||
cli->gop_count++;
|
||
|
||
if (job->packet->packet_type == TAV_PACKET_GOP_UNIFIED) {
|
||
write_gop_sync_packet(cli->output_fp, job->packet->data[1]);
|
||
} else if (job->packet->packet_type == TAV_PACKET_IFRAME) {
|
||
write_sync_packet(cli->output_fp);
|
||
}
|
||
|
||
tav_encoder_free_packet(job->packet);
|
||
job->packet = NULL;
|
||
}
|
||
|
||
pthread_mutex_lock(&cli->job_mutex);
|
||
job->status = GOP_SLOT_EMPTY;
|
||
next_gop_to_write++;
|
||
pthread_mutex_unlock(&cli->job_mutex);
|
||
} else {
|
||
pthread_mutex_unlock(&cli->job_mutex);
|
||
}
|
||
}
|
||
|
||
printf("\n");
|
||
|
||
// Update total frames in header (skip for still images - already set to 0xFFFFFFFF)
|
||
if (!cli->is_still_image) {
|
||
update_total_frames(cli->output_fp, (uint32_t)cli->frame_count);
|
||
}
|
||
|
||
// Update ENDT in Extended Header (skip for still images)
|
||
if (!cli->is_still_image && !cli->suppress_xhdr && cli->extended_header_offset >= 0) {
|
||
// Calculate end time in nanoseconds
|
||
uint64_t end_time_ns = (uint64_t)cli->frame_count * 1000000000ULL * cli->enc_params.fps_den / cli->enc_params.fps_num;
|
||
update_extended_header_endt(cli->output_fp, cli->extended_header_offset, end_time_ns);
|
||
}
|
||
|
||
// Free per-job frame buffers (must be done before shutdown_threading)
|
||
for (int slot = 0; slot < cli->num_threads; slot++) {
|
||
if (cli->gop_jobs[slot].rgb_frames) {
|
||
for (int f = 0; f < buffer_gop_size; f++) {
|
||
free(cli->gop_jobs[slot].rgb_frames[f]);
|
||
}
|
||
free(cli->gop_jobs[slot].rgb_frames);
|
||
cli->gop_jobs[slot].rgb_frames = NULL;
|
||
}
|
||
free(cli->gop_jobs[slot].frame_numbers);
|
||
cli->gop_jobs[slot].frame_numbers = NULL;
|
||
free(cli->gop_jobs[slot].audio_samples);
|
||
cli->gop_jobs[slot].audio_samples = NULL;
|
||
}
|
||
|
||
// Cleanup
|
||
free(rgb_frame);
|
||
shutdown_threading(cli);
|
||
pclose(cli->ffmpeg_pipe);
|
||
|
||
// Cleanup audio
|
||
if (cli->audio_buffer) {
|
||
free(cli->audio_buffer);
|
||
cli->audio_buffer = NULL;
|
||
}
|
||
if (cli->gop_audio_buffer) {
|
||
free(cli->gop_audio_buffer);
|
||
cli->gop_audio_buffer = NULL;
|
||
}
|
||
if (cli->pcm_file) {
|
||
fclose(cli->pcm_file);
|
||
cli->pcm_file = NULL;
|
||
}
|
||
if (cli->has_audio) {
|
||
unlink(TEMP_PCM_FILE);
|
||
}
|
||
|
||
// Final statistics
|
||
time_t total_time = time(NULL) - cli->start_time;
|
||
double avg_fps = total_time > 0 ? (double)cli->frame_count / total_time : 0.0;
|
||
double duration = (double)cli->frame_count / ((double)cli->enc_params.fps_num / cli->enc_params.fps_den);
|
||
double avg_bitrate = duration > 0 ? (cli->total_bytes * 8.0) / duration / 1000.0 : 0.0;
|
||
|
||
printf("\nEncoding complete! (multithreaded, %d threads)\n", cli->num_threads);
|
||
printf(" Frames encoded: %ld\n", cli->frame_count);
|
||
printf(" GOPs encoded: %ld\n", cli->gop_count);
|
||
printf(" Total size: %.2f MB\n", cli->total_bytes / (1024.0 * 1024.0));
|
||
printf(" Duration: %.2f seconds\n", duration);
|
||
printf(" Average bitrate: %.1f kbps\n", avg_bitrate);
|
||
printf(" Encoding speed: %.1f fps\n", avg_fps);
|
||
printf(" Time taken: %ld seconds\n", total_time);
|
||
|
||
return encoding_error ? -1 : 0;
|
||
}
|
||
|
||
// =============================================================================
|
||
// Single-Threaded Encoding Loop
|
||
// =============================================================================
|
||
|
||
static int encode_video(cli_context_t *cli) {
|
||
// Dispatch to multithreaded version if threads > 0
|
||
if (cli->num_threads > 0) {
|
||
return encode_video_mt(cli);
|
||
}
|
||
|
||
printf("Opening FFmpeg pipe...\n");
|
||
cli->ffmpeg_pipe = open_ffmpeg_pipe(cli->input_file,
|
||
cli->enc_params.width,
|
||
cli->enc_params.height,
|
||
cli->interlaced,
|
||
cli->header_height,
|
||
cli->target_fps_num,
|
||
cli->target_fps_den,
|
||
cli->original_fps_num,
|
||
cli->original_fps_den);
|
||
if (!cli->ffmpeg_pipe) {
|
||
return -1;
|
||
}
|
||
|
||
// Create encoder
|
||
printf("Creating encoder context...\n");
|
||
tav_encoder_context_t *ctx = tav_encoder_create(&cli->enc_params);
|
||
if (!ctx) {
|
||
fprintf(stderr, "Error: %s\n", "Failed to create encoder");
|
||
pclose(cli->ffmpeg_pipe);
|
||
return -1;
|
||
}
|
||
|
||
// Get actual encoder params (with calculated values like decomp_levels)
|
||
tav_encoder_get_params(ctx, &cli->enc_params);
|
||
|
||
// NOW allocate GOP audio buffer with correct gop_size
|
||
if (cli->has_audio) {
|
||
size_t max_gop_audio = cli->enc_params.gop_size * cli->samples_per_frame * 2;
|
||
cli->gop_audio_buffer = malloc(max_gop_audio * sizeof(float));
|
||
cli->gop_audio_samples = 0;
|
||
|
||
if (!cli->gop_audio_buffer) {
|
||
fprintf(stderr, "Error: Failed to allocate GOP audio buffer\n");
|
||
tav_encoder_free(ctx);
|
||
pclose(cli->ffmpeg_pipe);
|
||
return -1;
|
||
}
|
||
|
||
if (cli->verbose) {
|
||
printf(" GOP audio buffer: %zu samples (%zu bytes)\n",
|
||
max_gop_audio / 2, max_gop_audio * sizeof(float));
|
||
}
|
||
}
|
||
|
||
// Allocate GOP frame buffer for tav_encoder_encode_gop()
|
||
size_t frame_size = cli->enc_params.width * cli->enc_params.height * 3;
|
||
int gop_size = cli->enc_params.gop_size;
|
||
|
||
// In intra-only mode, encode each frame immediately (GOP size = 1)
|
||
if (!cli->enc_params.enable_temporal_dwt) {
|
||
gop_size = 1;
|
||
}
|
||
|
||
// In two-pass mode, use max GOP size for buffer since GOPs have variable sizes
|
||
int buffer_gop_size = cli->two_pass_mode ? ANALYSIS_GOP_MAX_SIZE : gop_size;
|
||
|
||
cli->gop_frames = malloc(buffer_gop_size * sizeof(uint8_t*));
|
||
cli->gop_frame_numbers = malloc(buffer_gop_size * sizeof(int));
|
||
cli->gop_frame_count = 0;
|
||
|
||
if (!cli->gop_frames || !cli->gop_frame_numbers) {
|
||
fprintf(stderr, "Error: Failed to allocate GOP frame buffer\n");
|
||
tav_encoder_free(ctx);
|
||
pclose(cli->ffmpeg_pipe);
|
||
return -1;
|
||
}
|
||
|
||
for (int i = 0; i < buffer_gop_size; i++) {
|
||
cli->gop_frames[i] = malloc(frame_size);
|
||
if (!cli->gop_frames[i]) {
|
||
fprintf(stderr, "Error: Failed to allocate GOP frame %d\n", i);
|
||
for (int j = 0; j < i; j++) free(cli->gop_frames[j]);
|
||
free(cli->gop_frames);
|
||
free(cli->gop_frame_numbers);
|
||
tav_encoder_free(ctx);
|
||
pclose(cli->ffmpeg_pipe);
|
||
return -1;
|
||
}
|
||
}
|
||
|
||
if (cli->verbose) {
|
||
printf(" GOP frame buffer: %d frames x %zu bytes = %zu KB%s\n",
|
||
buffer_gop_size, frame_size, (buffer_gop_size * frame_size) / 1024,
|
||
cli->two_pass_mode ? " (two-pass mode)" : "");
|
||
}
|
||
|
||
// Temporary frame buffer for reading from FFmpeg
|
||
uint8_t *rgb_frame = malloc(frame_size);
|
||
if (!rgb_frame) {
|
||
fprintf(stderr, "Error: Failed to allocate frame buffer\n");
|
||
for (int i = 0; i < buffer_gop_size; i++) free(cli->gop_frames[i]);
|
||
free(cli->gop_frames);
|
||
free(cli->gop_frame_numbers);
|
||
tav_encoder_free(ctx);
|
||
pclose(cli->ffmpeg_pipe);
|
||
return -1;
|
||
}
|
||
|
||
// Write TAV/TAP header (with actual encoder params)
|
||
write_tav_header(cli->output_fp, &cli->enc_params, cli->has_audio, cli->subtitles != NULL,
|
||
cli->interlaced, cli->header_height, cli->is_still_image);
|
||
|
||
// Write Extended Header (unless suppressed)
|
||
// For interlaced mode, use header_height for XDIM if needed
|
||
int xhdr_height_st = cli->interlaced ? cli->header_height : cli->enc_params.height;
|
||
if (!cli->suppress_xhdr) {
|
||
cli->extended_header_offset = write_extended_header(cli, cli->enc_params.width, xhdr_height_st);
|
||
if (cli->extended_header_offset < 0) {
|
||
fprintf(stderr, "Warning: Failed to write Extended Header\n");
|
||
}
|
||
}
|
||
|
||
// Write subtitles upfront (SSF-TC format)
|
||
if (cli->subtitles) {
|
||
printf("Writing subtitles...\n");
|
||
write_all_subtitles(cli->output_fp, cli->subtitles, cli->verbose);
|
||
}
|
||
|
||
// Write font ROMs if provided
|
||
if (cli->fontrom_low) {
|
||
printf("Uploading low font ROM...\n");
|
||
write_fontrom_packet(cli->output_fp, cli->fontrom_low, FONTROM_OPCODE_LOW, cli->verbose);
|
||
}
|
||
if (cli->fontrom_high) {
|
||
printf("Uploading high font ROM...\n");
|
||
write_fontrom_packet(cli->output_fp, cli->fontrom_high, FONTROM_OPCODE_HIGH, cli->verbose);
|
||
}
|
||
|
||
// Encoding loop using tav_encoder_encode_gop()
|
||
printf("Encoding frames...\n");
|
||
cli->start_time = time(NULL);
|
||
|
||
tav_encoder_packet_t *packet = NULL;
|
||
int encoding_error = 0;
|
||
|
||
while (1) {
|
||
// Check encode limit
|
||
if (cli->encode_limit > 0 && cli->frame_count >= cli->encode_limit) {
|
||
break;
|
||
}
|
||
|
||
// Read frame from FFmpeg
|
||
int result = read_rgb_frame(cli->ffmpeg_pipe, rgb_frame, frame_size);
|
||
if (result == 0) {
|
||
break; // EOF
|
||
} else if (result < 0) {
|
||
fprintf(stderr, "Error reading frame\n");
|
||
encoding_error = 1;
|
||
break;
|
||
}
|
||
|
||
// Copy frame to GOP buffer
|
||
memcpy(cli->gop_frames[cli->gop_frame_count], rgb_frame, frame_size);
|
||
cli->gop_frame_numbers[cli->gop_frame_count] = (int)cli->frame_count;
|
||
cli->gop_frame_count++;
|
||
|
||
// Accumulate audio samples for this frame (will write when GOP completes)
|
||
if (cli->has_audio && cli->audio_buffer && cli->gop_audio_buffer) {
|
||
size_t samples_read = read_audio_samples(cli, cli->audio_buffer, cli->samples_per_frame);
|
||
if (samples_read > 0) {
|
||
// Append to GOP audio buffer (samples_read is per-channel count, stereo interleaved)
|
||
memcpy(cli->gop_audio_buffer + cli->gop_audio_samples * 2,
|
||
cli->audio_buffer,
|
||
samples_read * 2 * sizeof(float));
|
||
cli->gop_audio_samples += samples_read;
|
||
}
|
||
}
|
||
|
||
cli->frame_count++;
|
||
|
||
// Determine current GOP size for two-pass mode
|
||
int current_gop_size = gop_size;
|
||
if (cli->two_pass_mode && cli->current_gop_boundary) {
|
||
current_gop_size = cli->current_gop_boundary->num_frames;
|
||
}
|
||
|
||
// Check if GOP is full (either reached fixed size or two-pass boundary)
|
||
if (cli->gop_frame_count >= current_gop_size) {
|
||
// Encode complete GOP
|
||
result = tav_encoder_encode_gop(ctx,
|
||
(const uint8_t**)cli->gop_frames,
|
||
cli->gop_frame_count,
|
||
cli->gop_frame_numbers,
|
||
&packet);
|
||
|
||
if (result < 0) {
|
||
fprintf(stderr, "Error: %s\n", tav_encoder_get_error(ctx));
|
||
encoding_error = 1;
|
||
break;
|
||
}
|
||
|
||
if (packet) {
|
||
// GOP is complete - write in correct order: TIMECODE, AUDIO, VIDEO, GOP_SYNC
|
||
|
||
// 1. Write timecode before GOP (use first frame number in GOP)
|
||
write_timecode_packet(cli->output_fp, cli->gop_frame_numbers[0],
|
||
cli->enc_params.fps_num, cli->enc_params.fps_den);
|
||
|
||
// 2. Write accumulated audio for this GOP as single TAD packet
|
||
if (cli->has_audio && cli->gop_audio_samples > 0) {
|
||
write_audio_packet(cli->output_fp, cli, cli->gop_audio_buffer, cli->gop_audio_samples);
|
||
cli->gop_audio_samples = 0; // Reset for next GOP
|
||
}
|
||
|
||
// 3. Write video GOP packet
|
||
write_tav_packet(cli->output_fp, packet);
|
||
cli->total_bytes += packet->size;
|
||
cli->gop_count++;
|
||
|
||
// 4. Write sync packet after video packets
|
||
if (packet->packet_type == TAV_PACKET_GOP_UNIFIED) {
|
||
int frames_in_gop = packet->data[1];
|
||
write_gop_sync_packet(cli->output_fp, frames_in_gop);
|
||
} else if (packet->packet_type == TAV_PACKET_IFRAME) {
|
||
write_sync_packet(cli->output_fp);
|
||
}
|
||
|
||
tav_encoder_free_packet(packet);
|
||
packet = NULL;
|
||
}
|
||
|
||
// Reset GOP buffer
|
||
cli->gop_frame_count = 0;
|
||
|
||
// Advance to next GOP boundary (two-pass mode)
|
||
if (cli->two_pass_mode && cli->current_gop_boundary) {
|
||
cli->current_gop_boundary = cli->current_gop_boundary->next;
|
||
}
|
||
|
||
// Progress
|
||
if (cli->verbose || cli->frame_count % 60 == 0) {
|
||
time_t elapsed = time(NULL) - cli->start_time;
|
||
double fps = elapsed > 0 ? (double)cli->frame_count / elapsed : 0.0;
|
||
double bitrate = elapsed > 0 ?
|
||
(cli->total_bytes * 8.0) / (cli->frame_count / ((double)cli->enc_params.fps_num / cli->enc_params.fps_den)) / 1000.0 : 0.0;
|
||
|
||
printf("\rFrame %ld/%ld | GOPs: %ld | %.1f fps | %.1f kbps | %zu KB",
|
||
cli->frame_count,
|
||
cli->encode_limit > 0 ? cli->encode_limit : 0L,
|
||
cli->gop_count, fps, bitrate,
|
||
cli->total_bytes / 1024);
|
||
fflush(stdout);
|
||
}
|
||
}
|
||
}
|
||
|
||
printf("\n");
|
||
|
||
// Encode remaining frames in GOP buffer (partial GOP)
|
||
if (!encoding_error && cli->gop_frame_count > 0) {
|
||
printf("Encoding final partial GOP (%d frames)...\n", cli->gop_frame_count);
|
||
|
||
int result = tav_encoder_encode_gop(ctx,
|
||
(const uint8_t**)cli->gop_frames,
|
||
cli->gop_frame_count,
|
||
cli->gop_frame_numbers,
|
||
&packet);
|
||
|
||
if (result < 0) {
|
||
fprintf(stderr, "Error encoding final GOP: %s\n", tav_encoder_get_error(ctx));
|
||
} else if (packet) {
|
||
// Write remaining packets in correct order: TIMECODE, AUDIO, VIDEO, GOP_SYNC
|
||
|
||
// 1. Write timecode
|
||
write_timecode_packet(cli->output_fp, cli->gop_frame_numbers[0],
|
||
cli->enc_params.fps_num, cli->enc_params.fps_den);
|
||
|
||
// 2. Write any remaining accumulated audio for this GOP
|
||
if (cli->has_audio && cli->gop_audio_samples > 0) {
|
||
write_audio_packet(cli->output_fp, cli, cli->gop_audio_buffer, cli->gop_audio_samples);
|
||
cli->gop_audio_samples = 0;
|
||
}
|
||
|
||
// 3. Write video packet
|
||
write_tav_packet(cli->output_fp, packet);
|
||
cli->total_bytes += packet->size;
|
||
cli->gop_count++;
|
||
|
||
// 4. Write sync packet after video packets
|
||
if (packet->packet_type == TAV_PACKET_GOP_UNIFIED) {
|
||
int frames_in_gop = packet->data[1];
|
||
write_gop_sync_packet(cli->output_fp, frames_in_gop);
|
||
} else if (packet->packet_type == TAV_PACKET_IFRAME) {
|
||
write_sync_packet(cli->output_fp);
|
||
}
|
||
|
||
tav_encoder_free_packet(packet);
|
||
}
|
||
}
|
||
|
||
// Update total frames in header (skip for still images - already set to 0xFFFFFFFF)
|
||
if (!cli->is_still_image) {
|
||
update_total_frames(cli->output_fp, (uint32_t)cli->frame_count);
|
||
}
|
||
|
||
// Update ENDT in Extended Header (skip for still images)
|
||
if (!cli->is_still_image && !cli->suppress_xhdr && cli->extended_header_offset >= 0) {
|
||
// Calculate end time in nanoseconds
|
||
uint64_t end_time_ns = (uint64_t)cli->frame_count * 1000000000ULL * cli->enc_params.fps_den / cli->enc_params.fps_num;
|
||
update_extended_header_endt(cli->output_fp, cli->extended_header_offset, end_time_ns);
|
||
}
|
||
|
||
// Cleanup
|
||
free(rgb_frame);
|
||
tav_encoder_free(ctx);
|
||
pclose(cli->ffmpeg_pipe);
|
||
|
||
// Cleanup GOP frame buffer
|
||
if (cli->gop_frames) {
|
||
for (int i = 0; i < gop_size; i++) {
|
||
free(cli->gop_frames[i]);
|
||
}
|
||
free(cli->gop_frames);
|
||
cli->gop_frames = NULL;
|
||
}
|
||
if (cli->gop_frame_numbers) {
|
||
free(cli->gop_frame_numbers);
|
||
cli->gop_frame_numbers = NULL;
|
||
}
|
||
|
||
// Cleanup audio resources
|
||
if (cli->audio_buffer) {
|
||
free(cli->audio_buffer);
|
||
cli->audio_buffer = NULL;
|
||
}
|
||
if (cli->gop_audio_buffer) {
|
||
free(cli->gop_audio_buffer);
|
||
cli->gop_audio_buffer = NULL;
|
||
}
|
||
if (cli->pcm_file) {
|
||
fclose(cli->pcm_file);
|
||
cli->pcm_file = NULL;
|
||
}
|
||
// Remove temporary audio file
|
||
if (cli->has_audio) {
|
||
unlink(TEMP_PCM_FILE);
|
||
}
|
||
|
||
// Final statistics
|
||
time_t total_time = time(NULL) - cli->start_time;
|
||
double avg_fps = total_time > 0 ? (double)cli->frame_count / total_time : 0.0;
|
||
double duration = (double)cli->frame_count / ((double)cli->enc_params.fps_num / cli->enc_params.fps_den);
|
||
double avg_bitrate = duration > 0 ? (cli->total_bytes * 8.0) / duration / 1000.0 : 0.0;
|
||
|
||
printf("\nEncoding complete!\n");
|
||
printf(" Frames encoded: %ld\n", cli->frame_count);
|
||
printf(" GOPs encoded: %ld\n", cli->gop_count);
|
||
printf(" Total size: %.2f MB\n", cli->total_bytes / (1024.0 * 1024.0));
|
||
printf(" Duration: %.2f seconds\n", duration);
|
||
printf(" Average bitrate: %.1f kbps\n", avg_bitrate);
|
||
printf(" Encoding speed: %.1f fps\n", avg_fps);
|
||
printf(" Time taken: %ld seconds\n", total_time);
|
||
|
||
return 0;
|
||
}
|
||
|
||
// =============================================================================
|
||
// Main
|
||
// =============================================================================
|
||
|
||
// Parse resolution string like "1024x768" with keyword recognition
|
||
static int parse_resolution(const char *res_str, int *width, int *height) {
|
||
if (!res_str) return 0;
|
||
if (strcmp(res_str, "cif") == 0 || strcmp(res_str, "CIF") == 0) {
|
||
*width = 352;
|
||
*height = 288;
|
||
return 1;
|
||
}
|
||
if (strcmp(res_str, "qcif") == 0 || strcmp(res_str, "QCIF") == 0) {
|
||
*width = 176;
|
||
*height = 144;
|
||
return 1;
|
||
}
|
||
if (strcmp(res_str, "vga") == 0 || strcmp(res_str, "VGA") == 0) {
|
||
*width = 640;
|
||
*height = 480;
|
||
return 1;
|
||
}
|
||
if (strcmp(res_str, "d1") == 0 || strcmp(res_str, "D1") == 0) {
|
||
*width = 720;
|
||
*height = 480;
|
||
return 1;
|
||
}
|
||
if (strcmp(res_str, "d1pal") == 0 || strcmp(res_str, "D1PAL") == 0) {
|
||
*width = 720;
|
||
*height = 576;
|
||
return 1;
|
||
}
|
||
if (strcmp(res_str, "960h") == 0 || strcmp(res_str, "960H") == 0) {
|
||
*width = 960;
|
||
*height = 576;
|
||
return 1;
|
||
}
|
||
// HD-ish resolutions
|
||
if (strcmp(res_str, "540p") == 0 || strcmp(res_str, "540P") == 0 || strcmp(res_str, "qHD") == 0) {
|
||
*width = 960;
|
||
*height = 540;
|
||
return 1;
|
||
}
|
||
if (strcmp(res_str, "720p") == 0 || strcmp(res_str, "720P") == 0 || strcmp(res_str, "wxga") == 0 || strcmp(res_str, "WXGA") == 0) {
|
||
*width = 1280;
|
||
*height = 720;
|
||
return 1;
|
||
}
|
||
if (strcmp(res_str, "800p") == 0 || strcmp(res_str, "800P") == 0) {
|
||
*width = 1280;
|
||
*height = 800;
|
||
return 1;
|
||
}
|
||
if (strcmp(res_str, "900p") == 0 || strcmp(res_str, "900P") == 0) {
|
||
*width = 1600;
|
||
*height = 900;
|
||
return 1;
|
||
}
|
||
if (strcmp(res_str, "960p") == 0 || strcmp(res_str, "960P") == 0 || strcmp(res_str, "wsxga") == 0 || strcmp(res_str, "WSXGA") == 0) {
|
||
*width = 1706;
|
||
*height = 960;
|
||
return 1;
|
||
}
|
||
if (strcmp(res_str, "1080p") == 0 || strcmp(res_str, "1080P") == 0 || strcmp(res_str, "fhd") == 0 || strcmp(res_str, "FHD") == 0 || strcmp(res_str, "wuxga") == 0 || strcmp(res_str, "WUXGA") == 0) {
|
||
*width = 1920;
|
||
*height = 1080;
|
||
return 1;
|
||
}
|
||
if (strcmp(res_str, "1440p") == 0 || strcmp(res_str, "1440P") == 0 || strcmp(res_str, "wqhd") == 0 || strcmp(res_str, "WQHD") == 0) {
|
||
*width = 2560;
|
||
*height = 1440;
|
||
return 1;
|
||
}
|
||
if (strcmp(res_str, "4k") == 0 || strcmp(res_str, "4K") == 0 || strcmp(res_str, "2160p") == 0 || strcmp(res_str, "2160p") == 0 || strcmp(res_str, "uhd") == 0 || strcmp(res_str, "UHD") == 0) {
|
||
*width = 3840;
|
||
*height = 2160;
|
||
return 1;
|
||
}
|
||
// 4K Univisium
|
||
if (strcmp(res_str, "4ku") == 0 || strcmp(res_str, "4KU") == 0) {
|
||
*width = 4096;
|
||
*height = 2048;
|
||
return 1;
|
||
}
|
||
// 3K Univisium
|
||
if (strcmp(res_str, "3ku") == 0 || strcmp(res_str, "3KU") == 0) {
|
||
*width = 3072;
|
||
*height = 1536;
|
||
return 1;
|
||
}
|
||
// 2K Univisium
|
||
if (strcmp(res_str, "2ku") == 0 || strcmp(res_str, "2KU") == 0) {
|
||
*width = 2048;
|
||
*height = 1024;
|
||
return 1;
|
||
}
|
||
// 1K Univisium
|
||
if (strcmp(res_str, "1ku") == 0 || strcmp(res_str, "1KU") == 0) {
|
||
*width = 1024;
|
||
*height = 512;
|
||
return 1;
|
||
}
|
||
// 4K DCI
|
||
if (strcmp(res_str, "4kdci") == 0 || strcmp(res_str, "4KDCI") == 0 || strcmp(res_str, "4k_dci") == 0 || strcmp(res_str, "4K_DCI") == 0 || strcmp(res_str, "4k-dci") == 0 || strcmp(res_str, "4K-DCI") == 0) {
|
||
*width = 4096;
|
||
*height = 2160;
|
||
return 1;
|
||
}
|
||
// 2.5K DCI
|
||
if (strcmp(res_str, "2.5kdci") == 0 || strcmp(res_str, "2.5KDCI") == 0 || strcmp(res_str, "2.5k_dci") == 0 || strcmp(res_str, "2.5K_DCI") == 0 || strcmp(res_str, "2.5k-dci") == 0 || strcmp(res_str, "2.5K-DCI") == 0 ||
|
||
strcmp(res_str, "2,5kdci") == 0 || strcmp(res_str, "2,5KDCI") == 0 || strcmp(res_str, "2,5k_dci") == 0 || strcmp(res_str, "2,5K_DCI") == 0 || strcmp(res_str, "2,5k-dci") == 0 || strcmp(res_str, "2,5K-DCI") == 0) {
|
||
*width = 2560;
|
||
*height = 1350;
|
||
return 1;
|
||
}
|
||
// 2K DCI
|
||
if (strcmp(res_str, "2kdci") == 0 || strcmp(res_str, "2KDCI") == 0 || strcmp(res_str, "2k_dci") == 0 || strcmp(res_str, "2K_DCI") == 0 || strcmp(res_str, "2k-dci") == 0 || strcmp(res_str, "2K-DCI") == 0) {
|
||
*width = 2048;
|
||
*height = 1080;
|
||
return 1;
|
||
}
|
||
// 1K DCI
|
||
if (strcmp(res_str, "1kdci") == 0 || strcmp(res_str, "1KDCI") == 0 || strcmp(res_str, "1k_dci") == 0 || strcmp(res_str, "1K_DCI") == 0 || strcmp(res_str, "1k-dci") == 0 || strcmp(res_str, "1K-DCI") == 0) {
|
||
*width = 1024;
|
||
*height = 540;
|
||
return 1;
|
||
}
|
||
if (strcmp(res_str, "half") == 0 || strcmp(res_str, "HALF") == 0) {
|
||
*width = 280;
|
||
*height = 224;
|
||
return 1;
|
||
}
|
||
if (strcmp(res_str, "full") == 0 || strcmp(res_str, "FULL") == 0 || strcmp(res_str, "tsvm") == 0 || strcmp(res_str, "TSVM") == 0) {
|
||
*width = 560;
|
||
*height = 448;
|
||
return 1;
|
||
}
|
||
if (strcmp(res_str, "default") == 0 || strcmp(res_str, "DEFAULT") == 0) {
|
||
*width = DEFAULT_WIDTH;
|
||
*height = DEFAULT_HEIGHT;
|
||
return 1;
|
||
}
|
||
return sscanf(res_str, "%dx%d", width, height) == 2;
|
||
}
|
||
|
||
int main(int argc, char *argv[]) {
|
||
// Generate temp file names
|
||
generate_random_filename(TEMP_AUDIO_FILE);
|
||
generate_random_filename(TEMP_PCM_FILE);
|
||
strcpy(TEMP_PCM_FILE + 37, ".pcm");
|
||
strcpy(TEMP_AUDIO_FILE + 37, ".mp2");
|
||
|
||
printf("TAV Encoder - TSVM Advanced Video Codec (Reference Implementation)\n");
|
||
printf("Using libtavenc v1.0 - Complete feature set with all encoder presets\n\n");
|
||
|
||
// Initialize CLI context
|
||
cli_context_t cli = {0};
|
||
|
||
// Initialize encoder params with defaults
|
||
tav_encoder_params_init(&cli.enc_params, DEFAULT_WIDTH, DEFAULT_HEIGHT);
|
||
|
||
// Force EZBC entropy coder (Twobitmap is deprecated)
|
||
cli.enc_params.entropy_coder = 1; // Always use EZBC
|
||
|
||
// Ensure two-pass scene detection is enabled by default
|
||
cli.enc_params.enable_two_pass = 1;
|
||
|
||
// Initialize audio defaults
|
||
cli.has_audio = 1; // Enabled by default
|
||
cli.audio_quality = -1; // Will match video quality if not specified
|
||
cli.use_native_audio = 0; // TAD by default
|
||
|
||
// Initialize threading
|
||
cli.num_threads = get_default_thread_count();
|
||
|
||
// Command-line options
|
||
static struct option long_options[] = {
|
||
{"input", required_argument, 0, 'i'},
|
||
{"output", required_argument, 0, 'o'},
|
||
{"size", required_argument, 0, 's'},
|
||
{"fps", required_argument, 0, 'f'},
|
||
{"quality", required_argument, 0, 'q'},
|
||
{"quantiser", required_argument, 0, 'Q'},
|
||
{"wavelet", required_argument, 0, 'w'},
|
||
{"temporal-wavelet", required_argument, 0, 1021},
|
||
{"colour-space", required_argument, 0, 'c'},
|
||
{"verbose", no_argument, 0, 'v'},
|
||
{"intra-only", no_argument, 0, 1001},
|
||
{"temporal-dwt", no_argument, 0, 1002},
|
||
{"gop-size", required_argument, 0, 1003},
|
||
{"single-pass", no_argument, 0, 1004},
|
||
{"zstd-level", required_argument, 0, 1005},
|
||
{"no-perceptual-tuning", no_argument, 0, 1006},
|
||
{"no-dead-zone", no_argument, 0, 1007},
|
||
{"dead-zone-threshold", required_argument, 0, 1023},
|
||
{"decomp-levels", required_argument, 0, 1024},
|
||
{"temporal-levels", required_argument, 0, 1025},
|
||
{"encode-limit", required_argument, 0, 1009},
|
||
{"subtitle", required_argument, 0, 1010},
|
||
{"fontrom-low", required_argument, 0, 1011},
|
||
{"fontrom-high", required_argument, 0, 1012},
|
||
{"tad-audio", no_argument, 0, 1013},
|
||
{"pcm8-audio", no_argument, 0, 1014},
|
||
{"separate-audio-track", no_argument, 0, 1015},
|
||
{"audio-quality", required_argument, 0, 1016},
|
||
{"no-audio", no_argument, 0, 1017},
|
||
{"preset-sports", no_argument, 0, 1026},
|
||
{"preset-anime", no_argument, 0, 1027},
|
||
{"monoblock", no_argument, 0, 1028},
|
||
{"tiled", no_argument, 0, 1029},
|
||
{"suppress-xhdr", no_argument, 0, 1030},
|
||
{"threads", required_argument, 0, 't'},
|
||
{"interlaced", no_argument, 0, 1031},
|
||
{"help", no_argument, 0, '?'},
|
||
{0, 0, 0, 0}
|
||
};
|
||
|
||
|
||
// Probe video to get resolution and framerate
|
||
int need_probe_dimensions = 0;
|
||
int need_probe_fps = 1;
|
||
|
||
int c, option_index = 0;
|
||
while ((c = getopt_long(argc, argv, "i:o:s:f:q:Q:w:c:t:v?", long_options, &option_index)) != -1) {
|
||
switch (c) {
|
||
case 'i':
|
||
cli.input_file = strdup(optarg);
|
||
break;
|
||
case 'o':
|
||
cli.output_file = strdup(optarg);
|
||
break;
|
||
case 's': {
|
||
if (strcmp(optarg, "original") == 0 || strcmp(optarg, "ORIGINAL") == 0) {
|
||
need_probe_dimensions = 1;
|
||
break;
|
||
}
|
||
if (!parse_resolution(optarg, &cli.enc_params.width, &cli.enc_params.height)) {
|
||
fprintf(stderr, "Invalid resolution format: %s\n", optarg);
|
||
return 1;
|
||
}
|
||
break;
|
||
}
|
||
case 'f': {
|
||
int num, den = 1;
|
||
if (sscanf(optarg, "%d/%d", &num, &den) < 1) {
|
||
fprintf(stderr, "Error: Invalid fps format. Use NUM or NUM/DEN\n");
|
||
return 1;
|
||
}
|
||
// Keep need_probe_fps = 1 so we always probe source fps
|
||
// (needed for minterpolate vs fps filter decision)
|
||
cli.target_fps_num = num;
|
||
cli.target_fps_den = den;
|
||
cli.enc_params.fps_num = num;
|
||
cli.enc_params.fps_den = den;
|
||
break;
|
||
}
|
||
case 'q': {
|
||
int q = atoi(optarg);
|
||
if (q < 0 || q > 5) {
|
||
fprintf(stderr, "Error: Quality must be 0-5\n");
|
||
return 1;
|
||
}
|
||
// Convert quality level to quantiser indices
|
||
cli.enc_params.quality_level = q;
|
||
cli.enc_params.quantiser_y = QUALITY_Y[q];
|
||
cli.enc_params.quantiser_co = QUALITY_CO[q];
|
||
cli.enc_params.quantiser_cg = QUALITY_CG[q];
|
||
cli.enc_params.dead_zone_threshold = DEAD_ZONE_THRESHOLD[q];
|
||
break;
|
||
}
|
||
case 'Q': {
|
||
int y, co, cg;
|
||
if (sscanf(optarg, "%d,%d,%d", &y, &co, &cg) != 3) {
|
||
fprintf(stderr, "Error: Invalid quantiser format. Use Y,Co,Cg\n");
|
||
return 1;
|
||
}
|
||
cli.enc_params.quantiser_y = y;
|
||
cli.enc_params.quantiser_co = co;
|
||
cli.enc_params.quantiser_cg = cg;
|
||
break;
|
||
}
|
||
case 'w':
|
||
cli.enc_params.wavelet_type = atoi(optarg);
|
||
break;
|
||
case 'c':
|
||
cli.enc_params.channel_layout = atoi(optarg);
|
||
break;
|
||
case 'v':
|
||
cli.verbose = 1;
|
||
cli.enc_params.verbose = 1;
|
||
break;
|
||
case 1001: // --intra-only
|
||
cli.enc_params.enable_temporal_dwt = 0;
|
||
break;
|
||
case 1002: // --temporal-dwt
|
||
cli.enc_params.enable_temporal_dwt = 1;
|
||
break;
|
||
case 1003: // --gop-size
|
||
cli.enc_params.gop_size = atoi(optarg);
|
||
break;
|
||
case 1004: // --single-pass
|
||
cli.enc_params.enable_two_pass = 0;
|
||
break;
|
||
case 1005: // --zstd-level
|
||
cli.enc_params.zstd_level = atoi(optarg);
|
||
break;
|
||
case 1006: // --no-perceptual-tuning
|
||
cli.enc_params.perceptual_tuning = 0;
|
||
break;
|
||
case 1007: // --no-dead-zone
|
||
cli.enc_params.dead_zone_threshold = 0.0;
|
||
break;
|
||
case 1009: // --encode-limit
|
||
cli.encode_limit = atoi(optarg);
|
||
break;
|
||
case 1010: // --subtitle
|
||
cli.subtitle_file = strdup(optarg);
|
||
break;
|
||
case 1011: // --fontrom-low
|
||
cli.fontrom_low = strdup(optarg);
|
||
break;
|
||
case 1012: // --fontrom-high
|
||
cli.fontrom_high = strdup(optarg);
|
||
break;
|
||
case 1013: // --tad-audio
|
||
cli.use_native_audio = 0;
|
||
break;
|
||
case 1014: // --pcm8-audio
|
||
cli.use_native_audio = 1;
|
||
break;
|
||
case 1015: // --separate-audio-track
|
||
cli.separate_audio_track = 1;
|
||
break;
|
||
case 1016: // --audio-quality
|
||
cli.audio_quality = atoi(optarg);
|
||
if (cli.audio_quality < 0 || cli.audio_quality > 5) {
|
||
fprintf(stderr, "Error: Audio quality must be 0-5\n");
|
||
return 1;
|
||
}
|
||
break;
|
||
case 1017: // --no-audio
|
||
cli.has_audio = 0;
|
||
break;
|
||
case 1021: // --temporal-wavelet
|
||
cli.enc_params.temporal_wavelet = atoi(optarg);
|
||
break;
|
||
case 1023: // --dead-zone-threshold
|
||
cli.enc_params.dead_zone_threshold = atof(optarg);
|
||
break;
|
||
case 1024: // --decomp-levels
|
||
cli.enc_params.decomp_levels = atoi(optarg);
|
||
break;
|
||
case 1025: // --temporal-levels
|
||
cli.enc_params.temporal_levels = atoi(optarg);
|
||
break;
|
||
case 1026: // --preset-sports
|
||
cli.enc_params.encoder_preset |= 0x01;
|
||
break;
|
||
case 1027: // --preset-anime
|
||
cli.enc_params.encoder_preset |= 0x02;
|
||
break;
|
||
case 1028: // --monoblock
|
||
cli.enc_params.monoblock = 1;
|
||
break;
|
||
case 1029: // --tiled
|
||
cli.enc_params.monoblock = 0;
|
||
break;
|
||
case 1030: // --suppress-xhdr
|
||
cli.suppress_xhdr = 1;
|
||
break;
|
||
case 1031: // --interlaced
|
||
cli.interlaced = 1;
|
||
break;
|
||
case 't': { // --threads
|
||
int threads = atoi(optarg);
|
||
if (threads < 0) {
|
||
fprintf(stderr, "Error: Thread count must be positive\n");
|
||
return 1;
|
||
}
|
||
// Both 0 and 1 mean single-threaded (use value 0 internally)
|
||
cli.num_threads = (threads <= 1) ? 0 : threads;
|
||
break;
|
||
}
|
||
case '?':
|
||
default:
|
||
print_usage(argv[0]);
|
||
return (c == '?') ? 0 : 1;
|
||
}
|
||
}
|
||
|
||
// Validate required arguments
|
||
if (!cli.input_file || !cli.output_file) {
|
||
fprintf(stderr, "Error: Input and output files are required\n\n");
|
||
print_usage(argv[0]);
|
||
return 1;
|
||
}
|
||
|
||
// Detect still images (TAP mode)
|
||
int still_image_check = is_input_still_image(cli.input_file);
|
||
if (still_image_check > 0) {
|
||
cli.is_still_image = 1;
|
||
printf("Detected still image - encoding as TAP format\n");
|
||
|
||
// Force single-threaded mode for still images (override user option)
|
||
if (cli.num_threads > 0) {
|
||
printf(" Disabling multithreading for still image\n");
|
||
cli.num_threads = 0;
|
||
}
|
||
|
||
// Force intra-only mode (no temporal DWT)
|
||
cli.enc_params.enable_temporal_dwt = 0;
|
||
|
||
// Disable audio for still images by default
|
||
if (cli.has_audio) {
|
||
printf(" Disabling audio for still image\n");
|
||
cli.has_audio = 0;
|
||
}
|
||
|
||
// Force encode limit to 1 frame
|
||
cli.encode_limit = 1;
|
||
}
|
||
|
||
if (need_probe_dimensions || need_probe_fps) {
|
||
printf("Probing input file...\n");
|
||
if (get_video_info(cli.input_file,
|
||
&cli.original_width, &cli.original_height,
|
||
&cli.original_fps_num, &cli.original_fps_den) < 0) {
|
||
return 1;
|
||
}
|
||
|
||
// Use probed dimensions if not specified by -s
|
||
if (need_probe_dimensions) {
|
||
cli.enc_params.width = cli.original_width;
|
||
cli.enc_params.height = cli.original_height;
|
||
printf(" Resolution: %dx%d\n", cli.original_width, cli.original_height);
|
||
}
|
||
|
||
// Always print source framerate
|
||
printf(" Framerate: %d/%d\n", cli.original_fps_num, cli.original_fps_den);
|
||
|
||
// Use probed framerate if not specified by -f
|
||
if (cli.target_fps_num == 0) {
|
||
cli.enc_params.fps_num = cli.original_fps_num;
|
||
cli.enc_params.fps_den = cli.original_fps_den;
|
||
}
|
||
}
|
||
|
||
// Handle interlaced mode: store full height for header, use half-height internally
|
||
if (cli.interlaced) {
|
||
// Store full height for the header
|
||
cli.header_height = cli.enc_params.height;
|
||
// Use half-height internally (FFmpeg will output half-height frames)
|
||
cli.enc_params.height = cli.enc_params.height / 2;
|
||
printf("Interlaced mode: header=%dx%d, internal=%dx%d\n",
|
||
cli.enc_params.width, cli.header_height,
|
||
cli.enc_params.width, cli.enc_params.height);
|
||
} else {
|
||
// Progressive mode: header_height equals internal height
|
||
cli.header_height = cli.enc_params.height;
|
||
}
|
||
|
||
// Report fps conversion if enabled
|
||
if (cli.target_fps_num > 0 && cli.original_fps_num > 0) {
|
||
long long target_rate = (long long)cli.target_fps_num * cli.original_fps_den;
|
||
long long source_rate = (long long)cli.original_fps_num * cli.target_fps_den;
|
||
|
||
if (target_rate > source_rate) {
|
||
printf("Framerate conversion: %d/%d -> %d/%d (minterpolate)\n",
|
||
cli.original_fps_num, cli.original_fps_den,
|
||
cli.target_fps_num, cli.target_fps_den);
|
||
} else if (target_rate < source_rate) {
|
||
printf("Framerate conversion: %d/%d -> %d/%d (fps)\n",
|
||
cli.original_fps_num, cli.original_fps_den,
|
||
cli.target_fps_num, cli.target_fps_den);
|
||
}
|
||
// If equal, no message needed (no conversion)
|
||
} else if (cli.target_fps_num > 0) {
|
||
printf("Output framerate: %d/%d\n", cli.target_fps_num, cli.target_fps_den);
|
||
}
|
||
|
||
// Set audio quality to match video quality if not specified
|
||
if (cli.audio_quality < 0) {
|
||
cli.audio_quality = cli.enc_params.quality_level; // Match luma quality
|
||
}
|
||
|
||
// Extract audio if enabled
|
||
if (cli.has_audio && !cli.use_native_audio) {
|
||
printf("Extracting audio...\n");
|
||
if (extract_audio_to_file(cli.input_file, TEMP_PCM_FILE)) {
|
||
cli.pcm_file = fopen(TEMP_PCM_FILE, "rb");
|
||
if (cli.pcm_file) {
|
||
fseek(cli.pcm_file, 0, SEEK_END);
|
||
cli.audio_remaining = ftell(cli.pcm_file);
|
||
fseek(cli.pcm_file, 0, SEEK_SET);
|
||
|
||
// Calculate samples per frame (accounting for fractional fps via fps_den)
|
||
cli.samples_per_frame = (AUDIO_SAMPLE_RATE * cli.enc_params.fps_den + cli.enc_params.fps_num - 1) / cli.enc_params.fps_num;
|
||
|
||
// Allocate per-frame audio buffer
|
||
cli.audio_buffer_size = cli.samples_per_frame * 2; // Stereo
|
||
cli.audio_buffer = malloc(cli.audio_buffer_size * sizeof(float));
|
||
|
||
// Note: GOP audio buffer will be allocated in encode_video() after encoder creation
|
||
// when we know the actual GOP size
|
||
|
||
printf(" Audio: TAD quality %d, %d samples/frame\n",
|
||
cli.audio_quality, cli.samples_per_frame);
|
||
} else {
|
||
fprintf(stderr, "Warning: Failed to open extracted audio, encoding without audio\n");
|
||
cli.has_audio = 0;
|
||
}
|
||
} else {
|
||
fprintf(stderr, "Warning: No audio stream found or extraction failed\n");
|
||
cli.has_audio = 0;
|
||
}
|
||
}
|
||
|
||
// Parse subtitle file if provided
|
||
if (cli.subtitle_file) {
|
||
printf("Parsing subtitles: %s\n", cli.subtitle_file);
|
||
cli.subtitles = parse_srt_file(cli.subtitle_file);
|
||
if (cli.subtitles) {
|
||
// Count subtitles
|
||
int count = 0;
|
||
subtitle_entry_t *sub = cli.subtitles;
|
||
while (sub) {
|
||
count++;
|
||
sub = sub->next;
|
||
}
|
||
printf(" Loaded %d subtitles\n", count);
|
||
} else {
|
||
fprintf(stderr, "Warning: Failed to parse subtitle file\n");
|
||
}
|
||
}
|
||
|
||
// Initialize Extended Header metadata
|
||
cli.ffmpeg_version = get_ffmpeg_version(); // May return NULL if FFmpeg not found
|
||
struct timespec ts;
|
||
if (clock_gettime(CLOCK_REALTIME, &ts) == 0) {
|
||
cli.creation_time_us = (uint64_t)ts.tv_sec * 1000000ULL + (uint64_t)ts.tv_nsec / 1000ULL;
|
||
} else {
|
||
// Fallback to time() if clock_gettime fails
|
||
cli.creation_time_us = (uint64_t)time(NULL) * 1000000ULL;
|
||
}
|
||
|
||
// Open output file
|
||
cli.output_fp = fopen(cli.output_file, "wb");
|
||
if (!cli.output_fp) {
|
||
fprintf(stderr, "Error: Failed to open output file: %s\n", cli.output_file);
|
||
return 1;
|
||
}
|
||
|
||
// Two-pass scene change detection (if enabled and temporal DWT is active)
|
||
if (cli.enc_params.enable_two_pass && cli.enc_params.enable_temporal_dwt && !cli.is_still_image) {
|
||
if (two_pass_first_pass(&cli) == 0) {
|
||
cli.two_pass_mode = 1;
|
||
cli.current_gop_boundary = cli.gop_boundaries; // Start at first GOP
|
||
printf("Two-pass mode: Using adaptive GOP sizes based on scene detection\n");
|
||
} else {
|
||
fprintf(stderr, "Warning: Two-pass analysis failed, falling back to single-pass\n");
|
||
cli.two_pass_mode = 0;
|
||
}
|
||
} else {
|
||
cli.two_pass_mode = 0;
|
||
if (cli.enc_params.enable_two_pass && !cli.enc_params.enable_temporal_dwt) {
|
||
printf("Note: Two-pass mode requires temporal DWT (disabled in intra-only mode)\n");
|
||
}
|
||
}
|
||
|
||
// Encode video
|
||
int result = encode_video(&cli);
|
||
|
||
// Print output file before cleanup frees the string
|
||
if (result >= 0) {
|
||
printf("\nOutput written to: %s\n", cli.output_file);
|
||
}
|
||
|
||
// Cleanup
|
||
fclose(cli.output_fp);
|
||
free(cli.input_file);
|
||
free(cli.output_file);
|
||
if (cli.subtitle_file) {
|
||
free(cli.subtitle_file);
|
||
}
|
||
if (cli.subtitles) {
|
||
free_subtitle_list(cli.subtitles);
|
||
}
|
||
if (cli.fontrom_low) {
|
||
free(cli.fontrom_low);
|
||
}
|
||
if (cli.fontrom_high) {
|
||
free(cli.fontrom_high);
|
||
}
|
||
if (cli.ffmpeg_version) {
|
||
free(cli.ffmpeg_version);
|
||
}
|
||
|
||
// Cleanup two-pass data structures
|
||
if (cli.frame_analyses) {
|
||
free(cli.frame_analyses);
|
||
}
|
||
if (cli.gop_boundaries) {
|
||
free_gop_boundaries(cli.gop_boundaries);
|
||
}
|
||
|
||
if (result < 0) {
|
||
fprintf(stderr, "Encoding failed\n");
|
||
return 1;
|
||
}
|
||
|
||
return 0;
|
||
}
|