mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-06-06 05:28:31 +09:00
TAV: two-pass GOP slicer
This commit is contained in:
@@ -487,6 +487,7 @@ let trueFrameCount = 0
|
||||
let stopPlay = false
|
||||
let akku = FRAME_TIME
|
||||
let akku2 = 0.0
|
||||
let firstFrameIssued = false // Track when first frame has been displayed
|
||||
let nextFrameTime = 0 // Absolute time when next frame should display (nanoseconds)
|
||||
let currentFileIndex = 1 // Track which file we're playing in concatenated stream
|
||||
let totalFilesProcessed = 0
|
||||
@@ -825,6 +826,7 @@ try {
|
||||
frameCount = 0
|
||||
akku = FRAME_TIME
|
||||
akku2 = 0.0
|
||||
firstFrameIssued = false
|
||||
audio.purgeQueue(0)
|
||||
if (paused) {
|
||||
audio.play(0)
|
||||
@@ -844,6 +846,7 @@ try {
|
||||
frameCount = 0
|
||||
akku = FRAME_TIME
|
||||
akku2 = 0.0
|
||||
firstFrameIssued = false
|
||||
audio.purgeQueue(0)
|
||||
if (paused) {
|
||||
audio.play(0)
|
||||
@@ -863,6 +866,7 @@ try {
|
||||
frameCount = seekTarget.frameNum
|
||||
akku = FRAME_TIME
|
||||
akku2 -= 5.5
|
||||
firstFrameIssued = false
|
||||
audio.purgeQueue(0)
|
||||
if (paused) {
|
||||
audio.play(0)
|
||||
@@ -890,6 +894,7 @@ try {
|
||||
frameCount = seekTarget.frameNum
|
||||
akku = FRAME_TIME
|
||||
akku2 += 5.0
|
||||
firstFrameIssued = false
|
||||
audio.purgeQueue(0)
|
||||
if (paused) {
|
||||
audio.play(0)
|
||||
@@ -925,6 +930,7 @@ try {
|
||||
frameCount = 0
|
||||
akku = 0.0
|
||||
akku2 = 0.0
|
||||
firstFrameIssued = false
|
||||
FRAME_TIME = 1.0 / header.fps
|
||||
audio.purgeQueue(0)
|
||||
currentFileIndex++
|
||||
@@ -952,19 +958,16 @@ try {
|
||||
}
|
||||
|
||||
if (packetType === TAV_PACKET_SYNC || packetType == TAV_PACKET_SYNC_NTSC) {
|
||||
// Sync packet - no additional data (for I/P frames, not GOPs)
|
||||
akku -= FRAME_TIME
|
||||
if (packetType == TAV_PACKET_SYNC) {
|
||||
frameCount++
|
||||
}
|
||||
|
||||
trueFrameCount++
|
||||
|
||||
// Swap ping-pong buffers
|
||||
let temp = CURRENT_RGB_ADDR
|
||||
CURRENT_RGB_ADDR = PREV_RGB_ADDR
|
||||
PREV_RGB_ADDR = temp
|
||||
|
||||
// SYNC packets are vestigial in TAV's time-based playback model
|
||||
// (legacy from TEV's synchronous display model)
|
||||
//
|
||||
// Frame display timing is controlled by nextFrameTime, not SYNC packets:
|
||||
// - I/P frames: Display logic at line 1553-1597
|
||||
// - GOP frames: Display logic at line 1600-1684
|
||||
//
|
||||
// NTSC sync (frame duplication): Handled automatically by audio queue timing
|
||||
//
|
||||
// Do nothing - skip to next packet
|
||||
}
|
||||
else if (packetType === TAV_PACKET_IFRAME || packetType === TAV_PACKET_PFRAME) {
|
||||
// Record I-frame position for seeking
|
||||
@@ -1072,7 +1075,7 @@ try {
|
||||
const gopSize = seqread.readOneByte()
|
||||
const compressedSize = seqread.readInt()
|
||||
let compressedPtr = seqread.readBytes(compressedSize)
|
||||
updateDataRateBin(compressedSize)
|
||||
updateDataRateBin(compressedSize / gopSize)
|
||||
|
||||
// TRIPLE-BUFFERING LOGIC (3 slots: playing, ready, decoding):
|
||||
// - If no GOP playing: decode first GOP to slot 0
|
||||
@@ -1456,7 +1459,7 @@ try {
|
||||
} // end of !paused packet read block
|
||||
|
||||
let t2 = sys.nanoTime()
|
||||
if (!paused) {
|
||||
if (!paused && firstFrameIssued) {
|
||||
// Only accumulate time if we have a GOP to play
|
||||
// Don't accumulate during first GOP decode or we'll get fast playback
|
||||
if (currentGopSize > 0) {
|
||||
@@ -1484,8 +1487,12 @@ try {
|
||||
currentGopBufferSlot = asyncDecodeSlot
|
||||
asyncDecodeInProgress = false
|
||||
|
||||
// Set first frame time to NOW
|
||||
// Initialize timing ONLY if this is the very first frame of the video
|
||||
// If we're transitioning from I-frames, preserve timing continuity
|
||||
if (nextFrameTime === 0) {
|
||||
nextFrameTime = sys.nanoTime()
|
||||
}
|
||||
// Otherwise keep existing nextFrameTime from previous I-frame/GOP
|
||||
|
||||
// Resume packet reading only if not all 3 buffers are full
|
||||
// (might have buffered GOP 2 and 3 during GOP 1 decode)
|
||||
@@ -1577,6 +1584,11 @@ try {
|
||||
audioFired = true
|
||||
}
|
||||
|
||||
// Mark first frame as issued (starts akku/akku2 timers)
|
||||
if (!firstFrameIssued) {
|
||||
firstFrameIssued = true
|
||||
}
|
||||
|
||||
frameCount++
|
||||
trueFrameCount++
|
||||
iframeReady = false
|
||||
@@ -1586,7 +1598,7 @@ try {
|
||||
CURRENT_RGB_ADDR = PREV_RGB_ADDR
|
||||
PREV_RGB_ADDR = temp
|
||||
|
||||
// Schedule next frame
|
||||
// Schedule next frame (advance AFTER display, consistent with GOP timing)
|
||||
nextFrameTime += (frametime) // frametime is in nanoseconds from header
|
||||
|
||||
// Log performance data every 60 frames
|
||||
@@ -1628,6 +1640,11 @@ try {
|
||||
audioFired = true
|
||||
}
|
||||
|
||||
// Mark first frame as issued (starts akku/akku2 timers)
|
||||
if (!firstFrameIssued) {
|
||||
firstFrameIssued = true
|
||||
}
|
||||
|
||||
currentGopFrameIndex++
|
||||
frameCount++
|
||||
trueFrameCount++
|
||||
@@ -1836,7 +1853,7 @@ try {
|
||||
con.color_pair(253, 0)
|
||||
let guiStatus = {
|
||||
fps: header.fps,
|
||||
videoRate: getVideoRate(),
|
||||
videoRate: getVideoRate().toFixed(0),
|
||||
frameCount: frameCount,
|
||||
totalFrames: header.totalFrames,
|
||||
frameMode: decoderDbgInfo.frameMode,
|
||||
|
||||
@@ -135,10 +135,23 @@ static int needs_alpha_channel(int channel_layout) {
|
||||
#define TEMPORAL_GOP_SIZE_MIN 10 // Minimum GOP size to avoid decoder hiccups
|
||||
#define TEMPORAL_DECOMP_LEVEL 2
|
||||
|
||||
// Single-pass scene change detection constants
|
||||
#define SCENE_CHANGE_THRESHOLD_SOFT 0.72
|
||||
#define SCENE_CHANGE_THRESHOLD_HARD 0.90
|
||||
#define MOTION_THRESHOLD 24.0f // Flush if motion exceeds 24 pixels in any direction
|
||||
|
||||
// Two-pass scene change detection constants
|
||||
#define ANALYSIS_SUBSAMPLE_FACTOR 4 // Subsample to 1/4 resolution for speed
|
||||
#define ANALYSIS_DWT_LEVELS 3 // 3-level Haar DWT for analysis
|
||||
#define ANALYSIS_MOVING_WINDOW 30 // Moving average window (30 frames = ~1 second)
|
||||
#define ANALYSIS_STDDEV_MULTIPLIER 2.0 // Standard deviation multiplier for adaptive threshold (balanced sensitivity)
|
||||
#define ANALYSIS_LL_DIFF_MIN_THRESHOLD 2.0 // Minimum absolute threshold for LL_diff (avoid false positives)
|
||||
#define ANALYSIS_HB_RATIO_THRESHOLD 0.70 // Highband energy ratio threshold (balanced for scene cuts)
|
||||
#define ANALYSIS_HB_ENERGY_MULTIPLIER 2.5 // Energy spike multiplier (2.5× mean to trigger)
|
||||
#define ANALYSIS_FADE_THRESHOLD 50.0 // Brightness change threshold over 5 frames
|
||||
#define ANALYSIS_GOP_MIN_SIZE 10 // Minimum GOP size for two-pass mode. Keep it same as default settings.
|
||||
#define ANALYSIS_GOP_MAX_SIZE 24 // Maximum GOP size for two-pass mode. Keep it same as default settings.
|
||||
|
||||
// Audio/subtitle constants (reused from TEV)
|
||||
#define TSVM_AUDIO_SAMPLE_RATE 32000
|
||||
#define MP2_DEFAULT_PACKET_SIZE 1152
|
||||
@@ -156,6 +169,41 @@ typedef struct subtitle_entry {
|
||||
struct subtitle_entry *next;
|
||||
} subtitle_entry_t;
|
||||
|
||||
// Frame analysis metrics for two-pass scene change detection
|
||||
typedef struct frame_analysis {
|
||||
int frame_number;
|
||||
|
||||
// Wavelet-based metrics (3-level Haar on subsampled frame)
|
||||
double ll_diff; // L1 distance between consecutive LL bands
|
||||
double ll_mean; // Mean brightness (LL band average)
|
||||
double ll_variance; // Contrast estimate (LL band variance)
|
||||
|
||||
double highband_energy; // Sum of absolute values in LH/HL/HH bands
|
||||
double total_energy; // Total energy (all bands)
|
||||
double highband_ratio; // highband_energy / total_energy
|
||||
|
||||
// Per-band entropies (Shannon entropy of coefficient magnitudes)
|
||||
double entropy_ll;
|
||||
double entropy_lh[ANALYSIS_DWT_LEVELS];
|
||||
double entropy_hl[ANALYSIS_DWT_LEVELS];
|
||||
double entropy_hh[ANALYSIS_DWT_LEVELS];
|
||||
|
||||
// Texture change indicators
|
||||
double zero_crossing_rate; // Zero crossing rate in highbands
|
||||
|
||||
// Detection results
|
||||
int is_scene_change; // Final scene change flag
|
||||
double scene_change_score; // Composite score for debugging
|
||||
} frame_analysis_t;
|
||||
|
||||
// GOP boundary list for two-pass encoding
|
||||
typedef struct gop_boundary {
|
||||
int start_frame;
|
||||
int end_frame;
|
||||
int num_frames;
|
||||
struct gop_boundary *next;
|
||||
} gop_boundary_t;
|
||||
|
||||
static void generate_random_filename(char *filename) {
|
||||
srand(time(NULL));
|
||||
|
||||
@@ -1916,6 +1964,16 @@ typedef struct tav_encoder_s {
|
||||
uint64_t creation_time_us; // Creation time in nanoseconds since UNIX epoch
|
||||
long extended_header_offset; // File offset of extended header for ENDT update
|
||||
|
||||
// Two-pass scene change detection
|
||||
int two_pass_mode; // Enable two-pass encoding (0=disabled, 1=enabled)
|
||||
frame_analysis_t *frame_analyses; // Array of frame analysis metrics (first pass)
|
||||
int frame_analyses_capacity; // Allocated capacity
|
||||
int frame_analyses_count; // Current number of analyzed frames
|
||||
gop_boundary_t *gop_boundaries; // Linked list of GOP boundaries (computed in first pass)
|
||||
gop_boundary_t *current_gop_boundary; // Current GOP being encoded (second pass)
|
||||
int two_pass_current_frame; // Current frame number in second pass
|
||||
char *two_pass_analysis_file; // Temporary file for storing analysis data (NULL = in-memory)
|
||||
|
||||
} tav_encoder_t;
|
||||
|
||||
// Wavelet filter constants removed - using lifting scheme implementation instead
|
||||
@@ -2296,6 +2354,7 @@ static void show_usage(const char *program_name) {
|
||||
printf(" --enable-delta Enable delta encoding\n");
|
||||
printf(" --delta-haar N Apply N-level Haar DWT to delta coefficients (1-6, auto-enables delta)\n");
|
||||
printf(" --3d-dwt Enable temporal 3D DWT (GOP-based encoding with temporal transform)\n");
|
||||
printf(" --single-pass Disable two-pass encoding with wavelet-based scene change detection (optimal GOP boundaries)\n");
|
||||
printf(" --mc-ezbc Enable MC-EZBC block-based motion compensation (requires --temporal-dwt, implies --ezbc)\n");
|
||||
printf(" --ezbc Enable EZBC (Embedded Zero Block Coding) entropy coding\n");
|
||||
printf(" --ictcp Use ICtCp colour space instead of YCoCg-R (use when source is in BT.2100)\n");
|
||||
@@ -2446,6 +2505,16 @@ static tav_encoder_t* create_encoder(void) {
|
||||
enc->residual_coding_lookahead_buffer_cg = NULL;
|
||||
enc->residual_coding_lookahead_buffer_display_index = NULL;
|
||||
|
||||
// Two-pass mode initialization
|
||||
enc->two_pass_mode = 1; // enable by default
|
||||
enc->frame_analyses = NULL;
|
||||
enc->frame_analyses_capacity = 0;
|
||||
enc->frame_analyses_count = 0;
|
||||
enc->gop_boundaries = NULL;
|
||||
enc->current_gop_boundary = NULL;
|
||||
enc->two_pass_current_frame = 0;
|
||||
enc->two_pass_analysis_file = NULL;
|
||||
|
||||
return enc;
|
||||
}
|
||||
|
||||
@@ -4838,6 +4907,24 @@ static int gop_should_flush_motion(tav_encoder_t *enc) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Check if GOP should be flushed based on pre-computed boundaries (two-pass mode)
|
||||
static int gop_should_flush_twopass(tav_encoder_t *enc, int current_frame_number) {
|
||||
if (!enc->two_pass_mode || !enc->current_gop_boundary) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Check if we've reached the end of the current GOP
|
||||
if (current_frame_number >= enc->current_gop_boundary->end_frame) {
|
||||
if (enc->verbose) {
|
||||
printf(" Two-pass: GOP boundary reached (frame %d, end=%d)\n",
|
||||
current_frame_number, enc->current_gop_boundary->end_frame);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Flush GOP: apply 3D DWT, quantize, serialise, and write to output
|
||||
// Returns number of bytes written, or 0 on error
|
||||
// This function processes the entire GOP and writes all frames with temporal 3D DWT
|
||||
@@ -5396,8 +5483,8 @@ static size_t gop_process_and_flush(tav_encoder_t *enc, FILE *output, int base_q
|
||||
int actual_gop_size = enc->temporal_gop_frame_count;
|
||||
int scene_change_frame = -1;
|
||||
|
||||
// Check for scene changes within the GOP
|
||||
if (!force_flush) {
|
||||
// Check for scene changes within the GOP (skip in two-pass mode - boundaries are pre-computed)
|
||||
if (!force_flush && !enc->two_pass_mode) {
|
||||
for (int i = 1; i < enc->temporal_gop_frame_count; i++) {
|
||||
// Compare consecutive frames using unified scene change detection
|
||||
double avg_diff, changed_ratio;
|
||||
@@ -9448,6 +9535,647 @@ static int detect_scene_change(tav_encoder_t *enc, double *out_changed_ratio) {
|
||||
return is_scene_change;
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Two-Pass Scene Change Detection - Wavelet-based Analysis
|
||||
// =============================================================================
|
||||
|
||||
// Fast subsampled 2D Haar DWT for analysis (works in-place)
|
||||
// Performs N-level 2D Haar transform on subsampled grayscale data
|
||||
static void analysis_haar_2d_forward(float *data, int width, int height, int levels) {
|
||||
float *temp = malloc((width > height ? width : height) * sizeof(float));
|
||||
|
||||
for (int level = 0; level < levels; level++) {
|
||||
int current_width = width >> level;
|
||||
int current_height = height >> level;
|
||||
|
||||
if (current_width < 2 || current_height < 2) break;
|
||||
|
||||
// Horizontal pass
|
||||
for (int y = 0; y < current_height; y++) {
|
||||
for (int x = 0; x < current_width; x++) {
|
||||
temp[x] = data[y * width + x];
|
||||
}
|
||||
dwt_haar_forward_1d(temp, current_width);
|
||||
for (int x = 0; x < current_width; x++) {
|
||||
data[y * width + x] = temp[x];
|
||||
}
|
||||
}
|
||||
|
||||
// Vertical pass
|
||||
for (int x = 0; x < current_width; x++) {
|
||||
for (int y = 0; y < current_height; y++) {
|
||||
temp[y] = data[y * width + x];
|
||||
}
|
||||
dwt_haar_forward_1d(temp, current_height);
|
||||
for (int y = 0; y < current_height; y++) {
|
||||
data[y * width + x] = temp[y];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free(temp);
|
||||
}
|
||||
|
||||
// Subsample RGB frame to grayscale for analysis (1/N resolution)
|
||||
// Returns newly allocated buffer
|
||||
static float* subsample_frame_to_gray(const uint8_t *rgb_frame, int width, int height, int factor) {
|
||||
int sub_width = width / factor;
|
||||
int sub_height = height / factor;
|
||||
float *gray = malloc(sub_width * sub_height * sizeof(float));
|
||||
|
||||
for (int y = 0; y < sub_height; y++) {
|
||||
for (int x = 0; x < sub_width; x++) {
|
||||
// Sample center pixel of each block
|
||||
int src_x = x * factor + factor / 2;
|
||||
int src_y = y * factor + factor / 2;
|
||||
int src_idx = (src_y * width + src_x) * 3;
|
||||
|
||||
// Convert to grayscale using standard weights
|
||||
float r = rgb_frame[src_idx + 0];
|
||||
float g = rgb_frame[src_idx + 1];
|
||||
float b = rgb_frame[src_idx + 2];
|
||||
gray[y * sub_width + x] = 0.299f * r + 0.587f * g + 0.114f * b;
|
||||
}
|
||||
}
|
||||
|
||||
return gray;
|
||||
}
|
||||
|
||||
// Calculate Shannon entropy of coefficient magnitudes
|
||||
static double calculate_shannon_entropy(const float *coeffs, int count) {
|
||||
if (count == 0) return 0.0;
|
||||
|
||||
// Build histogram of coefficient magnitudes (use 256 bins)
|
||||
#define HIST_BINS 256
|
||||
int histogram[HIST_BINS] = {0};
|
||||
|
||||
// Find min/max for normalization
|
||||
float min_val = FLT_MAX, max_val = -FLT_MAX;
|
||||
for (int i = 0; i < count; i++) {
|
||||
float abs_val = fabsf(coeffs[i]);
|
||||
if (abs_val < min_val) min_val = abs_val;
|
||||
if (abs_val > max_val) max_val = abs_val;
|
||||
}
|
||||
|
||||
// Avoid division by zero
|
||||
float range = max_val - min_val;
|
||||
if (range < 1e-6) return 0.0;
|
||||
|
||||
// Build histogram
|
||||
for (int i = 0; i < count; i++) {
|
||||
float abs_val = fabsf(coeffs[i]);
|
||||
int bin = (int)((abs_val - min_val) / range * (HIST_BINS - 1));
|
||||
bin = bin < 0 ? 0 : (bin >= HIST_BINS ? HIST_BINS - 1 : bin);
|
||||
histogram[bin]++;
|
||||
}
|
||||
|
||||
// Calculate entropy: H = -sum(p_i * log2(p_i))
|
||||
double entropy = 0.0;
|
||||
for (int i = 0; i < HIST_BINS; i++) {
|
||||
if (histogram[i] > 0) {
|
||||
double p = (double)histogram[i] / count;
|
||||
entropy -= p * log2(p);
|
||||
}
|
||||
}
|
||||
|
||||
return entropy;
|
||||
#undef HIST_BINS
|
||||
}
|
||||
|
||||
// Extract subband from DWT coefficients (helper for entropy calculation)
|
||||
static void extract_subband(const float *dwt_data, int width, int height, int level,
|
||||
int band, float *output, int *out_count) {
|
||||
// band: 0=LL, 1=LH, 2=HL, 3=HH
|
||||
// For level L, subbands are in top-left quadrant of size (width>>L, height>>L)
|
||||
|
||||
int level_width = width >> level;
|
||||
int level_height = height >> level;
|
||||
int half_width = level_width / 2;
|
||||
int half_height = level_height / 2;
|
||||
|
||||
if (half_width < 1 || half_height < 1) {
|
||||
*out_count = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
int count = 0;
|
||||
int offset_x = (band & 1) ? half_width : 0; // LH, HH have x offset
|
||||
int offset_y = (band & 2) ? half_height : 0; // HL, HH have y offset
|
||||
|
||||
for (int y = 0; y < half_height; y++) {
|
||||
for (int x = 0; x < half_width; x++) {
|
||||
int src_x = offset_x + x;
|
||||
int src_y = offset_y + y;
|
||||
output[count++] = dwt_data[src_y * width + src_x];
|
||||
}
|
||||
}
|
||||
|
||||
*out_count = count;
|
||||
}
|
||||
|
||||
// Compute comprehensive frame analysis metrics
|
||||
static void compute_frame_metrics(const float *dwt_current, const float *dwt_previous,
|
||||
int width, int height, int levels,
|
||||
frame_analysis_t *metrics) {
|
||||
int num_pixels = width * height;
|
||||
|
||||
// Initialize metrics
|
||||
memset(metrics, 0, sizeof(frame_analysis_t));
|
||||
|
||||
// Extract LL band (approximation coefficients)
|
||||
int ll_width = width >> levels;
|
||||
int ll_height = height >> levels;
|
||||
int ll_count = ll_width * ll_height;
|
||||
|
||||
if (ll_count <= 0) return;
|
||||
|
||||
// Metric 1: LL band statistics (mean, variance)
|
||||
double ll_sum = 0.0, ll_sum_sq = 0.0;
|
||||
for (int i = 0; i < ll_count; i++) {
|
||||
float val = dwt_current[i];
|
||||
ll_sum += val;
|
||||
ll_sum_sq += val * val;
|
||||
}
|
||||
metrics->ll_mean = ll_sum / ll_count;
|
||||
double ll_var = (ll_sum_sq / ll_count) - (metrics->ll_mean * metrics->ll_mean);
|
||||
metrics->ll_variance = ll_var > 0 ? ll_var : 0;
|
||||
|
||||
// Metric 2: LL_diff (L1 distance between consecutive frames)
|
||||
if (dwt_previous) {
|
||||
double diff_sum = 0.0;
|
||||
for (int i = 0; i < ll_count; i++) {
|
||||
diff_sum += fabs(dwt_current[i] - dwt_previous[i]);
|
||||
}
|
||||
metrics->ll_diff = diff_sum / ll_count;
|
||||
}
|
||||
|
||||
// Metric 3: Highband energy and ratio
|
||||
double total_energy = 0.0, highband_energy = 0.0;
|
||||
for (int i = 0; i < num_pixels; i++) {
|
||||
float abs_val = fabsf(dwt_current[i]);
|
||||
total_energy += abs_val;
|
||||
if (i >= ll_count) { // All coefficients except LL band
|
||||
highband_energy += abs_val;
|
||||
}
|
||||
}
|
||||
metrics->total_energy = total_energy;
|
||||
metrics->highband_energy = highband_energy;
|
||||
metrics->highband_ratio = total_energy > 0 ? (highband_energy / total_energy) : 0;
|
||||
|
||||
// Metric 4: Per-band entropies
|
||||
float *subband_buffer = malloc(num_pixels * sizeof(float));
|
||||
int subband_count;
|
||||
|
||||
// LL band entropy
|
||||
extract_subband(dwt_current, width, height, levels, 0, subband_buffer, &subband_count);
|
||||
metrics->entropy_ll = calculate_shannon_entropy(subband_buffer, subband_count);
|
||||
|
||||
// High-frequency bands entropy (LH, HL, HH for each level)
|
||||
for (int level = 0; level < levels && level < ANALYSIS_DWT_LEVELS; level++) {
|
||||
// LH band
|
||||
extract_subband(dwt_current, width, height, level, 1, subband_buffer, &subband_count);
|
||||
metrics->entropy_lh[level] = calculate_shannon_entropy(subband_buffer, subband_count);
|
||||
|
||||
// HL band
|
||||
extract_subband(dwt_current, width, height, level, 2, subband_buffer, &subband_count);
|
||||
metrics->entropy_hl[level] = calculate_shannon_entropy(subband_buffer, subband_count);
|
||||
|
||||
// HH band
|
||||
extract_subband(dwt_current, width, height, level, 3, subband_buffer, &subband_count);
|
||||
metrics->entropy_hh[level] = calculate_shannon_entropy(subband_buffer, subband_count);
|
||||
}
|
||||
|
||||
// Metric 5: Zero crossing rate in highbands (texture change indicator)
|
||||
int zero_crossings = 0;
|
||||
int highband_coeffs = num_pixels - ll_count;
|
||||
if (highband_coeffs > 1) {
|
||||
for (int i = ll_count; i < num_pixels - 1; i++) {
|
||||
if ((dwt_current[i] > 0 && dwt_current[i + 1] < 0) ||
|
||||
(dwt_current[i] < 0 && dwt_current[i + 1] > 0)) {
|
||||
zero_crossings++;
|
||||
}
|
||||
}
|
||||
metrics->zero_crossing_rate = (double)zero_crossings / highband_coeffs;
|
||||
}
|
||||
|
||||
free(subband_buffer);
|
||||
}
|
||||
|
||||
// Hybrid scene change detector with adaptive thresholds
|
||||
// Returns 1 if scene change detected, 0 otherwise
|
||||
static int detect_scene_change_wavelet(int frame_number,
|
||||
const frame_analysis_t *metrics_history,
|
||||
int history_count,
|
||||
const frame_analysis_t *current_metrics,
|
||||
int verbose) {
|
||||
if (history_count < 2) return 0; // Need history for adaptive thresholds
|
||||
|
||||
// Calculate moving statistics for LL_diff (mean and stddev)
|
||||
int window_size = history_count < ANALYSIS_MOVING_WINDOW ? history_count : ANALYSIS_MOVING_WINDOW;
|
||||
int start_idx = history_count - window_size;
|
||||
|
||||
double ll_diff_sum = 0.0, ll_diff_sum_sq = 0.0;
|
||||
for (int i = start_idx; i < history_count; i++) {
|
||||
double val = metrics_history[i].ll_diff;
|
||||
ll_diff_sum += val;
|
||||
ll_diff_sum_sq += val * val;
|
||||
}
|
||||
|
||||
double ll_diff_mean = ll_diff_sum / window_size;
|
||||
double ll_diff_variance = (ll_diff_sum_sq / window_size) - (ll_diff_mean * ll_diff_mean);
|
||||
double ll_diff_stddev = ll_diff_variance > 0 ? sqrt(ll_diff_variance) : 0;
|
||||
|
||||
// Adaptive threshold: mean + k*stddev (with minimum absolute threshold)
|
||||
double ll_diff_threshold = ll_diff_mean + ANALYSIS_STDDEV_MULTIPLIER * ll_diff_stddev;
|
||||
if (ll_diff_threshold < ANALYSIS_LL_DIFF_MIN_THRESHOLD) {
|
||||
ll_diff_threshold = ANALYSIS_LL_DIFF_MIN_THRESHOLD;
|
||||
}
|
||||
|
||||
// Detection rule 1: Hard cut or fast fade (LL_diff spike)
|
||||
// Improvement: Normalize LL_diff by LL_mean to handle exposure/lighting changes
|
||||
double normalized_ll_diff = current_metrics->ll_mean > 1.0 ?
|
||||
current_metrics->ll_diff / current_metrics->ll_mean : current_metrics->ll_diff;
|
||||
double normalized_threshold = current_metrics->ll_mean > 1.0 ?
|
||||
ll_diff_threshold / current_metrics->ll_mean : ll_diff_threshold;
|
||||
|
||||
if (normalized_ll_diff > normalized_threshold) {
|
||||
if (verbose) {
|
||||
printf(" Scene change detected frame %d: Normalized LL_diff=%.4f > threshold=%.4f (raw: %.2f > %.2f)\n",
|
||||
frame_number + 1, normalized_ll_diff, normalized_threshold,
|
||||
current_metrics->ll_diff, ll_diff_threshold);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Detection rule 2: Structural change (high-frequency energy spike)
|
||||
// Improvement: Require temporal persistence only for borderline detections
|
||||
double hb_ratio_threshold = ANALYSIS_HB_RATIO_THRESHOLD;
|
||||
|
||||
// Calculate average highband energy from history (normalized by total energy for RMS-like measure)
|
||||
double hb_energy_sum = 0.0;
|
||||
for (int i = start_idx; i < history_count; i++) {
|
||||
hb_energy_sum += metrics_history[i].highband_energy;
|
||||
}
|
||||
double hb_energy_mean = hb_energy_sum / window_size;
|
||||
double hb_energy_threshold = hb_energy_mean * ANALYSIS_HB_ENERGY_MULTIPLIER;
|
||||
|
||||
// Check if highband spike is detected
|
||||
if (current_metrics->highband_ratio > hb_ratio_threshold &&
|
||||
current_metrics->highband_energy > hb_energy_threshold) {
|
||||
|
||||
// Calculate confidence: how much does it exceed threshold?
|
||||
double ratio_confidence = current_metrics->highband_ratio / hb_ratio_threshold;
|
||||
double energy_confidence = current_metrics->highband_energy / hb_energy_threshold;
|
||||
double min_confidence = ratio_confidence < energy_confidence ? ratio_confidence : energy_confidence;
|
||||
|
||||
// High confidence (>1.3x threshold): Skip persistence check (likely hard cut)
|
||||
if (min_confidence > 1.3) {
|
||||
if (verbose) {
|
||||
printf(" Scene change detected frame %d: HB_ratio=%.3f > %.3f AND HB_energy=%.1f > %.1f (high confidence: %.2fx)\n",
|
||||
frame_number + 1, current_metrics->highband_ratio, hb_ratio_threshold,
|
||||
current_metrics->highband_energy, hb_energy_threshold, min_confidence);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Borderline detection: Check persistence to avoid single-frame flashes
|
||||
if (history_count >= 1) {
|
||||
const frame_analysis_t *prev_metrics = &metrics_history[history_count - 1];
|
||||
if (prev_metrics->highband_ratio > hb_ratio_threshold * 0.6 || // Relaxed to 60%
|
||||
prev_metrics->highband_energy > hb_energy_threshold * 0.6) {
|
||||
if (verbose) {
|
||||
printf(" Scene change detected frame %d: HB_ratio=%.3f > %.3f AND HB_energy=%.1f > %.1f (persistent)\n",
|
||||
frame_number + 1, current_metrics->highband_ratio, hb_ratio_threshold,
|
||||
current_metrics->highband_energy, hb_energy_threshold);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Detection rule 3: Gradual transition (slow LL_mean change over several frames)
|
||||
// Check if LL_mean changed significantly over last 5 frames
|
||||
if (history_count >= 5) {
|
||||
double ll_mean_5_frames_ago = metrics_history[history_count - 5].ll_mean;
|
||||
double ll_mean_change = fabs(current_metrics->ll_mean - ll_mean_5_frames_ago);
|
||||
|
||||
if (ll_mean_change > ANALYSIS_FADE_THRESHOLD) {
|
||||
if (verbose) {
|
||||
printf(" Scene change detected frame %d: Gradual fade - LL_mean change=%.2f over 5 frames (threshold=%.1f)\n",
|
||||
frame_number + 1, ll_mean_change, ANALYSIS_FADE_THRESHOLD);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0; // No scene change detected
|
||||
}
|
||||
|
||||
// Split a scene into evenly-sized GOPs
|
||||
// Returns linked list of GOP boundaries for the scene
|
||||
static gop_boundary_t* split_scene_into_gops(int scene_start, int scene_end,
|
||||
int min_gop_size, int max_gop_size,
|
||||
gop_boundary_t **tail_ptr, int verbose) {
|
||||
int scene_length = scene_end - scene_start + 1;
|
||||
|
||||
if (scene_length < min_gop_size) {
|
||||
// Scene too short, make it a single GOP
|
||||
gop_boundary_t *boundary = malloc(sizeof(gop_boundary_t));
|
||||
boundary->start_frame = scene_start;
|
||||
boundary->end_frame = scene_end;
|
||||
boundary->num_frames = scene_length;
|
||||
boundary->next = NULL;
|
||||
*tail_ptr = boundary;
|
||||
return boundary;
|
||||
}
|
||||
|
||||
// Calculate optimal number of GOPs for this scene
|
||||
int num_gops = (scene_length + max_gop_size - 1) / max_gop_size; // ceil(scene_length / max_gop_size)
|
||||
|
||||
// Make sure each GOP is at least min_gop_size
|
||||
if (scene_length / num_gops < min_gop_size) {
|
||||
num_gops = scene_length / min_gop_size;
|
||||
}
|
||||
|
||||
if (num_gops < 1) num_gops = 1;
|
||||
|
||||
// Calculate base GOP size and remainder for even distribution
|
||||
int base_gop_size = scene_length / num_gops;
|
||||
int remainder = scene_length % num_gops;
|
||||
|
||||
gop_boundary_t *head = NULL;
|
||||
gop_boundary_t *tail = NULL;
|
||||
int current_frame = scene_start;
|
||||
|
||||
for (int i = 0; i < num_gops; i++) {
|
||||
// Distribute remainder frames evenly across GOPs
|
||||
int gop_size = base_gop_size + (i < remainder ? 1 : 0);
|
||||
|
||||
gop_boundary_t *boundary = malloc(sizeof(gop_boundary_t));
|
||||
boundary->start_frame = current_frame;
|
||||
boundary->end_frame = current_frame + gop_size - 1;
|
||||
boundary->num_frames = gop_size;
|
||||
boundary->next = NULL;
|
||||
|
||||
if (tail) {
|
||||
tail->next = boundary;
|
||||
tail = boundary;
|
||||
} else {
|
||||
head = tail = boundary;
|
||||
}
|
||||
|
||||
if (verbose) {
|
||||
printf(" GOP %d: frames %d-%d (length %d)\n",
|
||||
i + 1, boundary->start_frame, boundary->end_frame, boundary->num_frames);
|
||||
}
|
||||
|
||||
current_frame += gop_size;
|
||||
}
|
||||
|
||||
*tail_ptr = tail;
|
||||
return head;
|
||||
}
|
||||
|
||||
// Build GOP boundaries from frame analysis data
|
||||
// First detects scene boundaries, then splits each scene into evenly-sized GOPs
|
||||
static gop_boundary_t* build_gop_boundaries(const frame_analysis_t *analyses, int num_frames,
|
||||
int min_gop_size, int max_gop_size, int verbose) {
|
||||
if (num_frames < min_gop_size) return NULL;
|
||||
|
||||
// Step 1: Detect scene boundaries (actual hard cuts only)
|
||||
int *scene_boundaries = malloc((num_frames + 1) * sizeof(int));
|
||||
int num_scenes = 0;
|
||||
scene_boundaries[num_scenes++] = 0; // First scene starts at frame 0
|
||||
|
||||
for (int i = 1; i < num_frames; i++) {
|
||||
if (analyses[i].is_scene_change) {
|
||||
scene_boundaries[num_scenes++] = i;
|
||||
if (verbose) {
|
||||
printf("Scene boundary candidate at frame %d\n", i);
|
||||
}
|
||||
}
|
||||
}
|
||||
scene_boundaries[num_scenes++] = num_frames; // End of last scene
|
||||
|
||||
// Step 1.5: Merge tiny scenes (< min_gop_size) with adjacent scenes
|
||||
// This prevents false positives from creating 1-frame GOPs
|
||||
int *merged_boundaries = malloc((num_scenes + 1) * sizeof(int));
|
||||
int num_merged = 0;
|
||||
merged_boundaries[num_merged++] = scene_boundaries[0]; // Always keep first boundary
|
||||
|
||||
for (int s = 1; s < num_scenes; s++) {
|
||||
int scene_length = scene_boundaries[s] - scene_boundaries[s - 1];
|
||||
|
||||
// If this scene is too short, skip this boundary (merge with next scene)
|
||||
if (scene_length >= min_gop_size || s == num_scenes - 1) {
|
||||
merged_boundaries[num_merged++] = scene_boundaries[s];
|
||||
} else if (verbose) {
|
||||
printf(" Merging tiny scene at frame %d (length %d)\n",
|
||||
scene_boundaries[s - 1], scene_length);
|
||||
}
|
||||
}
|
||||
|
||||
// Replace original boundaries with merged ones
|
||||
free(scene_boundaries);
|
||||
scene_boundaries = merged_boundaries;
|
||||
num_scenes = num_merged;
|
||||
|
||||
if (verbose) {
|
||||
printf("After merging: %d scenes\n", num_scenes - 1);
|
||||
}
|
||||
|
||||
// Step 2: Split each scene into evenly-sized GOPs
|
||||
gop_boundary_t *head = NULL;
|
||||
gop_boundary_t *tail = NULL;
|
||||
|
||||
for (int s = 0; s < num_scenes - 1; s++) {
|
||||
int scene_start = scene_boundaries[s];
|
||||
int scene_end = scene_boundaries[s + 1] - 1;
|
||||
int scene_length = scene_end - scene_start + 1;
|
||||
|
||||
if (verbose) {
|
||||
printf("Scene %d: frames %d-%d (length %d)\n",
|
||||
s + 1, scene_start, scene_end, scene_length);
|
||||
}
|
||||
|
||||
// Split scene into evenly-sized GOPs
|
||||
gop_boundary_t *scene_tail = NULL;
|
||||
gop_boundary_t *scene_gops = split_scene_into_gops(scene_start, scene_end,
|
||||
min_gop_size, max_gop_size,
|
||||
&scene_tail, verbose);
|
||||
|
||||
// Link to main GOP list
|
||||
if (head == NULL) {
|
||||
head = scene_gops;
|
||||
tail = scene_tail;
|
||||
} else {
|
||||
tail->next = scene_gops;
|
||||
tail = scene_tail;
|
||||
}
|
||||
}
|
||||
|
||||
free(scene_boundaries);
|
||||
return head;
|
||||
}
|
||||
|
||||
// Free GOP boundary list
|
||||
static void free_gop_boundaries(gop_boundary_t *head) {
|
||||
while (head) {
|
||||
gop_boundary_t *next = head->next;
|
||||
free(head);
|
||||
head = next;
|
||||
}
|
||||
}
|
||||
|
||||
// First pass: Analyze all frames and build GOP boundaries
|
||||
// Returns 0 on success, -1 on error
|
||||
static int two_pass_first_pass(tav_encoder_t *enc, const char *input_file) {
|
||||
printf("=== Two-Pass Encoding: First Pass (Scene Analysis) ===\n");
|
||||
|
||||
// Allocate analysis array (estimate: 10000 frames max for in-memory storage)
|
||||
enc->frame_analyses_capacity = 10000;
|
||||
enc->frame_analyses = malloc(enc->frame_analyses_capacity * sizeof(frame_analysis_t));
|
||||
enc->frame_analyses_count = 0;
|
||||
|
||||
if (!enc->frame_analyses) {
|
||||
fprintf(stderr, "Error: Failed to allocate frame analysis buffer\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Calculate subsampled dimensions
|
||||
int sub_width = enc->width / ANALYSIS_SUBSAMPLE_FACTOR;
|
||||
int sub_height = enc->height / ANALYSIS_SUBSAMPLE_FACTOR;
|
||||
|
||||
// Open FFmpeg pipe for first pass using SAME filters as second pass
|
||||
// This ensures frame counts match between passes
|
||||
char ffmpeg_cmd[4096];
|
||||
if (enc->progressive_mode) {
|
||||
// Progressive: scale and crop only
|
||||
snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd),
|
||||
"ffmpeg -loglevel error -i \"%s\" -f rawvideo -pix_fmt rgb24 "
|
||||
"-vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" -",
|
||||
input_file, enc->width, enc->height, enc->width, enc->height);
|
||||
} else {
|
||||
// Interlaced: scale, crop, and separate fields (doubles frame count!)
|
||||
snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd),
|
||||
"ffmpeg -loglevel error -i \"%s\" -f rawvideo -pix_fmt rgb24 "
|
||||
"-vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d,tinterlace=interleave_top:cvlpf,separatefields\" -",
|
||||
input_file, enc->width, enc->height * 2, enc->width, enc->height * 2);
|
||||
}
|
||||
|
||||
FILE *ffmpeg_pipe = popen(ffmpeg_cmd, "r");
|
||||
if (!ffmpeg_pipe) {
|
||||
fprintf(stderr, "Error: Failed to open FFmpeg pipe for first pass\n");
|
||||
free(enc->frame_analyses);
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t frame_rgb_size = enc->width * enc->height * 3;
|
||||
uint8_t *frame_rgb = malloc(frame_rgb_size);
|
||||
float *prev_dwt = NULL;
|
||||
|
||||
int frame_num = 0;
|
||||
size_t bytes_read;
|
||||
while ((bytes_read = fread(frame_rgb, 1, frame_rgb_size, ffmpeg_pipe)) == frame_rgb_size) {
|
||||
// Honor encode limit BEFORE processing
|
||||
if (enc->encode_limit > 0 && frame_num >= enc->encode_limit) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Subsample to grayscale
|
||||
float *gray = subsample_frame_to_gray(frame_rgb, enc->width, enc->height, ANALYSIS_SUBSAMPLE_FACTOR);
|
||||
|
||||
// Apply 3-level Haar DWT
|
||||
analysis_haar_2d_forward(gray, sub_width, sub_height, ANALYSIS_DWT_LEVELS);
|
||||
|
||||
// Compute metrics
|
||||
frame_analysis_t metrics;
|
||||
metrics.frame_number = frame_num;
|
||||
compute_frame_metrics(gray, prev_dwt, sub_width, sub_height, ANALYSIS_DWT_LEVELS, &metrics);
|
||||
|
||||
// Detect scene change using hybrid detector
|
||||
if (frame_num > 0) {
|
||||
metrics.is_scene_change = detect_scene_change_wavelet(
|
||||
frame_num,
|
||||
enc->frame_analyses,
|
||||
enc->frame_analyses_count,
|
||||
&metrics,
|
||||
enc->verbose
|
||||
);
|
||||
} else {
|
||||
metrics.is_scene_change = 0; // First frame is always start of first GOP
|
||||
}
|
||||
|
||||
// Store analysis
|
||||
if (enc->frame_analyses_count >= enc->frame_analyses_capacity) {
|
||||
// Expand array
|
||||
enc->frame_analyses_capacity *= 2;
|
||||
enc->frame_analyses = realloc(enc->frame_analyses,
|
||||
enc->frame_analyses_capacity * sizeof(frame_analysis_t));
|
||||
if (!enc->frame_analyses) {
|
||||
fprintf(stderr, "Error: Failed to reallocate analysis buffer\n");
|
||||
free(gray);
|
||||
if (prev_dwt) free(prev_dwt);
|
||||
free(frame_rgb);
|
||||
pclose(ffmpeg_pipe);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
enc->frame_analyses[enc->frame_analyses_count++] = metrics;
|
||||
|
||||
// Update previous DWT
|
||||
if (prev_dwt) free(prev_dwt);
|
||||
prev_dwt = gray;
|
||||
|
||||
frame_num++;
|
||||
|
||||
if (frame_num % 100 == 0) {
|
||||
printf(" Analyzed %d frames...\r", frame_num);
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
printf("\n Analyzed %d frames total\n", frame_num);
|
||||
|
||||
free(frame_rgb);
|
||||
if (prev_dwt) free(prev_dwt);
|
||||
pclose(ffmpeg_pipe);
|
||||
|
||||
// Build GOP boundaries
|
||||
printf(" Building GOP boundaries...\n");
|
||||
enc->gop_boundaries = build_gop_boundaries(
|
||||
enc->frame_analyses,
|
||||
enc->frame_analyses_count,
|
||||
ANALYSIS_GOP_MIN_SIZE,
|
||||
ANALYSIS_GOP_MAX_SIZE,
|
||||
enc->verbose
|
||||
);
|
||||
|
||||
// Count and print GOP statistics
|
||||
int num_gops = 0;
|
||||
int total_gop_frames = 0;
|
||||
int min_gop = INT_MAX, max_gop = 0;
|
||||
gop_boundary_t *gop = enc->gop_boundaries;
|
||||
while (gop) {
|
||||
num_gops++;
|
||||
total_gop_frames += gop->num_frames;
|
||||
if (gop->num_frames < min_gop) min_gop = gop->num_frames;
|
||||
if (gop->num_frames > max_gop) max_gop = gop->num_frames;
|
||||
gop = gop->next;
|
||||
}
|
||||
|
||||
printf(" GOP Statistics:\n");
|
||||
printf(" Total GOPs: %d\n", num_gops);
|
||||
printf(" Average GOP size: %.1f frames\n", (double)total_gop_frames / num_gops);
|
||||
printf(" Min GOP size: %d frames\n", min_gop);
|
||||
printf(" Max GOP size: %d frames\n", max_gop);
|
||||
|
||||
printf("=== First Pass Complete ===\n\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Detect still frames by comparing quantised DWT coefficients
|
||||
// Returns 1 if frame is still (suitable for SKIP mode), 0 otherwise
|
||||
static int detect_still_frame(tav_encoder_t *enc) {
|
||||
@@ -9632,6 +10360,7 @@ int main(int argc, char *argv[]) {
|
||||
{"native-audio", no_argument, 0, 1027},
|
||||
{"native-audio-format", no_argument, 0, 1027},
|
||||
{"tad-audio", no_argument, 0, 1028},
|
||||
{"single-pass", no_argument, 0, 1050}, // disable two-pass encoding with wavelet-based scene detection
|
||||
{"help", no_argument, 0, '?'},
|
||||
{0, 0, 0, 0}
|
||||
};
|
||||
@@ -9856,6 +10585,10 @@ int main(int argc, char *argv[]) {
|
||||
enc->tad_audio = 1;
|
||||
printf("TAD audio mode enabled (packet 0x24, quality follows -q)\n");
|
||||
break;
|
||||
case 1050: // --single-pass
|
||||
enc->two_pass_mode = 0;
|
||||
printf("Two-pass wavelet-based scene change detection disabled\n");
|
||||
break;
|
||||
case 'a':
|
||||
int bitrate = atoi(optarg);
|
||||
int valid_bitrate = validate_mp2_bitrate(bitrate);
|
||||
@@ -9924,6 +10657,11 @@ int main(int argc, char *argv[]) {
|
||||
enc->lossless = 1;
|
||||
}
|
||||
|
||||
// if temporal-dwt is used, and user did not select suitable audio codec, force PCMu8 (or TAD when it's production-ready)
|
||||
if (enc->enable_temporal_dwt && !enc->pcm8_audio && !enc->tad_audio) {
|
||||
enc->pcm8_audio = 1; // TODO replace with tad_audio when it's production-ready
|
||||
}
|
||||
|
||||
if ((!enc->input_file && !enc->test_mode) || !enc->output_file) {
|
||||
fprintf(stderr, "Error: Input and output files must be specified\n");
|
||||
show_usage(argv[0]);
|
||||
@@ -10048,6 +10786,52 @@ int main(int argc, char *argv[]) {
|
||||
printf("Frame rate conversion enabled: %d fps output\n", enc->output_fps);
|
||||
}
|
||||
|
||||
// Two-pass mode: Run first pass for scene analysis
|
||||
if (enc->two_pass_mode) {
|
||||
if (two_pass_first_pass(enc, enc->input_file) != 0) {
|
||||
fprintf(stderr, "Error: First pass failed\n");
|
||||
cleanup_encoder(enc);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Initialize GOP boundary iterator for second pass
|
||||
enc->current_gop_boundary = enc->gop_boundaries;
|
||||
enc->two_pass_current_frame = 0;
|
||||
|
||||
// Adjust GOP capacity to match maximum computed GOP size
|
||||
enc->temporal_gop_capacity = ANALYSIS_GOP_MAX_SIZE;
|
||||
|
||||
// Re-allocate GOP buffers with new capacity
|
||||
enc->temporal_gop_rgb_frames = realloc(enc->temporal_gop_rgb_frames,
|
||||
enc->temporal_gop_capacity * sizeof(uint8_t*));
|
||||
enc->temporal_gop_y_frames = realloc(enc->temporal_gop_y_frames,
|
||||
enc->temporal_gop_capacity * sizeof(float*));
|
||||
enc->temporal_gop_co_frames = realloc(enc->temporal_gop_co_frames,
|
||||
enc->temporal_gop_capacity * sizeof(float*));
|
||||
enc->temporal_gop_cg_frames = realloc(enc->temporal_gop_cg_frames,
|
||||
enc->temporal_gop_capacity * sizeof(float*));
|
||||
enc->temporal_gop_translation_x = realloc(enc->temporal_gop_translation_x,
|
||||
enc->temporal_gop_capacity * sizeof(int16_t));
|
||||
enc->temporal_gop_translation_y = realloc(enc->temporal_gop_translation_y,
|
||||
enc->temporal_gop_capacity * sizeof(int16_t));
|
||||
|
||||
// Allocate new frame buffers for expanded capacity
|
||||
int frame_size = enc->width * enc->height;
|
||||
for (int i = TEMPORAL_GOP_SIZE; i < ANALYSIS_GOP_MAX_SIZE; i++) {
|
||||
enc->temporal_gop_rgb_frames[i] = malloc(frame_size * 3);
|
||||
enc->temporal_gop_y_frames[i] = malloc(frame_size * sizeof(float));
|
||||
enc->temporal_gop_co_frames[i] = malloc(frame_size * sizeof(float));
|
||||
enc->temporal_gop_cg_frames[i] = malloc(frame_size * sizeof(float));
|
||||
}
|
||||
|
||||
if (enc->verbose) {
|
||||
printf(" Adjusted GOP capacity from %d to %d frames\n",
|
||||
TEMPORAL_GOP_SIZE, ANALYSIS_GOP_MAX_SIZE);
|
||||
}
|
||||
|
||||
printf("\n=== Two-Pass Encoding: Second Pass (Encoding) ===\n");
|
||||
}
|
||||
|
||||
printf("Starting encoding...\n");
|
||||
|
||||
// Main encoding loop - process frames until EOF or frame limit
|
||||
@@ -10138,7 +10922,13 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
// Determine frame type
|
||||
double scene_change_ratio = 0.0;
|
||||
int is_scene_change = detect_scene_change(enc, &scene_change_ratio);
|
||||
int is_scene_change = 0;
|
||||
|
||||
// Only detect scene changes in non-two-pass mode (two-pass uses pre-computed GOP boundaries)
|
||||
if (!enc->two_pass_mode) {
|
||||
is_scene_change = detect_scene_change(enc, &scene_change_ratio);
|
||||
}
|
||||
|
||||
int is_time_keyframe = (frame_count % TEMPORAL_GOP_SIZE) == 0;
|
||||
|
||||
// Check if we can use SKIP mode (DWT coefficient-based detection)
|
||||
@@ -10221,6 +11011,9 @@ int main(int argc, char *argv[]) {
|
||||
int should_flush_scene_change = 0;
|
||||
int force_iframes_for_scene_change = 0;
|
||||
|
||||
// Only use old scene change detection in non-two-pass mode
|
||||
// Two-pass mode uses pre-computed GOP boundaries instead
|
||||
if (!enc->two_pass_mode) {
|
||||
if (is_scene_change && enc->temporal_gop_frame_count > 0) {
|
||||
|
||||
if (scene_change_ratio >= SCENE_CHANGE_THRESHOLD_HARD) {
|
||||
@@ -10246,6 +11039,7 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (should_flush_scene_change) {
|
||||
// Get quantiser
|
||||
@@ -10315,6 +11109,17 @@ int main(int argc, char *argv[]) {
|
||||
enc->total_compressed_size += packet_size;
|
||||
|
||||
gop_reset(enc);
|
||||
|
||||
// Two-pass mode: advance to next GOP boundary
|
||||
if (enc->two_pass_mode && enc->current_gop_boundary) {
|
||||
enc->current_gop_boundary = enc->current_gop_boundary->next;
|
||||
if (enc->verbose && enc->current_gop_boundary) {
|
||||
printf(" Advanced to next GOP: frames %d-%d (length %d)\n",
|
||||
enc->current_gop_boundary->start_frame,
|
||||
enc->current_gop_boundary->end_frame,
|
||||
enc->current_gop_boundary->num_frames);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Now add current frame to GOP (will be first frame of new GOP if scene change)
|
||||
@@ -10330,6 +11135,15 @@ int main(int argc, char *argv[]) {
|
||||
int should_flush = 0;
|
||||
int force_flush = 0;
|
||||
|
||||
// Two-pass mode: use pre-computed GOP boundaries
|
||||
if (enc->two_pass_mode) {
|
||||
if (gop_should_flush_twopass(enc, frame_count)) {
|
||||
should_flush = 1;
|
||||
force_flush = 1; // Force flush at pre-computed boundaries
|
||||
}
|
||||
}
|
||||
// Normal mode: use motion-based detection
|
||||
else {
|
||||
// Flush if GOP is full
|
||||
if (gop_is_full(enc)) {
|
||||
should_flush = 1;
|
||||
@@ -10352,6 +11166,7 @@ int main(int argc, char *argv[]) {
|
||||
enc->temporal_gop_frame_count, TEMPORAL_GOP_SIZE_MIN);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Note: Scene change flush is now handled BEFORE adding frame (above)
|
||||
|
||||
// Flush GOP if needed (for reasons other than scene change)
|
||||
@@ -10378,6 +11193,19 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
// Update total compressed size with GOP packet
|
||||
enc->total_compressed_size += packet_size;
|
||||
|
||||
gop_reset(enc);
|
||||
|
||||
// Two-pass mode: advance to next GOP boundary
|
||||
if (enc->two_pass_mode && enc->current_gop_boundary) {
|
||||
enc->current_gop_boundary = enc->current_gop_boundary->next;
|
||||
if (enc->verbose && enc->current_gop_boundary) {
|
||||
printf(" Advanced to next GOP: frames %d-%d (length %d)\n",
|
||||
enc->current_gop_boundary->start_frame,
|
||||
enc->current_gop_boundary->end_frame,
|
||||
enc->current_gop_boundary->num_frames);
|
||||
}
|
||||
}
|
||||
} else if (packet_size == 0) {
|
||||
// Frame added to GOP buffer but not flushed yet
|
||||
// Skip normal packet processing (no packet written yet)
|
||||
@@ -10805,6 +11633,14 @@ static void cleanup_encoder(tav_encoder_t *enc) {
|
||||
free_subtitle_list(enc->subtitles);
|
||||
}
|
||||
|
||||
// Free two-pass data structures
|
||||
if (enc->frame_analyses) {
|
||||
free(enc->frame_analyses);
|
||||
}
|
||||
if (enc->gop_boundaries) {
|
||||
free_gop_boundaries(enc->gop_boundaries);
|
||||
}
|
||||
|
||||
if (enc->zstd_ctx) {
|
||||
ZSTD_freeCCtx(enc->zstd_ctx);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user