mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-07 11:51:49 +09:00
1557 lines
58 KiB
C
1557 lines
58 KiB
C
/**
|
|
* TAV Encoder Library - Main Implementation
|
|
*
|
|
* High-level API for encoding video using TAV codec with GOP-based
|
|
* multi-threaded encoding.
|
|
*
|
|
* Based on encoder_tav.c - extracted into library form.
|
|
*/
|
|
|
|
#include "tav_encoder_lib.h"
|
|
#include "tav_encoder_color.h"
|
|
#include "tav_encoder_dwt.h"
|
|
#include "tav_encoder_quantize.h"
|
|
#include "tav_encoder_ezbc.h"
|
|
#include "tav_encoder_utils.h"
|
|
#include "encoder_tad.h"
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <stdint.h>
|
|
#include <threads.h>
|
|
#include <time.h>
|
|
#include <zstd.h>
|
|
|
|
// =============================================================================
|
|
// Internal Constants
|
|
// =============================================================================
|
|
|
|
/* Human-readable library identification, printed when a context is created verbosely. */
#define ENCODER_VERSION     "TAV Encoder Library v1.0"

/* Capacity (including the terminating NUL) of every error_message buffer in this file. */
#define MAX_ERROR_MESSAGE   256

/* Values stored in gop_slot_t.status. */
#define GOP_STATUS_EMPTY    0
#define GOP_STATUS_FILLING  1
#define GOP_STATUS_READY    2
#define GOP_STATUS_ENCODING 3
#define GOP_STATUS_COMPLETE 4
|
|
|
|
/*
 * Quantiser lookup table: 256 entries.
 * Entries 0-63 step by 1 (1..64); after that the step doubles every
 * 32 entries (2, 4, 8, 16, 32, 64) up to 4096.
 */
static const int QLUT[] = {
    /* step 1 */
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
    17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
    49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
    /* step 2 */
    66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96,
    98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 128,
    /* step 4 */
    132, 136, 140, 144, 148, 152, 156, 160, 164, 168, 172, 176, 180, 184, 188, 192,
    196, 200, 204, 208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 248, 252, 256,
    /* step 8 */
    264, 272, 280, 288, 296, 304, 312, 320, 328, 336, 344, 352, 360, 368, 376, 384,
    392, 400, 408, 416, 424, 432, 440, 448, 456, 464, 472, 480, 488, 496, 504, 512,
    /* step 16 */
    528, 544, 560, 576, 592, 608, 624, 640, 656, 672, 688, 704, 720, 736, 752, 768,
    784, 800, 816, 832, 848, 864, 880, 896, 912, 928, 944, 960, 976, 992, 1008, 1024,
    /* step 32 */
    1056, 1088, 1120, 1152, 1184, 1216, 1248, 1280, 1312, 1344, 1376, 1408, 1440, 1472, 1504, 1536,
    1568, 1600, 1632, 1664, 1696, 1728, 1760, 1792, 1824, 1856, 1888, 1920, 1952, 1984, 2016, 2048,
    /* step 64 */
    2112, 2176, 2240, 2304, 2368, 2432, 2496, 2560, 2624, 2688, 2752, 2816, 2880, 2944, 3008, 3072,
    3136, 3200, 3264, 3328, 3392, 3456, 3520, 3584, 3648, 3712, 3776, 3840, 3904, 3968, 4032, 4096
};

/*
 * Per-quality-level defaults. The three QUALITY_* tables cover quality
 * levels 0-5 and hold quantiser indices (positions in QLUT), one table
 * per colour channel.
 */
static const int QUALITY_Y[]  = {79, 47, 23, 11, 5, 2};
static const int QUALITY_CO[] = {123, 108, 91, 76, 59, 29};
static const int QUALITY_CG[] = {148, 133, 113, 99, 76, 39};

/* Dead-zone quantisation thresholds; this table has seven entries, one more than the tables above. */
static const float DEAD_ZONE_THRESHOLD[] = {1.5f, 1.5f, 1.2f, 1.1f, 0.8f, 0.6f, 0.0f};
|
|
|
|
/* Channel layout identifiers (from TAV specification). */
#define CHANNEL_LAYOUT_YCOCG   0
#define CHANNEL_LAYOUT_YCOCG_A 1
#define CHANNEL_LAYOUT_Y_ONLY  2
#define CHANNEL_LAYOUT_Y_A     3
#define CHANNEL_LAYOUT_COCG    4
#define CHANNEL_LAYOUT_COCG_A  5

/* Describes which channels one layout carries. */
typedef struct {
    int layout_id;            /* One of the CHANNEL_LAYOUT_* values */
    int num_channels;         /* Count of non-NULL entries in channels[] */
    const char *channels[4];  /* Channel names; NULL where the channel is absent */
    int has_y;                /* 1 when the Y channel is present */
    int has_co;               /* 1 when the Co channel is present */
    int has_cg;               /* 1 when the Cg channel is present */
    int has_alpha;            /* 1 when the alpha channel is present */
} channel_layout_config_t;

/* One entry per layout, stored so that the array index equals layout_id. */
static const channel_layout_config_t channel_layouts[] = {
    /* 0: Y-Co-Cg */
    { .layout_id = CHANNEL_LAYOUT_YCOCG, .num_channels = 3,
      .channels = {"Y", "Co", "Cg", NULL},
      .has_y = 1, .has_co = 1, .has_cg = 1, .has_alpha = 0 },
    /* 1: Y-Co-Cg-A */
    { .layout_id = CHANNEL_LAYOUT_YCOCG_A, .num_channels = 4,
      .channels = {"Y", "Co", "Cg", "A"},
      .has_y = 1, .has_co = 1, .has_cg = 1, .has_alpha = 1 },
    /* 2: Y only */
    { .layout_id = CHANNEL_LAYOUT_Y_ONLY, .num_channels = 1,
      .channels = {"Y", NULL, NULL, NULL},
      .has_y = 1, .has_co = 0, .has_cg = 0, .has_alpha = 0 },
    /* 3: Y-A */
    { .layout_id = CHANNEL_LAYOUT_Y_A, .num_channels = 2,
      .channels = {"Y", NULL, NULL, "A"},
      .has_y = 1, .has_co = 0, .has_cg = 0, .has_alpha = 1 },
    /* 4: Co-Cg */
    { .layout_id = CHANNEL_LAYOUT_COCG, .num_channels = 2,
      .channels = {NULL, "Co", "Cg", NULL},
      .has_y = 0, .has_co = 1, .has_cg = 1, .has_alpha = 0 },
    /* 5: Co-Cg-A */
    { .layout_id = CHANNEL_LAYOUT_COCG_A, .num_channels = 3,
      .channels = {NULL, "Co", "Cg", "A"},
      .has_y = 0, .has_co = 1, .has_cg = 1, .has_alpha = 1 }
};
|
|
|
|
/* How quantised coefficients are arranged before entropy coding. */
typedef enum {
    /* Twobit-plane significance map (default, best compression) */
    PREPROCESS_TWOBITMAP = 0,
    /* EZBC embedded zero block coding */
    PREPROCESS_EZBC      = 1,
    /* No preprocessing - raw coefficients */
    PREPROCESS_RAW       = 2
} preprocess_mode_t;
|
|
|
|
// =============================================================================
|
|
// Internal Structures
|
|
// =============================================================================
|
|
|
|
/*
 * Compatibility structure for the extracted modules.
 *
 * The quantisation and DWT modules expect a tav_encoder_t with certain
 * fields; this minimal structure supplies exactly those fields.
 */
struct tav_encoder_s {
    /* Quality level, used for perceptual quantisation */
    int quality_level;
    /* Subband widths, one entry per decomposition level */
    int *widths;
    /* Subband heights, one entry per decomposition level */
    int *heights;
    /* Number of spatial DWT decomposition levels */
    int decomp_levels;
    /* Dead-zone quantisation threshold */
    float dead_zone_threshold;
    /* Preset flags (sports mode, etc.) */
    int encoder_preset;
    /* Number of temporal DWT levels */
    int temporal_decomp_levels;
    /* Verbose output flag */
    int verbose;
    /* Current frame number for encoding */
    int frame_count;
    /* For bitrate control (if needed) */
    float adjusted_quantiser_y_float;
    /* Dither accumulator for bitrate mode */
    float dither_accumulator;
    /* Frame width */
    int width;
    /* Frame height */
    int height;
    /* 1 = perceptual quantisation, 0 = uniform */
    int perceptual_tuning;
};
|
|
|
|
// GOP slot for circular buffering
|
|
typedef struct gop_slot {
|
|
// Status
|
|
volatile int status; // GOP_STATUS_* values
|
|
int gop_index; // Sequential GOP number
|
|
|
|
// Input data
|
|
uint8_t **rgb_frames; // [frame][width*height*3] RGB data
|
|
int num_frames; // Number of frames in this GOP
|
|
int *frame_numbers; // Original frame indices (for timecodes)
|
|
int width, height; // Frame dimensions
|
|
|
|
// Audio data
|
|
float *pcm_samples; // Stereo PCM32f samples (L,R,L,R,...)
|
|
size_t num_audio_samples; // Samples per channel
|
|
|
|
// Output data (filled by worker thread)
|
|
tav_encoder_packet_t *packets; // Array of output packets
|
|
int num_packets; // Number of packets in this GOP
|
|
|
|
// Error handling
|
|
int encoding_failed;
|
|
char error_message[MAX_ERROR_MESSAGE];
|
|
|
|
// Synchronization
|
|
mtx_t mutex;
|
|
cnd_t status_changed;
|
|
} gop_slot_t;
|
|
|
|
// Thread-local worker context
|
|
typedef struct thread_worker_context {
|
|
int thread_id;
|
|
struct thread_pool *pool;
|
|
|
|
// Thread-local work buffers (reused across GOPs)
|
|
float **work_y_frames; // [max_gop_size][max_pixels]
|
|
float **work_co_frames;
|
|
float **work_cg_frames;
|
|
int16_t **quantised_y;
|
|
int16_t **quantised_co;
|
|
int16_t **quantised_cg;
|
|
uint8_t *compression_buffer;
|
|
size_t compression_buffer_size;
|
|
ZSTD_CCtx *zstd_ctx;
|
|
|
|
// Buffer sizing
|
|
int max_gop_frames;
|
|
size_t max_frame_pixels;
|
|
} thread_worker_context_t;
|
|
|
|
// Thread pool structure
|
|
typedef struct thread_pool {
|
|
int num_threads;
|
|
thrd_t *worker_threads;
|
|
|
|
// Circular buffer of GOP slots
|
|
gop_slot_t *slots;
|
|
int num_slots; // 2 * num_threads
|
|
int slot_capacity; // Max frames per GOP
|
|
|
|
// Producer state (frame submission)
|
|
int next_slot_to_fill;
|
|
int total_gops_produced;
|
|
int producer_finished; // 1 when no more frames
|
|
|
|
// Job queue for workers
|
|
int *job_queue;
|
|
int job_queue_head;
|
|
int job_queue_tail;
|
|
int job_queue_size;
|
|
int job_queue_capacity;
|
|
mtx_t job_queue_mutex;
|
|
cnd_t job_available;
|
|
cnd_t slot_available;
|
|
|
|
// Shutdown signal
|
|
int shutdown;
|
|
|
|
// Shared encoder context (read-only)
|
|
struct tav_encoder_context *shared_ctx;
|
|
} thread_pool_t;
|
|
|
|
// Main encoder context (opaque to API users)
|
|
struct tav_encoder_context {
|
|
// Configuration (from params)
|
|
int width, height;
|
|
int fps_num, fps_den;
|
|
int wavelet_type;
|
|
int temporal_wavelet;
|
|
int decomp_levels;
|
|
int temporal_levels;
|
|
int channel_layout;
|
|
int perceptual_tuning;
|
|
int enable_temporal_dwt;
|
|
int gop_size;
|
|
int enable_two_pass;
|
|
int quality_level, quality_y, quality_co, quality_cg;
|
|
int dead_zone_threshold;
|
|
int entropy_coder;
|
|
int zstd_level;
|
|
int num_threads;
|
|
int encoder_preset;
|
|
int verbose;
|
|
int monoblock;
|
|
|
|
// Derived quantizer values (QLUT indices)
|
|
int quantiser_y, quantiser_co, quantiser_cg;
|
|
|
|
// Compatibility encoder for modules (quantization, DWT)
|
|
tav_encoder_t *compat_enc;
|
|
|
|
// Thread pool (NULL if single-threaded)
|
|
thread_pool_t *pool;
|
|
|
|
// Single-threaded GOP buffer
|
|
uint8_t **gop_rgb_frames; // [frame][pixel*3]
|
|
int gop_frame_count;
|
|
int64_t *gop_frame_pts; // Presentation timestamps
|
|
|
|
// TAD audio quality mapping
|
|
int tad_max_index;
|
|
|
|
// Error handling
|
|
char error_message[MAX_ERROR_MESSAGE];
|
|
|
|
// Statistics
|
|
int64_t frames_encoded;
|
|
int64_t gops_encoded;
|
|
size_t total_bytes;
|
|
size_t video_bytes;
|
|
size_t audio_bytes;
|
|
time_t start_time;
|
|
};
|
|
|
|
// =============================================================================
|
|
// Forward Declarations
|
|
// =============================================================================
|
|
|
|
static int encode_gop_intra_only(tav_encoder_context_t *ctx, gop_slot_t *slot);
|
|
static int encode_gop_unified(tav_encoder_context_t *ctx, gop_slot_t *slot);
|
|
static int worker_thread_main(void *arg);
|
|
static void free_gop_slot(gop_slot_t *slot);
|
|
|
|
static tav_encoder_t *create_compat_encoder(tav_encoder_context_t *ctx);
|
|
static void free_compat_encoder(tav_encoder_t *enc);
|
|
|
|
static size_t preprocess_coefficients_ezbc(int16_t *coeffs_y, int16_t *coeffs_co, int16_t *coeffs_cg, int16_t *coeffs_alpha,
|
|
int coeff_count, int width, int height, int channel_layout,
|
|
uint8_t *output_buffer);
|
|
static size_t preprocess_gop_unified(preprocess_mode_t preprocess_mode, int16_t **quant_y, int16_t **quant_co, int16_t **quant_cg,
|
|
int num_frames, int num_pixels, int width, int height, int channel_layout,
|
|
uint8_t *output_buffer);
|
|
static void rgb_to_colour_space_frame(tav_encoder_context_t *ctx, const uint8_t *rgb,
|
|
float *c1, float *c2, float *c3,
|
|
int width, int height);
|
|
|
|
// =============================================================================
|
|
// Parameter Initialization
|
|
// =============================================================================
|
|
|
|
/**
 * Fill a parameter block with the library defaults for a given frame size.
 *
 * The whole structure is zeroed first, so any field not listed below ends
 * up as 0.
 *
 * @param params  Parameter block to initialise. A NULL pointer is ignored,
 *                matching the NULL handling of the other public entry points.
 * @param width   Frame width, stored as-is (validated by tav_encoder_create()).
 * @param height  Frame height, stored as-is (validated by tav_encoder_create()).
 */
void tav_encoder_params_init(tav_encoder_params_t *params, int width, int height) {
    if (!params) {
        return;
    }

    memset(params, 0, sizeof(*params));

    // Video dimensions and frame rate (60/1 fps by default)
    params->width = width;
    params->height = height;
    params->fps_num = 60;
    params->fps_den = 1;

    // Wavelet defaults
    params->wavelet_type = 1;        // CDF 9/7 (best compression)
    params->temporal_wavelet = 255;  // Always Haar
    params->decomp_levels = 0;       // 0 = auto-calculate at creation time
    params->temporal_levels = 2;     // Always 2

    // Color space
    params->channel_layout = 0;      // YCoCg-R
    params->perceptual_tuning = 1;   // Enable HVS model

    // GOP settings
    params->enable_temporal_dwt = 1; // Enable 3D DWT GOP encoding
    params->gop_size = 0;            // 0 = auto (chosen from the frame rate at creation time)
    params->enable_two_pass = 1;     // Enable scene change detection

    // Quality defaults (level 3 = balanced); the per-channel values are quantiser indices
    params->quality_level = 3;
    params->quality_y = QUALITY_Y[3];    // 11
    params->quality_co = QUALITY_CO[3];  // 76
    params->quality_cg = QUALITY_CG[3];  // 99
    params->dead_zone_threshold = DEAD_ZONE_THRESHOLD[3]; // 1.1 for Q3

    // Compression
    params->entropy_coder = 1;       // EZBC as default
    params->zstd_level = 7;          // Balanced compression/speed

    // Threading
    params->num_threads = 0;         // Single-threaded (multi-threading not yet implemented)

    // Encoder presets
    params->encoder_preset = 0;      // None

    // Advanced
    params->verbose = 0;
    params->monoblock = 1;           // Single tile (always 1 for current implementation)
}
|
|
|
|
// =============================================================================
|
|
// Encoder Creation
|
|
// =============================================================================
|
|
|
|
tav_encoder_context_t *tav_encoder_create(const tav_encoder_params_t *params) {
|
|
if (!params) {
|
|
return NULL;
|
|
}
|
|
|
|
// Validate parameters
|
|
if (params->width <= 0 || params->height <= 0) {
|
|
fprintf(stderr, "ERROR: Invalid dimensions %dx%d\n", params->width, params->height);
|
|
return NULL;
|
|
}
|
|
|
|
if (params->width % 2 != 0 || params->height % 2 != 0) {
|
|
fprintf(stderr, "ERROR: Dimensions must be even (got %dx%d)\n", params->width, params->height);
|
|
return NULL;
|
|
}
|
|
|
|
// Allocate context
|
|
tav_encoder_context_t *ctx = calloc(1, sizeof(tav_encoder_context_t));
|
|
if (!ctx) {
|
|
fprintf(stderr, "ERROR: Failed to allocate encoder context\n");
|
|
return NULL;
|
|
}
|
|
|
|
// Copy configuration
|
|
ctx->width = params->width;
|
|
ctx->height = params->height;
|
|
ctx->fps_num = params->fps_num;
|
|
ctx->fps_den = params->fps_den;
|
|
ctx->wavelet_type = params->wavelet_type;
|
|
ctx->temporal_wavelet = params->temporal_wavelet;
|
|
ctx->decomp_levels = params->decomp_levels;
|
|
ctx->temporal_levels = params->temporal_levels;
|
|
ctx->channel_layout = params->channel_layout;
|
|
ctx->perceptual_tuning = params->perceptual_tuning;
|
|
ctx->enable_temporal_dwt = params->enable_temporal_dwt;
|
|
ctx->gop_size = params->gop_size;
|
|
ctx->enable_two_pass = params->enable_two_pass;
|
|
ctx->quality_level = params->quality_level; // CRITICAL: Was missing, caused quality_level=0
|
|
ctx->quality_y = params->quality_y;
|
|
ctx->quality_co = params->quality_co;
|
|
ctx->quality_cg = params->quality_cg;
|
|
ctx->dead_zone_threshold = params->dead_zone_threshold;
|
|
ctx->entropy_coder = params->entropy_coder;
|
|
ctx->zstd_level = params->zstd_level;
|
|
ctx->num_threads = params->num_threads;
|
|
ctx->encoder_preset = params->encoder_preset;
|
|
ctx->verbose = params->verbose;
|
|
ctx->monoblock = params->monoblock;
|
|
|
|
// quality_y/co/cg already contain quantiser indices (0-255)
|
|
// Clamp to valid range
|
|
if (ctx->quality_y < 0) ctx->quality_y = 0;
|
|
if (ctx->quality_y > 255) ctx->quality_y = 255;
|
|
if (ctx->quality_co < 0) ctx->quality_co = 0;
|
|
if (ctx->quality_co > 255) ctx->quality_co = 255;
|
|
if (ctx->quality_cg < 0) ctx->quality_cg = 0;
|
|
if (ctx->quality_cg > 255) ctx->quality_cg = 255;
|
|
|
|
// Copy quantiser indices for encoding
|
|
ctx->quantiser_y = ctx->quality_y;
|
|
ctx->quantiser_co = ctx->quality_co;
|
|
ctx->quantiser_cg = ctx->quality_cg;
|
|
|
|
// Force EZBC entropy coder (Twobitmap is deprecated)
|
|
ctx->entropy_coder = 1;
|
|
// Force Haar temporal
|
|
ctx->temporal_wavelet = 255;
|
|
// Force temporal level 2
|
|
ctx->temporal_levels = 2;
|
|
|
|
// Calculate decomp levels if auto (0)
|
|
if (ctx->decomp_levels == 0) {
|
|
int levels = 0;
|
|
int min_dim = (ctx->width < ctx->height) ? ctx->width : ctx->height;
|
|
// Keep halving until we reach minimum size
|
|
while (min_dim >= 32) {
|
|
min_dim /= 2;
|
|
levels++;
|
|
}
|
|
// Cap at 6 levels maximum
|
|
ctx->decomp_levels = (levels > 6) ? 6 : levels;
|
|
}
|
|
|
|
// Calculate GOP size if auto (0)
|
|
if (ctx->gop_size == 0) {
|
|
int fps = ctx->fps_num / ctx->fps_den;
|
|
if (fps >= 50) {
|
|
ctx->gop_size = 8; // High frame rate: smaller GOPs
|
|
} else if (fps >= 25) {
|
|
ctx->gop_size = 16; // Medium frame rate
|
|
} else {
|
|
ctx->gop_size = 24; // Low frame rate: larger GOPs
|
|
}
|
|
}
|
|
|
|
// Auto-select temporal wavelet if still at default (255=Haar) and temporal DWT enabled
|
|
// Logic from old encoder: use Haar for large videos, CDF 5/3 for small/low-quality videos
|
|
if (ctx->enable_temporal_dwt && ctx->temporal_wavelet == 255) {
|
|
int num_pixels = ctx->width * ctx->height;
|
|
int use_pure_haar = 0;
|
|
|
|
// Smart preset based on resolution and quality
|
|
// For large videos with reasonable quality, use Haar (better compression)
|
|
// For smaller videos or low quality, use CDF 5/3 (better detail preservation)
|
|
if ((num_pixels >= 820000 && ctx->quantiser_y <= 29) ||
|
|
(num_pixels >= 500000 && ctx->quantiser_y <= 14) ||
|
|
(num_pixels >= 340000 && ctx->quantiser_y <= 7) ||
|
|
(num_pixels >= 260000 && ctx->quantiser_y <= 3)) {
|
|
use_pure_haar = 1;
|
|
}
|
|
|
|
if (use_pure_haar) {
|
|
ctx->temporal_wavelet = 255; // Keep Haar
|
|
if (ctx->verbose) {
|
|
printf("Auto-selected Haar temporal wavelet (resolution: %dx%d = %d pixels, quantiser_y = %d)\n",
|
|
ctx->width, ctx->height, num_pixels, ctx->quantiser_y);
|
|
}
|
|
} else {
|
|
ctx->temporal_wavelet = 255; // Keep Haar
|
|
ctx->encoder_preset |= 1; // Enable Sports mode
|
|
if (ctx->verbose) {
|
|
printf("Auto-selected Haar temporal wavelet with sports mode (resolution: %dx%d = %d pixels, quantiser_y = %d)\n",
|
|
ctx->width, ctx->height, num_pixels, ctx->quantiser_y);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Determine thread count
|
|
if (ctx->num_threads < 0) {
|
|
// Auto-detect: use system thread count
|
|
ctx->num_threads = 4; // Conservative default (TODO: detect actual CPU count)
|
|
} else if (ctx->num_threads == 0) {
|
|
ctx->num_threads = 0; // Single-threaded
|
|
}
|
|
|
|
// Allocate single-threaded GOP buffer if not using threading
|
|
if (ctx->num_threads == 0) {
|
|
ctx->gop_rgb_frames = calloc(ctx->gop_size, sizeof(uint8_t *));
|
|
ctx->gop_frame_pts = calloc(ctx->gop_size, sizeof(int64_t));
|
|
if (!ctx->gop_rgb_frames || !ctx->gop_frame_pts) {
|
|
snprintf(ctx->error_message, MAX_ERROR_MESSAGE,
|
|
"Failed to allocate GOP buffers");
|
|
tav_encoder_free(ctx);
|
|
return NULL;
|
|
}
|
|
|
|
size_t frame_size = ctx->width * ctx->height * 3;
|
|
for (int i = 0; i < ctx->gop_size; i++) {
|
|
ctx->gop_rgb_frames[i] = malloc(frame_size);
|
|
if (!ctx->gop_rgb_frames[i]) {
|
|
snprintf(ctx->error_message, MAX_ERROR_MESSAGE,
|
|
"Failed to allocate GOP frame buffer %d", i);
|
|
tav_encoder_free(ctx);
|
|
return NULL;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Set TAD audio quality mapping (from quality_y)
|
|
ctx->tad_max_index = tad32_quality_to_max_index(ctx->quality_y);
|
|
|
|
// Initialize statistics
|
|
ctx->start_time = time(NULL);
|
|
|
|
// Create compatibility encoder for extracted modules
|
|
ctx->compat_enc = create_compat_encoder(ctx);
|
|
if (!ctx->compat_enc) {
|
|
snprintf(ctx->error_message, MAX_ERROR_MESSAGE,
|
|
"Failed to create compatibility encoder");
|
|
tav_encoder_free(ctx);
|
|
return NULL;
|
|
}
|
|
|
|
// TODO: Initialize thread pool if multi-threaded
|
|
// (Thread pool implementation deferred - requires extracting worker logic)
|
|
|
|
if (ctx->verbose) {
|
|
printf("%s created:\n", ENCODER_VERSION);
|
|
printf(" Resolution: %dx%d @ %d/%d fps\n",
|
|
ctx->width, ctx->height, ctx->fps_num, ctx->fps_den);
|
|
printf(" GOP size: %d frames\n", ctx->gop_size);
|
|
printf(" Wavelet: %d (spatial), %d (temporal)\n",
|
|
ctx->wavelet_type, ctx->temporal_wavelet);
|
|
printf(" DWT levels: %d (spatial), %d (temporal)\n",
|
|
ctx->decomp_levels, ctx->temporal_levels);
|
|
printf(" Quality: Y=%d, Co=%d, Cg=%d\n",
|
|
ctx->quality_y, ctx->quality_co, ctx->quality_cg);
|
|
printf(" Threads: %d\n", ctx->num_threads);
|
|
}
|
|
|
|
return ctx;
|
|
}
|
|
|
|
// =============================================================================
|
|
// Encoder Cleanup
|
|
// =============================================================================
|
|
|
|
/**
 * Release an encoder context and everything it owns.
 *
 * Safe to call on a partially constructed context: tav_encoder_create()
 * uses it on its failure paths, where the GOP buffers may be only partly
 * allocated and the compatibility encoder may not exist yet.
 *
 * @param ctx  Context to release; NULL is ignored.
 */
void tav_encoder_free(tav_encoder_context_t *ctx) {
    if (!ctx) {
        return;
    }

    // Free single-threaded GOP buffers (unallocated entries are NULL)
    if (ctx->gop_rgb_frames) {
        for (int i = 0; i < ctx->gop_size; i++) {
            free(ctx->gop_rgb_frames[i]);
        }
        free(ctx->gop_rgb_frames);
    }
    free(ctx->gop_frame_pts);

    // Free compatibility encoder. It is still NULL when creation failed
    // before it was built, so do not hand a NULL pointer to the helper.
    if (ctx->compat_enc) {
        free_compat_encoder(ctx->compat_enc);
    }

    // TODO: Shutdown thread pool if exists

    free(ctx);
}
|
|
|
|
// =============================================================================
|
|
// Error Handling
|
|
// =============================================================================
|
|
|
|
/**
 * Fetch the most recent error message recorded in a context.
 *
 * @param ctx  Encoder context.
 * @return A fixed message when ctx is NULL, NULL when no message has been
 *         recorded, otherwise a pointer to the context's own message buffer.
 */
const char *tav_encoder_get_error(tav_encoder_context_t *ctx) {
    if (ctx == NULL) {
        return "Invalid encoder context";
    }

    if (ctx->error_message[0] == '\0') {
        return NULL;
    }

    return ctx->error_message;
}
|
|
|
|
/**
 * Copy the effective configuration of a context back into a parameter block.
 *
 * The values reflect what tav_encoder_create() resolved: auto-calculated
 * fields (decomp_levels, gop_size), forced fields (entropy_coder,
 * temporal_levels) and clamped quantiser indices.
 *
 * @param ctx     Encoder context; nothing happens if NULL.
 * @param params  Destination; nothing happens if NULL. Only the fields
 *                assigned below are written.
 */
void tav_encoder_get_params(tav_encoder_context_t *ctx, tav_encoder_params_t *params) {
    if (!ctx || !params) {
        return;
    }

    params->width = ctx->width;
    params->height = ctx->height;
    params->fps_num = ctx->fps_num;
    params->fps_den = ctx->fps_den;
    params->wavelet_type = ctx->wavelet_type;
    params->temporal_wavelet = ctx->temporal_wavelet;
    params->decomp_levels = ctx->decomp_levels;       // Calculated value
    params->temporal_levels = ctx->temporal_levels;   // Calculated value
    params->channel_layout = ctx->channel_layout;
    params->perceptual_tuning = ctx->perceptual_tuning;
    params->enable_temporal_dwt = ctx->enable_temporal_dwt;
    params->gop_size = ctx->gop_size;                 // Calculated value
    params->enable_two_pass = ctx->enable_two_pass;
    // quality_level is stored by tav_encoder_create(); report it too so the
    // caller's copy does not keep a stale value.
    params->quality_level = ctx->quality_level;
    params->quality_y = ctx->quality_y;
    params->quality_co = ctx->quality_co;
    params->quality_cg = ctx->quality_cg;
    params->dead_zone_threshold = ctx->dead_zone_threshold;
    params->entropy_coder = ctx->entropy_coder;       // Forced to 1 (EZBC)
    params->zstd_level = ctx->zstd_level;
    params->num_threads = ctx->num_threads;
    params->encoder_preset = ctx->encoder_preset;
    params->verbose = ctx->verbose;
    params->monoblock = ctx->monoblock;
}
|
|
|
|
/**
 * Basic sanity check of a context's dimensions and GOP size.
 *
 * @param ctx  Encoder context.
 * @return 1 when ctx is non-NULL, width and height are each within
 *         16..8192 and gop_size is within 1..48; 0 otherwise.
 */
int tav_encoder_validate_context(tav_encoder_context_t *ctx) {
    if (ctx == NULL) {
        return 0;
    }

    const int width_ok  = (ctx->width >= 16) && (ctx->width <= 8192);
    const int height_ok = (ctx->height >= 16) && (ctx->height <= 8192);
    const int gop_ok    = (ctx->gop_size >= 1) && (ctx->gop_size <= 48);

    return (width_ok && height_ok && gop_ok) ? 1 : 0;
}
|
|
|
|
// =============================================================================
|
|
// Statistics
|
|
// =============================================================================
|
|
|
|
/**
 * Report encoding statistics for a context.
 *
 * The destination is zeroed first. The average bitrate is computed from
 * the duration of the encoded content (frames / frame rate), so it does
 * not depend on how much wall-clock time has passed. The encoding speed
 * is frames per wall-clock second and stays 0 until time() has advanced
 * past the context's start time.
 *
 * @param ctx    Encoder context; nothing happens if NULL.
 * @param stats  Destination; nothing happens if NULL.
 */
void tav_encoder_get_stats(tav_encoder_context_t *ctx, tav_encoder_stats_t *stats) {
    if (!ctx || !stats) {
        return;
    }

    memset(stats, 0, sizeof(*stats));

    stats->frames_encoded = ctx->frames_encoded;
    stats->gops_encoded = ctx->gops_encoded;
    stats->total_bytes = ctx->total_bytes;
    stats->video_bytes = ctx->video_bytes;
    stats->audio_bytes = ctx->audio_bytes;

    // Average bitrate over the encoded content duration. Skipped when the
    // frame-rate denominator is zero, to avoid dividing by zero.
    if (ctx->fps_den != 0) {
        double seconds = (double)ctx->frames_encoded / ((double)ctx->fps_num / ctx->fps_den);
        if (seconds > 0) {
            stats->avg_bitrate_kbps = (ctx->total_bytes * 8.0) / (seconds * 1000.0);
        }
    }

    // Encoding speed over wall-clock time; difftime() is the portable way
    // to subtract two time_t values.
    double elapsed = difftime(time(NULL), ctx->start_time);
    if (elapsed > 0) {
        stats->encoding_fps = (double)ctx->frames_encoded / elapsed;
    }
}
|
|
|
|
// =============================================================================
|
|
// Frame Encoding (Single-threaded implementation for now)
|
|
// =============================================================================
|
|
|
|
int tav_encoder_encode_frame(tav_encoder_context_t *ctx,
|
|
const uint8_t *rgb_frame,
|
|
int64_t frame_pts,
|
|
tav_encoder_packet_t **packet) {
|
|
if (!ctx || !rgb_frame || !packet) {
|
|
if (ctx) {
|
|
snprintf(ctx->error_message, MAX_ERROR_MESSAGE, "Invalid parameters");
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
*packet = NULL; // No packet until GOP is complete
|
|
|
|
// Single-threaded implementation: buffer frames until GOP full
|
|
if (ctx->num_threads == 0) {
|
|
// Copy RGB frame to GOP buffer
|
|
size_t frame_size = ctx->width * ctx->height * 3;
|
|
memcpy(ctx->gop_rgb_frames[ctx->gop_frame_count], rgb_frame, frame_size);
|
|
ctx->gop_frame_pts[ctx->gop_frame_count] = frame_pts;
|
|
ctx->gop_frame_count++;
|
|
|
|
// Check if GOP is full
|
|
if (ctx->gop_frame_count >= ctx->gop_size) {
|
|
// Create temporary GOP slot
|
|
gop_slot_t slot = {0};
|
|
slot.rgb_frames = ctx->gop_rgb_frames;
|
|
slot.num_frames = ctx->gop_frame_count;
|
|
slot.frame_numbers = tav_calloc(ctx->gop_frame_count, sizeof(int));
|
|
for (int i = 0; i < ctx->gop_frame_count; i++) {
|
|
slot.frame_numbers[i] = (int)(ctx->frames_encoded + i);
|
|
}
|
|
slot.width = ctx->width;
|
|
slot.height = ctx->height;
|
|
|
|
// Encode GOP
|
|
int result;
|
|
if (ctx->enable_temporal_dwt && ctx->gop_size > 1) {
|
|
result = encode_gop_unified(ctx, &slot);
|
|
} else {
|
|
result = encode_gop_intra_only(ctx, &slot);
|
|
}
|
|
|
|
free(slot.frame_numbers);
|
|
|
|
if (result < 0) {
|
|
// Error message already set by encoding function
|
|
return -1;
|
|
}
|
|
|
|
// Extract packets from slot
|
|
if (slot.num_packets > 0) {
|
|
*packet = &slot.packets[0];
|
|
}
|
|
|
|
// Update statistics
|
|
ctx->frames_encoded += ctx->gop_frame_count;
|
|
ctx->gops_encoded++;
|
|
ctx->video_bytes += slot.packets[0].size;
|
|
ctx->total_bytes += slot.packets[0].size;
|
|
|
|
// Reset GOP buffer
|
|
ctx->gop_frame_count = 0;
|
|
|
|
return 1; // Packet ready
|
|
}
|
|
|
|
return 0; // Buffering, no packet yet
|
|
}
|
|
|
|
// Multi-threaded implementation
|
|
// TODO: Submit frame to thread pool
|
|
snprintf(ctx->error_message, MAX_ERROR_MESSAGE,
|
|
"Multi-threaded encoding not yet implemented");
|
|
return -1;
|
|
}
|
|
|
|
// =============================================================================
|
|
// Flush Encoder
|
|
// =============================================================================
|
|
|
|
int tav_encoder_flush(tav_encoder_context_t *ctx,
|
|
tav_encoder_packet_t **packet) {
|
|
if (!ctx || !packet) {
|
|
if (ctx) {
|
|
snprintf(ctx->error_message, MAX_ERROR_MESSAGE, "Invalid parameters");
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
*packet = NULL;
|
|
|
|
// Encode any remaining frames in GOP buffer
|
|
if (ctx->num_threads == 0 && ctx->gop_frame_count > 0) {
|
|
// Create temporary GOP slot for partial GOP
|
|
gop_slot_t slot = {0};
|
|
slot.rgb_frames = ctx->gop_rgb_frames;
|
|
slot.num_frames = ctx->gop_frame_count;
|
|
slot.frame_numbers = tav_calloc(ctx->gop_frame_count, sizeof(int));
|
|
for (int i = 0; i < ctx->gop_frame_count; i++) {
|
|
slot.frame_numbers[i] = (int)(ctx->frames_encoded + i);
|
|
}
|
|
slot.width = ctx->width;
|
|
slot.height = ctx->height;
|
|
|
|
int result;
|
|
|
|
// For partial GOPs: use unified mode if temporal DWT enabled and >1 frame,
|
|
// otherwise encode as I-frames one at a time
|
|
if (ctx->enable_temporal_dwt && ctx->gop_frame_count > 1) {
|
|
result = encode_gop_unified(ctx, &slot);
|
|
} else if (ctx->gop_frame_count == 1) {
|
|
result = encode_gop_intra_only(ctx, &slot);
|
|
} else {
|
|
// Encode each frame separately as I-frame
|
|
// TODO: This is inefficient - should encode them in a batch
|
|
// For now, just encode the first frame
|
|
gop_slot_t single_slot = {0};
|
|
single_slot.rgb_frames = malloc(sizeof(uint8_t*));
|
|
single_slot.rgb_frames[0] = ctx->gop_rgb_frames[0];
|
|
single_slot.num_frames = 1;
|
|
single_slot.frame_numbers = malloc(sizeof(int));
|
|
single_slot.frame_numbers[0] = (int)ctx->frames_encoded;
|
|
single_slot.width = ctx->width;
|
|
single_slot.height = ctx->height;
|
|
|
|
result = encode_gop_intra_only(ctx, &single_slot);
|
|
|
|
if (result == 0 && single_slot.num_packets > 0) {
|
|
// Copy packet pointer
|
|
slot.packets = single_slot.packets;
|
|
slot.num_packets = single_slot.num_packets;
|
|
|
|
// Don't free single_slot.packets - we transferred ownership
|
|
}
|
|
|
|
free(single_slot.rgb_frames);
|
|
free(single_slot.frame_numbers);
|
|
|
|
// Mark only 1 frame as encoded (we'll call flush again for others)
|
|
ctx->gop_frame_count--;
|
|
// Shift remaining frames down
|
|
for (int i = 0; i < ctx->gop_frame_count; i++) {
|
|
ctx->gop_rgb_frames[i] = ctx->gop_rgb_frames[i+1];
|
|
}
|
|
}
|
|
|
|
free(slot.frame_numbers);
|
|
|
|
if (result < 0) {
|
|
// Error message already set by encoding function
|
|
return -1;
|
|
}
|
|
|
|
// Extract packets from slot
|
|
if (slot.num_packets > 0) {
|
|
*packet = slot.packets; // Transfer ownership to caller
|
|
}
|
|
|
|
// Update statistics (only for frames actually encoded)
|
|
int frames_in_packet = (ctx->enable_temporal_dwt || ctx->gop_frame_count == 1)
|
|
? slot.num_frames : 1;
|
|
ctx->frames_encoded += frames_in_packet;
|
|
ctx->gops_encoded++;
|
|
if (slot.num_packets > 0) {
|
|
ctx->video_bytes += slot.packets[0].size;
|
|
ctx->total_bytes += slot.packets[0].size;
|
|
}
|
|
|
|
// Reset GOP buffer if we encoded everything
|
|
if (!ctx->enable_temporal_dwt && ctx->gop_frame_count > 0) {
|
|
// Still have frames to encode - return 1 to continue flushing
|
|
return 1;
|
|
}
|
|
|
|
ctx->gop_frame_count = 0;
|
|
|
|
return 1; // Packet ready
|
|
}
|
|
|
|
// Multi-threaded: wait for all pending GOPs to complete
|
|
if (ctx->num_threads > 0) {
|
|
// TODO: Flush thread pool
|
|
snprintf(ctx->error_message, MAX_ERROR_MESSAGE,
|
|
"Multi-threaded flush not yet implemented");
|
|
return -1;
|
|
}
|
|
|
|
return 0; // No more packets
|
|
}
|
|
|
|
/**
 * Release a packet together with its payload buffer.
 *
 * @param packet  Packet to release; NULL is ignored.
 */
void tav_encoder_free_packet(tav_encoder_packet_t *packet) {
    if (packet == NULL) {
        return;
    }

    // free(NULL) is a no-op, so an absent payload needs no special case
    free(packet->data);
    free(packet);
}
|
|
|
|
// =============================================================================
|
|
// GOP-Level Encoding (Thread-Safe)
|
|
// =============================================================================
|
|
|
|
int tav_encoder_encode_gop(tav_encoder_context_t *ctx,
|
|
const uint8_t **rgb_frames,
|
|
int num_frames,
|
|
const int *frame_numbers,
|
|
tav_encoder_packet_t **packet) {
|
|
if (!ctx || !rgb_frames || !packet) {
|
|
if (ctx) {
|
|
snprintf(ctx->error_message, MAX_ERROR_MESSAGE, "Invalid parameters");
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
if (num_frames < 1 || num_frames > 24) {
|
|
snprintf(ctx->error_message, MAX_ERROR_MESSAGE,
|
|
"Invalid GOP size: %d (must be 1-24)", num_frames);
|
|
return -1;
|
|
}
|
|
|
|
*packet = NULL;
|
|
|
|
// Create temporary GOP slot
|
|
gop_slot_t slot = {0};
|
|
|
|
// Allocate array of frame pointers (casting away const for internal use)
|
|
slot.rgb_frames = tav_malloc(num_frames * sizeof(uint8_t*));
|
|
for (int i = 0; i < num_frames; i++) {
|
|
slot.rgb_frames[i] = (uint8_t*)rgb_frames[i]; // Cast away const
|
|
}
|
|
|
|
slot.num_frames = num_frames;
|
|
slot.width = ctx->width;
|
|
slot.height = ctx->height;
|
|
|
|
// Copy or generate frame numbers
|
|
slot.frame_numbers = tav_calloc(num_frames, sizeof(int));
|
|
if (frame_numbers) {
|
|
memcpy(slot.frame_numbers, frame_numbers, num_frames * sizeof(int));
|
|
} else {
|
|
// Generate sequential frame numbers if not provided
|
|
for (int i = 0; i < num_frames; i++) {
|
|
slot.frame_numbers[i] = i;
|
|
}
|
|
}
|
|
|
|
// Encode GOP
|
|
int result;
|
|
if (ctx->enable_temporal_dwt && num_frames > 1) {
|
|
result = encode_gop_unified(ctx, &slot);
|
|
} else {
|
|
result = encode_gop_intra_only(ctx, &slot);
|
|
}
|
|
|
|
// Cleanup temporary allocations
|
|
free(slot.rgb_frames);
|
|
free(slot.frame_numbers);
|
|
|
|
if (result < 0) {
|
|
// Error message already set by encoding function
|
|
return -1;
|
|
}
|
|
|
|
// Extract packet from slot
|
|
if (slot.num_packets > 0) {
|
|
*packet = &slot.packets[0];
|
|
} else {
|
|
snprintf(ctx->error_message, MAX_ERROR_MESSAGE, "Encoding produced no packets");
|
|
return -1;
|
|
}
|
|
|
|
// NOTE: Statistics NOT updated here - caller manages that
|
|
// This function is stateless for multithreading
|
|
|
|
return 1; // Packet ready
|
|
}
|
|
|
|
// =============================================================================
|
|
// Audio Encoding
|
|
// =============================================================================
|
|
|
|
int tav_encoder_encode_audio(tav_encoder_context_t *ctx,
|
|
const float *pcm_samples,
|
|
size_t num_samples,
|
|
tav_encoder_packet_t **packet) {
|
|
if (!ctx || !pcm_samples || !packet) {
|
|
if (ctx) {
|
|
snprintf(ctx->error_message, MAX_ERROR_MESSAGE, "Invalid parameters");
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
*packet = NULL;
|
|
|
|
// Validate chunk size
|
|
if (num_samples < TAD32_MIN_CHUNK_SIZE) {
|
|
snprintf(ctx->error_message, MAX_ERROR_MESSAGE,
|
|
"Audio chunk too small (%zu < %d)", num_samples, TAD32_MIN_CHUNK_SIZE);
|
|
return -1;
|
|
}
|
|
|
|
// Allocate output buffer (conservative estimate: 4 bytes per sample)
|
|
size_t output_capacity = num_samples * 4 + 1024;
|
|
uint8_t *tad_data = malloc(output_capacity);
|
|
if (!tad_data) {
|
|
snprintf(ctx->error_message, MAX_ERROR_MESSAGE,
|
|
"Failed to allocate TAD output buffer");
|
|
return -1;
|
|
}
|
|
|
|
// Encode audio with TAD encoder
|
|
size_t tad_size = tad32_encode_chunk(pcm_samples, num_samples,
|
|
ctx->tad_max_index, 1.0f, tad_data);
|
|
if (tad_size == 0) {
|
|
free(tad_data);
|
|
snprintf(ctx->error_message, MAX_ERROR_MESSAGE,
|
|
"TAD audio encoding failed");
|
|
return -1;
|
|
}
|
|
|
|
// Create packet
|
|
tav_encoder_packet_t *pkt = calloc(1, sizeof(tav_encoder_packet_t));
|
|
if (!pkt) {
|
|
free(tad_data);
|
|
snprintf(ctx->error_message, MAX_ERROR_MESSAGE,
|
|
"Failed to allocate packet");
|
|
return -1;
|
|
}
|
|
|
|
pkt->data = tad_data;
|
|
pkt->size = tad_size;
|
|
pkt->packet_type = TAV_PACKET_AUDIO_TAD;
|
|
pkt->frame_number = -1; // Audio doesn't have frame number
|
|
pkt->is_video = 0;
|
|
|
|
*packet = pkt;
|
|
|
|
ctx->audio_bytes += tad_size;
|
|
ctx->total_bytes += tad_size;
|
|
|
|
return 1; // Packet ready
|
|
}
|
|
|
|
// =============================================================================
|
|
// Compatibility Encoder Helpers
|
|
// =============================================================================
|
|
|
|
/**
|
|
* Create compatibility encoder structure for extracted modules.
|
|
* Calculates subband widths/heights arrays needed by quantization module.
|
|
*/
|
|
static tav_encoder_t *create_compat_encoder(tav_encoder_context_t *ctx) {
|
|
tav_encoder_t *enc = calloc(1, sizeof(tav_encoder_t));
|
|
if (!enc) return NULL;
|
|
|
|
// Copy basic fields
|
|
enc->quality_level = ctx->quality_level;
|
|
enc->dead_zone_threshold = ctx->dead_zone_threshold;
|
|
enc->encoder_preset = ctx->encoder_preset;
|
|
enc->temporal_decomp_levels = ctx->temporal_levels;
|
|
enc->verbose = ctx->verbose;
|
|
enc->perceptual_tuning = ctx->perceptual_tuning;
|
|
|
|
// Copy frame dimensions (needed by quantisation functions)
|
|
enc->width = ctx->width;
|
|
enc->height = ctx->height;
|
|
enc->decomp_levels = ctx->decomp_levels;
|
|
enc->frame_count = 0; // Will be updated during encoding
|
|
|
|
// Calculate subband widths and heights arrays
|
|
// These are needed by the perceptual quantization module
|
|
int max_levels = ctx->decomp_levels + 1;
|
|
enc->widths = calloc(max_levels, sizeof(int));
|
|
enc->heights = calloc(max_levels, sizeof(int));
|
|
|
|
if (!enc->widths || !enc->heights) {
|
|
free(enc->widths);
|
|
free(enc->heights);
|
|
free(enc);
|
|
return NULL;
|
|
}
|
|
|
|
// Level 0 is full resolution
|
|
int w = ctx->width;
|
|
int h = ctx->height;
|
|
|
|
for (int level = 0; level < max_levels; level++) {
|
|
enc->widths[level] = w;
|
|
enc->heights[level] = h;
|
|
w = (w + 1) / 2; // Next level is half resolution (rounded up)
|
|
h = (h + 1) / 2;
|
|
}
|
|
|
|
return enc;
|
|
}
|
|
|
|
/**
|
|
* Free compatibility encoder structure.
|
|
*/
|
|
static void free_compat_encoder(tav_encoder_t *enc) {
|
|
if (!enc) return;
|
|
free(enc->widths);
|
|
free(enc->heights);
|
|
free(enc);
|
|
}
|
|
|
|
// =============================================================================
|
|
// GOP Encoding Implementation
|
|
// =============================================================================
|
|
|
|
/**
|
|
* Convert RGB frame to color space (YCoCg-R or ICtCp).
|
|
* Helper function for GOP encoding.
|
|
*/
|
|
static void rgb_to_colour_space_frame(tav_encoder_context_t *ctx, const uint8_t *rgb,
|
|
float *c1, float *c2, float *c3,
|
|
int width, int height) {
|
|
int num_pixels = width * height;
|
|
|
|
if (ctx->channel_layout == 1) { // ICtCp mode
|
|
// Use color module function for ICtCp conversion
|
|
for (int i = 0; i < num_pixels; i++) {
|
|
double I, Ct, Cp;
|
|
tav_srgb8_to_ictcp_hlg(rgb[i*3], rgb[i*3+1], rgb[i*3+2], &I, &Ct, &Cp);
|
|
c1[i] = (float)I;
|
|
c2[i] = (float)Ct;
|
|
c3[i] = (float)Cp;
|
|
}
|
|
} else { // YCoCg-R mode (default)
|
|
tav_rgb_to_ycocg(rgb, c1, c2, c3, width, height);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Preprocess coefficients using EZBC encoding (single frame).
|
|
* Based on encoder_tav.c:preprocess_coefficients_ezbc().
|
|
* NOTE: EZBC encoder allocates its own output buffer, which we copy to output_buffer.
|
|
*/
|
|
static size_t preprocess_coefficients_ezbc(int16_t *coeffs_y, int16_t *coeffs_co, int16_t *coeffs_cg, int16_t *coeffs_alpha,
|
|
int coeff_count, int width, int height, int channel_layout,
|
|
uint8_t *output_buffer) {
|
|
const channel_layout_config_t *config = &channel_layouts[channel_layout];
|
|
size_t total_size = 0;
|
|
uint8_t *write_ptr = output_buffer;
|
|
|
|
// Encode each active channel separately with EZBC
|
|
int16_t *channel_coeffs[4] = {coeffs_y, coeffs_co, coeffs_cg, coeffs_alpha};
|
|
int channel_active[4] = {config->has_y, config->has_co, config->has_cg, config->has_alpha};
|
|
|
|
for (int ch = 0; ch < 4; ch++) {
|
|
if (!channel_active[ch] || !channel_coeffs[ch]) continue;
|
|
|
|
// EZBC encoder allocates output buffer
|
|
uint8_t *ezbc_output = NULL;
|
|
size_t encoded_size = tav_encode_channel_ezbc(
|
|
channel_coeffs[ch], coeff_count, width, height,
|
|
&ezbc_output // Double pointer - EZBC allocates memory
|
|
);
|
|
|
|
if (encoded_size == 0 || !ezbc_output) {
|
|
continue; // Skip channel if encoding failed
|
|
}
|
|
|
|
// Write channel size header (4 bytes)
|
|
*((uint32_t*)write_ptr) = (uint32_t)encoded_size;
|
|
write_ptr += sizeof(uint32_t);
|
|
|
|
// Copy EZBC output to our buffer
|
|
memcpy(write_ptr, ezbc_output, encoded_size);
|
|
write_ptr += encoded_size;
|
|
total_size += sizeof(uint32_t) + encoded_size;
|
|
|
|
// Free EZBC-allocated buffer
|
|
free(ezbc_output);
|
|
}
|
|
|
|
return total_size;
|
|
}
|
|
|
|
/**
|
|
* Unified GOP preprocessing function.
|
|
* Handles twobitmap, EZBC, and raw coefficient modes.
|
|
* Based on encoder_tav.c:preprocess_gop_unified().
|
|
*/
|
|
static size_t preprocess_gop_unified(preprocess_mode_t preprocess_mode, int16_t **quant_y, int16_t **quant_co, int16_t **quant_cg,
|
|
int num_frames, int num_pixels, int width, int height, int channel_layout,
|
|
uint8_t *output_buffer) {
|
|
const channel_layout_config_t *config = &channel_layouts[channel_layout];
|
|
|
|
// Raw mode: just concatenate all coefficients
|
|
if (preprocess_mode == PREPROCESS_RAW) {
|
|
size_t offset = 0;
|
|
|
|
// Copy all Y frames
|
|
if (config->has_y && quant_y) {
|
|
for (int frame = 0; frame < num_frames; frame++) {
|
|
if (quant_y[frame]) {
|
|
memcpy(output_buffer + offset, quant_y[frame], num_pixels * sizeof(int16_t));
|
|
offset += num_pixels * sizeof(int16_t);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Copy all Co frames
|
|
if (config->has_co && quant_co) {
|
|
for (int frame = 0; frame < num_frames; frame++) {
|
|
if (quant_co[frame]) {
|
|
memcpy(output_buffer + offset, quant_co[frame], num_pixels * sizeof(int16_t));
|
|
offset += num_pixels * sizeof(int16_t);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Copy all Cg frames
|
|
if (config->has_cg && quant_cg) {
|
|
for (int frame = 0; frame < num_frames; frame++) {
|
|
if (quant_cg[frame]) {
|
|
memcpy(output_buffer + offset, quant_cg[frame], num_pixels * sizeof(int16_t));
|
|
offset += num_pixels * sizeof(int16_t);
|
|
}
|
|
}
|
|
}
|
|
|
|
return offset;
|
|
}
|
|
|
|
// EZBC mode: encode each frame separately with EZBC
|
|
if (preprocess_mode == PREPROCESS_EZBC) {
|
|
size_t total_size = 0;
|
|
uint8_t *write_ptr = output_buffer;
|
|
|
|
for (int frame = 0; frame < num_frames; frame++) {
|
|
// Encode this frame with EZBC
|
|
size_t frame_size = preprocess_coefficients_ezbc(
|
|
quant_y ? quant_y[frame] : NULL,
|
|
quant_co ? quant_co[frame] : NULL,
|
|
quant_cg ? quant_cg[frame] : NULL,
|
|
NULL, // No alpha in GOP mode
|
|
num_pixels, width, height, channel_layout,
|
|
write_ptr + sizeof(uint32_t) // Leave space for size header
|
|
);
|
|
|
|
// Write frame size header
|
|
*((uint32_t*)write_ptr) = (uint32_t)frame_size;
|
|
write_ptr += sizeof(uint32_t) + frame_size;
|
|
total_size += sizeof(uint32_t) + frame_size;
|
|
}
|
|
|
|
return total_size;
|
|
}
|
|
|
|
// Twobit-map mode: original unified GOP preprocessing
|
|
const int map_bytes_per_frame = (num_pixels * 2 + 7) / 8; // 2 bits per coefficient
|
|
|
|
// Count "other" values (not 0, +1, or -1) for each channel across ALL frames
|
|
int other_count_y = 0, other_count_co = 0, other_count_cg = 0;
|
|
|
|
for (int frame = 0; frame < num_frames; frame++) {
|
|
if (config->has_y && quant_y && quant_y[frame]) {
|
|
for (int i = 0; i < num_pixels; i++) {
|
|
int16_t val = quant_y[frame][i];
|
|
if (val != 0 && val != 1 && val != -1) other_count_y++;
|
|
}
|
|
}
|
|
if (config->has_co && quant_co && quant_co[frame]) {
|
|
for (int i = 0; i < num_pixels; i++) {
|
|
int16_t val = quant_co[frame][i];
|
|
if (val != 0 && val != 1 && val != -1) other_count_co++;
|
|
}
|
|
}
|
|
if (config->has_cg && quant_cg && quant_cg[frame]) {
|
|
for (int i = 0; i < num_pixels; i++) {
|
|
int16_t val = quant_cg[frame][i];
|
|
if (val != 0 && val != 1 && val != -1) other_count_cg++;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Calculate buffer layout
|
|
uint8_t *write_ptr = output_buffer;
|
|
|
|
// Significance maps: grouped by channel (all Y frames, then all Co frames, then all Cg frames)
|
|
uint8_t *y_maps_start = write_ptr;
|
|
if (config->has_y) write_ptr += map_bytes_per_frame * num_frames;
|
|
|
|
uint8_t *co_maps_start = write_ptr;
|
|
if (config->has_co) write_ptr += map_bytes_per_frame * num_frames;
|
|
|
|
uint8_t *cg_maps_start = write_ptr;
|
|
if (config->has_cg) write_ptr += map_bytes_per_frame * num_frames;
|
|
|
|
// Value arrays: grouped by channel
|
|
int16_t *y_values = (int16_t *)write_ptr;
|
|
if (config->has_y) write_ptr += other_count_y * sizeof(int16_t);
|
|
|
|
int16_t *co_values = (int16_t *)write_ptr;
|
|
if (config->has_co) write_ptr += other_count_co * sizeof(int16_t);
|
|
|
|
int16_t *cg_values = (int16_t *)write_ptr;
|
|
if (config->has_cg) write_ptr += other_count_cg * sizeof(int16_t);
|
|
|
|
// Clear all map bytes
|
|
size_t total_map_bytes = 0;
|
|
if (config->has_y) total_map_bytes += map_bytes_per_frame * num_frames;
|
|
if (config->has_co) total_map_bytes += map_bytes_per_frame * num_frames;
|
|
if (config->has_cg) total_map_bytes += map_bytes_per_frame * num_frames;
|
|
memset(output_buffer, 0, total_map_bytes);
|
|
|
|
// Process each frame and fill maps/values
|
|
int y_value_idx = 0, co_value_idx = 0, cg_value_idx = 0;
|
|
|
|
for (int frame = 0; frame < num_frames; frame++) {
|
|
uint8_t *y_map = y_maps_start + frame * map_bytes_per_frame;
|
|
uint8_t *co_map = co_maps_start + frame * map_bytes_per_frame;
|
|
uint8_t *cg_map = cg_maps_start + frame * map_bytes_per_frame;
|
|
|
|
for (int i = 0; i < num_pixels; i++) {
|
|
size_t bit_pos = i * 2;
|
|
size_t byte_idx = bit_pos / 8;
|
|
size_t bit_offset = bit_pos % 8;
|
|
|
|
// Process Y channel
|
|
if (config->has_y && quant_y && quant_y[frame]) {
|
|
int16_t val = quant_y[frame][i];
|
|
uint8_t code;
|
|
|
|
if (val == 0) code = 0; // 00
|
|
else if (val == 1) code = 1; // 01
|
|
else if (val == -1) code = 2; // 10
|
|
else {
|
|
code = 3; // 11
|
|
y_values[y_value_idx++] = val;
|
|
}
|
|
|
|
y_map[byte_idx] |= (code << bit_offset);
|
|
if (bit_offset == 7 && byte_idx + 1 < (size_t)map_bytes_per_frame) {
|
|
y_map[byte_idx + 1] |= (code >> 1);
|
|
}
|
|
}
|
|
|
|
// Process Co channel
|
|
if (config->has_co && quant_co && quant_co[frame]) {
|
|
int16_t val = quant_co[frame][i];
|
|
uint8_t code;
|
|
|
|
if (val == 0) code = 0;
|
|
else if (val == 1) code = 1;
|
|
else if (val == -1) code = 2;
|
|
else {
|
|
code = 3;
|
|
co_values[co_value_idx++] = val;
|
|
}
|
|
|
|
co_map[byte_idx] |= (code << bit_offset);
|
|
if (bit_offset == 7 && byte_idx + 1 < (size_t)map_bytes_per_frame) {
|
|
co_map[byte_idx + 1] |= (code >> 1);
|
|
}
|
|
}
|
|
|
|
// Process Cg channel
|
|
if (config->has_cg && quant_cg && quant_cg[frame]) {
|
|
int16_t val = quant_cg[frame][i];
|
|
uint8_t code;
|
|
|
|
if (val == 0) code = 0;
|
|
else if (val == 1) code = 1;
|
|
else if (val == -1) code = 2;
|
|
else {
|
|
code = 3;
|
|
cg_values[cg_value_idx++] = val;
|
|
}
|
|
|
|
cg_map[byte_idx] |= (code << bit_offset);
|
|
if (bit_offset == 7 && byte_idx + 1 < (size_t)map_bytes_per_frame) {
|
|
cg_map[byte_idx + 1] |= (code >> 1);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Return total size
|
|
return (size_t)(write_ptr - output_buffer);
|
|
}
|
|
|
|
/**
|
|
* Encode single-frame I-frame (intra-only mode).
|
|
* Uses 2D DWT on individual frame.
|
|
*/
|
|
static int encode_gop_intra_only(tav_encoder_context_t *ctx, gop_slot_t *slot) {
|
|
const int width = slot->width;
|
|
const int height = slot->height;
|
|
const int num_pixels = width * height;
|
|
const int num_frames = slot->num_frames;
|
|
|
|
if (num_frames != 1) {
|
|
snprintf(slot->error_message, MAX_ERROR_MESSAGE,
|
|
"encode_gop_intra_only called with %d frames (expected 1)", num_frames);
|
|
return -1;
|
|
}
|
|
|
|
// Allocate work buffers for single frame
|
|
float *work_y = tav_calloc(num_pixels, sizeof(float));
|
|
float *work_co = tav_calloc(num_pixels, sizeof(float));
|
|
float *work_cg = tav_calloc(num_pixels, sizeof(float));
|
|
int16_t *quant_y = tav_calloc(num_pixels, sizeof(int16_t));
|
|
int16_t *quant_co = tav_calloc(num_pixels, sizeof(int16_t));
|
|
int16_t *quant_cg = tav_calloc(num_pixels, sizeof(int16_t));
|
|
|
|
// Step 1: RGB to YCoCg-R (or ICtCp)
|
|
rgb_to_colour_space_frame(ctx, slot->rgb_frames[0], work_y, work_co, work_cg, width, height);
|
|
|
|
// Step 2: Apply 2D DWT
|
|
tav_dwt_2d_forward(work_y, width, height, ctx->decomp_levels, ctx->wavelet_type);
|
|
tav_dwt_2d_forward(work_co, width, height, ctx->decomp_levels, ctx->wavelet_type);
|
|
tav_dwt_2d_forward(work_cg, width, height, ctx->decomp_levels, ctx->wavelet_type);
|
|
|
|
// Step 3: Quantize coefficients
|
|
// ctx->quantiser_y/co/cg contain QLUT indices, lookup actual quantiser values
|
|
int base_quantiser_y = QLUT[ctx->quantiser_y];
|
|
int base_quantiser_co = QLUT[ctx->quantiser_co];
|
|
int base_quantiser_cg = QLUT[ctx->quantiser_cg];
|
|
|
|
if (ctx->perceptual_tuning) {
|
|
tav_quantise_perceptual(ctx->compat_enc, work_y, quant_y, num_pixels,
|
|
base_quantiser_y, (float)ctx->dead_zone_threshold, width, height, ctx->decomp_levels, 0, 0);
|
|
tav_quantise_perceptual(ctx->compat_enc, work_co, quant_co, num_pixels,
|
|
base_quantiser_co, (float)ctx->dead_zone_threshold, width, height, ctx->decomp_levels, 1, 0);
|
|
tav_quantise_perceptual(ctx->compat_enc, work_cg, quant_cg, num_pixels,
|
|
base_quantiser_cg, (float)ctx->dead_zone_threshold, width, height, ctx->decomp_levels, 1, 0);
|
|
} else {
|
|
tav_quantise_uniform(work_y, quant_y, num_pixels, base_quantiser_y,
|
|
(float)ctx->dead_zone_threshold, width, height,
|
|
ctx->decomp_levels, 0);
|
|
tav_quantise_uniform(work_co, quant_co, num_pixels, base_quantiser_co,
|
|
(float)ctx->dead_zone_threshold, width, height,
|
|
ctx->decomp_levels, 1);
|
|
tav_quantise_uniform(work_cg, quant_cg, num_pixels, base_quantiser_cg,
|
|
(float)ctx->dead_zone_threshold, width, height,
|
|
ctx->decomp_levels, 1);
|
|
}
|
|
|
|
// Step 4: Preprocess coefficients
|
|
size_t preprocess_capacity = num_pixels * 3 * sizeof(int16_t) + 65536; // Conservative
|
|
uint8_t *preprocess_buffer = tav_malloc(preprocess_capacity);
|
|
|
|
// Use EZBC preprocessing (Twobitmap is deprecated)
|
|
size_t preprocessed_size = preprocess_coefficients_ezbc(
|
|
quant_y, quant_co, quant_cg, NULL,
|
|
num_pixels, width, height, ctx->channel_layout,
|
|
preprocess_buffer
|
|
);
|
|
|
|
// Step 5: Zstd compress
|
|
size_t compressed_bound = ZSTD_compressBound(preprocessed_size);
|
|
uint8_t *compression_buffer = tav_malloc(compressed_bound);
|
|
|
|
size_t compressed_size = ZSTD_compress(
|
|
compression_buffer, compressed_bound,
|
|
preprocess_buffer, preprocessed_size,
|
|
ctx->zstd_level
|
|
);
|
|
|
|
if (ZSTD_isError(compressed_size)) {
|
|
free(work_y); free(work_co); free(work_cg);
|
|
free(quant_y); free(quant_co); free(quant_cg);
|
|
free(preprocess_buffer);
|
|
free(compression_buffer);
|
|
snprintf(slot->error_message, MAX_ERROR_MESSAGE,
|
|
"Zstd compression failed: %s", ZSTD_getErrorName(compressed_size));
|
|
return -1;
|
|
}
|
|
|
|
// Step 6: Format I-frame packet
|
|
// Packet format: [type(1)][size(4)][data(N)]
|
|
size_t packet_size = 1 + 4 + compressed_size;
|
|
tav_encoder_packet_t *pkt = calloc(1, sizeof(tav_encoder_packet_t));
|
|
pkt->data = malloc(packet_size);
|
|
pkt->size = packet_size;
|
|
pkt->packet_type = TAV_PACKET_IFRAME;
|
|
pkt->frame_number = slot->frame_numbers[0];
|
|
pkt->is_video = 1;
|
|
|
|
uint8_t *write_ptr = pkt->data;
|
|
*write_ptr++ = TAV_PACKET_IFRAME;
|
|
uint32_t size_field = (uint32_t)compressed_size;
|
|
memcpy(write_ptr, &size_field, 4);
|
|
write_ptr += 4;
|
|
memcpy(write_ptr, compression_buffer, compressed_size);
|
|
|
|
// Store packet in slot
|
|
slot->packets = pkt;
|
|
slot->num_packets = 1;
|
|
|
|
// Cleanup
|
|
free(work_y); free(work_co); free(work_cg);
|
|
free(quant_y); free(quant_co); free(quant_cg);
|
|
free(preprocess_buffer);
|
|
free(compression_buffer);
|
|
|
|
return 0; // Success
|
|
}
|
|
|
|
/**
|
|
* Encode multi-frame GOP using 3D DWT (unified mode).
|
|
* Uses temporal + spatial DWT for optimal compression.
|
|
*/
|
|
static int encode_gop_unified(tav_encoder_context_t *ctx, gop_slot_t *slot) {
|
|
const int width = slot->width;
|
|
const int height = slot->height;
|
|
const int num_pixels = width * height;
|
|
const int num_frames = slot->num_frames;
|
|
|
|
// Allocate work buffers for all frames
|
|
float **work_y = tav_calloc(num_frames, sizeof(float*));
|
|
float **work_co = tav_calloc(num_frames, sizeof(float*));
|
|
float **work_cg = tav_calloc(num_frames, sizeof(float*));
|
|
int16_t **quant_y = tav_calloc(num_frames, sizeof(int16_t*));
|
|
int16_t **quant_co = tav_calloc(num_frames, sizeof(int16_t*));
|
|
int16_t **quant_cg = tav_calloc(num_frames, sizeof(int16_t*));
|
|
|
|
for (int i = 0; i < num_frames; i++) {
|
|
work_y[i] = tav_calloc(num_pixels, sizeof(float));
|
|
work_co[i] = tav_calloc(num_pixels, sizeof(float));
|
|
work_cg[i] = tav_calloc(num_pixels, sizeof(float));
|
|
quant_y[i] = tav_calloc(num_pixels, sizeof(int16_t));
|
|
quant_co[i] = tav_calloc(num_pixels, sizeof(int16_t));
|
|
quant_cg[i] = tav_calloc(num_pixels, sizeof(int16_t));
|
|
}
|
|
|
|
// Step 1: RGB to YCoCg-R for all frames
|
|
for (int frame = 0; frame < num_frames; frame++) {
|
|
rgb_to_colour_space_frame(ctx, slot->rgb_frames[frame],
|
|
work_y[frame], work_co[frame], work_cg[frame],
|
|
width, height);
|
|
}
|
|
|
|
// Step 2: Apply 3D DWT (temporal + spatial)
|
|
tav_dwt_3d_forward(work_y, width, height, num_frames,
|
|
ctx->decomp_levels, ctx->temporal_levels,
|
|
ctx->wavelet_type, ctx->temporal_wavelet);
|
|
tav_dwt_3d_forward(work_co, width, height, num_frames,
|
|
ctx->decomp_levels, ctx->temporal_levels,
|
|
ctx->wavelet_type, ctx->temporal_wavelet);
|
|
tav_dwt_3d_forward(work_cg, width, height, num_frames,
|
|
ctx->decomp_levels, ctx->temporal_levels,
|
|
ctx->wavelet_type, ctx->temporal_wavelet);
|
|
|
|
// Step 3: Quantize 3D coefficients
|
|
// ctx->quantiser_y/co/cg contain QLUT indices, lookup actual quantiser values
|
|
int base_quantiser_y = QLUT[ctx->quantiser_y];
|
|
int base_quantiser_co = QLUT[ctx->quantiser_co];
|
|
int base_quantiser_cg = QLUT[ctx->quantiser_cg];
|
|
|
|
// CRITICAL: Force perceptual quantization for GOPs to match old encoder behavior
|
|
// The old encoder's quantise_dwt_coefficients_perceptual_per_coeff() does NOT check
|
|
// perceptual_tuning flag - it always applies perceptual weights for GOP encoding.
|
|
// The --no-perceptual-tuning flag only affects I-frame encoding in the old encoder.
|
|
int saved_perceptual = ctx->compat_enc->perceptual_tuning;
|
|
ctx->compat_enc->perceptual_tuning = 1; // Force perceptual for GOP encoding
|
|
|
|
if (ctx->verbose) {
|
|
fprintf(stderr, "[DEBUG] GOP quantization: decomp_levels=%d, base_q_y=%d, perceptual=%d (forced on for GOP), preset=0x%02x\n",
|
|
ctx->compat_enc->decomp_levels, base_quantiser_y, ctx->compat_enc->perceptual_tuning, ctx->compat_enc->encoder_preset);
|
|
}
|
|
|
|
tav_quantise_3d_dwt(ctx->compat_enc, work_y, quant_y, num_frames, num_pixels,
|
|
base_quantiser_y, 0);
|
|
tav_quantise_3d_dwt(ctx->compat_enc, work_co, quant_co, num_frames, num_pixels,
|
|
base_quantiser_co, 1);
|
|
tav_quantise_3d_dwt(ctx->compat_enc, work_cg, quant_cg, num_frames, num_pixels,
|
|
base_quantiser_cg, 1);
|
|
|
|
ctx->compat_enc->perceptual_tuning = saved_perceptual; // Restore for I-frames
|
|
|
|
// Step 4: Unified GOP preprocessing (EZBC only)
|
|
size_t preprocess_capacity = num_pixels * num_frames * 3 * sizeof(int16_t) + 65536;
|
|
uint8_t *preprocess_buffer = tav_malloc(preprocess_capacity);
|
|
|
|
size_t preprocessed_size = preprocess_gop_unified(
|
|
PREPROCESS_EZBC, quant_y, quant_co, quant_cg,
|
|
num_frames, num_pixels, width, height, ctx->channel_layout,
|
|
preprocess_buffer
|
|
);
|
|
|
|
// Step 5: Zstd compress
|
|
size_t compressed_bound = ZSTD_compressBound(preprocessed_size);
|
|
uint8_t *compression_buffer = tav_malloc(compressed_bound);
|
|
|
|
size_t compressed_size = ZSTD_compress(
|
|
compression_buffer, compressed_bound,
|
|
preprocess_buffer, preprocessed_size,
|
|
ctx->zstd_level
|
|
);
|
|
|
|
if (ZSTD_isError(compressed_size)) {
|
|
// Cleanup and return error
|
|
for (int i = 0; i < num_frames; i++) {
|
|
free(work_y[i]); free(work_co[i]); free(work_cg[i]);
|
|
free(quant_y[i]); free(quant_co[i]); free(quant_cg[i]);
|
|
}
|
|
free(work_y); free(work_co); free(work_cg);
|
|
free(quant_y); free(quant_co); free(quant_cg);
|
|
free(preprocess_buffer);
|
|
free(compression_buffer);
|
|
snprintf(slot->error_message, MAX_ERROR_MESSAGE,
|
|
"Zstd compression failed: %s", ZSTD_getErrorName(compressed_size));
|
|
return -1;
|
|
}
|
|
|
|
// Step 6: Format GOP unified packet
|
|
// Packet format: [type(1)][gop_size(1)][size(4)][data(N)]
|
|
size_t packet_size = 1 + 1 + 4 + compressed_size;
|
|
tav_encoder_packet_t *pkt = calloc(1, sizeof(tav_encoder_packet_t));
|
|
pkt->data = malloc(packet_size);
|
|
pkt->size = packet_size;
|
|
pkt->packet_type = TAV_PACKET_GOP_UNIFIED;
|
|
pkt->frame_number = slot->frame_numbers[0]; // First frame in GOP
|
|
pkt->is_video = 1;
|
|
|
|
uint8_t *write_ptr = pkt->data;
|
|
*write_ptr++ = TAV_PACKET_GOP_UNIFIED;
|
|
*write_ptr++ = (uint8_t)num_frames;
|
|
uint32_t size_field = (uint32_t)compressed_size;
|
|
memcpy(write_ptr, &size_field, 4);
|
|
write_ptr += 4;
|
|
memcpy(write_ptr, compression_buffer, compressed_size);
|
|
|
|
// Store packet in slot
|
|
slot->packets = pkt;
|
|
slot->num_packets = 1;
|
|
|
|
// Cleanup
|
|
for (int i = 0; i < num_frames; i++) {
|
|
free(work_y[i]); free(work_co[i]); free(work_cg[i]);
|
|
free(quant_y[i]); free(quant_co[i]); free(quant_cg[i]);
|
|
}
|
|
free(work_y); free(work_co); free(work_cg);
|
|
free(quant_y); free(quant_co); free(quant_cg);
|
|
free(preprocess_buffer);
|
|
free(compression_buffer);
|
|
|
|
return 0; // Success
|
|
}
|