Files
tsvm/video_encoder/include/tav_encoder_lib.h

295 lines
11 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* TAV Encoder Library - Public API
*
* High-level interface for encoding video using the TSVM Advanced Video (TAV) codec.
* Supports GOP-based encoding with internal multi-threading for optimal performance.
*
* Created by CuriousTorvald and Claude on 2025-12-03.
*/
#ifndef TAV_ENCODER_LIB_H
#define TAV_ENCODER_LIB_H
#include <stdint.h>
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
// =============================================================================
// Opaque Encoder Context
// =============================================================================
/**
* TAV encoder context - opaque to users.
* Created with tav_encoder_create(), freed with tav_encoder_free().
*/
typedef struct tav_encoder_context tav_encoder_context_t;
// =============================================================================
// Configuration Structures
// =============================================================================
/**
* Video encoding parameters.
*/
typedef struct {
// === Video Dimensions ===
int width; // Frame width (must be even)
int height; // Frame height (must be even)
int fps_num; // Framerate numerator (e.g., 60 for 60fps)
int fps_den; // Framerate denominator (e.g., 1 for 60/1)
// === Wavelet Configuration ===
int wavelet_type; // Spatial wavelet: 0=CDF 5/3, 1=CDF 9/7 (default), 2=CDF 13/7, 16=DD-4, 255=Haar
int temporal_wavelet; // Temporal wavelet: 0=Haar, 1=CDF 5/3 (default for smooth motion)
int decomp_levels; // Spatial DWT levels (0=auto, typically 6)
int temporal_levels; // Temporal DWT levels (0=auto, typically 2 for 8-frame GOPs)
// === Color Space ===
int channel_layout; // 0=YCoCg-R (default), 1=ICtCp (for HDR/BT.2100 sources)
int perceptual_tuning; // 1=enable HVS perceptual quantization (default), 0=uniform
// === GOP Configuration ===
int enable_temporal_dwt; // 1=enable 3D DWT GOP encoding (default), 0=intra-only I-frames
int gop_size; // Frames per GOP (8, 16, or 24; 0=auto based on framerate)
int enable_two_pass; // 1=enable two-pass with scene change detection (default), 0=single-pass
// === Quality Control ===
int quality_level;
int quantiser_y; // Luma quantiser (0-255, indexed against QLUT)
int quantiser_co; // Orange chrominance quantiser (0-255, indexed against QLUT)
int quantiser_cg; // Green chrominance quantiser (0-255, indexed against QLUT)
float dead_zone_threshold; // Dead-zone quantization threshold (0.0=disabled, 0.6-1.5 typical)
// === Entropy Coding ===
int entropy_coder; // 0=Twobitmap (default), 1=EZBC (better for high-quality)
int zstd_level; // Zstd compression level (3-22, default: 7)
// === Multi-threading ===
int num_threads; // Worker threads (0=single-threaded, -1=auto, 1-16=explicit)
// === Encoder Presets ===
int encoder_preset; // Preset flags: 0x01=sports (finer temporal quant), 0x02=anime (disable grain)
// === Advanced Options ===
int verbose; // 1=enable debug output, 0=quiet (default)
int monoblock; // -1=auto (based on dimensions), 0=force tiled, 1=force monoblock
} tav_encoder_params_t;
/**
* Initialize encoder parameters with default values.
*
* @param params Parameter structure to initialize
* @param width Frame width
* @param height Frame height
*/
void tav_encoder_params_init(tav_encoder_params_t *params, int width, int height);
/**
* Encoder output packet.
* Contains encoded video or audio data.
*/
typedef struct {
uint8_t *data; // Packet data (owned by encoder, valid until next encode/flush)
size_t size; // Packet size in bytes
uint8_t packet_type; // TAV packet type (0x10=I-frame, 0x12=GOP, 0x24=audio, etc.)
int frame_number; // Frame number (for video packets)
int is_video; // 1=video packet, 0=audio packet
} tav_encoder_packet_t;
// =============================================================================
// Encoder Lifecycle
// =============================================================================
/**
* Create TAV encoder context.
*
* Allocates internal buffers, initializes thread pool (if multi-threading enabled),
* and prepares encoder for frame submission.
*
* @param params Encoder parameters (copied internally)
* @return Encoder context, or NULL on failure
*/
tav_encoder_context_t *tav_encoder_create(const tav_encoder_params_t *params);
/**
* Free TAV encoder context.
*
* Shuts down thread pool, frees all buffers and resources.
* Any unflushed frames in the GOP buffer will be lost.
*
* @param ctx Encoder context
*/
void tav_encoder_free(tav_encoder_context_t *ctx);
/**
* Get last error message.
*
* @param ctx Encoder context
* @return Error message string (valid until next encode operation)
*/
const char *tav_encoder_get_error(tav_encoder_context_t *ctx);
/**
* Get encoder parameters (with calculated values).
* After context creation, params will contain actual values used
* (e.g., auto-calculated decomp_levels, gop_size).
*
* @param ctx Encoder context
* @param params Output parameters structure
*/
void tav_encoder_get_params(tav_encoder_context_t *ctx, tav_encoder_params_t *params);
/**
* DEBUG: Validate encoder context integrity
* Returns 1 if context appears valid, 0 otherwise
*/
int tav_encoder_validate_context(tav_encoder_context_t *ctx);
// =============================================================================
// Video Encoding
// =============================================================================
/*
* DEPRECATED: tav_encoder_encode_frame() and tav_encoder_flush() have been
* removed. Use tav_encoder_encode_gop() instead, which works for both
* single-threaded and multi-threaded modes. The CLI should buffer frames
* and call encode_gop() when a full GOP is ready.
*/
/**
* Encode a complete GOP (Group of Pictures) directly.
*
* This function is STATELESS and THREAD-SAFE with separate contexts.
* Perfect for multithreaded encoding from CLI:
* - Each thread creates its own encoder context
* - Each thread calls encode_gop() with a batch of frames
* - No shared state, no locking needed
*
* Example multithreaded usage:
* ```c
* // Worker thread function
* void* worker(void* arg) {
* work_item_t* item = (work_item_t*)arg;
*
* // Create thread-local encoder context
* tav_encoder_context_t* ctx = tav_encoder_create(&shared_params);
*
* // Encode this GOP
* tav_encoder_packet_t* packet;
* tav_encoder_encode_gop(ctx, item->frames, item->num_frames,
* item->frame_numbers, &packet);
*
* // Store packet in output queue
* queue_push(output_queue, packet);
*
* tav_encoder_free(ctx);
* return NULL;
* }
* ```
*
* @param ctx Encoder context (one per thread)
* @param rgb_frames Array of RGB24 frames [frame][width*height*3]
* @param num_frames Number of frames in GOP (1-24)
* @param frame_numbers Frame indices for timecodes (can be NULL)
* @param packet Output packet pointer
* @return 1 if packet ready, -1 on error
*/
int tav_encoder_encode_gop(tav_encoder_context_t *ctx,
const uint8_t **rgb_frames,
int num_frames,
const int *frame_numbers,
tav_encoder_packet_t **packet);
/**
* Free a packet returned by encode_frame(), flush(), or encode_gop().
*
* @param packet Packet to free (can be NULL)
*/
void tav_encoder_free_packet(tav_encoder_packet_t *packet);
// =============================================================================
// Audio Encoding (Optional)
// =============================================================================
/**
* Encode audio samples (TAD codec).
*
* Audio is encoded synchronously and returned immediately.
* For TAV muxing: interleave audio packets with video packets by frame PTS.
*
* @param ctx Encoder context
* @param pcm_samples PCM32f stereo samples (interleaved: L,R,L,R,...), num_samples×2 floats
* @param num_samples Number of samples per channel
* @param packet Output packet pointer
* @return 1 if packet ready, -1 on error
*/
int tav_encoder_encode_audio(tav_encoder_context_t *ctx,
const float *pcm_samples,
size_t num_samples,
tav_encoder_packet_t **packet);
// =============================================================================
// Statistics and Info
// =============================================================================
/**
* Get encoding statistics.
*/
typedef struct {
int64_t frames_encoded; // Total frames encoded
int64_t gops_encoded; // Total GOPs encoded
size_t total_bytes; // Total bytes output (video + audio)
size_t video_bytes; // Video bytes
size_t audio_bytes; // Audio bytes
double avg_bitrate_kbps; // Average bitrate (kbps)
double encoding_fps; // Encoding speed (frames/sec)
} tav_encoder_stats_t;
/**
* Get encoding statistics.
*
* @param ctx Encoder context
* @param stats Output statistics structure
*/
void tav_encoder_get_stats(tav_encoder_context_t *ctx, tav_encoder_stats_t *stats);
// =============================================================================
// TAV Packet Types (for reference)
// =============================================================================
#define TAV_PACKET_IFRAME 0x10 // I-frame (intra-only, single frame)
#define TAV_PACKET_PFRAME 0x11 // P-frame (delta from previous)
#define TAV_PACKET_GOP_UNIFIED 0x12 // GOP unified (3D DWT, multiple frames)
#define TAV_PACKET_AUDIO_TAD 0x24 // TAD audio (DWT-based perceptual codec)
#define TAV_PACKET_AUDIO_PCM8 0x20 // PCM8 audio (legacy)
#define TAV_PACKET_LOOP_START 0xF0 // Loop point start (no payload)
#define TAV_PACKET_GOP_SYNC 0xFC // GOP sync (frame count marker)
#define TAV_PACKET_TIMECODE 0xFD // Timecode metadata
// =============================================================================
// Tile Settings (for multi-tile mode)
// =============================================================================
#define TAV_TILE_SIZE_X 640 // Base tile width
#define TAV_TILE_SIZE_Y 540 // Base tile height
#define TAV_DWT_FILTER_HALF_SUPPORT 4 // For 9/7 filter (filter lengths 9,7 → L=4)
#define TAV_TILE_MARGIN_LEVELS 3 // Use margin for 3 levels: 4 * (2^3) = 32px
#define TAV_TILE_MARGIN (TAV_DWT_FILTER_HALF_SUPPORT * (1 << TAV_TILE_MARGIN_LEVELS)) // 32px
#define TAV_PADDED_TILE_SIZE_X (TAV_TILE_SIZE_X + 2 * TAV_TILE_MARGIN) // 704
#define TAV_PADDED_TILE_SIZE_Y (TAV_TILE_SIZE_Y + 2 * TAV_TILE_MARGIN) // 604
// Monoblock threshold: D1 PAL resolution (720x576)
// If width > 720 OR height > 576, automatically switch to tiled mode
#define TAV_MONOBLOCK_MAX_WIDTH 720
#define TAV_MONOBLOCK_MAX_HEIGHT 576
#ifdef __cplusplus
}
#endif
#endif // TAV_ENCODER_LIB_H