mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-07 11:51:49 +09:00
tav: librarying
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -9,6 +9,8 @@ buildapp/out/TerranBASIC*
|
||||
buildapp/TerranBASIC_linux.*
|
||||
buildapp/TerranBASIC_macOS.*
|
||||
buildapp/TerranBASIC_windows.*
|
||||
*.o
|
||||
*.a
|
||||
|
||||
# Java native errors
|
||||
hs_err_pid*
|
||||
|
||||
@@ -455,11 +455,6 @@ const isInterlaced = (header.videoFlags & 0x01) !== 0
|
||||
const isNTSC = (header.videoFlags & 0x02) !== 0
|
||||
const isLossless = (header.videoFlags & 0x04) !== 0
|
||||
|
||||
// Calculate tile dimensions (112x112 vs TEV's 16x16 blocks)
|
||||
const tilesX = Math.ceil(header.width / 2)
|
||||
const tilesY = Math.ceil(header.height / 2)
|
||||
const numTiles = 4
|
||||
|
||||
console.log(`TAV Decoder`)
|
||||
console.log(`Resolution: ${header.width}x${header.height}`)
|
||||
console.log(`FPS: ${header.fps}`)
|
||||
@@ -469,7 +464,6 @@ console.log(`Decomposition levels: ${header.decompLevels}`)
|
||||
console.log(`Quality: Y=${QLUT[header.qualityY]}, Co=${QLUT[header.qualityCo]}, Cg=${QLUT[header.qualityCg]}`)
|
||||
console.log(`Channel layout: ${getChannelLayoutName(header.channelLayout)}`)
|
||||
console.log(`Entropy coder: ${header.entropyCoder === 0 ? "Twobit-map" : header.entropyCoder === 1 ? "EZBC" : "Unknown"}`)
|
||||
console.log(`Tiles: ${tilesX}x${tilesY} (${numTiles} total)`)
|
||||
console.log(`Colour space: ${header.version % 2 == 0 ? "ICtCp" : "YCoCg-R"}`)
|
||||
console.log(`Features: ${hasAudio ? "Audio " : ""}${hasSubtitles ? "Subtitles " : ""}${progressiveTransmission ? "Progressive " : ""}${roiCoding ? "ROI " : ""}`)
|
||||
console.log(`Video flags raw: 0x${header.videoFlags.toString(16)}`)
|
||||
|
||||
@@ -1624,7 +1624,7 @@ start of the next packet
|
||||
- Video flags: Interlaced/NTSC framerate (defined in packet header)
|
||||
- Channel layout: Y-Co-Cg
|
||||
- Entropy coder: EZBC
|
||||
- Encoder preset: default preset only
|
||||
- Encoder preset: sports preset always enabled
|
||||
- Tiles: monoblock
|
||||
|
||||
# Packet Structure
|
||||
@@ -1636,7 +1636,8 @@ start of the next packet
|
||||
- bit 4-7 = quality index (0-5)
|
||||
* Quality indices follow TSVM encoder's
|
||||
int16 Reserved (zero-fill)
|
||||
uint32 Total packet size past header
|
||||
uint32 Total packet size past 16-byte header, modulo 2^32
|
||||
!! this value should NOT be used to derive the actual packet size !!
|
||||
uint32 CRC-32 of 12-byte header
|
||||
uint64 Timecode (0xFD packet) without header byte
|
||||
* TAD packet (full 0x24 packet)
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
# Created by CuriousTorvald and Claude on 2025-08-17.
|
||||
# Makefile for TSVM Enhanced Video (TEV) encoder
|
||||
# Makefile for TSVM Enhanced Video (TEV) encoder and libraries
|
||||
|
||||
CC = gcc
|
||||
CXX = g++
|
||||
CFLAGS = -std=c99 -Wall -Wextra -Ofast -D_GNU_SOURCE -march=native -mavx512f -mavx512dq -mavx512bw -mavx512vl
|
||||
CXXFLAGS = -std=c++11 -Wall -Wextra -Ofast -D_GNU_SOURCE -march=native -mavx512f -mavx512dq -mavx512bw -mavx512vl
|
||||
CFLAGS = -std=c99 -Wall -Wextra -Ofast -D_GNU_SOURCE -march=native -mavx512f -mavx512dq -mavx512bw -mavx512vl -Iinclude
|
||||
CXXFLAGS = -std=c++11 -Wall -Wextra -Ofast -D_GNU_SOURCE -march=native -mavx512f -mavx512dq -mavx512bw -mavx512vl -Iinclude
|
||||
DBGFLAGS =
|
||||
PREFIX = /usr/local
|
||||
|
||||
@@ -17,58 +17,124 @@ LIBS = -lm $(ZSTD_LIBS)
|
||||
OPENCV_CFLAGS = $(shell pkg-config --cflags opencv4)
|
||||
OPENCV_LIBS = $(shell pkg-config --libs opencv4)
|
||||
|
||||
# =============================================================================
|
||||
# Library Object Files
|
||||
# =============================================================================
|
||||
|
||||
# libtavenc - TAV encoder library
|
||||
LIBTAVENC_OBJ = lib/libtavenc/tav_encoder_lib.o \
|
||||
lib/libtavenc/tav_encoder_color.o \
|
||||
lib/libtavenc/tav_encoder_dwt.o \
|
||||
lib/libtavenc/tav_encoder_quantize.o \
|
||||
lib/libtavenc/tav_encoder_ezbc.o \
|
||||
lib/libtavenc/tav_encoder_utils.o
|
||||
|
||||
# libtavdec - TAV decoder library
|
||||
LIBTAVDEC_OBJ = lib/libtavdec/tav_video_decoder.o
|
||||
|
||||
# libtadenc - TAD encoder library
|
||||
LIBTADENC_OBJ = lib/libtadenc/encoder_tad.o
|
||||
|
||||
# libtaddec - TAD decoder library
|
||||
LIBTADDEC_OBJ = lib/libtaddec/decoder_tad.o
|
||||
|
||||
# =============================================================================
|
||||
# Targets
|
||||
# =============================================================================
|
||||
|
||||
# Source files and targets
|
||||
TARGETS = tev tav tav_decoder tav_inspector tav_dt_decoder
|
||||
TARGETS = clean libs encoder_tav_ref#tev tav tav_decoder tav_inspector tav_dt_decoder
|
||||
TAD_TARGETS = encoder_tad decoder_tad
|
||||
LIBRARIES = lib/libtavenc.a lib/libtavdec.a lib/libtadenc.a lib/libtaddec.a
|
||||
TEST_TARGETS = test_mesh_warp test_mesh_roundtrip
|
||||
|
||||
# Build all encoders
|
||||
# Build all encoders (default)
|
||||
all: $(TARGETS)
|
||||
|
||||
# Build all libraries
|
||||
libs: $(LIBRARIES)
|
||||
|
||||
# Build main encoder
|
||||
tev: encoder_tev.c
|
||||
rm -f encoder_tev
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -o encoder_tev $< $(LIBS)
|
||||
|
||||
tav: encoder_tav.c encoder_tad.c encoder_tav_opencv.cpp
|
||||
tav: src/encoder_tav.c lib/libtadenc/encoder_tad.c encoder_tav_opencv.cpp
|
||||
rm -f encoder_tav encoder_tav.o encoder_tad.o encoder_tav_opencv.o
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c encoder_tav.c -o encoder_tav.o
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c encoder_tad.c -o encoder_tad.o
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c src/encoder_tav.c -o encoder_tav.o
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c lib/libtadenc/encoder_tad.c -o encoder_tad.o
|
||||
$(CXX) $(CXXFLAGS) $(OPENCV_CFLAGS) $(ZSTD_CFLAGS) -c encoder_tav_opencv.cpp -o encoder_tav_opencv.o
|
||||
$(CXX) $(DBGFLAGS) -o encoder_tav encoder_tav.o encoder_tad.o encoder_tav_opencv.o $(LIBS) $(OPENCV_LIBS)
|
||||
|
||||
tav_decoder: decoder_tav.c decoder_tad.c decoder_tad.h
|
||||
rm -f decoder_tav decoder_tav.o
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -DTAD_DECODER_LIB -c decoder_tad.c -o decoder_tad.o
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c decoder_tav.c -o decoder_tav.o
|
||||
# New library-based TAV encoder
|
||||
tav_new: src/encoder_tav_new.c lib/libtavenc.a lib/libtadenc.a
|
||||
rm -f encoder_tav_new
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -Iinclude -o encoder_tav_new src/encoder_tav_new.c lib/libtavenc.a lib/libtadenc.a $(LIBS)
|
||||
|
||||
tav_decoder: src/decoder_tav.c lib/libtaddec/decoder_tad.c include/decoder_tad.h
|
||||
rm -f decoder_tav decoder_tav.o decoder_tad.o
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -DTAD_DECODER_LIB -c lib/libtaddec/decoder_tad.c -o decoder_tad.o
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c src/decoder_tav.c -o decoder_tav.o
|
||||
$(CC) $(DBGFLAGS) -o decoder_tav decoder_tav.o decoder_tad.o $(LIBS)
|
||||
|
||||
tav_inspector: tav_inspector.c
|
||||
rm -f tav_inspector
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -o tav_inspector $< $(LIBS)
|
||||
|
||||
tav_dt_decoder: decoder_tav_dt.c decoder_tad.c decoder_tad.h tav_video_decoder.c tav_video_decoder.h
|
||||
rm -f decoder_tav_dt decoder_tav_dt.o tav_video_decoder.o
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -DTAD_DECODER_LIB -c decoder_tad.c -o decoder_tad.o
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c tav_video_decoder.c -o tav_video_decoder.o
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c decoder_tav_dt.c -o decoder_tav_dt.o
|
||||
tav_dt_decoder: src/decoder_tav_dt.c lib/libtaddec/decoder_tad.c include/decoder_tad.h lib/libtavdec/tav_video_decoder.c include/tav_video_decoder.h
|
||||
rm -f decoder_tav_dt decoder_tav_dt.o tav_video_decoder.o decoder_tad.o
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -DTAD_DECODER_LIB -c lib/libtaddec/decoder_tad.c -o decoder_tad.o
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c lib/libtavdec/tav_video_decoder.c -o tav_video_decoder.o
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c src/decoder_tav_dt.c -o decoder_tav_dt.o
|
||||
$(CC) $(DBGFLAGS) -o decoder_tav_dt decoder_tav_dt.o decoder_tad.o tav_video_decoder.o $(LIBS)
|
||||
|
||||
# Build TAD (Terrarum Advanced Audio) tools
|
||||
encoder_tad: encoder_tad_standalone.c encoder_tad.c encoder_tad.h
|
||||
encoder_tad: src/encoder_tad_standalone.c lib/libtadenc/encoder_tad.c include/encoder_tad.h
|
||||
rm -f encoder_tad encoder_tad_standalone.o encoder_tad.o
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c encoder_tad.c -o encoder_tad.o
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c encoder_tad_standalone.c -o encoder_tad_standalone.o
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c lib/libtadenc/encoder_tad.c -o encoder_tad.o
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c src/encoder_tad_standalone.c -o encoder_tad_standalone.o
|
||||
$(CC) $(DBGFLAGS) -o encoder_tad encoder_tad_standalone.o encoder_tad.o $(LIBS)
|
||||
|
||||
decoder_tad: decoder_tad.c
|
||||
decoder_tad: lib/libtaddec/decoder_tad.c
|
||||
rm -f decoder_tad
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -o decoder_tad $< $(LIBS)
|
||||
|
||||
# Build all TAD tools
|
||||
tad: $(TAD_TARGETS)
|
||||
|
||||
# Build test programs
|
||||
# =============================================================================
|
||||
# Library Build Rules
|
||||
# =============================================================================
|
||||
|
||||
# Compile library object files
|
||||
lib/libtavenc/%.o: lib/libtavenc/%.c
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c $< -o $@
|
||||
|
||||
lib/libtavdec/%.o: lib/libtavdec/%.c
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c $< -o $@
|
||||
|
||||
lib/libtadenc/%.o: lib/libtadenc/%.c
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c $< -o $@
|
||||
|
||||
lib/libtaddec/%.o: lib/libtaddec/%.c
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -DTAD_DECODER_LIB -c $< -o $@
|
||||
|
||||
# Build static libraries
|
||||
lib/libtavenc.a: $(LIBTAVENC_OBJ)
|
||||
ar rcs $@ $^
|
||||
|
||||
lib/libtavdec.a: $(LIBTAVDEC_OBJ)
|
||||
ar rcs $@ $^
|
||||
|
||||
lib/libtadenc.a: $(LIBTADENC_OBJ)
|
||||
ar rcs $@ $^
|
||||
|
||||
lib/libtaddec.a: $(LIBTADDEC_OBJ)
|
||||
ar rcs $@ $^
|
||||
|
||||
# =============================================================================
|
||||
# Test Programs
|
||||
# =============================================================================
|
||||
|
||||
test_mesh_roundtrip: test_mesh_roundtrip.cpp encoder_tav_opencv.cpp
|
||||
rm -f test_mesh_roundtrip test_mesh_roundtrip.o
|
||||
@@ -91,11 +157,11 @@ tests: $(TEST_TARGETS)
|
||||
# Build with debug symbols
|
||||
debug: CFLAGS += -g -DDEBUG -fsanitize=address
|
||||
debug: DBGFLAGS += -fsanitize=address
|
||||
debug: $(TARGETS)
|
||||
debug: tav_new #$(TARGETS)
|
||||
|
||||
# Clean build artifacts
|
||||
clean:
|
||||
rm -f $(TARGETS) $(TAD_TARGETS) *.o
|
||||
rm -f $(TARGETS) $(TAD_TARGETS) $(LIBRARIES) *.o lib/*/*.o
|
||||
|
||||
# Install (copy to PATH)
|
||||
install: $(TARGETS) $(TAD_TARGETS)
|
||||
@@ -119,22 +185,40 @@ help:
|
||||
@echo ""
|
||||
@echo "Targets:"
|
||||
@echo " all - Build video encoders (default)"
|
||||
@echo " libs - Build all codec libraries (.a files)"
|
||||
@echo " tev - Build the TEV video encoder"
|
||||
@echo " tav - Build the TAV advanced video encoder"
|
||||
@echo " tad - Build all TAD audio tools (encoder, decoder)"
|
||||
@echo " encoder_tad - Build TAD audio encoder"
|
||||
@echo " decoder_tad - Build TAD audio decoder"
|
||||
@echo " tests - Build test programs"
|
||||
@echo " debug - Build with debug symbols"
|
||||
@echo " clean - Remove build artifacts"
|
||||
@echo " install - Install to /usr/local/bin"
|
||||
@echo " check-deps - Check for required dependencies"
|
||||
@echo " help - Show this help"
|
||||
@echo ""
|
||||
@echo "Libraries:"
|
||||
@echo " lib/libtavenc.a - TAV encoder library"
|
||||
@echo " lib/libtavdec.a - TAV decoder library"
|
||||
@echo " lib/libtadenc.a - TAD encoder library"
|
||||
@echo " lib/libtaddec.a - TAD decoder library"
|
||||
@echo ""
|
||||
@echo "Usage:"
|
||||
@echo " make # Build video encoders"
|
||||
@echo " make libs # Build all libraries"
|
||||
@echo " make tev # Build TEV encoder"
|
||||
@echo " make tav # Build TAV encoder"
|
||||
@echo " make tad # Build all TAD audio tools"
|
||||
@echo " sudo make install # Install all encoders"
|
||||
|
||||
.PHONY: all clean install check-deps help debug tad tad16 tad10
|
||||
.PHONY: all libs clean install check-deps help debug tad tests
|
||||
|
||||
# Reference encoder using libtavenc (replaces old monolithic encoder)
|
||||
encoder_tav_ref: src/encoder_tav.c lib/libtavenc.a lib/libtadenc.a
|
||||
rm -f encoder_tav_ref
|
||||
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -Iinclude -o encoder_tav_ref src/encoder_tav.c lib/libtavenc.a lib/libtadenc.a $(LIBS)
|
||||
@echo ""
|
||||
@echo "Reference encoder built: encoder_tav_ref"
|
||||
@echo "This is the official reference implementation with all features"
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
65
video_encoder/include/coefficient_compress.h
Normal file
65
video_encoder/include/coefficient_compress.h
Normal file
@@ -0,0 +1,65 @@
|
||||
// Simple coefficient preprocessing for better compression
|
||||
// Insert right before Zstd compression
|
||||
|
||||
#ifndef COEFFICIENT_COMPRESS_H
|
||||
#define COEFFICIENT_COMPRESS_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
/**
 * Pack a coefficient array into a significance map followed by its non-zero values.
 *
 * Layout written to output_buffer:
 *   - (coeff_count + 7) / 8 bytes of bitmap; bit (i % 8) of byte (i / 8) is set
 *     when coeffs[i] != 0
 *   - the non-zero coefficients, in index order, as native-endian int16_t
 *
 * The input array is only read; nothing is modified in place. The value area
 * starts directly after the bitmap, so it is not 2-byte aligned when the bitmap
 * length is odd. Values are therefore stored with memcpy rather than through an
 * int16_t pointer, which would be a misaligned (undefined) access.
 *
 * @param coeffs        Input coefficients (coeff_count entries)
 * @param coeff_count   Number of coefficients; values <= 0 write nothing
 * @param output_buffer Destination; must hold at least
 *                      (coeff_count + 7) / 8 + coeff_count * sizeof(int16_t) bytes
 * @return Number of bytes written to output_buffer
 */
static inline size_t preprocess_coefficients(const int16_t *coeffs, int coeff_count, uint8_t *output_buffer) {
    // A negative count would otherwise turn into a huge memset length.
    if (coeff_count <= 0) {
        return 0;
    }

    const size_t count = (size_t)coeff_count;
    const size_t map_bytes = (count + 7) / 8; // Round up to whole bytes
    uint8_t *sig_map = output_buffer;
    uint8_t *values = output_buffer + map_bytes;
    size_t nonzero_count = 0;

    memset(sig_map, 0, map_bytes);

    // Single pass: mark each non-zero coefficient and append its value.
    for (size_t i = 0; i < count; i++) {
        if (coeffs[i] == 0) {
            continue;
        }
        sig_map[i / 8] |= (uint8_t)(1u << (i % 8));
        memcpy(values + nonzero_count * sizeof(int16_t), &coeffs[i], sizeof(int16_t));
        nonzero_count++;
    }

    return map_bytes + nonzero_count * sizeof(int16_t);
}
|
||||
|
||||
/**
 * Rebuild a coefficient array from a significance map followed by packed values
 * (the layout produced by preprocess_coefficients()).
 *
 * The first (coeff_count + 7) / 8 bytes of compressed_data are the bitmap; the
 * bytes after it are consecutive native-endian int16_t values, one per set bit.
 * The value area may start at an odd address, so values are loaded with memcpy
 * rather than through an int16_t pointer (which would be a misaligned access).
 *
 * No input length is passed in: the caller must guarantee that compressed_data
 * holds the whole bitmap plus one int16_t for every set bit.
 *
 * @param compressed_data Bitmap followed by the packed non-zero values
 * @param coeff_count     Number of coefficients to reconstruct; values <= 0
 *                        leave output_coeffs untouched
 * @param output_coeffs   Destination array of coeff_count entries
 */
static inline void postprocess_coefficients(const uint8_t *compressed_data, int coeff_count, int16_t *output_coeffs) {
    // A negative count would otherwise turn into a huge memset length.
    if (coeff_count <= 0) {
        return;
    }

    const size_t count = (size_t)coeff_count;
    const size_t map_bytes = (count + 7) / 8;
    const uint8_t *sig_map = compressed_data;
    const uint8_t *next_value = compressed_data + map_bytes;

    // Coefficients without a set bit are zero.
    memset(output_coeffs, 0, count * sizeof(int16_t));

    for (size_t i = 0; i < count; i++) {
        if (sig_map[i / 8] & (1u << (i % 8))) {
            memcpy(&output_coeffs[i], next_value, sizeof(int16_t));
            next_value += sizeof(int16_t);
        }
    }
}
|
||||
|
||||
#endif // COEFFICIENT_COMPRESS_H
|
||||
74
video_encoder/include/entropy_coder.h
Normal file
74
video_encoder/include/entropy_coder.h
Normal file
@@ -0,0 +1,74 @@
|
||||
// TEV Entropy Coder - Specialised for DCT coefficients
|
||||
// Replaces gzip with video-optimized compression
|
||||
#ifndef ENTROPY_CODER_H
|
||||
#define ENTROPY_CODER_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
// Bit writer for variable-length codes
|
||||
typedef struct {
|
||||
uint8_t *buffer;
|
||||
size_t buffer_size;
|
||||
size_t byte_pos;
|
||||
int bit_pos; // 0-7, next bit to write
|
||||
} bit_writer_t;
|
||||
|
||||
// Bit reader for decoding
|
||||
typedef struct {
|
||||
const uint8_t *buffer;
|
||||
size_t buffer_size;
|
||||
size_t byte_pos;
|
||||
int bit_pos; // 0-7, next bit to read
|
||||
} bit_reader_t;
|
||||
|
||||
// Huffman table entry
|
||||
typedef struct {
|
||||
uint16_t code; // Huffman code
|
||||
uint8_t bits; // Code length in bits
|
||||
} huffman_entry_t;
|
||||
|
||||
// Video entropy coder optimized for TEV coefficients
|
||||
typedef struct {
|
||||
// Huffman tables for different coefficient types
|
||||
huffman_entry_t y_dc_table[512]; // Y DC coefficients (-255 to +255)
|
||||
huffman_entry_t y_ac_table[512]; // Y AC coefficients
|
||||
huffman_entry_t c_dc_table[512]; // Chroma DC coefficients
|
||||
huffman_entry_t c_ac_table[512]; // Chroma AC coefficients
|
||||
huffman_entry_t run_table[256]; // Zero run lengths (0-255)
|
||||
|
||||
// Motion vector Huffman tables
|
||||
huffman_entry_t mv_table[65]; // Motion vectors (-32 to +32)
|
||||
|
||||
// Bit writer/reader
|
||||
bit_writer_t writer;
|
||||
bit_reader_t reader;
|
||||
} entropy_coder_t;
|
||||
|
||||
// Huffman codes for block modes (16 entries).
// NOTE(review): this is a tentative definition with internal linkage, so every
// translation unit that includes this header gets its own copy, zero-filled
// unless that unit also supplies an initialised definition. Confirm this is
// intended rather than an `extern` declaration.
static const huffman_entry_t BLOCK_MODE_HUFFMAN[16];
|
||||
|
||||
void write_bits(bit_writer_t *writer, uint32_t value, int bits);
|
||||
uint32_t read_bits(bit_reader_t *reader, int bits);
|
||||
|
||||
// Initialise entropy coder
|
||||
entropy_coder_t* entropy_coder_create(uint8_t *buffer, size_t buffer_size);
|
||||
void entropy_coder_destroy(entropy_coder_t *coder);
|
||||
|
||||
// Encoding functions
|
||||
int encode_y_block(entropy_coder_t *coder, int16_t *y_coeffs);
|
||||
int encode_chroma_block(entropy_coder_t *coder, int16_t *chroma_coeffs, int is_cg);
|
||||
int encode_motion_vector(entropy_coder_t *coder, int16_t mv_x, int16_t mv_y);
|
||||
int encode_block_mode(entropy_coder_t *coder, uint8_t mode);
|
||||
|
||||
// Decoding functions
|
||||
void entropy_coder_init_reader(entropy_coder_t *coder, const uint8_t *buffer, size_t buffer_size);
|
||||
int decode_y_block(entropy_coder_t *coder, int16_t *y_coeffs);
|
||||
int decode_chroma_block(entropy_coder_t *coder, int16_t *chroma_coeffs, int is_cg);
|
||||
int decode_motion_vector(entropy_coder_t *coder, int16_t *mv_x, int16_t *mv_y);
|
||||
int decode_block_mode(entropy_coder_t *coder, uint8_t *mode);
|
||||
|
||||
// Get compressed size
|
||||
size_t entropy_coder_get_size(entropy_coder_t *coder);
|
||||
void entropy_coder_reset(entropy_coder_t *coder);
|
||||
|
||||
#endif // ENTROPY_CODER_H
|
||||
303
video_encoder/include/tav_encoder_lib.h
Normal file
303
video_encoder/include/tav_encoder_lib.h
Normal file
@@ -0,0 +1,303 @@
|
||||
/**
|
||||
* TAV Encoder Library - Public API
|
||||
*
|
||||
* High-level interface for encoding video using the TSVM Advanced Video (TAV) codec.
|
||||
* Supports GOP-based encoding with internal multi-threading for optimal performance.
|
||||
*
|
||||
* Created by CuriousTorvald and Claude on 2025-12-03.
|
||||
*/
|
||||
|
||||
#ifndef TAV_ENCODER_LIB_H
|
||||
#define TAV_ENCODER_LIB_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// =============================================================================
|
||||
// Opaque Encoder Context
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* TAV encoder context - opaque to users.
|
||||
* Created with tav_encoder_create(), freed with tav_encoder_free().
|
||||
*/
|
||||
typedef struct tav_encoder_context tav_encoder_context_t;
|
||||
|
||||
// =============================================================================
|
||||
// Configuration Structures
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Video encoding parameters.
|
||||
*/
|
||||
typedef struct {
|
||||
// === Video Dimensions ===
|
||||
int width; // Frame width (must be even)
|
||||
int height; // Frame height (must be even)
|
||||
int fps_num; // Framerate numerator (e.g., 60 for 60fps)
|
||||
int fps_den; // Framerate denominator (e.g., 1 for 60/1)
|
||||
|
||||
// === Wavelet Configuration ===
|
||||
int wavelet_type; // Spatial wavelet: 0=CDF 5/3, 1=CDF 9/7 (default), 2=CDF 13/7, 16=DD-4, 255=Haar
|
||||
int temporal_wavelet; // Temporal wavelet: 0=Haar, 1=CDF 5/3 (default for smooth motion)
|
||||
int decomp_levels; // Spatial DWT levels (0=auto, typically 6)
|
||||
int temporal_levels; // Temporal DWT levels (0=auto, typically 2 for 8-frame GOPs)
|
||||
|
||||
// === Color Space ===
|
||||
int channel_layout; // 0=YCoCg-R (default), 1=ICtCp (for HDR/BT.2100 sources)
|
||||
int perceptual_tuning; // 1=enable HVS perceptual quantization (default), 0=uniform
|
||||
|
||||
// === GOP Configuration ===
|
||||
int enable_temporal_dwt; // 1=enable 3D DWT GOP encoding (default), 0=intra-only I-frames
|
||||
int gop_size; // Frames per GOP (8, 16, or 24; 0=auto based on framerate)
|
||||
int enable_two_pass; // 1=enable two-pass with scene change detection (default), 0=single-pass
|
||||
|
||||
// === Quality Control ===
|
||||
int quality_level; // NOTE(review): undocumented; presumably an overall quality setting alongside the per-channel values below. Confirm its range and how it interacts with quality_y/co/cg
|
||||
int quality_y; // Luma quality (0-5, default: 3)
|
||||
int quality_co; // Orange chrominance quality (0-5, default: 3)
|
||||
int quality_cg; // Green chrominance quality (0-5, default: 3)
|
||||
int dead_zone_threshold; // Dead-zone quantization threshold (0=disabled, 1-10 typical)
|
||||
|
||||
// === Entropy Coding ===
|
||||
int entropy_coder; // 0=Twobitmap (default), 1=EZBC (better for high-quality)
|
||||
int zstd_level; // Zstd compression level (3-22, default: 7)
|
||||
|
||||
// === Multi-threading ===
|
||||
int num_threads; // Worker threads (0=single-threaded, -1=auto, 1-16=explicit)
|
||||
|
||||
// === Encoder Presets ===
|
||||
int encoder_preset; // Preset flags: 0x01=sports (finer temporal quant), 0x02=anime (disable grain)
|
||||
|
||||
// === Advanced Options ===
|
||||
int verbose; // 1=enable debug output, 0=quiet (default)
|
||||
int monoblock; // 1=single tile encoding (always 1 for current implementation)
|
||||
|
||||
} tav_encoder_params_t;
|
||||
|
||||
/**
|
||||
* Initialize encoder parameters with default values.
|
||||
*
|
||||
* @param params Parameter structure to initialize
|
||||
* @param width Frame width
|
||||
* @param height Frame height
|
||||
*/
|
||||
void tav_encoder_params_init(tav_encoder_params_t *params, int width, int height);
|
||||
|
||||
/**
|
||||
* Encoder output packet.
|
||||
* Contains encoded video or audio data.
|
||||
*/
|
||||
typedef struct {
|
||||
// NOTE(review): ownership is documented inconsistently in this header. The
// comment on `data` says the buffer is encoder-owned and only valid until the
// next encode/flush, while tav_encoder_free_packet() is documented as freeing
// packets returned by encode_frame(), flush(), or encode_gop(). Confirm which
// contract the implementation follows before relying on either.
uint8_t *data; // Packet data (owned by encoder, valid until next encode/flush)
|
||||
size_t size; // Packet size in bytes
|
||||
uint8_t packet_type; // TAV packet type (0x10=I-frame, 0x12=GOP, 0x24=audio, etc.); see the TAV_PACKET_* defines in this header
|
||||
int frame_number; // Frame number (for video packets)
|
||||
int is_video; // 1=video packet, 0=audio packet
|
||||
} tav_encoder_packet_t;
|
||||
|
||||
// =============================================================================
|
||||
// Encoder Lifecycle
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Create TAV encoder context.
|
||||
*
|
||||
* Allocates internal buffers, initializes thread pool (if multi-threading enabled),
|
||||
* and prepares encoder for frame submission.
|
||||
*
|
||||
* @param params Encoder parameters (copied internally)
|
||||
* @return Encoder context, or NULL on failure
|
||||
*/
|
||||
tav_encoder_context_t *tav_encoder_create(const tav_encoder_params_t *params);
|
||||
|
||||
/**
|
||||
* Free TAV encoder context.
|
||||
*
|
||||
* Shuts down thread pool, frees all buffers and resources.
|
||||
* Any unflushed frames in the GOP buffer will be lost.
|
||||
*
|
||||
* @param ctx Encoder context
|
||||
*/
|
||||
void tav_encoder_free(tav_encoder_context_t *ctx);
|
||||
|
||||
/**
|
||||
* Get last error message.
|
||||
*
|
||||
* @param ctx Encoder context
|
||||
* @return Error message string (valid until next encode operation)
|
||||
*/
|
||||
const char *tav_encoder_get_error(tav_encoder_context_t *ctx);
|
||||
|
||||
/**
|
||||
* Get encoder parameters (with calculated values).
|
||||
* After context creation, params will contain actual values used
|
||||
* (e.g., auto-calculated decomp_levels, gop_size).
|
||||
*
|
||||
* @param ctx Encoder context
|
||||
* @param params Output parameters structure
|
||||
*/
|
||||
void tav_encoder_get_params(tav_encoder_context_t *ctx, tav_encoder_params_t *params);
|
||||
|
||||
/**
|
||||
* DEBUG: Validate encoder context integrity
|
||||
* Returns 1 if context appears valid, 0 otherwise
|
||||
*/
|
||||
int tav_encoder_validate_context(tav_encoder_context_t *ctx);
|
||||
|
||||
// =============================================================================
|
||||
// Video Encoding
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Encode a single RGB24 frame.
|
||||
*
|
||||
* Frames are buffered internally until a GOP is full, then encoded and returned.
|
||||
* For GOP encoding: returns 0 and sets *packet to NULL until the GOP is complete.
|
||||
* For intra-only: returns packet immediately.
|
||||
*
|
||||
* Thread-safety: NOT thread-safe. Caller must serialize calls to encode_frame().
|
||||
*
|
||||
* @param ctx Encoder context
|
||||
* @param rgb_frame RGB24 frame data (planar: [R...][G...][B...]), width×height×3 bytes
|
||||
* @param frame_pts Presentation timestamp (frame number or time)
|
||||
* @param packet Output packet pointer (NULL if GOP not yet complete)
|
||||
* @return 1 if packet ready, 0 if buffering for GOP, -1 on error
|
||||
*/
|
||||
int tav_encoder_encode_frame(tav_encoder_context_t *ctx,
|
||||
const uint8_t *rgb_frame,
|
||||
int64_t frame_pts,
|
||||
tav_encoder_packet_t **packet);
|
||||
|
||||
/**
|
||||
* Flush encoder and encode any remaining buffered frames.
|
||||
*
|
||||
* Call at end of encoding to output final GOP (even if not full).
|
||||
* Returns packets one at a time through repeated calls.
|
||||
*
|
||||
* @param ctx Encoder context
|
||||
* @param packet Output packet pointer (NULL when no more packets)
|
||||
* @return 1 if packet ready, 0 if no more packets, -1 on error
|
||||
*/
|
||||
int tav_encoder_flush(tav_encoder_context_t *ctx,
|
||||
tav_encoder_packet_t **packet);
|
||||
|
||||
/**
|
||||
* Encode a complete GOP (Group of Pictures) directly.
|
||||
*
|
||||
* This function is STATELESS and THREAD-SAFE with separate contexts.
|
||||
* Perfect for multithreaded encoding from CLI:
|
||||
* - Each thread creates its own encoder context
|
||||
* - Each thread calls encode_gop() with a batch of frames
|
||||
* - No shared state, no locking needed
|
||||
*
|
||||
* Example multithreaded usage:
|
||||
* ```c
|
||||
* // Worker thread function
|
||||
* void* worker(void* arg) {
|
||||
* work_item_t* item = (work_item_t*)arg;
|
||||
*
|
||||
* // Create thread-local encoder context
|
||||
* tav_encoder_context_t* ctx = tav_encoder_create(&shared_params);
|
||||
*
|
||||
* // Encode this GOP
|
||||
* tav_encoder_packet_t* packet;
|
||||
* tav_encoder_encode_gop(ctx, item->frames, item->num_frames,
|
||||
* item->frame_numbers, &packet);
|
||||
*
|
||||
* // Store packet in output queue
|
||||
* queue_push(output_queue, packet);
|
||||
*
|
||||
* tav_encoder_free(ctx);
|
||||
* return NULL;
|
||||
* }
|
||||
* ```
|
||||
*
|
||||
* @param ctx Encoder context (one per thread)
|
||||
* @param rgb_frames Array of RGB24 frames [frame][width*height*3]
|
||||
* @param num_frames Number of frames in GOP (1-24)
|
||||
* @param frame_numbers Frame indices for timecodes (can be NULL)
|
||||
* @param packet Output packet pointer
|
||||
* @return 1 if packet ready, -1 on error
|
||||
*/
|
||||
int tav_encoder_encode_gop(tav_encoder_context_t *ctx,
|
||||
const uint8_t **rgb_frames,
|
||||
int num_frames,
|
||||
const int *frame_numbers,
|
||||
tav_encoder_packet_t **packet);
|
||||
|
||||
/**
|
||||
* Free a packet returned by encode_frame(), flush(), or encode_gop().
|
||||
*
|
||||
* @param packet Packet to free (can be NULL)
|
||||
*/
|
||||
void tav_encoder_free_packet(tav_encoder_packet_t *packet);
|
||||
|
||||
// =============================================================================
|
||||
// Audio Encoding (Optional)
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Encode audio samples (TAD codec).
|
||||
*
|
||||
* Audio is encoded synchronously and returned immediately.
|
||||
* For TAV muxing: interleave audio packets with video packets by frame PTS.
|
||||
*
|
||||
* @param ctx Encoder context
|
||||
* @param pcm_samples PCM32f stereo samples (interleaved: L,R,L,R,...), num_samples×2 floats
|
||||
* @param num_samples Number of samples per channel
|
||||
* @param packet Output packet pointer
|
||||
* @return 1 if packet ready, -1 on error
|
||||
*/
|
||||
int tav_encoder_encode_audio(tav_encoder_context_t *ctx,
|
||||
const float *pcm_samples,
|
||||
size_t num_samples,
|
||||
tav_encoder_packet_t **packet);
|
||||
|
||||
// =============================================================================
|
||||
// Statistics and Info
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Encoding statistics, as filled in by tav_encoder_get_stats().
|
||||
*/
|
||||
typedef struct {
|
||||
int64_t frames_encoded; // Total frames encoded
|
||||
int64_t gops_encoded; // Total GOPs encoded
|
||||
size_t total_bytes; // Total bytes output (video + audio)
|
||||
size_t video_bytes; // Video bytes
|
||||
size_t audio_bytes; // Audio bytes
|
||||
double avg_bitrate_kbps; // Average bitrate (kbps)
|
||||
double encoding_fps; // Encoding speed (frames/sec)
|
||||
} tav_encoder_stats_t;
|
||||
|
||||
/**
|
||||
* Get encoding statistics.
|
||||
*
|
||||
* @param ctx Encoder context
|
||||
* @param stats Output statistics structure
|
||||
*/
|
||||
void tav_encoder_get_stats(tav_encoder_context_t *ctx, tav_encoder_stats_t *stats);
|
||||
|
||||
// =============================================================================
|
||||
// TAV Packet Types (for reference)
|
||||
// =============================================================================
|
||||
|
||||
#define TAV_PACKET_IFRAME 0x10 // I-frame (intra-only, single frame)
|
||||
#define TAV_PACKET_PFRAME 0x11 // P-frame (delta from previous)
|
||||
#define TAV_PACKET_GOP_UNIFIED 0x12 // GOP unified (3D DWT, multiple frames)
|
||||
#define TAV_PACKET_AUDIO_TAD 0x24 // TAD audio (DWT-based perceptual codec)
|
||||
#define TAV_PACKET_AUDIO_PCM8 0x20 // PCM8 audio (legacy)
|
||||
#define TAV_PACKET_LOOP_START 0xF0 // Loop point start (no payload)
|
||||
#define TAV_PACKET_GOP_SYNC 0xFC // GOP sync (frame count marker)
|
||||
#define TAV_PACKET_TIMECODE 0xFD // Timecode metadata
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // TAV_ENCODER_LIB_H
|
||||
275
video_encoder/include/tav_simd_dispatch.h
Normal file
275
video_encoder/include/tav_simd_dispatch.h
Normal file
@@ -0,0 +1,275 @@
|
||||
/*
|
||||
* TAV SIMD Function Dispatcher
|
||||
*
|
||||
* This file provides runtime CPU detection and function pointer dispatch
|
||||
* for SIMD-optimized versions of performance-critical TAV encoder functions.
|
||||
*
|
||||
* Usage:
|
||||
* 1. Include this header after defining all scalar functions
|
||||
* 2. Call tav_simd_init() once at encoder initialization
|
||||
* 3. Use function pointers (e.g., dwt_53_forward_1d_ptr) throughout code
|
||||
*
|
||||
* The dispatcher will automatically select AVX-512, AVX2, or scalar versions
|
||||
* based on runtime CPU capabilities.
|
||||
*/
|
||||
|
||||
#ifndef TAV_SIMD_DISPATCH_H
|
||||
#define TAV_SIMD_DISPATCH_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
// =============================================================================
|
||||
// Function Pointer Types
|
||||
// =============================================================================
|
||||
|
||||
// 1D DWT function pointer types
|
||||
typedef void (*dwt_1d_func_t)(float *data, int length);
|
||||
|
||||
// Quantization function pointer types
|
||||
typedef void (*quantise_basic_func_t)(
|
||||
float *coeffs, int16_t *quantised, int size,
|
||||
float effective_q, float dead_zone_threshold,
|
||||
int width, int height, int decomp_levels, int is_chroma,
|
||||
int (*get_subband_level)(int, int, int, int),
|
||||
int (*get_subband_type)(int, int, int, int)
|
||||
);
|
||||
|
||||
typedef void (*quantise_perceptual_func_t)(
|
||||
float *coeffs, int16_t *quantised, int size,
|
||||
float *weights, float base_quantiser
|
||||
);
|
||||
|
||||
// Color conversion function pointer type
|
||||
typedef void (*rgb_to_ycocg_func_t)(
|
||||
const uint8_t *rgb, float *y, float *co, float *cg,
|
||||
int width, int height
|
||||
);
|
||||
|
||||
// 2D DWT column operations
|
||||
typedef void (*dwt_2d_column_extract_func_t)(
|
||||
const float *tile_data, float *column,
|
||||
int x, int width, int height
|
||||
);
|
||||
|
||||
typedef void (*dwt_2d_column_insert_func_t)(
|
||||
float *tile_data, const float *column,
|
||||
int x, int width, int height
|
||||
);
|
||||
|
||||
// =============================================================================
|
||||
// Global Function Pointers (initialized by tav_simd_init)
|
||||
// =============================================================================
|
||||
|
||||
// DWT 1D transforms
|
||||
static dwt_1d_func_t dwt_53_forward_1d_ptr = NULL;
|
||||
static dwt_1d_func_t dwt_97_forward_1d_ptr = NULL;
|
||||
static dwt_1d_func_t dwt_haar_forward_1d_ptr = NULL;
|
||||
static dwt_1d_func_t dwt_53_inverse_1d_ptr = NULL;
|
||||
static dwt_1d_func_t dwt_haar_inverse_1d_ptr = NULL;
|
||||
|
||||
// Quantization
|
||||
static quantise_basic_func_t quantise_dwt_coefficients_ptr = NULL;
|
||||
static quantise_perceptual_func_t quantise_dwt_coefficients_perceptual_ptr = NULL;
|
||||
|
||||
// Color conversion
|
||||
static rgb_to_ycocg_func_t rgb_to_ycocg_ptr = NULL;
|
||||
|
||||
// 2D DWT column operations
|
||||
static dwt_2d_column_extract_func_t dwt_2d_extract_column_ptr = NULL;
|
||||
static dwt_2d_column_insert_func_t dwt_2d_insert_column_ptr = NULL;
|
||||
|
||||
// =============================================================================
|
||||
// SIMD Capability Detection
|
||||
// =============================================================================
|
||||
|
||||
/*
 * SIMD instruction-set tiers reported by detect_simd_capabilities().
 *
 * The numeric values are NOT ordered by capability (AVX-512 is 1, SSE4.2 is
 * 3), so compare tiers with == rather than < or >. tav_simd_init() also uses
 * the value as an index into its table of level names, so the values must
 * stay stable.
 */
typedef enum {
    SIMD_NONE    = 0,
    SIMD_AVX512F = 1,
    SIMD_AVX2    = 2,
    SIMD_SSE42   = 3
} simd_level_t;

/* Tier recorded by tav_simd_init(); SIMD_NONE until it has been called. */
static simd_level_t detected_simd_level = SIMD_NONE;

/*
 * Query the running CPU and return the best SIMD tier usable by this build.
 *
 * Detection relies on the GCC/Clang x86 CPU built-ins, so it is compiled only
 * for x86 targets built with those compilers; every other configuration
 * reports SIMD_NONE instead of failing to compile.
 *
 * AVX-512 and AVX2 are reported only when this translation unit was itself
 * built with the matching instruction set enabled (__AVX512F__ / __AVX2__),
 * and AVX-512 additionally requires the F, DQ, BW and VL subsets at runtime.
 *
 * @return SIMD_AVX512F, SIMD_AVX2, SIMD_SSE42 or SIMD_NONE
 */
static inline simd_level_t detect_simd_capabilities(void) {
#if (defined(__GNUC__) || defined(__clang__)) && \
    (defined(__x86_64__) || defined(__i386__))
    // Make the feature query safe even if we run before static constructors.
    __builtin_cpu_init();

    // SSE4.2 is the baseline: without it no SIMD tier is reported at all.
    if (!__builtin_cpu_supports("sse4.2")) {
        return SIMD_NONE;
    }

#ifdef __AVX512F__
    if (__builtin_cpu_supports("avx512f") &&
        __builtin_cpu_supports("avx512dq") &&
        __builtin_cpu_supports("avx512bw") &&
        __builtin_cpu_supports("avx512vl")) {
        return SIMD_AVX512F;
    }
#endif

#ifdef __AVX2__
    if (__builtin_cpu_supports("avx2")) {
        return SIMD_AVX2;
    }
#endif

    // SSE4.2 support was already established above.
    return SIMD_SSE42;
#else
    return SIMD_NONE;
#endif
}
|
||||
|
||||
// =============================================================================
|
||||
// Scalar Fallback Wrappers
|
||||
// =============================================================================
|
||||
|
||||
// These wrappers adapt the scalar functions to match function pointer signatures
|
||||
|
||||
static void quantise_dwt_coefficients_scalar_wrapper(
|
||||
float *coeffs, int16_t *quantised, int size,
|
||||
float effective_q, float dead_zone_threshold,
|
||||
int width, int height, int decomp_levels, int is_chroma,
|
||||
int (*get_subband_level)(int, int, int, int),
|
||||
int (*get_subband_type)(int, int, int, int)
|
||||
);
|
||||
// Implementation provided by including encoder - just declare prototype
|
||||
|
||||
static void quantise_dwt_coefficients_perceptual_scalar_wrapper(
|
||||
float *coeffs, int16_t *quantised, int size,
|
||||
float *weights, float base_quantiser
|
||||
);
|
||||
// Implementation provided by including encoder
|
||||
|
||||
/*
 * Scalar fallback: gather column `x` of a tile into a contiguous buffer.
 *
 * The tile is indexed as tile_data[row * width + x], i.e. consecutive rows
 * are `width` floats apart.
 *
 * @param tile_data Source tile
 * @param column    Destination buffer, receives `height` values
 * @param x         Column index to read
 * @param width     Row stride of the tile, in floats
 * @param height    Number of rows to copy (nothing is copied when <= 0)
 */
static void dwt_2d_extract_column_scalar(
    const float *tile_data, float *column,
    int x, int width, int height
) {
    int row = 0;

    while (row < height) {
        const float *row_start = tile_data + row * width;
        column[row] = row_start[x];
        row++;
    }
}
|
||||
|
||||
/*
 * Scalar fallback: scatter a contiguous buffer back into column `x` of a tile.
 *
 * The tile is indexed as tile_data[row * width + x], i.e. consecutive rows
 * are `width` floats apart.
 *
 * @param tile_data Destination tile
 * @param column    Source buffer holding `height` values
 * @param x         Column index to overwrite
 * @param width     Row stride of the tile, in floats
 * @param height    Number of rows to write (nothing is written when <= 0)
 */
static void dwt_2d_insert_column_scalar(
    float *tile_data, const float *column,
    int x, int width, int height
) {
    int row = 0;

    while (row < height) {
        float *row_start = tile_data + row * width;
        row_start[x] = column[row];
        row++;
    }
}
|
||||
|
||||
// =============================================================================
|
||||
// SIMD Initialization
|
||||
// =============================================================================
|
||||
|
||||
static void tav_simd_init(void) {
|
||||
// Detect CPU capabilities
|
||||
detected_simd_level = detect_simd_capabilities();
|
||||
|
||||
const char *simd_names[] = {"None", "AVX-512", "AVX2", "SSE4.2"};
|
||||
fprintf(stderr, "[TAV] SIMD level detected: %s\n",
|
||||
simd_names[detected_simd_level]);
|
||||
|
||||
#ifdef __AVX512F__
|
||||
if (detected_simd_level == SIMD_AVX512F) {
|
||||
fprintf(stderr, "[TAV] Using AVX-512 optimizations\n");
|
||||
|
||||
// DWT functions
|
||||
extern void dwt_53_forward_1d_avx512(float *data, int length);
|
||||
extern void dwt_97_forward_1d_avx512(float *data, int length);
|
||||
extern void dwt_haar_forward_1d_avx512(float *data, int length);
|
||||
|
||||
dwt_53_forward_1d_ptr = dwt_53_forward_1d_avx512;
|
||||
dwt_97_forward_1d_ptr = dwt_97_forward_1d_avx512;
|
||||
dwt_haar_forward_1d_ptr = dwt_haar_forward_1d_avx512;
|
||||
|
||||
// Quantization
|
||||
// Note: Need wrapper functions that match the complex signature
|
||||
// For now, using scalar versions
|
||||
extern void dwt_53_forward_1d(float *data, int length);
|
||||
extern void dwt_97_forward_1d(float *data, int length);
|
||||
extern void dwt_haar_forward_1d(float *data, int length);
|
||||
extern void dwt_53_inverse_1d(float *data, int length);
|
||||
extern void dwt_haar_inverse_1d(float *data, int length);
|
||||
|
||||
// Fallback to scalar for inverse (can optimize later)
|
||||
dwt_53_inverse_1d_ptr = dwt_53_inverse_1d;
|
||||
dwt_haar_inverse_1d_ptr = dwt_haar_inverse_1d;
|
||||
|
||||
// Color conversion
|
||||
extern void rgb_to_ycocg_avx512(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height);
|
||||
rgb_to_ycocg_ptr = rgb_to_ycocg_avx512;
|
||||
|
||||
// 2D column operations
|
||||
extern void dwt_2d_extract_column_avx512(const float *tile_data, float *column, int x, int width, int height);
|
||||
extern void dwt_2d_insert_column_avx512(float *tile_data, const float *column, int x, int width, int height);
|
||||
|
||||
dwt_2d_extract_column_ptr = dwt_2d_extract_column_avx512;
|
||||
dwt_2d_insert_column_ptr = dwt_2d_insert_column_avx512;
|
||||
|
||||
// Quantization uses scalar for now (needs integration work)
|
||||
extern void dwt_53_forward_1d(float *data, int length);
|
||||
extern void dwt_97_forward_1d(float *data, int length);
|
||||
extern void dwt_haar_forward_1d(float *data, int length);
|
||||
extern void dwt_53_inverse_1d(float *data, int length);
|
||||
extern void dwt_haar_inverse_1d(float *data, int length);
|
||||
extern void rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height);
|
||||
|
||||
quantise_dwt_coefficients_ptr = quantise_dwt_coefficients_scalar_wrapper;
|
||||
quantise_dwt_coefficients_perceptual_ptr = quantise_dwt_coefficients_perceptual_scalar_wrapper;
|
||||
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Fallback to scalar implementations
|
||||
fprintf(stderr, "[TAV] Using scalar (non-SIMD) implementations\n");
|
||||
|
||||
extern void dwt_53_forward_1d(float *data, int length);
|
||||
extern void dwt_97_forward_1d(float *data, int length);
|
||||
extern void dwt_haar_forward_1d(float *data, int length);
|
||||
extern void dwt_53_inverse_1d(float *data, int length);
|
||||
extern void dwt_haar_inverse_1d(float *data, int length);
|
||||
extern void rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height);
|
||||
|
||||
dwt_53_forward_1d_ptr = dwt_53_forward_1d;
|
||||
dwt_97_forward_1d_ptr = dwt_97_forward_1d;
|
||||
dwt_haar_forward_1d_ptr = dwt_haar_forward_1d;
|
||||
dwt_53_inverse_1d_ptr = dwt_53_inverse_1d;
|
||||
dwt_haar_inverse_1d_ptr = dwt_haar_inverse_1d;
|
||||
|
||||
rgb_to_ycocg_ptr = rgb_to_ycocg;
|
||||
|
||||
dwt_2d_extract_column_ptr = dwt_2d_extract_column_scalar;
|
||||
dwt_2d_insert_column_ptr = dwt_2d_insert_column_scalar;
|
||||
|
||||
quantise_dwt_coefficients_ptr = quantise_dwt_coefficients_scalar_wrapper;
|
||||
quantise_dwt_coefficients_perceptual_ptr = quantise_dwt_coefficients_perceptual_scalar_wrapper;
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Convenience Macros for Code Readability
|
||||
// =============================================================================
|
||||
|
||||
// Use these macros in encoder code for cleaner dispatch
|
||||
#define DWT_53_FORWARD_1D(data, length) \
|
||||
dwt_53_forward_1d_ptr((data), (length))
|
||||
|
||||
#define DWT_97_FORWARD_1D(data, length) \
|
||||
dwt_97_forward_1d_ptr((data), (length))
|
||||
|
||||
#define DWT_HAAR_FORWARD_1D(data, length) \
|
||||
dwt_haar_forward_1d_ptr((data), (length))
|
||||
|
||||
#define RGB_TO_YCOCG(rgb, y, co, cg, width, height) \
|
||||
rgb_to_ycocg_ptr((rgb), (y), (co), (cg), (width), (height))
|
||||
|
||||
#define DWT_2D_EXTRACT_COLUMN(tile_data, column, x, width, height) \
|
||||
dwt_2d_extract_column_ptr((tile_data), (column), (x), (width), (height))
|
||||
|
||||
#define DWT_2D_INSERT_COLUMN(tile_data, column, x, width, height) \
|
||||
dwt_2d_insert_column_ptr((tile_data), (column), (x), (width), (height))
|
||||
|
||||
#endif // TAV_SIMD_DISPATCH_H
|
||||
354
video_encoder/lib/libtavenc/README.md
Normal file
354
video_encoder/lib/libtavenc/README.md
Normal file
@@ -0,0 +1,354 @@
|
||||
# libtavenc - TAV Video Encoder Library
|
||||
|
||||
**libtavenc** is a high-performance video encoding library implementing the TSVM Advanced Video (TAV) codec. It provides a clean C API for encoding RGB24 video frames using discrete wavelet transform (DWT) with perceptual quantization and GOP-based temporal compression.
|
||||
|
||||
## Features
|
||||
|
||||
- **Multiple Wavelet Types**: CDF 5/3, CDF 9/7, CDF 13/7, DD-4, Haar
|
||||
- **3D DWT GOP Encoding**: Temporal + spatial wavelet compression
|
||||
- **Perceptual Quantization**: HVS-optimized coefficient scaling
|
||||
- **EZBC Entropy Coding**: Efficient coefficient compression with Zstd
|
||||
- **Multi-threading**: Internal thread pool for optimal performance
|
||||
- **Color Spaces**: YCoCg-R (default) and ICtCp (for HDR)
|
||||
- **Quality Levels**: 0-5 (0=lowest/smallest, 5=highest/largest)
|
||||
|
||||
## Building
|
||||
|
||||
```bash
|
||||
# Build static library
|
||||
make lib/libtavenc.a
|
||||
|
||||
# Build with encoder CLI
|
||||
make encoder_tav
|
||||
|
||||
# Install library and headers
|
||||
make install-libs PREFIX=/usr/local
|
||||
```
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Basic Encoding
|
||||
|
||||
```c
|
||||
#include "tav_encoder_lib.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main() {
|
||||
// Initialize encoder parameters
|
||||
tav_encoder_params_t params;
|
||||
tav_encoder_params_init(&params, 1920, 1080);
|
||||
|
||||
// Configure encoding options
|
||||
params.fps_num = 60;
|
||||
params.fps_den = 1;
|
||||
params.wavelet_type = 1; // CDF 9/7 (default)
|
||||
params.quality_y = 3; // Quality level 3
|
||||
params.quality_co = 3;
|
||||
params.quality_cg = 3;
|
||||
params.enable_temporal_dwt = 1; // Enable 3D GOP encoding
|
||||
params.gop_size = 0; // Auto-calculate (typically 16-24)
|
||||
params.num_threads = 4; // 4 worker threads
|
||||
|
||||
// Create encoder context
|
||||
tav_encoder_context_t *ctx = tav_encoder_create(&params);
|
||||
if (!ctx) {
|
||||
fprintf(stderr, "Failed to create encoder\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Get actual parameters (with auto-calculated values)
|
||||
tav_encoder_get_params(ctx, &params);
|
||||
printf("GOP size: %d frames\n", params.gop_size);
|
||||
|
||||
// Encode frames
|
||||
uint8_t *rgb_frame = /* ... load RGB24 frame ... */;
|
||||
tav_encoder_packet_t *packet;
|
||||
|
||||
for (int i = 0; i < num_frames; i++) {
|
||||
int result = tav_encoder_encode_frame(ctx, rgb_frame, i, &packet);
|
||||
|
||||
if (result == 1) {
|
||||
// Packet ready (GOP completed)
|
||||
fwrite(packet->data, 1, packet->size, outfile);
|
||||
tav_encoder_free_packet(packet);
|
||||
}
|
||||
else if (result == 0) {
|
||||
// Frame buffered, waiting for GOP to fill
|
||||
}
|
||||
else {
|
||||
// Error
|
||||
fprintf(stderr, "Encoding error: %s\n", tav_encoder_get_error(ctx));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Flush remaining frames
|
||||
while (tav_encoder_flush(ctx, &packet) == 1) {
|
||||
fwrite(packet->data, 1, packet->size, outfile);
|
||||
tav_encoder_free_packet(packet);
|
||||
}
|
||||
|
||||
// Cleanup
|
||||
tav_encoder_free(ctx);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
### Stateless GOP Encoding (Multi-threaded)
|
||||
|
||||
The library provides `tav_encoder_encode_gop()` for stateless GOP encoding, perfect for multi-threaded applications:
|
||||
|
||||
```c
|
||||
#include "tav_encoder_lib.h"
|
||||
#include <pthread.h>
|
||||
|
||||
typedef struct {
|
||||
tav_encoder_params_t params;
|
||||
uint8_t **rgb_frames;
|
||||
int num_frames;
|
||||
int *frame_numbers;
|
||||
tav_encoder_packet_t *output_packet;
|
||||
} gop_encode_job_t;
|
||||
|
||||
void *encode_gop_thread(void *arg) {
|
||||
gop_encode_job_t *job = (gop_encode_job_t *)arg;
|
||||
|
||||
// Create thread-local encoder context
|
||||
tav_encoder_context_t *ctx = tav_encoder_create(&job->params);
|
||||
if (!ctx) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Encode entire GOP at once (stateless, thread-safe)
|
||||
tav_encoder_encode_gop(ctx,
|
||||
(const uint8_t **)job->rgb_frames,
|
||||
job->num_frames,
|
||||
job->frame_numbers,
|
||||
&job->output_packet);
|
||||
|
||||
tav_encoder_free(ctx);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int main() {
|
||||
// Setup parameters
|
||||
tav_encoder_params_t params;
|
||||
tav_encoder_params_init(&params, 1920, 1080);
|
||||
params.enable_temporal_dwt = 1;
|
||||
params.gop_size = 24;
|
||||
|
||||
// Create worker threads
|
||||
pthread_t threads[4];
|
||||
gop_encode_job_t jobs[4];
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
jobs[i].params = params;
|
||||
jobs[i].rgb_frames = /* ... load GOP frames ... */;
|
||||
jobs[i].num_frames = 24;
|
||||
jobs[i].frame_numbers = /* ... frame indices ... */;
|
||||
|
||||
pthread_create(&threads[i], NULL, encode_gop_thread, &jobs[i]);
|
||||
}
|
||||
|
||||
// Wait for completion
|
||||
for (int i = 0; i < 4; i++) {
|
||||
pthread_join(threads[i], NULL);
|
||||
|
||||
// Write output packet
|
||||
if (jobs[i].output_packet) {
|
||||
fwrite(jobs[i].output_packet->data, 1,
|
||||
jobs[i].output_packet->size, outfile);
|
||||
tav_encoder_free_packet(jobs[i].output_packet);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
## API Reference
|
||||
|
||||
### Context Management
|
||||
|
||||
#### `tav_encoder_create()`
|
||||
Creates encoder context with specified parameters. Allocates internal buffers and initializes thread pool if multi-threading enabled.
|
||||
|
||||
**Returns**: Encoder context or NULL on failure
|
||||
|
||||
#### `tav_encoder_free()`
|
||||
Frees encoder context and all resources. Any unflushed GOP frames are lost.
|
||||
|
||||
#### `tav_encoder_get_error()`
|
||||
Returns last error message string.
|
||||
|
||||
#### `tav_encoder_get_params()`
|
||||
Gets encoder parameters with calculated values (e.g., auto-calculated GOP size, decomposition levels).
|
||||
|
||||
### Frame Encoding
|
||||
|
||||
#### `tav_encoder_encode_frame()`
|
||||
Encodes single RGB24 frame. Frames are buffered until GOP is full.
|
||||
|
||||
**Parameters**:
|
||||
- `rgb_frame`: RGB24 planar format `[R...][G...][B...]`, width×height×3 bytes
|
||||
- `frame_pts`: Presentation timestamp (frame number or time)
|
||||
- `packet`: Output packet pointer (NULL if GOP not ready)
|
||||
|
||||
**Returns**:
|
||||
- `1`: Packet ready (GOP completed)
|
||||
- `0`: Frame buffered, waiting for more frames
|
||||
- `-1`: Error
|
||||
|
||||
#### `tav_encoder_flush()`
|
||||
Flushes remaining buffered frames and encodes final GOP. Call at end of stream.
|
||||
|
||||
**Returns**:
|
||||
- `1`: Packet ready
|
||||
- `0`: No more packets
|
||||
- `-1`: Error
|
||||
|
||||
#### `tav_encoder_encode_gop()`
|
||||
Stateless GOP encoding. Thread-safe with separate contexts.
|
||||
|
||||
**Parameters**:
|
||||
- `rgb_frames`: Array of RGB24 frames `[frame][width×height×3]`
|
||||
- `num_frames`: Number of frames in GOP (1-24)
|
||||
- `frame_numbers`: Frame indices for timecodes (can be NULL)
|
||||
- `packet`: Output packet pointer
|
||||
|
||||
**Returns**: `1` on success, `-1` on error
|
||||
|
||||
### Packet Management
|
||||
|
||||
#### `tav_encoder_free_packet()`
|
||||
Frees packet returned by encoding functions.
|
||||
|
||||
## Encoder Parameters
|
||||
|
||||
### Video Dimensions
|
||||
- `width`, `height`: Frame dimensions (must be even)
|
||||
- `fps_num`, `fps_den`: Framerate (e.g., 60/1 for 60fps)
|
||||
|
||||
### Wavelet Configuration
|
||||
- `wavelet_type`: Spatial wavelet
|
||||
- `0`: CDF 5/3 (reversible, lossless-capable)
|
||||
- `1`: CDF 9/7 (default, best compression)
|
||||
- `2`: CDF 13/7 (experimental)
|
||||
- `16`: DD-4 (four-point interpolating)
|
||||
- `255`: Haar (demonstration)
|
||||
- `temporal_wavelet`: Temporal wavelet for 3D DWT
|
||||
- `0`: Haar (default for sports/high motion)
|
||||
- `1`: CDF 5/3 (smooth motion)
|
||||
- `decomp_levels`: Spatial DWT levels (0=auto, typically 6)
|
||||
- `temporal_levels`: Temporal DWT levels (0=auto, typically 2 for 8-frame GOPs)
|
||||
|
||||
### Color Space
|
||||
- `channel_layout`:
|
||||
- `0`: YCoCg-R (default, efficient chroma)
|
||||
- `1`: ICtCp (for HDR/BT.2100 sources)
|
||||
- `perceptual_tuning`: 1=enable HVS perceptual quantization (default), 0=uniform
|
||||
|
||||
### GOP Configuration
|
||||
- `enable_temporal_dwt`: 1=enable 3D DWT GOP encoding (default), 0=intra-only I-frames
|
||||
- `gop_size`: Frames per GOP (8, 16, or 24; 0=auto based on framerate)
|
||||
- `enable_two_pass`: 1=enable two-pass with scene change detection (default), 0=single-pass
|
||||
|
||||
### Quality Control
|
||||
- `quality_y`: Luma quality (0-5, default: 3)
|
||||
- `quality_co`: Orange chrominance quality (0-5, default: 3)
|
||||
- `quality_cg`: Green chrominance quality (0-5, default: 3)
|
||||
- `dead_zone_threshold`: Dead-zone quantization (0=disabled, 1-10 typical)
|
||||
|
||||
### Entropy Coding
|
||||
- `entropy_coder`:
|
||||
- `0`: Twobitmap (default, fast)
|
||||
- `1`: EZBC (better compression for high-quality)
|
||||
- `zstd_level`: Zstd compression level (3-22, default: 7)
|
||||
|
||||
### Multi-threading
|
||||
- `num_threads`: Worker threads
|
||||
- `0`: Single-threaded (default for CLI)
|
||||
- `-1`: Auto-detect CPU cores
|
||||
- `1-16`: Explicit thread count
|
||||
|
||||
### Encoder Presets
|
||||
- `encoder_preset`: Preset flags
|
||||
- `0x01`: Sports mode (finer temporal quantization)
|
||||
- `0x02`: Anime mode (disable grain)
|
||||
|
||||
## TAV Packet Types
|
||||
|
||||
Output packets have type field indicating content:
|
||||
|
||||
- `0x10`: I-frame (intra-only, single frame)
|
||||
- `0x11`: P-frame (delta from previous)
|
||||
- `0x12`: GOP unified (3D DWT, multiple frames)
|
||||
- `0x24`: TAD audio (DWT-based audio codec)
|
||||
- `0xF0`: Loop point start
|
||||
- `0xFC`: GOP sync (frame count marker)
|
||||
- `0xFD`: Timecode metadata
|
||||
|
||||
## Performance Notes
|
||||
|
||||
### Threading Model
|
||||
- Library manages internal thread pool when `num_threads > 0`
|
||||
- GOP encoding is parallelized across worker threads
|
||||
- For CLI tools: use `num_threads=0` (single-threaded) to avoid double-threading with external parallelism
|
||||
- For library integration: use `num_threads=-1` or explicit count for optimal performance
|
||||
|
||||
### Memory Usage
|
||||
- Each encoder context allocates:
|
||||
- GOP buffer: `gop_size × width × height × 3` bytes (RGB frames)
|
||||
- DWT coefficients: `~width × height × 12` bytes per channel
|
||||
- Thread pool: `num_threads × (GOP buffer + workspace)`
|
||||
- Typical 1920×1080 encoder with GOP=24: ~180 MB per context
|
||||
|
||||
### Encoding Speed
|
||||
- Single-threaded: 10-15 fps (1920×1080 on modern CPU)
|
||||
- Multi-threaded (4 threads): 30-40 fps
|
||||
- GOP size affects latency: larger GOP = higher latency, better compression
|
||||
|
||||
## Integration with TAD Audio
|
||||
|
||||
TAV files typically include TAD-compressed audio. Link with both libraries:
|
||||
|
||||
```c
|
||||
#include "tav_encoder_lib.h"
|
||||
#include "encoder_tad.h"
|
||||
|
||||
// Encode video frame
|
||||
tav_encoder_encode_frame(video_ctx, rgb_frame, pts, &video_packet);
|
||||
|
||||
// Encode audio chunk (32kHz stereo, float samples)
|
||||
tad32_encode_chunk(audio_ctx, pcm_samples, num_samples, &audio_data, &audio_size);
|
||||
|
||||
// Mux both into TAV file (interleave by frame PTS)
|
||||
```
|
||||
|
||||
## Error Handling
|
||||
|
||||
Encoding functions return a negative value on failure and store an error message that can be retrieved with `tav_encoder_get_error()`:
|
||||
|
||||
```c
|
||||
if (tav_encoder_encode_frame(ctx, frame, pts, &packet) < 0) {
|
||||
fprintf(stderr, "Encoding failed: %s\n", tav_encoder_get_error(ctx));
|
||||
// Handle error
|
||||
}
|
||||
```
|
||||
|
||||
## Limitations
|
||||
|
||||
- Maximum resolution: 8192×8192
|
||||
- GOP size: 1-48 frames
|
||||
- Single-tile encoding only (no spatial tiling)
|
||||
- Requires even width and height
|
||||
|
||||
## License
|
||||
|
||||
Part of the TSVM project.
|
||||
|
||||
## See Also
|
||||
|
||||
- `include/tav_encoder_lib.h` - Complete API documentation
|
||||
- `src/encoder_tav.c` - CLI reference implementation
|
||||
- `lib/libtadenc/` - TAD audio encoder library
|
||||
255
video_encoder/lib/libtavenc/tav_encoder_color.c
Normal file
255
video_encoder/lib/libtavenc/tav_encoder_color.c
Normal file
@@ -0,0 +1,255 @@
|
||||
/**
|
||||
* TAV Encoder - Color Space Conversion Library
|
||||
*
|
||||
* Provides RGB <-> YCoCg-R and RGB <-> ICtCp color space conversions
|
||||
* for the TSVM Advanced Video (TAV) encoder.
|
||||
*
|
||||
* Extracted from encoder_tav.c as part of library refactoring.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
|
||||
// =============================================================================
|
||||
// Utility Functions
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Clamp an integer to the range [min, max].
 *
 * The lower bound is tested first, so when min > max any x below min
 * yields min.
 */
static inline int CLAMP(int x, int min, int max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
|
||||
|
||||
/**
 * Clamp a single-precision value to the range [min, max].
 *
 * Arguments are taken as float, so double arguments are narrowed to single
 * precision before the comparison. A NaN input fails both comparisons and is
 * returned unchanged.
 */
static inline float FCLAMP(float x, float min, float max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
|
||||
|
||||
static inline int iround(double v) {
|
||||
return (int)floor(v + 0.5);
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// sRGB Gamma Helpers
|
||||
// =============================================================================
|
||||
|
||||
static inline double srgb_linearise(double val) {
|
||||
if (val <= 0.04045) return val / 12.92;
|
||||
return pow((val + 0.055) / 1.055, 2.4);
|
||||
}
|
||||
|
||||
static inline double srgb_unlinearise(double val) {
|
||||
if (val <= 0.0031308) return 12.92 * val;
|
||||
return 1.055 * pow(val, 1.0/2.4) - 0.055;
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// HLG (Hybrid Log-Gamma) Transfer Functions
|
||||
// =============================================================================
|
||||
|
||||
static inline double HLG_OETF(double E) {
|
||||
const double a = 0.17883277;
|
||||
const double b = 0.28466892; // 1 - 4*a
|
||||
const double c = 0.55991073; // 0.5 - a*ln(4*a)
|
||||
|
||||
if (E <= 1.0/12.0) return sqrt(3.0 * E);
|
||||
return a * log(12.0 * E - b) + c;
|
||||
}
|
||||
|
||||
static inline double HLG_EOTF(double Ep) {
|
||||
const double a = 0.17883277;
|
||||
const double b = 0.28466892;
|
||||
const double c = 0.55991073;
|
||||
|
||||
if (Ep <= 0.5) {
|
||||
double val = Ep * Ep / 3.0;
|
||||
return val;
|
||||
}
|
||||
double val = (exp((Ep - c) / a) + b) / 12.0;
|
||||
return val;
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Color Space Transformation Matrices
|
||||
// =============================================================================
|
||||
|
||||
// BT.2100 RGB -> LMS matrix
|
||||
static const double M_RGB_TO_LMS[3][3] = {
|
||||
{1688.0/4096, 2146.0/4096, 262.0/4096},
|
||||
{ 683.0/4096, 2951.0/4096, 462.0/4096},
|
||||
{ 99.0/4096, 309.0/4096, 3688.0/4096}
|
||||
};
|
||||
|
||||
// LMS -> RGB inverse matrix
|
||||
static const double M_LMS_TO_RGB[3][3] = {
|
||||
{ 6.1723815689243215, -5.319534979827695, 0.14699442094633924},
|
||||
{-1.3243428148026244, 2.560286104841917, -0.2359203727576164},
|
||||
{-0.011819739235953752, -0.26473549971186555, 1.2767952602537955}
|
||||
};
|
||||
|
||||
// ICtCp matrix (L' M' S' -> I Ct Cp) - BT.2100 constants
|
||||
static const double M_LMSPRIME_TO_ICTCP[3][3] = {
|
||||
{ 2048.0/4096.0, 2048.0/4096.0, 0.0 },
|
||||
{ 3625.0/4096.0, -7465.0/4096.0, 3840.0/4096.0 },
|
||||
{ 9500.0/4096.0, -9212.0/4096.0, -288.0/4096.0 }
|
||||
};
|
||||
|
||||
// ICtCp -> L' M' S' inverse matrix
|
||||
static const double M_ICTCP_TO_LMSPRIME[3][3] = {
|
||||
{ 1.0, 0.015718580108730416, 0.2095810681164055 },
|
||||
{ 1.0, -0.015718580108730416, -0.20958106811640548},
|
||||
{ 1.0, 1.0212710798422344, -0.6052744909924316 }
|
||||
};
|
||||
|
||||
// =============================================================================
|
||||
// YCoCg-R Color Space Conversion
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Convert an RGB24 frame to YCoCg-R planes.
 *
 * The lifting steps are evaluated in floating point (no integer rounding):
 *   Co  = R - B
 *   tmp = B + Co/2
 *   Cg  = G - tmp        (= G - (R+B)/2)
 *   Y   = tmp + Cg/2     (= (R + 2G + B)/4)
 *
 * @param rgb    Input RGB24 data, interleaved per pixel (RGBRGBRGB...),
 *               width*height*3 bytes
 * @param y      Output luma plane, width*height floats
 * @param co     Output orange chrominance plane, width*height floats
 * @param cg     Output green chrominance plane, width*height floats
 * @param width  Frame width in pixels
 * @param height Frame height in pixels
 *
 * Nothing is written when width or height is not positive.
 */
void tav_rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg,
                      int width, int height)
{
    if (width <= 0 || height <= 0) {
        return;
    }

    // Count pixels in size_t so large frames cannot overflow int arithmetic.
    const size_t total_pixels = (size_t)width * (size_t)height;

    for (size_t i = 0; i < total_pixels; i++) {
        const uint8_t *px = &rgb[i * 3];
        const float r = px[0];
        const float g = px[1];
        const float b = px[2];

        // YCoCg-R lifting steps
        const float co_val = r - b;
        const float tmp = b + co_val * 0.5f;
        const float cg_val = g - tmp;

        co[i] = co_val;
        cg[i] = cg_val;
        y[i] = tmp + cg_val * 0.5f;
    }
}
|
||||
|
||||
// =============================================================================
|
||||
// ICtCp Color Space Conversion (HDR-capable)
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Convert sRGB8 to ICtCp color space using HLG transfer function.
|
||||
*
|
||||
* ICtCp is a perceptually uniform color space designed for HDR content:
|
||||
* - I = intensity (luma)
|
||||
* - Ct = tritanope (blue-yellow)
|
||||
* - Cp = protanope (red-green)
|
||||
*
|
||||
* Uses BT.2100 ICtCp with HLG OETF for better perceptual uniformity.
|
||||
*
|
||||
* @param r8 Input red component (0-255)
|
||||
* @param g8 Input green component (0-255)
|
||||
* @param b8 Input blue component (0-255)
|
||||
* @param out_I Output intensity (0-255)
|
||||
* @param out_Ct Output tritanope (0-255, centered at 127.5)
|
||||
* @param out_Cp Output protanope (0-255, centered at 127.5)
|
||||
*/
|
||||
void tav_srgb8_to_ictcp_hlg(uint8_t r8, uint8_t g8, uint8_t b8,
|
||||
double *out_I, double *out_Ct, double *out_Cp)
|
||||
{
|
||||
// 1) Linearize sRGB to 0..1
|
||||
double r = srgb_linearise((double)r8 / 255.0);
|
||||
double g = srgb_linearise((double)g8 / 255.0);
|
||||
double b = srgb_linearise((double)b8 / 255.0);
|
||||
|
||||
// 2) Linear RGB -> LMS (3x3 multiply)
|
||||
double L = M_RGB_TO_LMS[0][0]*r + M_RGB_TO_LMS[0][1]*g + M_RGB_TO_LMS[0][2]*b;
|
||||
double M = M_RGB_TO_LMS[1][0]*r + M_RGB_TO_LMS[1][1]*g + M_RGB_TO_LMS[1][2]*b;
|
||||
double S = M_RGB_TO_LMS[2][0]*r + M_RGB_TO_LMS[2][1]*g + M_RGB_TO_LMS[2][2]*b;
|
||||
|
||||
// 3) Apply HLG OETF (Hybrid Log-Gamma)
|
||||
double Lp = HLG_OETF(L);
|
||||
double Mp = HLG_OETF(M);
|
||||
double Sp = HLG_OETF(S);
|
||||
|
||||
// 4) L'M'S' -> ICtCp
|
||||
double I = M_LMSPRIME_TO_ICTCP[0][0]*Lp + M_LMSPRIME_TO_ICTCP[0][1]*Mp + M_LMSPRIME_TO_ICTCP[0][2]*Sp;
|
||||
double Ct = M_LMSPRIME_TO_ICTCP[1][0]*Lp + M_LMSPRIME_TO_ICTCP[1][1]*Mp + M_LMSPRIME_TO_ICTCP[1][2]*Sp;
|
||||
double Cp = M_LMSPRIME_TO_ICTCP[2][0]*Lp + M_LMSPRIME_TO_ICTCP[2][1]*Mp + M_LMSPRIME_TO_ICTCP[2][2]*Sp;
|
||||
|
||||
// 5) Scale and offset to 0-255 range
|
||||
*out_I = FCLAMP(I * 255.0, 0.0, 255.0);
|
||||
*out_Ct = FCLAMP(Ct * 255.0 + 127.5, 0.0, 255.0);
|
||||
*out_Cp = FCLAMP(Cp * 255.0 + 127.5, 0.0, 255.0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert ICtCp back to sRGB8 using HLG inverse transfer function.
|
||||
*
|
||||
* @param I8 Input intensity (0-255)
|
||||
* @param Ct8 Input tritanope (0-255, centered at 127.5)
|
||||
* @param Cp8 Input protanope (0-255, centered at 127.5)
|
||||
* @param r8 Output red component (0-255)
|
||||
* @param g8 Output green component (0-255)
|
||||
* @param b8 Output blue component (0-255)
|
||||
*/
|
||||
void tav_ictcp_hlg_to_srgb8(double I8, double Ct8, double Cp8,
|
||||
uint8_t *r8, uint8_t *g8, uint8_t *b8)
|
||||
{
|
||||
// 1) Denormalize from 0-255 range
|
||||
double I = I8 / 255.0;
|
||||
double Ct = (Ct8 - 127.5) / 255.0;
|
||||
double Cp = (Cp8 - 127.5) / 255.0;
|
||||
|
||||
// 2) ICtCp -> L' M' S' (3x3 inverse multiply)
|
||||
double Lp = M_ICTCP_TO_LMSPRIME[0][0]*I + M_ICTCP_TO_LMSPRIME[0][1]*Ct + M_ICTCP_TO_LMSPRIME[0][2]*Cp;
|
||||
double Mp = M_ICTCP_TO_LMSPRIME[1][0]*I + M_ICTCP_TO_LMSPRIME[1][1]*Ct + M_ICTCP_TO_LMSPRIME[1][2]*Cp;
|
||||
double Sp = M_ICTCP_TO_LMSPRIME[2][0]*I + M_ICTCP_TO_LMSPRIME[2][1]*Ct + M_ICTCP_TO_LMSPRIME[2][2]*Cp;
|
||||
|
||||
// 3) Apply HLG inverse EOTF
|
||||
double L = HLG_EOTF(Lp);
|
||||
double M = HLG_EOTF(Mp);
|
||||
double S = HLG_EOTF(Sp);
|
||||
|
||||
// 4) LMS -> linear sRGB (3x3 inverse multiply)
|
||||
double r_lin = M_LMS_TO_RGB[0][0]*L + M_LMS_TO_RGB[0][1]*M + M_LMS_TO_RGB[0][2]*S;
|
||||
double g_lin = M_LMS_TO_RGB[1][0]*L + M_LMS_TO_RGB[1][1]*M + M_LMS_TO_RGB[1][2]*S;
|
||||
double b_lin = M_LMS_TO_RGB[2][0]*L + M_LMS_TO_RGB[2][1]*M + M_LMS_TO_RGB[2][2]*S;
|
||||
|
||||
// 5) Apply sRGB gamma and convert to 0-255 with rounding
|
||||
double r = srgb_unlinearise(r_lin);
|
||||
double g = srgb_unlinearise(g_lin);
|
||||
double b = srgb_unlinearise(b_lin);
|
||||
|
||||
*r8 = (uint8_t)iround(FCLAMP(r * 255.0, 0.0, 255.0));
|
||||
*g8 = (uint8_t)iround(FCLAMP(g * 255.0, 0.0, 255.0));
|
||||
*b8 = (uint8_t)iround(FCLAMP(b * 255.0, 0.0, 255.0));
|
||||
}
|
||||
67
video_encoder/lib/libtavenc/tav_encoder_color.h
Normal file
67
video_encoder/lib/libtavenc/tav_encoder_color.h
Normal file
@@ -0,0 +1,67 @@
|
||||
/**
|
||||
* TAV Encoder - Color Space Conversion Library
|
||||
*
|
||||
* Public API for RGB <-> YCoCg-R and RGB <-> ICtCp color space conversions.
|
||||
*/
|
||||
|
||||
#ifndef TAV_ENCODER_COLOR_H
|
||||
#define TAV_ENCODER_COLOR_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// =============================================================================
|
||||
// YCoCg-R Color Space Conversion
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Convert RGB24 to YCoCg-R color space for a full frame.
|
||||
*
|
||||
* @param rgb Input RGB24 data (interleaved: RGBRGBRGB...)
|
||||
* @param y Output luma channel
|
||||
* @param co Output orange chrominance
|
||||
* @param cg Output green chrominance
|
||||
* @param width Frame width
|
||||
* @param height Frame height
|
||||
*/
|
||||
void tav_rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg,
|
||||
int width, int height);
|
||||
|
||||
// =============================================================================
|
||||
// ICtCp Color Space Conversion (HDR-capable)
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Convert sRGB8 to ICtCp color space using HLG transfer function.
|
||||
*
|
||||
* @param r8 Input red component (0-255)
|
||||
* @param g8 Input green component (0-255)
|
||||
* @param b8 Input blue component (0-255)
|
||||
* @param out_I Output intensity (0-255)
|
||||
* @param out_Ct Output tritanope (0-255, centered at 127.5)
|
||||
* @param out_Cp Output protanope (0-255, centered at 127.5)
|
||||
*/
|
||||
void tav_srgb8_to_ictcp_hlg(uint8_t r8, uint8_t g8, uint8_t b8,
|
||||
double *out_I, double *out_Ct, double *out_Cp);
|
||||
|
||||
/**
|
||||
* Convert ICtCp back to sRGB8 using HLG inverse transfer function.
|
||||
*
|
||||
* @param I8 Input intensity (0-255)
|
||||
* @param Ct8 Input tritanope (0-255, centered at 127.5)
|
||||
* @param Cp8 Input protanope (0-255, centered at 127.5)
|
||||
* @param r8 Output red component (0-255)
|
||||
* @param g8 Output green component (0-255)
|
||||
* @param b8 Output blue component (0-255)
|
||||
*/
|
||||
void tav_ictcp_hlg_to_srgb8(double I8, double Ct8, double Cp8,
|
||||
uint8_t *r8, uint8_t *g8, uint8_t *b8);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // TAV_ENCODER_COLOR_H
|
||||
619
video_encoder/lib/libtavenc/tav_encoder_dwt.c
Normal file
619
video_encoder/lib/libtavenc/tav_encoder_dwt.c
Normal file
@@ -0,0 +1,619 @@
|
||||
/**
|
||||
* TAV Encoder - Discrete Wavelet Transform (DWT) Library
|
||||
*
|
||||
* Provides multi-resolution wavelet decomposition for video compression.
|
||||
* Supports multiple wavelet types: CDF 5/3, 9/7, 13/7, DD-4, and Haar.
|
||||
*
|
||||
* Extracted from encoder_tav.c as part of library refactoring.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
|
||||
// =============================================================================
|
||||
// Wavelet Type Constants
|
||||
// =============================================================================
|
||||
|
||||
#define WAVELET_5_3_REVERSIBLE 0 // CDF 5/3 - Lossless capable
|
||||
#define WAVELET_9_7_IRREVERSIBLE 1 // CDF 9/7 - Higher compression (default)
|
||||
#define WAVELET_BIORTHOGONAL_13_7 2 // Biorthogonal 13/7
|
||||
#define WAVELET_DD4 16 // Deslauriers-Dubuc 4-point interpolating
|
||||
#define WAVELET_HAAR 255 // Haar - Simplest wavelet
|
||||
|
||||
// =============================================================================
|
||||
// 1D Forward DWT Transforms
|
||||
// =============================================================================
|
||||
|
||||
/**
 * CDF 5/3 wavelet forward 1D transform (one lifting level, in place).
 *
 * Predict: every odd sample has the mean of its two even neighbours
 * subtracted (the left neighbour is reused when there is no right one).
 * Update: every even sample gains a quarter of the neighbouring high-pass
 * coefficients.
 * Output layout: [low-pass x ceil(length/2) | high-pass x floor(length/2)].
 *
 * NOTE: the update of the last low-pass sample always treats its
 * right-hand high-pass term as 0, even for even lengths where that
 * coefficient exists. dwt_53_inverse_1d() applies the same rule, so the
 * two must only ever be changed together.
 *
 * Terminates the process with an error message if the scratch buffer
 * cannot be allocated.
 *
 * @param data   In/out signal data (modified in-place)
 * @param length Signal length (handles non-power-of-2; < 2 is a no-op)
 */
static void dwt_53_forward_1d(float *data, int length) {
    if (length < 2) return;

    const int half = (length + 1) / 2;  // number of low-pass outputs

    float *temp = calloc((size_t)length, sizeof *temp);
    if (!temp) {
        fprintf(stderr, "ERROR: dwt_53_forward_1d: failed to allocate %d floats\n", length);
        exit(1);
    }

    // Predict step (high-pass): one coefficient per odd input sample
    for (int i = 0; 2 * i + 1 < length; i++) {
        const float left  = data[2 * i];
        const float right = (2 * i + 2 < length) ? data[2 * i + 2] : left;
        temp[half + i] = data[2 * i + 1] - 0.5f * (left + right);
    }

    // Update step (low-pass)
    for (int i = 0; i < half; i++) {
        const float d_prev = (i > 0) ? temp[half + i - 1] : 0;
        const float d_curr = (i < half - 1) ? temp[half + i] : 0;
        temp[i] = data[2 * i] + 0.25f * (d_prev + d_curr);
    }

    memcpy(data, temp, (size_t)length * sizeof *temp);
    free(temp);
}
|
||||
|
||||
/**
 * Lifting "predict" pass shared by the 9/7 steps: adds
 * coeff * (low[i] + low[i+1]) to every high-pass sample, reusing low[i]
 * when there is no right-hand neighbour.
 */
static void dwt97_predict(const float *low, float *high, int n_low, int n_high, float coeff) {
    for (int i = 0; i < n_high; i++) {
        const float s_curr = low[i];
        const float s_next = (i + 1 < n_low) ? low[i + 1] : s_curr;
        high[i] += coeff * (s_curr + s_next);
    }
}

/**
 * Lifting "update" pass shared by the 9/7 steps: adds
 * coeff * (high[i-1] + high[i]) to every low-pass sample. A missing
 * high[i] (odd lengths, last sample) counts as 0; a missing high[i-1]
 * (first sample) is replaced by high[i].
 */
static void dwt97_update(float *low, const float *high, int n_low, int n_high, float coeff) {
    for (int i = 0; i < n_low; i++) {
        const float d_curr = (i < n_high) ? high[i] : 0.0f;
        const float d_prev = (i > 0) ? high[i - 1] : d_curr;
        low[i] += coeff * (d_prev + d_curr);
    }
}

/**
 * CDF 9/7 irreversible wavelet forward 1D transform (in place).
 *
 * Splits the signal into even/odd samples, runs four lifting passes
 * (predict alpha, update beta, predict gamma, update delta) and finally
 * scales the low-pass half by K and the high-pass half by 1/K.
 * Output layout: [low-pass x ceil(length/2) | high-pass x floor(length/2)].
 *
 * Terminates the process with an error message if the scratch buffer
 * cannot be allocated.
 *
 * @param data   In/out signal data
 * @param length Signal length (< 2 is a no-op)
 */
static void dwt_97_forward_1d(float *data, int length) {
    if (length < 2) return;

    // JPEG2000 9/7 lifting coefficients
    const float alpha = -1.586134342f;
    const float beta  = -0.052980118f;
    const float gamma = 0.882911076f;
    const float delta = 0.443506852f;
    const float K     = 1.230174105f;

    const int half   = (length + 1) / 2;  // low-pass count
    const int n_high = length / 2;        // high-pass count; half + n_high == length

    float *temp = malloc((size_t)length * sizeof *temp);
    if (!temp) {
        fprintf(stderr, "ERROR: dwt_97_forward_1d: failed to allocate %d floats\n", length);
        exit(1);
    }
    float *low  = temp;
    float *high = temp + half;

    // Split into even (low) and odd (high) samples
    for (int i = 0; i < half; i++) {
        low[i] = data[2 * i];
    }
    for (int i = 0; i < n_high; i++) {
        high[i] = data[2 * i + 1];
    }

    dwt97_predict(low, high, half, n_high, alpha);  // Step 1
    dwt97_update(low, high, half, n_high, beta);    // Step 2
    dwt97_predict(low, high, half, n_high, gamma);  // Step 3
    dwt97_update(low, high, half, n_high, delta);   // Step 4

    // Step 5: scaling
    for (int i = 0; i < half; i++) {
        low[i] *= K;
    }
    for (int i = 0; i < n_high; i++) {
        high[i] /= K;
    }

    memcpy(data, temp, (size_t)length * sizeof *temp);
    free(temp);
}
|
||||
|
||||
/**
 * CDF 9/7 wavelet forward 1D using 16.16 fixed-point lifting.
 *
 * Same even/odd split and four lifting passes as dwt_97_forward_1d(),
 * but every lifting term is computed on samples truncated to int and
 * rounded back to an integer with RN() before being added.
 *
 * NOTE(review): K_FP and Ki_FP do not match their own comments:
 * 1.230174105 * 2^16 is about 80621 and 2^16 / 1.230174105 is about 53274.
 * The values are left untouched because a matching decoder may depend on
 * them - confirm before changing.
 * NOTE(review): the final scaling uses a plain ">>" instead of RN(), so
 * negative values are rounded differently there (and right-shifting a
 * negative value is implementation-defined in C).
 *
 * Terminates the process with an error message if the scratch buffer
 * cannot be allocated.
 *
 * @param data   In/out signal data
 * @param length Signal length (< 2 is a no-op)
 */
static void dwt_97_iint_forward_1d(float *data, int length) {
    if (length < 2) return;

    float *temp = malloc((size_t)length * sizeof *temp);
    if (!temp) {
        fprintf(stderr, "ERROR: dwt_97_iint_forward_1d: failed to allocate %d floats\n", length);
        exit(1);
    }
    int half = (length + 1) / 2;

    // Split into even (low) and odd (high) samples
    for (int i = 0; i < half; ++i) temp[i] = data[2*i];
    for (int i = 0; i < length/2; ++i) temp[half + i] = data[2*i + 1];

    const int SHIFT = 16;
    const int64_t ROUND = 1LL << (SHIFT - 1);
    const int64_t A = -103949;    // alpha * 2^16
    const int64_t B = -3472;      // beta  * 2^16
    const int64_t G = 57862;      // gamma * 2^16
    const int64_t D = 29066;      // delta * 2^16
    const int64_t K_FP  = 80542;  // nominally 1.230174105 * 2^16 (see NOTE above)
    const int64_t Ki_FP = 53283;  // nominally (1/1.230174105) * 2^16 (see NOTE above)

    // Round-to-nearest division by 2^SHIFT, symmetric around zero
    #define RN(x) (((x)>=0)?(((x)+ROUND)>>SHIFT):(-((-(x)+ROUND)>>SHIFT)))

    // Predict alpha
    for (int i = 0; i < length/2; ++i) {
        int s = temp[i];
        int sn = (i+1<half)? temp[i+1] : s;
        temp[half+i] += RN(A * (int64_t)(s + sn));
    }

    // Update beta
    for (int i = 0; i < half; ++i) {
        int d = (half+i<length)? temp[half+i]:0;
        int dp = (i>0 && half+i-1<length)? temp[half+i-1]:d;
        temp[i] += RN(B * (int64_t)(dp + d));
    }

    // Predict gamma
    for (int i = 0; i < length/2; ++i) {
        int s = temp[i];
        int sn = (i+1<half)? temp[i+1]:s;
        temp[half+i] += RN(G * (int64_t)(s + sn));
    }

    // Update delta
    for (int i = 0; i < half; ++i) {
        int d = (half+i<length)? temp[half+i]:0;
        int dp = (i>0 && half+i-1<length)? temp[half+i-1]:d;
        temp[i] += RN(D * (int64_t)(dp + d));
    }

    // Scaling
    for (int i = 0; i < half; ++i) {
        temp[i] = (((int64_t)temp[i] * K_FP + ROUND) >> SHIFT);
    }
    for (int i = 0; i < length/2; ++i) {
        if (half + i < length) {
            temp[half + i] = (((int64_t)temp[half + i] * Ki_FP + ROUND) >> SHIFT);
        }
    }

    memcpy(data, temp, (size_t)length * sizeof *temp);
    free(temp);
    #undef RN
}
|
||||
|
||||
/**
 * Deslauriers-Dubuc 4-point interpolating wavelet forward 1D (DD-4).
 *
 * Each odd sample is predicted from four even samples with the kernel
 * w[-1]=-1/16, w[0]=9/16, w[1]=9/16, w[2]=-1/16 and replaced by the
 * prediction error; each even sample then gains a quarter of the two
 * adjacent errors.
 * Output layout: [low-pass x ceil(length/2) | high-pass x floor(length/2)].
 *
 * Boundary handling: s[-1] and s[+1] are clamped to the first/last even
 * sample; s[+2] falls back to the second-to-last even sample (or the last
 * one when only a single even sample exists). Missing high-pass
 * neighbours count as 0 in the update.
 *
 * Terminates the process with an error message if the scratch buffer
 * cannot be allocated.
 *
 * @param data   In/out signal data
 * @param length Signal length (< 2 is a no-op)
 */
static void dwt_dd4_forward_1d(float *data, int length) {
    if (length < 2) return;

    const int half   = (length + 1) / 2;  // even-sample (low-pass) count
    const int n_high = length / 2;        // odd-sample (high-pass) count

    float *temp = malloc((size_t)length * sizeof *temp);
    if (!temp) {
        fprintf(stderr, "ERROR: dwt_dd4_forward_1d: failed to allocate %d floats\n", length);
        exit(1);
    }

    // Split into even/odd samples
    for (int i = 0; i < half; i++) {
        temp[i] = data[2 * i];
    }
    for (int i = 0; i < n_high; i++) {
        temp[half + i] = data[2 * i + 1];
    }

    // DD-4 prediction step with four-point kernel
    for (int i = 0; i < n_high; i++) {
        const float s_m1 = (i > 0) ? temp[i - 1] : temp[0];
        const float s_0  = temp[i];
        const float s_1  = (i + 1 < half) ? temp[i + 1] : temp[half - 1];
        const float s_2  = (i + 2 < half) ? temp[i + 2]
                                          : ((half > 1) ? temp[half - 2] : temp[half - 1]);

        const float prediction = (-1.0f/16.0f) * s_m1 + (9.0f/16.0f) * s_0 +
                                 (9.0f/16.0f) * s_1 + (-1.0f/16.0f) * s_2;

        temp[half + i] -= prediction;
    }

    // DD-4 update step
    for (int i = 0; i < half; i++) {
        const float d_curr = (i < n_high) ? temp[half + i] : 0.0f;
        const float d_prev = (i > 0) ? temp[half + i - 1] : 0.0f;
        temp[i] += 0.25f * (d_prev + d_curr);
    }

    memcpy(data, temp, (size_t)length * sizeof *temp);
    free(temp);
}
|
||||
|
||||
/**
 * "Biorthogonal 13/7" wavelet forward 1D (in place).
 *
 * As implemented this is not a true 13/7 filter bank: it runs the same
 * predict/update lifting as dwt_53_forward_1d() and then scales the
 * low-pass half by K and the high-pass half by 1/K.
 * Output layout: [low-pass x ceil(length/2) | high-pass x floor(length/2)].
 *
 * Terminates the process with an error message if the scratch buffer
 * cannot be allocated.
 *
 * @param data   In/out signal data
 * @param length Signal length (< 2 is a no-op)
 */
static void dwt_bior137_forward_1d(float *data, int length) {
    if (length < 2) return;

    const float K = 1.230174105f;
    const int half   = (length + 1) / 2;  // low-pass count
    const int n_high = length / 2;        // high-pass count

    float *temp = malloc((size_t)length * sizeof *temp);
    if (!temp) {
        fprintf(stderr, "ERROR: dwt_bior137_forward_1d: failed to allocate %d floats\n", length);
        exit(1);
    }

    // Predict step (high-pass): odd sample minus mean of even neighbours
    for (int i = 0; i < n_high; i++) {
        const float left  = data[2 * i];
        const float right = (2 * i + 2 < length) ? data[2 * i + 2] : left;
        const float prediction = 0.5f * (left + right);
        temp[half + i] = data[2 * i + 1] - prediction;
    }

    // Update step (low-pass); the last sample never uses a right-hand term
    for (int i = 0; i < half; i++) {
        const float d_prev = (i > 0) ? temp[half + i - 1] : 0;
        const float d_curr = (i < half - 1) ? temp[half + i] : 0;
        temp[i] = data[2 * i] + 0.25f * (d_prev + d_curr);
    }

    // Scaling
    for (int i = 0; i < half; i++) {
        temp[i] *= K;
    }
    for (int i = 0; i < n_high; i++) {
        temp[half + i] /= K;
    }

    memcpy(data, temp, (size_t)length * sizeof *temp);
    free(temp);
}
|
||||
|
||||
/**
 * Haar wavelet forward 1D transform (in place).
 *
 * Each sample pair (a, b) becomes the average (a + b) / 2 in the low-pass
 * half and the half-difference (a - b) / 2 in the high-pass half. For odd
 * lengths the unpaired last sample is copied to the low-pass half as is.
 * Output layout: [low-pass x ceil(length/2) | high-pass x floor(length/2)].
 *
 * Terminates the process with an error message if the scratch buffer
 * cannot be allocated.
 *
 * @param data   In/out signal data
 * @param length Signal length (< 2 is a no-op)
 */
static void dwt_haar_forward_1d(float *data, int length) {
    if (length < 2) return;

    const int half  = (length + 1) / 2;  // low-pass count
    const int pairs = length / 2;        // complete (even, odd) pairs

    float *temp = malloc((size_t)length * sizeof *temp);
    if (!temp) {
        fprintf(stderr, "ERROR: dwt_haar_forward_1d: failed to allocate %d floats\n", length);
        exit(1);
    }

    for (int i = 0; i < pairs; i++) {
        const float even = data[2 * i];
        const float odd  = data[2 * i + 1];
        temp[i]        = (even + odd) / 2.0f;
        temp[half + i] = (even - odd) / 2.0f;
    }
    if (pairs < half) {
        // Odd length: the trailing sample has no partner and no high-pass slot
        temp[half - 1] = data[length - 1];
    }

    memcpy(data, temp, (size_t)length * sizeof *temp);
    free(temp);
}
|
||||
|
||||
// =============================================================================
|
||||
// 1D Inverse DWT Transforms
|
||||
// =============================================================================
|
||||
|
||||
/**
 * CDF 5/3 wavelet inverse 1D transform (in place).
 *
 * Undoes dwt_53_forward_1d(): first removes the update term from the
 * low-pass half (using the same boundary rule as the forward transform,
 * i.e. the last low-pass sample never has a right-hand term), then adds
 * the prediction back to the high-pass half and re-interleaves the
 * samples.
 *
 * Terminates the process with an error message if the scratch buffer
 * cannot be allocated.
 *
 * @param data   In/out coefficient data, laid out [low-pass | high-pass]
 * @param length Signal length (< 2 is a no-op)
 */
static void dwt_53_inverse_1d(float *data, int length) {
    if (length < 2) return;

    const int half = (length + 1) / 2;  // low-pass count

    float *temp = malloc((size_t)length * sizeof *temp);
    if (!temp) {
        fprintf(stderr, "ERROR: dwt_53_inverse_1d: failed to allocate %d floats\n", length);
        exit(1);
    }

    // Work on a copy so data can be rewritten in interleaved order
    memcpy(temp, data, (size_t)length * sizeof *temp);

    // Undo update step: recover the original even samples
    for (int i = 0; i < half; i++) {
        const float d_prev = (i > 0) ? temp[half + i - 1] : 0;
        const float d_curr = (i < half - 1) ? temp[half + i] : 0;
        temp[i] -= 0.25f * (d_prev + d_curr);
    }

    // Undo predict step and interleave even/odd samples
    for (int i = 0; i < half; i++) {
        data[2 * i] = temp[i];
        if (2 * i + 1 < length) {
            const float right = (i + 1 < half) ? temp[i + 1] : temp[i];
            data[2 * i + 1] = temp[half + i] + 0.5f * (temp[i] + right);
        }
    }

    free(temp);
}
|
||||
|
||||
/**
 * Haar wavelet inverse 1D transform (in place).
 *
 * Undoes dwt_haar_forward_1d(): from an average s and half-difference d
 * the original pair is (s + d, s - d). For odd lengths the last low-pass
 * value is the unpaired trailing sample and is copied through.
 *
 * Terminates the process with an error message if the scratch buffer
 * cannot be allocated.
 *
 * @param data   In/out coefficient data, laid out [low-pass | high-pass]
 * @param length Signal length (< 2 is a no-op)
 */
static void dwt_haar_inverse_1d(float *data, int length) {
    if (length < 2) return;

    const int half  = (length + 1) / 2;  // low-pass count
    const int pairs = length / 2;        // high-pass count

    float *temp = malloc((size_t)length * sizeof *temp);
    if (!temp) {
        fprintf(stderr, "ERROR: dwt_haar_inverse_1d: failed to allocate %d floats\n", length);
        exit(1);
    }

    // Reconstruct each pair from its average and half-difference
    for (int i = 0; i < pairs; i++) {
        const float avg  = data[i];
        const float diff = data[half + i];
        temp[2 * i]     = avg + diff;
        temp[2 * i + 1] = avg - diff;
    }
    if (pairs < half) {
        temp[length - 1] = data[half - 1];
    }

    memcpy(data, temp, (size_t)length * sizeof *temp);
    free(temp);
}
|
||||
|
||||
// =============================================================================
|
||||
// 2D DWT Transform
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Apply 2D forward DWT to a frame (in-place).
|
||||
*
|
||||
* Applies separable 1D transforms: horizontal (rows), then vertical (columns).
|
||||
* Supports multi-level decomposition.
|
||||
*
|
||||
* @param data In/out 2D image data (row-major, width stride)
|
||||
* @param width Image width
|
||||
* @param height Image height
|
||||
* @param levels Number of decomposition levels
|
||||
* @param filter_type Wavelet type (WAVELET_* constant)
|
||||
*/
|
||||
void tav_dwt_2d_forward(float *data, int width, int height, int levels, int filter_type) {
|
||||
const int max_size = (width > height) ? width : height;
|
||||
float *temp_row = malloc(max_size * sizeof(float));
|
||||
float *temp_col = malloc(max_size * sizeof(float));
|
||||
|
||||
// Pre-calculate dimensions for each level
|
||||
int *widths = malloc((levels + 1) * sizeof(int));
|
||||
int *heights = malloc((levels + 1) * sizeof(int));
|
||||
widths[0] = width;
|
||||
heights[0] = height;
|
||||
for (int i = 1; i <= levels; i++) {
|
||||
widths[i] = (widths[i - 1] + 1) / 2;
|
||||
heights[i] = (heights[i - 1] + 1) / 2;
|
||||
}
|
||||
|
||||
// Apply multi-level decomposition
|
||||
for (int level = 0; level < levels; level++) {
|
||||
int current_width = widths[level];
|
||||
int current_height = heights[level];
|
||||
if (current_width < 1 || current_height < 1) break;
|
||||
|
||||
// Row transform (horizontal)
|
||||
for (int y = 0; y < current_height; y++) {
|
||||
// Extract row
|
||||
for (int x = 0; x < current_width; x++) {
|
||||
temp_row[x] = data[y * width + x];
|
||||
}
|
||||
|
||||
// Apply 1D DWT
|
||||
switch (filter_type) {
|
||||
case WAVELET_5_3_REVERSIBLE:
|
||||
dwt_53_forward_1d(temp_row, current_width);
|
||||
break;
|
||||
case WAVELET_9_7_IRREVERSIBLE:
|
||||
dwt_97_forward_1d(temp_row, current_width);
|
||||
break;
|
||||
case WAVELET_BIORTHOGONAL_13_7:
|
||||
dwt_bior137_forward_1d(temp_row, current_width);
|
||||
break;
|
||||
case WAVELET_DD4:
|
||||
dwt_dd4_forward_1d(temp_row, current_width);
|
||||
break;
|
||||
case WAVELET_HAAR:
|
||||
dwt_haar_forward_1d(temp_row, current_width);
|
||||
break;
|
||||
}
|
||||
|
||||
// Write back
|
||||
for (int x = 0; x < current_width; x++) {
|
||||
data[y * width + x] = temp_row[x];
|
||||
}
|
||||
}
|
||||
|
||||
// Column transform (vertical)
|
||||
for (int x = 0; x < current_width; x++) {
|
||||
// Extract column
|
||||
for (int y = 0; y < current_height; y++) {
|
||||
temp_col[y] = data[y * width + x];
|
||||
}
|
||||
|
||||
// Apply 1D DWT
|
||||
switch (filter_type) {
|
||||
case WAVELET_5_3_REVERSIBLE:
|
||||
dwt_53_forward_1d(temp_col, current_height);
|
||||
break;
|
||||
case WAVELET_9_7_IRREVERSIBLE:
|
||||
dwt_97_forward_1d(temp_col, current_height);
|
||||
break;
|
||||
case WAVELET_BIORTHOGONAL_13_7:
|
||||
dwt_bior137_forward_1d(temp_col, current_height);
|
||||
break;
|
||||
case WAVELET_DD4:
|
||||
dwt_dd4_forward_1d(temp_col, current_height);
|
||||
break;
|
||||
case WAVELET_HAAR:
|
||||
dwt_haar_forward_1d(temp_col, current_height);
|
||||
break;
|
||||
}
|
||||
|
||||
// Write back
|
||||
for (int y = 0; y < current_height; y++) {
|
||||
data[y * width + x] = temp_col[y];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free(widths);
|
||||
free(heights);
|
||||
free(temp_row);
|
||||
free(temp_col);
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// 3D DWT Transform (Temporal + Spatial)
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Apply 3D forward DWT to a GOP (group of pictures).
|
||||
*
|
||||
* First applies temporal DWT across frames at each spatial location,
|
||||
* then applies 2D spatial DWT to each resulting temporal subband.
|
||||
*
|
||||
* @param gop_data Array of frame pointers [num_frames][width*height]
|
||||
* @param width Frame width
|
||||
* @param height Frame height
|
||||
* @param num_frames Number of frames in GOP
|
||||
* @param spatial_levels Number of 2D spatial decomposition levels
|
||||
* @param temporal_levels Number of 1D temporal decomposition levels
|
||||
* @param spatial_filter Wavelet type for spatial transform
|
||||
* @param temporal_filter Wavelet type for temporal transform (0=Haar, 1=5/3)
|
||||
*/
|
||||
void tav_dwt_3d_forward(float **gop_data, int width, int height, int num_frames,
|
||||
int spatial_levels, int temporal_levels,
|
||||
int spatial_filter, int temporal_filter) {
|
||||
if (num_frames < 2 || width < 2 || height < 2) return;
|
||||
|
||||
float *temporal_line = malloc(num_frames * sizeof(float));
|
||||
|
||||
// Pre-calculate temporal lengths for non-power-of-2 GOPs
|
||||
int *temporal_lengths = malloc((temporal_levels + 1) * sizeof(int));
|
||||
temporal_lengths[0] = num_frames;
|
||||
for (int i = 1; i <= temporal_levels; i++) {
|
||||
temporal_lengths[i] = (temporal_lengths[i - 1] + 1) / 2;
|
||||
}
|
||||
|
||||
// Step 1: Apply temporal DWT across frames
|
||||
for (int y = 0; y < height; y++) {
|
||||
for (int x = 0; x < width; x++) {
|
||||
int pixel_idx = y * width + x;
|
||||
|
||||
// Extract temporal signal
|
||||
for (int t = 0; t < num_frames; t++) {
|
||||
temporal_line[t] = gop_data[t][pixel_idx];
|
||||
}
|
||||
|
||||
// Apply temporal DWT with multiple levels
|
||||
for (int level = 0; level < temporal_levels; level++) {
|
||||
int level_frames = temporal_lengths[level];
|
||||
if (level_frames >= 2) {
|
||||
if (temporal_filter == 255) {
|
||||
// Haar temporal (default)
|
||||
dwt_haar_forward_1d(temporal_line, level_frames);
|
||||
} else if (temporal_filter == 0) {
|
||||
// CDF 5/3 temporal
|
||||
dwt_53_forward_1d(temporal_line, level_frames);
|
||||
} else {
|
||||
// Fallback to Haar for unsupported wavelets
|
||||
dwt_haar_forward_1d(temporal_line, level_frames);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Write back temporal coefficients
|
||||
for (int t = 0; t < num_frames; t++) {
|
||||
gop_data[t][pixel_idx] = temporal_line[t];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free(temporal_lengths);
|
||||
free(temporal_line);
|
||||
|
||||
// Step 2: Apply 2D spatial DWT to each temporal subband
|
||||
for (int t = 0; t < num_frames; t++) {
|
||||
tav_dwt_2d_forward(gop_data[t], width, height, spatial_levels, spatial_filter);
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Utility Functions
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Suggest a decomposition depth for a frame of the given size.
 *
 * The smaller of the two dimensions is halved (integer division) for as
 * long as it is still at least 32; the number of halvings, limited to 6,
 * is the result. Dimensions below 32 therefore yield 0.
 *
 * @param width  Image width
 * @param height Image height
 * @return Recommended number of levels (0-6)
 */
int tav_dwt_calculate_levels(int width, int height) {
    int halvings = 0;

    for (int side = (width < height) ? width : height; side >= 32; side /= 2) {
        halvings++;
    }

    if (halvings > 6) {
        return 6;
    }
    return halvings;
}
|
||||
88
video_encoder/lib/libtavenc/tav_encoder_dwt.h
Normal file
88
video_encoder/lib/libtavenc/tav_encoder_dwt.h
Normal file
@@ -0,0 +1,88 @@
|
||||
/**
|
||||
* TAV Encoder - Discrete Wavelet Transform Library
|
||||
*
|
||||
* Public API for multi-resolution wavelet decomposition.
|
||||
* Supports multiple wavelet types: CDF 5/3, 9/7, 13/7, DD-4, Haar
|
||||
*/
|
||||
|
||||
#ifndef TAV_ENCODER_DWT_H
|
||||
#define TAV_ENCODER_DWT_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// =============================================================================
|
||||
// Wavelet Type Constants
|
||||
// =============================================================================
|
||||
|
||||
#define WAVELET_5_3_REVERSIBLE 0 // CDF 5/3 reversible (lossless capable)
|
||||
#define WAVELET_9_7_IRREVERSIBLE 1 // CDF 9/7 JPEG2000 (default, best compression)
|
||||
#define WAVELET_BIORTHOGONAL_13_7 2 // CDF 13/7 experimental
|
||||
#define WAVELET_DD4 16 // Deslauriers-Dubuc 4-point interpolating
|
||||
#define WAVELET_HAAR 255 // Haar (demonstration only)
|
||||
|
||||
// =============================================================================
|
||||
// 2D Discrete Wavelet Transform
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Apply 2D wavelet transform to spatial data.
|
||||
*
|
||||
* Uses separable 1D transforms: apply horizontal rows, then vertical columns.
|
||||
* Multi-level decomposition creates frequency subbands: LL, LH, HL, HH.
|
||||
*
|
||||
* @param data Input/output data array (modified in-place)
|
||||
* @param width Frame width
|
||||
* @param height Frame height
|
||||
 * @param levels Number of decomposition levels (0 applies no transform; there is no auto-calculation - use tav_dwt_calculate_levels())
|
||||
* @param filter_type Wavelet type (WAVELET_* constants)
|
||||
*/
|
||||
void tav_dwt_2d_forward(float *data, int width, int height,
|
||||
int levels, int filter_type);
|
||||
|
||||
// =============================================================================
|
||||
// 3D Discrete Wavelet Transform (GOP Temporal + Spatial)
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Apply 3D wavelet transform to group-of-pictures (GOP).
|
||||
*
|
||||
* Process:
|
||||
* 1. Apply temporal 1D DWT across frames at each spatial position
|
||||
* 2. Apply spatial 2D DWT to each temporal subband frame
|
||||
*
|
||||
* @param gop_data Array of frame pointers [num_frames]
|
||||
* @param width Frame width
|
||||
* @param height Frame height
|
||||
* @param num_frames Number of frames in GOP
|
||||
 * @param spatial_levels Spatial decomposition levels (0 applies no spatial transform; not auto-calculated)
|
||||
* @param temporal_levels Temporal decomposition levels
|
||||
* @param spatial_filter Wavelet type for spatial transform
|
||||
* @param temporal_filter Wavelet type for temporal transform
|
||||
*/
|
||||
void tav_dwt_3d_forward(float **gop_data, int width, int height, int num_frames,
|
||||
int spatial_levels, int temporal_levels,
|
||||
int spatial_filter, int temporal_filter);
|
||||
|
||||
// =============================================================================
|
||||
// Utility Functions
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Calculate optimal number of decomposition levels for given dimensions.
|
||||
*
|
||||
 * Counts how many times min(width, height) can be halved (integer division)
 * while it is still >= 32, capped at 6. Returns 0 when min(width, height) < 32.
|
||||
*
|
||||
* @param width Frame width
|
||||
* @param height Frame height
|
||||
* @return Recommended number of levels
|
||||
*/
|
||||
int tav_dwt_calculate_levels(int width, int height);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // TAV_ENCODER_DWT_H
|
||||
415
video_encoder/lib/libtavenc/tav_encoder_ezbc.c
Normal file
415
video_encoder/lib/libtavenc/tav_encoder_ezbc.c
Normal file
@@ -0,0 +1,415 @@
|
||||
/**
|
||||
* TAV Encoder - EZBC (Embedded Zero Block Coding) Library
|
||||
*
|
||||
* Implements binary tree embedded zero block coding for efficient storage
|
||||
* of sparse wavelet coefficients. Exploits coefficient sparsity through
|
||||
* hierarchical significance testing and progressive bitplane encoding.
|
||||
*
|
||||
* Extracted from encoder_tav.c as part of library refactoring.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdbool.h>
|
||||
#include <math.h>
|
||||
|
||||
// =============================================================================
|
||||
// EZBC Structures
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Growable byte buffer with a bit-granular write cursor.
 */
typedef struct {
    uint8_t *data;      // Heap buffer receiving the packed bits
    size_t   capacity;  // Allocated size of data, in bytes
    size_t   byte_pos;  // Index of the byte currently being filled
    uint8_t  bit_pos;   // Next bit to write within that byte (0-7)
} bitstream_t;
|
||||
|
||||
/**
 * Rectangular region of the 2D coefficient array, used as the unit of
 * EZBC block decomposition.
 */
typedef struct {
    int x;       // Left edge (column of the top-left corner)
    int y;       // Top edge (row of the top-left corner)
    int width;   // Horizontal extent of the block
    int height;  // Vertical extent of the block
} ezbc_block_t;
|
||||
|
||||
/**
|
||||
* Queue for EZBC block processing.
|
||||
*/
|
||||
typedef struct {
|
||||
ezbc_block_t *blocks;
|
||||
size_t count;
|
||||
size_t capacity;
|
||||
} block_queue_t;
|
||||
|
||||
/**
 * Per-coefficient bookkeeping used during refinement.
 */
typedef struct {
    bool significant;     // Set once the coefficient has been marked significant
    int  first_bitplane;  // Bitplane at which it became significant
} coeff_state_t;
|
||||
|
||||
/**
|
||||
* EZBC encoding context for recursive processing.
|
||||
*/
|
||||
typedef struct {
|
||||
bitstream_t *bs;
|
||||
int16_t *coeffs;
|
||||
coeff_state_t *states;
|
||||
int width;
|
||||
int height;
|
||||
int bitplane;
|
||||
int threshold;
|
||||
block_queue_t *next_insignificant;
|
||||
block_queue_t *next_significant;
|
||||
int *sign_count;
|
||||
} ezbc_context_t;
|
||||
|
||||
// =============================================================================
|
||||
// Bitstream Operations
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Initialize bitstream with initial capacity.
|
||||
*/
|
||||
static void bitstream_init(bitstream_t *bs, size_t initial_capacity) {
|
||||
// Ensure minimum capacity to avoid issues with zero-size allocations
|
||||
if (initial_capacity < 64) initial_capacity = 64;
|
||||
bs->capacity = initial_capacity;
|
||||
bs->data = calloc(1, initial_capacity);
|
||||
if (!bs->data) {
|
||||
fprintf(stderr, "ERROR: Failed to allocate bitstream buffer of size %zu\n", initial_capacity);
|
||||
exit(1);
|
||||
}
|
||||
bs->byte_pos = 0;
|
||||
bs->bit_pos = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a single bit to bitstream.
|
||||
*/
|
||||
static void bitstream_write_bit(bitstream_t *bs, int bit) {
|
||||
// Grow if needed
|
||||
if (bs->byte_pos >= bs->capacity) {
|
||||
size_t old_capacity = bs->capacity;
|
||||
bs->capacity *= 2;
|
||||
bs->data = realloc(bs->data, bs->capacity);
|
||||
// Clear only the newly allocated memory region
|
||||
memset(bs->data + old_capacity, 0, bs->capacity - old_capacity);
|
||||
}
|
||||
|
||||
if (bit) {
|
||||
bs->data[bs->byte_pos] |= (1 << bs->bit_pos);
|
||||
}
|
||||
|
||||
bs->bit_pos++;
|
||||
if (bs->bit_pos == 8) {
|
||||
bs->bit_pos = 0;
|
||||
bs->byte_pos++;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Write multiple bits to bitstream (LSB first).
|
||||
*/
|
||||
static void bitstream_write_bits(bitstream_t *bs, uint32_t value, int num_bits) {
|
||||
for (int i = 0; i < num_bits; i++) {
|
||||
bitstream_write_bit(bs, (value >> i) & 1);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get current bitstream size in bytes.
|
||||
*/
|
||||
static size_t bitstream_size(bitstream_t *bs) {
|
||||
return bs->byte_pos + (bs->bit_pos > 0 ? 1 : 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Free bitstream buffer.
|
||||
*/
|
||||
static void bitstream_free(bitstream_t *bs) {
|
||||
free(bs->data);
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Block Queue Operations
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Initialize block queue with initial capacity.
|
||||
*/
|
||||
static void queue_init(block_queue_t *q) {
|
||||
q->capacity = 1024;
|
||||
q->blocks = malloc(q->capacity * sizeof(ezbc_block_t));
|
||||
q->count = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Push block onto queue, growing if needed.
|
||||
*/
|
||||
static void queue_push(block_queue_t *q, ezbc_block_t block) {
|
||||
if (q->count >= q->capacity) {
|
||||
q->capacity *= 2;
|
||||
q->blocks = realloc(q->blocks, q->capacity * sizeof(ezbc_block_t));
|
||||
}
|
||||
q->blocks[q->count++] = block;
|
||||
}
|
||||
|
||||
/**
|
||||
* Free block queue.
|
||||
*/
|
||||
static void queue_free(block_queue_t *q) {
|
||||
free(q->blocks);
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// EZBC Helper Functions
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Check if all coefficients in block have |coeff| < threshold.
|
||||
*/
|
||||
static bool is_zero_block_ezbc(int16_t *coeffs, int width, int height,
|
||||
const ezbc_block_t *block, int threshold) {
|
||||
for (int y = block->y; y < block->y + block->height && y < height; y++) {
|
||||
for (int x = block->x; x < block->x + block->width && x < width; x++) {
|
||||
int idx = y * width + x;
|
||||
if (abs(coeffs[idx]) >= threshold) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Largest absolute value found in a coefficient array.
 *
 * @param coeffs Coefficients to scan
 * @param count  Number of coefficients
 * @return       max(|coeffs[i]|), or 0 when count is 0
 */
static int find_max_abs_ezbc(int16_t *coeffs, size_t count) {
    int peak = 0;
    size_t i = 0;
    while (i < count) {
        int magnitude = abs(coeffs[i]);
        peak = (magnitude > peak) ? magnitude : peak;
        i++;
    }
    return peak;
}
|
||||
|
||||
/**
 * Index of the highest set bit of value (floor(log2(value)) for value >= 1).
 *
 * Any value of 1 or less, including 0 and negative numbers, yields 0.
 *
 * @param value Value to inspect
 * @return      Bitplane number of the most significant set bit
 */
static int get_msb_bitplane(int value) {
    int bitplane = 0;
    for (; value > 1; value >>= 1) {
        bitplane++;
    }
    return bitplane;
}
|
||||
|
||||
/**
|
||||
* Recursively process a significant block - subdivide until 1x1.
|
||||
*/
|
||||
static void process_significant_block_recursive(ezbc_context_t *ctx, ezbc_block_t block) {
|
||||
// If 1x1 block: emit sign bit and add to significant queue
|
||||
if (block.width == 1 && block.height == 1) {
|
||||
int idx = block.y * ctx->width + block.x;
|
||||
bitstream_write_bit(ctx->bs, ctx->coeffs[idx] < 0 ? 1 : 0);
|
||||
(*ctx->sign_count)++;
|
||||
ctx->states[idx].significant = true;
|
||||
ctx->states[idx].first_bitplane = ctx->bitplane;
|
||||
queue_push(ctx->next_significant, block);
|
||||
return;
|
||||
}
|
||||
|
||||
// Block is > 1x1: subdivide into children and recursively process each
|
||||
int mid_x = block.width / 2;
|
||||
int mid_y = block.height / 2;
|
||||
if (mid_x == 0) mid_x = 1;
|
||||
if (mid_y == 0) mid_y = 1;
|
||||
|
||||
// Process top-left child
|
||||
ezbc_block_t tl = {block.x, block.y, mid_x, mid_y};
|
||||
if (!is_zero_block_ezbc(ctx->coeffs, ctx->width, ctx->height, &tl, ctx->threshold)) {
|
||||
bitstream_write_bit(ctx->bs, 1); // Significant
|
||||
process_significant_block_recursive(ctx, tl);
|
||||
} else {
|
||||
bitstream_write_bit(ctx->bs, 0); // Insignificant
|
||||
queue_push(ctx->next_insignificant, tl);
|
||||
}
|
||||
|
||||
// Process top-right child (if exists)
|
||||
if (block.width > mid_x) {
|
||||
ezbc_block_t tr = {block.x + mid_x, block.y, block.width - mid_x, mid_y};
|
||||
if (!is_zero_block_ezbc(ctx->coeffs, ctx->width, ctx->height, &tr, ctx->threshold)) {
|
||||
bitstream_write_bit(ctx->bs, 1);
|
||||
process_significant_block_recursive(ctx, tr);
|
||||
} else {
|
||||
bitstream_write_bit(ctx->bs, 0);
|
||||
queue_push(ctx->next_insignificant, tr);
|
||||
}
|
||||
}
|
||||
|
||||
// Process bottom-left child (if exists)
|
||||
if (block.height > mid_y) {
|
||||
ezbc_block_t bl = {block.x, block.y + mid_y, mid_x, block.height - mid_y};
|
||||
if (!is_zero_block_ezbc(ctx->coeffs, ctx->width, ctx->height, &bl, ctx->threshold)) {
|
||||
bitstream_write_bit(ctx->bs, 1);
|
||||
process_significant_block_recursive(ctx, bl);
|
||||
} else {
|
||||
bitstream_write_bit(ctx->bs, 0);
|
||||
queue_push(ctx->next_insignificant, bl);
|
||||
}
|
||||
}
|
||||
|
||||
// Process bottom-right child (if exists)
|
||||
if (block.width > mid_x && block.height > mid_y) {
|
||||
ezbc_block_t br = {block.x + mid_x, block.y + mid_y, block.width - mid_x, block.height - mid_y};
|
||||
if (!is_zero_block_ezbc(ctx->coeffs, ctx->width, ctx->height, &br, ctx->threshold)) {
|
||||
bitstream_write_bit(ctx->bs, 1);
|
||||
process_significant_block_recursive(ctx, br);
|
||||
} else {
|
||||
bitstream_write_bit(ctx->bs, 0);
|
||||
queue_push(ctx->next_insignificant, br);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Main EZBC Encoding Function
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* EZBC encoding for a single channel.
|
||||
*
|
||||
* Uses two separate queues for insignificant blocks and significant 1x1 blocks.
|
||||
* Encodes coefficients progressively from MSB to LSB bitplane.
|
||||
*
|
||||
* Algorithm:
|
||||
* 1. Find MSB bitplane from maximum absolute coefficient value
|
||||
* 2. Write header: MSB bitplane, width, height
|
||||
* 3. For each bitplane from MSB to 0:
|
||||
* a. Process insignificant blocks: check if they become significant
|
||||
* b. For newly significant blocks: recursively subdivide until 1x1
|
||||
* c. Emit sign bits for newly significant 1x1 coefficients
|
||||
* d. Process already-significant coefficients: emit refinement bits
|
||||
* 4. Return encoded bitstream
|
||||
*
|
||||
* @param coeffs Input quantized coefficients (int16_t array)
|
||||
* @param count Number of coefficients
|
||||
* @param width Frame width
|
||||
* @param height Frame height
|
||||
* @param output Output buffer pointer (allocated by this function)
|
||||
* @return Encoded size in bytes
|
||||
*/
|
||||
size_t tav_encode_channel_ezbc(int16_t *coeffs, size_t count, int width, int height,
|
||||
uint8_t **output) {
|
||||
bitstream_t bs;
|
||||
bitstream_init(&bs, count / 4); // Initial guess
|
||||
|
||||
// Track coefficient significance
|
||||
coeff_state_t *states = calloc(count, sizeof(coeff_state_t));
|
||||
|
||||
// Find maximum value to determine MSB bitplane
|
||||
int max_abs = find_max_abs_ezbc(coeffs, count);
|
||||
int msb_bitplane = get_msb_bitplane(max_abs);
|
||||
|
||||
// Write header: MSB bitplane and dimensions
|
||||
bitstream_write_bits(&bs, msb_bitplane, 8);
|
||||
bitstream_write_bits(&bs, width, 16);
|
||||
bitstream_write_bits(&bs, height, 16);
|
||||
|
||||
// Initialise two queues: insignificant blocks and significant 1x1 blocks
|
||||
block_queue_t insignificant_queue, next_insignificant;
|
||||
block_queue_t significant_queue, next_significant;
|
||||
|
||||
queue_init(&insignificant_queue);
|
||||
queue_init(&next_insignificant);
|
||||
queue_init(&significant_queue);
|
||||
queue_init(&next_significant);
|
||||
|
||||
// Start with root block as insignificant
|
||||
ezbc_block_t root = {0, 0, width, height};
|
||||
queue_push(&insignificant_queue, root);
|
||||
|
||||
// Process bitplanes from MSB to LSB
|
||||
for (int bitplane = msb_bitplane; bitplane >= 0; bitplane--) {
|
||||
int threshold = 1 << bitplane;
|
||||
|
||||
int sign_bits_this_bitplane = 0;
|
||||
|
||||
// Process insignificant blocks - check if they become significant
|
||||
for (size_t i = 0; i < insignificant_queue.count; i++) {
|
||||
ezbc_block_t block = insignificant_queue.blocks[i];
|
||||
|
||||
// Check if this block has any coefficient >= threshold
|
||||
if (is_zero_block_ezbc(coeffs, width, height, &block, threshold)) {
|
||||
// Still insignificant: emit 0
|
||||
bitstream_write_bit(&bs, 0);
|
||||
// Keep in insignificant queue for next bitplane
|
||||
queue_push(&next_insignificant, block);
|
||||
} else {
|
||||
// Became significant: emit 1
|
||||
bitstream_write_bit(&bs, 1);
|
||||
|
||||
// Use recursive subdivision to process this block and all children
|
||||
ezbc_context_t ctx = {
|
||||
.bs = &bs,
|
||||
.coeffs = coeffs,
|
||||
.states = states,
|
||||
.width = width,
|
||||
.height = height,
|
||||
.bitplane = bitplane,
|
||||
.threshold = threshold,
|
||||
.next_insignificant = &next_insignificant,
|
||||
.next_significant = &next_significant,
|
||||
.sign_count = &sign_bits_this_bitplane
|
||||
};
|
||||
process_significant_block_recursive(&ctx, block);
|
||||
}
|
||||
}
|
||||
|
||||
// Process significant 1x1 blocks - emit refinement bits
|
||||
for (size_t i = 0; i < significant_queue.count; i++) {
|
||||
ezbc_block_t block = significant_queue.blocks[i];
|
||||
int idx = block.y * width + block.x;
|
||||
int abs_val = abs(coeffs[idx]);
|
||||
|
||||
// Emit refinement bit at current bitplane
|
||||
int bit = (abs_val >> bitplane) & 1;
|
||||
bitstream_write_bit(&bs, bit);
|
||||
|
||||
// Keep in significant queue for next bitplane
|
||||
queue_push(&next_significant, block);
|
||||
}
|
||||
|
||||
// Swap queues for next bitplane
|
||||
queue_free(&insignificant_queue);
|
||||
queue_free(&significant_queue);
|
||||
insignificant_queue = next_insignificant;
|
||||
significant_queue = next_significant;
|
||||
queue_init(&next_insignificant);
|
||||
queue_init(&next_significant);
|
||||
}
|
||||
|
||||
// Free all queues
|
||||
queue_free(&insignificant_queue);
|
||||
queue_free(&significant_queue);
|
||||
queue_free(&next_insignificant);
|
||||
queue_free(&next_significant);
|
||||
free(states);
|
||||
|
||||
size_t final_size = bitstream_size(&bs);
|
||||
*output = bs.data;
|
||||
|
||||
return final_size;
|
||||
}
|
||||
61
video_encoder/lib/libtavenc/tav_encoder_ezbc.h
Normal file
61
video_encoder/lib/libtavenc/tav_encoder_ezbc.h
Normal file
@@ -0,0 +1,61 @@
|
||||
/**
|
||||
* TAV Encoder - EZBC (Embedded Zero Block Coding) Library
|
||||
*
|
||||
* Public API for EZBC entropy coding of wavelet coefficients.
|
||||
*/
|
||||
|
||||
#ifndef TAV_ENCODER_EZBC_H
|
||||
#define TAV_ENCODER_EZBC_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// =============================================================================
|
||||
// EZBC Encoding
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* EZBC encoding for a single channel.
|
||||
*
|
||||
* Implements quadtree-based embedded zero block coding for efficient storage
|
||||
* of sparse wavelet coefficients. Exploits coefficient sparsity through
|
||||
* hierarchical significance testing and progressive bitplane encoding.
|
||||
*
|
||||
* Algorithm:
|
||||
* 1. Find MSB bitplane from maximum absolute coefficient value
|
||||
* 2. Write header: MSB bitplane (8 bits), width (16 bits), height (16 bits)
|
||||
* 3. For each bitplane from MSB to 0:
|
||||
* a. Process insignificant blocks: check if they become significant
|
||||
* - Emit 0 if still insignificant, 1 if became significant
|
||||
* b. For newly significant blocks: recursively subdivide until 1x1
|
||||
* - Emit tree structure: 1=child is significant, 0=child insignificant
|
||||
* c. Emit sign bits for newly significant 1x1 coefficients (1=negative, 0=positive)
|
||||
* d. Process already-significant coefficients: emit refinement bits
|
||||
* - Emit bit at current bitplane for progressive reconstruction
|
||||
* 4. Return encoded bitstream
|
||||
*
|
||||
* Benefits:
|
||||
* - Exploits coefficient sparsity (typical: 86.9% zeros in luma, 97.8% in chroma)
|
||||
* - Progressive refinement from MSB to LSB
|
||||
* - Spatial clustering through quadtree decomposition
|
||||
* - No additional entropy coding needed (bitstream is already compressed)
|
||||
*
|
||||
* @param coeffs Input quantized coefficients (int16_t array)
|
||||
* @param count Number of coefficients (width × height)
|
||||
* @param width Frame width (must match coefficient array layout)
|
||||
* @param height Frame height (must match coefficient array layout)
|
||||
* @param output Output buffer pointer (allocated by this function, caller must free)
|
||||
* @return Encoded size in bytes (including header)
|
||||
*/
|
||||
size_t tav_encode_channel_ezbc(int16_t *coeffs, size_t count, int width, int height,
|
||||
uint8_t **output);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // TAV_ENCODER_EZBC_H
|
||||
1526
video_encoder/lib/libtavenc/tav_encoder_lib.c
Normal file
1526
video_encoder/lib/libtavenc/tav_encoder_lib.c
Normal file
File diff suppressed because it is too large
Load Diff
624
video_encoder/lib/libtavenc/tav_encoder_quantize.c
Normal file
624
video_encoder/lib/libtavenc/tav_encoder_quantize.c
Normal file
@@ -0,0 +1,624 @@
|
||||
/**
|
||||
* TAV Encoder - Quantization Library
|
||||
*
|
||||
* Provides DWT coefficient quantization with perceptual weighting based on
|
||||
* the Human Visual System (HVS). Implements separable 3D quantization for
|
||||
* temporal GOP encoding.
|
||||
*
|
||||
* Extracted from encoder_tav.c as part of library refactoring.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
|
||||
// Forward declaration of encoder context (defined in main encoder)
|
||||
typedef struct tav_encoder_s tav_encoder_t;
|
||||
|
||||
// =============================================================================
|
||||
// Utility Functions
|
||||
// =============================================================================
|
||||
|
||||
// Limit x to [min, max]; the lower bound is checked first.
static inline int CLAMP(int x, int min, int max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
|
||||
|
||||
// Limit x to [min, max] (float variant); the lower bound is checked first.
static inline float FCLAMP(float x, float min, float max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
|
||||
|
||||
// =============================================================================
|
||||
// Constants for Perceptual Model
|
||||
// =============================================================================
|
||||
|
||||
// Dead-zone quantization scaling factors (applied selectively to luma only)
#define DEAD_ZONE_FINEST_SCALE 1.0f  // Full dead-zone for finest level
#define DEAD_ZONE_FINE_SCALE   0.5f  // Reduced dead-zone for second-finest level

// Anisotropy parameters for horizontal vs vertical detail quantization.
// Indexed by quality level; every table below holds seven entries.
// NOTE(review): the original comment gave the index range as 0-5, which does
// not match the seven entries - confirm the valid quality range.
static const float ANISOTROPY_MULT[] = {
    5.1f, 3.8f, 2.7f, 2.0f, 1.5f, 1.2f, 1.0f
};
static const float ANISOTROPY_BIAS[] = {
    0.4f, 0.3f, 0.2f, 0.1f, 0.0f, 0.0f, 0.0f
};

// Chroma-specific anisotropy (more aggressive quantization)
static const float ANISOTROPY_MULT_CHROMA[] = {
    7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
};
static const float ANISOTROPY_BIAS_CHROMA[] = {
    1.0f, 0.8f, 0.6f, 0.4f, 0.2f, 0.0f, 0.0f
};

// Detail preservation factors for 2-pixel and 4-pixel structures
#define FOUR_PIXEL_DETAILER 0.88f
#define TWO_PIXEL_DETAILER  0.92f
|
||||
|
||||
// =============================================================================
|
||||
// Subband Analysis Helper Functions
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Decomposition level of the coefficient at position (x, y).
 *
 * The frame is halved repeatedly; the first level at which the position falls
 * outside the top-left (LL) quadrant is the coefficient's level. A position
 * still inside the LL quadrant after decomp_levels halvings belongs to the
 * final LL band.
 *
 * @return 1 for the first (full-frame) split up to decomp_levels for the last
 *         split, or 0 for the final LL band
 */
static int get_subband_level_2d(int x, int y, int width, int height, int decomp_levels) {
    int level = 1;
    while (level <= decomp_levels) {
        // Dimensions of the LL quadrant produced by this level's split
        width >>= 1;
        height >>= 1;

        // Outside the LL quadrant: one of the detail bands (LH, HL, HH) of this level
        if (x >= width || y >= height) {
            return level;
        }

        // Inside the LL quadrant: descend into the next, coarser split
        level++;
    }

    // Still inside LL after the last split
    return 0;
}
|
||||
|
||||
/**
 * Subband type of the coefficient at position (x, y).
 *
 * The frame is halved repeatedly; at the first level where the position lies
 * outside the top-left (LL) quadrant, the quadrant it occupies names the band.
 *
 * @return 0 = LL (inside the top-left quadrant at every level),
 *         1 = LH (top-right), 2 = HL (bottom-left), 3 = HH (bottom-right)
 */
static int get_subband_type_2d(int x, int y, int width, int height, int decomp_levels) {
    for (int level = 1; level <= decomp_levels; level++) {
        // Dimensions of the LL quadrant produced by this level's split
        width >>= 1;
        height >>= 1;

        // Bit 0: right half, bit 1: bottom half -> 1 = LH, 2 = HL, 3 = HH
        int band = (x >= width ? 1 : 0) + (y >= height ? 2 : 0);
        if (band != 0) {
            return band;
        }
        // band == 0: still inside the LL quadrant, descend into the next split
    }

    // Still inside LL after the last split
    return 0;
}
|
||||
|
||||
/**
 * Legacy wrapper: decomposition level for a coefficient given by its linear
 * (row-major) index. The index is split into x = idx % width and
 * y = idx / width and forwarded to get_subband_level_2d().
 */
static int get_subband_level(int linear_idx, int width, int height, int decomp_levels) {
    return get_subband_level_2d(linear_idx % width, linear_idx / width,
                                width, height, decomp_levels);
}
|
||||
|
||||
/**
 * Legacy wrapper: subband type for a coefficient given by its linear
 * (row-major) index. The index is split into x = idx % width and
 * y = idx / width and forwarded to get_subband_type_2d().
 */
static int get_subband_type(int linear_idx, int width, int height, int decomp_levels) {
    return get_subband_type_2d(linear_idx % width, linear_idx / width,
                               width, height, decomp_levels);
}
|
||||
|
||||
/**
 * Temporal subband level of a frame index within a GOP.
 *
 * Level L (0 <= L < temporal_levels) covers frame indices below
 * num_frames >> (temporal_levels - L); the first level whose boundary exceeds
 * frame_idx is returned. Frames beyond every boundary belong to the finest
 * level.
 *
 * @param frame_idx       Frame index within the GOP
 * @param num_frames      Number of frames in the GOP
 * @param temporal_levels Number of temporal decomposition levels
 * @return 0 for the coarsest subband up to temporal_levels for the finest
 */
static int get_temporal_subband_level(int frame_idx, int num_frames, int temporal_levels) {
    // Walk the boundaries from the coarsest (largest shift) to the finest
    for (int shift = temporal_levels; shift > 0; shift--) {
        int boundary = num_frames >> shift;
        if (frame_idx < boundary) {
            return temporal_levels - shift;
        }
    }

    // Finest level (first decomposition's high-pass)
    return temporal_levels;
}
|
||||
|
||||
// =============================================================================
|
||||
// Perceptual Model Functions (HVS-based weighting)
|
||||
// =============================================================================
|
||||
|
||||
// Linear interpolation: yields x at a = 0 and y at a = 1
static float lerp(float x, float y, float a) {
    const float weight_x = 1.f - a;
    return x * weight_x + y * a;
}
|
||||
|
||||
/**
 * Perceptual model for LH subband (horizontal details).
 * Curve: https://www.desmos.com/calculator/mjlpwqm8ge
 *
 * Piecewise curve over the normalized level: a straight line for level >= 4
 * and a cubic below it; both pieces evaluate to 1.2 at level 4.
 *
 * @param quality Quality level (not used by the current fixed curve)
 * @param level   Normalized decomposition level
 * @return        Perceptual weight multiplier
 */
static float perceptual_model3_LH(int quality, float level) {
    const float H4 = 1.2f;  // curve value at level 4
    const float K = 2.f;    // using fixed value for fixed curve; quantiser will scale it up anyway

    // Linear segment
    if (level >= 4) {
        return H4 - ((K + 1.f) / 15.f) * (level - 4.f);
    }

    // Cubic segment; C3 shifts it so that it meets the linear segment at level 4
    const float K12 = K * 12.f;
    const float C3 = -1.f / 45.f * (K12 + 92);
    return (-level / 180.f) * (K12 + 5*level*level - 60*level + 252) - C3 + H4;
}
|
||||
|
||||
/**
|
||||
* Perceptual model for HL subband (vertical details).
|
||||
* Derived from LH with anisotropy compensation.
|
||||
*
|
||||
* @param quality Quality level (0-5)
|
||||
* @param LH LH subband weight
|
||||
* @return Perceptual weight multiplier
|
||||
*/
|
||||
static float perceptual_model3_HL(int quality, float LH) {
|
||||
return fmaf(LH, ANISOTROPY_MULT[quality], ANISOTROPY_BIAS[quality]);
|
||||
}
|
||||
|
||||
/**
 * Perceptual model for HH subband (diagonal details).
 * Blends the LH and HL weights with factor (sqrt(level) - 1) * 0.5 + 0.5.
 *
 * @param LH    LH subband weight
 * @param HL    HL subband weight
 * @param level Normalized decomposition level
 * @return      Perceptual weight multiplier
 */
static float perceptual_model3_HH(float LH, float HL, float level) {
    const float root = sqrtf(level);
    const float blend = fmaf((root - 1.f), 0.5f, 0.5f);
    return lerp(LH, HL, blend);
}
|
||||
|
||||
/**
 * Perceptual model for LL subband (low-frequency baseband).
 *
 * Takes the LH weight n at this level and divides it by the ratio
 * LH(level - 1) / n.
 *
 * @param quality Quality level (forwarded to the LH model)
 * @param level   Normalized decomposition level
 * @return        Perceptual weight multiplier
 */
static float perceptual_model3_LL(int quality, float level) {
    const float here = perceptual_model3_LH(quality, level);
    const float below = perceptual_model3_LH(quality, level - 1);
    const float ratio = below / here;

    return here / ratio;
}
|
||||
|
||||
/**
 * Chroma-specific perceptual model base curve.
 *
 * A straight line through 1.0 at level 4 whose slope is
 * -1 / (0.5 * quality^2 + 1), so higher quality flattens the curve.
 *
 * @param quality Quality level
 * @param level   Normalized decomposition level
 * @return        Perceptual weight multiplier (unclamped)
 */
static float perceptual_model3_chroma_basecurve(int quality, float level) {
    const float slope = 1.0f / (0.5f * quality * quality + 1.0f);
    return 1.0f - slope * (level - 4.0f);
}
|
||||
|
||||
/**
|
||||
* Get perceptual weight for a specific subband and level.
|
||||
* Implements HVS-optimized frequency weighting.
|
||||
*
|
||||
* NOTE: This function requires enc->quality_level field from encoder context.
|
||||
*
|
||||
* @param enc Encoder context (for quality_level)
|
||||
* @param level0 Decomposition level (1-based: 1=finest, decomp_levels=coarsest)
|
||||
* @param subband_type Subband type (0=LL, 1=LH, 2=HL, 3=HH)
|
||||
* @param is_chroma 1 for chroma channels, 0 for luma
|
||||
* @param max_levels Maximum decomposition levels
|
||||
* @return Perceptual weight multiplier (chroma weights are clamped to [1.0, 100.0]; luma weights are returned unclamped)
|
||||
*/
|
||||
static float get_perceptual_weight(tav_encoder_t *enc, int level0, int subband_type, int is_chroma, int max_levels);
|
||||
|
||||
/**
|
||||
* Get perceptual weight for coefficient at linear index position.
|
||||
* Maps linear coefficient index to DWT subband layout.
|
||||
*
|
||||
* NOTE: This function requires enc->widths[]/enc->heights[] arrays from encoder context.
|
||||
*
|
||||
* @param enc Encoder context (for widths/heights arrays and quality_level)
|
||||
* @param linear_idx Linear coefficient index
|
||||
* @param width Frame width
|
||||
* @param height Frame height
|
||||
* @param decomp_levels Number of decomposition levels
|
||||
* @param is_chroma 1 for chroma channels, 0 for luma
|
||||
* @return Perceptual weight multiplier (≥1.0)
|
||||
*/
|
||||
static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma);
|
||||
|
||||
// =============================================================================
|
||||
// Quantization Functions
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Quantize DWT coefficients with uniform quantization and optional dead-zone.
|
||||
*
|
||||
* This is the basic quantization function without perceptual weighting.
|
||||
* Dead-zone quantization is applied selectively to luma channel only:
|
||||
* - HH1 (finest diagonal): full dead-zone
|
||||
* - LH1/HL1/HH2: half dead-zone
|
||||
* - Coarser levels: no dead-zone (preserve structure)
|
||||
*
|
||||
* @param coeffs Input DWT coefficients (float)
|
||||
* @param quantised Output quantized coefficients (int16_t)
|
||||
* @param size Number of coefficients
|
||||
* @param quantiser Base quantizer value (1-4096)
|
||||
* @param dead_zone_threshold Dead-zone threshold (0.0 = disabled)
|
||||
* @param width Frame width
|
||||
* @param height Frame height
|
||||
* @param decomp_levels Number of decomposition levels
|
||||
* @param is_chroma 1 for chroma channels, 0 for luma
|
||||
*/
|
||||
void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quantiser,
|
||||
float dead_zone_threshold, int width, int height,
|
||||
int decomp_levels, int is_chroma);
|
||||
|
||||
/**
|
||||
* Quantize DWT coefficients with per-coefficient perceptual weighting.
|
||||
*
|
||||
* Applies HVS-optimized frequency weighting to each coefficient based on its
|
||||
* position in the DWT subband tree. Implements the full perceptual model with
|
||||
* dead-zone quantization for luma.
|
||||
*
|
||||
* NOTE: This function requires encoder context fields:
|
||||
* - enc->widths[]/enc->heights[] for subband layout
|
||||
* - enc->quality_level for perceptual model
|
||||
* - enc->dead_zone_threshold for dead-zone quantization
|
||||
*
|
||||
* @param enc Encoder context
|
||||
* @param coeffs Input DWT coefficients (float)
|
||||
* @param quantised Output quantized coefficients (int16_t)
|
||||
* @param size Number of coefficients
|
||||
* @param base_quantiser Base quantizer value (before perceptual weighting)
|
||||
* @param width Frame width
|
||||
* @param height Frame height
|
||||
* @param decomp_levels Number of decomposition levels
|
||||
* @param is_chroma 1 for chroma channels, 0 for luma
|
||||
* @param frame_count Current frame number (for any frame-dependent logic)
|
||||
*/
|
||||
void tav_quantise_perceptual(tav_encoder_t *enc,
|
||||
float *coeffs, int16_t *quantised, int size,
|
||||
int base_quantiser, int width, int height,
|
||||
int decomp_levels, int is_chroma, int frame_count);
|
||||
|
||||
/**
|
||||
* Quantize 3D DWT coefficients with SEPARABLE temporal-spatial quantization.
|
||||
*
|
||||
* After 3D DWT (temporal + spatial), GOP coefficients have this structure:
|
||||
* - Temporal DWT applied first → temporal subbands at different levels
|
||||
* - Spatial 2D DWT applied to each temporal subband
|
||||
*
|
||||
* Quantization strategy:
|
||||
* 1. Compute temporal base quantizer: tH_base(level) = Qbase * 2^(beta*level^kappa)
|
||||
* - tLL (level 0): coarsest temporal → smallest quantizer
|
||||
* - tHH (highest level): finest temporal → largest quantizer
|
||||
* 2. Apply spatial perceptual weighting to tH_base
|
||||
* 3. Final quantizer: Q_effective = tH_base × spatial_weight
|
||||
*
|
||||
* NOTE: This function requires encoder context fields:
|
||||
* - enc->encoder_preset for sports mode detection
|
||||
* - enc->temporal_decomp_levels for temporal level calculation
|
||||
* - enc->verbose for debug output
|
||||
* - Plus all fields needed by tav_quantise_perceptual()
|
||||
*
|
||||
* @param enc Encoder context
|
||||
* @param gop_coeffs GOP coefficients [frame][pixel] (temporal subbands)
|
||||
* @param quantised Output quantized coefficients [frame][pixel]
|
||||
* @param num_frames Number of temporal subband frames
|
||||
* @param spatial_size Number of spatial coefficients per frame
|
||||
* @param base_quantiser Base quantizer value (before temporal/spatial scaling)
|
||||
* @param is_chroma 1 for chroma channels, 0 for luma
|
||||
*/
|
||||
void tav_quantise_3d_dwt(tav_encoder_t *enc,
|
||||
float **gop_coeffs, int16_t **quantised, int num_frames,
|
||||
int spatial_size, int base_quantiser, int is_chroma);
|
||||
|
||||
/**
|
||||
* Convert floating-point quantizer to integer with dithering (for bitrate mode).
|
||||
*
|
||||
* Implements Floyd-Steinberg style error diffusion to avoid quantization
|
||||
* artifacts when converting float quantizer values to integers for rate control.
|
||||
*
|
||||
* NOTE: This function requires encoder context fields:
|
||||
* - enc->adjusted_quantiser_y_float (current float quantizer)
|
||||
* - enc->dither_accumulator (accumulated error, modified by this function)
|
||||
*
|
||||
* @param enc Encoder context
|
||||
* @return Integer quantizer value (0-254)
|
||||
*/
|
||||
int tav_quantiser_float_to_int_dithered(tav_encoder_t *enc);
|
||||
|
||||
// =============================================================================
|
||||
// Perceptual Weight Implementation (requires encoder context)
|
||||
// =============================================================================
|
||||
|
||||
// NOTE: This implementation requires encoder context (enc->quality_level)
|
||||
// Struct definition will be in encoder header when integrated
|
||||
|
||||
#ifndef TAV_ENCODER_QUANTIZE_INTERNAL
// Stand-in definition of the encoder context, compiled unless
// TAV_ENCODER_QUANTIZE_INTERNAL is defined. It lists the fields this file's
// documentation refers to; the real definition is expected to come from the
// encoder header once integrated.
struct tav_encoder_s {
    int quality_level;                 // Quality level used by the perceptual model
    int *widths;                       // Subband layout: widths, indexed by decomposition level
    int *heights;                      // Subband layout: heights, indexed by decomposition level
    int decomp_levels;
    float dead_zone_threshold;         // Threshold for dead-zone quantization
    int encoder_preset;                // Used for sports mode detection
    int temporal_decomp_levels;        // Used for temporal level calculation
    int verbose;                       // Enables debug output
    int frame_count;
    float adjusted_quantiser_y_float;  // Current float quantizer
    float dither_accumulator;          // Accumulated dithering error
    int width;
    int height;
};
#endif
|
||||
|
||||
/**
 * Perceptual weight for one subband at one decomposition level.
 *
 * level0 is first mapped linearly onto a normalized level so that level0 = 1
 * gives 1.0 and level0 = max_levels gives 6.0. Luma weights come from the
 * perceptual_model3_* curves (HL and HH are additionally scaled by a detail
 * preservation factor near normalized levels 2 and 3); chroma weights come
 * from the chroma base curve and are clamped to [1.0, 100.0].
 *
 * NOTE(review): max_levels == 1 makes the normalization divide by zero -
 * confirm callers never pass a single decomposition level.
 * NOTE(review): enc->quality_level indexes the 7-entry anisotropy tables
 * without a range check - confirm it is validated upstream.
 *
 * @param enc          Encoder context (quality_level is read)
 * @param level0       Decomposition level (1-based)
 * @param subband_type Subband type (0=LL, 1=LH, 2=HL, anything else = HH)
 * @param is_chroma    Non-zero for chroma channels, 0 for luma
 * @param max_levels   Maximum decomposition levels
 * @return             Perceptual weight multiplier
 */
static float get_perceptual_weight(tav_encoder_t *enc, int level0, int subband_type, int is_chroma, int max_levels) {
    // Psychovisual model based on DWT coefficient statistics and Human Visual System sensitivity
    const float level = 1.0f + ((level0 - 1.0f) / (max_levels - 1.0f)) * 5.0f;
    const int quality = enc->quality_level;

    if (is_chroma) {
        // CHROMA CHANNELS: more aggressive quantisation than luma
        const float base = perceptual_model3_chroma_basecurve(quality, level - 1);

        switch (subband_type) {
        case 0:   // LL chroma
            return 1.0f;
        case 1:   // LH chroma - horizontal chroma details
            return FCLAMP(base, 1.0f, 100.0f);
        case 2:   // HL chroma - vertical chroma details
            return FCLAMP(base * ANISOTROPY_MULT_CHROMA[quality], 1.0f, 100.0f);
        default:  // HH chroma - diagonal chroma details (most aggressive)
            return FCLAMP(base * ANISOTROPY_MULT_CHROMA[quality] + ANISOTROPY_BIAS_CHROMA[quality], 1.0f, 100.0f);
        }
    }

    // LUMA: strategy is more horizontal detail

    // LL subband
    if (subband_type == 0) {
        return perceptual_model3_LL(quality, level);
    }

    // LH subband - horizontal details
    const float LH = perceptual_model3_LH(quality, level);
    if (subband_type == 1) {
        return LH;
    }

    // HL and HH share the detail preservation factor applied around
    // normalized levels 2 (2-pixel structures) and 3 (4-pixel structures)
    const float HL = perceptual_model3_HL(quality, LH);
    float detailer = 1.0f;
    if (2.2f >= level && level >= 1.8f) {
        detailer = TWO_PIXEL_DETAILER;
    } else if (3.2f >= level && level >= 2.8f) {
        detailer = FOUR_PIXEL_DETAILER;
    }

    // HL subband - vertical details
    if (subband_type == 2) {
        return HL * detailer;
    }

    // HH subband - diagonal details
    return perceptual_model3_HH(LH, HL, level) * detailer;
}
|
||||
|
||||
/**
 * Look up the perceptual weight for the coefficient at a linear index.
 *
 * The coefficient buffer is walked as: LL at the deepest level, then for each
 * level from decomp_levels down to 1 the LH, HL and HH subbands in that order.
 * The first subband whose range contains linear_idx decides the weight.
 *
 * @param enc           Encoder context; enc->widths[] / enc->heights[] give subband sizes.
 * @param linear_idx    Index into the linear coefficient buffer.
 * @param width         Not read by this function.
 * @param height        Not read by this function.
 * @param decomp_levels Number of spatial decomposition levels.
 * @param is_chroma     Forwarded to get_perceptual_weight().
 * @return Weight from get_perceptual_weight(), or 1.0f when the index lies
 *         past the last subband.
 */
static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma) {
    (void)width;
    (void)height;

    // LL subband at the maximum decomposition level comes first
    const int ll_count = enc->widths[decomp_levels] * enc->heights[decomp_levels];
    if (linear_idx < ll_count) {
        return get_perceptual_weight(enc, decomp_levels, 0, is_chroma, decomp_levels);
    }

    int band_start = ll_count;
    for (int level = decomp_levels; level >= 1; level--) {
        // NOTE(review): this index is 1 for the first (level == decomp_levels)
        // iteration - confirm it matches the widths[]/heights[] convention the
        // decoder uses.
        const int dim_idx = decomp_levels - level + 1;
        const int band_len = enc->widths[dim_idx] * enc->heights[dim_idx];

        // Bands 1, 2, 3 are LH, HL, HH; all three share the same size
        for (int band = 1; band <= 3; band++) {
            const int band_end = band_start + band_len;
            if (linear_idx < band_end) {
                return get_perceptual_weight(enc, level, band, is_chroma, decomp_levels);
            }
            band_start = band_end;
        }
    }

    // Fallback for out-of-bounds indices
    return 1.0f;
}
|
||||
|
||||
// =============================================================================
|
||||
// Quantization Function Implementations
|
||||
// =============================================================================
|
||||
|
||||
void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quantiser,
|
||||
float dead_zone_threshold, int width, int height,
|
||||
int decomp_levels, int is_chroma) {
|
||||
float effective_q = quantiser;
|
||||
effective_q = FCLAMP(effective_q, 1.0f, 4096.0f);
|
||||
|
||||
// Scalar implementation (AVX-512 version would go in separate optimized module)
|
||||
for (int i = 0; i < size; i++) {
|
||||
float quantised_val = coeffs[i] / effective_q;
|
||||
|
||||
// Apply dead-zone quantisation ONLY to luma channel and specific subbands
|
||||
if (dead_zone_threshold > 0.0f && !is_chroma) {
|
||||
int level = get_subband_level(i, width, height, decomp_levels);
|
||||
int subband_type = get_subband_type(i, width, height, decomp_levels);
|
||||
float level_threshold = 0.0f;
|
||||
|
||||
if (level == 1) {
|
||||
// Finest level
|
||||
if (subband_type == 3) {
|
||||
// HH1: full dead-zone
|
||||
level_threshold = dead_zone_threshold * DEAD_ZONE_FINEST_SCALE;
|
||||
} else if (subband_type == 1 || subband_type == 2) {
|
||||
// LH1, HL1: half dead-zone
|
||||
level_threshold = dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
|
||||
}
|
||||
} else if (level == 2) {
|
||||
// Second-finest level
|
||||
if (subband_type == 3) {
|
||||
// HH2: half dead-zone
|
||||
level_threshold = dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
|
||||
}
|
||||
}
|
||||
|
||||
if (fabsf(quantised_val) <= level_threshold) {
|
||||
quantised_val = 0.0f;
|
||||
}
|
||||
}
|
||||
|
||||
quantised[i] = (int16_t)CLAMP((int)(quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f)), -32768, 32767);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Quantise coefficients with a per-coefficient perceptual weight.
 *
 * The effective quantiser of coefficient i is the base quantiser (clamped to
 * [1, 4096]) times get_perceptual_weight_for_position(). The luma dead-zone
 * uses enc->dead_zone_threshold with the same level/subband table as
 * tav_quantise_uniform(). Results are rounded half away from zero and
 * saturated to the int16_t range.
 *
 * @param enc            Encoder context (weights and dead-zone threshold).
 * @param coeffs         Input coefficients, `size` entries.
 * @param quantised      Output buffer, `size` entries.
 * @param size           Number of coefficients.
 * @param base_quantiser Base quantiser before perceptual weighting.
 * @param width          Frame width, forwarded to the subband lookups.
 * @param height         Frame height, forwarded to the subband lookups.
 * @param decomp_levels  Number of decomposition levels.
 * @param is_chroma      Non-zero for chroma (no dead-zone).
 * @param frame_count    Not read by this function.
 */
void tav_quantise_perceptual(tav_encoder_t *enc,
                             float *coeffs, int16_t *quantised, int size,
                             int base_quantiser, int width, int height,
                             int decomp_levels, int is_chroma, int frame_count) {
    (void)frame_count;

    float effective_base_q = base_quantiser;
    effective_base_q = FCLAMP(effective_base_q, 1.0f, 4096.0f);

    // Loop-invariant: the dead-zone only ever applies to luma
    const int use_dead_zone = enc->dead_zone_threshold > 0.0f && !is_chroma;

    for (int i = 0; i < size; i++) {
        // Apply perceptual weight based on coefficient's position in DWT layout
        float weight = get_perceptual_weight_for_position(enc, i, width, height, decomp_levels, is_chroma);
        float effective_q = effective_base_q * weight;
        float quantised_val = coeffs[i] / effective_q;

        if (use_dead_zone) {
            int level = get_subband_level(i, width, height, decomp_levels);
            int subband_type = get_subband_type(i, width, height, decomp_levels);
            float level_threshold = 0.0f;

            if (level == 1) {
                if (subband_type == 3) {
                    level_threshold = enc->dead_zone_threshold * DEAD_ZONE_FINEST_SCALE;
                } else if (subband_type == 1 || subband_type == 2) {
                    level_threshold = enc->dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
                }
            } else if (level == 2) {
                if (subband_type == 3) {
                    level_threshold = enc->dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
                }
            }

            if (fabsf(quantised_val) <= level_threshold) {
                quantised_val = 0.0f;
            }
        }

        // Round half away from zero. Saturate while still in floating point:
        // converting a float outside the int range to int is undefined
        // behaviour, so the clamp has to come before the conversion.
        float rounded = quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f);
        quantised[i] = (int16_t)FCLAMP(rounded, -32768.0f, 32767.0f);
    }
}
|
||||
|
||||
/**
 * Quantise every temporal subband frame of a GOP.
 *
 * For frame t the base quantiser is scaled by 2^(BETA * level^KAPPA), where
 * level comes from get_temporal_subband_level(); the result is rounded,
 * clamped to [1, 255] and handed to tav_quantise_perceptual() together with
 * the frame's spatial coefficients. When bit 0 of enc->encoder_preset is set,
 * BETA is 0 and KAPPA is 1, so the scale is always 1.
 *
 * @param enc            Encoder context.
 * @param gop_coeffs     Per-frame input coefficient buffers.
 * @param quantised      Per-frame output buffers.
 * @param num_frames     Number of temporal subband frames.
 * @param spatial_size   Coefficients per frame.
 * @param base_quantiser Base quantiser before temporal/spatial scaling.
 * @param is_chroma      Forwarded to tav_quantise_perceptual().
 */
void tav_quantise_3d_dwt(tav_encoder_t *enc,
                         float **gop_coeffs, int16_t **quantised, int num_frames,
                         int spatial_size, int base_quantiser, int is_chroma) {
    // Sports preset (bit 0): finer temporal quantisation
    const int sports_preset = enc->encoder_preset & 0x01;
    const float BETA = sports_preset ? 0.0f : 0.6f;
    const float KAPPA = sports_preset ? 1.0f : 1.14f;

    // Temporal subbands are handled one by one (separable approach)
    for (int t = 0; t < num_frames; t++) {
        const int temporal_level = get_temporal_subband_level(t, num_frames, enc->temporal_decomp_levels);

        // Exponential scaling of the base quantiser with the temporal level
        const float temporal_scale = powf(2.0f, BETA * powf(temporal_level, KAPPA));
        const float temporal_quantiser = base_quantiser * temporal_scale;

        int frame_quantiser = (int)roundf(temporal_quantiser);
        frame_quantiser = CLAMP(frame_quantiser, 1, 255);

        // Spatial quantisation inside this temporal subband
        tav_quantise_perceptual(enc,
                                gop_coeffs[t],
                                quantised[t],
                                spatial_size,
                                frame_quantiser,
                                enc->width,
                                enc->height,
                                enc->decomp_levels,
                                is_chroma,
                                enc->frame_count + t);
    }
}
|
||||
|
||||
/**
 * Turn the float luma quantiser into an integer using error diffusion.
 *
 * The carried error in enc->dither_accumulator is added to
 * enc->adjusted_quantiser_y_float, the sum is rounded by truncating
 * (sum + 0.5), and half of the rounding error is stored back for the next
 * call. The error is measured against the integer before it is clamped.
 *
 * @param enc Encoder context; dither_accumulator is updated.
 * @return The rounded quantiser clamped to [0, 254].
 */
int tav_quantiser_float_to_int_dithered(tav_encoder_t *enc) {
    // Desired value including the error left over from the previous call
    const float target = enc->adjusted_quantiser_y_float + enc->dither_accumulator;

    const int rounded = (int)(target + 0.5f);

    // Carry 50% of this call's rounding error forward
    const float residual = target - (float)rounded;
    enc->dither_accumulator = residual * 0.5f;

    return CLAMP(rounded, 0, 254);
}
|
||||
137
video_encoder/lib/libtavenc/tav_encoder_quantize.h
Normal file
137
video_encoder/lib/libtavenc/tav_encoder_quantize.h
Normal file
@@ -0,0 +1,137 @@
|
||||
/**
|
||||
* TAV Encoder - Quantization Library
|
||||
*
|
||||
* Public API for DWT coefficient quantization with perceptual weighting.
|
||||
*/
|
||||
|
||||
#ifndef TAV_ENCODER_QUANTIZE_H
|
||||
#define TAV_ENCODER_QUANTIZE_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Forward declaration of encoder context (defined in main encoder)
|
||||
typedef struct tav_encoder_s tav_encoder_t;
|
||||
|
||||
// =============================================================================
|
||||
// Uniform Quantization
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Quantize DWT coefficients with uniform quantization and optional dead-zone.
|
||||
*
|
||||
* This is the basic quantization function without perceptual weighting.
|
||||
* Dead-zone quantization is applied selectively to luma channel only:
|
||||
* - HH1 (finest diagonal): full dead-zone
|
||||
* - LH1/HL1/HH2: half dead-zone
|
||||
* - Coarser levels: no dead-zone (preserve structure)
|
||||
*
|
||||
* @param coeffs Input DWT coefficients (float)
|
||||
* @param quantised Output quantized coefficients (int16_t)
|
||||
* @param size Number of coefficients
|
||||
* @param quantiser Base quantizer value (1-4096)
|
||||
* @param dead_zone_threshold Dead-zone threshold (0.0 = disabled)
|
||||
* @param width Frame width
|
||||
* @param height Frame height
|
||||
* @param decomp_levels Number of decomposition levels
|
||||
* @param is_chroma 1 for chroma channels, 0 for luma
|
||||
*/
|
||||
void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quantiser,
|
||||
float dead_zone_threshold, int width, int height,
|
||||
int decomp_levels, int is_chroma);
|
||||
|
||||
// =============================================================================
|
||||
// Perceptual Quantization
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Quantize DWT coefficients with per-coefficient perceptual weighting.
|
||||
*
|
||||
* Applies HVS-optimized frequency weighting to each coefficient based on its
|
||||
* position in the DWT subband tree. Implements the full perceptual model with
|
||||
* dead-zone quantization for luma.
|
||||
*
|
||||
* NOTE: This function requires encoder context fields:
|
||||
* - enc->widths[]/enc->heights[] for subband layout
|
||||
* - enc->quality_level for perceptual model
|
||||
* - enc->dead_zone_threshold for dead-zone quantization
|
||||
*
|
||||
* @param enc Encoder context
|
||||
* @param coeffs Input DWT coefficients (float)
|
||||
* @param quantised Output quantized coefficients (int16_t)
|
||||
* @param size Number of coefficients
|
||||
* @param base_quantiser Base quantizer value (before perceptual weighting)
|
||||
* @param width Frame width
|
||||
* @param height Frame height
|
||||
* @param decomp_levels Number of decomposition levels
|
||||
* @param is_chroma 1 for chroma channels, 0 for luma
|
||||
* @param frame_count Current frame number (for any frame-dependent logic)
|
||||
*/
|
||||
void tav_quantise_perceptual(tav_encoder_t *enc,
|
||||
float *coeffs, int16_t *quantised, int size,
|
||||
int base_quantiser, int width, int height,
|
||||
int decomp_levels, int is_chroma, int frame_count);
|
||||
|
||||
// =============================================================================
|
||||
// 3D GOP Quantization
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Quantize 3D DWT coefficients with SEPARABLE temporal-spatial quantization.
|
||||
*
|
||||
* After 3D DWT (temporal + spatial), GOP coefficients have this structure:
|
||||
* - Temporal DWT applied first → temporal subbands at different levels
|
||||
* - Spatial 2D DWT applied to each temporal subband
|
||||
*
|
||||
* Quantization strategy:
|
||||
* 1. Compute temporal base quantizer: tH_base(level) = Qbase * 2^(beta*level^kappa)
|
||||
* - tLL (level 0): coarsest temporal → smallest quantizer
|
||||
* - tHH (highest level): finest temporal → largest quantizer
|
||||
* 2. Apply spatial perceptual weighting to tH_base
|
||||
* 3. Final quantizer: Q_effective = tH_base × spatial_weight
|
||||
*
|
||||
* NOTE: This function requires encoder context fields:
|
||||
* - enc->encoder_preset for sports mode detection
|
||||
* - enc->temporal_decomp_levels for temporal level calculation
|
||||
* - enc->verbose (reserved for debug output; the implementation does not read it at present)
|
||||
* - Plus all fields needed by tav_quantise_perceptual()
|
||||
*
|
||||
* @param enc Encoder context
|
||||
* @param gop_coeffs GOP coefficients [frame][pixel] (temporal subbands)
|
||||
* @param quantised Output quantized coefficients [frame][pixel]
|
||||
* @param num_frames Number of temporal subband frames
|
||||
* @param spatial_size Number of spatial coefficients per frame
|
||||
* @param base_quantiser Base quantizer value (before temporal/spatial scaling)
|
||||
* @param is_chroma 1 for chroma channels, 0 for luma
|
||||
*/
|
||||
void tav_quantise_3d_dwt(tav_encoder_t *enc,
|
||||
float **gop_coeffs, int16_t **quantised, int num_frames,
|
||||
int spatial_size, int base_quantiser, int is_chroma);
|
||||
|
||||
// =============================================================================
|
||||
// Rate Control
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Convert floating-point quantizer to integer with dithering (for bitrate mode).
|
||||
*
|
||||
* Implements Floyd-Steinberg style error diffusion to avoid quantization
|
||||
* artifacts when converting float quantizer values to integers for rate control.
|
||||
*
|
||||
* NOTE: This function requires encoder context fields:
|
||||
* - enc->adjusted_quantiser_y_float (current float quantizer)
|
||||
* - enc->dither_accumulator (accumulated error, modified by this function)
|
||||
*
|
||||
* @param enc Encoder context
|
||||
* @return Integer quantizer value (0-254)
|
||||
*/
|
||||
int tav_quantiser_float_to_int_dithered(tav_encoder_t *enc);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // TAV_ENCODER_QUANTIZE_H
|
||||
441
video_encoder/lib/libtavenc/tav_encoder_utils.c
Normal file
441
video_encoder/lib/libtavenc/tav_encoder_utils.c
Normal file
@@ -0,0 +1,441 @@
|
||||
/**
|
||||
* TAV Encoder - Utilities Library
|
||||
*
|
||||
* Common utility functions and helpers used across the encoder.
|
||||
* Includes math utilities, clamping, filename generation, etc.
|
||||
*
|
||||
* Extracted from encoder_tav.c as part of library refactoring.
|
||||
*/
|
||||
|
||||
#define _POSIX_C_SOURCE 200112L
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <math.h>
|
||||
|
||||
// =============================================================================
|
||||
// Math Utilities
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Restrict an integer to the closed interval [min, max].
 * The lower bound is tested first, so x < min yields min even if min > max.
 */
int tav_clamp_int(int x, int min, int max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
|
||||
|
||||
/**
 * Restrict a float to the closed interval [min, max].
 * The lower bound is tested first; a NaN input fails both comparisons and is
 * returned unchanged.
 */
float tav_clamp_float(float x, float min, float max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
|
||||
|
||||
/**
 * Restrict a double to the closed interval [min, max].
 * The lower bound is tested first; a NaN input fails both comparisons and is
 * returned unchanged.
 */
double tav_clamp_double(double x, double min, double max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
|
||||
|
||||
/**
|
||||
* Round double to nearest integer.
|
||||
*/
|
||||
int tav_iround(double v) {
|
||||
return (int)floor(v + 0.5);
|
||||
}
|
||||
|
||||
/**
 * Blend two floats: returns a when t is 0 and b when t is 1.
 *
 * @param a Value at t = 0
 * @param b Value at t = 1
 * @param t Blend factor; it is not clamped, so values outside [0, 1] extrapolate
 * @return a * (1 - t) + b * t
 */
float tav_lerp(float a, float b, float t) {
    const float from_a = a * (1.0f - t);
    const float from_b = b * t;
    return from_a + from_b;
}
|
||||
|
||||
/**
 * Blend two doubles: returns a * (1 - t) + b * t. The factor t is not clamped.
 */
double tav_lerp_double(double a, double b, double t) {
    const double from_a = a * (1.0 - t);
    const double from_b = b * t;
    return from_a + from_b;
}
|
||||
|
||||
/**
 * Return the smaller of two integers.
 */
int tav_min_int(int a, int b) {
    if (a < b) {
        return a;
    }
    return b;
}
|
||||
|
||||
/**
 * Return the larger of two integers.
 */
int tav_max_int(int a, int b) {
    if (a > b) {
        return a;
    }
    return b;
}
|
||||
|
||||
/**
 * Return the smaller of two floats. If the comparison a < b is false
 * (including when either operand is NaN) b is returned.
 */
float tav_min_float(float a, float b) {
    if (a < b) {
        return a;
    }
    return b;
}
|
||||
|
||||
/**
 * Return the larger of two floats. If the comparison a > b is false
 * (including when either operand is NaN) b is returned.
 */
float tav_max_float(float a, float b) {
    if (a > b) {
        return a;
    }
    return b;
}
|
||||
|
||||
/**
 * Absolute value of an integer.
 * The most negative int has no positive counterpart, so callers must not
 * pass it (negating it overflows).
 */
int tav_abs_int(int x) {
    if (x < 0) {
        return -x;
    }
    return x;
}
|
||||
|
||||
/**
 * Absolute value of a float, computed by negating values that compare below
 * zero. Inputs that do not compare below zero are returned as they are.
 */
float tav_abs_float(float x) {
    if (x < 0.0f) {
        return -x;
    }
    return x;
}
|
||||
|
||||
/**
 * Sign of an integer: 1 for positive, -1 for negative, 0 for zero.
 */
int tav_sign(int x) {
    if (x > 0) {
        return 1;
    }
    if (x < 0) {
        return -1;
    }
    return 0;
}
|
||||
|
||||
/**
 * Test whether x is a positive power of two.
 * @return 1 if exactly one bit of a positive x is set, otherwise 0.
 */
int tav_is_power_of_2(int x) {
    if (x <= 0) {
        return 0;
    }
    // Clearing the lowest set bit leaves zero only for a single-bit value
    const int without_lowest_bit = x & (x - 1);
    return without_lowest_bit == 0;
}
|
||||
|
||||
/**
 * Round up to the nearest power of two.
 *
 * Values that already are a power of two are returned unchanged; x <= 0
 * yields 1. The bit-smearing steps assume int is 32 bits wide.
 *
 * @param x Value to round up.
 * @return Smallest power of two >= x, or 0 when that power (2^31) cannot be
 *         represented in an int, i.e. for x > 2^30.
 */
int tav_next_power_of_2(int x) {
    if (x <= 1) {
        return 1;
    }
    // Above 2^30 the answer would be 2^31; computing it in a signed int
    // overflows, so report "not representable" instead.
    if (x > 0x40000000) {
        return 0;
    }

    // Smear the highest set bit of (x - 1) into every lower position, then
    // add one to land on the next power of two.
    unsigned int v = (unsigned int)x - 1u;
    v |= v >> 1;
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;
    return (int)(v + 1u);
}
|
||||
|
||||
/**
 * Integer base-2 logarithm, rounded down.
 * @return floor(log2(x)) for x > 0, or -1 for x <= 0.
 */
int tav_floor_log2(int x) {
    if (x <= 0) {
        return -1;
    }
    // Count how many times the value can be halved before reaching 1
    int exponent = 0;
    for (int rest = x; rest > 1; rest >>= 1) {
        exponent++;
    }
    return exponent;
}
|
||||
|
||||
/**
 * Integer base-2 logarithm, rounded up.
 * @return ceil(log2(x)) for x > 0, or -1 for x <= 0.
 */
int tav_ceil_log2(int x) {
    if (x <= 0) {
        return -1;
    }
    // Count halvings to get floor(log2(x))
    int bits = 0;
    for (int rest = x; rest > 1; rest >>= 1) {
        bits++;
    }
    // Exact powers of two need no rounding up
    return ((1 << bits) == x) ? bits : bits + 1;
}
|
||||
|
||||
// =============================================================================
|
||||
// Random Filename Generation
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Write a path of the form /tmp/<32 random alphanumerics>.mp2 into filename.
 *
 * The characters come from rand(), which this function seeds from the clock
 * on its first call.
 * NOTE(review): names built this way are guessable and nothing here creates
 * the file - confirm that is acceptable for the callers.
 *
 * @param filename Output buffer of at least 42 bytes (41 characters + NUL)
 */
void tav_generate_random_filename(char *filename) {
    static const char alphabet[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
    static int rng_ready = 0;
    enum { PREFIX_LEN = 5, RANDOM_LEN = 32 };
    const int alphabet_len = sizeof(alphabet) - 1;

    if (!rng_ready) {
        srand(time(NULL));
        rng_ready = 1;
    }

    memcpy(filename, "/tmp/", PREFIX_LEN);

    char *cursor = filename + PREFIX_LEN;
    for (int i = 0; i < RANDOM_LEN; i++) {
        *cursor++ = alphabet[rand() % alphabet_len];
    }

    // Five bytes: the extension plus its terminating NUL
    memcpy(cursor, ".mp2", 5);
}
|
||||
|
||||
/**
 * Write a path of the form /tmp/<32 random alphanumerics>.<ext> into filename.
 *
 * The characters come from rand(), which this function seeds from the clock
 * on its first call.
 * NOTE(review): names built this way are guessable and nothing here creates
 * the file - confirm that is acceptable for the callers.
 *
 * @param filename Output buffer of at least 38 + strlen(ext) + 1 bytes
 * @param ext      Extension without the leading dot (e.g. "tmp", "wav");
 *                 must be a valid NUL-terminated string
 */
void tav_generate_random_filename_ext(char *filename, const char *ext) {
    static const char alphabet[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
    static int rng_ready = 0;
    enum { PREFIX_LEN = 5, RANDOM_LEN = 32 };
    const int alphabet_len = sizeof(alphabet) - 1;

    if (!rng_ready) {
        srand(time(NULL));
        rng_ready = 1;
    }

    memcpy(filename, "/tmp/", PREFIX_LEN);

    char *cursor = filename + PREFIX_LEN;
    for (int i = 0; i < RANDOM_LEN; i++) {
        *cursor++ = alphabet[rand() % alphabet_len];
    }

    *cursor++ = '.';
    strcpy(cursor, ext);
}
|
||||
|
||||
// =============================================================================
|
||||
// Memory Utilities
|
||||
// =============================================================================
|
||||
|
||||
/**
 * malloc() wrapper that never returns NULL for a non-zero request:
 * on failure it prints a message to stderr and terminates with exit(1).
 * For size == 0 whatever malloc() returned is passed through.
 */
void *tav_malloc(size_t size) {
    void *block = malloc(size);
    if (block != NULL || size == 0) {
        return block;
    }
    fprintf(stderr, "ERROR: Failed to allocate %zu bytes\n", size);
    exit(1);
}
|
||||
|
||||
/**
 * calloc() wrapper that never returns NULL for a non-empty request:
 * on failure it prints a message to stderr and terminates with exit(1).
 * If count or size is 0 whatever calloc() returned is passed through.
 */
void *tav_calloc(size_t count, size_t size) {
    void *block = calloc(count, size);
    if (block != NULL || count == 0 || size == 0) {
        return block;
    }
    fprintf(stderr, "ERROR: Failed to allocate %zu elements of %zu bytes\n", count, size);
    exit(1);
}
|
||||
|
||||
/**
 * realloc() wrapper that never returns NULL for a non-zero request:
 * on failure it prints a message to stderr and terminates with exit(1).
 * For size == 0 whatever realloc() returned is passed through.
 */
void *tav_realloc(void *ptr, size_t size) {
    void *resized = realloc(ptr, size);
    if (resized != NULL || size == 0) {
        return resized;
    }
    fprintf(stderr, "ERROR: Failed to reallocate to %zu bytes\n", size);
    exit(1);
}
|
||||
|
||||
/**
 * Allocate `size` bytes aligned to `alignment`.
 *
 * @param alignment Required alignment; must be a non-zero power of two.
 *                  On the POSIX path, values below sizeof(void *) are raised
 *                  to sizeof(void *), which still satisfies the request.
 * @param size      Number of bytes to allocate.
 * @return Pointer to release with tav_aligned_free(), or NULL if the
 *         alignment is invalid (a message is printed to stderr) or the
 *         allocation fails.
 */
void *tav_aligned_alloc(size_t alignment, size_t size) {
    // Checked on the size_t itself: routing it through an int-based helper
    // would truncate large alignments.
    if (alignment == 0 || (alignment & (alignment - 1)) != 0) {
        fprintf(stderr, "ERROR: Alignment must be power of 2, got %zu\n", alignment);
        return NULL;
    }

#ifdef _WIN32
    return _aligned_malloc(size, alignment);
#else
    // posix_memalign() rejects alignments that are not a multiple of
    // sizeof(void *); a stricter alignment is always acceptable to the caller.
    if (alignment < sizeof(void *)) {
        alignment = sizeof(void *);
    }

    void *ptr = NULL;
    if (posix_memalign(&ptr, alignment, size) != 0) {
        return NULL;
    }
    return ptr;
#endif
}
|
||||
|
||||
/**
 * Release memory with the deallocator that matches the platform:
 * _aligned_free() when _WIN32 is defined, free() otherwise.
 */
void tav_aligned_free(void *ptr) {
#if defined(_WIN32)
    _aligned_free(ptr);
#else
    free(ptr);
#endif
}
|
||||
|
||||
// =============================================================================
|
||||
// Array Utilities
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Store `value` into the first `count` elements of an int array.
 */
void tav_array_fill_int(int *array, size_t count, int value) {
    while (count-- > 0) {
        *array++ = value;
    }
}
|
||||
|
||||
/**
 * Store `value` into the first `count` elements of a float array.
 */
void tav_array_fill_float(float *array, size_t count, float value) {
    while (count-- > 0) {
        *array++ = value;
    }
}
|
||||
|
||||
/**
 * Copy `count` ints from src to dst with memcpy(); the two ranges must not
 * overlap.
 */
void tav_array_copy_int(int *dst, const int *src, size_t count) {
    const size_t byte_count = count * sizeof(int);
    memcpy(dst, src, byte_count);
}
|
||||
|
||||
/**
 * Copy `count` floats from src to dst with memcpy(); the two ranges must not
 * overlap.
 */
void tav_array_copy_float(float *dst, const float *src, size_t count) {
    const size_t byte_count = count * sizeof(float);
    memcpy(dst, src, byte_count);
}
|
||||
|
||||
/**
 * Largest element of an int array.
 * @return The maximum, or 0 when count is 0.
 */
int tav_array_max_int(const int *array, size_t count) {
    if (count == 0) {
        return 0;
    }
    int best = array[0];
    for (size_t i = 1; i < count; i++) {
        best = (array[i] > best) ? array[i] : best;
    }
    return best;
}
|
||||
|
||||
/**
 * Smallest element of an int array.
 * @return The minimum, or 0 when count is 0.
 */
int tav_array_min_int(const int *array, size_t count) {
    if (count == 0) {
        return 0;
    }
    int best = array[0];
    for (size_t i = 1; i < count; i++) {
        best = (array[i] < best) ? array[i] : best;
    }
    return best;
}
|
||||
|
||||
/**
 * Largest magnitude found in a float array.
 * @return max(|array[i]|), or 0.0f when count is 0.
 */
float tav_array_max_abs_float(const float *array, size_t count) {
    if (count == 0) {
        return 0.0f;
    }
    float peak = fabsf(array[0]);
    for (size_t i = 1; i < count; i++) {
        const float magnitude = fabsf(array[i]);
        peak = (magnitude > peak) ? magnitude : peak;
    }
    return peak;
}
|
||||
|
||||
/**
 * Add up an int array, accumulating in a long long.
 * @return The sum, or 0 for an empty array.
 */
long long tav_array_sum_int(const int *array, size_t count) {
    long long total = 0;
    while (count-- > 0) {
        total += *array++;
    }
    return total;
}
|
||||
|
||||
/**
 * Add up a float array front to back, accumulating in a double.
 * @return The sum, or 0.0 for an empty array.
 */
double tav_array_sum_float(const float *array, size_t count) {
    double total = 0.0;
    while (count-- > 0) {
        total += *array++;
    }
    return total;
}
|
||||
|
||||
/**
 * Arithmetic mean of a float array. The elements are summed front to back in
 * a double and the quotient is narrowed to float.
 * @return The mean, or 0.0f when count is 0.
 */
float tav_array_mean_float(const float *array, size_t count) {
    if (count == 0) {
        return 0.0f;
    }
    double total = 0.0;
    for (size_t i = 0; i < count; i++) {
        total += array[i];
    }
    return (float)(total / count);
}
|
||||
|
||||
/**
 * Exchange the ints that a and b point to. Both pointers must be valid;
 * passing the same address twice leaves the value unchanged.
 */
void tav_swap_int(int *a, int *b) {
    const int first = *a;
    const int second = *b;
    *a = second;
    *b = first;
}
|
||||
|
||||
/**
 * Exchange the floats that a and b point to. Both pointers must be valid;
 * passing the same address twice leaves the value unchanged.
 */
void tav_swap_float(float *a, float *b) {
    const float first = *a;
    const float second = *b;
    *a = second;
    *b = first;
}
|
||||
|
||||
/**
 * Exchange the two void pointers stored at a and b. Both addresses must be
 * valid; passing the same address twice leaves the pointer unchanged.
 */
void tav_swap_ptr(void **a, void **b) {
    void *const first = *a;
    void *const second = *b;
    *a = second;
    *b = first;
}
|
||||
165
video_encoder/lib/libtavenc/tav_encoder_utils.h
Normal file
165
video_encoder/lib/libtavenc/tav_encoder_utils.h
Normal file
@@ -0,0 +1,165 @@
|
||||
/**
|
||||
* TAV Encoder - Utilities Library
|
||||
*
|
||||
* Public API for common utility functions and helpers.
|
||||
*/
|
||||
|
||||
#ifndef TAV_ENCODER_UTILS_H
|
||||
#define TAV_ENCODER_UTILS_H
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// =============================================================================
|
||||
// Math Utilities
|
||||
// =============================================================================
|
||||
|
||||
/** Clamp integer value to range [min, max] */
|
||||
int tav_clamp_int(int x, int min, int max);
|
||||
|
||||
/** Clamp float value to range [min, max] */
|
||||
float tav_clamp_float(float x, float min, float max);
|
||||
|
||||
/** Clamp double value to range [min, max] */
|
||||
double tav_clamp_double(double x, double min, double max);
|
||||
|
||||
/** Round double to nearest integer */
|
||||
int tav_iround(double v);
|
||||
|
||||
/** Linear interpolation between two floats */
|
||||
float tav_lerp(float a, float b, float t);
|
||||
|
||||
/** Linear interpolation between two doubles */
|
||||
double tav_lerp_double(double a, double b, double t);
|
||||
|
||||
/** Get minimum of two integers */
|
||||
int tav_min_int(int a, int b);
|
||||
|
||||
/** Get maximum of two integers */
|
||||
int tav_max_int(int a, int b);
|
||||
|
||||
/** Get minimum of two floats */
|
||||
float tav_min_float(float a, float b);
|
||||
|
||||
/** Get maximum of two floats */
|
||||
float tav_max_float(float a, float b);
|
||||
|
||||
/** Compute absolute value of integer */
|
||||
int tav_abs_int(int x);
|
||||
|
||||
/** Compute absolute value of float */
|
||||
float tav_abs_float(float x);
|
||||
|
||||
/** Sign function: returns -1, 0, or 1 */
|
||||
int tav_sign(int x);
|
||||
|
||||
/** Check if integer is power of 2 */
|
||||
int tav_is_power_of_2(int x);
|
||||
|
||||
/** Round up to next power of 2 */
|
||||
int tav_next_power_of_2(int x);
|
||||
|
||||
/** Compute floor of log2(x) */
|
||||
int tav_floor_log2(int x);
|
||||
|
||||
/** Compute ceil of log2(x) */
|
||||
int tav_ceil_log2(int x);
|
||||
|
||||
// =============================================================================
|
||||
// Random Filename Generation
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Generate a random temporary filename with .mp2 extension.
|
||||
* Format: /tmp/[32 random chars].mp2
|
||||
*
|
||||
* @param filename Output buffer (must be at least 42 bytes)
|
||||
*/
|
||||
void tav_generate_random_filename(char *filename);
|
||||
|
||||
/**
|
||||
* Generate a random temporary filename with custom extension.
|
||||
* Format: /tmp/[32 random chars].[ext]
|
||||
*
|
||||
* @param filename Output buffer (must be large enough)
|
||||
* @param ext File extension (without leading dot)
|
||||
*/
|
||||
void tav_generate_random_filename_ext(char *filename, const char *ext);
|
||||
|
||||
// =============================================================================
|
||||
// Memory Utilities
|
||||
// =============================================================================
|
||||
|
||||
/** Safe malloc with error checking (exits on failure) */
|
||||
void *tav_malloc(size_t size);
|
||||
|
||||
/** Safe calloc with error checking (exits on failure) */
|
||||
void *tav_calloc(size_t count, size_t size);
|
||||
|
||||
/** Safe realloc with error checking (exits on failure) */
|
||||
void *tav_realloc(void *ptr, size_t size);
|
||||
|
||||
/** Allocate aligned memory (returns NULL on failure) */
|
||||
void *tav_aligned_alloc(size_t alignment, size_t size);
|
||||
|
||||
/** Free aligned memory */
|
||||
void tav_aligned_free(void *ptr);
|
||||
|
||||
// =============================================================================
|
||||
// Array Utilities
|
||||
// =============================================================================
|
||||
|
||||
/** Fill integer array with constant value */
|
||||
void tav_array_fill_int(int *array, size_t count, int value);
|
||||
|
||||
/** Fill float array with constant value */
|
||||
void tav_array_fill_float(float *array, size_t count, float value);
|
||||
|
||||
/** Copy integer array */
|
||||
void tav_array_copy_int(int *dst, const int *src, size_t count);
|
||||
|
||||
/** Copy float array */
|
||||
void tav_array_copy_float(float *dst, const float *src, size_t count);
|
||||
|
||||
/** Find maximum value in integer array */
|
||||
int tav_array_max_int(const int *array, size_t count);
|
||||
|
||||
/** Find minimum value in integer array */
|
||||
int tav_array_min_int(const int *array, size_t count);
|
||||
|
||||
/** Find maximum absolute value in float array */
|
||||
float tav_array_max_abs_float(const float *array, size_t count);
|
||||
|
||||
/** Compute sum of integer array */
|
||||
long long tav_array_sum_int(const int *array, size_t count);
|
||||
|
||||
/** Compute sum of float array */
|
||||
double tav_array_sum_float(const float *array, size_t count);
|
||||
|
||||
/** Compute mean of float array */
|
||||
float tav_array_mean_float(const float *array, size_t count);
|
||||
|
||||
/** Swap two integer values */
|
||||
void tav_swap_int(int *a, int *b);
|
||||
|
||||
/** Swap two float values */
|
||||
void tav_swap_float(float *a, float *b);
|
||||
|
||||
/** Swap two pointer values */
|
||||
void tav_swap_ptr(void **a, void **b);
|
||||
|
||||
// =============================================================================
|
||||
// Convenience Macros (for backward compatibility)
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Backward-compatibility alias: expands to a call to tav_clamp_int(x, min, max).
 * NOTE(review): tav_clamp_int is declared outside this excerpt; presumably it is
 * a real function, in which case each argument is evaluated exactly once — confirm.
 */
#define CLAMP(x, min, max) tav_clamp_int(x, min, max)
|
||||
/**
 * Backward-compatibility alias: expands to a call to tav_clamp_float(x, min, max).
 * NOTE(review): tav_clamp_float is declared outside this excerpt; presumably it is
 * a real function, in which case each argument is evaluated exactly once — confirm.
 */
#define FCLAMP(x, min, max) tav_clamp_float(x, min, max)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // TAV_ENCODER_UTILS_H
|
||||
1490
video_encoder/src/encoder_tav.c
Normal file
1490
video_encoder/src/encoder_tav.c
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user