using "correct" colourimetry

(cherry picked from commit ded609e65e)
This commit is contained in:
minjaesong
2025-09-15 10:14:44 +09:00
parent 9f901681a6
commit b497570a3b
2 changed files with 495 additions and 1540 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,5 @@
// Created by Claude on 2025-08-18.
// TEV (TSVM Enhanced Video) Encoder - YCoCg-R 4:2:0 16x16 Block Version
// TEV (TSVM Enhanced Video) Encoder - YCoCg-R/ICtCp 4:2:0 16x16 Block Version
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
@@ -68,7 +68,9 @@ static inline float float16_to_float(uint16_t hbits) {
// TSVM Enhanced Video (TEV) format constants
#define TEV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x45\x56" // "\x1FTSVM TEV"
#define TEV_VERSION 2 // Updated for YCoCg-R 4:2:0
// TEV version - dynamic based on color space mode
// Version 2: YCoCg-R 4:2:0 (default)
// Version 3: ICtCp 4:2:0 (--ictcp flag)
// version 1: 8x8 RGB
// version 2: 16x16 Y, 8x8 Co/Cg, asymetric quantisation, optional quantiser multiplier for rate control multiplier (1.0 when unused) {current winner}
// version 3: version 2 + internal 6-bit processing (discarded due to higher noise floor)
@@ -152,7 +154,6 @@ static const uint32_t QUANT_TABLE_C[HALF_BLOCK_SIZE_SQR] =
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99};
// Audio constants (reuse MP2 from existing system)
#define MP2_SAMPLE_RATE 32000
#define MP2_DEFAULT_PACKET_SIZE 1728
@@ -192,17 +193,6 @@ typedef struct __attribute__((packed)) {
int16_t cg_coeffs[HALF_BLOCK_SIZE_SQR]; // quantised Cg DCT coefficients (8x8)
} tev_block_t;
// Lossless TEV block structure (uses float32 internally, converted to float16 during serialization)
typedef struct __attribute__((packed)) {
uint8_t mode; // Block encoding mode
int16_t mv_x, mv_y; // Motion vector (1/4 pixel precision)
float rate_control_factor; // Always 1.0f in lossless mode
uint16_t cbp; // Coded block pattern (which channels have non-zero coeffs)
float y_coeffs[BLOCK_SIZE_SQR]; // lossless Y DCT coefficients (16x16)
float co_coeffs[HALF_BLOCK_SIZE_SQR]; // lossless Co DCT coefficients (8x8)
float cg_coeffs[HALF_BLOCK_SIZE_SQR]; // lossless Cg DCT coefficients (8x8)
} tev_lossless_block_t;
// Subtitle entry structure
typedef struct subtitle_entry {
int start_frame;
@@ -232,7 +222,7 @@ typedef struct {
int qualityCg;
int verbose;
int disable_rcf; // 0 = rcf enabled, 1 = disabled
int lossless_mode; // 0 = lossy (default), 1 = lossless mode
int ictcp_mode; // 0 = YCoCg-R (default), 1 = ICtCp color space
// Bitrate control
int target_bitrate_kbps; // Target bitrate in kbps (0 = quality mode)
@@ -289,6 +279,10 @@ typedef struct {
int complexity_capacity; // Capacity of complexity_values array
} tev_encoder_t;
//////////////////////////
// COLOUR MATHS CODES //
//////////////////////////
// RGB to YCoCg-R transform (per YCoCg-R specification with truncated division)
static void rgb_to_ycocgr(uint8_t r, uint8_t g, uint8_t b, int *y, int *co, int *cg) {
*co = (int)r - (int)b;
@@ -315,6 +309,189 @@ static void ycocgr_to_rgb(int y, int co, int cg, uint8_t *r, uint8_t *g, uint8_t
*b = CLAMP(*b, 0, 255);
}
// ---------------------- ICtCp Implementation ----------------------
static inline int iround(double v) { return (int)floor(v + 0.5); }
// ---------------------- sRGB gamma helpers ----------------------
static inline double srgb_linearize(double val) {
// val in [0,1]
if (val <= 0.04045) return val / 12.92;
return pow((val + 0.055) / 1.055, 2.4);
}
static inline double srgb_unlinearize(double val) {
// val in [0,1]
if (val <= 0.0031308) return val * 12.92;
return 1.055 * pow(val, 1.0 / 2.4) - 0.055;
}
// -------------------------- HLG --------------------------
// Forward HLG OETF (linear -> HLG)
static inline double HLG_OETF(double L) {
// L in [0,1], relative scene-linear
const double a = 0.17883277;
const double b = 1.0 - 4.0 * a;
const double c = 0.5 - a * log(4.0 * a);
if (L <= 1.0/12.0)
return sqrt(3.0 * L);
else
return a * log(12.0 * L - b) + c;
}
// Inverse HLG OETF (HLG -> linear)
static inline double HLG_inverse_OETF(double V) {
const double a = 0.17883277;
const double b = 1.0 - 4.0 * a;
const double c = 0.5 - a * log(4.0 * a);
if (V <= 0.5)
return (V * V) / 3.0;
else
return (exp((V - c)/a) + b) / 12.0;
}
// ---------------------- Matrices (doubles) ----------------------
// linear RGB -> XYZ -> Rec.2100 -> LMS
/*static const double M_RGB_TO_LMS[3][3] = {
{1688.0/4096.0,2146.0/4096.0, 262.0/4096.0},
{ 683.0/4096.0,2951.0/4096.0, 462.0/4096.0},
{ 99.0/4096.0, 309.0/4096.0,3688.0/4096.0}
};*/
static const double M_RGB_TO_LMS[3][3] = {
{0.2958564579364564, 0.6230869483219083, 0.08106989398623762},
{0.15627390752659093, 0.727308963512872, 0.11639736914944238},
{0.035141262332177715, 0.15657109121101628, 0.8080956851990795}
};
// Inverse: LMS -> linear sRGB (inverse of above)
/*static const double M_LMS_TO_RGB[3][3] = {
{3.436606694333079, -2.5064521186562705, 0.06984542432319149},
{-0.7913295555989289, 1.983600451792291, -0.192270896193362},
{-0.025949899690592665, -0.09891371471172647, 1.1248636144023192}
};*/
static const double M_LMS_TO_RGB[3][3] = {
{6.1723815689243215, -5.319534979827695, 0.14699442094633924},
{-1.3243428148026244, 2.560286104841917, -0.2359203727576164},
{-0.011819739235953752, -0.26473549971186555, 1.2767952602537955}
};
// ICtCp matrix (L' M' S' -> I Ct Cp). Values are the BT.2100 integer-derived /4096 constants.
static const double M_LMSPRIME_TO_ICTCP[3][3] = {
{ 2048.0/4096.0, 2048.0/4096.0, 0.0 },
{ 3625.0/4096.0, -7465.0/4096.0, 3840.0/4096.0 },
{ 9500.0/4096.0, -9212.0/4096.0, -288.0/4096.0 }
};
// Inverse: I Ct Cp -> L' M' S' (precomputed inverse)
static const double M_ICTCP_TO_LMSPRIME[3][3] = {
{ 1.0, 0.015718580108730416, 0.2095810681164055 },
{ 1.0, -0.015718580108730416, -0.20958106811640548 },
{ 1.0, 1.0212710798422344, -0.6052744909924316 }
};
// ---------------------- Forward: sRGB8 -> ICtCp (doubles) ----------------------
// Inputs: r,g,b in 0..255 sRGB (8-bit)
// Outputs: I, Ct, Cp as doubles (nominally I in ~[0..1], Ct/Cp ranges depend on colors)
void srgb8_to_ictcp_hlg(uint8_t r8, uint8_t g8, uint8_t b8,
double *out_I, double *out_Ct, double *out_Cp)
{
// 1) linearize sRGB to 0..1
double r = srgb_linearize((double)r8 / 255.0);
double g = srgb_linearize((double)g8 / 255.0);
double b = srgb_linearize((double)b8 / 255.0);
// 2) linear RGB -> LMS (single 3x3 multiply)
double L = M_RGB_TO_LMS[0][0]*r + M_RGB_TO_LMS[0][1]*g + M_RGB_TO_LMS[0][2]*b;
double M = M_RGB_TO_LMS[1][0]*r + M_RGB_TO_LMS[1][1]*g + M_RGB_TO_LMS[1][2]*b;
double S = M_RGB_TO_LMS[2][0]*r + M_RGB_TO_LMS[2][1]*g + M_RGB_TO_LMS[2][2]*b;
// 3) apply HLG encode (map linear LMS -> perceptual domain L',M',S')
double Lp = HLG_OETF(L);
double Mp = HLG_OETF(M);
double Sp = HLG_OETF(S);
// 4) L'M'S' -> ICtCp
double I = M_LMSPRIME_TO_ICTCP[0][0]*Lp + M_LMSPRIME_TO_ICTCP[0][1]*Mp + M_LMSPRIME_TO_ICTCP[0][2]*Sp;
double Ct = M_LMSPRIME_TO_ICTCP[1][0]*Lp + M_LMSPRIME_TO_ICTCP[1][1]*Mp + M_LMSPRIME_TO_ICTCP[1][2]*Sp;
double Cp = M_LMSPRIME_TO_ICTCP[2][0]*Lp + M_LMSPRIME_TO_ICTCP[2][1]*Mp + M_LMSPRIME_TO_ICTCP[2][2]*Sp;
*out_I = FCLAMP(I * 255.f, 0.f, 255.f);
*out_Ct = FCLAMP(Ct * 255.f, -256.f, 255.f);
*out_Cp = FCLAMP(Cp * 255.f, -256.f, 255.f);
}
// ---------------------- Reverse: ICtCp -> sRGB8 (doubles) ----------------------
// Inputs: I, Ct, Cp as doubles
// Outputs: r8,g8,b8 in 0..255 (8-bit sRGB, clamped and rounded)
void ictcp_hlg_to_srgb8(double I8, double Ct8, double Cp8,
uint8_t *r8, uint8_t *g8, uint8_t *b8)
{
double I = I8 / 255.f;
double Ct = Ct8 / 255.f;
double Cp = Cp8 / 255.f;
// 1) ICtCp -> L' M' S' (3x3 multiply)
double Lp = M_ICTCP_TO_LMSPRIME[0][0]*I + M_ICTCP_TO_LMSPRIME[0][1]*Ct + M_ICTCP_TO_LMSPRIME[0][2]*Cp;
double Mp = M_ICTCP_TO_LMSPRIME[1][0]*I + M_ICTCP_TO_LMSPRIME[1][1]*Ct + M_ICTCP_TO_LMSPRIME[1][2]*Cp;
double Sp = M_ICTCP_TO_LMSPRIME[2][0]*I + M_ICTCP_TO_LMSPRIME[2][1]*Ct + M_ICTCP_TO_LMSPRIME[2][2]*Cp;
// 2) HLG decode: L' -> linear LMS
double L = HLG_inverse_OETF(Lp);
double M = HLG_inverse_OETF(Mp);
double S = HLG_inverse_OETF(Sp);
// 3) LMS -> linear sRGB (3x3 inverse)
double r_lin = M_LMS_TO_RGB[0][0]*L + M_LMS_TO_RGB[0][1]*M + M_LMS_TO_RGB[0][2]*S;
double g_lin = M_LMS_TO_RGB[1][0]*L + M_LMS_TO_RGB[1][1]*M + M_LMS_TO_RGB[1][2]*S;
double b_lin = M_LMS_TO_RGB[2][0]*L + M_LMS_TO_RGB[2][1]*M + M_LMS_TO_RGB[2][2]*S;
// 4) gamma encode and convert to 0..255 with center-of-bin rounding
double r = srgb_unlinearize(r_lin);
double g = srgb_unlinearize(g_lin);
double b = srgb_unlinearize(b_lin);
*r8 = (uint8_t)CLAMP(iround(r * 255.0), 0, 255);
*g8 = (uint8_t)CLAMP(iround(g * 255.0), 0, 255);
*b8 = (uint8_t)CLAMP(iround(b * 255.0), 0, 255);
}
// ---------------------- Color Space Switching Functions ----------------------
// Wrapper functions that choose between YCoCg-R and ICtCp based on encoder mode
static void rgb_to_color_space(tev_encoder_t *enc, uint8_t r, uint8_t g, uint8_t b,
double *c1, double *c2, double *c3) {
if (enc->ictcp_mode) {
// Use ICtCp color space
srgb8_to_ictcp_hlg(r, g, b, c1, c2, c3);
} else {
// Use YCoCg-R color space (convert to int first, then to double)
int y_val, co_val, cg_val;
rgb_to_ycocgr(r, g, b, &y_val, &co_val, &cg_val);
*c1 = (double)y_val;
*c2 = (double)co_val;
*c3 = (double)cg_val;
}
}
static void color_space_to_rgb(tev_encoder_t *enc, double c1, double c2, double c3,
uint8_t *r, uint8_t *g, uint8_t *b) {
if (enc->ictcp_mode) {
// Use ICtCp color space
ictcp_hlg_to_srgb8(c1, c2, c3, r, g, b);
} else {
// Use YCoCg-R color space (convert from double to int first)
int y_val = (int)round(c1);
int co_val = (int)round(c2);
int cg_val = (int)round(c3);
ycocgr_to_rgb(y_val, co_val, cg_val, r, g, b);
}
}
////////////////////////////////////////
// DISCRETE COSINE TRANSFORMATIONS //
////////////////////////////////////////
// Pre-calculated cosine tables
static float dct_table_16[16][16]; // For 16x16 DCT
static float dct_table_8[8][8]; // For 8x8 DCT
@@ -429,14 +606,14 @@ static int16_t quantise_coeff(float coeff, float quant, int is_dc, int is_chroma
}
}
// Extract 16x16 block from RGB frame and convert to YCoCg-R
static void extract_ycocgr_block(uint8_t *rgb_frame, int width, int height,
int block_x, int block_y,
float *y_block, float *co_block, float *cg_block) {
// Extract 16x16 block from RGB frame and convert to color space
static void extract_color_space_block(tev_encoder_t *enc, uint8_t *rgb_frame, int width, int height,
int block_x, int block_y,
float *c1_block, float *c2_block, float *c3_block) {
int start_x = block_x * BLOCK_SIZE;
int start_y = block_y * BLOCK_SIZE;
// Extract 16x16 Y block
// Extract 16x16 primary channel block (Y for YCoCg-R, I for ICtCp)
for (int py = 0; py < BLOCK_SIZE; py++) {
for (int px = 0; px < BLOCK_SIZE; px++) {
int x = start_x + px;
@@ -448,10 +625,10 @@ static void extract_ycocgr_block(uint8_t *rgb_frame, int width, int height,
uint8_t g = rgb_frame[offset + 1];
uint8_t b = rgb_frame[offset + 2];
int y_val, co_val, cg_val;
rgb_to_ycocgr(r, g, b, &y_val, &co_val, &cg_val);
double c1, c2, c3;
rgb_to_color_space(enc, r, g, b, &c1, &c2, &c3);
y_block[py * BLOCK_SIZE + px] = (float)y_val - 128.0f; // Center around 0
c1_block[py * BLOCK_SIZE + px] = (float)c1 - 128.0f;
}
}
}
@@ -473,25 +650,30 @@ static void extract_ycocgr_block(uint8_t *rgb_frame, int width, int height,
uint8_t g = rgb_frame[offset + 1];
uint8_t b = rgb_frame[offset + 2];
int y_val, co_val, cg_val;
rgb_to_ycocgr(r, g, b, &y_val, &co_val, &cg_val);
double c1, c2, c3;
rgb_to_color_space(enc, r, g, b, &c1, &c2, &c3);
co_sum += (int)c2;
cg_sum += (int)c3;
co_sum += co_val;
cg_sum += cg_val;
count++;
}
}
}
if (count > 0) {
// Center chroma around 0 for DCT (Co/Cg range is -255 to +255, so don't add offset)
co_block[py * HALF_BLOCK_SIZE + px] = (float)(co_sum / count);
cg_block[py * HALF_BLOCK_SIZE + px] = (float)(cg_sum / count);
// Average the accumulated chroma values and store
c2_block[py * HALF_BLOCK_SIZE + px] = (float)(co_sum / count);
c3_block[py * HALF_BLOCK_SIZE + px] = (float)(cg_sum / count);
}
}
}
}
// Calculate spatial activity for any channel (16x16 or 8x8)
static float calculate_spatial_activity(const float *block, int block_size) {
float activity = 0.0f;
@@ -791,8 +973,143 @@ static void estimate_motion(tev_encoder_t *enc, int block_x, int block_y,
}
// Convert RGB block to YCoCg-R with 4:2:0 chroma subsampling
static void convert_rgb_to_ycocgr_block(const uint8_t *rgb_block,
uint8_t *y_block, int8_t *co_block, int8_t *cg_block) {
static void convert_rgb_to_color_space_block(tev_encoder_t *enc, const uint8_t *rgb_block,
float *c1_workspace, float *c2_workspace, float *c3_workspace) {
if (enc->ictcp_mode) {
// ICtCp mode: Convert 16x16 RGB to ICtCp (full resolution for I, 4:2:0 subsampling for CtCp)
// Convert I channel at full resolution (16x16)
for (int py = 0; py < BLOCK_SIZE; py++) {
for (int px = 0; px < BLOCK_SIZE; px++) {
int rgb_idx = (py * BLOCK_SIZE + px) * 3;
uint8_t r = rgb_block[rgb_idx];
uint8_t g = rgb_block[rgb_idx + 1];
uint8_t b = rgb_block[rgb_idx + 2];
double I, Ct, Cp;
srgb8_to_ictcp_hlg(r, g, b, &I, &Ct, &Cp);
// Store I at full resolution, scale to appropriate range
c1_workspace[py * BLOCK_SIZE + px] = (float)(I * 255.0);
}
}
// Convert Ct and Cp with 4:2:0 subsampling (8x8)
for (int cy = 0; cy < HALF_BLOCK_SIZE; cy++) {
for (int cx = 0; cx < HALF_BLOCK_SIZE; cx++) {
double sum_ct = 0.0, sum_cp = 0.0;
// Sample 2x2 block from RGB and average for chroma
for (int dy = 0; dy < 2; dy++) {
for (int dx = 0; dx < 2; dx++) {
int py = cy * 2 + dy;
int px = cx * 2 + dx;
int rgb_idx = (py * 16 + px) * 3;
int r = rgb_block[rgb_idx];
int g = rgb_block[rgb_idx + 1];
int b = rgb_block[rgb_idx + 2];
double I, Ct, Cp;
srgb8_to_ictcp_hlg(r, g, b, &I, &Ct, &Cp);
sum_ct += Ct;
sum_cp += Cp;
}
}
// Average and store subsampled chroma, scale to signed 8-bit equivalent range
// Apply centering to ensure chroma is balanced around 0 (like YCoCg-R)
double avg_ct = sum_ct / 4.0;
double avg_cp = sum_cp / 4.0;
// Scale and clamp to [-256, 255] range like YCoCg-R
c2_workspace[cy * HALF_BLOCK_SIZE + cx] = (float)CLAMP(avg_ct * 255.0, -256, 255);
c3_workspace[cy * HALF_BLOCK_SIZE + cx] = (float)CLAMP(avg_cp * 255.0, -256, 255);
}
}
} else {
// YCoCg-R mode: Original implementation
// Convert 16x16 RGB to Y (full resolution)
for (int py = 0; py < BLOCK_SIZE; py++) {
for (int px = 0; px < BLOCK_SIZE; px++) {
int rgb_idx = (py * BLOCK_SIZE + px) * 3;
int r = rgb_block[rgb_idx];
int g = rgb_block[rgb_idx + 1];
int b = rgb_block[rgb_idx + 2];
// YCoCg-R transform (per specification with truncated division)
int y = (r + 2*g + b) / 4;
c1_workspace[py * BLOCK_SIZE + px] = (float)CLAMP(y, 0, 255);
}
}
// Convert to Co and Cg with 4:2:0 subsampling (8x8)
for (int cy = 0; cy < HALF_BLOCK_SIZE; cy++) {
for (int cx = 0; cx < HALF_BLOCK_SIZE; cx++) {
int sum_co = 0, sum_cg = 0;
// Sample 2x2 block from RGB and average for chroma
for (int dy = 0; dy < 2; dy++) {
for (int dx = 0; dx < 2; dx++) {
int py = cy * 2 + dy;
int px = cx * 2 + dx;
int rgb_idx = (py * 16 + px) * 3;
int r = rgb_block[rgb_idx];
int g = rgb_block[rgb_idx + 1];
int b = rgb_block[rgb_idx + 2];
int co = r - b;
int tmp = b + (co / 2);
int cg = g - tmp;
sum_co += co;
sum_cg += cg;
}
}
// Average and store subsampled chroma
c2_workspace[cy * HALF_BLOCK_SIZE + cx] = (float)CLAMP(sum_co / 4, -256, 255);
c3_workspace[cy * HALF_BLOCK_SIZE + cx] = (float)CLAMP(sum_cg / 4, -256, 255);
}
}
}
}
// Extract motion-compensated YCoCg-R block from reference frame
static void extract_motion_compensated_block(const uint8_t *rgb_data, int width, int height,
int block_x, int block_y, int mv_x, int mv_y,
uint8_t *y_block, int8_t *co_block, int8_t *cg_block) {
// Extract 16x16 RGB block with motion compensation
uint8_t rgb_block[BLOCK_SIZE * BLOCK_SIZE * 3];
for (int dy = 0; dy < BLOCK_SIZE; dy++) {
for (int dx = 0; dx < BLOCK_SIZE; dx++) {
int cur_x = block_x + dx;
int cur_y = block_y + dy;
int ref_x = cur_x + mv_x; // Revert to original motion compensation
int ref_y = cur_y + mv_y;
int rgb_idx = (dy * BLOCK_SIZE + dx) * 3;
if (ref_x >= 0 && ref_y >= 0 && ref_x < width && ref_y < height) {
// Copy RGB from reference position
int ref_offset = (ref_y * width + ref_x) * 3;
rgb_block[rgb_idx] = rgb_data[ref_offset]; // R
rgb_block[rgb_idx + 1] = rgb_data[ref_offset + 1]; // G
rgb_block[rgb_idx + 2] = rgb_data[ref_offset + 2]; // B
} else {
// Out of bounds - use black
rgb_block[rgb_idx] = 0; // R
rgb_block[rgb_idx + 1] = 0; // G
rgb_block[rgb_idx + 2] = 0; // B
}
}
}
// Convert RGB block to YCoCg-R (original implementation for motion compensation)
// Convert 16x16 RGB to Y (full resolution)
for (int py = 0; py < BLOCK_SIZE; py++) {
for (int px = 0; px < BLOCK_SIZE; px++) {
@@ -840,41 +1157,6 @@ static void convert_rgb_to_ycocgr_block(const uint8_t *rgb_block,
}
}
// Extract motion-compensated YCoCg-R block from reference frame
static void extract_motion_compensated_block(const uint8_t *rgb_data, int width, int height,
int block_x, int block_y, int mv_x, int mv_y,
uint8_t *y_block, int8_t *co_block, int8_t *cg_block) {
// Extract 16x16 RGB block with motion compensation
uint8_t rgb_block[BLOCK_SIZE * BLOCK_SIZE * 3];
for (int dy = 0; dy < BLOCK_SIZE; dy++) {
for (int dx = 0; dx < BLOCK_SIZE; dx++) {
int cur_x = block_x + dx;
int cur_y = block_y + dy;
int ref_x = cur_x + mv_x; // Revert to original motion compensation
int ref_y = cur_y + mv_y;
int rgb_idx = (dy * BLOCK_SIZE + dx) * 3;
if (ref_x >= 0 && ref_y >= 0 && ref_x < width && ref_y < height) {
// Copy RGB from reference position
int ref_offset = (ref_y * width + ref_x) * 3;
rgb_block[rgb_idx] = rgb_data[ref_offset]; // R
rgb_block[rgb_idx + 1] = rgb_data[ref_offset + 1]; // G
rgb_block[rgb_idx + 2] = rgb_data[ref_offset + 2]; // B
} else {
// Out of bounds - use black
rgb_block[rgb_idx] = 0; // R
rgb_block[rgb_idx + 1] = 0; // G
rgb_block[rgb_idx + 2] = 0; // B
}
}
}
// Convert RGB block to YCoCg-R
convert_rgb_to_ycocgr_block(rgb_block, y_block, co_block, cg_block);
}
// Compute motion-compensated residual for INTER mode
static void compute_motion_residual(tev_encoder_t *enc, int block_x, int block_y, int mv_x, int mv_y) {
int start_x = block_x * BLOCK_SIZE;
@@ -909,7 +1191,7 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
tev_block_t *block = &enc->block_data[block_y * ((enc->width + 15) / 16) + block_x];
// Extract YCoCg-R block
extract_ycocgr_block(enc->current_rgb, enc->width, enc->height,
extract_color_space_block(enc, enc->current_rgb, enc->width, enc->height,
block_x, block_y,
enc->y_workspace, enc->co_workspace, enc->cg_workspace);
@@ -1069,7 +1351,7 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
dct_16x16_fast(enc->y_workspace, enc->dct_workspace);
// quantise Y coefficients (luma) using per-block rate control
const uint32_t *y_quant = QUANT_TABLE_Y;
const uint32_t *y_quant = enc->ictcp_mode ? QUANT_TABLE_Y : QUANT_TABLE_Y;
const float qmult_y = jpeg_quality_to_mult(enc->qualityY * block->rate_control_factor);
for (int i = 0; i < BLOCK_SIZE_SQR; i++) {
// Apply rate control factor to quantization table (like decoder does)
@@ -1081,7 +1363,7 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
dct_8x8_fast(enc->co_workspace, enc->dct_workspace);
// quantise Co coefficients (chroma - orange-blue) using per-block rate control
const uint32_t *co_quant = QUANT_TABLE_C;
const uint32_t *co_quant = enc->ictcp_mode ? QUANT_TABLE_C : QUANT_TABLE_C;
const float qmult_co = jpeg_quality_to_mult(enc->qualityCo * block->rate_control_factor);
for (int i = 0; i < HALF_BLOCK_SIZE_SQR; i++) {
// Apply rate control factor to quantization table (like decoder does)
@@ -1093,7 +1375,8 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
dct_8x8_fast(enc->cg_workspace, enc->dct_workspace);
// quantise Cg coefficients (chroma - green-magenta, qmult_cg is more aggressive like NTSC Q) using per-block rate control
const uint32_t *cg_quant = QUANT_TABLE_C;
// In ICtCp mode, Cg becomes Cp (chroma-red) which needs special quantization table
const uint32_t *cg_quant = enc->ictcp_mode ? QUANT_TABLE_C : QUANT_TABLE_C;
const float qmult_cg = jpeg_quality_to_mult(enc->qualityCg * block->rate_control_factor);
for (int i = 0; i < HALF_BLOCK_SIZE_SQR; i++) {
// Apply rate control factor to quantization table (like decoder does)
@@ -1105,107 +1388,6 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
block->cbp = 0x07; // Y, Co, Cg all present
}
// Encode a 16x16 block in lossless mode
static void encode_block_lossless(tev_encoder_t *enc, int block_x, int block_y, int is_keyframe) {
tev_lossless_block_t *block = (tev_lossless_block_t*)&enc->block_data[block_y * ((enc->width + 15) / 16) + block_x];
// Extract YCoCg-R block
extract_ycocgr_block(enc->current_rgb, enc->width, enc->height,
block_x, block_y,
enc->y_workspace, enc->co_workspace, enc->cg_workspace);
if (is_keyframe) {
// Intra coding for keyframes
block->mode = TEV_MODE_INTRA;
block->mv_x = block->mv_y = 0;
enc->blocks_intra++;
} else {
// Same mode decision logic as regular encode_block
// For simplicity, using INTRA for now in lossless mode
block->mode = TEV_MODE_INTRA;
block->mv_x = block->mv_y = 0;
enc->blocks_intra++;
}
// Lossless mode: rate control factor is always 1.0f
block->rate_control_factor = 1.0f;
// Apply DCT transforms using the same pattern as regular encoding
// Y channel (16x16)
dct_16x16_fast(enc->y_workspace, enc->dct_workspace);
for (int i = 0; i < BLOCK_SIZE_SQR; i++) {
block->y_coeffs[i] = enc->dct_workspace[i]; // Store directly without quantization
}
// Co channel (8x8)
dct_8x8_fast(enc->co_workspace, enc->dct_workspace);
for (int i = 0; i < HALF_BLOCK_SIZE_SQR; i++) {
block->co_coeffs[i] = enc->dct_workspace[i]; // Store directly without quantization
}
// Cg channel (8x8)
dct_8x8_fast(enc->cg_workspace, enc->dct_workspace);
for (int i = 0; i < HALF_BLOCK_SIZE_SQR; i++) {
block->cg_coeffs[i] = enc->dct_workspace[i]; // Store directly without quantization
}
// Set CBP (simplified - always encode all channels)
block->cbp = 0x07; // Y, Co, Cg all present
}
// Serialized lossless block structure (for writing to file with float16 coefficients)
typedef struct __attribute__((packed)) {
uint8_t mode;
int16_t mv_x, mv_y;
float rate_control_factor; // Always 1.0f in lossless mode
uint16_t cbp;
uint16_t y_coeffs[BLOCK_SIZE_SQR]; // float16 Y coefficients
uint16_t co_coeffs[HALF_BLOCK_SIZE_SQR]; // float16 Co coefficients
uint16_t cg_coeffs[HALF_BLOCK_SIZE_SQR]; // float16 Cg coefficients
} tev_serialized_lossless_block_t;
// Convert lossless blocks to serialized format with float16 coefficients
static void serialize_lossless_blocks(tev_encoder_t *enc, int blocks_x, int blocks_y,
tev_serialized_lossless_block_t *serialized_blocks) {
for (int by = 0; by < blocks_y; by++) {
for (int bx = 0; bx < blocks_x; bx++) {
tev_lossless_block_t *src = (tev_lossless_block_t*)&enc->block_data[by * blocks_x + bx];
tev_serialized_lossless_block_t *dst = &serialized_blocks[by * blocks_x + bx];
// Copy basic fields
dst->mode = src->mode;
dst->mv_x = src->mv_x;
dst->mv_y = src->mv_y;
dst->rate_control_factor = src->rate_control_factor;
dst->cbp = src->cbp;
// Convert float32 coefficients to float16 with range clamping
// Float16 max finite value is approximately 65504
const float FLOAT16_MAX = 65504.0f;
for (int i = 0; i < BLOCK_SIZE_SQR; i++) {
float coeff = FCLAMP(src->y_coeffs[i], -FLOAT16_MAX, FLOAT16_MAX);
dst->y_coeffs[i] = float_to_float16(coeff);
if (enc->verbose && fabsf(src->y_coeffs[i]) > FLOAT16_MAX) {
printf("WARNING: Y coefficient %d clamped: %f -> %f\n", i, src->y_coeffs[i], coeff);
}
}
for (int i = 0; i < HALF_BLOCK_SIZE_SQR; i++) {
float co_coeff = FCLAMP(src->co_coeffs[i], -FLOAT16_MAX, FLOAT16_MAX);
float cg_coeff = FCLAMP(src->cg_coeffs[i], -FLOAT16_MAX, FLOAT16_MAX);
dst->co_coeffs[i] = float_to_float16(co_coeff);
dst->cg_coeffs[i] = float_to_float16(cg_coeff);
if (enc->verbose && fabsf(src->co_coeffs[i]) > FLOAT16_MAX) {
printf("WARNING: Co coefficient %d clamped: %f -> %f\n", i, src->co_coeffs[i], co_coeff);
}
if (enc->verbose && fabsf(src->cg_coeffs[i]) > FLOAT16_MAX) {
printf("WARNING: Cg coefficient %d clamped: %f -> %f\n", i, src->cg_coeffs[i], cg_coeff);
}
}
}
}
}
// Convert SubRip time format (HH:MM:SS,mmm) to frame number
static int srt_time_to_frame(const char *time_str, int fps) {
int hours, minutes, seconds, milliseconds;
@@ -1820,17 +2002,13 @@ static int alloc_encoder_buffers(tev_encoder_t *enc) {
enc->cg_workspace = malloc(8 * 8 * sizeof(float));
enc->dct_workspace = malloc(16 * 16 * sizeof(float));
// Allocate block data
enc->block_data = malloc(total_blocks * sizeof(tev_block_t));
// Allocate compression buffer large enough for both regular and lossless modes
size_t max_block_size = sizeof(tev_block_t) > sizeof(tev_serialized_lossless_block_t) ?
sizeof(tev_block_t) : sizeof(tev_serialized_lossless_block_t);
size_t compressed_buffer_size = total_blocks * max_block_size * 2;
// Allocate compression buffer
size_t compressed_buffer_size = total_blocks * sizeof(tev_block_t) * 2;
enc->compressed_buffer = malloc(compressed_buffer_size);
if (enc->verbose) {
printf("Allocated compressed buffer: %zu bytes for %d blocks (max_block_size: %zu)\n",
compressed_buffer_size, total_blocks, max_block_size);
}
enc->mp2_buffer = malloc(MP2_DEFAULT_PACKET_SIZE);
if (!enc->current_rgb || !enc->previous_rgb || !enc->reference_rgb ||
@@ -1889,7 +2067,7 @@ static void free_encoder(tev_encoder_t *enc) {
static int write_tev_header(FILE *output, tev_encoder_t *enc) {
// Magic + version
fwrite(TEV_MAGIC, 1, 8, output);
uint8_t version = TEV_VERSION;
uint8_t version = enc->ictcp_mode ? 3 : 2; // Version 3 for ICtCp, 2 for YCoCg-R
fwrite(&version, 1, 1, output);
// Video parameters
@@ -1901,7 +2079,7 @@ static int write_tev_header(FILE *output, tev_encoder_t *enc) {
uint8_t qualityCo = enc->qualityCo;
uint8_t qualityCg = enc->qualityCg;
uint8_t flags = (enc->has_audio) | (enc->has_subtitles << 1);
uint8_t video_flags = (enc->progressive_mode ? 0 : 1) | (enc->is_ntsc_framerate ? 2 : 0) | (enc->lossless_mode ? 4 : 0); // bit 0 = is_interlaced, bit 1 = is_ntsc_framerate, bit 2 = is_lossless
uint8_t video_flags = (enc->progressive_mode ? 0 : 1) | (enc->is_ntsc_framerate ? 2 : 0); // bit 0 = is_interlaced, bit 1 = is_ntsc_framerate
uint8_t reserved = 0;
fwrite(&width, 2, 1, output);
@@ -2008,11 +2186,7 @@ static int encode_frame(tev_encoder_t *enc, FILE *output, int frame_num, int fie
// Encode all blocks
for (int by = 0; by < blocks_y; by++) {
for (int bx = 0; bx < blocks_x; bx++) {
if (enc->lossless_mode) {
encode_block_lossless(enc, bx, by, is_keyframe);
} else {
encode_block(enc, bx, by, is_keyframe);
}
encode_block(enc, bx, by, is_keyframe);
// Calculate complexity for rate control (if enabled)
if (enc->bitrate_mode > 0) {
@@ -2029,34 +2203,14 @@ static int encode_frame(tev_encoder_t *enc, FILE *output, int frame_num, int fie
// Compress block data using Zstd (compatible with TSVM decoder)
size_t compressed_size;
if (enc->lossless_mode) {
// Lossless mode: serialize blocks with float16 coefficients
size_t serialized_block_data_size = blocks_x * blocks_y * sizeof(tev_serialized_lossless_block_t);
tev_serialized_lossless_block_t *serialized_blocks = malloc(serialized_block_data_size);
if (!serialized_blocks) {
fprintf(stderr, "Failed to allocate memory for serialized lossless blocks\n");
return -1;
}
serialize_lossless_blocks(enc, blocks_x, blocks_y, serialized_blocks);
// Use the pre-allocated buffer size instead of calculating dynamically
size_t output_buffer_size = blocks_x * blocks_y * sizeof(tev_serialized_lossless_block_t) * 2;
compressed_size = ZSTD_compressCCtx(enc->zstd_context,
enc->compressed_buffer, output_buffer_size,
serialized_blocks, serialized_block_data_size,
ZSTD_COMPRESSON_LEVEL);
free(serialized_blocks);
} else {
// Regular mode: use regular block data
size_t block_data_size = blocks_x * blocks_y * sizeof(tev_block_t);
compressed_size = ZSTD_compressCCtx(enc->zstd_context,
enc->compressed_buffer, block_data_size * 2,
enc->block_data, block_data_size,
ZSTD_COMPRESSON_LEVEL);
}
// Regular mode: use regular block data
size_t block_data_size = blocks_x * blocks_y * sizeof(tev_block_t);
compressed_size = ZSTD_compressCCtx(enc->zstd_context,
enc->compressed_buffer, block_data_size * 2,
enc->block_data, block_data_size,
ZSTD_COMPRESSON_LEVEL);
if (ZSTD_isError(compressed_size)) {
fprintf(stderr, "Zstd compression failed: %s\n", ZSTD_getErrorName(compressed_size));
return 0;
@@ -2288,7 +2442,7 @@ static int start_audio_conversion(tev_encoder_t *enc) {
char command[2048];
snprintf(command, sizeof(command),
"ffmpeg -v quiet -i \"%s\" -acodec libtwolame -psymodel 4 -b:a %dk -ar %d -ac 2 -y \"%s\" 2>/dev/null",
enc->input_file, enc->lossless_mode ? 384 : MP2_RATE_TABLE[enc->qualityIndex], MP2_SAMPLE_RATE, TEMP_AUDIO_FILE);
enc->input_file, MP2_RATE_TABLE[enc->qualityIndex], MP2_SAMPLE_RATE, TEMP_AUDIO_FILE);
int result = system(command);
if (result == 0) {
@@ -2429,7 +2583,7 @@ static int process_audio(tev_encoder_t *enc, int frame_num, FILE *output) {
// Show usage information
static void show_usage(const char *program_name) {
printf("TEV YCoCg-R 4:2:0 Video Encoder\n");
printf("TEV YCoCg-R/ICtCp 4:2:0 Video Encoder\n");
printf("Usage: %s [options] -i input.mp4 -o output.mv2\n\n", program_name);
printf("Options:\n");
printf(" -i, --input FILE Input video file\n");
@@ -2443,7 +2597,7 @@ static void show_usage(const char *program_name) {
printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n");
printf(" -v, --verbose Verbose output\n");
printf(" -t, --test Test mode: generate solid colour frames\n");
printf(" --lossless Lossless mode: store coefficients as float16 (no quantisation, implies -p, 384k audio)\n");
printf(" --ictcp Use ICtCp color space instead of YCoCg-R (generates TEV version 3)\n");
printf(" --enable-rcf Enable per-block rate control (experimental)\n");
printf(" --enable-encode-stats Collect and report block complexity statistics\n");
printf(" --help Show this help\n\n");
@@ -2467,7 +2621,7 @@ static void show_usage(const char *program_name) {
printf("\n -s default: equal to %dx%d", DEFAULT_WIDTH, DEFAULT_HEIGHT);
printf("\n\n");
printf("Features:\n");
printf(" - YCoCg-R 4:2:0 chroma subsampling for 50%% compression improvement\n");
printf(" - YCoCg-R or ICtCp 4:2:0 chroma subsampling for 50%% compression improvement\n");
printf(" - 16x16 Y blocks with 8x8 chroma for optimal DCT efficiency\n");
printf(" - Frame rate conversion with FFmpeg temporal filtering\n");
printf(" - Adaptive quality control with complexity-based adjustment\n");
@@ -2536,7 +2690,7 @@ int main(int argc, char *argv[]) {
{"test", no_argument, 0, 't'},
{"enable-encode-stats", no_argument, 0, 1000},
{"enable-rcf", no_argument, 0, 1100},
{"lossless", no_argument, 0, 1200},
{"ictcp", no_argument, 0, 1300},
{"help", no_argument, 0, '?'},
{0, 0, 0, 0}
};
@@ -2611,8 +2765,8 @@ int main(int argc, char *argv[]) {
case 1100: // --enable-rcf
enc->disable_rcf = 0;
break;
case 1200: // --lossless
enc->lossless_mode = 1;
case 1300: // --ictcp
enc->ictcp_mode = 1;
break;
case 0:
if (strcmp(long_options[option_index].name, "help") == 0) {
@@ -2633,24 +2787,19 @@ int main(int argc, char *argv[]) {
}
}
// Lossless mode validation and adjustments
if (enc->lossless_mode) {
// In lossless mode, disable rate control and set quality to maximum
enc->bitrate_mode = 0;
enc->disable_rcf = 1;
enc->progressive_mode = 1;
enc->qualityIndex = 5;
enc->qualityY = enc->qualityCo = enc->qualityCg = 255; // Use 255 as a redundant lossless marker
if (enc->verbose) {
printf("Lossless mode enabled: Rate control disabled, quality set to maximum, enabling progressive scan\n");
}
}
// halve the internal representation of frame height
if (!enc->progressive_mode) {
enc->height /= 2;
}
if (enc->ictcp_mode) {
// ICtCp: Ct and Cp have different characteristics than YCoCg Co/Cg
// Cp channel now uses specialized quantization table, so moderate quality is fine
int base_chroma_quality = enc->qualityCo;
enc->qualityCo = base_chroma_quality; // Ct channel: keep original Co quantization
enc->qualityCg = base_chroma_quality; // Cp channel: same quality since Q_Cp_8 handles detail preservation
}
if (!test_mode && (!enc->input_file || !enc->output_file)) {
fprintf(stderr, "Input and output files are required (unless using --test mode)\n");
show_usage(argv[0]);
@@ -2737,7 +2886,7 @@ int main(int argc, char *argv[]) {
write_tev_header(output, enc);
gettimeofday(&enc->start_time, NULL);
printf("Encoding video with YCoCg-R 4:2:0 format...\n");
printf("Encoding video with %s 4:2:0 format...\n", enc->ictcp_mode ? "ICtCp" : "YCoCg-R");
if (enc->output_fps != enc->fps) {
printf("Frame rate conversion enabled: %d fps output\n", enc->output_fps);
}
@@ -2791,13 +2940,13 @@ int main(int argc, char *argv[]) {
printf("Frame %d: %s (%d,%d,%d)\n", frame_count, colour_name, test_r, test_g, test_b);
// Test YCoCg-R conversion
int y_test, co_test, cg_test;
rgb_to_ycocgr(test_r, test_g, test_b, &y_test, &co_test, &cg_test);
printf(" YCoCg-R: Y=%d Co=%d Cg=%d\n", y_test, co_test, cg_test);
double y_test, co_test, cg_test;
rgb_to_color_space(enc, test_r, test_g, test_b, &y_test, &co_test, &cg_test);
printf(" %s: Y=%.3f Co=%.3f Cg=%.3f\n", enc->ictcp_mode ? "ICtCp" : "YCoCg", y_test, co_test, cg_test);
// Test reverse conversion
uint8_t r_rev, g_rev, b_rev;
ycocgr_to_rgb(y_test, co_test, cg_test, &r_rev, &g_rev, &b_rev);
color_space_to_rgb(enc, y_test, co_test, cg_test, &r_rev, &g_rev, &b_rev);
printf(" Reverse: R=%d G=%d B=%d\n", r_rev, g_rev, b_rev);
} else {