Changed video format; added TEV version 3 (XYB colour space)

2026-03-07 19:51:51 +09:00 · 2025-08-26 22:17:45 +09:00
parent 6d982a9786
commit 33e77e378e
7 changed files with 2485 additions and 141 deletions
--- a/video_encoder/Makefile
+++ b/video_encoder/Makefile
@@ -5,26 +5,37 @@ CC = gcc
 CFLAGS = -std=c99 -Wall -Wextra -O2 -D_GNU_SOURCE
 LIBS = -lm -lz

-# Source files
-SOURCES = encoder_tev.c
-TARGET = encoder_tev
+# Source files and targets
+SOURCES = encoder_tev.c encoder_tev_xyb.c
+TARGETS = encoder_tev encoder_tev_xyb

-# Build encoder
-$(TARGET): $(SOURCES)
-	rm -f $(TARGET)
+# Build all encoders
+all: $(TARGETS)
+
+# Build main encoder
+encoder_tev: encoder_tev.c
+	rm -f encoder_tev
 	$(CC) $(CFLAGS) -o $@ $< $(LIBS)

+# Build XYB encoder
+encoder_tev_xyb: encoder_tev_xyb.c
+	rm -f encoder_tev_xyb
+	$(CC) $(CFLAGS) -o $@ $< $(LIBS)
+
+# Default target: `all` is listed before the other rules above, so a plain
+# `make` already builds both encoders. $(TARGETS) must not depend on `all`:
+# `all` itself depends on $(TARGETS), so that would be a circular dependency,
+# and depending on a phony target would mark the encoders out of date on
+# every invocation.
+
 # Build with debug symbols
 debug: CFLAGS += -g -DDEBUG
-debug: $(TARGET)
+debug: $(TARGETS)

 # Clean build artifacts
 clean:
-	rm -f $(TARGET)
+	rm -f $(TARGETS)

 # Install (copy to PATH)
-install: $(TARGET)
-	cp $(TARGET) /usr/local/bin/
+install: $(TARGETS)
+	cp $(TARGETS) /usr/local/bin/

 # Check for required dependencies
 check-deps:
@@ -38,7 +49,9 @@ help:
 	@echo "TSVM Enhanced Video (TEV) Encoder"
 	@echo ""
 	@echo "Targets:"
-	@echo "  encoder_tev  - Build the encoder (default)"
+	@echo "  all          - Build both encoders (default)"
+	@echo "  encoder_tev  - Build the main TEV encoder"
+	@echo "  encoder_tev_xyb - Build the XYB color space encoder"
 	@echo "  debug        - Build with debug symbols"
 	@echo "  clean        - Remove build artifacts"
 	@echo "  install      - Install to /usr/local/bin"
@@ -46,7 +59,8 @@ help:
 	@echo "  help         - Show this help"
 	@echo ""
 	@echo "Usage:"
-	@echo "  make"
+	@echo "  make         # Build both encoders"
 	@echo "  ./encoder_tev input.mp4 -o output.tev"
+	@echo "  ./encoder_tev_xyb input.mp4 -o output.tev"

-.PHONY: clean install check-deps help debug
+.PHONY: all clean install check-deps help debug
--- a/video_encoder/encoder_tev.c
+++ b/video_encoder/encoder_tev.c
@@ -95,6 +95,7 @@ int KEYFRAME_INTERVAL = 60;
 typedef struct __attribute__((packed)) {
    uint8_t mode;           // Block encoding mode
    int16_t mv_x, mv_y;     // Motion vector (1/4 pixel precision)
+    float rate_control_factor; // Rate control factor (4 bytes, little-endian)
    uint16_t cbp;           // Coded block pattern (which channels have non-zero coeffs)
    int16_t y_coeffs[256];  // quantised Y DCT coefficients (16x16)
    int16_t co_coeffs[64];  // quantised Co DCT coefficients (8x8)
@@ -666,6 +667,7 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
        // Intra coding for keyframes
        block->mode = TEV_MODE_INTRA;
        block->mv_x = block->mv_y = 0;
+        block->rate_control_factor = enc->rate_control_factor;
        enc->blocks_intra++;
    } else {
        // Implement proper mode decision for P-frames
@@ -749,6 +751,7 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
            block->mode = TEV_MODE_SKIP;
            block->mv_x = 0;
            block->mv_y = 0;
+            block->rate_control_factor = enc->rate_control_factor;
            block->cbp = 0x00;  // No coefficients present
            // Zero out DCT coefficients for consistent format
            memset(block->y_coeffs, 0, sizeof(block->y_coeffs));
@@ -760,6 +763,7 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
                   (abs(block->mv_x) > 0 || abs(block->mv_y) > 0)) {
            // Good motion prediction - use motion-only mode
            block->mode = TEV_MODE_MOTION;
+            block->rate_control_factor = enc->rate_control_factor;
            block->cbp = 0x00;  // No coefficients present
            // Zero out DCT coefficients for consistent format
            memset(block->y_coeffs, 0, sizeof(block->y_coeffs));
@@ -772,6 +776,7 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
            // Motion compensation with threshold
            if (motion_sad <= 1024) {
                block->mode = TEV_MODE_MOTION;
+                block->rate_control_factor = enc->rate_control_factor;
                block->cbp = 0x00;  // No coefficients present
                memset(block->y_coeffs, 0, sizeof(block->y_coeffs));
                memset(block->co_coeffs, 0, sizeof(block->co_coeffs));
@@ -783,10 +788,12 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
            // Use INTER mode with motion vector and residuals
            if (abs(block->mv_x) <= 24 && abs(block->mv_y) <= 24) {
                block->mode = TEV_MODE_INTER;
+                block->rate_control_factor = enc->rate_control_factor;
                enc->blocks_inter++;
            } else {
                // Motion vector too large, fall back to INTRA
                block->mode = TEV_MODE_INTRA;
+                block->rate_control_factor = enc->rate_control_factor;
                block->mv_x = 0;
                block->mv_y = 0;
                enc->blocks_intra++;
@@ -795,6 +802,7 @@ static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_ke
        } else {
            // No good motion prediction - use intra mode
            block->mode = TEV_MODE_INTRA;
+            block->rate_control_factor = enc->rate_control_factor;
            block->mv_x = 0;
            block->mv_y = 0;
            enc->blocks_intra++;
@@ -1293,20 +1301,19 @@ static int encode_frame(tev_encoder_t *enc, FILE *output, int frame_num) {
    // Clean up frame stream
    deflateEnd(&frame_stream);

-    // Write frame packet header (always include rate control factor)
+    // Write frame packet header (rate control factor now per-block)
    uint8_t packet_type = is_keyframe ? TEV_PACKET_IFRAME : TEV_PACKET_PFRAME;
-    uint32_t payload_size = compressed_size + 4; // +4 bytes for rate control factor (always)
+    uint32_t payload_size = compressed_size; // Rate control factor now per-block, not per-packet

    fwrite(&packet_type, 1, 1, output);
    fwrite(&payload_size, 4, 1, output);
-    fwrite(&enc->rate_control_factor, 4, 1, output); // Always store rate control factor
    fwrite(enc->compressed_buffer, 1, compressed_size, output);

    if (enc->verbose) {
        printf("rateControlFactor=%.6f\n", enc->rate_control_factor);
    }

-    enc->total_output_bytes += 5 + 4 + compressed_size; // packet + size + rate_factor + data
+    enc->total_output_bytes += 5 + compressed_size; // packet + size + data (rate_factor now per-block)

    // Update rate control for next frame
    if (enc->bitrate_mode > 0) {
--- a/video_encoder/encoder_tev_xyb.c
+++ b/video_encoder/encoder_tev_xyb.c
--- a/video_encoder/xyb_conversion.c
+++ b/video_encoder/xyb_conversion.c
@@ -0,0 +1,200 @@
+// XYB Color Space Conversion Functions for TEV
+// Based on JPEG XL XYB specification with proper sRGB linearization
+// test with:
+//// gcc -DXYB_TEST_MAIN -o test_xyb xyb_conversion.c -lm && ./test_xyb
+
+#include <stdio.h>
+#include <math.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+// Limit v to the closed range [lo, hi]. The arguments can be evaluated more
+// than once, so do not pass expressions with side effects.
+#define CLAMP(v, lo, hi) \
+    (((v) < (lo)) ? (lo) : (((v) > (hi)) ? (hi) : (v)))
+
+// XYB conversion constants from JPEG XL specification
+// XYB_BIAS is added to each LMS mix before the cube root in rgb_to_xyb() and
+// subtracted again after cubing in xyb_to_rgb().
+static const double XYB_BIAS = 0.00379307325527544933;
+// CBRT_BIAS is subtracted after the cube root in rgb_to_xyb() and re-added
+// before cubing in xyb_to_rgb().
+// NOTE(review): this is labelled cbrt(XYB_BIAS), but cbrt(0.00379307...) is
+// approximately 0.155954, not 0.01558. Both conversion directions in this
+// file use the same value, so the round trip stays self-consistent; confirm
+// the value the decoder uses before changing it.
+static const double CBRT_BIAS = 0.01558; // nominally cbrt(XYB_BIAS); see note above
+
+// RGB to LMS mixing coefficients
+// Row i holds the weights applied to linear (R, G, B) to form the L, M and S
+// mixes respectively; XYB_BIAS is added separately by the caller.
+static const double RGB_TO_LMS[3][3] = {
+    {0.3, 0.622, 0.078},                           // L coefficients
+    {0.23, 0.692, 0.078},                          // M coefficients  
+    {0.24342268924547819, 0.20476744424496821, 0.55180986650955360}  // S coefficients
+};
+
+// LMS to RGB inverse matrix (calculated via matrix inversion)
+// Row i holds the weights applied to the un-biased (L, M, S) mixes to recover
+// linear R, G and B respectively.
+static const double LMS_TO_RGB[3][3] = {
+    {11.0315669046, -9.8669439081, -0.1646229965},
+    {-3.2541473811, 4.4187703776, -0.1646229965},
+    {-3.6588512867, 2.7129230459, 1.9459282408}
+};
+
+// Decode one sRGB-encoded channel value (nominally 0..1) to linear light.
+// Inputs above 0.04045 go through the 2.4 power curve; everything else uses
+// the linear segment (division by 12.92).
+static inline double srgb_linearize(double val) {
+    const double curve_threshold = 0.04045;
+
+    return (val > curve_threshold)
+        ? pow((val + 0.055) / 1.055, 2.4)
+        : val / 12.92;
+}
+
+// Encode one linear-light channel value (nominally 0..1) with the sRGB
+// transfer curve. Inputs above 0.0031308 use the 1/2.4 power curve;
+// everything else uses the linear segment (multiplication by 12.92).
+static inline double srgb_unlinearize(double val) {
+    const double curve_threshold = 0.0031308;
+
+    return (val > curve_threshold)
+        ? 1.055 * pow(val, 1.0 / 2.4) - 0.055
+        : val * 12.92;
+}
+
+// Signed cube root. Despite the name this is not an approximation: it calls
+// the libm cbrt() on the magnitude and restores the sign for negative input.
+static inline double fast_cbrt(double x) {
+    return (x < 0) ? -cbrt(-x) : cbrt(x);
+}
+
+// Convert one 8-bit sRGB pixel to floating-point XYB.
+//   r, g, b      : input channels, 0..255
+//   x, y, xyb_b  : outputs, written unconditionally (must not be NULL)
+void rgb_to_xyb(uint8_t r, uint8_t g, uint8_t b, double *x, double *y, double *xyb_b) {
+    // Scale each channel to 0..1 and undo the sRGB transfer curve
+    const double lin[3] = {
+        srgb_linearize(r / 255.0),
+        srgb_linearize(g / 255.0),
+        srgb_linearize(b / 255.0)
+    };
+
+    // Mix linear RGB into biased L, M, S, then apply the cube root and
+    // remove the CBRT_BIAS offset
+    double gamma[3];
+    for (int ch = 0; ch < 3; ch++) {
+        const double *w = RGB_TO_LMS[ch];
+        double mix = w[0] * lin[0] + w[1] * lin[1] + w[2] * lin[2] + XYB_BIAS;
+        gamma[ch] = fast_cbrt(mix) - CBRT_BIAS;
+    }
+
+    // X is half the L-M difference, Y is the L/M average, B is S unchanged
+    *x = (gamma[0] - gamma[1]) / 2.0;
+    *y = (gamma[0] + gamma[1]) / 2.0;
+    *xyb_b = gamma[2];
+}
+
+// Convert a floating-point XYB triple back to one 8-bit sRGB pixel.
+//   x, y, xyb_b : input XYB values as produced by rgb_to_xyb()
+//   r, g, b     : outputs, written unconditionally (must not be NULL)
+void xyb_to_rgb(double x, double y, double xyb_b, uint8_t *r, uint8_t *g, uint8_t *b) {
+    // Undo the X/Y sum-difference step to get the cube-rooted L, M, S
+    const double gamma[3] = { x + y, y - x, xyb_b };
+
+    // Re-add the offset, cube, and strip the bias to recover the LMS mixes
+    double mix[3];
+    for (int ch = 0; ch < 3; ch++) {
+        mix[ch] = pow(gamma[ch] + CBRT_BIAS, 3.0) - XYB_BIAS;
+    }
+
+    uint8_t *const out[3] = { r, g, b };
+    for (int ch = 0; ch < 3; ch++) {
+        // Inverse matrix row gives this channel's linear value
+        const double *w = LMS_TO_RGB[ch];
+        double linear = w[0] * mix[0] + w[1] * mix[1] + w[2] * mix[2];
+
+        // Keep the linear value inside 0..1 before applying the sRGB curve
+        linear = CLAMP(linear, 0.0, 1.0);
+
+        // Re-encode as sRGB, scale to 0..255 and round to nearest
+        int level = (int)(srgb_unlinearize(linear) * 255.0 + 0.5);
+        *out[ch] = CLAMP(level, 0, 255);
+    }
+}
+
+// Convert one 8-bit sRGB pixel to integer XYB levels.
+//   y_quant : (int)(y * 255 + 128), clamped to 0..255
+//   x_quant : (int)(x * 255),       clamped to -128..127
+//   b_quant : (int)(b * 255),       clamped to -128..127
+// The float-to-int casts truncate toward zero; no rounding is applied.
+// NOTE(review): with the constants in this file, RGB(128,128,128) yields
+// y = b ~= 0.59, so the Y term (~278) and the B term (~150) both exceed
+// their clamp limits and saturate. Confirm the intended scaling/offsets
+// against the decoder.
+void rgb_to_xyb_quantized(uint8_t r, uint8_t g, uint8_t b, int *x_quant, int *y_quant, int *b_quant) {
+    double xf, yf, bf;
+    rgb_to_xyb(r, g, b, &xf, &yf, &bf);
+
+    // Scale to integer levels first, then clamp each to its target range
+    const int y_level = (int)(yf * 255.0 + 128.0);
+    const int x_level = (int)(xf * 255.0);
+    const int b_level = (int)(bf * 255.0);
+
+    *y_quant = CLAMP(y_level, 0, 255);
+    *x_quant = CLAMP(x_level, -128, 127);
+    *b_quant = CLAMP(b_level, -128, 127);
+}
+
+// Round-trip self-test: converts each reference colour RGB -> XYB -> RGB,
+// prints one line per colour, and counts a colour as failed when any channel
+// differs from the original by more than 2.
+// Returns 1 when every colour passes, 0 otherwise.
+int test_xyb_conversion() {
+    // Largest per-channel difference still counted as a pass
+    const int tolerance = 2;
+
+    static const uint8_t palette[][3] = {
+        // Primaries, secondaries and neutrals
+        {255, 0, 0},     // Red
+        {0, 255, 0},     // Green
+        {0, 0, 255},     // Blue
+        {255, 255, 255}, // White
+        {0, 0, 0},       // Black
+        {128, 128, 128}, // Gray
+        {255, 255, 0},   // Yellow
+        {255, 0, 255},   // Magenta
+        {0, 255, 255},   // Cyan
+        // MacBeth chart colours converted to sRGB
+        {0x73, 0x52, 0x44},
+        {0xc2, 0x96, 0x82},
+        {0x62, 0x7a, 0x9d},
+        {0x57, 0x6c, 0x43},
+        {0x85, 0x80, 0xb1},
+        {0x67, 0xbd, 0xaa},
+        {0xd6, 0x7e, 0x2c},
+        {0x50, 0x5b, 0xa6},
+        {0xc1, 0x5a, 0x63},
+        {0x5e, 0x3c, 0x6c},
+        {0x9d, 0xbc, 0x40},
+        {0xe0, 0xa3, 0x2e},
+        {0x38, 0x3d, 0x96},
+        {0x46, 0x94, 0x49},
+        {0xaf, 0x36, 0x3c},
+        {0xe7, 0xc7, 0x1f},
+        {0xbb, 0x56, 0x95},
+        {0x08, 0x85, 0xa1},
+        {0xf3, 0xf3, 0xf3},
+        {0xc8, 0xc8, 0xc8},
+        {0xa0, 0xa0, 0xa0},
+        {0x7a, 0x7a, 0x7a},
+        {0x55, 0x55, 0x55},
+        {0x34, 0x34, 0x34}
+    };
+    const int total = sizeof(palette) / sizeof(palette[0]);
+    int passed = 0;
+
+    printf("Testing XYB conversion accuracy with sRGB linearization...\n");
+
+    for (int idx = 0; idx < total; idx++) {
+        const uint8_t *src = palette[idx];
+        double x, y, xyb_b;
+        uint8_t back[3];
+
+        // Forward conversion followed immediately by the inverse
+        rgb_to_xyb(src[0], src[1], src[2], &x, &y, &xyb_b);
+        xyb_to_rgb(x, y, xyb_b, &back[0], &back[1], &back[2]);
+
+        // Absolute per-channel difference between input and round-trip result
+        int diff[3];
+        for (int ch = 0; ch < 3; ch++) {
+            diff[ch] = abs((int)src[ch] - (int)back[ch]);
+        }
+
+        printf("RGB(%3d,%3d,%3d) -> XYB(%6.3f,%6.3f,%6.3f) -> RGB(%3d,%3d,%3d) [Error: %d,%d,%d]\n",
+               src[0], src[1], src[2], x, y, xyb_b,
+               back[0], back[1], back[2], diff[0], diff[1], diff[2]);
+
+        if (diff[0] <= tolerance && diff[1] <= tolerance && diff[2] <= tolerance) {
+            passed++;
+        }
+    }
+
+    printf("Test completed: %d/%d passed\n", passed, total);
+    return passed == total;
+}
+
+#ifdef XYB_TEST_MAIN
+// Standalone test driver (built only when XYB_TEST_MAIN is defined):
+// exit status 0 when the round-trip test passes, 1 otherwise.
+int main() {
+    if (test_xyb_conversion()) {
+        return 0;
+    }
+    return 1;
+}
+#endif