// Created by CuriousTorvald and Claude on 2025-08-17.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include <math.h>
#include <zlib.h>
#include <unistd.h>
#include <sys/wait.h>
#include <getopt.h>
#include <sys/time.h>

// TSVM Enhanced Video (TEV) format constants
#define TEV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x45\x56"  // "\x1FTSVM TEV"
#define TEV_VERSION 1

// Block encoding modes (8x8 blocks)
#define TEV_MODE_SKIP      0x00  // Skip block (copy from reference)
#define TEV_MODE_INTRA     0x01  // Intra DCT coding (I-frame blocks)
#define TEV_MODE_INTER     0x02  // Inter DCT coding with motion compensation
#define TEV_MODE_MOTION    0x03  // Motion vector only (good prediction)

// Video packet types
#define TEV_PACKET_IFRAME      0x10  // Intra frame (keyframe)
#define TEV_PACKET_PFRAME      0x11  // Predicted frame  
#define TEV_PACKET_AUDIO_MP2   0x20  // MP2 audio
#define TEV_PACKET_SYNC        0xFF  // Sync packet

// Quality settings for quantization
static const uint8_t QUANT_TABLES[8][64] = {
    // Quality 0 (lowest)
    {80, 60, 50, 80, 120, 200, 255, 255,
     55, 60, 70, 95, 130, 255, 255, 255,
     70, 65, 80, 120, 200, 255, 255, 255,
     70, 85, 110, 145, 255, 255, 255, 255,
     90, 110, 185, 255, 255, 255, 255, 255,
     120, 175, 255, 255, 255, 255, 255, 255,
     245, 255, 255, 255, 255, 255, 255, 255,
     255, 255, 255, 255, 255, 255, 255, 255},
    // Quality 1-6 (intermediate)...
    {40, 30, 25, 40, 60, 100, 128, 150,
     28, 30, 35, 48, 65, 128, 150, 180,
     35, 33, 40, 60, 100, 128, 150, 180,
     35, 43, 55, 73, 128, 150, 180, 200,
     45, 55, 93, 128, 150, 180, 200, 220,
     60, 88, 128, 150, 180, 200, 220, 240,
     123, 128, 150, 180, 200, 220, 240, 250,
     128, 150, 180, 200, 220, 240, 250, 255},
    // ... (simplified for example)
    {20, 15, 13, 20, 30, 50, 64, 75,
     14, 15, 18, 24, 33, 64, 75, 90,
     18, 17, 20, 30, 50, 64, 75, 90,
     18, 22, 28, 37, 64, 75, 90, 100,
     23, 28, 47, 64, 75, 90, 100, 110,
     30, 44, 64, 75, 90, 100, 110, 120,
     62, 64, 75, 90, 100, 110, 120, 125,
     64, 75, 90, 100, 110, 120, 125, 128},
    {16, 12, 10, 16, 24, 40, 51, 60,
     11, 12, 14, 19, 26, 51, 60, 72,
     14, 13, 16, 24, 40, 51, 60, 72,
     14, 17, 22, 29, 51, 60, 72, 80,
     18, 22, 37, 51, 60, 72, 80, 88,
     24, 35, 51, 60, 72, 80, 88, 96,
     49, 51, 60, 72, 80, 88, 96, 100,
     51, 60, 72, 80, 88, 96, 100, 102},
    {12, 9, 8, 12, 18, 30, 38, 45,
     8, 9, 11, 14, 20, 38, 45, 54,
     11, 10, 12, 18, 30, 38, 45, 54,
     11, 13, 17, 22, 38, 45, 54, 60,
     14, 17, 28, 38, 45, 54, 60, 66,
     18, 26, 38, 45, 54, 60, 66, 72,
     37, 38, 45, 54, 60, 66, 72, 75,
     38, 45, 54, 60, 66, 72, 75, 77},
    {10, 7, 6, 10, 15, 25, 32, 38,
     7, 7, 9, 12, 16, 32, 38, 45,
     9, 8, 10, 15, 25, 32, 38, 45,
     9, 11, 14, 18, 32, 38, 45, 50,
     12, 14, 23, 32, 38, 45, 50, 55,
     15, 22, 32, 38, 45, 50, 55, 60,
     31, 32, 38, 45, 50, 55, 60, 63,
     32, 38, 45, 50, 55, 60, 63, 65},
    {8, 6, 5, 8, 12, 20, 26, 30,
     6, 6, 7, 10, 13, 26, 30, 36,
     7, 7, 8, 12, 20, 26, 30, 36,
     7, 9, 11, 15, 26, 30, 36, 40,
     10, 11, 19, 26, 30, 36, 40, 44,
     12, 17, 26, 30, 36, 40, 44, 48,
     25, 26, 30, 36, 40, 44, 48, 50,
     26, 30, 36, 40, 44, 48, 50, 52},
    // Quality 7 (highest)
    {2, 1, 1, 2, 3, 5, 6, 7,
     1, 1, 1, 2, 3, 6, 7, 9,
     1, 1, 2, 3, 5, 6, 7, 9,
     1, 2, 3, 4, 6, 7, 9, 10,
     2, 3, 5, 6, 7, 9, 10, 11,
     3, 4, 6, 7, 9, 10, 11, 12,
     6, 6, 7, 9, 10, 11, 12, 13,
     6, 7, 9, 10, 11, 12, 13, 13}
};

// Audio constants (reuse MP2 from existing system)
#define MP2_SAMPLE_RATE 32000
#define MP2_DEFAULT_PACKET_SIZE 0x240

// Encoding parameters
#define MAX_MOTION_SEARCH 16
#define KEYFRAME_INTERVAL 30
#define BLOCK_SIZE 8

// Default values
#define DEFAULT_WIDTH 560
#define DEFAULT_HEIGHT 448
#define TEMP_AUDIO_FILE "/tmp/tev_temp_audio.mp2"

typedef struct __attribute__((packed)) {
    uint8_t mode;           // Block encoding mode
    int16_t mv_x, mv_y;     // Motion vector (1/4 pixel precision)
    uint16_t cbp;           // Coded block pattern (which 8x8 have non-zero coeffs)
    int16_t dct_coeffs[3][64]; // Quantized DCT coefficients (R,G,B)
} tev_block_t;

typedef struct {
    char *input_file;
    char *output_file;
    int width;
    int height;
    int fps;
    int total_frames;
    double duration;
    int has_audio;
    int output_to_stdout;
    int quality;  // 0-7, higher = better quality
    
    // Frame buffers (8-bit RGB format for encoding)
    uint8_t *current_rgb, *previous_rgb, *reference_rgb;
    
    // Encoding workspace
    uint8_t *rgb_workspace;     // 8x8 RGB blocks (192 bytes)
    float *dct_workspace;       // DCT coefficients (192 floats)
    tev_block_t *block_data;    // Encoded block data
    uint8_t *compressed_buffer; // Zstd output
    
    // Audio handling
    FILE *mp2_file;
    int mp2_packet_size;
    size_t audio_remaining;
    uint8_t *mp2_buffer;
    
    // Compression context
    z_stream gzip_stream;
    
    // FFmpeg processes
    FILE *ffmpeg_video_pipe;
    
    // Progress tracking
    struct timeval start_time;
    size_t total_output_bytes;
    
    // Statistics
    int blocks_skip, blocks_intra, blocks_inter, blocks_motion;
} tev_encoder_t;

// Quantize DCT coefficient using quality table
static int16_t quantize_coeff(float coeff, uint8_t quant, int is_dc) {
    if (is_dc) {
        // DC coefficient uses fixed quantizer
        return (int16_t)roundf(coeff / 8.0f);
    } else {
        // AC coefficients use quality table
        return (int16_t)roundf(coeff / quant);
    }
}

// These functions are reserved for future rate-distortion optimization
// Currently using simplified encoding logic

// Convert RGB to 4096-color format
static void copy_rgb_frame(uint8_t *rgb_input, uint8_t *rgb_frame, int pixels) {
    // Copy input RGB data to frame buffer (preserving full 8-bit precision)
    memcpy(rgb_frame, rgb_input, pixels * 3);
}

// Simple motion estimation (full search)
static void estimate_motion(tev_encoder_t *enc, int block_x, int block_y, 
                           int16_t *best_mv_x, int16_t *best_mv_y) {
    int best_sad = INT_MAX;
    *best_mv_x = 0;
    *best_mv_y = 0;
    
    int start_x = block_x * BLOCK_SIZE;
    int start_y = block_y * BLOCK_SIZE;
    
    // Search in range [-16, +16] pixels
    for (int mv_y = -MAX_MOTION_SEARCH; mv_y <= MAX_MOTION_SEARCH; mv_y++) {
        for (int mv_x = -MAX_MOTION_SEARCH; mv_x <= MAX_MOTION_SEARCH; mv_x++) {
            int ref_x = start_x + mv_x;
            int ref_y = start_y + mv_y;
            
            // Check bounds
            if (ref_x >= 0 && ref_y >= 0 && 
                ref_x + BLOCK_SIZE <= enc->width && 
                ref_y + BLOCK_SIZE <= enc->height) {
                
                int sad = 0;
                
                // Calculate Sum of Absolute Differences
                for (int dy = 0; dy < BLOCK_SIZE; dy++) {
                    for (int dx = 0; dx < BLOCK_SIZE; dx++) {
                        int cur_offset = (start_y + dy) * enc->width + (start_x + dx);
                        int ref_offset = (ref_y + dy) * enc->width + (ref_x + dx);
                        
                        int cur_r = enc->current_rgb[cur_offset * 3];
                        int cur_g = enc->current_rgb[cur_offset * 3 + 1];
                        int cur_b = enc->current_rgb[cur_offset * 3 + 2];
                        int ref_r = enc->previous_rgb[ref_offset * 3];
                        int ref_g = enc->previous_rgb[ref_offset * 3 + 1];
                        int ref_b = enc->previous_rgb[ref_offset * 3 + 2];
                        
                        // SAD on 8-bit RGB channels
                        sad += abs(cur_r - ref_r) + abs(cur_g - ref_g) + abs(cur_b - ref_b);
                    }
                }
                
                if (sad < best_sad) {
                    best_sad = sad;
                    *best_mv_x = mv_x * 4; // Convert to 1/4 pixel units
                    *best_mv_y = mv_y * 4;
                }
            }
        }
    }
}

// Encode an 8x8 block using the best mode
static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_keyframe) {
    int block_idx = block_y * ((enc->width + 7) / 8) + block_x;
    tev_block_t *block = &enc->block_data[block_idx];
    
    int start_x = block_x * BLOCK_SIZE;
    int start_y = block_y * BLOCK_SIZE;
    
    // Extract 8x8 RGB block from current frame
    for (int y = 0; y < BLOCK_SIZE; y++) {
        for (int x = 0; x < BLOCK_SIZE; x++) {
            int pixel_x = block_x * BLOCK_SIZE + x;
            int pixel_y = block_y * BLOCK_SIZE + y;
            int offset = (y * BLOCK_SIZE + x) * 3;
            
            if (pixel_x < enc->width && pixel_y < enc->height) {
                int frame_offset = pixel_y * enc->width + pixel_x;
                // Copy RGB data directly (preserving full 8-bit precision)
                enc->rgb_workspace[offset] = enc->current_rgb[frame_offset * 3];      // R
                enc->rgb_workspace[offset + 1] = enc->current_rgb[frame_offset * 3 + 1]; // G  
                enc->rgb_workspace[offset + 2] = enc->current_rgb[frame_offset * 3 + 2]; // B
            } else {
                // Pad with black
                enc->rgb_workspace[offset] = 0;
                enc->rgb_workspace[offset + 1] = 0;
                enc->rgb_workspace[offset + 2] = 0;
            }
        }
    }
    
    // Initialize block
    memset(block, 0, sizeof(tev_block_t));
    
    if (is_keyframe) {
        // Keyframes use INTRA mode
        block->mode = TEV_MODE_INTRA;
        enc->blocks_intra++;
    } else {
        // Try different modes and pick the best
        
        // Try SKIP mode - compare with previous frame
        int skip_sad = 0;
        for (int dy = 0; dy < BLOCK_SIZE; dy++) {
            for (int dx = 0; dx < BLOCK_SIZE; dx++) {
                int pixel_x = start_x + dx;
                int pixel_y = start_y + dy;
                if (pixel_x < enc->width && pixel_y < enc->height) {
                    int offset = pixel_y * enc->width + pixel_x;
                    int cur_r = enc->current_rgb[offset * 3];
                    int cur_g = enc->current_rgb[offset * 3 + 1];
                    int cur_b = enc->current_rgb[offset * 3 + 2];
                    int prev_r = enc->previous_rgb[offset * 3];
                    int prev_g = enc->previous_rgb[offset * 3 + 1];
                    int prev_b = enc->previous_rgb[offset * 3 + 2];
                    
                    skip_sad += abs(cur_r - prev_r) + abs(cur_g - prev_g) + abs(cur_b - prev_b);
                }
            }
        }
        
        if (skip_sad < 8) { // Much stricter threshold for SKIP
            block->mode = TEV_MODE_SKIP;
            enc->blocks_skip++;
            return;
        }
        
        // Try MOTION mode
        estimate_motion(enc, block_x, block_y, &block->mv_x, &block->mv_y);
        
        // Calculate motion compensation SAD
        int motion_sad = 0;
        for (int y = 0; y < BLOCK_SIZE; y++) {
            for (int x = 0; x < BLOCK_SIZE; x++) {
                int cur_x = block_x * BLOCK_SIZE + x;
                int cur_y = block_y * BLOCK_SIZE + y;
                int ref_x = cur_x + block->mv_x;
                int ref_y = cur_y + block->mv_y;
                
                if (cur_x < enc->width && cur_y < enc->height &&
                    ref_x >= 0 && ref_x < enc->width && ref_y >= 0 && ref_y < enc->height) {
                    
                    int cur_offset = cur_y * enc->width + cur_x;
                    int ref_offset = ref_y * enc->width + ref_x;
                    
                    uint8_t cur_r = enc->current_rgb[cur_offset * 3];
                    uint8_t cur_g = enc->current_rgb[cur_offset * 3 + 1];
                    uint8_t cur_b = enc->current_rgb[cur_offset * 3 + 2];
                    uint8_t ref_r = enc->previous_rgb[ref_offset * 3];
                    uint8_t ref_g = enc->previous_rgb[ref_offset * 3 + 1];
                    uint8_t ref_b = enc->previous_rgb[ref_offset * 3 + 2];
                    
                    motion_sad += abs(cur_r - ref_r) + abs(cur_g - ref_g) + abs(cur_b - ref_b);
                } else {
                    motion_sad += 48; // Penalty for out-of-bounds reference
                }
            }
        }
        
        // Decide on encoding mode based on analysis
        if (motion_sad < 32 && (abs(block->mv_x) > 0 || abs(block->mv_y) > 0)) {
            // Good motion prediction
            block->mode = TEV_MODE_MOTION;
            enc->blocks_motion++;
            return; // Motion blocks don't need DCT coefficients
        } else if (motion_sad < 64) {
            // Use INTER mode (motion compensation + DCT residual)
            block->mode = TEV_MODE_INTER;
            enc->blocks_inter++;
        } else {
            // Fall back to INTRA mode
            block->mode = TEV_MODE_INTRA;
            enc->blocks_intra++;
        }
    }
    
    // Full 8x8 DCT implementation for all blocks (keyframe and P-frame)
    const uint8_t *quant_table = QUANT_TABLES[enc->quality];
    
    // DCT-II basis functions (precomputed for 8x8)
    static double dct_basis[8][8];
    static int basis_initialized = 0;
    
    if (!basis_initialized) {
        for (int u = 0; u < 8; u++) {
            for (int x = 0; x < 8; x++) {
                double cu = (u == 0) ? sqrt(1.0/8.0) : sqrt(2.0/8.0);
                dct_basis[u][x] = cu * cos((2.0 * x + 1.0) * u * M_PI / 16.0);
            }
        }
        basis_initialized = 1;
    }
    
    // Convert RGB block to DCT input format (subtract 128 to center around 0)
    double rgb_block[3][8][8];
    for (int y = 0; y < 8; y++) {
        for (int x = 0; x < 8; x++) {
            int offset = (y * 8 + x) * 3;
            rgb_block[0][y][x] = enc->rgb_workspace[offset] - 128.0;     // R: 0-255 -> -128 to +127
            rgb_block[1][y][x] = enc->rgb_workspace[offset + 1] - 128.0; // G: 0-255 -> -128 to +127  
            rgb_block[2][y][x] = enc->rgb_workspace[offset + 2] - 128.0; // B: 0-255 -> -128 to +127
        }
    }
    
    // Apply 2D DCT to each channel
    double dct_coeffs[3][8][8];
    for (int channel = 0; channel < 3; channel++) {
        for (int u = 0; u < 8; u++) {
            for (int v = 0; v < 8; v++) {
                double sum = 0.0;
                for (int x = 0; x < 8; x++) {
                    for (int y = 0; y < 8; y++) {
                        sum += dct_basis[u][x] * dct_basis[v][y] * rgb_block[channel][y][x];
                    }
                }
                dct_coeffs[channel][u][v] = sum;
            }
        }
    }
    
    // Quantize and store DCT coefficients
    for (int channel = 0; channel < 3; channel++) {
        for (int u = 0; u < 8; u++) {
            for (int v = 0; v < 8; v++) {
                int coeff_index = u * 8 + v;
                int is_dc = (coeff_index == 0);
                
                block->dct_coeffs[channel][coeff_index] = 
                    quantize_coeff(dct_coeffs[channel][u][v], quant_table[coeff_index], is_dc);
                
                // Debug DC coefficient for first block
                if (block_x == 0 && block_y == 0 && channel < 3 && coeff_index == 0) {
                    fprintf(stderr, "Ch%d: DCT raw=%.2f, stored=%d, ", 
                           channel, dct_coeffs[channel][u][v], (int)block->dct_coeffs[channel][coeff_index]);
                    // Show raw bytes in memory
                    uint8_t *bytes = (uint8_t*)&block->dct_coeffs[channel][coeff_index];
                    fprintf(stderr, "bytes=[%d,%d]\n", bytes[0], bytes[1]);
                }
            }
        }
    }
}

// Execute command and capture output
static char *execute_command(const char *command) {
    FILE *pipe = popen(command, "r");
    if (!pipe) return NULL;
    
    char *result = malloc(4096);
    size_t len = fread(result, 1, 4095, pipe);
    result[len] = '\0';
    
    pclose(pipe);
    return result;
}

// Get video metadata using ffprobe
static int get_video_metadata(tev_encoder_t *enc) {
    char command[1024];
    char *output;
    
    // Get frame count
    snprintf(command, sizeof(command), 
        "ffprobe -v quiet -select_streams v:0 -count_frames -show_entries stream=nb_read_frames -of csv=p=0 \"%s\"", 
        enc->input_file);
    output = execute_command(command);
    if (!output) {
        fprintf(stderr, "Failed to get frame count\n");
        return 0;
    }
    enc->total_frames = atoi(output);
    free(output);
    
    // Get frame rate
    snprintf(command, sizeof(command),
        "ffprobe -v quiet -select_streams v:0 -show_entries stream=r_frame_rate -of csv=p=0 \"%s\"",
        enc->input_file);
    output = execute_command(command);
    if (!output) {
        fprintf(stderr, "Failed to get frame rate\n");
        return 0;
    }
    
    int num, den;
    if (sscanf(output, "%d/%d", &num, &den) == 2) {
        enc->fps = (den > 0) ? (num / den) : 30;
    } else {
        enc->fps = (int)round(atof(output));
    }
    free(output);
    
    // Get duration
    snprintf(command, sizeof(command),
        "ffprobe -v quiet -show_entries format=duration -of csv=p=0 \"%s\"",
        enc->input_file);
    output = execute_command(command);
    if (output) {
        enc->duration = atof(output);
        free(output);
    }
    
    // Check if has audio
    snprintf(command, sizeof(command),
        "ffprobe -v quiet -select_streams a:0 -show_entries stream=index -of csv=p=0 \"%s\"",
        enc->input_file);
    output = execute_command(command);
    enc->has_audio = (output && strlen(output) > 0 && atoi(output) >= 0);
    if (output) free(output);
    
    if (enc->total_frames <= 0 && enc->duration > 0) {
        enc->total_frames = (int)(enc->duration * enc->fps);
    }
    
    fprintf(stderr, "Video metadata:\n");
    fprintf(stderr, "  Frames: %d\n", enc->total_frames);
    fprintf(stderr, "  FPS: %d\n", enc->fps);
    fprintf(stderr, "  Duration: %.2fs\n", enc->duration);
    fprintf(stderr, "  Audio: %s\n", enc->has_audio ? "Yes" : "No");
    fprintf(stderr, "  Resolution: %dx%d\n", enc->width, enc->height);
    
    return (enc->total_frames > 0 && enc->fps > 0);
}

// Start FFmpeg process for video conversion
static int start_video_conversion(tev_encoder_t *enc) {
    char command[2048];
    snprintf(command, sizeof(command),
        "ffmpeg -i \"%s\" -f rawvideo -pix_fmt rgb24 -vf scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d -y - 2>/dev/null",
        enc->input_file, enc->width, enc->height, enc->width, enc->height);
    
    enc->ffmpeg_video_pipe = popen(command, "r");
    return (enc->ffmpeg_video_pipe != NULL);
}

// Start audio conversion
static int start_audio_conversion(tev_encoder_t *enc) {
    if (!enc->has_audio) return 1;
    
    char command[2048];
    snprintf(command, sizeof(command),
        "ffmpeg -i \"%s\" -acodec libtwolame -psymodel 4 -b:a 192k -ar %d -ac 2 -y \"%s\" 2>/dev/null",
        enc->input_file, MP2_SAMPLE_RATE, TEMP_AUDIO_FILE);
    
    int result = system(command);
    if (result == 0) {
        enc->mp2_file = fopen(TEMP_AUDIO_FILE, "rb");
        if (enc->mp2_file) {
            fseek(enc->mp2_file, 0, SEEK_END);
            enc->audio_remaining = ftell(enc->mp2_file);
            fseek(enc->mp2_file, 0, SEEK_SET);
            return 1;
        }
    }
    
    fprintf(stderr, "Warning: Failed to convert audio\n");
    enc->has_audio = 0;
    return 1;
}

// Write TEV header
static void write_tev_header(tev_encoder_t *enc, FILE *output) {
    fwrite(TEV_MAGIC, 1, 8, output);
    
    uint8_t version = TEV_VERSION;
    fwrite(&version, 1, 1, output);
    
    uint8_t flags = enc->has_audio ? 0x01 : 0x00;
    fwrite(&flags, 1, 1, output);
    
    fwrite(&enc->width, 2, 1, output);
    fwrite(&enc->height, 2, 1, output);
    fwrite(&enc->fps, 2, 1, output);
    fwrite(&enc->total_frames, 4, 1, output);
    
    uint8_t quality = enc->quality;
    fwrite(&quality, 1, 1, output);
    
    uint8_t reserved[5] = {0};
    fwrite(reserved, 1, 5, output);
}

// Process and encode one frame
static int process_frame(tev_encoder_t *enc, int frame_num, FILE *output) {
    // Read RGB data
    size_t rgb_size = enc->width * enc->height * 3;
    uint8_t *rgb_buffer = malloc(rgb_size);
    if (fread(rgb_buffer, 1, rgb_size, enc->ffmpeg_video_pipe) != rgb_size) {
        free(rgb_buffer);
        return 0; // End of video
    }
    
    // Convert to 4096-color format
    copy_rgb_frame(rgb_buffer, enc->current_rgb, enc->width * enc->height);
    free(rgb_buffer);
    
    int is_keyframe = (frame_num == 1) || (frame_num % KEYFRAME_INTERVAL == 0);
    
    // Reset statistics
    enc->blocks_skip = enc->blocks_intra = enc->blocks_inter = enc->blocks_motion = 0;
    
    // Encode all 8x8 blocks
    int blocks_x = (enc->width + 7) / 8;
    int blocks_y = (enc->height + 7) / 8;
    
    for (int by = 0; by < blocks_y; by++) {
        for (int bx = 0; bx < blocks_x; bx++) {
            encode_block(enc, bx, by, is_keyframe);
        }
    }
    
    // Debug struct layout
    fprintf(stderr, "Block size: %zu, DCT offset: %zu\n", 
           sizeof(tev_block_t), offsetof(tev_block_t, dct_coeffs));
    
    // No endian conversion needed - system is already little-endian
    
    // Compress block data using gzip
    size_t block_data_size = blocks_x * blocks_y * sizeof(tev_block_t);
    
    // Reset compression stream
    enc->gzip_stream.next_in = (Bytef*)enc->block_data;
    enc->gzip_stream.avail_in = block_data_size;
    enc->gzip_stream.next_out = (Bytef*)enc->compressed_buffer;
    enc->gzip_stream.avail_out = block_data_size * 2;
    
    if (deflateReset(&enc->gzip_stream) != Z_OK) {
        fprintf(stderr, "Gzip deflateReset failed\n");
        return -1;
    }
    
    int result = deflate(&enc->gzip_stream, Z_FINISH);
    if (result != Z_STREAM_END) {
        fprintf(stderr, "Gzip compression failed: %d\n", result);
        return -1;
    }
    
    size_t compressed_size = enc->gzip_stream.total_out;
    
    // Write video packet
    uint8_t packet_type[2] = {is_keyframe ? TEV_PACKET_IFRAME : TEV_PACKET_PFRAME, 0x00};
    fwrite(packet_type, 1, 2, output);
    
    uint32_t size = (uint32_t)compressed_size;
    fwrite(&size, 4, 1, output);
    fwrite(enc->compressed_buffer, 1, compressed_size, output);
    
    // Write sync packet
    uint8_t sync[2] = {0xFF, 0xFF};
    fwrite(sync, 1, 2, output);
    
    enc->total_output_bytes += 2 + 4 + compressed_size + 2;
    
    // Swap frame buffers for next frame
    uint8_t *temp_rgb = enc->previous_rgb;
    enc->previous_rgb = enc->current_rgb;
    enc->current_rgb = temp_rgb;
    
    fprintf(stderr, "\rFrame %d/%d [%c] - Skip:%d Intra:%d Inter:%d - Ratio:%.1f%%", 
            frame_num, enc->total_frames, is_keyframe ? 'I' : 'P',
            enc->blocks_skip, enc->blocks_intra, enc->blocks_inter,
            (compressed_size * 100.0) / block_data_size);
    fflush(stderr);
    
    return 1;
}

// Initialize encoder
static tev_encoder_t *init_encoder() {
    tev_encoder_t *enc = calloc(1, sizeof(tev_encoder_t));
    if (!enc) return NULL;
    
    enc->width = DEFAULT_WIDTH;
    enc->height = DEFAULT_HEIGHT;
    enc->quality = 5; // Default quality
    enc->output_to_stdout = 1;
    
    return enc;
}

// Allocate buffers
static int allocate_buffers(tev_encoder_t *enc) {
    int pixels = enc->width * enc->height;
    int blocks = ((enc->width + 7) / 8) * ((enc->height + 7) / 8);
    
    enc->current_rgb = malloc(pixels * 3);   // RGB: 3 bytes per pixel
    enc->previous_rgb = malloc(pixels * 3);
    enc->reference_rgb = malloc(pixels * 3);
    
    enc->rgb_workspace = malloc(BLOCK_SIZE * BLOCK_SIZE * 3);
    enc->dct_workspace = malloc(BLOCK_SIZE * BLOCK_SIZE * 3 * sizeof(float));
    enc->block_data = malloc(blocks * sizeof(tev_block_t));
    enc->compressed_buffer = malloc(blocks * sizeof(tev_block_t) * 2);
    enc->mp2_buffer = malloc(2048);
    
    // Initialize gzip compression stream
    enc->gzip_stream.zalloc = Z_NULL;
    enc->gzip_stream.zfree = Z_NULL;
    enc->gzip_stream.opaque = Z_NULL;
    
    int gzip_init_result = deflateInit2(&enc->gzip_stream, Z_DEFAULT_COMPRESSION, 
                                       Z_DEFLATED, 15 + 16, 8, Z_DEFAULT_STRATEGY); // 15+16 for gzip format
    
    return (enc->current_rgb && enc->previous_rgb && enc->reference_rgb && 
            enc->rgb_workspace && enc->dct_workspace && enc->block_data && enc->compressed_buffer && 
            enc->mp2_buffer && gzip_init_result == Z_OK);
}

// Cleanup
static void cleanup_encoder(tev_encoder_t *enc) {
    if (!enc) return;
    
    if (enc->ffmpeg_video_pipe) pclose(enc->ffmpeg_video_pipe);
    if (enc->mp2_file) fclose(enc->mp2_file);
    deflateEnd(&enc->gzip_stream);
    
    free(enc->input_file);
    free(enc->output_file);
    free(enc->current_rgb);
    free(enc->previous_rgb);
    free(enc->reference_rgb);
    free(enc->rgb_workspace);
    free(enc->dct_workspace);
    free(enc->block_data);
    free(enc->compressed_buffer);
    free(enc->mp2_buffer);
    
    unlink(TEMP_AUDIO_FILE);
    free(enc);
}

// Print usage
static void print_usage(const char *program_name) {
    printf("TSVM Enhanced Video (TEV) Encoder\n\n");
    printf("Usage: %s [options] input_video\n\n", program_name);
    printf("Options:\n");
    printf("  -o, --output FILE    Output TEV file (default: stdout)\n");
    printf("  -s, --size WxH       Video resolution (default: 560x448)\n");
    printf("  -q, --quality N      Quality level 0-7 (default: 5)\n");
    printf("  -h, --help           Show this help\n\n");
    printf("TEV Features:\n");
    printf("  - 8x8 DCT-based compression with motion compensation\n");
    printf("  - Native 4096-color support (4:4:4 RGB)\n");
    printf("  - Zstd compression for optimal efficiency\n");
    printf("  - Hardware-accelerated encoding functions\n\n");
    printf("Examples:\n");
    printf("  %s input.mp4 -o output.tev\n", program_name);
    printf("  %s input.avi -s 1024x768 -q 7 -o output.tev\n", program_name);
}

int main(int argc, char *argv[]) {
    tev_encoder_t *enc = init_encoder();
    if (!enc) {
        fprintf(stderr, "Failed to initialize encoder\n");
        return 1;
    }
    
    // Parse arguments
    static struct option long_options[] = {
        {"output", required_argument, 0, 'o'},
        {"size", required_argument, 0, 's'},
        {"quality", required_argument, 0, 'q'},
        {"help", no_argument, 0, 'h'},
        {0, 0, 0, 0}
    };
    
    int c;
    while ((c = getopt_long(argc, argv, "o:s:q:h", long_options, NULL)) != -1) {
        switch (c) {
            case 'o':
                enc->output_file = strdup(optarg);
                enc->output_to_stdout = 0;
                break;
            case 's':
                if (sscanf(optarg, "%dx%d", &enc->width, &enc->height) != 2) {
                    fprintf(stderr, "Invalid resolution: %s\n", optarg);
                    cleanup_encoder(enc);
                    return 1;
                }
                break;
            case 'q':
                enc->quality = atoi(optarg);
                if (enc->quality < 0 || enc->quality > 7) {
                    fprintf(stderr, "Quality must be 0-7\n");
                    cleanup_encoder(enc);
                    return 1;
                }
                break;
            case 'h':
                print_usage(argv[0]);
                cleanup_encoder(enc);
                return 0;
            default:
                print_usage(argv[0]);
                cleanup_encoder(enc);
                return 1;
        }
    }
    
    if (optind >= argc) {
        fprintf(stderr, "Input file required\n");
        print_usage(argv[0]);
        cleanup_encoder(enc);
        return 1;
    }
    
    enc->input_file = strdup(argv[optind]);
    
    // Initialize
    if (!get_video_metadata(enc) || !allocate_buffers(enc) || 
        !start_video_conversion(enc) || !start_audio_conversion(enc)) {
        cleanup_encoder(enc);
        return 1;
    }
    
    FILE *output = enc->output_to_stdout ? stdout : fopen(enc->output_file, "wb");
    if (!output) {
        fprintf(stderr, "Failed to open output\n");
        cleanup_encoder(enc);
        return 1;
    }
    
    write_tev_header(enc, output);
    gettimeofday(&enc->start_time, NULL);
    enc->total_output_bytes = 8 + 1 + 1 + 2 + 2 + 2 + 4 + 1 + 5; // TEV header size
    
    // Process all frames
    for (int frame = 1; frame <= enc->total_frames; frame++) {
        int result = process_frame(enc, frame, output);
        if (result <= 0) break;
    }
    
    fprintf(stderr, "\nEncoding complete\n");
    
    if (!enc->output_to_stdout) {
        fclose(output);
        fprintf(stderr, "Output: %s (%.1f MB)\n", enc->output_file, 
                enc->total_output_bytes / (1024.0 * 1024.0));
    }
    
    cleanup_encoder(enc);
    return 0;
}