wip2

2026-06-09 06:34:04 +09:00 · 2025-09-13 13:32:14 +09:00
parent 62d6ee94cf
commit dca09cf4a3
1 changed files with 470 additions and 4 deletions
--- a/video_encoder/encoder_tav.c
+++ b/video_encoder/encoder_tav.c
@@ -231,7 +231,7 @@ static void cleanup_encoder(tav_encoder_t *enc);
 static int initialize_encoder(tav_encoder_t *enc);
 static int encode_frame(tav_encoder_t *enc, int frame_num, int is_keyframe);
 static void rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height);
-static void dwt_2d_forward(float *input, dwt_tile_t *tile, int filter_type);
+static void dwt_2d_forward(float *tile_data, int levels, int filter_type);
 static void dwt_2d_inverse(dwt_tile_t *tile, float *output, int filter_type);
 static void quantize_subbands(dwt_tile_t *tile, int q_y, int q_co, int q_cg, float rcf);
 static int estimate_motion_64x64(const float *current, const float *reference, 
@@ -356,6 +356,321 @@ static int initialize_encoder(tav_encoder_t *enc) {
    return 0;
 }
 // =============================================================================
 // DWT Implementation - 5/3 Reversible and 9/7 Irreversible Filters
 // =============================================================================
 // Forward 1D 5/3 lifting transform, performed in place.
 //
 // data:   samples to transform; overwritten with the coefficients
 // length: number of samples; fewer than 2 leaves data untouched
 //
 // Output layout, with half = length / 2:
 //   data[0 .. half-1]        low-pass coefficients
 //   data[half .. 2*half-1]   high-pass coefficients
 //   data[length-1]           odd lengths only: the final sample, copied through unchanged
 //
 // Terminates the process with a message on stderr if the scratch buffer
 // cannot be allocated (the function has no way to report failure).
 static void dwt_53_forward_1d(float *data, int length) {
    if (length < 2) return;
    const int half = length / 2;
    float *temp = malloc((size_t)length * sizeof *temp);
    if (!temp) {
        fprintf(stderr, "Error: out of memory in dwt_53_forward_1d\n");
        abort();
    }
    // Predict step (high-pass): odd sample minus the mean of its even neighbours.
    for (int i = 0; i < half; i++) {
        const float left = data[2 * i];
        // Past the end of the signal the right neighbour is mirrored onto the left one.
        const float right = (2 * i + 2 < length) ? data[2 * i + 2] : left;
        const float pred = 0.5f * (left + right);
        temp[half + i] = data[2 * i + 1] - pred;
    }
    // Update step (low-pass): even sample plus a quarter of the adjacent details.
    // The boundary rule drops the previous detail at i == 0 and the current
    // detail at i == half - 1; dwt_53_inverse_1d undoes exactly this rule, so
    // the two must be changed together.
    for (int i = 0; i < half; i++) {
        const float prev_detail = (i > 0) ? temp[half + i - 1] : 0;
        const float this_detail = (i < half - 1) ? temp[half + i] : 0;
        const float update = 0.25f * (prev_detail + this_detail);
        temp[i] = data[2 * i] + update;
    }
    // An odd-length signal has one even sample without an odd partner; keep it
    // as-is in the last slot so every output element is defined.
    if (length % 2 != 0) {
        temp[length - 1] = data[length - 1];
    }
    // Copy back
    memcpy(data, temp, (size_t)length * sizeof *temp);
    free(temp);
 }
 // Inverse 1D 5/3 lifting transform, performed in place.
 //
 // data:   coefficients to reconstruct from; overwritten with the samples
 // length: number of elements; fewer than 2 leaves data untouched
 //
 // Expected input layout, with half = length / 2:
 //   data[0 .. half-1]        low-pass coefficients
 //   data[half .. 2*half-1]   high-pass coefficients
 //   data[length-1]           odd lengths only: treated as an untransformed
 //                            final sample and copied through unchanged
 //
 // Terminates the process with a message on stderr if the scratch buffer
 // cannot be allocated (the function has no way to report failure).
 static void dwt_53_inverse_1d(float *data, int length) {
    if (length < 2) return;
    const int half = length / 2;
    float *temp = malloc((size_t)length * sizeof *temp);
    if (!temp) {
        fprintf(stderr, "Error: out of memory in dwt_53_inverse_1d\n");
        abort();
    }
    // For odd lengths the trailing element is the final even sample. It must be
    // in place before the predict step, which reads it as a right neighbour.
    if (length % 2 != 0) {
        temp[length - 1] = data[length - 1];
    }
    // Inverse update step: recover the even samples. The boundary rule (no
    // previous detail at i == 0, no current detail at i == half - 1) mirrors
    // the one used by dwt_53_forward_1d.
    for (int i = 0; i < half; i++) {
        const float prev_detail = (i > 0) ? data[half + i - 1] : 0;
        const float this_detail = (i < half - 1) ? data[half + i] : 0;
        const float update = 0.25f * (prev_detail + this_detail);
        temp[2 * i] = data[i] - update;
    }
    // Inverse predict step: recover the odd samples from the restored evens.
    for (int i = 0; i < half; i++) {
        const float left = temp[2 * i];
        // Past the end of the signal the right neighbour is mirrored onto the left one.
        const float right = (2 * i + 2 < length) ? temp[2 * i + 2] : left;
        const float pred = 0.5f * (left + right);
        temp[2 * i + 1] = data[half + i] + pred;
    }
    // Copy back
    memcpy(data, temp, (size_t)length * sizeof *temp);
    free(temp);
 }
 // Forward 1D 9/7 lifting transform, performed in place.
 //
 // data:   samples to transform; overwritten with the coefficients
 // length: number of samples; fewer than 2 leaves data untouched
 //
 // Output layout, with half = length / 2:
 //   data[0 .. half-1]        low-pass coefficients (scaled by K)
 //   data[half .. 2*half-1]   high-pass coefficients (scaled by 1/K)
 //   data[length-1]           odd lengths only: the final sample, copied through
 //                            unchanged and not part of the transform
 //
 // Each predict step combines a detail with the two approximation samples on
 // either side of it (low[i], low[i+1]); each update step combines an
 // approximation with the two details on either side of it (high[i-1], high[i]).
 // Neighbours that fall outside the signal are mirrored onto the nearest
 // in-range one.
 //
 // NOTE(review): any matching inverse/decoder must use the same neighbour
 // indexing and scaling convention — confirm before relying on round trips.
 //
 // Terminates the process with a message on stderr if the scratch buffer
 // cannot be allocated (the function has no way to report failure).
 static void dwt_97_forward_1d(float *data, int length) {
    if (length < 2) return;
    const int half = length / 2;
    float *temp = malloc((size_t)length * sizeof *temp);
    if (!temp) {
        fprintf(stderr, "Error: out of memory in dwt_97_forward_1d\n");
        abort();
    }
    float *low = temp;          // even samples -> approximation
    float *high = temp + half;  // odd samples  -> detail
    // Split into even/odd samples
    for (int i = 0; i < half; i++) {
        low[i] = data[2 * i];
        high[i] = data[2 * i + 1];
    }
    // An odd-length signal has one unpaired trailing sample; carry it through
    // so every output element is defined.
    if (length % 2 != 0) {
        temp[length - 1] = data[length - 1];
    }
    // 9/7 lifting coefficients
    const float alpha = -1.586134342f;
    const float beta = -0.052980118f;
    const float gamma = 0.882911076f;
    const float delta = 0.443506852f;
    const float K = 1.230174105f;
    // First lifting step (predict): high[i] += alpha * (low[i] + low[i+1])
    for (int i = 0; i < half; i++) {
        const float next = (i + 1 < half) ? low[i + 1] : low[i];
        high[i] += alpha * (low[i] + next);
    }
    // Second lifting step (update): low[i] += beta * (high[i-1] + high[i])
    for (int i = 0; i < half; i++) {
        const float prev = (i > 0) ? high[i - 1] : high[i];
        low[i] += beta * (prev + high[i]);
    }
    // Third lifting step (predict): high[i] += gamma * (low[i] + low[i+1])
    for (int i = 0; i < half; i++) {
        const float next = (i + 1 < half) ? low[i + 1] : low[i];
        high[i] += gamma * (low[i] + next);
    }
    // Fourth lifting step (update): low[i] += delta * (high[i-1] + high[i])
    for (int i = 0; i < half; i++) {
        const float prev = (i > 0) ? high[i - 1] : high[i];
        low[i] += delta * (prev + high[i]);
    }
    // Scaling
    for (int i = 0; i < half; i++) {
        low[i] *= K;
        high[i] /= K;
    }
    memcpy(data, temp, (size_t)length * sizeof *temp);
    free(temp);
 }
 // 2D DWT forward transform for a 64x64 tile, performed in place.
 //
 // tile_data:   64*64 floats in row-major order (row stride 64); overwritten
 // levels:      number of decomposition levels requested; level k works on the
 //              top-left (64 >> k) x (64 >> k) square, and the loop stops early
 //              once that square is smaller than 2x2
 // filter_type: WAVELET_5_3_REVERSIBLE selects the 5/3 filter, any other value
 //              selects the 9/7 filter
 //
 // Each level runs the 1D transform over every row of the active square and
 // then over every column. Rows and columns are staged through one fixed-size
 // stack buffer, so this function itself performs no heap allocation.
 static void dwt_2d_forward(float *tile_data, int levels, int filter_type) {
    float line[64];
    const int size = (int)(sizeof line / sizeof line[0]);
    const int use_53 = (filter_type == WAVELET_5_3_REVERSIBLE);
    for (int level = 0; level < levels; level++) {
        const int current_size = size >> level;
        if (current_size < 2) break;
        // Row transform
        for (int y = 0; y < current_size; y++) {
            float *row = tile_data + y * size;
            memcpy(line, row, (size_t)current_size * sizeof line[0]);
            if (use_53) {
                dwt_53_forward_1d(line, current_size);
            } else {
                dwt_97_forward_1d(line, current_size);
            }
            memcpy(row, line, (size_t)current_size * sizeof line[0]);
        }
        // Column transform (strided gather/scatter through the same buffer)
        for (int x = 0; x < current_size; x++) {
            for (int y = 0; y < current_size; y++) {
                line[y] = tile_data[y * size + x];
            }
            if (use_53) {
                dwt_53_forward_1d(line, current_size);
            } else {
                dwt_97_forward_1d(line, current_size);
            }
            for (int y = 0; y < current_size; y++) {
                tile_data[y * size + x] = line[y];
            }
        }
    }
 }
 // Quantization hook for the DWT subbands of one tile (not yet implemented).
 //
 // tile:             tile whose coefficients are to be quantized; currently untouched
 // q_y, q_co, q_cg:  base quantizers for the Y, Co and Cg channels
 // rcf:              rate control factor multiplied into each base quantizer
 //
 // At present this only derives the per-channel effective quantizers
 // (scaled by rcf, truncated to int, clamped to 1..255) and discards them.
 // NOTE(review): the forward declaration visible earlier in this file names
 // quantize_subbands with the same parameter list — confirm which name is intended.
 static void quantize_dwt_tile(dwt_tile_t *tile, int q_y, int q_co, int q_cg, float rcf) {
    const int base_q[3] = { q_y, q_co, q_cg };  // order: Y, Co, Cg
    int effective_q[3];
    for (int ch = 0; ch < 3; ch++) {
        // Apply the rate control factor, then keep the result in the valid range.
        int scaled = (int)(base_q[ch] * rcf);
        scaled = CLAMP(scaled, 1, 255);
        effective_q[ch] = scaled;
    }
    // TODO: Apply quantization to each subband based on frequency and channel
    // Different quantization strategies for LL, LH, HL, HH subbands
    // More aggressive quantization for higher frequency subbands
 }
 // Motion estimation for one 64x64 tile: exhaustive integer-pixel search using SAD.
 //
 // current, reference: planes of width*height floats, row-major
 // width, height:      plane dimensions in pixels
 // tile_x, tile_y:     tile coordinates in units of 64 pixels
 // mv:                 receives the best vector in quarter-pixel units
 //                     (integer offset * 4) and rate_control_factor = 1.0f
 //
 // Returns the SAD of the chosen candidate, or INT_MAX when no candidate could
 // be evaluated (mv is then the zero vector).
 //
 // Details:
 //   - Each pixel difference is truncated to int before being accumulated.
 //   - A tile that overhangs the right/bottom frame edge is compared only over
 //     the part that lies inside the frame; indexing past the row end would
 //     otherwise read pixels belonging to other rows. A candidate is valid when
 //     that visible part of the displaced block lies inside the reference.
 //     NOTE(review): confirm the decoder's motion compensation handles
 //     partial tiles the same way.
 //   - When several candidates share the lowest SAD, the one with the smallest
 //     |dx| + |dy| wins, so flat or static content yields the zero vector
 //     rather than whichever corner of the search window was scanned first.
 static int estimate_motion_64x64(const float *current, const float *reference,
                                  int width, int height, int tile_x, int tile_y,
                                  motion_vector_t *mv) {
    const int tile_size = 64;
    const int search_range = 16;  // ±16 pixels
    const int start_x = tile_x * tile_size;
    const int start_y = tile_y * tile_size;
    // Portion of the tile that actually lies inside the frame.
    int block_w = width - start_x;
    int block_h = height - start_y;
    if (block_w > tile_size) block_w = tile_size;
    if (block_h > tile_size) block_h = tile_size;
    int best_dx = 0, best_dy = 0;
    int best_len = INT_MAX;  // |dx| + |dy| of the current best candidate
    int min_sad = INT_MAX;
    if (start_x >= 0 && start_y >= 0 && block_w > 0 && block_h > 0) {
        // Search within ±16 pixel range
        for (int dy = -search_range; dy <= search_range; dy++) {
            for (int dx = -search_range; dx <= search_range; dx++) {
                const int ref_x = start_x + dx;
                const int ref_y = start_y + dy;
                // Check bounds
                if (ref_x < 0 || ref_y < 0 ||
                    ref_x + block_w > width || ref_y + block_h > height) {
                    continue;
                }
                // Calculate SAD; stop as soon as this candidate can no longer
                // win or tie.
                int sad = 0;
                for (int y = 0; y < block_h && sad <= min_sad; y++) {
                    const float *cur_row = current + (size_t)(start_y + y) * (size_t)width + (size_t)start_x;
                    const float *ref_row = reference + (size_t)(ref_y + y) * (size_t)width + (size_t)ref_x;
                    for (int x = 0; x < block_w; x++) {
                        const int diff = (int)(cur_row[x] - ref_row[x]);
                        sad += abs(diff);
                    }
                }
                const int len = abs(dx) + abs(dy);
                if (sad < min_sad || (sad == min_sad && len < best_len)) {
                    min_sad = sad;
                    best_len = len;
                    best_dx = dx;
                    best_dy = dy;
                }
            }
        }
    }
    mv->mv_x = best_dx * 4;  // Convert to 1/4 pixel precision
    mv->mv_y = best_dy * 4;
    mv->rate_control_factor = 1.0f;  // TODO: Calculate based on complexity
    return min_sad;
 }
 // Convert a packed 8-bit RGB image into separate float Y, Co and Cg planes.
 //
 // rgb:           width*height pixels, 3 bytes each in R, G, B order
 // y, co, cg:     output planes, width*height floats each
 // width, height: image dimensions in pixels
 //
 // Uses the YCoCg-R lifting equations evaluated in floating point (the
 // halvings are float divisions, not integer shifts).
 static void rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height) {
    const int pixel_count = width * height;
    const uint8_t *px = rgb;
    for (int n = 0; n < pixel_count; n++, px += 3) {
        const float red = px[0];
        const float green = px[1];
        const float blue = px[2];
        // Lifting sequence: Co first, then an intermediate, then Cg and Y.
        const float orange = red - blue;
        const float mid = blue + orange / 2;
        const float green_diff = green - mid;
        co[n] = orange;
        cg[n] = green_diff;
        y[n] = mid + green_diff / 2;
    }
 }
 // Write the 32-byte TAV file header to enc->output_fp.
 //
 // Layout, in order: 8-byte magic, 1-byte version, 16-bit width, 16-bit height,
 // 1-byte fps, 32-bit total frame count, 1 byte each for wavelet filter,
 // decomposition levels and the Y/Co/Cg quantizers, 1 byte of feature flags,
 // 1 byte of video flags, 7 reserved zero bytes.
 //
 // Multi-byte fields are emitted least-significant byte first, which is
 // byte-for-byte what writing the native value produces on a little-endian
 // host, but no longer depends on host byte order or on the in-memory width of
 // the struct fields.
 // NOTE(review): confirm the TAV format specifies little-endian fields.
 //
 // Returns 0 on success, -1 if enc or its output stream is missing or the
 // header could not be written in full.
 static int write_tav_header(tav_encoder_t *enc) {
    if (!enc || !enc->output_fp) return -1;
    uint8_t header[32];
    uint8_t *p = header;
    // Magic number
    memcpy(p, TAV_MAGIC, 8);
    p += 8;
    // Version
    *p++ = (uint8_t)TAV_VERSION;
    // Video parameters
    const uint16_t width = (uint16_t)enc->width;
    const uint16_t height = (uint16_t)enc->height;
    const uint32_t total_frames = (uint32_t)enc->total_frames;
    *p++ = (uint8_t)(width & 0xFF);
    *p++ = (uint8_t)(width >> 8);
    *p++ = (uint8_t)(height & 0xFF);
    *p++ = (uint8_t)(height >> 8);
    *p++ = (uint8_t)enc->fps;
    *p++ = (uint8_t)(total_frames & 0xFF);
    *p++ = (uint8_t)((total_frames >> 8) & 0xFF);
    *p++ = (uint8_t)((total_frames >> 16) & 0xFF);
    *p++ = (uint8_t)((total_frames >> 24) & 0xFF);
    // Encoder parameters
    *p++ = (uint8_t)enc->wavelet_filter;
    *p++ = (uint8_t)enc->decomp_levels;
    *p++ = (uint8_t)enc->quantizer_y;
    *p++ = (uint8_t)enc->quantizer_co;
    *p++ = (uint8_t)enc->quantizer_cg;
    // Feature flags
    uint8_t extra_flags = 0x01;  // Has audio (placeholder: always set)
    if (enc->subtitle_file) extra_flags |= 0x02;  // Has subtitles
    if (enc->enable_progressive_transmission) extra_flags |= 0x04;
    if (enc->enable_roi) extra_flags |= 0x08;
    *p++ = extra_flags;
    uint8_t video_flags = 0;
    if (!enc->progressive) video_flags |= 0x01;  // Interlaced
    if (enc->fps == 29 || enc->fps == 30) video_flags |= 0x02;  // NTSC
    if (enc->lossless) video_flags |= 0x04;  // Lossless
    if (enc->decomp_levels > 1) video_flags |= 0x08;  // Multi-resolution
    *p++ = video_flags;
    // Reserved bytes (7 bytes)
    memset(p, 0, 7);
    p += 7;
    // Emit the whole header in one call so a short write is detectable.
    const size_t header_len = (size_t)(p - header);
    if (fwrite(header, 1, header_len, enc->output_fp) != header_len) {
        return -1;
    }
    return 0;
 }
 // Encode a single frame (stub).
 //
 // enc:         encoder state; only total_frames is read here
 // frame_num:   zero-based frame index (printed one-based)
 // is_keyframe: non-zero to report the frame as an I-frame, zero for a P-frame
 //
 // Currently only prints a progress line and returns 0; the stages below are
 // still to be implemented.
 static int encode_frame(tav_encoder_t *enc, int frame_num, int is_keyframe) {
    // TODO: Read frame data from FFmpeg pipe
    // TODO: Convert RGB to YCoCg
    // TODO: Process tiles with DWT
    // TODO: Apply motion estimation for P-frames
    // TODO: Quantize and compress tile data
    // TODO: Write packet to output file
    const char *frame_kind = "P-frame";
    if (is_keyframe) {
        frame_kind = "I-frame";
    }
    const int display_index = frame_num + 1;
    printf("Encoding frame %d/%d (%s)\n", display_index, enc->total_frames, frame_kind);
    return 0;
 }
 // Main function
 int main(int argc, char *argv[]) {
    generate_random_filename(TEMP_AUDIO_FILE);
@@ -439,7 +754,7 @@ int main(int argc, char *argv[]) {
        }
    }
-    if (!enc->input_file || !enc->output_file) {
+    if ((!enc->input_file && !enc->test_mode) || !enc->output_file) {
        fprintf(stderr, "Error: Input and output files must be specified\n");
        show_usage(argv[0]);
        cleanup_encoder(enc);
@@ -460,8 +775,159 @@ int main(int argc, char *argv[]) {
    printf("Decomposition levels: %d\n", enc->decomp_levels);
    printf("Quality: Y=%d, Co=%d, Cg=%d\n", enc->quantizer_y, enc->quantizer_co, enc->quantizer_cg);
-    // TODO: Implement actual encoding pipeline
+    // Open output file
-    printf("Note: TAV encoder implementation in progress...\n");
+    if (strcmp(enc->output_file, "-") == 0) {
        enc->output_fp = stdout;
    } else {
        enc->output_fp = fopen(enc->output_file, "wb");
        if (!enc->output_fp) {
            fprintf(stderr, "Error: Cannot open output file %s\n", enc->output_file);
            cleanup_encoder(enc);
            return 1;
        }
    }
    // Start FFmpeg process for video input
    char ffmpeg_cmd[1024];
    if (enc->test_mode) {
        // Test mode - generate solid color frames
        snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd),
            "ffmpeg -f lavfi -i color=gray:size=%dx%d:duration=5:rate=%d "
            "-f rawvideo -pix_fmt rgb24 -",
            enc->width, enc->height, enc->fps);
        enc->total_frames = enc->fps * 5;  // 5 seconds of test video
    } else {
        // Normal mode - read from input file
        snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd),
            "ffmpeg -i \"%s\" -f rawvideo -pix_fmt rgb24 "
            "-s %dx%d -r %d -",
            enc->input_file, enc->width, enc->height, enc->fps);
        // Get total frame count (simplified)
        enc->total_frames = 300; // Placeholder - should be calculated from input
    }
    if (enc->verbose) {
        printf("FFmpeg command: %s\n", ffmpeg_cmd);
    }
    enc->ffmpeg_video_pipe = popen(ffmpeg_cmd, "r");
    if (!enc->ffmpeg_video_pipe) {
        fprintf(stderr, "Error: Failed to start FFmpeg process\n");
        cleanup_encoder(enc);
        return 1;
    }
    // Write TAV header
    if (write_tav_header(enc) != 0) {
        fprintf(stderr, "Error: Failed to write TAV header\n");
        cleanup_encoder(enc);
        return 1;
    }
    printf("Starting encoding...\n");
    // Main encoding loop
    int keyframe_interval = 30;  // I-frame every 30 frames
    size_t frame_size = enc->width * enc->height * 3;  // RGB24
    for (int frame = 0; frame < enc->total_frames; frame++) {
        // Read frame from FFmpeg
        size_t bytes_read = fread(enc->current_frame_rgb, 1, frame_size, enc->ffmpeg_video_pipe);
        if (bytes_read != frame_size) {
            if (feof(enc->ffmpeg_video_pipe)) {
                printf("End of input reached at frame %d\n", frame);
                break;
            } else {
                fprintf(stderr, "Error reading frame %d\n", frame);
                break;
            }
        }
        // Determine frame type
        int is_keyframe = (frame % keyframe_interval == 0);
        // Convert RGB to YCoCg
        rgb_to_ycocg(enc->current_frame_rgb, 
                     enc->current_frame_y, enc->current_frame_co, enc->current_frame_cg,
                     enc->width, enc->height);
        // Process tiles
        int num_tiles = enc->tiles_x * enc->tiles_y;
        for (int tile_idx = 0; tile_idx < num_tiles; tile_idx++) {
            int tile_x = tile_idx % enc->tiles_x;
            int tile_y = tile_idx / enc->tiles_x;
            // Extract 64x64 tile data
            float tile_y_data[64 * 64];
            float tile_co_data[64 * 64];
            float tile_cg_data[64 * 64];
            for (int y = 0; y < 64; y++) {
                for (int x = 0; x < 64; x++) {
                    int src_x = tile_x * 64 + x;
                    int src_y = tile_y * 64 + y;
                    int src_idx = src_y * enc->width + src_x;
                    int tile_idx_local = y * 64 + x;
                    if (src_x < enc->width && src_y < enc->height) {
                        tile_y_data[tile_idx_local] = enc->current_frame_y[src_idx];
                        tile_co_data[tile_idx_local] = enc->current_frame_co[src_idx];
                        tile_cg_data[tile_idx_local] = enc->current_frame_cg[src_idx];
                    } else {
                        // Pad with zeros if tile extends beyond frame
                        tile_y_data[tile_idx_local] = 0.0f;
                        tile_co_data[tile_idx_local] = 0.0f;
                        tile_cg_data[tile_idx_local] = 0.0f;
                    }
                }
            }
            // Apply DWT transform
            dwt_2d_forward(tile_y_data, enc->decomp_levels, enc->wavelet_filter);
            dwt_2d_forward(tile_co_data, enc->decomp_levels, enc->wavelet_filter);
            dwt_2d_forward(tile_cg_data, enc->decomp_levels, enc->wavelet_filter);
            // Motion estimation for P-frames
            if (!is_keyframe && frame > 0) {
                estimate_motion_64x64(enc->current_frame_y, enc->previous_frame_y,
                                      enc->width, enc->height, tile_x, tile_y,
                                      &enc->motion_vectors[tile_idx]);
            } else {
                enc->motion_vectors[tile_idx].mv_x = 0;
                enc->motion_vectors[tile_idx].mv_y = 0;
                enc->motion_vectors[tile_idx].rate_control_factor = 1.0f;
            }
        }
        // Write frame packet
        uint8_t packet_type = is_keyframe ? TAV_PACKET_IFRAME : TAV_PACKET_PFRAME;
        // Placeholder: write minimal packet structure
        fwrite(&packet_type, 1, 1, enc->output_fp);
        uint32_t compressed_size = 1024;  // Placeholder
        fwrite(&compressed_size, sizeof(uint32_t), 1, enc->output_fp);
        // Write dummy compressed data
        uint8_t dummy_data[1024] = {0};
        fwrite(dummy_data, 1, compressed_size, enc->output_fp);
        // Copy current frame to previous frame buffer
        memcpy(enc->previous_frame_y, enc->current_frame_y, enc->width * enc->height * sizeof(float));
        memcpy(enc->previous_frame_co, enc->current_frame_co, enc->width * enc->height * sizeof(float));
        memcpy(enc->previous_frame_cg, enc->current_frame_cg, enc->width * enc->height * sizeof(float));
        memcpy(enc->previous_frame_rgb, enc->current_frame_rgb, frame_size);
        enc->frame_count++;
        if (enc->verbose || frame % 30 == 0) {
            printf("Encoded frame %d/%d (%s)\n", frame + 1, enc->total_frames, 
                   is_keyframe ? "I-frame" : "P-frame");
        }
    }
    printf("Encoding completed: %d frames\n", enc->frame_count);
    printf("Output file: %s\n", enc->output_file);
    cleanup_encoder(enc);
    return 0;