mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-06-09 06:34:04 +09:00
monoblock TAV
This commit is contained in:
@@ -806,12 +806,6 @@ similar to JPEG2000, providing superior compression efficiency and scalability c
|
|||||||
to DCT-based codecs like TEV. Features include multi-resolution encoding, progressive
|
to DCT-based codecs like TEV. Features include multi-resolution encoding, progressive
|
||||||
transmission capability, and region-of-interest coding.
|
transmission capability, and region-of-interest coding.
|
||||||
|
|
||||||
## Version History
|
|
||||||
- Version 1.0: Initial DWT-based implementation with 5/3 reversible filter
|
|
||||||
- Version 1.1: Added 9/7 irreversible filter for higher compression
|
|
||||||
- Version 1.2: Multi-resolution pyramid encoding with up to 4 decomposition levels
|
|
||||||
- Version 1.3: Optimized 112x112 tiles for TSVM resolution with up to 6 decomposition levels
|
|
||||||
|
|
||||||
# File Structure
|
# File Structure
|
||||||
\x1F T S V M T A V
|
\x1F T S V M T A V
|
||||||
[HEADER]
|
[HEADER]
|
||||||
|
|||||||
@@ -184,12 +184,15 @@ typedef struct {
|
|||||||
int intra_only; // Force all tiles to use INTRA mode (disable delta encoding)
|
int intra_only; // Force all tiles to use INTRA mode (disable delta encoding)
|
||||||
int monoblock; // Single DWT tile mode (encode entire frame as one tile)
|
int monoblock; // Single DWT tile mode (encode entire frame as one tile)
|
||||||
|
|
||||||
// Frame buffers
|
// Frame buffers - ping-pong implementation
|
||||||
|
uint8_t *frame_rgb[2]; // [0] and [1] alternate between current and previous
|
||||||
|
int frame_buffer_index; // 0 or 1, indicates which set is "current"
|
||||||
|
float *current_frame_y, *current_frame_co, *current_frame_cg;
|
||||||
|
|
||||||
|
// Convenience pointers (updated each frame to point to current ping-pong buffers)
|
||||||
uint8_t *current_frame_rgb;
|
uint8_t *current_frame_rgb;
|
||||||
uint8_t *previous_frame_rgb;
|
uint8_t *previous_frame_rgb;
|
||||||
float *current_frame_y, *current_frame_co, *current_frame_cg;
|
|
||||||
float *previous_frame_y, *previous_frame_co, *previous_frame_cg;
|
|
||||||
|
|
||||||
// Tile processing
|
// Tile processing
|
||||||
int tiles_x, tiles_y;
|
int tiles_x, tiles_y;
|
||||||
dwt_tile_t *tiles;
|
dwt_tile_t *tiles;
|
||||||
@@ -235,6 +238,16 @@ typedef struct {
|
|||||||
|
|
||||||
// Wavelet filter constants removed - using lifting scheme implementation instead
|
// Wavelet filter constants removed - using lifting scheme implementation instead
|
||||||
|
|
||||||
|
// Swap ping-pong frame buffers (eliminates need for memcpy)
|
||||||
|
static void swap_frame_buffers(tav_encoder_t *enc) {
|
||||||
|
// Flip the buffer index
|
||||||
|
enc->frame_buffer_index = 1 - enc->frame_buffer_index;
|
||||||
|
|
||||||
|
// Update convenience pointers to point to the new current/previous buffers
|
||||||
|
enc->current_frame_rgb = enc->frame_rgb[enc->frame_buffer_index];
|
||||||
|
enc->previous_frame_rgb = enc->frame_rgb[1 - enc->frame_buffer_index];
|
||||||
|
}
|
||||||
|
|
||||||
// Parse resolution string like "1024x768" with keyword recognition
|
// Parse resolution string like "1024x768" with keyword recognition
|
||||||
static int parse_resolution(const char *res_str, int *width, int *height) {
|
static int parse_resolution(const char *res_str, int *width, int *height) {
|
||||||
if (!res_str) return 0;
|
if (!res_str) return 0;
|
||||||
@@ -293,14 +306,14 @@ static void show_usage(const char *program_name) {
|
|||||||
printf(" -s, --size WxH Video size (default: %dx%d)\n", DEFAULT_WIDTH, DEFAULT_HEIGHT);
|
printf(" -s, --size WxH Video size (default: %dx%d)\n", DEFAULT_WIDTH, DEFAULT_HEIGHT);
|
||||||
printf(" -f, --fps N Output frames per second (enables frame rate conversion)\n");
|
printf(" -f, --fps N Output frames per second (enables frame rate conversion)\n");
|
||||||
printf(" -q, --quality N Quality level 0-5 (default: 2)\n");
|
printf(" -q, --quality N Quality level 0-5 (default: 2)\n");
|
||||||
printf(" -Q, --quantiser Y,Co,Cg Quantiser levels 0-100 for each channel\n");
|
printf(" -Q, --quantiser Y,Co,Cg Quantiser levels 0-255 for each channel (1: lossless, 255: potato)\n");
|
||||||
// printf(" -w, --wavelet N Wavelet filter: 0=5/3 reversible, 1=9/7 irreversible (default: 1)\n");
|
// printf(" -w, --wavelet N Wavelet filter: 0=5/3 reversible, 1=9/7 irreversible (default: 1)\n");
|
||||||
// printf(" -b, --bitrate N Target bitrate in kbps (enables bitrate control mode)\n");
|
// printf(" -b, --bitrate N Target bitrate in kbps (enables bitrate control mode)\n");
|
||||||
printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n");
|
printf(" -S, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n");
|
||||||
printf(" -v, --verbose Verbose output\n");
|
printf(" -v, --verbose Verbose output\n");
|
||||||
printf(" -t, --test Test mode: generate solid colour frames\n");
|
printf(" -t, --test Test mode: generate solid colour frames\n");
|
||||||
printf(" --lossless Lossless mode: use 5/3 reversible wavelet\n");
|
printf(" --lossless Lossless mode: use 5/3 reversible wavelet\n");
|
||||||
printf(" --delta-code Enable delta encoding (improved compression but noisy picture)\n");
|
printf(" --delta Enable delta encoding (improved compression but noisy picture)\n");
|
||||||
printf(" --ictcp Use ICtCp colour space instead of YCoCg-R (use when source is in BT.2100)\n");
|
printf(" --ictcp Use ICtCp colour space instead of YCoCg-R (use when source is in BT.2100)\n");
|
||||||
printf(" --help Show this help\n\n");
|
printf(" --help Show this help\n\n");
|
||||||
|
|
||||||
@@ -321,15 +334,20 @@ static void show_usage(const char *program_name) {
|
|||||||
for (int i = 0; i < 6; i++) {
|
for (int i = 0; i < 6; i++) {
|
||||||
printf("%d: Q %d \t", i, QUALITY_CG[i]);
|
printf("%d: Q %d \t", i, QUALITY_CG[i]);
|
||||||
}
|
}
|
||||||
|
printf("\n\nVideo Size Keywords:");
|
||||||
printf("\n\nFeatures:\n");
|
printf("\n -s cif: equal to 352x288");
|
||||||
|
printf("\n -s qcif: equal to 176x144");
|
||||||
|
printf("\n -s half: equal to %dx%d", DEFAULT_WIDTH >> 1, DEFAULT_HEIGHT >> 1);
|
||||||
|
printf("\n -s default: equal to %dx%d", DEFAULT_WIDTH, DEFAULT_HEIGHT);
|
||||||
|
printf("\n\n");
|
||||||
|
printf("Features:\n");
|
||||||
printf(" - Single DWT tile (monoblock) encoding for optimal quality\n");
|
printf(" - Single DWT tile (monoblock) encoding for optimal quality\n");
|
||||||
printf(" - Full resolution YCoCg-R/ICtCp colour space\n");
|
printf(" - Full resolution YCoCg-R/ICtCp colour space\n");
|
||||||
printf(" - Lossless and lossy compression modes\n");
|
printf(" - Lossless and lossy compression modes\n");
|
||||||
|
|
||||||
printf("\nExamples:\n");
|
printf("\nExamples:\n");
|
||||||
printf(" %s -i input.mp4 -o output.mv3 # Default settings\n", program_name);
|
printf(" %s -i input.mp4 -o output.mv3 # Default settings\n", program_name);
|
||||||
printf(" %s -i input.mkv -q 4 -w 1 -o output.mv3 # Maximum quality with 9/7 wavelet\n", program_name);
|
printf(" %s -i input.mkv -q 4 -o output.mv3 # At maximum quality\n", program_name);
|
||||||
printf(" %s -i input.avi --lossless -o output.mv3 # Lossless encoding\n", program_name);
|
printf(" %s -i input.avi --lossless -o output.mv3 # Lossless encoding\n", program_name);
|
||||||
// printf(" %s -i input.mp4 -b 800 -o output.mv3 # 800 kbps bitrate target\n", program_name);
|
// printf(" %s -i input.mp4 -b 800 -o output.mv3 # 800 kbps bitrate target\n", program_name);
|
||||||
printf(" %s -i input.webm -S subs.srt -o output.mv3 # With subtitles\n", program_name);
|
printf(" %s -i input.webm -S subs.srt -o output.mv3 # With subtitles\n", program_name);
|
||||||
@@ -377,16 +395,18 @@ static int initialize_encoder(tav_encoder_t *enc) {
|
|||||||
}
|
}
|
||||||
int num_tiles = enc->tiles_x * enc->tiles_y;
|
int num_tiles = enc->tiles_x * enc->tiles_y;
|
||||||
|
|
||||||
// Allocate frame buffers
|
// Allocate ping-pong frame buffers
|
||||||
size_t frame_size = enc->width * enc->height;
|
size_t frame_size = enc->width * enc->height;
|
||||||
enc->current_frame_rgb = malloc(frame_size * 3);
|
enc->frame_rgb[0] = malloc(frame_size * 3);
|
||||||
enc->previous_frame_rgb = malloc(frame_size * 3);
|
enc->frame_rgb[1] = malloc(frame_size * 3);
|
||||||
|
|
||||||
|
// Initialize ping-pong buffer index and convenience pointers
|
||||||
|
enc->frame_buffer_index = 0;
|
||||||
|
enc->current_frame_rgb = enc->frame_rgb[0];
|
||||||
|
enc->previous_frame_rgb = enc->frame_rgb[1];
|
||||||
enc->current_frame_y = malloc(frame_size * sizeof(float));
|
enc->current_frame_y = malloc(frame_size * sizeof(float));
|
||||||
enc->current_frame_co = malloc(frame_size * sizeof(float));
|
enc->current_frame_co = malloc(frame_size * sizeof(float));
|
||||||
enc->current_frame_cg = malloc(frame_size * sizeof(float));
|
enc->current_frame_cg = malloc(frame_size * sizeof(float));
|
||||||
enc->previous_frame_y = malloc(frame_size * sizeof(float));
|
|
||||||
enc->previous_frame_co = malloc(frame_size * sizeof(float));
|
|
||||||
enc->previous_frame_cg = malloc(frame_size * sizeof(float));
|
|
||||||
|
|
||||||
// Allocate tile structures
|
// Allocate tile structures
|
||||||
enc->tiles = malloc(num_tiles * sizeof(dwt_tile_t));
|
enc->tiles = malloc(num_tiles * sizeof(dwt_tile_t));
|
||||||
@@ -423,9 +443,8 @@ static int initialize_encoder(tav_encoder_t *enc) {
|
|||||||
enc->previous_coeffs_cg = malloc(total_coeff_size);
|
enc->previous_coeffs_cg = malloc(total_coeff_size);
|
||||||
enc->previous_coeffs_allocated = 0; // Will be set to 1 after first I-frame
|
enc->previous_coeffs_allocated = 0; // Will be set to 1 after first I-frame
|
||||||
|
|
||||||
if (!enc->current_frame_rgb || !enc->previous_frame_rgb ||
|
if (!enc->frame_rgb[0] || !enc->frame_rgb[1] ||
|
||||||
!enc->current_frame_y || !enc->current_frame_co || !enc->current_frame_cg ||
|
!enc->current_frame_y || !enc->current_frame_co || !enc->current_frame_cg ||
|
||||||
!enc->previous_frame_y || !enc->previous_frame_co || !enc->previous_frame_cg ||
|
|
||||||
!enc->tiles || !enc->zstd_ctx || !enc->compressed_buffer ||
|
!enc->tiles || !enc->zstd_ctx || !enc->compressed_buffer ||
|
||||||
!enc->reusable_quantised_y || !enc->reusable_quantised_co || !enc->reusable_quantised_cg ||
|
!enc->reusable_quantised_y || !enc->reusable_quantised_co || !enc->reusable_quantised_cg ||
|
||||||
!enc->previous_coeffs_y || !enc->previous_coeffs_co || !enc->previous_coeffs_cg) {
|
!enc->previous_coeffs_y || !enc->previous_coeffs_co || !enc->previous_coeffs_cg) {
|
||||||
@@ -2180,13 +2199,14 @@ int main(int argc, char *argv[]) {
|
|||||||
{"quantizer", required_argument, 0, 'Q'},
|
{"quantizer", required_argument, 0, 'Q'},
|
||||||
// {"wavelet", required_argument, 0, 'w'},
|
// {"wavelet", required_argument, 0, 'w'},
|
||||||
{"bitrate", required_argument, 0, 'b'},
|
{"bitrate", required_argument, 0, 'b'},
|
||||||
|
{"subtitle", required_argument, 0, 'S'},
|
||||||
{"subtitles", required_argument, 0, 'S'},
|
{"subtitles", required_argument, 0, 'S'},
|
||||||
{"verbose", no_argument, 0, 'v'},
|
{"verbose", no_argument, 0, 'v'},
|
||||||
{"test", no_argument, 0, 't'},
|
{"test", no_argument, 0, 't'},
|
||||||
{"lossless", no_argument, 0, 1000},
|
{"lossless", no_argument, 0, 1000},
|
||||||
{"delta-code", no_argument, 0, 1006},
|
{"delta", no_argument, 0, 1006},
|
||||||
{"ictcp", no_argument, 0, 1005},
|
{"ictcp", no_argument, 0, 1005},
|
||||||
{"help", no_argument, 0, 1004},
|
{"help", no_argument, 0, '?'},
|
||||||
{0, 0, 0, 0}
|
{0, 0, 0, 0}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -2509,13 +2529,8 @@ int main(int argc, char *argv[]) {
|
|||||||
count_pframe++;
|
count_pframe++;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Copy current frame to previous frame buffer
|
// Swap ping-pong buffers (eliminates memcpy operations)
|
||||||
size_t float_frame_size = enc->width * enc->height * sizeof(float);
|
swap_frame_buffers(enc);
|
||||||
size_t rgb_frame_size = enc->width * enc->height * 3;
|
|
||||||
memcpy(enc->previous_frame_y, enc->current_frame_y, float_frame_size);
|
|
||||||
memcpy(enc->previous_frame_co, enc->current_frame_co, float_frame_size);
|
|
||||||
memcpy(enc->previous_frame_cg, enc->current_frame_cg, float_frame_size);
|
|
||||||
memcpy(enc->previous_frame_rgb, enc->current_frame_rgb, rgb_frame_size);
|
|
||||||
|
|
||||||
frame_count++;
|
frame_count++;
|
||||||
enc->frame_count = frame_count;
|
enc->frame_count = frame_count;
|
||||||
@@ -2586,14 +2601,8 @@ static void cleanup_encoder(tav_encoder_t *enc) {
|
|||||||
free(enc->input_file);
|
free(enc->input_file);
|
||||||
free(enc->output_file);
|
free(enc->output_file);
|
||||||
free(enc->subtitle_file);
|
free(enc->subtitle_file);
|
||||||
free(enc->current_frame_rgb);
|
free(enc->frame_rgb[0]);
|
||||||
free(enc->previous_frame_rgb);
|
free(enc->frame_rgb[1]);
|
||||||
free(enc->current_frame_y);
|
|
||||||
free(enc->current_frame_co);
|
|
||||||
free(enc->current_frame_cg);
|
|
||||||
free(enc->previous_frame_y);
|
|
||||||
free(enc->previous_frame_co);
|
|
||||||
free(enc->previous_frame_cg);
|
|
||||||
free(enc->tiles);
|
free(enc->tiles);
|
||||||
free(enc->compressed_buffer);
|
free(enc->compressed_buffer);
|
||||||
free(enc->mp2_buffer);
|
free(enc->mp2_buffer);
|
||||||
|
|||||||
@@ -2626,6 +2626,7 @@ int main(int argc, char *argv[]) {
|
|||||||
{"input", required_argument, 0, 'i'},
|
{"input", required_argument, 0, 'i'},
|
||||||
{"output", required_argument, 0, 'o'},
|
{"output", required_argument, 0, 'o'},
|
||||||
{"size", required_argument, 0, 's'},
|
{"size", required_argument, 0, 's'},
|
||||||
|
{"subtitle", required_argument, 0, 'S'},
|
||||||
{"subtitles", required_argument, 0, 'S'},
|
{"subtitles", required_argument, 0, 'S'},
|
||||||
{"fps", required_argument, 0, 'f'},
|
{"fps", required_argument, 0, 'f'},
|
||||||
{"quality", required_argument, 0, 'q'},
|
{"quality", required_argument, 0, 'q'},
|
||||||
|
|||||||
Reference in New Issue
Block a user