diff --git a/assets/disk0/tvdos/bin/playtav.js b/assets/disk0/tvdos/bin/playtav.js
index eff764f..a409e43 100644
--- a/assets/disk0/tvdos/bin/playtav.js
+++ b/assets/disk0/tvdos/bin/playtav.js
@@ -39,6 +39,7 @@ const TAV_PACKET_SUBTITLE = 0x30 // Legacy SSF (frame-locked)
 const TAV_PACKET_SUBTITLE_TC = 0x31 // SSF-TC (timecode-based)
 const TAV_PACKET_AUDIO_BUNDLED = 0x40 // Entire MP2 audio file in single packet
 const TAV_PACKET_EXTENDED_HDR = 0xEF
+const TAV_PACKET_SCREEN_MASK = 0xF2 // Screen masking (letterbox/pillarbox)
 const TAV_PACKET_GOP_SYNC = 0xFC // GOP sync (N frames decoded from GOP block)
 const TAV_PACKET_TIMECODE = 0xFD
 const TAV_PACKET_SYNC_NTSC = 0xFE
@@ -72,6 +73,13 @@ let currentTimecodeNs = 0 // Current playback timecode (updated every frame)
 let baseTimecodeNs = 0 // Base timecode from most recent TIMECODE packet
 let baseTimecodeFrameCount = 0 // Frame count when base timecode was set
 
+// Screen masking (letterbox/pillarbox) state
+let screenMaskEntries = [] // Array of {frameNum, top, right, bottom, left}
+let screenMaskTop = 0
+let screenMaskRight = 0
+let screenMaskBottom = 0
+let screenMaskLeft = 0
+
 // Parse command line options
 let interactive = false
 let filmGrainLevel = null
@@ -739,6 +747,77 @@ function scanForwardToIframe(targetFrame, currentPos) {
 }
 
 // Function to try reading next TAV file header at current position
+// Update active screen mask for the given frame number
+// Screen mask packets are sorted by frameNum, so find the last entry with frameNum <= currentFrameNum
+function updateScreenMask(currentFrameNum) {
+    if (screenMaskEntries.length === 0) {
+        return // No screen mask entries
+    }
+
+    // Find the most recent screen mask entry for this frame
+    // Entries are in order, so scan backwards for efficiency
+    for (let i = screenMaskEntries.length - 1; i >= 0; i--) {
+        if (screenMaskEntries[i].frameNum <= currentFrameNum) {
+            // Apply this mask
+            screenMaskTop = screenMaskEntries[i].top
+            screenMaskRight = screenMaskEntries[i].right
+            screenMaskBottom = screenMaskEntries[i].bottom
+            screenMaskLeft = screenMaskEntries[i].left
+            return
+        }
+    }
+}
+
+// Fill masked regions (letterbox/pillarbox bars) with black
+function fillMaskedRegions() {
+    return // Phase 1: the mask is only tracked, not drawn; remove this line to paint the bars
+//    console.log(`ScrMask: ${screenMaskTop}, ${screenMaskRight}, ${screenMaskBottom}, ${screenMaskLeft}`)
+
+    if (screenMaskTop === 0 && screenMaskRight === 0 &&
+        screenMaskBottom === 0 && screenMaskLeft === 0) {
+        return // No masking
+    }
+
+    const width = header.width
+    const height = header.height
+    const blackRG = 0x00 // R=0, G=0
+    const blackBA = 0x0F // B=0, A=F (opaque black); use 0xF0 / 0xFF (magenta) when testing
+
+    // Fill top letterbox bar
+    for (let y = 0; y < screenMaskTop && y < height; y++) {
+        for (let x = 0; x < width; x++) {
+            graphics.plotPixel(x, y, blackRG)
+            graphics.plotPixel2(x, y, blackBA)
+        }
+    }
+
+    // Fill bottom letterbox bar
+    for (let y = height - screenMaskBottom; y < height; y++) {
+        if (y < 0) continue
+        for (let x = 0; x < width; x++) {
+            graphics.plotPixel(x, y, blackRG)
+            graphics.plotPixel2(x, y, blackBA)
+        }
+    }
+
+    // Fill left pillarbox bar
+    for (let y = 0; y < height; y++) {
+        for (let x = 0; x < screenMaskLeft && x < width; x++) {
+            graphics.plotPixel(x, y, blackRG)
+            graphics.plotPixel2(x, y, blackBA)
+        }
+    }
+
+    // Fill right pillarbox bar
+    for (let y = 0; y < height; y++) {
+        for (let x = width - screenMaskRight; x < width; x++) {
+            if (x < 0) continue
+            graphics.plotPixel(x, y, blackRG)
+            graphics.plotPixel2(x, y, blackBA)
+        }
+    }
+}
+
 function tryReadNextTAVHeader() {
     // Save current position
     let currentPos = seqread.getReadCount()
@@ -1116,6 +1195,9 @@ try {
             // Do nothing - skip to next packet
         }
         else if (packetType === TAV_PACKET_IFRAME || packetType === TAV_PACKET_PFRAME) {
+            // Update active screen mask for this frame (Phase 1: just tracking, not applying)
+            updateScreenMask(frameCount)
+
             // Record I-frame position for seeking
             if (packetType === TAV_PACKET_IFRAME) {
                 iframePositions.push({offset: packetOffset, frameNum: 
frameCount}) @@ -1588,6 +1670,28 @@ try { } } } + else if (packetType === TAV_PACKET_SCREEN_MASK) { + // Screen masking packet (letterbox/pillarbox detection) + // Format: frame_num(4) + top(2) + right(2) + bottom(2) + left(2) = 12 bytes + let frameNum = seqread.readInt() // uint32 frame number + let top = seqread.readOneByte() | (seqread.readOneByte() << 8) + let right = seqread.readOneByte() | (seqread.readOneByte() << 8) + let bottom = seqread.readOneByte() | (seqread.readOneByte() << 8) + let left = seqread.readOneByte() | (seqread.readOneByte() << 8) + + // Store in entries array + screenMaskEntries.push({ + frameNum: frameNum, + top: top, + right: right, + bottom: bottom, + left: left + }) + + if (interactive) { + serial.println(`[SCREEN_MASK] frame=${frameNum} top=${top} right=${right} bottom=${bottom} left=${left}`) + } + } else if (packetType === TAV_PACKET_TIMECODE) { // Timecode packet - time since stream start in nanoseconds let timecodeLow = seqread.readInt() @@ -1789,6 +1893,12 @@ try { graphics.uploadVideoBufferFrameToFramebuffer(currentGopFrameIndex, header.width, header.height, trueFrameCount, bufferOffset) uploadTime = (sys.nanoTime() - uploadStart) / 1000000.0 + // Update active screen mask for this GOP frame + updateScreenMask(frameCount) + + // Fill masked regions with black (letterbox/pillarbox bars) + fillMaskedRegions() + if (interactive && currentGopFrameIndex === 0) { // console.log(`[GOP] Playing GOP: ${currentGopSize} frames from slot ${currentGopBufferSlot}`) } diff --git a/video_encoder/decoder_tav.c b/video_encoder/decoder_tav.c index 3974cc2..722192f 100644 --- a/video_encoder/decoder_tav.c +++ b/video_encoder/decoder_tav.c @@ -32,7 +32,9 @@ #define TAV_PACKET_AUDIO_TAD 0x24 // TAD audio - SUPPORTED (decode to PCMu8) #define TAV_PACKET_AUDIO_TRACK 0x40 // Bundled audio track - SUPPORTED (passthrough) #define TAV_PACKET_SUBTITLE 0x30 // Subtitle - SKIPPED +#define TAV_PACKET_SUBTITLE_TC 0x31 // Subtitle - SKIPPED #define 
TAV_PACKET_EXTENDED_HDR 0xEF // Extended header - SKIPPED
+#define TAV_PACKET_SCREEN_MASK 0xF2 // Screen masking (letterbox/pillarbox) - PARSED
 #define TAV_PACKET_GOP_SYNC 0xFC // GOP sync packet - SKIPPED
 #define TAV_PACKET_TIMECODE 0xFD // Timecode - SKIPPED
 #define TAV_PACKET_SYNC_NTSC 0xFE // NTSC sync - SKIPPED
@@ -1586,6 +1588,15 @@ static void write_wav_header(FILE *fp, uint32_t sample_rate, uint16_t channels,
 // Decoder State Structure
 //=============================================================================
 
+// Screen masking entry (letterbox/pillarbox geometry change)
+typedef struct {
+    uint32_t frame_num;
+    uint16_t top;
+    uint16_t right;
+    uint16_t bottom;
+    uint16_t left;
+} screen_mask_entry_t;
+
 typedef struct {
     FILE *input_fp;
     tav_header_t header;
@@ -1601,6 +1612,16 @@ typedef struct {
     int frame_size;
     int is_monoblock; // True if version 3-6 (single tile mode)
 
+    // Screen masking (letterbox/pillarbox) - array of geometry changes
+    screen_mask_entry_t *screen_masks;
+    int screen_mask_count;
+    int screen_mask_capacity;
+    // Current active mask
+    uint16_t screen_mask_top;
+    uint16_t screen_mask_right;
+    uint16_t screen_mask_bottom;
+    uint16_t screen_mask_left;
+
     // FFmpeg pipe for video only (audio from file)
     FILE *video_pipe;
     pid_t ffmpeg_pid;
@@ -1669,6 +1690,11 @@ static int extract_audio_to_wav(const char *input_file, const char *wav_file, in
             continue;
         }
 
+        if (packet_type == TAV_PACKET_SCREEN_MASK) {
+            fseek(input_fp, 12, SEEK_CUR); // Skip frame_num(4) + top(2) + right(2) + bottom(2) + left(2)
+            continue;
+        }
+
         if (packet_type == TAV_PACKET_GOP_UNIFIED) {
             uint8_t gop_size;
             uint32_t compressed_size;
@@ -1948,10 +1974,83 @@ static void tav_decoder_free(tav_decoder_t *decoder) {
     free(decoder->reference_ycocg_y);
     free(decoder->reference_ycocg_co);
     free(decoder->reference_ycocg_cg);
+    free(decoder->screen_masks);
     free(decoder->audio_file_path);
     free(decoder);
 }
 
+//=============================================================================
+// Screen Mask Management
+//=============================================================================
+
+// Fill masked regions (letterbox/pillarbox bars) with black
+static void fill_masked_regions(uint8_t *frame_rgb, int width, int height,
+                                uint16_t top, uint16_t right, uint16_t bottom, uint16_t left) {
+    // Fill top letterbox bar
+    for (int y = 0; y < top && y < height; y++) {
+        for (int x = 0; x < width; x++) {
+            int offset = (y * width + x) * 3;
+            frame_rgb[offset] = 0; // R
+            frame_rgb[offset + 1] = 0; // G
+            frame_rgb[offset + 2] = 0; // B
+        }
+    }
+
+    // Fill bottom letterbox bar
+    for (int y = height - bottom; y < height; y++) {
+        if (y < 0) continue;
+        for (int x = 0; x < width; x++) {
+            int offset = (y * width + x) * 3;
+            frame_rgb[offset] = 0; // R
+            frame_rgb[offset + 1] = 0; // G
+            frame_rgb[offset + 2] = 0; // B
+        }
+    }
+
+    // Fill left pillarbox bar
+    for (int y = 0; y < height; y++) {
+        for (int x = 0; x < left && x < width; x++) {
+            int offset = (y * width + x) * 3;
+            frame_rgb[offset] = 0; // R
+            frame_rgb[offset + 1] = 0; // G
+            frame_rgb[offset + 2] = 0; // B
+        }
+    }
+
+    // Fill right pillarbox bar
+    for (int y = 0; y < height; y++) {
+        for (int x = width - right; x < width; x++) {
+            if (x < 0) continue;
+            int offset = (y * width + x) * 3;
+            frame_rgb[offset] = 0; // R
+            frame_rgb[offset + 1] = 0; // G
+            frame_rgb[offset + 2] = 0; // B
+        }
+    }
+}
+
+// Update active screen mask for the given frame number
+// Screen mask packets are sorted by frame_num, so we find the last entry
+// with frame_num <= current_frame_num
+static void update_screen_mask(tav_decoder_t *decoder, uint32_t current_frame_num) {
+    if (!decoder->screen_masks || decoder->screen_mask_count == 0) {
+        return; // No screen mask entries
+    }
+
+    // Find the most recent screen mask entry for this frame
+    // Entries are in order, so scan backwards for efficiency
+    for (int i = decoder->screen_mask_count - 1; i >= 0; i--) {
+        if (decoder->screen_masks[i].frame_num <= current_frame_num) {
+            // Apply this mask
+            decoder->screen_mask_top = decoder->screen_masks[i].top;
+            decoder->screen_mask_right = decoder->screen_masks[i].right;
+            decoder->screen_mask_bottom = decoder->screen_masks[i].bottom;
+            decoder->screen_mask_left = decoder->screen_masks[i].left;
+            return;
+        }
+    }
+}
+
 //=============================================================================
 // Frame Decoding Logic
 //=============================================================================
@@ -2486,6 +2585,50 @@ int main(int argc, char *argv[]) {
             continue;
         }
 
+        // Handle screen masking packets (letterbox/pillarbox detection)
+        // Format: frame_num(4) + top(2) + right(2) + bottom(2) + left(2) = 12 bytes
+        if (packet_type == TAV_PACKET_SCREEN_MASK) {
+            uint32_t frame_num;
+            uint16_t top, right, bottom, left;
+            if (fread(&frame_num, 4, 1, decoder->input_fp) != 1 ||
+                fread(&top, 2, 1, decoder->input_fp) != 1 ||
+                fread(&right, 2, 1, decoder->input_fp) != 1 ||
+                fread(&bottom, 2, 1, decoder->input_fp) != 1 ||
+                fread(&left, 2, 1, decoder->input_fp) != 1) {
+                fprintf(stderr, "Error: Failed to read screen mask packet\n");
+                result = -1;
+                break;
+            }
+
+            // Grow the entry array on first use or when full (realloc(NULL, n) acts as malloc)
+            if (decoder->screen_masks == NULL || decoder->screen_mask_count >= decoder->screen_mask_capacity) {
+                int new_capacity = decoder->screen_masks ? decoder->screen_mask_capacity * 2 : 16;
+                screen_mask_entry_t *grown = realloc(decoder->screen_masks, (size_t)new_capacity * sizeof(*grown));
+                if (grown == NULL) { // old block is still owned by decoder and freed by tav_decoder_free
+                    fprintf(stderr, "Error: Out of memory while storing screen mask packet\n");
+                    result = -1;
+                    break;
+                }
+                if (decoder->screen_masks == NULL) decoder->screen_mask_count = 0;
+                decoder->screen_masks = grown;
+                decoder->screen_mask_capacity = new_capacity;
+            }
+
+            // Store entry
+            screen_mask_entry_t *entry = &decoder->screen_masks[decoder->screen_mask_count++];
+            entry->frame_num = frame_num;
+            entry->top = top;
+            entry->right = right;
+            entry->bottom = bottom;
+            entry->left = left;
+
+            if (verbose) {
+                fprintf(stderr, "Packet %d: SCREEN_MASK (0x%02X) - frame=%u top=%u right=%u 
bottom=%u left=%u\n", + total_packets, packet_type, frame_num, top, right, bottom, left); + } + continue; + } + // Handle GOP unified packets (custom format: 1-byte gop_size + 4-byte compressed_size) if (packet_type == TAV_PACKET_GOP_UNIFIED) { uint8_t gop_size; @@ -2738,6 +2881,14 @@ int main(int argc, char *argv[]) { frame_rgb[i * 3 + 2] = b; } + // Update active screen mask for this GOP frame + update_screen_mask(decoder, decoder->frame_count + t); + + // Fill masked regions with black (letterbox/pillarbox bars) + fill_masked_regions(frame_rgb, decoder->header.width, decoder->header.height, + decoder->screen_mask_top, decoder->screen_mask_right, + decoder->screen_mask_bottom, decoder->screen_mask_left); + // Write frame to FFmpeg video pipe const size_t bytes_to_write = decoder->frame_size * 3; @@ -2869,6 +3020,9 @@ int main(int argc, char *argv[]) { switch (packet_type) { case TAV_PACKET_IFRAME: case TAV_PACKET_PFRAME: + // Update active screen mask for this frame (Phase 1: just tracking, not applying) + update_screen_mask(decoder, decoder->frame_count); + iframe_count++; if (verbose && iframe_count <= 5) { fprintf(stderr, "Processing %s (packet %d, size %u bytes)...\n", @@ -2902,6 +3056,7 @@ int main(int argc, char *argv[]) { break; case TAV_PACKET_SUBTITLE: + case TAV_PACKET_SUBTITLE_TC: // Skip subtitle packets fseek(decoder->input_fp, packet_size, SEEK_CUR); break; diff --git a/video_encoder/encoder_tav.c b/video_encoder/encoder_tav.c index 27de097..1ae66a7 100644 --- a/video_encoder/encoder_tav.c +++ b/video_encoder/encoder_tav.c @@ -59,6 +59,7 @@ #define TAV_PACKET_SUBTITLE_TC 0x31 // Subtitle packet with timecode (SSF-TC format) #define TAV_PACKET_AUDIO_TRACK 0x40 // Separate audio track (full MP2 file) #define TAV_PACKET_EXTENDED_HDR 0xEF // Extended header packet +#define TAV_PACKET_SCREEN_MASK 0xF2 // Screen masking packet (letterbox/pillarbox) #define TAV_PACKET_GOP_SYNC 0xFC // GOP sync packet (N frames decoded) #define TAV_PACKET_TIMECODE 0xFD // 
Timecode packet
 #define TAV_PACKET_SYNC_NTSC 0xFE // NTSC Sync packet
@@ -199,6 +200,13 @@ typedef struct frame_analysis {
     // Detection results
     int is_scene_change; // Final scene change flag
     double scene_change_score; // Composite score for debugging
+
+    // Letterbox/pillarbox detection
+    uint16_t letterbox_top;
+    uint16_t letterbox_right;
+    uint16_t letterbox_bottom;
+    uint16_t letterbox_left;
+    int has_letterbox; // 1 if any masking detected
 } frame_analysis_t;
 
 // GOP boundary list for two-pass encoding
@@ -1804,6 +1812,7 @@ typedef struct tav_encoder_s {
     int separate_audio_track; // 1 = write entire MP2 file as packet 0x40 after header, 0 = interleave audio (default)
     int pcm8_audio; // 1 = use 8-bit PCM audio (packet 0x21), 0 = use MP2 (default)
     int tad_audio; // 1 = use TAD audio (packet 0x24), 0 = use MP2/PCM8 (default, quality follows quality_level)
+    int enable_letterbox_detect; // 1 = detect and emit letterbox/pillarbox packets (default), 0 = disable
 
     // Frame buffers - ping-pong implementation
     uint8_t *frame_rgb[2]; // [0] and [1] alternate between current and previous
@@ -2419,6 +2428,7 @@ static tav_encoder_t* create_encoder(void) {
     enc->separate_audio_track = 0; // Default: interleave audio packets
     enc->pcm8_audio = 0; // Default: use MP2 audio
     enc->tad_audio = 0; // Default: use MP2 audio (TAD quality follows quality_level)
+    enc->enable_letterbox_detect = 1; // Default: enable letterbox/pillarbox detection
 
     // GOP / temporal DWT settings
     enc->enable_temporal_dwt = 1; // Mutually exclusive with use_delta_encoding
@@ -8125,6 +8135,415 @@ static void write_timecode_packet(FILE *output, int frame_num, int fps, int is_n
     fwrite(&timecode_ns, sizeof(uint64_t), 1, output);
 }
 
+// Write screen masking packet (letterbox/pillarbox detection)
+// Packet structure: type(1) + frame_num(4) + top(2) + right(2) + bottom(2) + left(2) = 13 bytes
+static void write_screen_mask_packet(FILE *output, uint32_t frame_num,
+                                     uint16_t top, uint16_t right,
+                                     uint16_t bottom, uint16_t left) {
+    uint8_t packet_type = TAV_PACKET_SCREEN_MASK;
+    fwrite(&packet_type, 1, 1, output);
+    fwrite(&frame_num, sizeof(uint32_t), 1, output);
+    fwrite(&top, sizeof(uint16_t), 1, output);
+    fwrite(&right, sizeof(uint16_t), 1, output);
+    fwrite(&bottom, sizeof(uint16_t), 1, output);
+    fwrite(&left, sizeof(uint16_t), 1, output);
+}
+
+// Calculate Sobel gradient magnitude for a pixel (edge detection)
+static float calculate_sobel_magnitude(const uint8_t *frame_rgb, int width, int height,
+                                       int x, int y) {
+    // Sobel kernels for X and Y gradients
+    // Gx = [[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]
+    // Gy = [[-1, -2, -1], [0, 0, 0], [1, 2, 1]]
+
+    // Handle boundary conditions by clamping neighbours to the frame edge
+    int x_prev = (x > 0) ? (x - 1) : 0;
+    int x_next = (x < width - 1) ? (x + 1) : (width - 1);
+    int y_prev = (y > 0) ? (y - 1) : 0;
+    int y_next = (y < height - 1) ? (y + 1) : (height - 1);
+
+    // Sample 3x3 neighborhood (using luma only for efficiency)
+    float pixels[3][3];
+    for (int dy = 0; dy < 3; dy++) {
+        for (int dx = 0; dx < 3; dx++) {
+            int sample_y = (dy == 0) ? y_prev : ((dy == 1) ? y : y_next);
+            int sample_x = (dx == 0) ? x_prev : ((dx == 1) ? x : x_next);
+            int offset = (sample_y * width + sample_x) * 3;
+
+            // Convert to luma (simple approximation: Y = 0.299R + 0.587G + 0.114B)
+            pixels[dy][dx] = (0.299f * frame_rgb[offset] +
+                              0.587f * frame_rgb[offset + 1] +
+                              0.114f * frame_rgb[offset + 2]);
+        }
+    }
+
+    // Apply Sobel operators
+    float gx = -pixels[0][0] + pixels[0][2] +
+               -2*pixels[1][0] + 2*pixels[1][2] +
+               -pixels[2][0] + pixels[2][2];
+
+    float gy = -pixels[0][0] - 2*pixels[0][1] - pixels[0][2] +
+               pixels[2][0] + 2*pixels[2][1] + pixels[2][2];
+
+    // Calculate magnitude: sqrt(gx^2 + gy^2)
+    return sqrtf(gx * gx + gy * gy);
+}
+
+// Apply symmetric cropping and suppress simultaneous letterbox+pillarbox
+// ALWAYS makes left=right and top=bottom (perfect symmetry)
+// When BOTH are detected, the one contributing < 25% of the masking is dropped as a false positive
+// Allows letterbox→pillarbox or pillarbox→letterbox transitions
+static void apply_symmetric_cropping(uint16_t *top, uint16_t *right,
+                                     uint16_t *bottom, uint16_t *left,
+                                     int width, int height,
+                                     uint16_t current_top, uint16_t current_bottom,
+                                     uint16_t current_left, uint16_t current_right) {
+    const int MIN_BAR_SIZE_LETTER = (int)(0.04f * height); // Minimum bar size to consider: 4% of frame height
+    const int MIN_BAR_SIZE_PILLAR = (int)(0.04f * width); // Minimum bar size to consider: 4% of frame width
+    const int SIGNIFICANT_THRESHOLD_LETTER = (int)(0.08f * height); // Bar must reach 8% of frame height to be significant
+    const int SIGNIFICANT_THRESHOLD_PILLAR = (int)(0.08f * width); // Bar must reach 8% of frame width to be significant
+
+    // Filter out small bars (noise/detection errors)
+    if (*top < MIN_BAR_SIZE_LETTER) *top = 0;
+    if (*bottom < MIN_BAR_SIZE_LETTER) *bottom = 0;
+    if (*left < MIN_BAR_SIZE_PILLAR) *left = 0;
+    if (*right < MIN_BAR_SIZE_PILLAR) *right = 0;
+
+    // ALWAYS make letterbox (top/bottom) perfectly symmetric
+    if (*top > 0 || *bottom > 0) {
+        // Use minimum value to avoid over-cropping
+        uint16_t symmetric_value = (*top < *bottom) ? *top : *bottom;
+        *top = symmetric_value+1;
+        *bottom = symmetric_value+1;
+    }
+
+    // ALWAYS make pillarbox (left/right) perfectly symmetric
+    if (*left > 0 || *right > 0) {
+        // Use minimum value to avoid over-cropping
+        uint16_t symmetric_value = (*left < *right) ? *left : *right;
+        *left = symmetric_value+1;
+        *right = symmetric_value+1;
+    }
+
+    // Check if BOTH letterbox and pillarbox are detected simultaneously
+    int new_has_letterbox = (*top >= SIGNIFICANT_THRESHOLD_LETTER || *bottom >= SIGNIFICANT_THRESHOLD_LETTER);
+    int new_has_pillarbox = (*left >= SIGNIFICANT_THRESHOLD_PILLAR || *right >= SIGNIFICANT_THRESHOLD_PILLAR);
+    // The current geometry is part of the signature but is not consulted by the logic below
+    (void)current_top; (void)current_bottom; (void)current_left; (void)current_right;
+
+    // Only suppress when BOTH are detected AND one is much smaller (likely false positive)
+    // Balanced letterbox+pillarbox (windowboxing) is left untouched
+    if (new_has_letterbox && new_has_pillarbox) {
+        int letterbox_size = *top + *bottom;
+        int pillarbox_size = *left + *right;
+
+        // to allow windowboxing:
+        // Only suppress if one is less than 25% of total masking
+        // This allows legitimate windowboxing while filtering false positives
+        float letterbox_ratio_geom = (float)letterbox_size / height;
+        float pillarbox_ratio_geom = (float)pillarbox_size / width;
+        float ratio_sum = letterbox_ratio_geom + pillarbox_ratio_geom;
+        float letterbox_ratio = letterbox_ratio_geom / ratio_sum;
+        float pillarbox_ratio = pillarbox_ratio_geom / ratio_sum;
+
+        if (letterbox_ratio < 0.25f) {
+            *top = 0;
+            *bottom = 0;
+        } else if (pillarbox_ratio < 0.25f) {
+            *left = 0;
+            *right = 0;
+        }
+        // Otherwise keep both (legitimate windowboxing)
+    }
+}
+
+// Detect letterbox/pillarbox bars in the current frame
+// Returns 1 if masking detected, 0 otherwise
+// Sets top, right, bottom, left to 
the size of detected bars in pixels
+static int detect_letterbox_pillarbox(tav_encoder_t *enc,
+                                      uint16_t *top, uint16_t *right,
+                                      uint16_t *bottom, uint16_t *left) {
+    if (!enc->current_frame_rgb) return 0;
+
+    const int width = enc->width;
+    const int height = enc->height;
+    const int SAMPLE_RATE_HORZ = 4; // Sample every 4th pixel for performance
+    const int SAMPLE_RATE_VERT = 4; // Sample every 4th pixel for performance
+    const float Y_THRESHOLD = 2.0f; // Y < 2 for dark pixels
+    const float CHROMA_THRESHOLD = 1.0f; // Co/Cg close to 0 (in ±255 scale)
+    const float EDGE_ACTIVITY_THRESHOLD = 1.0f; // Mean Sobel magnitude < 1.0
+    const float ROW_COL_BLACK_RATIO = 0.999f; // 99.9% of sampled pixels must be black
+
+    *top = 0;
+    *bottom = 0;
+    *left = 0;
+    *right = 0;
+
+    // Detect top letterbox
+    for (int y = 0; y < height / 4; y++) {
+        int black_pixel_count = 0;
+        float total_edge_activity = 0.0f;
+        int sampled_pixels = 0;
+
+        for (int x = 0; x < width; x += SAMPLE_RATE_HORZ) {
+            int idx = y * width + x;
+
+            // Use pre-converted YCoCg planes. NOTE(review): the first-pass caller only swaps current_frame_rgb - confirm these planes hold this frame
+            float yval = enc->current_frame_y[idx];
+            float co = enc->current_frame_co[idx];
+            float cg = enc->current_frame_cg[idx];
+
+            // Check if pixel is dark and neutral (letterbox bar)
+            if (yval < Y_THRESHOLD &&
+                fabsf(co) < CHROMA_THRESHOLD &&
+                fabsf(cg) < CHROMA_THRESHOLD) {
+                black_pixel_count++;
+            }
+
+            // Calculate edge activity
+            total_edge_activity += calculate_sobel_magnitude(enc->current_frame_rgb,
+                                                             width, height, x, y);
+            sampled_pixels++;
+        }
+
+        float black_ratio = (float)black_pixel_count / sampled_pixels;
+        float mean_edge_activity = total_edge_activity / sampled_pixels;
+
+        // Row is part of letterbox if mostly black AND low edge activity
+        if (black_ratio > ROW_COL_BLACK_RATIO &&
+            mean_edge_activity < EDGE_ACTIVITY_THRESHOLD) {
+            *top = y + 1;
+        } else {
+            break; // Found content
+        }
+    }
+
+    // Detect bottom letterbox
+    for (int y = height - 1; y >= height * 3 / 4; y--) {
+        int black_pixel_count = 0;
+        float total_edge_activity = 0.0f;
+        int sampled_pixels = 0;
+
+        for (int x = 0; x < width; x += SAMPLE_RATE_HORZ) {
+            int idx = y * width + x;
+
+            // Use pre-converted YCoCg values (optimization)
+            float yval = enc->current_frame_y[idx];
+            float co = enc->current_frame_co[idx];
+            float cg = enc->current_frame_cg[idx];
+
+            if (yval < Y_THRESHOLD &&
+                fabsf(co) < CHROMA_THRESHOLD &&
+                fabsf(cg) < CHROMA_THRESHOLD) {
+                black_pixel_count++;
+            }
+
+            total_edge_activity += calculate_sobel_magnitude(enc->current_frame_rgb,
+                                                             width, height, x, y);
+            sampled_pixels++;
+        }
+
+        float black_ratio = (float)black_pixel_count / sampled_pixels;
+        float mean_edge_activity = total_edge_activity / sampled_pixels;
+
+        if (black_ratio > ROW_COL_BLACK_RATIO &&
+            mean_edge_activity < EDGE_ACTIVITY_THRESHOLD) {
+            *bottom = height - y;
+        } else {
+            break;
+        }
+    }
+
+    // Detect left pillarbox
+    for (int x = 0; x < width / 4; x++) {
+        int black_pixel_count = 0;
+        float total_edge_activity = 0.0f;
+        int sampled_pixels = 0;
+
+        for (int y = 0; y < height; y += SAMPLE_RATE_VERT) {
+            int idx = y * width + x;
+
+            // Use pre-converted YCoCg values (optimization)
+            float yval = enc->current_frame_y[idx];
+            float co = enc->current_frame_co[idx];
+            float cg = enc->current_frame_cg[idx];
+
+            if (yval < Y_THRESHOLD &&
+                fabsf(co) < CHROMA_THRESHOLD &&
+                fabsf(cg) < CHROMA_THRESHOLD) {
+                black_pixel_count++;
+            }
+
+            total_edge_activity += calculate_sobel_magnitude(enc->current_frame_rgb,
+                                                             width, height, x, y);
+            sampled_pixels++;
+        }
+
+        float black_ratio = (float)black_pixel_count / sampled_pixels;
+        float mean_edge_activity = total_edge_activity / sampled_pixels;
+
+        if (black_ratio > ROW_COL_BLACK_RATIO &&
+            mean_edge_activity < EDGE_ACTIVITY_THRESHOLD) {
+            *left = x + 1;
+        } else {
+            break;
+        }
+    }
+
+    // Detect right pillarbox
+    for (int x = width - 1; x >= width * 3 / 4; x--) {
+        int black_pixel_count = 0;
+        float total_edge_activity = 0.0f;
+        int sampled_pixels = 0;
+
+        for (int y = 0; y < height; y += SAMPLE_RATE_VERT) {
+            int idx = y * width + x;
+
+            // Use pre-converted YCoCg values (optimization)
+            float yval = enc->current_frame_y[idx];
+            float co = enc->current_frame_co[idx];
+            float cg = enc->current_frame_cg[idx];
+
+            if (yval < Y_THRESHOLD &&
+                fabsf(co) < CHROMA_THRESHOLD &&
+                fabsf(cg) < CHROMA_THRESHOLD) {
+                black_pixel_count++;
+            }
+
+            total_edge_activity += calculate_sobel_magnitude(enc->current_frame_rgb,
+                                                             width, height, x, y);
+            sampled_pixels++;
+        }
+
+        float black_ratio = (float)black_pixel_count / sampled_pixels;
+        float mean_edge_activity = total_edge_activity / sampled_pixels;
+
+        if (black_ratio > ROW_COL_BLACK_RATIO &&
+            mean_edge_activity < EDGE_ACTIVITY_THRESHOLD) {
+            *right = width - x;
+        } else {
+            break;
+        }
+    }
+
+    // Apply symmetric cropping preference and minimum bar size filtering
+    // Note: During detection phase, no current state available (use 0,0,0,0)
+    apply_symmetric_cropping(top, right, bottom, left, width, height, 0, 0, 0, 0);
+
+    // Return 1 if any masking was detected
+    return (*top > 0 || *bottom > 0 || *left > 0 || *right > 0);
+}
+
+// Refine geometry change detection - find exact frame where change occurred
+// Uses linear scan to find first frame with new geometry
+static int refine_geometry_change(tav_encoder_t *enc, int start_frame, int end_frame,
+                                  uint16_t old_top, uint16_t old_right,
+                                  uint16_t old_bottom, uint16_t old_left) {
+    #define GEOMETRY_TOLERANCE 4 // ±4 pixels tolerance
+
+    // Linear scan from start to find first frame with new geometry
+    for (int i = start_frame; i <= end_frame && i < enc->frame_analyses_count; i++) {
+        frame_analysis_t *m = &enc->frame_analyses[i];
+
+        // Check if this frame has different geometry (beyond tolerance)
+        if (abs((int)m->letterbox_top - (int)old_top) > GEOMETRY_TOLERANCE ||
+            abs((int)m->letterbox_right - (int)old_right) > GEOMETRY_TOLERANCE ||
+            abs((int)m->letterbox_bottom - (int)old_bottom) > GEOMETRY_TOLERANCE ||
+            abs((int)m->letterbox_left - (int)old_left) > GEOMETRY_TOLERANCE) {
+            return i; // Found the change point
+        }
+    }
+
+    return end_frame; // No change found, use end frame
+
+    #undef GEOMETRY_TOLERANCE
+}
+
+// Write all screen masking packets before first frame (similar to SSF-TC subtitles)
+// Uses two-stage approach: coarse detection (COARSE_STRIDE-frame stride) + frame-exact refinement
+static void write_all_screen_mask_packets(tav_encoder_t *enc, FILE *output) {
+    if (!enc->enable_letterbox_detect || !enc->two_pass_mode) {
+        return; // Letterbox detection requires two-pass mode
+    }
+
+    if (!enc->frame_analyses || enc->frame_analyses_count == 0) {
+        return; // No analysis data
+    }
+
+#define COARSE_STRIDE 16 // Sample every 16 frames for coarse detection
+#define CHANGE_THRESHOLD 16 // Require 16+ pixel change to consider geometry change
+#define SKIP_INITIAL_FRAMES 60 // Skip first N frames (often black/fade-in)
+
+    // Track current geometry
+    uint16_t current_top = 0, current_right = 0, current_bottom = 0, current_left = 0;
+    int packets_written = 0;
+    int last_checked_frame = SKIP_INITIAL_FRAMES;
+
+    // Stage 1: Coarse scan every COARSE_STRIDE frames to detect geometry changes
+    for (int i = SKIP_INITIAL_FRAMES; i < enc->frame_analyses_count; i += COARSE_STRIDE) {
+        frame_analysis_t *metrics = &enc->frame_analyses[i];
+
+        // Check if geometry changed significantly
+        int is_first = (packets_written == 0);
+        int is_significant_change =
+            abs((int)metrics->letterbox_top - (int)current_top) >= CHANGE_THRESHOLD ||
+            abs((int)metrics->letterbox_right - (int)current_right) >= CHANGE_THRESHOLD ||
+            abs((int)metrics->letterbox_bottom - (int)current_bottom) >= CHANGE_THRESHOLD ||
+            abs((int)metrics->letterbox_left - (int)current_left) >= CHANGE_THRESHOLD;
+
+        if (is_first || is_significant_change) {
+            // Stage 2: Refine - find exact frame where change occurred
+            int change_frame;
+            if (is_first) {
+                change_frame = 0; // First packet always at frame 0
+            } else {
+                // Scan forward from last_checked_frame to i to find the exact change point
+                change_frame = refine_geometry_change(enc, last_checked_frame, i,
+                                                      current_top, current_right,
+                                                      current_bottom, current_left);
+            }
+
+            // First packet: take geometry from the first sampled frame (the skipped leading frames are often black); otherwise from the change frame
+            frame_analysis_t *change_metrics = is_first ? metrics : &enc->frame_analyses[change_frame];
+
+            // Apply symmetric cropping to final geometry (with current state for context)
+            uint16_t final_top = change_metrics->letterbox_top;
+            uint16_t final_right = change_metrics->letterbox_right;
+            uint16_t final_bottom = change_metrics->letterbox_bottom;
+            uint16_t final_left = change_metrics->letterbox_left;
+            apply_symmetric_cropping(&final_top, &final_right, &final_bottom, &final_left,
+                                     enc->width, enc->height,
+                                     current_top, current_bottom, current_left, current_right);
+
+            // Emit packet
+            write_screen_mask_packet(output, change_frame,
+                                     final_top, final_right, final_bottom, final_left);
+
+            // Update current geometry
+            current_top = final_top;
+            current_right = final_right;
+            current_bottom = final_bottom;
+            current_left = final_left;
+            packets_written++;
+
+            if (enc->verbose) {
+                printf(" Frame %d: Screen mask t=%u r=%u b=%u l=%u (frame-exact detection)\n",
+                       change_frame, final_top, final_right, final_bottom, final_left);
+            }
+        }
+
+        last_checked_frame = i;
+    }
+
+    if (packets_written > 0) {
+        printf("Wrote %d screen masking packet(s) (frame-exact detection)\n", packets_written);
+    }
+
+#undef COARSE_STRIDE
+#undef CHANGE_THRESHOLD
+#undef SKIP_INITIAL_FRAMES
+}
+
 // Write extended header packet with metadata
 // Returns the file offset where ENDT value is written (for later update)
 static long write_extended_header(tav_encoder_t *enc) {
@@ -8297,6 +8716,15 @@ static int write_tad_packet_samples(tav_encoder_t *enc, FILE *output, int sample
     if (!enc->pcm_file || enc->audio_remaining <= 0 || samples_to_read <= 0) {
         return 0;
     }
+
+    // Check if we have enough audio for a minimum chunk
+    // Don't encode if less than minimum - avoids encoding mostly 
padding/zeros + size_t min_bytes_needed = TAD32_MIN_CHUNK_SIZE * 2 * sizeof(float); + if (enc->audio_remaining < min_bytes_needed) { + enc->audio_remaining = 0; // Mark audio as exhausted + return 0; + } + size_t bytes_to_read = samples_to_read * 2 * sizeof(float); // Stereo Float32LE // Don't read more than what's available @@ -9457,9 +9885,11 @@ static int two_pass_first_pass(tav_encoder_t *enc, const char *input_file) { // Compute metrics frame_analysis_t metrics; - metrics.frame_number = frame_num; compute_frame_metrics(enc, gray, prev_dwt, sub_width, sub_height, ANALYSIS_DWT_LEVELS, &metrics); + // Set frame number AFTER compute_frame_metrics (which does memset) + metrics.frame_number = frame_num; + // Detect scene change using hybrid detector if (frame_num > 0) { metrics.is_scene_change = detect_scene_change_wavelet( @@ -9473,6 +9903,29 @@ static int two_pass_first_pass(tav_encoder_t *enc, const char *input_file) { metrics.is_scene_change = 0; // First frame is always start of first GOP } + // Detect letterbox/pillarbox if enabled + if (enc->enable_letterbox_detect) { + // Set current_frame_rgb temporarily for detection + uint8_t *saved_current = enc->current_frame_rgb; + enc->current_frame_rgb = frame_rgb; + + metrics.has_letterbox = detect_letterbox_pillarbox( + enc, + &metrics.letterbox_top, + &metrics.letterbox_right, + &metrics.letterbox_bottom, + &metrics.letterbox_left + ); + + enc->current_frame_rgb = saved_current; + } else { + metrics.has_letterbox = 0; + metrics.letterbox_top = 0; + metrics.letterbox_right = 0; + metrics.letterbox_bottom = 0; + metrics.letterbox_left = 0; + } + // Store analysis if (enc->frame_analyses_count >= enc->frame_analyses_capacity) { // Expand array @@ -9650,6 +10103,7 @@ int main(int argc, char *argv[]) { {"tad-audio", no_argument, 0, 1028}, {"raw-coeffs", no_argument, 0, 1029}, {"single-pass", no_argument, 0, 1050}, // disable two-pass encoding with wavelet-based scene detection + {"no-letterbox-detect", no_argument, 0, 
1051}, // disable letterbox/pillarbox detection {"help", no_argument, 0, '?'}, {0, 0, 0, 0} }; @@ -9880,6 +10334,10 @@ int main(int argc, char *argv[]) { enc->two_pass_mode = 0; printf("Two-pass wavelet-based scene change detection disabled\n"); break; + case 1051: // --no-letterbox-detect + enc->enable_letterbox_detect = 0; + printf("Letterbox/pillarbox detection disabled\n"); + break; case 'a': int bitrate = atoi(optarg); int valid_bitrate = validate_mp2_bitrate(bitrate); @@ -10088,6 +10546,10 @@ int main(int argc, char *argv[]) { write_all_subtitles_tc(enc, enc->output_fp); } + // Write all screen masking packets upfront (before first frame) + // This must be done AFTER first pass analysis completes, so we'll defer it + // to after the two-pass analysis block below + if (enc->output_fps != enc->fps) { printf("Frame rate conversion enabled: %d fps output\n", enc->output_fps); } @@ -10131,6 +10593,9 @@ int main(int argc, char *argv[]) { TEMPORAL_GOP_SIZE, ANALYSIS_GOP_MAX_SIZE); } + // Write all screen masking packets NOW (after first pass analysis) + write_all_screen_mask_packets(enc, enc->output_fp); + printf("\n=== Two-Pass Encoding: Second Pass (Encoding) ===\n"); }