From 313122c01ee08d0db5513272d20b65702499a30e Mon Sep 17 00:00:00 2001 From: minjaesong Date: Thu, 28 Aug 2025 13:57:00 +0900 Subject: [PATCH] support for SAMI subtitles; b/i tags for SAMI and SubRip --- assets/disk0/tvdos/bin/playtev.js | 73 ++++++- video_encoder/encoder_tev.c | 348 +++++++++++++++++++++++++++++- 2 files changed, 414 insertions(+), 7 deletions(-) diff --git a/assets/disk0/tvdos/bin/playtev.js b/assets/disk0/tvdos/bin/playtev.js index ab13464..cd22f1a 100644 --- a/assets/disk0/tvdos/bin/playtev.js +++ b/assets/disk0/tvdos/bin/playtev.js @@ -97,6 +97,70 @@ function clearSubtitleArea() { con.color_pair(oldFgColor, oldBgColor) } +function getVisualLength(line) { + // Calculate the visual length of a line excluding formatting tags + let visualLength = 0 + let i = 0 + + while (i < line.length) { + if (i < line.length - 2 && line[i] === '<') { + // Check for formatting tags and skip them + if (line.substring(i, i + 3).toLowerCase() === '' || + line.substring(i, i + 3).toLowerCase() === '') { + i += 3 // Skip tag + } else if (i < line.length - 3 && + (line.substring(i, i + 4).toLowerCase() === '' || + line.substring(i, i + 4).toLowerCase() === '')) { + i += 4 // Skip closing tag + } else { + // Not a formatting tag, count the character + visualLength++ + i++ + } + } else { + // Regular character, count it + visualLength++ + i++ + } + } + + return visualLength +} + +function displayFormattedLine(line) { + // Parse line and handle and tags with color changes + // Default subtitle color: yellow (231), formatted text: white (254) + + let i = 0 + let inBoldOrItalic = false + + while (i < line.length) { + if (i < line.length - 2 && line[i] === '<') { + // Check for opening tags + if (line.substring(i, i + 3).toLowerCase() === '' || + line.substring(i, i + 3).toLowerCase() === '') { + con.color_pair(254, 0) // Switch to white for formatted text + inBoldOrItalic = true + i += 3 + } else if (i < line.length - 3 && + (line.substring(i, i + 4).toLowerCase() === '' || + line.substring(i, i + 4).toLowerCase() === '')) { + con.color_pair(231, 0) // Switch back to yellow for normal text + inBoldOrItalic = false + i += 4 + } else { + // Not a formatting tag, print the character + print(line[i]) + i++ + } + } else { + // Regular character, print it + print(line[i]) + i++ + } + } +} + function displaySubtitle(text, position = 0) { if (!text || text.length === 0) { clearSubtitleArea() @@ -113,7 +177,8 @@ function displaySubtitle(text, position = 0) { // Calculate position based on subtitle position setting let startRow, startCol - let longestLineLength = lines.map(s => s.length).sort().last() + // Calculate visual length without formatting tags for positioning + let longestLineLength = lines.map(s => getVisualLength(s)).sort().last() switch (position) { case 2: // center left @@ -154,7 +219,7 @@ function displaySubtitle(text, position = 0) { case 5: // top right case 6: // center right case 7: // bottom right - startCol = Math.max(1, 78 - line.length) + startCol = Math.max(1, 78 - getVisualLength(line)) break case 0: // bottom center case 4: // top center @@ -166,7 +231,9 @@ function displaySubtitle(text, position = 0) { con.move(row, startCol) // TODO insert half-width pillars to cap the subtitle blocks - print(line) // Unicode-capable print function + + // Parse and display line with formatting tag support + displayFormattedLine(line) } con.color_pair(oldFgColor, oldBgColor) diff --git a/video_encoder/encoder_tev.c b/video_encoder/encoder_tev.c index a7bcbff..08645cb 100644 --- a/video_encoder/encoder_tev.c +++ b/video_encoder/encoder_tev.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -898,6 +899,12 @@ static int srt_time_to_frame(const char *time_str, int fps) { return (int)(total_seconds * fps + 0.5); // Round to nearest frame } +// Convert SAMI milliseconds to frame number +static int sami_ms_to_frame(int milliseconds, int fps) { + double seconds = milliseconds / 1000.0; + return (int)(seconds * fps + 0.5); // Round to nearest frame +} + // Parse SubRip subtitle file static subtitle_entry_t* parse_srt_file(const char *filename, int fps) { FILE *file = fopen(filename, "r"); @@ -1026,6 +1033,335 @@ static subtitle_entry_t* parse_srt_file(const char *filename, int fps) { return head; } +// Strip HTML tags from text but preserve and formatting tags +static char* strip_html_tags(const char *html) { + if (!html) return NULL; + + size_t len = strlen(html); + char *result = malloc(len + 1); + if (!result) return NULL; + + int in_tag = 0; + int out_pos = 0; + int i = 0; + + while (i < len) { + if (html[i] == '<') { + // Check if this is a formatting tag we want to preserve + int preserve_tag = 0; + + // Check for , , , tags + if (i + 1 < len) { + if ((i + 2 < len && strncasecmp(&html[i], "", 3) == 0) || + (i + 3 < len && strncasecmp(&html[i], "", 4) == 0) || + (i + 2 < len && strncasecmp(&html[i], "", 3) == 0) || + (i + 3 < len && strncasecmp(&html[i], "", 4) == 0)) { + preserve_tag = 1; + } + } + + if (preserve_tag) { + // Copy the entire tag + while (i < len && html[i] != '>') { + result[out_pos++] = html[i++]; + } + if (i < len) { + result[out_pos++] = html[i++]; // Copy the '>' + } + } else { + // Skip non-formatting tags + in_tag = 1; + i++; + } + } else if (html[i] == '>') { + in_tag = 0; + i++; + } else if (!in_tag) { + result[out_pos++] = html[i++]; + } else { + i++; + } + } + + result[out_pos] = '\0'; + return result; +} + +// Parse SAMI subtitle file +static subtitle_entry_t* parse_smi_file(const char *filename, int fps) { + FILE *file = fopen(filename, "r"); + if (!file) { + fprintf(stderr, "Failed to open subtitle file: %s\n", filename); + return NULL; + } + + subtitle_entry_t *head = NULL; + subtitle_entry_t *tail = NULL; + char line[2048]; + char *content = NULL; + size_t content_size = 0; + size_t content_pos = 0; + + // Read entire file into memory for easier parsing + while (fgets(line, sizeof(line), file)) { + size_t line_len = strlen(line); + + // Expand content buffer if needed + if (content_pos + line_len + 1 > content_size) { + content_size = content_size ? content_size * 2 : 8192; + char *new_content = realloc(content, content_size); + if (!new_content) { + free(content); + fclose(file); + fprintf(stderr, "Memory allocation failed while parsing SAMI file\n"); + return NULL; + } + content = new_content; + } + + strcpy(content + content_pos, line); + content_pos += line_len; + } + fclose(file); + + if (!content) return NULL; + + // Convert to lowercase for case-insensitive parsing + char *content_lower = malloc(strlen(content) + 1); + if (!content_lower) { + free(content); + return NULL; + } + + for (int i = 0; content[i]; i++) { + content_lower[i] = tolower(content[i]); + } + content_lower[strlen(content)] = '\0'; + + // Find BODY section + char *body_start = strstr(content_lower, "'); + if (!body_start) { + free(content); + free(content_lower); + return NULL; + } + body_start++; + + // Calculate offset in original content + size_t body_offset = body_start - content_lower; + char *body_content = content + body_offset; + + // Parse SYNC tags + char *pos = content_lower + body_offset; + char *original_pos = body_content; + + while ((pos = strstr(pos, " strstr(pos, ">")) { + pos++; + continue; + } + + // Parse start time + start_attr = strchr(start_attr, '='); + if (!start_attr) { + pos++; + continue; + } + start_attr++; + + // Skip whitespace and quotes + while (*start_attr && (*start_attr == ' ' || *start_attr == '"' || *start_attr == '\'')) { + start_attr++; + } + + int start_ms = atoi(start_attr); + if (start_ms < 0) { + pos++; + continue; + } + + // Find end of sync tag + char *sync_end = strchr(pos, '>'); + if (!sync_end) { + pos++; + continue; + } + sync_end++; + + // Find next sync tag or end of body + char *next_sync = strstr(sync_end, ""); + char *text_end = next_sync; + + if (body_end && (!next_sync || body_end < next_sync)) { + text_end = body_end; + } + + if (!text_end) { + // Use end of content + text_end = content_lower + strlen(content_lower); + } + + // Extract subtitle text + size_t text_len = text_end - sync_end; + if (text_len > 0) { + // Get text from original content (not lowercase version) + size_t sync_offset = sync_end - content_lower; + char *subtitle_text = malloc(text_len + 1); + if (!subtitle_text) break; + + strncpy(subtitle_text, content + sync_offset, text_len); + subtitle_text[text_len] = '\0'; + + // Strip HTML tags and clean up text + char *clean_text = strip_html_tags(subtitle_text); + free(subtitle_text); + + if (clean_text && strlen(clean_text) > 0) { + // Remove leading/trailing whitespace + char *start = clean_text; + while (*start && (*start == ' ' || *start == '\t' || *start == '\n' || *start == '\r')) { + start++; + } + + char *end = start + strlen(start) - 1; + while (end > start && (*end == ' ' || *end == '\t' || *end == '\n' || *end == '\r')) { + *end = '\0'; + end--; + } + + if (strlen(start) > 0) { + // Create subtitle entry + subtitle_entry_t *entry = calloc(1, sizeof(subtitle_entry_t)); + if (entry) { + entry->start_frame = sami_ms_to_frame(start_ms, fps); + entry->text = strdup(start); + + // Set end frame to next subtitle start or a default duration + if (next_sync) { + // Parse next sync start time + char *next_start = strstr(next_sync, "start"); + if (next_start) { + next_start = strchr(next_start, '='); + if (next_start) { + next_start++; + while (*next_start && (*next_start == ' ' || *next_start == '"' || *next_start == '\'')) { + next_start++; + } + int next_ms = atoi(next_start); + if (next_ms > start_ms) { + entry->end_frame = sami_ms_to_frame(next_ms, fps); + } else { + entry->end_frame = entry->start_frame + fps * 3; // 3 second default + } + } + } + } else { + entry->end_frame = entry->start_frame + fps * 3; // 3 second default + } + + // Add to list + if (!head) { + head = entry; + tail = entry; + } else { + tail->next = entry; + tail = entry; + } + } + } + } + + free(clean_text); + } + + pos = sync_end; + } + + free(content); + free(content_lower); + return head; +} + +// Detect subtitle file format based on extension and content +static int detect_subtitle_format(const char *filename) { + // Check file extension first + const char *ext = strrchr(filename, '.'); + if (ext) { + ext++; // Skip the dot + if (strcasecmp(ext, "smi") == 0 || strcasecmp(ext, "sami") == 0) { + return 1; // SAMI format + } + if (strcasecmp(ext, "srt") == 0) { + return 0; // SubRip format + } + } + + // If extension is unclear, try to detect from content + FILE *file = fopen(filename, "r"); + if (!file) return 0; // Default to SRT + + char line[1024]; + int has_sami_tags = 0; + int has_srt_format = 0; + int lines_checked = 0; + + while (fgets(line, sizeof(line), file) && lines_checked < 20) { + // Convert to lowercase for checking + char *lower_line = malloc(strlen(line) + 1); + if (lower_line) { + for (int i = 0; line[i]; i++) { + lower_line[i] = tolower(line[i]); + } + lower_line[strlen(line)] = '\0'; + + // Check for SAMI indicators + if (strstr(lower_line, "") || strstr(lower_line, "") || strstr(lower_line, "start=")) { + has_sami_tags = 1; + free(lower_line); + break; + } + + // Check for SRT indicators (time format) + if (strstr(lower_line, "-->")) { + has_srt_format = 1; + } + + free(lower_line); + } + lines_checked++; + } + + fclose(file); + + // Return format based on detection + if (has_sami_tags) return 1; // SAMI + return 0; // Default to SRT +} + +// Parse subtitle file (auto-detect format) +static subtitle_entry_t* parse_subtitle_file(const char *filename, int fps) { + int format = detect_subtitle_format(filename); + + if (format == 1) { + return parse_smi_file(filename, fps); + } else { + return parse_srt_file(filename, fps); + } +} + // Free subtitle list static void free_subtitle_list(subtitle_entry_t *list) { while (list) { @@ -1653,7 +1989,7 @@ static void show_usage(const char *program_name) { printf("Options:\n"); printf(" -i, --input FILE Input video file\n"); printf(" -o, --output FILE Output video file (use '-' for stdout)\n"); - printf(" -s, --subtitles FILE SubRip (.srt) subtitle file\n"); + printf(" -s, --subtitles FILE SubRip (.srt) or SAMI (.smi) subtitle file\n"); printf(" -w, --width N Video width (default: %d)\n", DEFAULT_WIDTH); printf(" -h, --height N Video height (default: %d)\n", DEFAULT_HEIGHT); printf(" -f, --fps N Output frames per second (enables frame rate conversion)\n"); @@ -1686,6 +2022,7 @@ static void show_usage(const char *program_name) { printf(" %s -i input.mp4 -o output.mv2 # Use default setting (q=2)\n", program_name); printf(" %s -i input.avi -f 15 -q 3 -o output.mv2 # 15fps @ q=3\n", program_name); printf(" %s -i input.mp4 -s input.srt -o output.mv2 # With SubRip subtitles\n", program_name); + printf(" %s -i input.mp4 -s input.smi -o output.mv2 # With SAMI subtitles\n", program_name); // printf(" %s -i input.mp4 -b 800 -o output.mv2 # 800 kbps bitrate target\n", program_name); // printf(" %s -i input.avi -f 15 -b 500 -o output.mv2 # 15fps @ 500 kbps\n", program_name); // printf(" %s --test -b 1000 -o test.mv2 # Test with 1000 kbps target\n", program_name); @@ -1842,15 +2179,18 @@ int main(int argc, char *argv[]) { // Load subtitle file if specified printf("Loading subtitles...\n"); if (enc->subtitle_file) { - enc->subtitle_list = parse_srt_file(enc->subtitle_file, enc->fps); + int format = detect_subtitle_format(enc->subtitle_file); + const char *format_name = (format == 1) ? "SAMI" : "SubRip"; + + enc->subtitle_list = parse_subtitle_file(enc->subtitle_file, enc->fps); if (enc->subtitle_list) { enc->has_subtitles = 1; enc->current_subtitle = enc->subtitle_list; if (enc->verbose) { - printf("Loaded subtitles from: %s\n", enc->subtitle_file); + printf("Loaded %s subtitles from: %s\n", format_name, enc->subtitle_file); } } else { - fprintf(stderr, "Failed to parse subtitle file: %s\n", enc->subtitle_file); + fprintf(stderr, "Failed to parse %s subtitle file: %s\n", format_name, enc->subtitle_file); // Continue without subtitles } }