TAV update: accept D1 and D1PAL as resolution keywords

This commit is contained in:
minjaesong
2025-11-23 23:22:14 +09:00
parent 1c7ab17b1c
commit dd60b2c569
5 changed files with 270 additions and 16 deletions

View File

@@ -1337,7 +1337,7 @@ try {
// Start async decode
graphics.tavDecodeGopToVideoBufferAsync(
compressedPtr, compressedSize, gopSize,
header.width, header.height,
header.width, decodeHeight, // Use decodeHeight for interlaced field support
header.qualityLevel,
QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
header.channelLayout,
@@ -1411,7 +1411,7 @@ try {
// Start async decode to ready slot
graphics.tavDecodeGopToVideoBufferAsync(
compressedPtr, compressedSize, gopSize,
header.width, header.height,
header.width, decodeHeight, // Use decodeHeight for interlaced field support
header.qualityLevel,
QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
header.channelLayout,
@@ -1454,7 +1454,7 @@ try {
// Start async decode to decoding slot
graphics.tavDecodeGopToVideoBufferAsync(
compressedPtr, compressedSize, gopSize,
header.width, header.height,
header.width, decodeHeight, // Use decodeHeight for interlaced field support
header.qualityLevel,
QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
header.channelLayout,
@@ -1822,7 +1822,7 @@ try {
graphics.tavDecodeGopToVideoBufferAsync(
readyGopData.compressedPtr, readyGopData.compressedSize, readyGopData.gopSize,
header.width, header.height,
header.width, decodeHeight, // Use decodeHeight for interlaced field support
header.qualityLevel,
QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
header.channelLayout,
@@ -1931,7 +1931,21 @@ try {
const bufferOffset = bufferSlot * SLOT_SIZE
let uploadStart = sys.nanoTime()
graphics.uploadVideoBufferFrameToFramebuffer(currentGopFrameIndex, header.width, header.height, trueFrameCount, bufferOffset)
// For interlaced: use specialized function that handles field copying and deinterlacing
if (isInterlaced) {
graphics.uploadInterlacedGopFrameToFramebuffer(
currentGopFrameIndex, currentGopSize,
header.width, decodeHeight, header.height,
trueFrameCount, bufferOffset,
prevFieldAddr, currentFieldAddr, nextFieldAddr,
CURRENT_RGB_ADDR
)
} else {
// Progressive: upload directly from videoBuffer
graphics.uploadVideoBufferFrameToFramebuffer(currentGopFrameIndex, header.width, header.height, trueFrameCount, bufferOffset)
}
uploadTime = (sys.nanoTime() - uploadStart) / 1000000.0
// Update active screen mask for this GOP frame
@@ -2041,7 +2055,7 @@ try {
if (readyGopData.needsDecode) {
graphics.tavDecodeGopToVideoBufferAsync(
readyGopData.compressedPtr, readyGopData.compressedSize, readyGopData.gopSize,
header.width, header.height,
header.width, decodeHeight, // Use decodeHeight for interlaced field support
header.qualityLevel,
QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
header.channelLayout,
@@ -2119,7 +2133,7 @@ try {
// Start async decode
graphics.tavDecodeGopToVideoBufferAsync(
overflow.compressedPtr, overflow.compressedSize, overflow.gopSize,
header.width, header.height,
header.width, decodeHeight, // Use decodeHeight for interlaced field support
header.qualityLevel,
QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
header.channelLayout,

View File

@@ -6712,6 +6712,82 @@ class GraphicsJSR223Delegate(private val vm: VM) {
}
}
/**
* Upload interlaced GOP frame from videoBuffer with deinterlacing.
* Handles field extraction and temporal deinterlacing for GOP frames.
*
* @param frameIndex Current frame index in GOP (0-based)
* @param gopSize Total number of frames in GOP
* @param width Frame width
* @param fieldHeight Height of each field (half of display height)
* @param fullHeight Full display height (2 * fieldHeight)
* @param frameCount Global frame counter for dithering
* @param bufferOffset Start offset of GOP in videoBuffer
* @param prevFieldAddr Memory address for previous field buffer
* @param currentFieldAddr Memory address for current field buffer
* @param nextFieldAddr Memory address for next field buffer
* @param deinterlaceOutputAddr Memory address for deinterlaced output
*/
fun uploadInterlacedGopFrameToFramebuffer(
frameIndex: Int,
gopSize: Int,
width: Int,
fieldHeight: Int,
fullHeight: Int,
frameCount: Int,
bufferOffset: Long,
prevFieldAddr: Long,
currentFieldAddr: Long,
nextFieldAddr: Long,
deinterlaceOutputAddr: Long
) {
val gpu = (vm.peripheralTable[1].peripheral as GraphicsAdapter)
val fieldSize = width * fieldHeight * 3L
// Copy 3 consecutive fields from videoBuffer to field buffers
// Previous field (frame N-1, or N if first frame)
val prevFrameIdx = if (frameIndex > 0) frameIndex - 1 else 0
val prevFieldOffset = bufferOffset + (prevFrameIdx * fieldSize)
UnsafeHelper.memcpyRaw(
gpu.videoBuffer,
UnsafeHelper.getArrayOffset(gpu.videoBuffer) + prevFieldOffset,
null,
vm.usermem.ptr + prevFieldAddr,
fieldSize
)
// Current field (frame N)
val currFieldOffset = bufferOffset + (frameIndex * fieldSize)
UnsafeHelper.memcpyRaw(
gpu.videoBuffer,
UnsafeHelper.getArrayOffset(gpu.videoBuffer) + currFieldOffset,
null,
vm.usermem.ptr + currentFieldAddr,
fieldSize
)
// Next field (frame N+1, or N if last frame)
val nextFrameIdx = if (frameIndex < gopSize - 1) frameIndex + 1 else frameIndex
val nextFieldOffset = bufferOffset + (nextFrameIdx * fieldSize)
UnsafeHelper.memcpyRaw(
gpu.videoBuffer,
UnsafeHelper.getArrayOffset(gpu.videoBuffer) + nextFieldOffset,
null,
vm.usermem.ptr + nextFieldAddr,
fieldSize
)
// Deinterlace fields into full frame
tavDeinterlace(
frameCount, width, fieldHeight,
prevFieldAddr, currentFieldAddr, nextFieldAddr,
deinterlaceOutputAddr, "yadif"
)
// Upload deinterlaced full-height frame
uploadRGBToFramebuffer(deinterlaceOutputAddr, width, fullHeight, frameCount, false)
}
// Async GOP decode state
private val asyncDecodeComplete = java.util.concurrent.atomic.AtomicBoolean(false)
private var asyncDecodeResult: Array<Any>? = null

View File

@@ -3,8 +3,8 @@
CC = gcc
CXX = g++
CFLAGS = -std=c99 -Wall -Wextra -Ofast -D_GNU_SOURCE
CXXFLAGS = -std=c++11 -Wall -Wextra -Ofast -D_GNU_SOURCE
CFLAGS = -std=c99 -Wall -Wextra -Ofast -D_GNU_SOURCE -march=native
CXXFLAGS = -std=c++11 -Wall -Wextra -Ofast -D_GNU_SOURCE -march=native
DBGFLAGS =
# Zstd flags (use pkg-config if available, fallback for cross-platform compatibility)

View File

@@ -1023,6 +1023,52 @@ static void dwt_53_inverse_1d(float *data, int length) {
free(temp);
}
// Biorthogonal 2,4 (LeGall 2/4) INVERSE 1D transform
static void dwt_bior24_inverse_1d(float *data, int length) {
if (length < 2) return;
float *temp = malloc(sizeof(float) * length);
int half = (length + 1) / 2;
int i;
int nE = half;
int nO = length / 2;
float *even = temp;
float *odd = temp + nE;
// Load L and H
for (i = 0; i < nE; i++) {
even[i] = data[i];
}
for (i = 0; i < nO; i++) {
odd[i] = data[half + i];
}
// ---- Inverse update: s[i] = s[i] - 0.25*d[i] ----
for (i = 0; i < nE; i++) {
float d = (i < nO) ? odd[i] : 0.0f;
even[i] = even[i] - 0.25f * d;
}
// ---- Inverse predict: o[i] = d[i] + 0.5*s[i] ----
for (i = 0; i < nO; i++) {
odd[i] = odd[i] + 0.5f * even[i];
}
// Interleave back into output
for (i = 0; i < nO; i++) {
data[2 * i] = even[i];
data[2 * i + 1] = odd[i];
}
if (nE > nO) {
// Trailing even sample for odd length
data[2 * nO] = even[nO];
}
free(temp);
}
// Multi-level inverse DWT (matches TSVM exactly with correct non-power-of-2 handling)
static void apply_inverse_dwt_multilevel(float *data, int width, int height, int levels, int filter_type) {
int max_size = (width > height) ? width : height;
@@ -1044,14 +1090,14 @@ static void apply_inverse_dwt_multilevel(float *data, int width, int height, int
}
// Debug: Print dimension sequence
static int debug_once = 1;
/*static int debug_once = 1;
if (debug_once) {
fprintf(stderr, "DWT dimension sequence for %dx%d with %d levels:\n", width, height, levels);
for (int i = 0; i <= levels; i++) {
fprintf(stderr, " Level %d: %dx%d\n", i, widths[i], heights[i]);
}
debug_once = 0;
}
}*/
// TSVM: for (level in levels - 1 downTo 0)
// Apply inverse transforms using pre-calculated dimensions

View File

@@ -2013,13 +2013,13 @@ static int calculate_max_decomp_levels(tav_encoder_t *enc, int width, int height
int min_size = (!enc->monoblock) ? TILE_SIZE_Y : (width < height ? width : height);
// Keep halving until we reach a minimum size (at least 4 pixels)
while (min_size >= 8) { // Need at least 8 pixels to safely halve to 4
while (min_size >= 16) { // apparently you don't want it to be deep
min_size /= 2;
levels++;
}
// Cap at a reasonable maximum to avoid going too deep
return levels > 10 ? 10 : levels;
// Cap at a reasonable maximum to avoid going deep
return levels > 6 ? 6 : levels;
}
// Bitrate control functions
@@ -2274,6 +2274,16 @@ static int parse_resolution(const char *res_str, int *width, int *height, const
*height = 144;
return 1;
}
if (strcmp(res_str, "d1") == 0 || strcmp(res_str, "D1") == 0) {
*width = 720;
*height = 486;
return 1;
}
if (strcmp(res_str, "d1pal") == 0 || strcmp(res_str, "D1PAL") == 0) {
*width = 720;
*height = 576;
return 1;
}
if (strcmp(res_str, "half") == 0 || strcmp(res_str, "HALF") == 0) {
*width = DEFAULT_WIDTH >> 1;
*height = DEFAULT_HEIGHT >> 1;
@@ -2447,6 +2457,8 @@ static void show_usage(const char *program_name) {
printf("\n\nVideo Size Keywords:");
printf("\n -s cif: equal to 352x288");
printf("\n -s qcif: equal to 176x144");
printf("\n -s d1: equal to 720x486");
printf("\n -s d1pal: equal to 720x576");
printf("\n -s half: equal to %dx%d", DEFAULT_WIDTH >> 1, DEFAULT_HEIGHT >> 1);
printf("\n -s default: equal to %dx%d", DEFAULT_WIDTH, DEFAULT_HEIGHT);
printf("\n -s original: use input video's original resolution");
@@ -3131,6 +3143,112 @@ static void dwt_haar_forward_1d(float *data, int length) {
free(temp);
}
// Biorthogonal 2,4 (LeGall 2/4) FORWARD 1D transform
static void dwt_bior24_forward_1d(float *data, int length) {
if (length < 2) return;
float *temp = malloc(sizeof(float) * length);
int half = (length + 1) / 2;
int i;
// Even = low-pass input samples
// Odd = high-pass input samples
// Use lifting: predict (P) then update (U)
// Temporary arrays for even and odd parts
// even[k] = data[2k]
// odd[k] = data[2k+1]
int nE = half;
int nO = length / 2;
float *even = temp; // reuse temp for even
float *odd = temp + nE; // reuse temp for odd
// Split into even and odd samples
for (i = 0; i < nE; i++) {
even[i] = data[2 * i];
}
for (i = 0; i < nO; i++) {
odd[i] = data[2 * i + 1];
}
// ---- Predict step: d[i] = odd[i] - 0.5 * even[i] ----
for (i = 0; i < nO; i++) {
odd[i] = odd[i] - 0.5f * even[i];
}
// ---- Update step: s[i] = even[i] + 0.25 * d[i] ----
for (i = 0; i < nE; i++) {
// When odd array has fewer samples (odd length case),
// treat missing d value as 0.
float d = (i < nO) ? odd[i] : 0.0f;
even[i] = even[i] + 0.25f * d;
}
// Now write back in your Haar layout:
// [LLLL | HHHH]
for (i = 0; i < nE; i++) {
data[i] = even[i];
}
for (i = 0; i < nO; i++) {
data[half + i] = odd[i];
}
// Any leftover slot for odd-length = zero (like Haar)
for (i = nO; i < (length - half); i++) {
data[half + i] = 0.0f;
}
free(temp);
}
// Biorthogonal 2,4 (LeGall 2/4) INVERSE 1D transform
static void dwt_bior24_inverse_1d(float *data, int length) {
if (length < 2) return;
float *temp = malloc(sizeof(float) * length);
int half = (length + 1) / 2;
int i;
int nE = half;
int nO = length / 2;
float *even = temp;
float *odd = temp + nE;
// Load L and H
for (i = 0; i < nE; i++) {
even[i] = data[i];
}
for (i = 0; i < nO; i++) {
odd[i] = data[half + i];
}
// ---- Inverse update: s[i] = s[i] - 0.25*d[i] ----
for (i = 0; i < nE; i++) {
float d = (i < nO) ? odd[i] : 0.0f;
even[i] = even[i] - 0.25f * d;
}
// ---- Inverse predict: o[i] = d[i] + 0.5*s[i] ----
for (i = 0; i < nO; i++) {
odd[i] = odd[i] + 0.5f * even[i];
}
// Interleave back into output
for (i = 0; i < nO; i++) {
data[2 * i] = even[i];
data[2 * i + 1] = odd[i];
}
if (nE > nO) {
// Trailing even sample for odd length
data[2 * nO] = even[nO];
}
free(temp);
}
// Haar wavelet inverse 1D transform
// Reconstructs from averages (low-pass) and differences (high-pass)
static void dwt_haar_inverse_1d(float *data, int length) {
@@ -11048,8 +11166,8 @@ int main(int argc, char *argv[]) {
enc->perceptual_tuning = 0;
}
// disable monoblock mode if either width or height exceeds tie size
if (enc->width > TILE_SIZE_X || enc->height > TILE_SIZE_Y) {
// disable monoblock mode if either width or height exceeds D1 PAL size
if (enc->width > 720 || enc->height > 576) {
enc->monoblock = 0;
}