mirror of
https://github.com/curioustorvald/tsvm.git
synced 2026-03-07 19:51:51 +09:00
TAV update: accept D1 and D1PAL as resolution keywords
This commit is contained in:
@@ -1337,7 +1337,7 @@ try {
|
||||
// Start async decode
|
||||
graphics.tavDecodeGopToVideoBufferAsync(
|
||||
compressedPtr, compressedSize, gopSize,
|
||||
header.width, header.height,
|
||||
header.width, decodeHeight, // Use decodeHeight for interlaced field support
|
||||
header.qualityLevel,
|
||||
QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
|
||||
header.channelLayout,
|
||||
@@ -1411,7 +1411,7 @@ try {
|
||||
// Start async decode to ready slot
|
||||
graphics.tavDecodeGopToVideoBufferAsync(
|
||||
compressedPtr, compressedSize, gopSize,
|
||||
header.width, header.height,
|
||||
header.width, decodeHeight, // Use decodeHeight for interlaced field support
|
||||
header.qualityLevel,
|
||||
QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
|
||||
header.channelLayout,
|
||||
@@ -1454,7 +1454,7 @@ try {
|
||||
// Start async decode to decoding slot
|
||||
graphics.tavDecodeGopToVideoBufferAsync(
|
||||
compressedPtr, compressedSize, gopSize,
|
||||
header.width, header.height,
|
||||
header.width, decodeHeight, // Use decodeHeight for interlaced field support
|
||||
header.qualityLevel,
|
||||
QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
|
||||
header.channelLayout,
|
||||
@@ -1822,7 +1822,7 @@ try {
|
||||
|
||||
graphics.tavDecodeGopToVideoBufferAsync(
|
||||
readyGopData.compressedPtr, readyGopData.compressedSize, readyGopData.gopSize,
|
||||
header.width, header.height,
|
||||
header.width, decodeHeight, // Use decodeHeight for interlaced field support
|
||||
header.qualityLevel,
|
||||
QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
|
||||
header.channelLayout,
|
||||
@@ -1931,7 +1931,21 @@ try {
|
||||
const bufferOffset = bufferSlot * SLOT_SIZE
|
||||
|
||||
let uploadStart = sys.nanoTime()
|
||||
graphics.uploadVideoBufferFrameToFramebuffer(currentGopFrameIndex, header.width, header.height, trueFrameCount, bufferOffset)
|
||||
|
||||
// For interlaced: use specialized function that handles field copying and deinterlacing
|
||||
if (isInterlaced) {
|
||||
graphics.uploadInterlacedGopFrameToFramebuffer(
|
||||
currentGopFrameIndex, currentGopSize,
|
||||
header.width, decodeHeight, header.height,
|
||||
trueFrameCount, bufferOffset,
|
||||
prevFieldAddr, currentFieldAddr, nextFieldAddr,
|
||||
CURRENT_RGB_ADDR
|
||||
)
|
||||
} else {
|
||||
// Progressive: upload directly from videoBuffer
|
||||
graphics.uploadVideoBufferFrameToFramebuffer(currentGopFrameIndex, header.width, header.height, trueFrameCount, bufferOffset)
|
||||
}
|
||||
|
||||
uploadTime = (sys.nanoTime() - uploadStart) / 1000000.0
|
||||
|
||||
// Update active screen mask for this GOP frame
|
||||
@@ -2041,7 +2055,7 @@ try {
|
||||
if (readyGopData.needsDecode) {
|
||||
graphics.tavDecodeGopToVideoBufferAsync(
|
||||
readyGopData.compressedPtr, readyGopData.compressedSize, readyGopData.gopSize,
|
||||
header.width, header.height,
|
||||
header.width, decodeHeight, // Use decodeHeight for interlaced field support
|
||||
header.qualityLevel,
|
||||
QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
|
||||
header.channelLayout,
|
||||
@@ -2119,7 +2133,7 @@ try {
|
||||
// Start async decode
|
||||
graphics.tavDecodeGopToVideoBufferAsync(
|
||||
overflow.compressedPtr, overflow.compressedSize, overflow.gopSize,
|
||||
header.width, header.height,
|
||||
header.width, decodeHeight, // Use decodeHeight for interlaced field support
|
||||
header.qualityLevel,
|
||||
QLUT[header.qualityY], QLUT[header.qualityCo], QLUT[header.qualityCg],
|
||||
header.channelLayout,
|
||||
|
||||
@@ -6712,6 +6712,82 @@ class GraphicsJSR223Delegate(private val vm: VM) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Deinterlace one frame of an interlaced GOP stored in videoBuffer and upload it.
 *
 * Three fields (previous, current, next) are copied out of the GPU's videoBuffer
 * into the caller-supplied field buffers in user memory, combined by
 * [tavDeinterlace] using the "yadif" mode, and the resulting full-height image is
 * passed to [uploadRGBToFramebuffer].
 *
 * At GOP boundaries the neighbouring field is clamped: frame 0 uses itself as the
 * "previous" field and the last frame uses itself as the "next" field.
 *
 * @param frameIndex Current frame index in GOP (0-based)
 * @param gopSize Total number of frames in GOP
 * @param width Frame width
 * @param fieldHeight Height of each field (half of display height)
 * @param fullHeight Full display height (2 * fieldHeight)
 * @param frameCount Global frame counter for dithering
 * @param bufferOffset Start offset of GOP in videoBuffer
 * @param prevFieldAddr Memory address for previous field buffer
 * @param currentFieldAddr Memory address for current field buffer
 * @param nextFieldAddr Memory address for next field buffer
 * @param deinterlaceOutputAddr Memory address for deinterlaced output
 */
fun uploadInterlacedGopFrameToFramebuffer(
    frameIndex: Int,
    gopSize: Int,
    width: Int,
    fieldHeight: Int,
    fullHeight: Int,
    frameCount: Int,
    bufferOffset: Long,
    prevFieldAddr: Long,
    currentFieldAddr: Long,
    nextFieldAddr: Long,
    deinterlaceOutputAddr: Long
) {
    val gpu = (vm.peripheralTable[1].peripheral as GraphicsAdapter)
    // Size of one field in videoBuffer; presumably 3 bytes per pixel (RGB) - confirm against the decoder.
    val bytesPerField = width * fieldHeight * 3L

    // Copies the field belonging to GOP frame `srcFrameIdx` from videoBuffer to user memory at `destAddr`.
    fun copyFieldToUserMem(srcFrameIdx: Int, destAddr: Long) {
        val srcOffset = bufferOffset + (srcFrameIdx * bytesPerField)
        UnsafeHelper.memcpyRaw(
            gpu.videoBuffer,
            UnsafeHelper.getArrayOffset(gpu.videoBuffer) + srcOffset,
            null,
            vm.usermem.ptr + destAddr,
            bytesPerField
        )
    }

    // Neighbour indices, clamped to the GOP: N-1 (or 0 for the first frame), N+1 (or N for the last frame)
    val prevFrameIdx = if (frameIndex > 0) frameIndex - 1 else 0
    val nextFrameIdx = if (frameIndex < gopSize - 1) frameIndex + 1 else frameIndex

    // Stage the three consecutive fields, in the order previous -> current -> next
    copyFieldToUserMem(prevFrameIdx, prevFieldAddr)
    copyFieldToUserMem(frameIndex, currentFieldAddr)
    copyFieldToUserMem(nextFrameIdx, nextFieldAddr)

    // Combine the staged fields into a single full-height frame
    tavDeinterlace(
        frameCount, width, fieldHeight,
        prevFieldAddr, currentFieldAddr, nextFieldAddr,
        deinterlaceOutputAddr, "yadif"
    )

    // Push the deinterlaced frame to the framebuffer
    uploadRGBToFramebuffer(deinterlaceOutputAddr, width, fullHeight, frameCount, false)
}
|
||||
|
||||
// Async GOP decode state
|
||||
private val asyncDecodeComplete = java.util.concurrent.atomic.AtomicBoolean(false)
|
||||
private var asyncDecodeResult: Array<Any>? = null
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
|
||||
CC = gcc
|
||||
CXX = g++
|
||||
CFLAGS = -std=c99 -Wall -Wextra -Ofast -D_GNU_SOURCE
|
||||
CXXFLAGS = -std=c++11 -Wall -Wextra -Ofast -D_GNU_SOURCE
|
||||
CFLAGS = -std=c99 -Wall -Wextra -Ofast -D_GNU_SOURCE -march=native
|
||||
CXXFLAGS = -std=c++11 -Wall -Wextra -Ofast -D_GNU_SOURCE -march=native
|
||||
DBGFLAGS =
|
||||
|
||||
# Zstd flags (use pkg-config if available, fallback for cross-platform compatibility)
|
||||
|
||||
@@ -1023,6 +1023,52 @@ static void dwt_53_inverse_1d(float *data, int length) {
|
||||
free(temp);
|
||||
}
|
||||
|
||||
// Biorthogonal 2,4 (LeGall 2/4) INVERSE 1D transform (lifting form).
//
// Reconstructs `length` samples in place from the layout [ s[0..nE-1] | d[0..nO-1] ],
// where nE = ceil(length / 2) low-pass values and nO = floor(length / 2) high-pass values:
//   inverse update:  e[k] = s[k] - 0.25 * d[k]   (d[k] is treated as 0 when k >= nO)
//   inverse predict: o[k] = d[k] + 0.5  * e[k]
// and writes the samples back interleaved as e[0], o[0], e[1], o[1], ...
// Inputs shorter than 2 samples are left unchanged.
// If the scratch buffer cannot be allocated, an error is printed to stderr and
// `data` is left untouched (previously this dereferenced a NULL pointer).
static void dwt_bior24_inverse_1d(float *data, int length) {
    if (length < 2) return;

    const int nE = (length + 1) / 2;  // number of low-pass (even) coefficients
    const int nO = length / 2;        // number of high-pass (odd) coefficients
    int i;

    float *temp = malloc(sizeof(float) * length);
    if (temp == NULL) {
        fprintf(stderr, "Error: out of memory in dwt_bior24_inverse_1d\n");
        return;
    }

    // Snapshot the coefficients; temp keeps the same [L | H] layout as the input
    for (i = 0; i < length; i++) {
        temp[i] = data[i];
    }
    const float *low = temp;
    const float *high = temp + nE;

    // Undo update, then undo predict, writing the interleaved result directly
    for (i = 0; i < nO; i++) {
        const float e = low[i] - 0.25f * high[i];
        data[2 * i] = e;
        data[2 * i + 1] = high[i] + 0.5f * e;
    }

    // Odd length: the trailing even sample has no matching detail coefficient,
    // so its inverse update subtracts zero and it is copied through unchanged.
    if (nE > nO) {
        data[2 * nO] = low[nO];
    }

    free(temp);
}
|
||||
|
||||
// Multi-level inverse DWT (matches TSVM exactly with correct non-power-of-2 handling)
|
||||
static void apply_inverse_dwt_multilevel(float *data, int width, int height, int levels, int filter_type) {
|
||||
int max_size = (width > height) ? width : height;
|
||||
@@ -1044,14 +1090,14 @@ static void apply_inverse_dwt_multilevel(float *data, int width, int height, int
|
||||
}
|
||||
|
||||
// Debug: Print dimension sequence
|
||||
static int debug_once = 1;
|
||||
/*static int debug_once = 1;
|
||||
if (debug_once) {
|
||||
fprintf(stderr, "DWT dimension sequence for %dx%d with %d levels:\n", width, height, levels);
|
||||
for (int i = 0; i <= levels; i++) {
|
||||
fprintf(stderr, " Level %d: %dx%d\n", i, widths[i], heights[i]);
|
||||
}
|
||||
debug_once = 0;
|
||||
}
|
||||
}*/
|
||||
|
||||
// TSVM: for (level in levels - 1 downTo 0)
|
||||
// Apply inverse transforms using pre-calculated dimensions
|
||||
|
||||
@@ -2013,13 +2013,13 @@ static int calculate_max_decomp_levels(tav_encoder_t *enc, int width, int height
|
||||
int min_size = (!enc->monoblock) ? TILE_SIZE_Y : (width < height ? width : height);
|
||||
|
||||
// Keep halving until we reach a minimum size (at least 4 pixels)
|
||||
while (min_size >= 8) { // Need at least 8 pixels to safely halve to 4
|
||||
while (min_size >= 16) { // apparently you don't want it to be deep
|
||||
min_size /= 2;
|
||||
levels++;
|
||||
}
|
||||
|
||||
// Cap at a reasonable maximum to avoid going too deep
|
||||
return levels > 10 ? 10 : levels;
|
||||
// Cap at a reasonable maximum to avoid going deep
|
||||
return levels > 6 ? 6 : levels;
|
||||
}
|
||||
|
||||
// Bitrate control functions
|
||||
@@ -2274,6 +2274,16 @@ static int parse_resolution(const char *res_str, int *width, int *height, const
|
||||
*height = 144;
|
||||
return 1;
|
||||
}
|
||||
if (strcmp(res_str, "d1") == 0 || strcmp(res_str, "D1") == 0) {
|
||||
*width = 720;
|
||||
*height = 486;
|
||||
return 1;
|
||||
}
|
||||
if (strcmp(res_str, "d1pal") == 0 || strcmp(res_str, "D1PAL") == 0) {
|
||||
*width = 720;
|
||||
*height = 576;
|
||||
return 1;
|
||||
}
|
||||
if (strcmp(res_str, "half") == 0 || strcmp(res_str, "HALF") == 0) {
|
||||
*width = DEFAULT_WIDTH >> 1;
|
||||
*height = DEFAULT_HEIGHT >> 1;
|
||||
@@ -2447,6 +2457,8 @@ static void show_usage(const char *program_name) {
|
||||
printf("\n\nVideo Size Keywords:");
|
||||
printf("\n -s cif: equal to 352x288");
|
||||
printf("\n -s qcif: equal to 176x144");
|
||||
printf("\n -s d1: equal to 720x486");
|
||||
printf("\n -s d1pal: equal to 720x576");
|
||||
printf("\n -s half: equal to %dx%d", DEFAULT_WIDTH >> 1, DEFAULT_HEIGHT >> 1);
|
||||
printf("\n -s default: equal to %dx%d", DEFAULT_WIDTH, DEFAULT_HEIGHT);
|
||||
printf("\n -s original: use input video's original resolution");
|
||||
@@ -3131,6 +3143,112 @@ static void dwt_haar_forward_1d(float *data, int length) {
|
||||
free(temp);
|
||||
}
|
||||
|
||||
// Biorthogonal 2,4 (LeGall 2/4) FORWARD 1D transform (lifting form).
//
// Transforms `length` samples in place. With e[k] = data[2k] and o[k] = data[2k+1]:
//   predict: d[k] = o[k] - 0.5  * e[k]
//   update:  s[k] = e[k] + 0.25 * d[k]   (d[k] is treated as 0 when k >= nO)
// The result uses the same layout as the Haar transform: [ s[0..nE-1] | d[0..nO-1] ],
// where nE = ceil(length / 2) and nO = floor(length / 2). Since nE + nO == length,
// there is never an unused slot to zero-fill.
// Inputs shorter than 2 samples are left unchanged.
// If the scratch buffer cannot be allocated, an error is printed to stderr and
// `data` is left untouched (previously this dereferenced a NULL pointer).
// NOTE(review): the predict step uses only e[k] (no e[k+1] neighbour term); verify this
// matches the intended Bior 2,4 filter. dwt_bior24_inverse_1d mirrors these exact steps,
// so the pair must be changed together.
static void dwt_bior24_forward_1d(float *data, int length) {
    if (length < 2) return;

    const int nE = (length + 1) / 2;  // number of even (low-pass) samples
    const int nO = length / 2;        // number of odd (high-pass) samples
    int i;

    float *temp = malloc(sizeof(float) * length);
    if (temp == NULL) {
        fprintf(stderr, "Error: out of memory in dwt_bior24_forward_1d\n");
        return;
    }

    // temp is laid out exactly like the output: low band first, high band after it
    float *low = temp;
    float *high = temp + nE;

    // Predict then update for every even/odd pair
    for (i = 0; i < nO; i++) {
        const float d = data[2 * i + 1] - 0.5f * data[2 * i];
        high[i] = d;
        low[i] = data[2 * i] + 0.25f * d;
    }

    // Odd length: the trailing even sample has no detail coefficient (d = 0),
    // so the update step leaves it unchanged.
    if (nE > nO) {
        low[nO] = data[2 * nO];
    }

    // Write back [LLLL | HHHH]
    for (i = 0; i < length; i++) {
        data[i] = temp[i];
    }

    free(temp);
}
|
||||
|
||||
|
||||
// Biorthogonal 2,4 (LeGall 2/4) INVERSE 1D transform (lifting form).
//
// Reconstructs `length` samples in place from the layout [ s[0..nE-1] | d[0..nO-1] ],
// where nE = ceil(length / 2) low-pass values and nO = floor(length / 2) high-pass values:
//   inverse update:  e[k] = s[k] - 0.25 * d[k]   (d[k] is treated as 0 when k >= nO)
//   inverse predict: o[k] = d[k] + 0.5  * e[k]
// and writes the samples back interleaved as e[0], o[0], e[1], o[1], ...
// Inputs shorter than 2 samples are left unchanged.
// If the scratch buffer cannot be allocated, an error is printed to stderr and
// `data` is left untouched (previously this dereferenced a NULL pointer).
static void dwt_bior24_inverse_1d(float *data, int length) {
    if (length < 2) return;

    const int nE = (length + 1) / 2;  // number of low-pass (even) coefficients
    const int nO = length / 2;        // number of high-pass (odd) coefficients
    int i;

    float *temp = malloc(sizeof(float) * length);
    if (temp == NULL) {
        fprintf(stderr, "Error: out of memory in dwt_bior24_inverse_1d\n");
        return;
    }

    // Snapshot the coefficients; temp keeps the same [L | H] layout as the input
    for (i = 0; i < length; i++) {
        temp[i] = data[i];
    }
    const float *low = temp;
    const float *high = temp + nE;

    // Undo update, then undo predict, writing the interleaved result directly
    for (i = 0; i < nO; i++) {
        const float e = low[i] - 0.25f * high[i];
        data[2 * i] = e;
        data[2 * i + 1] = high[i] + 0.5f * e;
    }

    // Odd length: the trailing even sample has no matching detail coefficient,
    // so its inverse update subtracts zero and it is copied through unchanged.
    if (nE > nO) {
        data[2 * nO] = low[nO];
    }

    free(temp);
}
|
||||
|
||||
|
||||
// Haar wavelet inverse 1D transform
|
||||
// Reconstructs from averages (low-pass) and differences (high-pass)
|
||||
static void dwt_haar_inverse_1d(float *data, int length) {
|
||||
@@ -11048,8 +11166,8 @@ int main(int argc, char *argv[]) {
|
||||
enc->perceptual_tuning = 0;
|
||||
}
|
||||
|
||||
// disable monoblock mode if either width or height exceeds tie size
|
||||
if (enc->width > TILE_SIZE_X || enc->height > TILE_SIZE_Y) {
|
||||
// disable monoblock mode if either width or height exceeds D1 PAL size
|
||||
if (enc->width > 720 || enc->height > 576) {
|
||||
enc->monoblock = 0;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user