296 Commits

Author SHA1 Message Date
minjaesong
ed63af903b TAV-DT: belief propagation LDPC decoder 2025-12-17 21:25:27 +09:00
minjaesong
c742bc354a TAV-DT: slight format change to allow detailed failure mode inspection 2025-12-17 10:02:17 +09:00
minjaesong
e893ca2df5 TAV-DT: more format revision 2025-12-17 03:49:07 +09:00
minjaesong
ca037c8e74 TAV: encoder vendor string update 2025-12-17 02:09:31 +09:00
minjaesong
1144338059 TAV: different filter for FPS up/downconversion 2025-12-17 02:06:36 +09:00
minjaesong
71dff4b0e0 TAV: size option working same as old encoder 2025-12-17 01:49:07 +09:00
minjaesong
9e1191c0c2 TAV: dead zone defaults 2025-12-16 19:16:47 +09:00
minjaesong
3a19b6aea8 TAV: output FPS conversion 2025-12-16 18:05:12 +09:00
minjaesong
67413f2749 TAV-DT: no Zstd 2025-12-16 10:54:03 +09:00
minjaesong
4929d84cec TAV-DT format revision and soft sync recovery 2025-12-15 17:42:08 +09:00
minjaesong
3d76006ad9 TAV-DT LDPC test 2025-12-15 17:14:52 +09:00
minjaesong
506fcbe79d TAV-DT syncing on damaged stream wip 2025-12-15 02:00:01 +09:00
minjaesong
42341b4e10 TAV-DT multithreaded decoding 2025-12-12 16:26:30 +09:00
minjaesong
b9d9d221dd TAV-DT multithreaded encoding 2025-12-12 16:01:22 +09:00
minjaesong
01a89f3b36 Working TAV-DT encoder/decoder 2025-12-12 08:39:40 +09:00
minjaesong
300f88a44c tav-dt format change 2025-12-11 18:25:25 +09:00
minjaesong
2d6167393a fixing issue where graphics mode 5 failing to display anything 2025-12-09 22:34:56 +09:00
minjaesong
f1d1e36164 tavenc: fixed malformed intra frame packets 2025-12-09 17:36:09 +09:00
minjaesong
50092aef60 tavdec: multithreaded audio decode 2025-12-09 10:44:32 +09:00
minjaesong
017aef26ab tavenc/dec: interlaced mode 2025-12-09 08:40:42 +09:00
minjaesong
efdb915208 tavenc: header support for extended dimensions (max(width,height) > 65535) 2025-12-09 04:59:40 +09:00
minjaesong
5d99191b5e tavenc: proper sync packet after i-frames 2025-12-09 04:44:40 +09:00
minjaesong
621c312922 tavenc: extended header, automatic multithreading, tad autoselect fix 2025-12-09 03:44:35 +09:00
minjaesong
c6c50c2ebe tavenc: multithreaded decoding 2025-12-08 16:07:20 +09:00
minjaesong
34a1f0e3db tavenc: multithreaded encoding 2025-12-08 11:06:03 +09:00
minjaesong
9b72a62cdb tavenc: shelving the tiling mode due to poor performance 2025-12-07 23:42:20 +09:00
minjaesong
9e2c9e6efd tavenc: tiling on uniform 2025-12-07 23:27:07 +09:00
minjaesong
0907e22f53 tavenc: tiling 2025-12-07 15:27:32 +09:00
minjaesong
3828bd7fbc reference tav decoder 2025-12-07 04:58:36 +09:00
minjaesong
189646a8dc libtavenc: fixed gop size 2025-12-07 04:36:19 +09:00
minjaesong
7f951366da tavlib: quant fix 2 2025-12-07 02:51:17 +09:00
minjaesong
a2233aedaf tavlib: quant fix 2025-12-06 08:58:21 +09:00
minjaesong
dad1da741f tav: librarying 2025-12-05 09:21:47 +09:00
minjaesong
94ae24e9e4 tav: librarying 2025-12-05 03:39:32 +09:00
minjaesong
d3cc05789f tav: more dt decoder fix 2025-12-02 20:20:59 +09:00
minjaesong
bc5779d4f5 tav: temporal CDF 5/3 is causing coeff overflow on -q 5, replacing with Haar+sports mode 2025-12-02 16:39:30 +09:00
minjaesong
046fa98025 working TAV-DT decoder 2025-12-02 14:37:36 +09:00
minjaesong
196b9a0c01 TAV-DT encoding/scene detection const sweak 2025-12-02 04:49:21 +09:00
minjaesong
7848357cf2 TAV-DT doc 2025-12-02 00:36:46 +09:00
minjaesong
a3f8be9773 additional noise on post shader 2025-12-01 21:23:56 +09:00
minjaesong
8adf44e5ae more TAV doc 2025-12-01 18:43:37 +09:00
minjaesong
602b42c6ff JS impl tav_inspector 2025-12-01 14:58:32 +09:00
minjaesong
79eb81e8d0 tav: D1 res is 720x480 2025-12-01 01:52:39 +09:00
minjaesong
c522244574 tav: fix: incorrect brightness jumping on MT mode 2025-11-30 23:46:59 +09:00
minjaesong
28b391cfd4 tav: multithread mode progress indicator 2025-11-30 18:19:06 +09:00
minjaesong
26d216ca13 tav: lean memory usage 2025-11-30 14:23:00 +09:00
minjaesong
f598daec1e tav: mt by default 2025-11-30 13:59:39 +09:00
minjaesong
3f8cf6a38c tav: lean memory usage 2025-11-30 13:41:41 +09:00
minjaesong
902d971ae7 tav: first working multithreaded encoder 2025-11-30 13:24:24 +09:00
minjaesong
5ecf2dcadd tav: multithread wip 2025-11-30 09:58:21 +09:00
minjaesong
9edeca929d tav: encoder ENDT fix 2025-11-29 01:58:54 +09:00
minjaesong
3b401139e9 more experiments for avx512 2025-11-26 02:36:49 +09:00
minjaesong
acaade1062 better CRT/Composite shader 2025-11-26 01:03:40 +09:00
minjaesong
e3099195e4 gpu: 5bpp mode 2025-11-25 09:53:53 +09:00
minjaesong
08bb33bf27 TAV: preset implementation 2025-11-24 20:01:08 +09:00
minjaesong
6132012e74 TAV: code cleanup, better preset selection 2025-11-24 11:16:19 +09:00
minjaesong
dbbb471a11 TAV: decomp level 4 for default res is optimal (fast enc/dec, minimal filesize change) 2025-11-24 03:34:08 +09:00
minjaesong
53b87ff735 TAV: avx512 intrinsic 2025-11-24 03:14:29 +09:00
minjaesong
d49ec39b73 TAV: interlaced 3d dwt decoding 2025-11-24 01:04:58 +09:00
minjaesong
dd60b2c569 TAV update: D1 and D1PAL as an acceptable resolution keyword 2025-11-23 23:22:14 +09:00
minjaesong
1c7ab17b1c TAV update: CDF 5/3 for motion coder 2025-11-23 18:16:12 +09:00
minjaesong
e928d2d3ec TAV fix: '--intra-only --interlaced' would cause crash due to buffer size mismatch 2025-11-22 21:11:54 +09:00
minjaesong
5129e354bb TAV: new resolution keyword: original" 2025-11-22 02:25:17 +09:00
minjaesong
be1b92f188 TAV: fixed memory bugs on intra-only coding 2025-11-21 21:58:07 +09:00
minjaesong
2533b2dc19 TAV: videotex mode; TSVM documentation fix 2025-11-21 16:53:35 +09:00
minjaesong
a61a21d28b TAV: lax scene detection algo 2025-11-20 02:42:11 +09:00
minjaesong
92274a8e19 TAV: letterbox detection encoding complete 2025-11-20 02:36:35 +09:00
minjaesong
44cc54264a TAV: letterbox detection 2 2025-11-17 11:47:36 +09:00
minjaesong
8199cbc955 TAV: letterbox detection 2025-11-17 04:26:52 +09:00
minjaesong
aa7e20695d fix: wrong timecode calculation on NTSC framerates 2025-11-16 02:49:03 +09:00
minjaesong
5c87325366 fix: wrong subtitle timecode on certain SRT files 2025-11-15 19:54:53 +09:00
minjaesong
64e100e532 fix: audio and subtitles don't sync up
note: it seems encoder outputs malformed subtitle on Tom Scott video
2025-11-15 14:16:12 +09:00
minjaesong
233f1e7dcd more authentic CRT shader 2025-11-14 17:54:20 +09:00
minjaesong
19f813eb7d TAV: dead code removal 2025-11-13 11:10:39 +09:00
minjaesong
a45a919c84 zfm: size string for gigabytes 2025-11-13 10:35:45 +09:00
minjaesong
9247471bf2 more TAV/TAD documentation update 2025-11-11 21:15:27 +09:00
minjaesong
6add391d07 TAV encoder: vendor string update 2025-11-11 14:54:28 +09:00
minjaesong
b19afdae3a TAV decoder: no need for dequant rounding now 2025-11-11 14:08:35 +09:00
minjaesong
bd530f803f fix: TAV C decoder outputting wrong brightness 2025-11-11 13:28:11 +09:00
minjaesong
901f6b52b4 fix: TAV decomposition level errenously calculating on monoblock when it shouldn't 2025-11-11 10:00:29 +09:00
minjaesong
bff5021a7a fix: EZBC for TAV producing dark bloches on white background due to coeff clipping 2025-11-11 03:22:10 +09:00
minjaesong
9425c58e53 TAV/TAD fix wip 2025-11-11 00:17:51 +09:00
minjaesong
c1d6a959f5 TAV/TAD doc update 2025-11-10 17:01:44 +09:00
minjaesong
edb951fb1a removing obsolete makefile directives 2025-11-10 10:52:25 +09:00
minjaesong
0f5875d45b fix: TAD for TSVM producing distorted audio 2025-11-10 10:43:49 +09:00
minjaesong
0e6f2162c8 TAV and TAD now shares same code for encoding and decoding 2025-11-10 10:35:17 +09:00
minjaesong
28e9a88f8d TAD: EZBC with fixes 2025-11-10 01:45:33 +09:00
minjaesong
3f97f1a59e TAD: embedded zero tree coding (basically 1D EZBC) 2025-11-09 13:34:28 +09:00
minjaesong
c0d1d54bed TAD: more tuning 2025-11-08 02:04:11 +09:00
minjaesong
aa9ecee7ca TAD: pre/de-emphasis 2025-11-07 23:13:08 +09:00
minjaesong
8878d37e5b TAD: pre/de-emphasis 2025-11-07 15:16:35 +09:00
minjaesong
e743fbf3c0 TAD: more tuning 2025-11-07 10:51:01 +09:00
minjaesong
d9d395c62c TAD: psychoacoustically optimised quantisation 2025-11-07 03:40:56 +09:00
minjaesong
00c882aa8d TEV/TAV: SSF-TC impl 2025-11-06 01:18:19 +09:00
minjaesong
af3679921d TAV decoder: now with working audio 2025-11-04 18:22:41 +09:00
minjaesong
332e8760ad TAV decoder: GOP decoding, GOP grain synthesis 2025-11-04 12:00:50 +09:00
minjaesong
c85b007ba9 TAV decoder fix: limited RGB range 2025-11-04 02:10:32 +09:00
minjaesong
61b0bdaed7 doc update 2025-11-04 00:43:14 +09:00
minjaesong
9d98cc1a21 TAV decoder: rewrote to output to file, currently only does I-frames which is NOT a regression from the old code 🤷 2025-11-03 22:49:44 +09:00
minjaesong
76c42f20b3 TAV fix: odd number base quantiser causing luminance flicker on every first GOP frames 2025-11-03 03:11:12 +09:00
minjaesong
e871264ae5 TAV: wip 2025-11-03 02:36:12 +09:00
minjaesong
f3b68e1164 TAV: fixed video luminance errors on -q 4 and 5 2025-10-31 04:41:48 +09:00
minjaesong
755d4deb95 TAV: more various fixes and confirming temporal level 3 is unsuitable 2025-10-31 01:40:02 +09:00
minjaesong
46ad919407 TAD documentation update 2025-10-30 22:13:29 +09:00
minjaesong
c61bf7750f TAV: fix - TAD audio incorrectly decoding due to incorrect step size reconstruction 2025-10-30 22:02:57 +09:00
minjaesong
991d035bcc TAD: working kotlin decoder 2025-10-30 17:08:42 +09:00
minjaesong
480d2d8538 TAV: TAD integration wip 2025-10-30 00:34:15 +09:00
minjaesong
4a6edeca09 TAD: imma just finalise it here 2025-10-29 23:59:08 +09:00
minjaesong
692defdbb8 TAD: more slight dithering 2025-10-29 03:51:49 +09:00
minjaesong
ee2ddef1c1 TAD: coefficient dithering on decoder 2025-10-29 03:19:36 +09:00
minjaesong
999e1deda0 TAD: coefficient dithering 2025-10-29 02:52:09 +09:00
minjaesong
a67d8b5f08 TAD: auto filename selection 2025-10-29 02:11:04 +09:00
minjaesong
f06f339d99 TAD: bringing coeff weight back 2025-10-29 01:47:14 +09:00
minjaesong
86864c4b7a TAD: somehow removing entropy coding yields better compression? 2025-10-28 04:19:11 +09:00
minjaesong
86de627734 TAD: back to twobitmap 2025-10-28 04:04:41 +09:00
minjaesong
c6de68291d TAD: quantised value stats 2025-10-28 03:15:15 +09:00
minjaesong
b3a91bf6cb makefile: zstd probing for macOS 2025-10-27 09:42:06 +09:00
minjaesong
1d0f369827 TAD: arbitrary steps with bitplanes 2025-10-27 09:14:52 +09:00
minjaesong
9c27d114fc TAD: even the slight companding vastly improves low-volume samples but also increases encoded size by a lot 2025-10-27 01:22:36 +09:00
minjaesong
67f7c091eb TAD: better compression using bitmap and delta prediction 2025-10-26 20:30:28 +09:00
minjaesong
370d511f44 TAD: better bit allocation using statistics 2025-10-26 18:16:28 +09:00
minjaesong
9fcb7fc95c TAD: more wip 2025-10-26 02:49:39 +09:00
minjaesong
52f25f7d04 TAV: two-pass GOP slicer 2025-10-25 00:01:37 +09:00
minjaesong
69583e5f1e TAV: frame statistics for 3D-DWT mode 2025-10-24 18:38:50 +09:00
minjaesong
56a1bac19a TAV: video with TAD audio playback 2025-10-24 18:10:53 +09:00
minjaesong
3adc50365b TAV: TAD encoding 2025-10-24 17:05:16 +09:00
minjaesong
cd88885fbf TAD: kotlin side update 2025-10-24 09:19:55 +09:00
minjaesong
9dc71095a0 TAD: now processing entirely in float 2025-10-24 09:12:28 +09:00
minjaesong
a9319fd812 TAD: Terrarum Advanced Audio to use with video compression 2025-10-24 02:06:52 +09:00
minjaesong
6f669f4fd9 TAV: hopefully more steady playback 2025-10-23 16:01:23 +09:00
minjaesong
e147072d03 removing obsolete files 2025-10-23 10:09:11 +09:00
minjaesong
53da0bfcee TAV: fix: iframes not decoding 2025-10-23 09:26:58 +09:00
minjaesong
34427d61d7 TAV: minimal size for GOP 2025-10-23 01:05:46 +09:00
minjaesong
7f7222fe54 TAV: fix initial GOP skipping 2025-10-22 19:31:10 +09:00
minjaesong
4265891093 TAV: pcm8 audio 2025-10-22 10:43:47 +09:00
minjaesong
758b134abd TAV: experimental separate audio format mode 2025-10-22 09:33:15 +09:00
minjaesong
4eec98cdca TAV: half-fixed 3d dwt playback 2025-10-22 01:32:19 +09:00
minjaesong
9ac0424be3 TAV: double buffered playback 2025-10-21 16:17:00 +09:00
minjaesong
f0ad0ef034 TAV: EZBC on the header 2025-10-20 18:53:40 +09:00
minjaesong
9553b281af TAV: EZBC entropy coding 2025-10-20 18:36:20 +09:00
minjaesong
019f0aaed5 TAV: trying mpeg-style mocomp 2025-10-19 17:56:06 +09:00
minjaesong
120058be6d TAV: some more mocomp shit 2025-10-18 05:47:17 +09:00
minjaesong
3b9e02b17f TAV: will replace frame aligning with something else, or maybe with nothing 2025-10-17 06:48:21 +09:00
minjaesong
93622fc8ca TAV: 3D DWT makes coherent picture at least 2025-10-17 02:01:08 +09:00
minjaesong
0cf1173dd6 TAV: iS tHiS aN iMpRoVeMeNt¿ 2025-10-16 09:37:20 +09:00
minjaesong
cc2f3e4d57 tav_inspector update 2025-10-16 04:39:47 +09:00
minjaesong
e179a15f33 TAV: more experiments 2025-10-16 02:35:53 +09:00
minjaesong
e19af854dc TAV: Haar delta decoding 2025-10-16 01:39:51 +09:00
minjaesong
ea72dec996 TAV: still bugfixing 2025-10-16 00:03:58 +09:00
minjaesong
7e248bc83d TAV: experimental 3D DWT encoder 2025-10-15 16:04:27 +09:00
minjaesong
b40b2ff0a1 TAV: much better delta coding 2025-10-15 00:20:46 +09:00
minjaesong
5dcf2177d5 TAV: no delta coding by default (but allows skip coding) 2025-10-14 14:16:05 +09:00
minjaesong
871d7bcdfe TAV: packet inspector 2025-10-14 01:25:16 +09:00
minjaesong
4c48d761b9 TAV: extended header spec 2025-10-14 00:36:39 +09:00
minjaesong
94749a3ad6 TAV: timecode packets 2025-10-14 00:27:26 +09:00
minjaesong
e705d274de TAV decoding minor changes 2025-10-13 01:42:44 +09:00
minjaesong
222b9866a8 fix: RGB upload function broken for non-native size 2025-10-10 00:11:51 +09:00
minjaesong
0b7b8cdd35 fix: incorrect MMIO addressing of FB3/4 2025-10-09 20:55:14 +09:00
minjaesong
31457974be more refactoring 2025-10-09 16:50:49 +09:00
minjaesong
0284b37678 graphics mode 5, faster RGB upload 2025-10-09 16:44:41 +09:00
minjaesong
912e35a122 graphics mode 5 2025-10-09 16:01:26 +09:00
minjaesong
78a7cdc08f minor fix 2025-10-09 01:51:03 +09:00
minjaesong
1a072f6a0c tav: grain synthesis on the spec 2025-10-09 00:01:53 +09:00
minjaesong
17b5063ef0 tav player: seek and pause 2025-10-08 21:25:33 +09:00
minjaesong
9826efd98a tav: int maths on still frame detection 2025-10-08 16:41:42 +09:00
minjaesong
f980f23efe tav: just more stat numbers 2025-10-08 16:17:14 +09:00
minjaesong
67445b040c tav: skip frame coding 2025-10-08 11:09:06 +09:00
minjaesong
d08511a39d film grain effect to alleviate 3d scene with low-res texture look 2025-10-08 02:07:29 +09:00
minjaesong
f918cd429c REWIND command for serial device 2025-10-07 23:43:24 +09:00
minjaesong
769b6481da HSDPA supporting file larger than 2GB 2025-10-07 23:33:29 +09:00
minjaesong
00e390d879 playtav: cue navigation using arrow keys 2025-10-07 21:41:37 +09:00
minjaesong
7474a9d472 proper UCF writer 2025-10-07 21:07:10 +09:00
minjaesong
abce002cdd testdiskdrive now uses input stream which enables I/O on files larger than 2GB 2025-10-07 19:54:43 +09:00
minjaesong
e36d4041ce TAV: subtitle font handling 2025-10-07 18:14:07 +09:00
minjaesong
da084c0074 TAV: interlaced mode 2025-10-07 17:51:47 +09:00
minjaesong
3c9441e67f TAV: less aggressive deadzonning 2025-10-07 15:27:55 +09:00
minjaesong
581b270c31 TAV: interlacing for backwards compat 2025-10-07 14:42:34 +09:00
minjaesong
fd62df99a4 TAV: yet another preset change 2025-10-07 04:36:57 +09:00
minjaesong
ad232d1c84 TAV: twobitmap for better compression 2025-10-07 03:55:56 +09:00
minjaesong
0b066a693e visualiser update 2025-10-07 02:55:53 +09:00
minjaesong
cdec0fe020 TAV: even more preset changes 2025-10-07 02:27:15 +09:00
minjaesong
21e3fe4c1e TAV: default Zstd level to 9 for faster encoding 2025-10-06 20:47:53 +09:00
minjaesong
364e33eede better hangul print 2025-10-06 20:43:45 +09:00
minjaesong
5d4e775ad0 TAV: default quality and zstd level change 2025-10-06 20:43:35 +09:00
minjaesong
f7d98e74e3 quantisation deadzonning (massive compression gain) 2025-10-05 23:35:09 +09:00
minjaesong
d6019019dc filter preset change 2025-10-05 19:17:20 +09:00
minjaesong
6c83c925cb terranmon.txt update 2025-10-05 18:53:31 +09:00
minjaesong
5c3a3a112c new: unicode.visualStrlen 2025-10-04 22:39:17 +09:00
minjaesong
71102f1d70 fixing hangul print functions 2025-10-04 01:10:14 +09:00
minjaesong
6222e9d8bd revived unicode print function 2025-10-03 23:28:23 +09:00
minjaesong
60e0ff9e61 fix: font rom not uploading 2025-10-03 22:42:42 +09:00
minjaesong
7d61074a13 Faster ICtCp colour conversion using extensive LUT 2025-10-03 14:50:52 +09:00
minjaesong
5f14169e6b encoder preset change (more quality) 2025-10-03 14:41:23 +09:00
minjaesong
29b3da1dbc ICtCp float math 2025-10-03 02:00:14 +09:00
minjaesong
27ad3361ea UCF reading and writing 2025-10-02 23:49:57 +09:00
minjaesong
d4fae0071b tav: ictcp decoding fix 2025-10-02 20:20:11 +09:00
minjaesong
0666734c9d tav: --dump-frame 2025-10-01 22:54:12 +09:00
minjaesong
f1cad6d9fa tav: coefficient dump visualiser 2025-10-01 22:48:57 +09:00
minjaesong
027d3289ca media player gui wip 2025-10-01 20:25:10 +09:00
minjaesong
b9f38dfa08 TAV: revised psychovisual model 2025-10-01 12:21:04 +09:00
minjaesong
3e40b048a7 TAV: allowing multi-title if video is larger than default size 2025-10-01 09:32:34 +09:00
minjaesong
70dfc7bf13 TAV: better initial q-index prediction for target bitrate mode 2025-10-01 01:53:25 +09:00
minjaesong
8c7550e581 TAV: removed quantiser halving for non-CDF9/7 filters 2025-10-01 01:31:54 +09:00
minjaesong
ff6821eb55 better preservation of high frequency diagonals 2025-10-01 01:07:05 +09:00
minjaesong
4e219d1a71 much better bitrate control 2025-09-30 17:53:46 +09:00
minjaesong
8688fc3742 better bitrate control 2025-09-30 17:05:15 +09:00
minjaesong
0f784eb741 bitrate mode wip 2025-09-30 16:06:58 +09:00
minjaesong
7f22ec8cc7 quality preset change 2025-09-30 03:19:58 +09:00
minjaesong
b457be4bbf minor fix 2025-09-30 01:45:15 +09:00
minjaesong
41a8b578b5 Apparently you can push the chroma extremely far 2025-09-30 01:05:14 +09:00
minjaesong
836e69a40b CoCg-only channel handling 2025-09-29 22:16:44 +09:00
minjaesong
8b808ca297 TAV sharpen filters 2025-09-29 21:38:23 +09:00
minjaesong
7608e7433a TAV: channel layouts 2025-09-29 16:34:08 +09:00
minjaesong
907cc37b01 TAV: more format doc updates 2025-09-29 14:52:37 +09:00
minjaesong
1d3d218238 TAV: channel-concatenated coeffs preprocessing 2025-09-29 14:42:52 +09:00
minjaesong
5012ca4085 TAV: decompression done on GPU 2025-09-29 01:35:19 +09:00
minjaesong
66909537a0 TAV: improved compression using some coefficient preprocessing 2025-09-29 01:17:53 +09:00
minjaesong
01278815c7 added CLAUDE.md 2025-09-29 00:27:32 +09:00
minjaesong
65a01f36a4 more encoder param tuning 2025-09-28 20:28:11 +09:00
minjaesong
6ff634cc12 more wavelets for experimentation 2025-09-28 08:55:15 +09:00
minjaesong
d85f8002cc perceptual model copied to TAV decoder_tav.c 2025-09-27 00:50:25 +09:00
minjaesong
c50d015515 TAV decoder for ffmpeg/ffplay 2025-09-27 00:43:20 +09:00
minjaesong
efab1c3a88 TAV: more documentation 2025-09-25 18:27:46 +09:00
minjaesong
4d9981ec23 TAV: now writes encoder q-value to the header 2025-09-25 18:15:53 +09:00
minjaesong
ca18595134 TAV: GOP length adjustment 2025-09-25 16:20:50 +09:00
minjaesong
b4e9d84f5f TAV decoder visual "hack" (increasing acutance) 2025-09-25 15:49:58 +09:00
minjaesong
e2dd0744d2 simpler method of delta coding 2025-09-25 00:48:20 +09:00
minjaesong
2b59d5dd8d Revert "predictive delta encoding wip"
This reverts commit 21fd10d2 but introduces changes from d117b15e
2025-09-25 00:43:03 +09:00
minjaesong
bb3f715ad6 spatial delta prediction 2025-09-25 00:34:39 +09:00
minjaesong
699c6394a1 multiframe-based delta prediction 2025-09-24 23:39:34 +09:00
minjaesong
d117b15e0f better NTSC framerate handling 2025-09-24 21:53:01 +09:00
minjaesong
5564fa5c9b predictive delta encoding with dithering 2025-09-24 21:37:20 +09:00
minjaesong
21fd10d2b6 predictive delta encoding wip 2025-09-24 16:55:23 +09:00
minjaesong
338a0b2e5d tev/tav spec update 2025-09-24 02:43:26 +09:00
minjaesong
0b3497b013 tav playing concatenated video streams (fixed) 2025-09-24 00:41:49 +09:00
minjaesong
a9ba57c09a tav playing concatenated video streams 2025-09-22 23:40:33 +09:00
minjaesong
05101ecd08 turns out extra sync packet at the end of video was a terrible idea 2025-09-22 22:38:32 +09:00
minjaesong
e001445095 more perceptual optimisation 2025-09-22 14:45:59 +09:00
minjaesong
4851f61c56 how far can i push the chroma subquantisation? 2025-09-22 10:37:33 +09:00
minjaesong
be43384968 resurrecting delta encoding 2025-09-22 02:47:46 +09:00
minjaesong
28624309d7 even more psychovisual model 2025-09-22 01:01:15 +09:00
minjaesong
3584520ff9 more psychovisual model 2025-09-22 00:03:45 +09:00
minjaesong
613b8a97dd still working on the psychovisual model 2025-09-21 21:13:39 +09:00
minjaesong
5d48cc62eb still working on the psychovisual model 2025-09-21 01:36:31 +09:00
minjaesong
206e43a308 TAV: first working psychovisual tuning 2025-09-20 11:15:04 +09:00
minjaesong
d3a18c081a TAV: base code for adding psychovisual model 2025-09-20 02:02:59 +09:00
minjaesong
c14b692114 --arate option to override audio bitrate 2025-09-18 01:28:02 +09:00
minjaesong
89aada888d i hate ntsc framerate 2025-09-18 00:57:49 +09:00
minjaesong
b373471629 fix: framerate conversion not working as it should for TAV 2025-09-18 00:54:23 +09:00
minjaesong
35f1a0c2f2 monoblock TAV 2025-09-18 00:37:44 +09:00
minjaesong
f4b03b55b6 monoblock TAV 2025-09-17 23:36:37 +09:00
minjaesong
8279b15b43 TAV using subtitle parsing of TEV 2025-09-17 09:37:00 +09:00
minjaesong
9652143d93 more code cleanup 2025-09-17 01:24:05 +09:00
minjaesong
89e8fc39ce final encoder code cleanup 2025-09-17 00:55:23 +09:00
minjaesong
9ca575eee4 show_usage change 2025-09-16 23:11:31 +09:00
minjaesong
953de6feb6 Merge wavelet_video branch: Add TAV encoder with audio buffer management and NTSC frame duplication
- Implemented TAV (TSVM Advanced Video) encoder with DWT-based compression
- Added sophisticated audio buffer deficit tracking system ported from TEV
- Fixed NTSC frame duplication ghosting by emitting extra sync packets
- Resolved merge conflicts in GraphicsJSR223Delegate.kt

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-16 23:05:20 +09:00
minjaesong
ae59946883 ntsc framerate handling 2025-09-16 22:59:45 +09:00
minjaesong
a639e116c5 TEV-like statistics 2025-09-16 22:40:58 +09:00
minjaesong
9e8aeeb112 audio handling 2025-09-16 22:23:31 +09:00
minjaesong
47f93194a7 p-frame for tav 2025-09-16 18:57:11 +09:00
minjaesong
be193269d8 280x224 macrotile 2025-09-16 16:18:01 +09:00
minjaesong
391adffad4 encoder optimisation 2025-09-16 15:20:28 +09:00
minjaesong
dab56ee55d decoder optimisation 2025-09-16 14:46:56 +09:00
minjaesong
3011c73168 code cleanup 2025-09-16 10:26:03 +09:00
minjaesong
a5da200507 wavelet deblocking using simulated overlapping tiles 2025-09-16 10:03:17 +09:00
minjaesong
54f335e3de working wavelet encoder 2025-09-16 02:51:00 +09:00
minjaesong
4bb234a89b wip 2025-09-15 23:47:28 +09:00
minjaesong
4c0a282de7 removing any reference on interlaced 2025-09-15 19:14:51 +09:00
minjaesong
113c01b851 112x112 blocks for TAV, which greatly improves the encoding speed 2025-09-15 19:08:53 +09:00
minjaesong
1343dd10cf TAV with ICtCp colour space 2025-09-15 16:35:44 +09:00
minjaesong
b497570a3b using "correct" colourimetry
(cherry picked from commit ded609e65e)
2025-09-15 13:00:58 +09:00
minjaesong
9f901681a6 first working version 2025-09-15 12:56:42 +09:00
minjaesong
ded609e65e using "correct" colourimetry 2025-09-15 10:14:44 +09:00
minjaesong
34cf5cb591 ICtCp colour space impl 2025-09-15 09:52:23 +09:00
minjaesong
9c2aa96b73 ICtCp colour space wip 2025-09-15 02:12:16 +09:00
minjaesong
d446a4e2f5 wip6 2025-09-14 22:26:02 +09:00
minjaesong
db57516a46 wip5 2025-09-13 23:06:31 +09:00
minjaesong
712506c91c wip4 2025-09-13 22:02:56 +09:00
minjaesong
722e8e893f wip3 2025-09-13 15:24:32 +09:00
minjaesong
dca09cf4a3 wip2 2025-09-13 13:32:14 +09:00
minjaesong
62d6ee94cf tav wip 2025-09-13 13:28:01 +09:00
minjaesong
198e951102 various encoder bug fixes 2025-09-13 00:39:12 +09:00
minjaesong
1f5f72733a dead code pruning 2025-09-12 19:13:12 +09:00
minjaesong
957522a460 Knusperli-esque post deblocking filter 2025-09-12 14:32:12 +09:00
minjaesong
433e3ea3ae Impl Knusperli deblocking decoding
https://github.com/google/knusperli
2025-09-12 13:50:37 +09:00
minjaesong
dc223fe00b encoder example text update 2025-09-12 09:24:41 +09:00
minjaesong
190cb130bf two pass coding for Knusperli deblocking 2025-09-11 00:23:45 +09:00
minjaesong
29907ec357 TEV doc update 2025-09-10 02:08:10 +09:00
minjaesong
8601f614b4 optimised tevDecode 2025-09-10 01:49:32 +09:00
minjaesong
3f9747ebf0 optimised deinterlacing 2025-09-10 01:33:31 +09:00
minjaesong
c498526a90 uploadRGBToFramebuffer optimisation with bulk mem access 2025-09-10 01:21:16 +09:00
minjaesong
5a5ac8ef74 complexity calculation now considers chroma 2025-09-09 19:02:15 +09:00
minjaesong
9f7a4ef2e7 tev rate factor impl fix 2025-09-09 10:07:52 +09:00
minjaesong
3495dfca5e tev slight optimisation 2025-09-09 09:47:56 +09:00
minjaesong
cf1ee80aa1 fixing another stupid bug 2025-09-09 00:11:45 +09:00
minjaesong
b80d5b858a encoder complexity function update 2025-09-08 23:58:25 +09:00
97 changed files with 40805 additions and 4667 deletions

2
.gitignore vendored
View File

@@ -9,6 +9,8 @@ buildapp/out/TerranBASIC*
buildapp/TerranBASIC_linux.*
buildapp/TerranBASIC_macOS.*
buildapp/TerranBASIC_windows.*
*.o
*.a
# Java native errors
hs_err_pid*

389
CLAUDE.md Normal file
View File

@@ -0,0 +1,389 @@
# CLAUDE.md
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
## Project Overview
**tsvm** is a virtual machine that mimics 8-bit era computer architecture and runs programs written in JavaScript. The project includes:
- The virtual machine core
- Reference BIOS implementation
- TVDOS (operating system)
- Videotron2K video display controller emulator
- TerranBASIC integration
- Multiple platform build system
## Architecture
### Core Components
- **tsvm_core/**: Core virtual machine implementation in Kotlin
- `VM.kt`: Main virtual machine class with memory management and peripheral slots
- `peripheral/`: Hardware peripherals (graphics adapters, disk drives, TTY, audio, etc.)
- `vdc/`: Videotron2K video display controller
- Various delegates for JavaScript integration via GraalVM
- **tsvm_executable/**: Main emulator application
- `VMGUI.kt`: LibGDX-based GUI implementation
- `TsvmEmulator.java`: Main application entry point
- Menu systems for configuration, audio, memory management
- **TerranBASICexecutable/**: TerranBASIC interpreter application
- `TerranBASIC.java`: Entry point for BASIC interpreter
- `VMGUI.kt`: GUI for BASIC environment
### Key Technologies
- **Kotlin/Java**: Primary implementation language
- **LibGDX**: Graphics and windowing framework
- **GraalVM**: JavaScript execution engine for running programs in the VM
- **LWJGL**: Native library bindings
- **IntelliJ IDEA**: Development environment (*.iml module files)
### Virtual Hardware
The VM emulates various peripherals through the `peripheral/` package:
- Graphics adapters with different capabilities
- Disk drives (including TevdDiskDrive for custom disk format)
- TTY terminals and character LCD displays
- Audio devices and MP2 audio environment
- Network modems and serial interfaces
- Memory management units
## Build and Development
### Building Applications
Use the build scripts in `buildapp/`:
- `build_app_linux_x86.sh` - Linux x86_64 AppImage
- `build_app_linux_arm.sh` - Linux ARM64 AppImage
- `build_app_mac_x86.sh` - macOS Intel
- `build_app_mac_arm.sh` - macOS Apple Silicon
- `build_app_windows_x86.sh` - Windows x86
### Prerequisites
1. Download JDK 17 runtimes to `~/Documents/openjdk/*` with specific naming:
- `jdk-17.0.1-x86` (Linux AMD64)
- `jdk-17.0.1-arm` (Linux Aarch64)
- `jdk-17.0.1-windows` (Windows AMD64)
- `jdk-17.0.1.jdk-arm` (macOS Apple Silicon)
- `jdk-17.0.1.jdk-x86` (macOS Intel)
2. Run `jlink` commands to create custom Java runtimes in `out/runtime-*` directories
### Development Commands
- **Build JAR**: Use IntelliJ IDEA build system to compile modules
- **Run Emulator**: Execute `TsvmEmulator.java` main method or use built JAR
- **Run TerranBASIC**: Execute `TerranBASIC.java` main method
- **Package Apps**: Run appropriate build script from `buildapp/` directory
### Assets and File System
- `assets/disk0/`: Virtual disk content including TVDOS system files
- `assets/bios/`: BIOS ROM files and implementations
- `My_BASIC_Programs/`: Example BASIC programs for testing
- TVDOS filesystem uses custom format with specialised drivers
## Videotron2K
The Videotron2K is a specialised video display controller with:
- Assembly-like programming language
- 6 general registers (r1-r6) and special registers (tmr, frm, px, py, c1-c6)
- Scene-based programming model
- Drawing commands (plot, fillin, goto, fillscr)
- Conditional execution with postfixes (zr, nz, gt, ls, ge, le)
Programs are structured with SCENE blocks and executed with perform commands.
## Memory Management
- VM supports up to USER_SPACE_SIZE memory
- 64-byte malloc units with reserved blocks
- Peripheral slots (1-8 configurable)
- Memory-mapped I/O for peripheral access
- JavaScript programs run in sandboxed GraalVM context
### Peripheral Memory Addressing
Peripheral memories can be accessed using the `vm.peek()` and `vm.poke()` functions, which take an absolute address.
- Peripherals occupy the negative part of the memory space, and their addresses run backwards (e.g. Slot 1 starts at -1048577 and ends at -2097152)
- Peripherals take up two memory regions: MMIO area and Memory Space area; MMIO is accessed by PeriBase (and its children) using `mmio_read()` and `mmio_write()`, and the Memory Space is accessed using `peek()` and `poke()`.
- The peripheral at slot *n* takes the following addresses:
1. MMIO area: -(131072×n)-1 to -(131072×(n+1))
2. Memory Space area: -(1048576×n)-1 to -(1048576×(n+1))
## Testing
- Use example programs in `My_BASIC_Programs/` for BASIC testing
- JavaScript test programs available in `assets/disk0/`
- Videotron2K assembly examples in documentation
## Notes
- The 'gzip' namespace in TSVM's JS programs is a misnomer: the actual 'gzip' functions (defined in CompressorDelegate.kt) call Zstd functions.
## TVDOS
### TVDOS Movie Formats
#### Legacy iPF Format
- Format documentation is in `terranmon.txt` (search for "TSVM MOV file format" and "TSVM Interchangeable Picture Format (aka iPF Type 1/2)")
- Video Encoder implementation is in `assets/disk0/tvdos/bin/encodemov.js` (iPF Format 1 and 2) and `assets/disk0/tvdos/bin/encodemov2.js` (iPF Format 1-delta)
- Actual encoding/decoding code is in `GraphicsJSR223Delegate.kt`
- Audio uses standard MP2
#### TEV Format (TSVM Enhanced Video)
- **Modern video codec** optimised for TSVM hardware with 60-80% better compression than iPF
- **C Encoder**: `video_encoder/encoder_tev.c` - Hardware-accelerated encoder with motion compensation and DCT
- How to build: `make clean && make`
- **Rate Control**: Supports both quality mode (`-q 0-4`) and bitrate mode (`-b N` kbps)
- **JS Decoder**: `assets/disk0/tvdos/bin/playtev.js` - Native decoder for TEV format playback
- How to build: `must be done manually by the user; the TSVM is not machine-interactable`
- **Hardware accelerated decoding**: Extended GraphicsJSR223Delegate.kt with TEV functions:
- `tevDecode()` - The main decoding function (now accepts rate control factor)
- `tevIdct8x8()` - Fast 8×8 DCT transforms
- `tevMotionCopy8x8()` - Sub-pixel motion compensation
- **Features**:
- 16×16 DCT blocks (vs 4×4 in iPF) for better compression
- Motion compensation with ±8 pixel search range
- YCoCg-R 4:2:0 Chroma subsampling (more aggressive quantisation on Cg channel)
- Full 8-Bit RGB colour for increased visual fidelity, rendered down to TSVM-compliant 4-Bit RGB with dithering upon playback
- **Usage Examples**:
```bash
# Quality mode
./encoder_tev -i input.mp4 -o output.tev -q 3
# Playback
playtev output.tev
```
- **Format documentation**: `terranmon.txt` (search for "TSVM Enhanced Video (TEV) Format")
- **Version**: 2.1 (includes rate control factor in all video packets)
#### TAV Format (TSVM Advanced Video)
- **Successor to TEV**: DWT-based video codec using wavelet transforms instead of DCT
- **C Encoder**: `video_encoder/encoder_tav.c` - Multi-wavelet encoder with perceptual quantisation
- How to build: `make tav`
- **Wavelet Support**: Multiple wavelet types for different compression characteristics
- **JS Decoder**: `assets/disk0/tvdos/bin/playtav.js` - Native decoder for TAV format playback
- **Hardware accelerated decoding**: Extended GraphicsJSR223Delegate.kt with TAV functions
- **Packet analyser**: `video_encoder/tav_inspector.c` - Debugging tool that parses TAV packets into human-readable form
- **Features**:
- **Multiple Wavelet Types**: 5/3 reversible, 9/7 irreversible, CDF 13/7, DD-4, Haar
- **Single-tile encoding**: One large DWT tile for optimal quality (no blocking artifacts)
- **Perceptual quantisation**: HVS-optimised coefficient scaling
- **YCoCg-R colour space**: Efficient chroma representation with "simulated" subsampling using anisotropic quantisation (search for "ANISOTROPY_MULT_CHROMA" on the encoder)
- **6-level DWT decomposition**: Deep frequency analysis for better compression (deeper levels possible but 6 is the maximum for the default TSVM size)
- **Significance Map Compression**: Improved coefficient storage format exploiting sparsity for 16-18% additional compression (2025-09-29 update)
- **Concatenated Maps Layout**: Cross-channel compression optimisation for additional 1.6% improvement (2025-09-29 enhanced)
- **Usage Examples**:
```bash
# Different wavelets
./encoder_tav -i input.mp4 -w 0 -o output.tav # 5/3 reversible (lossless capable)
./encoder_tav -i input.mp4 -w 1 -o output.tav # 9/7 irreversible (default, best compression)
./encoder_tav -i input.mp4 -w 2 -o output.tav # CDF 13/7 (experimental)
./encoder_tav -i input.mp4 -w 16 -o output.tav # DD-4 (four-point interpolating)
./encoder_tav -i input.mp4 -w 255 -o output.tav # Haar (demonstration)
# Quality levels (0-5)
./encoder_tav -i input.mp4 -q 0 -o output.tav # Lowest quality, smallest file
./encoder_tav -i input.mp4 -q 5 -o output.tav # Highest quality, largest file
# Temporal 3D DWT (GOP-based encoding)
./encoder_tav -i input.mp4 --temporal-dwt -o output.tav
# Playback
playtav output.tav
```
**CRITICAL IMPLEMENTATION NOTES**:
**Wavelet Coefficient Layout**:
- TAV uses **2D Spatial Layout** in memory: `[LL, LH, HL, HH, LH, HL, HH, ...]` for each decomposition level
- **Forward transform must output**: `temp[0...half-1] = low-pass`, `temp[half...length-1] = high-pass`
- **Inverse transform must expect**: The same 2D spatial layout, and must exactly reverse the forward operations
- **Common mistake**: Assuming linear layout leads to grid/checkerboard artifacts
**Wavelet Implementation Pattern**:
- All wavelets must follow the **exact same structure** as the working 5/3 implementation:
```c
// Forward: 1. Predict step, 2. Update step
temp[half + i] = data[odd_index] - prediction; // High-pass
temp[i] = data[even_index] + update; // Low-pass
// Inverse: Reverse order - 1. Undo update, 2. Undo predict
temp[i] -= update; // Undo low-pass update
temp[half + i] += prediction; // Undo high-pass predict
```
- **Boundary handling**: Use symmetric extension for filter taps beyond array bounds
- **Reconstruction**: Interleave even/odd samples: `data[2*i] = low[i], data[2*i+1] = high[i]`
**Debugging Grid Artifacts**:
- **Symptom**: Checkerboard or grid patterns in decoded video
- **Cause**: Mismatch between encoder/decoder coefficient layout or lifting step operations
- **Solution**: Ensure forward and inverse transforms use identical coefficient indexing and reverse operations exactly
**Supported Wavelets**:
- **0**: 5/3 reversible (lossless when unquantised, JPEG 2000 standard)
- **1**: 9/7 irreversible (best compression, CDF 9/7 variant, default choice)
- **2**: CDF 13/7 (experimental, simplified implementation)
- **16**: DD-4 (four-point interpolating Deslauriers-Dubuc, for still images)
- **255**: Haar (demonstration only, simplest possible wavelet)
- **Format documentation**: `terranmon.txt` (search for "TSVM Advanced Video (TAV) Format")
- **Version**: Current (perceptual quantisation, multi-wavelet support, EZBC compression)
#### TAV Temporal 3D DWT (GOP Unified Encoding)
Implemented on 2025-10-15 for improved temporal compression through group-of-pictures (GOP) encoding:
**Key Features**:
- **3D DWT**: Applies DWT in both spatial (2D) and temporal (1D) dimensions for optimal spacetime compression
- **Unified GOP Preprocessing**: Single EZBC tree for all frames and channels in a GOP (width×height×N_frames×3_channels)
- **GOP Size**: Typically 8 frames (configurable), with scene change detection for adaptive GOPs
- **Single-frame Fallback**: GOP size of 1 automatically uses traditional I-frame encoding
**Packet Format**:
- **0x12 (GOP_UNIFIED)**: `[gop_size][compressed_size][compressed_data]`
- **0xFC (GOP_SYNC)**: `[frame_count]` - Indicates N frames were decoded from GOP block
- **Timecode Emission**: One timecode packet per GOP (not per frame)
**Technical Implementation**:
```c
// Unified preprocessing structure (encoder_tav.c:2371-2509)
[All_Y_maps][All_Co_maps][All_Cg_maps][All_Y_values][All_Co_values][All_Cg_values]
// Where maps are grouped by channel across all GOP frames for optimal Zstd compression
```
**Usage**:
```bash
# Enable temporal 3D DWT
./encoder_tav -i input.mp4 --temporal-dwt -o output.tav
# Inspect GOP structure
./tav_inspector output.tav -v
```
**Compression Benefits**:
- **Temporal Coherence**: Exploits similarity across consecutive frames
- **Unified Compression**: Zstd compresses entire GOP as single block, finding patterns across time
- **Adaptive GOPs**: Scene change detection ensures optimal GOP boundaries
#### TAD Format (TSVM Advanced Audio)
- **Perceptual audio codec** for TSVM using CDF 9/7 biorthogonal wavelets
- **C Encoder**: `video_encoder/encoder_tad.c` - Core Encoder library; `video_encoder/encoder_tad_standalone.c` - Standalone encoder with FFmpeg integration
- How to build: `make tad`
- **Quality Levels**: 0-5 (0=lowest quality/smallest, 5=highest quality/largest; designed to be in sync with TAV encoder)
- **C Decoders**:
- `video_encoder/decoder_tad.c` - Shared decoder library with `tad32_decode_chunk()` function
- `video_encoder/decoder_tad.h` - Exports shared decoder API
- `video_encoder/decoder_tav.c` - TAV decoder that uses shared TAD decoder for audio packets
- **Shared Architecture** (Fixed 2025-11-10): Both standalone TAD and TAV decoders now use the same `tad32_decode_chunk()` implementation, eliminating code duplication and ensuring identical output
- **Kotlin Decoder**: `AudioAdapter.kt` - Hardware-accelerated TAD decoder for TSVM runtime
- **Quantisation Fix** (2025-11-10): Fixed BASE_QUANTISER_WEIGHTS to use channel-specific 2D array (Mid/Side) instead of single 1D array, resolving severe audio distortion
- **Features**:
- **32 kHz stereo**: TSVM audio hardware native format
- **Variable chunk sizes**: Any size ≥1024 samples, including non-power-of-2 (e.g., 32016 for TAV 1-second GOPs)
- **Pre-emphasis filter**: First-order IIR filter (α=0.5) shifts quantisation noise to lower frequencies
- **Gamma compression**: Dynamic range compression (γ=0.5) before quantisation
- **M/S stereo decorrelation**: Exploits stereo correlation for better compression
- **9-level CDF 9/7 DWT**: Fixed 9 decomposition levels for all chunk sizes
- **Perceptual quantisation**: Channel-specific (Mid/Side) frequency-dependent weights with lambda companding (λ=6.0)
- **EZBC encoding**: Binary tree embedded zero block coding exploits coefficient sparsity (86.9% Mid, 97.8% Side)
- **Zstd compression**: Level 7 on concatenated EZBC bitstreams for additional compression
- **Non-power-of-2 support**: Fixed 2025-10-30 to handle arbitrary chunk sizes correctly
- **Usage Examples**:
```bash
# Encode with default quality (Q3)
encoder_tad -i input.mp4 -o output.tad
# Encode with highest quality
encoder_tad -i input.mp4 -o output.tad -q 5
# Encode without Zstd compression
encoder_tad -i input.mp4 -o output.tad --no-zstd
# Verbose output with statistics
encoder_tad -i input.mp4 -o output.tad -v
# Decode back to PCM16
decoder_tad -i input.tad -o output.pcm
```
- **Format documentation**: `terranmon.txt` (search for "TSVM Advanced Audio (TAD) Format")
- **Version**: 1.1 (EZBC encoding with non-power-of-2 support, updated 2025-10-30; decoder architecture and Kotlin quantisation weights fixed 2025-11-10; documentation updated 2025-11-10 to reflect pre-emphasis and EZBC)
**TAD Encoding Pipeline**:
1. **Pre-emphasis filter** (α=0.5) - Shifts quantisation noise toward lower frequencies
2. **Gamma compression** (γ=0.5) - Dynamic range compression
3. **M/S decorrelation** - Transforms L/R to Mid/Side
4. **9-level CDF 9/7 DWT** - Wavelet decomposition (fixed 9 levels)
5. **Perceptual quantisation** - Lambda companding (λ=6.0) with channel-specific weights
6. **EZBC encoding** - Binary tree embedded zero block coding per channel
7. **Zstd compression** (level 7) - Additional compression on concatenated EZBC bitstreams
**TAD Compression Performance**:
- **Target Compression**: 2:1 against PCMu8 baseline (4:1 against PCM16LE input)
- **Achieved Compression**: 2.51:1 against PCMu8 at quality level 3
- **Audio Quality**: Preserves full 0-16 kHz bandwidth
- **Coefficient Sparsity**: 86.9% zeros in Mid channel, 97.8% in Side channel (typical)
- **EZBC Benefits**: Exploits sparsity, progressive refinement, spatial clustering
**TAD Integration with TAV**:
TAD is designed as an includable API for TAV video encoder integration. The variable chunk size
support enables synchronized audio/video encoding where audio chunks can match video GOP boundaries.
TAV embeds TAD-compressed audio using packet type 0x24 with Zstd compression.
**TAD Hardware Acceleration**:
TSVM accelerates TAD decoding with AudioAdapter.kt (backend) and AudioJSR223Delegate.kt (API):
- Backend decoder in AudioAdapter.kt with non-power-of-2 chunk size support (fixed 2025-10-30)
- API functions in AudioJSR223Delegate.kt for JavaScript access
- Supports chunk sizes from 1024 to 32768+ samples (any size ≥1024)
- Fixed 9-level CDF 9/7 inverse DWT with correct length tracking for non-power-of-2 sizes
**Critical Implementation Note (Fixed 2025-10-30)**:
Multi-level inverse DWT must pre-calculate the exact sequence of lengths from forward transform:
```kotlin
val lengths = IntArray(levels + 1)
lengths[0] = chunk_size
for (i in 1..levels) {
lengths[i] = (lengths[i - 1] + 1) / 2
}
// Apply inverse DWT using lengths[level] for each level
```
Using simple doubling (`length *= 2`) is incorrect for non-power-of-2 sizes and causes
mirrored subband artifacts.
**TAD Decoding Pipeline**:
1. **Zstd decompression** - Decompress concatenated EZBC bitstreams
2. **EZBC decoding** - Binary tree decoder reconstructs quantised int8 coefficients per channel
3. **Lambda decompanding** - Inverse Laplacian CDF mapping with channel-specific weights
4. **9-level inverse CDF 9/7 DWT** - Wavelet reconstruction with proper non-power-of-2 length tracking
5. **M/S to L/R conversion** - Transform Mid/Side back to Left/Right
6. **Gamma expansion** (γ⁻¹=2.0) - Restore dynamic range
7. **De-emphasis filter** (α=0.5) - Reverse pre-emphasis, remove frequency shaping
8. **PCM32f to PCM8** - Noise-shaped dithering for final 8-bit output
**Critical Quantisation Weights Note (Fixed 2025-11-10)**:
The TAD decoder MUST use channel-specific quantisation weights for Mid (channel 0) and Side (channel 1) channels. The Kotlin decoder (AudioAdapter.kt) originally used a single 1D weight array, which caused severe audio distortion. The correct implementation uses a 2D array:
```kotlin
// CORRECT (Fixed 2025-11-10)
private val BASE_QUANTISER_WEIGHTS = arrayOf(
floatArrayOf( // Mid channel (0)
4.0f, 2.0f, 1.8f, 1.6f, 1.4f, 1.2f, 1.0f, 1.0f, 1.3f, 2.0f
),
floatArrayOf( // Side channel (1)
6.0f, 5.0f, 2.6f, 2.4f, 1.8f, 1.3f, 1.0f, 1.0f, 1.6f, 3.2f
)
)
// During dequantisation:
val weight = BASE_QUANTISER_WEIGHTS[channel][sideband] * quantiserScale
coeffs[i] = normalisedVal * TAD32_COEFF_SCALARS[sideband] * weight
```
The different weights for Mid and Side channels reflect the perceptual importance of different frequency bands in each channel. Using incorrect weights causes:
- DC frequency underamplification (using 1.0 instead of 4.0/6.0)
- Incorrect stereo imaging and extreme side channel distortion
- Severe frequency response errors that manifest as "clipping-like" distortion

View File

@@ -5,4 +5,5 @@ set PATH=\tvdos\installer;\tvdos\tuidev;$PATH
set KEYBOARD=us_colemak
rem this line specifies which shell to be presented after the boot process:
zfm
command -fancy

View File

@@ -8,31 +8,32 @@ if (!exec_args[1]) {
return 1
}
let lowfilename = exec_args[1] + "_low.chr"
let highfilename = exec_args[1] + "_high.chr"
const fullFilePath = _G.shell.resolvePathInput(exec_args[1]).full
let lowfilename = fullFilePath + "_low.chr"
let highfilename = fullFilePath + "_high.chr"
let workarea = sys.malloc(1920)
// dump low rom
sys.poke(-1299460, 16)
for (let i = 0; i < 1920; i++) {
let byte = sys.peek(-1300607 - i)
let byte = sys.peek(-133121 - i)
sys.poke(workarea + i, byte)
}
filesystem.open("A", lowfilename, "W")
dma.ramToCom(workarea, filesystem._toPorts("A")[0], 1920)
const lowfile = files.open(lowfilename)
lowfile.pwrite(workarea, 1920, 0)
println("Wrote CHR rom " + lowfilename)
// dump high rom
sys.poke(-1299460, 17)
for (let i = 0; i < 1920; i++) {
let byte = sys.peek(-1300607 - i)
let byte = sys.peek(-133121 - i)
sys.poke(workarea + i, byte)
}
filesystem.open("A", highfilename, "W")
dma.ramToCom(workarea, filesystem._toPorts("A")[0], 1920)
const highfile = files.open(highfilename)
highfile.pwrite(workarea, 1920, 0)
println("Wrote CHR rom " + highfilename)
sys.free(workarea)

View File

@@ -1,2 +1,2 @@
println("몬스터 시트라, 이 이름은 특이하게 생긴 프랑스 자동차나 스칸디나비아 보드카에서 온 것은 아닙니다. 고대부터 유래된 과일 시트론에서 영감을 받아 태어난 이 제품은 레몬과 비슷하지만 더 원초적이고 투박합니다. 마치 몬스터 에너지처럼요. 이 고대의 과일과 선조들에게서 영감을 얻은 우리는 전형적인 드링크를 새롭게 해석한 울트라 시트라를 만들었습니다. 울트라 시트라는 새콤달콤한 맛이 입안에서 잔잔하게 퍼지며 상쾌한 맛으로 마무리하죠. 저칼로리에 무설탕이지만 몬스터 에너지만의 블렌드는 변함없이 가득 담겨있답니다.")
println("멕시코에서는 매년 할로윈 이후 '죽은 자의 날'을 기념합니다. 신비한 분위기 속의 메리골드 꽃과 추억들은 떠난 이들을 축제로 이끕니다. 누구나 매혹될 이국적인 천사의 주스 블렌드, 망고 로코. 환상적인 맛과 몬스터 에너지 만의 마법으로 파티는 계속될 것입니다.")
unicode.println("몬스터 시트라, 이 이름은 특이하게 생긴 프랑스 자동차나 스칸디나비아 보드카에서 온 것은 아닙니다. 고대부터 유래된 과일 시트론에서 영감을 받아 태어난 이 제품은 레몬과 비슷하지만 더 원초적이고 투박합니다. 마치 몬스터 에너지처럼요. 이 고대의 과일과 선조들에게서 영감을 얻은 우리는 전형적인 드링크를 새롭게 해석한 울트라 시트라를 만들었습니다. 울트라 시트라는 새콤달콤한 맛이 입안에서 잔잔하게 퍼지며 상쾌한 맛으로 마무리하죠. 저칼로리에 무설탕이지만 몬스터 에너지만의 블렌드는 변함없이 가득 담겨있답니다.")
unicode.println("멕시코에서는 매년 할로윈 이후 '죽은 자의 날'을 기념합니다. 신비한 분위기 속의 메리골드 꽃과 추억들은 떠난 이들을 축제로 이끕니다. 누구나 매혹될 이국적인 천사의 주스 블렌드, 망고 로코. 환상적인 맛과 몬스터 에너지 만의 마법으로 파티는 계속될 것입니다.")

View File

@@ -2,26 +2,18 @@ if (!exec_args[1]) {
printerrln("Usage: jpdectest image.jpg")
}
filesystem.open("A", exec_args[1], "R")
const fullFilePath = _G.shell.resolvePathInput(exec_args[1])
const file = files.open(fullFilePath.full)
const fileLen = file.size
const infile = sys.malloc(file.size); file.pread(infile, fileLen, 0)
let status = com.getStatusCode(0)
let infile = undefined
if (0 != status) return status
let fileLen = filesystem.getFileLen("A")
println(`DMA reading ${fileLen} bytes from disk...`)
infile = sys.malloc(fileLen)
dma.comToRam(0, 0, infile, fileLen)
println("decoding")
//println("decoding")
// decode
const [imgw, imgh, channels, imageData] = graphics.decodeImageResample(infile, fileLen, -1, -1)
println(`dim: ${imgw}x${imgh}`)
println(`converting to displayable format...`)
//println(`dim: ${imgw}x${imgh}`)
//println(`converting to displayable format...`)
// convert colour
graphics.setGraphicsMode(0)

View File

@@ -2,26 +2,18 @@ if (!exec_args[1]) {
printerrln("Usage: jpdectesthigh image.jpg")
}
filesystem.open("A", exec_args[1], "R")
const fullFilePath = _G.shell.resolvePathInput(exec_args[1])
const file = files.open(fullFilePath.full)
const fileLen = file.size
const infile = sys.malloc(file.size); file.pread(infile, fileLen, 0)
let status = com.getStatusCode(0)
let infile = undefined
if (0 != status) return status
let fileLen = filesystem.getFileLen("A")
println(`DMA reading ${fileLen} bytes from disk...`)
infile = sys.malloc(fileLen)
dma.comToRam(0, 0, infile, fileLen)
println("decoding")
//println("decoding")
// decode
const [imgw, imgh, channels, imageData] = graphics.decodeImageResample(infile, fileLen, -1, -1)
println(`dim: ${imgw}x${imgh}`)
println(`converting to displayable format...`)
//println(`dim: ${imgw}x${imgh}`)
//println(`converting to displayable format...`)
// convert colour
graphics.setGraphicsMode(4)

View File

@@ -1,7 +1,7 @@
///////////////////////////////////////////////////////////////////////////////
// High Speed Disk Peripheral Adapter (HSDPA) Driver for TVDOS
// This driver treats each disk from HSDPA as a single large file
// Created by Claude on 2025-08-16
// Created by CuriousTorvald and Claude on 2025-08-16
///////////////////////////////////////////////////////////////////////////////
// Add TAPE device names to reserved names
@@ -117,8 +117,9 @@ for (let tapeIndex = 0; tapeIndex < 4; tapeIndex++) {
// Get file size - for HSDPA tapes, we don't know the size ahead of time
// So we return a very large number to indicate it's available
// Using Number.MAX_SAFE_INTEGER to support files >2GB
driver.getFileLen = (fd) => {
return 0x7FFFFFFF // Return max positive 32-bit integer
return Number.MAX_SAFE_INTEGER // 2^53 - 1 (9007199254740991) - safe for JS arithmetic
}
// Sequential read from tape

View File

@@ -1366,12 +1366,12 @@ unicode.getUniprint = (c) => {
return unicode.uniprint[k]
}}
print = function(str) {
unicode.print = (str) => {
if ((typeof str === 'string' || str instanceof String) && str.length > 0) {
let cp = unicode.utf8toCodepoints(str)
cp.forEach(c => {
let q = unicode.getUniprint(c)
if (q == undefined || !q[0](c)) {
con.addch(4)
con.curs_right()
@@ -1381,6 +1381,34 @@ print = function(str) {
}
})
}
else {
sys.print(str)
}
}
// Prints `str` through unicode.print with two newline characters appended.
unicode.println = (str) => {
unicode.print(str+'\n\n')
}
// Returns the number of codepoints in `str`, i.e. the length of the array
// produced by unicode.utf8toCodepoints (one entry per codepoint).
unicode.strlen = (str) => {
    const codepoints = unicode.utf8toCodepoints(str)
    return codepoints.length
}
// Returns the on-screen width of `str` in character cells: every codepoint
// from unicode.utf8toCodepoints counts as 1, 2 or 3 cells.
unicode.visualStrlen = (str) => {
    // Groups (offset from 0xAC00 divided by 588, truncated) that occupy three cells.
    // NOTE(review): presumably these are Hangul initial-consonant indices - confirm against the font.
    const TRIPLE_WIDTH_GROUPS = [1, 4, 8, 10, 13]
    // Inclusive codepoint ranges that occupy two cells.
    const DOUBLE_WIDTH_RANGES = [
        [0x3000, 0x303f],
        [0x3100, 0x312f],
        [0x3200, 0x33ff],
        [0xAC00, 0xD7FF],
        [0xFE30, 0xFE4F],
        [0xFF00, 0xff60]
    ]

    const cellsFor = (c) => {
        // The triple-width test takes precedence over the double-width ranges.
        if (0xAC00 <= c && c <= 0xD7FF && TRIPLE_WIDTH_GROUPS.includes(((c - 0xAC00) / 588) | 0)) {
            return 3
        }
        if (DOUBLE_WIDTH_RANGES.some(([lo, hi]) => lo <= c && c <= hi)) {
            return 2
        }
        return 1
    }

    let total = 0
    unicode.utf8toCodepoints(str).forEach((c) => {
        total = total + cellsFor(c)
    })
    return total
}
Object.freeze(unicode);

View File

@@ -1,236 +0,0 @@
println("DEPRECATION NOTICE: MP3 Playback function will be removed for following reason")
println("\tMP3 does not really fit in the time TSVM targets to emulate")
return 1
const Mp3 = require('mp3dec')
const pcm = require("pcm")
const interactive = exec_args[2] && exec_args[2].toLowerCase() == "-i"
function printdbg(s) { if (0) serial.println(s) }
// Thin wrapper around the "seqread" module that exposes a file at `path`
// as a sequentially-read buffer with string/byte-array helpers.
class SequentialFileBuffer {
// path:   file path string (arrays are rejected)
// offset: optional; stored as `offset` (0 when falsy) and, unmodified, as `originalOffset`
// length: optional; falls back to the opened file's size when falsy
constructor(path, offset, length) {
if (Array.isArray(path)) throw Error("arg #1 is path(string), not array")
this.path = path
this.file = files.open(path)
this.offset = offset || 0
this.originalOffset = offset
this.length = length || this.file.size
this.seq = require("seqread")
this.seq.prepare(path)
}
/*readFull(n) {
throw Error()
let ptr = this.seq.readBytes(n)
return ptr
}*/
// Reads n bytes and returns them as a string, one character per byte.
// The buffer returned by seq.readBytes is released with sys.free before returning.
readStr(n) {
let ptr = this.seq.readBytes(n)
let s = ''
for (let i = 0; i < n; i++) {
// stop once the index reaches the recorded buffer length
if (i >= this.length) break
s += String.fromCharCode(sys.peek(ptr + i))
}
sys.free(ptr)
return s
}
// Reads n bytes and returns them as an array of numbers (one sys.peek result per byte).
// NOTE(review): when an exception is caught below, the diagnostics are printed but
// `ptr` is not freed and the method falls through, returning undefined.
readByteNumbers(n) {
let ptr = this.seq.readBytes(n)
try {
let s = []
for (let i = 0; i < n; i++) {
if (i >= this.length) break
s.push(sys.peek(ptr + i))
}
sys.free(ptr)
return s
}
catch (e) {
println(`n: ${n}; ptr: ${ptr}`)
println(e)
}
}
// Steps the read position back by `diff` bytes: the reader is re-prepared
// from the start and then skipped forward to (current read count - diff).
unread(diff) {
let newSkipLen = this.seq.getReadCount() - diff
this.seq.prepare(this.path)
this.seq.skip(newSkipLen)
}
// Restarts reading from the beginning by re-preparing the reader.
rewind() {
this.seq.prepare(this.path)
}
// Moves the read position to `p` by re-preparing the reader and skipping p bytes.
seek(p) {
this.seq.prepare(this.path)
this.seq.skip(p)
}
// Length recorded by the constructor (explicit `length` argument or the file size).
get byteLength() {
return this.length
}
/*get remaining() {
return this.length - this.getReadCount()
}*/
}
con.curs_set(0)
let [cy, cx] = con.getyx()
let [__, CONSOLE_WIDTH] = con.getmaxyx()
let paintWidth = CONSOLE_WIDTH - 16
if (interactive) {
println("Decoding...")
}
printdbg("pre-decode...")
let filebuf = new SequentialFileBuffer(_G.shell.resolvePathInput(exec_args[1]).full)
const FILE_SIZE = filebuf.length
let decoder = Mp3.newDecoder(filebuf)
if (decoder === null) throw Error("decoder is null")
const HEADER_SIZE = decoder.headerSize + 3
const FRAME_SIZE = decoder.frameSize // only works reliably for CBR
//serial.println(`header size: ${HEADER_SIZE}`)
//serial.println(`frame size: ${FRAME_SIZE}`)
audio.resetParams(0)
audio.purgeQueue(0)
audio.setPcmMode(0)
audio.setPcmQueueCapacityIndex(0, 5) // queue size is now 24
const QUEUE_MAX = audio.getPcmQueueCapacity(0)
audio.setMasterVolume(0, 255)
audio.play(0)
let decodedLength = 0
let readPtr = sys.malloc(8000)
let decodePtr = sys.malloc(12000)
// Converts a byte count `i` to seconds by dividing it by
// FRAME_SIZE * 1000 / bufRealTimeLen (both taken from the enclosing script).
function bytesToSec(i) {
    const divisor = FRAME_SIZE * 1000 / bufRealTimeLen
    return i / divisor
}
// Formats a number of seconds `n` as "MM:SS"; each field is left-padded
// with zeroes to at least two characters (minutes may be wider).
function secToReadable(n) {
    const pad2 = (v) => String(v).padStart(2, '0')
    const minutes = (n / 60) | 0   // truncate to whole minutes
    const seconds = n % 60
    return pad2(minutes) + ':' + pad2(seconds)
}
// Converts 16-bit little-endian sample pairs at `inPtr` into 8-bit samples at `outPtr`.
// Each pass consumes 4 input bytes (two 16-bit values) and writes 2 output bytes,
// so inputLen input bytes produce inputLen / 2 output bytes.
function decodeAndResample(inPtr, outPtr, inputLen) {
    // TODO resample
    // reads one little-endian 16-bit value and converts it via pcm.u16Tos16
    const readS16 = (addr) => pcm.u16Tos16(sys.peek(addr) | (sys.peek(addr + 1) << 8))
    const outLen = inputLen / 2
    let src = inPtr
    for (let dst = 0; dst < outLen; dst += 2) {
        const first = readS16(src)
        const second = readS16(src + 2)
        sys.poke(outPtr + dst, pcm.s16Tou8(first))
        sys.poke(outPtr + dst + 1, pcm.s16Tou8(second))
        // soothing visualiser(????) - disabled in the original:
        // printvis(`${sampleToVisual(sample[0])} | ${sampleToVisual(sample[1])}`)
        src += 4
    }
}
// Empty placeholder; a second `function printPlayBar(currently)` with the
// actual drawing code is declared further down in this script.
function printPlayBar() {
}
let stopPlay = false
con.curs_set(0)
if (interactive) {
con.move(cy, cy)
println("Push and hold Backspace to exit")
}
[cy, cx] = con.getyx()
// Draws the elapsed/total time and the progress bar on row `cy`.
// `currently` is the current position in bytes; does nothing unless `interactive` is set.
function printPlayBar(currently) {
    if (!interactive) return

    const total = FILE_SIZE - HEADER_SIZE
    const elapsedSec = Math.round(bytesToSec(currently))
    const totalSec = Math.round(bytesToSec(total))

    // wipe the time field, then print "MM:SS / MM:SS" over it
    con.move(cy, 1)
    print(' '.repeat(15))
    con.move(cy, 1)
    print(secToReadable(elapsedSec) + ' / ' + secToReadable(totalSec))

    // bar track followed by the position marker (0xDB)
    con.move(cy, 15)
    print(' ')
    print('\x84205u'.repeat(paintWidth + 1))
    const markerCol = 16 + Math.round(paintWidth * (currently / total))
    con.mvaddch(cy, markerCol, 0xDB)
}
let t1 = sys.nanoTime()
let errorlevel = 0
let bufRealTimeLen = 36
try {
decoder.decode((ptr, len, pos)=>{
if (interactive) {
sys.poke(-40, 1)
if (sys.peek(-41) == 67) {
stopPlay = true
throw "STOP"
}
}
printPlayBar(pos)
let t2 = sys.nanoTime()
decodedLength += len
// serial.println(`Audio queue size: ${audio.getPosition(0)}/${QUEUE_MAX}`)
if (audio.getPosition(0) >= QUEUE_MAX) {
while (audio.getPosition(0) >= (QUEUE_MAX >>> 1)) {
printdbg(`Queue full, waiting until the queue has some space (${audio.getPosition(0)}/${QUEUE_MAX})`)
// serial.println(`Queue full, waiting until the queue has some space (${audio.getPosition(0)}/${QUEUE_MAX})`)
sys.sleep(bufRealTimeLen)
}
}
decodeAndResample(ptr, decodePtr, len)
audio.putPcmDataByPtr(decodePtr, len >> 1, 0)
audio.setSampleUploadLength(0, len >> 1)
audio.startSampleUpload(0)
let decodingTime = (t2 - t1) / 1000000.0
bufRealTimeLen = (len >> 1) / 64000.0 * 1000
t1 = t2
printdbg(`Decoded ${decodedLength} bytes; target: ${bufRealTimeLen} ms, lag: ${decodingTime - bufRealTimeLen} ms`)
}) // now you got decoded PCM data
}
catch (e) {
if (e != "STOP") {
printerrln(e)
errorlevel = 1
}
}
finally {
//audio.stop(0)
sys.free(readPtr)
sys.free(decodePtr)
}
return errorlevel

View File

@@ -1,4 +1,4 @@
// usage: playmov moviefile.mov [/i]
// usage: playmv1 moviefile.mv1 [/i]
const SND_BASE_ADDR = audio.getBaseAddr()
const interactive = exec_args[2] && exec_args[2].toLowerCase() == "-i"
const WIDTH = 560

View File

@@ -0,0 +1,361 @@
const SND_BASE_ADDR = audio.getBaseAddr()
const SND_MEM_ADDR = audio.getMemAddr()
const TAD_INPUT_ADDR = SND_MEM_ADDR - 262144 // TAD input buffer (matches TAV packet 0x24)
const TAD_DECODED_ADDR = SND_MEM_ADDR - 262144 + 65536 // TAD decoded buffer
if (!SND_BASE_ADDR) return 10
// Check for help flag or missing arguments
if (!exec_args[1] || exec_args[1] == "-h" || exec_args[1] == "--help") {
serial.println("Usage: playtad <file.tad> [-i | -d] [quality]")
serial.println(" -i Interactive mode (progress bar, press Backspace to exit)")
serial.println(" -d Dump mode (show first 3 chunks with payload hex and decoded samples)")
serial.println("")
serial.println("Examples:")
serial.println(" playtad audio.tad -i # Play with progress bar")
serial.println(" playtad audio.tad -d # Dump first 3 chunks for debugging")
return 0
}
const pcm = require("pcm")
const interactive = exec_args[2] && exec_args[2].toLowerCase() == "-i"
const dumpCoeffs = exec_args[2] && exec_args[2].toLowerCase() == "-d"
function printdbg(s) { if (0) serial.println(s) }
// Sequential reader over a file, built on the "seqread" module.
// Offers little-endian integer reads, string reads and coarse repositioning
// (every reposition re-prepares the reader from the start and skips forward).
class SequentialFileBuffer {
    // path:   file path string (arrays are rejected)
    // offset: optional; stored as `offset` (0 when falsy) and, unmodified, as `originalOffset`
    // length: optional; falls back to the opened file's size when falsy
    constructor(path, offset, length) {
        if (Array.isArray(path)) throw Error("arg #1 is path(string), not array")
        this.path = path
        this.file = files.open(path)
        this.offset = offset || 0
        this.originalOffset = offset
        this.length = length || this.file.size
        this.seq = require("seqread")
        this.seq.prepare(path)
    }

    // Passes straight through to seq.readBytes(size, ptr) and returns its result.
    readBytes(size, ptr) {
        return this.seq.readBytes(size, ptr)
    }

    // Reads one byte and returns the value sys.peek reports for it.
    readByte() {
        const buf = this.seq.readBytes(1)
        const value = sys.peek(buf)
        sys.free(buf)
        return value
    }

    // Reads two bytes and combines them little-endian (first byte is least significant).
    readShort() {
        const buf = this.seq.readBytes(2)
        const lo = sys.peek(buf)
        const hi = sys.peek(buf + 1)
        sys.free(buf)
        return lo | (hi << 8)
    }

    // Reads four bytes and combines them little-endian into a 32-bit integer.
    readInt() {
        const buf = this.seq.readBytes(4)
        const b0 = sys.peek(buf)
        const b1 = sys.peek(buf + 1)
        const b2 = sys.peek(buf + 2)
        const b3 = sys.peek(buf + 3)
        sys.free(buf)
        return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24)
    }

    // Reads n bytes and returns them as a string, one character per byte.
    readStr(n) {
        const buf = this.seq.readBytes(n)
        const chars = []
        // stops early once the index reaches the recorded buffer length
        for (let i = 0; i < n && !(i >= this.length); i++) {
            chars.push(String.fromCharCode(sys.peek(buf + i)))
        }
        sys.free(buf)
        return chars.join('')
    }

    // Steps the read position back by `diff` bytes.
    unread(diff) {
        const target = this.seq.getReadCount() - diff
        this.seq.prepare(this.path)
        this.seq.skip(target)
    }

    // Restarts reading from the beginning of the file.
    rewind() {
        this.seq.prepare(this.path)
    }

    // Moves the read position to `p` bytes from the start.
    seek(p) {
        this.seq.prepare(this.path)
        this.seq.skip(p)
    }

    // Length recorded by the constructor (explicit `length` argument or the file size).
    get byteLength() {
        return this.length
    }

    // Exposes the underlying reader's `fileHeader` property.
    get fileHeader() {
        return this.seq.fileHeader
    }

    // Read count as reported by the underlying reader.
    getReadCount() {
        return this.seq.getReadCount()
    }
}
// Read TAD chunk header to determine format
let filebuf = new SequentialFileBuffer(_G.shell.resolvePathInput(exec_args[1]).full)
const FILE_SIZE = filebuf.length
if (FILE_SIZE < 7) {
serial.println(`ERROR: File too small (${FILE_SIZE} bytes). Expected TAD format.`)
return 1
}
// Read first chunk header (standalone TAD format: no TAV wrapper)
let firstSampleCount = filebuf.readShort()
let firstMaxIndex = filebuf.readByte()
let firstPayloadSize = filebuf.readInt()
// Validate first chunk
if (firstSampleCount < 0 || firstSampleCount > 65536) {
serial.println(`ERROR: Invalid sample count ${firstSampleCount}. File may be corrupted.`)
return 1
}
if (firstMaxIndex < 0 || firstMaxIndex > 255) {
serial.println(`ERROR: Invalid max index ${firstMaxIndex}. File may be corrupted.`)
return 1
}
if (firstPayloadSize < 1 || firstPayloadSize > 65536) {
serial.println(`ERROR: Invalid payload size ${firstPayloadSize}. File may be corrupted.`)
return 1
}
// Rewind to start
filebuf.rewind()
// Calculate approximate frame info
const AVG_CHUNK_SIZE = 7 + firstPayloadSize // TAD header (2+1+4) + payload
const SAMPLE_RATE = 32000
const bufRealTimeLen = Math.floor((firstSampleCount / SAMPLE_RATE) * 1000) // milliseconds per chunk
if (dumpCoeffs) {
serial.println(`TAD Coefficient Dump Mode`)
serial.println(`File: ${filebuf.file.name}`)
serial.println(`First chunk header:`)
serial.println(` Sample Count: ${firstSampleCount}`)
serial.println(` Max Index: ${firstMaxIndex}`)
serial.println(` Payload Size: ${firstPayloadSize} bytes`)
serial.println(`Chunk Duration: ${bufRealTimeLen} ms`)
serial.println(``)
}
let bytes_left = FILE_SIZE
let decodedLength = 0
let chunkNumber = 0
con.curs_set(0)
let [__, CONSOLE_WIDTH] = con.getmaxyx()
if (interactive) {
let [cy, cx] = con.getyx()
// file name
con.mvaddch(cy, 1)
con.prnch(0xC9);con.prnch(0xCD);con.prnch(0xB5)
print(filebuf.file.name)
con.prnch(0xC6);con.prnch(0xCD)
print("\x84205u".repeat(CONSOLE_WIDTH - 26 - filebuf.file.name.length))
con.prnch(0xB5)
print("Hold Bksp to Exit")
con.prnch(0xC6);con.prnch(0xCD);con.prnch(0xBB)
// L R pillar
con.prnch(0xBA)
con.mvaddch(cy+1, CONSOLE_WIDTH, 0xBA)
// media info
let mediaInfoStr = `TAD Q${firstMaxIndex} ${SAMPLE_RATE/1000}kHz`
con.move(cy+2,1)
con.prnch(0xC8)
print("\x84205u".repeat(CONSOLE_WIDTH - 5 - mediaInfoStr.length))
con.prnch(0xB5)
print(mediaInfoStr)
con.prnch(0xC6);con.prnch(0xCD);con.prnch(0xBC)
con.move(cy+1, 2)
}
let [cy, cx] = con.getyx()
let paintWidth = CONSOLE_WIDTH - 20
// Approximates the playback time in whole seconds for `i` bytes of the file,
// extrapolating from the first chunk's size (AVG_CHUNK_SIZE) and duration (bufRealTimeLen, ms).
function bytesToSec(i) {
    const fractionOfFile = i / FILE_SIZE
    const approxChunkCount = FILE_SIZE / AVG_CHUNK_SIZE
    const secondsPerChunk = bufRealTimeLen / 1000
    return Math.round(fractionOfFile * approxChunkCount * secondsPerChunk)
}
// Formats a number of seconds `n` as "MM:SS"; each field is left-padded
// with zeroes to at least two characters (minutes may be wider).
function secToReadable(n) {
    const pad2 = (v) => String(v).padStart(2, '0')
    const minutes = (n / 60) | 0   // truncate to whole minutes
    const seconds = n % 60
    return pad2(minutes) + ':' + pad2(seconds)
}
// Draws the elapsed/total time and the progress bar on row `cy`, using
// `decodedLength` out of `FILE_SIZE` as the position. Does nothing unless `interactive` is set.
function printPlayBar() {
    if (!interactive) return

    const currently = decodedLength
    const total = FILE_SIZE
    const elapsedSec = bytesToSec(currently)
    const totalSec = bytesToSec(total)

    // wipe the time field, then print "MM:SS / MM:SS" over it
    con.move(cy, 3)
    print(' '.repeat(15))
    con.move(cy, 3)
    print(secToReadable(elapsedSec) + ' / ' + secToReadable(totalSec))

    // bar track followed by the position marker (0xDB)
    con.move(cy, 17)
    print(' ')
    print('\x84196u'.repeat(paintWidth + 1))
    const markerCol = 18 + Math.round(paintWidth * (currently / total))
    con.mvaddch(cy, markerCol, 0xDB)
}
audio.resetParams(0)
audio.purgeQueue(0)
audio.setPcmMode(0)
audio.setPcmQueueCapacityIndex(0, 2) // queue size is now 8
const QUEUE_MAX = audio.getPcmQueueCapacity(0)
audio.setMasterVolume(0, 255)
audio.play(0)
let stopPlay = false
let errorlevel = 0
try {
while (bytes_left > 0 && !stopPlay) {
if (interactive) {
sys.poke(-40, 1)
if (sys.peek(-41) == 67) { // Backspace key
stopPlay = true
}
}
printPlayBar()
// Read TAD chunk header (standalone TAD format)
// Format: [sample_count][max_index][payload_size][payload]
let sampleCount = filebuf.readShort()
let maxIndex = filebuf.readByte()
let payloadSize = filebuf.readInt()
// Validate every chunk (not just first one)
if (sampleCount < 0 || sampleCount > 65536) {
serial.println(`ERROR: Chunk ${chunkNumber}: Invalid sample count ${sampleCount}. File may be corrupted.`)
errorlevel = 1
break
}
if (maxIndex < 0 || maxIndex > 255) {
serial.println(`ERROR: Chunk ${chunkNumber}: Invalid max index ${maxIndex}. File may be corrupted.`)
errorlevel = 1
break
}
if (payloadSize < 1 || payloadSize > 65536) {
serial.println(`ERROR: Chunk ${chunkNumber}: Invalid payload size ${payloadSize}. File may be corrupted.`)
errorlevel = 1
break
}
if (payloadSize + 7 > bytes_left) {
serial.println(`ERROR: Chunk ${chunkNumber}: Chunk size ${payloadSize + 7} exceeds remaining file size ${bytes_left}`)
errorlevel = 1
break
}
if (dumpCoeffs && chunkNumber < 3) {
serial.println(`=== Chunk ${chunkNumber} ===`)
serial.println(` Sample Count: ${sampleCount}`)
serial.println(` Max Index: ${maxIndex}`)
serial.println(` Payload Size: ${payloadSize} bytes`)
serial.println(` Bytes remaining in file: ${bytes_left}`)
}
// Rewind 7 bytes to re-read the header along with payload
// This allows reading the complete chunk (header + payload) in one call
filebuf.unread(7)
// Read entire chunk (header + payload) to TAD input buffer
// This matches TAV's approach for packet 0x24
let totalChunkSize = 7 + payloadSize
filebuf.readBytes(totalChunkSize, TAD_INPUT_ADDR)
if (dumpCoeffs && chunkNumber < 3) {
// Dump first 32 bytes of compressed payload (skip 7-byte header)
serial.print(` Compressed data (first 32 bytes): `)
for (let i = 0; i < Math.min(32, payloadSize); i++) {
let b = sys.peek(TAD_INPUT_ADDR + 7 + i)
serial.print(`${(b & 0xFF).toString(16).padStart(2, '0')} `)
}
serial.println('')
}
// Decode TAD chunk
audio.tadDecode()
if (dumpCoeffs && chunkNumber < 3) {
// After decoding, the decoded PCMu8 samples are in tadDecodedBin
serial.println(` Decoded ${sampleCount} samples`)
// Dump first 16 decoded samples (PCMu8 stereo interleaved)
serial.print(` Decoded (first 16 L samples): `)
for (let i = 0; i < 16; i++) {
serial.print(`${sys.peek(TAD_DECODED_ADDR + i * 2) & 0xFF} `)
}
serial.println('')
serial.print(` Decoded (first 16 R samples): `)
for (let i = 0; i < 16; i++) {
serial.print(`${sys.peek(TAD_DECODED_ADDR + i * 2 + 1) & 0xFF} `)
}
serial.println('')
serial.println('')
}
// Upload decoded audio to queue
audio.tadUploadDecoded(0, sampleCount)
if (!dumpCoeffs) {
// Sleep for the duration of the audio chunk to pace playback
// This prevents uploading everything at once
sys.sleep(bufRealTimeLen)
}
// Chunk size = header (7 bytes) + payload
let chunkSize = 7 + payloadSize
bytes_left -= chunkSize
decodedLength += chunkSize
chunkNumber++
// Limit coefficient dump to first 3 chunks
if (dumpCoeffs && chunkNumber >= 3) {
serial.println(`... (remaining chunks omitted)`)
// Keep playing but don't dump more
}
}
}
catch (e) {
printerrln(e)
errorlevel = 1
}
finally {
if (interactive) {
con.move(cy + 3, 1)
con.curs_set(1)
}
}
return errorlevel

File diff suppressed because it is too large Load Diff

View File

@@ -1,16 +1,17 @@
// Created by Claude on 2025-08-18.
// Created by CuriousTorvald and Claude on 2025-08-18.
// TSVM Enhanced Video (TEV) Format Decoder - YCoCg-R 4:2:0 Version
// Usage: playtev moviefile.tev [options]
// Options: -i (interactive), -debug-mv (show motion vector debug visualization)
// -deinterlace=algorithm (yadif or bwdif, default: yadif)
// -nodeblock (disble deblocking filter)
// -nodeblock (disable post-processing deblocking filter)
// -boundaryaware (enable boundary-aware decoding to prevent artifacts at DCT level)
const WIDTH = 560
const HEIGHT = 448
const BLOCK_SIZE = 16 // 16x16 blocks for YCoCg-R
const TEV_MAGIC = [0x1F, 0x54, 0x53, 0x56, 0x4D, 0x54, 0x45, 0x56] // "\x1FTSVM TEV"
const TEV_VERSION_YCOCG = 2 // YCoCg-R version
const TEV_VERSION_XYB = 3 // XYB version
const TEV_VERSION_ICtCp = 3 // ICtCp version
const SND_BASE_ADDR = audio.getBaseAddr()
const pcm = require("pcm")
const MP2_FRAME_SIZE = [144,216,252,288,360,432,504,576,720,864,1008,1152,1440,1728]
@@ -25,7 +26,8 @@ const TEV_MODE_MOTION = 0x03
const TEV_PACKET_IFRAME = 0x10
const TEV_PACKET_PFRAME = 0x11
const TEV_PACKET_AUDIO_MP2 = 0x20
const TEV_PACKET_SUBTITLE = 0x30
const TEV_PACKET_SUBTITLE = 0x30 // Legacy SSF (frame-locked)
const TEV_PACKET_SUBTITLE_TC = 0x31 // SSF-TC (timecode-based)
const TEV_PACKET_SYNC = 0xFF
// Subtitle opcodes (SSF format)
@@ -41,11 +43,16 @@ let subtitleVisible = false
let subtitleText = ""
let subtitlePosition = 0 // 0=bottom center (default)
// SSF-TC subtitle event buffer
let subtitleEvents = [] // Array of {timecode_ns, index, opcode, text}
let nextSubtitleEventIndex = 0 // Next event to check
// Parse command line options
let interactive = false
let debugMotionVectors = false
let deinterlaceAlgorithm = "yadif"
let enableDeblocking = true // Default: enabled (use -nodeblock to disable)
let enableDeblocking = false // Default: disabled (use -deblock to enable)
let enableBoundaryAwareDecoding = false // Default: disabled (use -boundaryaware to enable) // suitable for still frame and slide shows, absolutely unsuitable for videos
if (exec_args.length > 2) {
for (let i = 2; i < exec_args.length; i++) {
@@ -54,8 +61,10 @@ if (exec_args.length > 2) {
interactive = true
} else if (arg === "-debug-mv") {
debugMotionVectors = true
} else if (arg === "-nodeblock") {
enableDeblocking = false
} else if (arg === "-deblock") {
enableDeblocking = true
} else if (arg === "-boundaryaware") {
enableBoundaryAwareDecoding = true
} else if (arg.startsWith("-deinterlace=")) {
deinterlaceAlgorithm = arg.substring(13)
}
@@ -70,18 +79,17 @@ let notifHideTimer = 0
const NOTIF_SHOWUPTIME = 3000000000
let [cy, cx] = con.getyx()
let seqreadserial = require("seqread")
let seqreadtape = require("seqreadtape")
let gui = require("playgui")
let seqread = undefined
let fullFilePathStr = fullFilePath.full
// Select seqread driver to use
if (fullFilePathStr.startsWith('$:/TAPE') || fullFilePathStr.startsWith('$:\\TAPE')) {
seqread = seqreadtape
seqread = require("seqreadtape")
seqread.prepare(fullFilePathStr)
seqread.seek(0)
} else {
seqread = seqreadserial
seqread = require("seqread")
seqread.prepare(fullFilePathStr)
}
@@ -97,6 +105,9 @@ audio.purgeQueue(0)
audio.setPcmMode(0)
audio.setMasterVolume(0, 255)
// set colour zero as half-opaque black
graphics.setPalette(0, 0, 0, 0, 9)
// Subtitle display functions
function clearSubtitleArea() {
// Clear the subtitle area at the bottom of the screen
@@ -268,6 +279,99 @@ function displaySubtitle(text, position = 0) {
con.color_pair(oldFgColor, oldBgColor)
}
// Parse one SSF-TC subtitle packet (0x31) from the stream and queue it in subtitleEvents.
// Layout consumed here: 3-byte index, 8-byte timecode, 1-byte opcode (all little-endian),
// followed by (packetSize - 12) bytes of payload.
// packetSize: total packet length as read by the caller.
function parseSubtitlePacketTC(packetSize) {
    const HEADER_BYTES = 12 // 3 (index) + 8 (timecode) + 1 (opcode)

    // 24-bit subtitle index, least significant byte first
    let index = 0
    for (let k = 0; k < 3; k++) {
        index |= seqread.readOneByte() << (8 * k)
    }

    // 64-bit timecode, accumulated arithmetically so it is not truncated to 32 bits
    let timecode_ns = 0
    let weight = 1
    for (let k = 0; k < 8; k++) {
        timecode_ns += seqread.readOneByte() * weight
        weight *= 256
    }

    const opcode = seqread.readOneByte()
    const payloadBytes = packetSize - HEADER_BYTES

    // Only SHOW and opcodes 0x10..0x2F have their payload decoded as text
    const carriesText = () => opcode === SSF_OP_SHOW || (opcode >= 0x10 && opcode <= 0x2F)

    let text = null
    if (payloadBytes > 1 && carriesText()) {
        const payloadPtr = seqread.readBytes(payloadBytes)
        const chars = []
        // The last payload byte is treated as the null terminator and never decoded
        for (let k = 0; k < payloadBytes - 1; k++) {
            const code = sys.peek(payloadPtr + k)
            if (code === 0) break
            chars.push(String.fromCharCode(code))
        }
        text = chars.join("")
        sys.free(payloadPtr)
    } else if (payloadBytes > 0) {
        // Payload is not decoded: consume it so the stream stays aligned
        const discardPtr = seqread.readBytes(payloadBytes)
        sys.free(discardPtr)
    }

    subtitleEvents.push({
        timecode_ns: timecode_ns,
        index: index,
        opcode: opcode,
        text: text
    })
}
// Run every buffered SSF-TC event whose timecode is at or before currentTimeNs.
// Events are consumed in buffer order starting at nextSubtitleEventIndex; the first
// event that lies in the future stops processing until a later call.
function processSubtitleEvents(currentTimeNs) {
    for (; nextSubtitleEventIndex < subtitleEvents.length; nextSubtitleEventIndex++) {
        const due = subtitleEvents[nextSubtitleEventIndex]
        if (due.timecode_ns > currentTimeNs) {
            return // not due yet; later events are not inspected
        }

        const op = due.opcode
        if (op === SSF_OP_SHOW) {
            subtitleText = due.text || ""
            subtitleVisible = true
            displaySubtitle(subtitleText, subtitlePosition)
        } else if (op === SSF_OP_HIDE) {
            subtitleVisible = false
            subtitleText = ""
            clearSubtitleArea()
        } else if (op === SSF_OP_MOVE) {
            // The new position is carried as the first character code of the text
            if (due.text && due.text.length > 0) {
                const requested = due.text.charCodeAt(0)
                if (requested >= 0 && requested <= 8) {
                    subtitlePosition = requested
                    // Redraw at the new position only when something is on screen
                    if (subtitleVisible && subtitleText.length > 0) {
                        clearSubtitleArea()
                        displaySubtitle(subtitleText, subtitlePosition)
                    }
                }
            }
        } else if (op === SSF_OP_UPLOAD_LOW_FONT || op === SSF_OP_UPLOAD_HIGH_FONT) {
            // Font upload events need no action at playback time
        }
    }
}
// Process legacy frame-locked subtitle packet (0x30)
function processSubtitlePacket(packetSize) {
// Read subtitle packet data according to SSF format
// uint24 index + uint8 opcode + variable arguments
@@ -384,15 +488,15 @@ if (!magicMatching) {
// Read header
let version = seqread.readOneByte()
if (version !== TEV_VERSION_YCOCG && version !== TEV_VERSION_XYB) {
println(`Unsupported TEV version: ${version} (expected ${TEV_VERSION_YCOCG} for YCoCg-R or ${TEV_VERSION_XYB} for XYB)`)
if (version !== TEV_VERSION_YCOCG && version !== TEV_VERSION_ICtCp) {
println(`Unsupported TEV version: ${version} (expected ${TEV_VERSION_YCOCG} for YCoCg-R or ${TEV_VERSION_ICtCp} for ICtCp)`)
return 1
}
let colorSpace = (version === TEV_VERSION_XYB) ? "XYB" : "YCoCg-R"
let colorSpace = (version === TEV_VERSION_ICtCp) ? "ICtCp" : "YCoCg"
if (interactive) {
con.move(1,1)
println(`Push and hold Backspace to exit | TEV Format ${version} (${colorSpace}) | Deblocking: ${enableDeblocking ? 'ON' : 'OFF'}`)
println(`Push and hold Backspace to exit | ${colorSpace} | Deblock: ${enableDeblocking ? 'ON' : 'OFF'} | EdgeAware: ${enableBoundaryAwareDecoding ? 'ON' : 'OFF'}`);
}
let width = seqread.readShort()
@@ -417,6 +521,7 @@ serial.println(` FPS: ${(isNTSC) ? (fps * 1000 / 1001) : fps}`)
serial.println(` Duration: ${totalFrames / fps}`)
serial.println(` Audio: ${hasAudio ? "Yes" : "No"}`)
serial.println(` Resolution: ${width}x${height}, ${isInterlaced ? "interlaced" : "progressive"}`)
serial.println(` Quality: Y=${qualityY}, Co=${qualityCo}, Cg=${qualityCg}`)
// DEBUG interlace raw output
@@ -436,13 +541,14 @@ function updateDataRateBin(rate) {
}
}
function getVideoRate(rate) {
function getVideoRate() {
let baseRate = videoRateBin.reduce((a, c) => a + c, 0)
let mult = fps / videoRateBin.length
return baseRate * mult
}
let FRAME_TIME = 1.0 / fps
let FRAME_TIME_NS = (1000000000.0 / fps) // Frame time in nanoseconds for subtitle timing
// Ultra-fast approach: always render to display, use dedicated previous frame buffer
const FRAME_PIXELS = width * height
@@ -577,11 +683,12 @@ function rotateFieldBuffers() {
}
let frameDuped = false
let currentFrameType = "I"
// Main decoding loop - simplified for performance
try {
let t1 = sys.nanoTime()
while (!stopPlay && seqread.getReadCount() < FILE_LENGTH && trueFrameCount < totalFrames) {
while (!stopPlay && seqread.getReadCount() < FILE_LENGTH /*&& trueFrameCount < totalFrames*/) {
// Handle interactive controls
if (interactive) {
@@ -609,7 +716,7 @@ try {
PREV_RGB_ADDR = temp
} else if (packetType == TEV_PACKET_IFRAME || packetType == TEV_PACKET_PFRAME) {
// Video frame packet (always includes rate control factor)
// Video frame packet
let payloadLen = seqread.readInt()
let compressedPtr = seqread.readBytes(payloadLen)
updateDataRateBin(payloadLen)
@@ -624,11 +731,6 @@ try {
// Decompress using gzip
// Optimized buffer size calculation for TEV YCoCg-R blocks
let blocksX = (width + 15) >> 4 // 16x16 blocks
let blocksY = (height + 15) >> 4
let tevBlockSize = 1 + 4 + 2 + (256 * 2) + (64 * 2) + (64 * 2) // mode + mv + cbp + Y(16x16) + Co(8x8) + Cg(8x8)
let decompressedSize = Math.max(payloadLen * 4, blocksX * blocksY * tevBlockSize) // More efficient sizing
let actualSize
let decompressStart = sys.nanoTime()
try {
@@ -643,7 +745,7 @@ try {
continue
}
// Hardware-accelerated TEV decoding to RGB buffers (YCoCg-R or XYB based on version)
// Hardware-accelerated TEV decoding to RGB buffers (YCoCg-R or ICtCp based on version)
try {
// duplicate every 1000th frame (pass a turn every 1000n+501st) if NTSC
if (!isNTSC || frameCount % 1000 != 501 || frameDuped) {
@@ -655,14 +757,14 @@ try {
if (isInterlaced) {
// For interlaced: decode current frame into currentFieldAddr
// For display: use prevFieldAddr as current, currentFieldAddr as next
graphics.tevDecode(blockDataPtr, nextFieldAddr, currentFieldAddr, width, decodingHeight, [qualityY, qualityCo, qualityCg], trueFrameCount, debugMotionVectors, version, enableDeblocking)
graphics.tevDecode(blockDataPtr, nextFieldAddr, currentFieldAddr, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking, enableBoundaryAwareDecoding)
graphics.tevDeinterlace(trueFrameCount, width, decodingHeight, prevFieldAddr, currentFieldAddr, nextFieldAddr, CURRENT_RGB_ADDR, deinterlaceAlgorithm)
// Rotate field buffers for next frame: NEXT -> CURRENT -> PREV
rotateFieldBuffers()
} else {
// Progressive or first frame: normal decoding without temporal prediction
graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, decodingHeight, [qualityY, qualityCo, qualityCg], trueFrameCount, debugMotionVectors, version, enableDeblocking)
graphics.tevDecode(blockDataPtr, CURRENT_RGB_ADDR, PREV_RGB_ADDR, width, decodingHeight, qualityY, qualityCo, qualityCg, trueFrameCount, debugMotionVectors, version, enableDeblocking, enableBoundaryAwareDecoding)
}
decodeTime = (sys.nanoTime() - decodeStart) / 1000000.0 // Convert to milliseconds
@@ -670,7 +772,7 @@ try {
// Upload RGB buffer to display framebuffer with dithering
let uploadStart = sys.nanoTime()
graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, width, height, frameCount, true)
graphics.uploadRGBToFramebuffer(CURRENT_RGB_ADDR, width, height, frameCount, false)
uploadTime = (sys.nanoTime() - uploadStart) / 1000000.0 // Convert to milliseconds
}
else {
@@ -679,6 +781,12 @@ try {
serial.println(`Frame ${frameCount}: Duplicating previous frame`)
}
// Process SSF-TC subtitle events based on current playback time
if (subtitleEvents.length > 0) {
let currentTimeNs = frameCount * FRAME_TIME_NS
processSubtitleEvents(currentTimeNs)
}
// Defer audio playback until a first frame is sent
if (isInterlaced) {
// fire audio after frame 1
@@ -710,6 +818,8 @@ try {
serial.println(`Frame ${frameCount}: Decompress=${decompressTime.toFixed(1)}ms, Decode=${decodeTime.toFixed(1)}ms, Upload=${uploadTime.toFixed(1)}ms, Bias=${biasTime.toFixed(1)}ms, Total=${totalTime.toFixed(1)}ms`)
}
currentFrameType = packetType == TEV_PACKET_IFRAME ? "I" : "P"
} else if (packetType == TEV_PACKET_AUDIO_MP2) {
// MP2 Audio packet
let audioLen = seqread.readInt()
@@ -724,9 +834,14 @@ try {
audio.mp2UploadDecoded(0)
} else if (packetType == TEV_PACKET_SUBTITLE) {
// Subtitle packet
// Legacy frame-locked subtitle packet (0x30)
let packetSize = seqread.readInt()
processSubtitlePacket(packetSize)
} else if (packetType == TEV_PACKET_SUBTITLE_TC) {
// SSF-TC subtitle packet (0x31) - parse and buffer for later playback
let packetSize = seqread.readInt()
parseSubtitlePacketTC(packetSize)
} else if (packetType == 0x00) {
// Silently discard, faulty subtitle creation can cause this as 0x00 is used as an argument terminator
} else {
@@ -743,27 +858,37 @@ try {
if (interactive) {
notifHideTimer += (t2 - t1)
if (!notifHidden && notifHideTimer > (NOTIF_SHOWUPTIME + FRAME_TIME)) {
con.move(1, 1)
print(' '.repeat(79))
// clearing function here
notifHidden = true
}
if (!hasSubtitle) {
con.move(31, 1)
graphics.setTextFore(161)
print(`Frame: ${frameCount}/${totalFrames} (${((frameCount / akku2 * 100)|0) / 100}f) `)
con.move(32, 1)
graphics.setTextFore(161)
print(`VRate: ${(getVideoRate() / 1024 * 8)|0} kbps `)
con.move(1, 1)
con.color_pair(253, 0)
let guiStatus = {
fps: fps,
videoRate: getVideoRate(),
frameCount: frameCount,
totalFrames: totalFrames,
frameMode: currentFrameType,
qY: qualityY,
qCo: qualityCo,
qCg: qualityCg,
akku: akku2,
fileName: fullFilePathStr,
fileOrd: 1,
resolution: `${width}x${height}${(isInterlaced) ? 'i' : ''}`,
colourSpace: colorSpace,
currentStatus: 1
}
gui.printBottomBar(guiStatus)
gui.printTopBar(guiStatus, 1)
}
t1 = t2
}
}
catch (e) {
printerrln(`TEV ${colorSpace} decode error: ${e}`)
serial.printerr(`TEV ${colorSpace} decode error: ${e}`)
errorlevel = 1
}
finally {
@@ -781,7 +906,10 @@ finally {
if (interactive) {
//con.clear()
}
// set colour zero as opaque black
}
graphics.setPalette(0, 0, 0, 0, 0)
con.move(cy, cx) // restore cursor
return errorlevel

View File

@@ -0,0 +1,358 @@
// TSVM Universal Cue Format (UCF) Player
// Created by CuriousTorvald and Claude on 2025-09-22
// Usage: playucf cuefile.ucf [options]
// Options: -i (interactive mode)
// Argument check: exec_args[1] is the cue file. Without it, print usage and exit with code 1.
if (!exec_args[1]) {
serial.println("Usage: playucf cuefile.ucf [options]")
serial.println("Options: -i (interactive mode)")
return 1
}
// Interactive mode is recognised only when "-i" (any letter case) is exactly the argument
// right after the file name; it is not searched for among later arguments.
const interactive = exec_args[2] && exec_args[2].toLowerCase() == "-i"
// Resolve the user-supplied path through the shell; exit with code 2 if the file does not exist.
const fullFilePath = _G.shell.resolvePathInput(exec_args[1])
if (!files.open(fullFilePath.full).exists) {
serial.println(`Error: File not found: ${fullFilePath.full}`)
return 2
}
// UCF Format constants
const UCF_MAGIC = [0x1F, 0x54, 0x53, 0x56, 0x4D, 0x55, 0x43, 0x46] // "\x1FTSVM UCF"
const UCF_VERSION = 1
// Addressing modes, compared against the low 4 bits of each cue element's first byte
const ADDRESSING_EXTERNAL = 0x01
const ADDRESSING_INTERNAL = 0x02
// Media player mappings based on file extensions
// Keys are lowercase extensions; values are player script names (looked up under A:\tvdos\bin\<name>.js)
const PLAYER_MAP = {
'mp2': 'playmp2',
'wav': 'playwav',
'pcm': 'playpcm',
'mv1': 'playmv1',
'mv2': 'playtev',
'mv3': 'playtav'
}
// Helper class for UCF file reading with internal addressing support.
// Wraps a seqread driver and keeps its own byte counter (currentOffset, measured from the
// start of the underlying file) so positions can be reported relative to baseOffset.
// Every read that goes through this class must update currentOffset; reads made directly
// on `seq` are invisible to getPosition().
class UCFSequentialReader {
    // path:       full path of the file to read
    // baseOffset: byte offset inside the file that is treated as position 0
    constructor(path, baseOffset = 0) {
        this.path = path
        this.baseOffset = baseOffset
        this.currentOffset = 0

        // Detect if this is a TAPE device path
        if (path.startsWith("$:/TAPE") || path.startsWith("$:\\TAPE")) {
            this.seq = require("seqreadtape")
        } else {
            this.seq = require("seqread")
        }
        this.seq.prepare(path)

        // Skip to the base offset for internal addressing
        if (baseOffset > 0) {
            this.seq.skip(baseOffset)
            this.currentOffset = baseOffset
        }
    }

    // Reads `length` bytes. The optional `ptr` is forwarded to the driver (callers in this
    // file pass a destination pointer as second argument); when omitted the driver is
    // called with the length only, exactly as before.
    readBytes(length, ptr) {
        this.currentOffset += length
        return (ptr === undefined)
            ? this.seq.readBytes(length)
            : this.seq.readBytes(length, ptr)
    }

    readOneByte() {
        this.currentOffset += 1
        return this.seq.readOneByte()
    }

    readShort() {
        this.currentOffset += 2
        return this.seq.readShort()
    }

    // Offset-tracking counterpart of the driver's readInt().
    // NOTE(review): assumes the driver's readInt consumes 4 bytes; confirm against seqread.
    readInt() {
        this.currentOffset += 4
        return this.seq.readInt()
    }

    // Offset-tracking counterpart of the driver's readFourCC() (a FourCC is 4 bytes).
    readFourCC() {
        this.currentOffset += 4
        return this.seq.readFourCC()
    }

    readString(length) {
        this.currentOffset += length
        return this.seq.readString(length)
    }

    skip(n) {
        this.currentOffset += n
        this.seq.skip(n)
    }

    // Skip to absolute position from base offset
    seekTo(position) {
        let targetOffset = this.baseOffset + position

        if (targetOffset < this.currentOffset) {
            // The driver only moves forward: re-prepare the file and skip from the start
            this.seq.prepare(this.path)
            this.currentOffset = 0
            if (targetOffset > 0) {
                this.seq.skip(targetOffset)
                this.currentOffset = targetOffset
            }
        } else if (targetOffset > this.currentOffset) {
            // Skip forward
            let skipAmount = targetOffset - this.currentOffset
            this.seq.skip(skipAmount)
            this.currentOffset = targetOffset
        }
    }

    // Number of bytes consumed relative to baseOffset
    getPosition() {
        return this.currentOffset - this.baseOffset
    }
}
// Parse UCF file
serial.println(`Playing UCF: ${fullFilePath.full}`)
let reader = new UCFSequentialReader(fullFilePath.full)
// Read and validate magic
let magic = []
for (let i = 0; i < 8; i++) {
magic.push(reader.readOneByte())
}
let magicValid = true
for (let i = 0; i < 8; i++) {
if (magic[i] !== UCF_MAGIC[i]) {
magicValid = false
break
}
}
if (!magicValid) {
serial.println("Error: Invalid UCF magic signature")
return 3
}
// Read header
let version = reader.readOneByte()
if (version !== UCF_VERSION) {
serial.println(`Error: Unsupported UCF version: ${version} (expected ${UCF_VERSION})`)
return 4
}
let numElements = reader.readShort()
// Skip reserved bytes (5 bytes)
reader.skip(5)
serial.println(`UCF Version: ${version}, Elements: ${numElements}`)
// Parse cue elements
let cueElements = []
for (let i = 0; i < numElements; i++) {
let element = {}
element.addressingModeAndIntent = reader.readOneByte()
element.addressingMode = element.addressingModeAndIntent & 15
let nameLength = reader.readShort()
element.name = reader.readString(nameLength)
if (element.addressingMode === ADDRESSING_EXTERNAL) {
let pathLength = reader.readShort()
element.path = reader.readString(pathLength)
serial.println(`Element ${i + 1}: ${element.name} -> ${element.path} (external)`)
} else if (element.addressingMode === ADDRESSING_INTERNAL) {
// Read 48-bit offset (6 bytes, little endian)
let offsetBytes = []
for (let j = 0; j < 6; j++) {
offsetBytes.push(reader.readOneByte())
}
element.offset = 0
for (let j = 0; j < 6; j++) {
element.offset |= (offsetBytes[j] << (j * 8))
}
serial.println(`Element ${i + 1}: ${element.name} -> offset ${element.offset} (internal)`)
} else {
serial.println(`Error: Unknown addressing mode: ${element.addressingMode}`)
return 5
}
cueElements.push(element)
}
// Returns the lowercased text after the last '.' in filename, or '' when there is no dot.
function getFileExtension(filename) {
    const dotAt = filename.lastIndexOf('.')
    return (dotAt < 0) ? '' : filename.slice(dotAt + 1).toLowerCase()
}
// Function to determine player for a file.
// Returns the player script name registered in PLAYER_MAP for the file's extension,
// or null when the extension has no player.
function getPlayerForFile(filename) {
    let ext = getFileExtension(filename)
    // Own-property check: a plain PLAYER_MAP[ext] also finds inherited members such as
    // "constructor", which would be returned as if it were a player name.
    if (!Object.prototype.hasOwnProperty.call(PLAYER_MAP, ext)) return null
    return PLAYER_MAP[ext] || null
}
// Builds a descriptor for an internally addressed element. No file is created: the
// returned path is only a unique placeholder name under $:\TMP, made from the current
// time and the element name with every non-alphanumeric character replaced by '_'.
function createTempFileForInternal(element, ucfPath) {
    const stamp = Date.now()
    const safeName = element.name.replace(/[^a-zA-Z0-9]/g, '_')
    const placeholderPath = `$:\\TMP\\temp_ucf_${stamp}_${safeName}`

    const descriptor = {
        isTemporary: true,
        path: placeholderPath,
        ucfPath: ucfPath,
        offset: element.offset,
        name: element.name
    }
    return descriptor
}
// Play each cue element in sequence
for (let i = 0; i < cueElements.length; i++) {
let element = cueElements[i]
serial.println(`\nPlaying element ${i + 1}/${numElements}: ${element.name}`)
if (interactive && i > 0) {
serial.print("Press ENTER to continue, 'q' to quit: ")
let input = serial.readLine()
if (input && input.toLowerCase().startsWith('q')) {
serial.println("Playback stopped by user")
break
}
}
let playerFile = null
let targetPath = null
if (element.addressingMode === ADDRESSING_EXTERNAL) {
// External addressing - resolve relative path
let elementPath = element.path
if (!elementPath.startsWith('A:\\') && !elementPath.startsWith('A:/')) {
// Relative path - resolve relative to UCF file location
let ucfDir = fullFilePath.full.substring(0, fullFilePath.full.lastIndexOf('\\'))
targetPath = ucfDir + '\\' + elementPath.replace(/\//g, '\\')
} else {
targetPath = elementPath
}
if (!files.open(targetPath).exists) {
serial.println(`Warning: External file not found: ${targetPath}`)
continue
}
playerFile = getPlayerForFile(element.name)
} else if (element.addressingMode === ADDRESSING_INTERNAL) {
// Internal addressing - create temporary file reference
let tempFile = createTempFileForInternal(element, fullFilePath.full)
targetPath = tempFile.path
playerFile = getPlayerForFile(element.name)
// For internal addressing, we need to extract the data to a temporary location
// or use a specialized player that can handle offset-based reading
// Since we can't easily create temp files, we'll modify the exec_args for the player
// Create a new UCF reader positioned at the file offset
let fileReader = new UCFSequentialReader(fullFilePath.full, element.offset)
// We need to somehow pass this to the player...
// The most elegant solution is to create a wrapper that temporarily modifies
// the file system view or uses a custom SequentialFileBuffer
// For now, let's use a simpler approach: save exec_args and restore them
let originalExecArgs = [...exec_args]
// Modify the global environment to provide the offset reader
let originalFilesOpen = files.open
files.open = function(path) {
if (path === targetPath || path.endsWith(targetPath)) {
// Return a mock file object that uses our offset reader
return {
exists: true,
size: 2147483648, // Arbitrary large size
path: path,
_ucfReader: fileReader
}
}
return originalFilesOpen.call(this, path)
}
// Also modify seqread require to use our reader
let originalRequire = require
require = function(moduleName) {
if (moduleName === "seqread" || moduleName === "seqreadtape") {
return {
prepare: function(path) {
if (path === targetPath || path.endsWith(targetPath)) {
// Already prepared in fileReader
return 0
}
return fileReader.seq.prepare(path)
},
readBytes: function(length, ptr) { return fileReader.readBytes(length, ptr) },
readOneByte: function() { return fileReader.readOneByte() },
readShort: function() { return fileReader.readShort() },
readInt: function() { return fileReader.seq.readInt() },
readFourCC: function() { return fileReader.seq.readFourCC() },
readString: function(length) { return fileReader.readString(length) },
skip: function(n) { return fileReader.skip(n) },
getReadCount: function() { return fileReader.getPosition() },
fileHeader: fileReader.seq.fileHeader
}
}
return originalRequire.call(this, moduleName)
}
try {
// Execute the player with modified environment
exec_args[1] = targetPath
if (playerFile) {
let playerPath = `A:\\tvdos\\bin\\${playerFile}.js`
if (files.open(playerPath).exists) {
eval(files.readText(playerPath))
} else {
serial.println(`Warning: Player not found: ${playerFile}`)
}
} else {
serial.println(`Warning: No player found for file type: ${element.name}`)
}
} catch (e) {
serial.println(`Error playing ${element.name}: ${e.message}`)
} finally {
// Restore original environment
files.open = originalFilesOpen
require = originalRequire
exec_args = originalExecArgs
}
continue
}
if (!playerFile) {
serial.println(`Warning: No player found for file type: ${element.name}`)
continue
}
// Execute the appropriate player
let playerPath = `A:\\tvdos\\bin\\${playerFile}.js`
if (!files.open(playerPath).exists) {
serial.println(`Warning: Player script not found: ${playerPath}`)
continue
}
// Save and modify exec_args for the player
let originalExecArgs = [...exec_args]
exec_args[1] = targetPath
try {
eval(files.readText(playerPath))
} catch (e) {
serial.println(`Error playing ${element.name}: ${e.message}`)
} finally {
// Restore original exec_args
exec_args = originalExecArgs
}
}
serial.println("\nUCF playback completed")
return 0

View File

@@ -27,12 +27,14 @@ const COL_HL_EXT = {
"adpcm": 31,
"pcm": 32,
"mp3": 33,
"tad": 33,
"mp2": 34,
"mov": 213,
"mv2": 214,
"mv3": 214,
"mv1": 213,
"mv2": 213,
"mv3": 213,
"tav": 213,
"ipf1": 190,
"ipf2": 191,
"ipf2": 190,
"txt": 223,
"md": 223,
"log": 223
@@ -43,9 +45,11 @@ const EXEC_FUNS = {
"adpcm": (f) => _G.shell.execute(`playwav "${f}" -i`),
"mp3": (f) => _G.shell.execute(`playmp3 "${f}" -i`),
"mp2": (f) => _G.shell.execute(`playmp2 "${f}" -i`),
"mov": (f) => _G.shell.execute(`playmov "${f}" -i`),
"mv1": (f) => _G.shell.execute(`playmv1 "${f}" -i`),
"mv2": (f) => _G.shell.execute(`playtev "${f}" -i`),
"mv3": (f) => _G.shell.execute(`playtev "${f}" -i`),
"mv3": (f) => _G.shell.execute(`playtav "${f}" -i`),
"tav": (f) => _G.shell.execute(`playtav "${f}" -i`),
"tad": (f) => _G.shell.execute(`playtad "${f}" -i`),
"pcm": (f) => _G.shell.execute(`playpcm "${f}" -i`),
"ipf1": (f) => _G.shell.execute(`decodeipf "${f}" -i`),
"ipf2": (f) => _G.shell.execute(`decodeipf "${f}" -i`),
@@ -68,6 +72,7 @@ let cursor = [0, 0] // absolute position!
function bytesToReadable(i) {
return ''+ (
(i > 999999999) ? (((i / 10000000)|0)/100 + "G") :
(i > 999999) ? (((i / 10000)|0)/100 + "M") :
(i > 9999) ? (((i / 100)|0)/10 + "K") :
i
@@ -474,6 +479,7 @@ let filenavOninput = (window, event) => {
firstRunLatch = true
con.curs_set(0);clearScr()
refreshFilePanelCache(windowMode)
redraw()
}
}

Binary file not shown.

Binary file not shown.

View File

@@ -1,37 +1,3 @@
let status = 0
let workarea = sys.malloc(1920)
// install LOCHRROM
let hangulRomL = files.open("A:/tvdos/i18n/hang_lo.chr")
if (!hangulRomL.exists) {
printerrln("hang_lo.chr not found")
sys.free(workarea)
return status
}
//dma.comToRam(filesystem._toPorts("A")[0], 0, workarea, 1920)
hangulRomL.pread(workarea, 1920, 0)
for (let i = 0; i < 1920; i++) sys.poke(-1300607 - i, sys.peek(workarea + i))
sys.poke(-1299460, 18)
// install HICHRROM
let hangulRomH = files.open("A:/tvdos/i18n/hang_hi.chr")
if (!hangulRomH.exists) {
printerrln("hang_hi.chr not found")
sys.free(workarea)
sys.poke(-1299460, 20) // clean up the crap
return status
}
//dma.comToRam(filesystem._toPorts("A")[0], 0, workarea, 1920)
hangulRomH.pread(workarea, 1920, 0)
for (let i = 0; i < 1920; i++) sys.poke(-1300607 - i, sys.peek(workarea + i))
sys.poke(-1299460, 19)
sys.free(workarea)
graphics.setHalfrowMode(true)
/*
* A character is defined as one of:
* 1. [I,x] (Initial only)
@@ -100,7 +66,37 @@ i:{ // Cell Indices: [c0,c2]
18:[5,0],
19:[5,11],
20:[0,14]
},f:{ // Cell Indices: [c3,c5]
},fvert:{ // Cell Indices: [c3,c5] for non-horizontal vowels (ㅏ,ㅐ,ㅑ,ㅒ and compound vowels)
// c3,c5:[null,ㄱ,ㄴ,ㄷ,...]
0:[0,0],
1:[0,1],
2:[1,1],
3:[1,7],
4:[0,2],
5:[2,9],
6:[2,14],
7:[0,3],
8:[0,4],
9:[4,1],
10:[4,5],
11:[4,6],
12:[4,7],
13:[4,12],
14:[4,13],
15:[4,14],
16:[0,5],
17:[0,6],
18:[6,7],
19:[0,7],
20:[7,7],
21:[0,8],
22:[0,9],
23:[0,10],
24:[0,11],
25:[0,12],
26:[0,13],
27:[0,14]
},fhorz:{ // Cell Indices: [c3,c5] for horizontal vowels (ㅗ,ㅛ,ㅜ,ㅠ,ㅡ)
// c3,c5:[null,ㄱ,ㄴ,ㄷ,...]
0:[0,0],
1:[1,0],
@@ -151,7 +147,7 @@ function toLineChar(i,p,f) {
let out = []
let ibuf = charmap.i[i]
let pbuf = charmap.p[p]
let fbuf = charmap.f[f]
let fbuf = ([8,12,13,17,18].includes(p)) ? charmap.fhorz[f] : charmap.fvert[f]
let dbl = 2*(ibuf.length == 2) // 0 or 2
/* 0 | 0 */out[0] = ibuf[0]
/* x | 2 */out[2] = ibuf[1]
@@ -189,7 +185,9 @@ let printHangul = (char) => {
if (i % 2 == 0)
con.curs_down()
else
cursReturn()
cursReturn()
//if (graphics.getCursorYX()[1] == 1) con.curs_down();
})
}
@@ -217,17 +215,18 @@ if (unicode.uniprint) {
let f = (c - 0xAC00) % 28
let char = toLineChar(i,p,f)
let w = Math.ceil(char.length / 2.0)|0
if (con.getyx()[1] + w > termw) println()
if (con.getyx()[1] + w > termw) print('\n\n');
printHangul(char)
}
}
])
println("조합한글 커널모듈이 로드되었습니다.")
return 0
}
else {
println("Failed to load Assembly Hangul kernel module: incompatible DOS version")
return 1
unicode.uniprint.unshift([
c => 0x20 == c,
c => {
if (con.getyx()[1] >= termw) print('\n\n');
else print(' ')
}
])
}

View File

@@ -0,0 +1,30 @@
// Uploads the two Hangul character ROM images (hang_lo.chr, then hang_hi.chr).
// `status` is never modified, so every return below yields 0, including the
// "file not found" paths. NOTE(review): confirm that reporting 0 on a missing font is intended.
let status = 0
// Scratch buffer of 1920 bytes, the amount read from each .chr file below
let workarea = sys.malloc(1920)
// install LOCHRROM
let hangulRomL = files.open("A:/tvdos/i18n/hang_lo.chr")
if (!hangulRomL.exists) {
printerrln("hang_lo.chr not found")
sys.free(workarea)
return status
}
hangulRomL.pread(workarea, 1920, 0)
// Copy the buffer one byte at a time to descending addresses starting at -133121
for (let i = 0; i < 1920; i++) sys.poke(-133121 - i, sys.peek(workarea + i))
// presumably tells the display hardware to take the uploaded bytes as the low character ROM (confirm the meaning of value 18)
sys.poke(-1299460, 18)
// install HICHRROM
let hangulRomH = files.open("A:/tvdos/i18n/hang_hi.chr")
if (!hangulRomH.exists) {
printerrln("hang_hi.chr not found")
sys.free(workarea)
// The low ROM was already replaced above; value 20 presumably reverts it (confirm)
sys.poke(-1299460, 20) // clean up the crap
return status
}
hangulRomH.pread(workarea, 1920, 0)
// Same upload window as above, now filled with the high ROM image
for (let i = 0; i < 1920; i++) sys.poke(-133121 - i, sys.peek(workarea + i))
// presumably latches the uploaded bytes as the high character ROM (confirm the meaning of value 19)
sys.poke(-1299460, 19)
sys.free(workarea)

View File

@@ -0,0 +1,289 @@
// Common GUI for media player
// Created by CuriousTorvald on 2025-09-30.
// Subtitle display functions
// Blanks text rows 28..31 (the bottom subtitle rows) by printing 80 spaces on each row
// in the 255/255 colour pair, then restores the colours that were active on entry.
function clearSubtitleArea() {
    const savedFore = con.get_color_fore()
    const savedBack = con.get_color_back()
    const SUBTITLE_ROWS = [28, 29, 30, 31]
    const COLUMNS = 80

    con.color_pair(255, 255) // transparent to clear

    SUBTITLE_ROWS.forEach((row) => {
        con.move(row, 1)
        let remaining = COLUMNS
        while (remaining-- > 0) {
            print(" ")
        }
    })

    con.color_pair(savedFore, savedBack)
}
// On-screen length of a subtitle line: <b>, </b>, <i> and </i> tags (any letter case)
// are stripped, then the remainder is measured with unicode.visualStrlen().
function getVisualLength(line) {
    const FORMAT_TAGS = /<\/?[bi]>/gi
    const visibleText = line.replace(FORMAT_TAGS, '')
    return unicode.visualStrlen(visibleText)
}
// Prints one subtitle line at the current cursor position, framed by a padding glyph on
// each side. Text is yellow (231); text between <b>/<i> and </b>/</i> is white (254).
// The tags themselves are not printed. Any other '<' is printed literally.
// useUnicode selects unicode.print() instead of print() for the text runs.
function displayFormattedLine(line, useUnicode) {
    const COLOUR_PLAIN = 231
    const COLOUR_TAGGED = 254
    let pending = "" // text collected since the last colour change

    const emitPending = () => {
        if (pending.length === 0) return
        if (useUnicode) unicode.print(pending)
        else print(pending)
        pending = ""
    }

    // Describes the formatting tag starting at pos, or null when there is none
    const tagAt = (pos) => {
        if (line[pos] !== '<') return null
        const three = line.substring(pos, pos + 3).toLowerCase()
        if (three === '<b>' || three === '<i>') return { length: 3, colour: COLOUR_TAGGED }
        const four = line.substring(pos, pos + 4).toLowerCase()
        if (four === '</b>' || four === '</i>') return { length: 4, colour: COLOUR_PLAIN }
        return null
    }

    // leading padding block
    con.color_pair(0, 255)
    con.prnch(0xDE)
    con.color_pair(COLOUR_PLAIN, 0)

    let pos = 0
    while (pos < line.length) {
        const tag = tagAt(pos)
        if (tag === null) {
            pending += line[pos]
            pos += 1
            continue
        }
        emitPending() // text collected so far must be printed in the old colour
        con.color_pair(tag.colour, 0)
        pos += tag.length
    }
    emitPending()

    // trailing padding block
    con.color_pair(0, 255)
    con.prnch(0xDD)
    con.color_pair(COLOUR_PLAIN, 0)
}
// Draws a (possibly multi-line) subtitle on the text layer.
// text:       subtitle text, lines separated by '\n'; empty/absent text clears the subtitle area
// useUnicode: forwarded to displayFormattedLine to choose the print routine
// position:   0 bottom centre (default), 1 bottom left, 2 centre left, 3 top left,
//             4 top centre, 5 top right, 6 centre right, 7 bottom right, 8 dead centre
// The console colours active on entry are restored before returning.
function displaySubtitle(text, useUnicode = false, position = 0) {
    if (!text || text.length === 0) {
        clearSubtitleArea()
        return
    }

    // Set subtitle colours: yellow (231) on black (0)
    let oldFgColour = con.get_color_fore()
    let oldBgColour = con.get_color_back()
    con.color_pair(231, 0)

    // Split text into lines
    let lines = text.split('\n')

    // Calculate position based on subtitle position setting
    let startRow, startCol

    // Longest visual length (formatting tags excluded), used to centre all lines as a block.
    // Taken as a numeric maximum: a default sort() orders numbers as strings, so a
    // 9-character line would rank above an 11-character one.
    let longestLineLength = lines.reduce((longest, s) => Math.max(longest, getVisualLength(s)), 0)

    switch (position) {
        case 2: // center left
        case 6: // center right
        case 8: // dead center
            startRow = 16 - Math.floor(lines.length / 2)
            break
        case 3: // top left
        case 4: // top center
        case 5: // top right
            startRow = 2
            break
        case 0: // bottom center
        case 1: // bottom left
        case 7: // bottom right
        default:
            startRow = 31 - lines.length // Default to bottom
    }

    // Display each line
    for (let i = 0; i < lines.length; i++) {
        let line = lines[i].trim()
        if (line.length === 0) continue // blank lines keep their row but print nothing

        // Keep the row inside the 32-row text screen
        let row = startRow + i
        if (row < 1) row = 1
        if (row > 32) row = 32

        // Calculate column based on alignment
        switch (position) {
            case 1: // bottom left
            case 2: // center left
            case 3: // top left
                startCol = 1
                break
            case 5: // top right
            case 6: // center right
            case 7: // bottom right
                startCol = Math.max(1, 78 - getVisualLength(line) - 2)
                break
            case 0: // bottom center
            case 4: // top center
            case 8: // dead center
            default:
                // the extra 2 columns account for the padding glyphs drawn around each line
                startCol = Math.max(1, Math.floor((80 - longestLineLength - 2) / 2) + 1)
                break
        }

        con.move(row, startCol)

        // Parse and display line with formatting tag support
        displayFormattedLine(line, useUnicode)
    }

    con.color_pair(oldFgColour, oldBgColour)
}
// Wraps c between the byte 0x84 and the letter 'u', producing the escape sequence
// the status bar strings use to embed a character code.
function emit(c) {
    const lead = "\x84"
    const tail = "u"
    return lead + c + tail
}
// Formats a duration in seconds as "HH:MM:SS". Each field is floored and left-padded
// with zeros to at least two digits; hours are not wrapped, so they may use more digits.
function formatTime(seconds) {
    const twoDigits = (value) => value.toString().padStart(2, '0')

    const hh = Math.floor(seconds / 3600)
    const mm = Math.floor((seconds % 3600) / 60)
    const ss = Math.floor(seconds % 60)

    return twoDigits(hh) + ':' + twoDigits(mm) + ':' + twoDigits(ss)
}
// Builds a progress bar string of `width` characters: a track of 0xC4 glyphs with one
// knob glyph. The knob has half-character resolution: 0xDD when the position falls in
// the left half of its cell, 0xDE when it falls in the right half.
// progress: fraction played; clamped to 0..1, and NaN is treated as 0.
function drawProgressBar(progress, width) {
    // A NaN progress (e.g. 0/0 from the caller) would survive the clamp below
    if (Number.isNaN(progress)) progress = 0;
    // Clamp progress between 0 and 1
    progress = Math.max(0, Math.min(1, progress));

    // Calculate position in "half-character" resolution
    const position = progress * width * 2;
    let charIndex = Math.floor(position / 2);
    let isRightHalf = (position % 2) >= 1;

    // At 100% the computed cell lies one past the end of the bar, which made the knob
    // disappear; pin it to the right half of the last cell instead.
    if (charIndex >= width) {
        charIndex = width - 1;
        isRightHalf = true;
    }

    let bar = '';
    for (let i = 0; i < width; i++) {
        if (i == charIndex) {
            bar += isRightHalf ? '\xDE' : '\xDD';
        } else {
            bar += '\xC4';
        }
    }
    return bar;
}
/*
status = {
videoRate: int,
frameCount: int,
totalFrames: int,
fps: int,
frameMode: String,
qY: int,
qCo: int,
qCg: int,
akku: float,
fileName: String,
fileOrd: int,
currentStatus: int (0: stop/init, 1: play, 2: pause),
resolution: string,
colourSpace: string
}
*/
/**
 * Draws the playback status line on console row 32: file ordinal, play-state
 * icon, elapsed time, progress bar, remaining time and the video rate.
 * The cursor is returned to (1, 1) afterwards.
 *
 * @param status playback status object; this function reads totalFrames, fps,
 *               frameCount, fileOrd, currentStatus and videoRate
 */
function printBottomBar(status) {
    con.color_pair(253, 0)
    con.move(32, 1)

    // Guard the degenerate cases (fps of 0, a single-frame or empty stream)
    // that would otherwise produce NaN/Infinity and print "NaN:NaN:NaN".
    const fullTimeInSec = (status.fps > 0) ? status.totalFrames / status.fps : 0
    const lastFrame = status.totalFrames - 1
    const rawProgress = (lastFrame > 0) ? status.frameCount / lastFrame : 0
    // Clamp so that an over-running frame counter cannot yield a negative
    // remaining time; `|| 0` maps a NaN (missing frameCount) to 0.
    const progress = Math.max(0, Math.min(1, rawProgress)) || 0

    const elapsed = progress * fullTimeInSec
    const remaining = (1 - progress) * fullTimeInSec

    const BAR = '\xB3'
    const statIcon = [emit(0xFE), emit(0x10), emit(0x13)] // indexed by currentStatus: stop, play, pause

    let sLeft = `${emit(0x1E)}${status.fileOrd}${emit(0x1F)}${BAR}${statIcon[status.currentStatus]} `
    let sRate = `${BAR}${(''+((status.videoRate/128)|0)).padStart(6, ' ')}`
    let timeElapsed = formatTime(elapsed)
    let timeRemaining = formatTime(remaining)

    // NOTE(review): the "- 8" / "- 1" corrections presumably compensate for the
    // escape-sequence bytes in sLeft that take no screen column; confirm.
    let barWidth = 80 - (sLeft.length - 8 - ((status.currentStatus == 0) ? 1 : 0) + timeElapsed.length + timeRemaining.length + sRate.length) - 2
    let bar = drawProgressBar(progress, barWidth)

    let s = sLeft + timeElapsed + ' ' + bar + ' ' + timeRemaining + sRate
    print(s);con.addch(0x4B)

    con.move(1, 1)
}
/**
 * Draws the top status line on console row 1 and returns the cursor to (1, 1).
 *
 * With `moreInfo` set, the line shows frame counter, quantisers, measured
 * frame rate, resolution and colour space, followed by the file's base name
 * (the part after the last backslash), truncated with '~' if it does not fit.
 * Otherwise only the file name is shown, centred in 80 columns.
 *
 * @param status   playback status object; reads fileName, frameCount,
 *                 totalFrames, frameMode, qY, qCo, qCg, akku, resolution, colourSpace
 * @param moreInfo truthy to show the detailed line
 */
function printTopBar(status, moreInfo) {
    con.color_pair(253, 0)
    con.move(1)

    const BAR = '\xB3'

    if (moreInfo) {
        let filename = status.fileName.split("\\").pop()

        let sF = `F ${(''+status.frameCount).padStart((''+status.totalFrames).length, ' ')}${status.frameMode}/${status.totalFrames}`
        let sQ = `Q${(''+status.qY).padStart(4,' ')},${(''+status.qCo).padStart(2,' ')},${(''+status.qCg).padStart(2,' ')}`
        let sFPS = `${(status.frameCount / status.akku).toFixed(2)}f`
        let sRes = `${status.resolution}`
        let sCol = `${status.colourSpace}`
        let sLeft = sF + BAR + sQ + BAR + sFPS + BAR + sRes + BAR + sCol + BAR

        let filenameSpace = 80 - sLeft.length
        if (filenameSpace <= 0) {
            // No room at all: a negative slice end would cut from the wrong side.
            filename = ''
        } else if (filename.length > filenameSpace) {
            filename = filename.slice(0, filenameSpace - 1) + '~'
        }

        // Pad based on the name actually printed, not on the full path, so the
        // rest of the row is always overwritten.
        // NOTE(review): the extra 3 columns are kept from the previous
        // implementation; presumably slack for non-printing bytes in sLeft.
        let remainingSpc = filenameSpace - filename.length
        let sRight = (remainingSpc > 0) ? ' '.repeat(remainingSpc + 3) : ''

        print(sLeft + filename + sRight)
    } else {
        let s = status.fileName
        if (s.length > 80) {
            s = s.slice(0, 79) + '~'
        }

        // Centre the name; any odd leftover column goes to the right side.
        let spcs = 80 - s.length
        let spcsLeft = (spcs / 2)|0
        let spcsRight = spcs - spcsLeft
        print(' '.repeat(spcsLeft))
        print(s)
        print(' '.repeat(spcsRight))
    }

    con.move(1, 1)
}
// Public API of this module: the subtitle helpers and the two status bars.
exports = {
clearSubtitleArea,
displaySubtitle,
printTopBar,
printBottomBar
}

View File

@@ -155,4 +155,35 @@ function getReadCount() {
return readCount
}
exports = {fileHeader, prepare, readBytes, readInt, readShort, readFourCC, readOneByte, readString, skip, getReadCount}
/**
 * Resets the stream to its beginning by sending the REWIND command to the
 * device on `port`, then zeroes the module's read counter.
 *
 * @throws Error when the device reports a non-zero status code
 */
function rewind() {
    com.sendMessage(port, "REWIND")

    const status = com.getStatusCode(port)
    if (status != 0) throw Error("REWIND failed with "+status)

    // The device is back at byte 0, so the local bookkeeping must follow.
    readCount = 0
}
/**
 * Moves the read position to the absolute byte offset `position`.
 *
 * Forward moves skip the difference. Backward moves rewind to the start and
 * then skip forward to the target.
 *
 * @param position absolute offset from the start of the stream; must be >= 0
 * @throws Error when `position` is negative
 */
function seek(position) {
    if (position < 0) throw Error("seek: position must be non-negative")

    const delta = position - readCount

    // Already at target position
    if (delta == 0) return

    if (delta < 0) {
        // Target lies behind us: restart from byte 0, then walk forward.
        rewind()
        if (position > 0) skip(position)
        return
    }

    // Target lies ahead: skip the difference.
    skip(delta)
}
// Public API of this module: the read helpers plus seek() and rewind() for repositioning the stream.
exports = {fileHeader, prepare, readBytes, readInt, readShort, readFourCC, readOneByte, readString, skip, getReadCount, seek, rewind}

View File

@@ -203,7 +203,7 @@ function skip(n0) {
let n = n0
while (n > 0) {
let skiplen = Math.min(n, 16777215)
serial.println(`skip ${skiplen}; remaining: ${n}`)
// serial.println(`skip ${skiplen}; remaining: ${n}`)
hsdpaSkip(skiplen)
n -= skiplen
}
@@ -237,14 +237,23 @@ function isReady() {
}
function seek(position) {
if (position < 0) {
throw Error("seek: position must be non-negative")
}
let relPos = position - readCount
if (position == 0) {
return
} else if (position > 0) {
skip(relPos)
if (relPos == 0) {
return // Already at target position
} else if (relPos < 0) {
// Seeking backward - must rewind and skip forward
hsdpaRewind() // This resets readCount to 0
if (position > 0) {
skip(position)
}
} else {
hsdpaRewind()
skip(position)
// Seeking forward - skip the difference
skip(relPos)
}
}

View File

@@ -0,0 +1,721 @@
// TAV Packet Inspector - JavaScript port for TSVM
// Ported from tav_inspector.c by CuriousTorvald and Claude
// Usage: tav_inspector <input.tav> <output.txt> [options]
const seqread = require('seqread')
// Frame mode constants: value of the first byte of a decompressed frame payload
// (see getFrameInfo)
const FRAME_MODE_SKIP = 0x00
const FRAME_MODE_INTRA = 0x01
const FRAME_MODE_DELTA = 0x02
// Packet type constants: the one-byte tag that starts every packet
// 0x10-0x17: video frames and GOP blocks
const TAV_PACKET_IFRAME = 0x10
const TAV_PACKET_PFRAME = 0x11
const TAV_PACKET_GOP_UNIFIED = 0x12
const TAV_PACKET_GOP_UNIFIED_MOTION = 0x13
const TAV_PACKET_PFRAME_RESIDUAL = 0x14
const TAV_PACKET_BFRAME_RESIDUAL = 0x15
const TAV_PACKET_PFRAME_ADAPTIVE = 0x16
const TAV_PACKET_BFRAME_ADAPTIVE = 0x17
// 0x20-0x24: audio
const TAV_PACKET_AUDIO_MP2 = 0x20
const TAV_PACKET_AUDIO_PCM8 = 0x21
const TAV_PACKET_AUDIO_TAD = 0x24
// 0x30-0x3F: subtitles and text-mode video
const TAV_PACKET_SUBTITLE = 0x30
const TAV_PACKET_SUBTITLE_TC = 0x31
const TAV_PACKET_VIDEOTEX = 0x3F
const TAV_PACKET_AUDIO_TRACK = 0x40
// 0x70-0x7F: multiplexed video channels 2..9; even tags are I-frames, odd tags are P-frames
const TAV_PACKET_VIDEO_CH2_I = 0x70
const TAV_PACKET_VIDEO_CH2_P = 0x71
const TAV_PACKET_VIDEO_CH3_I = 0x72
const TAV_PACKET_VIDEO_CH3_P = 0x73
const TAV_PACKET_VIDEO_CH4_I = 0x74
const TAV_PACKET_VIDEO_CH4_P = 0x75
const TAV_PACKET_VIDEO_CH5_I = 0x76
const TAV_PACKET_VIDEO_CH5_P = 0x77
const TAV_PACKET_VIDEO_CH6_I = 0x78
const TAV_PACKET_VIDEO_CH6_P = 0x79
const TAV_PACKET_VIDEO_CH7_I = 0x7A
const TAV_PACKET_VIDEO_CH7_P = 0x7B
const TAV_PACKET_VIDEO_CH8_I = 0x7C
const TAV_PACKET_VIDEO_CH8_P = 0x7D
const TAV_PACKET_VIDEO_CH9_I = 0x7E
const TAV_PACKET_VIDEO_CH9_P = 0x7F
// 0xE0-0xEF: metadata and the extended header
const TAV_PACKET_EXIF = 0xE0
const TAV_PACKET_ID3V1 = 0xE1
const TAV_PACKET_ID3V2 = 0xE2
const TAV_PACKET_VORBIS_COMMENT = 0xE3
const TAV_PACKET_CD_TEXT = 0xE4
const TAV_PACKET_EXTENDED_HDR = 0xEF
// 0xF0-0xFF: loop points, screen mask, timecode and sync markers
const TAV_PACKET_LOOP_START = 0xF0
const TAV_PACKET_LOOP_END = 0xF1
const TAV_PACKET_SCREEN_MASK = 0xF2
const TAV_PACKET_GOP_SYNC = 0xFC
const TAV_PACKET_TIMECODE = 0xFD
const TAV_PACKET_SYNC_NTSC = 0xFE
const TAV_PACKET_SYNC = 0xFF
// 0x00: padding packet with no payload
const TAV_PACKET_NOOP = 0x00
// Quantiser lookup table: maps the one-byte quantiser index stored in the file header
// to the quantiser value shown in the report
const QLUT = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,264,272,280,288,296,304,312,320,328,336,344,352,360,368,376,384,392,400,408,416,424,432,440,448,456,464,472,480,488,496,504,512,528,544,560,576,592,608,624,640,656,672,688,704,720,736,752,768,784,800,816,832,848,864,880,896,912,928,944,960,976,992,1008,1024,1056,1088,1120,1152,1184,1216,1248,1280,1312,1344,1376,1408,1440,1472,1504,1536,1568,1600,1632,1664,1696,1728,1760,1792,1824,1856,1888,1920,1952,1984,2016,2048,2112,2176,2240,2304,2368,2432,2496,2560,2624,2688,2752,2816,2880,2944,3008,3072,3136,3200,3264,3328,3392,3456,3520,3584,3648,3712,3776,3840,3904,3968,4032,4096]
// Display names indexed by the header's channel-layout byte
const CLAYOUT = ["Luma-Chroma", "Luma-Chroma-Alpha", "Luma", "Luma-Alpha", "Chroma", "Chroma-Alpha"]
// Display names indexed by the base version (header version, minus 8 when it is above 8)
const VERDESC = ["null", "YCoCg tiled, uniform", "ICtCp tiled, uniform", "YCoCg monoblock, uniform", "ICtCp monoblock, uniform", "YCoCg monoblock, perceptual", "ICtCp monoblock, perceptual", "YCoCg tiled, perceptual", "ICtCp tiled, perceptual"]
// Display names for the temporal coder: index 0 for versions up to 8, index 1 above
const TEMPORAL_WAVELET = ["Haar", "CDF 5/3"]
/**
 * Returns the human-readable name of a packet type byte.
 *
 * Explicitly named types are looked up in a table. Any other value in
 * 0x70..0x7F is reported as "MUX VIDEO", and everything else as "UNKNOWN".
 *
 * @param type packet type byte
 * @returns display name used in the report
 */
function getPacketTypeName(type) {
    // Built on first use and cached on the function object.
    const names = getPacketTypeName.names || (getPacketTypeName.names = new Map([
        [TAV_PACKET_IFRAME, "I-FRAME"],
        [TAV_PACKET_PFRAME, "P-FRAME"],
        [TAV_PACKET_GOP_UNIFIED, "GOP (3D DWT Unified)"],
        [TAV_PACKET_GOP_UNIFIED_MOTION, "GOP (3D DWT Unified with Motion Data)"],
        [TAV_PACKET_PFRAME_RESIDUAL, "P-FRAME (residual)"],
        [TAV_PACKET_BFRAME_RESIDUAL, "B-FRAME (residual)"],
        [TAV_PACKET_PFRAME_ADAPTIVE, "P-FRAME (quadtree)"],
        [TAV_PACKET_BFRAME_ADAPTIVE, "B-FRAME (quadtree)"],
        [TAV_PACKET_AUDIO_MP2, "AUDIO MP2"],
        [TAV_PACKET_AUDIO_PCM8, "AUDIO PCM8 (zstd)"],
        [TAV_PACKET_AUDIO_TAD, "AUDIO TAD (zstd)"],
        [TAV_PACKET_SUBTITLE, "SUBTITLE (SSF frame-locked)"],
        [TAV_PACKET_SUBTITLE_TC, "SUBTITLE (SSF-TC timecoded)"],
        [TAV_PACKET_VIDEOTEX, "VIDEOTEX (text-mode video)"],
        [TAV_PACKET_AUDIO_TRACK, "AUDIO TRACK (Separate MP2)"],
        [TAV_PACKET_EXIF, "METADATA (EXIF)"],
        [TAV_PACKET_ID3V1, "METADATA (ID3v1)"],
        [TAV_PACKET_ID3V2, "METADATA (ID3v2)"],
        [TAV_PACKET_VORBIS_COMMENT, "METADATA (Vorbis)"],
        [TAV_PACKET_CD_TEXT, "METADATA (CD-Text)"],
        [TAV_PACKET_EXTENDED_HDR, "EXTENDED HEADER"],
        [TAV_PACKET_LOOP_START, "LOOP START"],
        [TAV_PACKET_LOOP_END, "LOOP END"],
        [TAV_PACKET_SCREEN_MASK, "SCREEN MASK"],
        [TAV_PACKET_GOP_SYNC, "GOP SYNC"],
        [TAV_PACKET_TIMECODE, "TIMECODE"],
        [TAV_PACKET_SYNC_NTSC, "SYNC (NTSC)"],
        [TAV_PACKET_SYNC, "SYNC"],
        [TAV_PACKET_NOOP, "NO-OP"]
    ]))

    const known = names.get(type)
    if (known !== undefined) return known

    // Multiplexed video channels have no individual names.
    const isMuxVideo = type >= 0x70 && type <= 0x7F
    return isMuxVideo ? "MUX VIDEO" : "UNKNOWN"
}
/**
 * Reads a 64-bit little-endian value from the stream as two 32-bit words
 * (low word first), each taken as unsigned.
 * The result is a JavaScript number, so values above 2^53 lose precision.
 *
 * @returns low + high * 2^32
 */
function readInt64() {
    const TWO_POW_32 = 4294967296
    const low = seqread.readInt() >>> 0
    const high = seqread.readInt() >>> 0
    return high * TWO_POW_32 + low
}
/**
 * Reads three bytes from the stream and assembles them as a 24-bit
 * little-endian integer (first byte is the least significant).
 *
 * @returns the assembled value
 */
function readUint24() {
    let value = 0
    for (let shift = 0; shift < 24; shift += 8) {
        value |= seqread.readOneByte() << shift
    }
    return value
}
/**
 * Consumes a compressed frame payload from the stream and peeks at the first
 * two decompressed bytes to report the frame mode and quantiser.
 *
 * The payload is always consumed (read or skipped), so the stream stays
 * aligned on the next packet. If memory cannot be allocated or decompression
 * throws, the defaults { mode: -1, quantiser: 0xFF } are returned.
 *
 * @param compressedSize payload size in bytes
 * @returns { mode, quantiser }
 */
function getFrameInfo(compressedSize) {
    const DECOMP_BUFFER_SIZE = 2 * 1024 * 1024 // fixed 2 MB output buffer
    const result = { mode: -1, quantiser: 0xFF }

    if (compressedSize === 0) return result

    const srcPtr = sys.malloc(compressedSize)
    if (srcPtr === 0) {
        // Cannot buffer the payload; step over it instead.
        seqread.skip(compressedSize)
        return result
    }
    seqread.readBytes(compressedSize, srcPtr)

    const dstPtr = sys.malloc(DECOMP_BUFFER_SIZE)
    if (dstPtr === 0) {
        sys.free(srcPtr)
        return result
    }

    try {
        const producedBytes = gzip.decompFromTo(srcPtr, compressedSize, dstPtr)
        if (producedBytes >= 1) {
            result.mode = sys.peek(dstPtr) & 0xFF
            // The quantiser byte is only read for frames that are not SKIP.
            if (producedBytes >= 2 && result.mode !== FRAME_MODE_SKIP) {
                result.quantiser = sys.peek(dstPtr + 1) & 0xFF
            }
        }
    } catch (e) {
        // Decompression failed, keep default values
    }

    sys.free(dstPtr)
    sys.free(srcPtr)
    return result
}
/**
 * Parses an extended-header packet (a 16-bit pair count followed by key/value
 * pairs) and appends a description of every pair to `output`.
 *
 * Each pair is a FourCC key, a one-byte value type and the value itself.
 * Fixed-width integer values (Int16/24/32/48/64) and length-prefixed byte
 * strings are consumed so that the stream stays aligned with the next packet.
 * For a type byte that is not recognised the value size is unknown; nothing
 * is consumed and "Unknown type" is reported.
 *
 * @param output array of report fragments; appended to in place
 */
function parseExtendedHeader(output) {
    let numPairs = seqread.readShort()
    output.push(` - ${numPairs} key-value pairs:\n`)

    for (let i = 0; i < numPairs; i++) {
        let key = seqread.readFourCC()
        let valueType = seqread.readOneByte()

        let valueTypeStr = "Unknown"
        switch (valueType) {
            case 0x00: valueTypeStr = "Int16"; break
            case 0x01: valueTypeStr = "Int24"; break
            case 0x02: valueTypeStr = "Int32"; break
            case 0x03: valueTypeStr = "Int48"; break
            case 0x04: valueTypeStr = "Int64"; break
            case 0x10: valueTypeStr = "Bytes"; break
        }
        output.push(` ${key} (type: ${valueTypeStr} (0x${valueType.toString(16).padStart(2,'0')})): `)

        if (valueType === 0x04) { // Int64
            let value = readInt64()
            if (key === "CDAT") {
                // The value is divided by 10^6 to get seconds for the date.
                let timeSec = Math.floor(value / 1000000)
                let date = new Date(timeSec * 1000)
                output.push(date.toUTCString())
            } else {
                // Every other Int64 is printed as value / 10^9 "seconds".
                output.push((value / 1000000000).toFixed(6) + " seconds")
            }
        } else if (valueType === 0x10) { // Bytes
            let length = seqread.readShort()
            let data = seqread.readString(length)
            output.push(`"${data}"`)
        } else if (valueType === 0x00) { // Int16
            // The narrower integer types must be consumed as well; leaving
            // their bytes unread would desynchronise every following packet.
            output.push(String(seqread.readShort()))
        } else if (valueType === 0x01) { // Int24
            output.push(String(readUint24()))
        } else if (valueType === 0x02) { // Int32
            output.push(String(seqread.readInt()))
        } else if (valueType === 0x03) { // Int48
            // Low 32 bits first, then the high 16 bits (little-endian).
            let lo = seqread.readInt() >>> 0
            let hi = seqread.readShort() & 0xFFFF
            output.push(String(lo + hi * 4294967296))
        } else {
            output.push("Unknown type")
        }

        if (i < numPairs - 1) {
            output.push("\n")
        }
    }
}
/**
 * Parses one subtitle packet and appends its description to `output`.
 *
 * Layout as read here: 24-bit index, an optional 64-bit timecode (timecoded
 * packets only), a one-byte opcode, then the remaining payload. The payload
 * is shown as text for the SHOW, SHOW LANG and REVEAL opcodes when it is
 * non-empty, and skipped otherwise.
 *
 * @param size        total packet payload size in bytes
 * @param isTimecoded true when the packet carries a timecode
 * @param output      array of report fragments; appended to in place
 */
function parseSubtitlePacket(size, isTimecoded, output) {
    const INDEX_AND_OPCODE_BYTES = 4 // 3 bytes index + 1 byte opcode
    const TIMECODE_BYTES = 8

    const index = readUint24()
    let timecodeNs = 0
    let consumed = INDEX_AND_OPCODE_BYTES
    if (isTimecoded) {
        timecodeNs = readInt64()
        consumed += TIMECODE_BYTES
    }
    const opcode = seqread.readOneByte()

    const isShowLang = opcode >= 0x10 && opcode <= 0x2F
    const isReveal = opcode >= 0x30 && opcode <= 0x41

    output.push(` [Index=${index}`)
    if (isTimecoded) {
        output.push(`, Time=${(timecodeNs / 1000000000).toFixed(3)}s`)
    }
    output.push(`, Opcode=0x${opcode.toString(16).padStart(2,'0')}`)

    if (opcode === 0x01) output.push(" (SHOW)")
    else if (opcode === 0x02) output.push(" (HIDE)")
    else if (opcode === 0x03) output.push(" (MOVE)")
    else if (opcode === 0x80) output.push(" (UPLOAD LOW FONT)")
    else if (opcode === 0x81) output.push(" (UPLOAD HIGH FONT)")
    else if (isShowLang) output.push(" (SHOW LANG)")
    else if (isReveal) output.push(" (REVEAL)")

    output.push("]")

    // Whatever is left after the fixed fields is the opcode's argument data.
    const remaining = size - consumed
    const carriesText = opcode === 0x01 || isShowLang || isReveal
    if (carriesText && remaining > 0) {
        // Newlines, carriage returns and tabs are flattened to spaces so that
        // the entry stays on one report line.
        const text = seqread.readString(remaining).replace(/[\n\r\t]/g, ' ')
        output.push(` Text: "${text}"`)
    } else {
        seqread.skip(remaining)
    }
}
/**
 * Consumes a videotex packet payload and appends a one-line summary to
 * `output`.
 *
 * The payload is decompressed into an 8192-byte buffer. The first two
 * decompressed bytes are reported as the grid's rows and columns, together
 * with the decompressed size and ratio. If memory cannot be allocated only
 * the size is reported; if decompression fails or yields fewer than two
 * bytes, a failure note is added.
 *
 * @param size   compressed payload size in bytes
 * @param output array of report fragments; appended to in place
 */
function parseVideotexPacket(size, output) {
    const DECOMP_BUFFER_SIZE = 8192
    const sizeOnly = ` - size=${size} bytes`
    const failed = `${sizeOnly} (decompression failed)`

    const srcPtr = sys.malloc(size)
    if (srcPtr === 0) {
        // Cannot buffer the payload; step over it instead.
        seqread.skip(size)
        output.push(sizeOnly)
        return
    }
    seqread.readBytes(size, srcPtr)

    const dstPtr = sys.malloc(DECOMP_BUFFER_SIZE)
    if (dstPtr === 0) {
        sys.free(srcPtr)
        output.push(sizeOnly)
        return
    }

    try {
        const producedBytes = gzip.decompFromTo(srcPtr, size, dstPtr)
        if (producedBytes < 2) {
            output.push(failed)
        } else {
            const rows = sys.peek(dstPtr) & 0xFF
            const cols = sys.peek(dstPtr + 1) & 0xFF
            const ratio = (producedBytes / size).toFixed(2)
            output.push(`${sizeOnly} (decompressed: ${producedBytes} bytes, grid: ${cols}x${rows}, ratio: ${ratio}:1)`)
        }
    } catch (e) {
        output.push(failed)
    }

    sys.free(dstPtr)
    sys.free(srcPtr)
}
// Main function
// Reads the TAV file named by exec_args[1], walks every packet in it and writes a
// text report (header dump, one line per packet, summary statistics) to the file
// named by exec_args[2]. Returns 0 on success and 1 on a usage, open, magic or
// write error. An error thrown while walking packets is recorded in the report
// and the summary of what was parsed so far is still written.
function main() {
if (exec_args.length < 3) {
println("Usage: tav_inspector <input.tav> <output.txt>")
println(" Analyzes TAV file packets and writes report to output file")
return 1
}
let inputPath = _G.shell.resolvePathInput(exec_args[1]).full
let outputPath = _G.shell.resolvePathInput(exec_args[2]).full
// Total file size; the packet loop runs until the read counter reaches it.
// NOTE(review): this lookup happens outside the try below, so a failure here is not reported as "Cannot open file" - confirm whether that is intended.
const FILE_LENGTH = files.open(inputPath).size
// Prepare sequential reader
try {
seqread.prepare(inputPath)
} catch (e) {
println(`Error: Cannot open file ${inputPath}`)
println(e.toString())
return 1
}
// Report fragments; joined into a single string when the file is written.
let output = []
// Read and verify TAV header (32 bytes)
let magic = seqread.readString(8)
let expectedMagic = "\x1FTSVMTAV"
if (magic !== expectedMagic) {
println("Error: Invalid TAV magic number")
return 1
}
// Parse header fields
let version = seqread.readOneByte()
// Versions above 8 are reported as (version - 8) with the second temporal coder name.
let baseVersion = (version > 8) ? (version - 8) : version
let temporalMotionCoder = (version > 8) ? 1 : 0
let width = seqread.readShort()
let height = seqread.readShort()
let fps = seqread.readOneByte()
let totalFrames = seqread.readInt()
let wavelet = seqread.readOneByte()
let decompLevels = seqread.readOneByte()
let quantY = seqread.readOneByte()
let quantCo = seqread.readOneByte()
let quantCg = seqread.readOneByte()
let extraFlags = seqread.readOneByte()
let videoFlags = seqread.readOneByte()
let quality = seqread.readOneByte()
let channelLayout = seqread.readOneByte()
let entropyCoder = seqread.readOneByte()
let encoderPreset = seqread.readOneByte()
seqread.skip(3) // Reserved bytes
// Names for wavelet ids 0..16; id 255 is handled separately below.
let waveletNames = ["LGT 5/3", "CDF 9/7", "CDF 13/7", "Reserved", "Reserved",
"Reserved", "Reserved", "Reserved", "Reserved",
"Reserved", "Reserved", "Reserved", "Reserved",
"Reserved", "Reserved", "Reserved", "DD-4"]
// Write header information
output.push("TAV Packet Inspector\n")
output.push(`File: ${inputPath}\n`)
output.push("==================================================\n\n")
output.push("TAV Header:\n")
output.push(` Version: ${version} (base: ${baseVersion} - ${VERDESC[baseVersion]}, temporal: ${TEMPORAL_WAVELET[temporalMotionCoder]})\n`)
output.push(` Resolution: ${width}x${height}\n`)
output.push(` Frame rate: ${fps} fps`)
if (videoFlags & 0x02) output.push(" (NTSC)")
output.push("\n")
output.push(` Total frames: ${totalFrames}\n`)
output.push(` Wavelet: ${wavelet}`)
if (wavelet < 17) output.push(` (${waveletNames[wavelet === 16 ? 16 : wavelet]})`)
if (wavelet === 255) output.push(" (Haar)")
output.push("\n")
output.push(` Decomp levels: ${decompLevels}\n`)
output.push(` Quantisers: Y=${QLUT[quantY]}, Co=${QLUT[quantCo]}, Cg=${QLUT[quantCg]} (Index=${quantY},${quantCo},${quantCg})\n`)
// A quality byte of 0 means "not available"; otherwise it is shown minus one.
if (quality > 0)
output.push(` Quality: ${quality - 1}\n`)
else
output.push(" Quality: n/a\n")
output.push(` Channel layout: ${CLAYOUT[channelLayout]}\n`)
output.push(` Entropy coder: ${entropyCoder === 0 ? "Twobit-map" : "EZBC"}\n`)
output.push(" Encoder preset: ")
if (encoderPreset === 0) {
output.push("Default\n")
} else {
// Preset is a bit field; only bits 0 and 1 have names here.
let presets = []
if (encoderPreset & 0x01) presets.push("Sports")
if (encoderPreset & 0x02) presets.push("Anime")
output.push(presets.join(", ") + "\n")
}
output.push(" Flags:\n")
output.push(` Has audio: ${(extraFlags & 0x01) ? "Yes" : "No"}\n`)
output.push(` Has subtitles: ${(extraFlags & 0x02) ? "Yes" : "No"}\n`)
output.push(` Progressive: ${(videoFlags & 0x01) ? "No (interlaced)" : "Yes"}\n`)
output.push(` Lossless: ${(videoFlags & 0x04) ? "Yes" : "No"}\n`)
if (extraFlags & 0x04) output.push(" Progressive TX: Enabled\n")
if (extraFlags & 0x08) output.push(" ROI encoding: Enabled\n")
output.push("\nPackets:\n")
output.push("==================================================\n")
// Statistics
let stats = {
iframeCount: 0,
pframeCount: 0,
pframeIntraCount: 0,
pframeDeltaCount: 0,
pframeSkipCount: 0,
gopUnifiedCount: 0,
gopUnifiedMotionCount: 0,
gopSyncCount: 0,
totalGopFrames: 0,
audioCount: 0,
audioMp2Count: 0,
audioPcm8Count: 0,
audioTadCount: 0,
audioTrackCount: 0,
subtitleCount: 0,
videotexCount: 0,
timecodeCount: 0,
syncCount: 0,
syncNtscCount: 0,
extendedHeaderCount: 0,
metadataCount: 0,
loopPointCount: 0,
muxVideoCount: 0,
unknownCount: 0,
totalVideoBytes: 0,
totalAudioBytes: 0,
audioMp2Bytes: 0,
audioPcm8Bytes: 0,
audioTadBytes: 0,
audioTrackBytes: 0,
videotexBytes: 0
}
let packetNum = 0
// Running frame number; advanced by main-channel I/P frames and by GOP sync packets.
let currentFrame = 0
// Parse packets
try {
while (seqread.getReadCount() < FILE_LENGTH) {
let packetOffset = seqread.getReadCount()
let packetType = seqread.readOneByte()
output.push(`Packet ${packetNum} (offset 0x${packetOffset.toString(16).toUpperCase()}): Type 0x${packetType.toString(16).padStart(2,'0').toUpperCase()} (${getPacketTypeName(packetType)})`)
// Every case must consume exactly its own payload so the next read lands on a packet type byte.
switch (packetType) {
case TAV_PACKET_EXTENDED_HDR:
stats.extendedHeaderCount++
parseExtendedHeader(output)
break
case TAV_PACKET_TIMECODE:
stats.timecodeCount++
let timecodeNs = readInt64()
let timecodeSec = (timecodeNs / 1000000000).toFixed(6)
output.push(` - ${timecodeSec} seconds (Frame ${currentFrame})`)
break
case TAV_PACKET_GOP_UNIFIED:
case TAV_PACKET_GOP_UNIFIED_MOTION:
// Layout as read here: 1-byte GOP size, then (motion variant only) a sized block, then a sized block.
let gopSize = seqread.readOneByte()
let size0 = 0
if (packetType === TAV_PACKET_GOP_UNIFIED_MOTION) {
size0 = seqread.readInt()
stats.totalVideoBytes += size0
stats.gopUnifiedMotionCount++
seqread.skip(size0)
}
let size1 = seqread.readInt()
stats.totalVideoBytes += size1
seqread.skip(size1)
stats.totalGopFrames += gopSize
if (packetType === TAV_PACKET_GOP_UNIFIED) {
stats.gopUnifiedCount++
}
let totalSize = size0 + size1
// NOTE(review): a GOP size of 0 makes this print "Infinity" or "NaN" bytes/frame.
let bytesPerFrame = (totalSize / gopSize).toFixed(2)
output.push(` - GOP size=${gopSize}, data size=${totalSize} bytes (${bytesPerFrame} bytes/frame)`)
break
case TAV_PACKET_GOP_SYNC:
let frameCount = seqread.readOneByte()
stats.gopSyncCount++
currentFrame += frameCount
output.push(` - ${frameCount} frames decoded from GOP block`)
break
case TAV_PACKET_IFRAME:
case TAV_PACKET_PFRAME:
case TAV_PACKET_VIDEO_CH2_I:
case TAV_PACKET_VIDEO_CH2_P:
case TAV_PACKET_VIDEO_CH3_I:
case TAV_PACKET_VIDEO_CH3_P:
case TAV_PACKET_VIDEO_CH4_I:
case TAV_PACKET_VIDEO_CH4_P:
case TAV_PACKET_VIDEO_CH5_I:
case TAV_PACKET_VIDEO_CH5_P:
case TAV_PACKET_VIDEO_CH6_I:
case TAV_PACKET_VIDEO_CH6_P:
case TAV_PACKET_VIDEO_CH7_I:
case TAV_PACKET_VIDEO_CH7_P:
case TAV_PACKET_VIDEO_CH8_I:
case TAV_PACKET_VIDEO_CH8_P:
case TAV_PACKET_VIDEO_CH9_I:
case TAV_PACKET_VIDEO_CH9_P:
let size = seqread.readInt()
stats.totalVideoBytes += size
// getFrameInfo consumes the payload and peeks at its mode and quantiser bytes.
let frameInfo = getFrameInfo(size)
// Odd tags in 0x71..0x7F are the P-frames of the multiplexed channels.
if (packetType === TAV_PACKET_PFRAME ||
(packetType >= 0x71 && packetType <= 0x7F && (packetType & 1))) {
// P-frame
if (packetType === TAV_PACKET_PFRAME) {
stats.pframeCount++
if (frameInfo.mode === FRAME_MODE_INTRA) stats.pframeIntraCount++
else if (frameInfo.mode === FRAME_MODE_DELTA) stats.pframeDeltaCount++
else if (frameInfo.mode === FRAME_MODE_SKIP) stats.pframeSkipCount++
currentFrame++
} else {
stats.muxVideoCount++
}
} else {
// I-frame
if (packetType === TAV_PACKET_IFRAME) {
stats.iframeCount++
currentFrame++
} else {
stats.muxVideoCount++
}
}
output.push(` - size=${size} bytes`)
if (frameInfo.mode >= 0) {
if (frameInfo.mode === FRAME_MODE_SKIP) output.push(" [SKIP]")
else if (frameInfo.mode === FRAME_MODE_DELTA) output.push(" [DELTA]")
else if (frameInfo.mode === FRAME_MODE_INTRA) output.push(" [INTRA]")
if (frameInfo.mode !== FRAME_MODE_SKIP) {
if (frameInfo.quantiser !== 0xFF) {
output.push(` [Q=${frameInfo.quantiser}]`)
}
}
}
if (packetType >= 0x70 && packetType <= 0x7F) {
// Two tags per channel, starting at channel 2 for tag 0x70.
let channel = Math.floor((packetType - 0x70) / 2) + 2
output.push(` (Channel ${channel})`)
}
break
case TAV_PACKET_AUDIO_MP2:
stats.audioCount++
stats.audioMp2Count++
let mp2Size = seqread.readInt()
stats.totalAudioBytes += mp2Size
stats.audioMp2Bytes += mp2Size
output.push(` - size=${mp2Size} bytes`)
seqread.skip(mp2Size)
break
case TAV_PACKET_AUDIO_PCM8:
stats.audioCount++
stats.audioPcm8Count++
let pcm8Size = seqread.readInt()
stats.totalAudioBytes += pcm8Size
stats.audioPcm8Bytes += pcm8Size
output.push(` - size=${pcm8Size} bytes (zstd compressed)`)
seqread.skip(pcm8Size)
break
case TAV_PACKET_AUDIO_TAD:
stats.audioCount++
stats.audioTadCount++
// The first two fields are read only to advance the stream; their values are not used.
let sampleCount0 = seqread.readShort()
let payloadSizePlus7 = seqread.readInt()
let sampleCount = seqread.readShort()
let quantiser = seqread.readOneByte()
let compressedSize = seqread.readInt()
stats.totalAudioBytes += compressedSize
stats.audioTadBytes += compressedSize
output.push(` - samples=${sampleCount}, size=${compressedSize} bytes, quantiser=${quantiser * 2 + 1} steps (index ${quantiser})`)
seqread.skip(compressedSize)
break
case TAV_PACKET_AUDIO_TRACK:
stats.audioCount++
stats.audioTrackCount++
let trackSize = seqread.readInt()
stats.totalAudioBytes += trackSize
stats.audioTrackBytes += trackSize
output.push(` - size=${trackSize} bytes (separate track)`)
seqread.skip(trackSize)
break
case TAV_PACKET_SUBTITLE:
case TAV_PACKET_SUBTITLE_TC:
stats.subtitleCount++
let subSize = seqread.readInt()
output.push(` - size=${subSize} bytes`)
parseSubtitlePacket(subSize, packetType === TAV_PACKET_SUBTITLE_TC, output)
break
case TAV_PACKET_VIDEOTEX:
stats.videotexCount++
let vtSize = seqread.readInt()
stats.videotexBytes += vtSize
parseVideotexPacket(vtSize, output)
break
case TAV_PACKET_EXIF:
case TAV_PACKET_ID3V1:
case TAV_PACKET_ID3V2:
case TAV_PACKET_VORBIS_COMMENT:
case TAV_PACKET_CD_TEXT:
stats.metadataCount++
let metaSize = seqread.readInt()
output.push(` - size=${metaSize} bytes`)
seqread.skip(metaSize)
break
case TAV_PACKET_LOOP_START:
case TAV_PACKET_LOOP_END:
stats.loopPointCount++
output.push(" (no payload)")
break
case TAV_PACKET_SCREEN_MASK:
let frameNumber = seqread.readInt()
let top = seqread.readShort()
let right = seqread.readShort()
let bottom = seqread.readShort()
let left = seqread.readShort()
output.push(` - Frame=${frameNumber} [top=${top}, right=${right}, bottom=${bottom}, left=${left}]`)
break
case TAV_PACKET_SYNC:
stats.syncCount++
break
case TAV_PACKET_SYNC_NTSC:
stats.syncNtscCount++
break
case TAV_PACKET_NOOP:
// Silent no-op
break
default:
// No payload is consumed for an unknown type, so the next byte is read as a packet type.
stats.unknownCount++
output.push(" (UNKNOWN)")
break
}
output.push("\n")
packetNum++
}
} catch (e) {
// Keep what was parsed so far; the summary below is still produced.
output.push(`\nError during packet parsing: ${e}\n`)
}
// Print summary
output.push("\n==================================================\n")
output.push("Summary Statistics:\n")
output.push("==================================================\n")
output.push(`Total packets: ${packetNum}\n`)
output.push("\nVideo:\n")
output.push(` I-frames: ${stats.iframeCount}\n`)
output.push(` P-frames: ${stats.pframeCount}`)
if (stats.pframeCount > 0) {
output.push(` (INTRA: ${stats.pframeIntraCount}, DELTA: ${stats.pframeDeltaCount}, SKIP: ${stats.pframeSkipCount}`)
// P-frames whose mode byte could not be read or matched none of the three modes.
let knownModes = stats.pframeIntraCount + stats.pframeDeltaCount + stats.pframeSkipCount
if (knownModes < stats.pframeCount) {
output.push(`, Unknown: ${stats.pframeCount - knownModes}`)
}
output.push(")")
}
output.push("\n")
if (stats.gopUnifiedCount + stats.gopUnifiedMotionCount > 0) {
let avgFramesPerGop = (stats.totalGopFrames / (stats.gopUnifiedCount + stats.gopUnifiedMotionCount)).toFixed(1)
output.push(` 3D GOP packets: ${stats.gopUnifiedCount + stats.gopUnifiedMotionCount} (total frames: ${stats.totalGopFrames}, avg ${avgFramesPerGop} frames/GOP)\n`)
output.push(` GOP sync packets: ${stats.gopSyncCount}\n`)
}
output.push(` Mux video: ${stats.muxVideoCount}\n`)
output.push(` Total video bytes: ${stats.totalVideoBytes} (${(stats.totalVideoBytes / 1024 / 1024).toFixed(2)} MB)\n`)
output.push("\nAudio:\n")
output.push(` Total packets: ${stats.audioCount}\n`)
if (stats.audioMp2Count > 0) {
output.push(` MP2: ${stats.audioMp2Count} packets, ${stats.audioMp2Bytes} bytes (${(stats.audioMp2Bytes / 1024 / 1024).toFixed(2)} MB)\n`)
}
if (stats.audioPcm8Count > 0) {
output.push(` PCM8 (zstd): ${stats.audioPcm8Count} packets, ${stats.audioPcm8Bytes} bytes (${(stats.audioPcm8Bytes / 1024 / 1024).toFixed(2)} MB)\n`)
}
if (stats.audioTadCount > 0) {
output.push(` TAD32 (zstd): ${stats.audioTadCount} packets, ${stats.audioTadBytes} bytes (${(stats.audioTadBytes / 1024 / 1024).toFixed(2)} MB)\n`)
}
if (stats.audioTrackCount > 0) {
output.push(` Separate track: ${stats.audioTrackCount} packets, ${stats.audioTrackBytes} bytes (${(stats.audioTrackBytes / 1024 / 1024).toFixed(2)} MB)\n`)
}
output.push(` Total audio bytes: ${stats.totalAudioBytes} (${(stats.totalAudioBytes / 1024 / 1024).toFixed(2)} MB)\n`)
output.push("\nOther:\n")
output.push(` Timecodes: ${stats.timecodeCount}\n`)
output.push(` Subtitles: ${stats.subtitleCount}\n`)
if (stats.videotexCount > 0) {
output.push(` Videotex frames: ${stats.videotexCount} (${stats.videotexBytes} bytes, ${(stats.videotexBytes / 1024 / 1024).toFixed(2)} MB)\n`)
}
output.push(` Extended headers: ${stats.extendedHeaderCount}\n`)
output.push(` Metadata packets: ${stats.metadataCount}\n`)
output.push(` Loop points: ${stats.loopPointCount}\n`)
output.push(` Sync packets: ${stats.syncCount}\n`)
output.push(` NTSC sync packets: ${stats.syncNtscCount}\n`)
output.push(` Unknown packets: ${stats.unknownCount}\n`)
// Write output to file
try {
let outputStr = output.join("")
files.open(outputPath).swrite(outputStr)
println(`Analysis complete. Report written to ${outputPath}`)
return 0
} catch (e) {
println(`Error writing output file: ${e}`)
return 1
}
}
// Run the tool; main's return value becomes the script's result.
return main()

View File

@@ -0,0 +1,200 @@
import kotlin.math.ceil
/**
 * Random-number helpers used by the seek-latency simulation.
 * All of them draw from [Math.random].
 */
object Random {
    /**
     * Returns a uniformly distributed integer in the inclusive range [low, high].
     *
     * @param low  smallest value that can be returned
     * @param high largest value that can be returned; expected to be >= [low]
     */
    fun uniformRand(low: Int, high: Int): Int {
        // The span is computed in Long so that (high - low + 1) cannot overflow Int.
        val span = high.toLong() - low.toLong() + 1L
        return (low + (Math.random() * span).toLong()).toInt()
    }

    /**
     * Returns a value between [low] and [high] with a triangular distribution
     * peaking at the midpoint (the mean of two uniform samples).
     */
    fun triangularRand(low: Float, high: Float): Float {
        val a = (Math.random() + Math.random()) / 2.0
        return ((high - low) * a + low).toFloat()
    }

    /**
     * Returns a normally distributed value with mean [avg] and standard
     * deviation [stddev], generated with the polar form of the Box-Muller
     * transform.
     */
    fun gaussianRand(avg: Float, stddev: Float): Float {
        var u: Double
        var v: Double
        var s: Double

        // Rejection-sample a point (u, v) strictly inside the unit circle,
        // excluding the origin, where ln(s) / s is undefined.
        do {
            // Generate two uniform random numbers between -1 and 1
            u = Math.random() * 2 - 1
            v = Math.random() * 2 - 1

            // Calculate sum of squares
            s = u * u + v * v
        } while (s >= 1 || s == 0.0)

        // Calculate polar transformation
        val multiplier = kotlin.math.sqrt(-2.0 * kotlin.math.ln(s) / s)

        // Transform to the desired mean and standard deviation.
        // Only one of the two values the transform yields is used.
        return (avg + stddev * u * multiplier).toFloat()
    }
}
/**
 * Model of how long a storage device takes to move from one sector to
 * another. Each subclass models one kind of medium.
 */
sealed class SeekSimulator {
    /**
     * Returns the simulated time, in seconds, to move from [currentSector]
     * to [targetSector]. Results include random jitter.
     */
    abstract fun computeSeekTime(currentSector: Int, targetSector: Int): Float

    /**
     * Linear tape: seek time is a fixed start-up cost plus the distance
     * between the two positions divided by a jittered transport speed.
     */
    class Tape(
        val totalSectors: Int,
        val tapeLengthMeters: Float = 200f,
        val baseSeekTime: Float = 0.5f, // seconds base inertia
        val tapeSpeedMetersPerSec: Float = 2.0f, // normal speed
    ) : SeekSimulator() {
        /** Position of [sector] along the tape, in metres. */
        private fun positionOf(sector: Int): Float =
            (sector.toFloat() / totalSectors) * tapeLengthMeters

        override fun computeSeekTime(currentSector: Int, targetSector: Int): Float {
            val from = positionOf(currentSector)
            val to = positionOf(targetSector)
            val travel = kotlin.math.abs(to - from)

            // Transport speed varies by up to 10% either way.
            val jitteredSpeed = tapeSpeedMetersPerSec * Random.triangularRand(0.9f, 1.1f)
            return baseSeekTime + (travel / jitteredSpeed)
        }
    }

    /**
     * Rotating disc: seek time is the arm movement (base cost plus a term
     * growing with the square root of the track distance) plus a normally
     * distributed rotational latency.
     */
    class Disc(
        val totalTracks: Int,
        val armSeekBaseTime: Float = 0.005f, // fast seek, seconds
        val armSeekMultiplier: Float = 0.002f, // slower for bigger jumps
        val rotationLatencyAvg: Float = 0.008f, // seconds (half-rotation average)
    ) : SeekSimulator() {
        override fun computeSeekTime(currentSector: Int, targetSector: Int): Float {
            val fromTrack = sectorToTrack(currentSector)
            val toTrack = sectorToTrack(targetSector)
            val trackDistance = kotlin.math.abs(toTrack - fromTrack)

            val armTime = armSeekBaseTime + (armSeekMultiplier * kotlin.math.sqrt(trackDistance.toFloat()))
            // Standard deviation is 20% of the average latency.
            val spinTime = Random.gaussianRand(rotationLatencyAvg, rotationLatencyAvg * 0.2f)
            return armTime + spinTime
        }

        // Simplistic assumption: sector layout maps 1:1 to track at this level
        private fun sectorToTrack(sector: Int): Int = sector % totalTracks
    }

    /**
     * Rotating drum: seek time is purely rotational, derived from the
     * angular distance between the two sectors, with slight jitter.
     */
    class Drum(
        val rpm: Float = 3000f
    ) : SeekSimulator() {
        override fun computeSeekTime(currentSector: Int, targetSector: Int): Float {
            val degreesPerSector = 360.0f / 10000.0f // Assume 10k sectors per drum circumference

            val fromAngle = currentSector * degreesPerSector
            val toAngle = targetSector * degreesPerSector
            val sweep = kotlin.math.abs(toAngle - fromAngle) % 360f

            // Fraction of a revolution times the duration of one revolution.
            val spinTime = (sweep / 360f) * (60f / rpm)

            // Add a little mechanical jitter (up to 5% either way)
            return spinTime * Random.triangularRand(0.95f, 1.05f)
        }
    }
}
/**
 * Drives a [SeekSimulator] with random sector-to-sector seeks and reports
 * statistics on the resulting latencies.
 *
 * @param simulator    device model to sample
 * @param totalSectors sectors are drawn uniformly from 0 until this value
 * @param sampleCount  number of seeks performed by [runSampling]
 */
class SeekLatencySampler(
    val simulator: SeekSimulator,
    val totalSectors: Int,
    val sampleCount: Int = 10000
) {
    /** One simulated seek and the latency it produced, in seconds. */
    data class Sample(val fromSector: Int, val toSector: Int, val latency: Float)

    val samples = mutableListOf<Sample>()

    /**
     * Discards earlier samples and records [sampleCount] new ones. Each seek
     * starts where the previous one ended.
     */
    fun runSampling() {
        samples.clear()
        var lastSector = Random.uniformRand(0, totalSectors - 1)

        repeat(sampleCount) {
            val nextSector = Random.uniformRand(0, totalSectors - 1)
            val latency = simulator.computeSeekTime(lastSector, nextSector)
            samples.add(Sample(lastSector, nextSector, latency))
            lastSector = nextSector
        }
    }

    /**
     * Prints min, max, mean and standard deviation of the recorded latencies,
     * followed by a text histogram. Prints a hint and returns if
     * [runSampling] has not produced any samples.
     */
    fun analyzeAndPrint() {
        if (samples.isEmpty()) {
            println("No samples generated. Run runSampling() first.")
            return
        }

        val latencies = samples.map { it.latency }
        val minLatency = latencies.minOrNull() ?: 0f
        val maxLatency = latencies.maxOrNull() ?: 0f
        val avgLatency = latencies.average().toFloat()
        // Population standard deviation: root of the mean squared deviation.
        val stddevLatency = kotlin.math.sqrt(latencies.map { (it - avgLatency).let { diff -> diff * diff } }.average()).toFloat()

        println("=== Seek Latency Stats ===")
        println("Samples: $sampleCount")
        println("Min: ${"%.4f".format(minLatency)} s")
        println("Max: ${"%.4f".format(maxLatency)} s")
        println("Avg: ${"%.4f".format(avgLatency)} s")
        println("Stddev: ${"%.4f".format(stddevLatency)} s")

        printSimpleHistogram(latencies)
    }

    /**
     * Prints a histogram of [latencies] using [bins] equal-width buckets.
     * Each '#' stands for sampleCount / 200 samples (at least one).
     */
    private fun printSimpleHistogram(latencies: List<Float>, bins: Int = 30) {
        val min = latencies.minOrNull() ?: return
        val max = latencies.maxOrNull() ?: return
        val binSize = (max - min) / bins

        val histogram = IntArray(bins) { 0 }
        latencies.forEach { latency ->
            // When every latency is identical the bin width is 0; put
            // everything into the first bin rather than dividing 0 by 0.
            val bin = if (binSize > 0f) {
                kotlin.math.min(((latency - min) / binSize).toInt(), bins - 1)
            } else {
                0
            }
            histogram[bin]++
        }

        // Integer division gives 0 for fewer than 200 samples, which would
        // make the bar-length division below throw; never scale below 1.
        val samplesPerMark = kotlin.math.max(1, sampleCount / 200)

        println("--- Latency Distribution ---")
        histogram.forEachIndexed { index, count ->
            val lower = min + binSize * index
            val upper = lower + binSize
            val bar = "#".repeat(count / samplesPerMark) // Scale bar length
            println("${"%.4f".format(lower)} - ${"%.4f".format(upper)} s: $bar")
        }
    }
}
/** Runs the latency sampler once for each storage model (tape, disc, drum) and prints the statistics. */
fun main() {
    val simulators = listOf(
        SeekSimulator.Tape(
            totalSectors = 100000,
            tapeLengthMeters = 200f,
            baseSeekTime = 0.2f,
            tapeSpeedMetersPerSec = 5.0f
        ),
        SeekSimulator.Disc(
            totalTracks = 3810,
            armSeekBaseTime = 0.005f,
            armSeekMultiplier = 0.002f,
            rotationLatencyAvg = 0.008f
        ),
        SeekSimulator.Drum(
            rpm = 3000f
        )
    )

    for (sim in simulators) {
        val sampler = SeekLatencySampler(
            simulator = sim,
            totalSectors = 100000,
            sampleCount = 5000
        )
        sampler.runSampling()
        sampler.analyzeAndPrint()
    }
}

View File

@@ -104,6 +104,13 @@ Description: reads one block of file. Any subsequent read attempts will return n
than a single block, rest of the bytes will be filled with zero, and size-of-the-block (see terranmon.txt)
will be set accordingly.
REWIND
Description: resets the read position of the currently open file to its beginning. This allows seeking
backwards in streaming read mode without closing and reopening the file. Only applicable to files opened
with OPENR. If no file is open, or the open file is not in read mode, returns error status code 135
(NO_FILE_OPENED).
CLOSE
Description: closes any file that is open.

File diff suppressed because it is too large Load Diff

View File

@@ -82,6 +82,7 @@ class AudioJSR223Delegate(private val vm: VM) {
// fun mp2DecodeFrame(mp2: MP2Env.MP2, framePtr: Long?, pcm: Boolean, outL: Long, outR: Long) = getFirstSnd()?.mp2Env?.decodeFrame(mp2, framePtr, pcm, outL, outR)
fun getBaseAddr(): Int? = getFirstSnd()?.let { return it.vm.findPeriSlotNum(it)?.times(-131072)?.minus(1) }
fun getMemAddr(): Int? = getFirstSnd()?.let { return it.vm.findPeriSlotNum(it)?.times(-1048576)?.minus(1) }
fun mp2Init() = getFirstSnd()?.mmio_write(40L, 16)
fun mp2Decode() = getFirstSnd()?.mmio_write(40L, 1)
fun mp2InitThenDecode() = getFirstSnd()?.mmio_write(40L, 17)
@@ -93,6 +94,33 @@ class AudioJSR223Delegate(private val vm: VM) {
}
}
/** Writes 1 to MMIO register 42 of the first sound adapter; does nothing when there is no adapter. */
fun tadDecode() {
    val snd = getFirstSnd() ?: return
    snd.mmio_write(42L, 1)
}
/** True when MMIO register 44 of the first sound adapter reads 1; false otherwise (including when no adapter exists). */
fun tadIsBusy(): Boolean {
    val status = getFirstSnd()?.mmio_read(44L)?.toInt()
    return status == 1
}
/**
 * Copies `sampleLength * 2` bytes from the start of the adapter's `tadDecodedBin` into a fresh
 * byte array and appends that array to the PCM queue of the given playhead.
 *
 * @param playhead index into the adapter's `playheads`
 * @param sampleLength number of samples to upload; at most 32768
 * @throws Error when [sampleLength] exceeds 32768
 */
fun tadUploadDecoded(playhead: Int, sampleLength: Int) {
    if (sampleLength > 32768) throw Error("Sample size too long: expected <= 32768, got $sampleLength")

    val snd = getFirstSnd() ?: return

    // two bytes per sample (32768 samples * 2 channels at most)
    val chunk = ByteArray(sampleLength * 2)
    UnsafeHelper.memcpyRaw(null, snd.tadDecodedBin.ptr, chunk, UnsafeHelper.getArrayOffset(chunk), sampleLength * 2L)
    snd.playheads[playhead].pcmQueue.addLast(chunk)
}
/**
 * Copies [length] bytes from VM memory starting at [ptr] into the adapter's `tadInputBin`,
 * starting at [destOffset]. Non-negative pointers are read with ascending addresses,
 * negative pointers with descending addresses.
 *
 * @param ptr VM address of the first source byte
 * @param length number of bytes to copy; values <= 0 copy nothing
 * @param destOffset offset of the first destination byte inside `tadInputBin`
 * @throws Error when the destination range does not fit inside `tadInputBin`
 */
fun putTadDataByPtr(ptr: Int, length: Int, destOffset: Int) {
    getFirstSnd()?.let { snd ->
        if (length <= 0) return

        // tadInputBin is written by raw index, so refuse ranges that would leave the
        // buffer instead of writing past its end.
        val capacity = snd.tadInputBin.size
        if (destOffset < 0 || destOffset.toLong() + length > capacity)
            throw Error("TAD input out of range: destOffset=$destOffset, length=$length, capacity=$capacity")

        val vkMult = if (ptr >= 0) 1 else -1
        for (k in 0L until length) {
            val vk = k * vkMult
            snd.tadInputBin[k + destOffset] = vm.peek(ptr + vk)!!
        }
    }
}
/** Returns the byte at [index] of the first sound adapter's `tadDecodedBin`, or null when there is no adapter. */
fun getTadData(index: Int) = getFirstSnd()?.let { snd -> snd.tadDecodedBin.get(index.toLong()) }
/*

View File

@@ -3,6 +3,7 @@ package net.torvald.tsvm
import com.badlogic.gdx.utils.compression.Lzma
import io.airlift.compress.zstd.ZstdInputStream
import io.airlift.compress.zstd.ZstdOutputStream
import net.torvald.UnsafeHelper
import net.torvald.terrarum.modulecomputers.virtualcomputer.tvd.toUint
import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream
@@ -19,30 +20,51 @@ class CompressorDelegate(private val vm: VM) {
*/
fun compFromTo(input: Int, len: Int, output: Int): Int {
val inbytes = ByteArray(len) { vm.peek(input.toLong() + it)!! }
comp(inbytes).let {
it.forEachIndexed { index, byte ->
vm.poke(output.toLong() + index, byte)
val bytes = comp(inbytes)
vm.getDev(output.toLong(), bytes.size.toLong(), true).let {
if (it != null) {
val bytesReversed = bytes.reversedArray() // backward addressing: copy over reversed bytes starting from the end of the destination
UnsafeHelper.memcpyRaw(bytesReversed, UnsafeHelper.getArrayOffset(bytesReversed), null, output.toLong() - bytes.size, bytes.size.toLong())
}
else {
bytes.forEachIndexed { index, byte ->
vm.poke(output.toLong() + index, byte)
}
}
return it.size
}
return bytes.size
}
fun compTo(str: String, output: Int): Int {
comp(str).let {
it.forEachIndexed { index, byte ->
vm.poke(output.toLong() + index, byte)
val bytes = comp(str)
vm.getDev(output.toLong(), bytes.size.toLong(), true).let {
if (it != null) {
val bytesReversed = bytes.reversedArray() // backward addressing: copy over reversed bytes starting from the end of the destination
UnsafeHelper.memcpyRaw(bytesReversed, UnsafeHelper.getArrayOffset(bytesReversed), null, output.toLong() - bytes.size, bytes.size.toLong())
}
else {
bytes.forEachIndexed { index, byte ->
vm.poke(output.toLong() + index, byte)
}
}
return it.size
}
return bytes.size
}
fun compTo(ba: ByteArray, output: Int): Int {
comp(ba).let {
it.forEachIndexed { index, byte ->
vm.poke(output.toLong() + index, byte)
val bytes = comp(ba)
vm.getDev(output.toLong(), bytes.size.toLong(), true).let {
if (it != null) {
val bytesReversed = bytes.reversedArray() // backward addressing: copy over reversed bytes starting from the end of the destination
UnsafeHelper.memcpyRaw(bytesReversed, UnsafeHelper.getArrayOffset(bytesReversed), null, output.toLong() - bytes.size, bytes.size.toLong())
}
else {
bytes.forEachIndexed { index, byte ->
vm.poke(output.toLong() + index, byte)
}
}
return it.size
}
return bytes.size
}
@@ -51,16 +73,32 @@ class CompressorDelegate(private val vm: VM) {
fun decompTo(str: String, pointer: Int): Int {
val bytes = decomp(str)
bytes.forEachIndexed { index, byte ->
vm.poke(pointer.toLong() + index, byte)
vm.getDev(pointer.toLong(), bytes.size.toLong(), true).let {
if (it != null) {
val bytesReversed = bytes.reversedArray() // backward addressing: copy over reversed bytes starting from the end of the destination
UnsafeHelper.memcpyRaw(bytesReversed, UnsafeHelper.getArrayOffset(bytesReversed), null, pointer.toLong() - bytes.size, bytes.size.toLong())
}
else {
bytes.forEachIndexed { index, byte ->
vm.poke(pointer.toLong() + index, byte)
}
}
}
return bytes.size
}
fun decompTo(ba: ByteArray, pointer: Int): Int {
val bytes = decomp(ba)
bytes.forEachIndexed { index, byte ->
vm.poke(pointer.toLong() + index, byte)
vm.getDev(pointer.toLong(), bytes.size.toLong(), true).let {
if (it != null) {
val bytesReversed = bytes.reversedArray() // backward addressing: copy over reversed bytes starting from the end of the destination
UnsafeHelper.memcpyRaw(bytesReversed, UnsafeHelper.getArrayOffset(bytesReversed), null, pointer.toLong() - bytes.size, bytes.size.toLong())
}
else {
bytes.forEachIndexed { index, byte ->
vm.poke(pointer.toLong() + index, byte)
}
}
}
return bytes.size
}
@@ -70,12 +108,19 @@ class CompressorDelegate(private val vm: VM) {
*/
fun decompFromTo(input: Int, len: Int, output: Int): Int {
val inbytes = ByteArray(len) { vm.peek(input.toLong() + it)!! }
decomp(inbytes).let {
it.forEachIndexed { index, byte ->
vm.poke(output.toLong() + index, byte)
}
return it.size
val bytes = decomp(inbytes)
vm.getDev(output.toLong(), bytes.size.toLong(), true).let {
// if (it != null) {
// val bytesReversed = bytes.reversedArray() // backward addressing: copy over reversed bytes starting from the end of the destination
// UnsafeHelper.memcpyRaw(bytesReversed, UnsafeHelper.getArrayOffset(bytesReversed), null, output.toLong() - bytes.size, bytes.size.toLong())
// }
// else {
bytes.forEachIndexed { index, byte ->
vm.poke(output.toLong() + index, byte)
}
// }
}
return bytes.size
}
companion object {

File diff suppressed because it is too large Load Diff

View File

@@ -438,13 +438,89 @@ class VM(
(memspace as PeriBase).poke(offset, value)
}
fun peek(addr:Long): Byte? {
/**
 * Writes a 16-bit value as two bytes, low byte first, at the location [addr] translates to.
 *
 * @param addr VM address of the first byte
 * @param value value to store
 * @throws ErrorIllegalAccess when [addr] maps to no memory space, or when the two bytes
 *         do not both fit inside the addressed `UnsafePtr` region
 */
fun pokeShort(addr: Long, value: Short) {
    val value0 = value.toByte()
    val value1 = value.toInt().shr(8).toByte()

    val (memspace, offset) = translateAddr(addr)
    if (memspace == null)
        throw ErrorIllegalAccess(this, addr)
    else if (memspace is UnsafePtr) {
        // the last byte written is addr+1; checking only addr would let it land past the end
        if (addr + 1 >= memspace.size)
            throw ErrorIllegalAccess(this, addr)
        else {
            memspace.set(offset+0, value0)
            memspace.set(offset+1, value1)
        }
    }
    else {
        (memspace as PeriBase).poke(offset+0, value0)
        (memspace as PeriBase).poke(offset+1, value1)
    }
}
/**
 * Writes the raw bit pattern of a 32-bit float as four bytes, lowest byte first, at the
 * location [addr] translates to.
 *
 * @param addr VM address of the first byte
 * @param value value to store
 * @throws ErrorIllegalAccess when [addr] maps to no memory space, or when the four bytes
 *         do not all fit inside the addressed `UnsafePtr` region
 */
fun pokeFloat(addr: Long, value: Float) {
    val vi = value.toRawBits()
    val value0 = vi.toByte()
    val value1 = vi.shr(8).toByte()
    val value2 = vi.shr(16).toByte()
    val value3 = vi.shr(24).toByte()

    val (memspace, offset) = translateAddr(addr)
    if (memspace == null)
        throw ErrorIllegalAccess(this, addr)
    else if (memspace is UnsafePtr) {
        // the last byte written is addr+3; checking only addr would let it land past the end
        if (addr + 3 >= memspace.size)
            throw ErrorIllegalAccess(this, addr)
        else {
            memspace.set(offset+0, value0)
            memspace.set(offset+1, value1)
            memspace.set(offset+2, value2)
            memspace.set(offset+3, value3)
        }
    }
    else {
        (memspace as PeriBase).poke(offset+0, value0)
        (memspace as PeriBase).poke(offset+1, value1)
        (memspace as PeriBase).poke(offset+2, value2)
        (memspace as PeriBase).poke(offset+3, value3)
    }
}
/**
 * Writes a 32-bit integer as four bytes, lowest byte first, at the location [addr] translates to.
 *
 * @param addr VM address of the first byte
 * @param value value to store
 * @throws ErrorIllegalAccess when [addr] maps to no memory space, or when the four bytes
 *         do not all fit inside the addressed `UnsafePtr` region
 */
fun pokeInt(addr: Long, value: Int) {
    val value0 = value.toByte()
    val value1 = value.shr(8).toByte()
    val value2 = value.shr(16).toByte()
    val value3 = value.shr(24).toByte()

    val (memspace, offset) = translateAddr(addr)
    if (memspace == null)
        throw ErrorIllegalAccess(this, addr)
    else if (memspace is UnsafePtr) {
        // the last byte written is addr+3; checking only addr would let it land past the end
        if (addr + 3 >= memspace.size)
            throw ErrorIllegalAccess(this, addr)
        else {
            memspace.set(offset+0, value0)
            memspace.set(offset+1, value1)
            memspace.set(offset+2, value2)
            memspace.set(offset+3, value3)
        }
    }
    else {
        (memspace as PeriBase).poke(offset+0, value0)
        (memspace as PeriBase).poke(offset+1, value1)
        (memspace as PeriBase).poke(offset+2, value2)
        (memspace as PeriBase).poke(offset+3, value3)
    }
}
fun peek(addr:Long): Byte {
val (memspace, offset) = translateAddr(addr)
// println("peek $addr -> ${offset}@${memspace?.javaClass?.canonicalName}")
return if (memspace == null)
null
throw NullPointerException()//null
else if (memspace is UnsafePtr) {
if (addr >= memspace.size)
throw ErrorIllegalAccess(this, addr)
@@ -452,7 +528,76 @@ class VM(
memspace.get(offset)
}
else
(memspace as PeriBase).peek(offset)
(memspace as PeriBase).peek(offset)!!
}
/**
 * Reads two bytes, low byte first, from the location [addr] translates to and combines
 * them into a 16-bit value.
 *
 * @param addr VM address of the first byte
 * @return the assembled value
 * @throws NullPointerException when [addr] maps to no memory space, or a peripheral returns null for a byte
 * @throws ErrorIllegalAccess when the two bytes do not both lie inside the addressed `UnsafePtr` region
 */
fun peekShort(addr: Long): Short {
    val (memspace, offset) = translateAddr(addr)
    return if (memspace == null)
        throw NullPointerException()//null
    else if (memspace is UnsafePtr) {
        // the last byte read is addr+1; checking only addr would allow a read past the end
        if (addr + 1 >= memspace.size)
            throw ErrorIllegalAccess(this, addr)
        else {
            (memspace.get(offset+0).toUint() or
                    memspace.get(offset+1).toUint().shl(8)).toShort()
        }
    }
    else {
        ((memspace as PeriBase).peek(offset+0)!!.toUint() or
                (memspace as PeriBase).peek(offset+1)!!.toUint().shl(8)).toShort()
    }
}
/**
 * Reads four bytes, lowest byte first, from the location [addr] translates to and
 * reinterprets the resulting bit pattern as a 32-bit float.
 *
 * @param addr VM address of the first byte
 * @return the float whose bit pattern is stored at [addr]
 * @throws NullPointerException when [addr] maps to no memory space, or a peripheral returns null for a byte
 * @throws ErrorIllegalAccess when the four bytes do not all lie inside the addressed `UnsafePtr` region
 */
fun peekFloat(addr: Long): Float {
    val (memspace, offset) = translateAddr(addr)
    return if (memspace == null)
        throw NullPointerException()//null
    else if (memspace is UnsafePtr) {
        // the last byte read is addr+3; checking only addr would allow a read past the end
        if (addr + 3 >= memspace.size)
            throw ErrorIllegalAccess(this, addr)
        else {
            Float.fromBits(memspace.get(offset+0).toUint() or
                    memspace.get(offset+1).toUint().shl(8) or
                    memspace.get(offset+2).toUint().shl(16) or
                    memspace.get(offset+3).toUint().shl(24)
            )
        }
    }
    else {
        Float.fromBits((memspace as PeriBase).peek(offset+0)!!.toUint() or
                (memspace as PeriBase).peek(offset+1)!!.toUint().shl(8) or
                (memspace as PeriBase).peek(offset+2)!!.toUint().shl(16) or
                (memspace as PeriBase).peek(offset+3)!!.toUint().shl(24)
        )
    }
}
/**
 * Reads four bytes, lowest byte first, from the location [addr] translates to and combines
 * them into a 32-bit integer.
 *
 * The return type is nullable, but no branch of this function returns null.
 *
 * @param addr VM address of the first byte
 * @return the assembled value
 * @throws NullPointerException when [addr] maps to no memory space, or a peripheral returns null for a byte
 * @throws ErrorIllegalAccess when the four bytes do not all lie inside the addressed `UnsafePtr` region
 */
fun peekInt(addr: Long): Int? {
    val (memspace, offset) = translateAddr(addr)
    return if (memspace == null)
        throw NullPointerException()//null
    else if (memspace is UnsafePtr) {
        // the last byte read is addr+3; checking only addr would allow a read past the end
        if (addr + 3 >= memspace.size)
            throw ErrorIllegalAccess(this, addr)
        else {
            (memspace.get(offset+0).toUint() or
                    memspace.get(offset+1).toUint().shl(8) or
                    memspace.get(offset+2).toUint().shl(16) or
                    memspace.get(offset+3).toUint().shl(24)
            )
        }
    }
    else {
        ((memspace as PeriBase).peek(offset+0)!!.toUint() or
                (memspace as PeriBase).peek(offset+1)!!.toUint().shl(8) or
                (memspace as PeriBase).peek(offset+2)!!.toUint().shl(16) or
                (memspace as PeriBase).peek(offset+3)!!.toUint().shl(24)
        )
    }
}
private fun findEmptySpace(blockSize: Int): Int? {
@@ -522,8 +667,8 @@ class VM(
val fromDev = getDev(from, len, false)
val toDev = getDev(to, len, true)
// println("from = $from, to = $to")
// println("fromDev = $fromDev, toDev = $toDev")
// System.err.println("[VM.memcpy] from = $from, to = $to")
// System.err.println("[VM.memcpy] fromDev = $fromDev, toDev = $toDev")
if (fromDev != null && toDev != null)
UnsafeHelper.memcpy(fromDev, toDev, len)
@@ -580,7 +725,7 @@ class VM(
private fun relPtrInDev(from: Long, len: Long, start: Int, end: Int) =
(from in start..end && (from + len) in start..end)
private fun getDev(from: Long, len: Long, isDest: Boolean): Long? {
internal fun getDev(from: Long, len: Long, isDest: Boolean): Long? {
return if (from >= 0) usermem.ptr + from
// MMIO area
else if (from in -1048576..-1 && (from - len) in -1048577..-1) {
@@ -600,6 +745,7 @@ class VM(
else if (dev is AudioAdapter) {
if (relPtrInDev(fromRel, len, 64, 2367)) dev.mediaDecodedBin.ptr + fromRel - 64
else if (relPtrInDev(fromRel, len, 2368, 4096)) dev.mediaFrameBin.ptr + fromRel - 2368
else if (relPtrInDev(fromRel, len, 65536, 131072)) dev.pcmBin.ptr + fromRel - 65536
else null
}
else if (dev is GraphicsAdapter) {
@@ -625,7 +771,9 @@ class VM(
if (relPtrInDev(fromRel, len, 0, 250879)) dev.framebuffer.ptr + fromRel - 0
else if (relPtrInDev(fromRel, len, 250880, 251903)) dev.unusedArea.ptr + fromRel - 250880
else if (relPtrInDev(fromRel, len, 253950, 261631)) dev.textArea.ptr + fromRel - 253950
else if (relPtrInDev(fromRel, len, 262144, 513023)) dev.framebuffer2?.ptr?.plus(fromRel)?.minus(253950)
else if (relPtrInDev(fromRel, len, 262144, 513023)) dev.framebuffer2?.ptr?.plus(fromRel)?.minus(262144)
else if (relPtrInDev(fromRel, len, 524288, 775167)) dev.framebuffer3?.ptr?.plus(fromRel)?.minus(524288)
else if (relPtrInDev(fromRel, len, 786432, 1037371)) dev.framebuffer4?.ptr?.plus(fromRel)?.minus(786432)
else null
}
else if (dev is RamBank) {

View File

@@ -4,6 +4,7 @@ import net.torvald.UnsafeHelper
import net.torvald.terrarum.modulecomputers.virtualcomputer.tvd.toUint
import net.torvald.terrarum.modulecomputers.virtualcomputer.tvd.toUlong
import net.torvald.tsvm.peripheral.*
import kotlin.math.absoluteValue
/**
* Pass the instance of the class to the ScriptEngine's binding, preferably under the namespace of "vm"
@@ -14,14 +15,22 @@ class VMJSR223Delegate(private val vm: VM) {
(from in start..end && (from + len) in start..end)
private fun getDev(from: Long, len: Long, isDest: Boolean): Long? {
return if (from >= 0) vm.usermem.ptr + from
// System.err.print("getDev(from=$from, len=$len, isDest=$isDest) -> ")
return if (from >= 0) {
// System.err.println("USERMEM offset=$from")
vm.usermem.ptr + from
}
// MMIO area
else if (from in -1048576..-1 && (from - len) in -1048577..-1) {
val fromIndex = (-from-1) / 131072
val fromIndex = ((-from-1) / 131072).absoluteValue
val dev = vm.peripheralTable[fromIndex.toInt()].peripheral ?: return null
val fromRel = (-from-1) % 131072
if (fromRel + len > 131072) return null
// System.err.println("MMIO dev=${dev.typestring}, fromIndex=$fromIndex, fromRel=$fromRel")
return if (dev is IOSpace) {
if (relPtrInDev(fromRel, len, 1024, 2047)) dev.peripheralFast.ptr + fromRel - 1024
else if (relPtrInDev(fromRel, len, 4096, 8191)) (if (isDest) dev.blockTransferTx[0] else dev.blockTransferRx[0]).ptr + fromRel - 4096
@@ -50,6 +59,8 @@ class VMJSR223Delegate(private val vm: VM) {
val fromRel = (-from-1) % 1048576
if (fromRel + len > 1048576) return null
// System.err.println("MEMORY dev=${dev.typestring}, fromIndex=$fromIndex, fromRel=$fromRel")
return if (dev is AudioAdapter) {
if (relPtrInDev(fromRel, len, 0, 114687)) dev.sampleBin.ptr + fromRel - 0
else null
@@ -111,8 +122,8 @@ class VMJSR223Delegate(private val vm: VM) {
val fromDev = getDev(from, len, false)
val toDev = getDev(to, len, true)
// println("from = $from, to = $to")
// println("fromDev = $fromDev, toDev = $toDev")
// System.err.println("[sys.memcpy] from = $from, to = $to")
// System.err.println("[sys.memcpy] fromDev = $fromDev, toDev = $toDev")
if (fromDev != null && toDev != null)
UnsafeHelper.memcpy(fromDev, toDev, len)

View File

@@ -4,6 +4,7 @@ import com.badlogic.gdx.Gdx
import com.badlogic.gdx.backends.lwjgl3.audio.OpenALLwjgl3Audio
import com.badlogic.gdx.utils.GdxRuntimeException
import com.badlogic.gdx.utils.Queue
import io.airlift.compress.zstd.ZstdInputStream
import net.torvald.UnsafeHelper
import net.torvald.UnsafePtr
import net.torvald.terrarum.modulecomputers.virtualcomputer.tvd.toUint
@@ -11,6 +12,9 @@ import net.torvald.tsvm.ThreeFiveMiniUfloat
import net.torvald.tsvm.VM
import net.torvald.tsvm.getHashStr
import net.torvald.tsvm.toInt
import java.io.ByteArrayInputStream
import kotlin.math.pow
import kotlin.math.roundToInt
private class RenderRunnable(val playhead: AudioAdapter.Playhead) : Runnable {
private fun printdbg(msg: Any) {
@@ -125,6 +129,71 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
@Volatile private var mp2Busy = false
// TAD (Terrarum Advanced Audio) decoder buffers
internal val tadInputBin = UnsafeHelper.allocate(65536L, this) // Input: compressed TAD chunk (max 64KB)
internal val tadDecodedBin = UnsafeHelper.allocate(65536L, this) // Output: PCMu8 stereo (32768 samples * 2 channels)
internal var tadQuality = 2 // Quality level used during encoding (0-5)
@Volatile private var tadBusy = false
// TAD decoder constants - Coefficient scalars for each subband (matching C decoder)
// Index 0 = LL band, Index 1-9 = H bands (L9 to L1)
private val TAD32_COEFF_SCALARS = floatArrayOf(
64.0f, 45.255f, 32.0f, 22.627f, 16.0f, 11.314f, 8.0f, 5.657f, 4.0f, 2.828f
)
// Base quantiser weight table (10 subbands: LL + 9 H bands)
// CRITICAL: Different weights for Mid (channel 0) and Side (channel 1) channels!
private val BASE_QUANTISER_WEIGHTS = arrayOf(
floatArrayOf( // Mid channel (channel 0)
4.0f, // LL (L9) DC
2.0f, // H (L9) 31.25 hz
1.8f, // H (L8) 62.5 hz
1.6f, // H (L7) 125 hz
1.4f, // H (L6) 250 hz
1.2f, // H (L5) 500 hz
1.0f, // H (L4) 1 khz
1.0f, // H (L3) 2 khz
1.3f, // H (L2) 4 khz
2.0f // H (L1) 8 khz
),
floatArrayOf( // Side channel (channel 1)
6.0f, // LL (L9) DC
5.0f, // H (L9) 31.25 hz
2.6f, // H (L8) 62.5 hz
2.4f, // H (L7) 125 hz
1.8f, // H (L6) 250 hz
1.3f, // H (L5) 500 hz
1.0f, // H (L4) 1 khz
1.0f, // H (L3) 2 khz
1.6f, // H (L2) 4 khz
3.2f // H (L1) 8 khz
)
)
private val LAMBDA_FIXED = 6.0f
// Deadzone marker for stochastic reconstruction (must match encoder)
private val DEADZONE_MARKER_QUANT = (-128).toByte()
// Deadband thresholds (must match encoder)
private val DEADBANDS = arrayOf(
floatArrayOf( // Mid channel
1.0f, 0.3f, 0.3f, 0.3f, 0.3f, 0.2f, 0.2f, 0.05f, 0.05f, 0.05f
),
floatArrayOf( // Side channel
1.0f, 0.3f, 0.3f, 0.3f, 0.3f, 0.2f, 0.2f, 0.05f, 0.05f, 0.05f
)
)
// Dither state for noise shaping (2 channels, 2 history samples each)
private val ditherError = Array(2) { FloatArray(2) }
// De-emphasis filter state (persistent across chunks to prevent discontinuities)
private var deemphPrevXL = 0.0f
private var deemphPrevYL = 0.0f
private var deemphPrevXR = 0.0f
private var deemphPrevYR = 0.0f
private val renderRunnables: Array<RenderRunnable>
private val renderThreads: Array<Thread>
private val writeQueueingRunnables: Array<WriteQueueingRunnable>
@@ -216,7 +285,9 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
in 0..114687 -> sampleBin[addr]
in 114688..131071 -> (adi - 114688).let { instruments[it / 64].getByte(it % 64) }
in 131072..262143 -> (adi - 131072).let { playdata[it / (8*64)][(it / 8) % 64].getByte(it % 8) }
else -> peek(addr % 262144)
in 262144..327679 -> tadInputBin[addr - 262144] // TAD input buffer (65536 bytes)
in 327680..393215 -> tadDecodedBin[addr - 327680] // TAD decoded output (65536 bytes)
else -> peek(addr % 393216)
}
}
@@ -227,6 +298,8 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
in 0..114687 -> { sampleBin[addr] = byte }
in 114688..131071 -> (adi - 114688).let { instruments[it / 64].setByte(it % 64, bi) }
in 131072..262143 -> (adi - 131072).let { playdata[it / (8*64)][(it / 8) % 64].setByte(it % 8, bi) }
in 262144..327679 -> tadInputBin[addr - 262144] = byte // TAD input buffer
in 327680..393215 -> tadDecodedBin[addr - 327680] = byte // TAD decoded output
}
}
@@ -239,6 +312,9 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
in 30..39 -> playheads[3].read(adi - 30)
40 -> -1
41 -> mp2Busy.toInt().toByte()
42 -> -1 // TAD control (write-only)
43 -> tadQuality.toByte()
44 -> tadBusy.toInt().toByte()
in 64..2367 -> mediaDecodedBin[addr - 64]
in 2368..4095 -> mediaFrameBin[addr - 2368]
in 4096..4097 -> 0
@@ -265,6 +341,14 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
if (bi and 16 != 0) { mp2Context = mp2Env.initialise() }
if (bi and 1 != 0) decodeMp2()
}
42 -> {
// TAD control: bit 0 = decode
if (bi and 1 != 0) decodeTad()
}
43 -> {
// TAD quality (0-5)
tadQuality = bi.coerceIn(0, 5)
}
in 64..2367 -> { mediaDecodedBin[addr - 64] = byte }
in 2368..4095 -> { mediaFrameBin[addr - 2368] = byte }
in 32768..65535 -> { (adi - 32768).let {
@@ -287,6 +371,8 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
pcmBin.destroy()
mediaFrameBin.destroy()
mediaDecodedBin.destroy()
tadInputBin.destroy()
tadDecodedBin.destroy()
}
else {
System.err.println("AudioAdapter already disposed")
@@ -304,6 +390,672 @@ class AudioAdapter(val vm: VM) : PeriBase(VM.PERITYPE_SOUND) {
mp2Env.decodeFrameU8(mp2Context, periMmioBase - 2368, true, periMmioBase - 64)
}
//=============================================================================
// TAD (Terrarum Advanced Audio) Decoder
//=============================================================================
/**
 * Draws one Laplacian-distributed noise value (used for stochastic reconstruction) by
 * pushing a centred uniform sample through the inverse Laplacian CDF.
 *
 * @param scale multiplier applied to the result (λ = 1/scale)
 */
private fun laplacianNoise(scale: Float): Float {
    val centred = urand() - 0.5f
    val direction = if (centred >= 0.0f) 1.0f else -1.0f

    // Cap the magnitude just below 0.5 so the logarithm never receives 0.
    val rawMagnitude = kotlin.math.abs(centred)
    val magnitude = if (rawMagnitude >= 0.49999f) 0.49999f else rawMagnitude

    return -direction * kotlin.math.ln(1.0f - 2.0f * magnitude) * scale
}
/** Thin alias of `urand()`, kept for compatibility. */
private fun frand01(): Float = urand()
/** TPDF (triangular probability density function) noise: the difference of two successive [frand01] draws. */
private fun tpdf1(): Float {
    val first = frand01()
    val second = frand01()
    return first - second
}
/**
 * Lambda-based decompanding: turns a quantised index back into a normalised value in
 * [-1, 1] by inverting the Laplacian-CDF mapping.
 *
 * @param quantVal signed quantised index; 0 decodes to exactly 0
 * @param maxIndex largest index magnitude; larger magnitudes are clamped to it
 */
private fun lambdaDecompanding(quantVal: Byte, maxIndex: Int): Float {
    val raw = quantVal.toInt()
    if (raw == 0) return 0.0f

    // magnitude of the index, limited to the valid range
    val index = minOf(kotlin.math.abs(raw), maxIndex)

    // index -> [0, 1] -> upper half of the CDF, [0.5, 1.0]
    val normalisedCdf = index.toFloat() / maxIndex
    val cdf = 0.5f + normalisedCdf * 0.5f

    // inverse Laplacian CDF for x >= 0: x = -(1/λ) * ln(2*(1-F)), then limited to [0, 1]
    val magnitude = (-(1.0f / LAMBDA_FIXED) * kotlin.math.ln(2.0f * (1.0f - cdf))).coerceIn(0.0f, 1.0f)

    val sign = if (raw < 0) -1 else 1
    return sign * magnitude
}
/** Returns 1 for positive input, -1 for negative input and 0 for anything else. */
private fun signum(x: Float): Float =
    if (x > 0.0f) 1.0f
    else if (x < 0.0f) -1.0f
    else 0.0f
/**
 * Gamma expansion (inverse of gamma compression) applied in place to both channels:
 * every sample becomes sign(v) * |v|^2, i.e. |v|^(1/γ) with γ = 0.5.
 *
 * @param count number of leading samples to process in each array
 */
private fun expandGamma(left: FloatArray, right: FloatArray, count: Int) {
    var i = 0
    while (i < count) {
        val l = left[i]
        val lMag = kotlin.math.abs(l)
        left[i] = signum(l) * lMag * lMag

        val r = right[i]
        val rMag = kotlin.math.abs(r)
        right[i] = signum(r) * rMag * rMag

        i++
    }
}
//=============================================================================
// De-emphasis Filter
//=============================================================================
/**
 * Returns the de-emphasis filter coefficients as (b0, b1, a1).
 *
 * b0 is 1, b1 is 0 (no feedforward delay) and a1 is the negated de-emphasis factor 0.5,
 * negative because the filter equation subtracts a1 * previous output.
 */
private fun calculateDeemphasisCoeffs(): Triple<Float, Float, Float> {
    val deemphasisFactor = 0.5f
    return Triple(1.0f, 0.0f, -deemphasisFactor)
}
/**
 * Runs the de-emphasis filter in place over the first [count] samples of each channel.
 * The previous input/output of each channel is kept in instance fields so the filter
 * continues seamlessly from one chunk to the next.
 */
private fun applyDeemphasis(left: FloatArray, right: FloatArray, count: Int) {
    val (feedIn, feedDelayed, feedBack) = calculateDeemphasisCoeffs()

    // left channel
    repeat(count) { i ->
        val input = left[i]
        val output = feedIn * input + feedDelayed * deemphPrevXL - feedBack * deemphPrevYL
        left[i] = output
        deemphPrevXL = input
        deemphPrevYL = output
    }

    // right channel
    repeat(count) { i ->
        val input = right[i]
        val output = feedIn * input + feedDelayed * deemphPrevXR - feedBack * deemphPrevYR
        right[i] = output
        deemphPrevXR = input
        deemphPrevYR = output
    }
}
/**
 * Converts mid/side samples to left/right (no dithering here): left = mid + side,
 * right = mid - side, each limited to [-1, 1].
 *
 * @param sampleCount number of leading samples to convert
 */
private fun msCorrelate(mid: FloatArray, side: FloatArray, left: FloatArray, right: FloatArray, sampleCount: Int) {
    repeat(sampleCount) { i ->
        val sum = mid[i] + side[i]
        val difference = mid[i] - side[i]
        left[i] = sum.coerceIn(-1.0f, 1.0f)
        right[i] = difference.coerceIn(-1.0f, 1.0f)
    }
}
/**
 * Converts float samples to unsigned 8-bit PCM with noise-shaped dithering and stores
 * them interleaved (left, right, left, right, ...) in `tadDecodedBin`.
 *
 * Each channel keeps its last two quantisation errors in `ditherError` and feeds them
 * back into the next sample.
 *
 * NOTE(review): sampleCount is not checked against the size of tadDecodedBin here —
 * confirm the caller limits it.
 */
private fun pcm32fToPcm8(fleft: FloatArray, fright: FloatArray, sampleCount: Int) {
    val feedback1 = 1.5f    // weight of the most recent error
    val feedback2 = -0.75f  // weight of the error before that
    val fullScale = 127.5f
    val unsignedBias = 128
    // dither amplitude reduced from 0.5 to coordinate with coefficient-domain dithering
    val ditherGain = 0.2f

    // Quantises sample `index` of one channel and updates that channel's error history.
    fun quantise(channel: Int, source: FloatArray, index: Int) {
        val history = ditherError[channel]
        val feedback = feedback1 * history[0] + feedback2 * history[1]
        val dither = ditherGain * tpdf1()
        val shaped = (source[index] + feedback + dither / fullScale).coerceIn(-1.0f, 1.0f)
        val quantised = (shaped * fullScale).roundToInt().coerceIn(-128, 127)
        tadDecodedBin[index * 2L + channel] = (quantised + unsignedBias).toByte()

        val residual = shaped - quantised.toFloat() / fullScale
        history[1] = history[0] // shift history
        history[0] = residual
    }

    for (i in 0 until sampleCount) {
        quantise(0, fleft, i)
        quantise(1, fright, i)
    }
}
//=============================================================================
// Binary Tree EZBC Decoder (1D Variant for TAD)
//=============================================================================
/**
 * Bit-level reader over a byte array for the EZBC decoder. Bits are consumed from the
 * least significant bit of each byte upwards.
 */
private class TadBitstreamReader(private val data: ByteArray) {
    private var bytePos = 0
    private var bitPos = 0

    /** Returns the next bit; past the end of the data it prints an error and returns 0. */
    fun readBit(): Int {
        if (bytePos >= data.size) {
            println("ERROR: Bitstream underflow")
            return 0
        }

        val current = data[bytePos].toInt() and 0xFF
        val bit = (current shr bitPos) and 1

        if (++bitPos == 8) {
            bitPos = 0
            bytePos++
        }
        return bit
    }

    /** Reads [numBits] bits; the first bit read becomes bit 0 of the result. */
    fun readBits(numBits: Int): Int =
        (0 until numBits).fold(0) { acc, shift -> acc or (readBit() shl shift) }

    /** Number of bytes touched so far; a partially read byte counts as a whole one. */
    fun getBytesConsumed(): Int = if (bitPos > 0) bytePos + 1 else bytePos
}
// Block of the 1D binary tree: a run of `length` coefficients beginning at index `start`
private data class TadBlock(val start: Int, val length: Int)
/** Append-only, index-addressable list of [TadBlock]s used as a work queue by the EZBC decoder. */
private class TadBlockQueue {
    private val blocks = ArrayList<TadBlock>()

    /** Number of queued blocks. */
    val size: Int
        get() = blocks.size

    /** Appends [block] to the end of the queue. */
    fun push(block: TadBlock) {
        blocks += block
    }

    /** Returns the block at [index] without removing it. */
    fun get(index: Int): TadBlock = blocks[index]

    /** Removes every queued block. */
    fun clear() = blocks.clear()
}
// Per-coefficient state for refinement: whether the coefficient has become significant,
// and the bitplane at which that happened
private data class TadCoeffState(var significant: Boolean = false, var firstBitplane: Int = 0)
/** True when every coefficient covered by [block] has a magnitude below [threshold]. */
private fun tadIsZeroBlock(coeffs: ByteArray, block: TadBlock, threshold: Int): Boolean {
    val end = block.start + block.length
    return (block.start until end).none { kotlin.math.abs(coeffs[it].toInt()) >= threshold }
}
/**
 * Returns the position of the most significant set bit of [value] (its bitplane number).
 * Values of 1 or less, including 0 and negatives, yield 0.
 */
private fun tadGetMsbBitplane(value: Int): Int =
    if (value <= 1) 0
    else 31 - Integer.numberOfLeadingZeros(value)
/**
 * Decodes a block that has just become significant at [bitplane], splitting it in half
 * recursively until single coefficients are reached.
 *
 * For a single coefficient a sign bit is read, the coefficient is set to ±(1 shl bitplane),
 * its state is marked significant and the block is queued in [nextSignificant]. For larger
 * blocks one significance bit is read per half: a significant half is decoded recursively,
 * an insignificant half is queued in [nextInsignificant].
 *
 * @param bs bitstream positioned at this block's data
 * @param coeffs coefficient array being reconstructed
 * @param states per-coefficient significance state, indexed like [coeffs]
 * @param bitplane bitplane currently being decoded
 * @param block block to decode
 * @param nextInsignificant receives halves that stay insignificant
 * @param nextSignificant receives coefficients that became significant
 */
private fun tadDecodeSignificantBlockRecursive(
    bs: TadBitstreamReader,
    coeffs: ByteArray,
    states: Array<TadCoeffState>,
    bitplane: Int,
    block: TadBlock,
    nextInsignificant: TadBlockQueue,
    nextSignificant: TadBlockQueue
) {
    // An empty block holds no coefficient. Without this guard a zero-length block
    // (possible only with a malformed header) would recurse once per remaining 1 bit.
    if (block.length <= 0) return

    // If size 1: read sign bit and reconstruct value
    if (block.length == 1) {
        val idx = block.start
        val signBit = bs.readBit()

        // Reconstruct absolute value from bitplane
        val absVal = 1 shl bitplane

        // Apply sign
        coeffs[idx] = (if (signBit != 0) -absVal else absVal).toByte()
        states[idx].significant = true
        states[idx].firstBitplane = bitplane
        nextSignificant.push(block)
        return
    }

    // Block has at least two coefficients, so both halves are non-empty.
    // (The former `block.length / 2.coerceAtLeast(1)` applied coerceAtLeast to the
    // literal 2 and therefore already evaluated to block.length / 2.)
    val mid = block.length / 2

    // Process left child
    val left = TadBlock(block.start, mid)
    if (bs.readBit() != 0) {
        tadDecodeSignificantBlockRecursive(bs, coeffs, states, bitplane, left, nextInsignificant, nextSignificant)
    } else {
        nextInsignificant.push(left)
    }

    // Process right child
    val right = TadBlock(block.start + mid, block.length - mid)
    if (bs.readBit() != 0) {
        tadDecodeSignificantBlockRecursive(bs, coeffs, states, bitplane, right, nextInsignificant, nextSignificant)
    } else {
        nextInsignificant.push(right)
    }
}
/**
 * Binary tree EZBC decoding of a single channel (1D variant).
 *
 * The stream starts with an 8-bit MSB bitplane number and a 16-bit coefficient count.
 * Bitplanes are then decoded from the MSB bitplane down to 0: first a significance pass
 * over the blocks that are still insignificant, then a refinement pass that reads one
 * more magnitude bit for every coefficient that is already significant.
 *
 * NOTE(review): the coefficient count from the header is not checked against
 * coeffs.size — confirm the caller guarantees it fits.
 *
 * @param input encoded channel data, starting at the header
 * @param inputSize unused; the reader relies on the size of [input]
 * @param coeffs output array; cleared to zero before decoding
 * @return number of bytes of [input] consumed (a partially read byte counts as whole)
 */
private fun tadDecodeChannelEzbc(input: ByteArray, inputSize: Int, coeffs: ByteArray): Int {
    val bs = TadBitstreamReader(input)

    // Read header: MSB bitplane and length
    val msbBitplane = bs.readBits(8)
    val count = bs.readBits(16)

    // Initialise coefficient array to zero
    coeffs.fill(0)

    // Track coefficient significance
    val states = Array(count) { TadCoeffState() }

    // Current and next-bitplane work queues; swapped after every bitplane
    var insignificantQueue = TadBlockQueue()
    var nextInsignificant = TadBlockQueue()
    var significantQueue = TadBlockQueue()
    var nextSignificant = TadBlockQueue()

    // Start with root block as insignificant
    insignificantQueue.push(TadBlock(0, count))

    // Process bitplanes from MSB to LSB
    for (bitplane in msbBitplane downTo 0) {
        // Significance pass over blocks that are still insignificant
        for (i in 0 until insignificantQueue.size) {
            val block = insignificantQueue.get(i)

            if (bs.readBit() == 0) {
                // Still insignificant
                nextInsignificant.push(block)
            } else {
                // Became significant: recursively decode
                tadDecodeSignificantBlockRecursive(
                    bs, coeffs, states, bitplane, block,
                    nextInsignificant, nextSignificant
                )
            }
        }

        // Refinement pass: read next bit for already-significant coefficients
        for (i in 0 until significantQueue.size) {
            val block = significantQueue.get(i)
            val idx = block.start

            // Add this bit to the coefficient's magnitude
            if (bs.readBit() != 0) {
                val sign = if (coeffs[idx] < 0) -1 else 1
                val absVal = kotlin.math.abs(coeffs[idx].toInt())
                coeffs[idx] = (sign * (absVal or (1 shl bitplane))).toByte()
            }

            // Add to nextSignificant so it continues being refined
            nextSignificant.push(block)
        }

        // Swap queues for next bitplane: the "next" queue becomes current, and the
        // emptied old queue is reused as the new "next" queue (no element copying).
        insignificantQueue.clear()
        val spareInsignificant = insignificantQueue
        insignificantQueue = nextInsignificant
        nextInsignificant = spareInsignificant

        significantQueue.clear()
        val spareSignificant = significantQueue
        significantQueue = nextSignificant
        nextSignificant = spareSignificant
    }

    return bs.getBytesConsumed()
}
/**
 * Decodes the TAD audio chunk currently held in [tadInputBin].
 *
 * Chunk header (little-endian): 2 bytes sample count, 1 byte max index,
 * 4 bytes compressed payload size. The payload is Zstd-compressed and holds
 * the Mid channel followed by the Side channel, both EZBC-coded.
 *
 * [tadBusy] is raised for the whole call and always lowered again on exit.
 * A Zstd failure is logged and aborts the decode; any other exception is
 * printed via printStackTrace().
 */
private fun decodeTad() {
    tadBusy = true
    try {
        // --- chunk header ---
        var cursor = 0L
        fun nextByte() = tadInputBin[cursor++].toUint()
        fun readLittleEndian(byteCount: Int): Int {
            var value = 0
            for (k in 0 until byteCount) {
                value = value or (nextByte() shl (8 * k))
            }
            return value
        }

        val sampleCount = readLittleEndian(2)
        val maxIndex = nextByte()
        val payloadSize = readLittleEndian(4)

        // --- pull the compressed payload out of native memory and inflate it ---
        val compressed = ByteArray(payloadSize)
        UnsafeHelper.memcpyRaw(
            null, tadInputBin.ptr + cursor,
            compressed, UnsafeHelper.getArrayOffset(compressed),
            payloadSize.toLong()
        )

        val payload: ByteArray = try {
            ZstdInputStream(ByteArrayInputStream(compressed)).use { it.readBytes() }
        } catch (e: Exception) {
            println("ERROR: Zstd decompression failed: ${e.message}")
            return
        }

        // --- EZBC decode: Mid first, Side directly after it ---
        val quantMid = ByteArray(sampleCount)
        val quantSide = ByteArray(sampleCount)

        val midBytesConsumed = tadDecodeChannelEzbc(payload, payload.size, quantMid)
        tadDecodeChannelEzbc(
            payload.sliceArray(midBytesConsumed until payload.size),
            payload.size - midBytesConsumed,
            quantSide
        )

        // --- dequantise to Float32 (channel 0 = Mid, channel 1 = Side) ---
        val dwtLevels = calculateDwtLevels(sampleCount)
        val dwtMid = FloatArray(sampleCount)
        val dwtSide = FloatArray(sampleCount)
        dequantiseDwtCoefficients(0, quantMid, dwtMid, sampleCount, maxIndex, dwtLevels)
        dequantiseDwtCoefficients(1, quantSide, dwtSide, sampleCount, maxIndex, dwtLevels)

        // --- inverse CDF 9/7 DWT, in place ---
        dwt97InverseMultilevel(dwtMid, sampleCount, dwtLevels)
        dwt97InverseMultilevel(dwtSide, sampleCount, dwtLevels)

        // --- Mid/Side back to Left/Right ---
        val pcm32Left = FloatArray(sampleCount)
        val pcm32Right = FloatArray(sampleCount)
        msCorrelate(dwtMid, dwtSide, pcm32Left, pcm32Right, sampleCount)

        // --- post-processing chain; the order is significant ---
        expandGamma(pcm32Left, pcm32Right, sampleCount)      // dynamic range expansion
        // expandMuLaw(pcm32Left, pcm32Right, sampleCount)
        applyDeemphasis(pcm32Left, pcm32Right, sampleCount)  // after gamma, before 8-bit conversion
        pcm32fToPcm8(pcm32Left, pcm32Right, sampleCount)     // dither down to PCMu8
    } catch (e: Exception) {
        e.printStackTrace()
    } finally {
        tadBusy = false
    }
}
/**
 * Number of DWT decomposition levels to use for a chunk.
 *
 * Fixed at 9 (kept in step with the C decoder); [chunkSize] is accepted but
 * does not influence the result.
 */
private fun calculateDwtLevels(chunkSize: Int): Int = 9
/**
 * Root-mean-square of `c[start]` .. `c[start + len - 1]`.
 *
 * Squares are accumulated in double precision and the result is narrowed to
 * Float. An empty band (`len == 0`) yields 0.
 */
private fun computeBandRms(c: FloatArray, start: Int, len: Int): Float {
    if (len == 0) return 0.0f

    var energy = 0.0
    repeat(len) { step ->
        val sample = c[start + step].toDouble()
        energy += sample * sample
    }
    val meanSquare = energy / len
    return kotlin.math.sqrt(meanSquare).toFloat()
}
// xorshift32 generator used for light dithering.
// The state is a single 32-bit word; each call advances it with the
// 13 / 17 / 5 shift triple and returns the new state.
private var xorshift32State = 0x9E3779B9u

private fun xorshift32(): UInt {
    val afterLeft13 = xorshift32State.let { it xor (it shl 13) }
    val afterRight17 = afterLeft13 xor (afterLeft13 shr 17)
    val afterLeft5 = afterRight17 xor (afterRight17 shl 5)
    xorshift32State = afterLeft5
    return afterLeft5
}
/**
 * Draws a float in [0, 1): the low 24 bits of the next xorshift32 output
 * divided by 2^24.
 */
private fun urand(): Float {
    val low24 = xorshift32() and 0xFFFFFFu
    return low24.toFloat() / 16777216.0f
}
/**
 * Difference of two consecutive [urand] draws (first minus second), giving a
 * triangular-distributed value in (-1, 1).
 */
private fun tpdf(): Float {
    val first = urand()
    val second = urand()
    return first - second
}
/**
 * Adds a very small amount of TPDF dither to one coefficient band, in place,
 * to soften quantisation grain.
 *
 * Bands shorter than 4 coefficients are left untouched. The PRNG is re-seeded
 * from [len] and [Q] on every call, so the dither pattern for a given band is
 * reproducible. [lowerBandRms] is accepted but not used.
 *
 * @param c     coefficient array, modified in place
 * @param start index of the first coefficient in the band
 * @param len   number of coefficients in the band
 * @param Q     scale value; the dither amplitude is 5% of it
 */
private fun spectralInterpolateBand(c: FloatArray, start: Int, len: Int, Q: Float, lowerBandRms: Float) {
    if (len >= 4) {
        // Deterministic per-band seed
        xorshift32State = 0x9E3779B9u xor len.toUInt() xor (Q * 65536.0f).toUInt()

        val ditherAmp = 0.05f * Q
        repeat(len) { step ->
            c[start + step] += tpdf() * ditherAmp
        }
    }
}
/**
 * Converts quantised DWT indices back to floating-point coefficients.
 *
 * The coefficient array is treated as `dwtLevels + 1` subbands: the first has
 * `count shr dwtLevels` entries and each following one doubles in size,
 * starting again from that first size. Every coefficient is decompanded with
 * [lambdaDecompanding] and then scaled by its subband's scalar and by the
 * channel-specific base quantiser weight. No noise is injected here.
 *
 * @param channel   0 = Mid, 1 = Side; selects the row of BASE_QUANTISER_WEIGHTS
 * @param quantised input indices, one byte per coefficient
 * @param coeffs    output coefficients
 * @param count     number of coefficients to process
 * @param maxIndex  forwarded unchanged to [lambdaDecompanding]
 * @param dwtLevels number of DWT decomposition levels
 */
private fun dequantiseDwtCoefficients(channel: Int, quantised: ByteArray, coeffs: FloatArray, count: Int,
                                      maxIndex: Int, dwtLevels: Int) {
    // bandEdges[s + 1] is the first index that no longer belongs to subband s
    val coarsestBandSize = count shr dwtLevels
    val bandEdges = IntArray(dwtLevels + 2)
    bandEdges[0] = 0
    bandEdges[1] = coarsestBandSize
    for (edge in 2 until bandEdges.size) {
        bandEdges[edge] = bandEdges[edge - 1] + (coarsestBandSize shl (edge - 2))
    }

    val quantiserScale = 1.0f

    for (pos in 0 until count) {
        // First subband whose upper edge lies beyond pos; anything past the
        // last edge is attributed to the final subband
        var band = 0
        while (band < dwtLevels && pos >= bandEdges[band + 1]) {
            band++
        }

        val normalised = lambdaDecompanding(quantised[pos], maxIndex)
        // Weights are per channel (Mid = 0, Side = 1) and per subband
        val bandWeight = BASE_QUANTISER_WEIGHTS[channel][band] * quantiserScale
        coeffs[pos] = normalised * TAD32_COEFF_SCALARS[band] * bandWeight
    }
}
/**
 * One level of the inverse CDF 9/7 wavelet transform, in place, via lifting.
 *
 * On entry `data[0 until length]` holds the low-pass coefficients in its first
 * `(length + 1) / 2` slots followed by the high-pass coefficients. On exit the
 * same range holds the reconstructed samples, low-pass results at even
 * positions and high-pass results at odd positions. Elements at index
 * `length` and beyond are not touched. Lengths below 2 are a no-op.
 */
private fun dwt97Inverse1d(data: FloatArray, length: Int) {
    if (length < 2) return

    val lowCount = (length + 1) / 2   // size of the low-pass half
    val highCount = length / 2        // size of the high-pass half; lowCount + highCount == length

    // Work on a private copy so the final interleave can write straight into data
    val work = FloatArray(length) { data[it] }

    // Lifting coefficients
    val alpha = -1.586134342f
    val beta = -0.052980118f
    val gamma = 0.882911076f
    val delta = 0.443506852f
    val K = 1.230174105f

    // Reverts an "update" step: each low-pass entry is corrected by its two
    // neighbouring high-pass entries (mirrored at the left edge, zero past the right edge)
    fun undoUpdate(coeff: Float) {
        for (k in 0 until lowCount) {
            val here = if (lowCount + k < length) work[lowCount + k] else 0.0f
            val before = if (k > 0) work[lowCount + k - 1] else here
            work[k] -= coeff * (here + before)
        }
    }

    // Reverts a "predict" step: each high-pass entry is corrected by its two
    // neighbouring low-pass entries (mirrored at the right edge)
    fun undoPredict(coeff: Float) {
        for (k in 0 until highCount) {
            val here = work[k]
            val after = if (k + 1 < lowCount) work[k + 1] else here
            work[lowCount + k] -= coeff * (here + after)
        }
    }

    // Step 1: undo scaling
    for (k in 0 until lowCount) {
        work[k] /= K
    }
    for (k in 0 until highCount) {
        work[lowCount + k] *= K
    }

    // Steps 2-5: undo the lifting steps in reverse order
    undoUpdate(delta)
    undoPredict(gamma)
    undoUpdate(beta)
    undoPredict(alpha)

    // Interleave: low-pass to even slots, high-pass to odd slots
    for (k in 0 until lowCount) {
        data[2 * k] = work[k]
    }
    for (k in 0 until highCount) {
        data[2 * k + 1] = work[lowCount + k]
    }
}
/**
 * Multi-level inverse CDF 9/7 transform, in place.
 *
 * The forward transform is assumed to have processed prefixes of lengths
 * `length`, `(length + 1) / 2`, `((length + 1) / 2 + 1) / 2`, ... for `levels`
 * levels. This routine recomputes that sequence and applies [dwt97Inverse1d]
 * to the same prefixes in the opposite order, shortest first.
 */
private fun dwt97InverseMultilevel(data: FloatArray, length: Int, levels: Int) {
    // spans[k] = prefix length handled at level k
    val spans = IntArray(levels + 1)
    spans[0] = length
    var filled = 0
    while (filled < levels) {
        spans[filled + 1] = (spans[filled] + 1) / 2
        filled++
    }

    // Undo the deepest level first, then work outwards to the full length
    var level = levels - 1
    while (level >= 0) {
        dwt97Inverse1d(data, spans[level])
        level--
    }
}

View File

@@ -109,6 +109,8 @@ abstract class BlockTransferInterface(val isMaster: Boolean, val isSlave: Boolea
* @param byteCount Number of bytes being transmitted
*/
protected fun applyBaudRateDelay(byteCount: Int) {
if (baudRate <= 0) return
// Calculate delay in milliseconds
// Baud rate is bits per second, and we assume 10 bits per byte (8 data bits + start/stop bits)
val bitsTransmitted = byteCount * 10

View File

@@ -50,7 +50,7 @@ data class SuperGraphicsAddonConfig(
val bankCount: Int = 1
)
class ReferenceGraphicsAdapter(assetsRoot: String, vm: VM) : GraphicsAdapter(assetsRoot, vm, GraphicsAdapter.DEFAULT_CONFIG_COLOR_CRT, SuperGraphicsAddonConfig(2))
class ReferenceGraphicsAdapter(assetsRoot: String, vm: VM) : GraphicsAdapter(assetsRoot, vm, GraphicsAdapter.DEFAULT_CONFIG_COLOR_CRT, SuperGraphicsAddonConfig(4))
class ReferenceGraphicsAdapter2(assetsRoot: String, vm: VM) : RemoteGraphicsAdapter(assetsRoot, vm, GraphicsAdapter.DEFAULT_CONFIG_COLOR_CRT, SuperGraphicsAddonConfig(2))
class ReferenceLikeLCD(assetsRoot: String, vm: VM) : GraphicsAdapter(assetsRoot, vm, GraphicsAdapter.DEFAULT_CONFIG_PMLCD)
@@ -76,6 +76,13 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
internal val framebuffer = UnsafeHelper.allocate(WIDTH.toLong() * HEIGHT, this)//Pixmap(WIDTH, HEIGHT, Pixmap.Format.Alpha)
internal val framebuffer2 = if (sgr.bankCount >= 2) UnsafeHelper.allocate(WIDTH.toLong() * HEIGHT, this) else null
internal val framebuffer3 = if (sgr.bankCount >= 3) UnsafeHelper.allocate(WIDTH.toLong() * HEIGHT, this) else null
internal val framebuffer4 = if (sgr.bankCount >= 4) UnsafeHelper.allocate(WIDTH.toLong() * HEIGHT, this) else null
init {
framebuffer4?.fillWith(-1)
}
internal val framebufferOut = Pixmap(WIDTH, HEIGHT, Pixmap.Format.RGBA8888)
protected var rendertex = Texture(1, 1, Pixmap.Format.RGBA8888)
internal val paletteOfFloats = FloatArray(1024) {
@@ -100,6 +107,8 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
internal val unusedArea = UnsafeHelper.allocate(1024, this)
internal val scanlineOffsets = UnsafeHelper.allocate(1024, this)
internal val videoBuffer = UnsafeHelper.allocate(58 * 1024 * 1024, this) // 48 MB for triple-buffering (3 slots × 24 frames × 752 kB)
protected val paletteShader = LoadShader(DRAW_SHADER_VERT, config.paletteShader)
protected val textShader = LoadShader(DRAW_SHADER_VERT, config.fragShader)
@@ -130,7 +139,7 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
private val outFBOregion = Array(2) { TextureRegion(outFBOs[it].colorBufferTexture) }
private val outFBObatch = SpriteBatch(1000, DefaultGL32Shaders.createSpriteBatchShader())
private var graphicsMode = 0
var graphicsMode = 0
private var layerArrangement = 0
@@ -227,7 +236,19 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
override fun peek(addr: Long): Byte? {
val adi = addr.toInt()
if (framebuffer2 != null && addr >= 262144) {
if (framebuffer4 != null && addr >= 786432) {
return when (addr - 786432) {
in 0 until 250880 -> framebuffer4[addr - 786432]
else -> null
}
}
else if (framebuffer3 != null && addr >= 524288) {
return when (addr - 524288) {
in 0 until 250880 -> framebuffer3[addr - 524288]
else -> null
}
}
else if (framebuffer2 != null && addr >= 262144) {
return when (addr - 262144) {
in 0 until 250880 -> framebuffer2[addr - 262144]
else -> null
@@ -275,6 +296,24 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
override fun poke(addr: Long, byte: Byte) {
val adi = addr.toInt()
val bi = byte.toInt().and(255)
if (framebuffer4 != null) {
when (addr - 786432) {
in 0 until 250880 -> {
lastUsedColour = byte
framebuffer4[addr - 786432] = byte
return
}
}
}
if (framebuffer3 != null) {
when (addr - 524288) {
in 0 until 250880 -> {
lastUsedColour = byte
framebuffer3[addr - 524288] = byte
return
}
}
}
if (framebuffer2 != null) {
when (addr - 262144) {
in 0 until 250880 -> {
@@ -384,6 +423,8 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
private fun runCommand(opcode: Byte) {
val arg1 = unusedArea[4].toInt().and(255)
val arg2 = unusedArea[5].toInt().and(255)
val arg3 = unusedArea[6].toInt().and(255)
val arg4 = unusedArea[7].toInt().and(255)
when (opcode.toInt()) {
1 -> {
@@ -399,6 +440,12 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
4 -> {
framebuffer2?.fillWith(arg1.toByte())
}
6 -> {
framebuffer3?.fillWith(arg1.toByte())
}
8 -> {
framebuffer4?.fillWith(arg1.toByte())
}
3 -> {
for (it in 0 until 1024) {
val rgba = DEFAULT_PALETTE[it / 4]
@@ -407,6 +454,8 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
}
framebuffer.fillWith(arg1.toByte())
framebuffer2?.fillWith(arg2.toByte())
framebuffer3?.fillWith(arg3.toByte())
framebuffer4?.fillWith(arg4.toByte())
}
16, 17 -> readFontRom(opcode - 16)
18, 19 -> writeFontRom(opcode - 18)
@@ -675,7 +724,7 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
for (line in 0 until ch) {
val word = mappedFontRom[char.toLong() * ch + line].toInt()
for (bm in scanline.indices step 4) {
val pixel = 255 * ((word shr (cw - 1 - bm)) and 1)
val pixel = 255 * ((word shr (cw - 1 - bm/4)) and 1)
val matte = (if (pixel == 0) 0 else 255).toByte()
scanline[bm+0] = matte
scanline[bm+1] = matte
@@ -899,6 +948,8 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
// textShader.tryDispose()
framebuffer.destroy()
framebuffer2?.destroy()
framebuffer3?.destroy()
framebuffer4?.destroy()
framebufferOut.tryDispose()
rendertex.tryDispose()
textArea.destroy()
@@ -915,6 +966,7 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
chrrom0.tryDispose()
chrrom.tryDispose()
unusedArea.destroy()
videoBuffer.destroy()
scanlineOffsets.destroy()
instArea.destroy()
mappedFontRom.destroy()
@@ -942,7 +994,52 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
chrrom.pixels.position(0)
framebufferOut.setColor(-1);framebufferOut.fill()
if (graphicsMode == 4 && framebuffer2 != null) {
if (graphicsMode == 8 && framebuffer4 != null && framebuffer3 != null && framebuffer2 != null) {
for (y in 0 until HEIGHT) {
var xoff = scanlineOffsets[2L * y].toUint() or scanlineOffsets[2L * y + 1].toUint().shl(8)
if (xoff.and(0x8000) != 0) xoff = xoff or 0xFFFF0000.toInt()
val xs = (0 + xoff).coerceIn(0, WIDTH - 1)..(WIDTH - 1 + xoff).coerceIn(0, WIDTH - 1)
if (xoff in -(WIDTH - 1) until WIDTH) {
for (x in xs) {
val r = framebuffer[y.toLong() * WIDTH + (x - xoff)].toUint() // coerceIn not required as (x - xoff) never escapes 0..559
val g = framebuffer2[y.toLong() * WIDTH + (x - xoff)].toUint() // coerceIn not required as (x - xoff) never escapes 0..559
val b = framebuffer3[y.toLong() * WIDTH + (x - xoff)].toUint() // coerceIn not required as (x - xoff) never escapes 0..559
val a = framebuffer4[y.toLong() * WIDTH + (x - xoff)].toUint() // coerceIn not required as (x - xoff) never escapes 0..559
framebufferOut.setColor(
r.shl(24) or g.shl(16) or b.shl(8) or a
)
framebufferOut.drawPixel(x, y)
}
}
}
}
else if (graphicsMode == 5 && framebuffer2 != null) {
for (y in 0 until HEIGHT) {
var xoff = scanlineOffsets[2L * y].toUint() or scanlineOffsets[2L * y + 1].toUint().shl(8)
if (xoff.and(0x8000) != 0) xoff = xoff or 0xFFFF0000.toInt()
val xs = (0 + xoff).coerceIn(0, WIDTH - 1)..(WIDTH - 1 + xoff).coerceIn(0, WIDTH - 1)
if (xoff in -(WIDTH - 1) until WIDTH) {
for (x in xs) {
val rg = framebuffer[y.toLong() * WIDTH + (x - xoff)].toUint() // coerceIn not required as (x - xoff) never escapes 0..559
val ba = framebuffer2[y.toLong() * WIDTH + (x - xoff)].toUint() // coerceIn not required as (x - xoff) never escapes 0..559
val r = rg.ushr(2).and(31)
val g = rg.and(3).shl(3) or ba.ushr(5)
val b = ba.and(31)
val a = rg.ushr(7) * 255
framebufferOut.setColor(
r.shl(27) or r.ushr(2).shl(24) or
g.shl(19) or g.ushr(2).shl(16) or
b.shl(11) or b.ushr(2).shl(8) or
a
)
framebufferOut.drawPixel(x, y)
}
}
}
}
else if (graphicsMode == 4 && framebuffer2 != null) {
for (y in 0 until HEIGHT) {
var xoff = scanlineOffsets[2L * y].toUint() or scanlineOffsets[2L * y + 1].toUint().shl(8)
if (xoff.and(0x8000) != 0) xoff = xoff or 0xFFFF0000.toInt()
@@ -968,7 +1065,7 @@ open class GraphicsAdapter(private val assetsRoot: String, val vm: VM, val confi
}
}
else if (graphicsMode == 3 && framebuffer2 != null) {
val layerOrder = (if (graphicsMode == 1) LAYERORDERS4 else LAYERORDERS2)[layerArrangement]
val layerOrder = LAYERORDERS2[layerArrangement]
val fb1 = if (layerOrder[0] == 0) framebuffer else framebuffer2
val fb2 = if (layerOrder[0] == 0) framebuffer2 else framebuffer

View File

@@ -78,8 +78,8 @@ open class HSDPA(val vm: VM, val baudRate: Long = 133_333_333L): PeriBase("hsdpa
}
private var opcodeBuf = 0
private var arg1 = 0
private var arg2 = 0
private var arg1 = 0L // Long to support >2GB sequential I/O position accumulation
private var arg2 = 0L // Long to support >2GB memory addressing
/**
* Reads a value from the MMIO register
@@ -167,18 +167,18 @@ open class HSDPA(val vm: VM, val baudRate: Long = 133_333_333L): PeriBase("hsdpa
val byteOffset = (address - REG_SEQ_IO_ARG1)
if (byteOffset == 0) {
// Reset arg1 when writing to LSB
arg1 = value.toUint()
arg1 = value.toUint().toLong()
} else {
arg1 = arg1 or (value.toUint() shl (byteOffset * 8))
arg1 = arg1 or ((value.toUint().toLong()) shl (byteOffset * 8))
}
}
in REG_SEQ_IO_ARG2..REG_SEQ_IO_ARG2+2 -> {
val byteOffset = (address - REG_SEQ_IO_ARG2)
if (byteOffset == 0) {
// Reset arg2 when writing to LSB
arg2 = value.toUint()
arg2 = value.toUint().toLong()
} else {
arg2 = arg2 or (value.toUint() shl (byteOffset * 8))
arg2 = arg2 or ((value.toUint().toLong()) shl (byteOffset * 8))
}
}
else -> null
@@ -390,23 +390,23 @@ open class HSDPA(val vm: VM, val baudRate: Long = 133_333_333L): PeriBase("hsdpa
/**
* Skip bytes in sequential I/O mode
*/
protected open fun sequentialIOSkip(bytes: Int) {
protected open fun sequentialIOSkip(bytes: Long) {
sequentialIOPosition += bytes
}
/**
* Read bytes from disk to VM memory in sequential I/O mode
*/
protected open fun sequentialIORead(bytes: Int, vmMemoryPointer: Int) {
protected open fun sequentialIORead(bytes: Long, vmMemoryPointer: Long) {
// Default implementation - subclasses should override
// For now, just advance the position
sequentialIOPosition += bytes
}
/**
* Write bytes from VM memory to disk in sequential I/O mode
*/
protected open fun sequentialIOWrite(bytes: Int, vmMemoryPointer: Int) {
protected open fun sequentialIOWrite(bytes: Long, vmMemoryPointer: Long) {
// Default implementation - subclasses should override
// For now, just advance the position
sequentialIOPosition += bytes

View File

@@ -2,7 +2,9 @@ package net.torvald.tsvm.peripheral
import net.torvald.tsvm.VM
import java.io.File
import java.io.RandomAccessFile
import java.io.FileInputStream
import java.nio.ByteBuffer
import java.nio.channels.FileChannel
/**
* Host File High Speed Disk Peripheral Adapter (HostFileHSDPA)
@@ -10,7 +12,7 @@ import java.io.RandomAccessFile
* A testing version of HSDPA that uses actual files on the host computer as disk sources.
* Each disk corresponds to a single file on the host filesystem.
*
* Created by Claude on 2025-08-16.
* Created by CuriousTorvald and Claude on 2025-08-16.
*/
class HostFileHSDPA : HSDPA {
@@ -25,7 +27,8 @@ class HostFileHSDPA : HSDPA {
}
// Host files for each disk slot
private val hostFiles = Array<RandomAccessFile?>(MAX_DISKS) { null }
private val hostFileStreams = Array<FileInputStream?>(MAX_DISKS) { null }
private val hostFileChannels = Array<FileChannel?>(MAX_DISKS) { null }
private val hostFilePaths = Array<String?>(MAX_DISKS) { null }
private fun initializeHostFiles(hostFilePathsList: List<String>) {
@@ -33,7 +36,9 @@ class HostFileHSDPA : HSDPA {
for (i in 0 until minOf(hostFilePathsList.size, MAX_DISKS)) {
val file = File(hostFilePathsList[i])
if (file.exists() && file.isFile) {
this.hostFiles[i] = RandomAccessFile(file, "r")
val stream = FileInputStream(file)
this.hostFileStreams[i] = stream
this.hostFileChannels[i] = stream.channel
this.hostFilePaths[i] = hostFilePathsList[i]
println("HostFileHSDPA: Attached file '${hostFilePathsList[i]}' to disk $i")
} else {
@@ -50,15 +55,18 @@ class HostFileHSDPA : HSDPA {
*/
fun attachHostFile(diskIndex: Int, filePath: String) {
if (diskIndex < 0 || diskIndex >= MAX_DISKS) return
try {
// Close existing file if any
hostFiles[diskIndex]?.close()
hostFileChannels[diskIndex]?.close()
hostFileStreams[diskIndex]?.close()
// Open new file
val file = File(filePath)
if (file.exists() && file.isFile) {
hostFiles[diskIndex] = RandomAccessFile(file, "r")
val stream = FileInputStream(file)
hostFileStreams[diskIndex] = stream
hostFileChannels[diskIndex] = stream.channel
hostFilePaths[diskIndex] = filePath
println("HSDPA: Attached file '$filePath' to disk $diskIndex")
} else {
@@ -75,10 +83,12 @@ class HostFileHSDPA : HSDPA {
*/
fun detachHostFile(diskIndex: Int) {
if (diskIndex < 0 || diskIndex >= MAX_DISKS) return
try {
hostFiles[diskIndex]?.close()
hostFiles[diskIndex] = null
hostFileChannels[diskIndex]?.close()
hostFileStreams[diskIndex]?.close()
hostFileChannels[diskIndex] = null
hostFileStreams[diskIndex] = null
hostFilePaths[diskIndex] = null
println("HSDPA: Detached file from disk $diskIndex")
} catch (e: Exception) {
@@ -93,15 +103,15 @@ class HostFileHSDPA : HSDPA {
*/
fun getAttachedFileSize(diskIndex: Int): Long {
if (diskIndex < 0 || diskIndex >= MAX_DISKS) return 0L
return try {
hostFiles[diskIndex]?.length() ?: 0L
hostFileChannels[diskIndex]?.size() ?: 0L
} catch (e: Exception) {
0L
}
}
override fun sequentialIOSkip(bytes: Int) {
override fun sequentialIOSkip(bytes: Long) {
sequentialIOPosition += bytes
// Clamp position to file bounds if needed
val activeDiskIndex = getActiveDiskIndex()
@@ -113,34 +123,36 @@ class HostFileHSDPA : HSDPA {
}
}
override fun sequentialIORead(bytes: Int, vmMemoryPointer0: Int) {
override fun sequentialIORead(bytes: Long, vmMemoryPointer0: Long) {
val activeDiskIndex = getActiveDiskIndex()
if (activeDiskIndex < 0 || hostFiles[activeDiskIndex] == null) {
if (activeDiskIndex < 0 || hostFileChannels[activeDiskIndex] == null) {
// No file attached, just advance position
sequentialIOPosition += bytes
return
}
// convert Uint24 to Int32
val vmMemoryPointer = if (vmMemoryPointer0 and 0x800000 != 0)
(0xFF000000.toInt() or vmMemoryPointer0)
// convert Int24 memory pointer to Int32
val vmMemoryPointer = if (vmMemoryPointer0 and 0x800000 != 0L)
(0xFF000000.toInt().toLong() or vmMemoryPointer0)
else
vmMemoryPointer0
try {
val file = hostFiles[activeDiskIndex]!!
val readPosition = sequentialIOPosition
file.seek(sequentialIOPosition)
// Read data into a temporary buffer
val readBuffer = ByteArray(bytes)
val bytesRead = file.read(readBuffer)
val channel = hostFileChannels[activeDiskIndex]!!
// Read data using positional read (supports >2GB positions)
val buffer = ByteBuffer.allocate(bytes.toInt())
val bytesRead = channel.read(buffer, sequentialIOPosition)
if (bytesRead > 0) {
buffer.flip()
val readBuffer = ByteArray(bytesRead)
buffer.get(readBuffer)
// Copy data to VM memory
// Handle negative addresses (backwards addressing) vs positive addresses
if (vmMemoryPointer < 0) {
// Negative addresses use backwards addressing
// Negative addresses use backwards addressing
for (i in 0 until bytesRead) {
vm.poke(vmMemoryPointer - i.toLong(), readBuffer[i])
}
@@ -151,42 +163,43 @@ class HostFileHSDPA : HSDPA {
}
}
sequentialIOPosition += bytesRead
}
// Fill remaining bytes with zeros if we read less than requested
if (bytesRead < bytes) {
val actualBytesRead = if (bytesRead > 0) bytesRead else 0
if (actualBytesRead < bytes) {
if (vmMemoryPointer < 0) {
// Negative addresses use backwards addressing
for (i in bytesRead until bytes) {
for (i in actualBytesRead until bytes.toInt()) {
vm.poke(vmMemoryPointer - i.toLong(), 0)
}
} else {
// Positive addresses use forward addressing
for (i in bytesRead until bytes) {
for (i in actualBytesRead until bytes.toInt()) {
vm.poke(vmMemoryPointer + i.toLong(), 0)
}
}
sequentialIOPosition += (bytes - bytesRead)
sequentialIOPosition += (bytes - actualBytesRead)
}
} catch (e: Exception) {
// Just advance position on error
sequentialIOPosition += bytes
}
}
override fun sequentialIOWrite(bytes: Int, vmMemoryPointer0: Int) {
override fun sequentialIOWrite(bytes: Long, vmMemoryPointer0: Long) {
val activeDiskIndex = getActiveDiskIndex()
if (activeDiskIndex < 0 || hostFiles[activeDiskIndex] == null) {
if (activeDiskIndex < 0 || hostFileChannels[activeDiskIndex] == null) {
// No file attached, just advance position
sequentialIOPosition += bytes
return
}
// convert Uint24 to Int32
val vmMemoryPointer = if (vmMemoryPointer0 and 0x800000 != 0)
(0xFF000000.toInt() or vmMemoryPointer0)
// convert Int24 memory pointer to Int32
val vmMemoryPointer = if (vmMemoryPointer0 and 0x800000 != 0L)
(0xFF000000.toInt().toLong() or vmMemoryPointer0)
else
vmMemoryPointer0
@@ -207,11 +220,12 @@ class HostFileHSDPA : HSDPA {
override fun dispose() {
super.dispose()
// Close all open files
for (i in 0 until MAX_DISKS) {
try {
hostFiles[i]?.close()
hostFileChannels[i]?.close()
hostFileStreams[i]?.close()
} catch (e: Exception) {
// Ignore errors during cleanup
}

View File

@@ -5,6 +5,7 @@ import java.io.ByteArrayOutputStream
import java.io.File
import java.io.FileInputStream
import java.io.IOException
import java.io.InputStream
import java.util.*
/**
@@ -88,6 +89,11 @@ class TestDiskDrive(private val vm: VM, private val driveNum: Int, theRootPath:
field = value
}*/
// Streaming read mode fields
private var readInputStream: InputStream? = null
private var readStreamActive = false
private var readFileSize = -1L
init {
statusCode.set(STATE_CODE_STANDBY)
@@ -101,12 +107,24 @@ class TestDiskDrive(private val vm: VM, private val driveNum: Int, theRootPath:
// Returns the drive to a clean sending state: resets the block counter,
// empties the message compose buffer and closes any active read stream.
private fun resetBuf() {
    blockSendCount = 0
    messageComposeBuffer.reset()
    // Also ends streaming read mode, if it was active
    closeReadStream()
}
/**
 * Leaves streaming read mode: closes the current read stream (if any) and
 * resets the associated state fields.
 *
 * The state is reset even when close() throws, so a failed close cannot leave
 * the drive flagged as streaming with a dead stream. The exception itself
 * still propagates to the caller, exactly as before.
 */
private fun closeReadStream() {
    try {
        readInputStream?.close()
    }
    finally {
        readInputStream = null
        readStreamActive = false
        readFileSize = -1L
    }
}
/**
 * Reports whether there is more data to send.
 *
 * In streaming read mode this is true while the stream reports at least one
 * available byte (a missing stream counts as zero). Otherwise it is true
 * while the blocks sent so far have not yet covered the whole send buffer.
 */
override fun hasNext(): Boolean {
    if (!readStreamActive) {
        // Buffered message mode: compare position against buffer size
        return blockSendCount * BLOCK_SIZE < blockSendBuffer.size
    }

    // Streaming read mode
    val pendingBytes = readInputStream?.available() ?: 0
    return pendingBytes > 0
}
@@ -115,6 +133,40 @@ class TestDiskDrive(private val vm: VM, private val driveNum: Int, theRootPath:
* Disk drive must send prepared message (or file transfer packet) to the computer.
*/
override fun startSendImpl(recipient: BlockTransferInterface): Int {
// Handle streaming read mode
if (readStreamActive) {
val stream = readInputStream ?: return 0
try {
val buffer = ByteArray(BLOCK_SIZE)
val bytesRead = stream.read(buffer)
if (bytesRead <= 0) {
// End of file
closeReadStream()
return 0
}
// Send only the bytes that were actually read
val sendBuffer = if (bytesRead < BLOCK_SIZE) {
buffer.copyOf(bytesRead)
} else {
buffer
}
recipient.writeout(sendBuffer)
blockSendCount += 1
return bytesRead
}
catch (e: IOException) {
closeReadStream()
statusCode.set(STATE_CODE_SYSTEM_IO_ERROR)
return 0
}
}
// Handle buffered message mode (for LIST, GETLEN, etc.)
if (blockSendCount == 0) {
blockSendBuffer = messageComposeBuffer.toByteArray()
}
@@ -203,6 +255,7 @@ class TestDiskDrive(private val vm: VM, private val driveNum: Int, theRootPath:
if (inputString.startsWith("DEVRST\u0017")) {
printdbg("Device Reset")
//readModeLength = -1
closeReadStream()
fileOpen = false
fileOpenMode = -1
file = File(rootPath.toURI())
@@ -337,20 +390,47 @@ class TestDiskDrive(private val vm: VM, private val driveNum: Int, theRootPath:
statusCode.set(STATE_CODE_STANDBY)
}
else if (inputString.startsWith("CLOSE")) {
closeReadStream()
fileOpen = false
fileOpenMode = -1
statusCode.set(STATE_CODE_STANDBY)
}
else if (inputString.startsWith("REWIND")) {
// Rewind the stream to beginning for seeking
if (readStreamActive && file.isFile) {
try {
closeReadStream()
// Reopen the file at position 0
readInputStream = FileInputStream(file)
readStreamActive = true
readFileSize = file.length()
blockSendCount = 0
statusCode.set(STATE_CODE_STANDBY)
}
catch (e: IOException) {
closeReadStream()
statusCode.set(STATE_CODE_SYSTEM_IO_ERROR)
}
}
else {
statusCode.set(STATE_CODE_NO_FILE_OPENED)
}
}
else if (inputString.startsWith("READ")) {
//readModeLength = inputString.substring(4 until inputString.length).toInt()
resetBuf()
if (file.isFile) {
try {
messageComposeBuffer.write(file.readBytes())
// Open file for streaming reads instead of loading entire file
readInputStream = FileInputStream(file)
readStreamActive = true
readFileSize = file.length()
blockSendCount = 0
statusCode.set(STATE_CODE_STANDBY)
}
catch (e: IOException) {
closeReadStream()
statusCode.set(STATE_CODE_SYSTEM_IO_ERROR)
}
}

View File

@@ -0,0 +1,337 @@
// ============================================================================
// CRT + NTSC Composite/S-Video Signal Simulation Shader (Enhanced Version)
// ============================================================================
// Features:
// - Runtime-switchable composite/S-Video mode (no recompilation)
// - Adjustable signal and CRT parameters via uniforms
// - Accurate NTSC color artifact simulation
// - Animated dot crawl effect
// - Trinitron phosphor mask
// - Optional bloom/glow effect
// ============================================================================
// === UNIFORMS ===
uniform float time = 0.0; // Frame count; also feeds the noise hash in rand()
uniform vec2 resolution = vec2(640.0, 480.0); // Virtual resolution (e.g., 640x480)
uniform float displayScale = 2.0;
uniform sampler2D u_texture; // Input texture
uniform vec2 flip = vec2(0.0, 0.0); // UV flip control (0,1 = flip Y)
uniform float noiseMagnitude = 0.0; // Scale of the noise term added in encodeComposite(); 0 disables it
// Signal mode: 0 = S-Video, 1 = Composite, 2 = CGA Composite
// Can be changed at runtime without recompilation
uniform int signalMode = 1; // Default should be 1 for composite
// CGA-specific settings
uniform float cgaHue; // Hue adjustment for CGA (default: 0.0, range: -PI to PI)
uniform float cgaSaturation; // Saturation multiplier for CGA (values <= 0 fall back to 1.0, see getCgaSaturation)
// Optional adjustable parameters.
// None of these has an initialiser; the get*() accessors substitute the listed
// fallback whenever the uniform is not strictly positive.
uniform float lumaFilterWidth; // Fallback: 1.15 (getLumaFilter)
uniform float chromaIFilterWidth; // Fallback: 3.5 (getChromaIFilter)
uniform float chromaQFilterWidth; // Fallback: 6.0 (getChromaQFilter)
uniform float compositeFilterWidth; // Fallback: 1.35 (getCompositeFilter)
uniform float phosphorIntensity; // Fallback: 0.25 (getPhosphorStrength)
uniform float scanlineIntensity; // Fallback: 0.12 (getScanlineStrength)
in vec2 v_texCoords;
out vec4 fragColor;
// === CONSTANTS ===
const float PI = 3.14159265358979323846;
const float TAU = 6.28318530717958647692; // 2 * PI, one full carrier cycle in radians
// NTSC color subcarrier: 3.579545 MHz
// At 640 pixels for ~52.6µs active video: cycles/pixel ≈ 0.2917
// Used by calcCarrierPhase() and for the per-tap phase step in decodeComposite().
const float CC_PER_PIXEL = 0.2917;
// CGA specific: 14.318 MHz pixel clock = exactly 4× color subcarrier
// This means exactly 4 pixels per color cycle = 0.25 cycles per pixel
const float CGA_CC_PER_PIXEL = 0.25;
// Filter kernel radius (samples to each side)
// decodeComposite() loops from -FILTER_RADIUS to +FILTER_RADIUS inclusive, i.e. 25 taps.
const int FILTER_RADIUS = 12;
// === COLOR SPACE CONVERSION ===
// GLSL matrices are column-major: each source line below is one matrix COLUMN,
// so `RGB_TO_YIQ * rgb` yields vec3(Y, I, Q) and `YIQ_TO_RGB * yiq` yields vec3(R, G, B).
const mat3 RGB_TO_YIQ = mat3(
0.299, 0.596, 0.211, // Column 0: R coefficients for Y,I,Q
0.587, -0.274, -0.523, // Column 1: G coefficients
0.114, -0.322, 0.312 // Column 2: B coefficients
);
// Inverse direction of RGB_TO_YIQ.
const mat3 YIQ_TO_RGB = mat3(
1.000, 1.000, 1.000, // Column 0: Y coefficients for R,G,B
0.956, -0.272, -1.107, // Column 1: I coefficients
0.621, -0.647, 1.704 // Column 2: Q coefficients
);
// === DEFAULT VALUES ===
// Used when uniforms aren't set (value of 0)
float getLumaFilter() {
return lumaFilterWidth > 0.0 ? lumaFilterWidth : 1.15;
}
// Gaussian sigma for the I (in-phase) chroma low-pass.
// Any non-positive chromaIFilterWidth selects the fallback of 3.5.
float getChromaIFilter() {
    if (chromaIFilterWidth > 0.0) {
        return chromaIFilterWidth;
    }
    return 3.5;
}
// Gaussian sigma for the Q (quadrature) chroma low-pass.
// Any non-positive chromaQFilterWidth selects the fallback of 6.0.
float getChromaQFilter() {
    if (chromaQFilterWidth > 0.0) {
        return chromaQFilterWidth;
    }
    return 6.0;
}
// Gaussian sigma used to low-pass the composite signal into luma.
// Any non-positive compositeFilterWidth selects the fallback of 1.35.
float getCompositeFilter() {
    if (compositeFilterWidth > 0.0) {
        return compositeFilterWidth;
    }
    return 1.35;
}
// Blend factor of the phosphor stripe mask.
// Any non-positive phosphorIntensity selects the fallback of 0.25, so the
// mask cannot be switched off through this uniform.
float getPhosphorStrength() {
    if (phosphorIntensity > 0.0) {
        return phosphorIntensity;
    }
    return 0.25;
}
// Depth of the scanline darkening.
// Any non-positive scanlineIntensity selects the fallback of 0.12.
float getScanlineStrength() {
    if (scanlineIntensity > 0.0) {
        return scanlineIntensity;
    }
    return 0.12;
}
// Chroma gain applied in CGA composite mode.
// Any non-positive cgaSaturation selects the fallback of 1.0 (unity gain).
float getCgaSaturation() {
    if (cgaSaturation > 0.0) {
        return cgaSaturation;
    }
    return 1.0;
}
// === HELPER FUNCTIONS ===
// Unnormalised Gaussian kernel weight exp(-x^2 / (2 * sigma^2)).
//   x     - distance from the kernel centre (in taps)
//   sigma - kernel width; must be non-zero
// Callers normalise by dividing by the running sum of weights.
float gaussianWeight(float x, float sigma) {
    float numerator = -0.5 * x * x;
    float variance = sigma * sigma;
    return exp(numerator / variance);
}
// Fetches the RGB colour of u_texture at uv. The coordinate is clamped to
// [0, 1] on both axes first, so taps past the border repeat the edge texel.
vec3 sampleTexture(vec2 uv) {
    vec2 clampedUV = clamp(uv, 0.0, 1.0);
    vec4 texel = texture(u_texture, clampedUV);
    return texel.rgb;
}
// Colour subcarrier phase (radians) at a given pixel.
//   pixelX      - horizontal position in source pixels; advances CC_PER_PIXEL cycles per pixel
//   pixelY      - vertical position in source lines; each line adds 180 degrees
//                 (the half cycle left over from 227.5 cycles per line)
//   frameOffset - extra phase added per frame (used for dot crawl)
float calcCarrierPhase(float pixelX, float pixelY, float frameOffset) {
    return pixelX * TAU * CC_PER_PIXEL + pixelY * PI + frameOffset;
}
// Hash-style pseudo-random value in [0, 1).
//   uv - spatial coordinate mixed into the hash
//   v  - per-call scale constants; different constants give a different sequence
// The `time` uniform is the third hash input, so the value changes every frame.
float rand(vec2 uv, vec3 v) {
    vec3 seed = fract(vec3(uv, time) * v);
    seed += dot(seed, seed.yzx + 19.19);
    return fract((seed.x + seed.y) * seed.z);
}
// Encodes one RGB sample as a composite signal level: Y + I*cos(phase) + Q*sin(phase),
// plus noise scaled by noiseMagnitude.
//   uv    - coordinate used only to seed the noise
//   rgb   - source colour
//   phase - subcarrier phase (radians) at this sample
// The noise is the difference of two hashes, so it is centred on zero.
float encodeComposite(vec2 uv, vec3 rgb, float phase) {
    vec3 yiq = RGB_TO_YIQ * rgb;
    float noise = (
        rand(uv, vec3(443.897, 441.423, 437.195)) -
        rand(uv, vec3(403.283, 479.028, 512.303))
    ) * noiseMagnitude;
    float modulated = yiq.x + yiq.y * cos(phase) + yiq.z * sin(phase);
    return modulated + noise;
}
// === COMPOSITE SIGNAL DECODE ===
// Simulates a composite (single-wire) NTSC round trip for the pixel at uv.
// Each horizontal tap is re-encoded to a composite level, then Gaussian-filtered
// into luma and synchronously demodulated into I and Q. Luma and chroma share
// one signal, so cross-colour and dot-crawl artefacts appear.
//   uv        - texture coordinate of the output pixel
//   texelSize - size of one source texel in UV units
//   basePhase - subcarrier phase at the output pixel
// Returns the decoded RGB colour (not clamped).
vec3 decodeComposite(vec2 uv, vec2 texelSize, float basePhase) {
    // Gaussian sigmas for the (Y, I, Q) channels.
    vec3 sigma = vec3(getCompositeFilter(), getChromaIFilter(), getChromaQFilter());
    // Synchronous demodulation doubles the chroma terms; luma passes unchanged.
    const vec3 demodGain = vec3(1.0, 2.0, 2.0);

    vec3 accum = vec3(0.0);
    vec3 weightSum = vec3(0.0);

    for (int tap = -FILTER_RADIUS; tap <= FILTER_RADIUS; tap++) {
        float dx = float(tap);
        float tapPhase = basePhase + dx * TAU * CC_PER_PIXEL;
        vec3 tapRGB = sampleTexture(uv + vec2(dx * texelSize.x, 0.0));

        // NOTE(review): noise is seeded with the output pixel's uv, not the tap
        // position, so every tap of one output pixel shares the same noise value.
        float signal = encodeComposite(uv, tapRGB, tapPhase);

        vec3 w = vec3(
            gaussianWeight(dx, sigma.x),
            gaussianWeight(dx, sigma.y),
            gaussianWeight(dx, sigma.z)
        );
        vec3 carrier = vec3(1.0, cos(tapPhase), sin(tapPhase));

        accum += signal * carrier * demodGain * w;
        weightSum += w;
    }

    return YIQ_TO_RGB * (accum / weightSum);
}
// === S-VIDEO SIGNAL DECODE ===
// Simulates an S-Video round trip for the pixel at uv. Luma travels on its own
// wire, so it is only Gaussian-blurred. Chroma is modulated onto the subcarrier,
// then demodulated and low-pass filtered separately for I and Q.
//   uv        - texture coordinate of the output pixel
//   texelSize - size of one source texel in UV units
//   basePhase - subcarrier phase at the output pixel
// Returns the decoded RGB colour (not clamped).
vec3 decodeSVideo(vec2 uv, vec2 texelSize, float basePhase) {
    float sigmaY = getLumaFilter();
    float sigmaI = getChromaIFilter();
    float sigmaQ = getChromaQFilter();

    float sumY = 0.0, sumI = 0.0, sumQ = 0.0;
    float normY = 0.0, normI = 0.0, normQ = 0.0;

    for (int tap = -FILTER_RADIUS; tap <= FILTER_RADIUS; tap++) {
        float dx = float(tap);
        vec3 tapYIQ = RGB_TO_YIQ * sampleTexture(uv + vec2(dx * texelSize.x, 0.0));

        float tapPhase = basePhase + dx * TAU * CC_PER_PIXEL;
        float carrierCos = cos(tapPhase);
        float carrierSin = sin(tapPhase);
        // Chroma-only signal: luma is carried separately, so no cross-colour.
        float chroma = tapYIQ.y * carrierCos + tapYIQ.z * carrierSin;

        float wY = gaussianWeight(dx, sigmaY);
        float wI = gaussianWeight(dx, sigmaI);
        float wQ = gaussianWeight(dx, sigmaQ);

        sumY += tapYIQ.x * wY;
        normY += wY;

        // Multiplying by the carrier again and doubling recovers I and Q.
        sumI += chroma * carrierCos * 2.0 * wI;
        sumQ += chroma * carrierSin * 2.0 * wQ;
        normI += wI;
        normQ += wQ;
    }

    return YIQ_TO_RGB * vec3(sumY / normY, sumI / normI, sumQ / normQ);
}
// === CGA COMPOSITE DECODE ===
// CGA has exactly 4 pixels per color cycle (14.318 MHz / 3.579545 MHz = 4)
// This creates the famous artifact colors from specific bit patterns
// Decodes CGA composite artefact colours for the pixel at uv.
// The source luma is treated directly as the composite signal level and is
// demodulated against a subcarrier running at exactly 4 pixels per cycle
// (CGA_CC_PER_PIXEL), which makes the result depend on pixel patterns.
//   uv        - texture coordinate of the output pixel
//   texelSize - size of one source texel in UV units
//   pixelX    - horizontal position in source pixels (sets the carrier phase)
//   pixelY    - vertical position in source lines (odd lines are shifted 180 degrees)
// cgaHue rotates the carrier phase; getCgaSaturation() scales the decoded chroma.
// Returns the decoded RGB colour (not clamped).
vec3 decodeCGAComposite(vec2 uv, vec2 texelSize, float pixelX, float pixelY) {
    // Narrower fixed filters than the generic NTSC path, for more pronounced artefacts.
    const float LUMA_SIGMA = 1.2;
    const float CHROMA_SIGMA = 2.5;
    // Smaller kernel than FILTER_RADIUS for a sharper look.
    const int TAP_RADIUS = 8;
    const vec3 LUMA_COEFF = vec3(0.299, 0.587, 0.114);

    // Burst phase: user hue plus a quarter-cycle alignment offset.
    float phase0 = pixelX * TAU * CGA_CC_PER_PIXEL + (cgaHue + PI * 0.5);
    bool oddLine = mod(pixelY, 2.0) >= 1.0;
    if (oddLine) {
        phase0 += PI;
    }

    float lumaSum = 0.0;
    float lumaNorm = 0.0;
    vec2 iqSum = vec2(0.0);
    float chromaNorm = 0.0;

    for (int tap = -TAP_RADIUS; tap <= TAP_RADIUS; tap++) {
        float dx = float(tap);
        vec3 tapRGB = sampleTexture(uv + vec2(dx * texelSize.x, 0.0));
        // The monochrome level of the source pixel is the composite signal.
        float signal = dot(tapRGB, LUMA_COEFF);
        float tapPhase = phase0 + dx * TAU * CGA_CC_PER_PIXEL;

        float wLuma = gaussianWeight(dx, LUMA_SIGMA);
        lumaSum += signal * wLuma;
        lumaNorm += wLuma;

        float wChroma = gaussianWeight(dx, CHROMA_SIGMA);
        iqSum += signal * vec2(cos(tapPhase), sin(tapPhase)) * 2.0 * wChroma;
        chromaNorm += wChroma;
    }

    float y = lumaSum / lumaNorm;
    vec2 iq = (iqSum / chromaNorm) * getCgaSaturation();
    return YIQ_TO_RGB * vec3(y, iq);
}
// === TRINITRON PHOSPHOR MASK ===
// Per-channel multiplier imitating vertical RGB phosphor stripes.
// The output column (screenPos.x * displayScale) is taken modulo 3; the stripe's
// own channel stays at full level and the other two are reduced to `bleed`.
// The mask is rescaled by a brightness-compensation factor and then blended
// with white according to getPhosphorStrength().
vec3 trinitronMask(vec2 screenPos) {
    const float bleed = 0.15;
    float stripe = mod(screenPos.x * displayScale, 3.0);

    vec3 mask = vec3(bleed);
    if (stripe < 1.0) {
        mask.r = 1.0;
    } else if (stripe < 2.0) {
        mask.g = 1.0;
    } else {
        mask.b = 1.0;
    }

    // Reciprocal of the mask's approximate mean level, then trimmed by 0.85.
    float compensation = 1.0 / (0.333 + 0.667 * bleed);
    mask *= compensation * 0.85;

    return mix(vec3(1.0), mask, getPhosphorStrength());
}
// === SCANLINE MASK ===
// Brightness multiplier imitating scanlines.
// A sine over the output row (screenPos.y * displayScale) is remapped to [0, 1]
// and raised to the power 0.4. The result runs from 1.0 - strength to 1.0,
// where strength is getScanlineStrength().
float scanlineMask(vec2 screenPos) {
    float wave = sin(screenPos.y * displayScale * PI);
    float profile = pow(wave * 0.5 + 0.5, 0.4);
    return mix(1.0 - getScanlineStrength(), 1.0, profile);
}
// === MAIN ===
// Fragment entry point: applies the optional UV flip, derives the subcarrier
// phase for this pixel, decodes the pixel with the path selected by signalMode
// (1 = composite, 2 = CGA composite, anything else = S-Video), and writes the
// clamped, opaque colour to fragColor.
void main() {
    // A flip component of 1.0 mirrors that axis; 0.0 leaves it untouched.
    vec2 uv = mix(v_texCoords, 1.0 - v_texCoords, flip);

    vec2 texelSize = 1.0 / resolution;
    float pixelX = uv.x * resolution.x;
    float pixelY = uv.y * resolution.y;

    // Dot crawl: the carrier advances a quarter cycle per frame, repeating every 4 frames.
    float framePhase = mod(time, 4.0) * PI * 0.5;
    float basePhase = calcCarrierPhase(pixelX, pixelY, framePhase);

    vec3 rgb;
    if (signalMode == 1) {
        rgb = decodeComposite(uv, texelSize, basePhase);
    } else if (signalMode == 2) {
        // CGA derives its own carrier phase from the pixel position.
        rgb = decodeCGAComposite(uv, texelSize, pixelX, pixelY);
    } else {
        rgb = decodeSVideo(uv, texelSize, basePhase);
    }

    // CRT display effects (currently disabled)
    vec2 screenPos = vec2(pixelX, pixelY);
    // rgb *= trinitronMask(screenPos);
    // rgb *= scanlineMask(screenPos);

    fragColor = vec4(clamp(rgb, 0.0, 1.0), 1.0);
}

View File

@@ -0,0 +1,90 @@
package net.torvald.util
import kotlin.experimental.or
/**
 * Storage wrapper for a 16-bit half-precision float held as raw bits in a [Short].
 *
 * Conversion to and from [Float] is done by the bit-level routines in the
 * companion object. A new instance starts with all bits zero.
 */
class Float16() {
// Raw half-precision bit pattern; only this class can assign it.
var bits = 0.toShort()
private set
/** Creates an instance holding [fval] converted to half precision. */
constructor(fval: Float) : this() {
fromFloat(fval)
}
/** Expands the stored bits to a [Float] via the companion's toFloat. */
fun toFloat() = Float16.toFloat(bits)
/** Overwrites the stored bits with [fval] converted to half precision. */
fun fromFloat(fval: Float) {
bits = Float16.fromFloat(fval)
}
// NOTE(review): each operator below forwards to fromFloat(...). That call
// appears to resolve to the instance method above, in which case the operator
// overwrites this object's bits in place and returns Unit rather than a new
// Float16 — confirm this is intended before using them in expressions.
operator fun times(other: Float) = fromFloat(this.toFloat() * other)
operator fun times(other: Float16) = fromFloat(this.toFloat() * other.toFloat())
operator fun div(other: Float) = fromFloat(this.toFloat() / other)
operator fun div(other: Float16) = fromFloat(this.toFloat() / other.toFloat())
// operators are stripped: you don't calculate from FP16; this is only for storing values //
companion object {
/**
 * Converts a half-precision bit pattern to a [Float].
 *
 * Handles NaN/Inf, normalised values, subnormals and signed zero.
 * NOTE(review): for a normalised input with a zero mantissa (above the
 * smallest normal exponent) the "smooth transition" branch returns a value
 * with the low 10 float mantissa bits set, so the result is slightly larger
 * in magnitude than the exact value (e.g. 0x3C00 yields float bits 0x3F8003FF,
 * not 0x3F800000).
 *
 * @param hbits raw half-precision bits
 * @return the corresponding single-precision value
 */
fun toFloat(hbits: Short): Float {
// Shadow the parameter with its unsigned 16-bit value as an Int.
val hbits = hbits.toInt().and(0xFFFF)
var mant = hbits and 0x03ff // 10 bits mantissa
var exp = hbits and 0x7c00 // 5 bits exponent
if (exp == 0x7c00)
// NaN/Inf
exp = 0x3fc00 // -> NaN/Inf
else if (exp != 0)
// normalized value
{
exp += 0x1c000 // exp - 15 + 127
if (mant == 0 && exp > 0x1c400)
// smooth transition
return java.lang.Float.intBitsToFloat(hbits and 0x8000 shl 16 or (exp shl 13) or 0x3ff)
}
else if (mant != 0)
// && exp==0 -> subnormal
{
exp = 0x1c400 // make it normal
do {
mant = mant shl 1 // mantissa * 2
exp -= 0x400 // decrease exp by 1
} while (mant and 0x400 == 0) // while not normal
mant = mant and 0x3ff // discard subnormal bit
} // else +/-0 -> +/-0
// The infix operators below are evaluated left to right:
// ((hbits and 0x8000) shl 16) or ((exp or mant) shl 13)
return java.lang.Float.intBitsToFloat(// combine all parts
hbits and 0x8000 shl 16 or (exp or mant shl 13)) // value << ( 23 - 10 )
}
/**
 * Converts a [Float] to a half-precision bit pattern, rounding the mantissa.
 *
 * Magnitudes of 65536.0 (float bits 0x47800000) or more become +/-Inf; NaN
 * keeps its upper mantissa bits; a value that only reaches the Inf threshold
 * through rounding is clamped to the largest finite half (0x7BFF); magnitudes
 * whose rounded bits fall below 2^-25 (0x33000000) become signed zero; the
 * range in between is encoded as a subnormal.
 *
 * @param fval value to convert
 * @return raw half-precision bits
 */
fun fromFloat(fval: Float): Short {
val fbits = java.lang.Float.floatToIntBits(fval)
val sign = fbits.ushr(16).and(0x8000).toShort() // sign only
var `val` = (fbits and 0x7fffffff) + 0x1000 // rounded value
if (`val` >= 0x47800000)
// might be or become NaN/Inf
{ // avoid Inf due to rounding
if (fbits and 0x7fffffff >= 0x47800000) { // is or must become NaN/Inf
if (`val` < 0x7f800000)
// was value but too large
return sign or 0x7c00 // make it +/-Inf
return sign or 0x7c00 or // remains +/-Inf or NaN
(fbits and 0x007fffff).ushr(13).toShort() // keep NaN (and Inf) bits
}
return sign or 0x7bff.toShort() // unrounded not quite Inf
}
// 0x38800000 is 2^-14, the smallest normalised half-precision magnitude.
if (`val` >= 0x38800000)
// remains normalized value
return sign or (`val` - 0x38000000).ushr(13).toShort() // exp - 127 + 15
if (`val` < 0x33000000)
// too small for subnormal
return sign // becomes +/-0
`val` = (fbits and 0x7fffffff).ushr(23) // tmp exp for subnormal calc
return sign or ((fbits and 0x7fffff or 0x800000) // add subnormal bit
+ 0x800000.ushr(`val` - 102) // round depending on cut off
).ushr(126 - `val`) // div by 2^(1-(exp-127+15)) and >> 13 | exp=0
.toShort()
}
}
}

View File

@@ -97,7 +97,7 @@ class VMGUI(val loaderInfo: EmulInstance, val viewportWidth: Int, val viewportHe
camera.update()
batch.projectionMatrix = camera.combined
crtShader = loadShaderInline(CRT_POST_SHADER)
crtShader = loadShaderInline(Gdx.files.classpath("net/torvald/tsvm/shader_crt_post.frag").readString())
gpuFBO = FrameBuffer(Pixmap.Format.RGBA8888, viewportWidth, viewportHeight, false)
winFBO = FrameBuffer(Pixmap.Format.RGBA8888, viewportWidth, viewportHeight, false)
@@ -291,6 +291,7 @@ class VMGUI(val loaderInfo: EmulInstance, val viewportWidth: Int, val viewportHe
batch.shader = crtShader
batch.shader.setUniformf("resolution", viewportWidth.toFloat(), viewportHeight.toFloat())
batch.shader.setUniformf("interlacer", (framecount % 2).toFloat())
batch.shader.setUniformf("time", (framecount % 640).toFloat())
batch.setBlendFunctionSeparate(GL20.GL_SRC_ALPHA, GL20.GL_ONE_MINUS_SRC_ALPHA, GL20.GL_SRC_ALPHA, GL20.GL_ONE)
batch.draw(gpuFBO.colorBufferTexture, 0f, 0f)
}
@@ -491,8 +492,8 @@ vec4 grading(vec4 col0, vec4 args) {
return pow(rgb, power);
}
const vec4 gradLow = vec4(0.05, 0.05, 0.05, 0.8);
const vec4 gradHigh = vec4(0.2, 0.2, 0.2, 1.0);
const vec4 gradLow = vec4(0.02, 0.02, 0.02, 1.0);
const vec4 gradHigh = vec4(0.12, 0.12, 0.12, 1.0);
const float SQRT_2 = 1.4142135623730950488;
vec4 getRadialGrad(vec2 uv0) {
@@ -554,4 +555,4 @@ void main() {
fragColor = nearestColour(inColor + spread * (bayer[int(entry.y) * int(bayerSize) + int(entry.x)] / bayerDivider - 0.5));
}
"""
"""

View File

@@ -1,61 +1,221 @@
# Created by Claude on 2025-08-17.
# Makefile for TSVM Enhanced Video (TEV) encoder
# Created by CuriousTorvald and Claude on 2025-08-17.
# Makefile for TSVM Enhanced Video (TEV) encoder and libraries
CC = gcc
CFLAGS = -std=c99 -Wall -Wextra -O2 -D_GNU_SOURCE
LIBS = -lm -lzstd
CXX = g++
CFLAGS = -std=c99 -Wall -Wextra -Ofast -D_GNU_SOURCE -march=native -mavx512f -mavx512dq -mavx512bw -mavx512vl -Iinclude
CXXFLAGS = -std=c++11 -Wall -Wextra -Ofast -D_GNU_SOURCE -march=native -mavx512f -mavx512dq -mavx512bw -mavx512vl -Iinclude
DBGFLAGS =
PREFIX = /usr/local
# Zstd flags (use pkg-config if available, fallback for cross-platform compatibility)
ZSTD_CFLAGS = $(shell pkg-config --cflags libzstd 2>/dev/null || echo "")
ZSTD_LIBS = $(shell pkg-config --libs libzstd 2>/dev/null || echo "-lzstd")
LIBS = -lm $(ZSTD_LIBS)
# =============================================================================
# Library Object Files
# =============================================================================
# libtavenc - TAV encoder library
LIBTAVENC_OBJ = lib/libtavenc/tav_encoder_lib.o \
lib/libtavenc/tav_encoder_color.o \
lib/libtavenc/tav_encoder_dwt.o \
lib/libtavenc/tav_encoder_quantize.o \
lib/libtavenc/tav_encoder_ezbc.o \
lib/libtavenc/tav_encoder_utils.o \
lib/libtavenc/tav_encoder_tile.o
# libtavdec - TAV decoder library
LIBTAVDEC_OBJ = lib/libtavdec/tav_video_decoder.o
# libtadenc - TAD encoder library
LIBTADENC_OBJ = lib/libtadenc/encoder_tad.o
# libtaddec - TAD decoder library
LIBTADDEC_OBJ = lib/libtaddec/decoder_tad.o
# libfec - Forward Error Correction library (LDPC + Reed-Solomon)
LIBFEC_OBJ = lib/libfec/ldpc.o lib/libfec/reed_solomon.o lib/libfec/ldpc_payload.o
# =============================================================================
# Targets
# =============================================================================
# Source files and targets
SOURCES = encoder_tev.c
TARGETS = encoder_tev
TARGETS = libs encoder_tav_ref decoder_tav_ref tav_inspector tad tav_dt
LIBRARIES = lib/libtavenc.a lib/libtavdec.a lib/libtadenc.a lib/libtaddec.a lib/libfec.a
TAV_TARGETS = encoder_tav_ref decoder_tav_ref tav_inspector
TAD_TARGETS = encoder_tad decoder_tad
DT_TARGETS = encoder_tav_dt decoder_tav_dt tavdt_noise_injector
# Build all encoders
all: $(TARGETS)
# Build all encoders (default)
all: clean $(TARGETS)
# Build main encoder
encoder_tev: encoder_tev.c
rm -f encoder_tev
$(CC) $(CFLAGS) -o $@ $< $(LIBS)
# Build all libraries
libs: $(LIBRARIES)
# Default target
$(TARGETS): all
# Reference encoder using libtavenc (replaces old monolithic encoder)
encoder_tav_ref: src/encoder_tav.c lib/libtavenc.a lib/libtadenc.a
rm -f encoder_tav_ref
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -Iinclude -o encoder_tav_ref src/encoder_tav.c lib/libtavenc.a lib/libtadenc.a $(LIBS)
@echo ""
@echo "Reference encoder built: encoder_tav_ref"
@echo "This is the official reference implementation with all features"
# Reference decoder using libtavdec (replaces old monolithic decoder)
decoder_tav_ref: src/decoder_tav.c lib/libtavdec.a lib/libtaddec.a
rm -f decoder_tav_ref
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -Iinclude -o decoder_tav_ref src/decoder_tav.c lib/libtavdec.a lib/libtaddec.a $(LIBS)
@echo ""
@echo "Reference decoder built: decoder_tav_ref"
@echo "This is the official reference implementation with all features"
tav_inspector: tav_inspector.c
rm -f tav_inspector
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -o tav_inspector $< $(LIBS)
tav: $(TAV_TARGETS)
# Build TAD (Terrarum Advanced Audio) tools
encoder_tad: src/encoder_tad_standalone.c lib/libtadenc/encoder_tad.c include/encoder_tad.h
rm -f encoder_tad encoder_tad_standalone.o encoder_tad.o
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c lib/libtadenc/encoder_tad.c -o encoder_tad.o
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c src/encoder_tad_standalone.c -o encoder_tad_standalone.o
$(CC) $(DBGFLAGS) -o encoder_tad encoder_tad_standalone.o encoder_tad.o $(LIBS)
decoder_tad: lib/libtaddec/decoder_tad.c
rm -f decoder_tad
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -o decoder_tad $< $(LIBS)
# Build all TAD tools
tad: $(TAD_TARGETS)
# =============================================================================
# Library Build Rules
# =============================================================================
# Compile library object files
lib/libtavenc/%.o: lib/libtavenc/%.c
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c $< -o $@
lib/libtavdec/%.o: lib/libtavdec/%.c
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c $< -o $@
lib/libtadenc/%.o: lib/libtadenc/%.c
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -c $< -o $@
lib/libtaddec/%.o: lib/libtaddec/%.c
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -DTAD_DECODER_LIB -c $< -o $@
lib/libfec/%.o: lib/libfec/%.c
$(CC) $(CFLAGS) -Ilib/libfec -c $< -o $@
# Build static libraries
lib/libtavenc.a: $(LIBTAVENC_OBJ)
ar rcs $@ $^
lib/libtavdec.a: $(LIBTAVDEC_OBJ)
ar rcs $@ $^
lib/libtadenc.a: $(LIBTADENC_OBJ)
ar rcs $@ $^
lib/libtaddec.a: $(LIBTADDEC_OBJ)
ar rcs $@ $^
lib/libfec.a: $(LIBFEC_OBJ)
ar rcs $@ $^
# =============================================================================
# TAV-DT (Digital Tape) Encoder/Decoder
# =============================================================================
# TAV-DT encoder with FEC (multithreaded)
encoder_tav_dt: src/encoder_tav_dt.c lib/libtavenc.a lib/libtadenc.a lib/libfec.a
rm -f encoder_tav_dt
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -Iinclude -Ilib/libfec -o encoder_tav_dt src/encoder_tav_dt.c lib/libtavenc.a lib/libtadenc.a lib/libfec.a $(LIBS) -lpthread
@echo ""
@echo "TAV-DT encoder built: encoder_tav_dt"
@echo "Digital Tape format with LDPC and Reed-Solomon FEC (multithreaded)"
# TAV-DT decoder with FEC (multithreaded)
decoder_tav_dt: src/decoder_tav_dt.c lib/libtavdec.a lib/libtaddec.a lib/libfec.a
rm -f decoder_tav_dt
$(CC) $(CFLAGS) $(ZSTD_CFLAGS) -Iinclude -Ilib/libfec -o decoder_tav_dt src/decoder_tav_dt.c lib/libtavdec.a lib/libtaddec.a lib/libfec.a $(LIBS) -lpthread
@echo ""
@echo "TAV-DT decoder built: decoder_tav_dt"
@echo "Digital Tape format with LDPC and Reed-Solomon FEC (multithreaded)"
# TAV-DT noise injector (channel simulator)
tavdt_noise_injector: tavdt_noise_injector.c
rm -f tavdt_noise_injector
$(CC) -std=c99 -Wall -Ofast -D_GNU_SOURCE -o tavdt_noise_injector tavdt_noise_injector.c -lm
@echo ""
@echo "TAV-DT noise injector built: tavdt_noise_injector"
@echo "Simulates QPSK satellite channel noise (AWGN + burst)"
# Build all TAV-DT tools
tav_dt: $(DT_TARGETS)
# Build with debug symbols
debug: CFLAGS += -g -DDEBUG
debug: $(TARGETS)
debug: CFLAGS += -g -DDEBUG -fsanitize=address -fno-omit-frame-pointer
debug: DBGFLAGS += -fsanitize=address -fno-omit-frame-pointer
debug: clean $(TARGETS)
# Clean build artifacts
clean:
rm -f $(TARGETS)
rm -f $(TARGETS) $(TAD_TARGETS) $(DT_TARGETS) $(LIBRARIES) *.o lib/*/*.o
# Install (copy to PATH)
install: $(TARGETS)
cp $(TARGETS) /usr/local/bin/
cp encoder_tav_ref $(PREFIX)/bin/
cp decoder_tav_ref $(PREFIX)/bin/
cp encoder_tad $(PREFIX)/bin/
cp decoder_tad $(PREFIX)/bin/
cp encoder_tav_dt $(PREFIX)/bin/
cp decoder_tav_dt $(PREFIX)/bin/
cp tav_inspector $(PREFIX)/bin/
# Check for required dependencies
check-deps:
@echo "Checking dependencies..."
@echo "Using Zstd compression for better efficiency"
@pkg-config --exists libzstd || (echo "Error: libzstd-dev not found. Install with: sudo apt install libzstd-dev" && exit 1)
@pkg-config --exists libzstd || (echo "Error: libzstd-dev not found. Install libzstd-dev or equivalent" && exit 1)
@echo "All dependencies found."
# Help
help:
@echo "TSVM Enhanced Video (TEV) Encoder"
@echo "TSVM Advanced Video (TAV) and Audio (TAD) Encoders"
@echo ""
@echo "Targets:"
@echo " all - Build both encoders (default)"
@echo " encoder_tev - Build the main TEV encoder"
@echo " encoder_tev_xyb - Build the XYB color space encoder"
@echo " all - Build video encoders (default)"
@echo " libs - Build all codec libraries (.a files)"
@echo " tav - Build the TAV advanced video encoder"
@echo " tav_dt - Build all TAV-DT (Digital Tape) tools with FEC"
@echo " tavdt_noise_injector - Build TAV-DT channel noise simulator"
@echo " tad - Build all TAD audio tools (encoder, decoder)"
@echo " encoder_tad - Build TAD audio encoder"
@echo " decoder_tad - Build TAD audio decoder"
@echo " tests - Build test programs"
@echo " debug - Build with debug symbols"
@echo " clean - Remove build artifacts"
@echo " install - Install to /usr/local/bin"
@echo " check-deps - Check for required dependencies"
@echo " help - Show this help"
@echo ""
@echo "Libraries:"
@echo " lib/libtavenc.a - TAV encoder library"
@echo " lib/libtavdec.a - TAV decoder library"
@echo " lib/libtadenc.a - TAD encoder library"
@echo " lib/libtaddec.a - TAD decoder library"
@echo " lib/libfec.a - Forward Error Correction library (LDPC + RS)"
@echo ""
@echo "Usage:"
@echo " make # Build both encoders"
@echo " ./encoder_tev input.mp4 -o output.tev"
@echo " ./encoder_tev_xyb input.mp4 -o output.tev"
@echo " make # Build video encoders"
@echo " make libs # Build all libraries"
@echo " make tav # Build TAV encoder"
@echo " make tav_dt # Build TAV-DT encoder/decoder with FEC"
@echo " make tad # Build all TAD audio tools"
@echo " sudo make install # Install all encoders"
.PHONY: all clean install check-deps help debug
# Targets that name actions or groups rather than files. `tav` is an aggregate
# target with no recipe, so it is listed here like `tad` and `tav_dt`.
.PHONY: all libs tav tad tav_dt clean install check-deps help debug tests

350
video_encoder/TAD_README.md Normal file
View File

@@ -0,0 +1,350 @@
# TAD - TSVM Advanced Audio Codec
A perceptually-optimised wavelet-based audio codec designed for resource-constrained systems, featuring CDF 9/7 wavelets, EZBC sparse coding, and sophisticated perceptual quantisation.
## Overview
TAD (TSVM Advanced Audio) is a modern audio codec built on the discrete wavelet transform (DWT), using Cohen-Daubechies-Feauveau (CDF) 9/7 biorthogonal wavelets. It combines perceptual quantisation, advanced entropy coding, and careful optimisation for resource-constrained systems.
### Key Advantages
- **Perceptual optimisation**: HVS-aware quantisation preserves audio quality where it matters
- **Efficient sparse coding**: EZBC encoding exploits coefficient sparsity (86.9% zeros in typical content)
- **Variable chunk sizes**: Supports any chunk size ≥1024 samples, including non-power-of-2
- **Stereo decorrelation**: Mid/Side encoding exploits stereo correlation for better compression
- **Hardware-friendly**: Designed for efficient decoding on resource-constrained platforms
## Features
### Compression Technology
- **CDF 9/7 Biorthogonal Wavelets**
- 9-level fixed decomposition for all chunk sizes
- Lifting scheme implementation for efficient computation
- Optimal frequency discrimination for audio signals
- **Pre-processing**
- First-order IIR pre-emphasis filter (α=0.5) shifts quantisation noise to lower frequencies, where it is less objectionable to listeners
- Gamma companding (γ=0.5) for dynamic range compression before quantisation
- Mid/Side stereo transformation exploits stereo correlation
- Lambda companding (λ=6.0) with Laplacian CDF mapping for full bit utilisation
- **Perceptual Quantisation**
- Channel-specific (Mid/Side) frequency-dependent weights
- Subband-aware quantisation preserves perceptually important frequencies
- **EZBC Encoding**
- Binary tree embedded zero block coding
- Exploits coefficient sparsity (86.9% Mid, 97.8% Side typical)
- Progressive refinement structure
- Spatial clustering of non-zero coefficients
- **Entropy Coding**
- Zstandard compression (level 7) on concatenated EZBC bitstreams
- Cross-channel compression optimisation
- Optional Zstd bypass for debugging
### Audio Format
- **Sample Rate**: 32 KHz (TSVM audio hardware native format)
- **Channels**: Stereo (L/R input, Mid/Side internal representation)
- **Chunk Sizes**: Variable, any size ≥1024 samples (including non-power-of-2)
- **Bit Depth**: 32-bit float internal, 8-bit unsigned PCM output with noise-shaped dithering
- **Bandwidth**: Full 0-16 KHz frequency range preserved
### Quality Levels
Six quality levels (0-5) provide a wide range of compression/quality trade-offs:
- **Level 0**: Lowest quality, smallest file size
- **Level 3**: Default, balanced quality/compression (2.51:1 vs PCMu8)
- **Level 5**: Highest quality, largest file size
Quality levels are designed to be synchronised with TAV video codec for unified encoding.
## Building
### Prerequisites
- C compiler (GCC/Clang)
- Zstandard library (libzstd)
- Math library (libm)
### Compilation
```bash
# Build TAD encoder/decoder
make tad
# Build all tools
make all
# Clean build artifacts
make clean
```
### Build Targets
- `encoder_tad` - Standalone audio encoder with FFmpeg calls
- `decoder_tad` - Standalone audio decoder
## Usage
### Basic Encoding
Encoding requires the FFmpeg executable to be installed on your system.
```bash
# Default encoding (quality level 3)
./encoder_tad -i input.mp3 -o output.tad
# Specify quality level (0-5)
./encoder_tad -i input.m4a -o output.tad -q 0 # Lowest quality
./encoder_tad -i input.ogg -o output.tad -q 5 # Highest quality
# Disable Zstd compression (for debugging)
./encoder_tad -i input.opus -o output.tad --no-zstd
# Verbose output with statistics
./encoder_tad -i input.flac -o output.tad -v
```
### Decoding
```bash
# Decode to PCMu8
./decoder_tad -i input.tad -o output.pcm --raw-pcm
# Decode to WAV
./decoder_tad -i input.tad -o output.wav
```
### Input Formats
TAD encoder accepts any audio format supported by FFmpeg:
- Audio files: WAV, MP3, FLAC, OGG, AAC, etc.
- Video files with audio streams: MP4, MKV, AVI, etc.
- Raw PCM formats
Audio is automatically resampled to 32 KHz stereo if necessary.
## Technical Architecture
### Encoder Pipeline
1. **Input Processing**
- FFmpeg demuxing and audio stream extraction
- Resampling to 32 KHz stereo
- Conversion to PCM32f
2. **Pre-emphasis Filter**
- First-order IIR filter with α=0.5
- Shifts quantisation noise toward lower frequencies
- Improves perceptual quality
3. **Gamma Companding**
- Dynamic range compression with γ=0.5
- Applied independently to each sample
- Reduces quantisation error for low-amplitude signals
4. **Stereo Decorrelation**
- Left/Right to Mid/Side transformation
- Mid = (L + R) / 2
- Side = (L - R) / 2
- Exploits stereo correlation for better compression
5. **9-Level CDF 9/7 DWT**
- Fixed 9 decomposition levels for all chunk sizes
- Forward lifting scheme implementation
- Correct length tracking for non-power-of-2 sizes
6. **Perceptual Quantisation**
- Channel-specific (Mid/Side) subband weights
- Lambda companding with λ=6.0
- Laplacian CDF mapping: `sign(x) * floor(λ * log(1 + |x|/λ))`
- Quantised to int8 coefficients
7. **EZBC Encoding**
- Binary tree structure per channel
- Progressive refinement by bitplanes
- Zero block coding exploits sparsity
- Independent bitstreams for Mid and Side
8. **Zstd Compression**
- Level 7 compression on concatenated `[Mid_bitstream][Side_bitstream]`
- Cross-channel optimisation opportunities
- Adaptive compression based on content
### Decoder Pipeline
1. **Container Parsing**
- TAD packet identification (type 0x24)
- Chunk size extraction
- Compressed data boundaries
2. **Zstd Decompression**
- Decompress concatenated bitstreams
- Split into Mid and Side EZBC streams
3. **EZBC Decoding**
- Binary tree decoder per channel
- Reconstruct quantised int8 coefficients
- Progressive refinement reconstruction
4. **Lambda Decompanding**
- Inverse Laplacian CDF with channel-specific weights
- Reconstruct float32 DWT coefficients
- Apply subband-specific perceptual weights
5. **9-Level Inverse CDF 9/7 DWT**
- Inverse lifting scheme implementation
- Correct length tracking for non-power-of-2 chunk sizes
- Pre-calculated length sequence from forward transform
6. **Mid/Side to Left/Right**
- L = Mid + Side
- R = Mid - Side
- Reconstruct stereo channels
7. **Gamma Decompanding**
- Inverse gamma with γ⁻¹=2.0
- Restore original dynamic range
8. **De-emphasis Filter**
- Reverse pre-emphasis with α=0.5
- Remove frequency shaping
- Restore flat frequency response
9. **PCM32f to PCM8u Conversion**
- Noise-shaped dithering for 8-bit output
- Clamping to valid range
- Final output format
### Wavelet Implementation
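The CDF 9/7 wavelet is computed with a **two-stage lifting scheme**: each stage is a predict/update pair (α/β first, then γ/δ), followed by scaling with K. For brevity, the snippet below shows a single predict/update pair: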
```c
// Forward Transform: Predict → Update
// Predict step (generate high-pass)
temp[half + i] = data[odd] - α * (data[even_left] + data[even_right]);
// Update step (generate low-pass)
temp[i] = data[even] + β * (temp[half + i - 1] + temp[half + i]);
// Normalization (K factor)
temp[i] *= K;
temp[half + i] /= K;
// Inverse Transform: Denormalize → Undo Update → Undo Predict (reversed order)
temp[i] /= K;
temp[half + i] *= K;
temp[i] -= β * (temp[half + i - 1] + temp[half + i]);
data[odd] = temp[half + i] + α * (temp[i] + temp[i + 1]);
data[even] = temp[i];
```
**CDF 9/7 Coefficients**:
- α = -1.586134342
- β = -0.052980118
- γ = +0.882911075
- δ = +0.443506852
- K = 1.230174105
### Non-Power-of-2 Chunk Size Handling
Critical implementation detail for variable chunk sizes:
```c
// Pre-calculate exact length sequence from forward transform
int lengths[MAX_LEVELS + 1];
lengths[0] = chunk_size;
for (int i = 1; i <= levels; i++) {
lengths[i] = (lengths[i - 1] + 1) / 2;
}
// Apply inverse DWT using lengths[level] for each level
// NEVER use simple doubling (length *= 2) - incorrect for non-power-of-2!
```
Incorrect length tracking causes mirrored subband artefacts in decoded audio.
### Perceptual Quantisation Weights
Channel-specific weights for Mid (channel 0) and Side (channel 1):
```c
// Base quantiser weights per subband (9 levels + approximation)
float BASE_QUANTISER_WEIGHTS[2][10] = {
// Mid channel (0)
{4.0f, 2.0f, 1.8f, 1.6f, 1.4f, 1.2f, 1.0f, 1.0f, 1.3f, 2.0f},
// Side channel (1)
{6.0f, 5.0f, 2.6f, 2.4f, 1.8f, 1.3f, 1.0f, 1.0f, 1.6f, 3.2f}
};
// During dequantisation:
float weight = BASE_QUANTISER_WEIGHTS[channel][subband] * quantiser_scale;
coeffs[i] = normalised_val * TAD32_COEFF_SCALARS[subband] * weight;
```
Different weights for the Mid and Side channels reflect the perceptual importance of the frequency bands in each channel. The DC band has the highest weight (4.0 Mid, 6.0 Side) due to energy concentration.
## Performance Characteristics
### Compression Efficiency
- **Target Compression**: 2:1 against PCMu8 baseline (4:1 against PCM16LE input)
- **Achieved Compression**: 2.51:1 against PCMu8 at quality level 3
- **Audio Quality**: Preserves full 0-16 KHz bandwidth
- **Coefficient Sparsity**: 86.9% zeros in Mid channel, 97.8% in Side channel (typical)
- **EZBC Benefits**: Exploits sparsity, progressive refinement, spatial clustering
### Computational Complexity
- **Encoding**: O(n log n) per chunk for DWT, O(n) for EZBC encoding
- **Decoding**: O(n log n) per chunk for inverse DWT, O(n) for EZBC decoding
- **Memory**: O(n) working memory for chunk processing
### Quality Characteristics
- **Frequency Response**: Flat 0-16 KHz within perceptual limits
- **Dynamic Range**: Preserved through gamma companding
- **Stereo Imaging**: Maintained through Mid/Side decorrelation
- **Perceptual Quality**: Optimised for human auditory system characteristics
## Integration with TAV
TAD is designed as an includable API for TAV video encoder integration:
- **Variable Chunk Sizes**: Audio chunks can match video GOP boundaries (e.g., 32016 samples for 1-second TAV GOP)
- **Unified Quality Levels**: TAD quality 0-5 synchronised with TAV quality 0-5
- **Embedded Packets**: TAV embeds TAD-compressed audio using packet type 0x24
- **Shared Container**: Single .tav file contains both video and audio streams
### TAV Integration Example
```c
// TAD handles non-power-of-2 chunk size correctly
tad_encode_chunk(audio_buffer, audio_samples_per_gop, output_buffer, &output_size);
// TAV embeds TAD packet
tav_write_packet(TAV_PACKET_AUDIO, output_buffer, output_size);
```
## Format Specification
For complete packet structure and bitstream format details, refer to `format documentation.txt`.
### Key Packet Types
- `0x24`: TAD audio packet (used in standalone .tad files and embedded in .tav files)
## Related Projects
- **TAV** (TSVM Advanced Video): Wavelet-based video codec with integrated TAD audio
- **TSVM**: Target virtual machine platform for TAD playback
## Licence
MIT.

261
video_encoder/TAV_README.md Normal file
View File

@@ -0,0 +1,261 @@
# TAV - TSVM Advanced Video Codec
A perceptually-optimised wavelet-based video codec designed for resource-constrained systems, featuring multiple wavelet types, temporal 3D DWT, and sophisticated compression techniques.
## Overview
TAV (TSVM Advanced Video) is a modern video codec built on discrete wavelet transformation (DWT). It combines cutting-edge compression techniques with careful optimisation for resource-constrained systems.
### Key Advantages
- **No blocking artefacts**: Large-tile DWT encoding with padding eliminates DCT block boundaries
- **No colour banding**: Wavelets spread gradients across scales, preventing banding in the first place
- **Perceptual optimisation**: HVS-aware quantisation preserves visual quality where it matters
- **Temporal coherence**: 3D DWT with GOP encoding exploits inter-frame similarity
- **Efficient sparse coding**: EZBC encoding exploits coefficient sparsity for 16-18% additional compression
- **Hardware-friendly**: Designed for efficient decoding on resource-constrained platforms
## Features
### Compression Technology
- **Wavelet Types**
- **5/3 Reversible** (JPEG 2000 standard): Lossless-capable, good for archival
- **9/7 Irreversible** (default): Best overall compression, CDF 9/7 variant
- **Spatial Encoding**
- Large-tile encoding with padding, with optional single-tile mode (no blocking artefacts)
- 6-level DWT decomposition for deep frequency analysis
- Perceptual quantisation with HVS-optimised coefficient scaling
- YCoCg-R colour space with anisotropic chroma quantisation
- **Temporal Encoding** (3D DWT Mode)
- Group-of-pictures (GOP) encoding with adaptive size (typically 20 frames)
- Unified EZBC encoding across temporal dimension
- Adaptive GOP boundaries with scene change detection
- **EZBC Encoding**
- Binary tree embedded zero block coding exploits coefficient sparsity
- Progressive refinement structure with bitplane encoding
- Concatenated channel layout for cross-channel compression optimisation
- Typical sparsity: 86.9% (Y), 97.8% (Co), 99.5% (Cg)
- 16-18% compression improvement over naive coefficient encoding
### Audio Integration
TAV seamlessly integrates with the TAD (TSVM Advanced Audio) codec for synchronised audio/video encoding:
- Variable chunk sizes match video GOP boundaries
- Embedded TAD packets (type 0x24) with Zstd compression
- Unified container format
## Building
### Prerequisites
- C compiler (GCC/Clang)
- Zstandard library
- OpenCV 4 library (only used by experimental motion estimation feature)
### Compilation
```bash
# Build TAV encoder/decoder
make tav
# Build all tools including TAD audio codec
make all
# Clean build artefacts
make clean
```
### Build Targets
- `encoder_tav` - Main video encoder
- `decoder_tav` - Standalone video decoder
- `tav_inspector` - Packet analysis and debugging tool
## Usage
### Basic Encoding
Encoding requires the FFmpeg executable to be installed on your system.
```bash
# Default encoding (CDF 9/7 wavelet, quality level 3)
./encoder_tav -i input.mp4 -o output.tav
# Quality levels (0-5)
./encoder_tav -i input.avi -q 0 -o output.tav # Lowest quality, smallest file
./encoder_tav -i input.mkv -q 5 -o output.tav # Highest quality, largest file
```
### Intra-only Encoding
```bash
# Enable Intra-only encoding
./encoder_tav -i input.mp4 --intra-only -o output.tav
```
### Decoding and Inspection
```bash
# Decode TAV to raw video
./decoder_tav -i input.tav -o output.mkv
# Inspect packet structure (debugging)
./tav_inspector input.tav -v
```
### Frame Limiting
```bash
# Encode only first N frames (useful for testing)
./encoder_tav -i input.mp4 -o output.tav --encode-limit 100
```
## Technical Architecture
### Encoder Pipeline
1. **Input Processing**
- FFmpeg demuxing and frame extraction
- RGB to YCoCg-R colour space conversion
- Resolution validation and padding
2. **DWT Transform**
- Spatial: 6-level decomposition per frame
- Temporal: 1D DWT across GOP frames (3D DWT mode)
- Lifting scheme implementation for all wavelets
3. **Perceptual Quantisation**
- HVS-based subband weights
- Anisotropic chroma quantisation (YCoCg-R specific)
- Quality-dependent quantisation matrices
4. **EZBC Encoding**
- Binary tree embedded zero block coding per channel
- Progressive refinement by bitplanes
- Concatenated bitstream layout: `[Y_bitstream][Co_bitstream][Cg_bitstream]`
- Cross-channel compression optimisation
5. **Entropy Coding**
- Zstandard compression (level 7) on concatenated EZBC bitstreams
- Cross-channel compression opportunities
- Adaptive compression based on GOP structure
### Decoder Pipeline
1. **Container Parsing**
- Packet type identification (0x00-0xFF)
- Timecode synchronisation
- GOP boundary detection
2. **Entropy Decoding**
- Zstd decompression of concatenated bitstreams
- EZBC binary tree decoding per channel
- Progressive coefficient reconstruction
3. **Inverse Quantisation**
- Perceptual weight application
- Subband-specific scaling
- Coefficient reconstruction from sparse representation
4. **Inverse DWT**
- Temporal: 1D inverse DWT across frames (3D DWT mode)
- Spatial: 6-level inverse wavelet reconstruction
5. **Output Conversion**
- YCoCg-R to RGB colour space
- Clamping and dithering
- Frame buffering for display
### Wavelet Implementation
All wavelets follow a **lifting scheme** pattern with symmetric boundary extension:
```c
// Forward Transform: Predict → Update
temp[half + i] = data[odd] - predict(data[even]); // High-pass
temp[i] = data[even] + update(temp[half]); // Low-pass
// Inverse Transform: Undo Update → Undo Predict (reversed order)
data[even] = temp[i] - update(temp[half]); // Undo low-pass
data[odd] = temp[half + i] + predict(data[even]); // Undo high-pass
```
**Critical**: Forward and inverse transforms must use identical coefficient indexing and exactly reverse operations to avoid grid artefacts.
### Coefficient Layout
TAV uses **2D Spatial Layout** in memory for each decomposition level:
```
[LL] [LH] [HL] [HH] [LH] [HL] [HH] ...
└── Level 0 ──┘ └─── Level 1 ───┘
```
- `LL`: Low-pass (approximation) - progressively smaller with each level
- `LH`, `HL`, `HH`: High-pass subbands (horizontal, vertical, diagonal detail)
## Performance Characteristics
### Compression Efficiency
- **Sparsity Exploitation**: Typical quantised coefficient sparsity
- Y channel: 86.9% zeros
- Co channel: 97.8% zeros
- Cg channel: 99.5% zeros
- **EZBC Benefits**: 16-18% compression improvement over naive coefficient encoding through sparsity exploitation
- **Temporal Coherence**: Additional 15-25% improvement with 3D DWT (content-dependent)
### Computational Complexity
- **Encoding**: O(n log n) per frame for spatial DWT
- **Decoding**: O(n log n) per frame, optimised lifting scheme implementation
- **Memory**: Single-tile encoding requires O(w × h) working memory
### Quality Characteristics
- **No blocking artefacts**: Wavelet-based encoding is inherently smooth
- **Perceptual optimisation**: Better subjective quality than bitrate-equivalent DCT codecs
- **Scalability**: 6 quality levels (0-5) provide wide range of bitrate/quality trade-offs
- **Temporal stability**: 3D DWT mode reduces flickering and temporal artefacts
## Format Specification
For complete packet structure and bitstream format details, refer to `format documentation.txt`.
### Key Packet Types
- `0x00`: Metadata and initialisation
- `0x01`: I-frame (intra-coded frame)
- `0x12`: GOP unified packet (3D DWT mode)
- `0x24`: Embedded TAD audio
- `0xFC`: GOP synchronisation
- `0xFD`: Timecode
## Debugging Tools
### TAV Inspector
Analyse TAV packet structure and decode individual frames:
```bash
# Verbose packet analysis
./tav_inspector input.tav -v
# Extract specific frame ranges
./tav_inspector input.tav --frame-range 100-200
```
## Related Projects
- **TAD** (TSVM Advanced Audio): Perceptual audio codec using CDF 9/7 wavelets
- **TSVM**: Target virtual machine platform for TAV playback
## Licence
MIT.

View File

@@ -0,0 +1,424 @@
/**
* TAV+UCF Payload Writer for TAV Files
* Creates a TAV header-only (32 bytes) + UCF cue file (4064 bytes) for concatenated TAV files
* Total output size: 4096 bytes (32 + 4064)
* Usage: ./create_ucf_payload input.tav output.ucf [track_names.txt]
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#define TAV_HEADER_SIZE 32
#define UCF_SIZE 4064
#define TAV_OFFSET_BIAS (TAV_HEADER_SIZE + UCF_SIZE)
#define TAV_MAGIC "\x1FTSVMTA" // Matches both TAV and TAP
// Leading fields of the TAV file header, declared packed so that no padding is
// inserted between members. The member order and sizes match the byte offsets
// that write_tav_header_only() fills in by hand (magic at 0-7, version at 8,
// width at 9-10, height at 11-12, fps at 13, total_frames at 14-17).
// NOTE(review): nothing in this file appears to use this type; the header is
// built as a raw byte array instead. Confirm before relying on it.
typedef struct {
uint8_t magic[8];
uint8_t version;
uint16_t width;
uint16_t height;
uint8_t fps;
uint32_t total_frames;
// ... rest of header fields
} __attribute__((packed)) TAVHeader;
// Emit the fixed TAV "header-only" header (File Role = 1) that precedes the UCF data.
// Every multi-byte field is spelled out byte by byte in little-endian order.
static void write_tav_header_only(FILE *out) {
    const uint8_t header[TAV_HEADER_SIZE] = {
        // Magic: "\x1FTSVMTAV"
        [0] = 0x1F, [1] = 'T', [2] = 'S', [3] = 'V',
        [4] = 'M',  [5] = 'T', [6] = 'A', [7] = 'V',

        [8] = 5,                        // Version: 5 (YCoCg-R perceptual)
        [9] = 0x30,  [10] = 0x02,       // Width: 560 (0x0230)
        [11] = 0xC0, [12] = 0x01,       // Height: 448 (0x01C0)
        [13] = 30,                      // FPS: 30

        // Total Frames: 0xFFFFFFFF (still image marker / not applicable)
        [14] = 0xFF, [15] = 0xFF, [16] = 0xFF, [17] = 0xFF,

        [18] = 1,                       // Wavelet Filter Type: 1 (9/7 irreversible, default)
        [19] = 6,                       // Decomposition Levels: 6

        // Quantiser Indices (Y, Co, Cg): 255 (not applicable for header-only)
        [20] = 0xFF, [21] = 0xFF, [22] = 0xFF,

        [23] = 0x80,                    // Extra Feature Flags: bit 7 = has no actual packets
        [24] = 0,                       // Video Flags
        [25] = 0,                       // Encoder quality level
        [26] = 0,                       // Channel layout: 0 (Y-Co-Cg)

        // Bytes 27-30 are reserved and stay zero.

        [31] = 1,                       // File Role: 1 (header-only, UCF payload follows)
    };

    fwrite(header, 1, sizeof header, out);
}
// Write the 16-byte UCF header: 8-byte magic, version, cue count, size field, reserved byte.
//
// num_cues: number of cue elements that follow the header.
//
// Multi-byte fields are serialised explicitly as little-endian so the output does not
// depend on the byte order of the machine running this tool (the TAV header written by
// write_tav_header_only() is emitted little-endian in the same way).
static void write_ucf_header(FILE *out, uint16_t num_cues) {
    // The size field carries the combined TAV header + UCF area size.
    const uint32_t cue_file_size = TAV_OFFSET_BIAS;

    const uint8_t header[16] = {
        0x1F, 'T', 'S', 'V', 'M', 'U', 'C', 'F',   // magic
        1,                                         // version
        (uint8_t)(num_cues & 0xFF),                // cue count, low byte first
        (uint8_t)(num_cues >> 8),
        (uint8_t)(cue_file_size & 0xFF),           // size field, low byte first
        (uint8_t)((cue_file_size >> 8) & 0xFF),
        (uint8_t)((cue_file_size >> 16) & 0xFF),
        (uint8_t)((cue_file_size >> 24) & 0xFF),
        0                                          // reserved
    };

    fwrite(header, 1, sizeof header, out);
}
// Write one UCF cue element: addressing-mode byte, 16-bit name length, the name bytes
// (no terminator), then a 48-bit file offset.
//
// offset: position of the track's TAV header in the un-prefixed input file; the size of
//         the payload that will be prepended (TAV_OFFSET_BIAS) is added before writing.
// name:   NUL-terminated track name.
//
// The length and offset are serialised byte by byte, low byte first, so the result is the
// same on any host. Copying the first bytes of the in-memory integers would emit the wrong
// bytes on a big-endian machine.
static void write_cue_element(FILE *out, uint64_t offset, const char *name) {
    // NOTE(review): the value written is 0x22, but the flags listed in the original comment
    // (0x20 human | 0x01 machine | 0x02 internal) OR together to 0x23. The value is kept
    // unchanged here; confirm the intended flag set against the UCF specification.
    const uint8_t addressing_mode = 0x22;
    const uint16_t name_len = (uint16_t)strlen(name);
    const uint64_t biased_offset = offset + TAV_OFFSET_BIAS;

    const uint8_t name_len_le[2] = {
        (uint8_t)(name_len & 0xFF),
        (uint8_t)(name_len >> 8)
    };

    uint8_t offset_le[6];
    for (int i = 0; i < 6; i++) {
        offset_le[i] = (uint8_t)((biased_offset >> (8 * i)) & 0xFF);
    }

    fwrite(&addressing_mode, 1, 1, out);
    fwrite(name_len_le, 1, sizeof name_len_le, out);
    fwrite(name, 1, name_len, out);
    fwrite(offset_le, 1, sizeof offset_le, out);
}
// Read track names from a text file, one name per line.
//
// filename:  path of the names file.
// count_out: receives the number of names on success; left untouched on failure.
//
// Returns a malloc'd array of malloc'd strings (the caller frees each entry and then the
// array), or NULL if the file cannot be opened or memory runs out.
//
// Trailing "\n" / "\r\n" is stripped and empty lines are skipped. A line longer than the
// 255-character buffer is truncated to 255 characters and the rest of that line is
// discarded, so one over-long line still yields exactly one name instead of spilling into
// extra entries and shifting every later track name.
static char **read_track_names(const char *filename, int *count_out) {
    FILE *f = fopen(filename, "r");
    if (!f) {
        return NULL;
    }

    int count = 0;
    int capacity = 16;
    char **names = malloc((size_t)capacity * sizeof *names);
    if (!names) {
        fclose(f);
        return NULL;
    }

    char line[256];
    while (fgets(line, sizeof line, f)) {
        size_t len = strlen(line);

        if (len > 0 && line[len - 1] == '\n') {
            line[--len] = '\0';
        } else if (len == sizeof line - 1) {
            // Buffer filled before the newline arrived: drop the remainder of this line.
            int c;
            while ((c = fgetc(f)) != EOF && c != '\n') {
                /* discard */
            }
        }
        if (len > 0 && line[len - 1] == '\r') {
            line[--len] = '\0';
        }

        // Skip empty lines
        if (len == 0) {
            continue;
        }

        // Grow the array when full; keep the old pointer valid if realloc fails.
        if (count >= capacity) {
            char **grown = realloc(names, (size_t)capacity * 2 * sizeof *names);
            if (!grown) {
                goto fail;
            }
            names = grown;
            capacity *= 2;
        }

        // Plain malloc + memcpy keeps this function within ISO C (strdup is POSIX).
        char *copy = malloc(len + 1);
        if (!copy) {
            goto fail;
        }
        memcpy(copy, line, len + 1);
        names[count++] = copy;
    }

    fclose(f);
    *count_out = count;
    return names;

fail:
    for (int i = 0; i < count; i++) {
        free(names[i]);
    }
    free(names);
    fclose(f);
    return NULL;
}
// Find all TAV headers in the file (with smart packet-wise skipping)
static int find_tav_headers(FILE *in, uint64_t **offsets_out) {
uint64_t *offsets = NULL;
int count = 0;
int capacity = 16;
offsets = malloc(capacity * sizeof(uint64_t));
if (!offsets) {
fprintf(stderr, "Error: Memory allocation failed\n");
return -1;
}
// Seek to beginning
fseek(in, 0, SEEK_SET);
uint8_t magic[8];
while (1) {
// Remember current position before reading
uint64_t pos = ftell(in);
// Try to read magic
if (fread(magic, 1, 8, in) != 8) {
// End of file
break;
}
// Check for TAV magic signature
if (memcmp(magic, TAV_MAGIC, 7) == 0 && (magic[7] == 'V' || magic[7] == 'P')) {
// Found TAV header
if (count >= capacity) {
capacity *= 2;
uint64_t *new_offsets = realloc(offsets, capacity * sizeof(uint64_t));
if (!new_offsets) {
fprintf(stderr, "Error: Memory reallocation failed\n");
free(offsets);
return -1;
}
offsets = new_offsets;
}
offsets[count++] = pos;
printf("Found TAV header at offset: 0x%lX (%lu)\n", pos, pos);
// Skip past this header (32 bytes total)
uint64_t packet_pos = pos + 32;
fseek(in, packet_pos, SEEK_SET);
// Smart packet-wise skipping
while (1) {
uint8_t packet_type;
if (fread(&packet_type, 1, 1, in) != 1) {
// End of file
break;
}
// Check if this is the start of next TAV file (0x1F is prohibited as packet type)
if (packet_type == 0x1F) {
// Rewind 1 byte to re-read as magic at the top of outer loop
fseek(in, packet_pos, SEEK_SET);
break;
}
// printf("TAV Packet 0x%02X at 0x%lX\n", packet_type, packet_pos);
// Sync packets (0xFE, 0xFF) have no payload size - they're single-byte packets
if (packet_type == 0xFE || packet_type == 0xFF) {
packet_pos += 1;
fseek(in, packet_pos, SEEK_SET);
continue;
}
// Read payload size (uint32, little-endian)
uint32_t payload_size = 0;
if (fread(&payload_size, 4, 1, in) != 1) {
// End of file
break;
}
// Skip packet: 1 byte (type) + 4 bytes (size) + payload_size
packet_pos += 1 + 4 + payload_size;
fseek(in, packet_pos, SEEK_SET);
}
} else {
// Move forward by 1 byte for next search
fseek(in, pos + 1, SEEK_SET);
}
}
*offsets_out = offsets;
return count;
}
// Entry point: scan a concatenated TAV file for headers and write the fixed-size
// TAV header + UCF cue payload that makes it seekable once prepended.
//
// argv[1] input TAV file, argv[2] output payload file, argv[3] optional track-names file.
// Returns 0 on success, 1 on any error. On an output error the partially written file is
// removed, because a truncated or oversized payload would make every cue offset wrong.
int main(int argc, char *argv[]) {
    if (argc < 3 || argc > 4) {
        fprintf(stderr, "Usage: %s <input.tav> <output.ucf> [track_names.txt]\n", argv[0]);
        fprintf(stderr, "Creates a 4KB UCF payload for concatenated TAV file\n");
        fprintf(stderr, " track_names.txt: Optional file with track names (one per line)\n");
        return 1;
    }

    const char *input_path = argv[1];
    const char *output_path = argv[2];
    const char *names_path = (argc == 4) ? argv[3] : NULL;

    // All resources are declared up front so the single cleanup path can release them.
    int rc = 1;
    char **track_names = NULL;
    int num_names = 0;
    uint64_t *offsets = NULL;
    int num_tracks = 0;
    FILE *in = NULL;
    FILE *out = NULL;

    // Read track names if provided (failure is not fatal: default names are used)
    if (names_path) {
        track_names = read_track_names(names_path, &num_names);
        if (track_names) {
            printf("Loaded %d track name(s) from '%s'\n", num_names, names_path);
        } else {
            fprintf(stderr, "Warning: Could not read track names from '%s', using defaults\n", names_path);
        }
    }

    // Open input file
    in = fopen(input_path, "rb");
    if (!in) {
        fprintf(stderr, "Error: Cannot open input file '%s'\n", input_path);
        goto cleanup;
    }

    // Find all TAV headers
    num_tracks = find_tav_headers(in, &offsets);
    fclose(in);

    if (num_tracks < 0) {
        fprintf(stderr, "Error: Failed to scan input file\n");
        goto cleanup;
    }
    if (num_tracks == 0) {
        fprintf(stderr, "Error: No TAV headers found in input file\n");
        goto cleanup;
    }

    printf("\nFound %d TAV header(s)\n", num_tracks);

    // Create output UCF file
    out = fopen(output_path, "wb");
    if (!out) {
        fprintf(stderr, "Error: Cannot create output file '%s'\n", output_path);
        goto cleanup;
    }

    // Write TAV header-only payload (File Role = 1)
    write_tav_header_only(out);
    printf("Written TAV header-only payload (%d bytes)\n", TAV_HEADER_SIZE);

    // Write UCF header. A count above 65535 cannot fit the fixed-size area anyway and is
    // rejected by the size check below.
    write_ucf_header(out, (uint16_t)num_tracks);

    // Write cue elements
    for (int i = 0; i < num_tracks; i++) {
        char default_name[32];
        const char *name;

        // Use custom name if available, otherwise generate default
        if (track_names && i < num_names) {
            name = track_names[i];
        } else {
            snprintf(default_name, sizeof(default_name), "Track %d", i + 1);
            name = default_name;
        }

        write_cue_element(out, offsets[i], name);
        // uint64_t is not necessarily unsigned long; cast to a type with a fixed specifier.
        printf("Written cue element: '%s' at offset 0x%llX (biased: 0x%llX)\n",
               name, (unsigned long long)offsets[i],
               (unsigned long long)(offsets[i] + TAV_OFFSET_BIAS));
    }

    // The payload must be exactly TAV header + UCF area: the cue offsets were biased by
    // that amount, so a larger file would silently point every cue at the wrong place.
    const long target_size = TAV_HEADER_SIZE + UCF_SIZE;
    long current_pos = ftell(out);
    if (current_pos < 0 || ferror(out)) {
        fprintf(stderr, "Error: Failed to write output file '%s'\n", output_path);
        goto cleanup;
    }
    if (current_pos > target_size) {
        fprintf(stderr, "Error: %d cue element(s) need %ld bytes, which exceeds the %ld-byte payload\n",
                num_tracks, current_pos, target_size);
        goto cleanup;
    }

    // Fill remaining space with zeros
    if (current_pos < target_size) {
        static const uint8_t zeros[TAV_HEADER_SIZE + UCF_SIZE] = {0};
        size_t remaining = (size_t)(target_size - current_pos);
        if (fwrite(zeros, 1, remaining, out) != remaining) {
            fprintf(stderr, "Error: Failed to write output file '%s'\n", output_path);
            goto cleanup;
        }
    }

    // fclose flushes buffered data, so its result decides whether the file is complete.
    if (fclose(out) != 0) {
        out = NULL;
        fprintf(stderr, "Error: Failed to write output file '%s'\n", output_path);
        remove(output_path);
        goto cleanup;
    }
    out = NULL;

    printf("\nTAV+UCF payload created successfully: %s\n", output_path);
    printf("File size: %zu bytes (TAV header: %d + UCF: %d)\n",
           (size_t)(TAV_HEADER_SIZE + UCF_SIZE), TAV_HEADER_SIZE, UCF_SIZE);
    printf("\nTo create seekable TAV file, prepend this payload to your concatenated TAV file:\n");
    printf(" cat %s input.tav > output_seekable.tav\n", output_path);
    rc = 0;

cleanup:
    // `out` is still open here only when an output step failed: discard the partial file.
    if (out) {
        fclose(out);
        remove(output_path);
    }
    free(offsets);
    if (track_names) {
        for (int i = 0; i < num_names; i++) {
            free(track_names[i]);
        }
        free(track_names);
    }
    return rc;
}

View File

@@ -100,8 +100,8 @@ static ycocg_t rgb_to_ycocg_correct(uint8_t r, uint8_t g, uint8_t b, float dithe
return result;
}
static int quantize_4bit_y(float value) {
// Y quantization: round(y * 15)
static int quantise_4bit_y(float value) {
// Y quantisation: round(y * 15)
return (int)round(fmaxf(0.0f, fminf(15.0f, value * 15.0f)));
}
@@ -360,7 +360,7 @@ static void encode_ipf1_block_correct(uint8_t *rgb_data, int width, int height,
pixels[idx] = (ycocg_t){0.0f, 0.0f, 0.0f};
}
y_values[idx] = quantize_4bit_y(pixels[idx].y);
y_values[idx] = quantise_4bit_y(pixels[idx].y);
co_values[idx] = pixels[idx].co;
cg_values[idx] = pixels[idx].cg;
}
@@ -567,7 +567,7 @@ static int process_audio(encoder_config_t *config, int frame_num, FILE *output)
return 1;
}
// Initialize packet size on first frame
// Initialise packet size on first frame
if (config->mp2_packet_size == 0) {
uint8_t header[4];
if (fread(header, 1, 4, config->mp2_file) != 4) return 1;
@@ -589,7 +589,7 @@ static int process_audio(encoder_config_t *config, int frame_num, FILE *output)
double packets_per_frame = frame_audio_time / packet_audio_time;
// Only insert audio when buffer would go below 2 frames
// Initialize with 2 packets on first frame to prime the buffer
// Initialise with 2 packets on first frame to prime the buffer
int packets_to_insert = 0;
if (frame_num == 1) {
packets_to_insert = 2;
@@ -654,7 +654,7 @@ static void write_tvdos_header(encoder_config_t *config, FILE *output) {
fwrite(reserved, 1, 10, output);
}
// Initialize encoder configuration
// Initialise encoder configuration
static encoder_config_t *init_encoder_config() {
encoder_config_t *config = calloc(1, sizeof(encoder_config_t));
if (!config) return NULL;
@@ -807,7 +807,7 @@ static void print_usage(const char *program_name) {
int main(int argc, char *argv[]) {
encoder_config_t *config = init_encoder_config();
if (!config) {
fprintf(stderr, "Failed to initialize encoder\n");
fprintf(stderr, "Failed to initialise encoder\n");
return 1;
}
@@ -904,7 +904,7 @@ int main(int argc, char *argv[]) {
// Write TVDOS header
write_tvdos_header(config, output);
// Initialize progress tracking
// Initialise progress tracking
gettimeofday(&config->start_time, NULL);
config->last_progress_time = config->start_time;
config->total_output_bytes = 8 + 2 + 2 + 2 + 4 + 2 + 2 + 10; // TVDOS header size

View File

@@ -0,0 +1,183 @@
// Created by CuriousTorvald and Claude on 2025-10-17
// Block motion estimation and bidirectional block motion compensation for TAV encoder
// Motion vectors come from Farneback dense optical flow sampled at block centres (1/4-pixel units);
// compensation warps fixed-size blocks with bilinear interpolation
#include <opencv2/opencv.hpp>
#include <cstdlib>
#include <cstring>
#include <cmath>
extern "C" {
// Dense optical flow estimation using Farneback algorithm
// Computes flow at every pixel, then samples at block centers for motion vectors
// Much more spatially coherent than independent block matching
//
// current_y / reference_y: width*height luma planes, row-major.
// mvs_x / mvs_y: one entry per block, ceil(width/block_size) * ceil(height/block_size)
//                entries in row-major block order, written in 1/4-pixel units.
//
// Does nothing when width, height or block_size is not positive. Motion vectors are
// saturated to the int16_t range (and a NaN flow value becomes 0) because converting an
// out-of-range float to an integer type is undefined behaviour.
void estimate_optical_flow_motion(
    const float *current_y,   // Current frame Y channel (width×height)
    const float *reference_y, // Reference frame Y channel
    int width, int height,
    int block_size,           // Block size (e.g., 16)
    int16_t *mvs_x,           // Output: motion vectors X (in 1/4-pixel units)
    int16_t *mvs_y            // Output: motion vectors Y (in 1/4-pixel units)
) {
    // Guard against reading current_y[0] of an empty frame and dividing by a zero block size.
    if (width <= 0 || height <= 0 || block_size <= 0) {
        return;
    }

    // Convert float Y channels to 8-bit grayscale for OpenCV
    cv::Mat cur_gray(height, width, CV_8UC1);
    cv::Mat ref_gray(height, width, CV_8UC1);

    // Detect if Y is in [0,1] range and scale to [0,255] if needed.
    // The decision is made from the current frame only; the reference frame uses the same scale.
    float y_min = current_y[0], y_max = current_y[0];
    for (int i = 1; i < width * height; i++) {
        if (current_y[i] < y_min) y_min = current_y[i];
        if (current_y[i] > y_max) y_max = current_y[i];
    }
    float scale = (y_max <= 1.1f) ? 255.0f : 1.0f;

    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            int idx = y * width + x;
            cur_gray.at<uint8_t>(y, x) = (uint8_t)std::round(std::max(0.0f, std::min(255.0f, current_y[idx] * scale)));
            ref_gray.at<uint8_t>(y, x) = (uint8_t)std::round(std::max(0.0f, std::min(255.0f, reference_y[idx] * scale)));
        }
    }

    // Compute dense optical flow using Farneback algorithm
    // IMPORTANT: We need BACKWARD flow (current → reference) for motion compensation
    // This tells us where to PULL pixels FROM in the reference frame
    cv::Mat flow;
    cv::calcOpticalFlowFarneback(
        cur_gray, // Current frame (source)
        ref_gray, // Reference frame (destination)
        flow,     // Output flow (2-channel float: dx, dy per pixel)
        0.5,      // pyr_scale: pyramid scale (0.5 = each layer is half size)
        3,        // levels: number of pyramid levels
        20,       // winsize: averaging window size
        3,        // iterations: number of iterations at each pyramid level
        5,        // poly_n: size of pixel neighborhood (5 or 7)
        1.2,      // poly_sigma: standard deviation of Gaussian for polynomial expansion
        0         // flags: 0 = normal, OPTFLOW_USE_INITIAL_FLOW = use input flow as initial estimate
    );

    // Pixels → 1/4-pixel units, saturated to what int16_t can hold.
    const auto to_quarter_pel = [](float pixels) -> int16_t {
        const float q = std::round(pixels * 4.0f);
        if (std::isnan(q)) {
            return 0;
        }
        return (int16_t)std::max(-32768.0f, std::min(32767.0f, q));
    };

    // Sample flow at block centers to get motion vectors
    int num_blocks_x = (width + block_size - 1) / block_size;
    int num_blocks_y = (height + block_size - 1) / block_size;
    for (int by = 0; by < num_blocks_y; by++) {
        for (int bx = 0; bx < num_blocks_x; bx++) {
            int block_idx = by * num_blocks_x + bx;

            // Block center position
            int center_x = bx * block_size + block_size / 2;
            int center_y = by * block_size + block_size / 2;

            // Clamp to frame boundaries (partial blocks on the right/bottom edge)
            if (center_x >= width) center_x = width - 1;
            if (center_y >= height) center_y = height - 1;

            // Get flow at block center
            cv::Point2f flow_vec = flow.at<cv::Point2f>(center_y, center_x);

            // Flow is in pixels, positive = motion to the right/down
            mvs_x[block_idx] = to_quarter_pel(flow_vec.x);
            mvs_y[block_idx] = to_quarter_pel(flow_vec.y);
        }
    }
}
// Block-wise backward warp with bilinear sampling.
// Each block shares one motion vector (1/4-pixel units); every destination pixel is
// fetched from the source at (x - mv_x, y - mv_y), with the sample position clamped to
// the frame so reads never leave the image.
// This implements the warp() function from MC-EZBC pseudocode
void warp_block_motion(
    const float *src,        // Source frame
    int width, int height,
    const int16_t *mvs_x,    // Motion vectors X (1/4-pixel units)
    const int16_t *mvs_y,    // Motion vectors Y (1/4-pixel units)
    int block_size,          // Block size (e.g., 16)
    float *dst               // Output warped frame
) {
    const int blocks_across = (width + block_size - 1) / block_size;
    const int blocks_down = (height + block_size - 1) / block_size;
    const float last_col = (float)(width - 1);
    const float last_row = (float)(height - 1);

    for (int by = 0; by < blocks_down; ++by) {
        const int y_begin = by * block_size;
        const int y_end = std::min(y_begin + block_size, height);

        for (int bx = 0; bx < blocks_across; ++bx) {
            const int block = by * blocks_across + bx;

            // Quarter-pel vector → displacement in pixels
            const float shift_x = mvs_x[block] / 4.0f;
            const float shift_y = mvs_y[block] / 4.0f;

            const int x_begin = bx * block_size;
            const int x_end = std::min(x_begin + block_size, width);

            for (int y = y_begin; y < y_end; ++y) {
                // The vertical sample position is the same for the whole row of the block.
                const float sy = std::max(0.0f, std::min(last_row, y - shift_y));
                const int row0 = (int)sy;
                const int row1 = std::min(row0 + 1, height - 1);
                const float wy = sy - row0;

                const float *upper = src + row0 * width;
                const float *lower = src + row1 * width;
                float *out_row = dst + y * width;

                for (int x = x_begin; x < x_end; ++x) {
                    const float sx = std::max(0.0f, std::min(last_col, x - shift_x));
                    const int col0 = (int)sx;
                    const int col1 = std::min(col0 + 1, width - 1);
                    const float wx = sx - col0;

                    // Blend horizontally on both rows, then vertically between them.
                    const float top = (1.0f - wx) * upper[col0] + wx * upper[col1];
                    const float bottom = (1.0f - wx) * lower[col0] + wx * lower[col1];
                    out_row[x] = (1.0f - wy) * top + wy * bottom;
                }
            }
        }
    }
}
// Bidirectional prediction for the MC-EZBC predict step:
//   prediction = 0.5 * (warp(f0, MV_fwd) + warp(f1, MV_bwd))
// Both frames are warped into scratch buffers first and only then averaged into
// `prediction`.
void warp_bidirectional(
    const float *f0, const float *f1,
    int width, int height,
    const int16_t *mvs_fwd_x, const int16_t *mvs_fwd_y, // F0 → F1
    const int16_t *mvs_bwd_x, const int16_t *mvs_bwd_y, // F1 → F0
    int block_size,
    float *prediction // Output: 0.5 * (warped_f0 + warped_f1)
) {
    const int pixel_count = width * height;

    // Scratch frames for the two motion-compensated inputs
    float *from_f0 = new float[pixel_count];
    float *from_f1 = new float[pixel_count];

    warp_block_motion(f0, width, height, mvs_fwd_x, mvs_fwd_y, block_size, from_f0);
    warp_block_motion(f1, width, height, mvs_bwd_x, mvs_bwd_y, block_size, from_f1);

    // Per-pixel mean of the two warped frames
    int i = 0;
    while (i < pixel_count) {
        prediction[i] = 0.5f * (from_f0[i] + from_f1[i]);
        ++i;
    }

    delete[] from_f0;
    delete[] from_f1;
}
} // extern "C"

View File

@@ -0,0 +1,795 @@
/*
encoder_tav_text.c
Text-based video encoder for TSVM using custom font ROMs
Outputs Videotex files with custom header and packet type 0x3F (text mode)
File structure:
- Videotex header (32 bytes): magic "\x1FTSVM-VT", version, grid dims, fps, total_frames
- Extended header packet (0xEF): BGNT, ENDT, CDAT, VNDR, FMPG
- Font ROM packets (0x30): lowrom and highrom (1920 bytes each)
- Per-frame sequence: [audio 0x20], [timecode 0xFD], [videotex 0x3F], [sync 0xFF]
Videotex packet structure (0x3F): Zstd([rows][cols][fg-array][bg-array][char-array])
- rows: uint8 (32)
- cols: uint8 (80)
- fg-array: rows*cols bytes (foreground colors, 0xF0=black, 0xFE=white)
- bg-array: rows*cols bytes (background colors, 0xF0=black, 0xFE=white)
- char-array: rows*cols bytes (glyph indices 0-255)
Total uncompressed size: 2 + (80*32*3) = 7682 bytes
Separated arrays compress much better (fg/bg are just 0xF0/0xFE runs)
Video size: 80×32 characters (560×448 pixels with 7×14 font)
Audio: MP2 encoding at 96 kbps, 32 KHz stereo (packet 0x20)
Each text frame is treated as an I-frame with sync packet
Usage:
gcc -Ofast -std=c11 -Wall encoder_tav_text.c -o encoder_tav_text -lm -lzstd
./encoder_tav_text -i video.mp4 -f font.chr -o output.mv3
*/
#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
#include <zstd.h>
#include <unistd.h>
#include <time.h>
#include <sys/time.h>
#define ENCODER_VENDOR_STRING "Encoder-TAV-Text 20251121 (videotex)"
#define CHAR_W 7
#define CHAR_H 14
#define GRID_W 80
#define GRID_H 32
#define PIXEL_W (GRID_W * CHAR_W) // 560
#define PIXEL_H (GRID_H * CHAR_H) // 448
#define PATCH_SZ (CHAR_W * CHAR_H)
#define SAMPLE_RATE 32000
#define MP2_DEFAULT_PACKET_SIZE 1152
// TAV packet types
#define PACKET_TIMECODE 0xFD
#define PACKET_SYNC 0xFF
#define PACKET_AUDIO_MP2 0x20
#define PACKET_SSF 0x30
#define PACKET_TEXT 0x3F
#define PACKET_EXTENDED_HDR 0xEF
// SSF opcodes for font ROM
#define SSF_OPCODE_LOWROM 0x80
#define SSF_OPCODE_HIGHROM 0x81
// Font ROM size constants
#define FONTROM_PADDED_SIZE 1920
#define GLYPHS_PER_ROM 128
// Color mapping (4-bit RGB to TSVM palette)
#define COLOR_BLACK 0xF0
#define COLOR_WHITE 0xFE
// Build a temporary audio file name of the form "/tmp/<32 alphanumerics>.mp2".
// `filename` must have room for 42 bytes (41 characters plus the terminator).
// The generator is reseeded from the wall clock on every call.
// NOTE(review): the name is predictable and the file is not created atomically here;
// consider a mkstemp-style API if this path can be influenced by other users.
static void generate_random_filename(char *filename) {
    static const char alphabet[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
    enum { PREFIX_LEN = 5, RANDOM_LEN = 32 };
    const int alphabet_len = sizeof(alphabet) - 1;

    srand(time(NULL));

    // Directory prefix
    memcpy(filename, "/tmp/", PREFIX_LEN);

    // Random stem
    char *cursor = filename + PREFIX_LEN;
    for (int remaining = RANDOM_LEN; remaining > 0; remaining--) {
        *cursor++ = alphabet[rand() % alphabet_len];
    }

    // Extension, including its terminating NUL
    memcpy(cursor, ".mp2", 5);
}
char TEMP_AUDIO_FILE[42];
// Global flag to disable inverted character matching
int g_no_invert_char = 0;
// A font ROM unpacked for matching: load_font_rom() expands each glyph to one byte per
// pixel (value 0 or 1), stored row by row, so glyph g starts at data[g * PATCH_SZ].
typedef struct {
uint8_t *data; // Binary glyph data (PATCH_SZ bytes per glyph)
int count; // Number of glyphs
} FontROM;
// Return the first line printed by `ffmpeg -version`, without its trailing newline.
// The string is heap-allocated and owned by the caller. NULL is returned when the
// command cannot be started, memory is unavailable, or no output could be read.
char *get_ffmpeg_version(void) {
    enum { VERSION_BUF_SIZE = 256 };

    FILE *ffmpeg = popen("ffmpeg -version 2>&1 | head -1", "r");
    if (ffmpeg == NULL) {
        return NULL;
    }

    char *first_line = malloc(VERSION_BUF_SIZE);
    if (first_line != NULL && fgets(first_line, VERSION_BUF_SIZE, ffmpeg) == NULL) {
        // Nothing could be read: report failure
        free(first_line);
        first_line = NULL;
    }
    pclose(ffmpeg);

    if (first_line != NULL) {
        // fgets keeps at most one newline, at the end of the line; cut the string there
        first_line[strcspn(first_line, "\n")] = '\0';
    }
    return first_line;
}
// Wrap `src` in single quotes for /bin/sh, rewriting every embedded ' as '\'' so that no
// character of the path is interpreted by the shell.
// Returns 0 on success, -1 if the quoted form does not fit in dst_size bytes.
static int detect_fps_shell_quote(char *dst, size_t dst_size, const char *src) {
    size_t used = 0;

    if (dst_size < 3) {
        return -1;
    }
    dst[used++] = '\'';
    for (; *src != '\0'; src++) {
        const size_t need = (*src == '\'') ? 4 : 1;
        // Always keep room for the closing quote and the terminator.
        if (used + need + 2 > dst_size) {
            return -1;
        }
        if (*src == '\'') {
            memcpy(dst + used, "'\\''", 4);
            used += 4;
        } else {
            dst[used++] = *src;
        }
    }
    dst[used++] = '\'';
    dst[used] = '\0';
    return 0;
}

// Detect video FPS using ffprobe
//
// video_path: path of the input video.
// Returns the frame rate of the first video stream (r_frame_rate, parsed from a fraction
// such as "30/1" or "24000/1001"), or 30.0 as a fallback when the path is too long,
// ffprobe cannot be run, or its output is not a positive fraction.
//
// The path is single-quoted before being placed in the shell command, so file names
// containing ", $, ` or spaces are passed to ffprobe verbatim instead of being expanded
// or executed by the shell.
float detect_fps(const char *video_path) {
    const float fallback_fps = 30.0f;
    char quoted_path[1024];
    char cmd[2048];

    if (detect_fps_shell_quote(quoted_path, sizeof quoted_path, video_path) != 0) {
        fprintf(stderr, "Warning: input path too long for FPS detection, assuming 30 fps\n");
        return fallback_fps;
    }

    int cmd_len = snprintf(cmd, sizeof cmd,
        "ffprobe -v error -select_streams v:0 -show_entries stream=r_frame_rate "
        "-of default=noprint_wrappers=1:nokey=1 %s 2>/dev/null",
        quoted_path);
    if (cmd_len < 0 || (size_t)cmd_len >= sizeof cmd) {
        // Never run a truncated command line
        return fallback_fps;
    }

    FILE *pipe = popen(cmd, "r");
    if (!pipe) return fallback_fps;

    float fps = fallback_fps;
    char fps_str[64] = {0};
    if (fgets(fps_str, sizeof(fps_str), pipe)) {
        // Parse fraction like "30/1" or "24000/1001"; reject zero or negative parts so a
        // bogus "0/1" cannot become a frame rate of zero.
        int num = 0, den = 1;
        if (sscanf(fps_str, "%d/%d", &num, &den) == 2 && num > 0 && den > 0) {
            fps = (float)num / (float)den;
        }
    }

    pclose(pipe);
    return fps;
}
// Load font ROM (14 bytes per glyph, no header)
//
// path: font ROM file; each glyph is CHAR_H bytes, one byte per pixel row, with bit 6 as
//       the leftmost pixel and bit 0 as the rightmost.
// Returns a heap-allocated FontROM whose data holds one byte (0 or 1) per pixel, or NULL
// if the file cannot be opened, sized or fully read, or if memory runs out. The caller
// owns the result and frees rom->data and rom.
FontROM *load_font_rom(const char *path) {
    FILE *f = fopen(path, "rb");
    if (!f) return NULL;

    FontROM *rom = NULL;
    long size = 0;

    // Determine the file size; a failed seek/tell must not be treated as an empty ROM.
    if (fseek(f, 0, SEEK_END) != 0) goto fail;
    size = ftell(f);
    if (size < 0) goto fail;
    if (fseek(f, 0, SEEK_SET) != 0) goto fail;

    if (size % CHAR_H != 0) {
        fprintf(stderr, "Warning: ROM size not divisible by 14 (got %ld bytes)\n", size);
    }
    int glyph_count = (int)(size / CHAR_H);

    rom = malloc(sizeof *rom);
    if (!rom) goto fail;
    rom->count = glyph_count;
    rom->data = NULL;

    // Request at least one byte so that an empty ROM still yields a valid, freeable
    // buffer rather than an implementation-defined malloc(0) result.
    rom->data = malloc(glyph_count > 0 ? (size_t)glyph_count * PATCH_SZ : 1);
    if (!rom->data) goto fail;

    // Read and unpack glyphs
    for (int g = 0; g < glyph_count; g++) {
        uint8_t row_bytes[CHAR_H];
        if (fread(row_bytes, sizeof row_bytes, 1, f) != 1) goto fail;

        // Unpack bits to binary pixels
        for (int row = 0; row < CHAR_H; row++) {
            for (int col = 0; col < CHAR_W; col++) {
                // Bit 6 = leftmost, bit 0 = rightmost
                int bit = (row_bytes[row] >> (6 - col)) & 1;
                rom->data[g * PATCH_SZ + row * CHAR_W + col] = bit;
            }
        }
    }

    fclose(f);
    fprintf(stderr, "Loaded font ROM: %d glyphs\n", glyph_count);
    return rom;

fail:
    if (rom) {
        free(rom->data);
        free(rom);
    }
    fclose(f);
    return NULL;
}
// Pick the glyph whose black/white rendering is closest to a grayscale patch.
//
// patch:  PATCH_SZ grayscale pixels.
// rom:    unpacked font ROM (one byte per pixel, non-zero = set pixel).
// out_bg / out_fg: receive the colour pair of the winning rendering.
// Returns the index of the winning glyph (0 if the ROM has no glyphs).
//
// Each glyph is scored by the sum of squared differences against the patch, first with
// set pixels white on black and then, unless g_no_invert_char is set, with the colours
// swapped. Only a strictly smaller error replaces the current best.
int find_best_glyph(const uint8_t *patch, const FontROM *rom, uint8_t *out_bg, uint8_t *out_fg) {
    int winner = 0;
    int winner_inverted = 0;
    float lowest = INFINITY;

    const uint8_t *glyph = rom->data;
    for (int g = 0; g < rom->count; g++, glyph += PATCH_SZ) {
        // Normal polarity: set pixel = 255, clear pixel = 0
        float sse = 0;
        for (int i = 0; i < PATCH_SZ; i++) {
            int diff = patch[i] - (glyph[i] ? 255 : 0);
            sse += diff * diff;
        }
        if (sse < lowest) {
            lowest = sse;
            winner = g;
            winner_inverted = 0;
        }

        // Inverted polarity is skipped entirely with --no-invert-char
        if (g_no_invert_char) {
            continue;
        }

        sse = 0;
        for (int i = 0; i < PATCH_SZ; i++) {
            int diff = patch[i] - (glyph[i] ? 0 : 255);
            sse += diff * diff;
        }
        if (sse < lowest) {
            lowest = sse;
            winner = g;
            winner_inverted = 1;
        }
    }

    *out_bg = winner_inverted ? COLOR_WHITE : COLOR_BLACK;
    *out_fg = winner_inverted ? COLOR_BLACK : COLOR_WHITE;
    return winner;
}
// Convert one grayscale frame into a grid of text cells.
//
// `pixels` is read as rows of PIXEL_W bytes. The frame is cut into
// GRID_W x GRID_H cells of CHAR_W x CHAR_H pixels; each cell is matched
// against the font and the result is stored at index (row * GRID_W + column)
// of `chars`, `bg_col` and `fg_col`.
void frame_to_text(const uint8_t *pixels, const FontROM *rom,
                   uint8_t *bg_col, uint8_t *fg_col, uint8_t *chars) {
    const int grid_w = GRID_W;
    const int grid_h = GRID_H;
    const int cell_w = CHAR_W;
    const int cell_h = CHAR_H;
    const int stride = PIXEL_W;

    uint8_t cell[PATCH_SZ];
    int idx = 0;

    for (int gr = 0; gr < grid_h; gr++) {
        for (int gc = 0; gc < grid_w; gc++, idx++) {
            // Copy the cell's pixels into a contiguous patch, row by row
            const uint8_t *top_left = pixels + (gr * cell_h) * stride + gc * cell_w;
            for (int y = 0; y < cell_h; y++) {
                const uint8_t *src_row = top_left + y * stride;
                uint8_t *dst_row = cell + y * cell_w;
                for (int x = 0; x < cell_w; x++) {
                    dst_row[x] = src_row[x];
                }
            }
            // Match the patch against the font
            chars[idx] = find_best_glyph(cell, rom, &bg_col[idx], &fg_col[idx]);
        }
    }
}
// Wall-clock time as nanoseconds since the UNIX epoch.
// The value comes from gettimeofday(), so it is always a multiple of 1000
// (microsecond resolution).
uint64_t get_current_time_ns(void) {
    struct timeval now;
    gettimeofday(&now, NULL);

    const uint64_t whole_seconds = (uint64_t)now.tv_sec;
    const uint64_t microseconds = (uint64_t)now.tv_usec;
    return whole_seconds * 1000000000ULL + microseconds * 1000ULL;
}
// Derive the size in bytes of an MP2 packet from its 4-byte header.
//
// Only header[2] is inspected: bits 7-4 select the bitrate, bits 3-2 the
// sampling frequency and bit 1 is the padding flag. Returns
// MP2_DEFAULT_PACKET_SIZE when the bitrate or sampling-frequency index maps
// to no usable value.
int get_mp2_packet_size(uint8_t *header) {
    static const int kbps_by_index[15] = {
        0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384
    };
    static const int hz_by_index[4] = {44100, 48000, 32000, 0};

    const uint8_t fields = header[2];

    const int kbps_index = (fields >> 4) & 0x0F;
    if (kbps_index >= 15) {
        return MP2_DEFAULT_PACKET_SIZE;
    }

    const int kbps = kbps_by_index[kbps_index];
    const int hz = hz_by_index[(fields >> 2) & 0x03];
    if (kbps == 0 || hz == 0) {
        return MP2_DEFAULT_PACKET_SIZE;
    }

    const int pad = (fields >> 1) & 0x01;
    return (144 * kbps * 1000) / hz + pad;
}
// Emit the fixed 32-byte Videotex file header.
//
// Layout (byte offsets):
//    0  magic "\x1FTSVMTAV" (8 bytes)
//    8  version = 1 (uint8)
//    9  grid width  GRID_W (uint16)
//   11  grid height GRID_H (uint16)
//   13  fps (uint8)
//   14  total_frames (uint32)
//   18  14 single-byte fields, listed below
// Multi-byte fields are written in host byte order.
void write_videotex_header(FILE *f, uint8_t fps, uint32_t total_frames) {
    // One-byte fields that follow total_frames, in file order
    static const uint8_t trailer[14] = {
        0,     // wavelet filter type
        0,     // decomposition levels
        0,     // quantiser Y
        0,     // quantiser Co
        0,     // quantiser Cg
        0x03,  // feature flags: bit 0 = has audio; bit 1 = has subtitle (Videotex is classified as subtitles)
        0x80,  // video flags: bit 7 = has no video
        0,     // encoder quality level
        0x02,  // channel layout: Y only
        0,     // entropy coder
        0,     // reserved
        0,     // reserved
        0,     // device orientation: no rotation
        0      // file role: generic
    };
    const uint16_t grid_dims[2] = { GRID_W, GRID_H };

    fwrite("\x1FTSVMTAV", 8, 1, f);
    fputc(1, f);
    fwrite(grid_dims, sizeof(uint16_t), 2, f);
    fputc(fps, f);
    fwrite(&total_frames, sizeof(uint32_t), 1, f);
    fwrite(trailer, 1, sizeof trailer, f);
}
// Write one extended-header entry holding a 64-bit unsigned value:
// [key: 4 bytes][type: 0x04][value: uint64, host byte order]
static void ext_hdr_write_u64(FILE *f, const char *key, uint64_t value) {
    const uint8_t value_type = 0x04; /* Uint64 */
    fwrite(key, 1, 4, f);
    fwrite(&value_type, 1, 1, f);
    fwrite(&value, sizeof(uint64_t), 1, f);
}

// Write one extended-header entry holding a byte string:
// [key: 4 bytes][type: 0x10][length: uint16][data: length bytes]
// The length field is 16 bits wide, so longer inputs are truncated to 65535
// bytes; the length field and the payload therefore always agree.
static void ext_hdr_write_bytes(FILE *f, const char *key, const void *data, size_t len) {
    const uint8_t value_type = 0x10; /* Bytes */
    if (len > UINT16_MAX) {
        len = UINT16_MAX;
    }
    const uint16_t length = (uint16_t)len;
    fwrite(key, 1, 4, f);
    fwrite(&value_type, 1, 1, f);
    fwrite(&length, sizeof(uint16_t), 1, f);
    fwrite(data, 1, len, f);
}

// Write extended header packet with metadata.
//
// Packet layout: [PACKET_EXTENDED_HDR][pair count: uint16] followed by the
// key-value pairs BGNT, ENDT, CDAT, VNDR and, when `ffmpeg_version` is
// non-NULL, FMPG.
//
// Returns the file offset where the ENDT value is written, so the caller can
// seek back and patch it once the final duration is known.
long write_extended_header(FILE *f, uint64_t creation_time_ns, const char *ffmpeg_version) {
    fputc(PACKET_EXTENDED_HDR, f);

    // Count key-value pairs (BGNT, ENDT, CDAT, VNDR, FMPG)
    uint16_t num_pairs = ffmpeg_version ? 5 : 4;  // FMPG is optional
    fwrite(&num_pairs, sizeof(uint16_t), 1, f);

    // BGNT: Video begin time (0 for frame 0)
    ext_hdr_write_u64(f, "BGNT", 0ULL);

    // ENDT: Video end time (placeholder, will be updated at end)
    long endt_offset = ftell(f);
    ext_hdr_write_u64(f, "ENDT", 0ULL);

    // CDAT: Creation time in nanoseconds since UNIX epoch
    ext_hdr_write_u64(f, "CDAT", creation_time_ns);

    // VNDR: Encoder name and version
    const char *vendor_str = ENCODER_VENDOR_STRING;
    ext_hdr_write_bytes(f, "VNDR", vendor_str, strlen(vendor_str));

    // FMPG: FFmpeg version (if available)
    if (ffmpeg_version) {
        ext_hdr_write_bytes(f, "FMPG", ffmpeg_version, strlen(ffmpeg_version));
    }

    // Return offset of ENDT value (skip key, type byte)
    return endt_offset + 4 + 1;  // 4 bytes for "ENDT", 1 byte for type
}
// Write font ROM packet (SSF packet type 0x30).
//
// Packet structure:
// [type:PACKET_SSF][size:uint32][index:uint24][opcode:uint8][length:uint16][data][terminator:0x00]
//
// `rom_data`/`data_size` is the raw font data; it is zero-padded on the right
// to FONTROM_PADDED_SIZE bytes. Input longer than FONTROM_PADDED_SIZE is
// truncated (with a warning) rather than overflowing the packet.
// `opcode` selects the destination (0x80 = lowrom, 0x81 = highrom).
void write_fontrom_packet(FILE *f, const uint8_t *rom_data, size_t data_size, uint8_t opcode) {
    const size_t padded_size = FONTROM_PADDED_SIZE;

    size_t copy_len = data_size;
    if (copy_len > padded_size) {
        fprintf(stderr, "Warning: font ROM data (%zu bytes) exceeds packet capacity, truncating\n",
                data_size);
        copy_len = padded_size;
    }

    // index (3) + opcode (1) + length (2) + data + terminator (1)
    uint32_t packet_size = 3 + 1 + 2 + FONTROM_PADDED_SIZE + 1;

    // Write packet type and size
    fputc(PACKET_SSF, f);
    fwrite(&packet_size, sizeof(uint32_t), 1, f);

    // Index (3 bytes, always 0 for font ROM)
    fputc(0, f);
    fputc(0, f);
    fputc(0, f);

    // Opcode (0x80=lowrom, 0x81=highrom)
    fputc(opcode, f);

    // Payload length (uint16)
    uint16_t payload_len = FONTROM_PADDED_SIZE;
    fwrite(&payload_len, sizeof(uint16_t), 1, f);

    // Font data followed by zero padding; streaming the padding avoids a
    // temporary heap buffer and its allocation-failure path
    if (copy_len > 0) {
        fwrite(rom_data, 1, copy_len, f);
    }
    for (size_t i = copy_len; i < padded_size; i++) {
        fputc(0, f);
    }

    // Terminator
    fputc(0x00, f);

    fprintf(stderr, "Font ROM uploaded: %zu bytes (padded to %d), opcode 0x%02X\n",
            copy_len, FONTROM_PADDED_SIZE, opcode);
}
// Emit a timecode packet: the PACKET_TIMECODE type byte followed by the
// timestamp in nanoseconds as a uint64 in host byte order.
void write_timecode(FILE *f, uint64_t timecode_ns) {
    const uint8_t packet_type = PACKET_TIMECODE;
    fwrite(&packet_type, 1, 1, f);
    fwrite(&timecode_ns, sizeof timecode_ns, 1, f);
}
// Emit a sync packet, which consists of the PACKET_SYNC type byte alone.
void write_sync(FILE *f) {
    const uint8_t packet_type = PACKET_SYNC;
    fwrite(&packet_type, 1, 1, f);
}
// Emit an MP2 audio packet: [PACKET_AUDIO_MP2][size: uint32][size bytes of data].
// The size field is written in host byte order.
void write_audio_mp2(FILE *f, const uint8_t *data, uint32_t size) {
    const uint8_t packet_type = PACKET_AUDIO_MP2;
    fwrite(&packet_type, 1, 1, f);
    fwrite(&size, sizeof size, 1, f);
    fwrite(data, 1, size, f);
}
// Write text packet with separated arrays (better compression).
//
// The uncompressed payload is [rows][cols][fg-array][bg-array][char-array],
// where each array holds rows * cols bytes and rows/cols are stored as single
// bytes. The payload is Zstd-compressed (level 3) and written as
// [PACKET_TEXT][compressed size: uint32][compressed data].
//
// Terminates the process with exit(1) if memory cannot be allocated or
// compression fails.
void write_text_packet(FILE *f, const uint8_t *bg_col, const uint8_t *fg_col,
                       const uint8_t *chars, int rows, int cols) {
    int grid_size = rows * cols;

    // Separated arrays compress much better than interleaved cells
    size_t uncompressed_size = 2 + (size_t)grid_size * 3;
    uint8_t *uncompressed = malloc(uncompressed_size);
    if (!uncompressed) {
        fprintf(stderr, "Out of memory building text packet\n");
        exit(1);
    }
    uncompressed[0] = rows;
    uncompressed[1] = cols;

    // Copy arrays in order: foreground, background, characters
    memcpy(&uncompressed[2], fg_col, grid_size);
    memcpy(&uncompressed[2 + grid_size], bg_col, grid_size);
    memcpy(&uncompressed[2 + grid_size * 2], chars, grid_size);

    // Compress with Zstd
    size_t max_compressed = ZSTD_compressBound(uncompressed_size);
    uint8_t *compressed = malloc(max_compressed);
    if (!compressed) {
        fprintf(stderr, "Out of memory building text packet\n");
        free(uncompressed);
        exit(1);
    }
    size_t compressed_size = ZSTD_compress(compressed, max_compressed,
                                           uncompressed, uncompressed_size, 3);
    if (ZSTD_isError(compressed_size)) {
        fprintf(stderr, "Zstd compression error\n");
        exit(1);
    }

    // Write packet: [type][size][data]
    fputc(PACKET_TEXT, f);
    uint32_t size32 = compressed_size;
    fwrite(&size32, 4, 1, f);
    fwrite(compressed, compressed_size, 1, f);

    free(compressed);
    free(uncompressed);
}
// Videotex encoder entry point.
//
// Usage: <prog> -i <video> -f <font.chr> -o <output.tav> [--no-invert-char]
//
// Pipeline: detect the input frame rate, load the font ROM, stream grayscale
// frames from FFmpeg, extract MP2 audio to a temporary file, then write the
// container: header, extended header, font ROM packets and, per frame,
// [audio packets][timecode][text packet][sync]. Remaining audio is flushed
// after the last frame, and total_frames / ENDT are patched in place.
//
// Returns 0 on success and 1 on any setup failure. Every exit after argument
// parsing goes through `cleanup`, which releases buffers, closes streams and
// removes the temporary audio file.
//
// NOTE(review): input_video is interpolated into shell command lines for
// popen()/system(); a path containing a double quote or shell metacharacters
// will break or alter the command.
int main(int argc, char **argv) {
    if (argc < 7) {
        fprintf(stderr, "Usage: %s -i <video> -f <font.chr> -o <output.tav> [--no-invert-char]\n", argv[0]);
        return 1;
    }

    const char *input_video = NULL;
    const char *font_path = NULL;
    const char *output_path = NULL;
    for (int i = 1; i < argc; i++) {
        if (strcmp(argv[i], "-i") == 0 && i+1 < argc) input_video = argv[++i];
        else if (strcmp(argv[i], "-f") == 0 && i+1 < argc) font_path = argv[++i];
        else if (strcmp(argv[i], "-o") == 0 && i+1 < argc) output_path = argv[++i];
        else if (strcmp(argv[i], "--no-invert-char") == 0) g_no_invert_char = 1;
    }
    if (!input_video || !font_path || !output_path) {
        fprintf(stderr, "Missing required arguments\n");
        return 1;
    }
    if (g_no_invert_char) {
        fprintf(stderr, "Inverted character matching disabled\n");
    }

    // Everything released at `cleanup` is declared here so that any early
    // exit can jump there safely
    int exit_code = 1;
    int temp_audio_created = 0;
    char *ffmpeg_version = NULL;
    FontROM *rom = NULL;
    FILE *video_pipe = NULL;
    FILE *mp2_file = NULL;
    FILE *out = NULL;
    uint8_t *gray_pixels = NULL;
    uint8_t *bg_col = NULL;
    uint8_t *fg_col = NULL;
    uint8_t *chars = NULL;
    uint8_t *audio_buffer = NULL;

    // Generate random temp filename for audio
    generate_random_filename(TEMP_AUDIO_FILE);

    // Capture creation time and FFmpeg version for extended header
    uint64_t creation_time_ns = get_current_time_ns();
    ffmpeg_version = get_ffmpeg_version();

    // Detect video FPS. A non-positive (or NaN) rate would make the timecode
    // and audio pacing maths divide by zero, so fall back to 30.
    float fps_float = detect_fps(input_video);
    if (!(fps_float > 0.0f)) {
        fprintf(stderr, "Warning: invalid FPS detected, falling back to 30\n");
        fps_float = 30.0f;
    }
    // Round to nearest integer; the header field is a single byte, so clamp
    // instead of converting an out-of-range float
    float fps_rounded = fps_float + 0.5f;
    uint8_t fps = (fps_rounded >= 255.0f) ? 255 : (uint8_t)fps_rounded;
    fprintf(stderr, "Detected FPS: %.2f (using %d in TAV header)\n", fps_float, fps);

    // Load font ROM
    rom = load_font_rom(font_path);
    if (!rom) {
        fprintf(stderr, "Failed to load font ROM: %s\n", font_path);
        goto cleanup;
    }

    // Open FFmpeg pipe for grayscale frames at PIXEL_W x PIXEL_H
    char ffmpeg_cmd[1024];
    snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd),
             "ffmpeg -i \"%s\" -vf \"scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d\" "
             "-f rawvideo -pix_fmt gray - 2>/dev/null",
             input_video, PIXEL_W, PIXEL_H, PIXEL_W, PIXEL_H);
    fprintf(stderr, "Opening video stream...\n");
    video_pipe = popen(ffmpeg_cmd, "r");
    if (!video_pipe) {
        fprintf(stderr, "Failed to open FFmpeg pipe\n");
        goto cleanup;
    }

    // Extract MP2 audio to temporary file using libtwolame
    fprintf(stderr, "Extracting MP2 audio...\n");
    char audio_cmd[1024];
    snprintf(audio_cmd, sizeof(audio_cmd),
             "ffmpeg -v quiet -i \"%s\" -acodec libtwolame -psymodel 4 -b:a 224k -ar %d -ac 2 -y \"%s\" 2>/dev/null",
             input_video, SAMPLE_RATE, TEMP_AUDIO_FILE);
    int audio_result = system(audio_cmd);
    // The command may leave a (partial) file behind even when it fails
    temp_audio_created = 1;
    if (audio_result != 0) {
        fprintf(stderr, "Warning: Audio extraction failed, continuing without audio\n");
    }

    // Open MP2 file for reading
    long audio_remaining = 0;
    if (audio_result == 0) {
        mp2_file = fopen(TEMP_AUDIO_FILE, "rb");
        if (mp2_file) {
            fseek(mp2_file, 0, SEEK_END);
            audio_remaining = ftell(mp2_file);
            fseek(mp2_file, 0, SEEK_SET);
            fprintf(stderr, "Audio ready: %ld bytes\n", audio_remaining);
        }
    }

    // Open output file
    out = fopen(output_path, "wb");
    if (!out) {
        fprintf(stderr, "Failed to open output file\n");
        goto cleanup;
    }

    // Write Videotex header with placeholder total_frames (will update at end)
    long header_offset = ftell(out);
    write_videotex_header(out, fps, 0);

    // Write extended header packet (before first timecode)
    long endt_offset = write_extended_header(out, creation_time_ns, ffmpeg_version);

    // Upload font ROM to TSVM (split into lowrom and highrom)
    fprintf(stderr, "Uploading font ROM to TSVM...\n");
    FILE *rom_file = fopen(font_path, "rb");
    if (rom_file) {
        fseek(rom_file, 0, SEEK_END);
        long rom_size = ftell(rom_file);
        fseek(rom_file, 0, SEEK_SET);

        uint8_t *raw_rom = (rom_size > 0) ? malloc((size_t)rom_size) : NULL;
        if (raw_rom && fread(raw_rom, 1, (size_t)rom_size, rom_file) == (size_t)rom_size) {
            size_t rom_bytes = (size_t)rom_size;
            // Each half holds GLYPHS_PER_ROM glyphs of 14 bytes
            size_t bytes_per_half = (GLYPHS_PER_ROM * 14);

            // Write lowrom (first half)
            if (rom_bytes >= bytes_per_half) {
                write_fontrom_packet(out, raw_rom, bytes_per_half, SSF_OPCODE_LOWROM);
            }
            // Write highrom (second half, possibly partial)
            if (rom_bytes >= bytes_per_half * 2) {
                write_fontrom_packet(out, raw_rom + bytes_per_half, bytes_per_half, SSF_OPCODE_HIGHROM);
            } else if (rom_bytes > bytes_per_half) {
                write_fontrom_packet(out, raw_rom + bytes_per_half, rom_bytes - bytes_per_half, SSF_OPCODE_HIGHROM);
            }
        }
        // Freed on every path, including a short read
        free(raw_rom);
        fclose(rom_file);
    }

    // Allocate buffers
    size_t frame_size = PIXEL_W * PIXEL_H;
    gray_pixels = malloc(frame_size);
    bg_col = malloc(GRID_W * GRID_H);
    fg_col = malloc(GRID_W * GRID_H);
    chars = malloc(GRID_W * GRID_H);

    // Audio buffer for MP2 packets
#define MP2_BUFFER_SIZE 2048
    audio_buffer = malloc(MP2_BUFFER_SIZE);

    if (!gray_pixels || !bg_col || !fg_col || !chars || !audio_buffer) {
        fprintf(stderr, "Out of memory allocating frame buffers\n");
        goto cleanup;
    }

    uint32_t frame_num = 0;
    uint64_t total_audio_bytes = 0;

    // Audio timing calculation
    double frame_audio_time = 1.0 / fps_float;  // Time per video frame
    double packet_audio_time = (double)MP2_DEFAULT_PACKET_SIZE / SAMPLE_RATE;  // Time per audio packet
    double packets_per_frame = frame_audio_time / packet_audio_time;
    double audio_frames_in_buffer = 0.0;  // Simulated audio buffer level

    fprintf(stderr, "Encoding text-mode video (%dx%d chars, %dx%d pixels)...\n",
            GRID_W, GRID_H, PIXEL_W, PIXEL_H);

    // Track encoding start time
    struct timeval start_time, now;
    gettimeofday(&start_time, NULL);

    // Read and process frames
    while (fread(gray_pixels, 1, frame_size, video_pipe) == frame_size) {
        // Calculate timecode in nanoseconds
        uint64_t timecode_ns = (uint64_t)(frame_num * 1000000000.0 / fps_float);

        // Write audio packets for this frame (based on timing)
        if (mp2_file && audio_remaining > 0) {
            // Simulate buffer consumption
            audio_frames_in_buffer -= packets_per_frame;

            // Calculate how many packets we need to maintain buffer
            double target_level = fmax(packets_per_frame, 2.0);
            int packets_to_insert = 0;
            if (audio_frames_in_buffer < target_level) {
                double deficit = target_level - audio_frames_in_buffer;
                packets_to_insert = (int)ceil(deficit);
            }

            // Insert the calculated number of audio packets
            for (int q = 0; q < packets_to_insert; q++) {
                // Peek at header to get actual packet size
                long pos = ftell(mp2_file);
                uint8_t header[4];
                if (fread(header, 1, 4, mp2_file) != 4) break;
                fseek(mp2_file, pos, SEEK_SET);  // Rewind to re-read with full packet

                int actual_packet_size = get_mp2_packet_size(header);
                size_t bytes_to_read = actual_packet_size;

                // Clamp to remaining audio
                if (bytes_to_read > (size_t)audio_remaining) {
                    bytes_to_read = (size_t)audio_remaining;
                }
                // Sanity check
                if (bytes_to_read > MP2_BUFFER_SIZE) {
                    fprintf(stderr, "ERROR: MP2 packet size %zu exceeds buffer\n", bytes_to_read);
                    break;
                }

                // Read full packet
                size_t bytes_read = fread(audio_buffer, 1, bytes_to_read, mp2_file);
                if (bytes_read == 0) break;

                // Write MP2 audio packet
                write_audio_mp2(out, audio_buffer, (uint32_t)bytes_read);

                // Track audio
                audio_remaining -= bytes_read;
                audio_frames_in_buffer++;
                total_audio_bytes += bytes_read;
            }
        }

        // Write timecode
        write_timecode(out, timecode_ns);

        // Convert to text mode
        frame_to_text(gray_pixels, rom, bg_col, fg_col, chars);

        // Write text packet (treated as I-frame)
        write_text_packet(out, bg_col, fg_col, chars, GRID_H, GRID_W);

        // Write sync packet after each frame
        write_sync(out);

        frame_num++;
        if (frame_num % 30 == 0) {
            // Calculate encoding speed
            gettimeofday(&now, NULL);
            double elapsed = (now.tv_sec - start_time.tv_sec) +
                             (now.tv_usec - start_time.tv_usec) / 1000000.0;
            double encoding_fps = frame_num / elapsed;
            fprintf(stderr, "\rEncoded %u frames (%.1f fps)", frame_num, encoding_fps);
            fflush(stderr);
        }
    }

    // Write any remaining audio
    if (mp2_file && audio_remaining > 0) {
        while (audio_remaining > 0) {
            // Peek at header to get actual packet size
            long pos = ftell(mp2_file);
            uint8_t header[4];
            if (fread(header, 1, 4, mp2_file) != 4) break;
            fseek(mp2_file, pos, SEEK_SET);

            int actual_packet_size = get_mp2_packet_size(header);
            size_t bytes_to_read = (actual_packet_size < audio_remaining) ? actual_packet_size : audio_remaining;
            if (bytes_to_read > MP2_BUFFER_SIZE) break;

            size_t bytes_read = fread(audio_buffer, 1, bytes_to_read, mp2_file);
            if (bytes_read == 0) break;

            write_audio_mp2(out, audio_buffer, (uint32_t)bytes_read);
            audio_remaining -= bytes_read;
            total_audio_bytes += bytes_read;
        }
    }

    // Final timing
    gettimeofday(&now, NULL);
    double total_time = (now.tv_sec - start_time.tv_sec) +
                        (now.tv_usec - start_time.tv_usec) / 1000000.0;
    double final_fps = frame_num / total_time;
    fprintf(stderr, "\nDone! Encoded %u frames in %.2fs (%.1f fps)\n",
            frame_num, total_time, final_fps);
    fprintf(stderr, "Audio: %llu bytes (%.2f MB)\n",
            (unsigned long long)total_audio_bytes,
            total_audio_bytes / 1024.0 / 1024.0);

    // Update total_frames in header
    if (frame_num > 0) {
        fseek(out, header_offset + 14, SEEK_SET);  // Offset to total_frames field
        fwrite(&frame_num, sizeof(uint32_t), 1, out);
        fprintf(stderr, "Updated total_frames in header: %u\n", frame_num);
    }

    // Update ENDT in extended header (calculate end time for last frame)
    if (frame_num > 0) {
        // Calculate duration: (frame_num - 1) frames * (1/fps) seconds in nanoseconds
        uint64_t duration_ns = (uint64_t)((frame_num - 1) * 1000000000.0 / fps_float);
        uint64_t endt_ns = duration_ns;
        fseek(out, endt_offset, SEEK_SET);
        fwrite(&endt_ns, sizeof(uint64_t), 1, out);
        fprintf(stderr, "Updated ENDT in extended header: %llu ns (%.3f seconds)\n",
                (unsigned long long)endt_ns, endt_ns / 1000000000.0);
    }

    exit_code = 0;

cleanup:
    if (video_pipe) pclose(video_pipe);
    if (mp2_file) fclose(mp2_file);
    if (temp_audio_created) unlink(TEMP_AUDIO_FILE);  // Remove temporary audio file
    if (out) fclose(out);
    free(gray_pixels);
    free(bg_col);
    free(fg_col);
    free(chars);
    free(audio_buffer);
    if (rom) {
        free(rom->data);
        free(rom);
    }
    free(ffmpeg_version);
    return exit_code;
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,815 +0,0 @@
// Created by Claude on 2025-08-17.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include <math.h>
#include <zlib.h>
#include <unistd.h>
#include <sys/wait.h>
#include <getopt.h>
#include <sys/time.h>
// TSVM Enhanced Video (TEV) format constants
#define TEV_MAGIC "\x1F\x54\x53\x56\x4D\x54\x45\x56" // "\x1FTSVM TEV"
#define TEV_VERSION 1
// Block encoding modes (8x8 blocks)
#define TEV_MODE_SKIP 0x00 // Skip block (copy from reference)
#define TEV_MODE_INTRA 0x01 // Intra DCT coding (I-frame blocks)
#define TEV_MODE_INTER 0x02 // Inter DCT coding with motion compensation
#define TEV_MODE_MOTION 0x03 // Motion vector only (good prediction)
// Video packet types
#define TEV_PACKET_IFRAME 0x10 // Intra frame (keyframe)
#define TEV_PACKET_PFRAME 0x11 // Predicted frame
#define TEV_PACKET_AUDIO_MP2 0x20 // MP2 audio
#define TEV_PACKET_SYNC 0xFF // Sync packet
// Quantization divisor tables, one per quality level.
// The first index is the quality setting (0 = coarsest divisors, 7 = finest);
// encode_block selects a row with enc->quality. The second index is
// u * 8 + v, matching the coefficient index used when quantizing. Entry 0 of
// each table is never applied by quantize_coeff, which divides the DC
// coefficient by a fixed 8 instead.
static const uint8_t QUANT_TABLES[8][64] = {
    // Quality 0 (lowest)
    {80, 60, 50, 80, 120, 200, 255, 255,
    55, 60, 70, 95, 130, 255, 255, 255,
    70, 65, 80, 120, 200, 255, 255, 255,
    70, 85, 110, 145, 255, 255, 255, 255,
    90, 110, 185, 255, 255, 255, 255, 255,
    120, 175, 255, 255, 255, 255, 255, 255,
    245, 255, 255, 255, 255, 255, 255, 255,
    255, 255, 255, 255, 255, 255, 255, 255},
    // Quality 1
    {40, 30, 25, 40, 60, 100, 128, 150,
    28, 30, 35, 48, 65, 128, 150, 180,
    35, 33, 40, 60, 100, 128, 150, 180,
    35, 43, 55, 73, 128, 150, 180, 200,
    45, 55, 93, 128, 150, 180, 200, 220,
    60, 88, 128, 150, 180, 200, 220, 240,
    123, 128, 150, 180, 200, 220, 240, 250,
    128, 150, 180, 200, 220, 240, 250, 255},
    // Quality 2
    {20, 15, 13, 20, 30, 50, 64, 75,
    14, 15, 18, 24, 33, 64, 75, 90,
    18, 17, 20, 30, 50, 64, 75, 90,
    18, 22, 28, 37, 64, 75, 90, 100,
    23, 28, 47, 64, 75, 90, 100, 110,
    30, 44, 64, 75, 90, 100, 110, 120,
    62, 64, 75, 90, 100, 110, 120, 125,
    64, 75, 90, 100, 110, 120, 125, 128},
    // Quality 3
    {16, 12, 10, 16, 24, 40, 51, 60,
    11, 12, 14, 19, 26, 51, 60, 72,
    14, 13, 16, 24, 40, 51, 60, 72,
    14, 17, 22, 29, 51, 60, 72, 80,
    18, 22, 37, 51, 60, 72, 80, 88,
    24, 35, 51, 60, 72, 80, 88, 96,
    49, 51, 60, 72, 80, 88, 96, 100,
    51, 60, 72, 80, 88, 96, 100, 102},
    // Quality 4
    {12, 9, 8, 12, 18, 30, 38, 45,
    8, 9, 11, 14, 20, 38, 45, 54,
    11, 10, 12, 18, 30, 38, 45, 54,
    11, 13, 17, 22, 38, 45, 54, 60,
    14, 17, 28, 38, 45, 54, 60, 66,
    18, 26, 38, 45, 54, 60, 66, 72,
    37, 38, 45, 54, 60, 66, 72, 75,
    38, 45, 54, 60, 66, 72, 75, 77},
    // Quality 5
    {10, 7, 6, 10, 15, 25, 32, 38,
    7, 7, 9, 12, 16, 32, 38, 45,
    9, 8, 10, 15, 25, 32, 38, 45,
    9, 11, 14, 18, 32, 38, 45, 50,
    12, 14, 23, 32, 38, 45, 50, 55,
    15, 22, 32, 38, 45, 50, 55, 60,
    31, 32, 38, 45, 50, 55, 60, 63,
    32, 38, 45, 50, 55, 60, 63, 65},
    // Quality 6
    {8, 6, 5, 8, 12, 20, 26, 30,
    6, 6, 7, 10, 13, 26, 30, 36,
    7, 7, 8, 12, 20, 26, 30, 36,
    7, 9, 11, 15, 26, 30, 36, 40,
    10, 11, 19, 26, 30, 36, 40, 44,
    12, 17, 26, 30, 36, 40, 44, 48,
    25, 26, 30, 36, 40, 44, 48, 50,
    26, 30, 36, 40, 44, 48, 50, 52},
    // Quality 7 (highest)
    {2, 1, 1, 2, 3, 5, 6, 7,
    1, 1, 1, 2, 3, 6, 7, 9,
    1, 1, 2, 3, 5, 6, 7, 9,
    1, 2, 3, 4, 6, 7, 9, 10,
    2, 3, 5, 6, 7, 9, 10, 11,
    3, 4, 6, 7, 9, 10, 11, 12,
    6, 6, 7, 9, 10, 11, 12, 13,
    6, 7, 9, 10, 11, 12, 13, 13}
};
// Audio constants (reuse MP2 from existing system)
#define MP2_SAMPLE_RATE 32000
#define MP2_DEFAULT_PACKET_SIZE 0x240
// Encoding parameters
#define MAX_MOTION_SEARCH 16
#define KEYFRAME_INTERVAL 30
#define BLOCK_SIZE 8
// Default values
#define DEFAULT_WIDTH 560
#define DEFAULT_HEIGHT 448
#define TEMP_AUDIO_FILE "/tmp/tev_temp_audio.mp2"
// Encoded form of one 8x8 block. The struct is packed, so the 16-bit members
// that follow `mode` sit at odd offsets; avoid taking their addresses as
// int16_t pointers.
typedef struct __attribute__((packed)) {
    uint8_t mode; // Block encoding mode (one of the TEV_MODE_* values)
    int16_t mv_x, mv_y; // Motion vector (1/4 pixel precision)
    uint16_t cbp; // Coded block pattern; only ever zeroed in the visible code — TODO confirm whether anything else sets it
    int16_t dct_coeffs[3][64]; // Quantized DCT coefficients per channel (R,G,B), index u * 8 + v
} tev_block_t;
// All state for one TEV encoding run: input/output settings, frame buffers,
// scratch space, audio state, the FFmpeg pipe and per-frame statistics.
typedef struct {
    char *input_file;
    char *output_file;
    int width;
    int height;
    int fps; // Integer frames per second, as written to the header
    int total_frames;
    double duration; // Seconds, as reported by ffprobe
    int has_audio;
    int output_to_stdout;
    int quality; // 0-7, higher = better quality; selects a row of QUANT_TABLES
    // Frame buffers (8-bit RGB format for encoding, 3 bytes per pixel)
    uint8_t *current_rgb, *previous_rgb, *reference_rgb;
    // Encoding workspace
    uint8_t *rgb_workspace; // One 8x8 RGB block (192 bytes), filled by encode_block
    float *dct_workspace; // DCT coefficients (192 floats); not referenced by the visible code
    tev_block_t *block_data; // Encoded block data, indexed block_y * blocks_per_row + block_x
    uint8_t *compressed_buffer; // Compressed output. NOTE(review): originally labelled "Zstd output", but this struct carries a zlib z_stream — confirm which codec applies
    // Audio handling
    FILE *mp2_file;
    int mp2_packet_size;
    size_t audio_remaining; // Bytes of MP2 data not yet consumed
    uint8_t *mp2_buffer;
    // Compression context
    z_stream gzip_stream;
    // FFmpeg processes
    FILE *ffmpeg_video_pipe; // popen() stream delivering raw rgb24 frames
    // Progress tracking
    struct timeval start_time;
    size_t total_output_bytes;
    // Statistics (block counts per mode, reset for every frame)
    int blocks_skip, blocks_intra, blocks_inter, blocks_motion;
} tev_encoder_t;
// Quantize DCT coefficient using quality table
static int16_t quantize_coeff(float coeff, uint8_t quant, int is_dc) {
if (is_dc) {
// DC coefficient uses fixed quantizer
return (int16_t)roundf(coeff / 8.0f);
} else {
// AC coefficients use quality table
return (int16_t)roundf(coeff / quant);
}
}
// Copy a packed 8-bit RGB frame into the encoder's frame buffer.
// `pixels` is the pixel count; three bytes are copied per pixel and no colour
// conversion or precision reduction takes place.
static void copy_rgb_frame(uint8_t *rgb_input, uint8_t *rgb_frame, int pixels) {
    const int byte_count = pixels * 3;
    memcpy(rgb_frame, rgb_input, byte_count);
}
// Simple motion estimation (full search).
//
// Tries every whole-pixel displacement within +/-MAX_MOTION_SEARCH for the
// block at (block_x, block_y) and keeps the one with the lowest sum of
// absolute RGB differences between the current and previous frame.
// Candidates whose full BLOCK_SIZE window does not lie inside the previous
// frame are skipped. The first best candidate in scan order wins ties.
//
// The result is stored through best_mv_x / best_mv_y in 1/4 pixel units
// (whole-pixel displacement * 4); both are 0 when no candidate is valid.
//
// For blocks that overhang the right or bottom frame edge, only the pixels
// that exist in the current frame are compared, so no read goes past the end
// of a row or of the frame buffer.
static void estimate_motion(tev_encoder_t *enc, int block_x, int block_y,
                            int16_t *best_mv_x, int16_t *best_mv_y) {
    int best_sad = INT_MAX;
    *best_mv_x = 0;
    *best_mv_y = 0;

    const int start_x = block_x * BLOCK_SIZE;
    const int start_y = block_y * BLOCK_SIZE;

    // Portion of the block that actually lies inside the frame
    int block_w = enc->width - start_x;
    int block_h = enc->height - start_y;
    if (block_w > BLOCK_SIZE) block_w = BLOCK_SIZE;
    if (block_h > BLOCK_SIZE) block_h = BLOCK_SIZE;
    if (block_w <= 0 || block_h <= 0) {
        return;  // Block lies entirely outside the frame
    }

    // Search in range [-MAX_MOTION_SEARCH, +MAX_MOTION_SEARCH] pixels
    for (int mv_y = -MAX_MOTION_SEARCH; mv_y <= MAX_MOTION_SEARCH; mv_y++) {
        for (int mv_x = -MAX_MOTION_SEARCH; mv_x <= MAX_MOTION_SEARCH; mv_x++) {
            int ref_x = start_x + mv_x;
            int ref_y = start_y + mv_y;

            // The whole reference window must be inside the previous frame
            if (ref_x < 0 || ref_y < 0 ||
                ref_x + BLOCK_SIZE > enc->width ||
                ref_y + BLOCK_SIZE > enc->height) {
                continue;
            }

            // Calculate Sum of Absolute Differences on 8-bit RGB channels
            int sad = 0;
            for (int dy = 0; dy < block_h; dy++) {
                for (int dx = 0; dx < block_w; dx++) {
                    int cur_offset = (start_y + dy) * enc->width + (start_x + dx);
                    int ref_offset = (ref_y + dy) * enc->width + (ref_x + dx);
                    const uint8_t *cur = &enc->current_rgb[cur_offset * 3];
                    const uint8_t *ref = &enc->previous_rgb[ref_offset * 3];
                    sad += abs(cur[0] - ref[0]) + abs(cur[1] - ref[1]) + abs(cur[2] - ref[2]);
                }
            }

            if (sad < best_sad) {
                best_sad = sad;
                *best_mv_x = mv_x * 4;  // Convert to 1/4 pixel units
                *best_mv_y = mv_y * 4;
            }
        }
    }
}
// Encode an 8x8 block using the best mode.
//
// The block's pixels are copied into enc->rgb_workspace (black where the
// block overhangs the frame) and the output slot in enc->block_data is zeroed.
//
// Mode selection:
//   - keyframe: always INTRA
//   - otherwise: SKIP when the block barely differs from the previous frame
//     (SAD < 8); else a motion search is run and the block becomes MOTION
//     (SAD < 32 with a non-zero vector), INTER (SAD < 64) or INTRA.
// SKIP and MOTION blocks carry no coefficients. INTER and INTRA blocks get a
// full 8x8 DCT of the block's own pixels per channel, quantized with the
// table for enc->quality.
//
// The per-mode counters in `enc` are incremented accordingly.
static void encode_block(tev_encoder_t *enc, int block_x, int block_y, int is_keyframe) {
    int block_idx = block_y * ((enc->width + 7) / 8) + block_x;
    tev_block_t *block = &enc->block_data[block_idx];

    const int start_x = block_x * BLOCK_SIZE;
    const int start_y = block_y * BLOCK_SIZE;

    // Extract 8x8 RGB block from current frame
    for (int y = 0; y < BLOCK_SIZE; y++) {
        for (int x = 0; x < BLOCK_SIZE; x++) {
            int pixel_x = start_x + x;
            int pixel_y = start_y + y;
            int offset = (y * BLOCK_SIZE + x) * 3;

            if (pixel_x < enc->width && pixel_y < enc->height) {
                int frame_offset = pixel_y * enc->width + pixel_x;
                // Copy RGB data directly (preserving full 8-bit precision)
                enc->rgb_workspace[offset] = enc->current_rgb[frame_offset * 3];          // R
                enc->rgb_workspace[offset + 1] = enc->current_rgb[frame_offset * 3 + 1];  // G
                enc->rgb_workspace[offset + 2] = enc->current_rgb[frame_offset * 3 + 2];  // B
            } else {
                // Pad with black
                enc->rgb_workspace[offset] = 0;
                enc->rgb_workspace[offset + 1] = 0;
                enc->rgb_workspace[offset + 2] = 0;
            }
        }
    }

    // Initialize block
    memset(block, 0, sizeof(tev_block_t));

    if (is_keyframe) {
        // Keyframes use INTRA mode
        block->mode = TEV_MODE_INTRA;
        enc->blocks_intra++;
    } else {
        // Try SKIP mode - compare with previous frame
        int skip_sad = 0;
        for (int dy = 0; dy < BLOCK_SIZE; dy++) {
            for (int dx = 0; dx < BLOCK_SIZE; dx++) {
                int pixel_x = start_x + dx;
                int pixel_y = start_y + dy;
                if (pixel_x < enc->width && pixel_y < enc->height) {
                    int offset = pixel_y * enc->width + pixel_x;
                    int cur_r = enc->current_rgb[offset * 3];
                    int cur_g = enc->current_rgb[offset * 3 + 1];
                    int cur_b = enc->current_rgb[offset * 3 + 2];
                    int prev_r = enc->previous_rgb[offset * 3];
                    int prev_g = enc->previous_rgb[offset * 3 + 1];
                    int prev_b = enc->previous_rgb[offset * 3 + 2];
                    skip_sad += abs(cur_r - prev_r) + abs(cur_g - prev_g) + abs(cur_b - prev_b);
                }
            }
        }

        if (skip_sad < 8) {  // Much stricter threshold for SKIP
            block->mode = TEV_MODE_SKIP;
            enc->blocks_skip++;
            return;
        }

        // Try MOTION mode. The search writes into aligned locals because the
        // packed struct's int16_t members may be misaligned and must not be
        // handed out as int16_t pointers.
        int16_t mv_x = 0;
        int16_t mv_y = 0;
        estimate_motion(enc, block_x, block_y, &mv_x, &mv_y);
        block->mv_x = mv_x;
        block->mv_y = mv_y;

        // The vector is stored in 1/4 pixel units; convert back to whole
        // pixels before using it as a displacement into the previous frame
        const int shift_x = mv_x / 4;
        const int shift_y = mv_y / 4;

        // Calculate motion compensation SAD
        int motion_sad = 0;
        for (int y = 0; y < BLOCK_SIZE; y++) {
            for (int x = 0; x < BLOCK_SIZE; x++) {
                int cur_x = start_x + x;
                int cur_y = start_y + y;
                int ref_x = cur_x + shift_x;
                int ref_y = cur_y + shift_y;

                if (cur_x < enc->width && cur_y < enc->height &&
                    ref_x >= 0 && ref_x < enc->width && ref_y >= 0 && ref_y < enc->height) {
                    int cur_offset = cur_y * enc->width + cur_x;
                    int ref_offset = ref_y * enc->width + ref_x;

                    uint8_t cur_r = enc->current_rgb[cur_offset * 3];
                    uint8_t cur_g = enc->current_rgb[cur_offset * 3 + 1];
                    uint8_t cur_b = enc->current_rgb[cur_offset * 3 + 2];
                    uint8_t ref_r = enc->previous_rgb[ref_offset * 3];
                    uint8_t ref_g = enc->previous_rgb[ref_offset * 3 + 1];
                    uint8_t ref_b = enc->previous_rgb[ref_offset * 3 + 2];

                    motion_sad += abs(cur_r - ref_r) + abs(cur_g - ref_g) + abs(cur_b - ref_b);
                } else {
                    motion_sad += 48;  // Penalty for out-of-bounds reference
                }
            }
        }

        // Decide on encoding mode based on analysis
        if (motion_sad < 32 && (mv_x != 0 || mv_y != 0)) {
            // Good motion prediction
            block->mode = TEV_MODE_MOTION;
            enc->blocks_motion++;
            return;  // Motion blocks don't need DCT coefficients
        } else if (motion_sad < 64) {
            // Use INTER mode
            block->mode = TEV_MODE_INTER;
            enc->blocks_inter++;
        } else {
            // Fall back to INTRA mode
            block->mode = TEV_MODE_INTRA;
            enc->blocks_intra++;
        }
    }

    // Full 8x8 DCT implementation for all remaining blocks (keyframe and P-frame)
    const uint8_t *quant_table = QUANT_TABLES[enc->quality];

    // DCT-II basis functions, computed once on first use
    static double dct_basis[8][8];
    static int basis_initialized = 0;
    if (!basis_initialized) {
        for (int u = 0; u < 8; u++) {
            for (int x = 0; x < 8; x++) {
                double cu = (u == 0) ? sqrt(1.0/8.0) : sqrt(2.0/8.0);
                dct_basis[u][x] = cu * cos((2.0 * x + 1.0) * u * M_PI / 16.0);
            }
        }
        basis_initialized = 1;
    }

    // Convert RGB block to DCT input format (subtract 128 to center around 0)
    double rgb_block[3][8][8];
    for (int y = 0; y < 8; y++) {
        for (int x = 0; x < 8; x++) {
            int offset = (y * 8 + x) * 3;
            rgb_block[0][y][x] = enc->rgb_workspace[offset] - 128.0;      // R: 0-255 -> -128 to +127
            rgb_block[1][y][x] = enc->rgb_workspace[offset + 1] - 128.0;  // G: 0-255 -> -128 to +127
            rgb_block[2][y][x] = enc->rgb_workspace[offset + 2] - 128.0;  // B: 0-255 -> -128 to +127
        }
    }

    // Apply 2D DCT to each channel
    double dct_coeffs[3][8][8];
    for (int channel = 0; channel < 3; channel++) {
        for (int u = 0; u < 8; u++) {
            for (int v = 0; v < 8; v++) {
                double sum = 0.0;
                for (int x = 0; x < 8; x++) {
                    for (int y = 0; y < 8; y++) {
                        sum += dct_basis[u][x] * dct_basis[v][y] * rgb_block[channel][y][x];
                    }
                }
                dct_coeffs[channel][u][v] = sum;
            }
        }
    }

    // Quantize and store DCT coefficients
    for (int channel = 0; channel < 3; channel++) {
        for (int u = 0; u < 8; u++) {
            for (int v = 0; v < 8; v++) {
                int coeff_index = u * 8 + v;
                int is_dc = (coeff_index == 0);
                block->dct_coeffs[channel][coeff_index] =
                    quantize_coeff(dct_coeffs[channel][u][v], quant_table[coeff_index], is_dc);

                // Debug DC coefficient for first block
                if (block_x == 0 && block_y == 0 && coeff_index == 0) {
                    fprintf(stderr, "Ch%d: DCT raw=%.2f, stored=%d, ",
                            channel, dct_coeffs[channel][u][v], (int)block->dct_coeffs[channel][coeff_index]);
                    // Show raw bytes in memory
                    uint8_t *bytes = (uint8_t*)&block->dct_coeffs[channel][coeff_index];
                    fprintf(stderr, "bytes=[%d,%d]\n", bytes[0], bytes[1]);
                }
            }
        }
    }
}
// Execute command and capture output.
//
// Runs `command` through popen() and returns up to the first 4095 bytes of
// its standard output as a NUL-terminated, heap-allocated string that the
// caller must free(). Returns NULL if the pipe cannot be opened or memory
// cannot be allocated.
static char *execute_command(const char *command) {
    FILE *pipe = popen(command, "r");
    if (!pipe) return NULL;

    char *result = malloc(4096);
    if (!result) {
        pclose(pipe);  // Do not leak the child process / stream on OOM
        return NULL;
    }

    size_t len = fread(result, 1, 4095, pipe);
    result[len] = '\0';

    pclose(pipe);
    return result;
}
// Get video metadata using ffprobe.
//
// Fills enc->total_frames, enc->fps, enc->duration and enc->has_audio by
// running ffprobe on enc->input_file, then prints a summary to stderr.
// When the frame count is unavailable it is estimated as duration * fps.
//
// The frame rate is rounded to the nearest integer in both the "num/den" and
// the plain-decimal form, so 30000/1001 yields 30 rather than 29.
//
// Returns non-zero when both a positive frame count and a positive frame
// rate were obtained, 0 otherwise.
static int get_video_metadata(tev_encoder_t *enc) {
    char command[1024];
    char *output;

    // Get frame count
    snprintf(command, sizeof(command),
             "ffprobe -v quiet -select_streams v:0 -count_frames -show_entries stream=nb_read_frames -of csv=p=0 \"%s\"",
             enc->input_file);
    output = execute_command(command);
    if (!output) {
        fprintf(stderr, "Failed to get frame count\n");
        return 0;
    }
    enc->total_frames = atoi(output);
    free(output);

    // Get frame rate
    snprintf(command, sizeof(command),
             "ffprobe -v quiet -select_streams v:0 -show_entries stream=r_frame_rate -of csv=p=0 \"%s\"",
             enc->input_file);
    output = execute_command(command);
    if (!output) {
        fprintf(stderr, "Failed to get frame rate\n");
        return 0;
    }
    int num, den;
    if (sscanf(output, "%d/%d", &num, &den) == 2) {
        // Round instead of truncating, matching the decimal branch below
        enc->fps = (den > 0) ? (int)round((double)num / (double)den) : 30;
    } else {
        enc->fps = (int)round(atof(output));
    }
    free(output);

    // Get duration
    snprintf(command, sizeof(command),
             "ffprobe -v quiet -show_entries format=duration -of csv=p=0 \"%s\"",
             enc->input_file);
    output = execute_command(command);
    if (output) {
        enc->duration = atof(output);
        free(output);
    }

    // Check if has audio
    snprintf(command, sizeof(command),
             "ffprobe -v quiet -select_streams a:0 -show_entries stream=index -of csv=p=0 \"%s\"",
             enc->input_file);
    output = execute_command(command);
    enc->has_audio = (output && strlen(output) > 0 && atoi(output) >= 0);
    free(output);

    // Fall back to an estimate when ffprobe could not count frames
    if (enc->total_frames <= 0 && enc->duration > 0) {
        enc->total_frames = (int)(enc->duration * enc->fps);
    }

    fprintf(stderr, "Video metadata:\n");
    fprintf(stderr, " Frames: %d\n", enc->total_frames);
    fprintf(stderr, " FPS: %d\n", enc->fps);
    fprintf(stderr, " Duration: %.2fs\n", enc->duration);
    fprintf(stderr, " Audio: %s\n", enc->has_audio ? "Yes" : "No");
    fprintf(stderr, " Resolution: %dx%d\n", enc->width, enc->height);

    return (enc->total_frames > 0 && enc->fps > 0);
}
// Launch FFmpeg to decode the input into raw rgb24 frames.
// The video is scaled to cover enc->width x enc->height and then cropped to
// exactly that size; frames arrive on enc->ffmpeg_video_pipe.
// Returns 1 when the pipe was opened, 0 otherwise.
static int start_video_conversion(tev_encoder_t *enc) {
    const int target_w = enc->width;
    const int target_h = enc->height;

    char cmdline[2048];
    snprintf(cmdline, sizeof(cmdline),
             "ffmpeg -i \"%s\" -f rawvideo -pix_fmt rgb24 -vf scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d -y - 2>/dev/null",
             enc->input_file, target_w, target_h, target_w, target_h);

    FILE *frames = popen(cmdline, "r");
    enc->ffmpeg_video_pipe = frames;
    if (frames == NULL) {
        return 0;
    }
    return 1;
}
// Convert the input's audio track to an MP2 temp file and open it for reading.
//
// Runs FFmpeg synchronously via system(), then opens TEMP_AUDIO_FILE and
// records its length in enc->audio_remaining.
//
// Always returns 1: audio is optional. When the input has no audio the call
// is a no-op; when conversion or opening fails, a warning is printed and
// enc->has_audio is cleared so the rest of the encoder proceeds video-only.
static int start_audio_conversion(tev_encoder_t *enc) {
    if (!enc->has_audio) return 1;

    char command[2048];
    int written = snprintf(command, sizeof(command),
        "ffmpeg -i \"%s\" -acodec libtwolame -psymodel 4 -b:a 192k -ar %d -ac 2 -y \"%s\" 2>/dev/null",
        enc->input_file, MP2_SAMPLE_RATE, TEMP_AUDIO_FILE);

    // Never hand a truncated command line to the shell.
    int command_ok = (written >= 0 && (size_t)written < sizeof(command));

    if (command_ok && system(command) == 0) {
        enc->mp2_file = fopen(TEMP_AUDIO_FILE, "rb");
        if (enc->mp2_file) {
            // Determine the file length; a failed seek/tell must not be
            // stored as a (negative) remaining byte count.
            long size = -1;
            if (fseek(enc->mp2_file, 0, SEEK_END) == 0) {
                size = ftell(enc->mp2_file);
            }
            if (size >= 0 && fseek(enc->mp2_file, 0, SEEK_SET) == 0) {
                enc->audio_remaining = size;
                return 1;
            }
            fclose(enc->mp2_file);
            enc->mp2_file = NULL;
        }
    }

    fprintf(stderr, "Warning: Failed to convert audio\n");
    enc->has_audio = 0;
    return 1;
}
// Write the 26-byte TEV file header to `output`.
//
// Layout (multi-byte fields are little-endian):
//   8 bytes  magic (TEV_MAGIC)
//   1 byte   version (TEV_VERSION)
//   1 byte   flags (bit 0 set when the stream carries audio)
//   2 bytes  width
//   2 bytes  height
//   2 bytes  fps
//   4 bytes  total frame count
//   1 byte   quality
//   5 bytes  reserved (zero)
//
// The fields are serialised byte by byte so the file is identical regardless
// of host byte order; the previous code wrote the leading bytes of the
// in-memory integers, which is only correct on little-endian hosts.
// Write errors are not reported (the function returns void).
static void write_tev_header(tev_encoder_t *enc, FILE *output) {
    const uint32_t width = (uint32_t)enc->width;
    const uint32_t height = (uint32_t)enc->height;
    const uint32_t fps = (uint32_t)enc->fps;
    const uint32_t frames = (uint32_t)enc->total_frames;

    const uint8_t fields[18] = {
        (uint8_t)TEV_VERSION,
        (uint8_t)(enc->has_audio ? 0x01 : 0x00),
        (uint8_t)(width & 0xFF), (uint8_t)((width >> 8) & 0xFF),
        (uint8_t)(height & 0xFF), (uint8_t)((height >> 8) & 0xFF),
        (uint8_t)(fps & 0xFF), (uint8_t)((fps >> 8) & 0xFF),
        (uint8_t)(frames & 0xFF), (uint8_t)((frames >> 8) & 0xFF),
        (uint8_t)((frames >> 16) & 0xFF), (uint8_t)((frames >> 24) & 0xFF),
        (uint8_t)enc->quality,
        0, 0, 0, 0, 0 // reserved
    };

    fwrite(TEV_MAGIC, 1, 8, output);
    fwrite(fields, 1, sizeof(fields), output);
}
// Read, encode and write one video frame.
//
// Steps: read one RGB24 frame from the FFmpeg pipe, encode every 8x8 block
// into enc->block_data, gzip the block array, then emit a video packet
// (2-byte type, 4-byte size, payload) followed by a 2-byte sync packet.
// Frame 1 and every KEYFRAME_INTERVAL-th frame are encoded as keyframes.
//
// Returns 1 when a frame was written, 0 when the pipe delivered no further
// complete frame (end of video), and -1 on allocation or compression failure.
static int process_frame(tev_encoder_t *enc, int frame_num, FILE *output) {
    // Read RGB data. Sizes are computed in size_t so large frames cannot
    // overflow int arithmetic.
    const size_t rgb_size = (size_t)enc->width * (size_t)enc->height * 3;
    uint8_t *rgb_buffer = malloc(rgb_size);
    if (!rgb_buffer) {
        fprintf(stderr, "Failed to allocate frame buffer\n");
        return -1;
    }
    if (fread(rgb_buffer, 1, rgb_size, enc->ffmpeg_video_pipe) != rgb_size) {
        free(rgb_buffer);
        return 0; // End of video
    }

    // Hand the frame to copy_rgb_frame (original note: "Convert to 4096-color format")
    copy_rgb_frame(rgb_buffer, enc->current_rgb, enc->width * enc->height);
    free(rgb_buffer);

    int is_keyframe = (frame_num == 1) || (frame_num % KEYFRAME_INTERVAL == 0);

    // Reset per-frame statistics
    enc->blocks_skip = enc->blocks_intra = enc->blocks_inter = enc->blocks_motion = 0;

    // Encode all 8x8 blocks (partial edge blocks are rounded up)
    int blocks_x = (enc->width + 7) / 8;
    int blocks_y = (enc->height + 7) / 8;
    for (int by = 0; by < blocks_y; by++) {
        for (int bx = 0; bx < blocks_x; bx++) {
            encode_block(enc, bx, by, is_keyframe);
        }
    }

    // Debug struct layout
    fprintf(stderr, "Block size: %zu, DCT offset: %zu\n",
            sizeof(tev_block_t), offsetof(tev_block_t, dct_coeffs));

    // No endian conversion is performed here; the block structs are
    // compressed exactly as they sit in memory.
    size_t block_data_size = (size_t)blocks_x * (size_t)blocks_y * sizeof(tev_block_t);

    // Point the stream at this frame's data. deflateReset() below restarts
    // the stream state but leaves these pointers and counts untouched.
    enc->gzip_stream.next_in = (Bytef*)enc->block_data;
    enc->gzip_stream.avail_in = block_data_size;
    enc->gzip_stream.next_out = (Bytef*)enc->compressed_buffer;
    enc->gzip_stream.avail_out = block_data_size * 2; // matches the allocation in allocate_buffers

    if (deflateReset(&enc->gzip_stream) != Z_OK) {
        fprintf(stderr, "Gzip deflateReset failed\n");
        return -1;
    }

    int result = deflate(&enc->gzip_stream, Z_FINISH);
    if (result != Z_STREAM_END) {
        fprintf(stderr, "Gzip compression failed: %d\n", result);
        return -1;
    }
    size_t compressed_size = enc->gzip_stream.total_out;

    // Write video packet: type, payload size, payload
    uint8_t packet_type[2] = {is_keyframe ? TEV_PACKET_IFRAME : TEV_PACKET_PFRAME, 0x00};
    fwrite(packet_type, 1, 2, output);
    uint32_t size = (uint32_t)compressed_size;
    fwrite(&size, 4, 1, output);
    fwrite(enc->compressed_buffer, 1, compressed_size, output);

    // Write sync packet
    uint8_t sync[2] = {0xFF, 0xFF};
    fwrite(sync, 1, 2, output);

    enc->total_output_bytes += 2 + 4 + compressed_size + 2;

    // Swap frame buffers so this frame becomes the reference for the next one
    uint8_t *temp_rgb = enc->previous_rgb;
    enc->previous_rgb = enc->current_rgb;
    enc->current_rgb = temp_rgb;

    fprintf(stderr, "\rFrame %d/%d [%c] - Skip:%d Intra:%d Inter:%d - Ratio:%.1f%%",
            frame_num, enc->total_frames, is_keyframe ? 'I' : 'P',
            enc->blocks_skip, enc->blocks_intra, enc->blocks_inter,
            (compressed_size * 100.0) / block_data_size);
    fflush(stderr);

    return 1;
}
// Allocate a zero-initialised encoder and apply the default settings
// (DEFAULT_WIDTH x DEFAULT_HEIGHT, quality 5, output to stdout).
//
// Returns NULL when the allocation fails. The caller releases the encoder
// with cleanup_encoder().
//
// The parameter list is spelled (void): before C23 an empty list declares an
// unprototyped function, which lets calls with stray arguments compile.
static tev_encoder_t *init_encoder(void) {
    tev_encoder_t *enc = calloc(1, sizeof *enc);
    if (!enc) return NULL;

    enc->width = DEFAULT_WIDTH;
    enc->height = DEFAULT_HEIGHT;
    enc->quality = 5; // Default quality
    enc->output_to_stdout = 1;
    return enc;
}
// Allocate all per-encoder working buffers and initialise the gzip stream.
//
// Buffer sizes derive from enc->width / enc->height, so those must be final
// before this is called. Returns 1 when every allocation and deflateInit2()
// succeeded, 0 otherwise. On failure the buffers that were allocated stay
// attached to `enc`; cleanup_encoder() releases them.
static int allocate_buffers(tev_encoder_t *enc) {
    // A non-positive dimension would turn into a zero or wrapped-around
    // allocation size below.
    if (enc->width <= 0 || enc->height <= 0) {
        fprintf(stderr, "Invalid resolution: %dx%d\n", enc->width, enc->height);
        return 0;
    }

    // size_t arithmetic: width * height * 3 can exceed INT_MAX.
    const size_t pixels = (size_t)enc->width * (size_t)enc->height;
    const size_t blocks = (size_t)((enc->width + 7) / 8) * (size_t)((enc->height + 7) / 8);

    enc->current_rgb = malloc(pixels * 3);  // RGB: 3 bytes per pixel
    enc->previous_rgb = malloc(pixels * 3);
    enc->reference_rgb = malloc(pixels * 3);
    enc->rgb_workspace = malloc(BLOCK_SIZE * BLOCK_SIZE * 3);
    enc->dct_workspace = malloc(BLOCK_SIZE * BLOCK_SIZE * 3 * sizeof(float));
    enc->block_data = malloc(blocks * sizeof(tev_block_t));
    // Twice the raw block size; process_frame passes the same bound to deflate
    enc->compressed_buffer = malloc(blocks * sizeof(tev_block_t) * 2);
    enc->mp2_buffer = malloc(2048);

    // Initialize gzip compression stream
    enc->gzip_stream.zalloc = Z_NULL;
    enc->gzip_stream.zfree = Z_NULL;
    enc->gzip_stream.opaque = Z_NULL;
    int gzip_init_result = deflateInit2(&enc->gzip_stream, Z_DEFAULT_COMPRESSION,
                                        Z_DEFLATED, 15 + 16, 8, Z_DEFAULT_STRATEGY); // 15+16 for gzip format

    return (enc->current_rgb && enc->previous_rgb && enc->reference_rgb &&
            enc->rgb_workspace && enc->dct_workspace && enc->block_data && enc->compressed_buffer &&
            enc->mp2_buffer && gzip_init_result == Z_OK);
}
// Release everything owned by the encoder, then the encoder itself.
//
// Accepts NULL. Closes the FFmpeg pipe and the MP2 file when they are open,
// ends the gzip stream, frees all buffers and strings, and removes
// TEMP_AUDIO_FILE (attempted unconditionally).
static void cleanup_encoder(tev_encoder_t *enc) {
    if (enc == NULL) {
        return;
    }

    if (enc->ffmpeg_video_pipe != NULL) {
        pclose(enc->ffmpeg_video_pipe);
    }
    if (enc->mp2_file != NULL) {
        fclose(enc->mp2_file);
    }
    deflateEnd(&enc->gzip_stream);

    // Heap blocks owned by the encoder, released in the original order.
    void *const owned[] = {
        enc->input_file,
        enc->output_file,
        enc->current_rgb,
        enc->previous_rgb,
        enc->reference_rgb,
        enc->rgb_workspace,
        enc->dct_workspace,
        enc->block_data,
        enc->compressed_buffer,
        enc->mp2_buffer,
    };
    for (size_t k = 0; k < sizeof(owned) / sizeof(owned[0]); ++k) {
        free(owned[k]);
    }

    unlink(TEMP_AUDIO_FILE);
    free(enc);
}
// Print the command-line help text to stdout.
// `program_name` is substituted into the usage line and the two examples.
static void print_usage(const char *program_name) {
    fputs("TSVM Enhanced Video (TEV) Encoder\n\n", stdout);
    printf("Usage: %s [options] input_video\n\n", program_name);

    fputs("Options:\n"
          " -o, --output FILE Output TEV file (default: stdout)\n"
          " -s, --size WxH Video resolution (default: 560x448)\n"
          " -q, --quality N Quality level 0-7 (default: 5)\n"
          " -h, --help Show this help\n\n",
          stdout);

    fputs("TEV Features:\n"
          " - 8x8 DCT-based compression with motion compensation\n"
          " - Native 4096-color support (4:4:4 RGB)\n"
          " - Zstd compression for optimal efficiency\n"
          " - Hardware-accelerated encoding functions\n\n",
          stdout);

    fputs("Examples:\n", stdout);
    printf(" %s input.mp4 -o output.tev\n", program_name);
    printf(" %s input.avi -s 1024x768 -q 7 -o output.tev\n", program_name);
}
// Entry point of the TEV encoder.
//
// Parses the command line, probes the input, starts the FFmpeg helpers,
// writes the TEV header and then encodes frames until the input ends.
//
// Exit status: 0 on success (including -h), 1 on bad arguments, on
// initialisation failure, or when a frame fails to encode. The last case
// used to be reported as "Encoding complete" with status 0.
int main(int argc, char *argv[]) {
    tev_encoder_t *enc = init_encoder();
    if (!enc) {
        fprintf(stderr, "Failed to initialize encoder\n");
        return 1;
    }

    // Parse arguments
    static struct option long_options[] = {
        {"output", required_argument, 0, 'o'},
        {"size", required_argument, 0, 's'},
        {"quality", required_argument, 0, 'q'},
        {"help", no_argument, 0, 'h'},
        {0, 0, 0, 0}
    };

    int c;
    while ((c = getopt_long(argc, argv, "o:s:q:h", long_options, NULL)) != -1) {
        switch (c) {
            case 'o':
                // A repeated -o replaces the earlier name instead of leaking it.
                free(enc->output_file);
                enc->output_file = strdup(optarg);
                if (!enc->output_file) {
                    fprintf(stderr, "Out of memory\n");
                    cleanup_encoder(enc);
                    return 1;
                }
                enc->output_to_stdout = 0;
                break;
            case 's':
                // The header stores width and height in 16 bits each.
                if (sscanf(optarg, "%dx%d", &enc->width, &enc->height) != 2 ||
                    enc->width <= 0 || enc->height <= 0 ||
                    enc->width > 65535 || enc->height > 65535) {
                    fprintf(stderr, "Invalid resolution: %s\n", optarg);
                    cleanup_encoder(enc);
                    return 1;
                }
                break;
            case 'q':
                enc->quality = atoi(optarg);
                if (enc->quality < 0 || enc->quality > 7) {
                    fprintf(stderr, "Quality must be 0-7\n");
                    cleanup_encoder(enc);
                    return 1;
                }
                break;
            case 'h':
                print_usage(argv[0]);
                cleanup_encoder(enc);
                return 0;
            default:
                print_usage(argv[0]);
                cleanup_encoder(enc);
                return 1;
        }
    }

    if (optind >= argc) {
        fprintf(stderr, "Input file required\n");
        print_usage(argv[0]);
        cleanup_encoder(enc);
        return 1;
    }

    enc->input_file = strdup(argv[optind]);
    if (!enc->input_file) {
        fprintf(stderr, "Out of memory\n");
        cleanup_encoder(enc);
        return 1;
    }

    // Initialize
    if (!get_video_metadata(enc) || !allocate_buffers(enc) ||
        !start_video_conversion(enc) || !start_audio_conversion(enc)) {
        cleanup_encoder(enc);
        return 1;
    }

    FILE *output = enc->output_to_stdout ? stdout : fopen(enc->output_file, "wb");
    if (!output) {
        fprintf(stderr, "Failed to open output\n");
        cleanup_encoder(enc);
        return 1;
    }

    write_tev_header(enc, output);
    gettimeofday(&enc->start_time, NULL);
    enc->total_output_bytes = 8 + 1 + 1 + 2 + 2 + 2 + 4 + 1 + 5; // TEV header size

    // Process all frames. process_frame returns 1 (frame written),
    // 0 (input exhausted) or -1 (error).
    int exit_code = 0;
    for (int frame = 1; frame <= enc->total_frames; frame++) {
        int result = process_frame(enc, frame, output);
        if (result < 0) {
            exit_code = 1;
            break;
        }
        if (result == 0) break;
    }

    if (exit_code == 0) {
        fprintf(stderr, "\nEncoding complete\n");
    } else {
        fprintf(stderr, "\nEncoding failed\n");
    }

    if (!enc->output_to_stdout) {
        // fclose flushes buffered data, so a failure here means lost output.
        if (fclose(output) != 0) {
            fprintf(stderr, "Failed to finish writing output\n");
            exit_code = 1;
        }
        fprintf(stderr, "Output: %s (%.1f MB)\n", enc->output_file,
                enc->total_output_bytes / (1024.0 * 1024.0));
    }

    cleanup_encoder(enc);
    return exit_code;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,169 @@
// Affine estimation for TAV mesh warping
// This file contains logic to estimate per-cell affine transforms from block motion
#include <cmath>
#include <cstdlib>
#include <cstring>
extern "C" {
// Convert a scaled model parameter to short. The value is truncated toward
// zero (as the plain cast did) but saturated to the short range, because an
// out-of-range floating-point to integer conversion is undefined behaviour.
// NaN maps to 0.
static short affine_clamp_to_short(double value) {
    if (value != value) return 0; // NaN
    if (value >= 32767.0) return 32767;
    if (value <= -32768.0) return -32768;
    return (short)value;
}

// Store the translation-only result: average motion plus the identity matrix.
static void affine_store_translation(
    float avg_dx, float avg_dy,
    short *out_tx, short *out_ty,
    short *out_a11, short *out_a12,
    short *out_a21, short *out_a22
) {
    *out_tx = affine_clamp_to_short(avg_dx * 8.0f);
    *out_ty = affine_clamp_to_short(avg_dy * 8.0f);
    *out_a11 = 256; // Identity
    *out_a12 = 0;
    *out_a21 = 0;
    *out_a22 = 256;
}

// Estimate the motion model of one mesh cell from a dense flow field.
//
// The flow is sampled on a 4x4 grid spanning the cell (clamped to the image)
// and two models are fitted to the 16 samples:
//   translation: v = mean(v)
//   affine:      [vx] = [a11 a12][px] + [tx]
//                [vy]   [a21 a22][py]   [ty]
// where (px, py) is the sample position relative to the cell centre. The
// affine model is the least-squares solution of the 3x3 normal equations,
// solved once per flow component with the shared cofactor matrix.
//
// Parameters:
//   flow_x, flow_y  per-pixel motion, row-major, `width` floats per row
//   width, height   flow field dimensions in pixels
//   cell_x, cell_y  cell position in mesh coordinates
//   cell_w, cell_h  cell size in pixels
//   threshold       required relative residual improvement (e.g. 0.10 = 10%)
//   out_tx, out_ty  translation, scaled by 8 (1/8 pixel)
//   out_a11..a22    matrix entries, scaled by 256 (1/256 fixed-point)
//
// Returns 1 when the affine fit reduces the squared residual by more than
// `threshold` relative to translation-only; its parameters are then stored.
// Returns 0 otherwise (also for a singular system); the average motion and
// the identity matrix (256, 0, 0, 256) are stored.
//
// NOTE(review): in the affine case a11..a22 are the fitted flow gradients
// (0 for a pure translation), while the fallback stores the identity (256).
// Confirm which convention the consumer expects.
int estimate_cell_affine(
    const float *flow_x, const float *flow_y,
    int width, int height,
    int cell_x, int cell_y, // Cell position in mesh coordinates
    int cell_w, int cell_h, // Cell size in pixels
    float threshold, // Residual improvement threshold (e.g. 0.10 = 10%)
    short *out_tx, short *out_ty, // Translation (1/8 pixel)
    short *out_a11, short *out_a12, // Affine matrix (1/256 fixed-point)
    short *out_a21, short *out_a22
) {
    enum { SAMPLES_X = 4, SAMPLES_Y = 4, MAX_SAMPLES = SAMPLES_X * SAMPLES_Y };

    // Compute cell bounding box, clipped to the image
    int x_start = cell_x * cell_w;
    int y_start = cell_y * cell_h;
    int x_end = (cell_x + 1) * cell_w;
    int y_end = (cell_y + 1) * cell_h;
    if (x_end > width) x_end = width;
    if (y_end > height) y_end = height;

    // Sample motion vectors from a 4x4 grid within the cell
    float sample_motion_x[MAX_SAMPLES];
    float sample_motion_y[MAX_SAMPLES];
    int sample_px[MAX_SAMPLES];
    int sample_py[MAX_SAMPLES];
    int n_samples = 0;

    for (int sy = 0; sy < SAMPLES_Y; sy++) {
        for (int sx = 0; sx < SAMPLES_X; sx++) {
            int px = x_start + (x_end - x_start) * sx / (SAMPLES_X - 1);
            int py = y_start + (y_end - y_start) * sy / (SAMPLES_Y - 1);
            // Keep the lookup inside the flow field on every side
            if (px >= width) px = width - 1;
            if (py >= height) py = height - 1;
            if (px < 0) px = 0;
            if (py < 0) py = 0;

            int idx = py * width + px;
            sample_motion_x[n_samples] = flow_x[idx];
            sample_motion_y[n_samples] = flow_y[idx];
            sample_px[n_samples] = px - (x_start + x_end) / 2; // Relative to cell center
            sample_py[n_samples] = py - (y_start + y_end) / 2;
            n_samples++;
        }
    }

    // 1. Translation-only model (average motion) and its residual
    float avg_dx = 0, avg_dy = 0;
    for (int i = 0; i < n_samples; i++) {
        avg_dx += sample_motion_x[i];
        avg_dy += sample_motion_y[i];
    }
    avg_dx /= n_samples;
    avg_dy /= n_samples;

    float trans_residual = 0;
    for (int i = 0; i < n_samples; i++) {
        float dx_err = sample_motion_x[i] - avg_dx;
        float dy_err = sample_motion_y[i] - avg_dy;
        trans_residual += dx_err * dx_err + dy_err * dy_err;
    }

    // 2. Affine model: accumulate the normal-equation sums
    double sum_x = 0, sum_y = 0, sum_xx = 0, sum_yy = 0, sum_xy = 0;
    double sum_vx = 0, sum_vy = 0, sum_vx_x = 0, sum_vx_y = 0;
    double sum_vy_x = 0, sum_vy_y = 0;

    for (int i = 0; i < n_samples; i++) {
        double px = sample_px[i];
        double py = sample_py[i];
        double vx = sample_motion_x[i];
        double vy = sample_motion_y[i];
        sum_x += px;
        sum_y += py;
        sum_xx += px * px;
        sum_yy += py * py;
        sum_xy += px * py;
        sum_vx += vx;
        sum_vy += vy;
        sum_vx_x += vx * px;
        sum_vx_y += vx * py;
        sum_vy_x += vy * px;
        sum_vy_y += vy * py;
    }

    // Normal equations M * [a1 a2 t]^T = [sum_v_x sum_v_y sum_v]^T with
    //       | sum_xx sum_xy sum_x |
    //   M = | sum_xy sum_yy sum_y |
    //       | sum_x  sum_y  n     |
    // M is symmetric, so its cofactor matrix is symmetric too; only the
    // entries needed for the first two unknowns are computed.
    const double n = n_samples;
    const double c00 = sum_yy * n - sum_y * sum_y;
    const double c01 = sum_x * sum_y - sum_xy * n;
    const double c02 = sum_xy * sum_y - sum_yy * sum_x;
    const double c11 = sum_xx * n - sum_x * sum_x;
    const double c12 = sum_xy * sum_x - sum_xx * sum_y;
    const double det = sum_xx * c00 + sum_xy * c01 + sum_x * c02;

    if (fabs(det) < 1e-6) {
        // Singular matrix (e.g. all samples collapsed onto a line): translation only
        affine_store_translation(avg_dx, avg_dy, out_tx, out_ty,
                                 out_a11, out_a12, out_a21, out_a22);
        return 0;
    }

    // Cramer's rule via the cofactors; the translation follows from the
    // third normal equation.
    const double a11 = (c00 * sum_vx_x + c01 * sum_vx_y + c02 * sum_vx) / det;
    const double a12 = (c01 * sum_vx_x + c11 * sum_vx_y + c12 * sum_vx) / det;
    const double tx = (sum_vx - a11 * sum_x - a12 * sum_y) / n;

    const double a21 = (c00 * sum_vy_x + c01 * sum_vy_y + c02 * sum_vy) / det;
    const double a22 = (c01 * sum_vy_x + c11 * sum_vy_y + c12 * sum_vy) / det;
    const double ty = (sum_vy - a21 * sum_x - a22 * sum_y) / n;

    // Affine residual
    float affine_residual = 0;
    for (int i = 0; i < n_samples; i++) {
        double px = sample_px[i];
        double py = sample_py[i];
        double pred_vx = a11 * px + a12 * py + tx;
        double pred_vy = a21 * px + a22 * py + ty;
        double dx_err = sample_motion_x[i] - pred_vx;
        double dy_err = sample_motion_y[i] - pred_vy;
        affine_residual += dx_err * dx_err + dy_err * dy_err;
    }

    // Decision: use affine if the residual improves by > threshold.
    // The epsilon keeps the ratio finite when the translation fit is exact.
    float improvement = (trans_residual - affine_residual) / (trans_residual + 1e-6f);

    if (improvement > threshold) {
        *out_tx = affine_clamp_to_short(tx * 8.0f);
        *out_ty = affine_clamp_to_short(ty * 8.0f);
        *out_a11 = affine_clamp_to_short(a11 * 256.0);
        *out_a12 = affine_clamp_to_short(a12 * 256.0);
        *out_a21 = affine_clamp_to_short(a21 * 256.0);
        *out_a22 = affine_clamp_to_short(a22 * 256.0);
        return 1; // Affine
    }

    affine_store_translation(avg_dx, avg_dy, out_tx, out_ty,
                             out_a11, out_a12, out_a21, out_a22);
    return 0; // Translation only
}
} // extern "C"

Binary file not shown.

View File

@@ -0,0 +1,65 @@
// Simple coefficient preprocessing for better compression
// Insert right before Zstd compression
#ifndef COEFFICIENT_COMPRESS_H
#define COEFFICIENT_COMPRESS_H
#include <stdint.h>
#include <string.h>
// Pack coefficients as a significance map followed by the non-zero values.
//
// Output layout in `output_buffer`:
//   (coeff_count + 7) / 8 bytes  significance map; bit (i % 8) of byte (i / 8)
//                                is set when coeffs[i] != 0
//   2 bytes per non-zero value   the non-zero coefficients in index order,
//                                in host byte order
//
// `coeffs` is only read. `output_buffer` must hold the worst case of
// (coeff_count + 7) / 8 + coeff_count * sizeof(int16_t) bytes.
// Returns the number of bytes written; 0 when coeff_count <= 0.
//
// Values are stored with memcpy because the value area starts at an
// arbitrary byte offset (odd whenever the map has an odd number of bytes),
// where a direct int16_t store would be misaligned.
static size_t preprocess_coefficients(int16_t *coeffs, int coeff_count, uint8_t *output_buffer) {
    if (coeff_count <= 0) return 0;

    const size_t map_bytes = ((size_t)coeff_count + 7) / 8; // Round up to nearest byte
    uint8_t *sig_map = output_buffer;
    uint8_t *values = output_buffer + map_bytes;

    // Clear significance map
    memset(sig_map, 0, map_bytes);

    // Fill significance map and extract non-zero values in one pass
    size_t value_count = 0;
    for (int i = 0; i < coeff_count; i++) {
        if (coeffs[i] == 0) continue;

        sig_map[i / 8] |= (uint8_t)(1u << (i % 8));
        memcpy(values + value_count * sizeof(int16_t), &coeffs[i], sizeof(int16_t));
        value_count++;
    }

    return map_bytes + value_count * sizeof(int16_t);
}
// Decoder: rebuild the coefficient array from a significance map.
//
// `compressed_data` holds (coeff_count + 7) / 8 map bytes followed by one
// host-order 16-bit value for every set bit, in index order (the layout
// produced by preprocess_coefficients). `output_coeffs` receives coeff_count
// values; positions whose map bit is clear become 0.
// Does nothing when coeff_count <= 0.
//
// Values are fetched with memcpy because they start right after the map, at
// an offset that is odd whenever the map has an odd number of bytes.
//
// NOTE(review): the length of `compressed_data` is not passed in, so a
// corrupt map with more set bits than stored values reads past the payload.
static void postprocess_coefficients(uint8_t *compressed_data, int coeff_count, int16_t *output_coeffs) {
    if (coeff_count <= 0) return;

    const size_t map_bytes = ((size_t)coeff_count + 7) / 8;
    const uint8_t *sig_map = compressed_data;
    const uint8_t *values = compressed_data + map_bytes;

    // Clear output
    memset(output_coeffs, 0, (size_t)coeff_count * sizeof(int16_t));

    // Reconstruct coefficients
    size_t value_idx = 0;
    for (int i = 0; i < coeff_count; i++) {
        if (sig_map[i / 8] & (1u << (i % 8))) {
            memcpy(&output_coeffs[i], values + value_idx * sizeof(int16_t), sizeof(int16_t));
            value_idx++;
        }
    }
}
#endif // COEFFICIENT_COMPRESS_H

View File

@@ -0,0 +1,39 @@
#ifndef TAD32_DECODER_H
#define TAD32_DECODER_H
#include <stdint.h>
#include <stddef.h>
// TAD32 (Terrarum Advanced Audio - PCM32f version) Decoder
// DWT-based perceptual audio codec for TSVM
// Shared decoder library used by both decoder_tad (standalone) and decoder_tav (video decoder)
// Constants (must match encoder)
#define TAD32_SAMPLE_RATE 32000
#define TAD32_CHANNELS 2 // Stereo
#define TAD_DEFAULT_CHUNK_SIZE 32768 // Default chunk size for standalone TAD files
/**
* Decode audio chunk with TAD32 codec
*
* @param input Input TAD32 chunk data
* @param input_size Size of input buffer
* @param pcmu8_stereo Output PCMu8 stereo samples (interleaved L,R)
* @param bytes_consumed [out] Number of bytes consumed from input
* @param samples_decoded [out] Number of samples decoded per channel
* @return 0 on success, -1 on error
*
* Input format:
* uint16 sample_count (samples per channel)
* uint8 max_index (maximum quantisation index)
* uint32 payload_size (bytes in payload)
* * payload (encoded M/S data, Zstd-compressed with EZBC)
*
* Output format:
* PCMu8 stereo interleaved (8-bit unsigned PCM, L,R pairs)
* Range: [0, 255] where 128 = silence
*/
int tad32_decode_chunk(const uint8_t *input, size_t input_size, uint8_t *pcmu8_stereo,
size_t *bytes_consumed, size_t *samples_decoded);
#endif // TAD32_DECODER_H

View File

@@ -0,0 +1,63 @@
#ifndef TAD32_ENCODER_H
#define TAD32_ENCODER_H
#include <stdint.h>
#include <stddef.h>
// TAD32 (Terrarum Advanced Audio - PCM32f version) Encoder
// DWT-based perceptual audio codec for TSVM
// Alternative version: PCM32f throughout encoding, PCM8 conversion only at decoder
// Constants
#define TAD32_COEFF_SCALARS {64.0f, 45.255f, 32.0f, 22.627f, 16.0f, 11.314f, 8.0f, 5.657f, 4.0f, 2.828f} // value only valid for CDF 9/7 with decomposition level 9. Index 0 = LL band
#define TAD32_MIN_CHUNK_SIZE 1024 // Minimum: 1024 samples
#define TAD32_SAMPLE_RATE 32000
#define TAD32_CHANNELS 2 // Stereo
#define TAD32_QUALITY_MIN 0
#define TAD32_QUALITY_MAX 6
#define TAD32_QUALITY_DEFAULT 3
#define TAD32_ZSTD_LEVEL 15
// Map a quality level to the maximum quantisation index handed to the encoder.
// Levels below 0 are treated as 0 and levels above 5 as 5.
// NOTE(review): TAD32_QUALITY_MAX is 6 while this table ends at level 5, so
// level 6 yields the same index as level 5 - confirm that is intended.
static inline int tad32_quality_to_max_index(int quality) {
    static const int max_index_by_level[6] = {21, 31, 44, 63, 89, 127};
    const int top_level = (int)(sizeof(max_index_by_level) / sizeof(max_index_by_level[0])) - 1;

    int level = quality;
    if (level < 0) {
        level = 0;
    } else if (level > top_level) {
        level = top_level;
    }
    return max_index_by_level[level];
}
/**
* Encode audio chunk with TAD32 codec (PCM32f version)
*
* @param pcm32_stereo Input PCM32fLE stereo samples (interleaved L,R)
* @param num_samples Number of samples per channel (min 1024)
* @param max_index Maximum quantisation index (7=3bit, 15=4bit, 31=5bit, 63=6bit, 127=7bit)
* @param quantiser_scale Quantiser scaling factor (1.0=baseline, 2.0=2x coarser quantisation)
* Higher values = more aggressive quantisation = smaller files
* @param zstd_level Zstd compression level (1-22). Use negative value to disable compression.
* When disabled, MSB of payload_size is set to indicate uncompressed data.
* @param output Output buffer (must be large enough)
* @return Number of bytes written to output, or 0 on error
*
* Output format:
* uint16 sample_count (samples per channel)
* uint8 max_index (maximum quantisation index)
* uint32 payload_size (bytes in payload; MSB=1 indicates uncompressed)
* * payload (encoded M/S data, optionally Zstd-compressed)
*/
size_t tad32_encode_chunk(const float *pcm32_stereo, size_t num_samples,
int max_index,
float quantiser_scale, int zstd_level, uint8_t *output);
/**
* Print accumulated coefficient statistics
* Only effective if TAD_COEFF_STATS environment variable is set
*/
void tad32_print_statistics(void);
/**
* Free accumulated statistics memory
* Should be called after tad32_print_statistics()
*/
void tad32_free_statistics(void);
#endif // TAD32_ENCODER_H

View File

@@ -0,0 +1,74 @@
// TEV Entropy Coder - Specialised for DCT coefficients
// Replaces gzip with video-optimized compression
#ifndef ENTROPY_CODER_H
#define ENTROPY_CODER_H
#include <stdint.h>
#include <stdio.h>
// Bit writer for variable-length codes
// Holds a destination buffer plus a byte/bit cursor; passed to write_bits().
// NOTE(review): whether bits fill each byte MSB-first or LSB-first is decided
// by the write_bits() implementation, which is not part of this header.
typedef struct {
uint8_t *buffer; // destination bytes (presumably caller-owned - confirm)
size_t buffer_size; // capacity of `buffer` in bytes
size_t byte_pos; // index of the byte currently being filled
int bit_pos; // 0-7, next bit to write
} bit_writer_t;
// Bit reader for decoding
// Read-side counterpart of bit_writer_t: a const source buffer plus a
// byte/bit cursor; passed to read_bits().
typedef struct {
const uint8_t *buffer; // source bytes (never written through this struct)
size_t buffer_size; // number of bytes available in `buffer`
size_t byte_pos; // index of the byte currently being consumed
int bit_pos; // 0-7, next bit to read
} bit_reader_t;
// Huffman table entry
// One code word: its bit pattern and its length. A 16-bit `code` field
// limits code words to at most 16 bits.
typedef struct {
uint16_t code; // Huffman code
uint8_t bits; // Code length in bits
} huffman_entry_t;
// Video entropy coder optimized for TEV coefficients
// Bundles the per-symbol-class Huffman tables with one bit writer and one
// bit reader, so the same object type serves encoding and decoding.
// NOTE(review): how a signed symbol maps to a table index (e.g. value + 255)
// is not defined in this header - check the implementation.
typedef struct {
// Huffman tables for different coefficient types
huffman_entry_t y_dc_table[512]; // Y DC coefficients (-255 to +255)
huffman_entry_t y_ac_table[512]; // Y AC coefficients
huffman_entry_t c_dc_table[512]; // Chroma DC coefficients
huffman_entry_t c_ac_table[512]; // Chroma AC coefficients
huffman_entry_t run_table[256]; // Zero run lengths (0-255)
// Motion vector Huffman tables
huffman_entry_t mv_table[65]; // Motion vectors (-32 to +32)
// Bit writer/reader
bit_writer_t writer; // output cursor used while encoding
bit_reader_t reader; // input cursor used while decoding
} entropy_coder_t;
// Huffman codes for the 16 block modes.
// NOTE(review): this is a `static const` array with no initialiser in a
// header. In C it is a tentative definition with internal linkage: every
// translation unit that includes this header gets its own copy, and that copy
// is all zeros unless the same translation unit later defines the array with
// an initialiser. In C++ a const object without an initialiser is an error.
// Confirm where the real table lives.
static const huffman_entry_t BLOCK_MODE_HUFFMAN[16];
// Bit-level I/O on a writer/reader cursor; `bits` is the number of bits to
// transfer (the valid range is set by the implementation - confirm).
void write_bits(bit_writer_t *writer, uint32_t value, int bits);
uint32_t read_bits(bit_reader_t *reader, int bits);
// Initialise entropy coder
// NOTE(review): presumably the returned coder writes into `buffer` and must
// be released with entropy_coder_destroy() - confirm ownership of `buffer`.
entropy_coder_t* entropy_coder_create(uint8_t *buffer, size_t buffer_size);
void entropy_coder_destroy(entropy_coder_t *coder);
// Encoding functions
// The meaning of the int return values is not stated here - check the
// implementation before relying on them.
int encode_y_block(entropy_coder_t *coder, int16_t *y_coeffs);
int encode_chroma_block(entropy_coder_t *coder, int16_t *chroma_coeffs, int is_cg);
int encode_motion_vector(entropy_coder_t *coder, int16_t mv_x, int16_t mv_y);
int encode_block_mode(entropy_coder_t *coder, uint8_t mode);
// Decoding functions
// entropy_coder_init_reader() points the coder's reader at an encoded buffer;
// the decode_* calls mirror the encode_* calls above and return their results
// through the pointer arguments.
void entropy_coder_init_reader(entropy_coder_t *coder, const uint8_t *buffer, size_t buffer_size);
int decode_y_block(entropy_coder_t *coder, int16_t *y_coeffs);
int decode_chroma_block(entropy_coder_t *coder, int16_t *chroma_coeffs, int is_cg);
int decode_motion_vector(entropy_coder_t *coder, int16_t *mv_x, int16_t *mv_y);
int decode_block_mode(entropy_coder_t *coder, uint8_t *mode);
// Get compressed size
size_t entropy_coder_get_size(entropy_coder_t *coder);
void entropy_coder_reset(entropy_coder_t *coder);
#endif // ENTROPY_CODER_H

View File

@@ -0,0 +1,837 @@
/*
* TAV AVX-512 Optimisations
*
* This file contains AVX-512 optimised versions of performance-critical functions
* in the TAV encoder. Runtime CPU detection ensures fallback to scalar versions
* on non-AVX-512 systems.
*
* Optimised functions:
* - 1D DWT transforms (5/3, 9/7, Haar, Bior13/7, DD4)
* - Quantisation functions
* - RGB to YCoCg colour conversion
* - 2D DWT gather/scatter operations
*
* Compile with: -mavx512f -mavx512dq -mavx512bw -mavx512vl
*/
#ifndef TAV_AVX512_H
#define TAV_AVX512_H
#include <immintrin.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <stdio.h>
// =============================================================================
// SIMD Capability Detection
// =============================================================================
// SIMD capability levels distinguished at run time.
typedef enum {
SIMD_NONE = 0, // scalar code paths only
SIMD_AVX512F = 1 // AVX-512 code paths may be used
} simd_level_t;
// Global SIMD level (set by tav_simd_init)
// NOTE(review): the variable is `static` in a header, so every translation
// unit that includes this file has its own copy; tav_simd_init() only updates
// the copy of the translation unit it is called from.
static simd_level_t g_simd_level = SIMD_NONE;
// Run-time CPU feature probe.
// Returns 1 only when the build enables AVX-512F (__AVX512F__ defined) and
// the running CPU reports both avx512f and avx512dq; returns 0 otherwise.
static inline int cpu_has_avx512f(void) {
#ifndef __AVX512F__
    return 0;
#else
    if (!__builtin_cpu_supports("avx512f")) {
        return 0;
    }
    return __builtin_cpu_supports("avx512dq") ? 1 : 0;
#endif
}
// Detect the SIMD level once at startup and record it in g_simd_level.
// Prints one status line to stderr describing which path was selected.
static inline void tav_simd_init(void) {
#ifdef __AVX512F__
    const int have_avx512 = cpu_has_avx512f();
    g_simd_level = have_avx512 ? SIMD_AVX512F : SIMD_NONE;
    fputs(have_avx512 ? "[TAV] AVX-512 optimisations enabled\n"
                      : "[TAV] AVX-512 not available, using scalar fallback\n",
          stderr);
#else
    g_simd_level = SIMD_NONE;
    fputs("[TAV] Compiled without AVX-512 support\n", stderr);
#endif
}
#ifdef __AVX512F__
// =============================================================================
// Helper Functions
// =============================================================================
// Horizontal sum of the 16 float lanes of `v`.
// Folds 16 -> 8 -> 4 lanes with vertical adds, then finishes with two
// horizontal adds. The pairing order is fixed, so the rounding of the result
// is deterministic for a given input.
static inline float _mm512_reduce_add_ps_compat(__m512 v) {
    // 16 -> 8 lanes: lower half + upper half
    const __m256 folded8 = _mm256_add_ps(_mm512_castps512_ps256(v),
                                         _mm512_extractf32x8_ps(v, 1));
    // 8 -> 4 lanes
    __m128 acc = _mm_add_ps(_mm256_castps256_ps128(folded8),
                            _mm256_extractf128_ps(folded8, 1));
    // 4 -> 1: after two horizontal adds every lane holds the total
    acc = _mm_hadd_ps(acc, acc);
    acc = _mm_hadd_ps(acc, acc);
    return _mm_cvtss_f32(acc);
}
// Lane-wise clamp: max(v, min_val) first, then min(..., max_val).
static inline __m512 _mm512_clamp_ps(__m512 v, __m512 min_val, __m512 max_val) {
    const __m512 raised = _mm512_max_ps(v, min_val);
    return _mm512_min_ps(raised, max_val);
}
// =============================================================================
// AVX-512 Optimised 1D DWT Forward Transforms
// =============================================================================
// 5/3 forward DWT (one level, 1D) with AVX-512 arithmetic.
//
// Transforms `data` in place: on return data[0 .. half-1] holds the low-pass
// band and data[half .. length-1] the high-pass band, half = (length + 1) / 2.
//
//   predict: high[i] = data[2i+1] - 0.5  * (data[2i] + data[2i+2])
//            (data[2i] is reused when 2i+2 is past the end)
//   update:  low[i]  = data[2i]   + 0.25 * (high[i-1] + high[i])
//            (high[-1] and the neighbour of the last low sample count as 0)
//
// Lengths below 2 are left untouched. If the scratch buffer cannot be
// allocated the data is also left untouched, the same policy as the 9/7
// transform in this file; previously a failed calloc was dereferenced.
static inline void dwt_53_forward_1d_avx512(float *data, int length) {
    if (length < 2) return;

    float *temp = (float*)calloc(length, sizeof(float));
    if (!temp) return;
    int half = (length + 1) / 2;

    // Predict step (high-pass) - vectorised, 16 outputs per iteration
    int i;
    for (i = 0; i + 16 <= half; i += 16) {
        // The inputs are two floats apart, so they are gathered through small
        // stack arrays. Lanes whose odd sample does not exist (last lane of
        // an odd-length signal) are masked out of the store.
        __mmask16 valid_mask = 0xFFFF;
        float even_curr_vals[16], even_next_vals[16], odd_vals[16];
        for (int j = 0; j < 16; j++) {
            int base = 2 * (i + j);
            if (base + 1 < length) {
                even_curr_vals[j] = data[base];
                even_next_vals[j] = (base + 2 < length) ? data[base + 2] : data[base];
                odd_vals[j] = data[base + 1];
            } else {
                valid_mask &= (__mmask16)~(1u << j);
                even_curr_vals[j] = 0.0f;
                even_next_vals[j] = 0.0f;
                odd_vals[j] = 0.0f;
            }
        }
        if (valid_mask == 0) break;

        __m512 even_curr = _mm512_loadu_ps(even_curr_vals);
        __m512 even_next = _mm512_loadu_ps(even_next_vals);
        __m512 odd = _mm512_loadu_ps(odd_vals);

        __m512 pred = _mm512_mul_ps(_mm512_add_ps(even_curr, even_next), _mm512_set1_ps(0.5f));
        __m512 high = _mm512_sub_ps(odd, pred);

        _mm512_mask_storeu_ps(&temp[half + i], valid_mask, high);
    }

    // Handle remaining elements
    for (; i < half; i++) {
        int idx = 2 * i + 1;
        if (idx < length) {
            float pred = 0.5f * (data[2 * i] + (2 * i + 2 < length ? data[2 * i + 2] : data[2 * i]));
            temp[half + i] = data[idx] - pred;
        }
    }

    // Update step (low-pass) - vectorised
    // The loop condition guarantees 16 valid outputs, so no store mask is
    // needed. Writes go to temp[0 .. half-1] and never overlap the high-pass
    // values read from temp[half ..].
    for (i = 0; i + 16 <= half; i += 16) {
        float even_vals[16], high_prev[16], high_curr[16];
        for (int j = 0; j < 16; j++) {
            int k = i + j;
            even_vals[j] = data[2 * k];
            high_prev[j] = (k > 0) ? temp[half + k - 1] : 0.0f;
            high_curr[j] = (k < half - 1) ? temp[half + k] : 0.0f;
        }

        __m512 even = _mm512_loadu_ps(even_vals);
        __m512 hp = _mm512_loadu_ps(high_prev);
        __m512 hc = _mm512_loadu_ps(high_curr);

        __m512 update = _mm512_mul_ps(_mm512_add_ps(hp, hc), _mm512_set1_ps(0.25f));
        __m512 low = _mm512_add_ps(even, update);

        _mm512_storeu_ps(&temp[i], low);
    }

    // Handle remaining elements
    for (; i < half; i++) {
        float update = 0.25f * ((i > 0 ? temp[half + i - 1] : 0) +
                                (i < half - 1 ? temp[half + i] : 0));
        temp[i] = data[2 * i] + update;
    }

    memcpy(data, temp, length * sizeof(float));
    free(temp);
}
// 9/7 Irreversible Forward DWT with AVX-512
static inline void dwt_97_forward_1d_avx512(float *data, int length) {
if (length < 2) return;
int half = (length + 1) / 2;
// Allocate aligned temp buffer once (64-byte align for cache lines)
float *temp = NULL;
#if defined(_POSIX_C_SOURCE) || defined(_XOPEN_SOURCE)
if (posix_memalign((void**)&temp, 64, (size_t)length * sizeof(float)) != 0) {
temp = (float*)malloc((size_t)length * sizeof(float));
}
#else
temp = (float*)aligned_alloc(64, ((size_t)length * sizeof(float) + 63) & ~63);
if (!temp) temp = (float*)malloc((size_t)length * sizeof(float));
#endif
if (!temp) return; // allocation failure: bail out (preserve original behavior could be different)
// FAST SPLIT: interleave into temp: first half = evens, second half = odds
// This is simple, streaming-friendly, and much faster than per-iteration small-array gathers.
{
float *even = temp;
float *odd = temp + half;
int i = 0;
// process pairs to minimize branches and memory ops
for (; i + 1 < length; i += 2) {
even[0] = data[i];
odd[0] = data[i + 1];
++even; ++odd;
}
if (i < length) { // odd leftover
even[0] = data[i];
}
}
// Lifting coefficients as vectors
const __m512 alpha_vec = _mm512_set1_ps(-1.586134342f);
const __m512 beta_vec = _mm512_set1_ps(-0.052980118f);
const __m512 gamma_vec = _mm512_set1_ps(0.882911076f);
const __m512 delta_vec = _mm512_set1_ps(0.443506852f);
const __m512 K_vec = _mm512_set1_ps(1.230174105f);
const __m512 invK_vec = _mm512_set1_ps(1.0f / 1.230174105f);
// Helper variables
int i;
// -----------------------
// Step 1: Predict α
// d[i] += alpha * (s[i] + s[i+1])
// -----------------------
if (half > 0) {
// handle small or trivial cases
if (half == 1) {
if (half < length) {
temp[half + 0] += -1.586134342f * (temp[0] + temp[0]);
}
} else {
// main vectorised body: ensure s_next loads (i+1) valid -> i <= half-2
int limit = (half - 1);
int n_full = (limit / 16) * 16; // process up to n_full (multiple of 16)
i = 0;
for (; i + 32 <= n_full; i += 32) {
// unroll 2x (i and i+16)
__m512 s0 = _mm512_loadu_ps(&temp[i]);
__m512 s0n = _mm512_loadu_ps(&temp[i + 1]);
__m512 d0 = _mm512_loadu_ps(&temp[half + i]);
__m512 sum0 = _mm512_add_ps(s0, s0n);
d0 = _mm512_fmadd_ps(alpha_vec, sum0, d0);
_mm512_storeu_ps(&temp[half + i], d0);
__m512 s1 = _mm512_loadu_ps(&temp[i + 16]);
__m512 s1n = _mm512_loadu_ps(&temp[i + 17]);
__m512 d1 = _mm512_loadu_ps(&temp[half + i + 16]);
__m512 sum1 = _mm512_add_ps(s1, s1n);
d1 = _mm512_fmadd_ps(alpha_vec, sum1, d1);
_mm512_storeu_ps(&temp[half + i + 16], d1);
}
for (; i + 16 <= n_full; i += 16) {
__m512 s = _mm512_loadu_ps(&temp[i]);
__m512 sn = _mm512_loadu_ps(&temp[i + 1]);
__m512 d = _mm512_loadu_ps(&temp[half + i]);
__m512 sum = _mm512_add_ps(s, sn);
d = _mm512_fmadd_ps(alpha_vec, sum, d);
_mm512_storeu_ps(&temp[half + i], d);
}
// scalar remainder up to limit (half-2 -> last vector handled below)
for (; i < limit; ++i) {
temp[half + i] += -1.586134342f * (temp[i] + temp[i + 1]);
}
// handle last index i = half-1 (mirror)
int last = half - 1;
if (half + last < length) {
float s_curr = temp[last];
float s_next = s_curr;
temp[half + last] += -1.586134342f * (s_curr + s_next);
}
}
}
// -----------------------
// Step 2: Update β
// s[i] += beta * (d[i-1] + d[i])
// -----------------------
if (half > 0) {
// handle i == 0 separately (d_prev = d_curr for boundary semantics)
if (half >= 1) {
// i == 0
if (half + 0 < length) {
float d_curr0 = temp[half + 0];
temp[0] += -0.052980118f * (d_curr0 + d_curr0);
}
}
if (half > 1) {
// main vector loop starting from i = 1 to half-1 (we will write s[i] for i>=1)
int start = 1;
int limit = half; // exclusive
int n_elems = limit - start;
int n_full = (n_elems / 16) * 16;
i = start;
for (; i + 32 <= start + n_full; i += 32) {
// unroll 2x
__m512 s0 = _mm512_loadu_ps(&temp[i]);
__m512 dcurr0 = _mm512_loadu_ps(&temp[half + i]);
__m512 dprev0 = _mm512_loadu_ps(&temp[half + i - 1]);
__m512 sum0 = _mm512_add_ps(dprev0, dcurr0);
s0 = _mm512_fmadd_ps(beta_vec, sum0, s0);
_mm512_storeu_ps(&temp[i], s0);
__m512 s1 = _mm512_loadu_ps(&temp[i + 16]);
__m512 dcurr1 = _mm512_loadu_ps(&temp[half + i + 16]);
__m512 dprev1 = _mm512_loadu_ps(&temp[half + i + 15]);
__m512 sum1 = _mm512_add_ps(dprev1, dcurr1);
s1 = _mm512_fmadd_ps(beta_vec, sum1, s1);
_mm512_storeu_ps(&temp[i + 16], s1);
}
for (; i + 16 <= start + n_full; i += 16) {
__m512 s = _mm512_loadu_ps(&temp[i]);
__m512 dcurr = _mm512_loadu_ps(&temp[half + i]);
__m512 dprev = _mm512_loadu_ps(&temp[half + i - 1]);
__m512 sum = _mm512_add_ps(dprev, dcurr);
s = _mm512_fmadd_ps(beta_vec, sum, s);
_mm512_storeu_ps(&temp[i], s);
}
// scalar remainder
for (; i < limit; ++i) {
float d_curr = (half + i < length) ? temp[half + i] : 0.0f;
float d_prev = (half + i - 1 < length && i > 0) ? temp[half + i - 1] : d_curr;
temp[i] += -0.052980118f * (d_prev + d_curr);
}
}
}
// -----------------------
// Step 3: Predict γ
// d[i] += gamma * (s[i] + s[i+1])
// -----------------------
if (half > 0) {
if (half == 1) {
if (half < length) {
temp[half + 0] += 0.882911076f * (temp[0] + temp[0]);
}
} else {
int limit = (half - 1);
int n_full = (limit / 16) * 16;
i = 0;
for (; i + 32 <= n_full; i += 32) {
__m512 s0 = _mm512_loadu_ps(&temp[i]);
__m512 s0n = _mm512_loadu_ps(&temp[i + 1]);
__m512 d0 = _mm512_loadu_ps(&temp[half + i]);
__m512 sum0 = _mm512_add_ps(s0, s0n);
d0 = _mm512_fmadd_ps(gamma_vec, sum0, d0);
_mm512_storeu_ps(&temp[half + i], d0);
__m512 s1 = _mm512_loadu_ps(&temp[i + 16]);
__m512 s1n = _mm512_loadu_ps(&temp[i + 17]);
__m512 d1 = _mm512_loadu_ps(&temp[half + i + 16]);
__m512 sum1 = _mm512_add_ps(s1, s1n);
d1 = _mm512_fmadd_ps(gamma_vec, sum1, d1);
_mm512_storeu_ps(&temp[half + i + 16], d1);
}
for (; i + 16 <= n_full; i += 16) {
__m512 s = _mm512_loadu_ps(&temp[i]);
__m512 sn = _mm512_loadu_ps(&temp[i + 1]);
__m512 d = _mm512_loadu_ps(&temp[half + i]);
__m512 sum = _mm512_add_ps(s, sn);
d = _mm512_fmadd_ps(gamma_vec, sum, d);
_mm512_storeu_ps(&temp[half + i], d);
}
for (; i < limit; ++i) {
temp[half + i] += 0.882911076f * (temp[i] + temp[i + 1]);
}
// last index mirror
int last = half - 1;
if (half + last < length) {
float s_curr = temp[last];
float s_next = s_curr;
temp[half + last] += 0.882911076f * (s_curr + s_next);
}
}
}
// -----------------------
// Step 4: Update δ
// s[i] += delta * (d[i-1] + d[i])
// -----------------------
if (half > 0) {
// i == 0
if (half >= 1) {
if (half + 0 < length) {
float d_curr0 = temp[half + 0];
temp[0] += 0.443506852f * (d_curr0 + d_curr0);
}
}
if (half > 1) {
int start = 1;
int limit = half; // exclusive
int n_elems = limit - start;
int n_full = (n_elems / 16) * 16;
i = start;
for (; i + 32 <= start + n_full; i += 32) {
__m512 s0 = _mm512_loadu_ps(&temp[i]);
__m512 dcurr0 = _mm512_loadu_ps(&temp[half + i]);
__m512 dprev0 = _mm512_loadu_ps(&temp[half + i - 1]);
__m512 sum0 = _mm512_add_ps(dprev0, dcurr0);
s0 = _mm512_fmadd_ps(delta_vec, sum0, s0);
_mm512_storeu_ps(&temp[i], s0);
__m512 s1 = _mm512_loadu_ps(&temp[i + 16]);
__m512 dcurr1 = _mm512_loadu_ps(&temp[half + i + 16]);
__m512 dprev1 = _mm512_loadu_ps(&temp[half + i + 15]);
__m512 sum1 = _mm512_add_ps(dprev1, dcurr1);
s1 = _mm512_fmadd_ps(delta_vec, sum1, s1);
_mm512_storeu_ps(&temp[i + 16], s1);
}
for (; i + 16 <= start + n_full; i += 16) {
__m512 s = _mm512_loadu_ps(&temp[i]);
__m512 dcurr = _mm512_loadu_ps(&temp[half + i]);
__m512 dprev = _mm512_loadu_ps(&temp[half + i - 1]);
__m512 sum = _mm512_add_ps(dprev, dcurr);
s = _mm512_fmadd_ps(delta_vec, sum, s);
_mm512_storeu_ps(&temp[i], s);
}
for (; i < limit; ++i) {
float d_curr = (half + i < length) ? temp[half + i] : 0.0f;
float d_prev = (half + i - 1 < length && i > 0) ? temp[half + i - 1] : d_curr;
temp[i] += 0.443506852f * (d_prev + d_curr);
}
}
}
// -----------------------
// Step 5: Scaling
// s *= K, d *= invK
// -----------------------
// s (first half)
{
int n_full = (half / 16) * 16;
i = 0;
for (; i + 32 <= n_full; i += 32) {
__m512 s0 = _mm512_loadu_ps(&temp[i]);
s0 = _mm512_mul_ps(s0, K_vec);
_mm512_storeu_ps(&temp[i], s0);
__m512 s1 = _mm512_loadu_ps(&temp[i + 16]);
s1 = _mm512_mul_ps(s1, K_vec);
_mm512_storeu_ps(&temp[i + 16], s1);
}
for (; i + 16 <= n_full; i += 16) {
__m512 s = _mm512_loadu_ps(&temp[i]);
s = _mm512_mul_ps(s, K_vec);
_mm512_storeu_ps(&temp[i], s);
}
for (; i < half; ++i) temp[i] *= 1.230174105f;
}
// d (second half)
{
int dlen = length - half;
int n_full = (dlen / 16) * 16;
i = 0;
for (; i + 32 <= n_full; i += 32) {
__m512 d0 = _mm512_loadu_ps(&temp[half + i]);
d0 = _mm512_mul_ps(d0, invK_vec);
_mm512_storeu_ps(&temp[half + i], d0);
__m512 d1 = _mm512_loadu_ps(&temp[half + i + 16]);
d1 = _mm512_mul_ps(d1, invK_vec);
_mm512_storeu_ps(&temp[half + i + 16], d1);
}
for (; i + 16 <= n_full; i += 16) {
__m512 d = _mm512_loadu_ps(&temp[half + i]);
d = _mm512_mul_ps(d, invK_vec);
_mm512_storeu_ps(&temp[half + i], d);
}
for (; i < dlen; ++i) {
if (half + i < length) temp[half + i] /= 1.230174105f;
}
}
// Copy back and free
memcpy(data, temp, (size_t)length * sizeof(float));
free(temp);
}
// Forward 1D Haar DWT (AVX-512), performed in place on data[0..length).
//
// Output layout: the first half = (length + 1) / 2 entries hold the averages
// (even + odd) / 2, the remaining entries hold the differences (even - odd) / 2.
// For odd lengths the final sample has no partner: it is copied through as the
// last average and produces no difference coefficient.
// Lengths below 2 leave the buffer untouched.
//
// NOTE(review): the scratch allocation is not checked for NULL (unchanged from
// the previous implementation); confirm how callers want OOM to be handled.
static inline void dwt_haar_forward_1d_avx512(float *data, int length) {
    if (length < 2) return;

    const int half = (length + 1) / 2;
    float *scratch = (float*)malloc(length * sizeof(float));
    const __m512 scale = _mm512_set1_ps(0.5f);

    // Vector body: 16 (even, odd) pairs per iteration. The pairs are
    // de-interleaved through small stack arrays before being loaded.
    int pair = 0;
    while (pair + 16 <= half) {
        float evens[16], odds[16];
        __mmask16 has_partner = 0xFFFF;

        for (int lane = 0; lane < 16; lane++) {
            const int src = 2 * (pair + lane);
            evens[lane] = data[src];
            if (src + 1 < length) {
                odds[lane] = data[src + 1];
            } else {
                // Unpaired last sample: mirror it and suppress its high-pass store,
                // which would otherwise land one element past the buffer.
                odds[lane] = evens[lane];
                has_partner &= ~(1 << lane);
            }
        }

        const __m512 ev = _mm512_loadu_ps(evens);
        const __m512 od = _mm512_loadu_ps(odds);
        const __m512 averages = _mm512_mul_ps(_mm512_add_ps(ev, od), scale);
        const __m512 details = _mm512_mul_ps(_mm512_sub_ps(ev, od), scale);

        _mm512_storeu_ps(&scratch[pair], averages);
        _mm512_mask_storeu_ps(&scratch[half + pair], has_partner, details);
        pair += 16;
    }

    // Scalar tail for the pairs that do not fill a whole vector.
    for (; pair < half; pair++) {
        if (2 * pair + 1 >= length) {
            scratch[pair] = data[2 * pair];
            if (half + pair < length) {
                scratch[half + pair] = 0.0f;
            }
            continue;
        }
        scratch[pair] = (data[2 * pair] + data[2 * pair + 1]) / 2.0f;
        scratch[half + pair] = (data[2 * pair] - data[2 * pair + 1]) / 2.0f;
    }

    memcpy(data, scratch, length * sizeof(float));
    free(scratch);
}
// =============================================================================
// AVX-512 Optimised Quantisation Functions
// =============================================================================
/**
 * Uniform quantisation of DWT coefficients to int16 (AVX-512, 16 per iteration).
 *
 * For every coefficient: q = coeffs[i] / effective_q; when dead_zone_threshold
 * is positive and is_chroma is zero, |q| <= dead_zone_threshold is forced to 0;
 * the result is rounded half away from zero and saturated to [-32768, 32767].
 *
 * The vector body and the scalar tail use the same division and the same
 * float-domain clamp, so a coefficient quantises identically regardless of its
 * position in the array. Clamping happens before the float->int conversion:
 * an out-of-range conversion yields INT_MIN in the vector path (turning a huge
 * positive value into -32768) and is undefined behaviour in the scalar cast.
 * NaN inputs end up as -32768 in both paths.
 *
 * @param coeffs               Input coefficients (size elements, not modified)
 * @param quantised            Output buffer (size elements)
 * @param size                 Number of coefficients
 * @param effective_q          Quantiser step
 * @param dead_zone_threshold  Dead-zone threshold on |coeff / effective_q| (<= 0 disables)
 * @param is_chroma            Non-zero disables the dead zone
 *
 * width, height, decomp_levels, get_subband_level and get_subband_type are not
 * used by this simplified implementation; they are kept so the signature
 * matches quantise_basic_func_t.
 */
static inline void quantise_dwt_coefficients_avx512(
    float *coeffs, int16_t *quantised, int size,
    float effective_q, float dead_zone_threshold,
    int width, int height, int decomp_levels, int is_chroma,
    int (*get_subband_level)(int, int, int, int),
    int (*get_subband_type)(int, int, int, int)
) {
    (void)width;
    (void)height;
    (void)decomp_levels;
    (void)get_subband_level;
    (void)get_subband_type;

    const int use_dead_zone = (dead_zone_threshold > 0.0f && !is_chroma);

    const __m512 q_vec = _mm512_set1_ps(effective_q);
    const __m512 threshold_vec = _mm512_set1_ps(dead_zone_threshold);
    const __m512 half_vec = _mm512_set1_ps(0.5f);
    const __m512 nhalf_vec = _mm512_set1_ps(-0.5f);
    const __m512 zero_vec = _mm512_setzero_ps();
    const __m512 min_f32 = _mm512_set1_ps(-32768.0f);
    const __m512 max_f32 = _mm512_set1_ps(32767.0f);

    int i;
    for (i = 0; i + 16 <= size; i += 16) {
        __m512 coeff = _mm512_loadu_ps(&coeffs[i]);
        // Divide (rather than multiply by a reciprocal) so the result is
        // bit-identical to the scalar tail below.
        __m512 quant = _mm512_div_ps(coeff, q_vec);

        // Dead-zone handling (simplified - full version needs per-coeff logic)
        if (use_dead_zone) {
            __m512 abs_quant = _mm512_abs_ps(quant);
            __mmask16 dead_mask = _mm512_cmp_ps_mask(abs_quant, threshold_vec, _CMP_LE_OQ);
            quant = _mm512_mask_blend_ps(dead_mask, quant, zero_vec);
        }

        // Round half away from zero: add +/-0.5 by sign, then truncate.
        __mmask16 pos_mask = _mm512_cmp_ps_mask(quant, zero_vec, _CMP_GE_OQ);
        __m512 round_val = _mm512_mask_blend_ps(pos_mask, nhalf_vec, half_vec);
        quant = _mm512_add_ps(quant, round_val);

        // Saturate while still in float. max() comes first with the bound as
        // second operand so a NaN lane is replaced by the lower bound.
        quant = _mm512_min_ps(_mm512_max_ps(quant, min_f32), max_f32);

        __m512i quant_i32 = _mm512_cvttps_epi32(quant); // truncate toward zero
        __m256i quant_i16 = _mm512_cvtsepi32_epi16(quant_i32);
        _mm256_storeu_si256((__m256i*)&quantised[i], quant_i16);
    }

    // Remaining scalar
    for (; i < size; i++) {
        float quantised_val = coeffs[i] / effective_q;

        // Dead-zone (simplified)
        if (use_dead_zone && fabsf(quantised_val) <= dead_zone_threshold) {
            quantised_val = 0.0f;
        }

        float rounded = quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f);
        if (!(rounded >= -32768.0f)) {
            rounded = -32768.0f;    // below range, or NaN
        } else if (rounded > 32767.0f) {
            rounded = 32767.0f;
        }
        quantised[i] = (int16_t)(int32_t)rounded;
    }
}
/**
 * Perceptual quantisation with per-coefficient weighting (AVX-512).
 *
 * For every coefficient: q = coeffs[i] / (base_quantiser * weights[i]), rounded
 * half away from zero and saturated to the int16 range.
 *
 * The value is clamped to [-32768, 32767] while still in float, before the
 * float->int conversion. An out-of-range conversion yields INT_MIN in the
 * vector path (so a huge positive value, or +inf from a zero weight, would
 * become -32768) and is undefined behaviour in the scalar cast. NaN results
 * end up as -32768 in both paths.
 *
 * @param coeffs          Input coefficients (size elements, not modified)
 * @param quantised       Output buffer (size elements)
 * @param size            Number of coefficients
 * @param weights         Pre-computed per-coefficient weights (size elements, not modified)
 * @param base_quantiser  Base quantiser step multiplied by each weight
 */
static inline void quantise_dwt_coefficients_perceptual_avx512(
    float *coeffs, int16_t *quantised, int size,
    float *weights, // Pre-computed per-coefficient weights
    float base_quantiser
) {
    const __m512 base_q_vec = _mm512_set1_ps(base_quantiser);
    const __m512 half_vec = _mm512_set1_ps(0.5f);
    const __m512 nhalf_vec = _mm512_set1_ps(-0.5f);
    const __m512 zero_vec = _mm512_setzero_ps();
    const __m512 min_f32 = _mm512_set1_ps(-32768.0f);
    const __m512 max_f32 = _mm512_set1_ps(32767.0f);

    int i;
    for (i = 0; i + 16 <= size; i += 16) {
        __m512 coeff = _mm512_loadu_ps(&coeffs[i]);
        __m512 weight = _mm512_loadu_ps(&weights[i]);

        // effective_q = base_q * weight
        __m512 effective_q = _mm512_mul_ps(base_q_vec, weight);
        __m512 quant = _mm512_div_ps(coeff, effective_q);

        // Round half away from zero: add +/-0.5 by sign, then truncate.
        __mmask16 pos_mask = _mm512_cmp_ps_mask(quant, zero_vec, _CMP_GE_OQ);
        __m512 round_val = _mm512_mask_blend_ps(pos_mask, nhalf_vec, half_vec);
        quant = _mm512_add_ps(quant, round_val);

        // Saturate while still in float. max() comes first with the bound as
        // second operand so a NaN lane is replaced by the lower bound.
        quant = _mm512_min_ps(_mm512_max_ps(quant, min_f32), max_f32);

        __m512i quant_i32 = _mm512_cvttps_epi32(quant);
        __m256i quant_i16 = _mm512_cvtsepi32_epi16(quant_i32);
        _mm256_storeu_si256((__m256i*)&quantised[i], quant_i16);
    }

    // Remaining scalar
    for (; i < size; i++) {
        float effective_q = base_quantiser * weights[i];
        float quantised_val = coeffs[i] / effective_q;

        float rounded = quantised_val + (quantised_val >= 0 ? 0.5f : -0.5f);
        if (!(rounded >= -32768.0f)) {
            rounded = -32768.0f;    // below range, or NaN
        } else if (rounded > 32767.0f) {
            rounded = 32767.0f;
        }
        quantised[i] = (int16_t)(int32_t)rounded;
    }
}
// =============================================================================
// AVX-512 Optimised Dequantisation Functions
// =============================================================================
// Uniform dequantisation: coeffs[i] = (float)quantised[i] * effective_q
// for i in [0, size). Whole groups of 16 go through AVX-512; the leftover
// elements are handled one at a time with the same arithmetic.
static inline void dequantise_dwt_coefficients_avx512(
    const int16_t *quantised, float *coeffs, int size,
    float effective_q
) {
    const __m512 scale = _mm512_set1_ps(effective_q);
    int pos = 0;

    while (pos + 16 <= size) {
        // 16 x int16 -> 16 x int32 (sign-extended) -> 16 x float
        const __m256i packed = _mm256_loadu_si256((const __m256i*)&quantised[pos]);
        const __m512 values = _mm512_cvtepi32_ps(_mm512_cvtepi16_epi32(packed));
        _mm512_storeu_ps(&coeffs[pos], _mm512_mul_ps(values, scale));
        pos += 16;
    }

    // Scalar tail
    while (pos < size) {
        coeffs[pos] = (float)quantised[pos] * effective_q;
        pos++;
    }
}
// Perceptual dequantisation:
//   coeffs[i] = (float)quantised[i] * (base_quantiser * weights[i])
// for i in [0, size). The per-coefficient step is formed first and then
// applied, in both the vector body and the scalar tail.
static inline void dequantise_dwt_coefficients_perceptual_avx512(
    const int16_t *quantised, float *coeffs, int size,
    const float *weights, float base_quantiser
) {
    const __m512 base_step = _mm512_set1_ps(base_quantiser);
    int pos = 0;

    while (pos + 16 <= size) {
        // int16 -> int32 -> float
        const __m256i packed = _mm256_loadu_si256((const __m256i*)&quantised[pos]);
        const __m512 values = _mm512_cvtepi32_ps(_mm512_cvtepi16_epi32(packed));

        // step = base_quantiser * weight
        const __m512 step = _mm512_mul_ps(base_step, _mm512_loadu_ps(&weights[pos]));

        _mm512_storeu_ps(&coeffs[pos], _mm512_mul_ps(values, step));
        pos += 16;
    }

    // Scalar tail
    while (pos < size) {
        const float step = base_quantiser * weights[pos];
        coeffs[pos] = (float)quantised[pos] * step;
        pos++;
    }
}
// =============================================================================
// AVX-512 Optimised RGB to YCoCg Conversion
// =============================================================================
// Converts packed 8-bit RGB (3 bytes per pixel) into three float planes using
// the lifting form of YCoCg-R:
//   co  = r - b
//   tmp = b + co * 0.5
//   cg  = g - tmp
//   y   = tmp + cg * 0.5
// width * height pixels are processed; 16 at a time with AVX-512, the rest
// with scalar code.
static inline void rgb_to_ycocg_avx512(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height) {
    const int pixel_count = width * height;
    const __m512 one_half = _mm512_set1_ps(0.5f);
    int px = 0;

    while (px + 16 <= pixel_count) {
        // The interleaved bytes are split into per-channel float arrays on
        // the stack before being loaded as vectors.
        float red[16], green[16], blue[16];
        for (int lane = 0; lane < 16; lane++) {
            const int base = (px + lane) * 3;
            red[lane] = (float)rgb[base + 0];
            green[lane] = (float)rgb[base + 1];
            blue[lane] = (float)rgb[base + 2];
        }
        const __m512 r = _mm512_loadu_ps(red);
        const __m512 g = _mm512_loadu_ps(green);
        const __m512 b = _mm512_loadu_ps(blue);

        const __m512 chroma_o = _mm512_sub_ps(r, b);
        const __m512 mid = _mm512_fmadd_ps(chroma_o, one_half, b);      // b + co * 0.5
        const __m512 chroma_g = _mm512_sub_ps(g, mid);
        const __m512 luma = _mm512_fmadd_ps(chroma_g, one_half, mid);   // tmp + cg * 0.5

        _mm512_storeu_ps(&y[px], luma);
        _mm512_storeu_ps(&co[px], chroma_o);
        _mm512_storeu_ps(&cg[px], chroma_g);
        px += 16;
    }

    // Scalar tail
    while (px < pixel_count) {
        const float r = rgb[px * 3 + 0];
        const float g = rgb[px * 3 + 1];
        const float b = rgb[px * 3 + 2];
        co[px] = r - b;
        const float mid = b + co[px] * 0.5f;
        cg[px] = g - mid;
        y[px] = mid + cg[px] * 0.5f;
        px++;
    }
}
// =============================================================================
// AVX-512 Optimised 2D DWT with Gather/Scatter
// =============================================================================
// Copies column x of a row-major tile (row stride = width floats) into the
// contiguous buffer column[0..height): column[r] = tile_data[r * width + x].
// Sixteen rows at a time are fetched with an AVX-512 gather; the element
// offsets are computed in a vector register.
static inline void dwt_2d_extract_column_avx512(
    const float *tile_data, float *column,
    int x, int width, int height
) {
    const __m512i lane_ids = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7,
                                               8, 9, 10, 11, 12, 13, 14, 15);
    const __m512i row_stride = _mm512_set1_epi32(width);
    const __m512i col_offset = _mm512_set1_epi32(x);
    int row = 0;

    while (row + 16 <= height) {
        // offsets[lane] = (row + lane) * width + x
        const __m512i rows = _mm512_add_epi32(_mm512_set1_epi32(row), lane_ids);
        const __m512i offsets = _mm512_add_epi32(_mm512_mullo_epi32(rows, row_stride), col_offset);
        // Scale 4 = sizeof(float): offsets are element indices, not bytes.
        _mm512_storeu_ps(&column[row], _mm512_i32gather_ps(offsets, tile_data, 4));
        row += 16;
    }

    // Scalar tail
    for (; row < height; row++) {
        column[row] = tile_data[row * width + x];
    }
}
// Writes the contiguous buffer column[0..height) into column x of a row-major
// tile (row stride = width floats): tile_data[r * width + x] = column[r].
// Sixteen rows at a time are written with an AVX-512 scatter; the element
// offsets are computed in a vector register.
static inline void dwt_2d_insert_column_avx512(
    float *tile_data, const float *column,
    int x, int width, int height
) {
    const __m512i lane_ids = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7,
                                               8, 9, 10, 11, 12, 13, 14, 15);
    const __m512i row_stride = _mm512_set1_epi32(width);
    const __m512i col_offset = _mm512_set1_epi32(x);
    int row = 0;

    while (row + 16 <= height) {
        // offsets[lane] = (row + lane) * width + x
        const __m512i rows = _mm512_add_epi32(_mm512_set1_epi32(row), lane_ids);
        const __m512i offsets = _mm512_add_epi32(_mm512_mullo_epi32(rows, row_stride), col_offset);
        const __m512 values = _mm512_loadu_ps(&column[row]);
        // Scale 4 = sizeof(float): offsets are element indices, not bytes.
        _mm512_i32scatter_ps(tile_data, offsets, values, 4);
        row += 16;
    }

    // Scalar tail
    for (; row < height; row++) {
        tile_data[row * width + x] = column[row];
    }
}
#endif // __AVX512F__
#endif // TAV_AVX512_H

View File

@@ -0,0 +1,295 @@
/**
* TAV Encoder Library - Public API
*
* High-level interface for encoding video using the TSVM Advanced Video (TAV) codec.
* Supports GOP-based encoding with internal multi-threading for optimal performance.
*
* Created by CuriousTorvald and Claude on 2025-12-03.
*/
#ifndef TAV_ENCODER_LIB_H
#define TAV_ENCODER_LIB_H
#include <stdint.h>
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
// =============================================================================
// Opaque Encoder Context
// =============================================================================
/**
* TAV encoder context - opaque to users.
* Created with tav_encoder_create(), freed with tav_encoder_free().
*/
typedef struct tav_encoder_context tav_encoder_context_t;
// =============================================================================
// Configuration Structures
// =============================================================================
/**
* Video encoding parameters.
*
* Fill with tav_encoder_params_init() to obtain defaults, adjust individual
* fields, then pass to tav_encoder_create(). Fields documented as "0=auto" or
* "-1=auto" are resolved by the encoder; the resolved values can be read back
* with tav_encoder_get_params().
*/
typedef struct {
// === Video Dimensions ===
int width; // Frame width (must be even)
int height; // Frame height (must be even)
int fps_num; // Framerate numerator (e.g., 60 for 60fps)
int fps_den; // Framerate denominator (e.g., 1 for 60/1)
// === Wavelet Configuration ===
int wavelet_type; // Spatial wavelet: 0=CDF 5/3, 1=CDF 9/7 (default), 2=CDF 13/7, 16=DD-4, 255=Haar
int temporal_wavelet; // Temporal wavelet: 0=Haar, 1=CDF 5/3 (default for smooth motion)
int decomp_levels; // Spatial DWT levels (0=auto, typically 6)
int temporal_levels; // Temporal DWT levels (0=auto, typically 2 for 8-frame GOPs)
// === Color Space ===
int channel_layout; // 0=YCoCg-R (default), 1=ICtCp (for HDR/BT.2100 sources)
int perceptual_tuning; // 1=enable HVS perceptual quantization (default), 0=uniform
// === GOP Configuration ===
int enable_temporal_dwt; // 1=enable 3D DWT GOP encoding (default), 0=intra-only I-frames
int gop_size; // Frames per GOP (8, 16, or 24; 0=auto based on framerate)
int enable_two_pass; // 1=enable two-pass with scene change detection (default), 0=single-pass
// === Quality Control ===
int quality_level; // NOTE(review): undocumented in this header; range and its relation to quantiser_y/co/cg need confirming against the implementation
int quantiser_y; // Luma quantiser (0-255, indexed against QLUT)
int quantiser_co; // Orange chrominance quantiser (0-255, indexed against QLUT)
int quantiser_cg; // Green chrominance quantiser (0-255, indexed against QLUT)
float dead_zone_threshold; // Dead-zone quantization threshold (0.0=disabled, 0.6-1.5 typical)
// === Entropy Coding ===
int entropy_coder; // 0=Twobitmap (default), 1=EZBC (better for high-quality)
int zstd_level; // Zstd compression level (3-22, default: 7)
// === Multi-threading ===
int num_threads; // Worker threads (-1=auto, 0=single-threaded, 1-16=explicit)
// === Encoder Presets ===
int encoder_preset; // Preset flags: 0x01=sports (finer temporal quant), 0x02=anime (disable grain)
// === Advanced Options ===
int verbose; // 1=enable debug output, 0=quiet (default)
int monoblock; // -1=auto (based on dimensions), 0=force tiled, 1=force monoblock
} tav_encoder_params_t;
/**
* Initialize encoder parameters with default values.
*
* @param params Parameter structure to initialize
* @param width Frame width
* @param height Frame height
*/
void tav_encoder_params_init(tav_encoder_params_t *params, int width, int height);
/**
* Encoder output packet.
* Contains encoded video or audio data; is_video tells the two apart and
* packet_type carries the TAV packet type byte (see the TAV_PACKET_* constants).
*
* NOTE(review): the `data` comment below says the buffer is owned by the
* encoder, while this header also exposes tav_encoder_free_packet(). Confirm
* against the implementation which of the two governs the buffer's lifetime.
*/
typedef struct {
uint8_t *data; // Packet data (owned by encoder, valid until next encode/flush)
size_t size; // Packet size in bytes
uint8_t packet_type; // TAV packet type (0x10=I-frame, 0x12=GOP, 0x24=audio, etc.)
int frame_number; // Frame number (for video packets)
int is_video; // 1=video packet, 0=audio packet
} tav_encoder_packet_t;
// =============================================================================
// Encoder Lifecycle
// =============================================================================
/**
* Create TAV encoder context.
*
* Allocates internal buffers, initializes thread pool (if multi-threading enabled),
* and prepares encoder for frame submission.
*
* @param params Encoder parameters (copied internally)
* @return Encoder context, or NULL on failure
*/
tav_encoder_context_t *tav_encoder_create(const tav_encoder_params_t *params);
/**
* Free TAV encoder context.
*
* Shuts down thread pool, frees all buffers and resources.
* Any unflushed frames in the GOP buffer will be lost.
*
* @param ctx Encoder context
*/
void tav_encoder_free(tav_encoder_context_t *ctx);
/**
* Get last error message.
*
* @param ctx Encoder context
* @return Error message string (valid until next encode operation)
*/
const char *tav_encoder_get_error(tav_encoder_context_t *ctx);
/**
* Get encoder parameters (with calculated values).
* After context creation, params will contain actual values used
* (e.g., auto-calculated decomp_levels, gop_size).
*
* @param ctx Encoder context
* @param params Output parameters structure
*/
void tav_encoder_get_params(tav_encoder_context_t *ctx, tav_encoder_params_t *params);
/**
* DEBUG: Validate encoder context integrity.
*
* @param ctx Encoder context
* @return 1 if the context appears valid, 0 otherwise
*/
int tav_encoder_validate_context(tav_encoder_context_t *ctx);
// =============================================================================
// Video Encoding
// =============================================================================
/*
* REMOVED: tav_encoder_encode_frame() and tav_encoder_flush() are no longer
* part of this API. Use tav_encoder_encode_gop() instead, which works in both
* single-threaded and multi-threaded modes. The CLI should buffer frames
* and call tav_encoder_encode_gop() once a full GOP is ready.
*/
/**
* Encode a complete GOP (Group of Pictures) directly.
*
* This function is STATELESS and THREAD-SAFE with separate contexts.
* Perfect for multithreaded encoding from CLI:
* - Each thread creates its own encoder context
* - Each thread calls encode_gop() with a batch of frames
* - No shared state, no locking needed
*
* Example multithreaded usage:
* ```c
* // Worker thread function
* void* worker(void* arg) {
* work_item_t* item = (work_item_t*)arg;
*
* // Create thread-local encoder context
* tav_encoder_context_t* ctx = tav_encoder_create(&shared_params);
* if (!ctx) return NULL;
*
* // Encode this GOP and check the result before using the packet
* tav_encoder_packet_t* packet = NULL;
* if (tav_encoder_encode_gop(ctx, item->frames, item->num_frames,
* item->frame_numbers, &packet) == 1) {
* // Store packet in output queue; the consumer releases it with
* // tav_encoder_free_packet()
* queue_push(output_queue, packet);
* }
*
* // NOTE(review): confirm the packet stays valid after the context is freed
* tav_encoder_free(ctx);
* return NULL;
* }
* ```
*
* @param ctx Encoder context (one per thread)
* @param rgb_frames Array of RGB24 frames [frame][width*height*3]
* @param num_frames Number of frames in GOP (1-24)
* @param frame_numbers Frame indices for timecodes (can be NULL)
* @param packet Output packet pointer
* @return 1 if packet ready, -1 on error
*/
int tav_encoder_encode_gop(tav_encoder_context_t *ctx,
const uint8_t **rgb_frames,
int num_frames,
const int *frame_numbers,
tav_encoder_packet_t **packet);
/**
* Free a packet returned by tav_encoder_encode_gop().
* (Presumably also packets produced by tav_encoder_encode_audio(), which uses
* the same packet type - confirm against the implementation.)
*
* @param packet Packet to free (can be NULL)
*/
void tav_encoder_free_packet(tav_encoder_packet_t *packet);
// =============================================================================
// Audio Encoding (Optional)
// =============================================================================
/**
* Encode audio samples (TAD codec).
*
* Audio is encoded synchronously and returned immediately.
* For TAV muxing: interleave audio packets with video packets by frame PTS.
*
* @param ctx Encoder context
* @param pcm_samples PCM32f stereo samples (interleaved: L,R,L,R,...), num_samples×2 floats
* @param num_samples Number of samples per channel
* @param packet Output packet pointer
* @return 1 if packet ready, -1 on error
*/
int tav_encoder_encode_audio(tav_encoder_context_t *ctx,
const float *pcm_samples,
size_t num_samples,
tav_encoder_packet_t **packet);
// =============================================================================
// Statistics and Info
// =============================================================================
/**
* Encoding statistics, filled in by tav_encoder_get_stats().
*/
typedef struct {
int64_t frames_encoded; // Total frames encoded
int64_t gops_encoded; // Total GOPs encoded
size_t total_bytes; // Total bytes output (video + audio)
size_t video_bytes; // Video bytes
size_t audio_bytes; // Audio bytes
double avg_bitrate_kbps; // Average bitrate (kbps)
double encoding_fps; // Encoding speed (frames/sec)
} tav_encoder_stats_t;
/**
* Get encoding statistics.
*
* @param ctx Encoder context
* @param stats Output statistics structure
*/
void tav_encoder_get_stats(tav_encoder_context_t *ctx, tav_encoder_stats_t *stats);
// =============================================================================
// TAV Packet Types (for reference)
// =============================================================================
#define TAV_PACKET_IFRAME 0x10 // I-frame (intra-only, single frame)
#define TAV_PACKET_PFRAME 0x11 // P-frame (delta from previous)
#define TAV_PACKET_GOP_UNIFIED 0x12 // GOP unified (3D DWT, multiple frames)
#define TAV_PACKET_AUDIO_TAD 0x24 // TAD audio (DWT-based perceptual codec)
#define TAV_PACKET_AUDIO_PCM8 0x20 // PCM8 audio (legacy)
#define TAV_PACKET_LOOP_START 0xF0 // Loop point start (no payload)
#define TAV_PACKET_GOP_SYNC 0xFC // GOP sync (frame count marker)
#define TAV_PACKET_TIMECODE 0xFD // Timecode metadata
#define TAV_PACKET_SYNC 0xFF // Sync packet (no payload)
// =============================================================================
// Tile Settings (for multi-tile mode)
// =============================================================================
#define TAV_TILE_SIZE_X 640 // Base tile width
#define TAV_TILE_SIZE_Y 540 // Base tile height
#define TAV_DWT_FILTER_HALF_SUPPORT 4 // For 9/7 filter (filter lengths 9,7 → L=4)
#define TAV_TILE_MARGIN_LEVELS 3 // Use margin for 3 levels: 4 * (2^3) = 32px
#define TAV_TILE_MARGIN (TAV_DWT_FILTER_HALF_SUPPORT * (1 << TAV_TILE_MARGIN_LEVELS)) // 32px
#define TAV_PADDED_TILE_SIZE_X (TAV_TILE_SIZE_X + 2 * TAV_TILE_MARGIN) // 704
#define TAV_PADDED_TILE_SIZE_Y (TAV_TILE_SIZE_Y + 2 * TAV_TILE_MARGIN) // 604
// Monoblock threshold: D1 PAL resolution (720x576)
// If width > 720 OR height > 576, automatically switch to tiled mode
#define TAV_MONOBLOCK_MAX_WIDTH 720
#define TAV_MONOBLOCK_MAX_HEIGHT 576
#ifdef __cplusplus
}
#endif
#endif // TAV_ENCODER_LIB_H

View File

@@ -0,0 +1,275 @@
/*
* TAV SIMD Function Dispatcher
*
* This file provides runtime CPU detection and function pointer dispatch
* for SIMD-optimized versions of performance-critical TAV encoder functions.
*
* Usage:
* 1. Include this header after defining all scalar functions
* 2. Call tav_simd_init() once at encoder initialization
* 3. Use function pointers (e.g., dwt_53_forward_1d_ptr) throughout code
*
* The dispatcher detects AVX-512, AVX2 and SSE4.2 at runtime. Only the AVX-512
* path is currently wired up; every other detected level uses the scalar versions.
*/
#ifndef TAV_SIMD_DISPATCH_H
#define TAV_SIMD_DISPATCH_H
#include <stdint.h>
// =============================================================================
// Function Pointer Types
// =============================================================================
// 1D DWT function pointer type: transforms data[0..length) in place.
// Shared by the forward and inverse transform pointers below.
typedef void (*dwt_1d_func_t)(float *data, int length);
// Quantization function pointer types.
// Basic quantiser: one effective_q for the whole buffer plus an optional dead zone.
// The geometry arguments and the two subband callbacks are part of the signature
// so that implementations needing per-coefficient subband logic can use them.
typedef void (*quantise_basic_func_t)(
float *coeffs, int16_t *quantised, int size,
float effective_q, float dead_zone_threshold,
int width, int height, int decomp_levels, int is_chroma,
int (*get_subband_level)(int, int, int, int),
int (*get_subband_type)(int, int, int, int)
);
// Perceptual quantiser: per-coefficient weights combined with base_quantiser.
typedef void (*quantise_perceptual_func_t)(
float *coeffs, int16_t *quantised, int size,
float *weights, float base_quantiser
);
// Color conversion function pointer type: packed 8-bit RGB in, three float planes out.
typedef void (*rgb_to_ycocg_func_t)(
const uint8_t *rgb, float *y, float *co, float *cg,
int width, int height
);
// 2D DWT column operations on a row-major tile whose row stride is `width`.
// Extract: copy column x of the tile into column[0..height).
typedef void (*dwt_2d_column_extract_func_t)(
const float *tile_data, float *column,
int x, int width, int height
);
// Insert: write column[0..height) back into column x of the tile.
typedef void (*dwt_2d_column_insert_func_t)(
float *tile_data, const float *column,
int x, int width, int height
);
// =============================================================================
// Global Function Pointers (initialized by tav_simd_init)
// =============================================================================
// All pointers below start out NULL and are assigned by tav_simd_init();
// calling through one before initialisation dereferences a null pointer.
// They are `static`, so every translation unit that includes this header gets
// its own private set and must run tav_simd_init() itself.
// NOTE(review): NULL and fprintf are used in this header, but only <stdint.h>
// is included here; the including file is presumably expected to provide
// <stddef.h>/<stdio.h> first - confirm.
// DWT 1D transforms
static dwt_1d_func_t dwt_53_forward_1d_ptr = NULL;
static dwt_1d_func_t dwt_97_forward_1d_ptr = NULL;
static dwt_1d_func_t dwt_haar_forward_1d_ptr = NULL;
static dwt_1d_func_t dwt_53_inverse_1d_ptr = NULL;
static dwt_1d_func_t dwt_haar_inverse_1d_ptr = NULL;
// Quantization
static quantise_basic_func_t quantise_dwt_coefficients_ptr = NULL;
static quantise_perceptual_func_t quantise_dwt_coefficients_perceptual_ptr = NULL;
// Color conversion
static rgb_to_ycocg_func_t rgb_to_ycocg_ptr = NULL;
// 2D DWT column operations
static dwt_2d_column_extract_func_t dwt_2d_extract_column_ptr = NULL;
static dwt_2d_column_insert_func_t dwt_2d_insert_column_ptr = NULL;
// =============================================================================
// SIMD Capability Detection
// =============================================================================
// SIMD capability levels reported by detect_simd_capabilities().
// The numeric values are used as indices into the name table printed by
// tav_simd_init(), so they must stay stable. Note that they are NOT ordered
// by capability (AVX-512 = 1 sorts before AVX2 = 2).
typedef enum {
    SIMD_NONE = 0,
    SIMD_AVX512F = 1,
    SIMD_AVX2 = 2,
    SIMD_SSE42 = 3
} simd_level_t;

// Level chosen by the most recent tav_simd_init() call (SIMD_NONE until then).
static simd_level_t detected_simd_level = SIMD_NONE;

// Returns the best SIMD level that is both compiled in and supported by the
// running CPU.
//
// - AVX-512 is reported only when the build defines __AVX512F__ and the CPU
//   supports F, DQ, BW and VL.
// - AVX2 is reported only when the build defines __AVX2__ and the CPU has it.
// - Otherwise SSE4.2 is reported when the CPU has it.
// - SIMD_NONE is returned for CPUs without SSE4.2, for non-GCC/Clang
//   compilers, and for non-x86 targets.
//
// The x86 architecture guard is required: the feature names passed to
// __builtin_cpu_supports() are x86-specific and would fail to compile on
// other architectures even though __GNUC__/__clang__ is defined there.
static inline simd_level_t detect_simd_capabilities(void) {
#if (defined(__GNUC__) || defined(__clang__)) && \
    (defined(__x86_64__) || defined(__i386__))
    // SSE4.2 is the baseline; without it nothing else is considered.
    if (!__builtin_cpu_supports("sse4.2")) {
        return SIMD_NONE;
    }
#ifdef __AVX512F__
    if (__builtin_cpu_supports("avx512f") &&
        __builtin_cpu_supports("avx512dq") &&
        __builtin_cpu_supports("avx512bw") &&
        __builtin_cpu_supports("avx512vl")) {
        return SIMD_AVX512F;
    }
#endif
#ifdef __AVX2__
    if (__builtin_cpu_supports("avx2")) {
        return SIMD_AVX2;
    }
#endif
    return SIMD_SSE42;
#else
    return SIMD_NONE;
#endif
}
// =============================================================================
// Scalar Fallback Wrappers
// =============================================================================
// These wrappers adapt the scalar functions to match function pointer signatures.
// Only the prototypes live here: both wrappers are `static`, so the file that
// includes this header must define them itself. tav_simd_init() assigns them
// to the quantisation function pointers on every code path.
// Matches quantise_basic_func_t.
static void quantise_dwt_coefficients_scalar_wrapper(
float *coeffs, int16_t *quantised, int size,
float effective_q, float dead_zone_threshold,
int width, int height, int decomp_levels, int is_chroma,
int (*get_subband_level)(int, int, int, int),
int (*get_subband_type)(int, int, int, int)
);
// Matches quantise_perceptual_func_t; implementation provided by the including encoder.
static void quantise_dwt_coefficients_perceptual_scalar_wrapper(
float *coeffs, int16_t *quantised, int size,
float *weights, float base_quantiser
);
// Scalar column helpers follow; unlike the wrappers above they are defined here.
// Scalar column extraction: copies column x of a row-major tile (row stride =
// width floats) into column[0..height). Does nothing when height <= 0.
static void dwt_2d_extract_column_scalar(
    const float *tile_data, float *column,
    int x, int width, int height
) {
    int row = 0;
    while (row < height) {
        const int src_index = row * width + x;
        column[row] = tile_data[src_index];
        row++;
    }
}
// Scalar column insertion: writes column[0..height) into column x of a
// row-major tile (row stride = width floats). Does nothing when height <= 0.
static void dwt_2d_insert_column_scalar(
    float *tile_data, const float *column,
    int x, int width, int height
) {
    int row = 0;
    while (row < height) {
        const int dst_index = row * width + x;
        tile_data[dst_index] = column[row];
        row++;
    }
}
// =============================================================================
// SIMD Initialization
// =============================================================================
// One-time dispatcher setup. Detects the SIMD level, logs it to stderr and
// binds every dispatch function pointer.
//
// - AVX-512 path (build defines __AVX512F__ and the CPU reports AVX-512):
//   forward DWTs, RGB->YCoCg and the column helpers use the *_avx512 kernels.
// - Any other level: everything uses the scalar implementations.
// - On both paths the inverse DWTs and both quantisers are scalar.
static void tav_simd_init(void) {
    // Scalar kernels defined by the file that includes this header.
    extern void dwt_53_forward_1d(float *data, int length);
    extern void dwt_97_forward_1d(float *data, int length);
    extern void dwt_haar_forward_1d(float *data, int length);
    extern void dwt_53_inverse_1d(float *data, int length);
    extern void dwt_haar_inverse_1d(float *data, int length);
    extern void rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height);
#ifdef __AVX512F__
    // AVX-512 kernels.
    extern void dwt_53_forward_1d_avx512(float *data, int length);
    extern void dwt_97_forward_1d_avx512(float *data, int length);
    extern void dwt_haar_forward_1d_avx512(float *data, int length);
    extern void rgb_to_ycocg_avx512(const uint8_t *rgb, float *y, float *co, float *cg, int width, int height);
    extern void dwt_2d_extract_column_avx512(const float *tile_data, float *column, int x, int width, int height);
    extern void dwt_2d_insert_column_avx512(float *tile_data, const float *column, int x, int width, int height);
#endif

    // Indexed by simd_level_t.
    static const char *const level_names[] = {"None", "AVX-512", "AVX2", "SSE4.2"};

    detected_simd_level = detect_simd_capabilities();
    fprintf(stderr, "[TAV] SIMD level detected: %s\n",
            level_names[detected_simd_level]);

    // Bindings shared by every path: inverse transforms and quantisation
    // always go through the scalar code.
    dwt_53_inverse_1d_ptr = dwt_53_inverse_1d;
    dwt_haar_inverse_1d_ptr = dwt_haar_inverse_1d;
    quantise_dwt_coefficients_ptr = quantise_dwt_coefficients_scalar_wrapper;
    quantise_dwt_coefficients_perceptual_ptr = quantise_dwt_coefficients_perceptual_scalar_wrapper;

#ifdef __AVX512F__
    if (detected_simd_level == SIMD_AVX512F) {
        fprintf(stderr, "[TAV] Using AVX-512 optimizations\n");

        dwt_53_forward_1d_ptr = dwt_53_forward_1d_avx512;
        dwt_97_forward_1d_ptr = dwt_97_forward_1d_avx512;
        dwt_haar_forward_1d_ptr = dwt_haar_forward_1d_avx512;
        rgb_to_ycocg_ptr = rgb_to_ycocg_avx512;
        dwt_2d_extract_column_ptr = dwt_2d_extract_column_avx512;
        dwt_2d_insert_column_ptr = dwt_2d_insert_column_avx512;
        return;
    }
#endif

    // Fallback to scalar implementations
    fprintf(stderr, "[TAV] Using scalar (non-SIMD) implementations\n");

    dwt_53_forward_1d_ptr = dwt_53_forward_1d;
    dwt_97_forward_1d_ptr = dwt_97_forward_1d;
    dwt_haar_forward_1d_ptr = dwt_haar_forward_1d;
    rgb_to_ycocg_ptr = rgb_to_ycocg;
    dwt_2d_extract_column_ptr = dwt_2d_extract_column_scalar;
    dwt_2d_insert_column_ptr = dwt_2d_insert_column_scalar;
}
// =============================================================================
// Convenience Macros for Code Readability
// =============================================================================
// Call-site shorthands for the SIMD dispatch pointers.
// Each macro forwards its parenthesised arguments, unchanged and in order, to
// the dispatch pointer of the same name. tav_simd_init() is what assigns those
// pointers, so it has to run before any of these macros is used.
// No macro is defined here for the inverse-DWT or quantisation pointers.
#define DWT_53_FORWARD_1D(data, length) \
dwt_53_forward_1d_ptr((data), (length))
#define DWT_97_FORWARD_1D(data, length) \
dwt_97_forward_1d_ptr((data), (length))
#define DWT_HAAR_FORWARD_1D(data, length) \
dwt_haar_forward_1d_ptr((data), (length))
#define RGB_TO_YCOCG(rgb, y, co, cg, width, height) \
rgb_to_ycocg_ptr((rgb), (y), (co), (cg), (width), (height))
#define DWT_2D_EXTRACT_COLUMN(tile_data, column, x, width, height) \
dwt_2d_extract_column_ptr((tile_data), (column), (x), (width), (height))
#define DWT_2D_INSERT_COLUMN(tile_data, column, x, width, height) \
dwt_2d_insert_column_ptr((tile_data), (column), (x), (width), (height))
#endif // TAV_SIMD_DISPATCH_H

View File

@@ -0,0 +1,78 @@
// Created by CuriousTorvald and Claude on 2025-12-02.
// TAV Video Decoder Library - Shared decoding functions for TAV format
// Can be used by both regular TAV decoder and TAV-DT decoder
#ifndef TAV_VIDEO_DECODER_H
#define TAV_VIDEO_DECODER_H
#include <stdint.h>
#include <stddef.h>
// Video decoder context - opaque to users
typedef struct tav_video_context tav_video_context_t;
// Video parameters structure
// Describes one video stream; a pointer to it is the only argument of
// tav_video_create(). The value lists in the field comments are the codes
// documented here; how each one is interpreted is up to the decoder
// implementation, which is not part of this header.
typedef struct {
int width;
int height;
int decomp_levels; // Spatial DWT levels (typically 4)
int temporal_levels; // Temporal DWT levels (typically 2)
int wavelet_filter; // 0=CDF 5/3, 1=CDF 9/7, 2=CDF 13/7, 16=DD-4, 255=Haar
int temporal_wavelet; // Temporal wavelet (0=CDF 5/3, 1=CDF 9/7)
int entropy_coder; // 0=Twobitmap, 1=EZBC, 2=RAW
int channel_layout; // 0=YCoCg-R, 1=ICtCp
int perceptual_tuning; // 1=perceptual quantisation, 0=uniform
uint8_t quantiser_y; // Base quantiser index for Y/I
uint8_t quantiser_co; // Base quantiser index for Co/Ct
uint8_t quantiser_cg; // Base quantiser index for Cg/Cp
uint8_t encoder_preset; // Encoder preset flags (sports, anime, etc.)
int monoblock; // 1=single tile (monoblock), 0=multi-tile
int no_zstd; // 1=packets are uncompressed (Video Flags bit 4), 0=Zstd compressed
} tav_video_params_t;
// Create video decoder context
//   params - stream description (see tav_video_params_t)
// Returns NULL on failure
// The returned context is opaque; release it with tav_video_free().
tav_video_context_t *tav_video_create(const tav_video_params_t *params);
// Free video decoder context
//   ctx - context obtained from tav_video_create()
void tav_video_free(tav_video_context_t *ctx);
// Decode GOP_UNIFIED packet (0x12) to RGB24 frames
// Input:
//   compressed_data - GOP packet data (after packet type byte)
//   compressed_size - size of compressed data
//   gop_size        - number of frames in GOP (read from packet);
//                     the uint8_t type limits this to 255 frames per call
// Output:
//   rgb_frames      - array of pointers to RGB24 frame buffers (width*height*3 each)
//                     Must be pre-allocated by caller (gop_size pointers,
//                     each pointing to width*height*3 bytes)
// Returns: 0 on success, -1 on error
int tav_video_decode_gop(tav_video_context_t *ctx,
const uint8_t *compressed_data, uint32_t compressed_size,
uint8_t gop_size, uint8_t **rgb_frames);
// Decode IFRAME packet (0x10) to RGB24 frame
// Input:
//   compressed_data - I-frame packet data (after packet type byte)
//   packet_size     - size of packet data
// Output:
//   rgb_frame       - pointer to RGB24 frame buffer (width*height*3 bytes)
//                     Must be pre-allocated by caller
// Returns: 0 on success, -1 on error
int tav_video_decode_iframe(tav_video_context_t *ctx,
const uint8_t *compressed_data, uint32_t packet_size,
uint8_t *rgb_frame);
// Decode PFRAME packet (0x11) to RGB24 frame (delta from reference)
// Input:
//   compressed_data - P-frame packet data (after packet type byte)
//   packet_size     - size of packet data
// Output:
//   rgb_frame       - pointer to RGB24 frame buffer (width*height*3 bytes)
//                     Must be pre-allocated by caller
// Returns: 0 on success, -1 on error
// Note: Requires previous frame to be decoded first (stored internally as reference),
//       so calls on one context must follow stream order.
int tav_video_decode_pframe(tav_video_context_t *ctx,
const uint8_t *compressed_data, uint32_t packet_size,
uint8_t *rgb_frame);
// Get last error message
// NOTE(review): ownership and lifetime of the returned string are not stated
// in this header - presumably owned by ctx; confirm against the implementation.
const char *tav_video_get_error(tav_video_context_t *ctx);
// Enable verbose debug output
//   verbose - presumably non-zero enables, zero disables; confirm against the implementation
void tav_video_set_verbose(tav_video_context_t *ctx, int verbose);
#endif // TAV_VIDEO_DECODER_H

View File

@@ -0,0 +1,397 @@
/**
* LDPC Rate 1/2 Codec Implementation
*
* LDPC for TAV-DT header protection.
* Uses a systematic rate 1/2 code with a scaled min-sum belief propagation decoder.
*
* The parity-check matrix is designed for good error correction on small blocks.
* Each parity bit is computed as XOR of multiple data bits using a pseudo-random
* but deterministic pattern.
*
* Created by CuriousTorvald and Claude on 2025-12-09.
* Updated 2025-12-17: Replaced bit-flipping with belief propagation decoder.
*/
#include "ldpc.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Channel LLR magnitude for hard-decision input
// Higher value = more confidence in received bits
// For BER ~0.01, optimal is about 4.6; we use slightly lower for robustness
#define CHANNEL_LLR_MAG 4.0f
// Clipping value that bounds LLR magnitudes during min-sum message passing
#define LLR_CLIP 20.0f
// =============================================================================
// Parity-Check Matrix Generation
// =============================================================================
// For rate 1/2 LDPC: n = 2k bits, parity-check matrix H is (n-k) x n = k x 2k
// We use H = [P | I_k] where P is the parity pattern matrix
// This gives systematic encoding: c = [data | parity] where parity = P * data
// Parity pattern: each parity bit j depends on data bits where pattern[j][i] = 1
// The pattern is pseudo-random and irregular: each (parity bit, data bit) pair is
// connected with probability 1/(k_bits/3 + 1), so a parity bit covers about 3 data
// bits on average, with a fallback that guarantees at least 2
// Deterministic 32-bit mixer used to derive the parity connections.
// Only a XOR b enters the mix, so the result is symmetric in its arguments.
static inline uint32_t hash_mix(uint32_t a, uint32_t b) {
    uint32_t x = a ^ b;
    x ^= x >> 16;
    x *= 0x85ebca6b;
    x ^= x >> 13;
    x *= 0xc2b2ae35;
    x ^= x >> 16;
    return x;
}
// Read bit bit_idx of a byte array; bit 0 is the most significant bit of data[0].
static inline int get_bit(const uint8_t *data, int bit_idx) {
    const uint8_t byte = data[bit_idx >> 3];
    const int shift = 7 - (bit_idx & 7);
    return (byte >> shift) & 1;
}
// Write bit bit_idx of a byte array (MSB-first within each byte).
// Any non-zero value sets the bit; zero clears it.
static inline void set_bit(uint8_t *data, int bit_idx, int value) {
    uint8_t *target = &data[bit_idx >> 3];
    const int mask = 1 << (7 - (bit_idx & 7));
    if (value) {
        *target |= mask;
    } else {
        *target &= ~mask;
    }
}
// Invert bit bit_idx of a byte array (MSB-first within each byte).
static inline void flip_bit(uint8_t *data, int bit_idx) {
    const int mask = 0x80 >> (bit_idx & 7);
    data[bit_idx >> 3] ^= mask;
}
// List the data bits that feed parity bit parity_idx (one row of P in H = [P | I]).
//   parity_idx  - parity bit index, 0 .. k_bits-1
//   k_bits      - number of data bits (== number of parity bits at rate 1/2)
//   connections - receives the data bit indices; needs room for up to k_bits entries
// Returns the number of indices written.
//
// Selection is a deterministic hash test: data bit i is taken when
// hash_mix(seed, i) is a multiple of (k_bits/3 + 1), which yields roughly three
// data bits per parity bit on average. If fewer than two were selected, two
// fixed positions are appended without checking for a duplicate of an entry
// already chosen.
// Encoder and decoder must agree on this pattern bit for bit - changing it
// changes the code.
static int get_parity_connections(int parity_idx, int k_bits, int *connections) {
    const uint32_t seed = hash_mix(0xDEADBEEF, (uint32_t)parity_idx);
    const uint32_t modulus = (uint32_t)(k_bits / 3 + 1);
    int n = 0;

    for (int bit = 0; bit < k_bits; bit++) {
        if (hash_mix(seed, (uint32_t)bit) % modulus != 0) {
            continue;
        }
        connections[n++] = bit;
    }

    if (n >= 2) {
        return n;
    }

    // Ensure at least 2 connections per parity bit
    connections[n] = parity_idx % k_bits;
    connections[n + 1] = (parity_idx + k_bits / 2) % k_bits;
    return n + 2;
}
// List the parity bits whose equation includes data bit data_idx (one column of P).
// Each parity row is regenerated with get_parity_connections() and scanned, so
// the cost is one full row generation per parity bit.
//   connections - receives parity indices in ascending order; up to k_bits entries
// Returns the number of indices written.
static int get_data_connections(int data_idx, int k_bits, int *connections) {
    int row[LDPC_MAX_DATA_BYTES * 8];
    int found = 0;

    for (int parity = 0; parity < k_bits; parity++) {
        const int row_len = get_parity_connections(parity, k_bits, row);
        int pos = 0;
        while (pos < row_len && row[pos] != data_idx) {
            pos++;
        }
        if (pos < row_len) {
            connections[found++] = parity;
        }
    }
    return found;
}
// =============================================================================
// Initialization
// =============================================================================
// Set once ldpc_init() has run
static int ldpc_initialized = 0;

// Mark the codec as initialised. Connection patterns are generated on demand,
// so there is nothing to precompute; repeated calls are no-ops.
void ldpc_init(void) {
    if (!ldpc_initialized) {
        ldpc_initialized = 1;
    }
}
// =============================================================================
// Encoding
// =============================================================================
// Systematic rate 1/2 encode: output = [data bytes][parity bytes].
//   data     - input bytes
//   data_len - number of input bytes; values above LDPC_MAX_DATA_BYTES are
//              silently clamped to LDPC_MAX_DATA_BYTES
//   output   - receives 2 * (clamped) data_len bytes
// Returns the number of bytes written to output.
size_t ldpc_encode(const uint8_t *data, size_t data_len, uint8_t *output) {
    if (!ldpc_initialized) ldpc_init();

    const size_t len = (data_len > LDPC_MAX_DATA_BYTES) ? LDPC_MAX_DATA_BYTES : data_len;
    const int k_bits = (int)(len * 8);
    uint8_t *parity_bytes = output + len;
    int taps[LDPC_MAX_DATA_BYTES * 8];

    // Systematic part first, then a cleared parity area
    memcpy(output, data, len);
    memset(parity_bytes, 0, len);

    for (int p = 0; p < k_bits; p++) {
        const int n_taps = get_parity_connections(p, k_bits, taps);

        // Parity bit p is the XOR of every data bit it is connected to
        int acc = 0;
        for (int t = 0; t < n_taps; t++) {
            acc ^= get_bit(data, taps[t]);
        }
        set_bit(parity_bytes, p, acc);
    }
    return len * 2;
}
// =============================================================================
// Decoding
// =============================================================================
// Test whether a codeword satisfies every parity equation.
//   codeword - [data][parity], len bytes in total
//   len      - codeword length; the data part is taken to be len / 2 bytes
// Returns 1 when all syndrome bits are zero, 0 at the first failing equation.
// NOTE(review): len is not validated here (odd or oversized values are not
// rejected the way ldpc_decode rejects them), and len < 2 reports "valid"
// because there are no equations to check.
int ldpc_check_syndrome(const uint8_t *codeword, size_t len) {
    if (!ldpc_initialized) ldpc_init();

    const size_t half = len / 2;
    const uint8_t *parity_bytes = codeword + half;
    const int k_bits = (int)(half * 8);
    int taps[LDPC_MAX_DATA_BYTES * 8];

    for (int p = 0; p < k_bits; p++) {
        const int n_taps = get_parity_connections(p, k_bits, taps);

        // Syndrome bit = parity bit XOR all connected data bits
        int acc = get_bit(parity_bytes, p);
        for (int t = 0; t < n_taps; t++) {
            acc ^= get_bit(codeword, taps[t]);
        }
        if (acc) {
            return 0; // Syndrome non-zero: errors detected
        }
    }
    return 1; // Zero syndrome: valid codeword
}
// Saturate an LLR to the range [-LLR_CLIP, +LLR_CLIP]
static inline float clip_llr(float llr) {
    const float upper = LLR_CLIP;
    const float lower = -LLR_CLIP;
    return (llr > upper) ? upper : ((llr < lower) ? lower : llr);
}
// Sign of a float as +1.0f or -1.0f; zero counts as positive
static inline float sign_f(float x) {
    if (x >= 0.0f) {
        return 1.0f;
    }
    return -1.0f;
}
// Hard-decide every codeword bit from the channel LLRs plus the current
// check-to-variable messages, then count the parity checks that still fail.
//   row_start - k_bits + 1 offsets; check j owns edges row_start[j] .. row_start[j+1]-1
//   edge_var  - variable (bit) index of each edge
//   edge_msg  - check-to-variable message of each edge
//   decoded   - receives encoded_len bytes of hard decisions
// Returns the number of unsatisfied checks (0 means a valid codeword).
static int ldpc_hard_decision(int k_bits, size_t encoded_len,
                              const int *row_start, const int *edge_var,
                              const float *edge_msg, const float *channel_llr,
                              uint8_t *decoded) {
    const int n_bits = k_bits * 2;

    memset(decoded, 0, encoded_len);
    for (int v = 0; v < n_bits; v++) {
        // Posterior = channel LLR + message from every check that touches v
        // (only the first edge of a check to v counts)
        float sum = channel_llr[v];
        for (int j = 0; j < k_bits; j++) {
            for (int e = row_start[j]; e < row_start[j + 1]; e++) {
                if (edge_var[e] == v) {
                    sum += edge_msg[e];
                    break;
                }
            }
        }
        // Negative LLR means the bit is more likely 1
        if (sum < 0) {
            set_bit(decoded, v, 1);
        }
    }

    int unsatisfied = 0;
    for (int j = 0; j < k_bits; j++) {
        int syn = 0;
        for (int e = row_start[j]; e < row_start[j + 1]; e++) {
            syn ^= get_bit(decoded, edge_var[e]);
        }
        if (syn) unsatisfied++;
    }
    return unsatisfied;
}

// Decode a rate 1/2 codeword with scaled min-sum belief propagation.
//   encoded     - [data][parity], encoded_len bytes, hard bits
//   encoded_len - must be even, at least 2 and at most 2 * LDPC_MAX_DATA_BYTES
//   output      - receives encoded_len / 2 data bytes
// Returns 0 when the decoder converged to a valid codeword, and also ("soft
// success") when at most k_bits / 4 checks remain unsatisfied after
// LDPC_MAX_ITERATIONS iterations. Returns -1 for an invalid length (output
// untouched), or when decoding failed or working memory could not be
// allocated (output then holds the received data bytes unchanged).
//
// The check-node edge lists live in compact heap arrays sized to the real
// number of edges. A full LDPC_MAX_DATA_BYTES*8 square table would need about
// 2 MB of stack, more than many thread stacks provide.
int ldpc_decode(const uint8_t *encoded, size_t encoded_len, uint8_t *output) {
    if (!ldpc_initialized) ldpc_init();

    if (encoded_len < 2 || (encoded_len & 1) != 0) {
        return -1; // Invalid length
    }
    const size_t data_len = encoded_len / 2;
    if (data_len > LDPC_MAX_DATA_BYTES) {
        return -1;
    }
    const int k_bits = (int)(data_len * 8);
    const int n_bits = k_bits * 2; // Total codeword bits (data + parity)

    // Pass 1: size each check node (its data connections + its own parity bit)
    int row_start[LDPC_MAX_DATA_BYTES * 8 + 1];
    int connections[LDPC_MAX_DATA_BYTES * 8];
    row_start[0] = 0;
    for (int j = 0; j < k_bits; j++) {
        row_start[j + 1] = row_start[j] + get_parity_connections(j, k_bits, connections) + 1;
    }

    const size_t n_edges = (size_t)row_start[k_bits];
    int *edge_var = malloc(n_edges * sizeof *edge_var);
    float *edge_msg = malloc(n_edges * sizeof *edge_msg);
    if (edge_var == NULL || edge_msg == NULL) {
        free(edge_var);
        free(edge_msg);
        memcpy(output, encoded, data_len); // best effort, same as a decode failure
        return -1;
    }

    // Pass 2: fill the edge lists; all messages start at zero
    for (int j = 0; j < k_bits; j++) {
        const int n_conns = get_parity_connections(j, k_bits, connections);
        int *row = edge_var + row_start[j];
        for (int c = 0; c < n_conns; c++) {
            row[c] = connections[c]; // Data bit index
        }
        row[n_conns] = k_bits + j; // Parity bit index
    }
    for (size_t e = 0; e < n_edges; e++) {
        edge_msg[e] = 0.0f;
    }

    // Channel LLRs from received hard bits:
    // LLR > 0 means bit is probably 0, LLR < 0 means bit is probably 1
    float channel_llr[LDPC_MAX_DATA_BYTES * 16];
    for (int i = 0; i < n_bits; i++) {
        channel_llr[i] = get_bit(encoded, i) ? -CHANNEL_LLR_MAG : CHANNEL_LLR_MAG;
    }

    uint8_t decoded[LDPC_MAX_DATA_BYTES * 2];
    int unsatisfied = -1; // -1: no hard decision taken yet

    for (int iter = 0; iter < LDPC_MAX_ITERATIONS; iter++) {
        // Checks are processed in order; each one sees the messages already
        // refreshed by the checks before it in the same iteration.
        for (int j = 0; j < k_bits; j++) {
            const int base = row_start[j];
            const int degree = row_start[j + 1] - base;

            // Variable-to-check messages: channel LLR plus the messages from
            // every OTHER check connected to the variable
            float var_to_check[LDPC_MAX_DATA_BYTES * 8 + 1];
            for (int idx = 0; idx < degree; idx++) {
                const int v = edge_var[base + idx];
                float sum = channel_llr[v];
                for (int jj = 0; jj < k_bits; jj++) {
                    if (jj == j) continue;
                    for (int e = row_start[jj]; e < row_start[jj + 1]; e++) {
                        if (edge_var[e] == v) {
                            sum += edge_msg[e];
                            break;
                        }
                    }
                }
                var_to_check[idx] = clip_llr(sum);
            }

            // Check-to-variable messages: sign product and minimum magnitude
            // over all other variables of this check
            for (int idx = 0; idx < degree; idx++) {
                float sign_prod = 1.0f;
                float min_abs = 1e30f;
                for (int idx2 = 0; idx2 < degree; idx2++) {
                    if (idx2 == idx) continue;
                    const float msg = var_to_check[idx2];
                    sign_prod *= sign_f(msg);
                    const float abs_msg = fabsf(msg);
                    if (abs_msg < min_abs) min_abs = abs_msg;
                }
                // Min-sum with scaling factor 0.75 for better performance
                edge_msg[base + idx] = clip_llr(sign_prod * min_abs * 0.75f);
            }
        }

        unsatisfied = ldpc_hard_decision(k_bits, encoded_len, row_start, edge_var,
                                         edge_msg, channel_llr, decoded);
        if (unsatisfied == 0) {
            break; // All checks satisfied
        }
    }

    // Only reachable without a decision when LDPC_MAX_ITERATIONS is 0
    if (unsatisfied < 0) {
        unsatisfied = ldpc_hard_decision(k_bits, encoded_len, row_start, edge_var,
                                         edge_msg, channel_llr, decoded);
    }

    free(edge_var);
    free(edge_msg);

    // Accept a converged result, or a residual syndrome count that is low enough
    if (unsatisfied <= k_bits / 4) {
        memcpy(output, decoded, data_len);
        return 0;
    }

    // Total failure - return original data as best effort
    memcpy(output, encoded, data_len);
    return -1;
}

View File

@@ -0,0 +1,68 @@
/**
* LDPC Rate 1/2 Codec for TAV-DT
*
* Simple LDPC implementation for header protection in TAV-DT format.
* Rate 1/2: k data bytes → 2k encoded bytes (doubles the size)
*
* Uses systematic encoding where first k bytes are data, last k bytes are parity.
* Decoding uses an iterative belief propagation (scaled min-sum) algorithm.
*
* Designed for small blocks (headers up to 64 bytes).
*
* Created by CuriousTorvald and Claude on 2025-12-09.
*/
#ifndef LDPC_H
#define LDPC_H
#include <stdint.h>
#include <stddef.h>
// Maximum block size (data bytes before encoding)
#define LDPC_MAX_DATA_BYTES 64
// LDPC decoder parameters
#define LDPC_MAX_ITERATIONS 50
/**
 * Initialize LDPC codec.
 *
 * Calling this explicitly is optional: ldpc_encode(), ldpc_decode() and
 * ldpc_check_syndrome() call it themselves on first use. The implementation
 * only sets an "initialized" flag; no tables are precomputed.
 * NOTE(review): the flag is a plain static int with no synchronization.
 */
void ldpc_init(void);
/**
 * Encode data block with LDPC rate 1/2.
 *
 * @param data Input data bytes
 * @param data_len Length of input data (1 to LDPC_MAX_DATA_BYTES);
 *                 larger values are silently clamped to LDPC_MAX_DATA_BYTES
 * @param output Output buffer (must hold 2 * data_len bytes)
 * @return Output length (2 * data_len, computed from the clamped length)
 *
 * Output format: [data bytes][parity bytes]
 * The output is systematic: first data_len bytes are the original data.
 */
size_t ldpc_encode(const uint8_t *data, size_t data_len, uint8_t *output);
/**
 * Decode LDPC rate 1/2 encoded block.
 *
 * @param encoded Input encoded data (2 * data_len bytes)
 * @param encoded_len Length of encoded data (must be even, at least 2, max 2*LDPC_MAX_DATA_BYTES)
 * @param output Output buffer for decoded data (encoded_len / 2 bytes)
 * @return 0 on success, -1 if the length is invalid or decoding failed (too many errors)
 *
 * Uses iterative belief propagation (scaled min-sum), up to LDPC_MAX_ITERATIONS iterations.
 * A return of 0 does not guarantee a zero syndrome: the decoder also reports
 * success when at most a quarter of the parity checks remain unsatisfied after
 * the last iteration.
 * When decoding fails, the received data bytes are copied to output unchanged;
 * when the length is invalid, output is not written.
 */
int ldpc_decode(const uint8_t *encoded, size_t encoded_len, uint8_t *output);
/**
 * Calculate syndrome for validation.
 *
 * @param codeword Encoded codeword (2 * data_len bytes)
 * @param len Length of codeword; the data part is taken as len / 2 bytes.
 *            The length is not validated: pass an even value no larger
 *            than 2 * LDPC_MAX_DATA_BYTES.
 * @return 1 if valid (zero syndrome), 0 if errors detected
 */
int ldpc_check_syndrome(const uint8_t *codeword, size_t len);
#endif // LDPC_H

View File

@@ -0,0 +1,478 @@
/**
* LDPC(255,223) Codec Implementation - Enhanced Version
*
* This implements a high-rate LDPC code designed to compete with RS(255,223).
*
* Key improvements in this version:
* - Normalized min-sum (belief propagation) decoder
* - Staircase-parity H matrix with hash-placed, degree-balanced data connections
* - Layered scheduling for faster convergence
* - Adaptive LLR initialization
*
* Created by CuriousTorvald and Claude on 2025-12-15.
*/
#include "ldpc_payload.h"
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <stdio.h>
// =============================================================================
// Constants
// =============================================================================
#define N_BITS (LDPC_P_BLOCK_SIZE * 8) // 2040 total bits
#define K_BITS (LDPC_P_DATA_SIZE * 8) // 1784 data bits
#define M_BITS (LDPC_P_PARITY_SIZE * 8) // 256 parity bits
// LLR bounds - tighter bounds help prevent numerical issues
#define LLR_MAX 20.0f
#define LLR_MIN -20.0f
// Decoding parameters
#define LDPC_MAX_ITER 100
// =============================================================================
// Sparse Matrix Storage
// =============================================================================
// Upper bounds on edges per check node / per variable node; add_edge() refuses
// to exceed either one.
#define MAX_CHECK_DEGREE 50
#define MAX_VAR_DEGREE 12
// Set once build_h_matrix() has filled the tables below
static int ldpc_p_initialized = 0;
// Check-side adjacency: check c has check_degree[c] edges, edge i goes to
// variable check_to_var[c][i]; check_to_var_idx[c][i] is that edge's slot in
// the variable's own list.
static int check_degree[M_BITS];
static int check_to_var[M_BITS][MAX_CHECK_DEGREE];
static int check_to_var_idx[M_BITS][MAX_CHECK_DEGREE];
// Variable-side adjacency, mirrored: variable v has var_degree[v] edges, edge i
// goes to check var_to_check[v][i]; var_to_check_idx[v][i] is that edge's slot
// in the check's list.
static int var_degree[N_BITS];
static int var_to_check[N_BITS][MAX_VAR_DEGREE];
static int var_to_check_idx[N_BITS][MAX_VAR_DEGREE];
// =============================================================================
// Bit manipulation
// =============================================================================
// Read bit bit_idx of a byte array; bit 0 is the MSB of data[0].
static inline int get_bit(const uint8_t *data, int bit_idx) {
    const int shift = 7 - (bit_idx & 7);
    const uint8_t byte = data[bit_idx >> 3];
    return (byte >> shift) & 1;
}
// Write bit bit_idx of a byte array (MSB-first); non-zero value sets, zero clears.
static inline void set_bit(uint8_t *data, int bit_idx, int value) {
    uint8_t *target = &data[bit_idx >> 3];
    const int mask = 1 << (7 - (bit_idx & 7));
    if (value) {
        *target |= mask;
    } else {
        *target &= ~mask;
    }
}
// =============================================================================
// H Matrix Construction - Quasi-Cyclic with Optimized Distribution
// =============================================================================
// Deterministic 32-bit hash of a pair, used to place H-matrix connections.
// b is pre-multiplied by 0x9E3779B9 and XORed into a before the final mix.
static inline uint32_t hash32(uint32_t a, uint32_t b) {
    uint32_t h = a ^ (b * 0x9E3779B9);
    h = (h ^ (h >> 16)) * 0x85EBCA6B;
    h = (h ^ (h >> 13)) * 0xC2B2AE35;
    return h ^ (h >> 16);
}
// Connect check node `check` and variable node `var` in both adjacency tables.
// Does nothing when the edge already exists or when either node is already at
// its degree limit (MAX_CHECK_DEGREE / MAX_VAR_DEGREE).
static void add_edge(int check, int var) {
    const int c_slot = check_degree[check];
    const int v_slot = var_degree[var];

    // Already connected?
    for (int i = 0; i < c_slot; i++) {
        if (check_to_var[check][i] == var) {
            return;
        }
    }

    // No room left on one of the two nodes
    if (c_slot >= MAX_CHECK_DEGREE || v_slot >= MAX_VAR_DEGREE) {
        return;
    }

    // Record the edge on the check side, with its slot on the variable side
    check_to_var[check][c_slot] = var;
    check_to_var_idx[check][c_slot] = v_slot;
    check_degree[check] = c_slot + 1;

    // ...and the mirror entry on the variable side
    var_to_check[var][v_slot] = check;
    var_to_check_idx[var][v_slot] = c_slot;
    var_degree[var] = v_slot + 1;
}
// Bounded test for whether connecting variable v to check c is undesirable.
// Returns 1 when v is already connected to c, or when a 4-cycle is found
// within the search limits; returns 0 otherwise.
// The 4-cycle search is skipped (result 0) when v has more than 4 edges or c
// has more than 20. It looks at no more than the first 15 variables of each
// neighbouring check and the first 8 checks of each such variable, so a 0
// result does not prove that no 4-cycle exists.
static int would_create_short_cycle(int v, int c) {
    const int v_deg = var_degree[v];

    // An existing edge counts as a conflict
    for (int i = 0; i < v_deg; i++) {
        if (var_to_check[v][i] == c) {
            return 1;
        }
    }

    // For speed, only low-degree nodes get the 4-cycle search
    if (v_deg > 4 || check_degree[c] > 20) {
        return 0;
    }

    // v - c' - v' - c would close a 4-cycle once v - c is added
    for (int i = 0; i < v_deg; i++) {
        const int neighbour_check = var_to_check[v][i];
        int var_limit = check_degree[neighbour_check];
        if (var_limit > 15) var_limit = 15;

        for (int j = 0; j < var_limit; j++) {
            const int other_var = check_to_var[neighbour_check][j];
            if (other_var == v) {
                continue;
            }
            int check_limit = var_degree[other_var];
            if (check_limit > 8) check_limit = 8;

            for (int k = 0; k < check_limit; k++) {
                if (var_to_check[other_var][k] == c) {
                    return 1;
                }
            }
        }
    }
    return 0;
}
// Cyclic shift helper: (base_idx + shift) reduced modulo size.
// Uses C's % operator, so a negative sum yields a negative result.
static int qc_shift(int base_idx, int shift, int size) {
    const int shifted = base_idx + shift;
    return shifted % size;
}
// Build the sparse parity-check matrix into the file-level adjacency tables.
// Construction is fully deterministic (fixed hash constants), so every build
// yields the same matrix. Encoder and decoder both depend on this exact edge
// set, so any change to a constant or to the order of the steps below changes
// the code itself.
// add_edge() silently ignores duplicate edges and edges that would exceed a
// degree limit, so the targets below are best-effort.
static void build_h_matrix(void) {
// Start from an empty graph
memset(check_degree, 0, sizeof(check_degree));
memset(var_degree, 0, sizeof(var_degree));
// ==========================================================================
// H matrix with staircase parity and PEG-based data connections
// ==========================================================================
// --- Part 1: Staircase parity structure ---
// Check c covers parity bit c and (for c > 0) parity bit c-1. This is what lets
// ldpc_p_encode() solve the parity bits one after another.
for (int c = 0; c < M_BITS; c++) {
int parity_bit = K_BITS + c;
add_edge(c, parity_bit);
if (c > 0) {
add_edge(c, K_BITS + c - 1);
}
}
// --- Part 2: Connect data bits using PEG approach ---
// For each data bit, make up to 6 connection rounds. Each round probes up to 16
// hash-derived candidate checks and keeps the lowest-degree one seen so far.
for (int v = 0; v < K_BITS; v++) {
// Target 6 connections per variable
int target = 6;
for (int d = 0; d < target; d++) {
uint32_t h = hash32((uint32_t)v * 2654435769U, (uint32_t)d * 1597334677U);
// Find best check (lowest degree)
int best_c = -1;
int best_deg = MAX_CHECK_DEGREE;
for (int attempt = 0; attempt < 16; attempt++) {
// Candidate check: hash plus a stride of 127 per attempt, wrapped to M_BITS
int c = (int)((h + attempt * 127) % M_BITS);
// Only strictly better candidates with at least 2 free slots are considered
if (check_degree[c] < best_deg && check_degree[c] < MAX_CHECK_DEGREE - 2) {
// Check not already connected
int connected = 0;
for (int i = 0; i < var_degree[v]; i++) {
if (var_to_check[v][i] == c) { connected = 1; break; }
}
if (!connected) {
best_deg = check_degree[c];
best_c = c;
if (best_deg < 30) break; // Good enough
}
}
}
// Keep one variable slot spare (MAX_VAR_DEGREE - 1)
if (best_c >= 0 && var_degree[v] < MAX_VAR_DEGREE - 1) {
add_edge(best_c, v);
}
}
}
// --- Part 3: Fill in low-degree variables ---
// Bring every data bit up to 5 connections where possible. Up to 64 candidate
// checks are probed per missing edge; give up on the bit if none accepts.
for (int v = 0; v < K_BITS; v++) {
while (var_degree[v] < 5) {
uint32_t h = hash32((uint32_t)v * 12345, (uint32_t)var_degree[v] * 67890);
int added = 0;
for (int attempt = 0; attempt < 64 && !added; attempt++) {
int c = (int)((h + attempt * 31) % M_BITS);
if (check_degree[c] < MAX_CHECK_DEGREE - 2) {
// add_edge() may decline (duplicate edge); detect success via the degree change
int prev = var_degree[v];
add_edge(c, v);
if (var_degree[v] > prev) added = 1;
}
}
if (!added) break;
}
}
// --- Part 4: Balance check degrees ---
// Top up checks that ended below 35 edges with hash-chosen data bits, trying at
// most 150 candidates per check.
for (int c = 0; c < M_BITS; c++) {
int target = 35;
int attempts = 0;
while (check_degree[c] < target && attempts < 150) {
uint32_t h = hash32((uint32_t)c * 48271, (uint32_t)attempts * 16807);
int v = (int)(h % K_BITS);
if (var_degree[v] < MAX_VAR_DEGREE - 1) {
add_edge(c, v);
}
attempts++;
}
}
}
// Build the parity-check matrix on the first call; later calls are no-ops.
void ldpc_p_init(void) {
    if (!ldpc_p_initialized) {
        build_h_matrix();
        ldpc_p_initialized = 1;
    }
}
// =============================================================================
// Syndrome Check
// =============================================================================
// Test a full LDPC_P_BLOCK_SIZE-byte codeword against every parity check.
// Returns 1 when all checks are satisfied, 0 at the first failing check.
int ldpc_p_check_syndrome(const uint8_t *codeword) {
    if (!ldpc_p_initialized) ldpc_p_init();

    for (int c = 0; c < M_BITS; c++) {
        const int *vars = check_to_var[c];
        const int degree = check_degree[c];

        // XOR of all bits taking part in check c must be zero
        int parity = 0;
        for (int i = 0; i < degree; i++) {
            parity ^= get_bit(codeword, vars[i]);
        }
        if (parity) {
            return 0;
        }
    }
    return 1;
}
// =============================================================================
// Encoding
// =============================================================================
// Encode one block: output = [data, zero-padded to LDPC_P_DATA_SIZE][parity].
//   data     - input bytes
//   data_len - number of input bytes; values above LDPC_P_DATA_SIZE are clamped
//   output   - receives a full block; ALWAYS LDPC_P_BLOCK_SIZE bytes are written,
//              with the parity starting at byte offset LDPC_P_DATA_SIZE
// Returns LDPC_P_BLOCK_SIZE.
size_t ldpc_p_encode(const uint8_t *data, size_t data_len, uint8_t *output) {
    if (!ldpc_p_initialized) ldpc_p_init();

    const size_t len = (data_len > LDPC_P_DATA_SIZE) ? LDPC_P_DATA_SIZE : data_len;
    uint8_t *parity_bytes = output + LDPC_P_DATA_SIZE;

    // Data first, then zero padding up to the fixed data size, then cleared parity
    memcpy(output, data, len);
    if (len < LDPC_P_DATA_SIZE) {
        memset(output + len, 0, LDPC_P_DATA_SIZE - len);
    }
    memset(parity_bytes, 0, LDPC_P_PARITY_SIZE);

    // Staircase solve: check c contains parity bits c and c-1, so
    // parity[c] = (XOR of the data bits in check c) XOR parity[c-1]
    int running_parity = 0;
    for (int c = 0; c < M_BITS; c++) {
        const int degree = check_degree[c];
        int data_xor = 0;
        for (int i = 0; i < degree; i++) {
            const int v = check_to_var[c][i];
            if (v < K_BITS) {
                data_xor ^= get_bit(output, v);
            }
        }
        running_parity ^= data_xor;
        set_bit(parity_bytes, c, running_parity);
    }
    return LDPC_P_BLOCK_SIZE;
}
// =============================================================================
// Min-Sum Decoder with Optimized Parameters
// =============================================================================
// Saturate an LLR to the range [LLR_MIN, LLR_MAX]
static inline float clamp_llr(float x) {
    return (x > LLR_MAX) ? LLR_MAX : ((x < LLR_MIN) ? LLR_MIN : x);
}
int ldpc_p_decode(uint8_t *data, size_t data_len) {
if (!ldpc_p_initialized) ldpc_p_init();
size_t total_len = data_len + LDPC_P_PARITY_SIZE;
if (total_len > LDPC_P_BLOCK_SIZE) {
return -1;
}
// Working codeword buffer
uint8_t codeword[LDPC_P_BLOCK_SIZE];
memcpy(codeword, data, total_len);
if (total_len < LDPC_P_BLOCK_SIZE) {
memset(codeword + total_len, 0, LDPC_P_BLOCK_SIZE - total_len);
}
// Quick check - if already valid, no decoding needed
if (ldpc_p_check_syndrome(codeword)) {
return 0;
}
// ==========================================================================
// Initialize channel LLRs
// ==========================================================================
float var_llr[N_BITS];
float llr_magnitude = 6.0f;
for (int v = 0; v < N_BITS; v++) {
int bit = get_bit(codeword, v);
var_llr[v] = bit ? -llr_magnitude : llr_magnitude;
}
// Message storage
static float c2v[M_BITS][MAX_CHECK_DEGREE];
for (int c = 0; c < M_BITS; c++) {
for (int i = 0; i < check_degree[c]; i++) {
c2v[c][i] = 0.0f;
}
}
// ==========================================================================
// Normalized Min-Sum Decoding with Layered Scheduling
// ==========================================================================
float v2c[MAX_CHECK_DEGREE];
const float alpha = 0.75f; // Normalization factor
for (int iter = 0; iter < LDPC_MAX_ITER; iter++) {
// Process each check node (layer)
for (int c = 0; c < M_BITS; c++) {
int deg = check_degree[c];
// Step 1: Compute variable-to-check messages
for (int i = 0; i < deg; i++) {
int v = check_to_var[c][i];
v2c[i] = var_llr[v] - c2v[c][i];
}
// Step 2: Compute check-to-variable messages using min-sum
for (int i = 0; i < deg; i++) {
float sign_prod = 1.0f;
float min1 = LLR_MAX, min2 = LLR_MAX;
for (int j = 0; j < deg; j++) {
if (j == i) continue;
float val = v2c[j];
if (val < 0) sign_prod = -sign_prod;
float absval = fabsf(val);
if (absval < min1) {
min2 = min1;
min1 = absval;
} else if (absval < min2) {
min2 = absval;
}
}
// Normalized min-sum message
float msg_mag = alpha * min1;
float new_c2v = sign_prod * msg_mag;
// Update variable LLR immediately (layered approach)
int v = check_to_var[c][i];
var_llr[v] = clamp_llr(var_llr[v] - c2v[c][i] + new_c2v);
c2v[c][i] = new_c2v;
}
}
// Make hard decisions
for (int v = 0; v < N_BITS; v++) {
set_bit(codeword, v, var_llr[v] < 0 ? 1 : 0);
}
// Check if valid codeword
if (ldpc_p_check_syndrome(codeword)) {
memcpy(data, codeword, data_len);
return iter + 1;
}
// Adaptive restart at iteration milestones
if (iter == 25 || iter == 50 || iter == 75) {
float new_mag = 4.0f - (iter / 25) * 0.5f;
for (int v = 0; v < N_BITS; v++) {
int bit = get_bit(codeword, v);
var_llr[v] = bit ? -new_mag : new_mag;
}
for (int c = 0; c < M_BITS; c++) {
for (int i = 0; i < check_degree[c]; i++) {
c2v[c][i] = 0.0f;
}
}
}
}
// Failed to converge
memcpy(data, codeword, data_len);
return -1;
}
// =============================================================================
// Block-level operations
// =============================================================================
// Encode a buffer as consecutive blocks.
// The input is cut into chunks of up to LDPC_P_DATA_SIZE bytes and every chunk,
// including a short final one, becomes one full LDPC_P_BLOCK_SIZE-byte block
// in output.
// Returns the number of bytes written (blocks * LDPC_P_BLOCK_SIZE; 0 for
// empty input).
size_t ldpc_p_encode_blocks(const uint8_t *data, size_t data_len, uint8_t *output) {
    if (!ldpc_p_initialized) ldpc_p_init();

    size_t consumed = 0;
    size_t produced = 0;
    while (consumed < data_len) {
        const size_t left = data_len - consumed;
        const size_t chunk = (left > LDPC_P_DATA_SIZE) ? LDPC_P_DATA_SIZE : left;

        ldpc_p_encode(data + consumed, chunk, output + produced);
        consumed += chunk;
        produced += LDPC_P_BLOCK_SIZE;
    }
    return produced;
}
// Decode consecutive LDPC_P_BLOCK_SIZE-byte blocks and gather their data bytes.
//   data       - encoded blocks; each block is corrected in place
//   total_len  - bytes available in data; a trailing partial block is ignored
//   output     - receives the decoded data bytes
//   output_len - number of data bytes wanted; decoding stops once they are delivered
// Returns the sum of the per-block iteration counts, or -1 as soon as one
// block fails to decode (the data of that block and of later ones is not
// copied).
int ldpc_p_decode_blocks(uint8_t *data, size_t total_len, uint8_t *output, size_t output_len) {
    if (!ldpc_p_initialized) ldpc_p_init();

    int iterations = 0;
    size_t in_left = total_len;
    size_t out_left = output_len;
    uint8_t *block = data;
    uint8_t *out = output;

    while (in_left >= LDPC_P_BLOCK_SIZE && out_left > 0) {
        const int used = ldpc_p_decode(block, LDPC_P_DATA_SIZE);
        if (used < 0) {
            return -1;
        }
        iterations += used;

        // The last block may contribute fewer than LDPC_P_DATA_SIZE bytes
        const size_t take = (out_left > LDPC_P_DATA_SIZE) ? LDPC_P_DATA_SIZE : out_left;
        memcpy(out, block, take);

        block += LDPC_P_BLOCK_SIZE;
        out += take;
        in_left -= LDPC_P_BLOCK_SIZE;
        out_left -= take;
    }
    return iterations;
}

View File

@@ -0,0 +1,97 @@
/**
* LDPC(255,223) Codec for TAV-DT Payloads
*
* Alternative to RS(255,223) with same rate (~0.875):
* - Block size: 255 bytes (223 data + 32 parity)
* - Uses quasi-cyclic LDPC structure for efficiency
* - Soft-decision belief propagation decoder
*
* Designed as drop-in replacement for RS(255,223):
* - Same input/output sizes
* - Same API style
* - Different error correction characteristics:
* - LDPC: Better at high BER (>1e-3), gradual degradation
* - RS: Better at low BER, hard threshold at 16 byte errors
*
* Created by CuriousTorvald and Claude on 2025-12-15.
*/
#ifndef LDPC_PAYLOAD_H
#define LDPC_PAYLOAD_H
#include <stdint.h>
#include <stddef.h>
// LDPC(255,223) parameters - matches RS(255,223) for drop-in replacement
#define LDPC_P_BLOCK_SIZE 255 // Total codeword size (bytes)
#define LDPC_P_DATA_SIZE 223 // Data bytes per block
#define LDPC_P_PARITY_SIZE 32 // Parity bytes per block
// Decoder parameters
// NOTE(review): the decoder body performs "adaptive restarts" at iterations
// 25, 50 and 75, which suggests it can run past 30 iterations - confirm that
// this limit (and the "1-30" range documented on ldpc_p_decode) is current.
#define LDPC_P_MAX_ITERATIONS 30 // Maximum BP iterations
#define LDPC_P_EARLY_TERM 1 // Enable early termination on valid codeword
/**
 * Initialize LDPC(255,223) codec.
 * Must be called once before using encode/decode functions.
 * (The block-level helpers also call it lazily when the codec is not yet
 * initialized.)
 *
 * NOTE(review): this was documented as "Thread-safe: uses static
 * initialization"; that cannot be confirmed from this header - verify before
 * relying on concurrent first use.
 */
void ldpc_p_init(void);
/**
 * Encode data block with LDPC(255,223).
 *
 * @param data Input data (up to LDPC_P_DATA_SIZE bytes)
 * @param data_len Length of input data (1 to LDPC_P_DATA_SIZE)
 * @param output Output buffer (must hold data_len + LDPC_P_PARITY_SIZE bytes)
 * Format: [data][parity]
 * @return Total output length (data_len + LDPC_P_PARITY_SIZE)
 *
 * Note: For data shorter than LDPC_P_DATA_SIZE, the encoder pads with zeros
 * internally but only outputs actual data + parity.
 */
size_t ldpc_p_encode(const uint8_t *data, size_t data_len, uint8_t *output);
/**
 * Decode and correct LDPC(255,223) encoded block.
 *
 * @param data Buffer containing [data][parity] (modified in-place)
 * @param data_len Length of data portion (1 to LDPC_P_DATA_SIZE)
 * @return Number of iterations used (1-30), or -1 if uncorrectable
 *
 * On success, data buffer contains corrected data.
 * On failure, data buffer contents are undefined.
 */
int ldpc_p_decode(uint8_t *data, size_t data_len);
/**
 * Encode data with automatic block splitting.
 * For data larger than LDPC_P_DATA_SIZE, splits into multiple blocks.
 * Each block occupies a full LDPC_P_BLOCK_SIZE slot in the output.
 *
 * @param data Input data
 * @param data_len Length of input data
 * @param output Output buffer (must hold ceil(data_len/223) * 255 bytes)
 * @return Total output length (a multiple of LDPC_P_BLOCK_SIZE)
 */
size_t ldpc_p_encode_blocks(const uint8_t *data, size_t data_len, uint8_t *output);
/**
 * Decode data with automatic block splitting.
 * Each block is decoded as a full LDPC_P_DATA_SIZE payload; trailing input
 * bytes that do not form a whole block are ignored.
 *
 * @param data Buffer containing LDPC-encoded blocks (modified in-place)
 * @param total_len Total length of encoded data (multiple of LDPC_P_BLOCK_SIZE)
 * @param output Output buffer for decoded data
 * @param output_len Expected length of decoded data
 * @return Total iterations across all blocks, or -1 if any block failed
 */
int ldpc_p_decode_blocks(uint8_t *data, size_t total_len, uint8_t *output, size_t output_len);
/**
 * Check if codeword is valid (syndrome check).
 *
 * @param codeword Full codeword (LDPC_P_BLOCK_SIZE bytes)
 * @return 1 if valid (zero syndrome), 0 if errors detected
 */
int ldpc_p_check_syndrome(const uint8_t *codeword);
#endif // LDPC_PAYLOAD_H

View File

@@ -0,0 +1,417 @@
/**
* Reed-Solomon (255,223) Codec Implementation
*
* Standard RS code over GF(2^8) for TAV-DT forward error correction.
*
* Created by CuriousTorvald and Claude on 2025-12-09.
*/
#include "reed_solomon.h"
#include <string.h>
#include <stdio.h>
// =============================================================================
// Galois Field GF(2^8) Arithmetic
// =============================================================================
// Primitive polynomial: x^8 + x^4 + x^3 + x^2 + 1 = 0x11D
// (XORed into a value whenever doubling it sets bit 8)
#define GF_PRIMITIVE 0x11D
// Number of field elements
#define GF_SIZE 256
// Number of non-zero elements; exponents of alpha repeat with this period
#define GF_MAX 255
// Lookup tables for GF(2^8) arithmetic, filled in by init_gf_tables()
static uint8_t gf_exp[512]; // Anti-log table (doubled for easy modular reduction)
static uint8_t gf_log[256]; // Log table
// Coefficients of g(x), index = power of x (filled in by init_generator())
static uint8_t gf_generator[RS_PARITY_SIZE + 1]; // Generator polynomial coefficients
// Set to 1 by rs_init() once the tables above are built; plain int, no locking
static int rs_initialized = 0;
// Build the GF(2^8) antilog (gf_exp) and log (gf_log) tables by stepping
// through successive powers of alpha = 2.
static void init_gf_tables(void) {
    uint16_t value = 1; // alpha^power, starting at alpha^0
    int power = 0;

    while (power < GF_MAX) {
        gf_exp[power] = (uint8_t)value;
        gf_log[value] = (uint8_t)power;

        // Next power: multiply by alpha, reducing when bit 8 appears
        value <<= 1;
        if (value & 0x100) {
            value ^= GF_PRIMITIVE;
        }
        power++;
    }

    // Mirror the table so sums of two logs can index it without a modulo
    for (int idx = GF_MAX; idx < 512; idx++) {
        gf_exp[idx] = gf_exp[idx - GF_MAX];
    }

    // log(0) does not exist; store 0 so the slot holds a defined value
    gf_log[0] = 0;
}
// Multiply two field elements via log/antilog lookup; zero times anything is zero.
static inline uint8_t gf_mul(uint8_t a, uint8_t b) {
    if (a != 0 && b != 0) {
        const int log_sum = gf_log[a] + gf_log[b]; // < 512, inside the doubled table
        return gf_exp[log_sum];
    }
    return 0;
}
// Divide a by b in the field. Returns 0 when a is 0, and also when b is 0
// (division by zero is not expected from callers).
static inline uint8_t gf_div(uint8_t a, uint8_t b) {
    if (a == 0 || b == 0) {
        return 0;
    }
    // Adding GF_MAX keeps the log difference non-negative
    const int log_diff = gf_log[a] + GF_MAX - gf_log[b];
    return gf_exp[log_diff];
}
// Raise field element a to the integer power n.
// - n == 0 yields 1 (including for a == 0)
// - a == 0 with any other n yields 0
// - negative n is reduced into [0, GF_MAX) so that a^-k == inverse(a)^k
static inline uint8_t gf_pow(uint8_t a, int n) {
    if (n == 0) return 1;
    if (a == 0) return 0;

    // Widen before multiplying so large |n| cannot overflow int, and fold a
    // negative remainder back into range: C's % keeps the dividend's sign, so
    // a negative n would otherwise index gf_exp out of bounds.
    int exponent = (int)(((long long)gf_log[a] * n) % GF_MAX);
    if (exponent < 0) {
        exponent += GF_MAX;
    }
    return gf_exp[exponent];
}
// Multiplicative inverse of a; 0 has no inverse and maps to 0.
static inline uint8_t gf_inv(uint8_t a) {
    return (a != 0) ? gf_exp[GF_MAX - gf_log[a]] : 0;
}
// =============================================================================
// Generator Polynomial
// =============================================================================
// Build the generator polynomial g(x) = (x - alpha^0)(x - alpha^1)...(x - alpha^31).
// gf_generator[k] holds the coefficient of x^k.
static void init_generator(void) {
    // g(x) = 1
    memset(gf_generator, 0, sizeof(gf_generator));
    gf_generator[0] = 1;

    for (int root = 0; root < RS_PARITY_SIZE; root++) {
        const uint8_t alpha_pow = gf_exp[root];

        // In-place multiply by (x - alpha^root), highest coefficient first so
        // each step still sees the not-yet-updated lower coefficient.
        int k = RS_PARITY_SIZE;
        while (k > 0) {
            gf_generator[k] = gf_generator[k - 1] ^ gf_mul(gf_generator[k], alpha_pow);
            k--;
        }
        gf_generator[0] = gf_mul(gf_generator[0], alpha_pow);
    }
}
// =============================================================================
// Public API
// =============================================================================
// Build the field tables and generator polynomial on first call; later calls
// are no-ops. The guard is a plain int flag with no locking.
void rs_init(void) {
    if (!rs_initialized) {
        init_gf_tables();
        init_generator();
        rs_initialized = 1;
    }
}
// Systematic RS encode: writes [data][parity] to output and returns its length.
// data_len values above RS_DATA_SIZE are clamped to RS_DATA_SIZE.
size_t rs_encode(const uint8_t *data, size_t data_len, uint8_t *output) {
    if (!rs_initialized) rs_init();

    const size_t msg_len = (data_len > RS_DATA_SIZE) ? RS_DATA_SIZE : data_len;

    // Message part of the codeword, followed by a cleared parity area
    memcpy(output, data, msg_len);
    memset(output + msg_len, 0, RS_PARITY_SIZE);

    // Remainder of msg(x) * x^32 divided by g(x), computed LFSR-style.
    // parity[0] is the highest-order remainder coefficient.
    uint8_t parity[RS_PARITY_SIZE];
    memset(parity, 0, sizeof(parity));

    for (size_t k = 0; k < msg_len; k++) {
        const uint8_t feedback = data[k] ^ parity[0];

        // Shift the register up by one position
        memmove(parity, parity + 1, RS_PARITY_SIZE - 1);
        parity[RS_PARITY_SIZE - 1] = 0;

        if (feedback == 0) {
            continue;
        }
        // Subtract feedback * g(x) (leading term excluded)
        for (int t = 0; t < RS_PARITY_SIZE; t++) {
            parity[t] ^= gf_mul(gf_generator[RS_PARITY_SIZE - 1 - t], feedback);
        }
    }

    memcpy(output + msg_len, parity, RS_PARITY_SIZE);
    return msg_len + RS_PARITY_SIZE;
}
// =============================================================================
// Berlekamp-Massey Decoder
// =============================================================================
// Compute syndromes S_i = r(alpha^i) for i = 0..31, treating r[0] as the
// highest-order coefficient. Evaluated with Horner's rule.
static void compute_syndromes(const uint8_t *r, size_t len, uint8_t *syndromes) {
    for (int i = 0; i < RS_PARITY_SIZE; i++) {
        const uint8_t root = gf_exp[i]; // alpha^i
        uint8_t acc = 0;
        for (size_t j = 0; j < len; j++) {
            acc = gf_mul(acc, root) ^ r[j];
        }
        syndromes[i] = acc;
    }
}
// Berlekamp-Massey: derive the error locator polynomial from the syndromes.
// Writes RS_PARITY_SIZE + 1 coefficients to sigma (index = power of x),
// stores the LFSR length in *sigma_deg and also returns it.
static int berlekamp_massey(const uint8_t *syndromes, uint8_t *sigma, int *sigma_deg) {
    uint8_t conn[RS_PARITY_SIZE + 1];  // current connection polynomial
    uint8_t prev[RS_PARITY_SIZE + 1];  // connection polynomial before the last length change
    uint8_t saved[RS_PARITY_SIZE + 1]; // scratch copy of conn
    int lfsr_len = 0;                  // current LFSR length
    int gap = 1;                       // steps since the last length change
    uint8_t prev_disc = 1;             // discrepancy at the last length change

    memset(conn, 0, sizeof(conn));
    memset(prev, 0, sizeof(prev));
    conn[0] = 1;
    prev[0] = 1;

    for (int n = 0; n < RS_PARITY_SIZE; n++) {
        // Discrepancy between the LFSR prediction and the actual syndrome
        uint8_t disc = syndromes[n];
        for (int i = 1; i <= lfsr_len; i++) {
            disc ^= gf_mul(conn[i], syndromes[n - i]);
        }

        if (disc == 0) {
            gap++;
            continue;
        }

        // The register must grow when it is at most half as long as the
        // number of syndromes consumed so far.
        const int grows = (2 * lfsr_len <= n);
        if (grows) {
            memcpy(saved, conn, sizeof(saved));
        }

        // conn(x) -= (disc / prev_disc) * x^gap * prev(x)
        const uint8_t scale = gf_div(disc, prev_disc);
        for (int i = 0; i + gap <= RS_PARITY_SIZE; i++) {
            conn[i + gap] ^= gf_mul(scale, prev[i]);
        }

        if (grows) {
            lfsr_len = n + 1 - lfsr_len;
            memcpy(prev, saved, sizeof(prev));
            prev_disc = disc;
            gap = 1;
        } else {
            gap++;
        }
    }

    memcpy(sigma, conn, RS_PARITY_SIZE + 1);
    *sigma_deg = lfsr_len;
    return lfsr_len;
}
// Chien search: evaluate sigma at alpha^(-i) for i = 0..n-1 and record the
// byte position (n-1-i) of every root. Returns 0 when the number of roots
// equals sigma_deg, -1 otherwise.
static int chien_search(const uint8_t *sigma, int sigma_deg, size_t n, uint8_t *positions, int *num_errors) {
    int found = 0;

    for (size_t i = 0; i < n; i++) {
        uint8_t value = 0;
        for (int j = 0; j <= sigma_deg; j++) {
            // alpha^(-i*j), with the exponent reduced into [0, GF_MAX)
            const int neg_exp = (GF_MAX - (int)((i * j) % GF_MAX)) % GF_MAX;
            value ^= gf_mul(sigma[j], gf_exp[neg_exp]);
        }

        if (value != 0) {
            continue;
        }
        positions[found] = (uint8_t)(n - 1 - i);
        found++;
    }

    *num_errors = found;
    return (found == sigma_deg) ? 0 : -1;
}
// Formal derivative of a polynomial over GF(2^m): the x^k term contributes
// k * poly[k] * x^(k-1), and in characteristic 2 that multiplier is 1 for odd
// k and 0 for even k. Writes deg coefficients to deriv.
static void poly_derivative(const uint8_t *poly, int deg, uint8_t *deriv) {
    for (int k = 1; k <= deg; k++) {
        deriv[k - 1] = (k % 2 != 0) ? poly[k] : 0;
    }
}
// Forney algorithm: compute the error magnitude for each located position.
// errors[k] receives the value to XOR into the byte at positions[k].
static void forney(const uint8_t *syndromes, const uint8_t *sigma, int sigma_deg,
                   const uint8_t *positions, int num_errors, size_t n, uint8_t *errors) {
    // Error evaluator omega(x) = S(x) * sigma(x) mod x^2t
    uint8_t evaluator[RS_PARITY_SIZE + 1];
    memset(evaluator, 0, sizeof(evaluator));
    for (int deg = 0; deg < RS_PARITY_SIZE; deg++) {
        const int top = (sigma_deg < deg) ? sigma_deg : deg;
        for (int j = 0; j <= top; j++) {
            evaluator[deg] ^= gf_mul(syndromes[deg - j], sigma[j]);
        }
    }

    // Formal derivative of the locator (only the first sigma_deg entries are used)
    uint8_t sigma_prime[RS_PARITY_SIZE];
    poly_derivative(sigma, sigma_deg, sigma_prime);

    for (int e = 0; e < num_errors; e++) {
        // Locator X = alpha^(n-1-pos) and its inverse
        const uint8_t locator = gf_exp[n - 1 - positions[e]];
        const uint8_t locator_inv = gf_inv(locator);

        // omega(X^-1)
        uint8_t numer = 0;
        for (int j = 0; j < RS_PARITY_SIZE; j++) {
            numer ^= gf_mul(evaluator[j], gf_pow(locator_inv, j));
        }

        // sigma'(X^-1)
        uint8_t denom = 0;
        for (int j = 0; j < sigma_deg; j++) {
            denom ^= gf_mul(sigma_prime[j], gf_pow(locator_inv, j));
        }

        // e = X * omega(X^-1) / sigma'(X^-1)
        errors[e] = gf_mul(locator, gf_div(numer, denom));
    }
}
// Decode one [data][parity] block in place.
// Returns the number of corrected bytes (0 when the syndromes are all zero),
// or -1 when data_len + RS_PARITY_SIZE exceeds RS_BLOCK_SIZE or the block
// cannot be corrected.
int rs_decode(uint8_t *data, size_t data_len) {
    if (!rs_initialized) rs_init();

    const size_t total_len = data_len + RS_PARITY_SIZE;
    if (total_len > RS_BLOCK_SIZE) {
        return -1;
    }

    uint8_t syndromes[RS_PARITY_SIZE];
    compute_syndromes(data, total_len, syndromes);

    // All-zero syndromes mean the block is already a valid codeword
    int first_nonzero = 0;
    while (first_nonzero < RS_PARITY_SIZE && syndromes[first_nonzero] == 0) {
        first_nonzero++;
    }
    if (first_nonzero == RS_PARITY_SIZE) {
        return 0;
    }

    // Error locator polynomial
    uint8_t sigma[RS_PARITY_SIZE + 1];
    int sigma_deg;
    if (berlekamp_massey(syndromes, sigma, &sigma_deg) > RS_MAX_ERRORS) {
        return -1; // more errors than the code can correct
    }

    // Error positions
    uint8_t positions[RS_MAX_ERRORS];
    int num_errors;
    if (chien_search(sigma, sigma_deg, total_len, positions, &num_errors) != 0) {
        return -1; // root count disagrees with the locator degree
    }

    // Error magnitudes
    uint8_t error_values[RS_MAX_ERRORS];
    forney(syndromes, sigma, sigma_deg, positions, num_errors, total_len, error_values);

    for (int k = 0; k < num_errors; k++) {
        const uint8_t pos = positions[k];
        if (pos < total_len) {
            data[pos] ^= error_values[k];
        }
    }
    return num_errors;
}
// =============================================================================
// Block-level operations
// =============================================================================
// Encode a buffer as consecutive RS blocks. Every block occupies a full
// RS_BLOCK_SIZE slot in the output; a short final block is zero-filled after
// its parity so block boundaries stay fixed. Returns the bytes written.
size_t rs_encode_blocks(const uint8_t *data, size_t data_len, uint8_t *output) {
    if (!rs_initialized) rs_init();

    size_t written = 0;
    for (size_t consumed = 0; consumed < data_len; ) {
        const size_t left = data_len - consumed;
        const size_t chunk = (left > RS_DATA_SIZE) ? RS_DATA_SIZE : left;
        uint8_t *slot = output + written;

        const size_t used = rs_encode(data + consumed, chunk, slot);
        if (used < RS_BLOCK_SIZE) {
            memset(slot + used, 0, RS_BLOCK_SIZE - used);
        }

        consumed += chunk;
        written += RS_BLOCK_SIZE;
    }
    return written;
}
// Decode consecutive RS blocks and gather the payload bytes.
//
// Each block is decoded in place as a full RS_DATA_SIZE payload (the encoder
// pads every block to RS_BLOCK_SIZE); only as many bytes as are still needed
// to reach output_len are copied out of each block.
//
// Returns the total number of corrected bytes, or -1 if any block is
// uncorrectable or the input holds too few whole blocks for output_len.
int rs_decode_blocks(uint8_t *data, size_t total_len, uint8_t *output, size_t output_len) {
    if (!rs_initialized) rs_init();

    // Refuse truncated input up front: previously the loop simply stopped
    // early and reported success while the tail of `output` was never written.
    const size_t blocks_needed =
        output_len / RS_DATA_SIZE + ((output_len % RS_DATA_SIZE) != 0 ? 1 : 0);
    if (total_len / RS_BLOCK_SIZE < blocks_needed) {
        return -1;
    }

    int total_errors = 0;
    size_t remaining_output = output_len;
    uint8_t *src = data;
    uint8_t *dst = output;

    while (remaining_output > 0) {
        const size_t bytes_to_copy =
            (remaining_output > RS_DATA_SIZE) ? RS_DATA_SIZE : remaining_output;

        // Decode with the full data size (modifies src in place)
        const int errors = rs_decode(src, RS_DATA_SIZE);
        if (errors < 0) {
            return -1; // Uncorrectable block
        }
        total_errors += errors;

        memcpy(dst, src, bytes_to_copy);
        src += RS_BLOCK_SIZE;
        dst += bytes_to_copy;
        remaining_output -= bytes_to_copy;
    }

    return total_errors;
}

View File

@@ -0,0 +1,82 @@
/**
* Reed-Solomon (255,223) Codec for TAV-DT
*
* Standard RS code over GF(2^8):
* - Block size: 255 bytes (223 data + 32 parity)
* - Error correction: up to 16 byte errors
* - Error detection: up to 32 byte errors
*
* Uses primitive polynomial: x^8 + x^4 + x^3 + x^2 + 1 (0x11D)
* Generator polynomial: g(x) = product of (x - alpha^i) for i = 0..31
*
* Created by CuriousTorvald and Claude on 2025-12-09.
*/
#ifndef REED_SOLOMON_H
#define REED_SOLOMON_H
#include <stdint.h>
#include <stddef.h>
// RS(255,223) parameters
#define RS_BLOCK_SIZE 255 // Total codeword size
#define RS_DATA_SIZE 223 // Data bytes per block
#define RS_PARITY_SIZE 32 // Parity bytes per block (2t = 32, t = 16)
#define RS_MAX_ERRORS 16 // Maximum correctable errors (t)
/**
 * Initialize Reed-Solomon codec (field tables and generator polynomial).
 * Repeated calls are no-ops; the encode/decode functions also call it lazily
 * when the codec has not been initialized yet.
 *
 * Not synchronized: the "already initialized" guard is a plain static flag
 * with no locking, so call this once before any concurrent use.
 */
void rs_init(void);
/**
 * Encode data block with Reed-Solomon parity.
 *
 * @param data Input data (up to RS_DATA_SIZE bytes)
 * @param data_len Length of input data (1 to RS_DATA_SIZE); larger values are
 * clamped to RS_DATA_SIZE
 * @param output Output buffer (must hold data_len + RS_PARITY_SIZE bytes)
 * Format: [data][parity]
 * @return Total output length (data_len + RS_PARITY_SIZE, after clamping)
 *
 * Note: For data shorter than RS_DATA_SIZE, the encoder pads with zeros
 * internally but only outputs actual data + parity.
 */
size_t rs_encode(const uint8_t *data, size_t data_len, uint8_t *output);
/**
 * Decode and correct Reed-Solomon encoded block.
 *
 * @param data Buffer containing [data][parity] (modified in-place)
 * @param data_len Length of data portion (1 to RS_DATA_SIZE)
 * @return Number of errors corrected (0-16), or -1 if uncorrectable or if
 * data_len + RS_PARITY_SIZE exceeds RS_BLOCK_SIZE
 *
 * On success, data buffer contains corrected data (parity may also be corrected).
 * On failure, data buffer contents are undefined.
 */
int rs_decode(uint8_t *data, size_t data_len);
/**
 * Encode data with automatic block splitting.
 * For data larger than RS_DATA_SIZE, splits into multiple RS blocks.
 * Every block, including a short final one, is zero-padded to RS_BLOCK_SIZE.
 *
 * @param data Input data
 * @param data_len Length of input data
 * @param output Output buffer (must hold ceil(data_len/223) * 255 bytes)
 * @return Total output length (a multiple of RS_BLOCK_SIZE)
 */
size_t rs_encode_blocks(const uint8_t *data, size_t data_len, uint8_t *output);
/**
 * Decode data with automatic block splitting.
 * Each block is decoded as a full RS_DATA_SIZE payload; trailing input bytes
 * that do not form a whole block are ignored.
 *
 * @param data Buffer containing RS-encoded blocks (modified in-place)
 * @param total_len Total length of encoded data (multiple of RS_BLOCK_SIZE)
 * @param output Output buffer for decoded data
 * @param output_len Expected length of decoded data
 * @return Total errors corrected across all blocks, or -1 if any block failed
 */
int rs_decode_blocks(uint8_t *data, size_t total_len, uint8_t *output, size_t output_len);
#endif // REED_SOLOMON_H

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,354 @@
# libtavenc - TAV Video Encoder Library
**libtavenc** is a high-performance video encoding library implementing the TSVM Advanced Video (TAV) codec. It provides a clean C API for encoding RGB24 video frames using discrete wavelet transform (DWT) with perceptual quantization and GOP-based temporal compression.
## Features
- **Multiple Wavelet Types**: CDF 5/3, CDF 9/7, CDF 13/7, DD-4, Haar
- **3D DWT GOP Encoding**: Temporal + spatial wavelet compression
- **Perceptual Quantization**: HVS-optimized coefficient scaling
- **EZBC Entropy Coding**: Efficient coefficient compression with Zstd
- **Multi-threading**: Internal thread pool for optimal performance
- **Color Spaces**: YCoCg-R (default) and ICtCp (for HDR)
- **Quality Levels**: 0-5 (0=lowest/smallest, 5=highest/largest)
## Building
```bash
# Build static library
make lib/libtavenc.a
# Build with encoder CLI
make encoder_tav
# Install library and headers
make install-libs PREFIX=/usr/local
```
## Quick Start
### Basic Encoding
```c
#include "tav_encoder_lib.h"
#include <stdio.h>
int main() {
// Initialize encoder parameters
tav_encoder_params_t params;
tav_encoder_params_init(&params, 1920, 1080);
// Configure encoding options
params.fps_num = 60;
params.fps_den = 1;
params.wavelet_type = 1; // CDF 9/7 (default)
params.quality_y = 3; // Quality level 3
params.quality_co = 3;
params.quality_cg = 3;
params.enable_temporal_dwt = 1; // Enable 3D GOP encoding
params.gop_size = 0; // Auto-calculate (typically 16-24)
params.num_threads = 4; // 4 worker threads
// Create encoder context
tav_encoder_context_t *ctx = tav_encoder_create(&params);
if (!ctx) {
fprintf(stderr, "Failed to create encoder\n");
return -1;
}
// Get actual parameters (with auto-calculated values)
tav_encoder_get_params(ctx, &params);
printf("GOP size: %d frames\n", params.gop_size);
// Encode frames
uint8_t *rgb_frame = /* ... load RGB24 frame ... */;
tav_encoder_packet_t *packet;
for (int i = 0; i < num_frames; i++) {
int result = tav_encoder_encode_frame(ctx, rgb_frame, i, &packet);
if (result == 1) {
// Packet ready (GOP completed)
fwrite(packet->data, 1, packet->size, outfile);
tav_encoder_free_packet(packet);
}
else if (result == 0) {
// Frame buffered, waiting for GOP to fill
}
else {
// Error
fprintf(stderr, "Encoding error: %s\n", tav_encoder_get_error(ctx));
break;
}
}
// Flush remaining frames
while (tav_encoder_flush(ctx, &packet) == 1) {
fwrite(packet->data, 1, packet->size, outfile);
tav_encoder_free_packet(packet);
}
// Cleanup
tav_encoder_free(ctx);
return 0;
}
```
### Stateless GOP Encoding (Multi-threaded)
The library provides `tav_encoder_encode_gop()` for stateless GOP encoding, perfect for multi-threaded applications:
```c
#include "tav_encoder_lib.h"
#include <pthread.h>
typedef struct {
tav_encoder_params_t params;
uint8_t **rgb_frames;
int num_frames;
int *frame_numbers;
tav_encoder_packet_t *output_packet;
} gop_encode_job_t;
void *encode_gop_thread(void *arg) {
gop_encode_job_t *job = (gop_encode_job_t *)arg;
// Create thread-local encoder context
tav_encoder_context_t *ctx = tav_encoder_create(&job->params);
if (!ctx) {
return NULL;
}
// Encode entire GOP at once (stateless, thread-safe)
tav_encoder_encode_gop(ctx,
(const uint8_t **)job->rgb_frames,
job->num_frames,
job->frame_numbers,
&job->output_packet);
tav_encoder_free(ctx);
return NULL;
}
int main() {
// Setup parameters
tav_encoder_params_t params;
tav_encoder_params_init(&params, 1920, 1080);
params.enable_temporal_dwt = 1;
params.gop_size = 24;
// Create worker threads
pthread_t threads[4];
gop_encode_job_t jobs[4];
for (int i = 0; i < 4; i++) {
jobs[i].params = params;
jobs[i].output_packet = NULL; // stays NULL if the worker fails before producing a packet
jobs[i].rgb_frames = /* ... load GOP frames ... */;
jobs[i].num_frames = 24;
jobs[i].frame_numbers = /* ... frame indices ... */;
pthread_create(&threads[i], NULL, encode_gop_thread, &jobs[i]);
}
// Wait for completion
for (int i = 0; i < 4; i++) {
pthread_join(threads[i], NULL);
// Write output packet
if (jobs[i].output_packet) {
fwrite(jobs[i].output_packet->data, 1,
jobs[i].output_packet->size, outfile);
tav_encoder_free_packet(jobs[i].output_packet);
}
}
return 0;
}
```
## API Reference
### Context Management
#### `tav_encoder_create()`
Creates an encoder context with the specified parameters. Allocates internal buffers and initializes the thread pool if multi-threading is enabled.
**Returns**: Encoder context or NULL on failure
#### `tav_encoder_free()`
Frees encoder context and all resources. Any unflushed GOP frames are lost.
#### `tav_encoder_get_error()`
Returns last error message string.
#### `tav_encoder_get_params()`
Gets encoder parameters with calculated values (e.g., auto-calculated GOP size, decomposition levels).
### Frame Encoding
#### `tav_encoder_encode_frame()`
Encodes single RGB24 frame. Frames are buffered until GOP is full.
**Parameters**:
- `rgb_frame`: RGB24 planar format `[R...][G...][B...]`, width×height×3 bytes
- `frame_pts`: Presentation timestamp (frame number or time)
- `packet`: Output packet pointer (NULL if GOP not ready)
**Returns**:
- `1`: Packet ready (GOP completed)
- `0`: Frame buffered, waiting for more frames
- `-1`: Error
#### `tav_encoder_flush()`
Flushes remaining buffered frames and encodes final GOP. Call at end of stream.
**Returns**:
- `1`: Packet ready
- `0`: No more packets
- `-1`: Error
#### `tav_encoder_encode_gop()`
Stateless GOP encoding. Thread-safe with separate contexts.
**Parameters**:
- `rgb_frames`: Array of RGB24 frames `[frame][width×height×3]`
- `num_frames`: Number of frames in GOP (1-24)
- `frame_numbers`: Frame indices for timecodes (can be NULL)
- `packet`: Output packet pointer
**Returns**: `1` on success, `-1` on error
### Packet Management
#### `tav_encoder_free_packet()`
Frees packet returned by encoding functions.
## Encoder Parameters
### Video Dimensions
- `width`, `height`: Frame dimensions (must be even)
- `fps_num`, `fps_den`: Framerate (e.g., 60/1 for 60fps)
### Wavelet Configuration
- `wavelet_type`: Spatial wavelet
- `0`: CDF 5/3 (reversible, lossless-capable)
- `1`: CDF 9/7 (default, best compression)
- `2`: CDF 13/7 (experimental)
- `16`: DD-4 (four-point interpolating)
- `255`: Haar (demonstration)
- `temporal_wavelet`: Temporal wavelet for 3D DWT
- `0`: Haar (default for sports/high motion)
- `1`: CDF 5/3 (smooth motion)
- `decomp_levels`: Spatial DWT levels (0=auto, typically 6)
- `temporal_levels`: Temporal DWT levels (0=auto, typically 2 for 8-frame GOPs)
### Color Space
- `channel_layout`:
- `0`: YCoCg-R (default, efficient chroma)
- `1`: ICtCp (for HDR/BT.2100 sources)
- `perceptual_tuning`: 1=enable HVS perceptual quantization (default), 0=uniform
### GOP Configuration
- `enable_temporal_dwt`: 1=enable 3D DWT GOP encoding (default), 0=intra-only I-frames
- `gop_size`: Frames per GOP (8, 16, or 24; 0=auto based on framerate)
- `enable_two_pass`: 1=enable two-pass with scene change detection (default), 0=single-pass
### Quality Control
- `quality_y`: Luma quality (0-5, default: 3)
- `quality_co`: Orange chrominance quality (0-5, default: 3)
- `quality_cg`: Green chrominance quality (0-5, default: 3)
- `dead_zone_threshold`: Dead-zone quantization (0=disabled, 1-10 typical)
### Entropy Coding
- `entropy_coder`:
- `0`: Twobitmap (default, fast)
- `1`: EZBC (better compression for high-quality)
- `zstd_level`: Zstd compression level (3-22, default: 7)
### Multi-threading
- `num_threads`: Worker threads
- `0`: Single-threaded (default for CLI)
- `-1`: Auto-detect CPU cores
- `1-16`: Explicit thread count
### Encoder Presets
- `encoder_preset`: Preset flags
- `0x01`: Sports mode (finer temporal quantization)
- `0x02`: Anime mode (disable grain)
## TAV Packet Types
Output packets have type field indicating content:
- `0x10`: I-frame (intra-only, single frame)
- `0x11`: P-frame (delta from previous)
- `0x12`: GOP unified (3D DWT, multiple frames)
- `0x24`: TAD audio (DWT-based audio codec)
- `0xF0`: Loop point start
- `0xFC`: GOP sync (frame count marker)
- `0xFD`: Timecode metadata
## Performance Notes
### Threading Model
- Library manages internal thread pool when `num_threads > 0`
- GOP encoding is parallelized across worker threads
- For CLI tools: use `num_threads=0` (single-threaded) to avoid double-threading with external parallelism
- For library integration: use `num_threads=-1` or explicit count for optimal performance
### Memory Usage
- Each encoder context allocates:
- GOP buffer: `gop_size × width × height × 3` bytes (RGB frames)
- DWT coefficients: `~width × height × 12` bytes per channel
- Thread pool: `num_threads × (GOP buffer + workspace)`
- Typical 1920×1080 encoder with GOP=24: ~180 MB per context
### Encoding Speed
- Single-threaded: 10-15 fps (1920×1080 on modern CPU)
- Multi-threaded (4 threads): 30-40 fps
- GOP size affects latency: larger GOP = higher latency, better compression
## Integration with TAD Audio
TAV files typically include TAD-compressed audio. Link with both libraries:
```c
#include "tav_encoder_lib.h"
#include "encoder_tad.h"
// Encode video frame
tav_encoder_encode_frame(video_ctx, rgb_frame, pts, &video_packet);
// Encode audio chunk (32kHz stereo, float samples)
tad32_encode_chunk(audio_ctx, pcm_samples, num_samples, &audio_data, &audio_size);
// Mux both into TAV file (interleave by frame PTS)
```
## Error Handling
Encoding functions return a negative value on failure and record an error message that can be retrieved with `tav_encoder_get_error()`:
```c
if (tav_encoder_encode_frame(ctx, frame, pts, &packet) < 0) {
fprintf(stderr, "Encoding failed: %s\n", tav_encoder_get_error(ctx));
// Handle error
}
```
## Limitations
- Maximum resolution: 8192×8192
- GOP size: 1-48 frames
- Single-tile encoding only (no spatial tiling)
- Requires even width and height
## License
Part of the TSVM project.
## See Also
- `include/tav_encoder_lib.h` - Complete API documentation
- `src/encoder_tav.c` - CLI reference implementation
- `lib/libtadenc/` - TAD audio encoder library

View File

@@ -0,0 +1,255 @@
/**
* TAV Encoder - Color Space Conversion Library
*
* Provides RGB <-> YCoCg-R and RGB <-> ICtCp color space conversions
* for the TSVM Advanced Video (TAV) encoder.
*
* Extracted from encoder_tav.c as part of library refactoring.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
// =============================================================================
// Utility Functions
// =============================================================================
// Limit x to the range [min, max]; the lower bound is checked first.
static inline int CLAMP(int x, int min, int max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
// Single-precision clamp of x to [min, max]; the lower bound is checked first.
static inline float FCLAMP(float x, float min, float max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
static inline int iround(double v) {
return (int)floor(v + 0.5);
}
// =============================================================================
// sRGB Gamma Helpers
// =============================================================================
static inline double srgb_linearise(double val) {
if (val <= 0.04045) return val / 12.92;
return pow((val + 0.055) / 1.055, 2.4);
}
static inline double srgb_unlinearise(double val) {
if (val <= 0.0031308) return 12.92 * val;
return 1.055 * pow(val, 1.0/2.4) - 0.055;
}
// =============================================================================
// HLG (Hybrid Log-Gamma) Transfer Functions
// =============================================================================
static inline double HLG_OETF(double E) {
const double a = 0.17883277;
const double b = 0.28466892; // 1 - 4*a
const double c = 0.55991073; // 0.5 - a*ln(4*a)
if (E <= 1.0/12.0) return sqrt(3.0 * E);
return a * log(12.0 * E - b) + c;
}
static inline double HLG_EOTF(double Ep) {
const double a = 0.17883277;
const double b = 0.28466892;
const double c = 0.55991073;
if (Ep <= 0.5) {
double val = Ep * Ep / 3.0;
return val;
}
double val = (exp((Ep - c) / a) + b) / 12.0;
return val;
}
// =============================================================================
// Color Space Transformation Matrices
// =============================================================================
// BT.2100 RGB -> LMS matrix
// Row order is L, M, S; column order is R, G, B. Each row sums to 4096/4096.
static const double M_RGB_TO_LMS[3][3] = {
{1688.0/4096, 2146.0/4096, 262.0/4096},
{ 683.0/4096, 2951.0/4096, 462.0/4096},
{ 99.0/4096, 309.0/4096, 3688.0/4096}
};
// LMS -> RGB inverse matrix
// (precomputed; presumably the numerical inverse of M_RGB_TO_LMS - verify if either is edited)
static const double M_LMS_TO_RGB[3][3] = {
{ 6.1723815689243215, -5.319534979827695, 0.14699442094633924},
{-1.3243428148026244, 2.560286104841917, -0.2359203727576164},
{-0.011819739235953752, -0.26473549971186555, 1.2767952602537955}
};
// ICtCp matrix (L' M' S' -> I Ct Cp) - BT.2100 constants
// Row order is I, Ct, Cp; column order is L', M', S'.
static const double M_LMSPRIME_TO_ICTCP[3][3] = {
{ 2048.0/4096.0, 2048.0/4096.0, 0.0 },
{ 3625.0/4096.0, -7465.0/4096.0, 3840.0/4096.0 },
{ 9500.0/4096.0, -9212.0/4096.0, -288.0/4096.0 }
};
// ICtCp -> L' M' S' inverse matrix
// (precomputed; presumably the numerical inverse of M_LMSPRIME_TO_ICTCP - verify if either is edited)
static const double M_ICTCP_TO_LMSPRIME[3][3] = {
{ 1.0, 0.015718580108730416, 0.2095810681164055 },
{ 1.0, -0.015718580108730416, -0.20958106811640548},
{ 1.0, 1.0212710798422344, -0.6052744909924316 }
};
// =============================================================================
// YCoCg-R Color Space Conversion
// =============================================================================
/**
 * Convert an RGB24 frame to YCoCg-R planes.
 *
 * Per pixel (lifting form, evaluated in single-precision float):
 * - Co  = R - B
 * - tmp = B + Co/2
 * - Cg  = G - tmp           (= G - (R+B)/2)
 * - Y   = tmp + Cg/2        (= (R + 2G + B)/4)
 *
 * @param rgb    Input RGB24 data, read as interleaved byte triplets
 *               (R,G,B,R,G,B,...), width*height*3 bytes
 * @param y      Output luma plane (width*height floats)
 * @param co     Output orange chrominance plane (width*height floats)
 * @param cg     Output green chrominance plane (width*height floats)
 * @param width  Frame width in pixels
 * @param height Frame height in pixels
 *
 * Nothing is written when width or height is not positive.
 */
void tav_rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg,
                      int width, int height)
{
    if (width <= 0 || height <= 0) {
        return;
    }

    // Count pixels in size_t: width * height as an int product can overflow
    // for very large frames.
    const size_t total_pixels = (size_t)width * (size_t)height;
    const uint8_t *px = rgb;

    for (size_t i = 0; i < total_pixels; i++, px += 3) {
        const float r = px[0];
        const float g = px[1];
        const float b = px[2];

        const float chroma_o = r - b;
        const float tmp = b + chroma_o * 0.5f;
        const float chroma_g = g - tmp;

        co[i] = chroma_o;
        cg[i] = chroma_g;
        y[i] = tmp + chroma_g * 0.5f;
    }
}
// =============================================================================
// ICtCp Color Space Conversion (HDR-capable)
// =============================================================================
/**
 * Convert one 8-bit sRGB pixel to ICtCp using the HLG transfer function.
 *
 * Pipeline: sRGB decode -> linear RGB -> LMS -> HLG OETF -> ICtCp, then the
 * result is scaled by 255, the two chroma terms are offset by 127.5, and
 * every output is clamped to [0, 255].
 *
 * @param r8     Input red component (0-255)
 * @param g8     Input green component (0-255)
 * @param b8     Input blue component (0-255)
 * @param out_I  Output intensity (0-255)
 * @param out_Ct Output Ct chroma (0-255, centred at 127.5)
 * @param out_Cp Output Cp chroma (0-255, centred at 127.5)
 */
void tav_srgb8_to_ictcp_hlg(uint8_t r8, uint8_t g8, uint8_t b8,
                            double *out_I, double *out_Ct, double *out_Cp)
{
    // Normalise to 0..1 and undo the sRGB transfer curve.
    const double lin_r = srgb_linearise((double)r8 / 255.0);
    const double lin_g = srgb_linearise((double)g8 / 255.0);
    const double lin_b = srgb_linearise((double)b8 / 255.0);

    // Linear RGB -> LMS, one matrix row per output component.
    double lms[3];
    for (int row = 0; row < 3; row++) {
        lms[row] = M_RGB_TO_LMS[row][0]*lin_r + M_RGB_TO_LMS[row][1]*lin_g + M_RGB_TO_LMS[row][2]*lin_b;
    }

    // HLG OETF on each cone response, in L, M, S order.
    double lms_prime[3];
    for (int ch = 0; ch < 3; ch++) {
        lms_prime[ch] = HLG_OETF(lms[ch]);
    }

    // L'M'S' -> ICtCp.
    double ictcp[3];
    for (int row = 0; row < 3; row++) {
        ictcp[row] = M_LMSPRIME_TO_ICTCP[row][0]*lms_prime[0] + M_LMSPRIME_TO_ICTCP[row][1]*lms_prime[1] + M_LMSPRIME_TO_ICTCP[row][2]*lms_prime[2];
    }

    // Map onto the 0-255 working range; chroma is centred on 127.5.
    *out_I = FCLAMP(ictcp[0] * 255.0, 0.0, 255.0);
    *out_Ct = FCLAMP(ictcp[1] * 255.0 + 127.5, 0.0, 255.0);
    *out_Cp = FCLAMP(ictcp[2] * 255.0 + 127.5, 0.0, 255.0);
}
/**
 * Convert one ICtCp sample (HLG) back to an 8-bit sRGB pixel.
 *
 * Pipeline: undo the 0-255 scaling -> L'M'S' -> HLG_EOTF -> LMS -> linear
 * RGB -> sRGB encode. Each output is clamped to [0, 255] and rounded.
 *
 * @param I8  Input intensity (0-255)
 * @param Ct8 Input Ct chroma (0-255, centred at 127.5)
 * @param Cp8 Input Cp chroma (0-255, centred at 127.5)
 * @param r8  Output red component (0-255)
 * @param g8  Output green component (0-255)
 * @param b8  Output blue component (0-255)
 */
void tav_ictcp_hlg_to_srgb8(double I8, double Ct8, double Cp8,
                            uint8_t *r8, uint8_t *g8, uint8_t *b8)
{
    // Back to the unit range; chroma becomes signed around zero.
    const double intensity = I8 / 255.0;
    const double chroma_t = (Ct8 - 127.5) / 255.0;
    const double chroma_p = (Cp8 - 127.5) / 255.0;

    // ICtCp -> L'M'S'.
    double lms_prime[3];
    for (int row = 0; row < 3; row++) {
        lms_prime[row] = M_ICTCP_TO_LMSPRIME[row][0]*intensity + M_ICTCP_TO_LMSPRIME[row][1]*chroma_t + M_ICTCP_TO_LMSPRIME[row][2]*chroma_p;
    }

    // Undo the HLG non-linearity, in L, M, S order.
    double lms[3];
    for (int ch = 0; ch < 3; ch++) {
        lms[ch] = HLG_EOTF(lms_prime[ch]);
    }

    // LMS -> linear RGB.
    double linear[3];
    for (int row = 0; row < 3; row++) {
        linear[row] = M_LMS_TO_RGB[row][0]*lms[0] + M_LMS_TO_RGB[row][1]*lms[1] + M_LMS_TO_RGB[row][2]*lms[2];
    }

    // Re-apply the sRGB curve, then clamp and round to 8 bits.
    const double enc_r = srgb_unlinearise(linear[0]);
    const double enc_g = srgb_unlinearise(linear[1]);
    const double enc_b = srgb_unlinearise(linear[2]);

    *r8 = (uint8_t)iround(FCLAMP(enc_r * 255.0, 0.0, 255.0));
    *g8 = (uint8_t)iround(FCLAMP(enc_g * 255.0, 0.0, 255.0));
    *b8 = (uint8_t)iround(FCLAMP(enc_b * 255.0, 0.0, 255.0));
}

View File

@@ -0,0 +1,67 @@
/**
* TAV Encoder - Color Space Conversion Library
*
* Public API for RGB <-> YCoCg-R and RGB <-> ICtCp color space conversions.
*/
#ifndef TAV_ENCODER_COLOR_H
#define TAV_ENCODER_COLOR_H
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// =============================================================================
// YCoCg-R Color Space Conversion
// =============================================================================
/**
* Convert RGB24 to YCoCg-R color space for a full frame.
*
* Per pixel: Co = R - B, Cg = G - (R + B) / 2, Y = (R + 2G + B) / 4,
* computed in float.
*
* @param rgb Input RGB24 data (interleaved: RGBRGBRGB..., 3 bytes per pixel)
* @param y Output luma channel (width * height floats)
* @param co Output orange chrominance (width * height floats)
* @param cg Output green chrominance (width * height floats)
* @param width Frame width
* @param height Frame height
*/
void tav_rgb_to_ycocg(const uint8_t *rgb, float *y, float *co, float *cg,
int width, int height);
// =============================================================================
// ICtCp Color Space Conversion (HDR-capable)
// =============================================================================
/**
* Convert sRGB8 to ICtCp color space using HLG transfer function.
*
* Converts a single pixel. All three outputs are clamped to [0, 255];
* they are returned as doubles and are not rounded.
*
* @param r8 Input red component (0-255)
* @param g8 Input green component (0-255)
* @param b8 Input blue component (0-255)
* @param out_I Output intensity (0-255)
* @param out_Ct Output tritanope (0-255, centered at 127.5)
* @param out_Cp Output protanope (0-255, centered at 127.5)
*/
void tav_srgb8_to_ictcp_hlg(uint8_t r8, uint8_t g8, uint8_t b8,
double *out_I, double *out_Ct, double *out_Cp);
/**
* Convert ICtCp back to sRGB8 using HLG inverse transfer function.
*
* Converts a single pixel. Each output component is clamped to [0, 255]
* and rounded before being stored.
*
* @param I8 Input intensity (0-255)
* @param Ct8 Input tritanope (0-255, centered at 127.5)
* @param Cp8 Input protanope (0-255, centered at 127.5)
* @param r8 Output red component (0-255)
* @param g8 Output green component (0-255)
* @param b8 Output blue component (0-255)
*/
void tav_ictcp_hlg_to_srgb8(double I8, double Ct8, double Cp8,
uint8_t *r8, uint8_t *g8, uint8_t *b8);
#ifdef __cplusplus
}
#endif
#endif // TAV_ENCODER_COLOR_H

View File

@@ -0,0 +1,619 @@
/**
* TAV Encoder - Discrete Wavelet Transform (DWT) Library
*
* Provides multi-resolution wavelet decomposition for video compression.
* Supports multiple wavelet types: CDF 5/3, 9/7, 13/7, DD-4, and Haar.
*
* Extracted from encoder_tav.c as part of library refactoring.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
// =============================================================================
// Wavelet Type Constants
// =============================================================================
#define WAVELET_5_3_REVERSIBLE 0 // CDF 5/3 - Lossless capable
#define WAVELET_9_7_IRREVERSIBLE 1 // CDF 9/7 - Higher compression (default)
#define WAVELET_BIORTHOGONAL_13_7 2 // Biorthogonal 13/7
#define WAVELET_DD4 16 // Deslauriers-Dubuc 4-point interpolating
#define WAVELET_HAAR 255 // Haar - Simplest wavelet
// =============================================================================
// 1D Forward DWT Transforms
// =============================================================================
/**
 * Forward 1D CDF 5/3 lifting transform, performed in place.
 *
 * One predict step produces the high-pass half, one update step produces
 * the low-pass half. On return the buffer holds ceil(length/2) low-pass
 * values followed by floor(length/2) high-pass values.
 *
 * @param data   In/out signal (overwritten with the coefficients)
 * @param length Number of samples; lengths below 2 are left untouched
 */
static void dwt_53_forward_1d(float *data, int length) {
    if (length < 2) {
        return;
    }

    const int n_low = (length + 1) / 2;
    const int n_high = length / 2;

    float *scratch = calloc(length, sizeof(float));
    float *low = scratch;
    float *high = scratch + n_low;

    // Predict: each odd sample minus the mean of its even neighbours.
    // The right neighbour is mirrored at the end of the signal.
    for (int k = 0; k < n_high; k++) {
        const float left = data[2 * k];
        const float right = (2 * k + 2 < length) ? data[2 * k + 2] : data[2 * k];
        const float pred = 0.5f * (left + right);
        high[k] = data[2 * k + 1] - pred;
    }

    // Update: each even sample plus a quarter of the adjacent details.
    // The detail at index n_low - 1 is never used as the "after" term.
    for (int k = 0; k < n_low; k++) {
        const float before = (k > 0) ? high[k - 1] : 0;
        const float after = (k < n_low - 1) ? high[k] : 0;
        const float update = 0.25f * (before + after);
        low[k] = data[2 * k] + update;
    }

    memcpy(data, scratch, length * sizeof(float));
    free(scratch);
}
/**
 * Forward 1D CDF 9/7 lifting transform, performed in place.
 *
 * The signal is split into even and odd samples, run through two
 * predict/update passes, and then scaled (low-pass multiplied by K,
 * high-pass divided by K). On return the buffer holds ceil(length/2)
 * low-pass values followed by floor(length/2) high-pass values.
 *
 * @param data   In/out signal (overwritten with the coefficients)
 * @param length Number of samples; lengths below 2 are left untouched
 */
static void dwt_97_forward_1d(float *data, int length) {
    if (length < 2) {
        return;
    }

    // JPEG2000 9/7 lifting weights: {predict, update} for each pass.
    static const float lifting[2][2] = {
        { -1.586134342f, -0.052980118f },
        {  0.882911076f,  0.443506852f }
    };
    const float scale = 1.230174105f;

    const int n_low = (length + 1) / 2;
    const int n_high = length / 2;

    float *scratch = malloc(length * sizeof(float));
    float *low = scratch;
    float *high = scratch + n_low;

    // De-interleave: even samples first, odd samples after them.
    for (int k = 0; k < n_low; k++) {
        low[k] = data[2 * k];
    }
    for (int k = 0; k < n_high; k++) {
        high[k] = data[2 * k + 1];
    }

    for (int pass = 0; pass < 2; pass++) {
        const float predict_w = lifting[pass][0];
        const float update_w = lifting[pass][1];

        // Predict: adjust each detail from its two low-pass neighbours,
        // repeating the current sample past the right edge.
        for (int k = 0; k < n_high; k++) {
            const float s_here = low[k];
            const float s_next = (k + 1 < n_low) ? low[k + 1] : s_here;
            high[k] += predict_w * (s_here + s_next);
        }

        // Update: adjust each low-pass value from the adjacent details.
        // A missing current detail counts as zero; a missing previous
        // detail falls back to the current one.
        for (int k = 0; k < n_low; k++) {
            const float d_here = (k < n_high) ? high[k] : 0.0f;
            const float d_prev = (k > 0) ? high[k - 1] : d_here;
            low[k] += update_w * (d_prev + d_here);
        }
    }

    // Final normalisation of the two bands.
    for (int k = 0; k < n_low; k++) {
        low[k] *= scale;
    }
    for (int k = 0; k < n_high; k++) {
        high[k] /= scale;
    }

    memcpy(data, scratch, length * sizeof(float));
    free(scratch);
}
/**
* CDF 9/7 wavelet forward 1D using 16.16 fixed-point lifting weights.
*
* Same split / predict / update / scale structure as dwt_97_forward_1d(),
* but every lifting increment is computed in integer arithmetic and then
* added to the float buffer. Output layout is ceil(length/2) low-pass
* values followed by floor(length/2) high-pass values.
*
* NOTE(review): the original description called this "integer-reversible";
* no matching inverse is visible in this part of the file and the final
* scaling step rounds, so confirm reversibility before relying on it.
*
* @param data In/out signal data
* @param length Signal length (values below 2 return immediately)
*/
static void dwt_97_iint_forward_1d(float *data, int length) {
if (length < 2) return;
float *temp = malloc(length * sizeof(float));
int half = (length + 1) / 2;
// De-interleave: even samples to temp[0..half), odd samples after them.
for (int i = 0; i < half; ++i) temp[i] = data[2*i];
for (int i = 0; i < length/2; ++i) temp[half + i] = data[2*i + 1];
// Lifting weights scaled by 2^SHIFT; ROUND is half an output unit.
const int SHIFT = 16;
const int64_t ROUND = 1LL << (SHIFT - 1);
const int64_t A = -103949; // alpha, ~ -1.586134342 * 2^16
const int64_t B = -3472; // beta, ~ -0.052980118 * 2^16
const int64_t G = 57862; // gamma, ~ 0.882911076 * 2^16
const int64_t D = 29066; // delta, ~ 0.443506852 * 2^16
// NOTE(review): 1.230174105 * 2^16 is about 80621 and its reciprocal
// times 2^16 is about 53274, so the two scaling constants below do not
// match the values stated for them. Changing them alters the output,
// so confirm against the decoder before touching them.
const int64_t K_FP = 80542; // ≈ 1.230174105 * 2^16
const int64_t Ki_FP = 53283; // ≈ (1/1.230174105) * 2^16
// Fixed-point to integer with rounding applied to the magnitude, so
// positive and negative products round symmetrically.
#define RN(x) (((x)>=0)?(((x)+ROUND)>>SHIFT):(-((-(x)+ROUND)>>SHIFT)))
// Predict alpha. Assigning temp[] to an int converts float to int, which
// discards any fractional part.
for (int i = 0; i < length/2; ++i) {
int s = temp[i];
int sn = (i+1<half)? temp[i+1] : s;
temp[half+i] += RN(A * (int64_t)(s + sn));
}
// Update beta. A missing current detail counts as 0; a missing previous
// detail falls back to the current one.
for (int i = 0; i < half; ++i) {
int d = (half+i<length)? temp[half+i]:0;
int dp = (i>0 && half+i-1<length)? temp[half+i-1]:d;
temp[i] += RN(B * (int64_t)(dp + d));
}
// Predict gamma
for (int i = 0; i < length/2; ++i) {
int s = temp[i];
int sn = (i+1<half)? temp[i+1]:s;
temp[half+i] += RN(G * (int64_t)(s + sn));
}
// Update delta
for (int i = 0; i < half; ++i) {
int d = (half+i<length)? temp[half+i]:0;
int dp = (i>0 && half+i-1<length)? temp[half+i-1]:d;
temp[i] += RN(D * (int64_t)(dp + d));
}
// Scaling. Unlike RN(), this shifts the signed product directly; right
// shift of a negative value is implementation-defined in C, so negative
// coefficients may round differently from the lifting steps above.
for (int i = 0; i < half; ++i) {
temp[i] = (((int64_t)temp[i] * K_FP + ROUND) >> SHIFT);
}
for (int i = 0; i < length/2; ++i) {
if (half + i < length) {
temp[half + i] = (((int64_t)temp[half + i] * Ki_FP + ROUND) >> SHIFT);
}
}
memcpy(data, temp, length * sizeof(float));
free(temp);
#undef RN
}
/**
 * Forward 1D Deslauriers-Dubuc 4-point (DD-4) lifting transform, in place.
 *
 * Each odd sample is predicted from four even samples with the weights
 * -1/16, 9/16, 9/16, -1/16; the even samples are then updated with a
 * quarter of the adjacent details. On return the buffer holds
 * ceil(length/2) low-pass values followed by floor(length/2) high-pass
 * values.
 *
 * @param data   In/out signal (overwritten with the coefficients)
 * @param length Number of samples; lengths below 2 are left untouched
 */
static void dwt_dd4_forward_1d(float *data, int length) {
    if (length < 2) {
        return;
    }

    const int n_low = (length + 1) / 2;
    const int n_high = length / 2;

    float *scratch = malloc(length * sizeof(float));
    float *low = scratch;
    float *high = scratch + n_low;

    // De-interleave: even samples first, odd samples after them.
    for (int k = 0; k < n_low; k++) {
        low[k] = data[2 * k];
    }
    for (int k = 0; k < n_high; k++) {
        high[k] = data[2 * k + 1];
    }

    // Predict: pick the four tap positions, clamping or reflecting the
    // ones that would fall outside the low-pass band.
    for (int k = 0; k < n_high; k++) {
        const int i_prev = (k > 0) ? k - 1 : 0;
        const int i_next = (k + 1 < n_low) ? k + 1 : n_low - 1;
        int i_far;
        if (k + 2 < n_low) {
            i_far = k + 2;
        } else if (n_low > 1) {
            i_far = n_low - 2;
        } else {
            i_far = n_low - 1;
        }

        const float prediction = (-1.0f/16.0f) * low[i_prev] + (9.0f/16.0f) * low[k] +
                                 (9.0f/16.0f) * low[i_next] + (-1.0f/16.0f) * low[i_far];
        high[k] -= prediction;
    }

    // Update: missing neighbours on either side contribute zero.
    for (int k = 0; k < n_low; k++) {
        const float d_here = (k < n_high) ? high[k] : 0.0f;
        const float d_prev = (k > 0) ? high[k - 1] : 0.0f;
        low[k] += 0.25f * (d_prev + d_here);
    }

    memcpy(data, scratch, length * sizeof(float));
    free(scratch);
}
/**
 * Forward 1D transform selected for the "biorthogonal 13/7" wavelet type.
 *
 * As implemented here this is the 5/3 predict/update lifting pair followed
 * by a scaling step (low-pass multiplied by K, high-pass divided by K);
 * it is a simplified stand-in, not a full 13-tap/7-tap filter bank.
 * On return the buffer holds ceil(length/2) low-pass values followed by
 * floor(length/2) high-pass values.
 *
 * @param data   In/out signal (overwritten with the coefficients)
 * @param length Number of samples; lengths below 2 are left untouched
 */
static void dwt_bior137_forward_1d(float *data, int length) {
    if (length < 2) {
        return;
    }

    const float scale = 1.230174105f;
    const int n_low = (length + 1) / 2;
    const int n_high = length / 2;

    float *scratch = malloc(length * sizeof(float));
    float *low = scratch;
    float *high = scratch + n_low;

    // Predict: odd sample minus the mean of its even neighbours, with the
    // right neighbour mirrored at the end of the signal.
    for (int k = 0; k < n_high; k++) {
        const float left = data[2 * k];
        const float right = (2 * k + 2 < length) ? data[2 * k + 2] : data[2 * k];
        const float prediction = 0.5f * (left + right);
        high[k] = data[2 * k + 1] - prediction;
    }

    // Update: even sample plus a quarter of the adjacent details.
    for (int k = 0; k < n_low; k++) {
        const float before = (k > 0) ? high[k - 1] : 0;
        const float after = (k < n_low - 1) ? high[k] : 0;
        const float update = 0.25f * (before + after);
        low[k] = data[2 * k] + update;
    }

    // Band normalisation.
    for (int k = 0; k < n_low; k++) {
        low[k] *= scale;
    }
    for (int k = 0; k < n_high; k++) {
        high[k] /= scale;
    }

    memcpy(data, scratch, length * sizeof(float));
    free(scratch);
}
/**
 * Forward 1D Haar transform, performed in place.
 *
 * Every pair (a, b) becomes the average (a + b) / 2 in the low-pass half
 * and the half-difference (a - b) / 2 in the high-pass half. With an odd
 * length the final unpaired sample is copied to the low-pass half as is.
 *
 * @param data   In/out signal (overwritten with the coefficients)
 * @param length Number of samples; lengths below 2 are left untouched
 */
static void dwt_haar_forward_1d(float *data, int length) {
    if (length < 2) {
        return;
    }

    const int n_low = (length + 1) / 2;
    const int n_pairs = length / 2;

    float *scratch = malloc(length * sizeof(float));

    for (int k = 0; k < n_pairs; k++) {
        const float first = data[2 * k];
        const float second = data[2 * k + 1];
        scratch[k] = (first + second) / 2.0f;
        scratch[n_low + k] = (first - second) / 2.0f;
    }

    // Odd length: the last sample has no partner and passes straight through.
    if (n_pairs < n_low) {
        scratch[n_pairs] = data[2 * n_pairs];
    }

    memcpy(data, scratch, length * sizeof(float));
    free(scratch);
}
// =============================================================================
// 1D Inverse DWT Transforms
// =============================================================================
/**
 * Inverse 1D CDF 5/3 lifting transform, performed in place.
 *
 * Expects ceil(length/2) low-pass values followed by floor(length/2)
 * high-pass values, undoes the update step and then the predict step of
 * dwt_53_forward_1d(), and writes the interleaved samples back.
 *
 * @param data   In/out buffer (coefficients in, samples out)
 * @param length Number of values; lengths below 2 are left untouched
 */
static void dwt_53_inverse_1d(float *data, int length) {
    if (length < 2) {
        return;
    }

    const int n_low = (length + 1) / 2;
    const int n_high = length / 2;

    float *scratch = malloc(length * sizeof(float));
    memcpy(scratch, data, length * sizeof(float));
    float *low = scratch;
    const float *high = scratch + n_low;

    // Remove the update contribution to recover the even samples.
    for (int k = 0; k < n_low; k++) {
        const float before = (k > 0) ? high[k - 1] : 0;
        const float after = (k < n_low - 1) ? high[k] : 0;
        const float update = 0.25f * (before + after);
        low[k] -= update;
    }

    // Re-interleave: even samples are copied, odd samples get the
    // prediction (mean of the neighbouring evens) added back.
    for (int k = 0; k < n_low; k++) {
        data[2 * k] = low[k];
        if (k < n_high) {
            const float right = (k + 1 < n_low) ? low[k + 1] : low[k];
            const float pred = 0.5f * (low[k] + right);
            data[2 * k + 1] = high[k] + pred;
        }
    }

    free(scratch);
}
/**
 * Inverse 1D Haar transform, performed in place.
 *
 * Expects ceil(length/2) averages followed by floor(length/2)
 * half-differences. Each pair is rebuilt as (avg + diff, avg - diff);
 * with an odd length the last average is copied through unchanged.
 *
 * @param data   In/out buffer (coefficients in, samples out)
 * @param length Number of values; lengths below 2 are left untouched
 */
static void dwt_haar_inverse_1d(float *data, int length) {
    if (length < 2) {
        return;
    }

    const int n_low = (length + 1) / 2;
    const int n_pairs = length / 2;

    float *scratch = malloc(length * sizeof(float));

    for (int k = 0; k < n_pairs; k++) {
        const float avg = data[k];
        const float diff = data[n_low + k];
        scratch[2 * k] = avg + diff;
        scratch[2 * k + 1] = avg - diff;
    }

    // Odd length: the trailing low-pass value is the last sample itself.
    if (n_pairs < n_low) {
        scratch[2 * n_pairs] = data[n_pairs];
    }

    memcpy(data, scratch, length * sizeof(float));
    free(scratch);
}
// =============================================================================
// 2D DWT Transform
// =============================================================================
/**
 * Run the forward 1D transform selected by filter_type on one line.
 *
 * Unknown filter types leave the line untouched.
 */
static void tav_dwt_forward_line(float *line, int length, int filter_type) {
    switch (filter_type) {
        case WAVELET_5_3_REVERSIBLE:
            dwt_53_forward_1d(line, length);
            break;
        case WAVELET_9_7_IRREVERSIBLE:
            dwt_97_forward_1d(line, length);
            break;
        case WAVELET_BIORTHOGONAL_13_7:
            dwt_bior137_forward_1d(line, length);
            break;
        case WAVELET_DD4:
            dwt_dd4_forward_1d(line, length);
            break;
        case WAVELET_HAAR:
            dwt_haar_forward_1d(line, length);
            break;
        default:
            break;
    }
}

/**
 * Apply a multi-level 2D forward DWT to a frame, in place.
 *
 * At every level the active region (top-left corner of the frame) is
 * transformed row by row and then column by column; the region then
 * shrinks to ceil(w/2) x ceil(h/2) for the next level.
 *
 * @param data        In/out image data, row-major with a stride of `width`
 * @param width       Image width
 * @param height      Image height
 * @param levels      Number of decomposition levels; values <= 0 leave the
 *                    data untouched
 * @param filter_type Wavelet type (WAVELET_* constant); an unknown value
 *                    leaves the data untouched
 *
 * A NULL `data` pointer or non-positive dimensions make the call a no-op.
 * If the scratch line cannot be allocated, an error is printed to stderr
 * and the process exits with status 1.
 */
void tav_dwt_2d_forward(float *data, int width, int height, int levels, int filter_type) {
    if (!data || width < 1 || height < 1 || levels < 1) {
        return;
    }

    // Rows and columns are processed one after the other, so a single
    // scratch line sized for the longer dimension serves both.
    const int max_size = (width > height) ? width : height;
    float *line = malloc((size_t)max_size * sizeof(float));
    if (!line) {
        fprintf(stderr, "ERROR: Failed to allocate DWT line buffer of %d samples\n", max_size);
        exit(1);
    }

    const size_t stride = (size_t)width;
    int current_width = width;
    int current_height = height;

    for (int level = 0; level < levels; level++) {
        // Row transform (horizontal)
        for (int y = 0; y < current_height; y++) {
            float *row = data + (size_t)y * stride;
            memcpy(line, row, (size_t)current_width * sizeof(float));
            tav_dwt_forward_line(line, current_width, filter_type);
            memcpy(row, line, (size_t)current_width * sizeof(float));
        }

        // Column transform (vertical)
        for (int x = 0; x < current_width; x++) {
            for (int y = 0; y < current_height; y++) {
                line[y] = data[(size_t)y * stride + (size_t)x];
            }
            tav_dwt_forward_line(line, current_height, filter_type);
            for (int y = 0; y < current_height; y++) {
                data[(size_t)y * stride + (size_t)x] = line[y];
            }
        }

        // The next level works on the low-pass quadrant only.
        current_width = (current_width + 1) / 2;
        current_height = (current_height + 1) / 2;
    }

    free(line);
}
// =============================================================================
// 3D DWT Transform (Temporal + Spatial)
// =============================================================================
/**
 * Apply a 3D forward DWT to a GOP (group of pictures), in place.
 *
 * Step 1 runs a multi-level temporal 1D transform across the frames at
 * every pixel position; each level works on the first ceil(n/2) entries
 * left by the previous one. Step 2 runs tav_dwt_2d_forward() on every
 * frame.
 *
 * @param gop_data        Array of frame pointers [num_frames][width*height]
 * @param width           Frame width
 * @param height          Frame height
 * @param num_frames      Number of frames in the GOP
 * @param spatial_levels  Number of 2D spatial decomposition levels
 * @param temporal_levels Number of temporal decomposition levels; values
 *                        <= 0 skip the temporal step
 * @param spatial_filter  Wavelet type for the spatial transform
 * @param temporal_filter Wavelet type for the temporal transform:
 *                        0 (WAVELET_5_3_REVERSIBLE) selects CDF 5/3,
 *                        255 (WAVELET_HAAR) and every other value select Haar
 *
 * The call is a no-op when gop_data is NULL, num_frames < 2, width < 2 or
 * height < 2. If the temporal scratch line cannot be allocated, an error
 * is printed to stderr and the process exits with status 1.
 */
void tav_dwt_3d_forward(float **gop_data, int width, int height, int num_frames,
                        int spatial_levels, int temporal_levels,
                        int spatial_filter, int temporal_filter) {
    if (!gop_data || num_frames < 2 || width < 2 || height < 2) {
        return;
    }

    // Step 1: temporal DWT across frames
    if (temporal_levels > 0) {
        float *temporal_line = malloc((size_t)num_frames * sizeof(float));
        if (!temporal_line) {
            fprintf(stderr, "ERROR: Failed to allocate temporal DWT buffer of %d samples\n", num_frames);
            exit(1);
        }

        const size_t pixel_count = (size_t)width * (size_t)height;
        for (size_t pixel_idx = 0; pixel_idx < pixel_count; pixel_idx++) {
            // Gather this pixel's value from every frame.
            for (int t = 0; t < num_frames; t++) {
                temporal_line[t] = gop_data[t][pixel_idx];
            }

            // Each level halves (rounding up) the span it works on; once
            // fewer than two samples remain there is nothing left to do.
            int level_frames = num_frames;
            for (int level = 0; level < temporal_levels && level_frames >= 2; level++) {
                if (temporal_filter == 0) {
                    // CDF 5/3 temporal (WAVELET_5_3_REVERSIBLE)
                    dwt_53_forward_1d(temporal_line, level_frames);
                } else {
                    // Haar temporal (WAVELET_HAAR), also the fallback
                    dwt_haar_forward_1d(temporal_line, level_frames);
                }
                level_frames = (level_frames + 1) / 2;
            }

            // Scatter the temporal coefficients back to the frames.
            for (int t = 0; t < num_frames; t++) {
                gop_data[t][pixel_idx] = temporal_line[t];
            }
        }

        free(temporal_line);
    }

    // Step 2: 2D spatial DWT on each temporal subband
    for (int t = 0; t < num_frames; t++) {
        tav_dwt_2d_forward(gop_data[t], width, height, spatial_levels, spatial_filter);
    }
}
// =============================================================================
// Utility Functions
// =============================================================================
/**
 * Suggest a number of decomposition levels for a frame size.
 *
 * Counts how many times the shorter side can be halved (integer division)
 * while it is still at least 32, then caps the count at 6.
 *
 * @param width  Image width
 * @param height Image height
 * @return Number of levels, 0-6 (0 when the shorter side is below 32)
 */
int tav_dwt_calculate_levels(int width, int height) {
    int shorter = height;
    if (width < height) {
        shorter = width;
    }

    int count = 0;
    for (; shorter >= 32; shorter /= 2) {
        count++;
    }

    if (count > 6) {
        count = 6;
    }
    return count;
}

View File

@@ -0,0 +1,88 @@
/**
* TAV Encoder - Discrete Wavelet Transform Library
*
* Public API for multi-resolution wavelet decomposition.
* Supports multiple wavelet types: CDF 5/3, 9/7, 13/7, DD-4, Haar
*/
#ifndef TAV_ENCODER_DWT_H
#define TAV_ENCODER_DWT_H
#ifdef __cplusplus
extern "C" {
#endif
// =============================================================================
// Wavelet Type Constants
// =============================================================================
#define WAVELET_5_3_REVERSIBLE 0 // CDF 5/3 reversible (lossless capable)
#define WAVELET_9_7_IRREVERSIBLE 1 // CDF 9/7 JPEG2000 (default, best compression)
#define WAVELET_BIORTHOGONAL_13_7 2 // CDF 13/7 experimental
#define WAVELET_DD4 16 // Deslauriers-Dubuc 4-point interpolating
#define WAVELET_HAAR 255 // Haar (demonstration only)
// =============================================================================
// 2D Discrete Wavelet Transform
// =============================================================================
/**
* Apply 2D wavelet transform to spatial data.
*
* Uses separable 1D transforms: apply horizontal rows, then vertical columns.
* Each level transforms the top-left region left by the previous level, which
* shrinks to ceil(w/2) x ceil(h/2) per level.
*
* @param data Input/output data array (modified in-place, row-major, stride = width)
* @param width Frame width
* @param height Frame height
* @param levels Number of decomposition levels.
*        NOTE(review): this header used to say "0 = auto-calculate", but the
*        implementation performs no decomposition when levels is 0; use
*        tav_dwt_calculate_levels() to obtain a level count.
* @param filter_type Wavelet type (WAVELET_* constants)
*/
void tav_dwt_2d_forward(float *data, int width, int height,
int levels, int filter_type);
// =============================================================================
// 3D Discrete Wavelet Transform (GOP Temporal + Spatial)
// =============================================================================
/**
* Apply 3D wavelet transform to group-of-pictures (GOP).
*
* Process:
* 1. Apply temporal 1D DWT across frames at each spatial position
* 2. Apply spatial 2D DWT to each temporal subband frame
*
* The implementation returns without doing anything when num_frames,
* width or height is below 2.
*
* @param gop_data Array of frame pointers [num_frames], each width*height floats
* @param width Frame width
* @param height Frame height
* @param num_frames Number of frames in GOP
* @param spatial_levels Spatial decomposition levels, passed unchanged to
*        tav_dwt_2d_forward().
*        NOTE(review): this header used to say "0 = auto", but no automatic
*        level selection is implemented; 0 results in no spatial transform.
* @param temporal_levels Temporal decomposition levels
* @param spatial_filter Wavelet type for spatial transform
* @param temporal_filter Wavelet type for temporal transform; the
*        implementation uses CDF 5/3 for WAVELET_5_3_REVERSIBLE (0) and Haar
*        for every other value
*/
void tav_dwt_3d_forward(float **gop_data, int width, int height, int num_frames,
int spatial_levels, int temporal_levels,
int spatial_filter, int temporal_filter);
// =============================================================================
// Utility Functions
// =============================================================================
/**
* Calculate a recommended number of decomposition levels for given dimensions.
*
* The implementation counts how many times min(width, height) can be halved
* while it is still >= 32, and caps the result at 6.
* NOTE(review): this header previously documented the formula
* floor(log2(min(width, height))) - 1, which the implementation does not
* follow (e.g. 64x64 yields 2); confirm which behaviour is intended.
*
* @param width Frame width
* @param height Frame height
* @return Recommended number of levels (0-6)
*/
int tav_dwt_calculate_levels(int width, int height);
#ifdef __cplusplus
}
#endif
#endif // TAV_ENCODER_DWT_H

View File

@@ -0,0 +1,415 @@
/**
* TAV Encoder - EZBC (Embedded Zero Block Coding) Library
*
* Implements binary tree embedded zero block coding for efficient storage
* of sparse wavelet coefficients. Exploits coefficient sparsity through
* hierarchical significance testing and progressive bitplane encoding.
*
* Extracted from encoder_tav.c as part of library refactoring.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include <math.h>
// =============================================================================
// EZBC Structures
// =============================================================================
/**
* Bitstream writer for bit-level encoding.
*
* Bits are appended one at a time; within a byte the first bit written
* lands in bit 0 and later bits move toward bit 7. The buffer is owned by
* the writer and grows as bytes fill up.
*/
typedef struct {
uint8_t *data; // Heap buffer holding the written bytes
size_t capacity; // Allocated size of data, in bytes
size_t byte_pos; // Index of the byte currently being filled
uint8_t bit_pos; // 0-7, current bit position in current byte
} bitstream_t;
/**
* Rectangular block of coefficients used by the EZBC subdivision.
*
* A significant block is split into up to four children until it is 1x1.
*/
typedef struct {
int x, y; // Top-left position in 2D coefficient array
int width, height; // Block dimensions
} ezbc_block_t;
/**
* Growable array of blocks used as a work queue during EZBC encoding.
*
* The helpers in this file only append to it (queue_push); the storage is
* doubled whenever count reaches capacity.
*/
typedef struct {
ezbc_block_t *blocks; // Heap array of queued blocks
size_t count; // Number of blocks currently stored
size_t capacity; // Number of blocks the array can hold before growing
} block_queue_t;
/**
* Per-coefficient state tracked across bitplanes for refinement.
*/
typedef struct {
bool significant; // Has been marked significant
int first_bitplane; // Bitplane where it became significant
} coeff_state_t;
/**
* EZBC encoding context shared by the recursive block processing.
*
* Bundles the output stream, the coefficient plane and the per-bitplane
* working state so the recursion only needs to pass one pointer.
*/
typedef struct {
bitstream_t *bs; // Output bitstream
int16_t *coeffs; // Quantised coefficients, row-major, width * height
coeff_state_t *states; // One state entry per coefficient, same indexing
int width; // Coefficient plane width
int height; // Coefficient plane height
int bitplane; // Bitplane currently being encoded
int threshold; // Significance threshold: |coeff| >= threshold is significant
block_queue_t *next_insignificant; // Blocks found insignificant in this pass
block_queue_t *next_significant; // 1x1 blocks that became significant in this pass
int *sign_count; // Counter incremented for every sign bit written
} ezbc_context_t;
// =============================================================================
// Bitstream Operations
// =============================================================================
/**
 * Prepare a bitstream writer with a zero-filled buffer.
 *
 * Requests below 64 bytes are raised to 64. If the buffer cannot be
 * allocated, an error is printed to stderr and the process exits with
 * status 1.
 *
 * @param bs               Writer to initialise
 * @param initial_capacity Requested buffer size in bytes
 */
static void bitstream_init(bitstream_t *bs, size_t initial_capacity) {
    // A floor on the size keeps zero-byte allocations out of the picture.
    const size_t bytes = (initial_capacity < 64) ? 64 : initial_capacity;

    bs->capacity = bytes;
    bs->data = calloc(1, bytes);
    if (bs->data == NULL) {
        fprintf(stderr, "ERROR: Failed to allocate bitstream buffer of size %zu\n", bytes);
        exit(1);
    }

    bs->bit_pos = 0;
    bs->byte_pos = 0;
}
/**
 * Append a single bit to the bitstream.
 *
 * Bits fill each byte starting at bit 0. When the current byte lies beyond
 * the buffer, the buffer is doubled and the new region is zero-filled. If
 * growing fails, an error is printed to stderr and the process exits with
 * status 1 (same policy as bitstream_init()).
 *
 * @param bs  Writer to append to
 * @param bit Any non-zero value writes a 1 bit, zero writes a 0 bit
 */
static void bitstream_write_bit(bitstream_t *bs, int bit) {
    if (bs->byte_pos >= bs->capacity) {
        const size_t old_capacity = bs->capacity;
        // Doubling a zero capacity would never grow, so fall back to 64.
        const size_t new_capacity = (old_capacity > 0) ? old_capacity * 2 : 64;

        // Keep the old pointer until the new one is known to be valid.
        uint8_t *grown = realloc(bs->data, new_capacity);
        if (!grown) {
            fprintf(stderr, "ERROR: Failed to grow bitstream buffer to %zu bytes\n", new_capacity);
            exit(1);
        }

        // Only the newly added tail needs clearing; 1 bits are OR-ed in later.
        memset(grown + old_capacity, 0, new_capacity - old_capacity);
        bs->data = grown;
        bs->capacity = new_capacity;
    }

    if (bit) {
        bs->data[bs->byte_pos] |= (uint8_t)(1u << bs->bit_pos);
    }

    bs->bit_pos++;
    if (bs->bit_pos == 8) {
        bs->bit_pos = 0;
        bs->byte_pos++;
    }
}
/**
 * Append the low num_bits bits of value, least significant bit first.
 *
 * @param bs       Writer to append to
 * @param value    Source of the bits
 * @param num_bits Number of bits to write; nothing is written when <= 0
 */
static void bitstream_write_bits(bitstream_t *bs, uint32_t value, int num_bits) {
    int remaining = num_bits;
    while (remaining > 0) {
        // Emit the current lowest bit, then shift the next one into place.
        bitstream_write_bit(bs, (int)(value & 1u));
        value >>= 1;
        remaining--;
    }
}
/**
 * Number of bytes the bitstream occupies so far.
 *
 * A partially filled final byte counts as a whole byte.
 */
static size_t bitstream_size(bitstream_t *bs) {
    size_t bytes = bs->byte_pos;
    if (bs->bit_pos != 0) {
        bytes += 1;
    }
    return bytes;
}
/**
 * Release the bitstream buffer.
 *
 * The data pointer is cleared afterwards so that a repeated call is
 * harmless and stale use is easier to spot. The position fields are left
 * as they are. A NULL writer is ignored.
 */
static void bitstream_free(bitstream_t *bs) {
    if (!bs) {
        return;
    }
    free(bs->data);
    bs->data = NULL;
}
// =============================================================================
// Block Queue Operations
// =============================================================================
/**
 * Initialise an empty block queue with room for 1024 blocks.
 *
 * If the storage cannot be allocated, an error is printed to stderr and
 * the process exits with status 1 (same policy as bitstream_init()).
 */
static void queue_init(block_queue_t *q) {
    const size_t initial_capacity = 1024;

    ezbc_block_t *storage = malloc(initial_capacity * sizeof(ezbc_block_t));
    if (!storage) {
        fprintf(stderr, "ERROR: Failed to allocate EZBC block queue of %zu entries\n", initial_capacity);
        exit(1);
    }

    q->blocks = storage;
    q->capacity = initial_capacity;
    q->count = 0;
}
/**
 * Append a block to the queue, doubling the storage when it is full.
 *
 * If the storage cannot be grown, an error is printed to stderr and the
 * process exits with status 1 (same policy as bitstream_init()).
 */
static void queue_push(block_queue_t *q, ezbc_block_t block) {
    if (q->count >= q->capacity) {
        // Doubling a zero capacity would never grow, so fall back to 1024.
        const size_t new_capacity = (q->capacity > 0) ? q->capacity * 2 : 1024;

        // Keep the old pointer until the new one is known to be valid.
        ezbc_block_t *grown = realloc(q->blocks, new_capacity * sizeof(ezbc_block_t));
        if (!grown) {
            fprintf(stderr, "ERROR: Failed to grow EZBC block queue to %zu entries\n", new_capacity);
            exit(1);
        }

        q->blocks = grown;
        q->capacity = new_capacity;
    }

    q->blocks[q->count++] = block;
}
/**
 * Release the queue's block storage.
 *
 * The blocks pointer is cleared afterwards so that a repeated call is
 * harmless. The count and capacity fields are left as they are. A NULL
 * queue is ignored.
 */
static void queue_free(block_queue_t *q) {
    if (!q) {
        return;
    }
    free(q->blocks);
    q->blocks = NULL;
}
// =============================================================================
// EZBC Helper Functions
// =============================================================================
/**
 * Test whether a block is insignificant at the given threshold.
 *
 * Only the part of the block that lies inside the width x height plane is
 * examined.
 *
 * @return true when every examined coefficient satisfies |coeff| < threshold
 */
static bool is_zero_block_ezbc(int16_t *coeffs, int width, int height,
                               const ezbc_block_t *block, int threshold) {
    // Clip the block's far edges to the plane once, up front.
    int y_end = block->y + block->height;
    if (y_end > height) {
        y_end = height;
    }
    int x_end = block->x + block->width;
    if (x_end > width) {
        x_end = width;
    }

    for (int row = block->y; row < y_end; row++) {
        const int row_base = row * width;
        for (int col = block->x; col < x_end; col++) {
            if (abs(coeffs[row_base + col]) >= threshold) {
                return false;
            }
        }
    }
    return true;
}
/**
 * Return the largest absolute value in a coefficient array.
 *
 * @param coeffs Coefficients to scan
 * @param count  Number of coefficients
 * @return Maximum of |coeffs[i]|, or 0 for an empty array
 */
static int find_max_abs_ezbc(int16_t *coeffs, size_t count) {
    int largest = 0;
    size_t remaining = count;

    // The maximum does not depend on visiting order, so walk from the end.
    while (remaining > 0) {
        remaining--;
        int magnitude = coeffs[remaining];
        if (magnitude < 0) {
            magnitude = -magnitude;
        }
        if (magnitude > largest) {
            largest = magnitude;
        }
    }
    return largest;
}
/**
 * Position of the highest set bit, i.e. floor(log2(value)).
 *
 * Values of 1 or less (including zero and negatives) yield 0.
 */
static int get_msb_bitplane(int value) {
    if (value <= 1) {
        return 0;
    }

    int position = 0;
    for (int rest = value >> 1; rest > 0; rest >>= 1) {
        position++;
    }
    return position;
}
/**
 * Encode a block already known to be significant at the current threshold.
 *
 * A 1x1 block emits its sign bit (1 = negative), is marked significant at
 * the current bitplane and is queued on next_significant. Any larger block
 * is split at its midpoints into up to four children, visited in the
 * order top-left, top-right, bottom-left, bottom-right. For each child one
 * significance bit is written; a significant child is processed
 * recursively right away, an insignificant one is queued on
 * next_insignificant.
 */
static void process_significant_block_recursive(ezbc_context_t *ctx, ezbc_block_t block) {
    // Leaf: a single coefficient.
    if (block.width == 1 && block.height == 1) {
        const int idx = block.y * ctx->width + block.x;
        bitstream_write_bit(ctx->bs, ctx->coeffs[idx] < 0 ? 1 : 0);
        (*ctx->sign_count)++;
        ctx->states[idx].significant = true;
        ctx->states[idx].first_bitplane = ctx->bitplane;
        queue_push(ctx->next_significant, block);
        return;
    }

    // Split sizes for the left/top part; never allowed to be zero.
    int split_w = block.width / 2;
    int split_h = block.height / 2;
    if (split_w == 0) split_w = 1;
    if (split_h == 0) split_h = 1;

    const bool has_right = block.width > split_w;
    const bool has_bottom = block.height > split_h;

    // Collect the children that exist, in visiting order.
    ezbc_block_t children[4];
    int child_count = 0;

    children[child_count++] = (ezbc_block_t){block.x, block.y, split_w, split_h};
    if (has_right) {
        children[child_count++] = (ezbc_block_t){block.x + split_w, block.y,
                                                 block.width - split_w, split_h};
    }
    if (has_bottom) {
        children[child_count++] = (ezbc_block_t){block.x, block.y + split_h,
                                                 split_w, block.height - split_h};
    }
    if (has_right && has_bottom) {
        children[child_count++] = (ezbc_block_t){block.x + split_w, block.y + split_h,
                                                 block.width - split_w, block.height - split_h};
    }

    for (int c = 0; c < child_count; c++) {
        const bool insignificant = is_zero_block_ezbc(ctx->coeffs, ctx->width, ctx->height,
                                                      &children[c], ctx->threshold);
        if (insignificant) {
            bitstream_write_bit(ctx->bs, 0);
            queue_push(ctx->next_insignificant, children[c]);
        } else {
            bitstream_write_bit(ctx->bs, 1);
            process_significant_block_recursive(ctx, children[c]);
        }
    }
}
// =============================================================================
// Main EZBC Encoding Function
// =============================================================================
/**
* EZBC encoding for a single channel.
*
* Uses two separate queues for insignificant blocks and significant 1x1 blocks.
* Encodes coefficients progressively from MSB to LSB bitplane.
*
* Algorithm:
* 1. Find MSB bitplane from maximum absolute coefficient value
* 2. Write header: MSB bitplane, width, height
* 3. For each bitplane from MSB to 0:
* a. Process insignificant blocks: check if they become significant
* b. For newly significant blocks: recursively subdivide until 1x1
* c. Emit sign bits for newly significant 1x1 coefficients
* d. Process already-significant coefficients: emit refinement bits
* 4. Return encoded bitstream
*
* @param coeffs Input quantized coefficients (int16_t array)
* @param count Number of coefficients
* @param width Frame width
* @param height Frame height
* @param output Output buffer pointer (allocated by this function)
* @return Encoded size in bytes
*/
size_t tav_encode_channel_ezbc(int16_t *coeffs, size_t count, int width, int height,
uint8_t **output) {
bitstream_t bs;
bitstream_init(&bs, count / 4); // Initial guess
// Track coefficient significance
coeff_state_t *states = calloc(count, sizeof(coeff_state_t));
// Find maximum value to determine MSB bitplane
int max_abs = find_max_abs_ezbc(coeffs, count);
int msb_bitplane = get_msb_bitplane(max_abs);
// Write header: MSB bitplane and dimensions
bitstream_write_bits(&bs, msb_bitplane, 8);
bitstream_write_bits(&bs, width, 16);
bitstream_write_bits(&bs, height, 16);
// Initialise two queues: insignificant blocks and significant 1x1 blocks
block_queue_t insignificant_queue, next_insignificant;
block_queue_t significant_queue, next_significant;
queue_init(&insignificant_queue);
queue_init(&next_insignificant);
queue_init(&significant_queue);
queue_init(&next_significant);
// Start with root block as insignificant
ezbc_block_t root = {0, 0, width, height};
queue_push(&insignificant_queue, root);
// Process bitplanes from MSB to LSB
for (int bitplane = msb_bitplane; bitplane >= 0; bitplane--) {
int threshold = 1 << bitplane;
int sign_bits_this_bitplane = 0;
// Process insignificant blocks - check if they become significant
for (size_t i = 0; i < insignificant_queue.count; i++) {
ezbc_block_t block = insignificant_queue.blocks[i];
// Check if this block has any coefficient >= threshold
if (is_zero_block_ezbc(coeffs, width, height, &block, threshold)) {
// Still insignificant: emit 0
bitstream_write_bit(&bs, 0);
// Keep in insignificant queue for next bitplane
queue_push(&next_insignificant, block);
} else {
// Became significant: emit 1
bitstream_write_bit(&bs, 1);
// Use recursive subdivision to process this block and all children
ezbc_context_t ctx = {
.bs = &bs,
.coeffs = coeffs,
.states = states,
.width = width,
.height = height,
.bitplane = bitplane,
.threshold = threshold,
.next_insignificant = &next_insignificant,
.next_significant = &next_significant,
.sign_count = &sign_bits_this_bitplane
};
process_significant_block_recursive(&ctx, block);
}
}
// Process significant 1x1 blocks - emit refinement bits
for (size_t i = 0; i < significant_queue.count; i++) {
ezbc_block_t block = significant_queue.blocks[i];
int idx = block.y * width + block.x;
int abs_val = abs(coeffs[idx]);
// Emit refinement bit at current bitplane
int bit = (abs_val >> bitplane) & 1;
bitstream_write_bit(&bs, bit);
// Keep in significant queue for next bitplane
queue_push(&next_significant, block);
}
// Swap queues for next bitplane
queue_free(&insignificant_queue);
queue_free(&significant_queue);
insignificant_queue = next_insignificant;
significant_queue = next_significant;
queue_init(&next_insignificant);
queue_init(&next_significant);
}
// Free all queues
queue_free(&insignificant_queue);
queue_free(&significant_queue);
queue_free(&next_insignificant);
queue_free(&next_significant);
free(states);
size_t final_size = bitstream_size(&bs);
*output = bs.data;
return final_size;
}

View File

@@ -0,0 +1,61 @@
/**
* TAV Encoder - EZBC (Embedded Zero Block Coding) Library
*
* Public API for EZBC entropy coding of wavelet coefficients.
*/
#ifndef TAV_ENCODER_EZBC_H
#define TAV_ENCODER_EZBC_H
#include <stdint.h>
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
// =============================================================================
// EZBC Encoding
// =============================================================================
/**
 * EZBC encoding for a single channel.
 *
 * Implements quadtree embedded zero block coding for efficient storage
 * of sparse wavelet coefficients: a significant block is split into up to
 * four children. Exploits coefficient sparsity through hierarchical
 * significance testing and progressive bitplane encoding.
 *
 * Algorithm:
 * 1. Find MSB bitplane from maximum absolute coefficient value
 * 2. Write header: MSB bitplane (8 bits), width (16 bits), height (16 bits)
 * 3. For each bitplane from MSB to 0:
 *    a. Process insignificant blocks: check if they become significant
 *       - Emit 0 if still insignificant, 1 if became significant
 *    b. For newly significant blocks: recursively subdivide until 1x1
 *       - Emit tree structure: 1=child is significant, 0=child insignificant
 *    c. Emit sign bits for newly significant 1x1 coefficients (1=negative, 0=positive)
 *    d. Process already-significant coefficients: emit refinement bits
 *       - Emit bit at current bitplane for progressive reconstruction
 * 4. Return encoded bitstream
 *
 * Benefits:
 * - Exploits coefficient sparsity (typical: 86.9% zeros in luma, 97.8% in chroma)
 * - Progressive refinement from MSB to LSB
 * - Spatial clustering through quadtree decomposition
 * - No additional entropy coding needed (bitstream is already compressed)
 *
 * NOTE(review): width and height are written as 16-bit header fields;
 * confirm how dimensions above 65535 are meant to be handled.
 *
 * @param coeffs Input quantized coefficients (int16_t array)
 * @param count Number of coefficients (width × height)
 * @param width Frame width (must match coefficient array layout)
 * @param height Frame height (must match coefficient array layout)
 * @param output Output buffer pointer (allocated by this function, caller must free)
 * @return Encoded size in bytes (including header)
 */
size_t tav_encode_channel_ezbc(int16_t *coeffs, size_t count, int width, int height,
uint8_t **output);
#ifdef __cplusplus
}
#endif
#endif // TAV_ENCODER_EZBC_H

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,635 @@
/**
* TAV Encoder - Quantization Library
*
* Provides DWT coefficient quantization with perceptual weighting based on
* the Human Visual System (HVS). Implements separable 3D quantization for
* temporal GOP encoding.
*
* Extracted from encoder_tav.c as part of library refactoring.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
// Forward declaration of encoder context (defined in main encoder)
typedef struct tav_encoder_s tav_encoder_t;
// =============================================================================
// Utility Functions
// =============================================================================
/* Limit x to the closed range [min, max]; the lower bound is tested first. */
static inline int CLAMP(int x, int min, int max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
/* Float counterpart of CLAMP; a NaN input fails both comparisons and is returned as-is. */
static inline float FCLAMP(float x, float min, float max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
// =============================================================================
// Constants for Perceptual Model
// =============================================================================
// Dead-zone quantization scaling factors (applied selectively to luma only).
// Each factor multiplies the caller-supplied dead-zone threshold.
#define DEAD_ZONE_FINEST_SCALE 1.0f // Full dead-zone: HH subband at level 1
#define DEAD_ZONE_FINE_SCALE 0.5f // Half dead-zone: LH/HL at level 1 and HH at level 2
// Anisotropy parameters for horizontal vs vertical detail quantization.
// Indexed by quality level; each table has 7 entries (indices 0-6).
// Luma HL weight = LH * ANISOTROPY_MULT[q] + ANISOTROPY_BIAS[q].
static const float ANISOTROPY_MULT[] = {5.1f, 3.8f, 2.7f, 2.0f, 1.5f, 1.2f, 1.0f};
static const float ANISOTROPY_BIAS[] = {0.4f, 0.3f, 0.2f, 0.1f, 0.0f, 0.0f, 0.0f};
// Chroma-specific anisotropy (more aggressive quantization), same indexing.
// The multiplier scales the chroma base curve for HL and HH; the bias is added for HH only.
static const float ANISOTROPY_MULT_CHROMA[] = {7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f};
static const float ANISOTROPY_BIAS_CHROMA[] = {1.0f, 0.8f, 0.6f, 0.4f, 0.2f, 0.0f, 0.0f};
// Detail preservation factors for 2-pixel and 4-pixel structures.
// Applied to luma HL/HH weights when the normalized level lies in
// [2.8, 3.2] (four-pixel) or [1.8, 2.2] (two-pixel).
#define FOUR_PIXEL_DETAILER 0.88f
#define TWO_PIXEL_DETAILER 0.92f
// =============================================================================
// Subband Analysis Helper Functions
// =============================================================================
/**
 * Decomposition level of the coefficient at 2D position (x, y).
 *
 * The frame is halved repeatedly. While the position stays inside the
 * top-left (LL) quadrant the search descends one level deeper; the first
 * level at which it falls outside that quadrant is the answer.
 *
 * Returns: 1 for the outermost detail bands up to decomp_levels for the
 * innermost ones, or 0 when the position is still inside LL after
 * decomp_levels halvings.
 */
static int get_subband_level_2d(int x, int y, int width, int height, int decomp_levels) {
    int quad_w = width;
    int quad_h = height;
    int level = 1;

    while (level <= decomp_levels) {
        quad_w >>= 1;
        quad_h >>= 1;
        if (x >= quad_w || y >= quad_h) {
            // Outside the LL quadrant: one of LH/HL/HH at this level
            return level;
        }
        level++;
    }
    // Never left the LL quadrant
    return 0;
}
/**
 * Subband type of the coefficient at 2D position (x, y).
 *
 * Uses the same repeated halving as get_subband_level_2d and classifies the
 * position at the first level where it leaves the top-left (LL) quadrant.
 *
 * Returns: 0=LL, 1=LH (top-right), 2=HL (bottom-left), 3=HH (bottom-right)
 */
static int get_subband_type_2d(int x, int y, int width, int height, int decomp_levels) {
    int quad_w = width;
    int quad_h = height;

    for (int level = 1; level <= decomp_levels; level++) {
        quad_w >>= 1;
        quad_h >>= 1;

        const int in_right = (x >= quad_w);
        const int in_bottom = (y >= quad_h);
        if (!in_right && !in_bottom) {
            // Still inside LL: descend into the next level
            continue;
        }
        if (in_right) {
            return in_bottom ? 3 : 1;
        }
        return 2;
    }
    // Never left the LL quadrant
    return 0;
}
/**
 * Legacy wrapper: treats linear_idx as a row-major position
 * (x = idx % width, y = idx / width) and returns its decomposition level.
 */
static int get_subband_level(int linear_idx, int width, int height, int decomp_levels) {
    return get_subband_level_2d(linear_idx % width, linear_idx / width,
                                width, height, decomp_levels);
}
/* Legacy wrapper: row-major linear index -> subband type (0=LL, 1=LH, 2=HL, 3=HH). */
static int get_subband_type(int linear_idx, int width, int height, int decomp_levels) {
    return get_subband_type_2d(linear_idx % width, linear_idx / width,
                               width, height, decomp_levels);
}
/**
 * Temporal subband level of a frame index inside a GOP.
 *
 * The first num_frames >> temporal_levels frames are level 0 (the coarsest
 * temporal low-pass). Each following level ends at a boundary twice as far
 * out; frames beyond the last boundary (num_frames >> 1) belong to level
 * temporal_levels, the finest high-pass.
 *
 * Returns: 0 for coarsest, temporal_levels for finest
 */
static int get_temporal_subband_level(int frame_idx, int num_frames, int temporal_levels) {
    // Walk the boundaries from the innermost (largest shift) outwards.
    for (int shift = temporal_levels; shift > 0; shift--) {
        const int boundary = num_frames >> shift;
        if (frame_idx < boundary) {
            return temporal_levels - shift;
        }
    }
    return temporal_levels;
}
// =============================================================================
// Perceptual Model Functions (HVS-based weighting)
// =============================================================================
// Linear blend of x and y: a = 0 yields x, a = 1 yields y (a is not clamped)
static float lerp(float x, float y, float a) {
    const float keep = 1.f - a;  // share contributed by x
    return x * keep + y * a;
}
/**
 * Perceptual model for LH subband (horizontal details).
 * Human eyes are more sensitive to horizontal details than vertical.
 * Curve: https://www.desmos.com/calculator/mjlpwqm8ge
 *
 * Piecewise curve: a straight line for level >= 4 and a cubic below it;
 * both pieces evaluate to 1.2 at level 4.
 *
 * @param quality Quality level (not used by this curve)
 * @param level Normalized decomposition level (1.0-6.0)
 * @return Perceptual weight multiplier
 */
static float perceptual_model3_LH(int quality, float level) {
    (void)quality;

    const float H4 = 1.2f;       // curve value at level 4
    const float K = 2.f;         // fixed curve shape; the quantiser scales the result anyway
    const float K12 = K * 12.f;
    const float x = level;

    if (level >= 4) {
        // Linear segment
        return H4 - ((K + 1.f) / 15.f) * (x - 4.f);
    }

    // Cubic segment, offset so that it meets the linear segment at level 4
    const float C3 = -1.f / 45.f * (K12 + 92);
    return (-x / 180.f) * (K12 + 5*x*x - 60*x + 252) - C3 + H4;
}
/**
 * Perceptual model for HL subband (vertical details).
 * Derived from the LH weight through a per-quality affine map:
 * LH * ANISOTROPY_MULT[quality] + ANISOTROPY_BIAS[quality].
 *
 * @param quality Quality level; used directly as an index into the anisotropy tables
 * @param LH LH subband weight
 * @return Perceptual weight multiplier
 */
static float perceptual_model3_HL(int quality, float LH) {
    const float gain = ANISOTROPY_MULT[quality];
    const float offset = ANISOTROPY_BIAS[quality];
    return fmaf(LH, gain, offset);
}
/**
 * Perceptual model for HH subband (diagonal details).
 * Blends the LH and HL weights with a factor that grows with the level;
 * algebraically the factor is sqrt(level) / 2.
 *
 * @param LH LH subband weight
 * @param HL HL subband weight
 * @param level Normalized decomposition level
 * @return Perceptual weight multiplier
 */
static float perceptual_model3_HH(float LH, float HL, float level) {
    const float root_minus_one = sqrtf(level) - 1.f;
    const float blend = fmaf(root_minus_one, 0.5f, 0.5f);
    return lerp(LH, HL, blend);
}
/**
 * Perceptual model for LL subband (low-frequency baseband).
 * Contains most image energy, preserve carefully.
 *
 * Takes the LH weight at this level and divides it by the ratio
 * LH(level - 1) / LH(level).
 *
 * @param quality Quality level (forwarded to the LH model)
 * @param level Normalized decomposition level
 * @return Perceptual weight multiplier
 */
static float perceptual_model3_LL(int quality, float level) {
    const float here = perceptual_model3_LH(quality, level);
    const float one_below = perceptual_model3_LH(quality, level - 1);
    const float ratio = one_below / here;
    return here / ratio;
}
/**
 * Chroma-specific perceptual model base curve.
 * Less critical for human perception, more aggressive quantization.
 *
 * A straight line through 1.0 at level 4 with slope
 * -1 / (0.5 * quality^2 + 1), so higher quality flattens the curve.
 *
 * @param quality Quality level
 * @param level Normalized decomposition level
 * @return Perceptual weight multiplier (not clamped here)
 */
static float perceptual_model3_chroma_basecurve(int quality, float level) {
    const float denom = 0.5f * quality * quality + 1.0f;
    const float slope = 1.0f / denom;
    return 1.0f - slope * (level - 4.0f);
}
/**
 * Get perceptual weight for a specific subband and level.
 * Implements HVS-optimized frequency weighting.
 *
 * level0 is mapped linearly from [1, max_levels] onto a normalized level
 * in [1.0, 6.0] before the perceptual curves are evaluated.
 *
 * NOTE: This function requires enc->quality_level field from encoder context.
 *
 * @param enc Encoder context (for quality_level)
 * @param level0 Decomposition level (1-based: 1=finest, decomp_levels=coarsest)
 * @param subband_type Subband type (0=LL, 1=LH, 2=HL, 3=HH)
 * @param is_chroma 1 for chroma channels, 0 for luma
 * @param max_levels Maximum decomposition levels
 * @return Perceptual weight multiplier. Chroma weights are clamped to
 *         [1.0, 100.0]; luma weights are returned unclamped and can be
 *         below 1.0 (the LH curve evaluates to 0.8 at normalized level 6).
 */
static float get_perceptual_weight(tav_encoder_t *enc, int level0, int subband_type, int is_chroma, int max_levels);
/**
 * Get perceptual weight for coefficient at linear index position.
 * Maps linear coefficient index to DWT subband layout: the LL subband
 * first, then LH, HL, HH for each level from decomp_levels down to 1.
 *
 * NOTE: This function requires enc->widths[]/enc->heights[] arrays and
 * enc->perceptual_tuning from encoder context.
 *
 * @param enc Encoder context (for widths/heights arrays, quality_level, perceptual_tuning)
 * @param linear_idx Linear coefficient index
 * @param width Frame width (not read by the implementation; sizes come from enc->widths[])
 * @param height Frame height (not read by the implementation; sizes come from enc->heights[])
 * @param decomp_levels Number of decomposition levels
 * @param is_chroma 1 for chroma channels, 0 for luma
 * @return Perceptual weight multiplier; exactly 1.0 when enc->perceptual_tuning
 *         is zero or when linear_idx lies beyond the last subband
 */
static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma);
// =============================================================================
// Quantization Functions
// =============================================================================
/**
 * Quantize DWT coefficients with uniform quantization and optional dead-zone.
 *
 * This is the basic quantization function without perceptual weighting.
 * Every coefficient is divided by the quantiser (clamped to [1, 4096]),
 * rounded half away from zero and clamped to the int16_t range.
 *
 * Dead-zone quantization is applied selectively to luma channel only; a
 * divided value whose magnitude is <= the band threshold is set to zero:
 * - HH1 (finest diagonal): full dead-zone
 * - LH1/HL1/HH2: half dead-zone
 * - Coarser levels: no dead-zone (preserve structure)
 * The level and subband are derived by treating the coefficient index as a
 * row-major position (x = i % width, y = i / width).
 *
 * @param coeffs Input DWT coefficients (float)
 * @param quantised Output quantized coefficients (int16_t)
 * @param size Number of coefficients
 * @param quantiser Base quantizer value (clamped to 1-4096)
 * @param dead_zone_threshold Dead-zone threshold (0.0 = disabled)
 * @param width Frame width
 * @param height Frame height
 * @param decomp_levels Number of decomposition levels
 * @param is_chroma 1 for chroma channels, 0 for luma
 */
void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quantiser,
float dead_zone_threshold, int width, int height,
int decomp_levels, int is_chroma);
/**
 * Quantize DWT coefficients with per-coefficient perceptual weighting.
 *
 * Applies HVS-optimized frequency weighting to each coefficient based on its
 * position in the DWT subband tree: the effective quantiser is the base
 * quantiser (clamped to [1, 4096]) times the perceptual weight. The luma
 * dead-zone rules are the same as in tav_quantise_uniform().
 *
 * NOTE: This function requires encoder context fields (all read through the
 * perceptual weight lookup):
 * - enc->widths[]/enc->heights[] for subband layout
 * - enc->quality_level for perceptual model
 * - enc->perceptual_tuning (zero forces a weight of 1.0)
 * The dead-zone threshold is taken from the dead_zone_threshold argument,
 * not from enc.
 *
 * @param enc Encoder context
 * @param coeffs Input DWT coefficients (float)
 * @param quantised Output quantized coefficients (int16_t)
 * @param size Number of coefficients
 * @param base_quantiser Base quantizer value (before perceptual weighting)
 * @param dead_zone_threshold Dead-zone threshold (0.0 = disabled)
 * @param width Frame width
 * @param height Frame height
 * @param decomp_levels Number of decomposition levels
 * @param is_chroma 1 for chroma channels, 0 for luma
 * @param frame_count Current frame number (not read by the current implementation)
 */
void tav_quantise_perceptual(tav_encoder_t *enc,
float *coeffs, int16_t *quantised, int size,
int base_quantiser, float dead_zone_threshold, int width, int height,
int decomp_levels, int is_chroma, int frame_count);
/**
 * Quantize 3D DWT coefficients with SEPARABLE temporal-spatial quantization.
 *
 * After 3D DWT (temporal + spatial), GOP coefficients have this structure:
 * - Temporal DWT applied first → temporal subbands at different levels
 * - Spatial 2D DWT applied to each temporal subband
 *
 * Quantization strategy:
 * 1. Compute temporal base quantizer: tH_base(level) = Qbase * 2^(beta*level^kappa)
 *    - tLL (level 0): coarsest temporal → smallest quantizer
 *    - tHH (highest level): finest temporal → largest quantizer
 *    - beta = 0.6, kappa = 1.14; when bit 0 of enc->encoder_preset is set,
 *      beta = 0 so every level uses Qbase unchanged
 *    - the result is rounded and clamped to [1, 255]
 * 2. Apply spatial perceptual weighting to tH_base
 * 3. Final quantizer: Q_effective = tH_base × spatial_weight
 *
 * NOTE: This function requires encoder context fields:
 * - enc->encoder_preset for sports mode detection
 * - enc->temporal_decomp_levels for temporal level calculation
 * - enc->dead_zone_threshold, enc->width, enc->height, enc->decomp_levels and
 *   enc->frame_count, all forwarded to tav_quantise_perceptual()
 * - Plus all fields needed by tav_quantise_perceptual()
 * (enc->verbose is only referenced by a commented-out debug print.)
 *
 * @param enc Encoder context
 * @param gop_coeffs GOP coefficients [frame][pixel] (temporal subbands)
 * @param quantised Output quantized coefficients [frame][pixel]
 * @param num_frames Number of temporal subband frames
 * @param spatial_size Number of spatial coefficients per frame
 * @param base_quantiser Base quantizer value (before temporal/spatial scaling)
 * @param is_chroma 1 for chroma channels, 0 for luma
 */
void tav_quantise_3d_dwt(tav_encoder_t *enc,
float **gop_coeffs, int16_t **quantised, int num_frames,
int spatial_size, int base_quantiser, int is_chroma);
/**
 * Convert floating-point quantizer to integer with dithering (for bitrate mode).
 *
 * One-dimensional error diffusion (described in the implementation as
 * Floyd-Steinberg style): the carried error is added to the float quantizer,
 * the sum is rounded via (int)(value + 0.5f), and half of the rounding
 * residual is stored for the next call. The residual is computed before the
 * result is clamped.
 *
 * NOTE: This function requires encoder context fields:
 * - enc->adjusted_quantiser_y_float (current float quantizer)
 * - enc->dither_accumulator (accumulated error, modified by this function)
 *
 * @param enc Encoder context
 * @return Integer quantizer value (0-254)
 */
int tav_quantiser_float_to_int_dithered(tav_encoder_t *enc);
// =============================================================================
// Perceptual Weight Implementation (requires encoder context)
// =============================================================================
// NOTE: This implementation requires encoder context (enc->quality_level)
// Struct definition will be in encoder header when integrated
// NOTE(review): this is a local stand-in for the encoder context, compiled
// only when TAV_ENCODER_QUANTIZE_INTERNAL is not defined - confirm that its
// layout matches the real encoder struct wherever both are visible.
#ifndef TAV_ENCODER_QUANTIZE_INTERNAL
// Forward declare structure access - will be properly defined when integrated
struct tav_encoder_s {
int quality_level; // index into the ANISOTROPY_* tables; argument to the perceptual curves
int *widths; // subband widths, indexed by get_perceptual_weight_for_position()
int *heights; // subband heights, same indexing as widths
int decomp_levels; // spatial decomposition levels forwarded by tav_quantise_3d_dwt()
float dead_zone_threshold; // dead-zone threshold forwarded by tav_quantise_3d_dwt()
int encoder_preset; // bit 0 selects the temporal scaling constants in tav_quantise_3d_dwt()
int temporal_decomp_levels; // temporal levels used to classify GOP frames
int verbose; // only referenced by a commented-out debug print in this file
int frame_count; // base frame number; tav_quantise_3d_dwt() passes frame_count + t
float adjusted_quantiser_y_float; // float quantiser read by tav_quantiser_float_to_int_dithered()
float dither_accumulator; // carried rounding error, updated by tav_quantiser_float_to_int_dithered()
int width; // frame width forwarded by tav_quantise_3d_dwt()
int height; // frame height forwarded by tav_quantise_3d_dwt()
int perceptual_tuning; // zero makes get_perceptual_weight_for_position() return 1.0
};
#endif
/*
 * Perceptual weight for one subband (psychovisual model based on DWT
 * coefficient statistics and Human Visual System sensitivity).
 *
 * level0 in [1, max_levels] is mapped linearly onto a normalized level in
 * [1.0, 6.0]. With a single decomposition level that mapping would divide
 * by zero and turn the weight into NaN/inf, so the normalized level is
 * pinned to 1.0, the value every multi-level configuration yields for
 * level0 == 1.
 *
 * Luma weights are returned unclamped; chroma weights are clamped to
 * [1.0, 100.0], with chroma LL fixed at 1.0.
 */
static float get_perceptual_weight(tav_encoder_t *enc, int level0, int subband_type, int is_chroma, int max_levels) {
    float level = 1.0f;
    if (max_levels != 1) {
        level = 1.0f + ((level0 - 1.0f) / (max_levels - 1.0f)) * 5.0f;
    }

    const int quality = enc->quality_level;

    if (is_chroma) {
        // CHROMA CHANNELS: less critical for human perception, more aggressive quantisation
        const float base = perceptual_model3_chroma_basecurve(quality, level - 1);
        switch (subband_type) {
        case 0:   // LL chroma - still important but less than luma
            return 1.0f;
        case 1:   // LH chroma - horizontal chroma details
            return FCLAMP(base, 1.0f, 100.0f);
        case 2:   // HL chroma - vertical chroma details (even less critical)
            return FCLAMP(base * ANISOTROPY_MULT_CHROMA[quality], 1.0f, 100.0f);
        default:  // HH chroma - diagonal chroma details (most aggressive)
            return FCLAMP(base * ANISOTROPY_MULT_CHROMA[quality] + ANISOTROPY_BIAS_CHROMA[quality], 1.0f, 100.0f);
        }
    }

    // LUMA - strategy: more horizontal detail
    // LL subband - contains most image energy, preserve carefully
    if (subband_type == 0) {
        return perceptual_model3_LL(quality, level);
    }

    // LH subband - horizontal details (human eyes more sensitive)
    const float LH = perceptual_model3_LH(quality, level);
    if (subband_type == 1) {
        return LH;
    }

    // HL and HH share the same relief factor around normalized levels 2 and 3.
    float detailer = 1.0f;
    if (2.2f >= level && level >= 1.8f) {
        detailer = TWO_PIXEL_DETAILER;
    } else if (3.2f >= level && level >= 2.8f) {
        detailer = FOUR_PIXEL_DETAILER;
    }

    // HL subband - vertical details
    const float HL = perceptual_model3_HL(quality, LH);
    if (subband_type == 2) {
        return HL * detailer;
    }

    // HH subband - diagonal details
    return perceptual_model3_HH(LH, HL, level) * detailer;
}
/*
 * Perceptual weight for the coefficient at linear_idx.
 *
 * The index is located in a sequential subband layout: the LL subband
 * first, then LH, HL, HH for every level from decomp_levels down to 1.
 * Returns 1.0 when perceptual tuning is off or the index lies past the last
 * subband. The width/height parameters are not read; subband sizes come
 * from enc->widths[] / enc->heights[].
 *
 * NOTE(review): the detail bands of level L use enc->widths[decomp_levels - L + 1],
 * while LL uses enc->widths[decomp_levels] - confirm this matches the
 * indexing convention of enc->widths[].
 */
static float get_perceptual_weight_for_position(tav_encoder_t *enc, int linear_idx, int width, int height, int decomp_levels, int is_chroma) {
    // Uniform quantisation requested: every coefficient weighs the same.
    if (!enc->perceptual_tuning) {
        return 1.0f;
    }

    // LL subband at the maximum decomposition level comes first.
    const int ll_size = enc->widths[decomp_levels] * enc->heights[decomp_levels];
    int band_start = 0;
    if (linear_idx < band_start + ll_size) {
        return get_perceptual_weight(enc, decomp_levels, 0, is_chroma, decomp_levels);
    }
    band_start += ll_size;

    // Then three equally sized detail bands per level: 1=LH, 2=HL, 3=HH.
    for (int level = decomp_levels; level >= 1; level--) {
        const int size_index = decomp_levels - level + 1;
        const int band_size = enc->widths[size_index] * enc->heights[size_index];

        for (int band = 1; band <= 3; band++) {
            if (linear_idx < band_start + band_size) {
                return get_perceptual_weight(enc, level, band, is_chroma, decomp_levels);
            }
            band_start += band_size;
        }
    }

    // Index lies beyond every subband
    return 1.0f;
}
// =============================================================================
// Quantization Function Implementations
// =============================================================================
/*
 * Uniform quantisation: divide by the clamped quantiser, optionally zero
 * small luma values in the finest subbands, then round half away from zero
 * and clamp to the int16_t range.
 */
void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quantiser,
                          float dead_zone_threshold, int width, int height,
                          int decomp_levels, int is_chroma) {
    const float step = FCLAMP((float)quantiser, 1.0f, 4096.0f);
    // Dead-zone applies to the luma channel only, and only when a threshold is given
    const int use_dead_zone = (dead_zone_threshold > 0.0f) && !is_chroma;

    // Scalar implementation (AVX-512 version would go in separate optimized module)
    for (int i = 0; i < size; i++) {
        float scaled = coeffs[i] / step;

        if (use_dead_zone) {
            const int level = get_subband_level(i, width, height, decomp_levels);
            const int band = get_subband_type(i, width, height, decomp_levels);

            float cutoff = 0.0f;
            if (level == 1 && band == 3) {
                // HH1: full dead-zone
                cutoff = dead_zone_threshold * DEAD_ZONE_FINEST_SCALE;
            } else if (level == 1 && (band == 1 || band == 2)) {
                // LH1, HL1: half dead-zone
                cutoff = dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
            } else if (level == 2 && band == 3) {
                // HH2: half dead-zone
                cutoff = dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
            }

            if (fabsf(scaled) <= cutoff) {
                scaled = 0.0f;
            }
        }

        // Round half away from zero, then saturate to 16 bits
        const float bias = (scaled >= 0) ? 0.5f : -0.5f;
        const int rounded = (int)(scaled + bias);
        quantised[i] = (int16_t)CLAMP(rounded, -32768, 32767);
    }
}
/*
 * Perceptual quantisation: like the uniform variant, but each coefficient is
 * divided by base quantiser x perceptual weight for its position in the DWT
 * layout. The frame_count argument is not read.
 */
void tav_quantise_perceptual(tav_encoder_t *enc,
                             float *coeffs, int16_t *quantised, int size,
                             int base_quantiser, float dead_zone_threshold, int width, int height,
                             int decomp_levels, int is_chroma, int frame_count) {
    const float base_step = FCLAMP((float)base_quantiser, 1.0f, 4096.0f);
    // Dead-zone applies to the luma channel only, and only when a threshold is given
    const int use_dead_zone = (dead_zone_threshold > 0.0f) && !is_chroma;

    for (int i = 0; i < size; i++) {
        // Step size for this coefficient
        const float weight = get_perceptual_weight_for_position(enc, i, width, height, decomp_levels, is_chroma);
        const float step = base_step * weight;
        float scaled = coeffs[i] / step;

        if (use_dead_zone) {
            const int level = get_subband_level(i, width, height, decomp_levels);
            const int band = get_subband_type(i, width, height, decomp_levels);

            float cutoff = 0.0f;
            if (level == 1 && band == 3) {
                // HH1: full dead-zone
                cutoff = dead_zone_threshold * DEAD_ZONE_FINEST_SCALE;
            } else if (level == 1 && (band == 1 || band == 2)) {
                // LH1, HL1: half dead-zone
                cutoff = dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
            } else if (level == 2 && band == 3) {
                // HH2: half dead-zone
                cutoff = dead_zone_threshold * DEAD_ZONE_FINE_SCALE;
            }

            if (fabsf(scaled) <= cutoff) {
                scaled = 0.0f;
            }
        }

        // Round half away from zero, then saturate to 16 bits
        const float bias = (scaled >= 0) ? 0.5f : -0.5f;
        const int rounded = (int)(scaled + bias);
        quantised[i] = (int16_t)CLAMP(rounded, -32768, 32767);
    }
}
/*
 * Separable temporal-spatial quantisation of a GOP: every temporal subband
 * frame gets its own base quantiser, base_quantiser * 2^(BETA * level^KAPPA)
 * rounded and clamped to [1, 255], and is then quantised spatially with
 * tav_quantise_perceptual().
 */
void tav_quantise_3d_dwt(tav_encoder_t *enc,
                         float **gop_coeffs, int16_t **quantised, int num_frames,
                         int spatial_size, int base_quantiser, int is_chroma) {
    // Sports preset (bit 0 of encoder_preset): finer temporal quantisation.
    // BETA = 0 makes the temporal scale 1 for every level.
    const int sports_preset = (enc->encoder_preset & 0x01) != 0;
    const float BETA = sports_preset ? 0.0f : 0.6f;
    const float KAPPA = sports_preset ? 1.0f : 1.14f;

    for (int t = 0; t < num_frames; t++) {
        // Which temporal subband does frame t belong to?
        const int temporal_level = get_temporal_subband_level(t, num_frames, enc->temporal_decomp_levels);

        // Exponential scaling of the base quantiser with the temporal level
        const float exponent = BETA * powf(temporal_level, KAPPA);
        const float temporal_scale = powf(2.0f, exponent);
        const float scaled_quantiser = base_quantiser * temporal_scale;
        const int frame_quantiser = CLAMP((int)roundf(scaled_quantiser), 1, 255);

        // Spatial stage; the perceptual weighting itself is switched by
        // enc->perceptual_tuning inside the weight lookup.
        tav_quantise_perceptual(enc,
                                gop_coeffs[t],
                                quantised[t],
                                spatial_size,
                                frame_quantiser,
                                enc->dead_zone_threshold,
                                enc->width,
                                enc->height,
                                enc->decomp_levels,
                                is_chroma,
                                enc->frame_count + t);
    }
}
/*
 * Integer quantiser for the current frame with error diffusion: the carried
 * error is added to the float quantiser, the sum is rounded, and half of the
 * rounding residual is kept in enc->dither_accumulator for the next call.
 * The residual is taken before the result is clamped to [0, 254].
 */
int tav_quantiser_float_to_int_dithered(tav_encoder_t *enc) {
    const float target = enc->adjusted_quantiser_y_float + enc->dither_accumulator;
    const int rounded = (int)(target + 0.5f);

    // Diffuse 50% of the rounding error to the next frame
    enc->dither_accumulator = (target - (float)rounded) * 0.5f;

    return CLAMP(rounded, 0, 254);
}

View File

@@ -0,0 +1,138 @@
/**
* TAV Encoder - Quantization Library
*
* Public API for DWT coefficient quantization with perceptual weighting.
*/
#ifndef TAV_ENCODER_QUANTIZE_H
#define TAV_ENCODER_QUANTIZE_H
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// Forward declaration of encoder context (defined in main encoder)
typedef struct tav_encoder_s tav_encoder_t;
// =============================================================================
// Uniform Quantization
// =============================================================================
/**
 * Quantize DWT coefficients with uniform quantization and optional dead-zone.
 *
 * This is the basic quantization function without perceptual weighting.
 * Every coefficient is divided by the quantiser (clamped to [1, 4096]),
 * rounded half away from zero and clamped to the int16_t range.
 *
 * Dead-zone quantization is applied selectively to luma channel only; a
 * divided value whose magnitude is <= the band threshold is set to zero:
 * - HH1 (finest diagonal): full dead-zone
 * - LH1/HL1/HH2: half dead-zone
 * - Coarser levels: no dead-zone (preserve structure)
 *
 * @param coeffs Input DWT coefficients (float)
 * @param quantised Output quantized coefficients (int16_t)
 * @param size Number of coefficients
 * @param quantiser Base quantizer value (clamped to 1-4096)
 * @param dead_zone_threshold Dead-zone threshold (0.0 = disabled)
 * @param width Frame width
 * @param height Frame height
 * @param decomp_levels Number of decomposition levels
 * @param is_chroma 1 for chroma channels, 0 for luma
 */
void tav_quantise_uniform(float *coeffs, int16_t *quantised, int size, int quantiser,
float dead_zone_threshold, int width, int height,
int decomp_levels, int is_chroma);
// =============================================================================
// Perceptual Quantization
// =============================================================================
/**
* Quantize DWT coefficients with per-coefficient perceptual weighting.
*
* Applies HVS-optimized frequency weighting to each coefficient based on its
* position in the DWT subband tree. Implements the full perceptual model with
* dead-zone quantization for luma.
*
* NOTE: This function requires encoder context fields:
* - enc->widths[]/enc->heights[] for subband layout
* - enc->quality_level for perceptual model
* - enc->dead_zone_threshold for dead-zone quantization
*
* @param enc Encoder context
* @param coeffs Input DWT coefficients (float)
* @param quantised Output quantized coefficients (int16_t)
* @param size Number of coefficients
* @param base_quantiser Base quantizer value (before perceptual weighting)
* @param dead_zone_threshold Dead-zone threshold (0.0 = disabled)
* @param width Frame width
* @param height Frame height
* @param decomp_levels Number of decomposition levels
* @param is_chroma 1 for chroma channels, 0 for luma
* @param frame_count Current frame number (for any frame-dependent logic)
*/
void tav_quantise_perceptual(tav_encoder_t *enc,
float *coeffs, int16_t *quantised, int size,
int base_quantiser, float dead_zone_threshold, int width, int height,
int decomp_levels, int is_chroma, int frame_count);
// =============================================================================
// 3D GOP Quantization
// =============================================================================
/**
* Quantize 3D DWT coefficients with SEPARABLE temporal-spatial quantization.
*
* After 3D DWT (temporal + spatial), GOP coefficients have this structure:
* - Temporal DWT applied first → temporal subbands at different levels
* - Spatial 2D DWT applied to each temporal subband
*
* Quantization strategy:
* 1. Compute temporal base quantizer: tH_base(level) = Qbase * 2^(beta*level^kappa)
* - tLL (level 0): coarsest temporal → smallest quantizer
* - tHH (highest level): finest temporal → largest quantizer
* 2. Apply spatial perceptual weighting to tH_base
* 3. Final quantizer: Q_effective = tH_base × spatial_weight
*
* NOTE: This function requires encoder context fields:
* - enc->encoder_preset for sports mode detection
* - enc->temporal_decomp_levels for temporal level calculation
* - enc->verbose for debug output
* - Plus all fields needed by tav_quantise_perceptual()
*
* @param enc Encoder context
* @param gop_coeffs GOP coefficients [frame][pixel] (temporal subbands)
* @param quantised Output quantized coefficients [frame][pixel]
* @param num_frames Number of temporal subband frames
* @param spatial_size Number of spatial coefficients per frame
* @param base_quantiser Base quantizer value (before temporal/spatial scaling)
* @param is_chroma 1 for chroma channels, 0 for luma
*/
void tav_quantise_3d_dwt(tav_encoder_t *enc,
float **gop_coeffs, int16_t **quantised, int num_frames,
int spatial_size, int base_quantiser, int is_chroma);
// =============================================================================
// Rate Control
// =============================================================================
/**
* Convert floating-point quantizer to integer with dithering (for bitrate mode).
*
* Implements Floyd-Steinberg style error diffusion to avoid quantization
* artifacts when converting float quantizer values to integers for rate control.
*
* NOTE: This function requires encoder context fields:
* - enc->adjusted_quantiser_y_float (current float quantizer)
* - enc->dither_accumulator (accumulated error, modified by this function)
*
* @param enc Encoder context
* @return Integer quantizer value (0-254)
*/
int tav_quantiser_float_to_int_dithered(tav_encoder_t *enc);
#ifdef __cplusplus
}
#endif
#endif // TAV_ENCODER_QUANTIZE_H

View File

@@ -0,0 +1,159 @@
/**
* TAV Encoder Library - Tile Processing Implementation
*/
#include "tav_encoder_tile.h"
#include "tav_encoder_dwt.h"
#include <string.h>
#include <stdlib.h>
#define CLAMP(x, min, max) ((x) < (min) ? (min) : ((x) > (max) ? (max) : (x)))
/**
 * Fold a coordinate that lies outside [0, limit - 1] back into the frame.
 *
 * Negative coordinates are negated, coordinates at or beyond `limit` are
 * reflected about the last sample, and the result is finally clamped so that
 * very small frames can never yield an out-of-range index.
 */
static int tav_tile_mirror_coord(int coord, int limit) {
    if (coord < 0) {
        coord = -coord;
    } else if (coord >= limit) {
        coord = limit - 1 - (coord - limit);
    }
    return CLAMP(coord, 0, limit - 1);
}

/**
 * Copy one tile plus its surrounding margin out of the full-frame Y/Co/Cg
 * planes into three padded tile buffers.
 *
 * Rows and columns that fall outside the frame are filled by mirroring.
 * When the tile's core columns lie completely inside the frame, the core part
 * of every row is copied with memcpy and only the margin columns are handled
 * sample by sample; otherwise the whole row is handled sample by sample.
 *
 * @param frame_y/frame_co/frame_cg    Source planes, frame_width samples per row
 * @param frame_width/frame_height     Source plane dimensions
 * @param tile_x/tile_y                Tile indices (multiplied by the tile size)
 * @param padded_y/padded_co/padded_cg Destination buffers, each
 *                                     TAV_PADDED_TILE_SIZE_X * TAV_PADDED_TILE_SIZE_Y floats
 */
void tav_extract_padded_tile(const float *frame_y, const float *frame_co, const float *frame_cg,
                             int frame_width, int frame_height,
                             int tile_x, int tile_y,
                             float *padded_y, float *padded_co, float *padded_cg) {
    const int core_first_x = tile_x * TAV_TILE_SIZE_X;
    const int core_first_y = tile_y * TAV_TILE_SIZE_Y;

    // The bulk-copy decision depends only on the tile column, not on the row
    const int core_fits = (core_first_x >= 0) &&
                          (core_first_x + TAV_TILE_SIZE_X <= frame_width);

    // Column spans (in padded coordinates) that need per-sample mirroring.
    // Without a bulk copy the first span covers the whole row and the second is empty.
    const int span_begin[2] = {
        0,
        core_fits ? TAV_TILE_MARGIN + TAV_TILE_SIZE_X : TAV_PADDED_TILE_SIZE_X
    };
    const int span_end[2] = {
        core_fits ? TAV_TILE_MARGIN : TAV_PADDED_TILE_SIZE_X,
        TAV_PADDED_TILE_SIZE_X
    };

    for (int row = 0; row < TAV_PADDED_TILE_SIZE_Y; row++) {
        // Source row for this padded row, mirrored at the top/bottom frame edge
        const int mirrored_y = tav_tile_mirror_coord(core_first_y + row - TAV_TILE_MARGIN,
                                                     frame_height);
        const int in_base = mirrored_y * frame_width;
        const int out_base = row * TAV_PADDED_TILE_SIZE_X;

        if (core_fits) {
            // Core columns are contiguous in the source: copy them in one go per plane
            const size_t core_bytes = TAV_TILE_SIZE_X * sizeof(float);
            const int in_core = in_base + core_first_x;
            const int out_core = out_base + TAV_TILE_MARGIN;
            memcpy(&padded_y[out_core], &frame_y[in_core], core_bytes);
            memcpy(&padded_co[out_core], &frame_co[in_core], core_bytes);
            memcpy(&padded_cg[out_core], &frame_cg[in_core], core_bytes);
        }

        for (int s = 0; s < 2; s++) {
            for (int col = span_begin[s]; col < span_end[s]; col++) {
                const int mirrored_x = tav_tile_mirror_coord(core_first_x + col - TAV_TILE_MARGIN,
                                                             frame_width);
                const int in_idx = in_base + mirrored_x;
                const int out_idx = out_base + col;
                padded_y[out_idx] = frame_y[in_idx];
                padded_co[out_idx] = frame_co[in_idx];
                padded_cg[out_idx] = frame_cg[in_idx];
            }
        }
    }
}
/**
* Forward 2D DWT of one padded tile, performed in place.
*
* Thin wrapper that forwards to tav_dwt_2d_forward() with the fixed padded
* tile dimensions (TAV_PADDED_TILE_SIZE_X x TAV_PADDED_TILE_SIZE_Y), so callers
* do not have to pass the tile size themselves.
*
* @param tile_data Padded tile samples; passed straight through to tav_dwt_2d_forward()
* @param levels Number of decomposition levels (forwarded unchanged)
* @param filter_type Wavelet filter selector (forwarded unchanged)
*/
void tav_dwt_2d_forward_padded_tile(float *tile_data, int levels, int filter_type) {
// Delegate to the generic transform using the padded tile width and height
tav_dwt_2d_forward(tile_data, TAV_PADDED_TILE_SIZE_X, TAV_PADDED_TILE_SIZE_Y,
levels, filter_type);
}
/**
* Inverse 2D DWT of one padded tile - currently a no-op placeholder.
*
* The function body does nothing: all three arguments are explicitly ignored
* and tile_data is left unchanged. Callers must not rely on it to reconstruct
* samples.
*/
void tav_dwt_2d_inverse_padded_tile(float *tile_data, int levels, int filter_type) {
// Note: Inverse transform not yet implemented in library for arbitrary dimensions
// For now, this is a placeholder - decoder uses different code path
// The (void) casts only silence unused-parameter warnings
(void)tile_data;
(void)levels;
(void)filter_type;
}
/**
 * Strip the margin from a padded tile, keeping only the central
 * TAV_TILE_SIZE_X x TAV_TILE_SIZE_Y region.
 *
 * @param padded_data Source tile, TAV_PADDED_TILE_SIZE_X samples per row
 * @param core_data   Destination, tightly packed with TAV_TILE_SIZE_X samples per row
 */
void tav_crop_tile_margins(const float *padded_data, float *core_data) {
    const size_t row_bytes = TAV_TILE_SIZE_X * sizeof(float);

    for (int row = 0; row < TAV_TILE_SIZE_Y; row++) {
        // Skip TAV_TILE_MARGIN rows at the top and TAV_TILE_MARGIN columns on the left
        const float *from = &padded_data[(row + TAV_TILE_MARGIN) * TAV_PADDED_TILE_SIZE_X
                                         + TAV_TILE_MARGIN];
        float *to = &core_data[row * TAV_TILE_SIZE_X];
        memcpy(to, from, row_bytes);
    }
}
/**
 * Strip the margin from a padded tile, keeping only the top-left
 * actual_width x actual_height part of the core region.
 *
 * The output is tightly packed with actual_width samples per row.
 * Non-positive dimensions result in no copy at all: a negative width would
 * otherwise be converted to an enormous size_t byte count inside memcpy.
 *
 * @param padded_data   Source tile, TAV_PADDED_TILE_SIZE_X samples per row
 * @param core_data     Destination, at least actual_width * actual_height floats
 * @param actual_width  Number of columns to keep
 * @param actual_height Number of rows to keep
 */
void tav_crop_tile_margins_edge(const float *padded_data, float *core_data,
                                int actual_width, int actual_height) {
    if (actual_width <= 0 || actual_height <= 0) {
        return;  // nothing to copy; also protects the size_t conversion below
    }

    const size_t row_bytes = (size_t)actual_width * sizeof(float);
    for (int y = 0; y < actual_height; y++) {
        const int padded_row = (y + TAV_TILE_MARGIN) * TAV_PADDED_TILE_SIZE_X + TAV_TILE_MARGIN;
        const int core_row = y * actual_width;
        memcpy(&core_data[core_row], &padded_data[padded_row], row_bytes);
    }
}
/**
 * Report the size of the tile at (tile_x, tile_y).
 *
 * Each dimension is the standard tile size, or whatever is left of the frame
 * from the tile's starting position when that is smaller. No lower bound is
 * applied, so a tile index past the frame edge yields a non-positive value.
 *
 * @param frame_width/frame_height Frame dimensions
 * @param tile_x/tile_y            Tile indices
 * @param tile_width/tile_height   Outputs
 */
void tav_get_tile_dimensions(int frame_width, int frame_height,
                             int tile_x, int tile_y,
                             int *tile_width, int *tile_height) {
    // Distance from the tile's first column/row to the frame's right/bottom edge
    const int width_left = frame_width - tile_x * TAV_TILE_SIZE_X;
    const int height_left = frame_height - tile_y * TAV_TILE_SIZE_Y;

    if (width_left < TAV_TILE_SIZE_X) {
        *tile_width = width_left;
    } else {
        *tile_width = TAV_TILE_SIZE_X;
    }

    if (height_left < TAV_TILE_SIZE_Y) {
        *tile_height = height_left;
    } else {
        *tile_height = TAV_TILE_SIZE_Y;
    }
}

View File

@@ -0,0 +1,103 @@
/**
* TAV Encoder Library - Tile Processing
*
* Functions for padded tile extraction and DWT processing.
* Used when video dimensions exceed monoblock threshold (720x576).
*/
#ifndef TAV_ENCODER_TILE_H
#define TAV_ENCODER_TILE_H
#include <stdint.h>
#include <stddef.h>
#include "../../include/tav_encoder_lib.h"
// Tile dimensions (from header)
// TAV_TILE_SIZE_X = 640, TAV_TILE_SIZE_Y = 540
// TAV_PADDED_TILE_SIZE_X = 704, TAV_PADDED_TILE_SIZE_Y = 604
// TAV_TILE_MARGIN = 32
/**
* Extract a padded tile from full-frame YCoCg buffers.
*
* Extracts a tile at position (tile_x, tile_y) with TAV_TILE_MARGIN pixels
* of padding on all sides for seamless DWT processing. Uses symmetric
* extension (mirroring) at frame boundaries.
*
* @param frame_y Full frame Y channel
* @param frame_co Full frame Co channel
* @param frame_cg Full frame Cg channel
* @param frame_width Full frame width
* @param frame_height Full frame height
* @param tile_x Tile X index (0-based)
* @param tile_y Tile Y index (0-based)
* @param padded_y Output: Padded tile Y (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y floats)
* @param padded_co Output: Padded tile Co
* @param padded_cg Output: Padded tile Cg
*/
void tav_extract_padded_tile(const float *frame_y, const float *frame_co, const float *frame_cg,
int frame_width, int frame_height,
int tile_x, int tile_y,
float *padded_y, float *padded_co, float *padded_cg);
/**
* Apply 2D DWT forward transform to a padded tile.
*
* Uses fixed PADDED_TILE_SIZE dimensions (704x604) for optimal performance.
*
* @param tile_data Tile data (modified in-place)
* @param levels Number of decomposition levels
* @param filter_type Wavelet filter type (0=CDF 5/3, 1=CDF 9/7, etc.)
*/
void tav_dwt_2d_forward_padded_tile(float *tile_data, int levels, int filter_type);
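/**
* Apply 2D DWT inverse transform to a padded tile.
*
* NOTE: the current implementation in tav_encoder_tile.c is a placeholder that
* ignores all of its arguments and leaves tile_data unchanged.
*
* @param tile_data Tile data (intended to be modified in-place; currently untouched)
* @param levels Number of decomposition levels (currently unused)
* @param filter_type Wavelet filter type (currently unused)
*/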
void tav_dwt_2d_inverse_padded_tile(float *tile_data, int levels, int filter_type);
/**
* Crop a padded tile to its core region (removing margins).
*
* Extracts the central TAV_TILE_SIZE_X × TAV_TILE_SIZE_Y region from a padded tile.
*
* @param padded_data Padded tile (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y)
* @param core_data Output: Core tile (TILE_SIZE_X * TILE_SIZE_Y)
*/
void tav_crop_tile_margins(const float *padded_data, float *core_data);
/**
* Crop a padded tile to actual dimensions for edge tiles.
*
* For tiles at the right/bottom edges of a frame, the actual tile may be
* smaller than TILE_SIZE_X × TILE_SIZE_Y. This function handles that case.
*
* @param padded_data Padded tile (PADDED_TILE_SIZE_X * PADDED_TILE_SIZE_Y)
* @param core_data Output: Core tile data
* @param actual_width Actual tile width (may be < TILE_SIZE_X for edge tiles)
* @param actual_height Actual tile height (may be < TILE_SIZE_Y for edge tiles)
*/
void tav_crop_tile_margins_edge(const float *padded_data, float *core_data,
int actual_width, int actual_height);
/**
* Calculate actual tile dimensions for a given tile position.
*
* Edge tiles may be smaller than the standard tile size.
*
* @param frame_width Full frame width
* @param frame_height Full frame height
* @param tile_x Tile X index
* @param tile_y Tile Y index
* @param tile_width Output: Actual tile width
* @param tile_height Output: Actual tile height
*/
void tav_get_tile_dimensions(int frame_width, int frame_height,
int tile_x, int tile_y,
int *tile_width, int *tile_height);
#endif // TAV_ENCODER_TILE_H

View File

@@ -0,0 +1,441 @@
/**
* TAV Encoder - Utilities Library
*
* Common utility functions and helpers used across the encoder.
* Includes math utilities, clamping, filename generation, etc.
*
* Extracted from encoder_tav.c as part of library refactoring.
*/
#define _POSIX_C_SOURCE 200112L
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include <math.h>
// =============================================================================
// Math Utilities
// =============================================================================
/**
 * Limit an integer to the closed interval [min, max].
 * The lower bound is tested first, so it wins if min > max.
 */
int tav_clamp_int(int x, int min, int max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
/**
 * Limit a float to the closed interval [min, max].
 * The lower bound is tested first; a NaN input fails both comparisons and is
 * returned unchanged.
 */
float tav_clamp_float(float x, float min, float max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
/**
 * Limit a double to the closed interval [min, max].
 * The lower bound is tested first; a NaN input is returned unchanged.
 */
double tav_clamp_double(double x, double min, double max) {
    if (x < min) {
        return min;
    }
    if (x > max) {
        return max;
    }
    return x;
}
/**
* Round double to nearest integer.
*/
int tav_iround(double v) {
return (int)floor(v + 0.5);
}
/**
 * Blend two floats linearly.
 * @param a Value returned for t = 0
 * @param b Value returned for t = 1
 * @param t Blend factor; not clamped, so values outside [0, 1] extrapolate
 * @return a * (1 - t) + b * t
 */
float tav_lerp(float a, float b, float t) {
    const float from_a = a * (1.0f - t);
    const float from_b = b * t;
    return from_a + from_b;
}
/**
 * Blend two doubles linearly: a * (1 - t) + b * t.
 * The factor t is not clamped.
 */
double tav_lerp_double(double a, double b, double t) {
    const double from_a = a * (1.0 - t);
    const double from_b = b * t;
    return from_a + from_b;
}
/**
 * Return the smaller of two integers.
 */
int tav_min_int(int a, int b) {
    if (a < b) {
        return a;
    }
    return b;
}
/**
 * Return the larger of two integers.
 */
int tav_max_int(int a, int b) {
    if (a > b) {
        return a;
    }
    return b;
}
/**
 * Return the smaller of two floats.
 * If the comparison is false (including when either operand is NaN), b is returned.
 */
float tav_min_float(float a, float b) {
    if (a < b) {
        return a;
    }
    return b;
}
/**
 * Return the larger of two floats.
 * If the comparison is false (including when either operand is NaN), b is returned.
 */
float tav_max_float(float a, float b) {
    if (a > b) {
        return a;
    }
    return b;
}
/**
 * Return the magnitude of an integer (negative inputs are negated).
 */
int tav_abs_int(int x) {
    if (x < 0) {
        return -x;
    }
    return x;
}
/**
 * Return the magnitude of a float.
 * Only values that compare below zero are negated, so -0.0f and NaN pass
 * through unchanged.
 */
float tav_abs_float(float x) {
    if (x < 0.0f) {
        return -x;
    }
    return x;
}
/**
 * Signum of an integer: 1 for positive, -1 for negative, 0 for zero.
 */
int tav_sign(int x) {
    if (x > 0) {
        return 1;
    }
    if (x < 0) {
        return -1;
    }
    return 0;
}
/**
 * Test whether x is a positive power of two.
 * @return 1 if exactly one bit is set in a positive x, otherwise 0
 */
int tav_is_power_of_2(int x) {
    if (x <= 0) {
        return 0;
    }
    // Clearing the lowest set bit leaves zero only when a single bit was set
    return (x & (x - 1)) == 0;
}
/**
 * Round x up to the nearest power of two.
 * Powers of two map to themselves; any x <= 0 yields 1.
 */
int tav_next_power_of_2(int x) {
    if (x <= 0) {
        return 1;
    }

    // Smear the highest set bit of (x - 1) into every lower position ...
    x -= 1;
    for (int shift = 1; shift <= 16; shift <<= 1) {
        x |= x >> shift;
    }
    // ... so that adding one produces the next power of two
    return x + 1;
}
/**
 * Integer base-2 logarithm, rounded down.
 * @return Position of the highest set bit of x, or -1 when x <= 0
 */
int tav_floor_log2(int x) {
    if (x <= 0) {
        return -1;
    }

    int exponent = 0;
    // Count how many times x can be halved before it reaches 1
    for (int rest = x >> 1; rest != 0; rest >>= 1) {
        exponent++;
    }
    return exponent;
}
/**
 * Integer base-2 logarithm, rounded up.
 * @return Smallest e with 2^e >= x, or -1 when x <= 0
 */
int tav_ceil_log2(int x) {
    if (x <= 0) {
        return -1;
    }

    // Rounded-down logarithm: index of the highest set bit
    int exponent = 0;
    for (int rest = x >> 1; rest != 0; rest >>= 1) {
        exponent++;
    }

    // Exact powers of two (1 included) need no rounding up
    if ((1 << exponent) == x) {
        return exponent;
    }
    return exponent + 1;
}
// =============================================================================
// Random Filename Generation
// =============================================================================
/**
 * Seed rand() from the wall clock exactly once per process.
 *
 * Both filename generators share this single seeding point. With a separate
 * "seeded" flag per function, calling one generator right after the other
 * within the same second re-seeded rand() with the same time() value and
 * produced the identical 32-character stem twice.
 */
static void tav_seed_rand_once(void) {
    static int seeded = 0;
    if (!seeded) {
        srand((unsigned int)time(NULL));
        seeded = 1;
    }
}

/**
 * Write "/tmp/" followed by 32 random alphanumeric characters into filename
 * and NUL-terminate it at offset 37.
 *
 * NOTE(review): rand() seeded from time() is predictable and the name is not
 * created atomically; if these paths ever matter for security, mkstemp() would
 * be the safer choice.
 */
static void tav_write_random_tmp_stem(char *filename) {
    static const char charset[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
    const int charset_size = (int)sizeof(charset) - 1;

    tav_seed_rand_once();

    strcpy(filename, "/tmp/");
    for (int i = 0; i < 32; i++) {
        filename[5 + i] = charset[rand() % charset_size];
    }
    filename[37] = '\0';
}

/**
 * Generate a random temporary filename with .mp2 extension.
 * Format: /tmp/[32 random chars].mp2
 *
 * @param filename Output buffer (must be at least 42 bytes)
 */
void tav_generate_random_filename(char *filename) {
    tav_write_random_tmp_stem(filename);
    // 5 prefix chars + 32 random chars = offset 37; strcpy adds the terminator at 41
    strcpy(filename + 37, ".mp2");
}

/**
 * Generate a random temporary filename with custom extension.
 * Format: /tmp/[32 random chars].[ext]
 *
 * @param filename Output buffer (must hold at least 39 + strlen(ext) bytes)
 * @param ext File extension (without leading dot, e.g., "tmp", "wav")
 */
void tav_generate_random_filename_ext(char *filename, const char *ext) {
    tav_write_random_tmp_stem(filename);
    filename[37] = '.';
    strcpy(filename + 38, ext);
}
// =============================================================================
// Memory Utilities
// =============================================================================
/**
 * malloc() wrapper that never returns NULL for a non-zero request.
 * On failure it prints an error to stderr and terminates with exit(1).
 * A zero-byte request simply returns whatever malloc() returned.
 */
void *tav_malloc(size_t size) {
    void *block = malloc(size);
    if (block != NULL || size == 0) {
        return block;
    }
    fprintf(stderr, "ERROR: Failed to allocate %zu bytes\n", size);
    exit(1);
}
/**
 * calloc() wrapper that never returns NULL for a non-empty request.
 * On failure it prints an error to stderr and terminates with exit(1).
 * If count or size is zero, calloc()'s result is returned as is.
 */
void *tav_calloc(size_t count, size_t size) {
    void *block = calloc(count, size);
    if (block != NULL || count == 0 || size == 0) {
        return block;
    }
    fprintf(stderr, "ERROR: Failed to allocate %zu elements of %zu bytes\n", count, size);
    exit(1);
}
/**
 * realloc() wrapper that never returns NULL for a non-zero request.
 * On failure it prints an error to stderr and terminates with exit(1).
 * A zero-byte request returns realloc()'s result unchanged.
 */
void *tav_realloc(void *ptr, size_t size) {
    void *resized = realloc(ptr, size);
    if (resized != NULL || size == 0) {
        return resized;
    }
    fprintf(stderr, "ERROR: Failed to reallocate to %zu bytes\n", size);
    exit(1);
}
/**
 * Allocate `size` bytes whose address is a multiple of `alignment`.
 *
 * @param alignment Required alignment in bytes; must be a non-zero power of two
 * @param size      Number of bytes to allocate
 * @return Pointer to release with tav_aligned_free(), or NULL if the alignment
 *         is invalid or the allocation fails
 *
 * The power-of-two test is done directly on the size_t value; routing it
 * through an int-typed helper would truncate large alignments. On the
 * posix_memalign() path, alignments smaller than sizeof(void *) are raised to
 * sizeof(void *), because posix_memalign() rejects anything that is not a
 * multiple of the pointer size. The larger alignment still satisfies the
 * caller's request.
 */
void *tav_aligned_alloc(size_t alignment, size_t size) {
    // Ensure alignment is a non-zero power of 2
    if (alignment == 0 || (alignment & (alignment - 1)) != 0) {
        fprintf(stderr, "ERROR: Alignment must be power of 2, got %zu\n", alignment);
        return NULL;
    }
#ifdef _WIN32
    return _aligned_malloc(size, alignment);
#else
    if (alignment < sizeof(void *)) {
        alignment = sizeof(void *);
    }
    void *ptr = NULL;
    if (posix_memalign(&ptr, alignment, size) != 0) {
        return NULL;
    }
    return ptr;
#endif
}
/**
* Release memory obtained from tav_aligned_alloc().
*
* Uses _aligned_free() on Windows builds and plain free() everywhere else,
* matching the allocator chosen by tav_aligned_alloc() under the same #ifdef.
*
* @param ptr Pointer to release
*/
void tav_aligned_free(void *ptr) {
#ifdef _WIN32
_aligned_free(ptr);
#else
free(ptr);
#endif
}
// =============================================================================
// Array Utilities
// =============================================================================
/**
 * Set the first `count` elements of an int array to `value`.
 */
void tav_array_fill_int(int *array, size_t count, int value) {
    int *cursor = array;
    size_t remaining = count;
    while (remaining > 0) {
        *cursor++ = value;
        remaining--;
    }
}
/**
 * Set the first `count` elements of a float array to `value`.
 */
void tav_array_fill_float(float *array, size_t count, float value) {
    float *cursor = array;
    size_t remaining = count;
    while (remaining > 0) {
        *cursor++ = value;
        remaining--;
    }
}
/**
 * Copy `count` ints from src to dst via memcpy, so the two ranges must not overlap.
 */
void tav_array_copy_int(int *dst, const int *src, size_t count) {
    const size_t byte_count = count * sizeof(int);
    memcpy(dst, src, byte_count);
}
/**
 * Copy `count` floats from src to dst via memcpy, so the two ranges must not overlap.
 */
void tav_array_copy_float(float *dst, const float *src, size_t count) {
    const size_t byte_count = count * sizeof(float);
    memcpy(dst, src, byte_count);
}
/**
 * Return the largest element of an int array.
 * An empty array (count == 0) yields 0.
 */
int tav_array_max_int(const int *array, size_t count) {
    if (count == 0) {
        return 0;
    }

    int best = array[0];
    size_t idx = 1;
    while (idx < count) {
        const int candidate = array[idx++];
        best = (candidate > best) ? candidate : best;
    }
    return best;
}
/**
 * Return the smallest element of an int array.
 * An empty array (count == 0) yields 0.
 */
int tav_array_min_int(const int *array, size_t count) {
    if (count == 0) {
        return 0;
    }

    int best = array[0];
    size_t idx = 1;
    while (idx < count) {
        const int candidate = array[idx++];
        best = (candidate < best) ? candidate : best;
    }
    return best;
}
/**
 * Return the largest magnitude (fabsf) found in a float array.
 * An empty array (count == 0) yields 0.0f.
 */
float tav_array_max_abs_float(const float *array, size_t count) {
    if (count == 0) {
        return 0.0f;
    }

    float peak = fabsf(array[0]);
    size_t idx = 1;
    while (idx < count) {
        const float magnitude = fabsf(array[idx++]);
        if (magnitude > peak) {
            peak = magnitude;
        }
    }
    return peak;
}
/**
 * Add up an int array in a long long accumulator.
 * An empty array yields 0.
 */
long long tav_array_sum_int(const int *array, size_t count) {
    long long total = 0;
    const int *cursor = array;
    size_t remaining = count;
    while (remaining > 0) {
        total += *cursor++;
        remaining--;
    }
    return total;
}
/**
 * Add up a float array, front to back, in a double accumulator.
 * An empty array yields 0.0.
 */
double tav_array_sum_float(const float *array, size_t count) {
    double total = 0.0;
    const float *cursor = array;
    size_t remaining = count;
    while (remaining > 0) {
        total += *cursor++;
        remaining--;
    }
    return total;
}
/**
 * Arithmetic mean of a float array.
 * The sum is accumulated front to back in double precision and the quotient
 * is narrowed to float. An empty array yields 0.0f.
 */
float tav_array_mean_float(const float *array, size_t count) {
    if (count == 0) {
        return 0.0f;
    }

    double total = 0.0;
    for (size_t idx = 0; idx < count; idx++) {
        total += array[idx];
    }
    return (float)(total / count);
}
/**
 * Exchange the two ints that a and b point to.
 */
void tav_swap_int(int *a, int *b) {
    const int first = *a;
    const int second = *b;
    *a = second;
    *b = first;
}
/**
 * Exchange the two floats that a and b point to.
 */
void tav_swap_float(float *a, float *b) {
    const float first = *a;
    const float second = *b;
    *a = second;
    *b = first;
}
/**
 * Exchange the two void pointers that a and b point to.
 */
void tav_swap_ptr(void **a, void **b) {
    void *const first = *a;
    void *const second = *b;
    *a = second;
    *b = first;
}

View File

@@ -0,0 +1,165 @@
/**
* TAV Encoder - Utilities Library
*
* Public API for common utility functions and helpers.
*/
#ifndef TAV_ENCODER_UTILS_H
#define TAV_ENCODER_UTILS_H
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
// =============================================================================
// Math Utilities
// =============================================================================
/** Clamp integer value to range [min, max] */
int tav_clamp_int(int x, int min, int max);
/** Clamp float value to range [min, max] */
float tav_clamp_float(float x, float min, float max);
/** Clamp double value to range [min, max] */
double tav_clamp_double(double x, double min, double max);
/** Round double to nearest integer */
int tav_iround(double v);
/** Linear interpolation between two floats */
float tav_lerp(float a, float b, float t);
/** Linear interpolation between two doubles */
double tav_lerp_double(double a, double b, double t);
/** Get minimum of two integers */
int tav_min_int(int a, int b);
/** Get maximum of two integers */
int tav_max_int(int a, int b);
/** Get minimum of two floats */
float tav_min_float(float a, float b);
/** Get maximum of two floats */
float tav_max_float(float a, float b);
/** Compute absolute value of integer */
int tav_abs_int(int x);
/** Compute absolute value of float */
float tav_abs_float(float x);
/** Sign function: returns -1, 0, or 1 */
int tav_sign(int x);
/** Check if integer is power of 2 */
int tav_is_power_of_2(int x);
/** Round up to next power of 2 */
int tav_next_power_of_2(int x);
/** Compute floor of log2(x) */
int tav_floor_log2(int x);
/** Compute ceil of log2(x) */
int tav_ceil_log2(int x);
// =============================================================================
// Random Filename Generation
// =============================================================================
/**
* Generate a random temporary filename with .mp2 extension.
* Format: /tmp/[32 random chars].mp2
*
* @param filename Output buffer (must be at least 42 bytes)
*/
void tav_generate_random_filename(char *filename);
/**
* Generate a random temporary filename with custom extension.
* Format: /tmp/[32 random chars].[ext]
*
* @param filename Output buffer (must hold at least 39 + strlen(ext) bytes:
*                 "/tmp/" (5), 32 random chars, the dot, ext, and the terminating NUL)
* @param ext File extension (without leading dot)
*/
void tav_generate_random_filename_ext(char *filename, const char *ext);
// =============================================================================
// Memory Utilities
// =============================================================================
/** Safe malloc with error checking (exits on failure) */
void *tav_malloc(size_t size);
/** Safe calloc with error checking (exits on failure) */
void *tav_calloc(size_t count, size_t size);
/** Safe realloc with error checking (exits on failure) */
void *tav_realloc(void *ptr, size_t size);
/** Allocate aligned memory (returns NULL on failure) */
void *tav_aligned_alloc(size_t alignment, size_t size);
/** Free aligned memory */
void tav_aligned_free(void *ptr);
// =============================================================================
// Array Utilities
// =============================================================================
/** Fill integer array with constant value */
void tav_array_fill_int(int *array, size_t count, int value);
/** Fill float array with constant value */
void tav_array_fill_float(float *array, size_t count, float value);
/** Copy integer array */
void tav_array_copy_int(int *dst, const int *src, size_t count);
/** Copy float array */
void tav_array_copy_float(float *dst, const float *src, size_t count);
/** Find maximum value in integer array */
int tav_array_max_int(const int *array, size_t count);
/** Find minimum value in integer array */
int tav_array_min_int(const int *array, size_t count);
/** Find maximum absolute value in float array */
float tav_array_max_abs_float(const float *array, size_t count);
/** Compute sum of integer array */
long long tav_array_sum_int(const int *array, size_t count);
/** Compute sum of float array */
double tav_array_sum_float(const float *array, size_t count);
/** Compute mean of float array */
float tav_array_mean_float(const float *array, size_t count);
/** Swap two integer values */
void tav_swap_int(int *a, int *b);
/** Swap two float values */
void tav_swap_float(float *a, float *b);
/** Swap two pointer values */
void tav_swap_ptr(void **a, void **b);
// =============================================================================
// Convenience Macros (for backward compatibility)
// =============================================================================
#define CLAMP(x, min, max) tav_clamp_int(x, min, max)
#define FCLAMP(x, min, max) tav_clamp_float(x, min, max)
#ifdef __cplusplus
}
#endif
#endif // TAV_ENCODER_UTILS_H

152
video_encoder/range_coder.c Normal file
View File

@@ -0,0 +1,152 @@
// Simple range coder for TAD audio codec
// Based on range coding with Laplacian probability model
#include "range_coder.h"
#include <string.h>
#include <math.h>
#define TOP_VALUE 0xFFFFFFFFU
#define BOTTOM_VALUE 0x00FFFFFF
/**
 * Append one byte to the encoder's output buffer.
 * When the buffer is already full the byte is silently dropped.
 */
static inline void range_encoder_put_byte(RangeEncoder *enc, uint8_t byte) {
    if (enc->buffer_pos >= enc->buffer_capacity) {
        return;
    }
    enc->buffer[enc->buffer_pos] = byte;
    enc->buffer_pos += 1;
}
/**
 * Fetch the next input byte and advance the read position.
 * Once the input is exhausted, 0 is returned and the position stays put.
 */
static inline uint8_t range_decoder_get_byte(RangeDecoder *dec) {
    if (dec->buffer_pos >= dec->buffer_size) {
        return 0;
    }
    const uint8_t next = dec->buffer[dec->buffer_pos];
    dec->buffer_pos += 1;
    return next;
}
/**
 * Keep the encoder's range above BOTTOM_VALUE.
 * While the range is too small, the top byte of `low` is emitted and both
 * `low` and `range` are shifted left by eight bits.
 */
static void range_encoder_renormalise(RangeEncoder *enc) {
    for (;;) {
        if (enc->range > BOTTOM_VALUE) {
            break;
        }
        range_encoder_put_byte(enc, (enc->low >> 24) & 0xFF);
        enc->low <<= 8;
        enc->range <<= 8;
    }
}
/**
 * Keep the decoder's range above BOTTOM_VALUE.
 * While the range is too small, one more input byte is shifted into `code`
 * and both `low` and `range` are shifted left by eight bits.
 */
static void range_decoder_renormalise(RangeDecoder *dec) {
    for (;;) {
        if (dec->range > BOTTOM_VALUE) {
            break;
        }
        dec->code = (dec->code << 8) | range_decoder_get_byte(dec);
        dec->low <<= 8;
        dec->range <<= 8;
    }
}
/**
 * Reset an encoder so that it writes to `buffer` from offset 0.
 *
 * @param enc      Encoder state to initialise
 * @param buffer   Output buffer (not copied; the encoder writes into it)
 * @param capacity Size of `buffer` in bytes
 */
void range_encoder_init(RangeEncoder *enc, uint8_t *buffer, size_t capacity) {
    // Output side
    enc->buffer = buffer;
    enc->buffer_capacity = capacity;
    enc->buffer_pos = 0;

    // Coding interval starts out covering the full 32-bit span
    enc->low = 0;
    enc->range = TOP_VALUE;
}
// Calculate Laplacian CDF for a given value
// CDF(x) = 0.5 * exp(λx) for x < 0
// CDF(x) = 1 - 0.5 * exp(-λx) for x ≥ 0
static inline double laplacian_cdf(int16_t value, float lambda) {
if (value < 0) {
return 0.5 * exp(lambda * value);
} else {
return 1.0 - 0.5 * exp(-lambda * value);
}
}
/**
* Encode one signed 16-bit symbol under a Laplacian model.
*
* The symbol is clamped to [-max_abs_value, max_abs_value]. Its slice of the
* probability space is [CDF(value - 1), CDF(value)), scaled to integer counts
* out of SCALE (65536), with the lowest symbol starting at 0. Every symbol is
* given at least one count so it stays encodable.
*
* @param enc Encoder state (low/range are updated; bytes may be emitted)
* @param value Symbol to encode (clamped as described above)
* @param max_abs_value Largest magnitude that can be represented
* @param lambda Laplacian rate parameter passed to laplacian_cdf()
*
* NOTE(review): `enc->low +=` is 32-bit arithmetic and no carry handling is
* visible in this function or in the renormalisation loop - confirm that a
* wrap-around of `low` cannot corrupt already-emitted bytes.
*/
void range_encode_int16_laplacian(RangeEncoder *enc, int16_t value, int16_t max_abs_value, float lambda) {
// Clamp to valid range
if (value < -max_abs_value) value = -max_abs_value;
if (value > max_abs_value) value = max_abs_value;
// Calculate cumulative probabilities using Laplacian distribution
// We need the CDF at value-1 and at value to get the probability mass for this symbol;
// the smallest symbol has nothing below it, so its lower bound is 0
double cdf_low = (value == -max_abs_value) ? 0.0 : laplacian_cdf(value - 1, lambda);
double cdf_high = laplacian_cdf(value, lambda);
// Normalise to get cumulative counts in range [0, SCALE]
const uint32_t SCALE = 0x10000; // 65536 for precision
uint32_t cum_low = (uint32_t)(cdf_low * SCALE);
uint32_t cum_high = (uint32_t)(cdf_high * SCALE);
// Ensure we have at least 1 unit of probability
if (cum_high <= cum_low) cum_high = cum_low + 1;
if (cum_high > SCALE) cum_high = SCALE;
// Encode using cumulative probabilities: narrow [low, low + range) to the symbol's slice
// (64-bit intermediate so that range * count cannot overflow)
uint64_t range_64 = (uint64_t)enc->range;
enc->low += (uint32_t)((range_64 * cum_low) / SCALE);
enc->range = (uint32_t)((range_64 * (cum_high - cum_low)) / SCALE);
range_encoder_renormalise(enc);
}
/**
 * Flush the encoder by emitting the four bytes of `low`, most significant first.
 *
 * @return Number of bytes in the output buffer after flushing
 */
size_t range_encoder_finish(RangeEncoder *enc) {
    // Flush remaining bytes
    for (int bytes_left = 4; bytes_left > 0; bytes_left--) {
        range_encoder_put_byte(enc, (enc->low >> 24) & 0xFF);
        enc->low <<= 8;
    }
    return enc->buffer_pos;
}
/**
 * Reset a decoder to read from `buffer` and preload the first four input
 * bytes (big-endian) into `code`.
 *
 * @param dec    Decoder state to initialise
 * @param buffer Encoded input (not copied)
 * @param size   Number of valid bytes in `buffer`
 */
void range_decoder_init(RangeDecoder *dec, const uint8_t *buffer, size_t size) {
    // Input side
    dec->buffer = buffer;
    dec->buffer_size = size;
    dec->buffer_pos = 0;

    // Coding interval starts out covering the full 32-bit span
    dec->low = 0;
    dec->range = TOP_VALUE;
    dec->code = 0;

    // Read initial bytes into code
    for (int bytes_left = 4; bytes_left > 0; bytes_left--) {
        dec->code = (dec->code << 8) | range_decoder_get_byte(dec);
    }
}
/**
* Decode one signed 16-bit symbol under a Laplacian model.
*
* The decoder's current position is mapped to a count in [0, SCALE). A binary
* search over [-max_abs_value, max_abs_value] then looks for the symbol whose
* count interval contains it, computed with the same CDF(value - 1) / CDF(value)
* rule as the encoder.
*
* @param dec Decoder state (low/range/code are updated)
* @param max_abs_value Largest magnitude that can be represented; must match the encoder
* @param lambda Laplacian rate parameter; must match the encoder
* @return The decoded symbol, or 0 if the search finds no matching interval
*
* NOTE(review): the encoder additionally caps cum_high at SCALE; that cap is
* not repeated here - confirm it can never trigger, otherwise the two sides
* would disagree.
*/
int16_t range_decode_int16_laplacian(RangeDecoder *dec, int16_t max_abs_value, float lambda) {
const uint32_t SCALE = 0x10000; // Must match encoder
// Calculate current position in probability space
uint64_t range_64 = (uint64_t)dec->range;
uint32_t cum_freq = (uint32_t)(((uint64_t)(dec->code - dec->low) * SCALE) / range_64);
// Binary search to find symbol whose CDF range contains cum_freq
int16_t low = -max_abs_value;
int16_t high = max_abs_value;
int16_t value = 0;
while (low <= high) {
int16_t mid = (low + high) / 2;
// Count interval of the candidate symbol, computed exactly as in the encoder
double cdf_low = (mid == -max_abs_value) ? 0.0 : laplacian_cdf(mid - 1, lambda);
double cdf_high = laplacian_cdf(mid, lambda);
uint32_t cum_low = (uint32_t)(cdf_low * SCALE);
uint32_t cum_high = (uint32_t)(cdf_high * SCALE);
// Every symbol owns at least one count
if (cum_high <= cum_low) cum_high = cum_low + 1;
if (cum_freq >= cum_low && cum_freq < cum_high) {
// Found the symbol
value = mid;
// Update decoder state
dec->low += (uint32_t)((range_64 * cum_low) / SCALE);
dec->range = (uint32_t)((range_64 * (cum_high - cum_low)) / SCALE);
range_decoder_renormalise(dec);
return value;
} else if (cum_freq < cum_low) {
// Target lies below this symbol's interval: continue with smaller symbols
high = mid - 1;
} else {
// Target lies at or above this symbol's upper bound: continue with larger symbols
low = mid + 1;
}
}
// Fallback: shouldn't happen with correct encoding
// (value is still its initial 0 here; low/range are not narrowed)
range_decoder_renormalise(dec);
return value;
}

View File

@@ -0,0 +1,42 @@
#ifndef RANGE_CODER_H
#define RANGE_CODER_H
#include <stdint.h>
#include <stddef.h>
// Simple range coder for signed 16-bit integers
// Uses a fixed Laplacian probability model whose λ is supplied on every call
// (no adaptive frequency tables are maintained)
typedef struct {
uint32_t low;
uint32_t range;
uint8_t *buffer;
size_t buffer_pos;
size_t buffer_capacity;
} RangeEncoder;
typedef struct {
uint32_t low;
uint32_t range;
uint32_t code;
const uint8_t *buffer;
size_t buffer_pos;
size_t buffer_size;
} RangeDecoder;
// Initialise encoder
void range_encoder_init(RangeEncoder *enc, uint8_t *buffer, size_t capacity);
// Encode a signed 16-bit value with a zero-centred (μ=0) Laplacian distribution; λ is given by `lambda`
// and the value is clamped to [-max_abs_value, max_abs_value]
void range_encode_int16_laplacian(RangeEncoder *enc, int16_t value, int16_t max_abs_value, float lambda);
// Finalise encoding and return bytes written
size_t range_encoder_finish(RangeEncoder *enc);
// Initialise decoder
void range_decoder_init(RangeDecoder *dec, const uint8_t *buffer, size_t size);
// Decode a signed 16-bit value with a zero-centred (μ=0) Laplacian distribution; `max_abs_value` and
// `lambda` must match the values used by the encoder
int16_t range_decode_int16_laplacian(RangeDecoder *dec, int16_t max_abs_value, float lambda);
#endif // RANGE_CODER_H

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,344 @@
// Created by CuriousTorvald and Claude on 2025-10-24.
// TAD32 (Terrarum Advanced Audio - PCM32 version) Encoder - Standalone program
// Alternative version: PCM32 throughout encoding, PCM8 conversion only at decoder
// Uses encoder_tad32.c library for encoding functions
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <getopt.h>
#include <math.h>
#include <time.h>
#include "encoder_tad.h"
// Vendor/version string shown in the usage text and in the verbose banner
#define ENCODER_VENDOR_STRING "Encoder-TAD32 (PCM32f version) 20251107"
// TAD32 format constants
// Number of sample frames handed to each tad32_encode_chunk() call.
// NOTE(review): the previous remark described this as "a prime number"; 32768 is 2^15,
// so that remark looks stale - confirm the intended chunk size.
#define TAD32_DEFAULT_CHUNK_SIZE 32768
// Scratch path for the PCM data FFmpeg extracts; filled in by generate_random_filename().
// 42 bytes = "/tmp/" (5) + 32 random characters + ".tad" (4) + terminator.
char TEMP_PCM_FILE[42];

// Write "/tmp/<32 random alphanumerics>.tad" into `filename`.
// `filename` must have room for at least 42 bytes. The C library PRNG is reseeded from the
// current time on every call, so two calls within the same second yield the same name.
static void generate_random_filename(char *filename) {
    static const char alphabet[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
    const int alphabet_len = sizeof(alphabet) - 1;
    char *cursor = filename;

    srand(time(NULL));

    // Fixed directory prefix
    memcpy(cursor, "/tmp/", 5);
    cursor += 5;

    // 32 random characters drawn from the alphabet
    for (char *stop = cursor + 32; cursor != stop; ++cursor) {
        *cursor = alphabet[rand() % alphabet_len];
    }

    // Extension plus the terminating NUL (lands on index 41)
    memcpy(cursor, ".tad", 5);
}
//=============================================================================
// Main Encoder
//=============================================================================
// Print the command-line help text to stdout.
// prog_name: program name shown on the "Usage:" line (main passes argv[0]).
static void print_usage(const char *prog_name) {
    printf("Usage: %s -i <input> [options]\n", prog_name);
    printf("Options:\n");
    printf(" -i <file> Input audio file (any format supported by FFmpeg)\n");
    printf(" -o <file> Output TAD32 file (optional, auto-generated as input.qN.tad)\n");
    printf(" -q <level> Quality level (0-5, default: %d)\n", TAD32_QUALITY_DEFAULT);
    printf(" 0 = lowest quality/smallest (max_index=31)\n");
    printf(" 1 = low quality (max_index=35)\n");
    printf(" 2 = medium quality (max_index=39)\n");
    printf(" 3 = good quality (max_index=47) [DEFAULT]\n");
    printf(" 4 = high quality (max_index=56)\n");
    printf(" 5 = very high quality/largest (max_index=89)\n");
    // -s is accepted by the option parser in main(); it was previously undocumented here
    printf(" -s <scale> Quantiser scale (0.5-4.0, default: 1.0)\n");
    printf(" -v Verbose output\n");
    printf(" -h, --help Show this help\n");
    printf("\nVersion: %s\n", ENCODER_VENDOR_STRING);
    printf("Note: This is the PCM32 alternative version for comparison testing.\n");
    printf(" PCM32 is processed throughout encoding; PCM8 conversion happens at decoder.\n");
}
// Build the default output path "<input without its extension>.q<quality>.tad".
// Returns a malloc'd string that the caller must free, or NULL if allocation fails.
static char *build_auto_output_path(const char *input_file, int quality) {
    size_t input_len = strlen(input_file);
    size_t capacity = input_len + 32; // Extra space for .qNN.tad
    char *path = malloc(capacity);
    if (!path) {
        return NULL;
    }

    // Find the last directory separator so a dot inside a directory name is not
    // mistaken for the start of the extension
    const char *basename_start = strrchr(input_file, '/');
    if (!basename_start) basename_start = strrchr(input_file, '\\');
    basename_start = basename_start ? basename_start + 1 : input_file;

    // A dot at the very start of the basename is part of the name, not an extension
    const char *ext = strrchr(basename_start, '.');
    size_t stem_len = (ext && ext > basename_start) ? (size_t)(ext - input_file) : input_len;

    memcpy(path, input_file, stem_len);
    snprintf(path + stem_len, capacity - stem_len, ".q%d.tad", quality);
    return path;
}

// Ask ffprobe for the sample rate of the first audio stream of `input_file`.
// Returns 48000 when ffprobe cannot be run or reports a value outside 1..192000.
// NOTE(review): input_file is interpolated into a shell command line; a path containing
// quotes or shell metacharacters is interpreted by the shell.
static int probe_input_sample_rate(const char *input_file) {
    char sample_rate_str[32] = "48000"; // Default fallback
    char detect_cmd[2048];
    snprintf(detect_cmd, sizeof(detect_cmd),
             "ffprobe -v error -select_streams a:0 -show_entries stream=sample_rate "
             "-of default=noprint_wrappers=1:nokey=1 \"%s\" 2>/dev/null",
             input_file);
    FILE *probe = popen(detect_cmd, "r");
    if (probe) {
        if (fgets(sample_rate_str, sizeof(sample_rate_str), probe)) {
            // Remove newline
            sample_rate_str[strcspn(sample_rate_str, "\n")] = 0;
        }
        pclose(probe);
    }
    int rate = atoi(sample_rate_str);
    if (rate <= 0 || rate > 192000) {
        rate = 48000; // Fallback
    }
    return rate;
}

// Run the two FFmpeg passes that leave float32 PCM at TAD32_SAMPLE_RATE in TEMP_PCM_FILE.
// Returns 0 on success. On failure an error is printed, any temporary file is removed,
// and -1 is returned.
// NOTE(review): input_file is interpolated into a shell command line (see above).
static int extract_pcm_to_temp(const char *input_file, int original_rate) {
    // Pass 1: Extract at original sample rate
    char temp_original_pcm[256];
    snprintf(temp_original_pcm, sizeof(temp_original_pcm), "%s.orig", TEMP_PCM_FILE);
    char ffmpeg_cmd[2048];
    snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd),
             "ffmpeg -hide_banner -v error -i \"%s\" -f f32le -acodec pcm_f32le -ac %d -y \"%s\" 2>&1",
             input_file, TAD32_CHANNELS, temp_original_pcm);
    if (system(ffmpeg_cmd) != 0) {
        fprintf(stderr, "Error: FFmpeg extraction failed\n");
        remove(temp_original_pcm); // FFmpeg may have left a partial file behind
        return -1;
    }

    // Pass 2: Resample to 32kHz with high-quality SoXR resampler and highpass filter
    snprintf(ffmpeg_cmd, sizeof(ffmpeg_cmd),
             "ffmpeg -hide_banner -v error -f f32le -ar %d -ac %d -i \"%s\" "
             "-f f32le -acodec pcm_f32le -ar %d -ac %d "
             "-af \"aresample=resampler=soxr:precision=28:cutoff=0.99:dither_scale=0,highpass=f=16\" "
             "-y \"%s\" 2>&1",
             original_rate, TAD32_CHANNELS, temp_original_pcm, TAD32_SAMPLE_RATE, TAD32_CHANNELS, TEMP_PCM_FILE);
    int result = system(ffmpeg_cmd);
    remove(temp_original_pcm); // Clean up intermediate file
    if (result != 0) {
        fprintf(stderr, "Error: FFmpeg resampling failed\n");
        remove(TEMP_PCM_FILE);
        return -1;
    }
    return 0;
}

// Read `total_samples` sample frames from pcm_file in TAD32_DEFAULT_CHUNK_SIZE pieces,
// encode each piece with tad32_encode_chunk() and append the result to `output`.
// *total_output_size receives the number of bytes written.
// Returns 0 on success, -1 after printing an error.
static int encode_all_chunks(FILE *pcm_file, FILE *output, size_t total_samples, size_t num_chunks,
                             int max_index, float quantiser_scale, int verbose,
                             size_t *total_output_size) {
    int status = -1;
    float *chunk_buffer = malloc(TAD32_DEFAULT_CHUNK_SIZE * TAD32_CHANNELS * sizeof(float));
    uint8_t *output_buffer = malloc(TAD32_DEFAULT_CHUNK_SIZE * 4 * sizeof(float)); // Generous buffer
    *total_output_size = 0;
    if (!chunk_buffer || !output_buffer) {
        fprintf(stderr, "Error: Memory allocation failed\n");
        goto done;
    }

    for (size_t chunk_idx = 0; chunk_idx < num_chunks; chunk_idx++) {
        size_t chunk_samples = TAD32_DEFAULT_CHUNK_SIZE;
        size_t remaining = total_samples - (chunk_idx * TAD32_DEFAULT_CHUNK_SIZE);
        if (remaining < TAD32_DEFAULT_CHUNK_SIZE) {
            chunk_samples = remaining;
        }

        // Read chunk
        size_t samples_read = fread(chunk_buffer, TAD32_CHANNELS * sizeof(float),
                                    chunk_samples, pcm_file);

        // Zero everything fread did not fill. Starting at samples_read (rather than at
        // chunk_samples) matters for the odd-length pad frame, which is never present in
        // the file: without this it would hold stale data from the previous chunk.
        if (samples_read < TAD32_DEFAULT_CHUNK_SIZE) {
            memset(&chunk_buffer[samples_read * TAD32_CHANNELS], 0,
                   (TAD32_DEFAULT_CHUNK_SIZE - samples_read) * TAD32_CHANNELS * sizeof(float));
        }

        // Encode chunk using linked tad32_encode_chunk()
        size_t encoded_size = tad32_encode_chunk(chunk_buffer, TAD32_DEFAULT_CHUNK_SIZE,
                                                 max_index,
                                                 quantiser_scale, TAD32_ZSTD_LEVEL, output_buffer);
        if (encoded_size == 0) {
            fprintf(stderr, "Error: Chunk encoding failed at chunk %zu\n", chunk_idx);
            goto done;
        }

        // Write chunk to output
        if (fwrite(output_buffer, 1, encoded_size, output) != encoded_size) {
            fprintf(stderr, "Error: Could not write to output file\n");
            goto done;
        }
        *total_output_size += encoded_size;

        if (verbose && (chunk_idx % 10 == 0 || chunk_idx == num_chunks - 1)) {
            printf("Processed chunk %zu/%zu (%.1f%%)\r", chunk_idx + 1, num_chunks,
                   (chunk_idx + 1) * 100.0 / num_chunks);
            fflush(stdout);
        }
    }
    status = 0;

done:
    free(chunk_buffer);
    free(output_buffer);
    return status;
}

// Program entry point: parse options, extract PCM via FFmpeg into a temporary file,
// encode it chunk by chunk into the output file and print a size summary.
// Returns 0 on success and 1 on any error.
int main(int argc, char *argv[]) {
    generate_random_filename(TEMP_PCM_FILE);

    char *input_file = NULL;
    char *output_file = NULL;
    int quality = TAD32_QUALITY_DEFAULT; // Default quality level (0-5)
    float quantiser_scale = 1.0f; // Default quantiser scaling
    int verbose = 0;

    // Parse command line arguments
    static struct option long_options[] = {
        {"help", no_argument, 0, 'h'},
        {0, 0, 0, 0}
    };
    int opt;
    int option_index = 0;
    while ((opt = getopt_long(argc, argv, "i:o:q:s:vh", long_options, &option_index)) != -1) {
        switch (opt) {
        case 'i':
            input_file = optarg;
            break;
        case 'o':
            output_file = optarg;
            break;
        case 'q':
            quality = atoi(optarg);
            if (quality < TAD32_QUALITY_MIN || quality > TAD32_QUALITY_MAX) {
                fprintf(stderr, "Error: Quality must be in range %d-%d\n", TAD32_QUALITY_MIN, TAD32_QUALITY_MAX);
                return 1;
            }
            break;
        case 's':
            quantiser_scale = atof(optarg);
            if (quantiser_scale < 0.5f || quantiser_scale > 4.0f) {
                fprintf(stderr, "Error: Quantiser scale must be in range 0.5-4.0\n");
                return 1;
            }
            break;
        case 'v':
            verbose = 1;
            break;
        case 'h':
            print_usage(argv[0]);
            return 0;
        default:
            print_usage(argv[0]);
            return 1;
        }
    }
    if (!input_file) {
        fprintf(stderr, "Error: Input file is required\n");
        print_usage(argv[0]);
        return 1;
    }

    // Convert quality (0-5) to max_index for quantisation
    int max_index = tad32_quality_to_max_index(quality);

    // Generate output filename if not provided; generated_output owns that allocation
    char *generated_output = NULL;
    if (!output_file) {
        generated_output = build_auto_output_path(input_file, quality);
        if (!generated_output) {
            fprintf(stderr, "Error: Memory allocation failed\n");
            return 1;
        }
        output_file = generated_output;
        if (verbose) {
            printf("Auto-generated output path: %s\n", output_file);
        }
    }
    if (verbose) {
        printf("%s\n", ENCODER_VENDOR_STRING);
        printf("Input: %s\n", input_file);
        printf("Output: %s\n", output_file);
        printf("Quality level: %d (max_index=%d)\n", quality, max_index);
        printf("Quantiser scale: %.2f\n", quantiser_scale);
    }

    // Detect original sample rate for high-quality resampling
    int original_rate = probe_input_sample_rate(input_file);
    if (verbose) {
        printf("Detected original sample rate: %d Hz\n", original_rate);
        printf("Extracting and resampling audio to %d Hz...\n", TAD32_SAMPLE_RATE);
    }

    // Extract and resample in two passes for better quality
    if (extract_pcm_to_temp(input_file, original_rate) != 0) {
        free(generated_output);
        return 1;
    }

    // Open PCM file
    FILE *pcm_file = fopen(TEMP_PCM_FILE, "rb");
    if (!pcm_file) {
        fprintf(stderr, "Error: Could not open temporary PCM file\n");
        remove(TEMP_PCM_FILE);
        free(generated_output);
        return 1;
    }

    // Get file size
    fseek(pcm_file, 0, SEEK_END);
    long pcm_end = ftell(pcm_file);
    fseek(pcm_file, 0, SEEK_SET);
    if (pcm_end < 0) {
        // ftell failed; converting -1 to size_t would yield a bogus, enormous sample count
        fprintf(stderr, "Error: Could not determine size of temporary PCM file\n");
        fclose(pcm_file);
        remove(TEMP_PCM_FILE);
        free(generated_output);
        return 1;
    }
    size_t pcm_size = (size_t)pcm_end;
    size_t total_samples = pcm_size / (TAD32_CHANNELS * sizeof(float));

    // Pad to even sample count
    if (total_samples % 2 == 1) {
        total_samples++;
        if (verbose) {
            printf("Odd sample count detected, padding with one zero sample\n");
        }
    }
    size_t num_chunks = (total_samples + TAD32_DEFAULT_CHUNK_SIZE - 1) / TAD32_DEFAULT_CHUNK_SIZE;
    if (verbose) {
        printf("Total samples: %zu (%.2f seconds)\n", total_samples,
               (double)total_samples / TAD32_SAMPLE_RATE);
        printf("Chunks: %zu (chunk size: %d samples)\n", num_chunks, TAD32_DEFAULT_CHUNK_SIZE);
    }

    // Open output file
    FILE *output = fopen(output_file, "wb");
    if (!output) {
        fprintf(stderr, "Error: Could not open output file\n");
        fclose(pcm_file);
        remove(TEMP_PCM_FILE);
        free(generated_output);
        return 1;
    }

    // Process chunks using linked TAD32 encoder library
    size_t total_output_size = 0;
    int encode_status = encode_all_chunks(pcm_file, output, total_samples, num_chunks,
                                          max_index, quantiser_scale, verbose,
                                          &total_output_size);
    if (encode_status != 0) {
        fclose(pcm_file);
        fclose(output);
        remove(TEMP_PCM_FILE);
        free(generated_output);
        return 1;
    }
    if (verbose) {
        printf("\n");
    }

    // Print coefficient statistics if enabled
    tad32_print_statistics();
    tad32_free_statistics();

    // Cleanup
    fclose(pcm_file);
    fclose(output);
    remove(TEMP_PCM_FILE);
    free(generated_output);

    // Print statistics
    size_t pcmu8_size = total_samples * TAD32_CHANNELS; // PCMu8 baseline
    float compression_ratio = (float)pcmu8_size / total_output_size;
    printf("Encoding complete!\n");
    printf("PCMu8 size: %zu bytes\n", pcmu8_size);
    printf("TAD32 size: %zu bytes\n", total_output_size);
    printf("Compression ratio: %.2f:1 (%.1f%% of PCMu8)\n",
           compression_ratio, (total_output_size * 100.0) / pcmu8_size);
    if (compression_ratio < 1.8) {
        printf("Warning: Compression ratio below 2:1 target. Try lower quantisation bits or different settings.\n");
    }
    return 0;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,294 @@
// Visualise DWT Coefficients as Image
// Converts .bin coefficient file to PPM image with logarithmic color mapping
// Usage: ./visualise_coefficients <input.bin> <output.ppm> <width> <height>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <math.h>
// Logarithmic color mapping for coefficient visualisation
// Zero: Black (#000000)
// Positive: Red to Yellow (#FF0000 to #FFFF00) - logarithmic
// Negative: Blue to Cyan (#0000FF to #00FFFF) - logarithmic
typedef struct {
uint8_t r, g, b;
} rgb_t;
static rgb_t map_coefficient_to_color(int16_t coeff) {
rgb_t color = {0, 0, 0};
if (coeff == 0) {
// Zero: pure black
return color;
}
if (coeff == 1) {
// +1: Light green #55FF55
color.r = 0x55;
color.g = 0xFF;
color.b = 0x55;
return color;
}
if (coeff == -1) {
// -1: Dark green #005500
color.r = 0x00;
color.g = 0x55;
color.b = 0x00;
return color;
}
if (coeff > 0) {
// Positive: Red (#FF0000) to Yellow (#FFFF00)
// Logarithmic mapping: log2(1) = 0, log2(32767) ≈ 14.99
double log_val = log2((double)coeff);
double log_max = log2(32767.0);
double normalised = log_val / log_max; // 0.0 to 1.0
color.r = 255;
color.g = (uint8_t)(normalised * 255.0);
color.b = 0;
} else {
// Negative: Blue (#0000FF) to Cyan (#00FFFF)
// Logarithmic mapping: log2(1) = 0, log2(32768) = 15
double log_val = log2((double)(-coeff));
double log_max = log2(32768.0);
double normalised = log_val / log_max; // 0.0 to 1.0
color.r = 0;
color.g = (uint8_t)(normalised * 255.0);
color.b = 255;
}
return color;
}
int main(int argc, char *argv[]) {
if (argc != 5) {
printf("Usage: %s <input.bin> <output.ppm> <width> <height>\n", argv[0]);
printf("Example: %s frame_060.tavframe.y.bin output.ppm 560 448\n", argv[0]);
return 1;
}
const char *input_file = argv[1];
const char *output_file = argv[2];
int width = atoi(argv[3]);
int height = atoi(argv[4]);
if (width <= 0 || height <= 0) {
printf("Error: Invalid dimensions %dx%d\n", width, height);
return 1;
}
size_t expected_count = width * height;
// Load coefficient file
FILE *fp_in = fopen(input_file, "rb");
if (!fp_in) {
printf("Error: Cannot open %s\n", input_file);
return 1;
}
// Get file size
fseek(fp_in, 0, SEEK_END);
long file_size = ftell(fp_in);
fseek(fp_in, 0, SEEK_SET);
size_t coeff_count = file_size / sizeof(int16_t);
if (coeff_count != expected_count) {
printf("Warning: File contains %zu coefficients, expected %zu (%dx%d)\n",
coeff_count, expected_count, width, height);
}
// Allocate coefficient buffer
int16_t *coeffs = malloc(expected_count * sizeof(int16_t));
if (!coeffs) {
printf("Error: Memory allocation failed\n");
fclose(fp_in);
return 1;
}
// Read coefficients
size_t read_count = fread(coeffs, sizeof(int16_t), expected_count, fp_in);
fclose(fp_in);
if (read_count != expected_count) {
printf("Error: Read %zu coefficients, expected %zu\n", read_count, expected_count);
free(coeffs);
return 1;
}
// Analyse coefficient distribution - Overall and per-subband
size_t zeros = 0, ones = 0, positives = 0, negatives = 0;
int16_t min_val = INT16_MAX, max_val = INT16_MIN;
// Calculate overall statistics
for (size_t i = 0; i < expected_count; i++) {
if (coeffs[i] == 0) zeros++;
else if (coeffs[i] == 1 || coeffs[i] == -1) ones++;
else if (coeffs[i] > 0) positives++;
else negatives++;
if (coeffs[i] < min_val) min_val = coeffs[i];
if (coeffs[i] > max_val) max_val = coeffs[i];
}
printf("Overall coefficient statistics:\n");
printf(" Total: %zu\n", expected_count);
printf(" Zeros: %zu (%.1f%%)\n", zeros, 100.0 * zeros / expected_count);
printf(" Ones: %zu (%.1f%%)\n", ones, 100.0 * ones / expected_count);
printf(" Positives: %zu (%.1f%%)\n", positives, 100.0 * positives / expected_count);
printf(" Negatives: %zu (%.1f%%)\n", negatives, 100.0 * negatives / expected_count);
printf(" Range: [%d, %d]\n\n", min_val, max_val);
// Per-subband statistics using 2D spatial layout
// The coefficients are stored in 2D spatial arrangement like the PPM image
int num_levels = 6;
// Helper macro to get coefficient from 2D position
#define GET_COEFF(x, y) coeffs[(y) * width + (x)]
// Calculate subband dimensions for each level
int level_w[7], level_h[7]; // level_w[1] = width/2, level_w[6] = width/64
for (int i = 1; i <= num_levels; i++) {
level_w[i] = width / (1 << i);
level_h[i] = height / (1 << i);
}
// LL6 subband (top-left corner)
{
int ll_w = level_w[6], ll_h = level_h[6];
size_t ll_zeros = 0, ll_ones = 0, ll_pos = 0, ll_neg = 0;
int16_t ll_min = INT16_MAX, ll_max = INT16_MIN;
for (int y = 0; y < ll_h; y++) {
for (int x = 0; x < ll_w; x++) {
int16_t val = GET_COEFF(x, y);
if (val == 0) ll_zeros++;
else if (val == 1 || val == -1) ll_ones++;
else if (val > 0) ll_pos++;
else ll_neg++;
if (val < ll_min) ll_min = val;
if (val > ll_max) ll_max = val;
}
}
size_t ll_total = ll_w * ll_h;
printf("LL%d subband (%dx%d):\n", num_levels, ll_w, ll_h);
printf(" Total: %zu\n", ll_total);
printf(" Zeros: %zu (%.1f%%)\n", ll_zeros, 100.0 * ll_zeros / ll_total);
printf(" Ones: %zu (%.1f%%)\n", ll_ones, 100.0 * ll_ones / ll_total);
printf(" Positives: %zu (%.1f%%)\n", ll_pos, 100.0 * ll_pos / ll_total);
printf(" Negatives: %zu (%.1f%%)\n", ll_neg, 100.0 * ll_neg / ll_total);
printf(" Range: [%d, %d]\n\n", ll_min, ll_max);
}
// Process each level from deepest (6) to finest (1)
for (int level = num_levels; level >= 1; level--) {
int half_w = level_w[level];
int half_h = level_h[level];
// LH subband (horizontal high-pass) - right of LL region
size_t lh_zeros = 0, lh_ones = 0, lh_pos = 0, lh_neg = 0;
int16_t lh_min = INT16_MAX, lh_max = INT16_MIN;
int lh_x0 = half_w, lh_y0 = 0;
int lh_x1 = half_w * 2, lh_y1 = half_h;
for (int y = lh_y0; y < lh_y1; y++) {
for (int x = lh_x0; x < lh_x1; x++) {
int16_t val = GET_COEFF(x, y);
if (val == 0) lh_zeros++;
else if (val == 1 || val == -1) lh_ones++;
else if (val > 0) lh_pos++;
else lh_neg++;
if (val < lh_min) lh_min = val;
if (val > lh_max) lh_max = val;
}
}
// HL subband (vertical high-pass) - below LL region
size_t hl_zeros = 0, hl_ones = 0, hl_pos = 0, hl_neg = 0;
int16_t hl_min = INT16_MAX, hl_max = INT16_MIN;
int hl_x0 = 0, hl_y0 = half_h;
int hl_x1 = half_w, hl_y1 = half_h * 2;
for (int y = hl_y0; y < hl_y1; y++) {
for (int x = hl_x0; x < hl_x1; x++) {
int16_t val = GET_COEFF(x, y);
if (val == 0) hl_zeros++;
else if (val == 1 || val == -1) hl_ones++;
else if (val > 0) hl_pos++;
else hl_neg++;
if (val < hl_min) hl_min = val;
if (val > hl_max) hl_max = val;
}
}
// HH subband (diagonal high-pass) - bottom-right of LL region
size_t hh_zeros = 0, hh_ones = 0, hh_pos = 0, hh_neg = 0;
int16_t hh_min = INT16_MAX, hh_max = INT16_MIN;
int hh_x0 = half_w, hh_y0 = half_h;
int hh_x1 = half_w * 2, hh_y1 = half_h * 2;
for (int y = hh_y0; y < hh_y1; y++) {
for (int x = hh_x0; x < hh_x1; x++) {
int16_t val = GET_COEFF(x, y);
if (val == 0) hh_zeros++;
else if (val == 1 || val == -1) hh_ones++;
else if (val > 0) hh_pos++;
else hh_neg++;
if (val < hh_min) hh_min = val;
if (val > hh_max) hh_max = val;
}
}
size_t sub_total = half_w * half_h;
printf("Level %d subbands (%dx%d each):\n", level, half_w, half_h);
printf(" LH%d: Total=%zu, Zeros=%zu (%.1f%%), Ones=%zu (%.1f%%), Pos=%zu (%.1f%%), Neg=%zu (%.1f%%), Range=[%d,%d]\n",
level, sub_total, lh_zeros, 100.0*lh_zeros/sub_total, lh_ones, 100.0*lh_ones/sub_total,
lh_pos, 100.0*lh_pos/sub_total, lh_neg, 100.0*lh_neg/sub_total, lh_min, lh_max);
printf(" HL%d: Total=%zu, Zeros=%zu (%.1f%%), Ones=%zu (%.1f%%), Pos=%zu (%.1f%%), Neg=%zu (%.1f%%), Range=[%d,%d]\n",
level, sub_total, hl_zeros, 100.0*hl_zeros/sub_total, hl_ones, 100.0*hl_ones/sub_total,
hl_pos, 100.0*hl_pos/sub_total, hl_neg, 100.0*hl_neg/sub_total, hl_min, hl_max);
printf(" HH%d: Total=%zu, Zeros=%zu (%.1f%%), Ones=%zu (%.1f%%), Pos=%zu (%.1f%%), Neg=%zu (%.1f%%), Range=[%d,%d]\n\n",
level, sub_total, hh_zeros, 100.0*hh_zeros/sub_total, hh_ones, 100.0*hh_ones/sub_total,
hh_pos, 100.0*hh_pos/sub_total, hh_neg, 100.0*hh_neg/sub_total, hh_min, hh_max);
}
#undef GET_COEFF
// Write PPM image
FILE *fp_out = fopen(output_file, "wb");
if (!fp_out) {
printf("Error: Cannot create %s\n", output_file);
free(coeffs);
return 1;
}
// PPM header
fprintf(fp_out, "P6\n%d %d\n255\n", width, height);
// Write pixel data
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
size_t idx = y * width + x;
rgb_t color = map_coefficient_to_color(coeffs[idx]);
fwrite(&color, 3, 1, fp_out);
}
}
fclose(fp_out);
free(coeffs);
printf("\nWrote %dx%d image to %s\n", width, height, output_file);
printf("Color mapping:\n");
printf(" Black: Zero coefficients\n");
printf(" Light Green (#55FF55): +1 coefficients\n");
printf(" Dark Green (#00AA00): -1 coefficients\n");
printf(" Red→Yellow: Positive coefficients > +1 (logarithmic)\n");
printf(" Blue→Cyan: Negative coefficients < -1 (logarithmic)\n");
return 0;
}

View File

@@ -0,0 +1,402 @@
// TAV-DT Noise Injector - Simulates satellite transmission channel noise
// Models QPSK over Ku-band satellite with AWGN and burst interference
// to compile: gcc -O2 -o tavdt_noise_injector tavdt_noise_injector.c -lm
// Created by CuriousTorvald and Claude on 2025-12-14
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
#include <getopt.h>
#include <time.h>
// Size of the read/modify/write buffer main() uses when streaming the file
#define BUFFER_SIZE (1024 * 1024) // 1 MB chunks
// Default stream bitrate in bits per second, used to turn byte counts into playback time
#define DEFAULT_BITRATE_BPS 2000000.0
// Bitrate actually used by bytes_to_time(); main() overwrites it when --bitrate is given
static double g_bitrate_bps = DEFAULT_BITRATE_BPS;
// Burst noise parameters: a burst's length in bytes is drawn from a Gaussian with this
// mean and standard deviation, then raised to at least BURST_LENGTH_MIN
#define BURST_LENGTH_MEAN 100.0
#define BURST_LENGTH_STDDEV 30.0
#define BURST_LENGTH_MIN 10
//=============================================================================
// PRNG Functions (xorshift64)
//=============================================================================
// Advance a 64-bit xorshift generator (shift triple 13 / 7 / 17) by one step.
// The new value is stored back into *state and also returned.
// A state of zero maps to zero, so callers must seed with a non-zero value.
static uint64_t xorshift64(uint64_t *state) {
    uint64_t s = *state;
    s = s ^ (s << 13);
    s = s ^ (s >> 7);
    s = s ^ (s << 17);
    *state = s;
    return s;
}
// Draw one value from xorshift64() and scale it by 1/UINT64_MAX into the unit interval.
// NOTE(review): this is not strictly [0, 1). Outputs close to UINT64_MAX round up when
// converted to double, so exactly 1.0 is possible.
static double rand_uniform(uint64_t *state) {
    const uint64_t raw = xorshift64(state);
    const double span = (double)UINT64_MAX;
    return (double)raw / span;
}
// Draw one normally distributed value with the given mean and standard deviation,
// using the cosine branch of the Box-Muller transform.
// Consumes exactly two rand_uniform() draws from *state per call.
static double gaussian_rand(uint64_t *state, double mean, double stddev) {
    const double smallest_u = 1e-15; // keeps log() away from zero
    double radial_u = rand_uniform(state);
    const double angular_u = rand_uniform(state);

    if (radial_u < smallest_u) {
        radial_u = smallest_u;
    }

    const double radius = sqrt(-2.0 * log(radial_u));
    const double angle = 2.0 * M_PI * angular_u;
    const double standard_normal = radius * cos(angle);
    return mean + stddev * standard_normal;
}
//=============================================================================
// BER Calculation
//=============================================================================
// Calculate BER from SNR in dB for QPSK modulation
// BER = 0.5 * erfc(sqrt(Eb/N0))
// For QPSK, Eb/N0 = SNR (2 bits per symbol)
static double snr_to_ber(double snr_db) {
double snr_linear = pow(10.0, snr_db / 10.0);
double eb_n0 = snr_linear;
return 0.5 * erfc(sqrt(eb_n0));
}
//=============================================================================
// Burst State Management
//=============================================================================
// Bookkeeping for burst-noise scheduling, carried across successive buffers.
// Set up by burst_state_init(), advanced by burst_state_advance_time(), and consumed by
// apply_burst_noise().
typedef struct {
double current_time_sec; // Elapsed playback time
double next_burst_time; // When next burst occurs (1e30 when bursts are disabled)
int burst_bytes_remaining; // Bytes left in current burst (0 = no active burst)
double burst_interval; // Mean interval between bursts (60.0 / bursts_per_minute); 0 = disabled
double burst_ber; // BER during burst
int burst_count; // Total bursts started
int total_burst_bytes; // Total bytes affected by bursts
int verbose; // Verbose output flag (non-zero logs each burst to stderr)
} burst_state_t;
// Reset *state and schedule the first burst.
// bursts_per_minute: mean burst rate; a value <= 0 disables bursts entirely.
// burst_ber: per-bit flip probability applied while a burst is active.
// verbose: stored in the state; non-zero makes burst_state_advance_time() log bursts.
// seed: PRNG state; one rand_uniform() draw is consumed only when bursts are enabled.
static void burst_state_init(burst_state_t *state, double bursts_per_minute,
                             double burst_ber, int verbose, uint64_t *seed) {
    state->verbose = verbose;
    state->burst_ber = burst_ber;
    state->current_time_sec = 0.0;
    state->burst_bytes_remaining = 0;
    state->burst_count = 0;
    state->total_burst_bytes = 0;

    if (!(bursts_per_minute > 0)) {
        state->burst_interval = 0;
        state->next_burst_time = 1e30; // Never burst
        return;
    }

    const double mean_gap = 60.0 / bursts_per_minute;
    state->burst_interval = mean_gap;
    // Exponentially distributed waiting time until the first burst
    state->next_burst_time = -mean_gap * log(rand_uniform(seed));
}
// Move the burst clock forward by delta_sec and start a burst for every scheduled burst
// time that falls inside the elapsed window.
// A scheduled burst is dropped (not queued) if a previous burst is still active, so
// several burst times inside one window produce at most one new burst.
// Each scheduled burst time consumes one rand_uniform() draw for the next waiting time,
// plus one gaussian_rand() call when a burst actually starts.
static void burst_state_advance_time(burst_state_t *state, double delta_sec, uint64_t *seed) {
    const double window_end = state->current_time_sec + delta_sec;

    if (state->burst_interval > 0) {
        while (state->next_burst_time < window_end) {
            const int burst_active = (state->burst_bytes_remaining != 0);
            if (!burst_active) {
                // Draw the burst length in bytes, never shorter than BURST_LENGTH_MIN
                const double drawn_len = gaussian_rand(seed, BURST_LENGTH_MEAN, BURST_LENGTH_STDDEV);
                state->burst_bytes_remaining = (int)fmax(BURST_LENGTH_MIN, drawn_len);
                state->burst_count++;
                if (state->verbose) {
                    fprintf(stderr, " [burst] time %.2fs, %d bytes\n",
                            state->next_burst_time, state->burst_bytes_remaining);
                }
            }

            // Exponentially distributed gap until the following burst
            double gap = -state->burst_interval * log(rand_uniform(seed));
            if (gap < 0.001) gap = 0.001; // Minimum 1ms between bursts
            state->next_burst_time += gap;
        }
    }

    state->current_time_sec = window_end;
}
//=============================================================================
// Noise Application Functions
//=============================================================================
// Flip each bit of data[0..len) independently with probability `ber`.
// One rand_uniform() draw is consumed per bit, least-significant bit first in each byte.
// A BER below 1e-10 is treated as error-free: nothing is flipped and no random draws are
// consumed.
// Returns the number of bits flipped.
static int apply_background_noise(uint8_t *data, size_t len, double ber, uint64_t *seed) {
    if (ber < 1e-10) {
        return 0; // Effectively no errors at this BER
    }

    int flips = 0;
    for (size_t idx = 0; idx != len; ++idx) {
        for (unsigned mask = 1u; mask <= 0x80u; mask <<= 1) {
            if (rand_uniform(seed) < ber) {
                data[idx] ^= mask;
                flips++;
            }
        }
    }
    return flips;
}
// Apply the active burst, if any, to the start of the buffer.
// At most state->burst_bytes_remaining bytes (capped at len) starting at data[0] have
// each bit flipped with probability state->burst_ber. The processed byte count is added
// to total_burst_bytes and subtracted from burst_bytes_remaining, so a burst longer
// than this buffer carries over to the next call.
// Returns the number of bits flipped.
static int apply_burst_noise(uint8_t *data, size_t len, burst_state_t *state, uint64_t *seed) {
    if (state->burst_bytes_remaining <= 0) {
        return 0;
    }

    size_t span = (size_t)state->burst_bytes_remaining;
    if (len < span) {
        span = len;
    }

    int flips = 0;
    for (size_t idx = 0; idx != span; ++idx) {
        for (unsigned mask = 1u; mask <= 0x80u; mask <<= 1) {
            if (rand_uniform(seed) < state->burst_ber) {
                data[idx] ^= mask;
                flips++;
            }
        }
    }

    state->total_burst_bytes += span;
    state->burst_bytes_remaining -= span;
    return flips;
}
//=============================================================================
// Byte Position to Time Conversion
//=============================================================================
// Convert a byte count into approximate playback time in seconds at the configured
// bitrate (g_bitrate_bps, bits per second).
// The multiplication by 8 is done in double so it cannot wrap when size_t is 32 bits
// wide; results are unchanged wherever the integer product did not overflow.
static double bytes_to_time(size_t byte_pos) {
    return ((double)byte_pos * 8.0) / g_bitrate_bps;
}
//=============================================================================
// Main Program
//=============================================================================
// Print the command-line help text to stderr.
// prog: program name shown on the "Usage:" line (main passes argv[0]).
// The SNR reference values follow snr_to_ber(): BER = 0.5 * erfc(sqrt(10^(SNR/10))).
static void print_usage(const char *prog) {
    fprintf(stderr, "TAV-DT Noise Injector v1.0\n");
    fprintf(stderr, "Simulates QPSK satellite transmission channel noise\n\n");
    fprintf(stderr, "Usage: %s -i input.tavdt -o output.tavdt --snr N [options]\n\n", prog);
    fprintf(stderr, "Required:\n");
    fprintf(stderr, " -i, --input FILE Input TAV-DT file\n");
    fprintf(stderr, " -o, --output FILE Output corrupted file\n");
    fprintf(stderr, " --snr N Signal-to-noise ratio in dB (0-30)\n");
    fprintf(stderr, "\nOptional:\n");
    fprintf(stderr, " --burst N Burst events per minute (default: 0)\n");
    fprintf(stderr, " --burst-ber N BER during burst events (default: 0.5)\n");
    fprintf(stderr, " --bitrate N Stream bitrate in Mbps for timing (default: 2.0)\n");
    fprintf(stderr, " --seed N RNG seed for reproducibility\n");
    fprintf(stderr, " -v, --verbose Show detailed progress\n");
    fprintf(stderr, " -h, --help Show this help\n");
    fprintf(stderr, "\nSNR Reference:\n");
    fprintf(stderr, " 0 dB: Worst case (BER ~7.9e-2, 1 in 13 bits)\n");
    fprintf(stderr, " 6 dB: Poor but working (BER ~2.4e-3)\n");
    // The 9/12/30 dB figures previously listed did not match what snr_to_ber() computes
    fprintf(stderr, " 9 dB: Typical working (BER ~3.4e-5)\n");
    fprintf(stderr, " 12 dB: Good condition (BER ~9.0e-9)\n");
    // At 30 dB the computed BER underflows to 0 and background noise is skipped entirely
    fprintf(stderr, " 30 dB: Near-perfect (BER ~0, no background errors)\n");
}
// Program entry point: parse options, then stream the input file through the background
// and burst noise models in BUFFER_SIZE chunks, writing the corrupted bytes to the output
// file and a summary to stderr.
// Returns 0 on success and 1 on a usage, file, memory or I/O error.
int main(int argc, char *argv[]) {
    const char *input_file = NULL;
    const char *output_file = NULL;
    double snr_db = -1; // negative doubles as "not supplied"
    double bursts_per_minute = 0;
    double burst_ber = 0.5;
    uint64_t seed = 0;
    int seed_provided = 0;
    int verbose = 0;

    // Long-only options are mapped onto private letter codes handled in the switch below
    static struct option long_options[] = {
        {"input", required_argument, 0, 'i'},
        {"output", required_argument, 0, 'o'},
        {"snr", required_argument, 0, 's'},
        {"burst", required_argument, 0, 'b'},
        {"burst-ber", required_argument, 0, 'B'},
        {"bitrate", required_argument, 0, 'r'},
        {"seed", required_argument, 0, 'S'},
        {"verbose", no_argument, 0, 'v'},
        {"help", no_argument, 0, 'h'},
        {0, 0, 0, 0}
    };
    int opt;
    while ((opt = getopt_long(argc, argv, "i:o:vh", long_options, NULL)) != -1) {
        switch (opt) {
        case 'i':
            input_file = optarg;
            break;
        case 'o':
            output_file = optarg;
            break;
        case 's':
            snr_db = atof(optarg);
            break;
        case 'b':
            bursts_per_minute = atof(optarg);
            break;
        case 'B':
            burst_ber = atof(optarg);
            break;
        case 'r':
            g_bitrate_bps = atof(optarg) * 1000000.0; // Convert Mbps to bps
            break;
        case 'S':
            seed = strtoull(optarg, NULL, 10);
            seed_provided = 1;
            break;
        case 'v':
            verbose = 1;
            break;
        case 'h':
        default:
            print_usage(argv[0]);
            return opt == 'h' ? 0 : 1;
        }
    }

    // Validate arguments
    if (!input_file || !output_file || snr_db < 0) {
        fprintf(stderr, "Error: Missing required arguments\n\n");
        print_usage(argv[0]);
        return 1;
    }
    if (burst_ber < 0 || burst_ber > 1) {
        fprintf(stderr, "Error: --burst-ber must be between 0 and 1\n");
        return 1;
    }
    // A zero, negative or unparsable bitrate would make every chunk span an infinite or
    // negative amount of time and break burst scheduling
    if (!(g_bitrate_bps > 0)) {
        fprintf(stderr, "Error: --bitrate must be greater than 0\n");
        return 1;
    }

    // Initialize RNG
    if (!seed_provided) {
        seed = (uint64_t)time(NULL) ^ ((uint64_t)clock() << 32);
    }
    // Ensure seed is not zero (xorshift64 requirement)
    if (seed == 0) seed = 0x853c49e6748fea9bULL;
    // Remember the value that reproduces this run before the warm-up changes the state
    const uint64_t initial_seed = seed;
    // Warm up the generator (small seeds produce poor initial values)
    for (int i = 0; i < 10; i++) xorshift64(&seed);

    // Calculate BER from SNR
    double ber = snr_to_ber(snr_db);

    // Open files
    FILE *in_fp = fopen(input_file, "rb");
    if (!in_fp) {
        fprintf(stderr, "Error: Cannot open input file: %s\n", input_file);
        return 1;
    }
    FILE *out_fp = fopen(output_file, "wb");
    if (!out_fp) {
        fprintf(stderr, "Error: Cannot open output file: %s\n", output_file);
        fclose(in_fp);
        return 1;
    }

    // Print header info
    fprintf(stderr, "TAV-DT Noise Injector v1.0\n");
    fprintf(stderr, "Input: %s\n", input_file);
    fprintf(stderr, "Output: %s\n", output_file);
    fprintf(stderr, "SNR: %.1f dB (BER: %.2e)\n", snr_db, ber);
    if (bursts_per_minute > 0) {
        fprintf(stderr, "Burst: %.1f events/minute (burst BER: %.2f)\n",
                bursts_per_minute, burst_ber);
    } else {
        fprintf(stderr, "Burst: disabled\n");
    }
    if (seed_provided) {
        // Print the seed that reproduces the run, not the warmed-up internal state
        fprintf(stderr, "Seed: %llu\n", (unsigned long long)initial_seed);
    }
    fprintf(stderr, "\n");

    // Initialize burst state
    burst_state_t burst;
    burst_state_init(&burst, bursts_per_minute, burst_ber, verbose, &seed);

    // Allocate buffer for streaming processing
    uint8_t *buffer = malloc(BUFFER_SIZE);
    if (!buffer) {
        fprintf(stderr, "Error: Cannot allocate buffer\n");
        fclose(in_fp);
        fclose(out_fp);
        return 1;
    }

    // Processing statistics
    long long total_bytes = 0;
    long long bits_flipped_bg = 0;
    long long bits_flipped_burst = 0;
    int chunk_count = 0;
    int io_failed = 0;

    // Process file in chunks
    size_t bytes_read;
    while ((bytes_read = fread(buffer, 1, BUFFER_SIZE, in_fp)) > 0) {
        // Calculate time delta for this chunk (for burst scheduling)
        double delta_sec = bytes_to_time(bytes_read);
        burst_state_advance_time(&burst, delta_sec, &seed);

        // Apply noise to chunk
        bits_flipped_bg += apply_background_noise(buffer, bytes_read, ber, &seed);
        bits_flipped_burst += apply_burst_noise(buffer, bytes_read, &burst, &seed);

        // Write corrupted chunk
        if (fwrite(buffer, 1, bytes_read, out_fp) != bytes_read) {
            fprintf(stderr, "\nError: Cannot write to output file: %s\n", output_file);
            io_failed = 1;
            break;
        }
        total_bytes += bytes_read;
        chunk_count++;
        if (verbose && chunk_count % 10 == 0) {
            double time_pos = bytes_to_time(total_bytes);
            fprintf(stderr, "\rProcessed %.1f MB (%.1f sec)...",
                    total_bytes / (1024.0 * 1024.0), time_pos);
        }
    }
    if (!io_failed && ferror(in_fp)) {
        fprintf(stderr, "\nError: Cannot read input file: %s\n", input_file);
        io_failed = 1;
    }
    if (verbose && !io_failed) {
        fprintf(stderr, "\r \r");
    }

    // Clean up; fclose() flushes buffered output, so its result matters for out_fp
    free(buffer);
    fclose(in_fp);
    if (fclose(out_fp) != 0 && !io_failed) {
        fprintf(stderr, "Error: Cannot write to output file: %s\n", output_file);
        io_failed = 1;
    }
    if (io_failed) {
        return 1;
    }

    // Print summary
    double duration_sec = bytes_to_time(total_bytes);
    long long total_bits = total_bytes * 8;
    // Avoid printing NaN for an empty input
    double bg_percent = total_bits > 0 ? 100.0 * bits_flipped_bg / total_bits : 0.0;
    fprintf(stderr, "Complete.\n");
    fprintf(stderr, " Total bytes: %lld (%.1f sec @ ~%.1f Mbps)\n",
            total_bytes, duration_sec, g_bitrate_bps / 1000000.0);
    fprintf(stderr, " Background bits flipped: %lld (%.4f%%)\n",
            bits_flipped_bg, bg_percent);
    if (bursts_per_minute > 0) {
        fprintf(stderr, " Burst events: %d (%d bytes total)\n",
                burst.burst_count, burst.total_burst_bytes);
        fprintf(stderr, " Burst bits flipped: %lld\n", bits_flipped_burst);
    }
    return 0;
}

View File

@@ -0,0 +1,328 @@
// Test mesh warp round-trip consistency
// Warps a frame forward, then backward, and checks if we get the original back
// This is critical for MC-lifting invertibility
#include <opencv2/opencv.hpp>
#include <cstdlib>
#include <cstring>
#include <cmath>
#include <cstdio>
#include <ctime>
// Declarations of the C-linkage mesh/motion functions this test links against.
// They are only declared here; presumably they are defined in the encoder sources -
// TODO confirm which translation unit provides them.
extern "C" {
// Estimate motion between two frames of the given width/height.
// NOTE(review): judging by the parameter names the frames are RGB and the flow fields
// come back through out_flow_x / out_flow_y; buffer layout and who frees the outputs
// are not visible here - confirm.
void estimate_motion_optical_flow(
const unsigned char *frame1_rgb, const unsigned char *frame2_rgb,
int width, int height,
float **out_flow_x, float **out_flow_y
);
// Derive a mesh_w x mesh_h grid of displacements from per-pixel flow.
// The warp code in this file divides mesh_dx / mesh_dy entries by 8.0f, i.e. it treats
// them as 1/8-pixel units.
void build_mesh_from_flow(
const float *flow_x, const float *flow_y,
int width, int height,
int mesh_w, int mesh_h,
int16_t *mesh_dx, int16_t *mesh_dy
);
// Smooth the mesh displacements in place.
// NOTE(review): the exact meaning of `smoothness` and `iterations` is not visible
// here - confirm against the definition.
void smooth_mesh_laplacian(
int16_t *mesh_dx, int16_t *mesh_dy,
int mesh_width, int mesh_height,
float smoothness, int iterations
);
}
// Inverse-warps `src` into `dst` with a translation-only control mesh.
//
// For every destination pixel the displacement is bilinearly blended from the
// four surrounding control points (placed at cell centres, values stored in
// 1/8-pixel units), then the source image is sampled bilinearly at
// (x + dx, y + dy). Sample coordinates falling outside the frame are clamped
// to the nearest edge pixel. `dst` is reallocated as an 8-bit 3-channel image
// with the same size as `src`.
static void apply_mesh_warp_rgb(
    const cv::Mat &src,
    cv::Mat &dst,
    const int16_t *mesh_dx,
    const int16_t *mesh_dy,
    int mesh_w, int mesh_h
) {
    const int width = src.cols;
    const int height = src.rows;
    const int cell_w = width / mesh_w;
    const int cell_h = height / mesh_h;

    // Restrict an interpolation weight to [0, 1].
    auto clamp_unit = [](float v) -> float {
        return std::max(0.0f, std::min(1.0f, v));
    };
    // Bilinear blend of one displacement field (1/8 px units) at four nodes.
    auto blend_field = [](const int16_t *field,
                          int i00, int i10, int i01, int i11,
                          float a, float b) -> float {
        return (1 - a) * (1 - b) * (field[i00] / 8.0f) +
               a * (1 - b) * (field[i10] / 8.0f) +
               (1 - a) * b * (field[i01] / 8.0f) +
               a * b * (field[i11] / 8.0f);
    };

    dst = cv::Mat(height, width, CV_8UC3);

    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            // Top-left node of the 2x2 control-point patch; the clamp keeps
            // the +1 neighbours inside the mesh.
            const int node_x = std::min(x / cell_w, mesh_w - 2);
            const int node_y = std::min(y / cell_h, mesh_h - 2);

            const int i00 = node_y * mesh_w + node_x;
            const int i10 = i00 + 1;
            const int i01 = (node_y + 1) * mesh_w + node_x;
            const int i11 = i01 + 1;

            // Control points sit at cell centres.
            const float left   = node_x * cell_w + cell_w / 2.0f;
            const float top    = node_y * cell_h + cell_h / 2.0f;
            const float right  = (node_x + 1) * cell_w + cell_w / 2.0f;
            const float bottom = (node_y + 1) * cell_h + cell_h / 2.0f;

            const float alpha = clamp_unit((x - left) / (right - left));
            const float beta  = clamp_unit((y - top) / (bottom - top));

            const float dx = blend_field(mesh_dx, i00, i10, i01, i11, alpha, beta);
            const float dy = blend_field(mesh_dy, i00, i10, i01, i11, alpha, beta);

            // Inverse mapping: where in `src` does this destination pixel come from?
            const float src_x = x + dx;
            const float src_y = y + dy;

            int sx0 = (int)floorf(src_x);
            int sy0 = (int)floorf(src_y);
            int sx1 = sx0 + 1;
            int sy1 = sy0 + 1;
            sx0 = std::max(0, std::min(width - 1, sx0));
            sy0 = std::max(0, std::min(height - 1, sy0));
            sx1 = std::max(0, std::min(width - 1, sx1));
            sy1 = std::max(0, std::min(height - 1, sy1));

            // Fractions are taken against the clamped origin, as in the
            // reference implementation.
            const float fx = src_x - sx0;
            const float fy = src_y - sy0;

            const cv::Vec3b &p00 = src.at<cv::Vec3b>(sy0, sx0);
            const cv::Vec3b &p10 = src.at<cv::Vec3b>(sy0, sx1);
            const cv::Vec3b &p01 = src.at<cv::Vec3b>(sy1, sx0);
            const cv::Vec3b &p11 = src.at<cv::Vec3b>(sy1, sx1);
            cv::Vec3b &out = dst.at<cv::Vec3b>(y, x);

            for (int c = 0; c < 3; c++) {
                const float v00 = p00[c];
                const float v10 = p10[c];
                const float v01 = p01[c];
                const float v11 = p11[c];
                const float val = (1 - fx) * (1 - fy) * v00 +
                                  fx * (1 - fy) * v10 +
                                  (1 - fx) * fy * v01 +
                                  fx * fy * v11;
                out[c] = (unsigned char)std::max(0.0f, std::min(255.0f, val));
            }
        }
    }
}
int main(int argc, char** argv) {
const char* video_file = (argc > 1) ? argv[1] : "test_video.mp4";
int num_tests = (argc > 2) ? atoi(argv[2]) : 5;
printf("Opening video: %s\n", video_file);
cv::VideoCapture cap(video_file);
if (!cap.isOpened()) {
fprintf(stderr, "Error: Cannot open video file\n");
return 1;
}
int total_frames = (int)cap.get(cv::CAP_PROP_FRAME_COUNT);
int width = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH);
int height = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT);
printf("Video: %dx%d, %d frames\n", width, height, total_frames);
// Mesh dimensions (32×32 cells)
int mesh_cell_size = 32;
int mesh_w = (width + mesh_cell_size - 1) / mesh_cell_size;
int mesh_h = (height + mesh_cell_size - 1) / mesh_cell_size;
if (mesh_w < 2) mesh_w = 2;
if (mesh_h < 2) mesh_h = 2;
printf("Mesh: %dx%d (approx %dx%d px cells)\n\n",
mesh_w, mesh_h, width / mesh_w, height / mesh_h);
float smoothness = 0.5f;
int smooth_iterations = 8;
srand(time(NULL));
double total_forward_psnr = 0.0;
double total_roundtrip_psnr = 0.0;
double total_half_roundtrip_psnr = 0.0;
for (int test = 0; test < num_tests; test++) {
int frame_num = 5 + rand() % (total_frames - 10);
printf("[Test %d/%d] Frame pair %d → %d\n", test + 1, num_tests, frame_num - 1, frame_num);
cap.set(cv::CAP_PROP_POS_FRAMES, frame_num - 1);
cv::Mat frame0, frame1;
cap >> frame0;
cap >> frame1;
if (frame0.empty() || frame1.empty()) {
fprintf(stderr, "Error reading frames\n");
continue;
}
cv::Mat frame0_rgb, frame1_rgb;
cv::cvtColor(frame0, frame0_rgb, cv::COLOR_BGR2RGB);
cv::cvtColor(frame1, frame1_rgb, cv::COLOR_BGR2RGB);
// Compute mesh (F0 → F1)
float *flow_x = nullptr, *flow_y = nullptr;
estimate_motion_optical_flow(frame0_rgb.data, frame1_rgb.data,
width, height, &flow_x, &flow_y);
int16_t *mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
int16_t *mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
build_mesh_from_flow(flow_x, flow_y, width, height, mesh_w, mesh_h, mesh_dx, mesh_dy);
smooth_mesh_laplacian(mesh_dx, mesh_dy, mesh_w, mesh_h, smoothness, smooth_iterations);
// Create inverted mesh
int16_t *inv_mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
int16_t *inv_mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
for (int i = 0; i < mesh_w * mesh_h; i++) {
inv_mesh_dx[i] = -mesh_dx[i];
inv_mesh_dy[i] = -mesh_dy[i];
}
// Create half-mesh for symmetric lifting test
int16_t *half_mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
int16_t *half_mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
int16_t *neg_half_mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
int16_t *neg_half_mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
for (int i = 0; i < mesh_w * mesh_h; i++) {
half_mesh_dx[i] = mesh_dx[i] / 2;
half_mesh_dy[i] = mesh_dy[i] / 2;
neg_half_mesh_dx[i] = -half_mesh_dx[i];
neg_half_mesh_dy[i] = -half_mesh_dy[i];
}
// TEST 1: Full forward warp quality (F0 → F1)
cv::Mat warped_forward;
apply_mesh_warp_rgb(frame0, warped_forward, mesh_dx, mesh_dy, mesh_w, mesh_h);
double forward_mse = 0.0;
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
for (int c = 0; c < 3; c++) {
double diff = (double)warped_forward.at<cv::Vec3b>(y, x)[c] -
(double)frame1.at<cv::Vec3b>(y, x)[c];
forward_mse += diff * diff;
}
}
}
forward_mse /= (width * height * 3);
double forward_psnr = (forward_mse > 0) ? 10.0 * log10(255.0 * 255.0 / forward_mse) : 999.0;
total_forward_psnr += forward_psnr;
// TEST 2: Full round-trip (F0 → forward → backward → F0')
cv::Mat roundtrip;
apply_mesh_warp_rgb(warped_forward, roundtrip, inv_mesh_dx, inv_mesh_dy, mesh_w, mesh_h);
double roundtrip_mse = 0.0;
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
for (int c = 0; c < 3; c++) {
double diff = (double)roundtrip.at<cv::Vec3b>(y, x)[c] -
(double)frame0.at<cv::Vec3b>(y, x)[c];
roundtrip_mse += diff * diff;
}
}
}
roundtrip_mse /= (width * height * 3);
double roundtrip_psnr = (roundtrip_mse > 0) ? 10.0 * log10(255.0 * 255.0 / roundtrip_mse) : 999.0;
total_roundtrip_psnr += roundtrip_psnr;
// TEST 3: Half-step symmetric round-trip (MC-lifting style)
// F0 → +½mesh, then → -½mesh (should return to F0)
cv::Mat half_forward, half_roundtrip;
apply_mesh_warp_rgb(frame0, half_forward, half_mesh_dx, half_mesh_dy, mesh_w, mesh_h);
apply_mesh_warp_rgb(half_forward, half_roundtrip, neg_half_mesh_dx, neg_half_mesh_dy, mesh_w, mesh_h);
double half_roundtrip_mse = 0.0;
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
for (int c = 0; c < 3; c++) {
double diff = (double)half_roundtrip.at<cv::Vec3b>(y, x)[c] -
(double)frame0.at<cv::Vec3b>(y, x)[c];
half_roundtrip_mse += diff * diff;
}
}
}
half_roundtrip_mse /= (width * height * 3);
double half_roundtrip_psnr = (half_roundtrip_mse > 0) ? 10.0 * log10(255.0 * 255.0 / half_roundtrip_mse) : 999.0;
total_half_roundtrip_psnr += half_roundtrip_psnr;
printf(" Forward warp (F0→F1): PSNR = %.2f dB\n", forward_psnr);
printf(" Full round-trip (F0→F0'): PSNR = %.2f dB\n", roundtrip_psnr);
printf(" Half round-trip (±½mesh): PSNR = %.2f dB\n", half_roundtrip_psnr);
// Compute motion stats
float avg_motion = 0.0f, max_motion = 0.0f;
for (int i = 0; i < mesh_w * mesh_h; i++) {
float dx = mesh_dx[i] / 8.0f;
float dy = mesh_dy[i] / 8.0f;
float motion = sqrtf(dx * dx + dy * dy);
avg_motion += motion;
if (motion > max_motion) max_motion = motion;
}
avg_motion /= (mesh_w * mesh_h);
printf(" Motion: avg=%.2f px, max=%.2f px\n\n", avg_motion, max_motion);
// Save visualisation for worst case
if (test == 0 || roundtrip_psnr < 30.0) {
char filename[256];
sprintf(filename, "roundtrip_%04d_original.png", frame_num);
cv::imwrite(filename, frame0);
sprintf(filename, "roundtrip_%04d_forward.png", frame_num);
cv::imwrite(filename, warped_forward);
sprintf(filename, "roundtrip_%04d_roundtrip.png", frame_num);
cv::imwrite(filename, roundtrip);
// Difference images
cv::Mat diff_roundtrip = cv::Mat::zeros(height, width, CV_8UC3);
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
for (int c = 0; c < 3; c++) {
int diff = abs((int)roundtrip.at<cv::Vec3b>(y, x)[c] -
(int)frame0.at<cv::Vec3b>(y, x)[c]);
diff_roundtrip.at<cv::Vec3b>(y, x)[c] = std::min(diff * 5, 255);
}
}
}
sprintf(filename, "roundtrip_%04d_diff.png", frame_num);
cv::imwrite(filename, diff_roundtrip);
printf(" Saved visualisation: roundtrip_%04d_*.png\n\n", frame_num);
}
free(flow_x);
free(flow_y);
free(mesh_dx);
free(mesh_dy);
free(inv_mesh_dx);
free(inv_mesh_dy);
free(half_mesh_dx);
free(half_mesh_dy);
free(neg_half_mesh_dx);
free(neg_half_mesh_dy);
}
printf("===========================================\n");
printf("Average Results (%d tests):\n", num_tests);
printf(" Forward warp quality: %.2f dB\n", total_forward_psnr / num_tests);
printf(" Full round-trip error: %.2f dB\n", total_roundtrip_psnr / num_tests);
printf(" Half round-trip error: %.2f dB\n", total_half_roundtrip_psnr / num_tests);
printf("===========================================\n\n");
if (total_roundtrip_psnr / num_tests < 35.0) {
printf("WARNING: Round-trip PSNR < 35 dB indicates poor invertibility!\n");
printf("This will cause MC-lifting to accumulate errors and hurt compression.\n");
printf("Bilinear interpolation artifacts are likely the culprit.\n");
} else {
printf("Round-trip consistency looks acceptable (>35 dB).\n");
}
cap.release();
return 0;
}

View File

@@ -0,0 +1,422 @@
// Visual unit test for mesh warping with hierarchical block matching and affine estimation
// Picks 5 random frames from test_video.mp4, warps prev frame to current frame using mesh,
// and saves both warped and target frames for visual comparison
// Now includes: hierarchical diamond search, Laplacian smoothing, and selective affine transforms
#include <opencv2/opencv.hpp>
#include <opencv2/video/tracking.hpp>
#include <cstdlib>
#include <cstring>
#include <cmath>
#include <cstdio>
#include <ctime>
// Mesh routines from the encoder, declared by hand instead of through a header.
// extern "C" gives them unmangled C linkage; the prototypes must be kept in
// sync with the encoder's definitions manually.
extern "C" {
// Called below with the previous and current RGB frames. The two flow buffers
// come back through out_flow_x / out_flow_y and are released with free() by
// this test (presumably malloc'd by the callee -- TODO confirm).
void estimate_motion_optical_flow(
const unsigned char *frame1_rgb, const unsigned char *frame2_rgb,
int width, int height,
float **out_flow_x, float **out_flow_y
);
// Writes into caller-allocated mesh_dx / mesh_dy arrays of mesh_w * mesh_h
// entries. The warp in this file decodes those entries as 1/8-pixel units.
void build_mesh_from_flow(
const float *flow_x, const float *flow_y,
int width, int height,
int mesh_w, int mesh_h,
int16_t *mesh_dx, int16_t *mesh_dy
);
// Takes the mesh arrays through non-const pointers; presumably smooths them
// in place (verify against the encoder implementation).
void smooth_mesh_laplacian(
int16_t *mesh_dx, int16_t *mesh_dy,
int mesh_width, int mesh_height,
float smoothness, int iterations
);
// Per-cell affine estimation. The caller in this file treats a non-zero return
// as "use affine for this cell", stores out_tx / out_ty as the cell's mesh
// displacement, and stores out_a11..out_a22 as matrix entries that the warp
// divides by 256. NOTE(review): whether every output is written when the
// function returns 0 is not visible here -- confirm against the encoder.
int estimate_cell_affine(
const float *flow_x, const float *flow_y,
int width, int height,
int cell_x, int cell_y,
int cell_w, int cell_h,
float threshold,
int16_t *out_tx, int16_t *out_ty,
int16_t *out_a11, int16_t *out_a12,
int16_t *out_a21, int16_t *out_a22
);
}
// Inverse-warps `src` (BGR) into `dst` using a control mesh with optional
// per-cell affine terms.
//
//   mesh_dx / mesh_dy   displacement per mesh node, in 1/8-pixel units
//   affine_mask         may be null; non-zero entries enable the affine term
//   affine_a11..a22     2x2 matrix entries, divided by 256 when applied
//
// The displacement is blended bilinearly from the four surrounding control
// points (located at cell centres). When the top-left node of the patch is
// flagged affine, its matrix applied to the offset from that node's control
// point is added on top. The source is then sampled bilinearly at
// (x + dx, y + dy), with sample coordinates clamped to the frame. `dst` is
// reallocated as an 8-bit 3-channel image of the same size as `src`.
static void apply_mesh_warp_rgb(
    const cv::Mat &src,
    cv::Mat &dst,
    const int16_t *mesh_dx,
    const int16_t *mesh_dy,
    const uint8_t *affine_mask,
    const int16_t *affine_a11,
    const int16_t *affine_a12,
    const int16_t *affine_a21,
    const int16_t *affine_a22,
    int mesh_w, int mesh_h
) {
    const int width = src.cols;
    const int height = src.rows;
    const int cell_w = width / mesh_w;
    const int cell_h = height / mesh_h;

    // Restrict an interpolation weight to [0, 1].
    auto clamp_unit = [](float v) -> float {
        return std::max(0.0f, std::min(1.0f, v));
    };
    // Bilinear blend of one displacement field (1/8 px units) at four nodes.
    auto blend_field = [](const int16_t *field,
                          int i00, int i10, int i01, int i11,
                          float a, float b) -> float {
        return (1 - a) * (1 - b) * (field[i00] / 8.0f) +
               a * (1 - b) * (field[i10] / 8.0f) +
               (1 - a) * b * (field[i01] / 8.0f) +
               a * b * (field[i11] / 8.0f);
    };

    dst = cv::Mat(height, width, CV_8UC3);

    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            // Top-left node of the 2x2 patch, clamped so +1 neighbours exist.
            const int node_x = std::min(x / cell_w, mesh_w - 2);
            const int node_y = std::min(y / cell_h, mesh_h - 2);

            const int i00 = node_y * mesh_w + node_x;
            const int i10 = i00 + 1;
            const int i01 = (node_y + 1) * mesh_w + node_x;
            const int i11 = i01 + 1;

            // Control point positions (cell centres).
            const float left   = node_x * cell_w + cell_w / 2.0f;
            const float top    = node_y * cell_h + cell_h / 2.0f;
            const float right  = (node_x + 1) * cell_w + cell_w / 2.0f;
            const float bottom = (node_y + 1) * cell_h + cell_h / 2.0f;

            const float alpha = clamp_unit((x - left) / (right - left));
            const float beta  = clamp_unit((y - top) / (bottom - top));

            float dx = blend_field(mesh_dx, i00, i10, i01, i11, alpha, beta);
            float dy = blend_field(mesh_dy, i00, i10, i01, i11, alpha, beta);

            // Only the top-left node's affine parameters are consulted.
            const bool use_affine = affine_mask && affine_mask[i00];
            if (use_affine) {
                // Offset of this pixel from the top-left control point.
                const float rel_x = x - left;
                const float rel_y = y - top;

                const float a11 = affine_a11[i00] / 256.0f;
                const float a12 = affine_a12[i00] / 256.0f;
                const float a21 = affine_a21[i00] / 256.0f;
                const float a22 = affine_a22[i00] / 256.0f;

                // [dx'] = [a11 a12][rel_x] + [dx]
                // [dy']   [a21 a22][rel_y]   [dy]
                dx = a11 * rel_x + a12 * rel_y + dx;
                dy = a21 * rel_x + a22 * rel_y + dy;
            }

            // Inverse mapping into the source image.
            const float src_x = x + dx;
            const float src_y = y + dy;

            int sx0 = (int)floorf(src_x);
            int sy0 = (int)floorf(src_y);
            int sx1 = sx0 + 1;
            int sy1 = sy0 + 1;
            sx0 = std::max(0, std::min(width - 1, sx0));
            sy0 = std::max(0, std::min(height - 1, sy0));
            sx1 = std::max(0, std::min(width - 1, sx1));
            sy1 = std::max(0, std::min(height - 1, sy1));

            // Fractions are taken against the clamped origin.
            const float fx = src_x - sx0;
            const float fy = src_y - sy0;

            const cv::Vec3b &p00 = src.at<cv::Vec3b>(sy0, sx0);
            const cv::Vec3b &p10 = src.at<cv::Vec3b>(sy0, sx1);
            const cv::Vec3b &p01 = src.at<cv::Vec3b>(sy1, sx0);
            const cv::Vec3b &p11 = src.at<cv::Vec3b>(sy1, sx1);
            cv::Vec3b &out = dst.at<cv::Vec3b>(y, x);

            for (int c = 0; c < 3; c++) {
                const float v00 = p00[c];
                const float v10 = p10[c];
                const float v01 = p01[c];
                const float v11 = p11[c];
                const float val = (1 - fx) * (1 - fy) * v00 +
                                  fx * (1 - fy) * v10 +
                                  (1 - fx) * fy * v01 +
                                  fx * fy * v11;
                out[c] = (unsigned char)std::max(0.0f, std::min(255.0f, val));
            }
        }
    }
}
// Outlines every mesh cell flagged in `affine_mask` with a 1-pixel green
// rectangle drawn directly onto `img`. Cell size is the image size divided by
// the mesh dimensions (integer division).
static void create_affine_overlay(
    cv::Mat &img,
    const uint8_t *affine_mask,
    int mesh_w, int mesh_h
) {
    const int cell_w = img.cols / mesh_w;
    const int cell_h = img.rows / mesh_h;
    const cv::Scalar green(0, 255, 0);

    for (int row = 0; row < mesh_h; row++) {
        const uint8_t *mask_row = affine_mask + row * mesh_w;
        for (int col = 0; col < mesh_w; col++) {
            if (!mask_row[col]) {
                continue;  // translation-only cell: nothing to draw
            }
            const cv::Point top_left(col * cell_w, row * cell_h);
            const cv::Point bottom_right((col + 1) * cell_w, (row + 1) * cell_h);
            cv::rectangle(img, top_left, bottom_right, green, 1);
        }
    }
}
int main(int argc, char** argv) {
const char* video_file = (argc > 1) ? argv[1] : "test_video.mp4";
int num_test_frames = (argc > 2) ? atoi(argv[2]) : 5;
printf("Opening video: %s\n", video_file);
cv::VideoCapture cap(video_file);
if (!cap.isOpened()) {
fprintf(stderr, "Error: Cannot open video file %s\n", video_file);
return 1;
}
int total_frames = (int)cap.get(cv::CAP_PROP_FRAME_COUNT);
int width = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH);
int height = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT);
printf("Video: %dx%d, %d frames\n", width, height, total_frames);
if (total_frames < 10) {
fprintf(stderr, "Error: Video too short (need at least 10 frames)\n");
return 1;
}
// Calculate mesh dimensions (32×32 pixel cells, matches current encoder)
int mesh_cell_size = 32;
int mesh_w = (width + mesh_cell_size - 1) / mesh_cell_size;
int mesh_h = (height + mesh_cell_size - 1) / mesh_cell_size;
if (mesh_w < 2) mesh_w = 2;
if (mesh_h < 2) mesh_h = 2;
printf("Mesh: %dx%d (approx %dx%d px cells)\n",
mesh_w, mesh_h, width / mesh_w, height / mesh_h);
// Encoder parameters (match current encoder_tav.c settings)
float smoothness = 0.5f; // Mesh smoothness weight
int smooth_iterations = 8; // Smoothing iterations
float affine_threshold = 0.40f; // 40% improvement required for affine
printf("Settings: smoothness=%.2f, iterations=%d, affine_threshold=%.0f%%\n",
smoothness, smooth_iterations, affine_threshold * 100.0f);
// Seed random number generator
srand(time(NULL));
// Pick random frames (avoid first and last 5 frames)
printf("\nTesting %d random frame pairs:\n", num_test_frames);
for (int test = 0; test < num_test_frames; test++) {
// Pick random frame (ensure we have a previous frame)
int frame_num = 5 + rand() % (total_frames - 10);
printf("\n[Test %d/%d] Warping frame %d → frame %d (inverse warp)\n",
test + 1, num_test_frames, frame_num - 1, frame_num);
// Read previous frame (source for warping)
cap.set(cv::CAP_PROP_POS_FRAMES, frame_num - 1);
cv::Mat prev_frame;
cap >> prev_frame;
if (prev_frame.empty()) {
fprintf(stderr, "Error reading frame %d\n", frame_num - 1);
continue;
}
// Read current frame (target to match)
cv::Mat curr_frame;
cap >> curr_frame;
if (curr_frame.empty()) {
fprintf(stderr, "Error reading frame %d\n", frame_num);
continue;
}
// Convert to RGB for block matching
cv::Mat prev_rgb, curr_rgb;
cv::cvtColor(prev_frame, prev_rgb, cv::COLOR_BGR2RGB);
cv::cvtColor(curr_frame, curr_rgb, cv::COLOR_BGR2RGB);
// Compute hierarchical block matching (replaces optical flow)
printf(" Computing hierarchical block matching...\n");
float *flow_x = nullptr, *flow_y = nullptr;
estimate_motion_optical_flow(
prev_rgb.data, curr_rgb.data,
width, height,
&flow_x, &flow_y
);
// Build mesh from flow
printf(" Building mesh from block matches...\n");
int16_t *mesh_dx = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
int16_t *mesh_dy = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
build_mesh_from_flow(flow_x, flow_y, width, height, mesh_w, mesh_h, mesh_dx, mesh_dy);
// Apply Laplacian smoothing
printf(" Applying Laplacian smoothing (%d iterations, %.2f weight)...\n",
smooth_iterations, smoothness);
smooth_mesh_laplacian(mesh_dx, mesh_dy, mesh_w, mesh_h, smoothness, smooth_iterations);
// Estimate selective per-cell affine transforms
printf(" Estimating selective affine transforms (threshold=%.0f%%)...\n",
affine_threshold * 100.0f);
uint8_t *affine_mask = (uint8_t*)calloc(mesh_w * mesh_h, sizeof(uint8_t));
int16_t *affine_a11 = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
int16_t *affine_a12 = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
int16_t *affine_a21 = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
int16_t *affine_a22 = (int16_t*)malloc(mesh_w * mesh_h * sizeof(int16_t));
int cell_w = width / mesh_w;
int cell_h = height / mesh_h;
int affine_count = 0;
for (int cy = 0; cy < mesh_h; cy++) {
for (int cx = 0; cx < mesh_w; cx++) {
int cell_idx = cy * mesh_w + cx;
int16_t tx, ty, a11, a12, a21, a22;
int use_affine = estimate_cell_affine(
flow_x, flow_y,
width, height,
cx, cy, cell_w, cell_h,
affine_threshold,
&tx, &ty, &a11, &a12, &a21, &a22
);
affine_mask[cell_idx] = use_affine ? 1 : 0;
mesh_dx[cell_idx] = tx;
mesh_dy[cell_idx] = ty;
affine_a11[cell_idx] = a11;
affine_a12[cell_idx] = a12;
affine_a21[cell_idx] = a21;
affine_a22[cell_idx] = a22;
if (use_affine) affine_count++;
}
}
printf(" Affine usage: %d/%d cells (%.1f%%)\n",
affine_count, mesh_w * mesh_h,
100.0f * affine_count / (mesh_w * mesh_h));
// Warp previous frame to current frame
printf(" Warping frame with mesh + affine...\n");
cv::Mat warped;
apply_mesh_warp_rgb(prev_frame, warped, mesh_dx, mesh_dy,
affine_mask, affine_a11, affine_a12, affine_a21, affine_a22,
mesh_w, mesh_h);
// Create visualisation with affine overlay
cv::Mat warped_viz = warped.clone();
create_affine_overlay(warped_viz, affine_mask, mesh_w, mesh_h);
// Compute MSE between warped and target
double mse = 0.0;
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
for (int c = 0; c < 3; c++) {
double diff = (double)warped.at<cv::Vec3b>(y, x)[c] -
(double)curr_frame.at<cv::Vec3b>(y, x)[c];
mse += diff * diff;
}
}
}
mse /= (width * height * 3);
double psnr = (mse > 0) ? 10.0 * log10(255.0 * 255.0 / mse) : 999.0;
printf(" Warp quality: MSE=%.2f, PSNR=%.2f dB\n", mse, psnr);
// Save images
char filename[256];
sprintf(filename, "test_mesh_frame_%04d_source.png", frame_num - 1);
cv::imwrite(filename, prev_frame);
printf(" Saved source: %s\n", filename);
sprintf(filename, "test_mesh_frame_%04d_warped.png", frame_num);
cv::imwrite(filename, warped);
printf(" Saved warped: %s\n", filename);
sprintf(filename, "test_mesh_frame_%04d_warped_viz.png", frame_num);
cv::imwrite(filename, warped_viz);
printf(" Saved warped+viz (green=affine): %s\n", filename);
sprintf(filename, "test_mesh_frame_%04d_target.png", frame_num);
cv::imwrite(filename, curr_frame);
printf(" Saved target: %s\n", filename);
// Compute difference image
cv::Mat diff_img = cv::Mat::zeros(height, width, CV_8UC3);
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
for (int c = 0; c < 3; c++) {
int diff = abs((int)warped.at<cv::Vec3b>(y, x)[c] -
(int)curr_frame.at<cv::Vec3b>(y, x)[c]);
diff_img.at<cv::Vec3b>(y, x)[c] = std::min(diff * 3, 255); // Amplify for visibility
}
}
}
sprintf(filename, "test_mesh_frame_%04d_diff.png", frame_num);
cv::imwrite(filename, diff_img);
printf(" Saved difference (amplified 3x): %s\n", filename);
// Compute motion statistics
float max_motion = 0.0f, avg_motion = 0.0f;
for (int i = 0; i < mesh_w * mesh_h; i++) {
float dx = mesh_dx[i] / 8.0f;
float dy = mesh_dy[i] / 8.0f;
float motion = sqrtf(dx * dx + dy * dy);
avg_motion += motion;
if (motion > max_motion) max_motion = motion;
}
avg_motion /= (mesh_w * mesh_h);
printf(" Motion: avg=%.2f px, max=%.2f px\n", avg_motion, max_motion);
// Cleanup
free(flow_x);
free(flow_y);
free(mesh_dx);
free(mesh_dy);
free(affine_mask);
free(affine_a11);
free(affine_a12);
free(affine_a21);
free(affine_a22);
}
printf("\nDone! Check output images:\n");
printf(" *_source.png: Original frame before warping\n");
printf(" *_warped.png: Warped frame (should match target)\n");
printf(" *_warped_viz.png: Warped with green overlay showing affine cells\n");
printf(" *_target.png: Target frame to match\n");
printf(" *_diff.png: Difference image (should be mostly black if warp is good)\n");
cap.release();
return 0;
}

View File

@@ -1,200 +0,0 @@
// XYB Color Space Conversion Functions for TEV
// Based on JPEG XL XYB specification with proper sRGB linearization
// test with:
//// gcc -DXYB_TEST_MAIN -o test_xyb xyb_conversion.c -lm && ./test_xyb
#include <stdio.h>
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
// Clamp x to the closed range [min, max].
// NOTE: this is a macro, so each argument can be evaluated more than once;
// avoid passing expressions with side effects or expensive calls.
#define CLAMP(x, min, max) ((x) < (min) ? (min) : ((x) > (max) ? (max) : (x)))
// XYB conversion constants from JPEG XL specification
// XYB_BIAS is added to each LMS mix before the cube root is taken and
// CBRT_BIAS is subtracted afterwards (see rgb_to_xyb); xyb_to_rgb undoes both.
static const double XYB_BIAS = 0.00379307325527544933;
static const double CBRT_BIAS = 0.155954200549248620; // cbrt(XYB_BIAS)
// RGB to LMS mixing coefficients
// Row i holds the weights applied to linear (R, G, B) for channel i; each row
// sums to 1.
static const double RGB_TO_LMS[3][3] = {
{0.3, 0.622, 0.078}, // L coefficients
{0.23, 0.692, 0.078}, // M coefficients
{0.24342268924547819, 0.20476744424496821, 0.55180986650955360} // S coefficients
};
// LMS to RGB inverse matrix (calculated via matrix inversion)
// Must be regenerated whenever RGB_TO_LMS changes, otherwise the round trip
// through xyb_to_rgb no longer reproduces the input.
static const double LMS_TO_RGB[3][3] = {
{11.0315669046, -9.8669439081, -0.1646229965},
{-3.2541473811, 4.4187703776, -0.1646229965},
{-3.6588512867, 2.7129230459, 1.9459282408}
};
// sRGB linearization (0..1 range)
static inline double srgb_linearize(double val) {
if (val > 0.04045) {
return pow((val + 0.055) / 1.055, 2.4);
} else {
return val / 12.92;
}
}
// sRGB unlinearization (0..1 range)
static inline double srgb_unlinearize(double val) {
if (val > 0.0031308) {
return 1.055 * pow(val, 1.0 / 2.4) - 0.055;
} else {
return val * 12.92;
}
}
// Fast cube root approximation for performance
static inline double fast_cbrt(double x) {
if (x < 0) return -cbrt(-x);
return cbrt(x);
}
// Converts one 8-bit sRGB pixel to XYB.
//
// Steps: scale to 0..1 and linearize, mix into LMS with RGB_TO_LMS plus
// XYB_BIAS, take the cube root and subtract CBRT_BIAS, then form
//   X = (L - M) / 2,  Y = (L + M) / 2,  B = S.
// Results are written through x, y and xyb_b.
void rgb_to_xyb(uint8_t r, uint8_t g, uint8_t b, double *x, double *y, double *xyb_b) {
    // Linear-light R, G, B in 0..1
    const double linear[3] = {
        srgb_linearize(r / 255.0),
        srgb_linearize(g / 255.0),
        srgb_linearize(b / 255.0)
    };

    // Cube-root-compressed L, M, S responses
    double lms_gamma[3];
    for (int ch = 0; ch < 3; ch++) {
        const double mix = RGB_TO_LMS[ch][0] * linear[0] +
                           RGB_TO_LMS[ch][1] * linear[1] +
                           RGB_TO_LMS[ch][2] * linear[2] + XYB_BIAS;
        lms_gamma[ch] = fast_cbrt(mix) - CBRT_BIAS;
    }

    // Opponent transform
    *x = (lms_gamma[0] - lms_gamma[1]) / 2.0;
    *y = (lms_gamma[0] + lms_gamma[1]) / 2.0;
    *xyb_b = lms_gamma[2];
}
// Converts one XYB triple back to an 8-bit sRGB pixel.
//
// Steps: recover the compressed L/M/S values (L = x + y, M = y - x, S = b),
// undo the cube root and bias, multiply by LMS_TO_RGB, clamp linear RGB to
// 0..1, re-apply the sRGB curve and round to 0..255. Results are written
// through r, g and b.
void xyb_to_rgb(double x, double y, double xyb_b, uint8_t *r, uint8_t *g, uint8_t *b) {
    // Compressed L, M, S
    const double lms_gamma[3] = { x + y, y - x, xyb_b };

    // Undo the cube root and the bias
    double lms_mix[3];
    for (int ch = 0; ch < 3; ch++) {
        lms_mix[ch] = pow(lms_gamma[ch] + CBRT_BIAS, 3.0) - XYB_BIAS;
    }

    // Back to linear RGB, restricted to the displayable range
    double linear[3];
    for (int ch = 0; ch < 3; ch++) {
        const double mixed = LMS_TO_RGB[ch][0] * lms_mix[0] +
                             LMS_TO_RGB[ch][1] * lms_mix[1] +
                             LMS_TO_RGB[ch][2] * lms_mix[2];
        linear[ch] = CLAMP(mixed, 0.0, 1.0);
    }

    // sRGB-encode and round to the nearest 8-bit code
    const int r8 = (int)(srgb_unlinearize(linear[0]) * 255.0 + 0.5);
    const int g8 = (int)(srgb_unlinearize(linear[1]) * 255.0 + 0.5);
    const int b8 = (int)(srgb_unlinearize(linear[2]) * 255.0 + 0.5);
    *r = CLAMP(r8, 0, 255);
    *g = CLAMP(g8, 0, 255);
    *b = CLAMP(b8, 0, 255);
}
// Converts an 8-bit sRGB pixel to integer XYB components for the TEV format.
//
//   y_quant: y * 255 + 128, truncated, clamped to 0..255
//   x_quant: x * 255, truncated, clamped to -128..127
//   b_quant: b * 255, truncated, clamped to -128..127
//
// NOTE(review): with the +128 offset any y above roughly 0.498 clamps to 255;
// confirm this is the intended Y mapping.
void rgb_to_xyb_quantized(uint8_t r, uint8_t g, uint8_t b, int *x_quant, int *y_quant, int *b_quant) {
    double xf, yf, bf;
    rgb_to_xyb(r, g, b, &xf, &yf, &bf);

    const int y_scaled = (int)(yf * 255.0 + 128.0);
    const int x_scaled = (int)(xf * 255.0);
    const int b_scaled = (int)(bf * 255.0);

    *y_quant = CLAMP(y_scaled, 0, 255);
    *x_quant = CLAMP(x_scaled, -128, 127);
    *b_quant = CLAMP(b_scaled, -128, 127);
}
// Round-trip self-test: converts a fixed palette RGB -> XYB -> RGB, prints one
// line per colour and counts a colour as failed when any channel differs from
// the original by more than 2. Returns 1 when every colour passes, else 0.
int test_xyb_conversion() {
    printf("Testing XYB conversion accuracy with sRGB linearization...\n");

    // Palette: primaries, secondaries, black/white/gray, then chart colours
    uint8_t test_colors[][3] = {
        {255, 0, 0},      // Red
        {0, 255, 0},      // Green
        {0, 0, 255},      // Blue
        {255, 255, 255},  // White
        {0, 0, 0},        // Black
        {128, 128, 128},  // Gray
        {255, 255, 0},    // Yellow
        {255, 0, 255},    // Magenta
        {0, 255, 255},    // Cyan
        // MacBeth chart colours converted to sRGB
        {0x73,0x52,0x44},
        {0xc2,0x96,0x82},
        {0x62,0x7a,0x9d},
        {0x57,0x6c,0x43},
        {0x85,0x80,0xb1},
        {0x67,0xbd,0xaa},
        {0xd6,0x7e,0x2c},
        {0x50,0x5b,0xa6},
        {0xc1,0x5a,0x63},
        {0x5e,0x3c,0x6c},
        {0x9d,0xbc,0x40},
        {0xe0,0xa3,0x2e},
        {0x38,0x3d,0x96},
        {0x46,0x94,0x49},
        {0xaf,0x36,0x3c},
        {0xe7,0xc7,0x1f},
        {0xbb,0x56,0x95},
        {0x08,0x85,0xa1},
        {0xf3,0xf3,0xf3},
        {0xc8,0xc8,0xc8},
        {0xa0,0xa0,0xa0},
        {0x7a,0x7a,0x7a},
        {0x55,0x55,0x55},
        {0x34,0x34,0x34}
    };
    int num_tests = sizeof(test_colors) / sizeof(test_colors[0]);
    int errors = 0;

    for (int i = 0; i < num_tests; i++) {
        const uint8_t *rgb_in = test_colors[i];
        double xyb[3];
        uint8_t rgb_out[3];

        // Forward then inverse conversion
        rgb_to_xyb(rgb_in[0], rgb_in[1], rgb_in[2], &xyb[0], &xyb[1], &xyb[2]);
        xyb_to_rgb(xyb[0], xyb[1], xyb[2], &rgb_out[0], &rgb_out[1], &rgb_out[2]);

        // Per-channel absolute error; up to 2 codes of rounding is tolerated
        int delta[3];
        int out_of_tolerance = 0;
        for (int ch = 0; ch < 3; ch++) {
            delta[ch] = abs((int)rgb_in[ch] - (int)rgb_out[ch]);
            if (delta[ch] > 2) {
                out_of_tolerance = 1;
            }
        }

        printf("RGB(%3d,%3d,%3d) -> XYB(%6.3f,%6.3f,%6.3f) -> RGB(%3d,%3d,%3d) [Error: %d,%d,%d]\n",
               rgb_in[0], rgb_in[1], rgb_in[2], xyb[0], xyb[1], xyb[2],
               rgb_out[0], rgb_out[1], rgb_out[2], delta[0], delta[1], delta[2]);

        if (out_of_tolerance) {
            errors++;
        }
    }

    printf("Test completed: %d/%d passed\n", num_tests - errors, num_tests);
    return errors == 0;
}
#ifdef XYB_TEST_MAIN
// Standalone test entry point, compiled only with -DXYB_TEST_MAIN.
// Exit status is 0 when the round-trip self-test passes and 1 otherwise.
int main() {
    const int all_passed = test_xyb_conversion();
    if (all_passed) {
        return 0;
    }
    return 1;
}
#endif