mirror of
https://github.com/curioustorvald/Terrarum-sans-bitmap.git
synced 2026-03-07 11:51:50 +09:00
typesetter wip
This commit is contained in:
@@ -349,72 +349,140 @@ class MovableType(
|
|||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Tokenises the input sequence into lines of boxes and glues.
 *
 * Each element in the outer list is a single line of the text; a new line is started on every
 * 0x0A and once more at the end of the input. The line can be empty.
 *
 * Inner list (ArrayList) contains the tokens for the single line:
 * - a *box*: the currently active control header (control-in characters plus the active colour
 *   code, if any) followed by the buffered code points;
 * - a *glue*: a single code point. [ZWSP] stands for zero glue, `GLUE_POSITIVE_ONE + (n - 1)`
 *   for a glue of `n` in 1..16, and `GLUE_NEGATIVE_ONE + (n - 1)` for a glue of `-n`.
 *
 * NOTE(review): every code point listed in `cjpuncts` is also a key of `whitespaceGlues`, so with
 * the current tables the `isWhiteSpace()` checks below win over the `isCJpunct()` ones — confirm
 * whether that is intended.
 *
 * @throws IllegalStateException if the accumulated glue exceeds 16 in magnitude
 * @throws IllegalArgumentException if glue is requested for a code point without a glue entry
 */
private fun CodepointSequence.tokenise(): List<ArrayList<CodepointSequence>> {
    val lines = ArrayList<ArrayList<CodepointSequence>>()
    var tokens = ArrayList<CodepointSequence>()
    var boxBuffer = mutableListOf<Int>()

    val controlCharStack = ArrayList<CodePoint>()
    var colourCode: CodePoint? = null
    var colourCodeRemovalRequested = false

    var cM: Int? = null // the code point handled by the previous iteration
    var glue = 0

    // Control characters that must prefix every box emitted while they are active.
    fun getControlHeader() = if (colourCode != null)
        CodepointSequence(controlCharStack.reversed() + colourCode)
    else
        CodepointSequence(controlCharStack.reversed())

    // Emits the buffered code points as one box and starts a fresh buffer.
    fun sendoutBox() {
        tokens.add(CodepointSequence(getControlHeader() + boxBuffer))

        // a colour reset (0x100000) takes effect only after the box that contains it
        if (colourCodeRemovalRequested) {
            colourCodeRemovalRequested = false
            colourCode = null
        }

        boxBuffer = mutableListOf()
    }

    // Emits the accumulated glue as a single glue token and resets the accumulator.
    fun sendoutGlue() {
        val glueToken = when {
            glue == 0 -> ZWSP
            glue.absoluteValue > 16 -> throw IllegalStateException("Glue too large ($glue)")
            glue > 0 -> GLUE_POSITIVE_ONE + (glue - 1)
            else -> GLUE_NEGATIVE_ONE + (glue.absoluteValue - 1)
        }
        // Wrap the code point in a list: a bare Int argument would be taken as a capacity
        // rather than as an element if CodepointSequence is ArrayList-backed (TODO confirm).
        tokens.add(CodepointSequence(listOf(glueToken)))

        glue = 0
    }

    fun appendToBuffer(char: CodePoint) {
        boxBuffer.add(char)
    }

    // Adds the glue value registered for [char] to the accumulator.
    fun appendGlue(char: CodePoint) {
        glue += whitespaceGlues[char]
            ?: throw IllegalArgumentException("No glue is defined for code point $char")
    }

    // Closes the current line and opens a new, empty one.
    fun proceedToNextLine() {
        lines.add(tokens)
        tokens = ArrayList<CodepointSequence>()
    }

    this.forEach { c0 ->
        // Immutable snapshot of the previous code point so that null checks smart-cast.
        val prev = cM

        when {
            c0.isColourCode() -> {
                colourCode = c0
                appendToBuffer(c0)
            }
            c0 == 0x100000 -> {
                colourCodeRemovalRequested = true
                appendToBuffer(c0)
            }
            c0.isControlIn() -> controlCharStack.add(0, c0)
            c0.isControlOut() -> controlCharStack.removeAt(0)
            c0 == 0x0A -> {
                sendoutBox()
                proceedToNextLine()
            }
            c0.isCJ() -> {
                when {
                    prev.isWhiteSpace() -> sendoutGlue()
                    // explicit null check: isCJpunct() must not be asked about "no previous character"
                    prev != null && prev.isCJpunct() -> {
                        appendGlue(prev) // will append 0 to the glue
                        sendoutGlue()
                    }
                    else -> sendoutBox() // includes if prev.isCJ()
                }

                appendToBuffer(c0)
            }
            c0.isWhiteSpace() -> {
                // The first whitespace after a non-whitespace character closes the pending box.
                // Without the negation the box was never flushed here, so the following word
                // got appended to the same buffer and the glue was emitted in front of both.
                if (prev != null && !prev.isWhiteSpace())
                    sendoutBox()

                appendGlue(c0)
            }
            c0.isCJpunct() -> {
                if (prev.isWhiteSpace())
                    sendoutGlue()

                appendToBuffer(c0)
            }
            else -> {
                when {
                    prev.isCJ() -> sendoutBox()
                    prev.isWhiteSpace() -> sendoutGlue()
                    prev != null && prev.isCJpunct() -> {
                        appendGlue(prev) // will append 0 to the glue
                        sendoutGlue()
                    }
                }

                appendToBuffer(c0)
            }
        }

        cM = c0
    }

    // Flush whatever is left in the buffer as the final box and close the last line
    sendoutBox()
    proceedToNextLine()

    return lines
}
|
||||||
|
|
||||||
private fun <E> java.util.ArrayList<E>.penultimate(): E {
|
private fun <E> java.util.ArrayList<E>.penultimate(): E {
|
||||||
@@ -425,13 +493,17 @@ class MovableType(
|
|||||||
|
|
||||||
/**
 * Converts a hyphenation score into a penalty: ten times the cube root of the score
 * plus 0.47 times the score, narrowed to Float.
 */
private fun penaliseHyphenation(score: Int): Float {
    val cubeRootTerm = 10.0 * Math.pow(score.toDouble(), 1.0 / 3.0)
    val linearTerm = 0.47 * score
    return (cubeRootTerm + linearTerm).toFloat()
}
|
||||||
|
|
||||||
private fun CodePoint.isCJ() = listOf(4, 6).any {
|
/**
 * Tells whether this code point lies in entry 4 or entry 6 of [TerrarumSansBitmap.codeRange].
 * NOTE(review): presumably those two entries are the Chinese/Japanese blocks — confirm against
 * the table itself.
 */
private fun CodePoint?.isCJ(): Boolean {
    for (rangeIndex in listOf(4, 6)) {
        if (TerrarumSansBitmap.codeRange[rangeIndex].contains(this)) return true
    }
    return false
}
|
||||||
|
|
||||||
private fun CodePoint.isControlIn() = controlIns.contains(this)
|
/**
 * Tells whether a glue value is registered for this code point in [whitespaceGlues].
 * A `null` receiver is simply not a key and therefore yields false. The table also lists the
 * CJ punctuation marks (with zero glue), so those are reported as whitespace too.
 */
private fun CodePoint?.isWhiteSpace() = this in whitespaceGlues
|
||||||
private fun CodePoint.isControlOut() = controlOuts.contains(this)
|
|
||||||
private fun CodePoint.isColourCode() = colourCodes.contains(this)
|
/**
 * Tells whether this code point is one of the CJ punctuation marks listed in [cjpuncts].
 * A `null` receiver is never a member. It is rejected before the lookup because a sorted set
 * with natural ordering throws NullPointerException when asked whether it contains `null`.
 */
private fun CodePoint?.isCJpunct() = this != null && cjpuncts.contains(this)
|
||||||
|
|
||||||
|
/**
 * Tells whether this code point is listed in [controlIns].
 * A `null` receiver is never a member. It is rejected before the lookup because a sorted set
 * with natural ordering throws NullPointerException when asked whether it contains `null`.
 */
private fun CodePoint?.isControlIn() = this != null && controlIns.contains(this)
|
||||||
|
/**
 * Tells whether this code point is listed in [controlOuts].
 * A `null` receiver is never a member. It is rejected before the lookup because a sorted set
 * with natural ordering throws NullPointerException when asked whether it contains `null`.
 */
private fun CodePoint?.isControlOut() = this != null && controlOuts.contains(this)
|
||||||
|
/**
 * Tells whether this code point is listed in [colourCodes].
 * A `null` receiver is never a member. It is rejected before the lookup because a sorted set
 * with natural ordering throws NullPointerException when asked whether it contains `null`.
 */
private fun CodePoint?.isColourCode() = this != null && colourCodes.contains(this)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Hyphenates the word at the middle ("paragraph" -> "para-graph")
|
* Hyphenates the word at the middle ("paragraph" -> "para-graph")
|
||||||
@@ -517,6 +589,20 @@ class MovableType(
|
|||||||
// Every code point from 0x10F000 to 0x10FFFF (inclusive) counts as a colour code.
private val colourCodes: java.util.SortedSet<Int> = (0x10F000..0x10FFFF).toSortedSet()

// Control characters that are pushed to / popped from the tokeniser's control stack.
private val controlIns: java.util.SortedSet<Int> = sortedSetOf(0xFFFA2, 0xFFFA3, 0xFFFC1, 0xFFFC2)
private val controlOuts: java.util.SortedSet<Int> = sortedSetOf(0xFFFBF, 0xFFFC0)

// Glue value contributed by each whitespace-like code point.
private val whitespaceGlues = hashMapOf(
    0x0020 to 5,
    0x3000 to 16,
    // CJ punctuation marks (the members of cjpuncts) contribute no glue
    0x3001 to 0,
    0x3002 to 0,
    0xFF0C to 0,
    0xFF0E to 0,
)
private val cjpuncts: java.util.SortedSet<Int> = sortedSetOf(0x3001, 0x3002, 0xFF0C, 0xFF0E)

// Token code points for glue: ZWSP is emitted for a glue of zero; a glue of n (1..16) is emitted
// as GLUE_POSITIVE_ONE + (n - 1), and a glue of -n as GLUE_NEGATIVE_ONE + (n - 1).
private val ZWSP = 0x200B
private val GLUE_POSITIVE_ONE = 0xFFFF0
private val GLUE_NEGATIVE_ONE = 0xFFFE0
|
||||||
|
|
||||||
/**
 * Renders the sequence as a String, one code point after another.
 *
 * Code points above U+FFFF are encoded as surrogate pairs. Narrowing them to a single Char would
 * silently truncate them to an unrelated BMP character. Values that are not valid Unicode code
 * points are rendered as U+FFFD.
 */
private fun CodepointSequence.toReadable() = this.joinToString("") {
    if (Character.isValidCodePoint(it)) String(Character.toChars(it)) else "\uFFFD"
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user