mirror of
https://github.com/curioustorvald/Terrarum-sans-bitmap.git
synced 2026-03-07 20:01:52 +09:00
tokeniser: japanese small kana coalesced
This commit is contained in:
@@ -17,19 +17,19 @@ import kotlin.math.*
|
|||||||
class MovableType(
|
class MovableType(
|
||||||
val font: TerrarumSansBitmap,
|
val font: TerrarumSansBitmap,
|
||||||
val inputText: CodepointSequence,
|
val inputText: CodepointSequence,
|
||||||
val width: Int,
|
val paperWidth: Int,
|
||||||
internal val isNull: Boolean = false
|
internal val isNull: Boolean = false
|
||||||
): Disposable {
|
): Disposable {
|
||||||
|
|
||||||
var height = 0; private set
|
var height = 0; private set
|
||||||
internal val hash: Long = inputText.getHash()
|
internal val hash: Long = inputText.getHash()
|
||||||
private var disposed = false
|
private var disposed = false
|
||||||
private val lines = ArrayList<List<Block>>()
|
private val typesettedSlugs = ArrayList<List<Block>>()
|
||||||
|
|
||||||
override fun dispose() {
|
override fun dispose() {
|
||||||
if (!disposed) {
|
if (!disposed) {
|
||||||
disposed = true
|
disposed = true
|
||||||
lines.forEach {
|
typesettedSlugs.forEach {
|
||||||
it.forEach {
|
it.forEach {
|
||||||
it.block.dispose()
|
it.block.dispose()
|
||||||
}
|
}
|
||||||
@@ -39,19 +39,93 @@ class MovableType(
|
|||||||
|
|
||||||
// perform typesetting
|
// perform typesetting
|
||||||
init { if (inputText.isNotEmpty() && !isNull) {
|
init { if (inputText.isNotEmpty() && !isNull) {
|
||||||
if (width < 100) throw IllegalArgumentException("Width too narrow; width must be at least 100 pixels (got $width)")
|
if (paperWidth < 100) throw IllegalArgumentException("Width too narrow; width must be at least 100 pixels (got $paperWidth)")
|
||||||
|
|
||||||
val lines = inputText.tokenise()
|
val lines = inputText.tokenise()
|
||||||
lines.debugprint()
|
lines.debugprint()
|
||||||
|
|
||||||
TODO()
|
TODO()
|
||||||
|
|
||||||
|
lines.forEach {
|
||||||
|
val boxes: MutableList<TextCacheObj> = it.map { font.createTextCache(it) }.toMutableList()
|
||||||
|
var slug = ArrayList<Block>() // slug of the linotype machine
|
||||||
|
var slugWidth = 0
|
||||||
|
|
||||||
|
fun dequeue() = boxes.removeAt(0)
|
||||||
|
fun addHyphenatedTail(box: TextCacheObj) = boxes.add(0, box)
|
||||||
|
fun addToSlug(box: TextCacheObj) {
|
||||||
|
val nextPosX = (slug.lastOrNull()?.getEndPos() ?: 0)
|
||||||
|
slug.add(Block(nextPosX, box))
|
||||||
|
slugWidth += box.width
|
||||||
|
}
|
||||||
|
fun dispatchSlug() {
|
||||||
|
typesettedSlugs.add(slug)
|
||||||
|
|
||||||
|
slug = ArrayList()
|
||||||
|
slugWidth = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
fun getBadnessW(): Pair<Float, Int> = TODO()
|
||||||
|
fun getBadnessT(): Pair<Float, Int> = TODO()
|
||||||
|
fun getBadnessH(): Pair<Float, Int> = TODO()
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
while (boxes.isNotEmpty()) {
|
||||||
|
val box = dequeue()
|
||||||
|
|
||||||
|
if (box.isNotGlue()) {
|
||||||
|
// if adding a box would cause overflow
|
||||||
|
if (slugWidth + spaceWidth + box.width >= paperWidth) {
|
||||||
|
// badness: always positive and weighted
|
||||||
|
// widthDelta: can be positive or negative
|
||||||
|
val (badnessW, widthDeltaW) = getBadnessW()
|
||||||
|
val (badnessT, widthDeltaT) = getBadnessT()
|
||||||
|
val (badnessH, widthDeltaH) = getBadnessH()
|
||||||
|
|
||||||
|
val (selectedBadness, selectedWidthDelta, selectedStrat) = listOf(
|
||||||
|
Triple(badnessW, widthDeltaW, "Widen"),
|
||||||
|
Triple(badnessT, widthDeltaT, "Tighten"),
|
||||||
|
Triple(badnessH, widthDeltaH, "Hyphenate"),
|
||||||
|
).minByOrNull { it.first }!!
|
||||||
|
|
||||||
|
when (selectedStrat) {
|
||||||
|
"Widen" -> {
|
||||||
|
TODO()
|
||||||
|
}
|
||||||
|
"Tighten" -> {
|
||||||
|
TODO()
|
||||||
|
}
|
||||||
|
"Hyphenate" -> {
|
||||||
|
TODO()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
dispatchSlug()
|
||||||
|
}
|
||||||
|
// typeset the boxes normally
|
||||||
|
else {
|
||||||
|
addToSlug(box)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
addToSlug(box)
|
||||||
|
}
|
||||||
|
} // end of while (boxes.isNotEmpty())
|
||||||
|
|
||||||
|
dispatchSlug()
|
||||||
|
} // end of lines.forEach
|
||||||
|
|
||||||
|
TODO()
|
||||||
} }
|
} }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
private fun lololololol() { if (inputText.isNotEmpty() && !isNull) {
|
private fun lololololol() { if (inputText.isNotEmpty() && !isNull) {
|
||||||
|
|
||||||
if (width < 100) throw IllegalArgumentException("Width too narrow; width must be at least 100 pixels (got $width)")
|
if (paperWidth < 100) throw IllegalArgumentException("Width too narrow; width must be at least 100 pixels (got $paperWidth)")
|
||||||
|
|
||||||
val inputCharSeqsTokenised = inputText.tokenise()
|
val inputCharSeqsTokenised = inputText.tokenise()
|
||||||
|
|
||||||
@@ -62,7 +136,7 @@ class MovableType(
|
|||||||
|
|
||||||
println("Length of input text: ${inputText.size}")
|
println("Length of input text: ${inputText.size}")
|
||||||
println("Token size: ${inputCharSeqsTokenised.size}")
|
println("Token size: ${inputCharSeqsTokenised.size}")
|
||||||
println("Paper width: $width")
|
println("Paper width: $paperWidth")
|
||||||
|
|
||||||
var currentLine = ArrayList<Block>()
|
var currentLine = ArrayList<Block>()
|
||||||
var wordCount = 0
|
var wordCount = 0
|
||||||
@@ -75,7 +149,7 @@ class MovableType(
|
|||||||
// println("\n Anchors [$wordCount] =${" ".repeat(if (wordCount < 10) 3 else if (wordCount < 100) 2 else 1)}${currentLine.map { it.posX }.joinToString()}\n")
|
// println("\n Anchors [$wordCount] =${" ".repeat(if (wordCount < 10) 3 else if (wordCount < 100) 2 else 1)}${currentLine.map { it.posX }.joinToString()}\n")
|
||||||
|
|
||||||
// flush the line
|
// flush the line
|
||||||
lines.add(currentLine)
|
typesettedSlugs.add(currentLine)
|
||||||
currentLine = ArrayList()
|
currentLine = ArrayList()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -233,7 +307,7 @@ class MovableType(
|
|||||||
|
|
||||||
// if the word is \n
|
// if the word is \n
|
||||||
if (thisWordStr.size == 3 && thisWordStr[1] == 0x0A) {
|
if (thisWordStr.size == 3 && thisWordStr[1] == 0x0A) {
|
||||||
println("Strategy [L ${lines.size}]: line is shorter than the paper width ($lineWidthNow < $width)")
|
println("Strategy [L ${typesettedSlugs.size}]: line is shorter than the paper width ($lineWidthNow < $paperWidth)")
|
||||||
|
|
||||||
// flush the line
|
// flush the line
|
||||||
if (lineWidthNow >= 0) flush()
|
if (lineWidthNow >= 0) flush()
|
||||||
@@ -243,7 +317,7 @@ class MovableType(
|
|||||||
}
|
}
|
||||||
// decide if it should add last word and make newline, or make newline then add the word
|
// decide if it should add last word and make newline, or make newline then add the word
|
||||||
// would adding the current word cause line overflow?
|
// would adding the current word cause line overflow?
|
||||||
else if (lineWidthNow + spaceWidth + thisWord.width >= width) {
|
else if (lineWidthNow + spaceWidth + thisWord.width >= paperWidth) {
|
||||||
justifyAndFlush(lineWidthNow, thisWordObj, thisWord)
|
justifyAndFlush(lineWidthNow, thisWordObj, thisWord)
|
||||||
}
|
}
|
||||||
// typeset the text normally
|
// typeset the text normally
|
||||||
@@ -259,12 +333,12 @@ class MovableType(
|
|||||||
}
|
}
|
||||||
} // end while
|
} // end while
|
||||||
|
|
||||||
println("Strategy [L ${lines.size}]: (end of the text)")
|
println("Strategy [L ${typesettedSlugs.size}]: (end of the text)")
|
||||||
flush()
|
flush()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
height = lines.size
|
height = typesettedSlugs.size
|
||||||
} }
|
} }
|
||||||
|
|
||||||
fun draw(batch: Batch, x: Int, y: Int, lineStart: Int = 0, linesToDraw: Int = -1, lineHeight: Int = 24) =
|
fun draw(batch: Batch, x: Int, y: Int, lineStart: Int = 0, linesToDraw: Int = -1, lineHeight: Int = 24) =
|
||||||
@@ -273,7 +347,7 @@ class MovableType(
|
|||||||
fun draw(batch: Batch, x: Float, y: Float, lineStart: Int = 0, linesToDraw: Int = 2147483647, lineHeight: Int = 24) {
|
fun draw(batch: Batch, x: Float, y: Float, lineStart: Int = 0, linesToDraw: Int = 2147483647, lineHeight: Int = 24) {
|
||||||
if (isNull) return
|
if (isNull) return
|
||||||
|
|
||||||
lines.subList(lineStart, minOf(lines.size, lineStart + linesToDraw)).forEachIndexed { lineNum, lineBlocks ->
|
typesettedSlugs.subList(lineStart, minOf(typesettedSlugs.size, lineStart + linesToDraw)).forEachIndexed { lineNum, lineBlocks ->
|
||||||
// println("Line [${lineNum+1}] anchors: "+ lineBlocks.map { it.posX }.joinToString())
|
// println("Line [${lineNum+1}] anchors: "+ lineBlocks.map { it.posX }.joinToString())
|
||||||
|
|
||||||
lineBlocks.forEach {
|
lineBlocks.forEach {
|
||||||
@@ -285,7 +359,10 @@ class MovableType(
|
|||||||
}
|
}
|
||||||
|
|
||||||
private data class Block(var posX: Int, val block: TextCacheObj) { // a single word
|
private data class Block(var posX: Int, val block: TextCacheObj) { // a single word
|
||||||
fun getEndPos() = this.posX + this.block.glyphLayout!!.width
|
fun getEndPos() = this.posX + this.block.width
|
||||||
|
// fun isGlue() = this.block.text.isGlue()
|
||||||
|
// inline fun isNotGlue() = !isGlue()
|
||||||
|
// fun getGlueWidth() = this.block.text[0].toGlueSize()
|
||||||
}
|
}
|
||||||
|
|
||||||
companion object {
|
companion object {
|
||||||
@@ -293,8 +370,8 @@ class MovableType(
|
|||||||
private val quots = listOf(0x22, 0x27, 0xAB, 0xBB, 0x2018, 0x2019, 0x201A, 0x201B, 0x201C, 0x201D, 0x201E, 0x201F, 0x2039, 0x203A).toSortedSet()
|
private val quots = listOf(0x22, 0x27, 0xAB, 0xBB, 0x2018, 0x2019, 0x201A, 0x201B, 0x201C, 0x201D, 0x201E, 0x201F, 0x2039, 0x203A).toSortedSet()
|
||||||
private val commas = listOf(0x2C, 0x3B, 0x3001, 0xff0c).toSortedSet()
|
private val commas = listOf(0x2C, 0x3B, 0x3001, 0xff0c).toSortedSet()
|
||||||
private val hangable = listOf(0x2E, 0x2C).toSortedSet()
|
private val hangable = listOf(0x2E, 0x2C).toSortedSet()
|
||||||
private val spaceWidth = 5
|
private const val spaceWidth = 5
|
||||||
private val hangWidth = 6
|
private const val hangWidth = 6
|
||||||
|
|
||||||
private fun CodePoint.toHex() = "U+${this.toString(16).padStart(4, '0').toUpperCase()}"
|
private fun CodePoint.toHex() = "U+${this.toString(16).padStart(4, '0').toUpperCase()}"
|
||||||
|
|
||||||
@@ -445,6 +522,35 @@ class MovableType(
|
|||||||
sendoutBox()
|
sendoutBox()
|
||||||
proceedToNextLine()
|
proceedToNextLine()
|
||||||
}
|
}
|
||||||
|
else if (c0.isWhiteSpace()) {
|
||||||
|
if (cM != null && !cM.isWhiteSpace())
|
||||||
|
sendoutBox()
|
||||||
|
|
||||||
|
appendGlue(c0)
|
||||||
|
}
|
||||||
|
else if (c0.isSmallKana()) {
|
||||||
|
if (cM.isSmallKana() || cM.isCJ()) {
|
||||||
|
appendToBuffer(c0)
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
sendoutBox()
|
||||||
|
appendToBuffer(c0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (c0.isCJparenStart()) {
|
||||||
|
if (boxBuffer.isNotEmpty())
|
||||||
|
sendoutBox()
|
||||||
|
appendZeroGlue()
|
||||||
|
sendoutGlue()
|
||||||
|
|
||||||
|
appendToBuffer(c0)
|
||||||
|
}
|
||||||
|
else if (c0.isCJpunctOrParenEnd()) {
|
||||||
|
if (cM.isWhiteSpace())
|
||||||
|
sendoutGlue()
|
||||||
|
|
||||||
|
appendToBuffer(c0)
|
||||||
|
}
|
||||||
else if (c0.isCJ()) {
|
else if (c0.isCJ()) {
|
||||||
if (cM.isWhiteSpace()) {
|
if (cM.isWhiteSpace()) {
|
||||||
sendoutGlue()
|
sendoutGlue()
|
||||||
@@ -463,26 +569,6 @@ class MovableType(
|
|||||||
|
|
||||||
appendToBuffer(c0)
|
appendToBuffer(c0)
|
||||||
}
|
}
|
||||||
else if (c0.isWhiteSpace()) {
|
|
||||||
if (cM != null && !cM.isWhiteSpace())
|
|
||||||
sendoutBox()
|
|
||||||
|
|
||||||
appendGlue(c0)
|
|
||||||
}
|
|
||||||
else if (c0.isCJparenStart()) {
|
|
||||||
if (boxBuffer.isNotEmpty())
|
|
||||||
sendoutBox()
|
|
||||||
appendZeroGlue()
|
|
||||||
sendoutGlue()
|
|
||||||
|
|
||||||
appendToBuffer(c0)
|
|
||||||
}
|
|
||||||
else if (c0.isCJpunctOrParenEnd()) {
|
|
||||||
if (cM.isWhiteSpace())
|
|
||||||
sendoutGlue()
|
|
||||||
|
|
||||||
appendToBuffer(c0)
|
|
||||||
}
|
|
||||||
else {
|
else {
|
||||||
if (cM.isCJ()) {
|
if (cM.isCJ()) {
|
||||||
sendoutBox()
|
sendoutBox()
|
||||||
@@ -530,7 +616,7 @@ class MovableType(
|
|||||||
|
|
||||||
private fun CodePoint?.isCJparenStart() = if (this == null) false else cjparenStarts.contains(this)
|
private fun CodePoint?.isCJparenStart() = if (this == null) false else cjparenStarts.contains(this)
|
||||||
private fun CodePoint?.isCJpunctOrParenEnd() = if (this == null) false else (cjpuncts.contains(this) || cjparenEnds.contains(this))
|
private fun CodePoint?.isCJpunctOrParenEnd() = if (this == null) false else (cjpuncts.contains(this) || cjparenEnds.contains(this))
|
||||||
|
/** True when this code point is contained in `jaSmallKanas`; a null receiver is never a small kana. */
private fun CodePoint?.isSmallKana() = if (this == null) false else jaSmallKanas.contains(this)
|
||||||
private fun CodePoint?.isControlIn() = if (this == null) false else controlIns.contains(this)
|
private fun CodePoint?.isControlIn() = if (this == null) false else controlIns.contains(this)
|
||||||
private fun CodePoint?.isControlOut() = if (this == null) false else controlOuts.contains(this)
|
private fun CodePoint?.isControlOut() = if (this == null) false else controlOuts.contains(this)
|
||||||
private fun CodePoint?.isColourCode() = if (this == null) false else colourCodes.contains(this)
|
private fun CodePoint?.isColourCode() = if (this == null) false else colourCodes.contains(this)
|
||||||
@@ -555,13 +641,20 @@ class MovableType(
|
|||||||
// one with the least distance from the middle point will be used for hyphenating point
|
// one with the least distance from the middle point will be used for hyphenating point
|
||||||
val hyphenateCandidates = ArrayList<Int>()
|
val hyphenateCandidates = ArrayList<Int>()
|
||||||
val splitCandidates = ArrayList<Int>()
|
val splitCandidates = ArrayList<Int>()
|
||||||
for (i in 1 until this.size) {
|
var i = 1
|
||||||
|
while (i < this.size) {
|
||||||
val thisChar = this[i]
|
val thisChar = this[i]
|
||||||
val prevChar = this[i-1]
|
val prevChar = this[i-1]
|
||||||
if (!isVowel(thisChar) && isVowel(prevChar))
|
if (!isVowel(thisChar) && isVowel(prevChar))
|
||||||
hyphenateCandidates.add(i)
|
hyphenateCandidates.add(i)
|
||||||
|
else if (thisChar == SHY && isVowel((prevChar))) {
|
||||||
|
hyphenateCandidates.add(i)
|
||||||
|
i += 1 // skip SHY
|
||||||
|
}
|
||||||
if (isHangulPK(prevChar) && isHangulI(thisChar))
|
if (isHangulPK(prevChar) && isHangulI(thisChar))
|
||||||
splitCandidates.add((i))
|
splitCandidates.add((i))
|
||||||
|
|
||||||
|
i += 1
|
||||||
}
|
}
|
||||||
|
|
||||||
hyphenateCandidates.removeIf { it <= 2 || it >= this.size - 2 }
|
hyphenateCandidates.removeIf { it <= 2 || it >= this.size - 2 }
|
||||||
@@ -632,13 +725,15 @@ class MovableType(
|
|||||||
0x20 to 5,
|
0x20 to 5,
|
||||||
0x3000 to 16,
|
0x3000 to 16,
|
||||||
)
|
)
|
||||||
private val cjpuncts = listOf(0x3001, 0x3002, 0x3006, 0x303b, 0x30a0, 0x30fb, 0x30fc, 0x301c, 0xff01, 0xff0c, 0xff0e, 0xff1a, 0xff1b, 0xff1f, 0xff5e, 0xff65).toSortedSet()
|
private val cjpuncts = listOf(0x203c, 0x2047, 0x2048, 0x2049, 0x3001, 0x3002, 0x3006, 0x303b, 0x30a0, 0x30fb, 0x30fc, 0x301c, 0xff01, 0xff0c, 0xff0e, 0xff1a, 0xff1b, 0xff1f, 0xff5e, 0xff65).toSortedSet()
|
||||||
private val cjparenStarts = listOf(0x3008, 0x300A, 0x300C, 0x300E, 0x3010, 0x3014, 0x3016, 0x3018, 0x301A, 0x30fb, 0xff65).toSortedSet()
|
private val cjparenStarts = listOf(0x3008, 0x300A, 0x300C, 0x300E, 0x3010, 0x3014, 0x3016, 0x3018, 0x301A, 0x30fb, 0xff65).toSortedSet()
|
||||||
private val cjparenEnds = listOf(0x3009, 0x300B, 0x300D, 0x300F, 0x3011, 0x3015, 0x3017, 0x3019, 0x301B).toSortedSet()
|
private val cjparenEnds = listOf(0x3009, 0x300B, 0x300D, 0x300F, 0x3011, 0x3015, 0x3017, 0x3019, 0x301B).toSortedSet()
|
||||||
|
private val jaSmallKanas = "ァィゥェォッャュョヮヵヶぁぃぅぇぉっゃゅょゎゕゖㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿ".map { it.toInt() }.toSortedSet()
|
||||||
|
|
||||||
private val ZWSP = 0x200B
|
private const val ZWSP = 0x200B
|
||||||
private val GLUE_POSITIVE_ONE = 0xFFFF0
|
private const val SHY = 0xAD
|
||||||
private val GLUE_NEGATIVE_ONE = 0xFFFE0
|
private const val GLUE_POSITIVE_ONE = 0xFFFF0
|
||||||
|
private const val GLUE_NEGATIVE_ONE = 0xFFFE0
|
||||||
|
|
||||||
private fun CodepointSequence.toReadable() = this.joinToString("") {
|
private fun CodepointSequence.toReadable() = this.joinToString("") {
|
||||||
if (it in 0x00..0x1f)
|
if (it in 0x00..0x1f)
|
||||||
@@ -671,6 +766,10 @@ class MovableType(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Tells whether this text cache object is a box (ordinary text) rather than glue.
 *
 * The body must negate `isGlue()`: returning it un-negated made this function report the
 * opposite of its name, so callers such as `if (box.isNotGlue())` treated glue as boxes.
 *
 * NOTE(review): `isGlue()` on the text buffer is defined outside this view; presumably it is
 * true when the code point sequence encodes a glue token. Confirm against its definition.
 *
 * @return `true` when the underlying text buffer is not glue.
 * @throws NullPointerException when [TextCacheObj.glyphLayout] is null (the `!!` assertion).
 */
private fun TextCacheObj.isNotGlue(): Boolean {
    return !this.glyphLayout!!.textBuffer.isGlue()
}
|
||||||
|
|
||||||
} // end of companion object
|
} // end of companion object
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -2068,6 +2068,13 @@ class TerrarumSansBitmap(
|
|||||||
|
|
||||||
data class ShittyGlyphLayout(val textBuffer: CodepointSequence, val linotype: Texture, val width: Int)
|
data class ShittyGlyphLayout(val textBuffer: CodepointSequence, val linotype: Texture, val width: Int)
|
||||||
data class TextCacheObj(val hash: Long, val glyphLayout: ShittyGlyphLayout?): Comparable<TextCacheObj> {
|
data class TextCacheObj(val hash: Long, val glyphLayout: ShittyGlyphLayout?): Comparable<TextCacheObj> {
|
||||||
|
val text: CodepointSequence
|
||||||
|
get() = glyphLayout!!.textBuffer
|
||||||
|
val width: Int
|
||||||
|
get() = glyphLayout!!.width
|
||||||
|
val texture: Texture
|
||||||
|
get() = glyphLayout!!.linotype
|
||||||
|
|
||||||
fun dispose() {
|
fun dispose() {
|
||||||
glyphLayout?.linotype?.dispose()
|
glyphLayout?.linotype?.dispose()
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user