This allows line breaks after any kana, unless they are immediately followed by a gyōtō kinsoku character, in which case the line may not break at that point. Also pedantically renamed is_chinese to is_cjk_ideograph as is_chinese will also cause line breaks on Japanese kanji.
This commit is contained in:
parent
abf340c62a
commit
2ca72e0bae
1 changed files with 25 additions and 6 deletions
|
@ -195,8 +195,8 @@ fn line_break(
|
||||||
let mut row_start_idx = 0;
|
let mut row_start_idx = 0;
|
||||||
let mut non_empty_rows = 0;
|
let mut non_empty_rows = 0;
|
||||||
|
|
||||||
for (i, glyph) in paragraph.glyphs.iter().enumerate() {
|
for i in 0..paragraph.glyphs.len() {
|
||||||
let potential_row_width = glyph.max_x() - row_start_x;
|
let potential_row_width = paragraph.glyphs[i].max_x() - row_start_x;
|
||||||
|
|
||||||
if job.wrap.max_rows > 0 && non_empty_rows >= job.wrap.max_rows {
|
if job.wrap.max_rows > 0 && non_empty_rows >= job.wrap.max_rows {
|
||||||
break;
|
break;
|
||||||
|
@ -246,7 +246,7 @@ fn line_break(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
row_break_candidates.add(i, glyph.chr);
|
row_break_candidates.add(i, ¶graph.glyphs[i..]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if row_start_idx < paragraph.glyphs.len() {
|
if row_start_idx < paragraph.glyphs.len() {
|
||||||
|
@ -717,6 +717,8 @@ struct RowBreakCandidates {
|
||||||
space: Option<usize>,
|
space: Option<usize>,
|
||||||
/// Logograms (single character representing a whole word) are good candidates for line break.
|
/// Logograms (single character representing a whole word) are good candidates for line break.
|
||||||
logogram: Option<usize>,
|
logogram: Option<usize>,
|
||||||
|
/// Kana (Japanese hiragana and katakana) may be line broken unless before a gyōtō kinsoku character.
|
||||||
|
kana: Option<usize>,
|
||||||
/// Breaking at a dash is a super-
|
/// Breaking at a dash is a super-
|
||||||
/// good idea.
|
/// good idea.
|
||||||
dash: Option<usize>,
|
dash: Option<usize>,
|
||||||
|
@ -729,16 +731,19 @@ struct RowBreakCandidates {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RowBreakCandidates {
|
impl RowBreakCandidates {
|
||||||
fn add(&mut self, index: usize, chr: char) {
|
fn add(&mut self, index: usize, glyphs: &[Glyph]) {
|
||||||
|
let chr = glyphs[0].chr;
|
||||||
const NON_BREAKING_SPACE: char = '\u{A0}';
|
const NON_BREAKING_SPACE: char = '\u{A0}';
|
||||||
if chr.is_whitespace() && chr != NON_BREAKING_SPACE {
|
if chr.is_whitespace() && chr != NON_BREAKING_SPACE {
|
||||||
self.space = Some(index);
|
self.space = Some(index);
|
||||||
} else if is_chinese(chr) {
|
} else if is_cjk_ideograph(chr) {
|
||||||
self.logogram = Some(index);
|
self.logogram = Some(index);
|
||||||
} else if chr == '-' {
|
} else if chr == '-' {
|
||||||
self.dash = Some(index);
|
self.dash = Some(index);
|
||||||
} else if chr.is_ascii_punctuation() {
|
} else if chr.is_ascii_punctuation() {
|
||||||
self.punctuation = Some(index);
|
self.punctuation = Some(index);
|
||||||
|
} else if is_kana(chr) && (glyphs.len() == 1 || !is_gyoto_kinsoku(glyphs[1].chr)) {
|
||||||
|
self.kana = Some(index);
|
||||||
}
|
}
|
||||||
self.any = Some(index);
|
self.any = Some(index);
|
||||||
}
|
}
|
||||||
|
@ -760,6 +765,7 @@ impl RowBreakCandidates {
|
||||||
self.any
|
self.any
|
||||||
} else {
|
} else {
|
||||||
self.space
|
self.space
|
||||||
|
.or(self.kana)
|
||||||
.or(self.logogram)
|
.or(self.logogram)
|
||||||
.or(self.dash)
|
.or(self.dash)
|
||||||
.or(self.punctuation)
|
.or(self.punctuation)
|
||||||
|
@ -769,12 +775,25 @@ impl RowBreakCandidates {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn is_chinese(c: char) -> bool {
|
fn is_cjk_ideograph(c: char) -> bool {
|
||||||
('\u{4E00}' <= c && c <= '\u{9FFF}')
|
('\u{4E00}' <= c && c <= '\u{9FFF}')
|
||||||
|| ('\u{3400}' <= c && c <= '\u{4DBF}')
|
|| ('\u{3400}' <= c && c <= '\u{4DBF}')
|
||||||
|| ('\u{2B740}' <= c && c <= '\u{2B81F}')
|
|| ('\u{2B740}' <= c && c <= '\u{2B81F}')
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn is_kana(c: char) -> bool {
|
||||||
|
('\u{3040}' <= c && c <= '\u{309F}') // Hiragana block
|
||||||
|
|| ('\u{30A0}' <= c && c <= '\u{30FF}') // Katakana block
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn is_gyoto_kinsoku(c: char) -> bool {
|
||||||
|
// Gyōtō (meaning "beginning of line") kinsoku characters in Japanese typesetting are characters that may not appear at the start of a line, according to kinsoku shori rules.
|
||||||
|
// The list of gyōtō kinsoku characters can be found at https://en.wikipedia.org/wiki/Line_breaking_rules_in_East_Asian_languages#Characters_not_permitted_on_the_start_of_a_line.
|
||||||
|
")]}〕〉》」』】〙〗〟'\"⦆»ヽヾーァィゥェォッャュョヮヵヶぁぃぅぇぉっゃゅょゎゕゖㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿ々〻‐゠–〜?!‼⁇⁈⁉・、:;,。.".contains(c)
|
||||||
|
}
|
||||||
|
|
||||||
// ----------------------------------------------------------------------------
|
// ----------------------------------------------------------------------------
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
Loading…
Reference in a new issue