Difference between revisions of "LoadICUBreakIterator"
(tab) |
B michaelsen (Talk | contribs) |
||
(4 intermediate revisions by 2 users not shown) | |||
Line 1: | Line 1: | ||
=Breaking encapsulation of ICU BreakIterator= | =Breaking encapsulation of ICU BreakIterator= | ||
− | Because of {{Bug|84467}} | + | Because of {{Bug|84467}} (a duplicate of {{Bug|81519}}) we are using the <code>RuleBasedBreakIterator()</code> constructor and then we want to call <code>setBreakType()</code> on it. |
+ | |||
+ | There is a fix to this that removes the patch to ICU by creating a subclass of <code>RuleBasedBreakIterator</code> which can access the <code>protected</code> <code>setBreakType()</code> member. The bug is here: {{Bug|88411}} | ||
ICU code: | ICU code: | ||
Line 9: | Line 11: | ||
OpenOffice.org code: | OpenOffice.org code: | ||
* [http://l10n.openoffice.org/source/browse/l10n/i18npool/source/breakiterator/breakiterator_unicode.cxx?rev=1.34&view=markup BreakIterator_Unicode::loadICUBreakIterator] function | * [http://l10n.openoffice.org/source/browse/l10n/i18npool/source/breakiterator/breakiterator_unicode.cxx?rev=1.34&view=markup BreakIterator_Unicode::loadICUBreakIterator] function | ||
+ | |||
+ | Mailing list discussions: | ||
+ | * [http://www.nabble.com/Minor-changes-needed-to-ICULanguageBreakFactory-(ICU4C)-td10069414.html Discussion] about [http://bugs.icu-project.org/trac/ticket/5695 ICULanguageBreakFactory] | ||
+ | * [http://www.freebsd.org/cgi/query-pr.cgi?pr=121787 ports/121787] FreeBSD problem report | ||
+ | * [http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=448745 Debian bug 448745] | ||
+ | * [http://sourceforge.net/mailarchive/forum.php?thread_name=200804301825.10247.mi%2Bicu%40aldan.algebra.com&forum_name=icu-support icu-support] | ||
+ | |||
+ | Example reasons to use custom rules: | ||
+ | * {{Bug|72868|Writer/Impress: line does not break after Chinese punctuation and before Latin letters}} | ||
+ | * {{Bug|80891|character in the forbidden list sometimes appears at the home of line}} | ||
+ | * {{Bug|83229|wrong hyphenation when word does contain a hyphen}} | ||
+ | * {{Bug|83649|Line break should be between typographical quote and left bracket}} | ||
+ | * {{Bug|83464|line break between letter and $}} | ||
+ | * {{Bug|81448|slash and backslash make non-breaking spaces of preceding spaces}} | ||
=Use cases of <code>loadICUBreakIterator</code>= | =Use cases of <code>loadICUBreakIterator</code>= | ||
+ | |||
+ | Questions: | ||
+ | *Why does <code>wordRule</code> need to be static and preserved across the calls? | ||
+ | *Is rulestring <code>word</code> used at all? Other WordTypes? | ||
{|class="wikitable" | {|class="wikitable" | ||
+ | !public method | ||
+ | !loadICU call | ||
+ | !resulting rule text | ||
+ | |- | ||
| | | | ||
nextCharacters(Text, nStartPos, rLocale, | nextCharacters(Text, nStartPos, rLocale, | ||
Line 20: | Line 44: | ||
SKIPCELL, sal_Int32 nCount, nDone) | SKIPCELL, sal_Int32 nCount, nDone) | ||
|loadICUBreakIterator(rLocale, LOAD_CHARACTER_BREAKITERATOR, 0, "char", Text) | |loadICUBreakIterator(rLocale, LOAD_CHARACTER_BREAKITERATOR, 0, "char", Text) | ||
+ | |<code>char</code> | ||
|- | |- | ||
| | | | ||
Line 31: | Line 56: | ||
rLocale, ANYWORD_IGNOREWHITESPACES, sal_Bool bDirection) | rLocale, ANYWORD_IGNOREWHITESPACES, sal_Bool bDirection) | ||
|loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, ANYWORD_IGNOREWHITESPACES, NULL, Text) | |loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, ANYWORD_IGNOREWHITESPACES, NULL, Text) | ||
+ | |<code>edit_word</code> | ||
|- | |- | ||
| | | | ||
Line 42: | Line 68: | ||
rLocale, DICTIONARY_WORD, sal_Bool bDirection) | rLocale, DICTIONARY_WORD, sal_Bool bDirection) | ||
|loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, DICTIONARY_WORD, NULL, Text) | |loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, DICTIONARY_WORD, NULL, Text) | ||
+ | |<code>dict_word</code> | ||
|- | |- | ||
| | | | ||
Line 53: | Line 80: | ||
rLocale, WORD_COUNT, sal_Bool bDirection) | rLocale, WORD_COUNT, sal_Bool bDirection) | ||
|loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, WORD_COUNT, NULL, Text) | |loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, WORD_COUNT, NULL, Text) | ||
+ | |<code>count_word</code> | ||
+ | |- | ||
+ | | | ||
+ | nextWord( const OUString& Text, sal_Int32 nStartPos, | ||
+ | rLocale, another_word_type) | ||
+ | |||
+ | previousWord(const OUString& Text, sal_Int32 nStartPos, | ||
+ | rLocale, another_word_type) | ||
+ | |||
+ | getWordBoundary( const OUString& Text, sal_Int32 nPos, | ||
+ | rLocale, another_word_type, sal_Bool bDirection) | ||
+ | |loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, another_word_type, NULL, Text) | ||
+ | |<code>word</code> '''(???)''' | ||
|- | |- | ||
| | | | ||
Line 59: | Line 99: | ||
endOfSentence( const OUString& Text, sal_Int32 nStartPos,rLocale) | endOfSentence( const OUString& Text, sal_Int32 nStartPos,rLocale) | ||
|loadICUBreakIterator(rLocale, LOAD_SENTENCE_BREAKITERATOR, 0, NULL, Text); | |loadICUBreakIterator(rLocale, LOAD_SENTENCE_BREAKITERATOR, 0, NULL, Text); | ||
+ | |NULL | ||
|- | |- | ||
| | | | ||
Line 67: | Line 108: | ||
const LineBreakUserOptions& /*rOptions*/ ) | const LineBreakUserOptions& /*rOptions*/ ) | ||
|loadICUBreakIterator(rLocale, LOAD_LINE_BREAKITERATOR, 0, "line", Text); | |loadICUBreakIterator(rLocale, LOAD_LINE_BREAKITERATOR, 0, "line", Text); | ||
+ | |<code>line</code> | ||
|} | |} | ||
+ | |||
+ | # Figure out if locale BreakIteratorRules (<code>{edit_word, dict_word, count_word, char, line}</code>) gives something for the requested locale | ||
+ | # If not, try to load ''rule''+<code>_</code> + ''lang'' string anyway. | ||
+ | [[Category:Localization]] |
Latest revision as of 20:20, 14 March 2010
Breaking encapsulation of ICU BreakIterator
Because of Issue 84467 (a duplicate of Issue 81519) we are using the RuleBasedBreakIterator() constructor
and then we want to call setBreakType()
on it.
There is a fix to this that removes the patch to ICU by creating a subclass of RuleBasedBreakIterator
which can access the protected
setBreakType()
member. The bug is here: Issue 88411
ICU code:
- BreakIterator reference
- RuleBasedBreakIterator reference
OpenOffice.org code:
Mailing list discussions:
- Discussion about ICULanguageBreakFactory
- ports/121787 FreeBSD problem report
- Debian bug 448745
- icu-support
Example reasons to use custom rules:
- Issue 72868 Writer/Impress: line does not break after Chinese punctuation and before Latin letters
- Issue 80891 character in the forbidden list sometimes appears at the home of line
- Issue 83229 wrong hyphenation when word does contain a hyphen
- Issue 83649 Line break should be between typographical quote and left bracket
- Issue 83464 line break between letter and $
- Issue 81448 slash and backslash make non-breaking spaces of preceding spaces
Use cases of loadICUBreakIterator
Questions:
- Why does wordRule need to be static and preserved across the calls?
- Is rulestring word used at all? Other WordTypes?
public method | loadICU call | resulting rule text |
---|---|---|
nextCharacters(Text, nStartPos, rLocale, SKIPCELL, sal_Int32 nCount, nDone) prevCharacters(Text, nStartPos, rLocale, SKIPCELL, sal_Int32 nCount, nDone) |
loadICUBreakIterator(rLocale, LOAD_CHARACTER_BREAKITERATOR, 0, "char", Text) | char
|
nextWord( const OUString& Text, sal_Int32 nStartPos, rLocale, ANYWORD_IGNOREWHITESPACES) previousWord(const OUString& Text, sal_Int32 nStartPos, rLocale, ANYWORD_IGNOREWHITESPACES) getWordBoundary( const OUString& Text, sal_Int32 nPos, rLocale, ANYWORD_IGNOREWHITESPACES, sal_Bool bDirection) |
loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, ANYWORD_IGNOREWHITESPACES, NULL, Text) | edit_word
|
nextWord( const OUString& Text, sal_Int32 nStartPos, rLocale, DICTIONARY_WORD) previousWord(const OUString& Text, sal_Int32 nStartPos, rLocale, DICTIONARY_WORD) getWordBoundary( const OUString& Text, sal_Int32 nPos, rLocale, DICTIONARY_WORD, sal_Bool bDirection) |
loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, DICTIONARY_WORD, NULL, Text) | dict_word
|
nextWord( const OUString& Text, sal_Int32 nStartPos, rLocale, WORD_COUNT) previousWord(const OUString& Text, sal_Int32 nStartPos, rLocale, WORD_COUNT) getWordBoundary( const OUString& Text, sal_Int32 nPos, rLocale, WORD_COUNT, sal_Bool bDirection) |
loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, WORD_COUNT, NULL, Text) | count_word
|
nextWord( const OUString& Text, sal_Int32 nStartPos, rLocale, another_word_type) previousWord(const OUString& Text, sal_Int32 nStartPos, rLocale, another_word_type) getWordBoundary( const OUString& Text, sal_Int32 nPos, rLocale, another_word_type, sal_Bool bDirection) |
loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, another_word_type, NULL, Text) | word (???)
|
beginOfSentence( const OUString& Text, sal_Int32 nStartPos, rLocale) endOfSentence( const OUString& Text, sal_Int32 nStartPos,rLocale) |
loadICUBreakIterator(rLocale, LOAD_SENTENCE_BREAKITERATOR, 0, NULL, Text); | NULL |
getLineBreak( const OUString& Text, sal_Int32 nStartPos, const lang::Locale& rLocale, sal_Int32 nMinBreakPos, const LineBreakHyphenationOptions& hOptions, const LineBreakUserOptions& /*rOptions*/ ) |
loadICUBreakIterator(rLocale, LOAD_LINE_BREAKITERATOR, 0, "line", Text); | line
|
- Figure out if locale BreakIteratorRules ({edit_word, dict_word, count_word, char, line}) gives something for the requested locale
- If not, try to load rule + _ + lang string anyway.