Loading core/java/android/text/Hyphenator.java +4 −251 Original line number Diff line number Diff line Loading @@ -16,262 +16,15 @@ package android.text; import android.annotation.IntRange; import android.annotation.NonNull; import android.annotation.Nullable; import android.system.ErrnoException; import android.system.Os; import android.system.OsConstants; import android.util.Log; import com.android.internal.annotations.GuardedBy; import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; import java.util.HashMap; import java.util.Locale; /** * Hyphenator is a wrapper class for a native implementation of automatic hyphenation, * Hyphenator just initializes the native implementation of automatic hyphenation, * in essence finding valid hyphenation opportunities in a word. * * @hide */ public class Hyphenator { private static String TAG = "Hyphenator"; private final static Object sLock = new Object(); @GuardedBy("sLock") final static HashMap<Locale, Hyphenator> sMap = new HashMap<Locale, Hyphenator>(); private final long mNativePtr; private final HyphenationData mData; private Hyphenator(long nativePtr, HyphenationData data) { mNativePtr = nativePtr; mData = data; } public long getNativePtr() { return mNativePtr; } public static Hyphenator get(@Nullable Locale locale) { synchronized (sLock) { Hyphenator result = sMap.get(locale); if (result != null) { return result; } // If there's a variant, fall back to language+variant only, if available final String variant = locale.getVariant(); if (!variant.isEmpty()) { final Locale languageAndVariantOnlyLocale = new Locale(locale.getLanguage(), "", variant); result = sMap.get(languageAndVariantOnlyLocale); if (result != null) { return putAlias(locale, result); } } // Fall back to language-only, if available final Locale languageOnlyLocale = new Locale(locale.getLanguage()); result = sMap.get(languageOnlyLocale); if (result != null) { return putAlias(locale, result); } // Fall back to script-only, if 
available final String script = locale.getScript(); if (!script.equals("")) { final Locale scriptOnlyLocale = new Locale.Builder() .setLanguage("und") .setScript(script) .build(); result = sMap.get(scriptOnlyLocale); if (result != null) { return putAlias(locale, result); } } return putEmptyAlias(locale); } } private static class HyphenationData { private static final String SYSTEM_HYPHENATOR_LOCATION = "/system/usr/hyphen-data"; public final int mMinPrefix, mMinSuffix; public final long mDataAddress; // Reasonable enough values for cases where we have no hyphenation patterns but may be able // to do some automatic hyphenation based on characters. These values would be used very // rarely. private static final int DEFAULT_MIN_PREFIX = 2; private static final int DEFAULT_MIN_SUFFIX = 2; public static final HyphenationData sEmptyData = new HyphenationData(DEFAULT_MIN_PREFIX, DEFAULT_MIN_SUFFIX); // Create empty HyphenationData. private HyphenationData(int minPrefix, int minSuffix) { mMinPrefix = minPrefix; mMinSuffix = minSuffix; mDataAddress = 0; } HyphenationData(String languageTag, int minPrefix, int minSuffix) { mMinPrefix = minPrefix; mMinSuffix = minSuffix; final String patternFilename = "hyph-" + languageTag.toLowerCase(Locale.US) + ".hyb"; final File patternFile = new File(SYSTEM_HYPHENATOR_LOCATION, patternFilename); if (!patternFile.canRead()) { mDataAddress = 0; } else { long address; try (RandomAccessFile f = new RandomAccessFile(patternFile, "r")) { address = Os.mmap(0, f.length(), OsConstants.PROT_READ, OsConstants.MAP_SHARED, f.getFD(), 0 /* offset */); } catch (IOException | ErrnoException e) { Log.e(TAG, "error loading hyphenation " + patternFile, e); address = 0; } mDataAddress = address; } } } // Do not call this method outside of init method. 
private static Hyphenator putNewHyphenator(Locale loc, HyphenationData data) { final Hyphenator hyphenator = new Hyphenator(nBuildHyphenator( data.mDataAddress, loc.getLanguage(), data.mMinPrefix, data.mMinSuffix), data); sMap.put(loc, hyphenator); return hyphenator; } // Do not call this method outside of init method. private static void loadData(String langTag, int minPrefix, int maxPrefix) { final HyphenationData data = new HyphenationData(langTag, minPrefix, maxPrefix); putNewHyphenator(Locale.forLanguageTag(langTag), data); } // Caller must acquire sLock before calling this method. // The Hyphenator for the baseLangTag must exists. private static Hyphenator addAliasByTag(String langTag, String baseLangTag) { return putAlias(Locale.forLanguageTag(langTag), sMap.get(Locale.forLanguageTag(baseLangTag))); } // Caller must acquire sLock before calling this method. private static Hyphenator putAlias(Locale locale, Hyphenator base) { return putNewHyphenator(locale, base.mData); } // Caller must acquire sLock before calling this method. private static Hyphenator putEmptyAlias(Locale locale) { return putNewHyphenator(locale, HyphenationData.sEmptyData); } // TODO: Confirm that these are the best values. Various sources suggest (1, 1), but // that appears too small. private static final int INDIC_MIN_PREFIX = 2; private static final int INDIC_MIN_SUFFIX = 2; /** * Load hyphenation patterns at initialization time. We want to have patterns * for all locales loaded and ready to use so we don't have to do any file IO * on the UI thread when drawing text in different locales. 
* * @hide */ public static void init() { synchronized (sLock) { sMap.put(null, null); loadData("as", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Assamese loadData("bg", 2, 2); // Bulgarian loadData("bn", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Bengali loadData("cu", 1, 2); // Church Slavonic loadData("cy", 2, 3); // Welsh loadData("da", 2, 2); // Danish loadData("de-1901", 2, 2); // German 1901 orthography loadData("de-1996", 2, 2); // German 1996 orthography loadData("de-CH-1901", 2, 2); // Swiss High German 1901 orthography loadData("en-GB", 2, 3); // British English loadData("en-US", 2, 3); // American English loadData("es", 2, 2); // Spanish loadData("et", 2, 3); // Estonian loadData("eu", 2, 2); // Basque loadData("fr", 2, 3); // French loadData("ga", 2, 3); // Irish loadData("gu", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Gujarati loadData("hi", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Hindi loadData("hr", 2, 2); // Croatian loadData("hu", 2, 2); // Hungarian // texhyphen sources say Armenian may be (1, 2); but that it needs confirmation. // Going with a more conservative value of (2, 2) for now. 
loadData("hy", 2, 2); // Armenian loadData("kn", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Kannada loadData("ml", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Malayalam loadData("mn-Cyrl", 2, 2); // Mongolian in Cyrillic script loadData("mr", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Marathi loadData("nb", 2, 2); // Norwegian Bokmål loadData("nn", 2, 2); // Norwegian Nynorsk loadData("or", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Oriya loadData("pa", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Punjabi loadData("pt", 2, 3); // Portuguese loadData("sl", 2, 2); // Slovenian loadData("ta", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Tamil loadData("te", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Telugu loadData("tk", 2, 2); // Turkmen loadData("und-Ethi", 1, 1); // Any language in Ethiopic script // Following two hyphenators do not have pattern files but there is some special logic // based on language. loadData("ca", 2, 2); // Catalan loadData("pl", 2, 2); // Polish // English locales that fall back to en-US. The data is // from CLDR. It's all English locales, minus the locales whose // parent is en-001 (from supplementalData.xml, under <parentLocales>). // TODO: Figure out how to get this from ICU. addAliasByTag("en-AS", "en-US"); // English (American Samoa) addAliasByTag("en-GU", "en-US"); // English (Guam) addAliasByTag("en-MH", "en-US"); // English (Marshall Islands) addAliasByTag("en-MP", "en-US"); // English (Northern Mariana Islands) addAliasByTag("en-PR", "en-US"); // English (Puerto Rico) addAliasByTag("en-UM", "en-US"); // English (United States Minor Outlying Islands) addAliasByTag("en-VI", "en-US"); // English (Virgin Islands) // All English locales other than those falling back to en-US are mapped to en-GB. addAliasByTag("en", "en-GB"); // For German, we're assuming the 1996 (and later) orthography by default. addAliasByTag("de", "de-1996"); // Liechtenstein uses the Swiss hyphenation rules for the 1901 orthography. 
addAliasByTag("de-LI-1901", "de-CH-1901"); // Norwegian is very probably Norwegian Bokmål. addAliasByTag("no", "nb"); // Use mn-Cyrl. According to CLDR's likelySubtags.xml, mn is most likely to be mn-Cyrl. addAliasByTag("mn", "mn-Cyrl"); // Mongolian // Fall back to Ethiopic script for languages likely to be written in Ethiopic. // Data is from CLDR's likelySubtags.xml. // TODO: Convert this to a mechanism using ICU4J's ULocale#addLikelySubtags(). addAliasByTag("am", "und-Ethi"); // Amharic addAliasByTag("byn", "und-Ethi"); // Blin addAliasByTag("gez", "und-Ethi"); // Geʻez addAliasByTag("ti", "und-Ethi"); // Tigrinya addAliasByTag("wal", "und-Ethi"); // Wolaytta nInit(); } }; private static native long nBuildHyphenator(long dataAddress, @NonNull String langTag, @IntRange(from = 1) int minPrefix, @IntRange(from = 1) int minSuffix); private static native void nInit(); } core/java/android/text/StaticLayout.java +8 −43 Original line number Diff line number Diff line Loading @@ -21,21 +21,18 @@ import android.annotation.IntRange; import android.annotation.NonNull; import android.annotation.Nullable; import android.graphics.Paint; import android.os.LocaleList; import android.text.style.LeadingMarginSpan; import android.text.style.LeadingMarginSpan.LeadingMarginSpan2; import android.text.style.LineHeightSpan; import android.text.style.MetricAffectingSpan; import android.text.style.TabStopSpan; import android.util.Log; import android.util.Pair; import android.util.Pools.SynchronizedPool; import com.android.internal.util.ArrayUtils; import com.android.internal.util.GrowingArrayUtils; import java.util.Arrays; import java.util.Locale; /** * StaticLayout is a Layout for text that will not be edited after it Loading Loading @@ -101,7 +98,6 @@ public class StaticLayout extends Layout { b.mBreakStrategy = Layout.BREAK_STRATEGY_SIMPLE; b.mHyphenationFrequency = Layout.HYPHENATION_FREQUENCY_NONE; b.mJustificationMode = Layout.JUSTIFICATION_MODE_NONE; b.mLocales = null; 
b.mMeasuredText = MeasuredText.obtain(); return b; Loading @@ -118,7 +114,6 @@ public class StaticLayout extends Layout { b.mMeasuredText = null; b.mLeftIndents = null; b.mRightIndents = null; b.mLocales = null; b.mLeftPaddings = null; b.mRightPaddings = null; nFinishBuilder(b.mNativePtr); Loading Loading @@ -409,17 +404,6 @@ public class StaticLayout extends Layout { return this; } @NonNull private long[] getHyphenators(@NonNull LocaleList locales) { final int length = locales.size(); final long[] result = new long[length]; for (int i = 0; i < length; i++) { final Locale locale = locales.get(i); result[i] = Hyphenator.get(locale).getNativePtr(); } return result; } /** * Measurement and break iteration is done in native code. The protocol for using * the native code is as follows. Loading @@ -438,27 +422,12 @@ public class StaticLayout extends Layout { * After all paragraphs, call finish() to release expensive buffers. */ private Pair<String, long[]> getLocaleAndHyphenatorIfChanged(TextPaint paint) { final LocaleList locales = paint.getTextLocales(); if (!locales.equals(mLocales)) { mLocales = locales; return new Pair(locales.toLanguageTags(), getHyphenators(locales)); } else { // passing null means keep current locale. // TODO: move locale change detection to native. 
return new Pair(null, null); } } /* package */ void addStyleRun(TextPaint paint, int start, int end, boolean isRtl) { Pair<String, long[]> locHyph = getLocaleAndHyphenatorIfChanged(paint); nAddStyleRun(mNativePtr, paint.getNativeInstance(), start, end, isRtl, locHyph.first, locHyph.second); nAddStyleRun(mNativePtr, paint.getNativeInstance(), start, end, isRtl); } /* package */ void addReplacementRun(TextPaint paint, int start, int end, float width) { Pair<String, long[]> locHyph = getLocaleAndHyphenatorIfChanged(paint); nAddReplacementRun(mNativePtr, start, end, width, locHyph.first, locHyph.second); nAddReplacementRun(mNativePtr, paint.getNativeInstance(), start, end, width); } /** Loading Loading @@ -516,8 +485,6 @@ public class StaticLayout extends Layout { // This will go away and be subsumed by native builder code private MeasuredText mMeasuredText; private LocaleList mLocales; private static final SynchronizedPool<Builder> sPool = new SynchronizedPool<>(3); } Loading Loading @@ -807,9 +774,6 @@ public class StaticLayout extends Layout { } } // TODO: Move locale tracking code to native. b.mLocales = null; // Reset the locale tracking. nSetupParagraph(b.mNativePtr, chs, paraEnd - paraStart, firstWidth, firstWidthLineCount, restWidth, variableTabStops, TAB_INCREMENT, b.mBreakStrategy, b.mHyphenationFrequency, Loading Loading @@ -1537,15 +1501,16 @@ public class StaticLayout extends Layout { @Nullable int[] indents, @Nullable int[] leftPaddings, @Nullable int[] rightPaddings, @IntRange(from = 0) int indentsOffset); // TODO: Make this method CriticalNative once native code defers doing layouts. 
private static native void nAddStyleRun( /* non-zero */ long nativePtr, /* non-zero */ long nativePaint, @IntRange(from = 0) int start, @IntRange(from = 0) int end, boolean isRtl, @Nullable String languageTags, @Nullable long[] hyphenators); @IntRange(from = 0) int start, @IntRange(from = 0) int end, boolean isRtl); private static native void nAddReplacementRun(/* non-zero */ long nativePtr, // TODO: Make this method CriticalNative once native code defers doing layouts. private static native void nAddReplacementRun( /* non-zero */ long nativePtr, /* non-zero */ long nativePaint, @IntRange(from = 0) int start, @IntRange(from = 0) int end, @FloatRange(from = 0.0f) float width, @Nullable String languageTags, @Nullable long[] hyphenators); @FloatRange(from = 0.0f) float width); // populates LineBreaks and returns the number of breaks found // Loading core/jni/android_text_Hyphenator.cpp +141 −10 Original line number Diff line number Diff line Loading @@ -14,24 +14,155 @@ * limitations under the License. */ #include <cstdint> #include <sys/mman.h> #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> #include <algorithm> #include <core_jni_helpers.h> #include <minikin/Hyphenator.h> #include <nativehelper/ScopedUtfChars.h> namespace android { static jlong nBuildHyphenator(JNIEnv* env, jclass, jlong dataAddress, jstring lang, jint minPrefix, jint minSuffix) { const uint8_t* bytebuf = reinterpret_cast<const uint8_t*>(dataAddress); // null allowed. 
ScopedUtfChars language(env, lang); minikin::Hyphenator* hyphenator = minikin::Hyphenator::loadBinary( bytebuf, minPrefix, minSuffix, language.c_str(), language.size()); return reinterpret_cast<jlong>(hyphenator); static std::string buildFileName(const std::string& locale) { constexpr char SYSTEM_HYPHENATOR_PREFIX[] = "/system/usr/hyphen-data/hyph-"; constexpr char SYSTEM_HYPHENATOR_SUFFIX[] = ".hyb"; std::string lowerLocale; lowerLocale.reserve(locale.size()); std::transform(locale.begin(), locale.end(), std::back_inserter(lowerLocale), ::tolower); return SYSTEM_HYPHENATOR_PREFIX + lowerLocale + SYSTEM_HYPHENATOR_SUFFIX; } static const uint8_t* mmapPatternFile(const std::string& locale) { const std::string hyFilePath = buildFileName(locale); const int fd = open(hyFilePath.c_str(), O_RDONLY); if (fd == -1) { return nullptr; // Open failed. } struct stat st = {}; if (fstat(fd, &st) == -1) { // Unlikely to happen. close(fd); return nullptr; } void* ptr = mmap(nullptr, st.st_size, PROT_READ, MAP_SHARED, fd, 0 /* offset */); close(fd); if (ptr == MAP_FAILED) { return nullptr; } return reinterpret_cast<const uint8_t*>(ptr); } static void addHyphenatorWithoutPatternFile(const std::string& locale, int minPrefix, int minSuffix) { minikin::addHyphenator(locale, minikin::Hyphenator::loadBinary( nullptr, minPrefix, minSuffix, locale)); } static void addHyphenator(const std::string& locale, int minPrefix, int minSuffix) { const uint8_t* ptr = mmapPatternFile(locale); if (ptr == nullptr) { ALOGE("Unable to find pattern file or unable to map it for %s", locale.c_str()); return; } minikin::addHyphenator(locale, minikin::Hyphenator::loadBinary( ptr, minPrefix, minSuffix, locale)); } static void addHyphenatorAlias(const std::string& from, const std::string& to) { minikin::addHyphenatorAlias(from, to); } static void init() { // TODO: Confirm that these are the best values. Various sources suggest (1, 1), but that // appears too small. 
constexpr int INDIC_MIN_PREFIX = 2; constexpr int INDIC_MIN_SUFFIX = 2; addHyphenator("as", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Assamese addHyphenator("bg", 2, 2); // Bulgarian addHyphenator("bn", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Bengali addHyphenator("cu", 1, 2); // Church Slavonic addHyphenator("cy", 2, 3); // Welsh addHyphenator("da", 2, 2); // Danish addHyphenator("de-1901", 2, 2); // German 1901 orthography addHyphenator("de-1996", 2, 2); // German 1996 orthography addHyphenator("de-CH-1901", 2, 2); // Swiss High German 1901 orthography addHyphenator("en-GB", 2, 3); // British English addHyphenator("en-US", 2, 3); // American English addHyphenator("es", 2, 2); // Spanish addHyphenator("et", 2, 3); // Estonian addHyphenator("eu", 2, 2); // Basque addHyphenator("fr", 2, 3); // French addHyphenator("ga", 2, 3); // Irish addHyphenator("gu", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Gujarati addHyphenator("hi", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Hindi addHyphenator("hr", 2, 2); // Croatian addHyphenator("hu", 2, 2); // Hungarian // texhyphen sources say Armenian may be (1, 2); but that it needs confirmation. // Going with a more conservative value of (2, 2) for now. 
addHyphenator("hy", 2, 2); // Armenian addHyphenator("kn", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Kannada addHyphenator("ml", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Malayalam addHyphenator("mn-Cyrl", 2, 2); // Mongolian in Cyrillic script addHyphenator("mr", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Marathi addHyphenator("nb", 2, 2); // Norwegian Bokmål addHyphenator("nn", 2, 2); // Norwegian Nynorsk addHyphenator("or", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Oriya addHyphenator("pa", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Punjabi addHyphenator("pt", 2, 3); // Portuguese addHyphenator("sl", 2, 2); // Slovenian addHyphenator("ta", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Tamil addHyphenator("te", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Telugu addHyphenator("tk", 2, 2); // Turkmen addHyphenator("und-Ethi", 1, 1); // Any language in Ethiopic script // Following two hyphenators do not have pattern files but there is some special logic based on // language. addHyphenatorWithoutPatternFile("ca", 2, 2); // Catalan addHyphenatorWithoutPatternFile("pl", 2, 2); // Polish // English locales that fall back to en-US. The data is from CLDR. It's all English locales, // minus the locales whose parent is en-001 (from supplementalData.xml, under <parentLocales>). // TODO: Figure out how to get this from ICU. addHyphenatorAlias("en-AS", "en-US"); // English (American Samoa) addHyphenatorAlias("en-GU", "en-US"); // English (Guam) addHyphenatorAlias("en-MH", "en-US"); // English (Marshall Islands) addHyphenatorAlias("en-MP", "en-US"); // English (Northern Mariana Islands) addHyphenatorAlias("en-PR", "en-US"); // English (Puerto Rico) addHyphenatorAlias("en-UM", "en-US"); // English (United States Minor Outlying Islands) addHyphenatorAlias("en-VI", "en-US"); // English (Virgin Islands) // All English locales other than those falling back to en-US are mapped to en-GB. addHyphenatorAlias("en", "en-GB"); // For German, we're assuming the 1996 (and later) orthography by default. 
addHyphenatorAlias("de", "de-1996"); // Liechtenstein uses the Swiss hyphenation rules for the 1901 orthography. addHyphenatorAlias("de-LI-1901", "de-CH-1901"); // Norwegian is very probably Norwegian Bokmål. addHyphenatorAlias("no", "nb"); // Use mn-Cyrl. According to CLDR's likelySubtags.xml, mn is most likely to be mn-Cyrl. addHyphenatorAlias("mn", "mn-Cyrl"); // Mongolian // Fall back to Ethiopic script for languages likely to be written in Ethiopic. // Data is from CLDR's likelySubtags.xml. // TODO: Convert this to a mechanism using ICU4J's ULocale#addLikelySubtags(). addHyphenatorAlias("am", "und-Ethi"); // Amharic addHyphenatorAlias("byn", "und-Ethi"); // Blin addHyphenatorAlias("gez", "und-Ethi"); // Geʻez addHyphenatorAlias("ti", "und-Ethi"); // Tigrinya addHyphenatorAlias("wal", "und-Ethi"); // Wolaytta } static const JNINativeMethod gMethods[] = { {"nBuildHyphenator", "(JLjava/lang/String;II)J", (void*) nBuildHyphenator}, {"nInit", "()V", (void*) init}, }; int register_android_text_Hyphenator(JNIEnv* env) { Loading core/jni/android_text_StaticLayout.cpp +8 −50 File changed.Preview size limit exceeded, changes collapsed. Show changes Loading
core/java/android/text/Hyphenator.java +4 −251 Original line number Diff line number Diff line Loading @@ -16,262 +16,15 @@ package android.text; import android.annotation.IntRange; import android.annotation.NonNull; import android.annotation.Nullable; import android.system.ErrnoException; import android.system.Os; import android.system.OsConstants; import android.util.Log; import com.android.internal.annotations.GuardedBy; import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; import java.util.HashMap; import java.util.Locale; /** * Hyphenator is a wrapper class for a native implementation of automatic hyphenation, * Hyphenator just initializes the native implementation of automatic hyphenation, * in essence finding valid hyphenation opportunities in a word. * * @hide */ public class Hyphenator { private static String TAG = "Hyphenator"; private final static Object sLock = new Object(); @GuardedBy("sLock") final static HashMap<Locale, Hyphenator> sMap = new HashMap<Locale, Hyphenator>(); private final long mNativePtr; private final HyphenationData mData; private Hyphenator(long nativePtr, HyphenationData data) { mNativePtr = nativePtr; mData = data; } public long getNativePtr() { return mNativePtr; } public static Hyphenator get(@Nullable Locale locale) { synchronized (sLock) { Hyphenator result = sMap.get(locale); if (result != null) { return result; } // If there's a variant, fall back to language+variant only, if available final String variant = locale.getVariant(); if (!variant.isEmpty()) { final Locale languageAndVariantOnlyLocale = new Locale(locale.getLanguage(), "", variant); result = sMap.get(languageAndVariantOnlyLocale); if (result != null) { return putAlias(locale, result); } } // Fall back to language-only, if available final Locale languageOnlyLocale = new Locale(locale.getLanguage()); result = sMap.get(languageOnlyLocale); if (result != null) { return putAlias(locale, result); } // Fall back to script-only, if available 
final String script = locale.getScript(); if (!script.equals("")) { final Locale scriptOnlyLocale = new Locale.Builder() .setLanguage("und") .setScript(script) .build(); result = sMap.get(scriptOnlyLocale); if (result != null) { return putAlias(locale, result); } } return putEmptyAlias(locale); } } private static class HyphenationData { private static final String SYSTEM_HYPHENATOR_LOCATION = "/system/usr/hyphen-data"; public final int mMinPrefix, mMinSuffix; public final long mDataAddress; // Reasonable enough values for cases where we have no hyphenation patterns but may be able // to do some automatic hyphenation based on characters. These values would be used very // rarely. private static final int DEFAULT_MIN_PREFIX = 2; private static final int DEFAULT_MIN_SUFFIX = 2; public static final HyphenationData sEmptyData = new HyphenationData(DEFAULT_MIN_PREFIX, DEFAULT_MIN_SUFFIX); // Create empty HyphenationData. private HyphenationData(int minPrefix, int minSuffix) { mMinPrefix = minPrefix; mMinSuffix = minSuffix; mDataAddress = 0; } HyphenationData(String languageTag, int minPrefix, int minSuffix) { mMinPrefix = minPrefix; mMinSuffix = minSuffix; final String patternFilename = "hyph-" + languageTag.toLowerCase(Locale.US) + ".hyb"; final File patternFile = new File(SYSTEM_HYPHENATOR_LOCATION, patternFilename); if (!patternFile.canRead()) { mDataAddress = 0; } else { long address; try (RandomAccessFile f = new RandomAccessFile(patternFile, "r")) { address = Os.mmap(0, f.length(), OsConstants.PROT_READ, OsConstants.MAP_SHARED, f.getFD(), 0 /* offset */); } catch (IOException | ErrnoException e) { Log.e(TAG, "error loading hyphenation " + patternFile, e); address = 0; } mDataAddress = address; } } } // Do not call this method outside of init method. 
private static Hyphenator putNewHyphenator(Locale loc, HyphenationData data) { final Hyphenator hyphenator = new Hyphenator(nBuildHyphenator( data.mDataAddress, loc.getLanguage(), data.mMinPrefix, data.mMinSuffix), data); sMap.put(loc, hyphenator); return hyphenator; } // Do not call this method outside of init method. private static void loadData(String langTag, int minPrefix, int maxPrefix) { final HyphenationData data = new HyphenationData(langTag, minPrefix, maxPrefix); putNewHyphenator(Locale.forLanguageTag(langTag), data); } // Caller must acquire sLock before calling this method. // The Hyphenator for the baseLangTag must exists. private static Hyphenator addAliasByTag(String langTag, String baseLangTag) { return putAlias(Locale.forLanguageTag(langTag), sMap.get(Locale.forLanguageTag(baseLangTag))); } // Caller must acquire sLock before calling this method. private static Hyphenator putAlias(Locale locale, Hyphenator base) { return putNewHyphenator(locale, base.mData); } // Caller must acquire sLock before calling this method. private static Hyphenator putEmptyAlias(Locale locale) { return putNewHyphenator(locale, HyphenationData.sEmptyData); } // TODO: Confirm that these are the best values. Various sources suggest (1, 1), but // that appears too small. private static final int INDIC_MIN_PREFIX = 2; private static final int INDIC_MIN_SUFFIX = 2; /** * Load hyphenation patterns at initialization time. We want to have patterns * for all locales loaded and ready to use so we don't have to do any file IO * on the UI thread when drawing text in different locales. 
* * @hide */ public static void init() { synchronized (sLock) { sMap.put(null, null); loadData("as", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Assamese loadData("bg", 2, 2); // Bulgarian loadData("bn", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Bengali loadData("cu", 1, 2); // Church Slavonic loadData("cy", 2, 3); // Welsh loadData("da", 2, 2); // Danish loadData("de-1901", 2, 2); // German 1901 orthography loadData("de-1996", 2, 2); // German 1996 orthography loadData("de-CH-1901", 2, 2); // Swiss High German 1901 orthography loadData("en-GB", 2, 3); // British English loadData("en-US", 2, 3); // American English loadData("es", 2, 2); // Spanish loadData("et", 2, 3); // Estonian loadData("eu", 2, 2); // Basque loadData("fr", 2, 3); // French loadData("ga", 2, 3); // Irish loadData("gu", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Gujarati loadData("hi", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Hindi loadData("hr", 2, 2); // Croatian loadData("hu", 2, 2); // Hungarian // texhyphen sources say Armenian may be (1, 2); but that it needs confirmation. // Going with a more conservative value of (2, 2) for now. 
loadData("hy", 2, 2); // Armenian loadData("kn", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Kannada loadData("ml", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Malayalam loadData("mn-Cyrl", 2, 2); // Mongolian in Cyrillic script loadData("mr", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Marathi loadData("nb", 2, 2); // Norwegian Bokmål loadData("nn", 2, 2); // Norwegian Nynorsk loadData("or", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Oriya loadData("pa", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Punjabi loadData("pt", 2, 3); // Portuguese loadData("sl", 2, 2); // Slovenian loadData("ta", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Tamil loadData("te", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Telugu loadData("tk", 2, 2); // Turkmen loadData("und-Ethi", 1, 1); // Any language in Ethiopic script // Following two hyphenators do not have pattern files but there is some special logic // based on language. loadData("ca", 2, 2); // Catalan loadData("pl", 2, 2); // Polish // English locales that fall back to en-US. The data is // from CLDR. It's all English locales, minus the locales whose // parent is en-001 (from supplementalData.xml, under <parentLocales>). // TODO: Figure out how to get this from ICU. addAliasByTag("en-AS", "en-US"); // English (American Samoa) addAliasByTag("en-GU", "en-US"); // English (Guam) addAliasByTag("en-MH", "en-US"); // English (Marshall Islands) addAliasByTag("en-MP", "en-US"); // English (Northern Mariana Islands) addAliasByTag("en-PR", "en-US"); // English (Puerto Rico) addAliasByTag("en-UM", "en-US"); // English (United States Minor Outlying Islands) addAliasByTag("en-VI", "en-US"); // English (Virgin Islands) // All English locales other than those falling back to en-US are mapped to en-GB. addAliasByTag("en", "en-GB"); // For German, we're assuming the 1996 (and later) orthography by default. addAliasByTag("de", "de-1996"); // Liechtenstein uses the Swiss hyphenation rules for the 1901 orthography. 
addAliasByTag("de-LI-1901", "de-CH-1901"); // Norwegian is very probably Norwegian Bokmål. addAliasByTag("no", "nb"); // Use mn-Cyrl. According to CLDR's likelySubtags.xml, mn is most likely to be mn-Cyrl. addAliasByTag("mn", "mn-Cyrl"); // Mongolian // Fall back to Ethiopic script for languages likely to be written in Ethiopic. // Data is from CLDR's likelySubtags.xml. // TODO: Convert this to a mechanism using ICU4J's ULocale#addLikelySubtags(). addAliasByTag("am", "und-Ethi"); // Amharic addAliasByTag("byn", "und-Ethi"); // Blin addAliasByTag("gez", "und-Ethi"); // Geʻez addAliasByTag("ti", "und-Ethi"); // Tigrinya addAliasByTag("wal", "und-Ethi"); // Wolaytta nInit(); } }; private static native long nBuildHyphenator(long dataAddress, @NonNull String langTag, @IntRange(from = 1) int minPrefix, @IntRange(from = 1) int minSuffix); private static native void nInit(); }
core/java/android/text/StaticLayout.java +8 −43 Original line number Diff line number Diff line Loading @@ -21,21 +21,18 @@ import android.annotation.IntRange; import android.annotation.NonNull; import android.annotation.Nullable; import android.graphics.Paint; import android.os.LocaleList; import android.text.style.LeadingMarginSpan; import android.text.style.LeadingMarginSpan.LeadingMarginSpan2; import android.text.style.LineHeightSpan; import android.text.style.MetricAffectingSpan; import android.text.style.TabStopSpan; import android.util.Log; import android.util.Pair; import android.util.Pools.SynchronizedPool; import com.android.internal.util.ArrayUtils; import com.android.internal.util.GrowingArrayUtils; import java.util.Arrays; import java.util.Locale; /** * StaticLayout is a Layout for text that will not be edited after it Loading Loading @@ -101,7 +98,6 @@ public class StaticLayout extends Layout { b.mBreakStrategy = Layout.BREAK_STRATEGY_SIMPLE; b.mHyphenationFrequency = Layout.HYPHENATION_FREQUENCY_NONE; b.mJustificationMode = Layout.JUSTIFICATION_MODE_NONE; b.mLocales = null; b.mMeasuredText = MeasuredText.obtain(); return b; Loading @@ -118,7 +114,6 @@ public class StaticLayout extends Layout { b.mMeasuredText = null; b.mLeftIndents = null; b.mRightIndents = null; b.mLocales = null; b.mLeftPaddings = null; b.mRightPaddings = null; nFinishBuilder(b.mNativePtr); Loading Loading @@ -409,17 +404,6 @@ public class StaticLayout extends Layout { return this; } @NonNull private long[] getHyphenators(@NonNull LocaleList locales) { final int length = locales.size(); final long[] result = new long[length]; for (int i = 0; i < length; i++) { final Locale locale = locales.get(i); result[i] = Hyphenator.get(locale).getNativePtr(); } return result; } /** * Measurement and break iteration is done in native code. The protocol for using * the native code is as follows. 
Loading @@ -438,27 +422,12 @@ public class StaticLayout extends Layout { * After all paragraphs, call finish() to release expensive buffers. */ private Pair<String, long[]> getLocaleAndHyphenatorIfChanged(TextPaint paint) { final LocaleList locales = paint.getTextLocales(); if (!locales.equals(mLocales)) { mLocales = locales; return new Pair(locales.toLanguageTags(), getHyphenators(locales)); } else { // passing null means keep current locale. // TODO: move locale change detection to native. return new Pair(null, null); } } /* package */ void addStyleRun(TextPaint paint, int start, int end, boolean isRtl) { Pair<String, long[]> locHyph = getLocaleAndHyphenatorIfChanged(paint); nAddStyleRun(mNativePtr, paint.getNativeInstance(), start, end, isRtl, locHyph.first, locHyph.second); nAddStyleRun(mNativePtr, paint.getNativeInstance(), start, end, isRtl); } /* package */ void addReplacementRun(TextPaint paint, int start, int end, float width) { Pair<String, long[]> locHyph = getLocaleAndHyphenatorIfChanged(paint); nAddReplacementRun(mNativePtr, start, end, width, locHyph.first, locHyph.second); nAddReplacementRun(mNativePtr, paint.getNativeInstance(), start, end, width); } /** Loading Loading @@ -516,8 +485,6 @@ public class StaticLayout extends Layout { // This will go away and be subsumed by native builder code private MeasuredText mMeasuredText; private LocaleList mLocales; private static final SynchronizedPool<Builder> sPool = new SynchronizedPool<>(3); } Loading Loading @@ -807,9 +774,6 @@ public class StaticLayout extends Layout { } } // TODO: Move locale tracking code to native. b.mLocales = null; // Reset the locale tracking. 
nSetupParagraph(b.mNativePtr, chs, paraEnd - paraStart, firstWidth, firstWidthLineCount, restWidth, variableTabStops, TAB_INCREMENT, b.mBreakStrategy, b.mHyphenationFrequency, Loading Loading @@ -1537,15 +1501,16 @@ public class StaticLayout extends Layout { @Nullable int[] indents, @Nullable int[] leftPaddings, @Nullable int[] rightPaddings, @IntRange(from = 0) int indentsOffset); // TODO: Make this method CriticalNative once native code defers doing layouts. private static native void nAddStyleRun( /* non-zero */ long nativePtr, /* non-zero */ long nativePaint, @IntRange(from = 0) int start, @IntRange(from = 0) int end, boolean isRtl, @Nullable String languageTags, @Nullable long[] hyphenators); @IntRange(from = 0) int start, @IntRange(from = 0) int end, boolean isRtl); private static native void nAddReplacementRun(/* non-zero */ long nativePtr, // TODO: Make this method CriticalNative once native code defers doing layouts. private static native void nAddReplacementRun( /* non-zero */ long nativePtr, /* non-zero */ long nativePaint, @IntRange(from = 0) int start, @IntRange(from = 0) int end, @FloatRange(from = 0.0f) float width, @Nullable String languageTags, @Nullable long[] hyphenators); @FloatRange(from = 0.0f) float width); // populates LineBreaks and returns the number of breaks found // Loading
core/jni/android_text_Hyphenator.cpp +141 −10 Original line number Diff line number Diff line Loading @@ -14,24 +14,155 @@ * limitations under the License. */ #include <cstdint> #include <sys/mman.h> #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> #include <algorithm> #include <core_jni_helpers.h> #include <minikin/Hyphenator.h> #include <nativehelper/ScopedUtfChars.h> namespace android { static jlong nBuildHyphenator(JNIEnv* env, jclass, jlong dataAddress, jstring lang, jint minPrefix, jint minSuffix) { const uint8_t* bytebuf = reinterpret_cast<const uint8_t*>(dataAddress); // null allowed. ScopedUtfChars language(env, lang); minikin::Hyphenator* hyphenator = minikin::Hyphenator::loadBinary( bytebuf, minPrefix, minSuffix, language.c_str(), language.size()); return reinterpret_cast<jlong>(hyphenator); static std::string buildFileName(const std::string& locale) { constexpr char SYSTEM_HYPHENATOR_PREFIX[] = "/system/usr/hyphen-data/hyph-"; constexpr char SYSTEM_HYPHENATOR_SUFFIX[] = ".hyb"; std::string lowerLocale; lowerLocale.reserve(locale.size()); std::transform(locale.begin(), locale.end(), std::back_inserter(lowerLocale), ::tolower); return SYSTEM_HYPHENATOR_PREFIX + lowerLocale + SYSTEM_HYPHENATOR_SUFFIX; } static const uint8_t* mmapPatternFile(const std::string& locale) { const std::string hyFilePath = buildFileName(locale); const int fd = open(hyFilePath.c_str(), O_RDONLY); if (fd == -1) { return nullptr; // Open failed. } struct stat st = {}; if (fstat(fd, &st) == -1) { // Unlikely to happen. 
close(fd); return nullptr; } void* ptr = mmap(nullptr, st.st_size, PROT_READ, MAP_SHARED, fd, 0 /* offset */); close(fd); if (ptr == MAP_FAILED) { return nullptr; } return reinterpret_cast<const uint8_t*>(ptr); } static void addHyphenatorWithoutPatternFile(const std::string& locale, int minPrefix, int minSuffix) { minikin::addHyphenator(locale, minikin::Hyphenator::loadBinary( nullptr, minPrefix, minSuffix, locale)); } static void addHyphenator(const std::string& locale, int minPrefix, int minSuffix) { const uint8_t* ptr = mmapPatternFile(locale); if (ptr == nullptr) { ALOGE("Unable to find pattern file or unable to map it for %s", locale.c_str()); return; } minikin::addHyphenator(locale, minikin::Hyphenator::loadBinary( ptr, minPrefix, minSuffix, locale)); } static void addHyphenatorAlias(const std::string& from, const std::string& to) { minikin::addHyphenatorAlias(from, to); } static void init() { // TODO: Confirm that these are the best values. Various sources suggest (1, 1), but that // appears too small. 
constexpr int INDIC_MIN_PREFIX = 2; constexpr int INDIC_MIN_SUFFIX = 2; addHyphenator("as", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Assamese addHyphenator("bg", 2, 2); // Bulgarian addHyphenator("bn", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Bengali addHyphenator("cu", 1, 2); // Church Slavonic addHyphenator("cy", 2, 3); // Welsh addHyphenator("da", 2, 2); // Danish addHyphenator("de-1901", 2, 2); // German 1901 orthography addHyphenator("de-1996", 2, 2); // German 1996 orthography addHyphenator("de-CH-1901", 2, 2); // Swiss High German 1901 orthography addHyphenator("en-GB", 2, 3); // British English addHyphenator("en-US", 2, 3); // American English addHyphenator("es", 2, 2); // Spanish addHyphenator("et", 2, 3); // Estonian addHyphenator("eu", 2, 2); // Basque addHyphenator("fr", 2, 3); // French addHyphenator("ga", 2, 3); // Irish addHyphenator("gu", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Gujarati addHyphenator("hi", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Hindi addHyphenator("hr", 2, 2); // Croatian addHyphenator("hu", 2, 2); // Hungarian // texhyphen sources say Armenian may be (1, 2); but that it needs confirmation. // Going with a more conservative value of (2, 2) for now. 
addHyphenator("hy", 2, 2); // Armenian addHyphenator("kn", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Kannada addHyphenator("ml", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Malayalam addHyphenator("mn-Cyrl", 2, 2); // Mongolian in Cyrillic script addHyphenator("mr", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Marathi addHyphenator("nb", 2, 2); // Norwegian Bokmål addHyphenator("nn", 2, 2); // Norwegian Nynorsk addHyphenator("or", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Oriya addHyphenator("pa", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Punjabi addHyphenator("pt", 2, 3); // Portuguese addHyphenator("sl", 2, 2); // Slovenian addHyphenator("ta", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Tamil addHyphenator("te", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Telugu addHyphenator("tk", 2, 2); // Turkmen addHyphenator("und-Ethi", 1, 1); // Any language in Ethiopic script // Following two hyphenators do not have pattern files but there is some special logic based on // language. addHyphenatorWithoutPatternFile("ca", 2, 2); // Catalan addHyphenatorWithoutPatternFile("pl", 2, 2); // Polish // English locales that fall back to en-US. The data is from CLDR. It's all English locales, // minus the locales whose parent is en-001 (from supplementalData.xml, under <parentLocales>). // TODO: Figure out how to get this from ICU. addHyphenatorAlias("en-AS", "en-US"); // English (American Samoa) addHyphenatorAlias("en-GU", "en-US"); // English (Guam) addHyphenatorAlias("en-MH", "en-US"); // English (Marshall Islands) addHyphenatorAlias("en-MP", "en-US"); // English (Northern Mariana Islands) addHyphenatorAlias("en-PR", "en-US"); // English (Puerto Rico) addHyphenatorAlias("en-UM", "en-US"); // English (United States Minor Outlying Islands) addHyphenatorAlias("en-VI", "en-US"); // English (Virgin Islands) // All English locales other than those falling back to en-US are mapped to en-GB. addHyphenatorAlias("en", "en-GB"); // For German, we're assuming the 1996 (and later) orthography by default. 
addHyphenatorAlias("de", "de-1996"); // Liechtenstein uses the Swiss hyphenation rules for the 1901 orthography. addHyphenatorAlias("de-LI-1901", "de-CH-1901"); // Norwegian is very probably Norwegian Bokmål. addHyphenatorAlias("no", "nb"); // Use mn-Cyrl. According to CLDR's likelySubtags.xml, mn is most likely to be mn-Cyrl. addHyphenatorAlias("mn", "mn-Cyrl"); // Mongolian // Fall back to Ethiopic script for languages likely to be written in Ethiopic. // Data is from CLDR's likelySubtags.xml. // TODO: Convert this to a mechanism using ICU4J's ULocale#addLikelySubtags(). addHyphenatorAlias("am", "und-Ethi"); // Amharic addHyphenatorAlias("byn", "und-Ethi"); // Blin addHyphenatorAlias("gez", "und-Ethi"); // Geʻez addHyphenatorAlias("ti", "und-Ethi"); // Tigrinya addHyphenatorAlias("wal", "und-Ethi"); // Wolaytta } static const JNINativeMethod gMethods[] = { {"nBuildHyphenator", "(JLjava/lang/String;II)J", (void*) nBuildHyphenator}, {"nInit", "()V", (void*) init}, }; int register_android_text_Hyphenator(JNIEnv* env) { Loading
core/jni/android_text_StaticLayout.cpp +8 −50 File changed. Preview size limit exceeded, changes collapsed. Show changes