Loading core/java/android/text/Hyphenator.java +162 −158 Original line number Diff line number Diff line Loading @@ -16,7 +16,12 @@ package android.text; import android.annotation.IntRange; import android.annotation.NonNull; import android.annotation.Nullable; import android.system.ErrnoException; import android.system.Os; import android.system.OsConstants; import android.util.Log; import com.android.internal.annotations.GuardedBy; Loading @@ -24,9 +29,6 @@ import com.android.internal.annotations.GuardedBy; import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.MappedByteBuffer; import java.nio.channels.FileChannel; import java.util.HashMap; import java.util.Locale; Loading @@ -37,39 +39,19 @@ import java.util.Locale; * @hide */ public class Hyphenator { // This class has deliberately simple lifetime management (no finalizer) because in // the common case a process will use a very small number of locales. private static String TAG = "Hyphenator"; // TODO: Confirm that these are the best values. Various sources suggest (1, 1), but // that appears too small. private static final int INDIC_MIN_PREFIX = 2; private static final int INDIC_MIN_SUFFIX = 2; private final static Object sLock = new Object(); @GuardedBy("sLock") final static HashMap<Locale, Hyphenator> sMap = new HashMap<Locale, Hyphenator>(); // Reasonable enough values for cases where we have no hyphenation patterns but may be able to // do some automatic hyphenation based on characters. These values would be used very rarely. private static final int DEFAULT_MIN_PREFIX = 2; private static final int DEFAULT_MIN_SUFFIX = 2; final static Hyphenator sEmptyHyphenator = new Hyphenator(StaticLayout.nLoadHyphenator( null, 0, DEFAULT_MIN_PREFIX, DEFAULT_MIN_SUFFIX), null); final private long mNativePtr; private final long mNativePtr; private final HyphenationData mData; // We retain a reference to the buffer to keep the memory mapping valid @SuppressWarnings("unused") final private ByteBuffer mBuffer; private Hyphenator(long nativePtr, ByteBuffer b) { private Hyphenator(long nativePtr, HyphenationData data) { mNativePtr = nativePtr; mBuffer = b; mData = data; } public long getNativePtr() { Loading @@ -90,8 +72,7 @@ public class Hyphenator { new Locale(locale.getLanguage(), "", variant); result = sMap.get(languageAndVariantOnlyLocale); if (result != null) { sMap.put(locale, result); return result; return putAlias(locale, result); } } Loading @@ -99,8 +80,7 @@ public class Hyphenator { final Locale languageOnlyLocale = new Locale(locale.getLanguage()); result = sMap.get(languageOnlyLocale); if (result != null) { sMap.put(locale, result); return result; return putAlias(locale, result); } // Fall back to script-only, if available Loading @@ -112,158 +92,182 @@ public class Hyphenator { .build(); result = sMap.get(scriptOnlyLocale); if (result != null) { sMap.put(locale, result); return result; return putAlias(locale, result); } } sMap.put(locale, sEmptyHyphenator); // To remember we found nothing. return putEmptyAlias(locale); } return sEmptyHyphenator; } private static class HyphenationData { final String mLanguageTag; final int mMinPrefix, mMinSuffix; HyphenationData(String languageTag, int minPrefix, int minSuffix) { this.mLanguageTag = languageTag; this.mMinPrefix = minPrefix; this.mMinSuffix = minSuffix; } private static final String SYSTEM_HYPHENATOR_LOCATION = "/system/usr/hyphen-data"; public final int mMinPrefix, mMinSuffix; public final long mDataAddress; // Reasonable enough values for cases where we have no hyphenation patterns but may be able // to do some automatic hyphenation based on characters. These values would be used very // rarely. private static final int DEFAULT_MIN_PREFIX = 2; private static final int DEFAULT_MIN_SUFFIX = 2; public static final HyphenationData sEmptyData = new HyphenationData(DEFAULT_MIN_PREFIX, DEFAULT_MIN_SUFFIX); // Create empty HyphenationData. private HyphenationData(int minPrefix, int minSuffix) { mMinPrefix = minPrefix; mMinSuffix = minSuffix; mDataAddress = 0; } private static Hyphenator loadHyphenator(HyphenationData data) { String patternFilename = "hyph-" + data.mLanguageTag.toLowerCase(Locale.US) + ".hyb"; File patternFile = new File(getSystemHyphenatorLocation(), patternFilename); HyphenationData(String languageTag, int minPrefix, int minSuffix) { mMinPrefix = minPrefix; mMinSuffix = minSuffix; final String patternFilename = "hyph-" + languageTag.toLowerCase(Locale.US) + ".hyb"; final File patternFile = new File(SYSTEM_HYPHENATOR_LOCATION, patternFilename); if (!patternFile.canRead()) { Log.e(TAG, "hyphenation patterns for " + patternFile + " not found or unreadable"); return null; mDataAddress = 0; } else { long address; try (RandomAccessFile f = new RandomAccessFile(patternFile, "r")) { address = Os.mmap(0, f.length(), OsConstants.PROT_READ, OsConstants.MAP_SHARED, f.getFD(), 0 /* offset */); } catch (IOException | ErrnoException e) { Log.e(TAG, "error loading hyphenation " + patternFile, e); address = 0; } try { RandomAccessFile f = new RandomAccessFile(patternFile, "r"); try { FileChannel fc = f.getChannel(); MappedByteBuffer buf = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size()); long nativePtr = StaticLayout.nLoadHyphenator( buf, 0, data.mMinPrefix, data.mMinSuffix); return new Hyphenator(nativePtr, buf); } finally { f.close(); mDataAddress = address; } } catch (IOException e) { Log.e(TAG, "error loading hyphenation " + patternFile, e); return null; } } private static File getSystemHyphenatorLocation() { return new File("/system/usr/hyphen-data"); // Do not call this method outside of init method. private static Hyphenator putNewHyphenator(Locale loc, HyphenationData data) { final Hyphenator hyphenator = new Hyphenator(nBuildHyphenator( data.mDataAddress, loc.getLanguage(), data.mMinPrefix, data.mMinSuffix), data); sMap.put(loc, hyphenator); return hyphenator; } // Do not call this method outside of init method. private static void loadData(String langTag, int minPrefix, int maxPrefix) { final HyphenationData data = new HyphenationData(langTag, minPrefix, maxPrefix); putNewHyphenator(Locale.forLanguageTag(langTag), data); } // This array holds pairs of language tags that are used to prefill the map from locale to // hyphenation data: The hyphenation data for the first field will be prefilled from the // hyphenation data for the second field. // // The aliases that are computable by the get() method above are not included. private static final String[][] LOCALE_FALLBACK_DATA = { // Caller must acquire sLock before calling this method. // The Hyphenator for the baseLangTag must exists. private static Hyphenator addAliasByTag(String langTag, String baseLangTag) { return putAlias(Locale.forLanguageTag(langTag), sMap.get(Locale.forLanguageTag(baseLangTag))); } // Caller must acquire sLock before calling this method. private static Hyphenator putAlias(Locale locale, Hyphenator base) { return putNewHyphenator(locale, base.mData); } // Caller must acquire sLock before calling this method. private static Hyphenator putEmptyAlias(Locale locale) { return putNewHyphenator(locale, HyphenationData.sEmptyData); } // TODO: Confirm that these are the best values. Various sources suggest (1, 1), but // that appears too small. private static final int INDIC_MIN_PREFIX = 2; private static final int INDIC_MIN_SUFFIX = 2; /** * Load hyphenation patterns at initialization time. We want to have patterns * for all locales loaded and ready to use so we don't have to do any file IO * on the UI thread when drawing text in different locales. * * @hide */ public static void init() { synchronized (sLock) { sMap.put(null, null); loadData("as", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Assamese loadData("bg", 2, 2); // Bulgarian loadData("bn", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Bengali loadData("cu", 1, 2); // Church Slavonic loadData("cy", 2, 3); // Welsh loadData("da", 2, 2); // Danish loadData("de-1901", 2, 2); // German 1901 orthography loadData("de-1996", 2, 2); // German 1996 orthography loadData("de-CH-1901", 2, 2); // Swiss High German 1901 orthography loadData("en-GB", 2, 3); // British English loadData("en-US", 2, 3); // American English loadData("es", 2, 2); // Spanish loadData("et", 2, 3); // Estonian loadData("eu", 2, 2); // Basque loadData("fr", 2, 3); // French loadData("ga", 2, 3); // Irish loadData("gu", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Gujarati loadData("hi", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Hindi loadData("hr", 2, 2); // Croatian loadData("hu", 2, 2); // Hungarian // texhyphen sources say Armenian may be (1, 2); but that it needs confirmation. // Going with a more conservative value of (2, 2) for now. loadData("hy", 2, 2); // Armenian loadData("kn", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Kannada loadData("ml", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Malayalam loadData("mn-Cyrl", 2, 2); // Mongolian in Cyrillic script loadData("mr", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Marathi loadData("nb", 2, 2); // Norwegian Bokmål loadData("nn", 2, 2); // Norwegian Nynorsk loadData("or", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Oriya loadData("pa", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Punjabi loadData("pt", 2, 3); // Portuguese loadData("sl", 2, 2); // Slovenian loadData("ta", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Tamil loadData("te", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Telugu loadData("tk", 2, 2); // Turkmen loadData("und-Ethi", 1, 1); // Any language in Ethiopic script // English locales that fall back to en-US. The data is // from CLDR. It's all English locales, minus the locales whose // parent is en-001 (from supplementalData.xml, under <parentLocales>). // TODO: Figure out how to get this from ICU. {"en-AS", "en-US"}, // English (American Samoa) {"en-GU", "en-US"}, // English (Guam) {"en-MH", "en-US"}, // English (Marshall Islands) {"en-MP", "en-US"}, // English (Northern Mariana Islands) {"en-PR", "en-US"}, // English (Puerto Rico) {"en-UM", "en-US"}, // English (United States Minor Outlying Islands) {"en-VI", "en-US"}, // English (Virgin Islands) addAliasByTag("en-AS", "en-US"); // English (American Samoa) addAliasByTag("en-GU", "en-US"); // English (Guam) addAliasByTag("en-MH", "en-US"); // English (Marshall Islands) addAliasByTag("en-MP", "en-US"); // English (Northern Mariana Islands) addAliasByTag("en-PR", "en-US"); // English (Puerto Rico) addAliasByTag("en-UM", "en-US"); // English (United States Minor Outlying Islands) addAliasByTag("en-VI", "en-US"); // English (Virgin Islands) // All English locales other than those falling back to en-US are mapped to en-GB. {"en", "en-GB"}, addAliasByTag("en", "en-GB"); // For German, we're assuming the 1996 (and later) orthography by default. {"de", "de-1996"}, addAliasByTag("de", "de-1996"); // Liechtenstein uses the Swiss hyphenation rules for the 1901 orthography. {"de-LI-1901", "de-CH-1901"}, addAliasByTag("de-LI-1901", "de-CH-1901"); // Norwegian is very probably Norwegian Bokmål. {"no", "nb"}, addAliasByTag("no", "nb"); // Use mn-Cyrl. According to CLDR's likelySubtags.xml, mn is most likely to be mn-Cyrl. {"mn", "mn-Cyrl"}, // Mongolian addAliasByTag("mn", "mn-Cyrl"); // Mongolian // Fall back to Ethiopic script for languages likely to be written in Ethiopic. // Data is from CLDR's likelySubtags.xml. // TODO: Convert this to a mechanism using ICU4J's ULocale#addLikelySubtags(). {"am", "und-Ethi"}, // Amharic {"byn", "und-Ethi"}, // Blin {"gez", "und-Ethi"}, // Geʻez {"ti", "und-Ethi"}, // Tigrinya {"wal", "und-Ethi"}, // Wolaytta }; private static final HyphenationData[] AVAILABLE_LANGUAGES = { new HyphenationData("as", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Assamese new HyphenationData("bg", 2, 2), // Bulgarian new HyphenationData("bn", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Bengali new HyphenationData("cu", 1, 2), // Church Slavonic new HyphenationData("cy", 2, 3), // Welsh new HyphenationData("da", 2, 2), // Danish new HyphenationData("de-1901", 2, 2), // German 1901 orthography new HyphenationData("de-1996", 2, 2), // German 1996 orthography new HyphenationData("de-CH-1901", 2, 2), // Swiss High German 1901 orthography new HyphenationData("en-GB", 2, 3), // British English new HyphenationData("en-US", 2, 3), // American English new HyphenationData("es", 2, 2), // Spanish new HyphenationData("et", 2, 3), // Estonian new HyphenationData("eu", 2, 2), // Basque new HyphenationData("fr", 2, 3), // French new HyphenationData("ga", 2, 3), // Irish new HyphenationData("gu", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Gujarati new HyphenationData("hi", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Hindi new HyphenationData("hr", 2, 2), // Croatian new HyphenationData("hu", 2, 2), // Hungarian // texhyphen sources say Armenian may be (1, 2), but that it needs confirmation. // Going with a more conservative value of (2, 2) for now. new HyphenationData("hy", 2, 2), // Armenian new HyphenationData("kn", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Kannada new HyphenationData("ml", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Malayalam new HyphenationData("mn-Cyrl", 2, 2), // Mongolian in Cyrillic script new HyphenationData("mr", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Marathi new HyphenationData("nb", 2, 2), // Norwegian Bokmål new HyphenationData("nn", 2, 2), // Norwegian Nynorsk new HyphenationData("or", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Oriya new HyphenationData("pa", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Punjabi new HyphenationData("pt", 2, 3), // Portuguese new HyphenationData("sl", 2, 2), // Slovenian new HyphenationData("ta", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Tamil new HyphenationData("te", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Telugu new HyphenationData("tk", 2, 2), // Turkmen new HyphenationData("und-Ethi", 1, 1), // Any language in Ethiopic script }; /** * Load hyphenation patterns at initialization time. We want to have patterns * for all locales loaded and ready to use so we don't have to do any file IO * on the UI thread when drawing text in different locales. * * @hide */ public static void init() { sMap.put(null, null); for (int i = 0; i < AVAILABLE_LANGUAGES.length; i++) { HyphenationData data = AVAILABLE_LANGUAGES[i]; Hyphenator h = loadHyphenator(data); if (h != null) { sMap.put(Locale.forLanguageTag(data.mLanguageTag), h); } addAliasByTag("am", "und-Ethi"); // Amharic addAliasByTag("byn", "und-Ethi"); // Blin addAliasByTag("gez", "und-Ethi"); // Geʻez addAliasByTag("ti", "und-Ethi"); // Tigrinya addAliasByTag("wal", "und-Ethi"); // Wolaytta } }; for (int i = 0; i < LOCALE_FALLBACK_DATA.length; i++) { String language = LOCALE_FALLBACK_DATA[i][0]; String fallback = LOCALE_FALLBACK_DATA[i][1]; sMap.put(Locale.forLanguageTag(language), sMap.get(Locale.forLanguageTag(fallback))); } } private static native long nBuildHyphenator(/* non-zero */ long dataAddress, @NonNull String langTag, @IntRange(from = 1) int minPrefix, @IntRange(from = 1) int minSuffix); } core/java/android/text/StaticLayout.java +0 −4 Original line number Diff line number Diff line Loading @@ -33,7 +33,6 @@ import android.util.Pools.SynchronizedPool; import com.android.internal.util.ArrayUtils; import com.android.internal.util.GrowingArrayUtils; import java.nio.ByteBuffer; import java.util.Arrays; import java.util.Locale; Loading Loading @@ -1529,9 +1528,6 @@ public class StaticLayout extends Layout { private static native void nFreeBuilder(long nativePtr); private static native void nFinishBuilder(long nativePtr); /* package */ static native long nLoadHyphenator(ByteBuffer buf, int offset, int minPrefix, int minSuffix); // Set up paragraph text and settings; done as one big method to minimize jni crossings private static native void nSetupParagraph( /* non zero */ long nativePtr, @NonNull char[] text, @IntRange(from = 0) int length, Loading core/jni/Android.bp +1 −0 Original line number Diff line number Diff line Loading @@ -78,6 +78,7 @@ cc_library_shared { "android_view_VelocityTracker.cpp", "android_text_AndroidCharacter.cpp", "android_text_AndroidBidi.cpp", "android_text_Hyphenator.cpp", "android_text_StaticLayout.cpp", "android_os_Debug.cpp", "android_os_GraphicsEnvironment.cpp", Loading core/jni/AndroidRuntime.cpp +2 −0 Original line number Diff line number Diff line Loading @@ -173,6 +173,7 @@ extern int register_android_net_LocalSocketImpl(JNIEnv* env); extern int register_android_net_NetworkUtils(JNIEnv* env); extern int register_android_net_TrafficStats(JNIEnv* env); extern int register_android_text_AndroidCharacter(JNIEnv *env); extern int register_android_text_Hyphenator(JNIEnv *env); extern int register_android_text_StaticLayout(JNIEnv *env); extern int register_android_text_AndroidBidi(JNIEnv *env); extern int register_android_opengl_classes(JNIEnv *env); Loading Loading @@ -1321,6 +1322,7 @@ static const RegJNIRec gRegJNI[] = { REG_JNI(register_android_content_StringBlock), REG_JNI(register_android_content_XmlBlock), REG_JNI(register_android_text_AndroidCharacter), REG_JNI(register_android_text_Hyphenator), REG_JNI(register_android_text_StaticLayout), REG_JNI(register_android_text_AndroidBidi), REG_JNI(register_android_view_InputDevice), Loading core/jni/android_text_Hyphenator.cpp 0 → 100644 +41 −0 Original line number Diff line number Diff line /* * Copyright (C) 2017 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include <cstdint> #include <core_jni_helpers.h> #include <minikin/Hyphenator.h> #include <nativehelper/ScopedUtfChars.h> namespace android { static jlong nBuildHyphenator(JNIEnv* env, jclass, jlong dataAddress, jstring lang, jint minPrefix, jint minSuffix) { const uint8_t* bytebuf = reinterpret_cast<const uint8_t*>(dataAddress); // null allowed. ScopedUtfChars language(env, lang); minikin::Hyphenator* hyphenator = minikin::Hyphenator::loadBinary( bytebuf, minPrefix, minSuffix, language.c_str(), language.size()); return reinterpret_cast<jlong>(hyphenator); } static const JNINativeMethod gMethods[] = { {"nBuildHyphenator", "(JLjava/lang/String;II)J", (void*) nBuildHyphenator}, }; int register_android_text_Hyphenator(JNIEnv* env) { return RegisterMethodsOrDie(env, "android/text/Hyphenator", gMethods, NELEM(gMethods)); } } // namespace android Loading
core/java/android/text/Hyphenator.java +162 −158 Original line number Diff line number Diff line Loading @@ -16,7 +16,12 @@ package android.text; import android.annotation.IntRange; import android.annotation.NonNull; import android.annotation.Nullable; import android.system.ErrnoException; import android.system.Os; import android.system.OsConstants; import android.util.Log; import com.android.internal.annotations.GuardedBy; Loading @@ -24,9 +29,6 @@ import com.android.internal.annotations.GuardedBy; import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.MappedByteBuffer; import java.nio.channels.FileChannel; import java.util.HashMap; import java.util.Locale; Loading @@ -37,39 +39,19 @@ import java.util.Locale; * @hide */ public class Hyphenator { // This class has deliberately simple lifetime management (no finalizer) because in // the common case a process will use a very small number of locales. private static String TAG = "Hyphenator"; // TODO: Confirm that these are the best values. Various sources suggest (1, 1), but // that appears too small. private static final int INDIC_MIN_PREFIX = 2; private static final int INDIC_MIN_SUFFIX = 2; private final static Object sLock = new Object(); @GuardedBy("sLock") final static HashMap<Locale, Hyphenator> sMap = new HashMap<Locale, Hyphenator>(); // Reasonable enough values for cases where we have no hyphenation patterns but may be able to // do some automatic hyphenation based on characters. These values would be used very rarely. private static final int DEFAULT_MIN_PREFIX = 2; private static final int DEFAULT_MIN_SUFFIX = 2; final static Hyphenator sEmptyHyphenator = new Hyphenator(StaticLayout.nLoadHyphenator( null, 0, DEFAULT_MIN_PREFIX, DEFAULT_MIN_SUFFIX), null); final private long mNativePtr; private final long mNativePtr; private final HyphenationData mData; // We retain a reference to the buffer to keep the memory mapping valid @SuppressWarnings("unused") final private ByteBuffer mBuffer; private Hyphenator(long nativePtr, ByteBuffer b) { private Hyphenator(long nativePtr, HyphenationData data) { mNativePtr = nativePtr; mBuffer = b; mData = data; } public long getNativePtr() { Loading @@ -90,8 +72,7 @@ public class Hyphenator { new Locale(locale.getLanguage(), "", variant); result = sMap.get(languageAndVariantOnlyLocale); if (result != null) { sMap.put(locale, result); return result; return putAlias(locale, result); } } Loading @@ -99,8 +80,7 @@ public class Hyphenator { final Locale languageOnlyLocale = new Locale(locale.getLanguage()); result = sMap.get(languageOnlyLocale); if (result != null) { sMap.put(locale, result); return result; return putAlias(locale, result); } // Fall back to script-only, if available Loading @@ -112,158 +92,182 @@ public class Hyphenator { .build(); result = sMap.get(scriptOnlyLocale); if (result != null) { sMap.put(locale, result); return result; return putAlias(locale, result); } } sMap.put(locale, sEmptyHyphenator); // To remember we found nothing. return putEmptyAlias(locale); } return sEmptyHyphenator; } private static class HyphenationData { final String mLanguageTag; final int mMinPrefix, mMinSuffix; HyphenationData(String languageTag, int minPrefix, int minSuffix) { this.mLanguageTag = languageTag; this.mMinPrefix = minPrefix; this.mMinSuffix = minSuffix; } private static final String SYSTEM_HYPHENATOR_LOCATION = "/system/usr/hyphen-data"; public final int mMinPrefix, mMinSuffix; public final long mDataAddress; // Reasonable enough values for cases where we have no hyphenation patterns but may be able // to do some automatic hyphenation based on characters. These values would be used very // rarely. private static final int DEFAULT_MIN_PREFIX = 2; private static final int DEFAULT_MIN_SUFFIX = 2; public static final HyphenationData sEmptyData = new HyphenationData(DEFAULT_MIN_PREFIX, DEFAULT_MIN_SUFFIX); // Create empty HyphenationData. private HyphenationData(int minPrefix, int minSuffix) { mMinPrefix = minPrefix; mMinSuffix = minSuffix; mDataAddress = 0; } private static Hyphenator loadHyphenator(HyphenationData data) { String patternFilename = "hyph-" + data.mLanguageTag.toLowerCase(Locale.US) + ".hyb"; File patternFile = new File(getSystemHyphenatorLocation(), patternFilename); HyphenationData(String languageTag, int minPrefix, int minSuffix) { mMinPrefix = minPrefix; mMinSuffix = minSuffix; final String patternFilename = "hyph-" + languageTag.toLowerCase(Locale.US) + ".hyb"; final File patternFile = new File(SYSTEM_HYPHENATOR_LOCATION, patternFilename); if (!patternFile.canRead()) { Log.e(TAG, "hyphenation patterns for " + patternFile + " not found or unreadable"); return null; mDataAddress = 0; } else { long address; try (RandomAccessFile f = new RandomAccessFile(patternFile, "r")) { address = Os.mmap(0, f.length(), OsConstants.PROT_READ, OsConstants.MAP_SHARED, f.getFD(), 0 /* offset */); } catch (IOException | ErrnoException e) { Log.e(TAG, "error loading hyphenation " + patternFile, e); address = 0; } try { RandomAccessFile f = new RandomAccessFile(patternFile, "r"); try { FileChannel fc = f.getChannel(); MappedByteBuffer buf = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size()); long nativePtr = StaticLayout.nLoadHyphenator( buf, 0, data.mMinPrefix, data.mMinSuffix); return new Hyphenator(nativePtr, buf); } finally { f.close(); mDataAddress = address; } } catch (IOException e) { Log.e(TAG, "error loading hyphenation " + patternFile, e); return null; } } private static File getSystemHyphenatorLocation() { return new File("/system/usr/hyphen-data"); // Do not call this method outside of init method. private static Hyphenator putNewHyphenator(Locale loc, HyphenationData data) { final Hyphenator hyphenator = new Hyphenator(nBuildHyphenator( data.mDataAddress, loc.getLanguage(), data.mMinPrefix, data.mMinSuffix), data); sMap.put(loc, hyphenator); return hyphenator; } // Do not call this method outside of init method. private static void loadData(String langTag, int minPrefix, int maxPrefix) { final HyphenationData data = new HyphenationData(langTag, minPrefix, maxPrefix); putNewHyphenator(Locale.forLanguageTag(langTag), data); } // This array holds pairs of language tags that are used to prefill the map from locale to // hyphenation data: The hyphenation data for the first field will be prefilled from the // hyphenation data for the second field. // // The aliases that are computable by the get() method above are not included. private static final String[][] LOCALE_FALLBACK_DATA = { // Caller must acquire sLock before calling this method. // The Hyphenator for the baseLangTag must exists. private static Hyphenator addAliasByTag(String langTag, String baseLangTag) { return putAlias(Locale.forLanguageTag(langTag), sMap.get(Locale.forLanguageTag(baseLangTag))); } // Caller must acquire sLock before calling this method. private static Hyphenator putAlias(Locale locale, Hyphenator base) { return putNewHyphenator(locale, base.mData); } // Caller must acquire sLock before calling this method. private static Hyphenator putEmptyAlias(Locale locale) { return putNewHyphenator(locale, HyphenationData.sEmptyData); } // TODO: Confirm that these are the best values. Various sources suggest (1, 1), but // that appears too small. private static final int INDIC_MIN_PREFIX = 2; private static final int INDIC_MIN_SUFFIX = 2; /** * Load hyphenation patterns at initialization time. We want to have patterns * for all locales loaded and ready to use so we don't have to do any file IO * on the UI thread when drawing text in different locales. * * @hide */ public static void init() { synchronized (sLock) { sMap.put(null, null); loadData("as", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Assamese loadData("bg", 2, 2); // Bulgarian loadData("bn", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Bengali loadData("cu", 1, 2); // Church Slavonic loadData("cy", 2, 3); // Welsh loadData("da", 2, 2); // Danish loadData("de-1901", 2, 2); // German 1901 orthography loadData("de-1996", 2, 2); // German 1996 orthography loadData("de-CH-1901", 2, 2); // Swiss High German 1901 orthography loadData("en-GB", 2, 3); // British English loadData("en-US", 2, 3); // American English loadData("es", 2, 2); // Spanish loadData("et", 2, 3); // Estonian loadData("eu", 2, 2); // Basque loadData("fr", 2, 3); // French loadData("ga", 2, 3); // Irish loadData("gu", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Gujarati loadData("hi", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Hindi loadData("hr", 2, 2); // Croatian loadData("hu", 2, 2); // Hungarian // texhyphen sources say Armenian may be (1, 2); but that it needs confirmation. // Going with a more conservative value of (2, 2) for now. loadData("hy", 2, 2); // Armenian loadData("kn", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Kannada loadData("ml", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Malayalam loadData("mn-Cyrl", 2, 2); // Mongolian in Cyrillic script loadData("mr", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Marathi loadData("nb", 2, 2); // Norwegian Bokmål loadData("nn", 2, 2); // Norwegian Nynorsk loadData("or", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Oriya loadData("pa", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Punjabi loadData("pt", 2, 3); // Portuguese loadData("sl", 2, 2); // Slovenian loadData("ta", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Tamil loadData("te", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX); // Telugu loadData("tk", 2, 2); // Turkmen loadData("und-Ethi", 1, 1); // Any language in Ethiopic script // English locales that fall back to en-US. The data is // from CLDR. It's all English locales, minus the locales whose // parent is en-001 (from supplementalData.xml, under <parentLocales>). // TODO: Figure out how to get this from ICU. {"en-AS", "en-US"}, // English (American Samoa) {"en-GU", "en-US"}, // English (Guam) {"en-MH", "en-US"}, // English (Marshall Islands) {"en-MP", "en-US"}, // English (Northern Mariana Islands) {"en-PR", "en-US"}, // English (Puerto Rico) {"en-UM", "en-US"}, // English (United States Minor Outlying Islands) {"en-VI", "en-US"}, // English (Virgin Islands) addAliasByTag("en-AS", "en-US"); // English (American Samoa) addAliasByTag("en-GU", "en-US"); // English (Guam) addAliasByTag("en-MH", "en-US"); // English (Marshall Islands) addAliasByTag("en-MP", "en-US"); // English (Northern Mariana Islands) addAliasByTag("en-PR", "en-US"); // English (Puerto Rico) addAliasByTag("en-UM", "en-US"); // English (United States Minor Outlying Islands) addAliasByTag("en-VI", "en-US"); // English (Virgin Islands) // All English locales other than those falling back to en-US are mapped to en-GB. {"en", "en-GB"}, addAliasByTag("en", "en-GB"); // For German, we're assuming the 1996 (and later) orthography by default. {"de", "de-1996"}, addAliasByTag("de", "de-1996"); // Liechtenstein uses the Swiss hyphenation rules for the 1901 orthography. {"de-LI-1901", "de-CH-1901"}, addAliasByTag("de-LI-1901", "de-CH-1901"); // Norwegian is very probably Norwegian Bokmål. {"no", "nb"}, addAliasByTag("no", "nb"); // Use mn-Cyrl. According to CLDR's likelySubtags.xml, mn is most likely to be mn-Cyrl. {"mn", "mn-Cyrl"}, // Mongolian addAliasByTag("mn", "mn-Cyrl"); // Mongolian // Fall back to Ethiopic script for languages likely to be written in Ethiopic. // Data is from CLDR's likelySubtags.xml. // TODO: Convert this to a mechanism using ICU4J's ULocale#addLikelySubtags(). {"am", "und-Ethi"}, // Amharic {"byn", "und-Ethi"}, // Blin {"gez", "und-Ethi"}, // Geʻez {"ti", "und-Ethi"}, // Tigrinya {"wal", "und-Ethi"}, // Wolaytta }; private static final HyphenationData[] AVAILABLE_LANGUAGES = { new HyphenationData("as", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Assamese new HyphenationData("bg", 2, 2), // Bulgarian new HyphenationData("bn", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Bengali new HyphenationData("cu", 1, 2), // Church Slavonic new HyphenationData("cy", 2, 3), // Welsh new HyphenationData("da", 2, 2), // Danish new HyphenationData("de-1901", 2, 2), // German 1901 orthography new HyphenationData("de-1996", 2, 2), // German 1996 orthography new HyphenationData("de-CH-1901", 2, 2), // Swiss High German 1901 orthography new HyphenationData("en-GB", 2, 3), // British English new HyphenationData("en-US", 2, 3), // American English new HyphenationData("es", 2, 2), // Spanish new HyphenationData("et", 2, 3), // Estonian new HyphenationData("eu", 2, 2), // Basque new HyphenationData("fr", 2, 3), // French new HyphenationData("ga", 2, 3), // Irish new HyphenationData("gu", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Gujarati new HyphenationData("hi", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Hindi new HyphenationData("hr", 2, 2), // Croatian new HyphenationData("hu", 2, 2), // Hungarian // texhyphen sources say Armenian may be (1, 2), but that it needs confirmation. // Going with a more conservative value of (2, 2) for now. new HyphenationData("hy", 2, 2), // Armenian new HyphenationData("kn", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Kannada new HyphenationData("ml", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Malayalam new HyphenationData("mn-Cyrl", 2, 2), // Mongolian in Cyrillic script new HyphenationData("mr", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Marathi new HyphenationData("nb", 2, 2), // Norwegian Bokmål new HyphenationData("nn", 2, 2), // Norwegian Nynorsk new HyphenationData("or", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Oriya new HyphenationData("pa", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Punjabi new HyphenationData("pt", 2, 3), // Portuguese new HyphenationData("sl", 2, 2), // Slovenian new HyphenationData("ta", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Tamil new HyphenationData("te", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Telugu new HyphenationData("tk", 2, 2), // Turkmen new HyphenationData("und-Ethi", 1, 1), // Any language in Ethiopic script }; /** * Load hyphenation patterns at initialization time. We want to have patterns * for all locales loaded and ready to use so we don't have to do any file IO * on the UI thread when drawing text in different locales. * * @hide */ public static void init() { sMap.put(null, null); for (int i = 0; i < AVAILABLE_LANGUAGES.length; i++) { HyphenationData data = AVAILABLE_LANGUAGES[i]; Hyphenator h = loadHyphenator(data); if (h != null) { sMap.put(Locale.forLanguageTag(data.mLanguageTag), h); } addAliasByTag("am", "und-Ethi"); // Amharic addAliasByTag("byn", "und-Ethi"); // Blin addAliasByTag("gez", "und-Ethi"); // Geʻez addAliasByTag("ti", "und-Ethi"); // Tigrinya addAliasByTag("wal", "und-Ethi"); // Wolaytta } }; for (int i = 0; i < LOCALE_FALLBACK_DATA.length; i++) { String language = LOCALE_FALLBACK_DATA[i][0]; String fallback = LOCALE_FALLBACK_DATA[i][1]; sMap.put(Locale.forLanguageTag(language), sMap.get(Locale.forLanguageTag(fallback))); } } private static native long nBuildHyphenator(/* non-zero */ long dataAddress, @NonNull String langTag, @IntRange(from = 1) int minPrefix, @IntRange(from = 1) int minSuffix); }
core/java/android/text/StaticLayout.java +0 −4 Original line number Diff line number Diff line Loading @@ -33,7 +33,6 @@ import android.util.Pools.SynchronizedPool; import com.android.internal.util.ArrayUtils; import com.android.internal.util.GrowingArrayUtils; import java.nio.ByteBuffer; import java.util.Arrays; import java.util.Locale; Loading Loading @@ -1529,9 +1528,6 @@ public class StaticLayout extends Layout { private static native void nFreeBuilder(long nativePtr); private static native void nFinishBuilder(long nativePtr); /* package */ static native long nLoadHyphenator(ByteBuffer buf, int offset, int minPrefix, int minSuffix); // Set up paragraph text and settings; done as one big method to minimize jni crossings private static native void nSetupParagraph( /* non zero */ long nativePtr, @NonNull char[] text, @IntRange(from = 0) int length, Loading
core/jni/Android.bp +1 −0 Original line number Diff line number Diff line Loading @@ -78,6 +78,7 @@ cc_library_shared { "android_view_VelocityTracker.cpp", "android_text_AndroidCharacter.cpp", "android_text_AndroidBidi.cpp", "android_text_Hyphenator.cpp", "android_text_StaticLayout.cpp", "android_os_Debug.cpp", "android_os_GraphicsEnvironment.cpp", Loading
core/jni/AndroidRuntime.cpp +2 −0 Original line number Diff line number Diff line Loading @@ -173,6 +173,7 @@ extern int register_android_net_LocalSocketImpl(JNIEnv* env); extern int register_android_net_NetworkUtils(JNIEnv* env); extern int register_android_net_TrafficStats(JNIEnv* env); extern int register_android_text_AndroidCharacter(JNIEnv *env); extern int register_android_text_Hyphenator(JNIEnv *env); extern int register_android_text_StaticLayout(JNIEnv *env); extern int register_android_text_AndroidBidi(JNIEnv *env); extern int register_android_opengl_classes(JNIEnv *env); Loading Loading @@ -1321,6 +1322,7 @@ static const RegJNIRec gRegJNI[] = { REG_JNI(register_android_content_StringBlock), REG_JNI(register_android_content_XmlBlock), REG_JNI(register_android_text_AndroidCharacter), REG_JNI(register_android_text_Hyphenator), REG_JNI(register_android_text_StaticLayout), REG_JNI(register_android_text_AndroidBidi), REG_JNI(register_android_view_InputDevice), Loading
core/jni/android_text_Hyphenator.cpp 0 → 100644 +41 −0 Original line number Diff line number Diff line /* * Copyright (C) 2017 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include <cstdint> #include <core_jni_helpers.h> #include <minikin/Hyphenator.h> #include <nativehelper/ScopedUtfChars.h> namespace android { static jlong nBuildHyphenator(JNIEnv* env, jclass, jlong dataAddress, jstring lang, jint minPrefix, jint minSuffix) { const uint8_t* bytebuf = reinterpret_cast<const uint8_t*>(dataAddress); // null allowed. ScopedUtfChars language(env, lang); minikin::Hyphenator* hyphenator = minikin::Hyphenator::loadBinary( bytebuf, minPrefix, minSuffix, language.c_str(), language.size()); return reinterpret_cast<jlong>(hyphenator); } static const JNINativeMethod gMethods[] = { {"nBuildHyphenator", "(JLjava/lang/String;II)J", (void*) nBuildHyphenator}, }; int register_android_text_Hyphenator(JNIEnv* env) { return RegisterMethodsOrDie(env, "android/text/Hyphenator", gMethods, NELEM(gMethods)); } } // namespace android