Get rid of icu4j dependency (8588b9b3) · Commits · e / os / android_packages_apps_Eleven

src/org/lineageos/eleven/locale/HanziToPinyin.java

deleted100644 → 0

+0 −186

Original line number	Diff line number	Diff line
		/*
		* Copyright (C) 2011 The Android Open Source Project
		*
		* Licensed under the Apache License, Version 2.0 (the "License");
		* you may not use this file except in compliance with the License.
		* You may obtain a copy of the License at
		*
		* http://www.apache.org/licenses/LICENSE-2.0
		*
		* Unless required by applicable law or agreed to in writing, software
		* distributed under the License is distributed on an "AS IS" BASIS,
		* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
		* See the License for the specific language governing permissions and
		* limitations under the License.
		*/

		package org.lineageos.eleven.locale;

		import android.text.TextUtils;
		import android.util.Log;

		import java.util.ArrayList;

		import android.icu.text.Transliterator;

		/**
		* An object to convert Chinese character to its corresponding pinyin string.
		* For characters with multiple possible pinyin string, only one is selected
		* according to ICU Transliterator class. Polyphone is not supported in this
		* implementation.
		*/
		public class HanziToPinyin {
		private static final String TAG = "HanziToPinyin";

		private static HanziToPinyin sInstance;
		private Transliterator mPinyinTransliterator;
		private Transliterator mAsciiTransliterator;

		public static class Token {
		/**
		* Separator between target string for each source char
		*/
		public static final String SEPARATOR = " ";

		public static final int LATIN = 1;
		public static final int PINYIN = 2;
		public static final int UNKNOWN = 3;

		public Token() {
		}

		public Token(int type, String source, String target) {
		this.type = type;
		this.source = source;
		this.target = target;
		}

		/**
		* Type of this token, ASCII, PINYIN or UNKNOWN.
		*/
		public int type;
		/**
		* Original string before translation.
		*/
		public String source;
		/**
		* Translated string of source. For Han, target is corresponding Pinyin. Otherwise target is
		* original string in source.
		*/
		public String target;
		}

		private HanziToPinyin() {
		try {
		mPinyinTransliterator = Transliterator.getInstance("Han-Latin/Names; Latin-Ascii; Any-Upper");
		mAsciiTransliterator = Transliterator.getInstance("Latin-Ascii");
		} catch (RuntimeException e) {
		Log.w(TAG, "Han-Latin/Names transliterator data is missing,"
		+ " HanziToPinyin is disabled");
		}
		}

		public boolean hasChineseTransliterator() {
		return mPinyinTransliterator != null;
		}

		public static HanziToPinyin getInstance() {
		synchronized (HanziToPinyin.class) {
		if (sInstance == null) {
		sInstance = new HanziToPinyin();
		}
		return sInstance;
		}
		}

		private void tokenize(char character, Token token) {
		token.source = Character.toString(character);

		// ASCII
		if (character < 128) {
		token.type = Token.LATIN;
		token.target = token.source;
		return;
		}

		// Extended Latin. Transcode these to ASCII equivalents
		if (character < 0x250 \|\| (0x1e00 <= character && character < 0x1eff)) {
		token.type = Token.LATIN;
		token.target = mAsciiTransliterator == null ? token.source :
		mAsciiTransliterator.transliterate(token.source);
		return;
		}

		token.type = Token.PINYIN;
		token.target = mPinyinTransliterator.transliterate(token.source);
		if (TextUtils.isEmpty(token.target) \|\|
		TextUtils.equals(token.source, token.target)) {
		token.type = Token.UNKNOWN;
		token.target = token.source;
		}
		}

		public String transliterate(final String input) {
		if (!hasChineseTransliterator() \|\| TextUtils.isEmpty(input)) {
		return null;
		}
		return mPinyinTransliterator.transliterate(input);
		}

		/**
		* Convert the input to a array of tokens. The sequence of ASCII or Unknown characters without
		* space will be put into a Token, One Hanzi character which has pinyin will be treated as a
		* Token. If there is no Chinese transliterator, the empty token array is returned.
		*/
		public ArrayList<Token> getTokens(final String input) {
		ArrayList<Token> tokens = new ArrayList<Token>();
		if (!hasChineseTransliterator() \|\| TextUtils.isEmpty(input)) {
		// return empty tokens.
		return tokens;
		}

		final int inputLength = input.length();
		final StringBuilder sb = new StringBuilder();
		int tokenType = Token.LATIN;
		Token token = new Token();

		// Go through the input, create a new token when
		// a. Token type changed
		// b. Get the Pinyin of current charater.
		// c. current character is space.
		for (int i = 0; i < inputLength; i++) {
		final char character = input.charAt(i);
		if (Character.isSpaceChar(character)) {
		if (sb.length() > 0) {
		addToken(sb, tokens, tokenType);
		}
		} else {
		tokenize(character, token);
		if (token.type == Token.PINYIN) {
		if (sb.length() > 0) {
		addToken(sb, tokens, tokenType);
		}
		tokens.add(token);
		token = new Token();
		} else {
		if (tokenType != token.type && sb.length() > 0) {
		addToken(sb, tokens, tokenType);
		}
		sb.append(token.target);
		}
		tokenType = token.type;
		}
		}
		if (sb.length() > 0) {
		addToken(sb, tokens, tokenType);
		}
		return tokens;
		}

		private void addToken(
		final StringBuilder sb, final ArrayList<Token> tokens, final int tokenType) {
		String str = sb.toString();
		tokens.add(new Token(tokenType, str, str));
		sb.setLength(0);
		}
		}

src/org/lineageos/eleven/locale/LocaleUtils.java

+4 −239

Original line number	Diff line number	Diff line
		@@ -16,25 +16,13 @@

		package org.lineageos.eleven.locale;

		import android.provider.ContactsContract.FullNameStyle;
		import android.provider.ContactsContract.PhoneticNameStyle;
		import android.icu.text.AlphabeticIndex;
		import android.support.annotation.VisibleForTesting;
		import android.text.TextUtils;
		import android.util.Log;

		import org.lineageos.eleven.locale.HanziToPinyin.Token;

		import java.lang.Character.UnicodeBlock;
		import java.util.ArrayList;
		import java.util.Collections;
		import java.util.HashSet;
		import java.util.Iterator;
		import java.util.Locale;
		import java.util.Set;

		import android.icu.text.AlphabeticIndex;
		import android.icu.text.AlphabeticIndex.ImmutableIndex;
		import android.icu.text.Transliterator;

		/**
		* This utility class provides specialized handling for locale specific
		@@ -73,10 +61,9 @@ public class LocaleUtils {
		private static final String EMPTY_STRING = "";
		private static final String NUMBER_STRING = "#";

		protected final ImmutableIndex mAlphabeticIndex;
		protected final AlphabeticIndex.ImmutableIndex mAlphabeticIndex;
		private final int mAlphabeticIndexBucketCount;
		private final int mNumberBucketIndex;
		private final boolean mEnableSecondaryLocalePinyin;

		public LocaleUtilsBase(LocaleSet locales) {
		// AlphabeticIndex.getBucketLabel() uses a binary search across
		@@ -92,7 +79,6 @@ public class LocaleUtils {
		// Cyrillic because their alphabets are complementary supersets
		// of Russian.
		final Locale secondaryLocale = locales.getSecondaryLocale();
		mEnableSecondaryLocalePinyin = locales.isSecondaryLocaleSimplifiedChinese();
		AlphabeticIndex ai = new AlphabeticIndex(locales.getPrimaryLocale())
		.setMaxLabelCount(300);
		if (secondaryLocale != null) {
		@@ -151,13 +137,6 @@ public class LocaleUtils {
		return mNumberBucketIndex;
		}

		/**
		* TODO: ICU 52 AlphabeticIndex doesn't support Simplified Chinese
		* as a secondary locale. Remove the following if that is added.
		*/
		if (mEnableSecondaryLocalePinyin) {
		name = HanziToPinyin.getInstance().transliterate(name);
		}
		final int bucket = mAlphabeticIndex.getBucketIndex(name);
		if (bucket < 0) {
		return -1;
		@@ -207,215 +186,6 @@ public class LocaleUtils {
		}
		}

		/**
		* Japanese specific locale overrides.
		*
		* sortKey: unchanged (same as name)
		* nameLookupKeys: unchanged (none)
		* labels: extends default labels by labeling unlabeled CJ characters
		* with the Japanese character 他 ("misc"). Japanese labels are:
		* あ, か, さ, た, な, は, ま, や, ら, わ, 他, [A-Z], #, " "
		*/
		private static class JapaneseContactUtils extends LocaleUtilsBase {
		// \u4ed6 is Japanese character 他 ("misc")
		private static final String JAPANESE_MISC_LABEL = "\u4ed6";
		private final int mMiscBucketIndex;

		public JapaneseContactUtils(LocaleSet locales) {
		super(locales);
		// Determine which bucket AlphabeticIndex is lumping unclassified
		// Japanese characters into by looking up the bucket index for
		// a representative Kanji/CJK unified ideograph (\u65e5 is the
		// character '日').
		mMiscBucketIndex = super.getBucketIndex("\u65e5");
		}

		// Set of UnicodeBlocks for unified CJK (Chinese) characters and
		// Japanese characters. This includes all code blocks that might
		// contain a character used in Japanese (which is why unified CJK
		// blocks are included but Korean Hangul and jamo are not).
		private static final Set<Character.UnicodeBlock> CJ_BLOCKS;
		static {
		Set<UnicodeBlock> set = new HashSet<UnicodeBlock>();
		set.add(UnicodeBlock.HIRAGANA);
		set.add(UnicodeBlock.KATAKANA);
		set.add(UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS);
		set.add(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS);
		set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS);
		set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A);
		set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B);
		set.add(UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION);
		set.add(UnicodeBlock.CJK_RADICALS_SUPPLEMENT);
		set.add(UnicodeBlock.CJK_COMPATIBILITY);
		set.add(UnicodeBlock.CJK_COMPATIBILITY_FORMS);
		set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS);
		set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT);
		CJ_BLOCKS = Collections.unmodifiableSet(set);
		}

		/**
		* Helper routine to identify unlabeled Chinese or Japanese characters
		* to put in a 'misc' bucket.
		*
		* @return true if the specified Unicode code point is Chinese or
		* Japanese
		*/
		private static boolean isChineseOrJapanese(int codePoint) {
		return CJ_BLOCKS.contains(UnicodeBlock.of(codePoint));
		}

		/**
		* Returns the bucket index for the specified string. Adds an
		* additional 'misc' bucket for Kanji characters to the base class set.
		*/
		@Override
		public int getBucketIndex(String name) {
		final int bucketIndex = super.getBucketIndex(name);
		if ((bucketIndex == mMiscBucketIndex &&
		!isChineseOrJapanese(Character.codePointAt(name, 0))) \|\|
		bucketIndex > mMiscBucketIndex) {
		return bucketIndex + 1;
		}
		return bucketIndex;
		}

		/**
		* Returns the number of buckets in use (one more than the base class
		* uses, because this class adds a bucket for Kanji).
		*/
		@Override
		public int getBucketCount() {
		return super.getBucketCount() + 1;
		}

		/**
		* Returns the label for the specified bucket index if a valid index,
		* otherwise returns an empty string. '他' is returned for unclassified
		* Kanji; for all others, the label determined by the base class is
		* returned.
		*/
		@Override
		public String getBucketLabel(int bucketIndex) {
		if (bucketIndex == mMiscBucketIndex) {
		return JAPANESE_MISC_LABEL;
		} else if (bucketIndex > mMiscBucketIndex) {
		--bucketIndex;
		}
		return super.getBucketLabel(bucketIndex);
		}

		@Override
		public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
		// Hiragana and Katakana will be positively identified as Japanese.
		if (nameStyle == PhoneticNameStyle.JAPANESE) {
		return getRomajiNameLookupKeys(name);
		}
		return null;
		}

		private static boolean mInitializedTransliterator;
		private static Transliterator mJapaneseTransliterator;

		private static Transliterator getJapaneseTransliterator() {
		synchronized(JapaneseContactUtils.class) {
		if (!mInitializedTransliterator) {
		mInitializedTransliterator = true;
		Transliterator t = null;
		try {
		t = Transliterator.getInstance("Hiragana-Latin; Katakana-Latin;"
		+ " Latin-Ascii");
		} catch (RuntimeException e) {
		Log.w(TAG, "Hiragana/Katakana-Latin transliterator data"
		+ " is missing");
		}
		mJapaneseTransliterator = t;
		}
		return mJapaneseTransliterator;
		}
		}

		public static Iterator<String> getRomajiNameLookupKeys(String name) {
		final Transliterator t = getJapaneseTransliterator();
		if (t == null) {
		return null;
		}
		final String romajiName = t.transliterate(name);
		if (TextUtils.isEmpty(romajiName) \|\|
		TextUtils.equals(name, romajiName)) {
		return null;
		}
		final HashSet<String> keys = new HashSet<String>();
		keys.add(romajiName);
		return keys.iterator();
		}
		}

		/**
		* Simplified Chinese specific locale overrides. Uses ICU Transliterator
		* for generating pinyin transliteration.
		*
		* sortKey: unchanged (same as name)
		* nameLookupKeys: adds additional name lookup keys
		* - Chinese character's pinyin and pinyin's initial character.
		* - Latin word and initial character.
		* labels: unchanged
		* Simplified Chinese labels are the same as English: [A-Z], #, " "
		*/
		private static class SimplifiedChineseContactUtils
		extends LocaleUtilsBase {
		public SimplifiedChineseContactUtils(LocaleSet locales) {
		super(locales);
		}

		@Override
		public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
		if (nameStyle != FullNameStyle.JAPANESE &&
		nameStyle != FullNameStyle.KOREAN) {
		return getPinyinNameLookupKeys(name);
		}
		return null;
		}

		public static Iterator<String> getPinyinNameLookupKeys(String name) {
		// TODO : Reduce the object allocation.
		HashSet<String> keys = new HashSet<String>();
		ArrayList<Token> tokens = HanziToPinyin.getInstance().getTokens(name);
		final int tokenCount = tokens.size();
		final StringBuilder keyPinyin = new StringBuilder();
		final StringBuilder keyInitial = new StringBuilder();
		// There is no space among the Chinese Characters, the variant name
		// lookup key wouldn't work for Chinese. The keyOriginal is used to
		// build the lookup keys for itself.
		final StringBuilder keyOriginal = new StringBuilder();
		for (int i = tokenCount - 1; i >= 0; i--) {
		final Token token = tokens.get(i);
		if (Token.UNKNOWN == token.type) {
		continue;
		}
		if (Token.PINYIN == token.type) {
		keyPinyin.insert(0, token.target);
		keyInitial.insert(0, token.target.charAt(0));
		} else if (Token.LATIN == token.type) {
		// Avoid adding space at the end of String.
		if (keyPinyin.length() > 0) {
		keyPinyin.insert(0, ' ');
		}
		if (keyOriginal.length() > 0) {
		keyOriginal.insert(0, ' ');
		}
		keyPinyin.insert(0, token.source);
		keyInitial.insert(0, token.source.charAt(0));
		}
		keyOriginal.insert(0, token.source);
		keys.add(keyOriginal.toString());
		keys.add(keyPinyin.toString());
		keys.add(keyInitial.toString());
		}
		return keys.iterator();
		}
		}

		private static final String JAPANESE_LANGUAGE = Locale.JAPANESE.getLanguage().toLowerCase();
		private static LocaleUtils sSingleton;

		private final LocaleSet mLocales;
		@@ -427,13 +197,8 @@ public class LocaleUtils {
		} else {
		mLocales = locales;
		}
		if (mLocales.isPrimaryLanguage(JAPANESE_LANGUAGE)) {
		mUtils = new JapaneseContactUtils(mLocales);
		} else if (mLocales.isPrimaryLocaleSimplifiedChinese()) {
		mUtils = new SimplifiedChineseContactUtils(mLocales);
		} else {

		mUtils = new LocaleUtilsBase(mLocales);
		}
		Log.i(TAG, "AddressBook Labels [" + mLocales.toString() + "]: "
		+ getLabels().toString());
		}