Merge "Fix frameworks/base/tools/localedata/extract_icu_data.py" into main (e1dee26b) · Commits · e / os / android_frameworks_base

tools/localedata/extract_icu_data.py

+45 −40

Original line number	Diff line number	Diff line
		@@ -22,6 +22,8 @@ import glob
		import os.path
		import sys

		import xml.etree.ElementTree as ElementTree


		def get_locale_parts(locale):
		"""Split a locale into three parts, for language, script, and region."""
		@@ -40,7 +42,6 @@ def get_locale_parts(locale):

		def read_likely_subtags(input_file_name):
		"""Read and parse ICU's likelySubtags.txt."""
		with open(input_file_name) as input_file:
		likely_script_dict = {
		# Android's additions for pseudo-locales. These internal codes make
		# sure that the pseudo-locales would not match other English or
		@@ -60,21 +61,23 @@ def read_likely_subtags(input_file_name):
		# while. Fortunately, MX < US, so if both exist, MX
		# would be chosen.)
		}
		for line in input_file:
		line = line.strip(u' \n\uFEFF')
		if line.startswith('//'):
		continue
		if '{' in line and '}' in line:
		from_locale = line[:line.index('{')]
		to_locale = line[line.index('"')+1:line.rindex('"')]
		xml_tree = ElementTree.parse(input_file_name)
		likely_subtags = xml_tree.find('likelySubtags')
		for child in likely_subtags:
		from_locale = child.get('from')
		to_locale = child.get('to')
		# print(f'from: {from_locale} to: {to_locale}')
		from_lang, from_scr, from_region = get_locale_parts(from_locale)
		_, to_scr, to_region = get_locale_parts(to_locale)
		if to_locale == "FAIL":
		continue # "FAIL" cases are not useful here.
		if from_lang == 'und':
		continue # not very useful for our purposes
		if from_region is None and to_region not in ['001', 'ZZ']:
		representative_locales.add(to_locale)
		if from_scr is None:
		likely_script_dict[from_locale] = to_scr

		return likely_script_dict, frozenset(representative_locales)


		@@ -86,7 +89,7 @@ def pack_language_or_region(inp, base):
		elif len(inp) == 2:
		return ord(inp[0]), ord(inp[1])
		else:
		assert len(inp) == 3
		assert len(inp) == 3, f'Expects a 3-character string, but "{inp}" '
		base = ord(base)
		first = ord(inp[0]) - base
		second = ord(inp[1]) - base
		@@ -161,9 +164,10 @@ def dump_representative_locales(representative_locales):
		print('});')


		def read_and_dump_likely_data(icu_data_dir):
		def read_and_dump_likely_data(cldr_source_dir):
		"""Read and dump the likely-script data."""
		likely_subtags_txt = os.path.join(icu_data_dir, 'misc', 'likelySubtags.txt')
		likely_subtags_txt = os.path.join(cldr_source_dir,
		'common', 'supplemental', 'likelySubtags.xml')
		likely_script_dict, representative_locales = read_likely_subtags(
		likely_subtags_txt)

		@@ -280,10 +284,11 @@ def main():
		icu_data_dir = os.path.join(
		source_root,
		'external', 'icu', 'icu4c', 'source', 'data')
		cldr_source_dir = os.path.join(source_root, 'external', 'cldr')

		print('// Auto-generated by %s' % sys.argv[0])
		print()
		likely_script_dict = read_and_dump_likely_data(icu_data_dir)
		likely_script_dict = read_and_dump_likely_data(cldr_source_dir)
		read_and_dump_parent_data(icu_data_dir, likely_script_dict)