Loading tools/aapt2/ResourceParser.cpp +8 −4 Original line number Diff line number Diff line Loading @@ -155,7 +155,10 @@ bool ResourceParser::FlattenXmlSubtree( xml::XmlPullParser* parser, std::string* out_raw_string, StyleString* out_style_string, std::vector<UntranslatableSection>* out_untranslatable_sections) { // Keeps track of formatting tags (<b>, <i>) and the range of characters for which they apply. std::vector<Span> span_stack; // The stack elements refer to the indices in out_style_string->spans. // By first adding to the out_style_string->spans vector, and then using the stack to refer // to this vector, the original order of tags is preserved in cases such as <b><i>hello</b></i>. std::vector<size_t> span_stack; // Clear the output variables. out_raw_string->clear(); Loading Loading @@ -192,7 +195,9 @@ bool ResourceParser::FlattenXmlSubtree( return false; } span_stack.push_back(Span{std::move(span_name), static_cast<uint32_t>(builder.Utf16Len())}); out_style_string->spans.push_back( Span{std::move(span_name), static_cast<uint32_t>(builder.Utf16Len())}); span_stack.push_back(out_style_string->spans.size() - 1); } else if (parser->element_namespace() == sXliffNamespaceUri) { if (parser->element_name() == "g") { if (untranslatable_start_depth) { Loading Loading @@ -233,9 +238,8 @@ bool ResourceParser::FlattenXmlSubtree( if (parser->element_namespace().empty()) { // This is an HTML tag which we encode as a span. Update the span // stack and pop the top entry. Span& top_span = span_stack.back(); Span& top_span = out_style_string->spans[span_stack.back()]; top_span.last_char = builder.Utf16Len() - 1; out_style_string->spans.push_back(std::move(top_span)); span_stack.pop_back(); } else if (untranslatable_start_depth == make_value(depth)) { // This is the end of an untranslatable section. Use UTF8 indices/lengths. Loading tools/aapt2/ResourceParser_test.cpp +8 −4 Original line number Diff line number Diff line Loading @@ -101,20 +101,24 @@ TEST_F(ResourceParserTest, ParseStyledString) { // Use a surrogate pair unicode point so that we can verify that the span // indices use UTF-16 length and not UTF-8 length. std::string input = "<string name=\"foo\">This is my aunt\u2019s <b>string</b></string>"; "<string name=\"foo\">This is my aunt\u2019s <b>fickle <small>string</small></b></string>"; ASSERT_TRUE(TestParse(input)); StyledString* str = test::GetValue<StyledString>(&table_, "string/foo"); ASSERT_NE(nullptr, str); const std::string expected_str = "This is my aunt\u2019s string"; const std::string expected_str = "This is my aunt\u2019s fickle string"; EXPECT_EQ(expected_str, *str->value->str); EXPECT_EQ(1u, str->value->spans.size()); EXPECT_EQ(2u, str->value->spans.size()); EXPECT_TRUE(str->untranslatable_sections.empty()); EXPECT_EQ(std::string("b"), *str->value->spans[0].name); EXPECT_EQ(17u, str->value->spans[0].first_char); EXPECT_EQ(23u, str->value->spans[0].last_char); EXPECT_EQ(30u, str->value->spans[0].last_char); EXPECT_EQ(std::string("small"), *str->value->spans[1].name); EXPECT_EQ(24u, str->value->spans[1].first_char); EXPECT_EQ(30u, str->value->spans[1].last_char); } TEST_F(ResourceParserTest, ParseStringWithWhitespace) { Loading tools/aapt2/compile/PseudolocaleGenerator.cpp +171 −122 Original line number Diff line number Diff line Loading @@ -22,136 +22,194 @@ #include "ResourceValues.h" #include "ValueVisitor.h" #include "compile/Pseudolocalizer.h" #include "util/Util.h" using android::StringPiece; using android::StringPiece16; namespace aapt { std::unique_ptr<StyledString> PseudolocalizeStyledString( StyledString* string, Pseudolocalizer::Method method, StringPool* pool) { Pseudolocalizer localizer(method); const StringPiece original_text = *string->value->str; StyleString localized; // Copy the spans. We will update their offsets when we localize. localized.spans.reserve(string->value->spans.size()); for (const StringPool::Span& span : string->value->spans) { localized.spans.push_back( Span{*span.name, span.first_char, span.last_char}); } // The ranges are all represented with a single value. This is the start of // one range and end of another. struct Range { size_t start; // The struct that represents both Span objects and UntranslatableSections. struct UnifiedSpan { // Only present for Span objects. If not present, this was an UntranslatableSection. Maybe<std::string> tag; // If set to true, toggles the state of translatability. bool toggle_translatability; // The UTF-16 index into the string where this span starts. uint32_t first_char; // Once the new string is localized, these are the pointers to the spans to adjust. // Since this struct represents the start of one range and end of another, // we have the two pointers respectively. uint32_t* update_start; uint32_t* update_end; // The UTF-16 index into the string where this span ends, inclusive. uint32_t last_char; }; auto cmp = [](const Range& r, size_t index) -> bool { return r.start < index; }; // Construct the ranges. The ranges are represented like so: [0, 2, 5, 7] // The ranges are the spaces in between. In this example, with a total string // length of 9, the vector represents: (0,1], (2,4], (5,6], (7,9] // std::vector<Range> ranges; ranges.push_back(Range{0, false, nullptr, nullptr}); ranges.push_back(Range{original_text.size() - 1, false, nullptr, nullptr}); for (size_t i = 0; i < string->value->spans.size(); i++) { const StringPool::Span& span = string->value->spans[i]; // Insert or update the Range marker for the start of this span. auto iter = std::lower_bound(ranges.begin(), ranges.end(), span.first_char, cmp); if (iter != ranges.end() && iter->start == span.first_char) { iter->update_start = &localized.spans[i].first_char; } else { ranges.insert(iter, Range{span.first_char, false, &localized.spans[i].first_char, nullptr}); inline static bool operator<(const UnifiedSpan& left, const UnifiedSpan& right) { if (left.first_char < right.first_char) { return true; } else if (left.first_char > right.first_char) { return false; } else if (left.last_char < right.last_char) { return true; } return false; } // Insert or update the Range marker for the end of this span. iter = std::lower_bound(ranges.begin(), ranges.end(), span.last_char, cmp); if (iter != ranges.end() && iter->start == span.last_char) { iter->update_end = &localized.spans[i].last_char; inline static UnifiedSpan SpanToUnifiedSpan(const StringPool::Span& span) { return UnifiedSpan{*span.name, span.first_char, span.last_char}; } inline static UnifiedSpan UntranslatableSectionToUnifiedSpan(const UntranslatableSection& section) { return UnifiedSpan{ {}, static_cast<uint32_t>(section.start), static_cast<uint32_t>(section.end) - 1}; } // Merges the Span and UntranslatableSections of this StyledString into a single vector of // UnifiedSpans. This will first check that the Spans are sorted in ascending order. static std::vector<UnifiedSpan> MergeSpans(const StyledString& string) { // Ensure the Spans are sorted and converted. std::vector<UnifiedSpan> sorted_spans; sorted_spans.reserve(string.value->spans.size()); std::transform(string.value->spans.begin(), string.value->spans.end(), std::back_inserter(sorted_spans), SpanToUnifiedSpan); // Stable sort to ensure tag sequences like "<b><i>" are preserved. std::stable_sort(sorted_spans.begin(), sorted_spans.end()); // Ensure the UntranslatableSections are sorted and converted. std::vector<UnifiedSpan> sorted_untranslatable_sections; sorted_untranslatable_sections.reserve(string.untranslatable_sections.size()); std::transform(string.untranslatable_sections.begin(), string.untranslatable_sections.end(), std::back_inserter(sorted_untranslatable_sections), UntranslatableSectionToUnifiedSpan); std::sort(sorted_untranslatable_sections.begin(), sorted_untranslatable_sections.end()); std::vector<UnifiedSpan> merged_spans; merged_spans.reserve(sorted_spans.size() + sorted_untranslatable_sections.size()); auto span_iter = sorted_spans.begin(); auto untranslatable_iter = sorted_untranslatable_sections.begin(); while (span_iter != sorted_spans.end() && untranslatable_iter != sorted_untranslatable_sections.end()) { if (*span_iter < *untranslatable_iter) { merged_spans.push_back(std::move(*span_iter)); ++span_iter; } else { ranges.insert(iter, Range{span.last_char, false, nullptr, &localized.spans[i].last_char}); merged_spans.push_back(std::move(*untranslatable_iter)); ++untranslatable_iter; } } // Parts of the string may be untranslatable. Merge those ranges // in as well, so that we have continuous sections of text to // feed into the pseudolocalizer. // We do this by marking the beginning of a range as either toggling // the translatability state or not. for (const UntranslatableSection& section : string->untranslatable_sections) { auto iter = std::lower_bound(ranges.begin(), ranges.end(), section.start, cmp); if (iter != ranges.end() && iter->start == section.start) { // An existing span starts (or ends) here. We just need to mark that // the translatability should toggle here. If translatability was // already being toggled, then that means we have two adjacent ranges of untranslatable // text, so remove the toggle and only toggle at the end of this range, // effectively merging these ranges. iter->toggle_translatability = !iter->toggle_translatability; } else { // Insert a new range that specifies to toggle the translatability. iter = ranges.insert(iter, Range{section.start, true, nullptr, nullptr}); while (span_iter != sorted_spans.end()) { merged_spans.push_back(std::move(*span_iter)); ++span_iter; } // Update/create an end to the untranslatable section. iter = std::lower_bound(iter, ranges.end(), section.end, cmp); if (iter != ranges.end() && iter->start == section.end) { iter->toggle_translatability = true; } else { iter = ranges.insert(iter, Range{section.end, true, nullptr, nullptr}); while (untranslatable_iter != sorted_untranslatable_sections.end()) { merged_spans.push_back(std::move(*untranslatable_iter)); ++untranslatable_iter; } return merged_spans; } localized.str += localizer.Start(); std::unique_ptr<StyledString> PseudolocalizeStyledString(StyledString* string, Pseudolocalizer::Method method, StringPool* pool) { Pseudolocalizer localizer(method); // Collect the spans and untranslatable sections into one set of spans, sorted by first_char. // This will effectively subdivide the string into multiple sections that can be individually // pseudolocalized, while keeping the span indices synchronized. std::vector<UnifiedSpan> merged_spans = MergeSpans(*string); // Iterate over the ranges and localize each section. // The text starts as translatable, and each time a range has toggle_translatability // set to true, we toggle whether to translate or not. // This assumes no untranslatable ranges overlap. // All Span indices are UTF-16 based, according to the resources.arsc format expected by the // runtime. So we will do all our processing in UTF-16, then convert back. const std::u16string text16 = util::Utf8ToUtf16(*string->value->str); // Convenient wrapper around the text that allows us to work with StringPieces. const StringPiece16 text(text16); // The new string. std::string new_string = localizer.Start(); // The stack that keeps track of what nested Span we're in. std::vector<size_t> span_stack; // The current position in the original text. uint32_t cursor = 0u; // The current position in the new text. uint32_t new_cursor = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_string.data()), new_string.size(), false); // We assume no nesting of untranslatable sections, since XLIFF doesn't allow it. bool translatable = true; for (size_t i = 0; i < ranges.size(); i++) { const size_t start = ranges[i].start; size_t len = original_text.size() - start; if (i + 1 < ranges.size()) { len = ranges[i + 1].start - start; size_t span_idx = 0u; while (span_idx < merged_spans.size() || !span_stack.empty()) { UnifiedSpan* span = span_idx >= merged_spans.size() ? nullptr : &merged_spans[span_idx]; UnifiedSpan* parent_span = span_stack.empty() ? nullptr : &merged_spans[span_stack.back()]; if (span != nullptr) { if (parent_span == nullptr || parent_span->last_char > span->first_char) { // There is no parent, or this span is the child of the parent. // Pseudolocalize all the text until this span. const StringPiece16 substr = text.substr(cursor, span->first_char - cursor); cursor += substr.size(); // Pseudolocalize the substring. std::string new_substr = util::Utf16ToUtf8(substr); if (translatable) { new_substr = localizer.Text(new_substr); } new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()), new_substr.size(), false); new_string += new_substr; if (ranges[i].update_start) { *ranges[i].update_start = localized.str.size(); // Rewrite the first_char. span->first_char = new_cursor; if (!span->tag) { // An untranslatable section has begun! translatable = false; } if (ranges[i].update_end) { *ranges[i].update_end = localized.str.size(); span_stack.push_back(span_idx); ++span_idx; continue; } if (ranges[i].toggle_translatability) { translatable = !translatable; } if (parent_span != nullptr) { // There is a parent, and either this span is not a child of it, or there are no more spans. // Pop this off the stack. const StringPiece16 substr = text.substr(cursor, parent_span->last_char - cursor + 1); cursor += substr.size(); // Pseudolocalize the substring. std::string new_substr = util::Utf16ToUtf8(substr); if (translatable) { localized.str += localizer.Text(original_text.substr(start, len)); } else { localized.str += original_text.substr(start, len); new_substr = localizer.Text(new_substr); } new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()), new_substr.size(), false); new_string += new_substr; parent_span->last_char = new_cursor - 1; if (parent_span->tag) { // An end to an untranslatable section. translatable = true; } span_stack.pop_back(); } } // Finish the pseudolocalization at the end of the string. new_string += localizer.Text(util::Utf16ToUtf8(text.substr(cursor, text.size() - cursor))); new_string += localizer.End(); localized.str += localizer.End(); StyleString localized; localized.str = std::move(new_string); // Convert the UnifiedSpans into regular Spans, skipping the UntranslatableSections. for (UnifiedSpan& span : merged_spans) { if (span.tag) { localized.spans.push_back(Span{std::move(span.tag.value()), span.first_char, span.last_char}); } } return util::make_unique<StyledString>(pool->MakeRef(localized)); } Loading @@ -175,8 +233,7 @@ class Visitor : public RawValueVisitor { if (sub_visitor.value) { localized->values[i] = std::move(sub_visitor.item); } else { localized->values[i] = std::unique_ptr<Item>(plural->values[i]->Clone(pool_)); localized->values[i] = std::unique_ptr<Item>(plural->values[i]->Clone(pool_)); } } } Loading Loading @@ -210,8 +267,7 @@ class Visitor : public RawValueVisitor { } result += localizer_.End(); std::unique_ptr<String> localized = util::make_unique<String>(pool_->MakeRef(result)); std::unique_ptr<String> localized = util::make_unique<String>(pool_->MakeRef(result)); localized->SetSource(string->GetSource()); localized->SetWeak(true); item = std::move(localized); Loading Loading @@ -282,14 +338,10 @@ void PseudolocalizeIfNeeded(const Pseudolocalizer::Method method, } } /** * A value is pseudolocalizable if it does not define a locale (or is the * default locale) * and is translatable. */ // A value is pseudolocalizable if it does not define a locale (or is the default locale) and is // translatable. static bool IsPseudolocalizable(ResourceConfigValue* config_value) { const int diff = config_value->config.diff(ConfigDescription::DefaultConfig()); const int diff = config_value->config.diff(ConfigDescription::DefaultConfig()); if (diff & ConfigDescription::CONFIG_LOCALE) { return false; } Loading @@ -298,19 +350,16 @@ static bool IsPseudolocalizable(ResourceConfigValue* config_value) { } // namespace bool PseudolocaleGenerator::Consume(IAaptContext* context, ResourceTable* table) { bool PseudolocaleGenerator::Consume(IAaptContext* context, ResourceTable* table) { for (auto& package : table->packages) { for (auto& type : package->types) { for (auto& entry : type->entries) { std::vector<ResourceConfigValue*> values = entry->FindValuesIf(IsPseudolocalizable); std::vector<ResourceConfigValue*> values = entry->FindValuesIf(IsPseudolocalizable); for (ResourceConfigValue* value : values) { PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value, &table->string_pool, entry.get()); PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value, &table->string_pool, entry.get()); PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value, &table->string_pool, entry.get()); PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value, &table->string_pool, entry.get()); } } } Loading tools/aapt2/compile/PseudolocaleGenerator_test.cpp +124 −22 File changed.Preview size limit exceeded, changes collapsed. Show changes Loading
tools/aapt2/ResourceParser.cpp +8 −4 Original line number Diff line number Diff line Loading @@ -155,7 +155,10 @@ bool ResourceParser::FlattenXmlSubtree( xml::XmlPullParser* parser, std::string* out_raw_string, StyleString* out_style_string, std::vector<UntranslatableSection>* out_untranslatable_sections) { // Keeps track of formatting tags (<b>, <i>) and the range of characters for which they apply. std::vector<Span> span_stack; // The stack elements refer to the indices in out_style_string->spans. // By first adding to the out_style_string->spans vector, and then using the stack to refer // to this vector, the original order of tags is preserved in cases such as <b><i>hello</b></i>. std::vector<size_t> span_stack; // Clear the output variables. out_raw_string->clear(); Loading Loading @@ -192,7 +195,9 @@ bool ResourceParser::FlattenXmlSubtree( return false; } span_stack.push_back(Span{std::move(span_name), static_cast<uint32_t>(builder.Utf16Len())}); out_style_string->spans.push_back( Span{std::move(span_name), static_cast<uint32_t>(builder.Utf16Len())}); span_stack.push_back(out_style_string->spans.size() - 1); } else if (parser->element_namespace() == sXliffNamespaceUri) { if (parser->element_name() == "g") { if (untranslatable_start_depth) { Loading Loading @@ -233,9 +238,8 @@ bool ResourceParser::FlattenXmlSubtree( if (parser->element_namespace().empty()) { // This is an HTML tag which we encode as a span. Update the span // stack and pop the top entry. Span& top_span = span_stack.back(); Span& top_span = out_style_string->spans[span_stack.back()]; top_span.last_char = builder.Utf16Len() - 1; out_style_string->spans.push_back(std::move(top_span)); span_stack.pop_back(); } else if (untranslatable_start_depth == make_value(depth)) { // This is the end of an untranslatable section. Use UTF8 indices/lengths. Loading
tools/aapt2/ResourceParser_test.cpp +8 −4 Original line number Diff line number Diff line Loading @@ -101,20 +101,24 @@ TEST_F(ResourceParserTest, ParseStyledString) { // Use a surrogate pair unicode point so that we can verify that the span // indices use UTF-16 length and not UTF-8 length. std::string input = "<string name=\"foo\">This is my aunt\u2019s <b>string</b></string>"; "<string name=\"foo\">This is my aunt\u2019s <b>fickle <small>string</small></b></string>"; ASSERT_TRUE(TestParse(input)); StyledString* str = test::GetValue<StyledString>(&table_, "string/foo"); ASSERT_NE(nullptr, str); const std::string expected_str = "This is my aunt\u2019s string"; const std::string expected_str = "This is my aunt\u2019s fickle string"; EXPECT_EQ(expected_str, *str->value->str); EXPECT_EQ(1u, str->value->spans.size()); EXPECT_EQ(2u, str->value->spans.size()); EXPECT_TRUE(str->untranslatable_sections.empty()); EXPECT_EQ(std::string("b"), *str->value->spans[0].name); EXPECT_EQ(17u, str->value->spans[0].first_char); EXPECT_EQ(23u, str->value->spans[0].last_char); EXPECT_EQ(30u, str->value->spans[0].last_char); EXPECT_EQ(std::string("small"), *str->value->spans[1].name); EXPECT_EQ(24u, str->value->spans[1].first_char); EXPECT_EQ(30u, str->value->spans[1].last_char); } TEST_F(ResourceParserTest, ParseStringWithWhitespace) { Loading
tools/aapt2/compile/PseudolocaleGenerator.cpp +171 −122 Original line number Diff line number Diff line Loading @@ -22,136 +22,194 @@ #include "ResourceValues.h" #include "ValueVisitor.h" #include "compile/Pseudolocalizer.h" #include "util/Util.h" using android::StringPiece; using android::StringPiece16; namespace aapt { std::unique_ptr<StyledString> PseudolocalizeStyledString( StyledString* string, Pseudolocalizer::Method method, StringPool* pool) { Pseudolocalizer localizer(method); const StringPiece original_text = *string->value->str; StyleString localized; // Copy the spans. We will update their offsets when we localize. localized.spans.reserve(string->value->spans.size()); for (const StringPool::Span& span : string->value->spans) { localized.spans.push_back( Span{*span.name, span.first_char, span.last_char}); } // The ranges are all represented with a single value. This is the start of // one range and end of another. struct Range { size_t start; // The struct that represents both Span objects and UntranslatableSections. struct UnifiedSpan { // Only present for Span objects. If not present, this was an UntranslatableSection. Maybe<std::string> tag; // If set to true, toggles the state of translatability. bool toggle_translatability; // The UTF-16 index into the string where this span starts. uint32_t first_char; // Once the new string is localized, these are the pointers to the spans to adjust. // Since this struct represents the start of one range and end of another, // we have the two pointers respectively. uint32_t* update_start; uint32_t* update_end; // The UTF-16 index into the string where this span ends, inclusive. uint32_t last_char; }; auto cmp = [](const Range& r, size_t index) -> bool { return r.start < index; }; // Construct the ranges. The ranges are represented like so: [0, 2, 5, 7] // The ranges are the spaces in between. In this example, with a total string // length of 9, the vector represents: (0,1], (2,4], (5,6], (7,9] // std::vector<Range> ranges; ranges.push_back(Range{0, false, nullptr, nullptr}); ranges.push_back(Range{original_text.size() - 1, false, nullptr, nullptr}); for (size_t i = 0; i < string->value->spans.size(); i++) { const StringPool::Span& span = string->value->spans[i]; // Insert or update the Range marker for the start of this span. auto iter = std::lower_bound(ranges.begin(), ranges.end(), span.first_char, cmp); if (iter != ranges.end() && iter->start == span.first_char) { iter->update_start = &localized.spans[i].first_char; } else { ranges.insert(iter, Range{span.first_char, false, &localized.spans[i].first_char, nullptr}); inline static bool operator<(const UnifiedSpan& left, const UnifiedSpan& right) { if (left.first_char < right.first_char) { return true; } else if (left.first_char > right.first_char) { return false; } else if (left.last_char < right.last_char) { return true; } return false; } // Insert or update the Range marker for the end of this span. iter = std::lower_bound(ranges.begin(), ranges.end(), span.last_char, cmp); if (iter != ranges.end() && iter->start == span.last_char) { iter->update_end = &localized.spans[i].last_char; inline static UnifiedSpan SpanToUnifiedSpan(const StringPool::Span& span) { return UnifiedSpan{*span.name, span.first_char, span.last_char}; } inline static UnifiedSpan UntranslatableSectionToUnifiedSpan(const UntranslatableSection& section) { return UnifiedSpan{ {}, static_cast<uint32_t>(section.start), static_cast<uint32_t>(section.end) - 1}; } // Merges the Span and UntranslatableSections of this StyledString into a single vector of // UnifiedSpans. This will first check that the Spans are sorted in ascending order. static std::vector<UnifiedSpan> MergeSpans(const StyledString& string) { // Ensure the Spans are sorted and converted. std::vector<UnifiedSpan> sorted_spans; sorted_spans.reserve(string.value->spans.size()); std::transform(string.value->spans.begin(), string.value->spans.end(), std::back_inserter(sorted_spans), SpanToUnifiedSpan); // Stable sort to ensure tag sequences like "<b><i>" are preserved. std::stable_sort(sorted_spans.begin(), sorted_spans.end()); // Ensure the UntranslatableSections are sorted and converted. std::vector<UnifiedSpan> sorted_untranslatable_sections; sorted_untranslatable_sections.reserve(string.untranslatable_sections.size()); std::transform(string.untranslatable_sections.begin(), string.untranslatable_sections.end(), std::back_inserter(sorted_untranslatable_sections), UntranslatableSectionToUnifiedSpan); std::sort(sorted_untranslatable_sections.begin(), sorted_untranslatable_sections.end()); std::vector<UnifiedSpan> merged_spans; merged_spans.reserve(sorted_spans.size() + sorted_untranslatable_sections.size()); auto span_iter = sorted_spans.begin(); auto untranslatable_iter = sorted_untranslatable_sections.begin(); while (span_iter != sorted_spans.end() && untranslatable_iter != sorted_untranslatable_sections.end()) { if (*span_iter < *untranslatable_iter) { merged_spans.push_back(std::move(*span_iter)); ++span_iter; } else { ranges.insert(iter, Range{span.last_char, false, nullptr, &localized.spans[i].last_char}); merged_spans.push_back(std::move(*untranslatable_iter)); ++untranslatable_iter; } } // Parts of the string may be untranslatable. Merge those ranges // in as well, so that we have continuous sections of text to // feed into the pseudolocalizer. // We do this by marking the beginning of a range as either toggling // the translatability state or not. for (const UntranslatableSection& section : string->untranslatable_sections) { auto iter = std::lower_bound(ranges.begin(), ranges.end(), section.start, cmp); if (iter != ranges.end() && iter->start == section.start) { // An existing span starts (or ends) here. We just need to mark that // the translatability should toggle here. If translatability was // already being toggled, then that means we have two adjacent ranges of untranslatable // text, so remove the toggle and only toggle at the end of this range, // effectively merging these ranges. iter->toggle_translatability = !iter->toggle_translatability; } else { // Insert a new range that specifies to toggle the translatability. iter = ranges.insert(iter, Range{section.start, true, nullptr, nullptr}); while (span_iter != sorted_spans.end()) { merged_spans.push_back(std::move(*span_iter)); ++span_iter; } // Update/create an end to the untranslatable section. iter = std::lower_bound(iter, ranges.end(), section.end, cmp); if (iter != ranges.end() && iter->start == section.end) { iter->toggle_translatability = true; } else { iter = ranges.insert(iter, Range{section.end, true, nullptr, nullptr}); while (untranslatable_iter != sorted_untranslatable_sections.end()) { merged_spans.push_back(std::move(*untranslatable_iter)); ++untranslatable_iter; } return merged_spans; } localized.str += localizer.Start(); std::unique_ptr<StyledString> PseudolocalizeStyledString(StyledString* string, Pseudolocalizer::Method method, StringPool* pool) { Pseudolocalizer localizer(method); // Collect the spans and untranslatable sections into one set of spans, sorted by first_char. // This will effectively subdivide the string into multiple sections that can be individually // pseudolocalized, while keeping the span indices synchronized. std::vector<UnifiedSpan> merged_spans = MergeSpans(*string); // Iterate over the ranges and localize each section. // The text starts as translatable, and each time a range has toggle_translatability // set to true, we toggle whether to translate or not. // This assumes no untranslatable ranges overlap. // All Span indices are UTF-16 based, according to the resources.arsc format expected by the // runtime. So we will do all our processing in UTF-16, then convert back. const std::u16string text16 = util::Utf8ToUtf16(*string->value->str); // Convenient wrapper around the text that allows us to work with StringPieces. const StringPiece16 text(text16); // The new string. std::string new_string = localizer.Start(); // The stack that keeps track of what nested Span we're in. std::vector<size_t> span_stack; // The current position in the original text. uint32_t cursor = 0u; // The current position in the new text. uint32_t new_cursor = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_string.data()), new_string.size(), false); // We assume no nesting of untranslatable sections, since XLIFF doesn't allow it. bool translatable = true; for (size_t i = 0; i < ranges.size(); i++) { const size_t start = ranges[i].start; size_t len = original_text.size() - start; if (i + 1 < ranges.size()) { len = ranges[i + 1].start - start; size_t span_idx = 0u; while (span_idx < merged_spans.size() || !span_stack.empty()) { UnifiedSpan* span = span_idx >= merged_spans.size() ? nullptr : &merged_spans[span_idx]; UnifiedSpan* parent_span = span_stack.empty() ? nullptr : &merged_spans[span_stack.back()]; if (span != nullptr) { if (parent_span == nullptr || parent_span->last_char > span->first_char) { // There is no parent, or this span is the child of the parent. // Pseudolocalize all the text until this span. const StringPiece16 substr = text.substr(cursor, span->first_char - cursor); cursor += substr.size(); // Pseudolocalize the substring. std::string new_substr = util::Utf16ToUtf8(substr); if (translatable) { new_substr = localizer.Text(new_substr); } new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()), new_substr.size(), false); new_string += new_substr; if (ranges[i].update_start) { *ranges[i].update_start = localized.str.size(); // Rewrite the first_char. span->first_char = new_cursor; if (!span->tag) { // An untranslatable section has begun! translatable = false; } if (ranges[i].update_end) { *ranges[i].update_end = localized.str.size(); span_stack.push_back(span_idx); ++span_idx; continue; } if (ranges[i].toggle_translatability) { translatable = !translatable; } if (parent_span != nullptr) { // There is a parent, and either this span is not a child of it, or there are no more spans. // Pop this off the stack. const StringPiece16 substr = text.substr(cursor, parent_span->last_char - cursor + 1); cursor += substr.size(); // Pseudolocalize the substring. std::string new_substr = util::Utf16ToUtf8(substr); if (translatable) { localized.str += localizer.Text(original_text.substr(start, len)); } else { localized.str += original_text.substr(start, len); new_substr = localizer.Text(new_substr); } new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()), new_substr.size(), false); new_string += new_substr; parent_span->last_char = new_cursor - 1; if (parent_span->tag) { // An end to an untranslatable section. translatable = true; } span_stack.pop_back(); } } // Finish the pseudolocalization at the end of the string. new_string += localizer.Text(util::Utf16ToUtf8(text.substr(cursor, text.size() - cursor))); new_string += localizer.End(); localized.str += localizer.End(); StyleString localized; localized.str = std::move(new_string); // Convert the UnifiedSpans into regular Spans, skipping the UntranslatableSections. for (UnifiedSpan& span : merged_spans) { if (span.tag) { localized.spans.push_back(Span{std::move(span.tag.value()), span.first_char, span.last_char}); } } return util::make_unique<StyledString>(pool->MakeRef(localized)); } Loading @@ -175,8 +233,7 @@ class Visitor : public RawValueVisitor { if (sub_visitor.value) { localized->values[i] = std::move(sub_visitor.item); } else { localized->values[i] = std::unique_ptr<Item>(plural->values[i]->Clone(pool_)); localized->values[i] = std::unique_ptr<Item>(plural->values[i]->Clone(pool_)); } } } Loading Loading @@ -210,8 +267,7 @@ class Visitor : public RawValueVisitor { } result += localizer_.End(); std::unique_ptr<String> localized = util::make_unique<String>(pool_->MakeRef(result)); std::unique_ptr<String> localized = util::make_unique<String>(pool_->MakeRef(result)); localized->SetSource(string->GetSource()); localized->SetWeak(true); item = std::move(localized); Loading Loading @@ -282,14 +338,10 @@ void PseudolocalizeIfNeeded(const Pseudolocalizer::Method method, } } /** * A value is pseudolocalizable if it does not define a locale (or is the * default locale) * and is translatable. */ // A value is pseudolocalizable if it does not define a locale (or is the default locale) and is // translatable. static bool IsPseudolocalizable(ResourceConfigValue* config_value) { const int diff = config_value->config.diff(ConfigDescription::DefaultConfig()); const int diff = config_value->config.diff(ConfigDescription::DefaultConfig()); if (diff & ConfigDescription::CONFIG_LOCALE) { return false; } Loading @@ -298,19 +350,16 @@ static bool IsPseudolocalizable(ResourceConfigValue* config_value) { } // namespace bool PseudolocaleGenerator::Consume(IAaptContext* context, ResourceTable* table) { bool PseudolocaleGenerator::Consume(IAaptContext* context, ResourceTable* table) { for (auto& package : table->packages) { for (auto& type : package->types) { for (auto& entry : type->entries) { std::vector<ResourceConfigValue*> values = entry->FindValuesIf(IsPseudolocalizable); std::vector<ResourceConfigValue*> values = entry->FindValuesIf(IsPseudolocalizable); for (ResourceConfigValue* value : values) { PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value, &table->string_pool, entry.get()); PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value, &table->string_pool, entry.get()); PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value, &table->string_pool, entry.get()); PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value, &table->string_pool, entry.get()); } } } Loading
tools/aapt2/compile/PseudolocaleGenerator_test.cpp +124 −22 File changed.Preview size limit exceeded, changes collapsed. Show changes