Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 151e4e18 authored by Jean Chalard's avatar Jean Chalard Committed by Android Git Automerger
Browse files

am 065f126f: Merge "Fix two nasty bugs with surrogate pairs."

* commit '065f126f':
  Fix two nasty bugs with surrogate pairs.
parents bde25a4a 065f126f
Loading
Loading
Loading
Loading
+13 −13
Original line number Diff line number Diff line
@@ -620,34 +620,34 @@ public final class FusionDictionary implements Iterable<Word> {
     * Helper method to find a word in a given branch.
     */
    @SuppressWarnings("unused")
    public static CharGroup findWordInTree(Node node, final String s) {
    public static CharGroup findWordInTree(Node node, final String string) {
        int index = 0;
        final StringBuilder checker = DBG ? new StringBuilder() : null;
        final int[] codePoints = getCodePoints(string);

        CharGroup currentGroup;
        final int codePointCountInS = s.codePointCount(0, s.length());
        do {
            int indexOfGroup = findIndexOfChar(node, s.codePointAt(index));
            int indexOfGroup = findIndexOfChar(node, codePoints[index]);
            if (CHARACTER_NOT_FOUND == indexOfGroup) return null;
            currentGroup = node.mData.get(indexOfGroup);

            if (s.length() - index < currentGroup.mChars.length) return null;
            if (codePoints.length - index < currentGroup.mChars.length) return null;
            int newIndex = index;
            while (newIndex < s.length() && newIndex - index < currentGroup.mChars.length) {
                if (currentGroup.mChars[newIndex - index] != s.codePointAt(newIndex)) return null;
            while (newIndex < codePoints.length && newIndex - index < currentGroup.mChars.length) {
                if (currentGroup.mChars[newIndex - index] != codePoints[newIndex]) return null;
                newIndex++;
            }
            index = newIndex;

            if (DBG) checker.append(new String(currentGroup.mChars, 0, currentGroup.mChars.length));
            if (index < codePointCountInS) {
            if (index < codePoints.length) {
                node = currentGroup.mChildren;
            }
        } while (null != node && index < codePointCountInS);
        } while (null != node && index < codePoints.length);

        if (index < codePointCountInS) return null;
        if (index < codePoints.length) return null;
        if (!currentGroup.isTerminal()) return null;
        if (DBG && !s.equals(checker.toString())) return null;
        if (DBG && !codePoints.equals(checker.toString())) return null;
        return currentGroup;
    }

@@ -847,12 +847,12 @@ public final class FusionDictionary implements Iterable<Word> {
        @Override
        public Word next() {
            Position currentPos = mPositions.getLast();
            mCurrentString.setLength(mCurrentString.length() - currentPos.length);
            mCurrentString.setLength(currentPos.length);

            do {
                if (currentPos.pos.hasNext()) {
                    final CharGroup currentGroup = currentPos.pos.next();
                    currentPos.length = currentGroup.mChars.length;
                    currentPos.length = mCurrentString.length();
                    for (int i : currentGroup.mChars)
                        mCurrentString.append(Character.toChars(i));
                    if (null != currentGroup.mChildren) {
@@ -866,7 +866,7 @@ public final class FusionDictionary implements Iterable<Word> {
                } else {
                    mPositions.removeLast();
                    currentPos = mPositions.getLast();
                    mCurrentString.setLength(mCurrentString.length() - mPositions.getLast().length);
                    mCurrentString.setLength(mPositions.getLast().length);
                }
            } while (true);
        }
+14 −14
Original line number Diff line number Diff line
@@ -72,15 +72,12 @@ public class BinaryDictIOTests extends AndroidTestCase {
    private static final FormatSpec.FormatOptions VERSION3_WITH_DYNAMIC_UPDATE =
            new FormatSpec.FormatOptions(3, true /* supportsDynamicUpdate */);

    private static final String[] CHARACTERS = {
        "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
        "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"
    };

    public BinaryDictIOTests() {
        super();

        final Random random = new Random(123456);
        final long time = System.currentTimeMillis();
        Log.e(TAG, "Testing dictionary: seed is " + time);
        final Random random = new Random(time);
        sWords.clear();
        generateWords(MAX_UNIGRAMS, random);

@@ -132,13 +129,16 @@ public class BinaryDictIOTests extends AndroidTestCase {
    /**
     * Generates a random word.
     */
    private String generateWord(final int value) {
        final int lengthOfChars = CHARACTERS.length;
    private String generateWord(final Random random) {
        StringBuilder builder = new StringBuilder("a");
        long lvalue = Math.abs((long)value);
        while (lvalue > 0) {
            builder.append(CHARACTERS[(int)(lvalue % lengthOfChars)]);
            lvalue /= lengthOfChars;
        int count = random.nextInt() % 30; // Arbitrarily 30 chars max
        while (count > 0) {
            final long r = Math.abs(random.nextInt());
            if (r < 0) continue;
            // Don't insert 0~20, but insert any other code point.
            // Code points are in the range 0~0x10FFFF.
            builder.appendCodePoint((int)(20 + r % (0x10FFFF - 20)));
            --count;
        }
        return builder.toString();
    }
@@ -146,7 +146,7 @@ public class BinaryDictIOTests extends AndroidTestCase {
    private void generateWords(final int number, final Random random) {
        final Set<String> wordSet = CollectionUtils.newHashSet();
        while (wordSet.size() < number) {
            wordSet.add(generateWord(random.nextInt()));
            wordSet.add(generateWord(random));
        }
        sWords.addAll(wordSet);
    }
@@ -555,7 +555,7 @@ public class BinaryDictIOTests extends AndroidTestCase {
        // Test a word that isn't contained within the dictionary.
        final Random random = new Random((int)System.currentTimeMillis());
        for (int i = 0; i < 1000; ++i) {
            final String word = generateWord(random.nextInt());
            final String word = generateWord(random);
            if (sWords.indexOf(word) != -1) continue;
            runGetTerminalPosition(buffer, word, i, false);
        }
+114 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.makedict;

import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
import com.android.inputmethod.latin.makedict.Word;

import junit.framework.TestCase;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Random;

/**
 * Unit tests for {@link FusionDictionary}.
 *
 * <p>Builds a dictionary out of randomly generated words and checks, after every insertion,
 * that each word added so far can be looked up with
 * {@link FusionDictionary#findWordInTree}. The generated words may contain supplementary
 * code points, which are stored as surrogate pairs in the Java string.
 */
public class FusionDictionaryTest extends TestCase {
    private static final ArrayList<String> sWords = new ArrayList<String>();
    private static final int MAX_UNIGRAMS = 1000;

    /**
     * Resets the shared word list and refills it with {@link #MAX_UNIGRAMS} random words.
     *
     * @param seed seed of the random generator; printed so a failing run can be reproduced.
     */
    private void prepare(final long seed) {
        System.out.println("Seed is " + seed);
        final Random random = new Random(seed);
        sWords.clear();
        generateWords(MAX_UNIGRAMS, random);
    }

    /**
     * Generates a random word.
     *
     * <p>The word always starts with "a" and is followed by at least one random code point.
     * Code points stop being appended once the builder holds 7 or more chars (UTF-16 units,
     * not code points), although random values keep being drawn until the counter runs out.
     *
     * @param random the source of randomness.
     * @return a string of at least two chars.
     */
    private String generateWord(final Random random) {
        // Retry until at least one code point follows the leading "a". This draws from the
        // random stream in the same order as a recursive retry would, without growing the stack.
        while (true) {
            final StringBuilder builder = new StringBuilder("a");
            // May be zero or negative, in which case nothing is appended and we retry.
            int count = random.nextInt() % 30;
            while (count > 0) {
                final long r = Math.abs(random.nextInt());
                // Math.abs(Integer.MIN_VALUE) is still negative: draw again.
                if (r < 0) continue;
                // Skip the lowest code points: the value appended is in the range
                // 20~0x10FFFE, within the valid code point range 0~0x10FFFF.
                // NOTE(review): this range includes U+D800~U+DFFF, so unpaired surrogates
                // may be appended -- confirm this is intended.
                if (builder.length() < 7) {
                    builder.appendCodePoint((int)(20 + r % (0x10FFFF - 20)));
                }
                --count;
            }
            if (builder.length() > 1) return builder.toString();
        }
    }

    /**
     * Appends random words to the shared list until it holds {@code number} entries.
     * Duplicates are not filtered out.
     */
    private void generateWords(final int number, final Random random) {
        while (sWords.size() < number) {
            sWords.add(generateWord(random));
        }
    }

    /**
     * Asserts that the first {@code limit} entries of {@code words} are found in {@code dict}.
     *
     * <p>When a word is missing, the word and the whole dictionary are dumped to standard
     * output before the assertion fails.
     *
     * @param dict the dictionary to search; must not be null.
     * @param words the words to look up, in insertion order.
     * @param limit the number of leading entries of {@code words} to check.
     */
    private void checkDictionary(final FusionDictionary dict, final ArrayList<String> words,
            int limit) {
        assertNotNull(dict);
        for (final String word : words) {
            if (--limit < 0) return;
            final CharGroup cg = FusionDictionary.findWordInTree(dict.mRoot, word);
            if (null == cg) {
                System.out.println("word " + dumpWord(word));
                dumpDict(dict);
            }
            assertNotNull(cg);
        }
    }

    /**
     * Returns the code points of {@code word} as space-separated decimal numbers.
     */
    private String dumpWord(final String word) {
        final StringBuilder sb = new StringBuilder("");
        // Step by code point rather than by char so surrogate pairs are printed as one value.
        for (int i = 0; i < word.length(); i = word.offsetByCodePoints(i, 1)) {
            sb.append(word.codePointAt(i));
            sb.append(" ");
        }
        return sb.toString();
    }

    /**
     * Prints every word yielded by iterating over {@code dict}, one per line.
     */
    private void dumpDict(final FusionDictionary dict) {
        for (Word w : dict) {
            System.out.println("Word " + dumpWord(w.mWord));
        }
    }

    // Test that after each insertion, every word added so far -- including the one that
    // was just added -- can be found in the dictionary.
    public void testFusion() {
        final FusionDictionary dict = new FusionDictionary(new Node(),
                new DictionaryOptions(new HashMap<String, String>(),
                        false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
        final long time = System.currentTimeMillis();
        prepare(time);
        for (int i = 0; i < sWords.size(); ++i) {
            System.out.println("Adding in pos " + i + " : " + dumpWord(sWords.get(i)));
            dict.add(sWords.get(i), 180, null, false);
            dumpDict(dict);
            // i + 1 words have been added at this point; check all of them.
            checkDictionary(dict, sWords, i + 1);
        }
    }
}