Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 666a4338 authored by Yuichiro Hanada's avatar Yuichiro Hanada
Browse files

add UserHistoryDictIOUtils.

Change-Id: I8a70e43b23f65b5fd5f0ee0b30a94ad8f5ef8a8a
parent 9bbc7ec0
Loading
Loading
Loading
Loading
+193 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.android.inputmethod.latin;

import android.util.Log;

import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
import com.android.inputmethod.latin.makedict.PendingAttribute;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;

import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

/**
 * Reads and writes Binary files for a UserHistoryDictionary.
 *
 * All the methods in this class are static.
 */
public class UserHistoryDictIOUtils {
    private static final String TAG = UserHistoryDictIOUtils.class.getSimpleName();
    private static final boolean DEBUG = false;

    /**
     * Receives the unigrams and bigrams decoded by
     * {@link #readDictionaryBinary(FusionDictionaryBufferInterface, OnAddWordListener)}.
     */
    public interface OnAddWordListener {
        public void setUnigram(final String word, final String shortcutTarget, final int frequency);
        public void setBigram(final String word1, final String word2, final int frequency);
    }

    /**
     * Supplies the frequency to store for a (word1, word2) pair when writing a dictionary.
     * {@link #constructFusionDictionary} calls it with a null word1 for unigram entries.
     */
    public interface BigramDictionaryInterface {
        public int getFrequency(final String word1, final String word2);
    }

    /**
     * A {@link FusionDictionaryBufferInterface} backed by an in-memory byte array.
     *
     * Multi-byte values are read most significant byte first. No bounds checking is done here:
     * reading past the end of the array throws {@link ArrayIndexOutOfBoundsException}.
     */
    public static final class ByteArrayWrapper implements FusionDictionaryBufferInterface {
        private final byte[] mBuffer;
        private int mPosition;

        ByteArrayWrapper(final byte[] buffer) {
            mBuffer = buffer;
            mPosition = 0;
        }

        @Override
        public int readUnsignedByte() {
            // Mask to undo the sign extension of the byte -> int conversion.
            return ((int)mBuffer[mPosition++]) & 0xFF;
        }

        @Override
        public int readUnsignedShort() {
            final int retval = readUnsignedByte();
            return (retval << 8) + readUnsignedByte();
        }

        @Override
        public int readUnsignedInt24() {
            final int retval = readUnsignedShort();
            return (retval << 8) + readUnsignedByte();
        }

        @Override
        public int readInt() {
            final int retval = readUnsignedShort();
            return (retval << 16) + readUnsignedShort();
        }

        @Override
        public int position() {
            return mPosition;
        }

        @Override
        public void position(int position) {
            mPosition = position;
        }
    }

    /**
     * Builds a FusionDictionary from the given bigram list and writes it to the stream.
     *
     * Failures are logged and not propagated to the caller. The stream is neither flushed nor
     * closed by this method.
     *
     * @param destination the stream to write the binary dictionary to.
     * @param dict the source of the frequency to store for each entry.
     * @param bigrams the unigrams (registered under a null first word) and bigrams to write.
     * @param version the binary format version passed to BinaryDictInputOutput.
     */
    public static void writeDictionaryBinary(final OutputStream destination,
            final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams,
            final int version) {

        final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams);

        try {
            BinaryDictInputOutput.writeDictionaryBinary(destination, fusionDict, version);
        } catch (IOException e) {
            // Pass the throwable itself so that the stack trace ends up in the log.
            Log.e(TAG, "IO exception while writing file", e);
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "Unsupported format", e);
        }
    }

    /**
     * Constructs a new FusionDictionary from BigramDictionaryInterface.
     *
     * Entries whose first word is null are added as unigrams, all others as bigrams. The
     * frequency obtained from dict is also written back into the bigram list, truncated to a
     * byte.
     *
     * @param dict the source of the frequency to store for each entry.
     * @param bigrams the list of entries to put into the dictionary.
     * @return the newly created dictionary.
     */
    /* package for test */ static FusionDictionary constructFusionDictionary(
            final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams) {

        final FusionDictionary fusionDict = new FusionDictionary(new Node(),
                new FusionDictionary.DictionaryOptions(
                        new HashMap<String,String>(), false, false));

        for (final String word1 : bigrams.keySet()) {
            final HashMap<String, Byte> word1Bigrams = bigrams.getBigrams(word1);
            for (final String word2 : word1Bigrams.keySet()) {
                final int freq = dict.getFrequency(word1, word2);

                if (DEBUG) {
                    if (word1 == null) {
                        Log.d(TAG, "add unigram: " + word2 + "," + freq);
                    } else {
                        Log.d(TAG, "add bigram: " + word1 + "," + word2 + "," + freq);
                    }
                }

                if (word1 == null) { // unigram
                    fusionDict.add(word2, freq, null);
                } else { // bigram
                    // NOTE(review): presumably word1 has to be in fusionDict already for this to
                    // succeed, which depends on the iteration order of keySet() -- confirm
                    // against FusionDictionary.setBigram.
                    fusionDict.setBigram(word1, word2, freq);
                }
                bigrams.updateBigram(word1, word2, (byte)freq);
            }
        }

        return fusionDict;
    }

    /**
     * Reads a binary dictionary from the buffer and reports its content to the listener.
     *
     * Nothing is reported unless the whole buffer was decoded successfully. Failures, including
     * a read past the end of a truncated or corrupt buffer, are logged and not propagated.
     *
     * @param buffer the buffer holding the binary dictionary.
     * @param dict the listener that receives every unigram and bigram.
     */
    public static void readDictionaryBinary(final FusionDictionaryBufferInterface buffer,
            final OnAddWordListener dict) {
        final Map<Integer, String> unigrams = CollectionUtils.newTreeMap();
        final Map<Integer, Integer> frequencies = CollectionUtils.newTreeMap();
        final Map<Integer, ArrayList<PendingAttribute>> bigrams = CollectionUtils.newTreeMap();

        try {
            BinaryDictInputOutput.readUnigramsAndBigramsBinary(buffer, unigrams, frequencies,
                    bigrams);
        } catch (IOException e) {
            Log.e(TAG, "IO exception while reading file", e);
            return;
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "Unsupported format", e);
            return;
        } catch (ArrayIndexOutOfBoundsException e) {
            // ByteArrayWrapper does no bounds checking, so a truncated or corrupt file shows up
            // here. Treat it like any other unreadable file instead of crashing the caller.
            Log.e(TAG, "ArrayIndexOutOfBoundsException while reading file", e);
            return;
        }
        addWordsFromWordMap(unigrams, frequencies, bigrams, dict);
    }

    /**
     * Adds all unigrams and bigrams in maps to OnAddWordListener.
     *
     * A unigram without a frequency, or a bigram whose second word can't be resolved, is logged
     * and skipped.
     *
     * @param unigrams the words, keyed by their address.
     * @param frequencies the unigram frequencies, keyed by the address of the word.
     * @param bigrams the bigram attributes, keyed by the address of the first word.
     * @param to the listener that receives every unigram and bigram.
     */
    /* package for test */ static void addWordsFromWordMap(final Map<Integer, String> unigrams,
            final Map<Integer, Integer> frequencies,
            final Map<Integer, ArrayList<PendingAttribute>> bigrams, final OnAddWordListener to) {

        for (Map.Entry<Integer, String> entry : unigrams.entrySet()) {
            final Integer address = entry.getKey();
            final String word1 = entry.getValue();

            // Don't unbox blindly: a missing entry would throw a NullPointerException.
            final Integer storedFrequency = frequencies.get(address);
            if (storedFrequency == null) {
                Log.e(TAG, "No frequency found for unigram: " + word1);
                continue;
            }
            final int unigramFrequency = storedFrequency;
            to.setUnigram(word1, null, unigramFrequency);

            final ArrayList<PendingAttribute> attrList = bigrams.get(address);
            if (attrList == null) {
                continue;
            }
            for (final PendingAttribute attr : attrList) {
                final String word2 = unigrams.get(attr.mAddress);
                if (word2 == null) {
                    // The attribute points to an address that holds no word.
                    Log.e(TAG, "Invalid bigram pair detected: " + word1 + ", address "
                            + attr.mAddress);
                    continue;
                }
                to.setBigram(word1, word2,
                        BinaryDictInputOutput.reconstructBigramFrequency(unigramFrequency,
                                attr.mFrequency));
            }
        }
    }
}
 No newline at end of file
+2 −11
Original line number Diff line number Diff line
@@ -189,7 +189,7 @@ public class BinaryDictInputOutput {
    // suspicion that a bug might be causing an infinite loop.
    private static final int MAX_PASSES = 24;

    private interface FusionDictionaryBufferInterface {
    public interface FusionDictionaryBufferInterface {
        public int readUnsignedByte();
        public int readUnsignedShort();
        public int readUnsignedInt24();
@@ -234,7 +234,6 @@ public class BinaryDictInputOutput {
        @Override
        public void position(int newPos) {
            mBuffer.position(newPos);
            return;
        }
    }

@@ -1393,7 +1392,6 @@ public class BinaryDictInputOutput {
            final FusionDictionaryBufferInterface buffer, final int headerSize,
            final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
            final Map<Integer, ArrayList<PendingAttribute>> bigrams) {

        int[] pushedChars = new int[MAX_WORD_LENGTH + 1];

        Stack<Position> stack = new Stack<Position>();
@@ -1443,8 +1441,6 @@ public class BinaryDictInputOutput {
                stack.push(childrenPos);
            }
        }

        return;
    }

    /**
@@ -1462,7 +1458,6 @@ public class BinaryDictInputOutput {
            final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
            final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException,
            UnsupportedFormatException {

        // Read header
        final int version = checkFormatVersion(buffer);
        final int optionsFlags = buffer.readUnsignedShort();
@@ -1507,10 +1502,8 @@ public class BinaryDictInputOutput {
     * @throws UnsupportedFormatException
     */
    private static int readHeader(final FusionDictionaryBufferInterface buffer,
            final HashMap<String, String> options,
            final int version)
            final HashMap<String, String> options, final int version)
            throws IOException, UnsupportedFormatException {

        final int headerSize;
        if (version < FIRST_VERSION_WITH_HEADER_SIZE) {
            headerSize = buffer.position();
@@ -1523,7 +1516,6 @@ public class BinaryDictInputOutput {
        if (headerSize < 0) {
            throw new UnsupportedFormatException("header size can't be negative.");
        }

        return headerSize;
    }

@@ -1561,7 +1553,6 @@ public class BinaryDictInputOutput {
    public static FusionDictionary readDictionaryBinary(
            final FusionDictionaryBufferInterface buffer, final FusionDictionary dict)
                    throws IOException, UnsupportedFormatException {

        // clear cache
        wordCache.clear();

+249 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin;

import com.android.inputmethod.latin.UserHistoryDictIOUtils.BigramDictionaryInterface;
import com.android.inputmethod.latin.UserHistoryDictIOUtils.OnAddWordListener;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;

import android.content.Context;
import android.test.AndroidTestCase;
import android.util.Log;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;

/**
 * Unit tests for UserHistoryDictIOUtils
 */
public class UserHistoryDictIOUtilsTests extends AndroidTestCase
    implements BigramDictionaryInterface {

    private static final String TAG = UserHistoryDictIOUtilsTests.class.getSimpleName();
    private static final int UNIGRAM_FREQUENCY = 50;
    private static final int BIGRAM_FREQUENCY = 100;
    private static final ArrayList<String> NOT_HAVE_BIGRAM = new ArrayList<String>();

    /**
     * Return same frequency for all words and bigrams
     */
    @Override
    public int getFrequency(String word1, String word2) {
        if (word1 == null) return UNIGRAM_FREQUENCY;
        return BIGRAM_FREQUENCY;
    }

    // Utilities for Testing

    private void addWord(final String word,
            final HashMap<String, ArrayList<String> > addedWords) {
        if (!addedWords.containsKey(word)) {
            addedWords.put(word, new ArrayList<String>());
        }
    }

    private void addBigram(final String word1, final String word2,
            final HashMap<String, ArrayList<String> > addedWords) {
        addWord(word1, addedWords);
        addWord(word2, addedWords);
        addedWords.get(word1).add(word2);
    }

    private void addBigramToBigramList(final String word1, final String word2,
            final HashMap<String, ArrayList<String> > addedWords,
            final UserHistoryDictionaryBigramList bigramList) {
        bigramList.addBigram(null, word1);
        bigramList.addBigram(word1, word2);

        addBigram(word1, word2, addedWords);
    }

    private void checkWordInFusionDict(final FusionDictionary dict, final String word,
            final ArrayList<String> expectedBigrams) {
        final CharGroup group = FusionDictionary.findWordInTree(dict.mRoot, word);
        assertNotNull(group);
        assertTrue(group.isTerminal());

        for (final String bigram : expectedBigrams) {
            assertNotNull(group.getBigram(bigram));
        }
    }

    private void checkWordsInFusionDict(final FusionDictionary dict,
            final HashMap<String, ArrayList<String> > bigrams) {
        for (final String word : bigrams.keySet()) {
            if (bigrams.containsKey(word)) {
                checkWordInFusionDict(dict, word, bigrams.get(word));
            } else {
                checkWordInFusionDict(dict, word, NOT_HAVE_BIGRAM);
            }
        }
    }

    private void checkWordInBigramList(
            final UserHistoryDictionaryBigramList bigramList, final String word,
            final ArrayList<String> expectedBigrams) {
        // check unigram
        final HashMap<String,Byte> unigramMap = bigramList.getBigrams(null);
        assertTrue(unigramMap.containsKey(word));

        // check bigrams
        final ArrayList<String> actualBigrams = new ArrayList<String>(
                bigramList.getBigrams(word).keySet());

        Collections.sort(expectedBigrams);
        Collections.sort(actualBigrams);
        assertEquals(expectedBigrams, actualBigrams);
    }

    private void checkWordsInBigramList(final UserHistoryDictionaryBigramList bigramList,
            final HashMap<String, ArrayList<String> > addedWords) {
        for (final String word : addedWords.keySet()) {
            if (addedWords.containsKey(word)) {
                checkWordInBigramList(bigramList, word, addedWords.get(word));
            } else {
                checkWordInBigramList(bigramList, word, NOT_HAVE_BIGRAM);
            }
        }
    }

    private void writeDictToFile(final File file,
            final UserHistoryDictionaryBigramList bigramList) {
        try {
            final FileOutputStream out = new FileOutputStream(file);
            UserHistoryDictIOUtils.writeDictionaryBinary(out, this, bigramList, 2);
            out.flush();
            out.close();
        } catch (IOException e) {
            Log.e(TAG, "IO exception while writing file: " + e);
        }
    }

    private void readDictFromFile(final File file, final OnAddWordListener listener) {
        FileInputStream inStream = null;

        try {
            inStream = new FileInputStream(file);
            final byte[] buffer = new byte[(int)file.length()];
            inStream.read(buffer);

            UserHistoryDictIOUtils.readDictionaryBinary(
                    new UserHistoryDictIOUtils.ByteArrayWrapper(buffer), listener);
        } catch (FileNotFoundException e) {
            Log.e(TAG, "file not found: " + e);
        } catch (IOException e) {
            Log.e(TAG, "IOException: " + e);
        } finally {
            if (inStream != null) {
                try {
                    inStream.close();
                } catch (IOException e) {
                    // do nothing
                }
            }
        }
    }

    public void testGenerateFusionDictionary() {
        final UserHistoryDictionaryBigramList originalList = new UserHistoryDictionaryBigramList();

        final HashMap<String, ArrayList<String> > addedWords =
                new HashMap<String, ArrayList<String>>();
        addBigramToBigramList("this", "is", addedWords, originalList);
        addBigramToBigramList("this", "was", addedWords, originalList);
        addBigramToBigramList("hello", "world", addedWords, originalList);

        final FusionDictionary fusionDict =
                UserHistoryDictIOUtils.constructFusionDictionary(this, originalList);

        checkWordsInFusionDict(fusionDict, addedWords);
    }

    public void testReadAndWrite() {
        final Context context = getContext();

        File file = null;
        try {
            file = File.createTempFile("testReadAndWrite", ".dict");
        } catch (IOException e) {
            Log.d(TAG, "IOException while creating a temporary file: " + e);
        }
        assertNotNull(file);

        // make original dictionary
        final UserHistoryDictionaryBigramList originalList = new UserHistoryDictionaryBigramList();
        final HashMap<String, ArrayList<String>> addedWords = CollectionUtils.newHashMap();
        addBigramToBigramList("this" , "is"   , addedWords, originalList);
        addBigramToBigramList("this" , "was"  , addedWords, originalList);
        addBigramToBigramList("is"   , "not"  , addedWords, originalList);
        addBigramToBigramList("hello", "world", addedWords, originalList);

        // write to file
        writeDictToFile(file, originalList);

        // make result dict.
        final UserHistoryDictionaryBigramList resultList = new UserHistoryDictionaryBigramList();
        final OnAddWordListener listener = new OnAddWordListener() {
            @Override
            public void setUnigram(final String word,
                    final String shortcutTarget, final int frequency) {
                Log.d(TAG, "in: setUnigram: " + word + "," + frequency);
                resultList.addBigram(null, word, (byte)frequency);
            }
            @Override
            public void setBigram(final String word1, final String word2, final int frequency) {
                Log.d(TAG, "in: setBigram: " + word1 + "," + word2 + "," + frequency);
                resultList.addBigram(word1, word2, (byte)frequency);
            }
        };

        // load from file
        readDictFromFile(file, listener);
        checkWordsInBigramList(resultList, addedWords);

        // add new bigram
        addBigramToBigramList("hello", "java", addedWords, resultList);

        // rewrite
        writeDictToFile(file, resultList);
        final UserHistoryDictionaryBigramList resultList2 = new UserHistoryDictionaryBigramList();
        final OnAddWordListener listener2 = new OnAddWordListener() {
            @Override
            public void setUnigram(final String word,
                    final String shortcutTarget, final int frequency) {
                Log.d(TAG, "in: setUnigram: " + word + "," + frequency);
                resultList2.addBigram(null, word, (byte)frequency);
            }
            @Override
            public void setBigram(final String word1, final String word2, final int frequency) {
                Log.d(TAG, "in: setBigram: " + word1 + "," + word2 + "," + frequency);
                resultList2.addBigram(word1, word2, (byte)frequency);
            }
        };

        // load from file
        readDictFromFile(file, listener2);
        checkWordsInBigramList(resultList2, addedWords);
    }
}