Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 20a6dea1 authored by Jean Chalard
Browse files

Add a flag for bigram presence in the header

This is a cherry-pick of Icb602762 onto jb-dev.

Bug: 6355745
Change-Id: Icb602762bb0d81472f024fa491571062ec1fc4e9
parent 329c8d7b
Loading
Loading
Loading
Loading
+7 −3
Original line number Diff line number Diff line
@@ -131,6 +131,7 @@ public class BinaryDictInputOutput {
    // These options need to have the same numeric values as the ones in the native reading code.
    private static final int GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
    private static final int FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
    private static final int CONTAINS_BIGRAMS_FLAG = 0x8;

    // TODO: Make this value adaptive to content data, store it in the header, and
    // use it in the reading code.
@@ -752,9 +753,12 @@ public class BinaryDictInputOutput {
    /**
     * Makes the 2-byte value for options flags.
     */
    private static final int makeOptionsValue(final DictionaryOptions options) {
    private static final int makeOptionsValue(final FusionDictionary dictionary) {
        final DictionaryOptions options = dictionary.mOptions;
        final boolean hasBigrams = dictionary.hasBigrams();
        return (options.mFrenchLigatureProcessing ? FRENCH_LIGATURE_PROCESSING_FLAG : 0)
                + (options.mGermanUmlautProcessing ? GERMAN_UMLAUT_PROCESSING_FLAG : 0);
                + (options.mGermanUmlautProcessing ? GERMAN_UMLAUT_PROCESSING_FLAG : 0)
                + (hasBigrams ? CONTAINS_BIGRAMS_FLAG : 0);
    }

    /**
@@ -970,7 +974,7 @@ public class BinaryDictInputOutput {
            headerBuffer.write((byte) (0xFF & version));
        }
        // Options flags
        final int options = makeOptionsValue(dict.mOptions);
        final int options = makeOptionsValue(dict);
        headerBuffer.write((byte) (0xFF & (options >> 8)));
        headerBuffer.write((byte) (0xFF & options));
        if (version >= FIRST_VERSION_WITH_HEADER_SIZE) {
+27 −1
Original line number Diff line number Diff line
@@ -563,7 +563,7 @@ public class FusionDictionary implements Iterable<Word> {
     * Recursively count the number of nodes in a given branch of the trie.
     *
     * @param node the node to count.
     * @result the number of nodes in this branch.
     * @return the number of nodes in this branch.
     */
    public static int countNodes(final Node node) {
        int size = 1;
@@ -575,6 +575,32 @@ public class FusionDictionary implements Iterable<Word> {
        return size;
    }

    // Depth-first search for any character group carrying bigram data, starting at the
    // given node. A null node counts as an empty branch. The search stops at the first
    // group whose mBigrams is non-null, so the worst case is a branch without any bigram:
    // every reachable group is then visited before false is returned.
    private static boolean hasBigramsInternal(final Node node) {
        if (node == null) {
            return false;
        }
        // Walk the groups of this node from the last one down to the first.
        int index = node.mData.size();
        while (--index >= 0) {
            final CharGroup charGroup = node.mData.get(index);
            // Check the group itself first; only descend into its children when it has
            // no bigram list of its own.
            if (charGroup.mBigrams != null || hasBigramsInternal(charGroup.mChildren)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether this dictionary holds at least one bigram.
     *
     * The answer is computed on demand by searching the trie from its root.
     *
     * @return true if there is any bigram, false otherwise.
     */
    public boolean hasBigrams() {
        // TODO: this is expensive especially for large dictionaries without any bigram.
        // The up side is, this is always accurate and correct and uses no memory. We should
        // find a more efficient way of doing this, without compromising too much on memory
        // and ease of use.
        final boolean foundBigram = hasBigramsInternal(mRoot);
        return foundBigram;
    }

    // Historically, the tails of the words were going to be merged to save space.
    // However, that would prevent the code from searching for a specific address in log(n)
    // time so this was abandoned.