Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Unverified Commit 5650c86e authored by Marten Gajda's avatar Marten Gajda Committed by GitHub
Browse files

Improve FTS performance, fixes #750 (#752)

The previous implementation tried to use the CONFLICT_IGNORE policy when inserting new ngrams and fell back to a query whenever it failed to obtain the id of the existing ngram. This was very slow because it meant an expensive query for every ngram to insert.

The new solution goes over the existing ngrams and only inserts the ones which don't exist in the database. Similarly, the ngram relations table is no longer cleared for the particular task; instead, the existing ngrams are loaded, only new relations are inserted, and obsolete relations are removed.
parent ea2aa2e9
Loading
Loading
Loading
Loading
+1 −1
Original line number Original line Diff line number Diff line
def support_lib_version = '26.1.0'
def support_lib_version = '26.1.0'
def jems_version = '1.15'
def jems_version = '1.18'
def contentpal_version = '9b087b2' // 9b087b2 -> 2017-12-12
def contentpal_version = '9b087b2' // 9b087b2 -> 2017-12-12
def support_test_runner_version = '0.5'
def support_test_runner_version = '0.5'


+7 −22
Original line number Original line Diff line number Diff line
@@ -16,6 +16,7 @@


package org.dmfs.ngrams;
package org.dmfs.ngrams;


import java.util.Collections;
import java.util.HashSet;
import java.util.HashSet;
import java.util.Locale;
import java.util.Locale;
import java.util.Set;
import java.util.Set;
@@ -112,28 +113,15 @@ public final class NGramGenerator
     * @param data
     * @param data
     *         The String to analyze.
     *         The String to analyze.
     *
     *
     * @return A {@link Set} containing all N-grams.
     * @return The {@link Set} containing the N-grams.
     */
     */
    public Set<String> getNgrams(String data)
    public Set<String> getNgrams(String data)
    {
    {
        Set<String> result = new HashSet<String>(128);
        if (data == null)

        {
        return getNgrams(result, data);
            return Collections.emptySet();
        }
        }



    /**
     * Get all N-grams contained in the given String.
     *
     * @param set
     *         The set to add all the N-grams to, or <code>null</code> to create a new set.
     * @param data
     *         The String to analyze.
     *
     * @return The {@link Set} containing the N-grams.
     */
    public Set<String> getNgrams(Set<String> set, String data)
    {
        if (mAllLowercase)
        if (mAllLowercase)
        {
        {
            data = data.toLowerCase(mLocale);
            data = data.toLowerCase(mLocale);
@@ -141,10 +129,7 @@ public final class NGramGenerator


        String[] words = mReturnNumbers ? SEPARATOR_PATTERN.split(data) : SEPARATOR_PATTERN_NO_NUMBERS.split(data);
        String[] words = mReturnNumbers ? SEPARATOR_PATTERN.split(data) : SEPARATOR_PATTERN_NO_NUMBERS.split(data);


        if (set == null)
        Set<String> set = new HashSet<String>(128);
        {
            set = new HashSet<String>(128);
        }


        for (String word : words)
        for (String word : words)
        {
        {
+110 −45
Original line number Original line Diff line number Diff line
@@ -24,12 +24,16 @@ import android.text.TextUtils;
import org.dmfs.ngrams.NGramGenerator;
import org.dmfs.ngrams.NGramGenerator;
import org.dmfs.provider.tasks.TaskDatabaseHelper.Tables;
import org.dmfs.provider.tasks.TaskDatabaseHelper.Tables;
import org.dmfs.provider.tasks.model.TaskAdapter;
import org.dmfs.provider.tasks.model.TaskAdapter;
import org.dmfs.provider.tasks.utils.Chunked;
import org.dmfs.tasks.contract.TaskContract;
import org.dmfs.tasks.contract.TaskContract;
import org.dmfs.tasks.contract.TaskContract.Properties;
import org.dmfs.tasks.contract.TaskContract.Properties;
import org.dmfs.tasks.contract.TaskContract.TaskColumns;
import org.dmfs.tasks.contract.TaskContract.TaskColumns;
import org.dmfs.tasks.contract.TaskContract.Tasks;
import org.dmfs.tasks.contract.TaskContract.Tasks;


import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.Set;




@@ -41,6 +45,11 @@ import java.util.Set;
 */
 */
public class FTSDatabaseHelper
public class FTSDatabaseHelper
{
{
    /**
     * We search the ngram table in chunks of 500. This should be good enough for an average task but still well below
     * the SQLITE expression length limit and the variable count limit.
     */
    private final static int NGRAM_SEARCH_CHUNK_SIZE = 500;


    private final static float SEARCH_RESULTS_MIN_SCORE = 0.33f;
    private final static float SEARCH_RESULTS_MIN_SCORE = 0.33f;


@@ -54,6 +63,12 @@ public class FTSDatabaseHelper
     */
     */
    private final static NGramGenerator TETRAGRAM_GENERATOR = new NGramGenerator(4, 3 /* shorter words are fully covered by trigrams */).setAddSpaceInFront(
    private final static NGramGenerator TETRAGRAM_GENERATOR = new NGramGenerator(4, 3 /* shorter words are fully covered by trigrams */).setAddSpaceInFront(
            true);
            true);
    private static final String PROPERTY_NGRAM_SELECTION = String.format("%s = ? AND %s = ? AND %s = ?", FTSContentColumns.TASK_ID, FTSContentColumns.TYPE,
            FTSContentColumns.PROPERTY_ID);
    private static final String NON_PROPERTY_NGRAM_SELECTION = String.format("%s = ? AND %s = ? AND %s is null", FTSContentColumns.TASK_ID,
            FTSContentColumns.TYPE,
            FTSContentColumns.PROPERTY_ID);
    private static final String[] NGRAM_SYNC_COLUMNS = { "_rowid_", FTSContentColumns.NGRAM_ID };




    /**
    /**
@@ -324,67 +339,89 @@ public class FTSDatabaseHelper




    /**
    /**
     * Inserts NGrams into the NGram database.
     * Returns the IDs of each of the provided ngrams, creating them in the database if necessary.
     *
     *
     * @param db
     * @param db
     *         A writable {@link SQLiteDatabase}.
     *         A writable {@link SQLiteDatabase}.
     * @param ngrams
     * @param ngrams
     *         The set of NGrams.
     *         The NGrams.
     *
     *
     * @return The ids of the ngrams in the given set.
     * @return The ids of the ngrams in the given set.
     */
     */
    private static Set<Long> insertNGrams(SQLiteDatabase db, Set<String> ngrams)
    private static Set<Long> ngramIds(SQLiteDatabase db, Set<String> ngrams)
    {
    {
        Set<Long> nGramIds = new HashSet<Long>(ngrams.size());
        if (ngrams.size() == 0)
        ContentValues values = new ContentValues(1);
        for (String ngram : ngrams)
        {
        {
            values.put(NGramColumns.TEXT, ngram);
            return Collections.emptySet();
            long nGramId = db.insertWithOnConflict(FTS_NGRAM_TABLE, null, values, SQLiteDatabase.CONFLICT_IGNORE);
        }
            if (nGramId == -1)

        Set<String> missingNgrams = new HashSet<>(ngrams);
        Set<Long> ngramIds = new HashSet<>(ngrams.size() * 2);

        for (Iterable<String> chunk : new Chunked<>(NGRAM_SEARCH_CHUNK_SIZE, ngrams))
        {
        {
                // the docs say insertWithOnConflict returns the existing row id when CONFLICT_IGNORE is specified an the values conflict with an existing
            // build selection and arguments for each chunk
                // column, however, that doesn't seem to work reliably, so we when for an error condition and get the row id ourselves
            // we can't do this in a single query because the length of sql statement and number of arguments is limited.
                Cursor c = db

                        .query(FTS_NGRAM_TABLE, new String[] { NGramColumns.NGRAM_ID }, NGramColumns.TEXT + "=?", new String[] { ngram }, null, null, null);
            StringBuilder selection = new StringBuilder(NGramColumns.TEXT);
                try
            selection.append(" in (");
            boolean first = true;
            List<String> arguments = new ArrayList<>(NGRAM_SEARCH_CHUNK_SIZE);
            for (String ngram : chunk)
            {
            {
                    if (c.moveToFirst())
                if (first)
                {
                {
                        nGramId = c.getLong(0);
                    first = false;
                }
                }
                }
                else
                finally
                {
                {
                    c.close();
                    selection.append(",");
                }
                selection.append("?");
                arguments.add(ngram);
            }
            }
            selection.append(" )");


            try (Cursor c = db.query(FTS_NGRAM_TABLE, new String[] { NGramColumns.NGRAM_ID, NGramColumns.TEXT }, selection.toString(),
                    arguments.toArray(new String[0]), null, null, null))
            {
                while (c.moveToNext())
                {
                    // remove the ngrams we already have in the table
                    missingNgrams.remove(c.getString(1));
                    // remember its id
                    ngramIds.add(c.getLong(0));
                }
                }
            nGramIds.add(nGramId);
            }
            }
        return nGramIds;

        }
        }


        ContentValues values = new ContentValues(1);


    private static void updateEntry(SQLiteDatabase db, long taskId, long propertyId, int type, String searchableText)
        // now insert the missing ngrams and store their ids
        for (String ngram : missingNgrams)
        {
        {
        // delete existing NGram relations
            values.put(NGramColumns.TEXT, ngram);
        deleteNGramRelations(db, taskId, propertyId, type);
            ngramIds.add(db.insert(FTS_NGRAM_TABLE, null, values));
        }
        return ngramIds;


        if (searchableText != null && searchableText.length() > 0)
    }


    private static void updateEntry(SQLiteDatabase db, long taskId, long propertyId, int type, String searchableText)
    {
    {
        // generate nGrams
        // generate nGrams
        Set<String> propertyNgrams = TRIGRAM_GENERATOR.getNgrams(searchableText);
        Set<String> propertyNgrams = TRIGRAM_GENERATOR.getNgrams(searchableText);
        propertyNgrams.addAll(TETRAGRAM_GENERATOR.getNgrams(searchableText));


            TETRAGRAM_GENERATOR.getNgrams(propertyNgrams, searchableText);
        // get an ID for each of the Ngrams.
        Set<Long> ngramIds = ngramIds(db, propertyNgrams);


            // insert ngrams
        // unlink unused ngrams from the task and get the missing ones we have to link to the task
            Set<Long> propertyNgramIds = insertNGrams(db, propertyNgrams);
        Set<Long> missing = syncNgrams(db, taskId, propertyId, type, ngramIds);


            // insert ngram relations
        // insert ngram relations for all new ngrams
            insertNGramRelations(db, propertyNgramIds, taskId, propertyId, type);
        addNgrams(db, missing, taskId, propertyId, type);
        }
    }
    }




@@ -400,7 +437,7 @@ public class FTSDatabaseHelper
     * @param propertyId
     * @param propertyId
     *         The row id of the property.
     *         The row id of the property.
     */
     */
    private static void insertNGramRelations(SQLiteDatabase db, Set<Long> ngramIds, long taskId, Long propertyId, int contentType)
    private static void addNgrams(SQLiteDatabase db, Set<Long> ngramIds, long taskId, Long propertyId, int contentType)
    {
    {
        ContentValues values = new ContentValues(4);
        ContentValues values = new ContentValues(4);
        for (Long ngramId : ngramIds)
        for (Long ngramId : ngramIds)
@@ -416,14 +453,14 @@ public class FTSDatabaseHelper
            {
            {
                values.putNull(FTSContentColumns.PROPERTY_ID);
                values.putNull(FTSContentColumns.PROPERTY_ID);
            }
            }
            db.insertWithOnConflict(FTS_CONTENT_TABLE, null, values, SQLiteDatabase.CONFLICT_IGNORE);
            db.insert(FTS_CONTENT_TABLE, null, values);
        }
        }


    }
    }




    /**
    /**
     * Deletes the NGram relations of a task
     * Synchronizes the NGram relations of a task
     *
     *
     * @param db
     * @param db
     *         The writable {@link SQLiteDatabase}.
     *         The writable {@link SQLiteDatabase}.
@@ -433,18 +470,46 @@ public class FTSDatabaseHelper
     *         The property row id, ignored if <code>contentType</code> is not {@link SearchableTypes#PROPERTY}.
     *         The property row id, ignored if <code>contentType</code> is not {@link SearchableTypes#PROPERTY}.
     * @param contentType
     * @param contentType
     *         The {@link SearchableTypes} type.
     *         The {@link SearchableTypes} type.
     * @param ngramsIds
     *         The set of ngrams ids which should be linked to the task
     *
     *
     * @return The number of deleted relations.
     * @return The number of deleted relations.
     */
     */
    private static int deleteNGramRelations(SQLiteDatabase db, long taskId, long propertyId, int contentType)
    private static Set<Long> syncNgrams(SQLiteDatabase db, long taskId, long propertyId, int contentType, Set<Long> ngramsIds)
    {
    {
        StringBuilder whereClause = new StringBuilder(FTSContentColumns.TASK_ID).append(" = ").append(taskId);
        String selection;
        whereClause.append(" AND ").append(FTSContentColumns.TYPE).append(" = ").append(contentType);
        String[] selectionArgs;
        if (contentType == SearchableTypes.PROPERTY)
        if (SearchableTypes.PROPERTY == contentType)
        {
            selection = PROPERTY_NGRAM_SELECTION;
            selectionArgs = new String[] { String.valueOf(taskId), String.valueOf(contentType), String.valueOf(propertyId) };
        }
        else
        {
            selection = NON_PROPERTY_NGRAM_SELECTION;
            selectionArgs = new String[] { String.valueOf(taskId), String.valueOf(contentType) };
        }

        // In order to sync the ngrams, we go over each existing ngram and delete ngram relations not in the set of new ngrams
        // Then we return the set of ngrams we didn't find
        Set<Long> missing = new HashSet<>(ngramsIds);
        try (Cursor c = db.query(FTS_CONTENT_TABLE, NGRAM_SYNC_COLUMNS, selection, selectionArgs, null, null, null))
        {
            while (c.moveToNext())
            {
                Long ngramId = c.getLong(1);
                if (!ngramsIds.contains(ngramId))
                {
                {
            whereClause.append(" AND ").append(FTSContentColumns.PROPERTY_ID).append(" = ").append(propertyId);
                    db.delete(FTS_CONTENT_TABLE, "_rowid_ = ?", new String[] { c.getString(0) });
                }
                else
                {
                    // this ngram wasn't missing
                    missing.remove(ngramId);
                }
            }
        }
        }
        return db.delete(FTS_CONTENT_TABLE, whereClause.toString(), null);
        return missing;
    }
    }




@@ -484,7 +549,7 @@ public class FTSDatabaseHelper
        }
        }


        Set<String> ngrams = TRIGRAM_GENERATOR.getNgrams(searchString);
        Set<String> ngrams = TRIGRAM_GENERATOR.getNgrams(searchString);
        TETRAGRAM_GENERATOR.getNgrams(ngrams, searchString);
        ngrams.addAll(TETRAGRAM_GENERATOR.getNgrams(searchString));


        String[] queryArgs;
        String[] queryArgs;


+51 −0
Original line number Original line Diff line number Diff line
/*
 * Copyright 2019 dmfs GmbH
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.dmfs.provider.tasks.utils;

import java.util.Iterator;
import java.util.Locale;


/**
 * An {@link Iterable} decorator which returns the elements of the decorated {@link Iterable} in chunks of a specific size.
 *
 * @author Marten Gajda
 * @deprecated TODO: move to jems
 */
public final class Chunked<T> implements Iterable<Iterable<T>>
{
    /**
     * Maximum number of elements per chunk; validated to be positive in the constructor.
     */
    private final int mChunkSize;

    /**
     * The {@link Iterable} whose elements are handed out in chunks.
     */
    private final Iterable<T> mDelegate;


    /**
     * Creates a chunked view on the given {@link Iterable}.
     *
     * @param chunkSize
     *         The maximum number of elements per chunk, must be greater than 0.
     * @param delegate
     *         The {@link Iterable} to split into chunks.
     *
     * @throws IllegalArgumentException
     *         if <code>chunkSize</code> is 0 or negative.
     */
    public Chunked(int chunkSize, Iterable<T> delegate)
    {
        if (chunkSize < 1)
        {
            String message = String.format(Locale.ENGLISH, "Chunk size must be >0 but was %s", chunkSize);
            throw new IllegalArgumentException(message);
        }
        this.mChunkSize = chunkSize;
        this.mDelegate = delegate;
    }


    /**
     * Returns a new {@link ChunkedIterator} over a fresh {@link Iterator} of the decorated {@link Iterable}.
     *
     * @return An {@link Iterator} of chunks.
     */
    @Override
    public Iterator<Iterable<T>> iterator()
    {
        Iterator<T> source = mDelegate.iterator();
        return new ChunkedIterator<>(mChunkSize, source);
    }
}
+68 −0
Original line number Original line Diff line number Diff line
/*
 * Copyright 2019 dmfs GmbH
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.dmfs.provider.tasks.utils;

import org.dmfs.iterators.AbstractBaseIterator;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;


/**
 * An {@link Iterator} decorator which returns the elements of the decorated {@link Iterator} in chunks of a specific size.
 *
 * @author Marten Gajda
 * @deprecated TODO: Move to jems.
 */
public final class ChunkedIterator<T> extends AbstractBaseIterator<Iterable<T>>
{
    /**
     * Maximum number of elements per chunk; validated to be positive in the constructor.
     */
    private final int mChunkSize;

    /**
     * The {@link Iterator} supplying the elements of the chunks.
     */
    private final Iterator<T> mDelegate;


    /**
     * Creates a chunking decorator for the given {@link Iterator}.
     *
     * @param chunkSize
     *         The maximum number of elements per chunk, must be greater than 0.
     * @param delegate
     *         The {@link Iterator} to read the elements from.
     *
     * @throws IllegalArgumentException
     *         if <code>chunkSize</code> is 0 or negative.
     */
    public ChunkedIterator(int chunkSize, Iterator<T> delegate)
    {
        if (chunkSize < 1)
        {
            String message = String.format(Locale.ENGLISH, "Chunk size must be >0 but was %s", chunkSize);
            throw new IllegalArgumentException(message);
        }
        this.mChunkSize = chunkSize;
        this.mDelegate = delegate;
    }


    /**
     * Another chunk is available as long as the delegate has at least one more element.
     */
    @Override
    public boolean hasNext()
    {
        return mDelegate.hasNext();
    }


    /**
     * Collects the next chunk from the delegate.
     *
     * @return A new list holding between 1 and <code>chunkSize</code> elements; only the last chunk can be shorter than <code>chunkSize</code>.
     */
    @Override
    public Iterable<T> next()
    {
        List<T> chunk = new ArrayList<>(mChunkSize);

        // The first element is taken unconditionally, so calling next() on an exhausted delegate
        // propagates whatever the delegate's own next() throws.
        chunk.add(mDelegate.next());

        // Keep filling until the delegate runs dry or the chunk is full. The delegate is asked first,
        // so it is queried after every element, including the one which completes the chunk.
        while (mDelegate.hasNext() && chunk.size() < mChunkSize)
        {
            chunk.add(mDelegate.next());
        }
        return chunk;
    }
}
Loading