Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Unverified Commit 04558da9 authored by Marten Gajda's avatar Marten Gajda Committed by GitHub
Browse files

Fix & improve FTS. Implements #552 (#556)

Improve free text search by:
 * Fixing n-gram generation when a space is added in front of a word
 * Giving 4-grams a higher weight than multiple matches of the same 3-gram by not counting duplicate n-grams (as a result, the score can no longer exceed 1)
 * Lowering the min-score to 0.33, which means at least 1 out of 3 n-grams must match in order for a task to be considered a result

The changes are supposed to favor longer matches over many shorter matches.
parent aa955cd0
Loading
Loading
Loading
Loading
+3 −21
Original line number Diff line number Diff line
@@ -46,8 +46,6 @@ public final class NGramGenerator
    private boolean mAddSpaceInFront = false;
    private Locale mLocale = Locale.getDefault();

    private char[] mTempArray;


    public NGramGenerator(int n)
    {
@@ -59,8 +57,6 @@ public final class NGramGenerator
    {
        mN = n;
        mMinWordLen = minWordLen;
        mTempArray = new char[n];
        mTempArray[0] = ' ';
    }


@@ -159,12 +155,11 @@ public final class NGramGenerator
    }


    public void getNgrams(String word, Set<String> ngrams)
    private void getNgrams(String word, Set<String> ngrams)
    {
        final int len = word.length();
        final int minWordLen = mMinWordLen;

        if (len < minWordLen)
        if (len < mMinWordLen)
        {
            return;
        }
@@ -181,21 +176,8 @@ public final class NGramGenerator
        {
            /*
             * Add another String with a space and the first n-1 characters of the word.
             *
             * We could just call
             *
             * ngrams.add(" " + word.substring(0, Math.min(len, n - 1));
             *
             * But it's probably way more efficient like this:
             */
            char[] tempArray = mTempArray;

            int count = Math.min(len, n - 1);
            for (int i = 0; i < count; ++i)
            {
                tempArray[i + 1] = word.charAt(i);
            }
            ngrams.add(new String(tempArray));
            ngrams.add(" " + word.substring(0, Math.min(len, n - 1)));
        }
    }
}
+3 −3
Original line number Diff line number Diff line
@@ -34,7 +34,7 @@ import java.util.Set;


/**
 * Supports the {@link TaskDatabaseHelper} in the manner of full-text-search.
 * Supports the {@link TaskDatabaseHelper} in the matter of full-text-search.
 *
 * @author Tobias Reinsch <tobias@dmfs.org>
 * @author Marten Gajda <marten@dmfs.org>
@@ -42,7 +42,7 @@ import java.util.Set;
public class FTSDatabaseHelper
{

    private final static float SEARCH_RESULTS_MIN_SCORE = 0.4f;
    private final static float SEARCH_RESULTS_MIN_SCORE = 0.33f;

    /**
     * A Generator for 3-grams.
@@ -127,7 +127,7 @@ public class FTSDatabaseHelper
            + " Integer PRIMARY KEY AUTOINCREMENT, " + NGramColumns.TEXT + " Text)";

    // FIXME: at present the minimum score is hard coded can we leave that decision to the caller?
    private final static String SQL_RAW_QUERY_SEARCH_TASK = "SELECT %s " + ", min(1.0*count(*)/?, 1.0) as " + TaskContract.Tasks.SCORE + " from "
    private final static String SQL_RAW_QUERY_SEARCH_TASK = "SELECT %s " + ", (1.0*count(DISTINCT " + NGramColumns.NGRAM_ID + ")/?) as " + TaskContract.Tasks.SCORE + " from "
            + FTS_NGRAM_TABLE + " join " + FTS_CONTENT_TABLE + " on (" + FTS_NGRAM_TABLE + "." + NGramColumns.NGRAM_ID + "=" + FTS_CONTENT_TABLE + "."
            + FTSContentColumns.NGRAM_ID + ") join " + Tables.INSTANCE_VIEW + " on (" + Tables.INSTANCE_VIEW + "." + TaskContract.Instances.TASK_ID + " = " + FTS_CONTENT_TABLE + "."
            + FTSContentColumns.TASK_ID + ") where %s group by " + TaskContract.Instances.TASK_ID + " having " + TaskContract.Tasks.SCORE + " >= " + SEARCH_RESULTS_MIN_SCORE