Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2baf095c authored by Patrick Baumann's avatar Patrick Baumann
Browse files

Adding advanced pattern support to PatternMatcher

This change adds a new pathMatch attribute to the intent-filter tag
that supports a subset of regular expression syntax. Supported
tokens include dot (.) and set ([]) and supported modifiers include
zero-or-more (*), one-or-more(+) and ranges ({}).

Change-Id: I2ec3bc8a9eb3e94d21caf34a1ed0f3fd3fb33a35
Bug: 30805203
parent 02913603
Loading
Loading
Loading
Loading
+374 −8
Original line number Original line Diff line number Diff line
@@ -16,6 +16,10 @@


package android.os;
package android.os;


import android.util.Log;

import java.util.Arrays;

/**
/**
 * A simple pattern matcher, which is safe to use on untrusted data: it does
 * A simple pattern matcher, which is safe to use on untrusted data: it does
 * not provide full reg-exp support, only simple globbing that can not be
 * not provide full reg-exp support, only simple globbing that can not be
@@ -45,12 +49,58 @@ public class PatternMatcher implements Parcelable {
     */
     */
    public static final int PATTERN_SIMPLE_GLOB = 2;
    public static final int PATTERN_SIMPLE_GLOB = 2;


    /**
     * Pattern type: the given pattern is interpreted with a regular
     * expression-like syntax for matching against the string it is tested
     * against. Supported tokens include dot ({@code .}) and sets ({@code [...]})
     * with full support for character ranges and the not ({@code ^}) modifier.
     * Supported modifiers include star ({@code *}) for zero-or-more, plus ({@code +})
     * for one-or-more and full range ({@code {...}}) support. This is a simple
     * evaulation implementation in which matching is done against the pattern in
     * realtime with no backtracking support.
     *
     * {@hide} Pending approval for public API
     */
    public static final int PATTERN_ADVANCED_GLOB = 3;

    // token types for advanced matching
    private static final int TOKEN_TYPE_LITERAL = 0;
    private static final int TOKEN_TYPE_ANY = 1;
    private static final int TOKEN_TYPE_SET = 2;
    private static final int TOKEN_TYPE_INVERSE_SET = 3;

    // Return for no match
    private static final int NO_MATCH = -1;

    private static final String TAG = "PatternMatcher";

    // Parsed placeholders for advanced patterns
    private static final int PARSED_TOKEN_CHAR_SET_START = -1;
    private static final int PARSED_TOKEN_CHAR_SET_INVERSE_START = -2;
    private static final int PARSED_TOKEN_CHAR_SET_STOP = -3;
    private static final int PARSED_TOKEN_CHAR_ANY = -4;
    private static final int PARSED_MODIFIER_RANGE_START = -5;
    private static final int PARSED_MODIFIER_RANGE_STOP = -6;
    private static final int PARSED_MODIFIER_ZERO_OR_MORE = -7;
    private static final int PARSED_MODIFIER_ONE_OR_MORE = -8;

    private final String mPattern;
    private final String mPattern;
    private final int mType;
    private final int mType;
    private final int[] mParsedPattern;


    private static final int MAX_PATTERN_STORAGE = 2048;
    // workspace to use for building a parsed advanced pattern;
    private static final int[] sParsedPatternScratch = new int[MAX_PATTERN_STORAGE];


    public PatternMatcher(String pattern, int type) {
    public PatternMatcher(String pattern, int type) {
        mPattern = pattern;
        mPattern = pattern;
        mType = type;
        mType = type;
        if (mType == PATTERN_ADVANCED_GLOB) {
            mParsedPattern = parseAndVerifyAdvancedPattern(pattern);
        } else {
            mParsedPattern = null;
        }
    }
    }


    public final String getPath() {
    public final String getPath() {
@@ -62,7 +112,7 @@ public class PatternMatcher implements Parcelable {
    }
    }
    
    
    public boolean match(String str) {
    public boolean match(String str) {
        return matchPattern(mPattern, str, mType);
        return matchPattern(str, mPattern, mParsedPattern, mType);
    }
    }


    public String toString() {
    public String toString() {
@@ -77,6 +127,9 @@ public class PatternMatcher implements Parcelable {
            case PATTERN_SIMPLE_GLOB:
            case PATTERN_SIMPLE_GLOB:
                type = "GLOB: ";
                type = "GLOB: ";
                break;
                break;
            case PATTERN_ADVANCED_GLOB:
                type = "ADVANCED: ";
                break;
        }
        }
        return "PatternMatcher{" + type + mPattern + "}";
        return "PatternMatcher{" + type + mPattern + "}";
    }
    }
@@ -88,11 +141,13 @@ public class PatternMatcher implements Parcelable {
    public void writeToParcel(Parcel dest, int flags) {
    public void writeToParcel(Parcel dest, int flags) {
        dest.writeString(mPattern);
        dest.writeString(mPattern);
        dest.writeInt(mType);
        dest.writeInt(mType);
        dest.writeIntArray(mParsedPattern);
    }
    }
    
    
    public PatternMatcher(Parcel src) {
    public PatternMatcher(Parcel src) {
        mPattern = src.readString();
        mPattern = src.readString();
        mType = src.readInt();
        mType = src.readInt();
        mParsedPattern = src.createIntArray();
    }
    }
    
    
    public static final Parcelable.Creator<PatternMatcher> CREATOR
    public static final Parcelable.Creator<PatternMatcher> CREATOR
@@ -106,16 +161,21 @@ public class PatternMatcher implements Parcelable {
        }
        }
    };
    };
    
    
    static boolean matchPattern(String pattern, String match, int type) {
    static boolean matchPattern(String match, String pattern, int[] parsedPattern, int type) {
        if (match == null) return false;
        if (match == null) return false;
        if (type == PATTERN_LITERAL) {
        if (type == PATTERN_LITERAL) {
            return pattern.equals(match);
            return pattern.equals(match);
        } if (type == PATTERN_PREFIX) {
        } if (type == PATTERN_PREFIX) {
            return match.startsWith(pattern);
            return match.startsWith(pattern);
        } else if (type != PATTERN_SIMPLE_GLOB) {
        } else if (type == PATTERN_SIMPLE_GLOB) {
            return matchGlobPattern(pattern, match);
        } else if (type == PATTERN_ADVANCED_GLOB) {
            return matchAdvancedPattern(parsedPattern, match);
        }
        return false;
        return false;
    }
    }


    static boolean matchGlobPattern(String pattern, String match) {
        final int NP = pattern.length();
        final int NP = pattern.length();
        if (NP <= 0) {
        if (NP <= 0) {
            return match.length() <= 0;
            return match.length() <= 0;
@@ -194,4 +254,310 @@ public class PatternMatcher implements Parcelable {
        
        
        return false;
        return false;
    }
    }

    /**
     * Parses the advanced pattern and returns an integer array representation of it. The integer
     * array treats each field as a character if positive and a unique token placeholder if
     * negative. This method will throw on any pattern structure violations.
     */
    synchronized static int[] parseAndVerifyAdvancedPattern(String pattern) {
        int ip = 0;
        final int LP = pattern.length();

        int it = 0;

        boolean inSet = false;
        boolean inRange = false;
        boolean inCharClass = false;

        boolean addToParsedPattern;

        while (ip < LP) {
            if (it > MAX_PATTERN_STORAGE - 3) {
                throw new IllegalArgumentException("Pattern is too large!");
            }

            char c = pattern.charAt(ip);
            addToParsedPattern = false;

            switch (c) {
                case '[':
                    if (inSet) {
                        addToParsedPattern = true; // treat as literal or char class in set
                    } else {
                        if (pattern.charAt(ip + 1) == '^') {
                            sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_INVERSE_START;
                            ip++; // skip over the '^'
                        } else {
                            sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_START;
                        }
                        ip++; // move to the next pattern char
                        inSet = true;
                        continue;
                    }
                    break;
                case ']':
                    if (!inSet) {
                        addToParsedPattern = true; // treat as literal outside of set
                    } else {
                        int parsedToken = sParsedPatternScratch[it - 1];
                        if (parsedToken == PARSED_TOKEN_CHAR_SET_START ||
                            parsedToken == PARSED_TOKEN_CHAR_SET_INVERSE_START) {
                            throw new IllegalArgumentException(
                                    "You must define characters in a set.");
                        }
                        sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_STOP;
                        inSet = false;
                        inCharClass = false;
                    }
                    break;
                case '{':
                    if (!inSet) {
                        if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
                            throw new IllegalArgumentException("Modifier must follow a token.");
                        }
                        sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_START;
                        ip++;
                        inRange = true;
                    }
                    break;
                case '}':
                    if (inRange) { // only terminate the range if we're currently in one
                        sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_STOP;
                        inRange = false;
                    }
                    break;
                case '*':
                    if (!inSet) {
                        if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
                            throw new IllegalArgumentException("Modifier must follow a token.");
                        }
                        sParsedPatternScratch[it++] = PARSED_MODIFIER_ZERO_OR_MORE;
                    }
                    break;
                case '+':
                    if (!inSet) {
                        if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
                            throw new IllegalArgumentException("Modifier must follow a token.");
                        }
                        sParsedPatternScratch[it++] = PARSED_MODIFIER_ONE_OR_MORE;
                    }
                    break;
                case '.':
                    if (!inSet) {
                        sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_ANY;
                    }
                    break;
                case '\\': // escape
                    if (ip + 1 >= LP) {
                        throw new IllegalArgumentException("Escape found at end of pattern!");
                    }
                    c = pattern.charAt(++ip);
                    addToParsedPattern = true;
                    break;
                default:
                    addToParsedPattern = true;
                    break;
            }
            if (inSet) {
                if (inCharClass) {
                    sParsedPatternScratch[it++] = c;
                    inCharClass = false;
                } else {
                    // look forward for character class
                    if (ip + 2 < LP
                            && pattern.charAt(ip + 1) == '-'
                            && pattern.charAt(ip + 2) != ']') {
                        inCharClass = true;
                        sParsedPatternScratch[it++] = c; // set first token as lower end of range
                        ip++; // advance past dash
                    } else { // literal
                        sParsedPatternScratch[it++] = c; // set first token as literal
                        sParsedPatternScratch[it++] = c; // set second set as literal
                    }
                }
            } else if (inRange) {
                int endOfSet = pattern.indexOf('}', ip);
                if (endOfSet < 0) {
                    throw new IllegalArgumentException("Range not ended with '}'");
                }
                String rangeString = pattern.substring(ip, endOfSet);
                int commaIndex = rangeString.indexOf(',');
                try {
                    final int rangeMin;
                    final int rangeMax;
                    if (commaIndex < 0) {
                        int parsedRange = Integer.parseInt(rangeString);
                        rangeMin = rangeMax = parsedRange;
                    } else {
                        rangeMin = Integer.parseInt(rangeString.substring(0, commaIndex));
                        if (commaIndex == rangeString.length() - 1) { // e.g. {n,} (n or more)
                            rangeMax = Integer.MAX_VALUE;
                        } else {
                            rangeMax = Integer.parseInt(rangeString.substring(commaIndex + 1));
                        }
                    }
                    if (rangeMin > rangeMax) {
                        throw new IllegalArgumentException(
                            "Range quantifier minimum is greater than maximum");
                    }
                    sParsedPatternScratch[it++] = rangeMin;
                    sParsedPatternScratch[it++] = rangeMax;
                } catch (NumberFormatException e) {
                    throw new IllegalArgumentException("Range number format incorrect", e);
                }
                ip = endOfSet;
                continue; // don't increment ip
            } else if (addToParsedPattern) {
                sParsedPatternScratch[it++] = c;
            }
            ip++;
        }
        if (inSet) {
            throw new IllegalArgumentException("Set was not terminated!");
        }
        return Arrays.copyOf(sParsedPatternScratch, it);
    }

    private static boolean isParsedModifier(int parsedChar) {
        return parsedChar == PARSED_MODIFIER_ONE_OR_MORE ||
                parsedChar == PARSED_MODIFIER_ZERO_OR_MORE ||
                parsedChar == PARSED_MODIFIER_RANGE_STOP ||
                parsedChar == PARSED_MODIFIER_RANGE_START;
    }

    static boolean matchAdvancedPattern(int[] parsedPattern, String match) {

        // create indexes
        int ip = 0, im = 0;

        // one-time length check
        final int LP = parsedPattern.length, LM = match.length();

        // The current character being analyzed in the pattern
        int patternChar;

        int tokenType;

        int charSetStart = 0, charSetEnd = 0;

        while (ip < LP) { // we still have content in the pattern

            patternChar = parsedPattern[ip];
            // get the match type of the next verb

            switch (patternChar) {
                case PARSED_TOKEN_CHAR_ANY:
                    tokenType = TOKEN_TYPE_ANY;
                    ip++;
                    break;
                case PARSED_TOKEN_CHAR_SET_START:
                case PARSED_TOKEN_CHAR_SET_INVERSE_START:
                    tokenType = patternChar == PARSED_TOKEN_CHAR_SET_START
                            ? TOKEN_TYPE_SET
                            : TOKEN_TYPE_INVERSE_SET;
                    charSetStart = ip + 1; // start from the char after the set start
                    while (++ip < LP && parsedPattern[ip] != PARSED_TOKEN_CHAR_SET_STOP);
                    charSetEnd = ip - 1; // we're on the set stop, end is the previous
                    ip++; // move the pointer to the next pattern entry
                    break;
                default:
                    charSetStart = ip;
                    tokenType = TOKEN_TYPE_LITERAL;
                    ip++;
                    break;
            }

            final int minRepetition;
            final int maxRepetition;

            // look for a match length modifier
            if (ip >= LP) {
                minRepetition = maxRepetition = 1;
            } else {
                patternChar = parsedPattern[ip];
                switch (patternChar) {
                    case PARSED_MODIFIER_ZERO_OR_MORE:
                        minRepetition = 0;
                        maxRepetition = Integer.MAX_VALUE;
                        ip++;
                        break;
                    case PARSED_MODIFIER_ONE_OR_MORE:
                        minRepetition = 1;
                        maxRepetition = Integer.MAX_VALUE;
                        ip++;
                        break;
                    case PARSED_MODIFIER_RANGE_START:
                        minRepetition = parsedPattern[++ip];
                        maxRepetition = parsedPattern[++ip];
                        ip += 2; // step over PARSED_MODIFIER_RANGE_STOP and on to the next token
                        break;
                    default:
                        minRepetition = maxRepetition = 1; // implied literal
                        break;
                }
            }
            if (minRepetition > maxRepetition) {
                return false;
            }

            // attempt to match as many characters as possible
            int matched = matchChars(match, im, LM, tokenType, minRepetition, maxRepetition,
                    parsedPattern, charSetStart, charSetEnd);

            // if we found a conflict, return false immediately
            if (matched == NO_MATCH) {
                return false;
            }

            // move the match pointer the number of characters matched
            im += matched;
        }
        return ip >= LP && im >= LM; // have parsed entire string and regex
    }

    private static int matchChars(String match, int im, final int lm, int tokenType,
            int minRepetition, int maxRepetition, int[] parsedPattern,
            int tokenStart, int tokenEnd) {
        int matched = 0;

        while(matched < maxRepetition
                && matchChar(match, im + matched, lm, tokenType, parsedPattern, tokenStart,
                    tokenEnd)) {
            matched++;
        }

        return matched < minRepetition ? NO_MATCH : matched;
    }

    private static boolean matchChar(String match, int im, final int lm, int tokenType,
            int[] parsedPattern, int tokenStart, int tokenEnd) {
        if (im >= lm) { // we've overrun the string, no match
            return false;
        }
        switch (tokenType) {
            case TOKEN_TYPE_ANY:
                return true;
            case TOKEN_TYPE_SET:
                for (int i = tokenStart; i < tokenEnd; i += 2) {
                    char matchChar = match.charAt(im);
                    if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) {
                        return true;
                    }
                }
                return false;
            case TOKEN_TYPE_INVERSE_SET:
                for (int i = tokenStart; i < tokenEnd; i += 2) {
                    char matchChar = match.charAt(im);
                    if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) {
                        return false;
                    }
                }
                return true;
            case TOKEN_TYPE_LITERAL:
                return match.charAt(im) == parsedPattern[tokenStart];
            default:
                return false;
        }
    }
}
}
 No newline at end of file
+1 −0
Original line number Original line Diff line number Diff line
@@ -32,6 +32,7 @@ public class OsTests {
        suite.addTestSuite(IdleHandlerTest.class);
        suite.addTestSuite(IdleHandlerTest.class);
        suite.addTestSuite(MessageQueueTest.class);
        suite.addTestSuite(MessageQueueTest.class);
        suite.addTestSuite(MessengerTest.class);
        suite.addTestSuite(MessengerTest.class);
        suite.addTestSuite(PatternMatcherTest.class);
        suite.addTestSuite(SystemPropertiesTest.class);
        suite.addTestSuite(SystemPropertiesTest.class);


        return suite;
        return suite;
+234 −0

File added.

Preview size limit exceeded, changes collapsed.