Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit bd1e1569 authored by yanglv's avatar yanglv Committed by Ricardo Cerqueira
Browse files

base: Enhance Web URL pattern

Support recognize Web URL without space prefix and postfix with
Chinese characters.

Change-Id: I03bf0afa57a031050e66dfdca2dd341d84a6b996
parent d99f5e12
Loading
Loading
Loading
Loading
+15 −8
Original line number Diff line number Diff line
@@ -117,6 +117,13 @@ public class Patterns {
    public static final String GOOD_IRI_CHAR =
        "a-zA-Z0-9\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF";

    /**
     * Match the characters without containing chinese characters
     * @hide
     */
    public static final String GOOD_IRI_HOST_CHAR =
        "a-zA-Z0-9\u00A0-\u4DFF\u9FA6-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF";

    public static final Pattern IP_ADDRESS
        = Pattern.compile(
            "((25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(25[0-5]|2[0-4]"
@@ -128,10 +135,10 @@ public class Patterns {
     * RFC 1035 Section 2.3.4 limits the labels to a maximum 63 octets.
     */
    private static final String IRI
        = "[" + GOOD_IRI_CHAR + "]([" + GOOD_IRI_CHAR + "\\-]{0,61}[" + GOOD_IRI_CHAR + "]){0,1}";
        = "[" + GOOD_IRI_HOST_CHAR + "]([" + GOOD_IRI_HOST_CHAR + "\\-]{0,61}[" + GOOD_IRI_HOST_CHAR + "]){0,1}";

    private static final String GOOD_GTLD_CHAR =
        "a-zA-Z\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF";
        "a-zA-Z\u00A0-\u4DFF\u9FA6-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF";
    private static final String GTLD = "[" + GOOD_GTLD_CHAR + "]{2,63}";
    private static final String HOST_NAME = "(" + IRI + "\\.)+" + GTLD;

@@ -144,16 +151,16 @@ public class Patterns {
     *  added.
     */
    public static final Pattern WEB_URL = Pattern.compile(
        "((?:(http|https|Http|Https|rtsp|Rtsp):\\/\\/(?:(?:[a-zA-Z0-9\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)"
        "((?=[^\u0391-\uffe5])(?:(http|https|Http|Https|rtsp|Rtsp):\\/\\/"
        + "(?:(?:[a-zA-Z0-9\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)"
        + "\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,64}(?:\\:(?:[a-zA-Z0-9\\$\\-\\_"
        + "\\.\\+\\!\\*\\'\\(\\)\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,25})?\\@)?)?"
        + "(?:" + DOMAIN_NAME + ")"
        + "(?:\\:\\d{1,5})?)" // plus option port number
        + "(\\/(?:(?:[" + GOOD_IRI_CHAR + "\\;\\/\\?\\:\\@\\&\\=\\#\\~"  // plus option query params
        + "\\-\\.\\+\\!\\*\\'\\(\\)\\,\\_])|(?:\\%[a-fA-F0-9]{2}))*)?"
        + "(?:\\b|$)"); // and finally, a word boundary or end of
                        // input.  This is to stop foo.sure from
                        // matching as foo.su
        + "(\\/(?:(?:[" + GOOD_IRI_HOST_CHAR
        + "\\;\\/\\?\\:\\@\\&\\=\\#\\~"  // plus option query params
        + "\\-\\.\\+\\!\\*\\'\\(\\)\\_])|(?:\\,[" + GOOD_IRI_HOST_CHAR
        + "])|(?:\\%[a-fA-F0-9]{2}))*)?", Pattern.CASE_INSENSITIVE);

    public static final Pattern EMAIL_ADDRESS
        = Pattern.compile(