Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2257440f authored by Steve Block
Browse files

Updates WebAddress to ignore the colon after the host component of a URL if no port is specified

Currently, the regex used to extract the port matches ':' followed by one or more
digits. This means that when passed a malformed URL of the form <host>:<path>, no
match is made for the port and the ':' is matched as part of the path. Since the
handling of the path adds a leading '/' where absent (see http://b/1011602),
this leads to the URL being converted to <host>/:<path>.

This change updates the port regex to match ':' followed by zero or more digits.
This means that the ':' is always matched, so it does not leak into the path,
and the result is <host><path>. This matches the behavior of desktop browsers.
Because the port group can now match an empty string, the port is only parsed
when the matched text is non-empty.

Bug: 2494876
Change-Id: I34b47c8187cf03aa7674c14cd6593de53dce3169
parent 780a1cb7
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -57,7 +57,7 @@ public class WebAddress {
            /* scheme    */ "(?:(http|HTTP|https|HTTPS|file|FILE)\\:\\/\\/)?" +
            /* authority */ "(?:([-A-Za-z0-9$_.+!*'(),;?&=]+(?:\\:[-A-Za-z0-9$_.+!*'(),;?&=]+)?)@)?" +
            /* host      */ "([-" + GOOD_IRI_CHAR + "%_]+(?:\\.[-" + GOOD_IRI_CHAR + "%_]+)*|\\[[0-9a-fA-F:\\.]+\\])?" +
            /* port      */ "(?:\\:([0-9]+))?" +
            /* port      */ "(?:\\:([0-9]*))?" +
            /* path      */ "(\\/?[^#]*)?" +
            /* anchor    */ ".*");

@@ -85,7 +85,8 @@ public class WebAddress {
            t = m.group(MATCH_GROUP_HOST);
            if (t != null) mHost = t;
            t = m.group(MATCH_GROUP_PORT);
            if (t != null) {
            if (t != null && t.length() > 0) {
                // The ':' character is not returned by the regex.
                try {
                    mPort = Integer.parseInt(t);
                } catch (NumberFormatException ex) {