Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8a9996d0 authored by Tom Cherry's avatar Tom Cherry Committed by android-build-merger
Browse files

Merge "liblog: display valid utf8 characters with 'printable' log format" am: 6682d3bb

am: 047e9407

Change-Id: I19fb0d89937a2bec85930e270284c8a3dd4e5a6e
parents 60a5541e 047e9407
Loading
Loading
Loading
Loading
+6 −59
Original line number Original line Diff line number Diff line
@@ -33,6 +33,7 @@
#include <string.h>
#include <string.h>
#include <sys/param.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/types.h>
#include <wchar.h>


#include <cutils/list.h>
#include <cutils/list.h>
#include <log/log.h>
#include <log/log.h>
@@ -1133,67 +1134,14 @@ int android_log_processBinaryLogBuffer(
  return result;
  return result;
}
}


/*
 * One utf8 character at a time
 *
 * src: first byte of the candidate character.
 * len: number of bytes readable at src; no byte at or beyond src[len]
 *      is ever examined.
 *
 * Returns the length in bytes (1-4) of the utf8 character at the start
 * of the buffer, or -1 if it is illegal or truncated. A NUL byte, a stray
 * continuation byte, a lead byte announcing more than four bytes, an
 * overlong encoding, a UTF-16 surrogate and a code point above U+10FFFF
 * are all reported as illegal.
 *
 * Open coded from libutils/Unicode.cpp, borrowed from utf8_length(),
 * can not remove from here because of library circular dependencies.
 * Expect one-day utf8_character_length with the same signature could
 * _also_ be part of libutils/Unicode.cpp if its usefulness needs to
 * propagate globally.
 */
static ssize_t utf8_character_length(const char* src, size_t len) {
  static const uint32_t kUnicodeMaxCodepoint = 0x0010FFFF;
  /* Smallest code point that genuinely needs N bytes, indexed by N. */
  static const uint32_t kMinCodepoint[] = {0, 0, 0x80, 0x800, 0x10000};
  /*
   * Work on unsigned bytes: with a signed plain char the lead byte would be
   * sign-extended when masked, corrupting the decoded code point.
   */
  const unsigned char* bytes = (const unsigned char*)src;
  unsigned char first_char;
  size_t num_to_read;
  size_t i;
  uint32_t utf32;

  if (len == 0) {
    return -1;
  }
  first_char = bytes[0];

  if ((first_char & 0x80) == 0) { /* ASCII */
    return first_char ? 1 : -1;
  }

  /*
   * The lead byte must be 110xxxxx, 1110xxxx or 11110xxx. 10xxxxxx is a
   * continuation byte and 11111xxx would announce five or more bytes.
   */
  if ((first_char & 0xE0) == 0xC0) {
    num_to_read = 2;
    utf32 = first_char & 0x1F;
  } else if ((first_char & 0xF0) == 0xE0) {
    num_to_read = 3;
    utf32 = first_char & 0x0F;
  } else if ((first_char & 0xF8) == 0xF0) {
    num_to_read = 4;
    utf32 = first_char & 0x07;
  } else {
    return -1;
  }

  /* Truncated: check before touching any continuation byte. */
  if (num_to_read > len) {
    return -1;
  }

  for (i = 1; i < num_to_read; i++) {
    if ((bytes[i] & 0xC0) != 0x80) { /* must be 10xxxxxx */
      return -1;
    }
    utf32 = (utf32 << 6) | (uint32_t)(bytes[i] & 0x3F);
  }

  if (utf32 < kMinCodepoint[num_to_read]) { /* overlong encoding */
    return -1;
  }
  if (utf32 >= 0xD800 && utf32 <= 0xDFFF) { /* UTF-16 surrogate */
    return -1;
  }
  if (utf32 > kUnicodeMaxCodepoint) {
    return -1;
  }
  return (ssize_t)num_to_read;
}

/*
/*
 * Convert to printable from message to p buffer, return string length. If p is
 * Convert to printable from message to p buffer, return string length. If p is
 * NULL, do not copy, but still return the expected string length.
 * NULL, do not copy, but still return the expected string length.
 */
 */
static size_t convertPrintable(char* p, const char* message, size_t messageLen) {
size_t convertPrintable(char* p, const char* message, size_t messageLen) {
  char* begin = p;
  char* begin = p;
  bool print = p != NULL;
  bool print = p != NULL;
  mbstate_t mb_state = {};


  while (messageLen) {
  while (messageLen) {
    char buf[6];
    char buf[6];
@@ -1201,11 +1149,10 @@ static size_t convertPrintable(char* p, const char* message, size_t messageLen)
    if ((size_t)len > messageLen) {
    if ((size_t)len > messageLen) {
      len = messageLen;
      len = messageLen;
    }
    }
    len = utf8_character_length(message, len);
    len = mbrtowc(nullptr, message, len, &mb_state);


    if (len < 0) {
    if (len < 0) {
      snprintf(buf, sizeof(buf), ((messageLen > 1) && isdigit(message[1])) ? "\\%03o" : "\\%o",
      snprintf(buf, sizeof(buf), "\\x%02X", static_cast<unsigned char>(*message));
               *message & 0377);
      len = 1;
      len = 1;
    } else {
    } else {
      buf[0] = '\0';
      buf[0] = '\0';
@@ -1225,7 +1172,7 @@ static size_t convertPrintable(char* p, const char* message, size_t messageLen)
        } else if (*message == '\\') {
        } else if (*message == '\\') {
          strcpy(buf, "\\\\");
          strcpy(buf, "\\\\");
        } else if ((*message < ' ') || (*message & 0x80)) {
        } else if ((*message < ' ') || (*message & 0x80)) {
          snprintf(buf, sizeof(buf), "\\%o", *message & 0377);
          snprintf(buf, sizeof(buf), "\\x%02X", static_cast<unsigned char>(*message));
        }
        }
      }
      }
      if (!buf[0]) {
      if (!buf[0]) {
+1 −0
Original line number Original line Diff line number Diff line
@@ -62,6 +62,7 @@ cc_defaults {
        "log_system_test.cpp",
        "log_system_test.cpp",
        "log_time_test.cpp",
        "log_time_test.cpp",
        "log_wrap_test.cpp",
        "log_wrap_test.cpp",
        "logprint_test.cpp",
    ],
    ],
    shared_libs: [
    shared_libs: [
        "libcutils",
        "libcutils",
+87 −0
Original line number Original line Diff line number Diff line
/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <gtest/gtest.h>

size_t convertPrintable(char* p, const char* message, size_t messageLen);

// Plain printable ASCII must come out of convertPrintable() unchanged.
TEST(liblog, convertPrintable_ascii) {
  auto input = "easy string, output same";
  // First pass with a null destination only measures the converted length.
  auto output_size = convertPrintable(nullptr, input, strlen(input));
  EXPECT_EQ(output_size, strlen(input));

  // EXPECT_STREQ reads output as a NUL-terminated string, so the buffer needs
  // one byte beyond the reported length to hold the terminator.
  char output[output_size + 1];

  output_size = convertPrintable(output, input, strlen(input));
  EXPECT_EQ(output_size, strlen(input));
  EXPECT_STREQ(input, output);
}

// Control characters with a C escape spelling and the backslash itself must be
// written as two-character escapes.
TEST(liblog, convertPrintable_escapes) {
  // Note that \t is not escaped.
  auto input = "escape\a\b\t\v\f\r\\";
  auto expected_output = "escape\\a\\b\t\\v\\f\\r\\\\";
  // First pass with a null destination only measures the converted length.
  auto output_size = convertPrintable(nullptr, input, strlen(input));
  EXPECT_EQ(output_size, strlen(expected_output));

  // EXPECT_STREQ reads output as a NUL-terminated string, so the buffer needs
  // one byte beyond the reported length to hold the terminator.
  char output[output_size + 1];

  output_size = convertPrintable(output, input, strlen(input));
  EXPECT_EQ(output_size, strlen(expected_output));
  EXPECT_STREQ(expected_output, output);
}

// Well-formed UTF-8 characters of 2, 3, 3 and 4 bytes must be copied through
// verbatim rather than escaped.
TEST(liblog, convertPrintable_validutf8) {
  auto input = u8"¢ह€𐍈";
  // First pass with a null destination only measures the converted length.
  auto output_size = convertPrintable(nullptr, input, strlen(input));
  EXPECT_EQ(output_size, strlen(input));

  // EXPECT_STREQ reads output as a NUL-terminated string, so the buffer needs
  // one byte beyond the reported length to hold the terminator.
  char output[output_size + 1];

  output_size = convertPrintable(output, input, strlen(input));
  EXPECT_EQ(output_size, strlen(input));
  EXPECT_STREQ(input, output);
}

// Every byte of a malformed or truncated UTF-8 sequence, and every
// unprintable control byte, must be emitted as a \xHH escape.
TEST(liblog, convertPrintable_invalidutf8) {
  auto input = "\x80\xC2\x01\xE0\xA4\x06\xE0\x06\xF0\x90\x8D\x06\xF0\x90\x06\xF0\x0E";
  auto expected_output =
      "\\x80\\xC2\\x01\\xE0\\xA4\\x06\\xE0\\x06\\xF0\\x90\\x8D\\x06\\xF0\\x90\\x06\\xF0\\x0E";
  // First pass with a null destination only measures the converted length.
  auto output_size = convertPrintable(nullptr, input, strlen(input));
  EXPECT_EQ(output_size, strlen(expected_output));

  // EXPECT_STREQ reads output as a NUL-terminated string, so the buffer needs
  // one byte beyond the reported length to hold the terminator.
  char output[output_size + 1];

  output_size = convertPrintable(output, input, strlen(input));
  EXPECT_EQ(output_size, strlen(expected_output));
  EXPECT_STREQ(expected_output, output);
}

// Valid UTF-8, malformed sequences, named escapes and tabs interleaved in one
// message: each piece must be handled the same way as when it appears alone.
TEST(liblog, convertPrintable_mixed) {
  auto input =
      u8"\x80\xC2¢ह€𐍈\x01\xE0\xA4\x06¢ह€𐍈\xE0\x06\a\b\xF0\x90¢ह€𐍈\x8D\x06\xF0\t\t\x90\x06\xF0\x0E";
  auto expected_output =
      u8"\\x80\\xC2¢ह€𐍈\\x01\\xE0\\xA4\\x06¢ह€𐍈\\xE0\\x06\\a\\b\\xF0\\x90¢ह€𐍈\\x8D\\x06\\xF0\t\t"
      u8"\\x90\\x06\\xF0\\x0E";
  // First pass with a null destination only measures the converted length.
  auto output_size = convertPrintable(nullptr, input, strlen(input));
  EXPECT_EQ(output_size, strlen(expected_output));

  // EXPECT_STREQ reads output as a NUL-terminated string, so the buffer needs
  // one byte beyond the reported length to hold the terminator.
  char output[output_size + 1];

  output_size = convertPrintable(output, input, strlen(input));
  EXPECT_EQ(output_size, strlen(expected_output));
  EXPECT_STREQ(expected_output, output);
}