Loading adb/sysdeps.h +18 −0 Original line number Diff line number Diff line Loading @@ -27,6 +27,7 @@ #include <errno.h> #include <string> #include <vector> // Include this before open/unlink are defined as macros below. #include <android-base/utf8.h> Loading Loading @@ -288,6 +289,8 @@ extern int adb_chmod(const char *, int); extern int adb_vfprintf(FILE *stream, const char *format, va_list ap) __attribute__((__format__(ADB_FORMAT_ARCHETYPE, 2, 0))); extern int adb_vprintf(const char *format, va_list ap) __attribute__((__format__(ADB_FORMAT_ARCHETYPE, 1, 0))); extern int adb_fprintf(FILE *stream, const char *format, ...) __attribute__((__format__(ADB_FORMAT_ARCHETYPE, 2, 3))); extern int adb_printf(const char *format, ...) Loading @@ -295,6 +298,8 @@ extern int adb_printf(const char *format, ...) extern int adb_fputs(const char* buf, FILE* stream); extern int adb_fputc(int ch, FILE* stream); extern int adb_putchar(int ch); extern int adb_puts(const char* buf); extern size_t adb_fwrite(const void* ptr, size_t size, size_t nmemb, FILE* stream); Loading @@ -321,13 +326,20 @@ inline void seekdir(DIR*, long) { #define chmod adb_chmod #define vfprintf adb_vfprintf #define vprintf adb_vprintf #define fprintf adb_fprintf #define printf adb_printf #define fputs adb_fputs #define fputc adb_fputc // putc may be a macro, so if so, undefine it, so that we can redefine it. #undef putc #define putc(c, s) adb_fputc(c, s) #define putchar adb_putchar #define puts adb_puts #define fwrite adb_fwrite #define fopen adb_fopen #define freopen freopen_utf8_not_yet_implemented #define getenv adb_getenv #define putenv putenv_utf8_not_yet_implemented Loading Loading @@ -386,6 +398,12 @@ public: // but does not check if the handle != INVALID_HANDLE_VALUE. typedef std::unique_ptr<HANDLE, handle_deleter> unique_handle; namespace internal { size_t ParseCompleteUTF8(const char* first, const char* last, std::vector<char>* remaining_bytes); } #else /* !_WIN32 a.k.a. Unix */ #include "fdevent.h" Loading adb/sysdeps_win32.cpp +134 −31 Original line number Diff line number Diff line Loading @@ -2451,12 +2451,15 @@ static void _fh_socketpair_hook( FH fh, int events, EventHook hook ) } static adb_mutex_t g_console_output_buffer_lock; void adb_sysdeps_init( void ) { #define ADB_MUTEX(x) InitializeCriticalSection( & x ); #include "mutex_list.h" InitializeCriticalSection( &_win32_lock ); InitializeCriticalSection( &g_console_output_buffer_lock ); } /**************************************************************************/ Loading Loading @@ -2525,6 +2528,8 @@ static HANDLE _get_console_handle(int fd, DWORD* mode=nullptr) { // Returns a console handle if |stream| is a console, otherwise returns nullptr. static HANDLE _get_console_handle(FILE* const stream) { // Save and restore errno to make it easier for callers to prevent from overwriting errno. android::base::ErrnoRestorer er; const int fd = fileno(stream); if (fd < 0) { return nullptr; Loading Loading @@ -3296,6 +3301,9 @@ static HANDLE _console_handle; // when set, console mode should be restored void stdin_raw_init() { const HANDLE in = _get_console_handle(STDIN_FILENO, &_old_console_mode); if (in == nullptr) { return; } // Disable ENABLE_PROCESSED_INPUT so that Ctrl-C is read instead of // calling the process Ctrl-C routine (configured by Loading Loading @@ -3617,16 +3625,98 @@ int adb_chmod(const char* path, int mode) { return _wchmod(path_wide.c_str(), mode); } // Internal helper function to write UTF-8 bytes to a console. Returns -1 // on error. static int _console_write_utf8(const char* buf, size_t size, FILE* stream, // From libutils/Unicode.cpp, get the length of a UTF-8 sequence given the lead byte. static inline size_t utf8_codepoint_len(uint8_t ch) { return ((0xe5000000 >> ((ch >> 3) & 0x1e)) & 3) + 1; } namespace internal { // Given a sequence of UTF-8 bytes (denoted by the range [first, last)), return the number of bytes // (from the beginning) that are complete UTF-8 sequences and append the remaining bytes to // remaining_bytes. size_t ParseCompleteUTF8(const char* const first, const char* const last, std::vector<char>* const remaining_bytes) { // Walk backwards from the end of the sequence looking for the beginning of a UTF-8 sequence. // Current_after points one byte past the current byte to be examined. for (const char* current_after = last; current_after != first; --current_after) { const char* const current = current_after - 1; const char ch = *current; const char kHighBit = 0x80u; const char kTwoHighestBits = 0xC0u; if ((ch & kHighBit) == 0) { // high bit not set // The buffer ends with a one-byte UTF-8 sequence, possibly followed by invalid trailing // bytes with no leading byte, so return the entire buffer. break; } else if ((ch & kTwoHighestBits) == kTwoHighestBits) { // top two highest bits set // Lead byte in UTF-8 sequence, so check if we have all the bytes in the sequence. const size_t bytes_available = last - current; if (bytes_available < utf8_codepoint_len(ch)) { // We don't have all the bytes in the UTF-8 sequence, so return all the bytes // preceding the current incomplete UTF-8 sequence and append the remaining bytes // to remaining_bytes. remaining_bytes->insert(remaining_bytes->end(), current, last); return current - first; } else { // The buffer ends with a complete UTF-8 sequence, possibly followed by invalid // trailing bytes with no lead byte, so return the entire buffer. break; } } else { // Trailing byte, so keep going backwards looking for the lead byte. } } // Return the size of the entire buffer. It is possible that we walked backward past invalid // trailing bytes with no lead byte, in which case we want to return all those invalid bytes // so that they can be processed. return last - first; } } // Bytes that have not yet been output to the console because they are incomplete UTF-8 sequences. // Note that we use only one buffer even though stderr and stdout are logically separate streams. // This matches the behavior of Linux. // Protected by g_console_output_buffer_lock. static auto& g_console_output_buffer = *new std::vector<char>(); // Internal helper function to write UTF-8 bytes to a console. Returns -1 on error. static int _console_write_utf8(const char* const buf, const size_t buf_size, FILE* stream, HANDLE console) { std::wstring output; const int saved_errno = errno; std::vector<char> combined_buffer; // Complete UTF-8 sequences that should be immediately written to the console. const char* utf8; size_t utf8_size; adb_mutex_lock(&g_console_output_buffer_lock); if (g_console_output_buffer.empty()) { // If g_console_output_buffer doesn't have a buffered up incomplete UTF-8 sequence (the // common case with plain ASCII), parse buf directly. utf8 = buf; utf8_size = internal::ParseCompleteUTF8(buf, buf + buf_size, &g_console_output_buffer); } else { // If g_console_output_buffer has a buffered up incomplete UTF-8 sequence, move it to // combined_buffer (and effectively clear g_console_output_buffer) and append buf to // combined_buffer, then parse it all together. combined_buffer.swap(g_console_output_buffer); combined_buffer.insert(combined_buffer.end(), buf, buf + buf_size); // Try to convert from data that might be UTF-8 to UTF-16, ignoring errors. // Data might not be UTF-8 if the user cat's random data, runs dmesg, etc. utf8 = combined_buffer.data(); utf8_size = internal::ParseCompleteUTF8(utf8, utf8 + combined_buffer.size(), &g_console_output_buffer); } adb_mutex_unlock(&g_console_output_buffer_lock); std::wstring utf16; // Try to convert from data that might be UTF-8 to UTF-16, ignoring errors (just like Linux // which does not return an error on bad UTF-8). Data might not be UTF-8 if the user cat's // random data, runs dmesg (which might have non-UTF-8), etc. // This could throw std::bad_alloc. (void)android::base::UTF8ToWide(buf, size, &output); (void)android::base::UTF8ToWide(utf8, utf8_size, &utf16); // Note that this does not do \n => \r\n translation because that // doesn't seem necessary for the Windows console. For the Windows Loading @@ -3639,16 +3729,16 @@ static int _console_write_utf8(const char* buf, size_t size, FILE* stream, // Write UTF-16 to the console. DWORD written = 0; if (!WriteConsoleW(console, output.c_str(), output.length(), &written, NULL)) { if (!WriteConsoleW(console, utf16.c_str(), utf16.length(), &written, NULL)) { errno = EIO; return -1; } // This is the number of UTF-16 chars written, which might be different // than the number of UTF-8 chars passed in. It doesn't seem practical to // get this count correct. return written; // Return the size of the original buffer passed in, signifying that we consumed it all, even // if nothing was displayed, in the case of being passed an incomplete UTF-8 sequence. This // matches the Linux behavior. errno = saved_errno; return buf_size; } // Function prototype because attributes cannot be placed on func definitions. Loading @@ -3660,14 +3750,21 @@ static int _console_vfprintf(const HANDLE console, FILE* stream, // Returns -1 on error. static int _console_vfprintf(const HANDLE console, FILE* stream, const char *format, va_list ap) { const int saved_errno = errno; std::string output_utf8; // Format the string. // This could throw std::bad_alloc. android::base::StringAppendV(&output_utf8, format, ap); return _console_write_utf8(output_utf8.c_str(), output_utf8.length(), stream, console); const int result = _console_write_utf8(output_utf8.c_str(), output_utf8.length(), stream, console); if (result != -1) { errno = saved_errno; } else { // If -1 was returned, errno has been set. } return result; } // Version of vfprintf() that takes UTF-8 and can write Unicode to a Loading @@ -3689,6 +3786,11 @@ int adb_vfprintf(FILE *stream, const char *format, va_list ap) { } } // Version of vprintf() that takes UTF-8 and can write Unicode to a Windows console. int adb_vprintf(const char *format, va_list ap) { return adb_vfprintf(stdout, format, ap); } // Version of fprintf() that takes UTF-8 and can write Unicode to a // Windows console. int adb_fprintf(FILE *stream, const char *format, ...) { Loading Loading @@ -3716,6 +3818,7 @@ int adb_printf(const char *format, ...) { int adb_fputs(const char* buf, FILE* stream) { // adb_fprintf returns -1 on error, which is conveniently the same as EOF // which fputs (and hence adb_fputs) should return on error. static_assert(EOF == -1, "EOF is not -1, so this code needs to be fixed"); return adb_fprintf(stream, "%s", buf); } Loading @@ -3723,32 +3826,32 @@ int adb_fputs(const char* buf, FILE* stream) { // Windows console. int adb_fputc(int ch, FILE* stream) { const int result = adb_fprintf(stream, "%c", ch); if (result <= 0) { // If there was an error, or if nothing was printed (which should be an // error), return an error, which fprintf signifies with EOF. if (result == -1) { return EOF; } // For success, fputc returns the char, cast to unsigned char, then to int. return static_cast<unsigned char>(ch); } // Version of putchar() that takes UTF-8 and can write Unicode to a Windows console. int adb_putchar(int ch) { return adb_fputc(ch, stdout); } // Version of puts() that takes UTF-8 and can write Unicode to a Windows console. int adb_puts(const char* buf) { // adb_printf returns -1 on error, which is conveniently the same as EOF // which puts (and hence adb_puts) should return on error. static_assert(EOF == -1, "EOF is not -1, so this code needs to be fixed"); return adb_printf("%s\n", buf); } // Internal function to write UTF-8 to a Win32 console. Returns the number of // items (of length size) written. On error, returns a short item count or 0. static size_t _console_fwrite(const void* ptr, size_t size, size_t nmemb, FILE* stream, HANDLE console) { // TODO: Note that a Unicode character could be several UTF-8 bytes. But // if we're passed only some of the bytes of a character (for example, from // the network socket for adb shell), we won't be able to convert the char // to a complete UTF-16 char (or surrogate pair), so the output won't look // right. // // To fix this, see libutils/Unicode.cpp for hints on decoding UTF-8. // // For now we ignore this problem because the alternative is that we'd have // to parse UTF-8 and buffer things up (doable). At least this is better // than what we had before -- always incorrect multi-byte UTF-8 output. int result = _console_write_utf8(reinterpret_cast<const char*>(ptr), size * nmemb, stream, console); const int result = _console_write_utf8(reinterpret_cast<const char*>(ptr), size * nmemb, stream, console); if (result == -1) { return 0; } Loading adb/sysdeps_win32_test.cpp +57 −0 Original line number Diff line number Diff line Loading @@ -137,3 +137,60 @@ TEST(sysdeps_win32, unix_isatty) { // Make sure an invalid FD is handled correctly. EXPECT_EQ(0, unix_isatty(-1)); } void TestParseCompleteUTF8(const char* buf, const size_t buf_size, const size_t expected_complete_bytes, const std::vector<char>& expected_remaining_bytes) { std::vector<char> remaining_bytes; const size_t complete_bytes = internal::ParseCompleteUTF8(buf, buf + buf_size, &remaining_bytes); EXPECT_EQ(expected_complete_bytes, complete_bytes); EXPECT_EQ(expected_remaining_bytes, remaining_bytes); } TEST(sysdeps_win32, ParseCompleteUTF8) { const std::vector<std::vector<char>> multi_byte_sequences = { { '\xc2', '\xa9' }, // 2 byte UTF-8 sequence { '\xe1', '\xb4', '\xa8' }, // 3 byte UTF-8 sequence { '\xf0', '\x9f', '\x98', '\x80' }, // 4 byte UTF-8 sequence }; std::vector<std::vector<char>> all_sequences = { {}, // 0 bytes { '\0' }, // NULL byte { 'a' }, // 1 byte UTF-8 sequence }; all_sequences.insert(all_sequences.end(), multi_byte_sequences.begin(), multi_byte_sequences.end()); // Vary a prefix of bytes in front of the sequence that we're actually interested in parsing. for (const auto& prefix : all_sequences) { // Parse (prefix + one byte of the sequence at a time) for (const auto& seq : multi_byte_sequences) { std::vector<char> buffer(prefix); // For every byte of the sequence except the last for (size_t i = 0; i < seq.size() - 1; ++i) { buffer.push_back(seq[i]); // When parsing an incomplete UTF-8 sequence, the amount of the buffer preceding // the start of the incomplete UTF-8 sequence is valid. The remaining bytes are the // bytes of the incomplete UTF-8 sequence. TestParseCompleteUTF8(buffer.data(), buffer.size(), prefix.size(), std::vector<char>(seq.begin(), seq.begin() + i + 1)); } // For the last byte of the sequence buffer.push_back(seq.back()); TestParseCompleteUTF8(buffer.data(), buffer.size(), buffer.size(), std::vector<char>()); } // Parse (prefix (aka sequence) + invalid trailing bytes) to verify that the invalid // trailing bytes are immediately "returned" to prevent them from being stuck in some // buffer. std::vector<char> buffer(prefix); for (size_t i = 0; i < 8; ++i) { buffer.push_back(0x80); // trailing byte TestParseCompleteUTF8(buffer.data(), buffer.size(), buffer.size(), std::vector<char>()); } } } Loading
adb/sysdeps.h +18 −0 Original line number Diff line number Diff line Loading @@ -27,6 +27,7 @@ #include <errno.h> #include <string> #include <vector> // Include this before open/unlink are defined as macros below. #include <android-base/utf8.h> Loading Loading @@ -288,6 +289,8 @@ extern int adb_chmod(const char *, int); extern int adb_vfprintf(FILE *stream, const char *format, va_list ap) __attribute__((__format__(ADB_FORMAT_ARCHETYPE, 2, 0))); extern int adb_vprintf(const char *format, va_list ap) __attribute__((__format__(ADB_FORMAT_ARCHETYPE, 1, 0))); extern int adb_fprintf(FILE *stream, const char *format, ...) __attribute__((__format__(ADB_FORMAT_ARCHETYPE, 2, 3))); extern int adb_printf(const char *format, ...) Loading @@ -295,6 +298,8 @@ extern int adb_printf(const char *format, ...) extern int adb_fputs(const char* buf, FILE* stream); extern int adb_fputc(int ch, FILE* stream); extern int adb_putchar(int ch); extern int adb_puts(const char* buf); extern size_t adb_fwrite(const void* ptr, size_t size, size_t nmemb, FILE* stream); Loading @@ -321,13 +326,20 @@ inline void seekdir(DIR*, long) { #define chmod adb_chmod #define vfprintf adb_vfprintf #define vprintf adb_vprintf #define fprintf adb_fprintf #define printf adb_printf #define fputs adb_fputs #define fputc adb_fputc // putc may be a macro, so if so, undefine it, so that we can redefine it. #undef putc #define putc(c, s) adb_fputc(c, s) #define putchar adb_putchar #define puts adb_puts #define fwrite adb_fwrite #define fopen adb_fopen #define freopen freopen_utf8_not_yet_implemented #define getenv adb_getenv #define putenv putenv_utf8_not_yet_implemented Loading Loading @@ -386,6 +398,12 @@ public: // but does not check if the handle != INVALID_HANDLE_VALUE. typedef std::unique_ptr<HANDLE, handle_deleter> unique_handle; namespace internal { size_t ParseCompleteUTF8(const char* first, const char* last, std::vector<char>* remaining_bytes); } #else /* !_WIN32 a.k.a. Unix */ #include "fdevent.h" Loading
adb/sysdeps_win32.cpp +134 −31 Original line number Diff line number Diff line Loading @@ -2451,12 +2451,15 @@ static void _fh_socketpair_hook( FH fh, int events, EventHook hook ) } static adb_mutex_t g_console_output_buffer_lock; void adb_sysdeps_init( void ) { #define ADB_MUTEX(x) InitializeCriticalSection( & x ); #include "mutex_list.h" InitializeCriticalSection( &_win32_lock ); InitializeCriticalSection( &g_console_output_buffer_lock ); } /**************************************************************************/ Loading Loading @@ -2525,6 +2528,8 @@ static HANDLE _get_console_handle(int fd, DWORD* mode=nullptr) { // Returns a console handle if |stream| is a console, otherwise returns nullptr. static HANDLE _get_console_handle(FILE* const stream) { // Save and restore errno to make it easier for callers to prevent from overwriting errno. android::base::ErrnoRestorer er; const int fd = fileno(stream); if (fd < 0) { return nullptr; Loading Loading @@ -3296,6 +3301,9 @@ static HANDLE _console_handle; // when set, console mode should be restored void stdin_raw_init() { const HANDLE in = _get_console_handle(STDIN_FILENO, &_old_console_mode); if (in == nullptr) { return; } // Disable ENABLE_PROCESSED_INPUT so that Ctrl-C is read instead of // calling the process Ctrl-C routine (configured by Loading Loading @@ -3617,16 +3625,98 @@ int adb_chmod(const char* path, int mode) { return _wchmod(path_wide.c_str(), mode); } // Internal helper function to write UTF-8 bytes to a console. Returns -1 // on error. static int _console_write_utf8(const char* buf, size_t size, FILE* stream, // From libutils/Unicode.cpp, get the length of a UTF-8 sequence given the lead byte. static inline size_t utf8_codepoint_len(uint8_t ch) { return ((0xe5000000 >> ((ch >> 3) & 0x1e)) & 3) + 1; } namespace internal { // Given a sequence of UTF-8 bytes (denoted by the range [first, last)), return the number of bytes // (from the beginning) that are complete UTF-8 sequences and append the remaining bytes to // remaining_bytes. size_t ParseCompleteUTF8(const char* const first, const char* const last, std::vector<char>* const remaining_bytes) { // Walk backwards from the end of the sequence looking for the beginning of a UTF-8 sequence. // Current_after points one byte past the current byte to be examined. for (const char* current_after = last; current_after != first; --current_after) { const char* const current = current_after - 1; const char ch = *current; const char kHighBit = 0x80u; const char kTwoHighestBits = 0xC0u; if ((ch & kHighBit) == 0) { // high bit not set // The buffer ends with a one-byte UTF-8 sequence, possibly followed by invalid trailing // bytes with no leading byte, so return the entire buffer. break; } else if ((ch & kTwoHighestBits) == kTwoHighestBits) { // top two highest bits set // Lead byte in UTF-8 sequence, so check if we have all the bytes in the sequence. const size_t bytes_available = last - current; if (bytes_available < utf8_codepoint_len(ch)) { // We don't have all the bytes in the UTF-8 sequence, so return all the bytes // preceding the current incomplete UTF-8 sequence and append the remaining bytes // to remaining_bytes. remaining_bytes->insert(remaining_bytes->end(), current, last); return current - first; } else { // The buffer ends with a complete UTF-8 sequence, possibly followed by invalid // trailing bytes with no lead byte, so return the entire buffer. break; } } else { // Trailing byte, so keep going backwards looking for the lead byte. } } // Return the size of the entire buffer. It is possible that we walked backward past invalid // trailing bytes with no lead byte, in which case we want to return all those invalid bytes // so that they can be processed. return last - first; } } // Bytes that have not yet been output to the console because they are incomplete UTF-8 sequences. // Note that we use only one buffer even though stderr and stdout are logically separate streams. // This matches the behavior of Linux. // Protected by g_console_output_buffer_lock. static auto& g_console_output_buffer = *new std::vector<char>(); // Internal helper function to write UTF-8 bytes to a console. Returns -1 on error. static int _console_write_utf8(const char* const buf, const size_t buf_size, FILE* stream, HANDLE console) { std::wstring output; const int saved_errno = errno; std::vector<char> combined_buffer; // Complete UTF-8 sequences that should be immediately written to the console. const char* utf8; size_t utf8_size; adb_mutex_lock(&g_console_output_buffer_lock); if (g_console_output_buffer.empty()) { // If g_console_output_buffer doesn't have a buffered up incomplete UTF-8 sequence (the // common case with plain ASCII), parse buf directly. utf8 = buf; utf8_size = internal::ParseCompleteUTF8(buf, buf + buf_size, &g_console_output_buffer); } else { // If g_console_output_buffer has a buffered up incomplete UTF-8 sequence, move it to // combined_buffer (and effectively clear g_console_output_buffer) and append buf to // combined_buffer, then parse it all together. combined_buffer.swap(g_console_output_buffer); combined_buffer.insert(combined_buffer.end(), buf, buf + buf_size); // Try to convert from data that might be UTF-8 to UTF-16, ignoring errors. // Data might not be UTF-8 if the user cat's random data, runs dmesg, etc. utf8 = combined_buffer.data(); utf8_size = internal::ParseCompleteUTF8(utf8, utf8 + combined_buffer.size(), &g_console_output_buffer); } adb_mutex_unlock(&g_console_output_buffer_lock); std::wstring utf16; // Try to convert from data that might be UTF-8 to UTF-16, ignoring errors (just like Linux // which does not return an error on bad UTF-8). Data might not be UTF-8 if the user cat's // random data, runs dmesg (which might have non-UTF-8), etc. // This could throw std::bad_alloc. (void)android::base::UTF8ToWide(buf, size, &output); (void)android::base::UTF8ToWide(utf8, utf8_size, &utf16); // Note that this does not do \n => \r\n translation because that // doesn't seem necessary for the Windows console. For the Windows Loading @@ -3639,16 +3729,16 @@ static int _console_write_utf8(const char* buf, size_t size, FILE* stream, // Write UTF-16 to the console. DWORD written = 0; if (!WriteConsoleW(console, output.c_str(), output.length(), &written, NULL)) { if (!WriteConsoleW(console, utf16.c_str(), utf16.length(), &written, NULL)) { errno = EIO; return -1; } // This is the number of UTF-16 chars written, which might be different // than the number of UTF-8 chars passed in. It doesn't seem practical to // get this count correct. return written; // Return the size of the original buffer passed in, signifying that we consumed it all, even // if nothing was displayed, in the case of being passed an incomplete UTF-8 sequence. This // matches the Linux behavior. errno = saved_errno; return buf_size; } // Function prototype because attributes cannot be placed on func definitions. Loading @@ -3660,14 +3750,21 @@ static int _console_vfprintf(const HANDLE console, FILE* stream, // Returns -1 on error. static int _console_vfprintf(const HANDLE console, FILE* stream, const char *format, va_list ap) { const int saved_errno = errno; std::string output_utf8; // Format the string. // This could throw std::bad_alloc. android::base::StringAppendV(&output_utf8, format, ap); return _console_write_utf8(output_utf8.c_str(), output_utf8.length(), stream, console); const int result = _console_write_utf8(output_utf8.c_str(), output_utf8.length(), stream, console); if (result != -1) { errno = saved_errno; } else { // If -1 was returned, errno has been set. } return result; } // Version of vfprintf() that takes UTF-8 and can write Unicode to a Loading @@ -3689,6 +3786,11 @@ int adb_vfprintf(FILE *stream, const char *format, va_list ap) { } } // Version of vprintf() that takes UTF-8 and can write Unicode to a Windows console. int adb_vprintf(const char *format, va_list ap) { return adb_vfprintf(stdout, format, ap); } // Version of fprintf() that takes UTF-8 and can write Unicode to a // Windows console. int adb_fprintf(FILE *stream, const char *format, ...) { Loading Loading @@ -3716,6 +3818,7 @@ int adb_printf(const char *format, ...) { int adb_fputs(const char* buf, FILE* stream) { // adb_fprintf returns -1 on error, which is conveniently the same as EOF // which fputs (and hence adb_fputs) should return on error. static_assert(EOF == -1, "EOF is not -1, so this code needs to be fixed"); return adb_fprintf(stream, "%s", buf); } Loading @@ -3723,32 +3826,32 @@ int adb_fputs(const char* buf, FILE* stream) { // Windows console. int adb_fputc(int ch, FILE* stream) { const int result = adb_fprintf(stream, "%c", ch); if (result <= 0) { // If there was an error, or if nothing was printed (which should be an // error), return an error, which fprintf signifies with EOF. if (result == -1) { return EOF; } // For success, fputc returns the char, cast to unsigned char, then to int. return static_cast<unsigned char>(ch); } // Version of putchar() that takes UTF-8 and can write Unicode to a Windows console. int adb_putchar(int ch) { return adb_fputc(ch, stdout); } // Version of puts() that takes UTF-8 and can write Unicode to a Windows console. int adb_puts(const char* buf) { // adb_printf returns -1 on error, which is conveniently the same as EOF // which puts (and hence adb_puts) should return on error. static_assert(EOF == -1, "EOF is not -1, so this code needs to be fixed"); return adb_printf("%s\n", buf); } // Internal function to write UTF-8 to a Win32 console. Returns the number of // items (of length size) written. On error, returns a short item count or 0. static size_t _console_fwrite(const void* ptr, size_t size, size_t nmemb, FILE* stream, HANDLE console) { // TODO: Note that a Unicode character could be several UTF-8 bytes. But // if we're passed only some of the bytes of a character (for example, from // the network socket for adb shell), we won't be able to convert the char // to a complete UTF-16 char (or surrogate pair), so the output won't look // right. // // To fix this, see libutils/Unicode.cpp for hints on decoding UTF-8. // // For now we ignore this problem because the alternative is that we'd have // to parse UTF-8 and buffer things up (doable). At least this is better // than what we had before -- always incorrect multi-byte UTF-8 output. int result = _console_write_utf8(reinterpret_cast<const char*>(ptr), size * nmemb, stream, console); const int result = _console_write_utf8(reinterpret_cast<const char*>(ptr), size * nmemb, stream, console); if (result == -1) { return 0; } Loading
adb/sysdeps_win32_test.cpp +57 −0 Original line number Diff line number Diff line Loading @@ -137,3 +137,60 @@ TEST(sysdeps_win32, unix_isatty) { // Make sure an invalid FD is handled correctly. EXPECT_EQ(0, unix_isatty(-1)); } void TestParseCompleteUTF8(const char* buf, const size_t buf_size, const size_t expected_complete_bytes, const std::vector<char>& expected_remaining_bytes) { std::vector<char> remaining_bytes; const size_t complete_bytes = internal::ParseCompleteUTF8(buf, buf + buf_size, &remaining_bytes); EXPECT_EQ(expected_complete_bytes, complete_bytes); EXPECT_EQ(expected_remaining_bytes, remaining_bytes); } TEST(sysdeps_win32, ParseCompleteUTF8) { const std::vector<std::vector<char>> multi_byte_sequences = { { '\xc2', '\xa9' }, // 2 byte UTF-8 sequence { '\xe1', '\xb4', '\xa8' }, // 3 byte UTF-8 sequence { '\xf0', '\x9f', '\x98', '\x80' }, // 4 byte UTF-8 sequence }; std::vector<std::vector<char>> all_sequences = { {}, // 0 bytes { '\0' }, // NULL byte { 'a' }, // 1 byte UTF-8 sequence }; all_sequences.insert(all_sequences.end(), multi_byte_sequences.begin(), multi_byte_sequences.end()); // Vary a prefix of bytes in front of the sequence that we're actually interested in parsing. for (const auto& prefix : all_sequences) { // Parse (prefix + one byte of the sequence at a time) for (const auto& seq : multi_byte_sequences) { std::vector<char> buffer(prefix); // For every byte of the sequence except the last for (size_t i = 0; i < seq.size() - 1; ++i) { buffer.push_back(seq[i]); // When parsing an incomplete UTF-8 sequence, the amount of the buffer preceding // the start of the incomplete UTF-8 sequence is valid. The remaining bytes are the // bytes of the incomplete UTF-8 sequence. TestParseCompleteUTF8(buffer.data(), buffer.size(), prefix.size(), std::vector<char>(seq.begin(), seq.begin() + i + 1)); } // For the last byte of the sequence buffer.push_back(seq.back()); TestParseCompleteUTF8(buffer.data(), buffer.size(), buffer.size(), std::vector<char>()); } // Parse (prefix (aka sequence) + invalid trailing bytes) to verify that the invalid // trailing bytes are immediately "returned" to prevent them from being stuck in some // buffer. std::vector<char> buffer(prefix); for (size_t i = 0; i < 8; ++i) { buffer.push_back(0x80); // trailing byte TestParseCompleteUTF8(buffer.data(), buffer.size(), buffer.size(), std::vector<char>()); } } }