diff options
-rw-r--r-- | libutils/Unicode.cpp | 43 | ||||
-rw-r--r-- | libutils/include/utils/Unicode.h | 18 |
2 files changed, 0 insertions, 61 deletions
diff --git a/libutils/Unicode.cpp b/libutils/Unicode.cpp index b6e457b04..843a81afb 100644 --- a/libutils/Unicode.cpp +++ b/libutils/Unicode.cpp @@ -359,49 +359,6 @@ void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst, size_t dst_le // UTF-8 // -------------------------------------------------------------------------- -ssize_t utf8_length(const char *src) -{ - const char *cur = src; - size_t ret = 0; - while (*cur != '\0') { - const char first_char = *cur++; - if ((first_char & 0x80) == 0) { // ASCII - ret += 1; - continue; - } - // (UTF-8's character must not be like 10xxxxxx, - // but 110xxxxx, 1110xxxx, ... or 1111110x) - if ((first_char & 0x40) == 0) { - return -1; - } - - int32_t mask, to_ignore_mask; - size_t num_to_read = 0; - char32_t utf32 = 0; - for (num_to_read = 1, mask = 0x40, to_ignore_mask = 0x80; - num_to_read < 5 && (first_char & mask); - num_to_read++, to_ignore_mask |= mask, mask >>= 1) { - if ((*cur & 0xC0) != 0x80) { // must be 10xxxxxx - return -1; - } - // 0x3F == 00111111 - utf32 = (utf32 << 6) + (*cur++ & 0x3F); - } - // "first_char" must be (110xxxxx - 11110xxx) - if (num_to_read == 5) { - return -1; - } - to_ignore_mask |= mask; - utf32 |= ((~to_ignore_mask) & first_char) << (6 * (num_to_read - 1)); - if (utf32 > kUnicodeMaxCodepoint) { - return -1; - } - - ret += num_to_read; - } - return ret; -} - ssize_t utf16_to_utf8_length(const char16_t *src, size_t src_len) { if (src == nullptr || src_len == 0) { diff --git a/libutils/include/utils/Unicode.h b/libutils/include/utils/Unicode.h index fc6712d9b..00873839b 100644 --- a/libutils/include/utils/Unicode.h +++ b/libutils/include/utils/Unicode.h @@ -111,24 +111,6 @@ ssize_t utf16_to_utf8_length(const char16_t *src, size_t src_len); void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst, size_t dst_len); /** - * Returns the length of "src" when "src" is valid UTF-8 string. - * Returns 0 if src is NULL or 0-length string. Returns -1 when the source - * is an invalid string. - * - * This function should be used to determine whether "src" is valid UTF-8 - * characters with valid unicode codepoints. "src" must be nul-terminated. - * - * If you are going to use other utf8_to_... functions defined in this header - * with string which may not be valid UTF-8 with valid codepoint (form 0 to - * 0x10FFFF), you should use this function before calling others, since the - * other functions do not check whether the string is valid UTF-8 or not. - * - * If you do not care whether "src" is valid UTF-8 or not, you should use - * strlen() as usual, which should be much faster. - */ -ssize_t utf8_length(const char *src); - -/** * Returns the UTF-16 length of UTF-8 string "src". Returns -1 in case * it's invalid utf8. No buffer over-read occurs because of bound checks. Using overreadIsFatal you * can ask to log a message and fail in case the invalid utf8 could have caused an override if no |