diff options
-rw-r--r-- | luni/src/main/java/java/nio/charset/ModifiedUtf8.java | 178 | ||||
-rw-r--r-- | luni/src/test/java/libcore/java/lang/StringTest.java | 1 | ||||
-rw-r--r-- | luni/src/test/java/libcore/java/nio/charset/ModifiedUtf8Test.java | 314 | ||||
-rw-r--r-- | non_openjdk_java_files.bp | 1 |
4 files changed, 0 insertions, 494 deletions
diff --git a/luni/src/main/java/java/nio/charset/ModifiedUtf8.java b/luni/src/main/java/java/nio/charset/ModifiedUtf8.java deleted file mode 100644 index 51638ee4e8..0000000000 --- a/luni/src/main/java/java/nio/charset/ModifiedUtf8.java +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License - */ - -package java.nio.charset; - -import java.io.UTFDataFormatException; - -/** - * Encoding and decoding methods for Modified UTF-8 - * - * <p>Modified UTF-8 is a simple variation of UTF-8 in which {@code \u0000} is encoded as - * 0xc0 0x80 . This avoids the presence of bytes 0 in the output. - * - * @hide - */ -public class ModifiedUtf8 { - - /** - * Count the number of bytes in the modified UTF-8 representation of {@code s}. - * - * <p>Additionally, if {@code shortLength} is true, throw a {@code UTFDataFormatException} if - * the size cannot be presented in an (unsigned) java short. - */ - public static long countBytes(String s, boolean shortLength) throws UTFDataFormatException { - long counter = 0; - int strLen = s.length(); - for (int i = 0; i < strLen; i++) { - char c = s.charAt(i); - if (c < '\u0080') { - counter++; - if (c == '\u0000') { - counter++; - } - } else if (c < '\u0800') { - counter += 2; - } else { - counter += 3; - } - } - // Allow up to the maximum value of an unsigned short (as the value is known to be - // unsigned. - if (shortLength && counter > 0xffff) { - throw new UTFDataFormatException( - "Size of the encoded string doesn't fit in two bytes"); - } - return counter; - } - - /** - * Encode {@code s} into {@code dst} starting at offset {@code offset}. - * - * <p>The output buffer is guaranteed to have enough space. - */ - public static void encode(byte[] dst, int offset, String s) { - int strLen = s.length(); - for (int i = 0; i < strLen; i++) { - char c = s.charAt(i); - if (c < '\u0080') { - if (c == 0) { - dst[offset++] = (byte) 0xc0; - dst[offset++] = (byte) 0x80; - } else { - dst[offset++] = (byte) c; - } - } else if (c < '\u0800') { - dst[offset++] = (byte) ((c >>> 6) | 0xc0); - dst[offset++] = (byte) ((c & 0x3f) | 0x80); - } else { - dst[offset++] = (byte) ((c >>> 12) | 0xe0); - dst[offset++] = (byte) (((c >>> 6) & 0x3f) | 0x80); - dst[offset++] = (byte) ((c & 0x3f) | 0x80); - } - } - } - - /** - * Encodes {@code s} into a buffer with the following format: - * - * <p>- the first two bytes of the buffer are the length of the modified-utf8 output - * (as a big endian short. A UTFDataFormatException is thrown if the encoded size cannot be - * represented as a short. - * - * <p>- the remainder of the buffer contains the modified-utf8 output (equivalent to - * {@code encode(buf, 2, s)}). - */ - public static byte[] encode(String s) throws UTFDataFormatException { - long size = countBytes(s, true); - byte[] output = new byte[(int) size + 2]; - encode(output, 2, s); - output[0] = (byte) (size >>> 8); - output[1] = (byte) size; - return output; - } - - /** - * Decodes {@code length} utf-8 bytes from {@code in} starting at offset {@code offset} to - * {@code out}, - * - * <p>A maximum of {@code length} chars are written to the output starting at offset 0. - * {@code out} is assumed to have enough space for the output (a standard - * {@code ArrayIndexOutOfBoundsException} is thrown otherwise). - * - * <p>If a ‘0’ byte is encountered, it is converted to U+0000. - */ - public static String decode(byte[] in, char[] out, int offset, int length) - throws UTFDataFormatException { - if (offset < 0 || length < 0) { - throw new IllegalArgumentException("Illegal arguments: offset " + offset - + ". Length: " + length); - } - int outputIndex = 0; - int limitIndex = offset + length; - while (offset < limitIndex) { - int i = in[offset] & 0xff; - offset++; - if (i < 0x80) { - out[outputIndex] = (char) i; - outputIndex++; - continue; - } - if (0xc0 <= i && i < 0xe0) { - // This branch covers the case 0 = 0xc080. - - // The result is: 5 least-significant bits of i + 6 l-s bits of next input byte. - i = (i & 0x1f) << 6; - if(offset == limitIndex) { - throw new UTFDataFormatException("unexpected end of input"); - } - // Include 6 least-significant bits of the input byte. - if ((in[offset] & 0xc0) != 0x80) { - throw new UTFDataFormatException("bad second byte at " + offset); - } - out[outputIndex] = (char) (i | (in[offset] & 0x3f)); - offset++; - outputIndex++; - } else if(i < 0xf0) { - // The result is: 5 least-significant bits of i + 6 l-s bits of next input byte - // + 6 l-s of next to next input byte. - i = (i & 0x1f) << 12; - // Make sure there are are at least two bytes left. - if (offset + 1 >= limitIndex) { - throw new UTFDataFormatException("unexpected end of input"); - } - // Include 6 least-significant bits of the input byte, with 6 bits of room - // for the next byte. - if ((in[offset] & 0xc0) != 0x80) { - throw new UTFDataFormatException("bad second byte at " + offset); - } - i = i | (in[offset] & 0x3f) << 6; - offset++; - // Include 6 least-significant bits of the input byte. - if ((in[offset] & 0xc0) != 0x80) { - throw new UTFDataFormatException("bad third byte at " + offset); - } - out[outputIndex] = (char) (i | (in[offset] & 0x3f)); - offset++; - outputIndex++; - } else { - throw new UTFDataFormatException("Invalid UTF8 byte " - + (int) i + " at position " + (offset - 1)); - } - } - return String.valueOf(out, 0, outputIndex); - } -} diff --git a/luni/src/test/java/libcore/java/lang/StringTest.java b/luni/src/test/java/libcore/java/lang/StringTest.java index c440ebe1ba..395209aa42 100644 --- a/luni/src/test/java/libcore/java/lang/StringTest.java +++ b/luni/src/test/java/libcore/java/lang/StringTest.java @@ -29,7 +29,6 @@ import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetEncoder; import java.nio.charset.CoderResult; import java.nio.charset.CodingErrorAction; -import java.nio.charset.ModifiedUtf8; import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.ArrayList; diff --git a/luni/src/test/java/libcore/java/nio/charset/ModifiedUtf8Test.java b/luni/src/test/java/libcore/java/nio/charset/ModifiedUtf8Test.java deleted file mode 100644 index f7c91ba770..0000000000 --- a/luni/src/test/java/libcore/java/nio/charset/ModifiedUtf8Test.java +++ /dev/null @@ -1,314 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License - */ - -package libcore.java.nio.charset; - -import junit.framework.TestCase; - -import java.io.UTFDataFormatException; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.charset.ModifiedUtf8; -import java.util.Arrays; - -/** - * Tests for {@code ModifiedUtf8}. - */ -public class ModifiedUtf8Test extends TestCase { - public void test_decode_singleChar() throws Exception { - assertEquals("A", ModifiedUtf8.decode(new byte[] { 'A' }, new char[1], 0, 1)); - } - - public void test_decode_checkOffsetAndLength() throws Exception { - assertEquals("BC", ModifiedUtf8.decode( - new byte[] { 'A', 'B', 'C', 'D' }, new char[2], 1, 2)); - } - - public void test_decode_unexpectedEndOfStreamAfterC2_throws() { - // We need at least one byte after 0xc2. - try { - ModifiedUtf8.decode(new byte[]{'B', (byte) 0xc2}, new char[2], 0, 2); - fail("Should throw " + UTFDataFormatException.class.getName()); - } catch(UTFDataFormatException expected) { - // Expected. - } - } - - public void test_decode_unexpectedEndOfStreamAfterE0_throws() { - // We need at least two bytes after 0xe0. - try { - ModifiedUtf8.decode( - new byte[] { 'B', (byte) 0xe0, (byte) 0xab }, new char[2], 0, 3); - fail("Should throw " + UTFDataFormatException.class.getName()); - } catch(UTFDataFormatException expected) { - // Expected. - } - } - - public void test_decode_endOfStreamAfterC2() throws Exception { - assertEquals("B\u00a0", ModifiedUtf8.decode( - new byte[] { 'B', (byte) 0xc2, (byte) 0xa0 }, - new char[2], - 0, - 3)); - } - - public void test_decode_endOfStreamAfterE0() throws Exception { - assertEquals("B\u0830", ModifiedUtf8.decode( - new byte[] { 'B', (byte) 0xe0, (byte) 0xa0, (byte) 0xb0 }, - new char[2], - 0, - 4)); - } - - public void test_decode_invalidByte_characterUnknown() throws Exception { - try { - ModifiedUtf8.decode(new byte[]{'A', (byte) 0xf0}, new char[2], 0, 2); - fail("Should throw " + UTFDataFormatException.class.getName()); - } catch (UTFDataFormatException expected) { - // Expected. - } - } - - public void test_decode_someC2Character() throws Exception { - assertEquals("A\u00a6", ModifiedUtf8.decode( - new byte[] { 'A', (byte) 0xc2, (byte) 0xa6 }, new char[2], 0, 3)); - } - - public void test_decode_lastC2Character() throws Exception { - assertEquals("A\u00bf", ModifiedUtf8.decode( - new byte[] { 'A', (byte) 0xc2, (byte) 0xbf }, new char[2], 0, 3)); - } - - public void test_decode_someTwoByteCharacter() throws Exception { - // Make sure bit masking works - assertEquals("A\u0606", ModifiedUtf8.decode( - new byte[] { 'A', (byte) 0xd8, (byte) 0x86 }, new char[3], 0, 3)); - } - - public void test_decode_lastTwoByteCharacter() throws Exception { - assertEquals("A\u07ff", ModifiedUtf8.decode( - new byte[] { 'A', (byte) 0xdf, (byte) 0xbf }, new char[2], 0, 3)); - } - - public void test_decode_firstE0Character() throws Exception { - assertEquals("A\u0800", ModifiedUtf8.decode( - new byte[] { 'A', (byte) 0xe0, (byte) 0xa0, (byte) 0x80 }, - new char[2], - 0, - 4)); - } - - public void test_decode_someThreeBytesCharacter() throws Exception { - assertEquals("A\u31c6", ModifiedUtf8.decode( - new byte[]{ 'A', (byte) 0xe3, (byte) 0x87, (byte) 0x86 }, - new char[2], - 0, - 4)); - } - - public void test_decode_lastThreeBytesCharacter() throws Exception { - assertEquals("A\uffff", ModifiedUtf8.decode( - new byte[] { 'A', (byte) 0xef, (byte) 0xbf, (byte) 0xbf }, - new char[2], - 0, - 4)); - } - - public void test_decode_twoByteCharacterAfterThreeByteCharacter() throws Exception { - assertEquals("\uffff\u0606A", ModifiedUtf8.decode( - new byte[] { (byte) 0xef, (byte) 0xbf, (byte) 0xbf, (byte) 0xd8, (byte) 0x86, 'A' }, - new char[3], - 0, - 6)); - } - - public void test_decode_c080isZero() throws Exception { - assertEquals("A\u0000A", ModifiedUtf8.decode( - new byte[] { 'A', (byte) 0xc0, (byte) 0x80, 'A' }, new char[3], 0, 4)); - } - - public void test_decode_00isZero() throws Exception { - assertEquals("A\u0000A", ModifiedUtf8.decode( - new byte[] { 'A', (byte) 0, 'A' }, new char[3], 0, 3)); - } - - public void test_decode_insufficientOutputSpace_throws() throws Exception{ - try { - ModifiedUtf8.decode(new byte[] { 'A', (byte) 0, 'A' }, new char[2], 0, 3); - fail("Should throw " + ArrayIndexOutOfBoundsException.class.getName()); - } catch(ArrayIndexOutOfBoundsException expected) { - // Expected. - } - } - - public void test_decode_checkBadSecondByteOfTwo() throws Exception { - try { - ModifiedUtf8.decode(new byte[]{(byte) 0xc0, (byte) 0xc0}, new char[2], 0, 2); - fail("Should throw " + UTFDataFormatException.class.getName()); - } catch (UTFDataFormatException expected) { - // Expected. - } - } - - public void test_decode_checkBadSecondByteOfThree() throws Exception{ - try { - ModifiedUtf8.decode(new byte[]{ - (byte) 0xe0, (byte) 0xc0, (byte) 0x80}, new char[2], 0, 2); - fail("Should throw " + UTFDataFormatException.class.getName()); - } catch (UTFDataFormatException expected) { - // Expected. - } - } - - public void test_decode_checkBadThirdByteOfThree() throws Exception{ - try { - ModifiedUtf8.decode(new byte[]{ - (byte) 0xe0, (byte) 0x80, (byte) 0xc0}, new char[2], 0, 2); - fail("Should throw " + UTFDataFormatException.class.getName()); - } catch (UTFDataFormatException expected) { - // Expected. - } - } - - public void test_decode_insufficientInput_throws() throws Exception{ - try { - ModifiedUtf8.decode(new byte[] { 'A', (byte) 0, 'A' }, new char[8], 0, 100); - fail("Should throw " + ArrayIndexOutOfBoundsException.class.getName()); - } catch(ArrayIndexOutOfBoundsException expected) { - // Expected. - } - } - - public void test_decode_extraCharsInArray_ignored() throws Exception { - assertEquals("A", ModifiedUtf8.decode(new byte[] { 'A' }, new char[] { 'B', 'Z' }, 0, 1)); - } - - public void test_countBytes_rightCount() throws Exception { - assertEquals(0, ModifiedUtf8.countBytes("", false)); - assertEquals(2, ModifiedUtf8.countBytes("\u0000", false)); - assertEquals(1, ModifiedUtf8.countBytes("A", false)); - assertEquals(1, ModifiedUtf8.countBytes("\u007f", false)); - assertEquals(2, ModifiedUtf8.countBytes("\u0080", false)); - assertEquals(2, ModifiedUtf8.countBytes("\u07ff", false)); - assertEquals(3, ModifiedUtf8.countBytes("\u0800", false)); - assertEquals(3, ModifiedUtf8.countBytes("\uffff", false)); - } - - public void test_countBytes_checkExceptionThrown() throws Exception { - // These two mustn't throw... - ModifiedUtf8.countBytes("", true); - ModifiedUtf8.countBytes("A", true); - - char[] unsignedShortSizedCharArray = new char[2 * Short.MAX_VALUE + 1]; - for (int i = 0; i < unsignedShortSizedCharArray.length; i++) { - unsignedShortSizedCharArray[i] = 'A'; - } - String unsignedShortSizedString = String.copyValueOf(unsignedShortSizedCharArray); - - char[] sizeLongerThanUnsignedShortCharArray = new char[2 * Short.MAX_VALUE + 2]; - for (int i = 0; i < sizeLongerThanUnsignedShortCharArray.length; i++) { - sizeLongerThanUnsignedShortCharArray[i] = 'A'; - } - String sizeLongerThanUnsignedShortString = String.copyValueOf( - sizeLongerThanUnsignedShortCharArray); - - // Mustn't throw. - ModifiedUtf8.countBytes(unsignedShortSizedString, true); - - try { - // Must throw. - ModifiedUtf8.countBytes(sizeLongerThanUnsignedShortString, true); - fail(); - } catch (UTFDataFormatException expected) { - // Expected. - } - - // Mustn't throw. - ModifiedUtf8.countBytes(unsignedShortSizedString, false); - ModifiedUtf8.countBytes(sizeLongerThanUnsignedShortString, false); - } - - public void test_encode() throws Exception { - assertTrue(Arrays.equals(new byte[]{0, 1, 'A'}, ModifiedUtf8.encode("A"))); - assertTrue(Arrays.equals(new byte[] { 0, 3, 'A', 'B', 'C' }, ModifiedUtf8.encode("ABC"))); - assertTrue(Arrays.equals(new byte[] { 0, 3, 'A', (byte) 0xc2, (byte) 0xa0 }, - ModifiedUtf8.encode("A\u00a0"))); - assertTrue(Arrays.equals(new byte[] { 0, 4, 'A', (byte) 0xe0, (byte) 0xa0, (byte) 0xb0 }, - ModifiedUtf8.encode("A\u0830"))); - assertTrue(Arrays.equals(new byte[] { 0, 3, 'A', (byte) 0xc2, (byte) 0xa6 }, - ModifiedUtf8.encode("A\u00a6"))); - assertTrue(Arrays.equals(new byte[] { 0, 3, 'A', (byte) 0xc2, (byte) 0xbf }, - ModifiedUtf8.encode("A\u00bf"))); - assertTrue(Arrays.equals(new byte[] { 0, 3, 'A', (byte) 0xd8, (byte) 0x86 }, - ModifiedUtf8.encode("A\u0606"))); - assertTrue(Arrays.equals(new byte[] { 0, 3, 'A', (byte) 0xdf, (byte) 0xbf }, - ModifiedUtf8.encode("A\u07ff"))); - assertTrue(Arrays.equals(new byte[] { 0, 4, 'A', (byte) 0xe0, (byte) 0xa0, (byte) 0x80 }, - ModifiedUtf8.encode("A\u0800"))); - assertTrue(Arrays.equals(new byte[] { 0, 4, 'A', (byte) 0xe3, (byte) 0x87, (byte) 0x86 }, - ModifiedUtf8.encode("A\u31c6"))); - assertTrue(Arrays.equals(new byte[] { 0, 4, 'A', (byte) 0xef, (byte) 0xbf, (byte) 0xbf }, - ModifiedUtf8.encode("A\uffff"))); - assertTrue(Arrays.equals(new byte[] { 0, 3, 'A', (byte) 0xc0, (byte) 0x80 }, - ModifiedUtf8.encode("A\u0000"))); - assertTrue( - Arrays.equals(new byte[] { 0, 8, (byte) 0xe3, (byte) 0x87, (byte) 0x86, - (byte) 0xd8, (byte) 0x86, (byte) 0xc0, (byte) 0x80, 'A' }, - ModifiedUtf8.encode("\u31c6\u0606\u0000A"))); - } - - public void test_encode_throws() throws Exception { - char[] unsignedShortSizedCharArray = new char[Short.MAX_VALUE * 2 + 1]; - for (int i = 0; i < unsignedShortSizedCharArray.length; i++) { - unsignedShortSizedCharArray[i] = 'A'; - } - String unsignedShortSizedString = String.copyValueOf(unsignedShortSizedCharArray); - - char[] sizeLongerThanUnsignedShortCharArray = new char[Short.MAX_VALUE * 2 + 2]; - for (int i = 0; i < sizeLongerThanUnsignedShortCharArray.length; i++) { - sizeLongerThanUnsignedShortCharArray[i] = 'A'; - } - String sizeLongerThanUnsignedShortString = - String.copyValueOf(sizeLongerThanUnsignedShortCharArray); - - // Mustn't throw. - ModifiedUtf8.encode(unsignedShortSizedString); - try { - // Must throw. - ModifiedUtf8.encode(sizeLongerThanUnsignedShortString); - fail("Should throw " + UTFDataFormatException.class.getName()); - } catch (UTFDataFormatException expected) { - // Expected. - } - } - - public void test_encode_lengthAtBeginning() throws Exception { - int testStringLength = 20000; - char[] charArray = new char[testStringLength]; - for (int i = 0; i < charArray.length; i++) { - charArray[i] = 'A'; - } - String testString = String.copyValueOf(charArray); - - // Mustn't throw. - byte[] result = ModifiedUtf8.encode(testString); - ByteBuffer b = ByteBuffer.wrap(result); - b.order(ByteOrder.BIG_ENDIAN); - assertEquals(testStringLength, b.getShort()); - } - -} diff --git a/non_openjdk_java_files.bp b/non_openjdk_java_files.bp index 4f7a33d64b..98f60068dc 100644 --- a/non_openjdk_java_files.bp +++ b/non_openjdk_java_files.bp @@ -179,7 +179,6 @@ filegroup { "luni/src/main/java/java/nio/charset/CharsetDecoderICU.java", "luni/src/main/java/java/nio/charset/CharsetEncoderICU.java", "luni/src/main/java/java/nio/charset/CharsetICU.java", - "luni/src/main/java/java/nio/charset/ModifiedUtf8.java", "luni/src/main/java/javax/xml/XMLConstants.java", "luni/src/main/java/javax/xml/datatype/DatatypeConfigurationException.java", "luni/src/main/java/javax/xml/datatype/DatatypeConstants.java", |