summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- luni/src/main/java/java/nio/charset/ModifiedUtf8.java | 178
-rw-r--r-- luni/src/test/java/libcore/java/lang/StringTest.java | 1
-rw-r--r-- luni/src/test/java/libcore/java/nio/charset/ModifiedUtf8Test.java | 314
-rw-r--r-- non_openjdk_java_files.bp | 1
4 files changed, 0 insertions, 494 deletions
diff --git a/luni/src/main/java/java/nio/charset/ModifiedUtf8.java b/luni/src/main/java/java/nio/charset/ModifiedUtf8.java
deleted file mode 100644
index 51638ee4e8..0000000000
--- a/luni/src/main/java/java/nio/charset/ModifiedUtf8.java
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License
- */
-
-package java.nio.charset;
-
-import java.io.UTFDataFormatException;
-
-/**
- * Encoding and decoding methods for Modified UTF-8
- *
- * <p>Modified UTF-8 is a simple variation of UTF-8 in which {@code \u0000} is encoded as
- * 0xc0 0x80 . This avoids the presence of bytes 0 in the output.
- *
- * @hide
- */
-public class ModifiedUtf8 {
-
- /**
- * Count the number of bytes in the modified UTF-8 representation of {@code s}.
- *
- * <p>Additionally, if {@code shortLength} is true, throw a {@code UTFDataFormatException} if
- * the size cannot be presented in an (unsigned) java short.
- */
- public static long countBytes(String s, boolean shortLength) throws UTFDataFormatException {
- long counter = 0;
- int strLen = s.length();
- for (int i = 0; i < strLen; i++) {
- char c = s.charAt(i);
- if (c < '\u0080') {
- counter++;
- if (c == '\u0000') {
- counter++;
- }
- } else if (c < '\u0800') {
- counter += 2;
- } else {
- counter += 3;
- }
- }
- // Allow up to the maximum value of an unsigned short (as the value is known to be
- // unsigned.
- if (shortLength && counter > 0xffff) {
- throw new UTFDataFormatException(
- "Size of the encoded string doesn't fit in two bytes");
- }
- return counter;
- }
-
- /**
- * Encode {@code s} into {@code dst} starting at offset {@code offset}.
- *
- * <p>The output buffer is guaranteed to have enough space.
- */
- public static void encode(byte[] dst, int offset, String s) {
- int strLen = s.length();
- for (int i = 0; i < strLen; i++) {
- char c = s.charAt(i);
- if (c < '\u0080') {
- if (c == 0) {
- dst[offset++] = (byte) 0xc0;
- dst[offset++] = (byte) 0x80;
- } else {
- dst[offset++] = (byte) c;
- }
- } else if (c < '\u0800') {
- dst[offset++] = (byte) ((c >>> 6) | 0xc0);
- dst[offset++] = (byte) ((c & 0x3f) | 0x80);
- } else {
- dst[offset++] = (byte) ((c >>> 12) | 0xe0);
- dst[offset++] = (byte) (((c >>> 6) & 0x3f) | 0x80);
- dst[offset++] = (byte) ((c & 0x3f) | 0x80);
- }
- }
- }
-
- /**
- * Encodes {@code s} into a buffer with the following format:
- *
- * <p>- the first two bytes of the buffer are the length of the modified-utf8 output
- * (as a big endian short. A UTFDataFormatException is thrown if the encoded size cannot be
- * represented as a short.
- *
- * <p>- the remainder of the buffer contains the modified-utf8 output (equivalent to
- * {@code encode(buf, 2, s)}).
- */
- public static byte[] encode(String s) throws UTFDataFormatException {
- long size = countBytes(s, true);
- byte[] output = new byte[(int) size + 2];
- encode(output, 2, s);
- output[0] = (byte) (size >>> 8);
- output[1] = (byte) size;
- return output;
- }
-
- /**
- * Decodes {@code length} utf-8 bytes from {@code in} starting at offset {@code offset} to
- * {@code out},
- *
- * <p>A maximum of {@code length} chars are written to the output starting at offset 0.
- * {@code out} is assumed to have enough space for the output (a standard
- * {@code ArrayIndexOutOfBoundsException} is thrown otherwise).
- *
- * <p>If a ‘0’ byte is encountered, it is converted to U+0000.
- */
- public static String decode(byte[] in, char[] out, int offset, int length)
- throws UTFDataFormatException {
- if (offset < 0 || length < 0) {
- throw new IllegalArgumentException("Illegal arguments: offset " + offset
- + ". Length: " + length);
- }
- int outputIndex = 0;
- int limitIndex = offset + length;
- while (offset < limitIndex) {
- int i = in[offset] & 0xff;
- offset++;
- if (i < 0x80) {
- out[outputIndex] = (char) i;
- outputIndex++;
- continue;
- }
- if (0xc0 <= i && i < 0xe0) {
- // This branch covers the case 0 = 0xc080.
-
- // The result is: 5 least-significant bits of i + 6 l-s bits of next input byte.
- i = (i & 0x1f) << 6;
- if(offset == limitIndex) {
- throw new UTFDataFormatException("unexpected end of input");
- }
- // Include 6 least-significant bits of the input byte.
- if ((in[offset] & 0xc0) != 0x80) {
- throw new UTFDataFormatException("bad second byte at " + offset);
- }
- out[outputIndex] = (char) (i | (in[offset] & 0x3f));
- offset++;
- outputIndex++;
- } else if(i < 0xf0) {
- // The result is: 5 least-significant bits of i + 6 l-s bits of next input byte
- // + 6 l-s of next to next input byte.
- i = (i & 0x1f) << 12;
- // Make sure there are are at least two bytes left.
- if (offset + 1 >= limitIndex) {
- throw new UTFDataFormatException("unexpected end of input");
- }
- // Include 6 least-significant bits of the input byte, with 6 bits of room
- // for the next byte.
- if ((in[offset] & 0xc0) != 0x80) {
- throw new UTFDataFormatException("bad second byte at " + offset);
- }
- i = i | (in[offset] & 0x3f) << 6;
- offset++;
- // Include 6 least-significant bits of the input byte.
- if ((in[offset] & 0xc0) != 0x80) {
- throw new UTFDataFormatException("bad third byte at " + offset);
- }
- out[outputIndex] = (char) (i | (in[offset] & 0x3f));
- offset++;
- outputIndex++;
- } else {
- throw new UTFDataFormatException("Invalid UTF8 byte "
- + (int) i + " at position " + (offset - 1));
- }
- }
- return String.valueOf(out, 0, outputIndex);
- }
-}
diff --git a/luni/src/test/java/libcore/java/lang/StringTest.java b/luni/src/test/java/libcore/java/lang/StringTest.java
index c440ebe1ba..395209aa42 100644
--- a/luni/src/test/java/libcore/java/lang/StringTest.java
+++ b/luni/src/test/java/libcore/java/lang/StringTest.java
@@ -29,7 +29,6 @@ import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
-import java.nio.charset.ModifiedUtf8;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.ArrayList;
diff --git a/luni/src/test/java/libcore/java/nio/charset/ModifiedUtf8Test.java b/luni/src/test/java/libcore/java/nio/charset/ModifiedUtf8Test.java
deleted file mode 100644
index f7c91ba770..0000000000
--- a/luni/src/test/java/libcore/java/nio/charset/ModifiedUtf8Test.java
+++ /dev/null
@@ -1,314 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License
- */
-
-package libcore.java.nio.charset;
-
-import junit.framework.TestCase;
-
-import java.io.UTFDataFormatException;
-import java.nio.ByteBuffer;
-import java.nio.ByteOrder;
-import java.nio.charset.ModifiedUtf8;
-import java.util.Arrays;
-
-/**
- * Tests for {@code ModifiedUtf8}.
- */
-public class ModifiedUtf8Test extends TestCase {
- public void test_decode_singleChar() throws Exception {
- assertEquals("A", ModifiedUtf8.decode(new byte[] { 'A' }, new char[1], 0, 1));
- }
-
- public void test_decode_checkOffsetAndLength() throws Exception {
- assertEquals("BC", ModifiedUtf8.decode(
- new byte[] { 'A', 'B', 'C', 'D' }, new char[2], 1, 2));
- }
-
- public void test_decode_unexpectedEndOfStreamAfterC2_throws() {
- // We need at least one byte after 0xc2.
- try {
- ModifiedUtf8.decode(new byte[]{'B', (byte) 0xc2}, new char[2], 0, 2);
- fail("Should throw " + UTFDataFormatException.class.getName());
- } catch(UTFDataFormatException expected) {
- // Expected.
- }
- }
-
- public void test_decode_unexpectedEndOfStreamAfterE0_throws() {
- // We need at least two bytes after 0xe0.
- try {
- ModifiedUtf8.decode(
- new byte[] { 'B', (byte) 0xe0, (byte) 0xab }, new char[2], 0, 3);
- fail("Should throw " + UTFDataFormatException.class.getName());
- } catch(UTFDataFormatException expected) {
- // Expected.
- }
- }
-
- public void test_decode_endOfStreamAfterC2() throws Exception {
- assertEquals("B\u00a0", ModifiedUtf8.decode(
- new byte[] { 'B', (byte) 0xc2, (byte) 0xa0 },
- new char[2],
- 0,
- 3));
- }
-
- public void test_decode_endOfStreamAfterE0() throws Exception {
- assertEquals("B\u0830", ModifiedUtf8.decode(
- new byte[] { 'B', (byte) 0xe0, (byte) 0xa0, (byte) 0xb0 },
- new char[2],
- 0,
- 4));
- }
-
- public void test_decode_invalidByte_characterUnknown() throws Exception {
- try {
- ModifiedUtf8.decode(new byte[]{'A', (byte) 0xf0}, new char[2], 0, 2);
- fail("Should throw " + UTFDataFormatException.class.getName());
- } catch (UTFDataFormatException expected) {
- // Expected.
- }
- }
-
- public void test_decode_someC2Character() throws Exception {
- assertEquals("A\u00a6", ModifiedUtf8.decode(
- new byte[] { 'A', (byte) 0xc2, (byte) 0xa6 }, new char[2], 0, 3));
- }
-
- public void test_decode_lastC2Character() throws Exception {
- assertEquals("A\u00bf", ModifiedUtf8.decode(
- new byte[] { 'A', (byte) 0xc2, (byte) 0xbf }, new char[2], 0, 3));
- }
-
- public void test_decode_someTwoByteCharacter() throws Exception {
- // Make sure bit masking works
- assertEquals("A\u0606", ModifiedUtf8.decode(
- new byte[] { 'A', (byte) 0xd8, (byte) 0x86 }, new char[3], 0, 3));
- }
-
- public void test_decode_lastTwoByteCharacter() throws Exception {
- assertEquals("A\u07ff", ModifiedUtf8.decode(
- new byte[] { 'A', (byte) 0xdf, (byte) 0xbf }, new char[2], 0, 3));
- }
-
- public void test_decode_firstE0Character() throws Exception {
- assertEquals("A\u0800", ModifiedUtf8.decode(
- new byte[] { 'A', (byte) 0xe0, (byte) 0xa0, (byte) 0x80 },
- new char[2],
- 0,
- 4));
- }
-
- public void test_decode_someThreeBytesCharacter() throws Exception {
- assertEquals("A\u31c6", ModifiedUtf8.decode(
- new byte[]{ 'A', (byte) 0xe3, (byte) 0x87, (byte) 0x86 },
- new char[2],
- 0,
- 4));
- }
-
- public void test_decode_lastThreeBytesCharacter() throws Exception {
- assertEquals("A\uffff", ModifiedUtf8.decode(
- new byte[] { 'A', (byte) 0xef, (byte) 0xbf, (byte) 0xbf },
- new char[2],
- 0,
- 4));
- }
-
- public void test_decode_twoByteCharacterAfterThreeByteCharacter() throws Exception {
- assertEquals("\uffff\u0606A", ModifiedUtf8.decode(
- new byte[] { (byte) 0xef, (byte) 0xbf, (byte) 0xbf, (byte) 0xd8, (byte) 0x86, 'A' },
- new char[3],
- 0,
- 6));
- }
-
- public void test_decode_c080isZero() throws Exception {
- assertEquals("A\u0000A", ModifiedUtf8.decode(
- new byte[] { 'A', (byte) 0xc0, (byte) 0x80, 'A' }, new char[3], 0, 4));
- }
-
- public void test_decode_00isZero() throws Exception {
- assertEquals("A\u0000A", ModifiedUtf8.decode(
- new byte[] { 'A', (byte) 0, 'A' }, new char[3], 0, 3));
- }
-
- public void test_decode_insufficientOutputSpace_throws() throws Exception{
- try {
- ModifiedUtf8.decode(new byte[] { 'A', (byte) 0, 'A' }, new char[2], 0, 3);
- fail("Should throw " + ArrayIndexOutOfBoundsException.class.getName());
- } catch(ArrayIndexOutOfBoundsException expected) {
- // Expected.
- }
- }
-
- public void test_decode_checkBadSecondByteOfTwo() throws Exception {
- try {
- ModifiedUtf8.decode(new byte[]{(byte) 0xc0, (byte) 0xc0}, new char[2], 0, 2);
- fail("Should throw " + UTFDataFormatException.class.getName());
- } catch (UTFDataFormatException expected) {
- // Expected.
- }
- }
-
- public void test_decode_checkBadSecondByteOfThree() throws Exception{
- try {
- ModifiedUtf8.decode(new byte[]{
- (byte) 0xe0, (byte) 0xc0, (byte) 0x80}, new char[2], 0, 2);
- fail("Should throw " + UTFDataFormatException.class.getName());
- } catch (UTFDataFormatException expected) {
- // Expected.
- }
- }
-
- public void test_decode_checkBadThirdByteOfThree() throws Exception{
- try {
- ModifiedUtf8.decode(new byte[]{
- (byte) 0xe0, (byte) 0x80, (byte) 0xc0}, new char[2], 0, 2);
- fail("Should throw " + UTFDataFormatException.class.getName());
- } catch (UTFDataFormatException expected) {
- // Expected.
- }
- }
-
- public void test_decode_insufficientInput_throws() throws Exception{
- try {
- ModifiedUtf8.decode(new byte[] { 'A', (byte) 0, 'A' }, new char[8], 0, 100);
- fail("Should throw " + ArrayIndexOutOfBoundsException.class.getName());
- } catch(ArrayIndexOutOfBoundsException expected) {
- // Expected.
- }
- }
-
- public void test_decode_extraCharsInArray_ignored() throws Exception {
- assertEquals("A", ModifiedUtf8.decode(new byte[] { 'A' }, new char[] { 'B', 'Z' }, 0, 1));
- }
-
- public void test_countBytes_rightCount() throws Exception {
- assertEquals(0, ModifiedUtf8.countBytes("", false));
- assertEquals(2, ModifiedUtf8.countBytes("\u0000", false));
- assertEquals(1, ModifiedUtf8.countBytes("A", false));
- assertEquals(1, ModifiedUtf8.countBytes("\u007f", false));
- assertEquals(2, ModifiedUtf8.countBytes("\u0080", false));
- assertEquals(2, ModifiedUtf8.countBytes("\u07ff", false));
- assertEquals(3, ModifiedUtf8.countBytes("\u0800", false));
- assertEquals(3, ModifiedUtf8.countBytes("\uffff", false));
- }
-
- public void test_countBytes_checkExceptionThrown() throws Exception {
- // These two mustn't throw...
- ModifiedUtf8.countBytes("", true);
- ModifiedUtf8.countBytes("A", true);
-
- char[] unsignedShortSizedCharArray = new char[2 * Short.MAX_VALUE + 1];
- for (int i = 0; i < unsignedShortSizedCharArray.length; i++) {
- unsignedShortSizedCharArray[i] = 'A';
- }
- String unsignedShortSizedString = String.copyValueOf(unsignedShortSizedCharArray);
-
- char[] sizeLongerThanUnsignedShortCharArray = new char[2 * Short.MAX_VALUE + 2];
- for (int i = 0; i < sizeLongerThanUnsignedShortCharArray.length; i++) {
- sizeLongerThanUnsignedShortCharArray[i] = 'A';
- }
- String sizeLongerThanUnsignedShortString = String.copyValueOf(
- sizeLongerThanUnsignedShortCharArray);
-
- // Mustn't throw.
- ModifiedUtf8.countBytes(unsignedShortSizedString, true);
-
- try {
- // Must throw.
- ModifiedUtf8.countBytes(sizeLongerThanUnsignedShortString, true);
- fail();
- } catch (UTFDataFormatException expected) {
- // Expected.
- }
-
- // Mustn't throw.
- ModifiedUtf8.countBytes(unsignedShortSizedString, false);
- ModifiedUtf8.countBytes(sizeLongerThanUnsignedShortString, false);
- }
-
- public void test_encode() throws Exception {
- assertTrue(Arrays.equals(new byte[]{0, 1, 'A'}, ModifiedUtf8.encode("A")));
- assertTrue(Arrays.equals(new byte[] { 0, 3, 'A', 'B', 'C' }, ModifiedUtf8.encode("ABC")));
- assertTrue(Arrays.equals(new byte[] { 0, 3, 'A', (byte) 0xc2, (byte) 0xa0 },
- ModifiedUtf8.encode("A\u00a0")));
- assertTrue(Arrays.equals(new byte[] { 0, 4, 'A', (byte) 0xe0, (byte) 0xa0, (byte) 0xb0 },
- ModifiedUtf8.encode("A\u0830")));
- assertTrue(Arrays.equals(new byte[] { 0, 3, 'A', (byte) 0xc2, (byte) 0xa6 },
- ModifiedUtf8.encode("A\u00a6")));
- assertTrue(Arrays.equals(new byte[] { 0, 3, 'A', (byte) 0xc2, (byte) 0xbf },
- ModifiedUtf8.encode("A\u00bf")));
- assertTrue(Arrays.equals(new byte[] { 0, 3, 'A', (byte) 0xd8, (byte) 0x86 },
- ModifiedUtf8.encode("A\u0606")));
- assertTrue(Arrays.equals(new byte[] { 0, 3, 'A', (byte) 0xdf, (byte) 0xbf },
- ModifiedUtf8.encode("A\u07ff")));
- assertTrue(Arrays.equals(new byte[] { 0, 4, 'A', (byte) 0xe0, (byte) 0xa0, (byte) 0x80 },
- ModifiedUtf8.encode("A\u0800")));
- assertTrue(Arrays.equals(new byte[] { 0, 4, 'A', (byte) 0xe3, (byte) 0x87, (byte) 0x86 },
- ModifiedUtf8.encode("A\u31c6")));
- assertTrue(Arrays.equals(new byte[] { 0, 4, 'A', (byte) 0xef, (byte) 0xbf, (byte) 0xbf },
- ModifiedUtf8.encode("A\uffff")));
- assertTrue(Arrays.equals(new byte[] { 0, 3, 'A', (byte) 0xc0, (byte) 0x80 },
- ModifiedUtf8.encode("A\u0000")));
- assertTrue(
- Arrays.equals(new byte[] { 0, 8, (byte) 0xe3, (byte) 0x87, (byte) 0x86,
- (byte) 0xd8, (byte) 0x86, (byte) 0xc0, (byte) 0x80, 'A' },
- ModifiedUtf8.encode("\u31c6\u0606\u0000A")));
- }
-
- public void test_encode_throws() throws Exception {
- char[] unsignedShortSizedCharArray = new char[Short.MAX_VALUE * 2 + 1];
- for (int i = 0; i < unsignedShortSizedCharArray.length; i++) {
- unsignedShortSizedCharArray[i] = 'A';
- }
- String unsignedShortSizedString = String.copyValueOf(unsignedShortSizedCharArray);
-
- char[] sizeLongerThanUnsignedShortCharArray = new char[Short.MAX_VALUE * 2 + 2];
- for (int i = 0; i < sizeLongerThanUnsignedShortCharArray.length; i++) {
- sizeLongerThanUnsignedShortCharArray[i] = 'A';
- }
- String sizeLongerThanUnsignedShortString =
- String.copyValueOf(sizeLongerThanUnsignedShortCharArray);
-
- // Mustn't throw.
- ModifiedUtf8.encode(unsignedShortSizedString);
- try {
- // Must throw.
- ModifiedUtf8.encode(sizeLongerThanUnsignedShortString);
- fail("Should throw " + UTFDataFormatException.class.getName());
- } catch (UTFDataFormatException expected) {
- // Expected.
- }
- }
-
- public void test_encode_lengthAtBeginning() throws Exception {
- int testStringLength = 20000;
- char[] charArray = new char[testStringLength];
- for (int i = 0; i < charArray.length; i++) {
- charArray[i] = 'A';
- }
- String testString = String.copyValueOf(charArray);
-
- // Mustn't throw.
- byte[] result = ModifiedUtf8.encode(testString);
- ByteBuffer b = ByteBuffer.wrap(result);
- b.order(ByteOrder.BIG_ENDIAN);
- assertEquals(testStringLength, b.getShort());
- }
-
-}
diff --git a/non_openjdk_java_files.bp b/non_openjdk_java_files.bp
index 4f7a33d64b..98f60068dc 100644
--- a/non_openjdk_java_files.bp
+++ b/non_openjdk_java_files.bp
@@ -179,7 +179,6 @@ filegroup {
"luni/src/main/java/java/nio/charset/CharsetDecoderICU.java",
"luni/src/main/java/java/nio/charset/CharsetEncoderICU.java",
"luni/src/main/java/java/nio/charset/CharsetICU.java",
- "luni/src/main/java/java/nio/charset/ModifiedUtf8.java",
"luni/src/main/java/javax/xml/XMLConstants.java",
"luni/src/main/java/javax/xml/datatype/DatatypeConfigurationException.java",
"luni/src/main/java/javax/xml/datatype/DatatypeConstants.java",