diff options
author | Joe Onorato <joeo@google.com> | 2018-11-16 16:08:15 -0800 |
---|---|---|
committer | Joe Onorato <joeo@google.com> | 2018-11-28 12:34:31 -0800 |
commit | 7d1851fe49b2f923bb33f2a2db2f0baeb38b413b (patch) | |
tree | c441ffb622077e0c291b9dc7d94a81579e123073 | |
parent | 9112a5e07a82d0e27d91e34971dc7afce53542f9 (diff) |
Add csv parser for the power profile library.
There are enough variants of csv, and it's simple enough that
it's easier to just have our own parser than to introduce a
dependency on some other library, as this library will be used
in a variety of environments.
Test: atest frameworks/base/tools/powermodel --host
Change-Id: Ib0f7aceb2a58b58f447f6bcef5c95729303dee8a
-rw-r--r-- | tools/powermodel/src/com/android/powermodel/CsvParser.java | 173 | ||||
-rw-r--r-- | tools/powermodel/test/com/android/powermodel/CsvParserTest.java | 311 |
2 files changed, 484 insertions, 0 deletions
diff --git a/tools/powermodel/src/com/android/powermodel/CsvParser.java b/tools/powermodel/src/com/android/powermodel/CsvParser.java
new file mode 100644
index 000000000000..78cd261306fc
--- /dev/null
+++ b/tools/powermodel/src/com/android/powermodel/CsvParser.java
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.powermodel;
+
+import java.io.InputStream;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+
+/**
+ * Parses CSV.
+ * <p>
+ * Call {@link #parse} with an InputStream.
+ * <p>
+ * {@link LineProcessor#onLine} will be called for each non-blank line in the
+ * source document.
+ * <p>
+ * Dialect notes:
+ * <ul>
+ *   <li>Fields are separated by commas; lines end at CR, LF or CRLF.</li>
+ *   <li>A field whose first byte is a double quote is quoted: inside it two
+ *       consecutive double quotes produce one literal double quote, and a comma
+ *       only ends the field when it directly follows the closing quote.</li>
+ *   <li>A CR or LF always ends the line, even inside a quoted field.</li>
+ *   <li>Field bytes are decoded as UTF-8.</li>
+ * </ul>
+ * <p>
+ * To simplify parsing and to protect against using too much memory for bad
+ * data, the maximum field length is {@link #MAX_FIELD_SIZE}.
+ */
+class CsvParser {
+    /**
+     * The maximum size of a single field in bytes.
+     */
+    public static final int MAX_FIELD_SIZE = (8*1024)-1;
+
+    /**
+     * Callback interface for each line of CSV as it is parsed.
+     */
+    interface LineProcessor {
+        /**
+         * A line of CSV was parsed.
+         *
+         * @param lineNumber the line number in the file, starting at 1
+         * @param fields the comma separated fields for the line
+         */
+        void onLine(int lineNumber, ArrayList<String> fields) throws ParseException;
+    }
+
+    /**
+     * Parse the CSV text in input, calling onto processor for each row.
+     * <p>
+     * The stream is read through a single fixed buffer of
+     * {@code MAX_FIELD_SIZE + 1} bytes. Completed fields are turned into
+     * strings as soon as their terminator is seen; an unfinished field is moved
+     * to the front of the buffer before more data is read. The stream is not
+     * closed by this method.
+     *
+     * @param input the bytes to parse, interpreted as UTF-8
+     * @param processor receives every line that has any content
+     * @throws IOException if reading from input fails
+     * @throws ParseException if a field does not fit in the buffer, or if the
+     *         processor throws it
+     */
+    public static void parse(InputStream input, LineProcessor processor)
+            throws IOException, ParseException {
+        // Delimiters are matched on raw bytes. They are all ASCII, and UTF-8 never
+        // uses ASCII byte values inside a multi-byte sequence, so a field boundary
+        // can never fall in the middle of a character.
+        final Charset utf8 = StandardCharsets.UTF_8;
+        final byte[] buf = new byte[MAX_FIELD_SIZE+1];
+        int lineNumber = 1;
+        // Number of bytes at the start of buf carried over from the previous pass.
+        int readPos = 0;
+        // Previous byte examined; used to treat "\r\n" as a single line ending.
+        int prev = 0;
+        ArrayList<String> fields = new ArrayList<String>();
+        boolean finalBuffer = false;
+        // True while inside a field that started with a double quote.
+        boolean escaping = false;
+        // True when the previous byte of a quoted field was an unpaired double quote.
+        boolean sawQuote = false;
+
+        while (!finalBuffer) {
+            int amt = input.read(buf, readPos, buf.length-readPos);
+            if (amt < 0) {
+                // No more data. Process whatever's left from before.
+                amt = readPos;
+                finalBuffer = true;
+            } else {
+                // Process whatever's left from before, plus the new data.
+                amt += readPos;
+                finalBuffer = false;
+            }
+
+            // Process as much of this buffer as we can. The carried-over bytes
+            // were already scanned, so scanning resumes at readPos.
+            int fieldStart = 0;
+            int index = readPos;
+            // Write position for the unescaped content of a quoted field. The
+            // content is compacted in place, so it can trail behind index.
+            int escapeIndex = escaping ? readPos : -1;
+            while (index < amt) {
+                byte c = buf[index];
+                if (c == '\r' || c == '\n') {
+                    if (escaping) {
+                        // TODO: Quotes do not escape newlines in our CSV dialect,
+                        // but we actually see some data where it should.
+                        fields.add(new String(buf, fieldStart, escapeIndex-fieldStart, utf8));
+                        escapeIndex = -1;
+                        escaping = false;
+                        sawQuote = false;
+                    } else {
+                        fields.add(new String(buf, fieldStart, index-fieldStart, utf8));
+                    }
+                    reportLine(processor, lineNumber, fields);
+                    fields = new ArrayList<String>();
+                    if (!(c == '\n' && prev == '\r')) {
+                        // Don't double increment for dos line endings.
+                        lineNumber++;
+                    }
+                    fieldStart = index = index + 1;
+                } else {
+                    if (escaping) {
+                        // Field started with a " so quotes are escaped with " and commas
+                        // don't matter except when following a single quote.
+                        if (c == '"') {
+                            if (sawQuote) {
+                                // Second quote of a pair: keep one literal quote.
+                                buf[escapeIndex] = buf[index];
+                                escapeIndex++;
+                                sawQuote = false;
+                            } else {
+                                sawQuote = true;
+                            }
+                            index++;
+                        } else if (sawQuote && c == ',') {
+                            // Closing quote followed by a comma: the field is done.
+                            fields.add(new String(buf, fieldStart, escapeIndex-fieldStart, utf8));
+                            fieldStart = index = index + 1;
+                            escapeIndex = -1;
+                            escaping = false;
+                            sawQuote = false;
+                        } else {
+                            buf[escapeIndex] = buf[index];
+                            escapeIndex++;
+                            index++;
+                            sawQuote = false;
+                        }
+                    } else {
+                        if (c == ',') {
+                            fields.add(new String(buf, fieldStart, index-fieldStart, utf8));
+                            fieldStart = index + 1;
+                        } else if (c == '"' && fieldStart == index) {
+                            // First character is a "
+                            escaping = true;
+                            fieldStart = escapeIndex = index + 1;
+                        }
+                        index++;
+                    }
+                }
+                prev = c;
+            }
+
+            // A single field is greater than buf.length, so fail.
+            if (fieldStart == 0 && index == buf.length) {
+                throw new ParseException(lineNumber, "Line is too long: "
+                        + new String(buf, 0, 20, utf8) + "...");
+            }
+
+            // Move whatever we didn't process to the beginning of the buffer
+            // and try again.
+            if (fieldStart != amt) {
+                readPos = (escaping ? escapeIndex : index) - fieldStart;
+                System.arraycopy(buf, fieldStart, buf, 0, readPos);
+            } else {
+                readPos = 0;
+            }
+
+            // Process whatever's left over
+            if (finalBuffer) {
+                fields.add(new String(buf, 0, readPos, utf8));
+                // If there is any content, return the last line.
+                reportLine(processor, lineNumber, fields);
+            }
+        }
+    }
+
+    /**
+     * Passes a finished line to the processor unless it is blank, i.e. unless it
+     * has no fields or consists of a single empty field.
+     */
+    private static void reportLine(LineProcessor processor, int lineNumber,
+            ArrayList<String> fields) throws ParseException {
+        if (fields.size() > 1 || (fields.size() == 1 && fields.get(0).length() > 0)) {
+            processor.onLine(lineNumber, fields);
+        }
+    }
+}
diff --git a/tools/powermodel/test/com/android/powermodel/CsvParserTest.java b/tools/powermodel/test/com/android/powermodel/CsvParserTest.java
new file mode 100644
index 000000000000..55dde412b78e
--- /dev/null
+++ b/tools/powermodel/test/com/android/powermodel/CsvParserTest.java
@@ -0,0 +1,311 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.powermodel;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests {@link CsvParser}
+ */
+public class CsvParserTest {
+
+    /**
+     * Line processor that records the fields of every reported line, in order.
+     */
+    static class LineCollector implements CsvParser.LineProcessor {
+        final ArrayList<ArrayList<String>> results = new ArrayList<ArrayList<String>>();
+
+        @Override
+        public void onLine(int lineNumber, ArrayList<String> fields) {
+            results.add(fields);
+        }
+    }
+
+    /**
+     * Runs the parser over the UTF-8 bytes of text and returns the collected lines.
+     */
+    private static ArrayList<ArrayList<String>> parse(String text) throws Exception {
+        final InputStream is = new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8));
+        final LineCollector collector = new LineCollector();
+        CsvParser.parse(is, collector);
+        return collector.results;
+    }
+
+    /**
+     * Asserts that the collected lines match expected, row by row and field by field.
+     */
+    private static void assertEquals(String[][] expected, ArrayList<ArrayList<String>> results) {
+        final String[][] resultArray = new String[results.size()][];
+        for (int i=0; i<results.size(); i++) {
+            final ArrayList<String> list = results.get(i);
+            resultArray[i] = list.toArray(new String[list.size()]);
+        }
+        Assert.assertArrayEquals(expected, resultArray);
+    }
+
+    /**
+     * Returns a string made of length copies of the letter 'a'.
+     */
+    private static String makeString(int length) {
+        final StringBuilder str = new StringBuilder(length);
+        for (int i=0; i<length; i++) {
+            str.append('a');
+        }
+        return str.toString();
+    }
+
+    @Test public void testEmpty() throws Exception {
+        assertEquals(new String[][] {
+        }, parse(""));
+    }
+
+    @Test public void testOnlyNewline() throws Exception {
+        assertEquals(new String[][] {
+        }, parse("\n"));
+    }
+
+    @Test public void testTwoLines() throws Exception {
+        assertEquals(new String[][] {
+            { "one", "twoo", "3", },
+            { "four", "5", "six", },
+        }, parse("one,twoo,3\nfour,5,six\n"));
+    }
+
+    @Test public void testEscapedEmpty() throws Exception {
+        assertEquals(new String[][] {
+            { "", "", "", },
+        }, parse("\"\",\"\",\"\"\n"));
+    }
+
+    @Test public void testEscapedText() throws Exception {
+        assertEquals(new String[][] {
+            { "one", "twoo", "3", },
+        }, parse("\"one\",\"twoo\",\"3\"\n"));
+    }
+
+    @Test public void testEscapedQuotes() throws Exception {
+        assertEquals(new String[][] {
+            { "\"", "\"\"", "\"", },
+        }, parse("\"\"\"\",\"\"\"\"\"\",\"\"\"\"\n"));
+    }
+
+    @Test public void testEscapedCommas() throws Exception {
+        assertEquals(new String[][] {
+            { ",", ",", ",", },
+        }, parse("\",\",\",\",\",\"\n"));
+    }
+
+    @Test public void testEscapedQuotesAndCommas() throws Exception {
+        assertEquals(new String[][] {
+            { "\",", "\",", "\",", },
+        }, parse("\"\"\",\",\"\"\",\",\"\"\",\"\n"));
+    }
+
+    @Test public void testNoNewline() throws Exception {
+        assertEquals(new String[][] {
+            { "a", "b", "c", }
+        }, parse("a,b,c"));
+    }
+
+    @Test public void testNoNewlineWithCommas() throws Exception {
+        assertEquals(new String[][] {
+            { "a", "b", "", "" }
+        }, parse("a,b,,"));
+    }
+
+    @Test public void testNoNewlineWithQuote() throws Exception {
+        assertEquals(new String[][] {
+            { "a", "b", "," }
+        }, parse("a,b,\",\""));
+    }
+
+    @Test public void testNoCommas() throws Exception {
+        assertEquals(new String[][] {
+            { "aasdfadfadfad", }
+        }, parse("aasdfadfadfad"));
+    }
+
+    @Test public void testMaxLength() throws Exception {
+        final String text = makeString(CsvParser.MAX_FIELD_SIZE);
+
+        assertEquals(new String[][] {
+            { text, }
+        }, parse(text));
+    }
+
+    @Test public void testMaxLengthTwice() throws Exception {
+        final String big = makeString(CsvParser.MAX_FIELD_SIZE);
+
+        assertEquals(new String[][] {
+            { big, big, }
+        }, parse(big + "," + big));
+    }
+
+    @Test public void testTooLong() throws Exception {
+        final String text = makeString(CsvParser.MAX_FIELD_SIZE+1);
+
+        try {
+            parse(text);
+            // Assert.fail throws AssertionError, which the catch below does not swallow.
+            Assert.fail("Expected CsvParser.parse to throw ParseException");
+        } catch (ParseException expected) {
+            // good
+        }
+    }
+
+    @Test public void testBufferBoundary() throws Exception {
+        final String big = makeString(CsvParser.MAX_FIELD_SIZE-3);
+
+        assertEquals(new String[][] {
+            { big, "b", "c", "d", "e", "f", "g", }
+        }, parse(big + ",b,c,d,e,f,g"));
+    }
+
+    @Test public void testBufferBoundaryEmpty() throws Exception {
+        final String big = makeString(CsvParser.MAX_FIELD_SIZE-3);
+
+        assertEquals(new String[][] {
+            { big, "", "", "", "", "", "", }
+        }, parse(big + ",,,,,,"));
+    }
+
+    // Checks that the escaping and sawQuote behavior is correct at the buffer boundary
+    @Test public void testBufferBoundaryEscapingEven() throws Exception {
+        final String big = makeString(CsvParser.MAX_FIELD_SIZE-2);
+
+        assertEquals(new String[][] {
+            { big, "\"\"\"\"\"", big }
+        }, parse(big + ",\"\"\"\"\"\"\"\"\"\"\"\"," + big));
+    }
+
+    // Checks that the escaping and sawQuote behavior is correct at the buffer boundary
+    @Test public void testBufferBoundaryEscapingOdd() throws Exception {
+        final String big = makeString(CsvParser.MAX_FIELD_SIZE-3);
+
+        assertEquals(new String[][] {
+            { big, "\"\"\"\"\"", big }
+        }, parse(big + ",\"\"\"\"\"\"\"\"\"\"\"\"," + big));
+    }
+
+}