From da047f001b7a667accbc227c157a864523fa1b16 Mon Sep 17 00:00:00 2001 From: David Noble Date: Wed, 2 Oct 2019 17:23:02 -0700 Subject: [PATCH] Refinements to Utf8String and Utf8StringTest --- .../azure/data/cosmos/core/Utf8String.java | 54 +++++++++++------- .../data/cosmos/core/Utf8StringTest.java | 57 +++++++++++-------- 2 files changed, 68 insertions(+), 43 deletions(-) diff --git a/java/src/main/java/com/azure/data/cosmos/core/Utf8String.java b/java/src/main/java/com/azure/data/cosmos/core/Utf8String.java index 060d037..059f180 100644 --- a/java/src/main/java/com/azure/data/cosmos/core/Utf8String.java +++ b/java/src/main/java/com/azure/data/cosmos/core/Utf8String.java @@ -78,7 +78,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable final int index = this.buffer.forEachByte(0, length, counter); assert index == -1 : lenientFormat("index: %s, length: %s", index, length); - return counter.value(); + return counter.charCount(); }); } @@ -515,10 +515,14 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable i = 0; n = encodedLength; } else { + final UTF16CodeUnitCounter counter = new UTF16CodeUnitCounter(start); i = this.buffer.forEachByte(0, encodedLength, counter); - checkArgument(counter.index == counter.end, "start: %s, end: %s, counter: %s", start, end, counter); n = encodedLength - i; + + checkArgument(counter.charIndex() == counter.charLimit(), "start: %s, end: %s, counter: %s", + start, end, counter + ); } final int j; @@ -526,12 +530,16 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable if (end == length) { j = encodedLength; } else { + final UTF16CodeUnitCounter counter = new UTF16CodeUnitCounter(end - start); j = this.buffer.forEachByte(i, n, counter); - checkArgument(counter.index == counter.end, "start: %s, end: %s, counter: %s", start, end, counter); - assert j >= 0; + + checkArgument(counter.charIndex() == counter.charLimit(), "start: %s, end: %s, counter: %s", + start, end, counter + ); } + assert i >= 0 && j >= 0 : lenientFormat("i: %s, j: %s", i, j); return fromUnsafe(this.buffer.slice(i, j - i)); } @@ -809,13 +817,13 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable private static final class UTF16CodeUnitCounter implements ByteProcessor { @JsonProperty - private final int end; + private final int charLimit; @JsonProperty - private int count = 0; + private int charCount = 0; @JsonProperty - private int index = 0; + private int charIndex = 0; @JsonProperty private int skip = 0; @@ -824,9 +832,9 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable this(Integer.MAX_VALUE); } - public UTF16CodeUnitCounter(int end) { - checkArgument(end >= 0); - this.end = end; + public UTF16CodeUnitCounter(int charLimit) { + checkArgument(charLimit >= 0); + this.charLimit = charLimit; } @Override @@ -838,35 +846,43 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable } final int leadingByte = value & 0xFF; - this.index = this.count; + this.charIndex = this.charCount; if (leadingByte < 0x7F) { // UTF-8-1 = 0x00-7F this.skip = 0; - this.count++; + this.charCount++; } else if (0xC2 <= leadingByte && leadingByte <= 0xDF) { // UTF8-8-2 = 0xC2-DF UTF8-tail this.skip = 1; - this.count++; + this.charCount++; } else if (0xE0 <= leadingByte && leadingByte <= 0xEF) { // UTF-8-3 = 0xE0 0xA0-BF UTF8-tail / 0xE1-EC 2(UTF8-tail) / 0xED 0x80-9F UTF8-tail / 0xEE-EF 2 // (UTF8-tail) this.skip = 2; - this.count++; + this.charCount++; } else if (0xF0 <= leadingByte && leadingByte <= 0xF4) { // UTF8-4 = 0xF0 0x90-BF 2( UTF8-tail ) / 0xF1-F3 3( UTF8-tail ) / 0xF4 0x80-8F 2( UTF8-tail ) this.skip = 3; - this.count += 2; + this.charCount += 2; } else { this.skip = 0; - this.count++; + this.charCount++; } - return this.count <= this.end; + return this.charCount <= this.charLimit; } - public int value() { - return this.count; + public int charCount() { + return this.charCount; + } + + public int charIndex() { + return this.charIndex; + } + + public int charLimit() { + return this.charLimit; } @Override diff --git a/java/src/test/java/com/azure/data/cosmos/core/Utf8StringTest.java b/java/src/test/java/com/azure/data/cosmos/core/Utf8StringTest.java index e3576ce..84eba1e 100644 --- a/java/src/test/java/com/azure/data/cosmos/core/Utf8StringTest.java +++ b/java/src/test/java/com/azure/data/cosmos/core/Utf8StringTest.java @@ -171,29 +171,19 @@ public class Utf8StringTest { Utf8String value = Utf8String.fromUnsafe(item.byteBuf()); - for (int start = 0, end = start + 1; end <= value.length(); end++) { - try { - final Utf8String actual = (Utf8String)value.subSequence(start, end); - assertNotNull(actual); - assertFalse(actual.isNull()); - assertEquals(actual.toUtf16(), item.value.subSequence(start, end)); - } catch (IllegalArgumentException error) { - final String actual = value.toUtf16(); - assertNotNull(actual); - assertTrue(Character.isSurrogate(actual.charAt(start)) || Character.isSurrogate(actual.charAt(end))); - } - } + for (int start : new int[] {0, 1, 2 }) { - for (int start = 1, end = start + 1; end <= value.length(); end++) { - try { - final Utf8String actual = (Utf8String)value.subSequence(start, end); - assertNotNull(actual); - assertFalse(actual.isNull()); - assertEquals(actual.toUtf16(), item.value.subSequence(start, end)); - } catch (IllegalArgumentException error) { - final String actual = value.toUtf16(); - assertNotNull(actual); - assertTrue(Character.isSurrogate(actual.charAt(start)) || Character.isSurrogate(actual.charAt(end))); + for (int end = start + 1; end <= value.length(); end++) { + try { + final Utf8String actual = (Utf8String) value.subSequence(start, end); + assertNotNull(actual); + assertFalse(actual.isNull()); + assertEquals(actual.toUtf16(), item.value.subSequence(start, end)); + } catch (IllegalArgumentException error) { + final String actual = value.toUtf16(); + assertNotNull(actual); + assertTrue(Character.isSurrogate(actual.charAt(start)) || Character.isSurrogate(actual.charAt(end))); + } } } @@ -204,14 +194,33 @@ public class Utf8StringTest { assertFalse(actual.isNull()); assertEquals(actual.toUtf16(), item.value.subSequence(start, end)); } catch (IllegalArgumentException error) { - // TODO: DANOBLE: assertions - System.out.println(error.toString()); + final String actual = value.toUtf16(); + assertNotNull(actual); + assertTrue(Character.isSurrogate(actual.charAt(start)) || Character.isSurrogate(actual.charAt(end))); } } + + assertThrows(IndexOutOfBoundsException.class, () -> Utf8String.NULL.subSequence(0, 0)); + assertThrows(IndexOutOfBoundsException.class, () -> value.subSequence(-1, 0)); + assertThrows(IndexOutOfBoundsException.class, () -> value.subSequence(0, -1)); + assertThrows(IndexOutOfBoundsException.class, () -> value.subSequence(value.length() + 1, 1)); + assertThrows(IndexOutOfBoundsException.class, () -> value.subSequence(1, value.length() + 1)); + assertThrows(IndexOutOfBoundsException.class, () -> value.subSequence(value.length() / 2, value.length() / 2 - 1)); } @Test public void testToString() { + + assertEquals(Utf8String.NULL.toString(), "null"); + assertSame("null", Utf8String.NULL.toString()); + + assertEquals(Utf8String.EMPTY.toString(), "\"\""); + assertSame("\"\"", Utf8String.EMPTY.toString()); + assertSame("\"\"", Utf8String.fromUnsafe(Unpooled.EMPTY_BUFFER).toString()); + assertSame("\"\"", Utf8String.transcodeUtf16(new String("")).toString()); + + assertEquals(Utf8String.transcodeUtf16("Hello World!").toString(), "\"Hello World!\""); + assertEquals(Utf8String.transcodeUtf16("\"Hello World!\"").toString(), "\"\\\"Hello World!\\\"\""); } @Test(dataProvider = "unicodeTextDataProvider")