Refinements to Utf8String and Utf8StringTest

This commit is contained in:
David Noble
2019-10-02 17:23:02 -07:00
parent b79e5c6ec6
commit da047f001b
2 changed files with 68 additions and 43 deletions

View File

@@ -78,7 +78,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
final int index = this.buffer.forEachByte(0, length, counter); final int index = this.buffer.forEachByte(0, length, counter);
assert index == -1 : lenientFormat("index: %s, length: %s", index, length); assert index == -1 : lenientFormat("index: %s, length: %s", index, length);
return counter.value(); return counter.charCount();
}); });
} }
@@ -515,10 +515,14 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
i = 0; i = 0;
n = encodedLength; n = encodedLength;
} else { } else {
final UTF16CodeUnitCounter counter = new UTF16CodeUnitCounter(start); final UTF16CodeUnitCounter counter = new UTF16CodeUnitCounter(start);
i = this.buffer.forEachByte(0, encodedLength, counter); i = this.buffer.forEachByte(0, encodedLength, counter);
checkArgument(counter.index == counter.end, "start: %s, end: %s, counter: %s", start, end, counter);
n = encodedLength - i; n = encodedLength - i;
checkArgument(counter.charIndex() == counter.charLimit(), "start: %s, end: %s, counter: %s",
start, end, counter
);
} }
final int j; final int j;
@@ -526,12 +530,16 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
if (end == length) { if (end == length) {
j = encodedLength; j = encodedLength;
} else { } else {
final UTF16CodeUnitCounter counter = new UTF16CodeUnitCounter(end - start); final UTF16CodeUnitCounter counter = new UTF16CodeUnitCounter(end - start);
j = this.buffer.forEachByte(i, n, counter); j = this.buffer.forEachByte(i, n, counter);
checkArgument(counter.index == counter.end, "start: %s, end: %s, counter: %s", start, end, counter);
assert j >= 0; checkArgument(counter.charIndex() == counter.charLimit(), "start: %s, end: %s, counter: %s",
start, end, counter
);
} }
assert i >= 0 && j >= 0 : lenientFormat("i: %s, j: %s", i, j);
return fromUnsafe(this.buffer.slice(i, j - i)); return fromUnsafe(this.buffer.slice(i, j - i));
} }
@@ -809,13 +817,13 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
private static final class UTF16CodeUnitCounter implements ByteProcessor { private static final class UTF16CodeUnitCounter implements ByteProcessor {
@JsonProperty @JsonProperty
private final int end; private final int charLimit;
@JsonProperty @JsonProperty
private int count = 0; private int charCount = 0;
@JsonProperty @JsonProperty
private int index = 0; private int charIndex = 0;
@JsonProperty @JsonProperty
private int skip = 0; private int skip = 0;
@@ -824,9 +832,9 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
this(Integer.MAX_VALUE); this(Integer.MAX_VALUE);
} }
public UTF16CodeUnitCounter(int end) { public UTF16CodeUnitCounter(int charLimit) {
checkArgument(end >= 0); checkArgument(charLimit >= 0);
this.end = end; this.charLimit = charLimit;
} }
@Override @Override
@@ -838,35 +846,43 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
} }
final int leadingByte = value & 0xFF; final int leadingByte = value & 0xFF;
this.index = this.count; this.charIndex = this.charCount;
if (leadingByte < 0x7F) { if (leadingByte < 0x7F) {
// UTF8-1 = 0x00-7F // UTF8-1 = 0x00-7F
this.skip = 0; this.skip = 0;
this.count++; this.charCount++;
} else if (0xC2 <= leadingByte && leadingByte <= 0xDF) { } else if (0xC2 <= leadingByte && leadingByte <= 0xDF) {
// UTF8-2 = 0xC2-DF UTF8-tail // UTF8-2 = 0xC2-DF UTF8-tail
this.skip = 1; this.skip = 1;
this.count++; this.charCount++;
} else if (0xE0 <= leadingByte && leadingByte <= 0xEF) { } else if (0xE0 <= leadingByte && leadingByte <= 0xEF) {
// UTF8-3 = 0xE0 0xA0-BF UTF8-tail / 0xE1-EC 2(UTF8-tail) / 0xED 0x80-9F UTF8-tail / 0xEE-EF 2 // UTF8-3 = 0xE0 0xA0-BF UTF8-tail / 0xE1-EC 2(UTF8-tail) / 0xED 0x80-9F UTF8-tail / 0xEE-EF 2
// (UTF8-tail) // (UTF8-tail)
this.skip = 2; this.skip = 2;
this.count++; this.charCount++;
} else if (0xF0 <= leadingByte && leadingByte <= 0xF4) { } else if (0xF0 <= leadingByte && leadingByte <= 0xF4) {
// UTF8-4 = 0xF0 0x90-BF 2( UTF8-tail ) / 0xF1-F3 3( UTF8-tail ) / 0xF4 0x80-8F 2( UTF8-tail ) // UTF8-4 = 0xF0 0x90-BF 2( UTF8-tail ) / 0xF1-F3 3( UTF8-tail ) / 0xF4 0x80-8F 2( UTF8-tail )
this.skip = 3; this.skip = 3;
this.count += 2; this.charCount += 2;
} else { } else {
this.skip = 0; this.skip = 0;
this.count++; this.charCount++;
} }
return this.count <= this.end; return this.charCount <= this.charLimit;
} }
public int value() { public int charCount() {
return this.count; return this.charCount;
}
public int charIndex() {
return this.charIndex;
}
public int charLimit() {
return this.charLimit;
} }
@Override @Override

View File

@@ -171,29 +171,19 @@ public class Utf8StringTest {
Utf8String value = Utf8String.fromUnsafe(item.byteBuf()); Utf8String value = Utf8String.fromUnsafe(item.byteBuf());
for (int start = 0, end = start + 1; end <= value.length(); end++) { for (int start : new int[] {0, 1, 2 }) {
try {
final Utf8String actual = (Utf8String)value.subSequence(start, end);
assertNotNull(actual);
assertFalse(actual.isNull());
assertEquals(actual.toUtf16(), item.value.subSequence(start, end));
} catch (IllegalArgumentException error) {
final String actual = value.toUtf16();
assertNotNull(actual);
assertTrue(Character.isSurrogate(actual.charAt(start)) || Character.isSurrogate(actual.charAt(end)));
}
}
for (int start = 1, end = start + 1; end <= value.length(); end++) { for (int end = start + 1; end <= value.length(); end++) {
try { try {
final Utf8String actual = (Utf8String)value.subSequence(start, end); final Utf8String actual = (Utf8String) value.subSequence(start, end);
assertNotNull(actual); assertNotNull(actual);
assertFalse(actual.isNull()); assertFalse(actual.isNull());
assertEquals(actual.toUtf16(), item.value.subSequence(start, end)); assertEquals(actual.toUtf16(), item.value.subSequence(start, end));
} catch (IllegalArgumentException error) { } catch (IllegalArgumentException error) {
final String actual = value.toUtf16(); final String actual = value.toUtf16();
assertNotNull(actual); assertNotNull(actual);
assertTrue(Character.isSurrogate(actual.charAt(start)) || Character.isSurrogate(actual.charAt(end))); assertTrue(Character.isSurrogate(actual.charAt(start)) || Character.isSurrogate(actual.charAt(end)));
}
} }
} }
@@ -204,14 +194,33 @@ public class Utf8StringTest {
assertFalse(actual.isNull()); assertFalse(actual.isNull());
assertEquals(actual.toUtf16(), item.value.subSequence(start, end)); assertEquals(actual.toUtf16(), item.value.subSequence(start, end));
} catch (IllegalArgumentException error) { } catch (IllegalArgumentException error) {
// TODO: DANOBLE: assertions final String actual = value.toUtf16();
System.out.println(error.toString()); assertNotNull(actual);
assertTrue(Character.isSurrogate(actual.charAt(start)) || Character.isSurrogate(actual.charAt(end)));
} }
} }
assertThrows(IndexOutOfBoundsException.class, () -> Utf8String.NULL.subSequence(0, 0));
assertThrows(IndexOutOfBoundsException.class, () -> value.subSequence(-1, 0));
assertThrows(IndexOutOfBoundsException.class, () -> value.subSequence(0, -1));
assertThrows(IndexOutOfBoundsException.class, () -> value.subSequence(value.length() + 1, 1));
assertThrows(IndexOutOfBoundsException.class, () -> value.subSequence(1, value.length() + 1));
assertThrows(IndexOutOfBoundsException.class, () -> value.subSequence(value.length() / 2, value.length() / 2 - 1));
} }
@Test @Test
public void testToString() { public void testToString() {
assertEquals(Utf8String.NULL.toString(), "null");
assertSame("null", Utf8String.NULL.toString());
assertEquals(Utf8String.EMPTY.toString(), "\"\"");
assertSame("\"\"", Utf8String.EMPTY.toString());
assertSame("\"\"", Utf8String.fromUnsafe(Unpooled.EMPTY_BUFFER).toString());
assertSame("\"\"", Utf8String.transcodeUtf16(new String("")).toString());
assertEquals(Utf8String.transcodeUtf16("Hello World!").toString(), "\"Hello World!\"");
assertEquals(Utf8String.transcodeUtf16("\"Hello World!\"").toString(), "\"\\\"Hello World!\\\"\"");
} }
@Test(dataProvider = "unicodeTextDataProvider") @Test(dataProvider = "unicodeTextDataProvider")