Refinements to Utf8String and Utf8StringTest

This commit is contained in:
David Noble
2019-10-02 17:23:02 -07:00
parent b79e5c6ec6
commit da047f001b
2 changed files with 68 additions and 43 deletions

View File

@@ -78,7 +78,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
final int index = this.buffer.forEachByte(0, length, counter);
assert index == -1 : lenientFormat("index: %s, length: %s", index, length);
return counter.value();
return counter.charCount();
});
}
@@ -515,10 +515,14 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
i = 0;
n = encodedLength;
} else {
final UTF16CodeUnitCounter counter = new UTF16CodeUnitCounter(start);
i = this.buffer.forEachByte(0, encodedLength, counter);
checkArgument(counter.index == counter.end, "start: %s, end: %s, counter: %s", start, end, counter);
n = encodedLength - i;
checkArgument(counter.charIndex() == counter.charLimit(), "start: %s, end: %s, counter: %s",
start, end, counter
);
}
final int j;
@@ -526,12 +530,16 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
if (end == length) {
j = encodedLength;
} else {
final UTF16CodeUnitCounter counter = new UTF16CodeUnitCounter(end - start);
j = this.buffer.forEachByte(i, n, counter);
checkArgument(counter.index == counter.end, "start: %s, end: %s, counter: %s", start, end, counter);
assert j >= 0;
checkArgument(counter.charIndex() == counter.charLimit(), "start: %s, end: %s, counter: %s",
start, end, counter
);
}
assert i >= 0 && j >= 0 : lenientFormat("i: %s, j: %s", i, j);
return fromUnsafe(this.buffer.slice(i, j - i));
}
@@ -809,13 +817,13 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
private static final class UTF16CodeUnitCounter implements ByteProcessor {
@JsonProperty
private final int end;
private final int charLimit;
@JsonProperty
private int count = 0;
private int charCount = 0;
@JsonProperty
private int index = 0;
private int charIndex = 0;
@JsonProperty
private int skip = 0;
@@ -824,9 +832,9 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
this(Integer.MAX_VALUE);
}
public UTF16CodeUnitCounter(int end) {
checkArgument(end >= 0);
this.end = end;
public UTF16CodeUnitCounter(int charLimit) {
checkArgument(charLimit >= 0);
this.charLimit = charLimit;
}
@Override
@@ -838,35 +846,43 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
}
final int leadingByte = value & 0xFF;
this.index = this.count;
this.charIndex = this.charCount;
if (leadingByte < 0x7F) {
// UTF8-1 = 0x00-7F
this.skip = 0;
this.count++;
this.charCount++;
} else if (0xC2 <= leadingByte && leadingByte <= 0xDF) {
// UTF8-2 = 0xC2-DF UTF8-tail
this.skip = 1;
this.count++;
this.charCount++;
} else if (0xE0 <= leadingByte && leadingByte <= 0xEF) {
// UTF8-3 = 0xE0 0xA0-BF UTF8-tail / 0xE1-EC 2(UTF8-tail) / 0xED 0x80-9F UTF8-tail / 0xEE-EF 2
// (UTF8-tail)
this.skip = 2;
this.count++;
this.charCount++;
} else if (0xF0 <= leadingByte && leadingByte <= 0xF4) {
// UTF8-4 = 0xF0 0x90-BF 2( UTF8-tail ) / 0xF1-F3 3( UTF8-tail ) / 0xF4 0x80-8F 2( UTF8-tail )
this.skip = 3;
this.count += 2;
this.charCount += 2;
} else {
this.skip = 0;
this.count++;
this.charCount++;
}
return this.count <= this.end;
return this.charCount <= this.charLimit;
}
public int value() {
return this.count;
public int charCount() {
return this.charCount;
}
public int charIndex() {
return this.charIndex;
}
public int charLimit() {
return this.charLimit;
}
@Override

View File

@@ -171,29 +171,19 @@ public class Utf8StringTest {
Utf8String value = Utf8String.fromUnsafe(item.byteBuf());
for (int start = 0, end = start + 1; end <= value.length(); end++) {
try {
final Utf8String actual = (Utf8String)value.subSequence(start, end);
assertNotNull(actual);
assertFalse(actual.isNull());
assertEquals(actual.toUtf16(), item.value.subSequence(start, end));
} catch (IllegalArgumentException error) {
final String actual = value.toUtf16();
assertNotNull(actual);
assertTrue(Character.isSurrogate(actual.charAt(start)) || Character.isSurrogate(actual.charAt(end)));
}
}
for (int start : new int[] {0, 1, 2 }) {
for (int start = 1, end = start + 1; end <= value.length(); end++) {
try {
final Utf8String actual = (Utf8String)value.subSequence(start, end);
assertNotNull(actual);
assertFalse(actual.isNull());
assertEquals(actual.toUtf16(), item.value.subSequence(start, end));
} catch (IllegalArgumentException error) {
final String actual = value.toUtf16();
assertNotNull(actual);
assertTrue(Character.isSurrogate(actual.charAt(start)) || Character.isSurrogate(actual.charAt(end)));
for (int end = start + 1; end <= value.length(); end++) {
try {
final Utf8String actual = (Utf8String) value.subSequence(start, end);
assertNotNull(actual);
assertFalse(actual.isNull());
assertEquals(actual.toUtf16(), item.value.subSequence(start, end));
} catch (IllegalArgumentException error) {
final String actual = value.toUtf16();
assertNotNull(actual);
assertTrue(Character.isSurrogate(actual.charAt(start)) || Character.isSurrogate(actual.charAt(end)));
}
}
}
@@ -204,14 +194,33 @@ public class Utf8StringTest {
assertFalse(actual.isNull());
assertEquals(actual.toUtf16(), item.value.subSequence(start, end));
} catch (IllegalArgumentException error) {
// TODO: DANOBLE: assertions
System.out.println(error.toString());
final String actual = value.toUtf16();
assertNotNull(actual);
assertTrue(Character.isSurrogate(actual.charAt(start)) || Character.isSurrogate(actual.charAt(end)));
}
}
assertThrows(IndexOutOfBoundsException.class, () -> Utf8String.NULL.subSequence(0, 0));
assertThrows(IndexOutOfBoundsException.class, () -> value.subSequence(-1, 0));
assertThrows(IndexOutOfBoundsException.class, () -> value.subSequence(0, -1));
assertThrows(IndexOutOfBoundsException.class, () -> value.subSequence(value.length() + 1, 1));
assertThrows(IndexOutOfBoundsException.class, () -> value.subSequence(1, value.length() + 1));
assertThrows(IndexOutOfBoundsException.class, () -> value.subSequence(value.length() / 2, value.length() / 2 - 1));
}
/**
 * Checks the text returned by {@code Utf8String.toString()} for null, empty, and non-empty values.
 * <p>
 * Each null/empty case is checked twice: {@code assertEquals} verifies the text, and {@code assertSame} verifies
 * that the returned {@link String} is the same instance as the corresponding string literal. All assertions use
 * the {@code (actual, expected)} argument order so that failure messages label the two values correctly.
 */
@Test
public void testToString() {

    // Null value: expected to render as the bare word null (no quotes)
    assertEquals(Utf8String.NULL.toString(), "null");
    assertSame(Utf8String.NULL.toString(), "null");

    // Empty value: expected to render as a pair of double quotes, whichever way the empty value was obtained
    assertEquals(Utf8String.EMPTY.toString(), "\"\"");
    assertSame(Utf8String.EMPTY.toString(), "\"\"");
    assertSame(Utf8String.fromUnsafe(Unpooled.EMPTY_BUFFER).toString(), "\"\"");

    // new String("") yields a distinct, non-literal empty String instance as the transcoding input
    assertSame(Utf8String.transcodeUtf16(new String("")).toString(), "\"\"");

    // Non-empty values: the text is wrapped in double quotes and embedded double quotes are backslash-escaped
    assertEquals(Utf8String.transcodeUtf16("Hello World!").toString(), "\"Hello World!\"");
    assertEquals(Utf8String.transcodeUtf16("\"Hello World!\"").toString(), "\"\\\"Hello World!\\\"\"");
}
@Test(dataProvider = "unicodeTextDataProvider")