mirror of
https://github.com/microsoft/HybridRow.git
synced 2026-01-20 18:03:14 +00:00
Refinements to Utf8String and Utf8StringTest
This commit is contained in:
@@ -78,7 +78,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
||||
final int index = this.buffer.forEachByte(0, length, counter);
|
||||
|
||||
assert index == -1 : lenientFormat("index: %s, length: %s", index, length);
|
||||
return counter.value();
|
||||
return counter.charCount();
|
||||
});
|
||||
}
|
||||
|
||||
@@ -515,10 +515,14 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
||||
i = 0;
|
||||
n = encodedLength;
|
||||
} else {
|
||||
|
||||
final UTF16CodeUnitCounter counter = new UTF16CodeUnitCounter(start);
|
||||
i = this.buffer.forEachByte(0, encodedLength, counter);
|
||||
checkArgument(counter.index == counter.end, "start: %s, end: %s, counter: %s", start, end, counter);
|
||||
n = encodedLength - i;
|
||||
|
||||
checkArgument(counter.charIndex() == counter.charLimit(), "start: %s, end: %s, counter: %s",
|
||||
start, end, counter
|
||||
);
|
||||
}
|
||||
|
||||
final int j;
|
||||
@@ -526,12 +530,16 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
||||
if (end == length) {
|
||||
j = encodedLength;
|
||||
} else {
|
||||
|
||||
final UTF16CodeUnitCounter counter = new UTF16CodeUnitCounter(end - start);
|
||||
j = this.buffer.forEachByte(i, n, counter);
|
||||
checkArgument(counter.index == counter.end, "start: %s, end: %s, counter: %s", start, end, counter);
|
||||
assert j >= 0;
|
||||
|
||||
checkArgument(counter.charIndex() == counter.charLimit(), "start: %s, end: %s, counter: %s",
|
||||
start, end, counter
|
||||
);
|
||||
}
|
||||
|
||||
assert i >= 0 && j >= 0 : lenientFormat("i: %s, j: %s", i, j);
|
||||
return fromUnsafe(this.buffer.slice(i, j - i));
|
||||
}
|
||||
|
||||
@@ -809,13 +817,13 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
||||
private static final class UTF16CodeUnitCounter implements ByteProcessor {
|
||||
|
||||
@JsonProperty
|
||||
private final int end;
|
||||
private final int charLimit;
|
||||
|
||||
@JsonProperty
|
||||
private int count = 0;
|
||||
private int charCount = 0;
|
||||
|
||||
@JsonProperty
|
||||
private int index = 0;
|
||||
private int charIndex = 0;
|
||||
|
||||
@JsonProperty
|
||||
private int skip = 0;
|
||||
@@ -824,9 +832,9 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
||||
this(Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
public UTF16CodeUnitCounter(int end) {
|
||||
checkArgument(end >= 0);
|
||||
this.end = end;
|
||||
public UTF16CodeUnitCounter(int charLimit) {
|
||||
checkArgument(charLimit >= 0);
|
||||
this.charLimit = charLimit;
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -838,35 +846,43 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
||||
}
|
||||
|
||||
final int leadingByte = value & 0xFF;
|
||||
this.index = this.count;
|
||||
this.charIndex = this.charCount;
|
||||
|
||||
if (leadingByte < 0x7F) {
|
||||
// UTF-8-1 = 0x00-7F
|
||||
this.skip = 0;
|
||||
this.count++;
|
||||
this.charCount++;
|
||||
} else if (0xC2 <= leadingByte && leadingByte <= 0xDF) {
|
||||
// UTF8-8-2 = 0xC2-DF UTF8-tail
|
||||
this.skip = 1;
|
||||
this.count++;
|
||||
this.charCount++;
|
||||
} else if (0xE0 <= leadingByte && leadingByte <= 0xEF) {
|
||||
// UTF-8-3 = 0xE0 0xA0-BF UTF8-tail / 0xE1-EC 2(UTF8-tail) / 0xED 0x80-9F UTF8-tail / 0xEE-EF 2
|
||||
// (UTF8-tail)
|
||||
this.skip = 2;
|
||||
this.count++;
|
||||
this.charCount++;
|
||||
} else if (0xF0 <= leadingByte && leadingByte <= 0xF4) {
|
||||
// UTF8-4 = 0xF0 0x90-BF 2( UTF8-tail ) / 0xF1-F3 3( UTF8-tail ) / 0xF4 0x80-8F 2( UTF8-tail )
|
||||
this.skip = 3;
|
||||
this.count += 2;
|
||||
this.charCount += 2;
|
||||
} else {
|
||||
this.skip = 0;
|
||||
this.count++;
|
||||
this.charCount++;
|
||||
}
|
||||
|
||||
return this.count <= this.end;
|
||||
return this.charCount <= this.charLimit;
|
||||
}
|
||||
|
||||
public int value() {
|
||||
return this.count;
|
||||
public int charCount() {
|
||||
return this.charCount;
|
||||
}
|
||||
|
||||
public int charIndex() {
|
||||
return this.charIndex;
|
||||
}
|
||||
|
||||
public int charLimit() {
|
||||
return this.charLimit;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -171,29 +171,19 @@ public class Utf8StringTest {
|
||||
|
||||
Utf8String value = Utf8String.fromUnsafe(item.byteBuf());
|
||||
|
||||
for (int start = 0, end = start + 1; end <= value.length(); end++) {
|
||||
try {
|
||||
final Utf8String actual = (Utf8String)value.subSequence(start, end);
|
||||
assertNotNull(actual);
|
||||
assertFalse(actual.isNull());
|
||||
assertEquals(actual.toUtf16(), item.value.subSequence(start, end));
|
||||
} catch (IllegalArgumentException error) {
|
||||
final String actual = value.toUtf16();
|
||||
assertNotNull(actual);
|
||||
assertTrue(Character.isSurrogate(actual.charAt(start)) || Character.isSurrogate(actual.charAt(end)));
|
||||
}
|
||||
}
|
||||
for (int start : new int[] {0, 1, 2 }) {
|
||||
|
||||
for (int start = 1, end = start + 1; end <= value.length(); end++) {
|
||||
try {
|
||||
final Utf8String actual = (Utf8String)value.subSequence(start, end);
|
||||
assertNotNull(actual);
|
||||
assertFalse(actual.isNull());
|
||||
assertEquals(actual.toUtf16(), item.value.subSequence(start, end));
|
||||
} catch (IllegalArgumentException error) {
|
||||
final String actual = value.toUtf16();
|
||||
assertNotNull(actual);
|
||||
assertTrue(Character.isSurrogate(actual.charAt(start)) || Character.isSurrogate(actual.charAt(end)));
|
||||
for (int end = start + 1; end <= value.length(); end++) {
|
||||
try {
|
||||
final Utf8String actual = (Utf8String) value.subSequence(start, end);
|
||||
assertNotNull(actual);
|
||||
assertFalse(actual.isNull());
|
||||
assertEquals(actual.toUtf16(), item.value.subSequence(start, end));
|
||||
} catch (IllegalArgumentException error) {
|
||||
final String actual = value.toUtf16();
|
||||
assertNotNull(actual);
|
||||
assertTrue(Character.isSurrogate(actual.charAt(start)) || Character.isSurrogate(actual.charAt(end)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -204,14 +194,33 @@ public class Utf8StringTest {
|
||||
assertFalse(actual.isNull());
|
||||
assertEquals(actual.toUtf16(), item.value.subSequence(start, end));
|
||||
} catch (IllegalArgumentException error) {
|
||||
// TODO: DANOBLE: assertions
|
||||
System.out.println(error.toString());
|
||||
final String actual = value.toUtf16();
|
||||
assertNotNull(actual);
|
||||
assertTrue(Character.isSurrogate(actual.charAt(start)) || Character.isSurrogate(actual.charAt(end)));
|
||||
}
|
||||
}
|
||||
|
||||
assertThrows(IndexOutOfBoundsException.class, () -> Utf8String.NULL.subSequence(0, 0));
|
||||
assertThrows(IndexOutOfBoundsException.class, () -> value.subSequence(-1, 0));
|
||||
assertThrows(IndexOutOfBoundsException.class, () -> value.subSequence(0, -1));
|
||||
assertThrows(IndexOutOfBoundsException.class, () -> value.subSequence(value.length() + 1, 1));
|
||||
assertThrows(IndexOutOfBoundsException.class, () -> value.subSequence(1, value.length() + 1));
|
||||
assertThrows(IndexOutOfBoundsException.class, () -> value.subSequence(value.length() / 2, value.length() / 2 - 1));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testToString() {
|
||||
|
||||
assertEquals(Utf8String.NULL.toString(), "null");
|
||||
assertSame("null", Utf8String.NULL.toString());
|
||||
|
||||
assertEquals(Utf8String.EMPTY.toString(), "\"\"");
|
||||
assertSame("\"\"", Utf8String.EMPTY.toString());
|
||||
assertSame("\"\"", Utf8String.fromUnsafe(Unpooled.EMPTY_BUFFER).toString());
|
||||
assertSame("\"\"", Utf8String.transcodeUtf16(new String("")).toString());
|
||||
|
||||
assertEquals(Utf8String.transcodeUtf16("Hello World!").toString(), "\"Hello World!\"");
|
||||
assertEquals(Utf8String.transcodeUtf16("\"Hello World!\"").toString(), "\"\\\"Hello World!\\\"\"");
|
||||
}
|
||||
|
||||
@Test(dataProvider = "unicodeTextDataProvider")
|
||||
|
||||
Reference in New Issue
Block a user