mirror of
https://github.com/microsoft/HybridRow.git
synced 2026-01-26 21:03:14 +00:00
Refinements to Utf8String and Utf8StringTest
This commit is contained in:
@@ -78,7 +78,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
final int index = this.buffer.forEachByte(0, length, counter);
|
final int index = this.buffer.forEachByte(0, length, counter);
|
||||||
|
|
||||||
assert index == -1 : lenientFormat("index: %s, length: %s", index, length);
|
assert index == -1 : lenientFormat("index: %s, length: %s", index, length);
|
||||||
return counter.value();
|
return counter.charCount();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -515,10 +515,14 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
i = 0;
|
i = 0;
|
||||||
n = encodedLength;
|
n = encodedLength;
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
final UTF16CodeUnitCounter counter = new UTF16CodeUnitCounter(start);
|
final UTF16CodeUnitCounter counter = new UTF16CodeUnitCounter(start);
|
||||||
i = this.buffer.forEachByte(0, encodedLength, counter);
|
i = this.buffer.forEachByte(0, encodedLength, counter);
|
||||||
checkArgument(counter.index == counter.end, "start: %s, end: %s, counter: %s", start, end, counter);
|
|
||||||
n = encodedLength - i;
|
n = encodedLength - i;
|
||||||
|
|
||||||
|
checkArgument(counter.charIndex() == counter.charLimit(), "start: %s, end: %s, counter: %s",
|
||||||
|
start, end, counter
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
final int j;
|
final int j;
|
||||||
@@ -526,12 +530,16 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
if (end == length) {
|
if (end == length) {
|
||||||
j = encodedLength;
|
j = encodedLength;
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
final UTF16CodeUnitCounter counter = new UTF16CodeUnitCounter(end - start);
|
final UTF16CodeUnitCounter counter = new UTF16CodeUnitCounter(end - start);
|
||||||
j = this.buffer.forEachByte(i, n, counter);
|
j = this.buffer.forEachByte(i, n, counter);
|
||||||
checkArgument(counter.index == counter.end, "start: %s, end: %s, counter: %s", start, end, counter);
|
|
||||||
assert j >= 0;
|
checkArgument(counter.charIndex() == counter.charLimit(), "start: %s, end: %s, counter: %s",
|
||||||
|
start, end, counter
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
assert i >= 0 && j >= 0 : lenientFormat("i: %s, j: %s", i, j);
|
||||||
return fromUnsafe(this.buffer.slice(i, j - i));
|
return fromUnsafe(this.buffer.slice(i, j - i));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -809,13 +817,13 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
private static final class UTF16CodeUnitCounter implements ByteProcessor {
|
private static final class UTF16CodeUnitCounter implements ByteProcessor {
|
||||||
|
|
||||||
@JsonProperty
|
@JsonProperty
|
||||||
private final int end;
|
private final int charLimit;
|
||||||
|
|
||||||
@JsonProperty
|
@JsonProperty
|
||||||
private int count = 0;
|
private int charCount = 0;
|
||||||
|
|
||||||
@JsonProperty
|
@JsonProperty
|
||||||
private int index = 0;
|
private int charIndex = 0;
|
||||||
|
|
||||||
@JsonProperty
|
@JsonProperty
|
||||||
private int skip = 0;
|
private int skip = 0;
|
||||||
@@ -824,9 +832,9 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
this(Integer.MAX_VALUE);
|
this(Integer.MAX_VALUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
public UTF16CodeUnitCounter(int end) {
|
public UTF16CodeUnitCounter(int charLimit) {
|
||||||
checkArgument(end >= 0);
|
checkArgument(charLimit >= 0);
|
||||||
this.end = end;
|
this.charLimit = charLimit;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -838,35 +846,43 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
}
|
}
|
||||||
|
|
||||||
final int leadingByte = value & 0xFF;
|
final int leadingByte = value & 0xFF;
|
||||||
this.index = this.count;
|
this.charIndex = this.charCount;
|
||||||
|
|
||||||
if (leadingByte < 0x7F) {
|
if (leadingByte < 0x7F) {
|
||||||
// UTF-8-1 = 0x00-7F
|
// UTF-8-1 = 0x00-7F
|
||||||
this.skip = 0;
|
this.skip = 0;
|
||||||
this.count++;
|
this.charCount++;
|
||||||
} else if (0xC2 <= leadingByte && leadingByte <= 0xDF) {
|
} else if (0xC2 <= leadingByte && leadingByte <= 0xDF) {
|
||||||
// UTF8-8-2 = 0xC2-DF UTF8-tail
|
// UTF8-8-2 = 0xC2-DF UTF8-tail
|
||||||
this.skip = 1;
|
this.skip = 1;
|
||||||
this.count++;
|
this.charCount++;
|
||||||
} else if (0xE0 <= leadingByte && leadingByte <= 0xEF) {
|
} else if (0xE0 <= leadingByte && leadingByte <= 0xEF) {
|
||||||
// UTF-8-3 = 0xE0 0xA0-BF UTF8-tail / 0xE1-EC 2(UTF8-tail) / 0xED 0x80-9F UTF8-tail / 0xEE-EF 2
|
// UTF-8-3 = 0xE0 0xA0-BF UTF8-tail / 0xE1-EC 2(UTF8-tail) / 0xED 0x80-9F UTF8-tail / 0xEE-EF 2
|
||||||
// (UTF8-tail)
|
// (UTF8-tail)
|
||||||
this.skip = 2;
|
this.skip = 2;
|
||||||
this.count++;
|
this.charCount++;
|
||||||
} else if (0xF0 <= leadingByte && leadingByte <= 0xF4) {
|
} else if (0xF0 <= leadingByte && leadingByte <= 0xF4) {
|
||||||
// UTF8-4 = 0xF0 0x90-BF 2( UTF8-tail ) / 0xF1-F3 3( UTF8-tail ) / 0xF4 0x80-8F 2( UTF8-tail )
|
// UTF8-4 = 0xF0 0x90-BF 2( UTF8-tail ) / 0xF1-F3 3( UTF8-tail ) / 0xF4 0x80-8F 2( UTF8-tail )
|
||||||
this.skip = 3;
|
this.skip = 3;
|
||||||
this.count += 2;
|
this.charCount += 2;
|
||||||
} else {
|
} else {
|
||||||
this.skip = 0;
|
this.skip = 0;
|
||||||
this.count++;
|
this.charCount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
return this.count <= this.end;
|
return this.charCount <= this.charLimit;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int value() {
|
public int charCount() {
|
||||||
return this.count;
|
return this.charCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int charIndex() {
|
||||||
|
return this.charIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int charLimit() {
|
||||||
|
return this.charLimit;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|||||||
@@ -171,29 +171,19 @@ public class Utf8StringTest {
|
|||||||
|
|
||||||
Utf8String value = Utf8String.fromUnsafe(item.byteBuf());
|
Utf8String value = Utf8String.fromUnsafe(item.byteBuf());
|
||||||
|
|
||||||
for (int start = 0, end = start + 1; end <= value.length(); end++) {
|
for (int start : new int[] {0, 1, 2 }) {
|
||||||
try {
|
|
||||||
final Utf8String actual = (Utf8String)value.subSequence(start, end);
|
|
||||||
assertNotNull(actual);
|
|
||||||
assertFalse(actual.isNull());
|
|
||||||
assertEquals(actual.toUtf16(), item.value.subSequence(start, end));
|
|
||||||
} catch (IllegalArgumentException error) {
|
|
||||||
final String actual = value.toUtf16();
|
|
||||||
assertNotNull(actual);
|
|
||||||
assertTrue(Character.isSurrogate(actual.charAt(start)) || Character.isSurrogate(actual.charAt(end)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int start = 1, end = start + 1; end <= value.length(); end++) {
|
for (int end = start + 1; end <= value.length(); end++) {
|
||||||
try {
|
try {
|
||||||
final Utf8String actual = (Utf8String)value.subSequence(start, end);
|
final Utf8String actual = (Utf8String) value.subSequence(start, end);
|
||||||
assertNotNull(actual);
|
assertNotNull(actual);
|
||||||
assertFalse(actual.isNull());
|
assertFalse(actual.isNull());
|
||||||
assertEquals(actual.toUtf16(), item.value.subSequence(start, end));
|
assertEquals(actual.toUtf16(), item.value.subSequence(start, end));
|
||||||
} catch (IllegalArgumentException error) {
|
} catch (IllegalArgumentException error) {
|
||||||
final String actual = value.toUtf16();
|
final String actual = value.toUtf16();
|
||||||
assertNotNull(actual);
|
assertNotNull(actual);
|
||||||
assertTrue(Character.isSurrogate(actual.charAt(start)) || Character.isSurrogate(actual.charAt(end)));
|
assertTrue(Character.isSurrogate(actual.charAt(start)) || Character.isSurrogate(actual.charAt(end)));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -204,14 +194,33 @@ public class Utf8StringTest {
|
|||||||
assertFalse(actual.isNull());
|
assertFalse(actual.isNull());
|
||||||
assertEquals(actual.toUtf16(), item.value.subSequence(start, end));
|
assertEquals(actual.toUtf16(), item.value.subSequence(start, end));
|
||||||
} catch (IllegalArgumentException error) {
|
} catch (IllegalArgumentException error) {
|
||||||
// TODO: DANOBLE: assertions
|
final String actual = value.toUtf16();
|
||||||
System.out.println(error.toString());
|
assertNotNull(actual);
|
||||||
|
assertTrue(Character.isSurrogate(actual.charAt(start)) || Character.isSurrogate(actual.charAt(end)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
assertThrows(IndexOutOfBoundsException.class, () -> Utf8String.NULL.subSequence(0, 0));
|
||||||
|
assertThrows(IndexOutOfBoundsException.class, () -> value.subSequence(-1, 0));
|
||||||
|
assertThrows(IndexOutOfBoundsException.class, () -> value.subSequence(0, -1));
|
||||||
|
assertThrows(IndexOutOfBoundsException.class, () -> value.subSequence(value.length() + 1, 1));
|
||||||
|
assertThrows(IndexOutOfBoundsException.class, () -> value.subSequence(1, value.length() + 1));
|
||||||
|
assertThrows(IndexOutOfBoundsException.class, () -> value.subSequence(value.length() / 2, value.length() / 2 - 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testToString() {
|
public void testToString() {
|
||||||
|
|
||||||
|
assertEquals(Utf8String.NULL.toString(), "null");
|
||||||
|
assertSame("null", Utf8String.NULL.toString());
|
||||||
|
|
||||||
|
assertEquals(Utf8String.EMPTY.toString(), "\"\"");
|
||||||
|
assertSame("\"\"", Utf8String.EMPTY.toString());
|
||||||
|
assertSame("\"\"", Utf8String.fromUnsafe(Unpooled.EMPTY_BUFFER).toString());
|
||||||
|
assertSame("\"\"", Utf8String.transcodeUtf16(new String("")).toString());
|
||||||
|
|
||||||
|
assertEquals(Utf8String.transcodeUtf16("Hello World!").toString(), "\"Hello World!\"");
|
||||||
|
assertEquals(Utf8String.transcodeUtf16("\"Hello World!\"").toString(), "\"\\\"Hello World!\\\"\"");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(dataProvider = "unicodeTextDataProvider")
|
@Test(dataProvider = "unicodeTextDataProvider")
|
||||||
|
|||||||
Reference in New Issue
Block a user