mirror of
https://github.com/microsoft/HybridRow.git
synced 2026-01-25 04:13:19 +00:00
Added caching of the UTF-16 string to Utf8String to reduce the time and space cost of Utf8String.toUtf16(). Also addressed a number of Javadoc issues.
This commit is contained in:
@@ -9,10 +9,10 @@ import java.util.Objects;
|
|||||||
* A container object which may or may not contain a non-null value
|
* A container object which may or may not contain a non-null value
|
||||||
*
|
*
|
||||||
* This is a value-based class and as such use of identity-sensitive operations--including reference equality
|
* This is a value-based class and as such use of identity-sensitive operations--including reference equality
|
||||||
* ({@code ==}), identity hash code, or synchronization--on instances of {@Out} may have unpredictable results and
|
* ({@code ==}), identity hash code, or synchronization--on instances of {@code Out} may have unpredictable results and
|
||||||
* should be avoided.
|
* should be avoided.
|
||||||
*
|
*
|
||||||
* @param <T>
|
* @param <T> type of the referent.
|
||||||
*/
|
*/
|
||||||
public final class Out<T> {
|
public final class Out<T> {
|
||||||
|
|
||||||
@@ -42,11 +42,13 @@ public final class Out<T> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Indicates whether some other object is equal to this {@link Out} value. The other object is considered equal if:
|
* Indicates whether some other object is equal to this {@link Out} value.
|
||||||
|
* <p>
|
||||||
|
* The other object is considered equal if:
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>it is also an {@link Out} and;
|
* <li>it is also an {@link Out} and;
|
||||||
* <li>both instances have no value present or;
|
* <li>both instances have no value present or;
|
||||||
* <li>the present values are equal to each other as determined by {@link T#equals(Object)}}.
|
* <li>the present values are equal to each other as determined by {@code T.equals(Object)}.
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
*
|
||||||
* @param other an object to be tested for equality
|
* @param other an object to be tested for equality
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ import java.util.Objects;
|
|||||||
* ({@code ==}), identity hash code, or synchronization--on instances of {@link Reference} may have unpredictable
|
* ({@code ==}), identity hash code, or synchronization--on instances of {@link Reference} may have unpredictable
|
||||||
* results and should be avoided.
|
* results and should be avoided.
|
||||||
*
|
*
|
||||||
* @param <T>
|
* @param <T> type of the referent.
|
||||||
*/
|
*/
|
||||||
public final class Reference<T> {
|
public final class Reference<T> {
|
||||||
|
|
||||||
@@ -45,11 +45,13 @@ public final class Reference<T> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Indicates whether some other object is equal to this {@link Reference} value. The other object is considered equal if:.
|
* Indicates whether some other object is equal to this {@link Reference} value.
|
||||||
|
* <p>
|
||||||
|
* The other object is considered equal if:
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>it is also an {@link Reference} and;
|
* <li>it is also a {@link Reference} and;
|
||||||
* <li>both instances have no value present or;
|
* <li>both instances have no value present or;
|
||||||
* <li>the present values are equal to each other as determined by {@link T#equals(Object)}}.
|
* <li>the present values are equal to each other as determined by {@code T.equals(Object)}.
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
*
|
||||||
* @param other an object to be tested for equality
|
* @param other an object to be tested for equality
|
||||||
|
|||||||
@@ -53,30 +53,43 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
public static final Utf8String NULL = new Utf8String(null);
|
public static final Utf8String NULL = new Utf8String(null);
|
||||||
|
|
||||||
private final ByteBuf buffer;
|
private final ByteBuf buffer;
|
||||||
private final Supplier<Integer> utf16CodeUnitCount;
|
private final Supplier<String> utf16String;
|
||||||
|
private final Supplier<Integer> utf16StringLength;
|
||||||
|
|
||||||
private Utf8String(@Nullable final ByteBuf buffer) {
|
private Utf8String(@Nullable final ByteBuf buffer) {
|
||||||
|
|
||||||
if (buffer == null) {
|
if (buffer == null) {
|
||||||
this.buffer = null;
|
this.buffer = null;
|
||||||
this.utf16CodeUnitCount = Suppliers.memoize(() -> -1);
|
this.utf16String = Suppliers.memoize(() -> null);
|
||||||
|
this.utf16StringLength = Suppliers.memoize(() -> -1);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (buffer.writerIndex() == 0) {
|
if (buffer.writerIndex() == 0) {
|
||||||
this.buffer = Unpooled.EMPTY_BUFFER;
|
this.buffer = Unpooled.EMPTY_BUFFER;
|
||||||
this.utf16CodeUnitCount = Suppliers.memoize(() -> 0);
|
this.utf16String = Suppliers.memoize(() -> "");
|
||||||
|
this.utf16StringLength = Suppliers.memoize(() -> 0);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
this.buffer = buffer;
|
this.buffer = buffer;
|
||||||
|
|
||||||
this.utf16CodeUnitCount = Suppliers.memoize(() -> {
|
this.utf16String = Suppliers.memoize(() -> {
|
||||||
|
|
||||||
|
CodePointIterator iterator = new CodePointIterator(this.buffer);
|
||||||
|
StringBuilder builder = new StringBuilder(this.length());
|
||||||
|
|
||||||
|
while (iterator.hasNext()) {
|
||||||
|
builder.appendCodePoint(iterator.nextInt());
|
||||||
|
}
|
||||||
|
|
||||||
|
return builder.toString();
|
||||||
|
});
|
||||||
|
|
||||||
|
this.utf16StringLength = Suppliers.memoize(() -> {
|
||||||
final UTF16CodeUnitCounter counter = new UTF16CodeUnitCounter();
|
final UTF16CodeUnitCounter counter = new UTF16CodeUnitCounter();
|
||||||
final int length = this.buffer.writerIndex();
|
final int length = this.buffer.writerIndex();
|
||||||
final int index = this.buffer.forEachByte(0, length, counter);
|
final int index = this.buffer.forEachByte(0, length, counter);
|
||||||
|
|
||||||
assert index == -1 : lenientFormat("index: %s, length: %s", index, length);
|
assert index == -1 : lenientFormat("index: %s, length: %s", index, length);
|
||||||
return counter.charCount();
|
return counter.charCount();
|
||||||
});
|
});
|
||||||
@@ -84,13 +97,17 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* {@code true} if the length of this instance is zero.
|
* {@code true} if the length of this instance is zero.
|
||||||
|
*
|
||||||
|
* @return {@code true} if the length of this instance is zero.
|
||||||
*/
|
*/
|
||||||
public final boolean isEmpty() {
|
public final boolean isEmpty() {
|
||||||
return this.buffer != null && this.buffer.writerIndex() == 0;
|
return this.buffer != null && this.buffer.writerIndex() == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* {@code true} if this instance is null.
|
* {@code true} if this instance is {@code null}.
|
||||||
|
*
|
||||||
|
* @return {@code true} if this instance is {@code null}.
|
||||||
*/
|
*/
|
||||||
public final boolean isNull() {
|
public final boolean isNull() {
|
||||||
return this.buffer == null;
|
return this.buffer == null;
|
||||||
@@ -104,8 +121,8 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
/**
|
/**
|
||||||
* Returns a stream of {@code int} zero-extending the {@code char} values in this {@link Utf8String}.
|
* Returns a stream of {@code int} zero-extending the {@code char} values in this {@link Utf8String}.
|
||||||
* <p>
|
* <p>
|
||||||
* Any char which maps to a "{@docRoot}/java/lang/Character.html#unicode">surrogate code point</a> is passed through
|
* Any char which maps to a <a href="{@docRoot}/java/lang/Character.html#unicode">surrogate code point</a> is passed
|
||||||
* uninterpreted.
|
* through uninterpreted.
|
||||||
* <p>
|
* <p>
|
||||||
* No additional string space is allocated.
|
* No additional string space is allocated.
|
||||||
*
|
*
|
||||||
@@ -249,10 +266,10 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
/**
|
/**
|
||||||
* Encoded length of this {@link Utf8String}.
|
* Encoded length of this {@link Utf8String}.
|
||||||
* <p>
|
* <p>
|
||||||
* This is the same value as would be returned by {@link String#getBytes()#utf16CodeUnitCount} with no time or space
|
* This is the same value as would be returned by {@link String#getBytes()}{@code .length} with no time or space
|
||||||
* overhead.
|
* overhead.
|
||||||
*
|
*
|
||||||
* @return encoded length of {@link Utf8String}
|
* @return encoded length of this {@link Utf8String}
|
||||||
*/
|
*/
|
||||||
public final int encodedLength() {
|
public final int encodedLength() {
|
||||||
return this == NULL || this == EMPTY ? 0 : this.buffer.writerIndex();
|
return this == NULL || this == EMPTY ? 0 : this.buffer.writerIndex();
|
||||||
@@ -384,12 +401,12 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
* Returns the length of this {@link Utf8String}.
|
* Returns the length of this {@link Utf8String}.
|
||||||
* <p>
|
* <p>
|
||||||
* The length is the number of UTF-16 code units in this {@link Utf8String}. This is the same value as would be
|
* The length is the number of UTF-16 code units in this {@link Utf8String}. This is the same value as would be
|
||||||
* returned by {@link Utf8String#toUtf16()#length()} with no time or space overhead.
|
* returned by {@link Utf8String#toUtf16()}{@code .length()} with no space overhead.
|
||||||
*
|
*
|
||||||
* @return the length of this {@link Utf8String}.
|
* @return the length of this {@link Utf8String}.
|
||||||
*/
|
*/
|
||||||
public final int length() {
|
public final int length() {
|
||||||
return this.utf16CodeUnitCount.get();
|
return this.utf16StringLength.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -425,7 +442,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
* @return {@code true} if and only if the reference count became {@code 0}.
|
* @return {@code true} if and only if the reference count became {@code 0}.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public boolean release(int decrement) {
|
public boolean release(final int decrement) {
|
||||||
return this == NULL || this.buffer.release(decrement);
|
return this == NULL || this.buffer.release(decrement);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -436,7 +453,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
* @return the {@link Utf8String} created.
|
* @return the {@link Utf8String} created.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public Utf8String replace(ByteBuf content) {
|
public Utf8String replace(final ByteBuf content) {
|
||||||
return fromUnsafe(content);
|
return fromUnsafe(content);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -449,7 +466,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Utf8String retain(int increment) {
|
public Utf8String retain(final int increment) {
|
||||||
if (this != NULL) {
|
if (this != NULL) {
|
||||||
this.buffer.retain(increment);
|
this.buffer.retain(increment);
|
||||||
}
|
}
|
||||||
@@ -486,7 +503,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
* allocation, convert this {@link Utf8String} to a {@link String} and call
|
* allocation, convert this {@link Utf8String} to a {@link String} and call
|
||||||
* {@link String#subSequence}.
|
* {@link String#subSequence}.
|
||||||
* @throws IndexOutOfBoundsException if {@code start} or {@code end} are negative, {@code end} is greater than
|
* @throws IndexOutOfBoundsException if {@code start} or {@code end} are negative, {@code end} is greater than
|
||||||
* {@link #length()}, <li>{@code start} is greater than {@code end}, or
|
* {@link #length()}, {@code start} is greater than {@code end}, or
|
||||||
* {@link #isNull()} is {@code true}.
|
* {@link #isNull()} is {@code true}.
|
||||||
*/
|
*/
|
||||||
@Nonnull
|
@Nonnull
|
||||||
@@ -546,24 +563,12 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
@Override
|
@Override
|
||||||
@Nonnull
|
@Nonnull
|
||||||
public String toString() {
|
public String toString() {
|
||||||
if (this == NULL) {
|
return Json.toString(this.toUtf16());
|
||||||
return "null";
|
|
||||||
}
|
|
||||||
if (this == EMPTY) {
|
|
||||||
return "\"\"";
|
|
||||||
}
|
|
||||||
return Json.toString(this.buffer.getCharSequence(0, this.buffer.writerIndex(), UTF_8));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nullable
|
@Nullable
|
||||||
public String toUtf16() {
|
public String toUtf16() {
|
||||||
if (this == NULL) {
|
return this.utf16String.get();
|
||||||
return null;
|
|
||||||
}
|
|
||||||
if (this == EMPTY) {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
return this.buffer.getCharSequence(0, this.buffer.writerIndex(), UTF_8).toString();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -575,7 +580,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Utf8String touch(Object hint) {
|
public Utf8String touch(final Object hint) {
|
||||||
if (this != NULL) {
|
if (this != NULL) {
|
||||||
this.buffer.touch(hint);
|
this.buffer.touch(hint);
|
||||||
}
|
}
|
||||||
@@ -603,7 +608,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
return new Utf8String(Unpooled.wrappedBuffer(string.getBytes(UTF_8)));
|
return new Utf8String(Unpooled.wrappedBuffer(string.getBytes(UTF_8)));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int toCodePoint(int characterEncoding) {
|
private static int toCodePoint(final int characterEncoding) {
|
||||||
|
|
||||||
if ((characterEncoding & 0b11111000_00000000_00000000_00000000) == 0b11110000_00000000_00000000_00000000) {
|
if ((characterEncoding & 0b11111000_00000000_00000000_00000000) == 0b11110000_00000000_00000000_00000000) {
|
||||||
|
|
||||||
@@ -773,7 +778,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Utf8String deserialize(JsonParser parser, DeserializationContext context) throws IOException {
|
public Utf8String deserialize(final JsonParser parser, final DeserializationContext context) throws IOException {
|
||||||
|
|
||||||
final JsonNode node = parser.getCodec().readTree(parser);
|
final JsonNode node = parser.getCodec().readTree(parser);
|
||||||
final JsonNodeType type = node.getNodeType();
|
final JsonNodeType type = node.getNodeType();
|
||||||
@@ -798,7 +803,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void serialize(Utf8String value, JsonGenerator generator, SerializerProvider provider) throws IOException {
|
public void serialize(final Utf8String value, final JsonGenerator generator, final SerializerProvider provider) throws IOException {
|
||||||
generator.writeString(value.toUtf16());
|
generator.writeString(value.toUtf16());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -838,7 +843,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean process(byte value) {
|
public boolean process(final byte value) {
|
||||||
|
|
||||||
if (this.skip > 0) {
|
if (this.skip > 0) {
|
||||||
this.skip--;
|
this.skip--;
|
||||||
@@ -918,7 +923,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
* code point is complete, a value of {@code false} is returned.
|
* code point is complete, a value of {@code false} is returned.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public boolean process(byte value) {
|
public boolean process(final byte value) {
|
||||||
|
|
||||||
switch (this.shift) {
|
switch (this.shift) {
|
||||||
|
|
||||||
@@ -1018,7 +1023,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
* of {@code true}.
|
* of {@code true}.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public boolean process(byte value) {
|
public boolean process(final byte value) {
|
||||||
|
|
||||||
switch (this.shift) {
|
switch (this.shift) {
|
||||||
|
|
||||||
|
|||||||
@@ -212,12 +212,11 @@ public class Utf8StringTest {
|
|||||||
public void testToString() {
|
public void testToString() {
|
||||||
|
|
||||||
assertEquals(Utf8String.NULL.toString(), "null");
|
assertEquals(Utf8String.NULL.toString(), "null");
|
||||||
assertSame("null", Utf8String.NULL.toString());
|
|
||||||
|
|
||||||
assertEquals(Utf8String.EMPTY.toString(), "\"\"");
|
assertEquals(Utf8String.EMPTY.toString(), "\"\"");
|
||||||
assertSame("\"\"", Utf8String.EMPTY.toString());
|
assertEquals(Utf8String.EMPTY.toString(), "\"\"");
|
||||||
assertSame("\"\"", Utf8String.fromUnsafe(Unpooled.EMPTY_BUFFER).toString());
|
assertEquals(Utf8String.fromUnsafe(Unpooled.EMPTY_BUFFER).toString(), "\"\"");
|
||||||
assertSame("\"\"", Utf8String.transcodeUtf16(new String("")).toString());
|
assertEquals(Utf8String.transcodeUtf16(new String("")).toString(), "\"\"");
|
||||||
|
|
||||||
assertEquals(Utf8String.transcodeUtf16("Hello World!").toString(), "\"Hello World!\"");
|
assertEquals(Utf8String.transcodeUtf16("Hello World!").toString(), "\"Hello World!\"");
|
||||||
assertEquals(Utf8String.transcodeUtf16("\"Hello World!\"").toString(), "\"\\\"Hello World!\\\"\"");
|
assertEquals(Utf8String.transcodeUtf16("\"Hello World!\"").toString(), "\"\\\"Hello World!\\\"\"");
|
||||||
|
|||||||
Reference in New Issue
Block a user