Added caching of the UTF-16 string to Utf8String to reduce the time and space cost of Utf8String.toUtf16(). Also addressed a number of javadoc issues.

This commit is contained in:
David Noble
2019-10-02 22:06:13 -07:00
parent da047f001b
commit 3308c544aa
4 changed files with 56 additions and 48 deletions

View File

@@ -9,10 +9,10 @@ import java.util.Objects;
* A container object which may or may not contain a non-null value * A container object which may or may not contain a non-null value
* *
* This is a value-based class and as such use of identity-sensitive operations--including reference equality * This is a value-based class and as such use of identity-sensitive operations--including reference equality
* ({@code ==}), identity hash code, or synchronization--on instances of {@Out} may have unpredictable results and * ({@code ==}), identity hash code, or synchronization--on instances of {@code Out} may have unpredictable results and
* should be avoided. * should be avoided.
* *
* @param <T> * @param <T> type of the referent.
*/ */
public final class Out<T> { public final class Out<T> {
@@ -42,11 +42,13 @@ public final class Out<T> {
} }
/** /**
* Indicates whether some other object is equal to this {@link Out} value. The other object is considered equal if: * Indicates whether some other object is equal to this {@link Out} value.
* <p>
* The other object is considered equal if:
* <ul> * <ul>
* <li>it is also an {@link Out} and; * <li>it is also an {@link Out} and;
* <li>both instances have no value present or; * <li>both instances have no value present or;
* <li>the present values are equal to each other as determined by {@link T#equals(Object)}}. * <li>the present values are equal to each other as determined by {@code T.equals(Object)}.
* </ul> * </ul>
* *
* @param other an object to be tested for equality * @param other an object to be tested for equality

View File

@@ -12,7 +12,7 @@ import java.util.Objects;
* ({@code ==}), identity hash code, or synchronization--on instances of {@link Reference} may have unpredictable * ({@code ==}), identity hash code, or synchronization--on instances of {@link Reference} may have unpredictable
* results and should be avoided. * results and should be avoided.
* *
* @param <T> * @param <T> type of the referent.
*/ */
public final class Reference<T> { public final class Reference<T> {
@@ -45,11 +45,13 @@ public final class Reference<T> {
} }
/** /**
* Indicates whether some other object is equal to this {@link Reference} value. The other object is considered equal if:. * Indicates whether some other object is equal to this {@link Reference} value.
* <p>
* The other object is considered equal if:
* <ul> * <ul>
* <li>it is also a {@link Reference} and; * <li>it is also a {@link Reference} and;
* <li>both instances have no value present or; * <li>both instances have no value present or;
* <li>the present values are equal to each other as determined by {@link T#equals(Object)}}. * <li>the present values are equal to each other as determined by {@code T.equals(Object)}.
* </ul> * </ul>
* *
* @param other an object to be tested for equality * @param other an object to be tested for equality

View File

@@ -53,30 +53,43 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
public static final Utf8String NULL = new Utf8String(null); public static final Utf8String NULL = new Utf8String(null);
private final ByteBuf buffer; private final ByteBuf buffer;
private final Supplier<Integer> utf16CodeUnitCount; private final Supplier<String> utf16String;
private final Supplier<Integer> utf16StringLength;
private Utf8String(@Nullable final ByteBuf buffer) { private Utf8String(@Nullable final ByteBuf buffer) {
if (buffer == null) { if (buffer == null) {
this.buffer = null; this.buffer = null;
this.utf16CodeUnitCount = Suppliers.memoize(() -> -1); this.utf16String = Suppliers.memoize(() -> null);
this.utf16StringLength = Suppliers.memoize(() -> -1);
return; return;
} }
if (buffer.writerIndex() == 0) { if (buffer.writerIndex() == 0) {
this.buffer = Unpooled.EMPTY_BUFFER; this.buffer = Unpooled.EMPTY_BUFFER;
this.utf16CodeUnitCount = Suppliers.memoize(() -> 0); this.utf16String = Suppliers.memoize(() -> "");
this.utf16StringLength = Suppliers.memoize(() -> 0);
return; return;
} }
this.buffer = buffer; this.buffer = buffer;
this.utf16CodeUnitCount = Suppliers.memoize(() -> { this.utf16String = Suppliers.memoize(() -> {
CodePointIterator iterator = new CodePointIterator(this.buffer);
StringBuilder builder = new StringBuilder(this.length());
while (iterator.hasNext()) {
builder.appendCodePoint(iterator.nextInt());
}
return builder.toString();
});
this.utf16StringLength = Suppliers.memoize(() -> {
final UTF16CodeUnitCounter counter = new UTF16CodeUnitCounter(); final UTF16CodeUnitCounter counter = new UTF16CodeUnitCounter();
final int length = this.buffer.writerIndex(); final int length = this.buffer.writerIndex();
final int index = this.buffer.forEachByte(0, length, counter); final int index = this.buffer.forEachByte(0, length, counter);
assert index == -1 : lenientFormat("index: %s, length: %s", index, length); assert index == -1 : lenientFormat("index: %s, length: %s", index, length);
return counter.charCount(); return counter.charCount();
}); });
@@ -84,13 +97,17 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
/** /**
* {@code true} if the length of this instance is zero. * {@code true} if the length of this instance is zero.
*
* @return {@code true} if the length of this instance is zero.
*/ */
public final boolean isEmpty() { public final boolean isEmpty() {
return this.buffer != null && this.buffer.writerIndex() == 0; return this.buffer != null && this.buffer.writerIndex() == 0;
} }
/** /**
* {@code true} if this instance is null. * {@code true} if this instance is {@code null}.
*
* @return {@code true} if this instance is {@code null}.
*/ */
public final boolean isNull() { public final boolean isNull() {
return this.buffer == null; return this.buffer == null;
@@ -104,8 +121,8 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
/** /**
* Returns a stream of {@code int} zero-extending the {@code char} values in this {@link Utf8String}. * Returns a stream of {@code int} zero-extending the {@code char} values in this {@link Utf8String}.
* <p> * <p>
* Any char which maps to a "{@docRoot}/java/lang/Character.html#unicode">surrogate code point</a> is passed through * Any char which maps to a <a href="{@docRoot}/java/lang/Character.html#unicode">surrogate code point</a> is passed
* uninterpreted. * through uninterpreted.
* <p> * <p>
* No additional string space is allocated. * No additional string space is allocated.
* *
@@ -249,10 +266,10 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
/** /**
* Encoded length of this {@link Utf8String}. * Encoded length of this {@link Utf8String}.
* <p> * <p>
* This is the same value as would be returned by {@link String#getBytes()#utf16CodeUnitCount} with no time or space * This is the same value as would be returned by {@link String#getBytes()}{@code .length} with no time or space
* overhead. * overhead.
* *
* @return encoded length of {@link Utf8String} * @return encoded length of this {@link Utf8String}
*/ */
public final int encodedLength() { public final int encodedLength() {
return this == NULL || this == EMPTY ? 0 : this.buffer.writerIndex(); return this == NULL || this == EMPTY ? 0 : this.buffer.writerIndex();
@@ -384,12 +401,12 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
* Returns the length of this {@link Utf8String}. * Returns the length of this {@link Utf8String}.
* <p> * <p>
* The length is the number of UTF-16 code units in this {@link Utf8String}. This is the same value as would be * The length is the number of UTF-16 code units in this {@link Utf8String}. This is the same value as would be
* returned by {@link Utf8String#toUtf16()#length()} with no time or space overhead. * returned by {@link Utf8String#toUtf16()}{@code .length()} with no space overhead.
* *
* @return the length of this {@link Utf8String}. * @return the length of this {@link Utf8String}.
*/ */
public final int length() { public final int length() {
return this.utf16CodeUnitCount.get(); return this.utf16StringLength.get();
} }
/** /**
@@ -425,7 +442,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
* @return {@code true} if and only if the reference count became {@code 0}. * @return {@code true} if and only if the reference count became {@code 0}.
*/ */
@Override @Override
public boolean release(int decrement) { public boolean release(final int decrement) {
return this == NULL || this.buffer.release(decrement); return this == NULL || this.buffer.release(decrement);
} }
@@ -436,7 +453,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
* @return the {@link Utf8String} created. * @return the {@link Utf8String} created.
*/ */
@Override @Override
public Utf8String replace(ByteBuf content) { public Utf8String replace(final ByteBuf content) {
return fromUnsafe(content); return fromUnsafe(content);
} }
@@ -449,7 +466,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
} }
@Override @Override
public Utf8String retain(int increment) { public Utf8String retain(final int increment) {
if (this != NULL) { if (this != NULL) {
this.buffer.retain(increment); this.buffer.retain(increment);
} }
@@ -486,7 +503,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
* allocation, convert this {@link Utf8String} to a {@link String} and call * allocation, convert this {@link Utf8String} to a {@link String} and call
* {@link String#subSequence}. * {@link String#subSequence}.
* @throws IndexOutOfBoundsException if {@code start} or {@code end} are negative, {@code end} is greater than * @throws IndexOutOfBoundsException if {@code start} or {@code end} are negative, {@code end} is greater than
* {@link #length()}, <li>{@code start} is greater than {@code end}, or * {@link #length()}, {@code start} is greater than {@code end}, or
* {@link #isNull()} is {@code true}. * {@link #isNull()} is {@code true}.
*/ */
@Nonnull @Nonnull
@@ -546,24 +563,12 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
@Override @Override
@Nonnull @Nonnull
public String toString() { public String toString() {
if (this == NULL) { return Json.toString(this.toUtf16());
return "null";
}
if (this == EMPTY) {
return "\"\"";
}
return Json.toString(this.buffer.getCharSequence(0, this.buffer.writerIndex(), UTF_8));
} }
@Nullable @Nullable
public String toUtf16() { public String toUtf16() {
if (this == NULL) { return this.utf16String.get();
return null;
}
if (this == EMPTY) {
return "";
}
return this.buffer.getCharSequence(0, this.buffer.writerIndex(), UTF_8).toString();
} }
@Override @Override
@@ -575,7 +580,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
} }
@Override @Override
public Utf8String touch(Object hint) { public Utf8String touch(final Object hint) {
if (this != NULL) { if (this != NULL) {
this.buffer.touch(hint); this.buffer.touch(hint);
} }
@@ -603,7 +608,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
return new Utf8String(Unpooled.wrappedBuffer(string.getBytes(UTF_8))); return new Utf8String(Unpooled.wrappedBuffer(string.getBytes(UTF_8)));
} }
private static int toCodePoint(int characterEncoding) { private static int toCodePoint(final int characterEncoding) {
if ((characterEncoding & 0b11111000_00000000_00000000_00000000) == 0b11110000_00000000_00000000_00000000) { if ((characterEncoding & 0b11111000_00000000_00000000_00000000) == 0b11110000_00000000_00000000_00000000) {
@@ -773,7 +778,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
} }
@Override @Override
public Utf8String deserialize(JsonParser parser, DeserializationContext context) throws IOException { public Utf8String deserialize(final JsonParser parser, final DeserializationContext context) throws IOException {
final JsonNode node = parser.getCodec().readTree(parser); final JsonNode node = parser.getCodec().readTree(parser);
final JsonNodeType type = node.getNodeType(); final JsonNodeType type = node.getNodeType();
@@ -798,7 +803,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
} }
@Override @Override
public void serialize(Utf8String value, JsonGenerator generator, SerializerProvider provider) throws IOException { public void serialize(final Utf8String value, final JsonGenerator generator, final SerializerProvider provider) throws IOException {
generator.writeString(value.toUtf16()); generator.writeString(value.toUtf16());
} }
} }
@@ -838,7 +843,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
} }
@Override @Override
public boolean process(byte value) { public boolean process(final byte value) {
if (this.skip > 0) { if (this.skip > 0) {
this.skip--; this.skip--;
@@ -918,7 +923,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
* code point is complete, a value of {@code false} is returned. * code point is complete, a value of {@code false} is returned.
*/ */
@Override @Override
public boolean process(byte value) { public boolean process(final byte value) {
switch (this.shift) { switch (this.shift) {
@@ -1018,7 +1023,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
* of {@code true}. * of {@code true}.
*/ */
@Override @Override
public boolean process(byte value) { public boolean process(final byte value) {
switch (this.shift) { switch (this.shift) {

View File

@@ -212,12 +212,11 @@ public class Utf8StringTest {
public void testToString() { public void testToString() {
assertEquals(Utf8String.NULL.toString(), "null"); assertEquals(Utf8String.NULL.toString(), "null");
assertSame("null", Utf8String.NULL.toString());
assertEquals(Utf8String.EMPTY.toString(), "\"\""); assertEquals(Utf8String.EMPTY.toString(), "\"\"");
assertSame("\"\"", Utf8String.EMPTY.toString()); assertEquals(Utf8String.EMPTY.toString(), "\"\"");
assertSame("\"\"", Utf8String.fromUnsafe(Unpooled.EMPTY_BUFFER).toString()); assertEquals(Utf8String.fromUnsafe(Unpooled.EMPTY_BUFFER).toString(), "\"\"");
assertSame("\"\"", Utf8String.transcodeUtf16(new String("")).toString()); assertEquals(Utf8String.transcodeUtf16(new String("")).toString(), "\"\"");
assertEquals(Utf8String.transcodeUtf16("Hello World!").toString(), "\"Hello World!\""); assertEquals(Utf8String.transcodeUtf16("Hello World!").toString(), "\"Hello World!\"");
assertEquals(Utf8String.transcodeUtf16("\"Hello World!\"").toString(), "\"\\\"Hello World!\\\"\""); assertEquals(Utf8String.transcodeUtf16("\"Hello World!\"").toString(), "\"\\\"Hello World!\\\"\"");