From 3308c544aa223fefea6059464f16fe2f75b805ea Mon Sep 17 00:00:00 2001 From: David Noble Date: Wed, 2 Oct 2019 22:06:13 -0700 Subject: [PATCH] Added caching of UTF-16 string caching to Utf8String to reduce the time and space cost of Utf8String.toUtf16(). Also: addressed a number of javadoc issues. --- .../java/com/azure/data/cosmos/core/Out.java | 10 ++- .../com/azure/data/cosmos/core/Reference.java | 8 +- .../azure/data/cosmos/core/Utf8String.java | 79 ++++++++++--------- .../data/cosmos/core/Utf8StringTest.java | 7 +- 4 files changed, 56 insertions(+), 48 deletions(-) diff --git a/java/src/main/java/com/azure/data/cosmos/core/Out.java b/java/src/main/java/com/azure/data/cosmos/core/Out.java index ecca988..676786b 100644 --- a/java/src/main/java/com/azure/data/cosmos/core/Out.java +++ b/java/src/main/java/com/azure/data/cosmos/core/Out.java @@ -9,10 +9,10 @@ import java.util.Objects; * A container object which may or may not contain a non-null value * * This is a value-based class and as such use of identity-sensitive operations--including reference equality - * ({@code ==}), identity hash code, or synchronization--on instances of {@Out} may have unpredictable results and + * ({@code ==}), identity hash code, or synchronization--on instances of {@code Out} may have unpredictable results and * should be avoided. * - * @param + * @param type of the referent. */ public final class Out { @@ -42,11 +42,13 @@ public final class Out { } /** - * Indicates whether some other object is equal to this {@link Out} value. The other object is considered equal if: + * Indicates whether some other object is equal to this {@link Out} value. + *

+ * The other object is considered equal if: *

    *
  • it is also an {@link Out} and; *
  • both instances have no value present or; - *
  • the present values are equal to each other as determined by {@link T#equals(Object)}}. + *
  • the present values are equal to each other as determined by {@code T.equals(Object)}. *
* * @param other an object to be tested for equality diff --git a/java/src/main/java/com/azure/data/cosmos/core/Reference.java b/java/src/main/java/com/azure/data/cosmos/core/Reference.java index 4bdc68c..cede38a 100644 --- a/java/src/main/java/com/azure/data/cosmos/core/Reference.java +++ b/java/src/main/java/com/azure/data/cosmos/core/Reference.java @@ -12,7 +12,7 @@ import java.util.Objects; * ({@code ==}), identity hash code, or synchronization--on instances of {@link Reference} may have unpredictable * results and should be avoided. * - * @param + * @param type of the referent. */ public final class Reference { @@ -45,11 +45,13 @@ public final class Reference { } /** - * Indicates whether some other object is equal to this {@link Reference} value. The other object is considered equal if:. + * Indicates whether some other object is equal to this {@link Reference} value. + *

+ * The other object is considered equal if: *

    *
  • it is also an {@link Reference} and; *
  • both instances have no value present or; - *
  • the present values are equal to each other as determined by {@link T#equals(Object)}}. + *
  • the present values are equal to each other as determined by {@code T.equals(Object)}. *
* * @param other an object to be tested for equality diff --git a/java/src/main/java/com/azure/data/cosmos/core/Utf8String.java b/java/src/main/java/com/azure/data/cosmos/core/Utf8String.java index 059f180..61302fa 100644 --- a/java/src/main/java/com/azure/data/cosmos/core/Utf8String.java +++ b/java/src/main/java/com/azure/data/cosmos/core/Utf8String.java @@ -53,30 +53,43 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable public static final Utf8String NULL = new Utf8String(null); private final ByteBuf buffer; - private final Supplier utf16CodeUnitCount; + private final Supplier utf16String; + private final Supplier utf16StringLength; private Utf8String(@Nullable final ByteBuf buffer) { if (buffer == null) { this.buffer = null; - this.utf16CodeUnitCount = Suppliers.memoize(() -> -1); + this.utf16String = Suppliers.memoize(() -> null); + this.utf16StringLength = Suppliers.memoize(() -> -1); return; } if (buffer.writerIndex() == 0) { this.buffer = Unpooled.EMPTY_BUFFER; - this.utf16CodeUnitCount = Suppliers.memoize(() -> 0); + this.utf16String = Suppliers.memoize(() -> ""); + this.utf16StringLength = Suppliers.memoize(() -> 0); return; } this.buffer = buffer; - this.utf16CodeUnitCount = Suppliers.memoize(() -> { + this.utf16String = Suppliers.memoize(() -> { + CodePointIterator iterator = new CodePointIterator(this.buffer); + StringBuilder builder = new StringBuilder(this.length()); + + while (iterator.hasNext()) { + builder.appendCodePoint(iterator.nextInt()); + } + + return builder.toString(); + }); + + this.utf16StringLength = Suppliers.memoize(() -> { final UTF16CodeUnitCounter counter = new UTF16CodeUnitCounter(); final int length = this.buffer.writerIndex(); final int index = this.buffer.forEachByte(0, length, counter); - assert index == -1 : lenientFormat("index: %s, length: %s", index, length); return counter.charCount(); }); @@ -84,13 +97,17 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable 
/** * {@code true} if the length of this instance is zero. + * + * @return {@code true} if the length of this instance is zero. */ public final boolean isEmpty() { return this.buffer != null && this.buffer.writerIndex() == 0; } /** - * {@code true} if this instance is null. + * {@code true} if this instance is {@code null}. + * + * @return {@code true} if this instance is {@code null}. */ public final boolean isNull() { return this.buffer == null; @@ -104,8 +121,8 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable /** * Returns a stream of {@code int} zero-extending the {@code char} values in this {@link Utf8String}. *

- * Any char which maps to a "{@docRoot}/java/lang/Character.html#unicode">surrogate code point is passed through - * uninterpreted. + * Any char which maps to a surrogate code point is passed + * through uninterpreted. *

* No additional string space is allocated. * @@ -249,10 +266,10 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable /** * Encoded length of this {@link Utf8String}. *

- * This is the same value as would be returned by {@link String#getBytes()#utf16CodeUnitCount} with no time or space + * This is the same value as would be returned by {@link String#getBytes()}{@code .length} with no time or space * overhead. * - * @return encoded length of {@link Utf8String} + * @return encoded length of this {@link Utf8String} */ public final int encodedLength() { return this == NULL || this == EMPTY ? 0 : this.buffer.writerIndex(); @@ -384,12 +401,12 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable * Returns the length of this {@link Utf8String}. *

* The length is the number of UTF-16 code units in this {@link Utf8String}. This is the same value as would be - * returned by {@link Utf8String#toUtf16()#length()} with no time or space overhead. + * returned by {@link Utf8String#toUtf16()}{@code .length()} with no space overhead. * * @return the length of this {@link Utf8String}. */ public final int length() { - return this.utf16CodeUnitCount.get(); + return this.utf16StringLength.get(); } /** @@ -425,7 +442,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable * @return {@code true} if and only if the reference count became {@code 0}. */ @Override - public boolean release(int decrement) { + public boolean release(final int decrement) { return this == NULL || this.buffer.release(decrement); } @@ -436,7 +453,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable * @return the {@link Utf8String} created. */ @Override - public Utf8String replace(ByteBuf content) { + public Utf8String replace(final ByteBuf content) { return fromUnsafe(content); } @@ -449,7 +466,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable } @Override - public Utf8String retain(int increment) { + public Utf8String retain(final int increment) { if (this != NULL) { this.buffer.retain(increment); } @@ -486,7 +503,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable * allocation, convert this {@link Utf8String} to a {@link String} and call * {@link String#subSequence}. * @throws IndexOutOfBoundsException if {@code start} or {@code end} are negative, {@code end} is greater than - * {@link #length()},

  • {@code start} is greater than {@code end}, or + * {@link #length()}, {@code start} is greater than {@code end}, or * {@link #isNull()} is {@code true}. */ @Nonnull @@ -546,24 +563,12 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable @Override @Nonnull public String toString() { - if (this == NULL) { - return "null"; - } - if (this == EMPTY) { - return "\"\""; - } - return Json.toString(this.buffer.getCharSequence(0, this.buffer.writerIndex(), UTF_8)); + return Json.toString(this.toUtf16()); } @Nullable public String toUtf16() { - if (this == NULL) { - return null; - } - if (this == EMPTY) { - return ""; - } - return this.buffer.getCharSequence(0, this.buffer.writerIndex(), UTF_8).toString(); + return this.utf16String.get(); } @Override @@ -575,7 +580,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable } @Override - public Utf8String touch(Object hint) { + public Utf8String touch(final Object hint) { if (this != NULL) { this.buffer.touch(hint); } @@ -603,7 +608,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable return new Utf8String(Unpooled.wrappedBuffer(string.getBytes(UTF_8))); } - private static int toCodePoint(int characterEncoding) { + private static int toCodePoint(final int characterEncoding) { if ((characterEncoding & 0b11111000_00000000_00000000_00000000) == 0b11110000_00000000_00000000_00000000) { @@ -773,7 +778,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable } @Override - public Utf8String deserialize(JsonParser parser, DeserializationContext context) throws IOException { + public Utf8String deserialize(final JsonParser parser, final DeserializationContext context) throws IOException { final JsonNode node = parser.getCodec().readTree(parser); final JsonNodeType type = node.getNodeType(); @@ -798,7 +803,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable } @Override - public void 
serialize(Utf8String value, JsonGenerator generator, SerializerProvider provider) throws IOException { + public void serialize(final Utf8String value, final JsonGenerator generator, final SerializerProvider provider) throws IOException { generator.writeString(value.toUtf16()); } } @@ -838,7 +843,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable } @Override - public boolean process(byte value) { + public boolean process(final byte value) { if (this.skip > 0) { this.skip--; @@ -918,7 +923,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable * code point is complete, a value of {@code false} is returned. */ @Override - public boolean process(byte value) { + public boolean process(final byte value) { switch (this.shift) { @@ -1018,7 +1023,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable * of {@code true}. */ @Override - public boolean process(byte value) { + public boolean process(final byte value) { switch (this.shift) { diff --git a/java/src/test/java/com/azure/data/cosmos/core/Utf8StringTest.java b/java/src/test/java/com/azure/data/cosmos/core/Utf8StringTest.java index 84eba1e..2f66089 100644 --- a/java/src/test/java/com/azure/data/cosmos/core/Utf8StringTest.java +++ b/java/src/test/java/com/azure/data/cosmos/core/Utf8StringTest.java @@ -212,12 +212,11 @@ public class Utf8StringTest { public void testToString() { assertEquals(Utf8String.NULL.toString(), "null"); - assertSame("null", Utf8String.NULL.toString()); assertEquals(Utf8String.EMPTY.toString(), "\"\""); - assertSame("\"\"", Utf8String.EMPTY.toString()); - assertSame("\"\"", Utf8String.fromUnsafe(Unpooled.EMPTY_BUFFER).toString()); - assertSame("\"\"", Utf8String.transcodeUtf16(new String("")).toString()); + assertEquals(Utf8String.EMPTY.toString(), "\"\""); + assertEquals(Utf8String.fromUnsafe(Unpooled.EMPTY_BUFFER).toString(), "\"\""); + assertEquals(Utf8String.transcodeUtf16(new String("")).toString(), 
"\"\""); assertEquals(Utf8String.transcodeUtf16("Hello World!").toString(), "\"Hello World!\""); assertEquals(Utf8String.transcodeUtf16("\"Hello World!\"").toString(), "\"\\\"Hello World!\\\"\"");