From 96819035bfcebf019f6ad7f58ff4f3bf691b274f Mon Sep 17 00:00:00 2001 From: David Noble Date: Thu, 3 Oct 2019 21:34:35 -0700 Subject: [PATCH] Utf8String refinements with inefficient support instead of no support for Utf8String.charAt(int) --- .../azure/data/cosmos/core/Utf8String.java | 251 +++++++++++++----- 1 file changed, 185 insertions(+), 66 deletions(-) diff --git a/java/src/main/java/com/azure/data/cosmos/core/Utf8String.java b/java/src/main/java/com/azure/data/cosmos/core/Utf8String.java index 61302fa..08fa7b9 100644 --- a/java/src/main/java/com/azure/data/cosmos/core/Utf8String.java +++ b/java/src/main/java/com/azure/data/cosmos/core/Utf8String.java @@ -21,7 +21,6 @@ import io.netty.buffer.ByteBuf; import io.netty.buffer.ByteBufHolder; import io.netty.buffer.Unpooled; import io.netty.util.ByteProcessor; -import it.unimi.dsi.fastutil.ints.IntIterator; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -30,7 +29,8 @@ import java.util.NoSuchElementException; import java.util.Optional; import java.util.PrimitiveIterator; import java.util.Spliterator; -import java.util.Spliterators; +import java.util.function.Consumer; +import java.util.function.IntConsumer; import java.util.function.Supplier; import java.util.stream.IntStream; import java.util.stream.StreamSupport; @@ -113,9 +113,36 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable return this.buffer == null; } + /** + * Returns the {@code char} value at the specified index. + *

+ * An index ranges from zero to {@code length() - 1}. The first {@code char} value of the sequence is at index + * index zero, the next at index one, and so on, as for array indexing. + *

+ * If the {@code char} value specified by the index is a + * surrogate, the surrogate value is returned. + * + * @param index the index of the {@code char} value to be returned. + * @return the specified {@code char} value. + * @throws IndexOutOfBoundsException if the {@code index} argument is negative or not less than {@code length()}. + */ @Override public char charAt(final int index) { - throw new UnsupportedOperationException(lenientFormat("Utf8String.charAt(index: %s)", index)); + + if (index < 0) { + throw new IndexOutOfBoundsException(); + } + + UTF16CodeUnitIterator iterator = new UTF16CodeUnitIterator(this.buffer); + int countdown = index; + + while (iterator.hasNext()) { + if (--countdown < 0) { + return (char) iterator.nextInt(); + } + } + + throw new IndexOutOfBoundsException(); } /** @@ -132,10 +159,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable public IntStream chars() { return this == NULL || this == EMPTY ? IntStream.empty() - : StreamSupport.intStream( - () -> Spliterators.spliteratorUnknownSize(new UTF16CodeUnitIterator(this.buffer), Spliterator.ORDERED), - Spliterator.ORDERED, false - ); + : StreamSupport.intStream(new UTF16CodeUnitSpliterator(this.buffer), false); } /** @@ -152,10 +176,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable public final IntStream codePoints() { return this == NULL || this == EMPTY ? IntStream.empty() - : StreamSupport.intStream( - () -> Spliterators.spliteratorUnknownSize(new CodePointIterator(this.buffer), Spliterator.ORDERED), - Spliterator.ORDERED, false - ); + : StreamSupport.intStream(new CodePointSpliterator(this.buffer), false); } /** @@ -682,59 +703,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable // region Types - private static final class UTF16CodeUnitIterator extends UTF8CodePointGetter implements IntIterator.OfInt { - - private final ByteBuf buffer; - private int start, length; - private int lowSurrogate; - - UTF16CodeUnitIterator(final ByteBuf buffer) { - this.buffer = buffer; - this.lowSurrogate = 0; - this.start = 0; - this.length = buffer.writerIndex(); - } - - @Override - public boolean hasNext() { - return (this.lowSurrogate != 0) || (0 <= this.start && this.start < this.length); - } - - /** - * Returns the next {@code int} code point in the iteration. - * - * @return the next {@code int} code point in the iteration. - * @throws NoSuchElementException if the iteration has no more code points. - */ - @Override - public int nextInt() { - - if (!this.hasNext()) { - throw new NoSuchElementException(); - } - - if (this.lowSurrogate != 0) { - int codeUnit = this.lowSurrogate; - this.lowSurrogate = 0; - return codeUnit; - } - - final int index = this.buffer.forEachByte(this.start, this.length - this.start, this); - assert index >= 0; - this.start = index + 1; - - final int codePoint = this.codePoint(); - - if ((codePoint & 0xFFFF0000) == 0) { - return codePoint; - } - - this.lowSurrogate = Character.lowSurrogate(codePoint); - return Character.highSurrogate(codePoint); - } - } - - private static final class CodePointIterator extends UTF8CodePointGetter implements IntIterator.OfInt { + private static class CodePointIterator extends UTF8CodePointGetter implements PrimitiveIterator.OfInt { private final ByteBuf buffer; private int start, length; @@ -745,6 +714,11 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable this.length = buffer.writerIndex(); } + /** + * Returns {@code true} if there is another code point in the iteration. + * + * @return {@code true} if there is another code point in the iteration. + */ @Override public boolean hasNext() { return 0 <= this.start && this.start < this.length; @@ -759,11 +733,13 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable @Override public int nextInt() { - if (!this.hasNext()) { + final int length = this.length - this.start; + + if (length <= 0) { throw new NoSuchElementException(); } - final int index = this.buffer.forEachByte(this.start, this.length - this.start, this); + final int index = this.buffer.forEachByte(this.start, length, this); assert index >= 0; this.start = index + 1; @@ -771,6 +747,149 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable } } + private static final class CodePointSpliterator extends CodePointIterator implements Spliterator.OfInt { + + CodePointSpliterator(final ByteBuf buffer) { + super(buffer); + } + + @Override + public int characteristics() { + return Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.ORDERED; + } + + @Override + public long estimateSize() { + return Long.MAX_VALUE; + } + + @Override + public void forEachRemaining(IntConsumer action) { + super.forEachRemaining(action); + } + + @Override + public void forEachRemaining(Consumer action) { + super.forEachRemaining(action); + } + + @Override + public boolean tryAdvance(IntConsumer action) { + checkNotNull(action, "expected non-null action"); + if (this.hasNext()) { + action.accept(this.nextInt()); + return true; + } + return false; + } + + @Override + public Spliterator.OfInt trySplit() { + return null; // Utf8String doesn't support parallel processing and so this method does not attempt a split + } + } + + private static class UTF16CodeUnitIterator extends UTF8CodePointGetter implements PrimitiveIterator.OfInt { + + private final ByteBuf buffer; + private int start, length; + private int lowSurrogate; + + UTF16CodeUnitIterator(final ByteBuf buffer) { + this.buffer = buffer; + this.lowSurrogate = 0; + this.start = 0; + this.length = buffer.writerIndex(); + } + + /** + * Returns {@code true} if there is another UTF-16 code unit in the iteration. + * + * @return {@code true} if there is another UTF-16 code unit in the iteration. + */ + @Override + public boolean hasNext() { + return (this.lowSurrogate != 0) || (0 <= this.start && this.start < this.length); + } + + /** + * Returns the next {@code int} UTF-16 code unit in the iteration. + * + * @return the next {@code int} UTF-16 code unit in the iteration. + * @throws NoSuchElementException if the iteration has no more UTF-16 code units. + */ + @Override + public int nextInt() { + + if (this.lowSurrogate != 0) { + int codeUnit = this.lowSurrogate; + this.lowSurrogate = 0; + return codeUnit; + } + + final int length = this.length - this.start; + + if (length <= 0) { + throw new NoSuchElementException(); + } + + final int index = this.buffer.forEachByte(this.start, length, this); + assert index >= 0; + this.start = index + 1; + + final int codePoint = this.codePoint(); + + if ((codePoint & 0xFFFF0000) == 0) { + return codePoint; + } + + this.lowSurrogate = Character.lowSurrogate(codePoint); + return Character.highSurrogate(codePoint); + } + } + + private static final class UTF16CodeUnitSpliterator extends UTF16CodeUnitIterator implements Spliterator.OfInt { + + UTF16CodeUnitSpliterator(final ByteBuf buffer) { + super(buffer); + } + + @Override + public int characteristics() { + return Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.ORDERED; + } + + @Override + public long estimateSize() { + return Long.MAX_VALUE; + } + + @Override + public void forEachRemaining(IntConsumer action) { + super.forEachRemaining(action); + } + + @Override + public void forEachRemaining(Consumer action) { + super.forEachRemaining(action); + } + + @Override + public boolean tryAdvance(IntConsumer action) { + checkNotNull(action, "expected non-null action"); + if (this.hasNext()) { + action.accept(this.nextInt()); + return true; + } + return false; + } + + @Override + public Spliterator.OfInt trySplit() { + return null; // Utf8String doesn't support parallel processing and so this method does not attempt a split + } + } + static final class JsonDeserializer extends StdDeserializer { private JsonDeserializer() {