Utf8String refinements: Utf8String.charAt(int) now has inefficient support instead of throwing UnsupportedOperationException

This commit is contained in:
David Noble
2019-10-03 21:34:35 -07:00
parent e79884edae
commit 96819035bf

View File

@@ -21,7 +21,6 @@ import io.netty.buffer.ByteBuf;
import io.netty.buffer.ByteBufHolder;
import io.netty.buffer.Unpooled;
import io.netty.util.ByteProcessor;
import it.unimi.dsi.fastutil.ints.IntIterator;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
@@ -30,7 +29,8 @@ import java.util.NoSuchElementException;
import java.util.Optional;
import java.util.PrimitiveIterator;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.function.Consumer;
import java.util.function.IntConsumer;
import java.util.function.Supplier;
import java.util.stream.IntStream;
import java.util.stream.StreamSupport;
@@ -113,9 +113,36 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
return this.buffer == null;
}
/**
 * Returns the {@code char} value at the specified index.
 * <p>
 * An index ranges from zero to {@code length() - 1}. The first {@code char} value of the sequence is at index zero,
 * the next at index one, and so on, as for array indexing.
 * <p>
 * If the {@code char} value specified by the index is a surrogate, the surrogate value is returned.
 * <p>
 * Each call creates a new UTF-16 code unit iterator positioned at the start of the buffer and walks forward to the
 * requested position, so the cost of a call grows with {@code index}. Prefer {@link #chars()} or
 * {@link #codePoints()} for sequential access.
 *
 * @param index the index of the {@code char} value to be returned.
 * @return the specified {@code char} value.
 * @throws IndexOutOfBoundsException if the {@code index} argument is negative or not less than {@code length()}.
 */
@Override
public char charAt(final int index) {

    // A negative index is never valid, and an instance without a buffer has no code units to index into.
    if (index < 0 || this.buffer == null) {
        throw new IndexOutOfBoundsException("index: " + index);
    }

    final UTF16CodeUnitIterator iterator = new UTF16CodeUnitIterator(this.buffer);

    // Every pass consumes exactly one code unit; only the one at the requested position is returned.
    for (int position = 0; iterator.hasNext(); position++) {
        final int codeUnit = iterator.nextInt();
        if (position == index) {
            return (char) codeUnit;
        }
    }

    // The iterator ran out before reaching the requested position.
    throw new IndexOutOfBoundsException("index: " + index);
}
/**
@@ -132,10 +159,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
public IntStream chars() {
return this == NULL || this == EMPTY
? IntStream.empty()
: StreamSupport.intStream(
() -> Spliterators.spliteratorUnknownSize(new UTF16CodeUnitIterator(this.buffer), Spliterator.ORDERED),
Spliterator.ORDERED, false
);
: StreamSupport.intStream(new UTF16CodeUnitSpliterator(this.buffer), false);
}
/**
@@ -152,10 +176,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
public final IntStream codePoints() {
return this == NULL || this == EMPTY
? IntStream.empty()
: StreamSupport.intStream(
() -> Spliterators.spliteratorUnknownSize(new CodePointIterator(this.buffer), Spliterator.ORDERED),
Spliterator.ORDERED, false
);
: StreamSupport.intStream(new CodePointSpliterator(this.buffer), false);
}
/**
@@ -682,59 +703,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
// region Types
private static final class UTF16CodeUnitIterator extends UTF8CodePointGetter implements IntIterator.OfInt {

    private final ByteBuf buffer;
    private int start, length;
    private int lowSurrogate;  // pending second half of a surrogate pair; zero when nothing is pending

    UTF16CodeUnitIterator(final ByteBuf buffer) {
        this.buffer = buffer;
        this.lowSurrogate = 0;
        this.start = 0;
        this.length = buffer.writerIndex();
    }

    /**
     * Returns {@code true} if a low surrogate is pending or unread bytes remain in the buffer.
     *
     * @return {@code true} if the iteration has another UTF-16 code unit.
     */
    @Override
    public boolean hasNext() {
        if (this.lowSurrogate != 0) {
            return true;
        }
        return 0 <= this.start && this.start < this.length;
    }

    /**
     * Returns the next UTF-16 code unit in the iteration.
     * <p>
     * A code point that does not fit in 16 bits is delivered as two code units over two calls: the high surrogate
     * first and the low surrogate next.
     *
     * @return the next UTF-16 code unit in the iteration.
     * @throws NoSuchElementException if the iteration has no more code units.
     */
    @Override
    public int nextInt() {

        if (!this.hasNext()) {
            throw new NoSuchElementException();
        }

        final int pending = this.lowSurrogate;

        if (pending != 0) {
            // Second half of the surrogate pair started by the previous call.
            this.lowSurrogate = 0;
            return pending;
        }

        // This instance is passed as the byte processor; the returned index is treated as the last byte consumed.
        final int last = this.buffer.forEachByte(this.start, this.length - this.start, this);
        assert last >= 0;
        this.start = last + 1;

        final int codePoint = this.codePoint();
        final boolean needsSurrogates = (codePoint & 0xFFFF0000) != 0;

        if (needsSurrogates) {
            this.lowSurrogate = Character.lowSurrogate(codePoint);
            return Character.highSurrogate(codePoint);
        }

        return codePoint;
    }
}
private static final class CodePointIterator extends UTF8CodePointGetter implements IntIterator.OfInt {
private static class CodePointIterator extends UTF8CodePointGetter implements PrimitiveIterator.OfInt {
private final ByteBuf buffer;
private int start, length;
@@ -745,6 +714,11 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
this.length = buffer.writerIndex();
}
/**
* Returns {@code true} if there is another code point in the iteration.
*
* @return {@code true} if there is another code point in the iteration.
*/
@Override
public boolean hasNext() {
return 0 <= this.start && this.start < this.length;
@@ -759,11 +733,13 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
@Override
public int nextInt() {
if (!this.hasNext()) {
final int length = this.length - this.start;
if (length <= 0) {
throw new NoSuchElementException();
}
final int index = this.buffer.forEachByte(this.start, this.length - this.start, this);
final int index = this.buffer.forEachByte(this.start, length, this);
assert index >= 0;
this.start = index + 1;
@@ -771,6 +747,149 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
}
}
/**
 * Sequential {@link Spliterator.OfInt} view of a {@code CodePointIterator}; it never splits.
 */
private static final class CodePointSpliterator extends CodePointIterator implements Spliterator.OfInt {

    CodePointSpliterator(final ByteBuf buffer) {
        super(buffer);
    }

    /**
     * Reports this spliterator as ordered, non-null, and immutable.
     *
     * @return the characteristic bits of this spliterator.
     */
    @Override
    public int characteristics() {
        return Spliterator.ORDERED | Spliterator.NONNULL | Spliterator.IMMUTABLE;
    }

    /**
     * Returns {@link Long#MAX_VALUE}, the value the {@link Spliterator} contract uses for an unknown size.
     *
     * @return {@link Long#MAX_VALUE}.
     */
    @Override
    public long estimateSize() {
        return Long.MAX_VALUE;
    }

    // Both the iterator and the spliterator interfaces declare forEachRemaining; the two overrides below
    // delegate to the superclass.

    @Override
    public void forEachRemaining(final IntConsumer action) {
        super.forEachRemaining(action);
    }

    @Override
    public void forEachRemaining(final Consumer<? super Integer> action) {
        super.forEachRemaining(action);
    }

    /**
     * Passes the next code point, if there is one, to the given action.
     *
     * @param action the consumer of the next code point.
     * @return {@code true} if a code point was consumed; {@code false} if the iteration is exhausted.
     */
    @Override
    public boolean tryAdvance(final IntConsumer action) {
        checkNotNull(action, "expected non-null action");
        if (!this.hasNext()) {
            return false;
        }
        action.accept(this.nextInt());
        return true;
    }

    /**
     * Always returns {@code null}: Utf8String doesn't support parallel processing, so no split is attempted.
     *
     * @return {@code null}.
     */
    @Override
    public Spliterator.OfInt trySplit() {
        return null;
    }
}
/**
 * Iterates over the UTF-16 code units obtained by decoding the bytes of a {@code ByteBuf} from index zero up to
 * the writer index captured at construction.
 */
private static class UTF16CodeUnitIterator extends UTF8CodePointGetter implements PrimitiveIterator.OfInt {

    private final ByteBuf buffer;
    private int start, length;
    private int lowSurrogate;  // pending second half of a surrogate pair; zero when nothing is pending

    UTF16CodeUnitIterator(final ByteBuf buffer) {
        this.buffer = buffer;
        this.lowSurrogate = 0;
        this.start = 0;
        this.length = buffer.writerIndex();
    }

    /**
     * Returns {@code true} if there is another UTF-16 code unit in the iteration.
     * <p>
     * That is the case when a low surrogate is pending or unread bytes remain in the buffer.
     *
     * @return {@code true} if there is another UTF-16 code unit in the iteration.
     */
    @Override
    public boolean hasNext() {
        if (this.lowSurrogate != 0) {
            return true;
        }
        return 0 <= this.start && this.start < this.length;
    }

    /**
     * Returns the next {@code int} UTF-16 code unit in the iteration.
     * <p>
     * A code point outside the Basic Multilingual Plane is delivered as two code units over two calls: the high
     * surrogate first and the low surrogate next.
     *
     * @return the next {@code int} UTF-16 code unit in the iteration.
     * @throws NoSuchElementException if the iteration has no more UTF-16 code units.
     */
    @Override
    public int nextInt() {

        final int pending = this.lowSurrogate;

        if (pending != 0) {
            // Second half of the surrogate pair started by the previous call.
            this.lowSurrogate = 0;
            return pending;
        }

        final int remaining = this.length - this.start;

        if (remaining <= 0) {
            throw new NoSuchElementException();
        }

        // This instance is passed as the byte processor; the returned index is treated as the last byte consumed.
        final int last = this.buffer.forEachByte(this.start, remaining, this);
        assert last >= 0;
        this.start = last + 1;

        final int codePoint = this.codePoint();

        if (Character.isBmpCodePoint(codePoint)) {
            return codePoint;
        }

        this.lowSurrogate = Character.lowSurrogate(codePoint);
        return Character.highSurrogate(codePoint);
    }
}
/**
 * Sequential {@link Spliterator.OfInt} view of a {@code UTF16CodeUnitIterator}; it never splits.
 */
private static final class UTF16CodeUnitSpliterator extends UTF16CodeUnitIterator implements Spliterator.OfInt {

    UTF16CodeUnitSpliterator(final ByteBuf buffer) {
        super(buffer);
    }

    /**
     * Reports this spliterator as ordered, non-null, and immutable.
     *
     * @return the characteristic bits of this spliterator.
     */
    @Override
    public int characteristics() {
        return Spliterator.ORDERED | Spliterator.NONNULL | Spliterator.IMMUTABLE;
    }

    /**
     * Returns {@link Long#MAX_VALUE}, the value the {@link Spliterator} contract uses for an unknown size.
     *
     * @return {@link Long#MAX_VALUE}.
     */
    @Override
    public long estimateSize() {
        return Long.MAX_VALUE;
    }

    // Both the iterator and the spliterator interfaces declare forEachRemaining; the two overrides below
    // delegate to the superclass.

    @Override
    public void forEachRemaining(final IntConsumer action) {
        super.forEachRemaining(action);
    }

    @Override
    public void forEachRemaining(final Consumer<? super Integer> action) {
        super.forEachRemaining(action);
    }

    /**
     * Passes the next UTF-16 code unit, if there is one, to the given action.
     *
     * @param action the consumer of the next code unit.
     * @return {@code true} if a code unit was consumed; {@code false} if the iteration is exhausted.
     */
    @Override
    public boolean tryAdvance(final IntConsumer action) {
        checkNotNull(action, "expected non-null action");
        if (!this.hasNext()) {
            return false;
        }
        action.accept(this.nextInt());
        return true;
    }

    /**
     * Always returns {@code null}: Utf8String doesn't support parallel processing, so no split is attempted.
     *
     * @return {@code null}.
     */
    @Override
    public Spliterator.OfInt trySplit() {
        return null;
    }
}
static final class JsonDeserializer extends StdDeserializer<Utf8String> {
private JsonDeserializer() {