mirror of
https://github.com/microsoft/HybridRow.git
synced 2026-01-24 20:03:09 +00:00
Utf8String refinements: add inefficient (linear-scan) support for Utf8String.charAt(int) in place of throwing UnsupportedOperationException
This commit is contained in:
@@ -21,7 +21,6 @@ import io.netty.buffer.ByteBuf;
|
|||||||
import io.netty.buffer.ByteBufHolder;
|
import io.netty.buffer.ByteBufHolder;
|
||||||
import io.netty.buffer.Unpooled;
|
import io.netty.buffer.Unpooled;
|
||||||
import io.netty.util.ByteProcessor;
|
import io.netty.util.ByteProcessor;
|
||||||
import it.unimi.dsi.fastutil.ints.IntIterator;
|
|
||||||
|
|
||||||
import javax.annotation.Nonnull;
|
import javax.annotation.Nonnull;
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
@@ -30,7 +29,8 @@ import java.util.NoSuchElementException;
|
|||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.PrimitiveIterator;
|
import java.util.PrimitiveIterator;
|
||||||
import java.util.Spliterator;
|
import java.util.Spliterator;
|
||||||
import java.util.Spliterators;
|
import java.util.function.Consumer;
|
||||||
|
import java.util.function.IntConsumer;
|
||||||
import java.util.function.Supplier;
|
import java.util.function.Supplier;
|
||||||
import java.util.stream.IntStream;
|
import java.util.stream.IntStream;
|
||||||
import java.util.stream.StreamSupport;
|
import java.util.stream.StreamSupport;
|
||||||
@@ -113,9 +113,36 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
return this.buffer == null;
|
return this.buffer == null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the {@code char} value at the specified index.
|
||||||
|
* <p>
|
||||||
|
* An index ranges from zero to {@code length() - 1}. The first {@code char} value of the sequence is at index
|
||||||
|
* index zero, the next at index one, and so on, as for array indexing.
|
||||||
|
* <p>
|
||||||
|
* If the {@code char} value specified by the index is a <a href="{@docRoot}/java/lang/Character.html#unicode">
|
||||||
|
* surrogate</a>, the surrogate value is returned.
|
||||||
|
*
|
||||||
|
* @param index the index of the {@code char} value to be returned.
|
||||||
|
* @return the specified {@code char} value.
|
||||||
|
* @throws IndexOutOfBoundsException if the {@code index} argument is negative or not less than {@code length()}.
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
public char charAt(final int index) {
|
public char charAt(final int index) {
|
||||||
throw new UnsupportedOperationException(lenientFormat("Utf8String.charAt(index: %s)", index));
|
|
||||||
|
if (index < 0) {
|
||||||
|
throw new IndexOutOfBoundsException();
|
||||||
|
}
|
||||||
|
|
||||||
|
UTF16CodeUnitIterator iterator = new UTF16CodeUnitIterator(this.buffer);
|
||||||
|
int countdown = index;
|
||||||
|
|
||||||
|
while (iterator.hasNext()) {
|
||||||
|
if (--countdown < 0) {
|
||||||
|
return (char) iterator.nextInt();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new IndexOutOfBoundsException();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -132,10 +159,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
public IntStream chars() {
|
public IntStream chars() {
|
||||||
return this == NULL || this == EMPTY
|
return this == NULL || this == EMPTY
|
||||||
? IntStream.empty()
|
? IntStream.empty()
|
||||||
: StreamSupport.intStream(
|
: StreamSupport.intStream(new UTF16CodeUnitSpliterator(this.buffer), false);
|
||||||
() -> Spliterators.spliteratorUnknownSize(new UTF16CodeUnitIterator(this.buffer), Spliterator.ORDERED),
|
|
||||||
Spliterator.ORDERED, false
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -152,10 +176,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
public final IntStream codePoints() {
|
public final IntStream codePoints() {
|
||||||
return this == NULL || this == EMPTY
|
return this == NULL || this == EMPTY
|
||||||
? IntStream.empty()
|
? IntStream.empty()
|
||||||
: StreamSupport.intStream(
|
: StreamSupport.intStream(new CodePointSpliterator(this.buffer), false);
|
||||||
() -> Spliterators.spliteratorUnknownSize(new CodePointIterator(this.buffer), Spliterator.ORDERED),
|
|
||||||
Spliterator.ORDERED, false
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -682,59 +703,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
|
|
||||||
// region Types
|
// region Types
|
||||||
|
|
||||||
private static final class UTF16CodeUnitIterator extends UTF8CodePointGetter implements IntIterator.OfInt {
|
private static class CodePointIterator extends UTF8CodePointGetter implements PrimitiveIterator.OfInt {
|
||||||
|
|
||||||
private final ByteBuf buffer;
|
|
||||||
private int start, length;
|
|
||||||
private int lowSurrogate;
|
|
||||||
|
|
||||||
UTF16CodeUnitIterator(final ByteBuf buffer) {
|
|
||||||
this.buffer = buffer;
|
|
||||||
this.lowSurrogate = 0;
|
|
||||||
this.start = 0;
|
|
||||||
this.length = buffer.writerIndex();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasNext() {
|
|
||||||
return (this.lowSurrogate != 0) || (0 <= this.start && this.start < this.length);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the next {@code int} code point in the iteration.
|
|
||||||
*
|
|
||||||
* @return the next {@code int} code point in the iteration.
|
|
||||||
* @throws NoSuchElementException if the iteration has no more code points.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public int nextInt() {
|
|
||||||
|
|
||||||
if (!this.hasNext()) {
|
|
||||||
throw new NoSuchElementException();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (this.lowSurrogate != 0) {
|
|
||||||
int codeUnit = this.lowSurrogate;
|
|
||||||
this.lowSurrogate = 0;
|
|
||||||
return codeUnit;
|
|
||||||
}
|
|
||||||
|
|
||||||
final int index = this.buffer.forEachByte(this.start, this.length - this.start, this);
|
|
||||||
assert index >= 0;
|
|
||||||
this.start = index + 1;
|
|
||||||
|
|
||||||
final int codePoint = this.codePoint();
|
|
||||||
|
|
||||||
if ((codePoint & 0xFFFF0000) == 0) {
|
|
||||||
return codePoint;
|
|
||||||
}
|
|
||||||
|
|
||||||
this.lowSurrogate = Character.lowSurrogate(codePoint);
|
|
||||||
return Character.highSurrogate(codePoint);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static final class CodePointIterator extends UTF8CodePointGetter implements IntIterator.OfInt {
|
|
||||||
|
|
||||||
private final ByteBuf buffer;
|
private final ByteBuf buffer;
|
||||||
private int start, length;
|
private int start, length;
|
||||||
@@ -745,6 +714,11 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
this.length = buffer.writerIndex();
|
this.length = buffer.writerIndex();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns {@code true} if there is another code point in the iteration.
|
||||||
|
*
|
||||||
|
* @return {@code true} if there is another code point in the iteration.
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
public boolean hasNext() {
|
public boolean hasNext() {
|
||||||
return 0 <= this.start && this.start < this.length;
|
return 0 <= this.start && this.start < this.length;
|
||||||
@@ -759,11 +733,13 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
@Override
|
@Override
|
||||||
public int nextInt() {
|
public int nextInt() {
|
||||||
|
|
||||||
if (!this.hasNext()) {
|
final int length = this.length - this.start;
|
||||||
|
|
||||||
|
if (length <= 0) {
|
||||||
throw new NoSuchElementException();
|
throw new NoSuchElementException();
|
||||||
}
|
}
|
||||||
|
|
||||||
final int index = this.buffer.forEachByte(this.start, this.length - this.start, this);
|
final int index = this.buffer.forEachByte(this.start, length, this);
|
||||||
assert index >= 0;
|
assert index >= 0;
|
||||||
this.start = index + 1;
|
this.start = index + 1;
|
||||||
|
|
||||||
@@ -771,6 +747,149 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static final class CodePointSpliterator extends CodePointIterator implements Spliterator.OfInt {
|
||||||
|
|
||||||
|
CodePointSpliterator(final ByteBuf buffer) {
|
||||||
|
super(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int characteristics() {
|
||||||
|
return Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.ORDERED;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long estimateSize() {
|
||||||
|
return Long.MAX_VALUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void forEachRemaining(IntConsumer action) {
|
||||||
|
super.forEachRemaining(action);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void forEachRemaining(Consumer<? super Integer> action) {
|
||||||
|
super.forEachRemaining(action);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean tryAdvance(IntConsumer action) {
|
||||||
|
checkNotNull(action, "expected non-null action");
|
||||||
|
if (this.hasNext()) {
|
||||||
|
action.accept(this.nextInt());
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Spliterator.OfInt trySplit() {
|
||||||
|
return null; // Utf8String doesn't support parallel processing and so this method does not attempt a split
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class UTF16CodeUnitIterator extends UTF8CodePointGetter implements PrimitiveIterator.OfInt {
|
||||||
|
|
||||||
|
private final ByteBuf buffer;
|
||||||
|
private int start, length;
|
||||||
|
private int lowSurrogate;
|
||||||
|
|
||||||
|
UTF16CodeUnitIterator(final ByteBuf buffer) {
|
||||||
|
this.buffer = buffer;
|
||||||
|
this.lowSurrogate = 0;
|
||||||
|
this.start = 0;
|
||||||
|
this.length = buffer.writerIndex();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns {@code true} if there is another UTF-16 code unit in the iteration.
|
||||||
|
*
|
||||||
|
* @return {@code true} if there is another UTF-16 code unit in the iteration.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean hasNext() {
|
||||||
|
return (this.lowSurrogate != 0) || (0 <= this.start && this.start < this.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the next {@code int} UTF-16 code unit in the iteration.
|
||||||
|
*
|
||||||
|
* @return the next {@code int} UTF-16 code unit in the iteration.
|
||||||
|
* @throws NoSuchElementException if the iteration has no more UTF-16 code units.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public int nextInt() {
|
||||||
|
|
||||||
|
if (this.lowSurrogate != 0) {
|
||||||
|
int codeUnit = this.lowSurrogate;
|
||||||
|
this.lowSurrogate = 0;
|
||||||
|
return codeUnit;
|
||||||
|
}
|
||||||
|
|
||||||
|
final int length = this.length - this.start;
|
||||||
|
|
||||||
|
if (length <= 0) {
|
||||||
|
throw new NoSuchElementException();
|
||||||
|
}
|
||||||
|
|
||||||
|
final int index = this.buffer.forEachByte(this.start, length, this);
|
||||||
|
assert index >= 0;
|
||||||
|
this.start = index + 1;
|
||||||
|
|
||||||
|
final int codePoint = this.codePoint();
|
||||||
|
|
||||||
|
if ((codePoint & 0xFFFF0000) == 0) {
|
||||||
|
return codePoint;
|
||||||
|
}
|
||||||
|
|
||||||
|
this.lowSurrogate = Character.lowSurrogate(codePoint);
|
||||||
|
return Character.highSurrogate(codePoint);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final class UTF16CodeUnitSpliterator extends UTF16CodeUnitIterator implements Spliterator.OfInt {
|
||||||
|
|
||||||
|
UTF16CodeUnitSpliterator(final ByteBuf buffer) {
|
||||||
|
super(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int characteristics() {
|
||||||
|
return Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.ORDERED;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long estimateSize() {
|
||||||
|
return Long.MAX_VALUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void forEachRemaining(IntConsumer action) {
|
||||||
|
super.forEachRemaining(action);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void forEachRemaining(Consumer<? super Integer> action) {
|
||||||
|
super.forEachRemaining(action);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean tryAdvance(IntConsumer action) {
|
||||||
|
checkNotNull(action, "expected non-null action");
|
||||||
|
if (this.hasNext()) {
|
||||||
|
action.accept(this.nextInt());
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Spliterator.OfInt trySplit() {
|
||||||
|
return null; // Utf8String doesn't support parallel processing and so this method does not attempt a split
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static final class JsonDeserializer extends StdDeserializer<Utf8String> {
|
static final class JsonDeserializer extends StdDeserializer<Utf8String> {
|
||||||
|
|
||||||
private JsonDeserializer() {
|
private JsonDeserializer() {
|
||||||
|
|||||||
Reference in New Issue
Block a user