Tweaks

2026-01-20 09:53:13 +00:00 · 2019-10-09 13:50:50 -07:00
parent 4b4a0c809d
commit 7de49efa03
1 changed files with 108 additions and 108 deletions
--- a/java/src/main/java/com/azure/data/cosmos/core/Utf8String.java
+++ b/java/src/main/java/com/azure/data/cosmos/core/Utf8String.java
@@ -789,114 +789,6 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
        }
    }

-    /**
-     * A {@link ByteProcessor} used to convert a UTF-8 byte sequence to a {@link String}.
-     * <p>
-     * This {@link #process(byte)} method accumulates a single code point at a time. Invalid code points are changed to
-     * <a href="https://en.wikipedia.org/wiki/Specials_(Unicode_block)#Replacement_character">Replacement Characters</a>
-     */
-    private static class UTF16Converter implements ByteProcessor {
-
-        private static final int REPLACEMENT_CHARACTER = 0xFFFD;
-
-        private final StringBuilder builder;
-        private int codePoint = -1;
-        private int shift = -1;
-
-        UTF16Converter(final int capacity) {
-            this.builder = new StringBuilder(capacity);
-        }
-
-        /**
-         * Processes the next byte in a UTF-8 encoded code point sequence.
-         *
-         * Characters are appended to the result value at the end of each code point sequence that is encountered.
-         *
-         * @param value the next byte in a UTF-8 encoded code point sequence.
-         * @return {@code true}.
-         */
-        @Override
-        public boolean process(final byte value) {
-
-            switch (this.shift) {
-
-                default: {
-
-                    // Next unit (byte) of multi-byte code point sequence
-
-                    this.codePoint |= ((value & 0xFF) << this.shift);
-                    this.shift -= Byte.SIZE;
-                    return true;
-                }
-                case 0: {
-
-                    // End of multi-byte code point sequence
-
-                    this.codePoint = toCodePoint(this.codePoint | (value & 0xFF));
-
-                    if (this.codePoint < 0) {
-                        this.builder.append((char)REPLACEMENT_CHARACTER);
-                    } else if (Character.isBmpCodePoint(this.codePoint)) {
-                        this.builder.append((char)this.codePoint);
-                    } else {
-                        this.builder.append(Character.highSurrogate(this.codePoint));
-                        this.builder.append(Character.lowSurrogate(this.codePoint));
-                    }
-
-                    this.shift = -1;
-                    return true;
-                }
-                case -1: {
-
-                    // Start of code point sequence
-
-                    final int leadingByte = value & 0xFF;
-
-                    if (leadingByte < 0x7F) {
-                        // UTF-8-1 = 0x00-7F
-                        this.builder.append((char)leadingByte);
-                        return true;
-                    }
-
-                    if (0xC2 <= leadingByte && leadingByte <= 0xDF) {
-                        // UTF8-8-2 = 0xC2-DF UTF8-tail
-                        this.codePoint = leadingByte << Byte.SIZE;
-                        this.shift = 0;
-                        return true;
-                    }
-
-                    if (0xE0 <= leadingByte && leadingByte <= 0xEF) {
-                        // UTF-8-3 = 0xE0 0xA0-BF UTF8-tail / 0xE1-EC 2(UTF8-tail) / 0xED 0x80-9F UTF8-tail / 0xEE-EF 2(UTF8-tail)
-                        this.codePoint = leadingByte << 2 * Byte.SIZE;
-                        this.shift = Byte.SIZE;
-                        return true;
-                    }
-
-                    if (0xF0 <= leadingByte && leadingByte <= 0xF4) {
-                        // UTF-8-4 = 0xF0 0x90-BF 2( UTF8-tail ) / 0xF1-F3 3( UTF8-tail ) / 0xF4 0x80-8F 2( UTF8-tail )
-                        this.codePoint = leadingByte << (3 * Byte.SIZE);
-                        this.shift = 2 * Byte.SIZE;
-                        return true;
-                    }
-
-                    this.builder.append((char)REPLACEMENT_CHARACTER);
-                    return true;
-                }
-            }
-        }
-
-        /**
-         * Returns the converted {@link String} value.
-         *
-         * A new {@link String} is allocated on each call to this method.
-         *
-         * @return the converted {@link String} value.
-         */
-        String value() {
-            return this.builder.toString();
-        }
-    }
-
    /**
     * A {@link ByteProcessor} used to read a UTF-8 encoded string one code point at a time.
     * <p>
@@ -1418,5 +1310,113 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
        }
    }

+    /**
+     * A {@link ByteProcessor} used to convert a UTF-8 byte sequence to a {@link String}.
+     * <p>
+     * The {@link #process(byte)} method accumulates a single code point at a time. Invalid code points are changed to
+     * <a href="https://en.wikipedia.org/wiki/Specials_(Unicode_block)#Replacement_character">Replacement Characters</a>.
+     */
+    private static class UTF16Converter implements ByteProcessor {
+
+        private static final int REPLACEMENT_CHARACTER = 0xFFFD;
+
+        private final StringBuilder builder;
+        private int codePoint = -1;  // bytes of the multi-byte sequence in progress, packed leading byte first
+        private int shift = -1;      // bit offset at which the next byte is packed; -1 when no sequence is in progress
+
+        UTF16Converter(final int capacity) {
+            this.builder = new StringBuilder(capacity);
+        }
+
+        /**
+         * Processes the next byte in a UTF-8 encoded code point sequence.
+         *
+         * Characters are appended to the result value at the end of each code point sequence that is encountered.
+         *
+         * @param value the next byte in a UTF-8 encoded code point sequence.
+         * @return {@code true}, always.
+         */
+        @Override
+        public boolean process(final byte value) {
+
+            switch (this.shift) {
+
+                default: {
+
+                    // Next unit (byte) of multi-byte code point sequence: pack it and move on to the next lower byte
+
+                    this.codePoint |= ((value & 0xFF) << this.shift);
+                    this.shift -= Byte.SIZE;
+                    return true;
+                }
+                case 0: {
+
+                    // End of multi-byte code point sequence: decode the packed bytes (negative result => invalid)
+
+                    this.codePoint = toCodePoint(this.codePoint | (value & 0xFF));
+
+                    if (this.codePoint < 0) {
+                        this.builder.append((char)REPLACEMENT_CHARACTER);
+                    } else if (Character.isBmpCodePoint(this.codePoint)) {
+                        this.builder.append((char)this.codePoint);
+                    } else {
+                        this.builder.append(Character.highSurrogate(this.codePoint));
+                        this.builder.append(Character.lowSurrogate(this.codePoint));
+                    }
+
+                    this.shift = -1;
+                    return true;
+                }
+                case -1: {
+
+                    // Start of code point sequence
+
+                    final int leadingByte = value & 0xFF;
+
+                    if (leadingByte <= 0x7F) {
+                        // UTF8-1 = 0x00-7F (inclusive: 0x7F, DEL, is a valid single-byte code point)
+                        this.builder.append((char)leadingByte);
+                        return true;
+                    }
+
+                    if (0xC2 <= leadingByte && leadingByte <= 0xDF) {
+                        // UTF8-2 = 0xC2-DF UTF8-tail
+                        this.codePoint = leadingByte << Byte.SIZE;
+                        this.shift = 0;
+                        return true;
+                    }
+
+                    if (0xE0 <= leadingByte && leadingByte <= 0xEF) {
+                        // UTF8-3 = 0xE0 0xA0-BF UTF8-tail / 0xE1-EC 2(UTF8-tail) / 0xED 0x80-9F UTF8-tail / 0xEE-EF 2(UTF8-tail)
+                        this.codePoint = leadingByte << (2 * Byte.SIZE);
+                        this.shift = Byte.SIZE;
+                        return true;
+                    }
+
+                    if (0xF0 <= leadingByte && leadingByte <= 0xF4) {
+                        // UTF8-4 = 0xF0 0x90-BF 2( UTF8-tail ) / 0xF1-F3 3( UTF8-tail ) / 0xF4 0x80-8F 2( UTF8-tail )
+                        this.codePoint = leadingByte << (3 * Byte.SIZE);
+                        this.shift = 2 * Byte.SIZE;
+                        return true;
+                    }
+                    // Not a valid leading byte (0x80-C1 or 0xF5-FF)
+                    this.builder.append((char)REPLACEMENT_CHARACTER);
+                    return true;
+                }
+            }
+        }
+
+        /**
+         * Returns the converted {@link String} value.
+         *
+         * A new {@link String} is allocated on each call to this method. The bytes of a multi-byte sequence that is
+         * still in progress are not included in the result.
+         * @return the converted {@link String} value.
+         */
+        String value() {
+            return this.builder.toString();
+        }
+    }
+
    // endregion
 }