diff --git a/java/src/main/java/com/azure/data/cosmos/core/Utf8String.java b/java/src/main/java/com/azure/data/cosmos/core/Utf8String.java
index 073acfc..e8be3bf 100644
--- a/java/src/main/java/com/azure/data/cosmos/core/Utf8String.java
+++ b/java/src/main/java/com/azure/data/cosmos/core/Utf8String.java
@@ -9,25 +9,22 @@ import com.fasterxml.jackson.core.JsonParser;
 import com.fasterxml.jackson.databind.DeserializationContext;
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.SerializerProvider;
+import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
 import com.fasterxml.jackson.databind.annotation.JsonSerialize;
 import com.fasterxml.jackson.databind.deser.std.StdDeserializer;
 import com.fasterxml.jackson.databind.node.JsonNodeType;
 import com.fasterxml.jackson.databind.ser.std.StdSerializer;
 import com.google.common.base.Objects;
 import com.google.common.base.Suppliers;
-import com.google.common.base.Utf8;
 import io.netty.buffer.ByteBuf;
 import io.netty.buffer.ByteBufHolder;
-import io.netty.buffer.ByteBufUtil;
 import io.netty.buffer.Unpooled;
 import io.netty.util.ByteProcessor;
 import it.unimi.dsi.fastutil.ints.IntIterator;
-import org.checkerframework.checker.initialization.qual.NotOnlyInitialized;
 
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
 import java.io.IOException;
-import java.nio.charset.StandardCharsets;
 import java.util.NoSuchElementException;
 import java.util.Optional;
 import java.util.PrimitiveIterator;
@@ -43,6 +40,7 @@ import static com.google.common.base.Preconditions.checkState;
 import static com.google.common.base.Strings.lenientFormat;
 import static java.nio.charset.StandardCharsets.UTF_8;
 
+@JsonDeserialize(using = Utf8String.JsonDeserializer.class)
 @JsonSerialize(using = Utf8String.JsonSerializer.class)
 @SuppressWarnings("UnstableApiUsage")
 public final class Utf8String implements ByteBufHolder, CharSequence, Comparable<Utf8String> {
@@ -147,8 +145,12 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
 
     public final int compareTo(final String other) {
 
-        if (this.buffer == null) {
-            return other == null ? 0 : -1;
+        if (null == other) {
+            return null == this.buffer ? 0 : 1;
+        }
+
+        if (null == this.buffer) {
+            return -1;
         }
 
         PrimitiveIterator.OfInt t = this.codePoints().iterator();
@@ -408,6 +410,80 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
         return new Utf8String(Unpooled.wrappedBuffer(string.getBytes(UTF_8)));
     }
 
+    private static int toCodePoint(int characterEncoding) {
+
+        if ((characterEncoding & 0b11111000_00000000_00000000_00000000) == 0b11110000_00000000_00000000_00000000) {
+
+            // Map 4-byte UTF-8 encoding to code point in the [0x10000, 0x0FFFF] range
+            //
+            // UTF-8 encodings in this range have this bit pattern:
+            //
+            //  Bits 24-31 = 0b11110VVV (byte 1)
+            //  Bits 16-23 = 0b10ZZZZZZ (byte 2)
+            //  Bits 08-15 = 0b10YYYYYY (byte 3)
+            //  Bits 00-07 = 0b10XXXXXX (byte 4)
+            //
+            // The corresponding UTF-16 code point can be viewed as a 21-bit integer,
+            // 0bVVVZZZZZZYYYYYYXXXXXX. Hence, we map the UTF-8 code units into 3 bytes with the first
+            // 4 bits coming from the first (high order) byte, the next 6 bits from the second byte,
+            // the next 6 bits from the third byte, and the last 6 bits from the fourth (low order)
+            // byte.
+
+            final int b1 = characterEncoding & 0b00000111_00000000_00000000_00000000;
+            final int b2 = characterEncoding & 0b00000000_00111111_00000000_00000000;
+            final int b3 = characterEncoding & 0b00000000_00000000_00111111_00000000;
+            final int b4 = characterEncoding & 0b00000000_00000000_00000000_00111111;
+
+            return (b1 >> 6) | (b2 >> 4) | (b3 >> 2) | b4;
+        }
+
+        if ((characterEncoding & 0b11111111_11110000_00000000_00000000) == 0b00000000_11100000_00000000_00000000) {
+
+            // Map 3-byte UTF-8 encoding to code point in the [0x0800, 0xFFFF] range
+            //
+            // UTF-8 encodings in this range have this bit pattern:
+            //
+            //  Bits 24-31 = 0b00000000
+            //  Bits 16-23 = 0b1110ZZZZ (byte 1)
+            //  Bits 08-15 = 0b10YYYYYY (byte 2)
+            //  Bits 00-07 = 0b10XXXXXX (byte 3)
+            //
+            // The corresponding UTF-16 code point can be viewed as a 16-bit integer,
+            // 0bZZZZYYYYYYXXXXXX. Hence, we map the UTF-8 code units into 2 bytes with the first 3
+            // bits coming from the first (high order) byte, the next 6 bits from the second (mid order)
+            // byte, and the last 6 bits from the third (low order) byte.
+
+            final int b1 = characterEncoding & 0b000011110000000000000000;
+            final int b2 = characterEncoding & 0b000000000011111100000000;
+            final int b3 = characterEncoding & 0b000000000000000000111111;
+
+            return (b1 >> 4) | (b2 >> 2) | b3;
+        }
+
+        if ((characterEncoding & 0b11111111_11111111_11100000_00000000) == 0b00000000_00000000_11000000_00000000) {
+
+            // Map 2-byte UTF-8 character encoding to code point in the [0x0080, 0x07FF] range
+            //
+            // UTF-8 Encodings in this this range have this bit pattern:
+            //
+            //  Bits 24-31 = 0b00000000
+            //  Bits 16-23 = 0b00000000
+            //  Bits 08-15 = 0b110YYYYY (byte 1)
+            //  Bits 00-07 = 0b10XXXXXX (byte 2)
+            //
+            // The corresponding UTF-16 code point can be viewed as an 11-bit integer, 0bYYYYYXXXXXX.
+            // Hence, we map the UTF-8 code units into 1 byte with the first 5 bits coming from the
+            // first (high order) byte and the final 6 bits coming from the second (low order) byte
+
+            final int b1 = characterEncoding & 0b0001111100000000;
+            final int b2 = characterEncoding & 0b0000000000111111;
+
+            return (b1 >> 2) | b2;
+        }
+
+        return -1;
+    }
+
     private static final class CodePointIterator extends UTF8CodePointGetter implements IntIterator.OfInt {
 
         private final ByteBuf buffer;
@@ -445,9 +521,9 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
         }
     }
 
-    static final class Deserializer extends StdDeserializer<Utf8String> {
+    static final class JsonDeserializer extends StdDeserializer<Utf8String> {
 
-        private Deserializer() {
+        private JsonDeserializer() {
             super(Utf8String.class);
         }
 
@@ -597,7 +673,6 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
         private static final int REPLACEMENT_CHARACTER = 0xFFFD;
 
         private int codePoint = -1;
-        private int length = -1;
         private int shift = -1;
 
         /**
@@ -626,84 +701,9 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
 
                     // End of code point sequence of length 2-4
 
-                    this.codePoint |= (value & 0xFF);
                     this.shift = -1;
-
-                    switch (this.length) {
-                        default: {
-                            assert false : lenientFormat("codePoint: 0b%s, length: %s, shift: 0",
-                                Integer.toBinaryString(this.codePoint),
-                                this.length);
-                            this.codePoint = REPLACEMENT_CHARACTER;
-                            return false;
-                        }
-                        case 2: {
-
-                            // Map UTF-8 encoding to UTF-16 code point in the [0x0080, 0x07FF] range
-                            //
-                            // UTF-8 Encodings in this this range have this bit pattern:
-                            //
-                            //  Bits 08-15 = 0b110YYYYY (byte 1)
-                            //  Bits 00-07 = 0b10XXXXXX (byte 2)
-                            //
-                            // The corresponding UTF-16 code point can be viewed as an 11-bit integer, 0bYYYYYXXXXXX.
-                            // Hence, we map the UTF-8 code units into 1 byte with the first 5 bits coming from the
-                            // first (high order) byte and the final 6 bits coming from the second (low order) byte
-
-                            final int b1 = this.codePoint & 0b0001111100000000;
-                            final int b2 = this.codePoint & 0b0000000000111111;
-
-                            this.codePoint = (b1 >> 2) | b2;
-                            break;
-                        }
-                        case 3: {
-
-                            // Map UTF-8 encoding to UTF-16 code point in the [0x0800, 0xFFFF] range
-                            //
-                            // UTF-8 encodings in this range have this bit pattern:
-                            //
-                            //  Bits 16-23 = 0b1110ZZZZ (byte 1)
-                            //  Bits 08-15 = 0b10YYYYYY (byte 2)
-                            //  Bits 00-07 = 0b10XXXXXX (byte 3)
-                            //
-                            // The corresponding UTF-16 code point can be viewed as a 16-bit integer,
-                            // 0bZZZZYYYYYYXXXXXX. Hence, we map the UTF-8 code units into 2 bytes with the first 3
-                            // bits coming from the first (high order) byte, the next 6 bits from the second (mid order)
-                            // byte, and the last 6 bits from the third (low order) byte.
-
-                            final int b1 = this.codePoint & 0b000011110000000000000000;
-                            final int b2 = this.codePoint & 0b000000000011111100000000;
-                            final int b3 = this.codePoint & 0b000000000000000000111111;
-
-                            this.codePoint = (b1 >> 4) | (b2 >> 2) | b3;
-                            break;
-                        }
-                        case 4: {
-
-                            // Map UTF-8 encoding to UTF-16 code point in the [0x10000, 0x0FFFF] range
-                            //
-                            // UTF-8 encodings in this range have this bit pattern:
-                            //
-                            //  Bits 24-32 = 0b11110VVV (byte 1)
-                            //  Bits 16-23 = 0b10ZZZZZZ (byte 2)
-                            //  Bits 08-15 = 0b10YYYYYY (byte 3)
-                            //  Bits 00-07 = 0b10XXXXXX (byte 4)
-                            //
-                            // The corresponding UTF-16 code point can be viewed as a 21-bit integer,
-                            // 0bVVVZZZZZZYYYYYYXXXXXX. Hence, we map the UTF-8 code units into 3 bytes with the first
-                            // 4 bits coming from the first (high order) byte, the next 6 bits from the second byte,
-                            // the next 6 bits from the third byte, and the last 6 bits from the fourth (low order)
-                            // byte.
-
-                            final int b1 = this.codePoint & 0b00000111000000000000000000000000;
-                            final int b2 = this.codePoint & 0b00000000001111110000000000000000;
-                            final int b3 = this.codePoint & 0b00000000000000000011111100000000;
-                            final int b4 = this.codePoint & 0b00000000000000000000000000111111;
-
-                            this.codePoint = (b1 >> 6) | (b2 >> 4) | (b3 >> 2) | b4;
-                            break;
-                        }
-                    }
+                    this.codePoint |= (value & 0xFF);
+                    this.codePoint = toCodePoint(this.codePoint);
 
                     if (!Character.isDefined(this.codePoint)) {
                         this.codePoint = REPLACEMENT_CHARACTER;
@@ -720,14 +720,12 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
                     if (leadingByte < 0x7F) {
                         // UTF-8-1 = 0x00-7F
                         this.codePoint = leadingByte;
-                        this.length = 1;
                         return false;
                     }
 
                     if (0xC2 <= leadingByte && leadingByte <= 0xDF) {
                         // UTF8-8-2 = 0xC2-DF UTF8-tail
                         this.codePoint = leadingByte << Byte.SIZE;
-                        this.length = 2;
                         this.shift = 0;
                         return true;
                     }
@@ -735,7 +733,6 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
                     if (0xE0 <= leadingByte && leadingByte <= 0xEF) {
                         // UTF-8-3 = 0xE0 0xA0-BF UTF8-tail / 0xE1-EC 2(UTF8-tail) / 0xED 0x80-9F UTF8-tail / 0xEE-EF 2(UTF8-tail)
                         this.codePoint = leadingByte << 2 * Byte.SIZE;
-                        this.length = 3;
                         this.shift = Byte.SIZE;
                         return true;
                     }
@@ -743,7 +740,6 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
                     if (0xF0 <= leadingByte && leadingByte <= 0xF4) {
                         // UTF8-4 = 0xF0 0x90-BF 2( UTF8-tail ) / 0xF1-F3 3( UTF8-tail ) / 0xF4 0x80-8F 2( UTF8-tail )
                         this.codePoint = leadingByte << 3 * Byte.SIZE;
-                        this.length = 4;
                         this.shift = 2 * Byte.SIZE;
                         return true;
                     }
@@ -770,13 +766,13 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
      * This {@link #process(byte)} method reads a single code point at a time. The first byte read following
      * construction of an instance of this class must be a leading byte. This is used to determine the number of
      * single-byte UTF-8 code units in the code point. The {@link #process(byte)} method returns {@code false} when
-     * an undefined code point is encountered.
+     * an undefined code point is encountered as determined by {@link Character#isDefined(int)}}.
      *
      * @see <a href="https://tools.ietf.org/html/rfc3629">RFC 3629: UTF-8, a transformation format of ISO 10646</a>
      */
     private static class UTF8CodePointValidator implements ByteProcessor {
 
-        private int codePoint = 0;
+        private int codePoint = -1;
         private int shift = -1;
 
         /**
@@ -796,7 +792,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
 
                     // Next unit of code point sequence
 
-                    this.codePoint |= (value & 0xFF << this.shift);
+                    this.codePoint |= ((value & 0xFF) << this.shift);
                     this.shift -= Byte.SIZE;
                     return true;
                 }
@@ -804,8 +800,9 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
 
                     // End of code point sequence
 
-                    this.codePoint |= value & 0xFF;
+                    this.codePoint |= (value & 0xFF);
                     this.shift = -1;
+                    this.codePoint = toCodePoint(this.codePoint);
 
                     return Character.isDefined(this.codePoint);
                 }
@@ -813,7 +810,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
 
                     // Start of code point sequence
 
-                    final int leadingByte = value & 0xFF;
+                    final int leadingByte = (value & 0xFF);
 
                     if (leadingByte < 0x7F) {
                         // UTF-8-1 = 0x00-7F
@@ -838,7 +835,7 @@ public final class Utf8String implements ByteBufHolder, CharSequence, Comparable
                     if (0xF0 <= leadingByte && leadingByte <= 0xF4) {
                         // UTF8-4 = 0xF0 0x90-BF 2( UTF8-tail ) / 0xF1-F3 3( UTF8-tail ) / 0xF4 0x80-8F 2( UTF8-tail )
                         this.codePoint = leadingByte << 3 * Byte.SIZE;
-                        this.shift = 3 * Byte.SIZE;
+                        this.shift = 2 * Byte.SIZE;
                         return true;
                     }
 
diff --git a/java/src/test/java/com/azure/data/cosmos/core/Utf8StringTest.java b/java/src/test/java/com/azure/data/cosmos/core/Utf8StringTest.java
index aaf424f..f69645c 100644
--- a/java/src/test/java/com/azure/data/cosmos/core/Utf8StringTest.java
+++ b/java/src/test/java/com/azure/data/cosmos/core/Utf8StringTest.java
@@ -15,6 +15,7 @@ import java.util.Optional;
 
 import static org.testng.Assert.assertEquals;
 import static org.testng.Assert.assertSame;
+import static org.testng.Assert.assertThrows;
 import static org.testng.Assert.assertTrue;
 
 public class Utf8StringTest {
@@ -55,12 +56,32 @@ public class Utf8StringTest {
         assertEquals(value.codePoints().iterator(), item.value().codePoints().iterator());
     }
 
-    @Test
-    public void testCompareTo() {
-    }
+    @SuppressWarnings("EqualsWithItself")
+    @Test(dataProvider = "unicodeTextDataProvider")
+    public void testCompareTo(UnicodeTextItem item) {
 
-    @Test
-    public void testTestCompareTo() {
+        Utf8String value = Utf8String.transcodeUtf16(item.value());
+        assertEquals(value.compareTo(value), 0);
+        assertEquals(value.compareTo(item.value()), 0);
+
+        Utf8String unsafe = Utf8String.fromUnsafe(item.byteBuf());
+        assertEquals(unsafe.compareTo(value), 0);
+        assertEquals(unsafe.compareTo(item.value()), 0);
+
+        Optional<Utf8String> optional = Utf8String.from(item.byteBuf());
+        assertTrue(optional.isPresent());
+        assertEquals(optional.get().compareTo(value), 0);
+        assertEquals(optional.get().compareTo(item.value()), 0);
+
+        assertThrows(NullPointerException.class, () -> {
+            //noinspection ConstantConditions,ResultOfMethodCallIgnored
+            value.compareTo((Utf8String) null);
+        });
+
+        assertEquals(Utf8String.NULL.compareTo(item.value()), -1);
+        assertEquals(Utf8String.NULL.compareTo((String) null), 0);
+        assertEquals(Utf8String.EMPTY.compareTo((String) null), 1);
+        assertEquals(Utf8String.EMPTY.compareTo(Utf8String.NULL), 1);
     }
 
     @Test
@@ -115,17 +136,22 @@ public class Utf8StringTest {
     private static Iterator<Object[]> unicodeTextData() {
 
         ImmutableList<UnicodeTextItem> items = ImmutableList.of(
-            // English
+            // US ASCII (7-bit encoding)
+            // ..English
             new UnicodeTextItem("The quick brown fox jumps over the lazy dog."),
-            // German
+            // ISO-8859-1 (8-bit encoding that adds Latin-1 supplement to US ASCII)
+            // ..German
             new UnicodeTextItem("Der schnelle braune Fuchs springt über den faulen Hund."),
-            // Swedish
+            // ..Swedish
             new UnicodeTextItem("Den snabbbruna räven hoppar över den lata hunden."),
-            // Greek
+            // ISO 8859-7 (11-bit encoding that covers the Greek and Coptic alphabets)
+            // ..Greek
             new UnicodeTextItem("Η γρήγορη καφέ αλεπού πηδάει πάνω από το τεμπέλικο σκυλί."),
-            // Japanese
+            // Katakana code block (16-bit encoding containing katakana characters for the Japanese and Ainu languages)
+            // ..Japanese
             new UnicodeTextItem("速い茶色のキツネは怠laな犬を飛び越えます。"),
-            // Deseret alphabet
+            // Deseret code block (21-bit encoding containing an English alphabet invented by the LDS Church)
+            // ..Deseret
             new UnicodeTextItem("\uD801\uDC10\uD801\uDC2F\uD801\uDC4A\uD801\uDC2C, \uD801\uDC38\uD801\uDC35 \uD801\uDC2A\uD801\uDC49 \uD801\uDC4F?")
         );
 
@@ -150,6 +176,11 @@ public class Utf8StringTest {
             return Unpooled.wrappedBuffer(this.buffer);
         }
 
+        @Override
+        public String toString() {
+            return this.value.toString();
+        }
+
         public String value() {
             return this.value;
         }