001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.util;
019
import static org.apache.hadoop.hbase.util.Order.ASCENDING;
import static org.apache.hadoop.hbase.util.Order.DESCENDING;

import java.math.BigDecimal;
import java.math.BigInteger;
import java.math.MathContext;
import java.math.RoundingMode;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import org.apache.yetus.audience.InterfaceAudience;
029
030/**
031 * Utility class that handles ordered byte arrays. That is, unlike {@link Bytes}, these methods
032 * produce byte arrays which maintain the sort order of the original values.
033 * <h3>Encoding Format summary</h3>
034 * <p>
035 * Each value is encoded as one or more bytes. The first byte of the encoding, its meaning, and a
036 * terse description of the bytes that follow is given by the following table:
037 * </p>
038 * <table summary="Encodings">
039 * <tr>
040 * <th>Content Type</th>
041 * <th>Encoding</th>
042 * </tr>
043 * <tr>
044 * <td>NULL</td>
045 * <td>0x05</td>
046 * </tr>
047 * <tr>
048 * <td>negative infinity</td>
049 * <td>0x07</td>
050 * </tr>
051 * <tr>
052 * <td>negative large</td>
053 * <td>0x08, ~E, ~M</td>
054 * </tr>
055 * <tr>
056 * <td>negative medium</td>
057 * <td>0x13-E, ~M</td>
058 * </tr>
059 * <tr>
060 * <td>negative small</td>
061 * <td>0x14, -E, ~M</td>
062 * </tr>
063 * <tr>
064 * <td>zero</td>
065 * <td>0x15</td>
066 * </tr>
067 * <tr>
068 * <td>positive small</td>
069 * <td>0x16, ~-E, M</td>
070 * </tr>
071 * <tr>
072 * <td>positive medium</td>
073 * <td>0x17+E, M</td>
074 * </tr>
075 * <tr>
076 * <td>positive large</td>
077 * <td>0x22, E, M</td>
078 * </tr>
079 * <tr>
080 * <td>positive infinity</td>
081 * <td>0x23</td>
082 * </tr>
083 * <tr>
084 * <td>NaN</td>
 * <td>0x26</td>
086 * </tr>
087 * <tr>
088 * <td>fixed-length 32-bit integer</td>
 * <td>0x2b, I</td>
090 * </tr>
091 * <tr>
092 * <td>fixed-length 64-bit integer</td>
 * <td>0x2c, I</td>
094 * </tr>
095 * <tr>
096 * <td>fixed-length 8-bit integer</td>
097 * <td>0x29</td>
098 * </tr>
099 * <tr>
100 * <td>fixed-length 16-bit integer</td>
101 * <td>0x2a</td>
102 * </tr>
103 * <tr>
104 * <td>fixed-length 32-bit float</td>
105 * <td>0x30, F</td>
106 * </tr>
107 * <tr>
108 * <td>fixed-length 64-bit float</td>
109 * <td>0x31, F</td>
110 * </tr>
111 * <tr>
112 * <td>TEXT</td>
 * <td>0x34, T</td>
114 * </tr>
115 * <tr>
116 * <td>variable length BLOB</td>
 * <td>0x37, B</td>
118 * </tr>
119 * <tr>
120 * <td>byte-for-byte BLOB</td>
 * <td>0x38, X</td>
122 * </tr>
123 * </table>
124 * <h3>Null Encoding</h3>
125 * <p>
126 * Each value that is a NULL encodes as a single byte of 0x05. Since every other value encoding
127 * begins with a byte greater than 0x05, this forces NULL values to sort first.
128 * </p>
129 * <h3>Text Encoding</h3>
130 * <p>
 * Each text value begins with a single byte of 0x34 and ends with a single byte of 0x00. There are
132 * zero or more intervening bytes that encode the text value. The intervening bytes are chosen so
133 * that the encoding will sort in the desired collating order. The intervening bytes may not contain
134 * a 0x00 character; the only 0x00 byte allowed in a text encoding is the final byte.
135 * </p>
136 * <p>
137 * The text encoding ends in 0x00 in order to ensure that when there are two strings where one is a
138 * prefix of the other that the shorter string will sort first.
139 * </p>
140 * <h3>Binary Encoding</h3>
141 * <p>
142 * There are two encoding strategies for binary fields, referred to as "BlobVar" and "BlobCopy".
143 * BlobVar is less efficient in both space and encoding time. It has no limitations on the range of
144 * encoded values. BlobCopy is a byte-for-byte copy of the input data followed by a termination
145 * byte. It is extremely fast to encode and decode. It carries the restriction of not allowing a
146 * 0x00 value in the input byte[] as this value is used as the termination byte.
147 * </p>
148 * <h4>BlobVar</h4>
149 * <p>
150 * "BlobVar" encodes the input byte[] in a manner similar to a variable length integer encoding. As
151 * with the other {@code OrderedBytes} encodings, the first encoded byte is used to indicate what
152 * kind of value follows. This header byte is 0x37 for BlobVar encoded values. As with the
153 * traditional varint encoding, the most significant bit of each subsequent encoded {@code byte} is
154 * used as a continuation marker. The 7 remaining bits contain the 7 most significant bits of the
155 * first unencoded byte. The next encoded byte starts with a continuation marker in the MSB. The
156 * least significant bit from the first unencoded byte follows, and the remaining 6 bits contain the
157 * 6 MSBs of the second unencoded byte. The encoding continues, encoding 7 bytes on to 8 encoded
158 * bytes. The MSB of the final encoded byte contains a termination marker rather than a continuation
159 * marker, and any remaining bits from the final input byte. Any trailing bits in the final encoded
160 * byte are zeros.
161 * </p>
162 * <h4>BlobCopy</h4>
163 * <p>
164 * "BlobCopy" is a simple byte-for-byte copy of the input data. It uses 0x38 as the header byte, and
165 * is terminated by 0x00 in the DESCENDING case. This alternative encoding is faster and more
166 * space-efficient, but it cannot accept values containing a 0x00 byte in DESCENDING order.
167 * </p>
168 * <h3>Variable-length Numeric Encoding</h3>
169 * <p>
170 * Numeric values must be coded so as to sort in numeric order. We assume that numeric values can be
171 * both integer and floating point values. Clients must be careful to use inspection methods for
 * encoded values (such as {@link #isNumericInfinite(PositionedByteRange)} and
 * {@link #isNumericNaN(PositionedByteRange)}) to protect against decoding values into objects
 * which do not support these numeric concepts (such as {@link Long} and {@link BigDecimal}).
175 * </p>
176 * <p>
 * Simplest cases first: If the numeric value is a NaN, then the encoding is a single byte of 0x26.
178 * This causes NaN values to sort after every other numeric value.
179 * </p>
180 * <p>
 * If the numeric value is a negative infinity then the encoding is a single byte of 0x07. Since
 * every other numeric value has a larger initial byte, this encoding ensures that negative
 * infinity will sort prior to every other numeric value.
184 * </p>
185 * <p>
 * If the numeric value is a positive infinity then the encoding is a single byte of 0x23. Every
 * other numeric value encoding except NaN begins with a smaller byte, ensuring that positive
 * infinity sorts after every numeric value other than NaN. The NaN byte 0x26 is also smaller
 * than 0x34, the initial byte of a text value, ensuring that every numeric value sorts before
 * every text value.
190 * </p>
191 * <p>
192 * If the numeric value is exactly zero then it is encoded as a single byte of 0x15. Finite negative
193 * values will have initial bytes of 0x08 through 0x14 and finite positive values will have initial
194 * bytes of 0x16 through 0x22.
195 * </p>
196 * <p>
197 * For all numeric values, we compute a mantissa M and an exponent E. The mantissa is a base-100
198 * representation of the value. The exponent E determines where to put the decimal point.
199 * </p>
200 * <p>
201 * Each centimal digit of the mantissa is stored in a byte. If the value of the centimal digit is X
202 * (hence X&ge;0 and X&le;99) then the byte value will be 2*X+1 for every byte of the mantissa,
203 * except for the last byte which will be 2*X+0. The mantissa must be the minimum number of bytes
204 * necessary to represent the value; trailing X==0 digits are omitted. This means that the mantissa
205 * will never contain a byte with the value 0x00.
206 * </p>
207 * <p>
208 * If we assume all digits of the mantissa occur to the right of the decimal point, then the
209 * exponent E is the power of one hundred by which one must multiply the mantissa to recover the
210 * original value.
211 * </p>
212 * <p>
213 * Values are classified as large, medium, or small according to the value of E. If E is 11 or more,
214 * the value is large. For E between 0 and 10, the value is medium. For E less than zero, the value
215 * is small.
216 * </p>
217 * <p>
218 * Large positive values are encoded as a single byte 0x22 followed by E as a varint and then M.
219 * Medium positive values are a single byte of 0x17+E followed by M. Small positive values are
220 * encoded as a single byte 0x16 followed by the ones-complement of the varint for -E followed by M.
221 * </p>
222 * <p>
223 * Small negative values are encoded as a single byte 0x14 followed by -E as a varint and then the
224 * ones-complement of M. Medium negative values are encoded as a byte 0x13-E followed by the
225 * ones-complement of M. Large negative values consist of the single byte 0x08 followed by the
226 * ones-complement of the varint encoding of E followed by the ones-complement of M.
227 * </p>
228 * <h3>Fixed-length Integer Encoding</h3>
229 * <p>
230 * All 4-byte integers are serialized to a 5-byte, fixed-width, sortable byte format. All 8-byte
 * integers are serialized to the equivalent 9-byte format. Serialization is performed by writing a
232 * header byte, inverting the integer sign bit and writing the resulting bytes to the byte array in
233 * big endian order.
234 * </p>
235 * <h3>Fixed-length Floating Point Encoding</h3>
236 * <p>
237 * 32-bit and 64-bit floating point numbers are encoded to a 5-byte and 9-byte encoding format,
238 * respectively. The format is identical, save for the precision respected in each step of the
239 * operation.
 * </p>
 * <p>
 * This format ensures the following total ordering of floating point values:
 * Float.NEGATIVE_INFINITY &lt; -Float.MAX_VALUE &lt; ... &lt; -Float.MIN_VALUE &lt; -0.0 &lt; +0.0
243 * &lt; Float.MIN_VALUE &lt; ... &lt; Float.MAX_VALUE &lt; Float.POSITIVE_INFINITY &lt; Float.NaN
244 * </p>
245 * <p>
246 * Floating point numbers are encoded as specified in IEEE 754. A 32-bit single precision float
247 * consists of a sign bit, 8-bit unsigned exponent encoded in offset-127 notation, and a 23-bit
248 * significand. The format is described further in the
249 * <a href="http://en.wikipedia.org/wiki/Single_precision"> Single Precision Floating Point
250 * Wikipedia page</a>
251 * </p>
252 * <p>
253 * The value of a normal float is -1 <sup>sign bit</sup> &times; 2<sup>exponent - 127</sup> &times;
254 * 1.significand
255 * </p>
256 * <p>
 * The IEEE 754 floating point format already preserves sort ordering for positive floating point
258 * numbers when the raw bytes are compared in most significant byte order. This is discussed further
259 * at <a href= "http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm">
260 * http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm</a>
261 * </p>
262 * <p>
 * Thus, we need only ensure that negative numbers sort in the exact opposite order as positive
264 * numbers (so that say, negative infinity is less than negative 1), and that all negative numbers
265 * compare less than any positive number. To accomplish this, we invert the sign bit of all floating
266 * point numbers, and we also invert the exponent and significand bits if the floating point number
267 * was negative.
268 * </p>
269 * <p>
270 * More specifically, we first store the floating point bits into a 32-bit int {@code j} using
271 * {@link Float#floatToIntBits}. This method collapses all NaNs into a single, canonical NaN value
272 * but otherwise leaves the bits unchanged. We then compute
273 * </p>
274 *
275 * <pre>
 * j ^= (j &gt;&gt; (Integer.SIZE - 1)) | Integer.MIN_VALUE
277 * </pre>
278 * <p>
279 * which inverts the sign bit and XOR's all other bits with the sign bit itself. Comparing the raw
280 * bytes of {@code j} in most significant byte order is equivalent to performing a single precision
281 * floating point comparison on the underlying bits (ignoring NaN comparisons, as NaNs don't compare
282 * equal to anything when performing floating point comparisons).
283 * </p>
284 * <p>
285 * The resulting integer is then converted into a byte array by serializing the integer one byte at
286 * a time in most significant byte order. The serialized integer is prefixed by a single header
287 * byte. All serialized values are 5 bytes in length.
288 * </p>
289 * <p>
290 * {@code OrderedBytes} encodings are heavily influenced by the
291 * <a href="http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki">SQLite4 Key Encoding</a>. Slight
 * deviations are made in the interest of order correctness and user extensibility. Fixed-width
293 * {@code Long} and {@link Double} encodings are based on implementations from the now defunct
294 * Orderly library.
295 * </p>
296 */
297@InterfaceAudience.Public
298public class OrderedBytes {
299
  /*
   * These constants define header bytes used to identify encoded values. Note that the values here
   * are not exhaustive as the Numeric format encodes portions of its value within the header byte.
   * The values listed here are directly applied to persisted data -- DO NOT modify the values
   * specified here. Instead, gaps are placed intentionally between values so that new
   * implementations can be inserted into the total ordering enforced here.
   */
  // header of an encoded NULL; the smallest header value declared here
  private static final byte NULL = 0x05;
  // room for 1 expansion type (0x06)
  // variable-length numeric: negative infinity
  private static final byte NEG_INF = 0x07;
  // variable-length numeric: negative value whose exponent E is written as a varint (E > 10)
  private static final byte NEG_LARGE = 0x08;
  // variable-length numeric: medium negative values carry E in the header itself, computed as
  // NEG_MED_MAX - E, so their headers fall between NEG_MED_MIN and NEG_MED_MAX
  private static final byte NEG_MED_MIN = 0x09;
  private static final byte NEG_MED_MAX = 0x13;
  // variable-length numeric: negative value with 0.0 < abs(value) < 1.0
  private static final byte NEG_SMALL = 0x14;
  // variable-length numeric: exactly zero
  private static final byte ZERO = 0x15;
  // variable-length numeric: positive value with 0.0 < value < 1.0
  private static final byte POS_SMALL = 0x16;
  // variable-length numeric: medium positive values carry E in the header itself, computed as
  // POS_MED_MIN + E, so their headers fall between POS_MED_MIN and POS_MED_MAX
  private static final byte POS_MED_MIN = 0x17;
  private static final byte POS_MED_MAX = 0x21;
  // variable-length numeric: positive value whose exponent E is written as a varint (E > 10)
  private static final byte POS_LARGE = 0x22;
  // variable-length numeric: positive infinity
  private static final byte POS_INF = 0x23;
  // room for 2 expansion types (0x24, 0x25)
  // variable-length numeric: NaN; larger than every other variable-length numeric header
  private static final byte NAN = 0x26;
  // room for 2 expansion types (0x27, 0x28)
  // fixed-length integer headers, one per width
  private static final byte FIXED_INT8 = 0x29;
  private static final byte FIXED_INT16 = 0x2a;
  private static final byte FIXED_INT32 = 0x2b;
  private static final byte FIXED_INT64 = 0x2c;
  // room for 3 expansion types (0x2d, 0x2e, 0x2f)
  // fixed-length floating point headers, one per width
  private static final byte FIXED_FLOAT32 = 0x30;
  private static final byte FIXED_FLOAT64 = 0x31;
  // room for 2 expansion types (0x32, 0x33)
  // header of an encoded text value
  private static final byte TEXT = 0x34;
  // room for 2 expansion types (0x35, 0x36)
  // headers of the two binary encodings, "BlobVar" and "BlobCopy"
  private static final byte BLOB_VAR = 0x37;
  private static final byte BLOB_COPY = 0x38;
335
336  /*
337   * The following constant values are used by encoding implementations
338   */
339
340  public static final Charset UTF8 = Charset.forName("UTF-8");
341  private static final byte TERM = 0x00;
342  private static final BigDecimal E8 = BigDecimal.valueOf(1e8);
343  private static final BigDecimal E32 = BigDecimal.valueOf(1e32);
344  private static final BigDecimal EN2 = BigDecimal.valueOf(1e-2);
345  private static final BigDecimal EN10 = BigDecimal.valueOf(1e-10);
346
347  /**
348   * Max precision guaranteed to fit into a {@code long}.
349   */
350  public static final int MAX_PRECISION = 31;
351
352  /**
353   * The context used to normalize {@link BigDecimal} values.
354   */
355  public static final MathContext DEFAULT_MATH_CONTEXT =
356    new MathContext(MAX_PRECISION, RoundingMode.HALF_UP);
357
358  /**
359   * Creates the standard exception when the encoded header byte is unexpected for the decoding
360   * context.
361   * @param header value used in error message.
362   */
363  private static IllegalArgumentException unexpectedHeader(byte header) {
364    throw new IllegalArgumentException(
365      "unexpected value in first byte: 0x" + Long.toHexString(header));
366  }
367
368  /**
369   * Perform unsigned comparison between two long values. Conforms to the same interface as
370   * {@link org.apache.hadoop.hbase.CellComparator}.
371   */
372  private static int unsignedCmp(long x1, long x2) {
373    int cmp;
374    if ((cmp = (x1 < x2 ? -1 : (x1 == x2 ? 0 : 1))) == 0) return 0;
375    // invert the result when either value is negative
376    if ((x1 < 0) != (x2 < 0)) return -cmp;
377    return cmp;
378  }
379
380  /**
381   * Write a 32-bit unsigned integer to {@code dst} as 4 big-endian bytes.
382   * @return number of bytes written.
383   */
384  private static int putUint32(PositionedByteRange dst, int val) {
385    dst.put((byte) (val >>> 24)).put((byte) (val >>> 16)).put((byte) (val >>> 8)).put((byte) val);
386    return 4;
387  }
388
389  /**
390   * Encode an unsigned 64-bit unsigned integer {@code val} into {@code dst}.
391   * @param dst  The destination to which encoded bytes are written.
392   * @param val  The value to write.
393   * @param comp Compliment the encoded value when {@code comp} is true.
394   * @return number of bytes written.
395   */
396  @InterfaceAudience.Private
397  static int putVaruint64(PositionedByteRange dst, long val, boolean comp) {
398    int w, y, len = 0;
399    final int offset = dst.getOffset(), start = dst.getPosition();
400    byte[] a = dst.getBytes();
401    Order ord = comp ? DESCENDING : ASCENDING;
402    if (-1 == unsignedCmp(val, 241L)) {
403      dst.put((byte) val);
404      len = dst.getPosition() - start;
405      ord.apply(a, offset + start, len);
406      return len;
407    }
408    if (-1 == unsignedCmp(val, 2288L)) {
409      y = (int) (val - 240);
410      dst.put((byte) (y / 256 + 241)).put((byte) (y % 256));
411      len = dst.getPosition() - start;
412      ord.apply(a, offset + start, len);
413      return len;
414    }
415    if (-1 == unsignedCmp(val, 67824L)) {
416      y = (int) (val - 2288);
417      dst.put((byte) 249).put((byte) (y / 256)).put((byte) (y % 256));
418      len = dst.getPosition() - start;
419      ord.apply(a, offset + start, len);
420      return len;
421    }
422    y = (int) val;
423    w = (int) (val >>> 32);
424    if (w == 0) {
425      if (-1 == unsignedCmp(y, 16777216L)) {
426        dst.put((byte) 250).put((byte) (y >>> 16)).put((byte) (y >>> 8)).put((byte) y);
427        len = dst.getPosition() - start;
428        ord.apply(a, offset + start, len);
429        return len;
430      }
431      dst.put((byte) 251);
432      putUint32(dst, y);
433      len = dst.getPosition() - start;
434      ord.apply(a, offset + start, len);
435      return len;
436    }
437    if (-1 == unsignedCmp(w, 256L)) {
438      dst.put((byte) 252).put((byte) w);
439      putUint32(dst, y);
440      len = dst.getPosition() - start;
441      ord.apply(a, offset + start, len);
442      return len;
443    }
444    if (-1 == unsignedCmp(w, 65536L)) {
445      dst.put((byte) 253).put((byte) (w >>> 8)).put((byte) w);
446      putUint32(dst, y);
447      len = dst.getPosition() - start;
448      ord.apply(a, offset + start, len);
449      return len;
450    }
451    if (-1 == unsignedCmp(w, 16777216L)) {
452      dst.put((byte) 254).put((byte) (w >>> 16)).put((byte) (w >>> 8)).put((byte) w);
453      putUint32(dst, y);
454      len = dst.getPosition() - start;
455      ord.apply(a, offset + start, len);
456      return len;
457    }
458    dst.put((byte) 255);
459    putUint32(dst, w);
460    putUint32(dst, y);
461    len = dst.getPosition() - start;
462    ord.apply(a, offset + start, len);
463    return len;
464  }
465
466  /**
467   * Inspect {@code src} for an encoded varuint64 for its length in bytes. Preserves the state of
468   * {@code src}.
469   * @param src  source buffer
470   * @param comp if true, parse the compliment of the value.
471   * @return the number of bytes consumed by this value.
472   */
473  @InterfaceAudience.Private
474  static int lengthVaruint64(PositionedByteRange src, boolean comp) {
475    int a0 = (comp ? DESCENDING : ASCENDING).apply(src.peek()) & 0xff;
476    if (a0 <= 240) return 1;
477    if (a0 <= 248) return 2;
478    if (a0 == 249) return 3;
479    if (a0 == 250) return 4;
480    if (a0 == 251) return 5;
481    if (a0 == 252) return 6;
482    if (a0 == 253) return 7;
483    if (a0 == 254) return 8;
484    if (a0 == 255) return 9;
485    throw unexpectedHeader(src.peek());
486  }
487
488  /**
489   * Skip {@code src} over the encoded varuint64.
490   * @param src source buffer
491   * @param cmp if true, parse the compliment of the value.
492   * @return the number of bytes skipped.
493   */
494  @InterfaceAudience.Private
495  static int skipVaruint64(PositionedByteRange src, boolean cmp) {
496    final int len = lengthVaruint64(src, cmp);
497    src.setPosition(src.getPosition() + len);
498    return len;
499  }
500
501  /**
502   * Decode a sequence of bytes in {@code src} as a varuint64. Compliment the encoded value when
503   * {@code comp} is true.
504   * @return the decoded value.
505   */
506  @InterfaceAudience.Private
507  static long getVaruint64(PositionedByteRange src, boolean comp) {
508    assert src.getRemaining() >= lengthVaruint64(src, comp);
509    final long ret;
510    Order ord = comp ? DESCENDING : ASCENDING;
511    byte x = src.get();
512    final int a0 = ord.apply(x) & 0xff, a1, a2, a3, a4, a5, a6, a7, a8;
513    if (-1 == unsignedCmp(a0, 241)) {
514      return a0;
515    }
516    x = src.get();
517    a1 = ord.apply(x) & 0xff;
518    if (-1 == unsignedCmp(a0, 249)) {
519      return (a0 - 241L) * 256 + a1 + 240;
520    }
521    x = src.get();
522    a2 = ord.apply(x) & 0xff;
523    if (a0 == 249) {
524      return 2288L + 256 * a1 + a2;
525    }
526    x = src.get();
527    a3 = ord.apply(x) & 0xff;
528    if (a0 == 250) {
529      return ((long) a1 << 16L) | (a2 << 8) | a3;
530    }
531    x = src.get();
532    a4 = ord.apply(x) & 0xff;
533    ret = (((long) a1) << 24) | (a2 << 16) | (a3 << 8) | a4;
534    if (a0 == 251) {
535      return ret;
536    }
537    x = src.get();
538    a5 = ord.apply(x) & 0xff;
539    if (a0 == 252) {
540      return (ret << 8) | a5;
541    }
542    x = src.get();
543    a6 = ord.apply(x) & 0xff;
544    if (a0 == 253) {
545      return (ret << 16) | (a5 << 8) | a6;
546    }
547    x = src.get();
548    a7 = ord.apply(x) & 0xff;
549    if (a0 == 254) {
550      return (ret << 24) | (a5 << 16) | (a6 << 8) | a7;
551    }
552    x = src.get();
553    a8 = ord.apply(x) & 0xff;
554    return (ret << 32) | (((long) a5) << 24) | (a6 << 16) | (a7 << 8) | a8;
555  }
556
557  /**
558   * Strip all trailing zeros to ensure that no digit will be zero and round using our default
559   * context to ensure precision doesn't exceed max allowed. From Phoenix's {@code NumberUtil}.
560   * @return new {@link BigDecimal} instance
561   */
562  @InterfaceAudience.Private
563  static BigDecimal normalize(BigDecimal val) {
564    return null == val ? null : val.stripTrailingZeros().round(DEFAULT_MATH_CONTEXT);
565  }
566
567  /**
568   * Read significand digits from {@code src} according to the magnitude of {@code e}.
569   * @param src  The source from which to read encoded digits.
570   * @param e    The magnitude of the first digit read.
571   * @param comp Treat encoded bytes as compliments when {@code comp} is true.
572   * @return The decoded value.
573   * @throws IllegalArgumentException when read exceeds the remaining length of {@code src}.
574   */
575  private static BigDecimal decodeSignificand(PositionedByteRange src, int e, boolean comp) {
576    // TODO: can this be made faster?
577    byte[] a = src.getBytes();
578    final int start = src.getPosition(), offset = src.getOffset(), remaining = src.getRemaining();
579    Order ord = comp ? DESCENDING : ASCENDING;
580    BigDecimal m = BigDecimal.ZERO;
581    e--;
582    for (int i = 0;; i++) {
583      if (i > remaining) {
584        // we've exceeded this range's window
585        src.setPosition(start);
586        throw new IllegalArgumentException(
587          "Read exceeds range before termination byte found. offset: " + offset + " position: "
588            + (start + i));
589      }
590      // base-100 digits are encoded as val * 2 + 1 except for the termination digit.
591      m = m.add( // m +=
592        new BigDecimal(BigInteger.ONE, e * -2).multiply( // 100 ^ p * [decoded digit]
593          BigDecimal.valueOf((ord.apply(a[offset + start + i]) & 0xff) / 2)));
594      e--;
595      // detect termination digit
596      if ((ord.apply(a[offset + start + i]) & 1) == 0) {
597        src.setPosition(start + i + 1);
598        break;
599      }
600    }
601    return normalize(m);
602  }
603
604  /**
605   * Skip {@code src} over the significand bytes.
606   * @param src  The source from which to read encoded digits.
607   * @param comp Treat encoded bytes as compliments when {@code comp} is true.
608   * @return the number of bytes skipped.
609   */
610  private static int skipSignificand(PositionedByteRange src, boolean comp) {
611    byte[] a = src.getBytes();
612    final int offset = src.getOffset(), start = src.getPosition();
613    int i = src.getPosition();
614    while (((comp ? DESCENDING : ASCENDING).apply(a[offset + i++]) & 1) != 0)
615      ;
616    src.setPosition(i);
617    return i - start;
618  }
619
620  /**
621   * <p>
622   * Encode the small magnitude floating point number {@code val} using the key encoding. The caller
623   * guarantees that 1.0 > abs(val) > 0.0.
624   * </p>
625   * <p>
626   * A floating point value is encoded as an integer exponent {@code E} and a mantissa {@code M}.
627   * The original value is equal to {@code (M * 100^E)}. {@code E} is set to the smallest value
628   * possible without making {@code M} greater than or equal to 1.0.
629   * </p>
630   * <p>
631   * For this routine, {@code E} will always be zero or negative, since the original value is less
632   * than one. The encoding written by this routine is the ones-complement of the varint of the
633   * negative of {@code E} followed by the mantissa:
634   *
635   * <pre>
636   *   Encoding:   ~-E  M
637   * </pre>
638   * </p>
639   * @param dst The destination to which encoded digits are written.
640   * @param val The value to encode.
641   * @return the number of bytes written.
642   */
643  private static int encodeNumericSmall(PositionedByteRange dst, BigDecimal val) {
644    // TODO: this can be done faster?
645    // assert 1.0 > abs(val) > 0.0
646    BigDecimal abs = val.abs();
647    assert BigDecimal.ZERO.compareTo(abs) < 0 && BigDecimal.ONE.compareTo(abs) > 0;
648    byte[] a = dst.getBytes();
649    boolean isNeg = val.signum() == -1;
650    final int offset = dst.getOffset(), start = dst.getPosition();
651    int e = 0, d, startM;
652
653    if (isNeg) { /* Small negative number: 0x14, -E, ~M */
654      dst.put(NEG_SMALL);
655    } else { /* Small positive number: 0x16, ~-E, M */
656      dst.put(POS_SMALL);
657    }
658
659    // normalize abs(val) to determine E
660    while (abs.compareTo(EN10) < 0) {
661      abs = abs.movePointRight(8);
662      e += 4;
663    }
664    while (abs.compareTo(EN2) < 0) {
665      abs = abs.movePointRight(2);
666      e++;
667    }
668
669    putVaruint64(dst, e, !isNeg); // encode appropriate E value.
670
671    // encode M by peeling off centimal digits, encoding x as 2x+1
672    startM = dst.getPosition();
673    // TODO: 18 is an arbitrary encoding limit. Reevaluate once we have a better handling of
674    // numeric scale.
675    for (int i = 0; i < 18 && abs.compareTo(BigDecimal.ZERO) != 0; i++) {
676      abs = abs.movePointRight(2);
677      d = abs.intValue();
678      dst.put((byte) ((2 * d + 1) & 0xff));
679      abs = abs.subtract(BigDecimal.valueOf(d));
680    }
681    // terminal digit should be 2x
682    a[offset + dst.getPosition() - 1] = (byte) (a[offset + dst.getPosition() - 1] & 0xfe);
683    if (isNeg) {
684      // negative values encoded as ~M
685      DESCENDING.apply(a, offset + startM, dst.getPosition() - startM);
686    }
687    return dst.getPosition() - start;
688  }
689
690  /**
691   * Encode the large magnitude floating point number {@code val} using the key encoding. The caller
692   * guarantees that {@code val} will be finite and abs(val) >= 1.0.
693   * <p>
694   * A floating point value is encoded as an integer exponent {@code E} and a mantissa {@code M}.
695   * The original value is equal to {@code (M * 100^E)}. {@code E} is set to the smallest value
696   * possible without making {@code M} greater than or equal to 1.0.
697   * </p>
698   * <p>
699   * Each centimal digit of the mantissa is stored in a byte. If the value of the centimal digit is
700   * {@code X} (hence {@code X>=0} and {@code X<=99}) then the byte value will be {@code 2*X+1} for
701   * every byte of the mantissa, except for the last byte which will be {@code 2*X+0}. The mantissa
702   * must be the minimum number of bytes necessary to represent the value; trailing {@code X==0}
703   * digits are omitted. This means that the mantissa will never contain a byte with the value
704   * {@code 0x00}.
705   * </p>
706   * <p>
707   * If {@code E > 10}, then this routine writes of {@code E} as a varint followed by the mantissa
708   * as described above. Otherwise, if {@code E <= 10}, this routine only writes the mantissa and
709   * leaves the {@code E} value to be encoded as part of the opening byte of the field by the
710   * calling function.
711   *
712   * <pre>
713   *   Encoding:  M       (if E<=10)
714   *              E M     (if E>10)
715   * </pre>
716   * </p>
717   * @param dst The destination to which encoded digits are written.
718   * @param val The value to encode.
719   * @return the number of bytes written.
720   */
721  private static int encodeNumericLarge(PositionedByteRange dst, BigDecimal val) {
722    // TODO: this can be done faster
723    BigDecimal abs = val.abs();
724    byte[] a = dst.getBytes();
725    boolean isNeg = val.signum() == -1;
726    final int start = dst.getPosition(), offset = dst.getOffset();
727    int e = 0, d, startM;
728
729    if (isNeg) { /* Large negative number: 0x08, ~E, ~M */
730      dst.put(NEG_LARGE);
731    } else { /* Large positive number: 0x22, E, M */
732      dst.put(POS_LARGE);
733    }
734
735    // normalize abs(val) to determine E
736    while (abs.compareTo(E32) >= 0 && e <= 350) {
737      abs = abs.movePointLeft(32);
738      e += 16;
739    }
740    while (abs.compareTo(E8) >= 0 && e <= 350) {
741      abs = abs.movePointLeft(8);
742      e += 4;
743    }
744    while (abs.compareTo(BigDecimal.ONE) >= 0 && e <= 350) {
745      abs = abs.movePointLeft(2);
746      e++;
747    }
748
749    // encode appropriate header byte and/or E value.
750    if (e > 10) { /* large number, write out {~,}E */
751      putVaruint64(dst, e, isNeg);
752    } else {
753      if (isNeg) { /* Medium negative number: 0x13-E, ~M */
754        dst.put(start, (byte) (NEG_MED_MAX - e));
755      } else { /* Medium positive number: 0x17+E, M */
756        dst.put(start, (byte) (POS_MED_MIN + e));
757      }
758    }
759
760    // encode M by peeling off centimal digits, encoding x as 2x+1
761    startM = dst.getPosition();
762    // TODO: 18 is an arbitrary encoding limit. Reevaluate once we have a better handling of
763    // numeric scale.
764    for (int i = 0; i < 18 && abs.compareTo(BigDecimal.ZERO) != 0; i++) {
765      abs = abs.movePointRight(2);
766      d = abs.intValue();
767      dst.put((byte) (2 * d + 1));
768      abs = abs.subtract(BigDecimal.valueOf(d));
769    }
770    // terminal digit should be 2x
771    a[offset + dst.getPosition() - 1] = (byte) (a[offset + dst.getPosition() - 1] & 0xfe);
772    if (isNeg) {
773      // negative values encoded as ~M
774      DESCENDING.apply(a, offset + startM, dst.getPosition() - startM);
775    }
776    return dst.getPosition() - start;
777  }
778
779  /**
780   * Encode a numerical value using the variable-length encoding.
781   * @param dst The destination to which encoded digits are written.
782   * @param val The value to encode.
783   * @param ord The {@link Order} to respect while encoding {@code val}.
784   * @return the number of bytes written.
785   */
786  public static int encodeNumeric(PositionedByteRange dst, long val, Order ord) {
787    return encodeNumeric(dst, BigDecimal.valueOf(val), ord);
788  }
789
790  /**
791   * Encode a numerical value using the variable-length encoding.
792   * @param dst The destination to which encoded digits are written.
793   * @param val The value to encode.
794   * @param ord The {@link Order} to respect while encoding {@code val}.
795   * @return the number of bytes written.
796   */
797  public static int encodeNumeric(PositionedByteRange dst, double val, Order ord) {
798    if (val == 0.0) {
799      dst.put(ord.apply(ZERO));
800      return 1;
801    }
802    if (Double.isNaN(val)) {
803      dst.put(ord.apply(NAN));
804      return 1;
805    }
806    if (val == Double.NEGATIVE_INFINITY) {
807      dst.put(ord.apply(NEG_INF));
808      return 1;
809    }
810    if (val == Double.POSITIVE_INFINITY) {
811      dst.put(ord.apply(POS_INF));
812      return 1;
813    }
814    return encodeNumeric(dst, BigDecimal.valueOf(val), ord);
815  }
816
817  /**
818   * Encode a numerical value using the variable-length encoding.
819   * @param dst The destination to which encoded digits are written.
820   * @param val The value to encode.
821   * @param ord The {@link Order} to respect while encoding {@code val}.
822   * @return the number of bytes written.
823   */
824  public static int encodeNumeric(PositionedByteRange dst, BigDecimal val, Order ord) {
825    final int len, offset = dst.getOffset(), start = dst.getPosition();
826    if (null == val) {
827      return encodeNull(dst, ord);
828    } else if (BigDecimal.ZERO.compareTo(val) == 0) {
829      dst.put(ord.apply(ZERO));
830      return 1;
831    }
832    BigDecimal abs = val.abs();
833    if (BigDecimal.ONE.compareTo(abs) <= 0) { // abs(v) >= 1.0
834      len = encodeNumericLarge(dst, normalize(val));
835    } else { // 1.0 > abs(v) >= 0.0
836      len = encodeNumericSmall(dst, normalize(val));
837    }
838    ord.apply(dst.getBytes(), offset + start, len);
839    return len;
840  }
841
842  /**
843   * Decode a {@link BigDecimal} from {@code src}. Assumes {@code src} encodes a value in Numeric
844   * encoding and is within the valid range of {@link BigDecimal} values. {@link BigDecimal} does
845   * not support {@code NaN} or {@code Infinte} values.
846   * @see #decodeNumericAsDouble(PositionedByteRange)
847   */
848  private static BigDecimal decodeNumericValue(PositionedByteRange src) {
849    final int e;
850    byte header = src.get();
851    boolean dsc = -1 == Integer.signum(header);
852    header = dsc ? DESCENDING.apply(header) : header;
853
854    if (header == NULL) return null;
855    if (header == NEG_LARGE) { /* Large negative number: 0x08, ~E, ~M */
856      e = (int) getVaruint64(src, !dsc);
857      return decodeSignificand(src, e, !dsc).negate();
858    }
859    if (header >= NEG_MED_MIN && header <= NEG_MED_MAX) {
860      /* Medium negative number: 0x13-E, ~M */
861      e = NEG_MED_MAX - header;
862      return decodeSignificand(src, e, !dsc).negate();
863    }
864    if (header == NEG_SMALL) { /* Small negative number: 0x14, -E, ~M */
865      e = (int) -getVaruint64(src, dsc);
866      return decodeSignificand(src, e, !dsc).negate();
867    }
868    if (header == ZERO) {
869      return BigDecimal.ZERO;
870    }
871    if (header == POS_SMALL) { /* Small positive number: 0x16, ~-E, M */
872      e = (int) -getVaruint64(src, !dsc);
873      return decodeSignificand(src, e, dsc);
874    }
875    if (header >= POS_MED_MIN && header <= POS_MED_MAX) {
876      /* Medium positive number: 0x17+E, M */
877      e = header - POS_MED_MIN;
878      return decodeSignificand(src, e, dsc);
879    }
880    if (header == POS_LARGE) { /* Large positive number: 0x22, E, M */
881      e = (int) getVaruint64(src, dsc);
882      return decodeSignificand(src, e, dsc);
883    }
884    throw unexpectedHeader(header);
885  }
886
887  /**
888   * Decode a primitive {@code double} value from the Numeric encoding. Numeric encoding is based on
889   * {@link BigDecimal}; in the event the encoded value is larger than can be represented in a
890   * {@code double}, this method performs an implicit narrowing conversion as described in
891   * {@link BigDecimal#doubleValue()}.
892   * @throws NullPointerException     when the encoded value is {@code NULL}.
893   * @throws IllegalArgumentException when the encoded value is not a Numeric.
894   * @see #encodeNumeric(PositionedByteRange, double, Order)
895   * @see BigDecimal#doubleValue()
896   */
897  public static double decodeNumericAsDouble(PositionedByteRange src) {
898    // TODO: should an encoded NULL value throw unexpectedHeader() instead?
899    if (isNull(src)) {
900      throw new NullPointerException("A null value cannot be decoded to a double.");
901    }
902    if (isNumericNaN(src)) {
903      src.get();
904      return Double.NaN;
905    }
906    if (isNumericZero(src)) {
907      src.get();
908      return Double.valueOf(0.0);
909    }
910
911    byte header = -1 == Integer.signum(src.peek()) ? DESCENDING.apply(src.peek()) : src.peek();
912
913    if (header == NEG_INF) {
914      src.get();
915      return Double.NEGATIVE_INFINITY;
916    } else if (header == POS_INF) {
917      src.get();
918      return Double.POSITIVE_INFINITY;
919    } else {
920      return decodeNumericValue(src).doubleValue();
921    }
922  }
923
924  /**
925   * Decode a primitive {@code long} value from the Numeric encoding. Numeric encoding is based on
926   * {@link BigDecimal}; in the event the encoded value is larger than can be represented in a
927   * {@code long}, this method performs an implicit narrowing conversion as described in
928   * {@link BigDecimal#doubleValue()}.
929   * @throws NullPointerException     when the encoded value is {@code NULL}.
930   * @throws IllegalArgumentException when the encoded value is not a Numeric.
931   * @see #encodeNumeric(PositionedByteRange, long, Order)
932   * @see BigDecimal#longValue()
933   */
934  public static long decodeNumericAsLong(PositionedByteRange src) {
935    // TODO: should an encoded NULL value throw unexpectedHeader() instead?
936    if (isNull(src)) throw new NullPointerException();
937    if (!isNumeric(src)) throw unexpectedHeader(src.peek());
938    if (isNumericNaN(src)) throw unexpectedHeader(src.peek());
939    if (isNumericInfinite(src)) throw unexpectedHeader(src.peek());
940
941    if (isNumericZero(src)) {
942      src.get();
943      return Long.valueOf(0);
944    }
945    return decodeNumericValue(src).longValue();
946  }
947
948  /**
949   * Decode a {@link BigDecimal} value from the variable-length encoding.
950   * @throws IllegalArgumentException when the encoded value is not a Numeric.
951   * @see #encodeNumeric(PositionedByteRange, BigDecimal, Order)
952   */
953  public static BigDecimal decodeNumericAsBigDecimal(PositionedByteRange src) {
954    if (isNull(src)) {
955      src.get();
956      return null;
957    }
958    if (!isNumeric(src)) throw unexpectedHeader(src.peek());
959    if (isNumericNaN(src)) throw unexpectedHeader(src.peek());
960    if (isNumericInfinite(src)) throw unexpectedHeader(src.peek());
961    return decodeNumericValue(src);
962  }
963
964  /**
965   * Encode a String value. String encoding is 0x00-terminated and so it does not support
966   * {@code \u0000} codepoints in the value.
967   * @param dst The destination to which the encoded value is written.
968   * @param val The value to encode.
969   * @param ord The {@link Order} to respect while encoding {@code val}.
970   * @return the number of bytes written.
971   * @throws IllegalArgumentException when {@code val} contains a {@code \u0000}.
972   */
973  public static int encodeString(PositionedByteRange dst, String val, Order ord) {
974    if (null == val) {
975      return encodeNull(dst, ord);
976    }
977    if (val.contains("\u0000"))
978      throw new IllegalArgumentException("Cannot encode String values containing '\\u0000'");
979    final int offset = dst.getOffset(), start = dst.getPosition();
980    dst.put(TEXT);
981    // TODO: is there no way to decode into dst directly?
982    dst.put(val.getBytes(UTF8));
983    dst.put(TERM);
984    ord.apply(dst.getBytes(), offset + start, dst.getPosition() - start);
985    return dst.getPosition() - start;
986  }
987
988  /**
989   * Decode a String value.
990   */
991  public static String decodeString(PositionedByteRange src) {
992    final byte header = src.get();
993    if (header == NULL || header == DESCENDING.apply(NULL)) return null;
994    assert header == TEXT || header == DESCENDING.apply(TEXT);
995    Order ord = header == TEXT ? ASCENDING : DESCENDING;
996    byte[] a = src.getBytes();
997    final int offset = src.getOffset(), start = src.getPosition();
998    final byte terminator = ord.apply(TERM);
999    int rawStartPos = offset + start, rawTermPos = rawStartPos;
1000    for (; a[rawTermPos] != terminator; rawTermPos++)
1001      ;
1002    src.setPosition(rawTermPos - offset + 1); // advance position to TERM + 1
1003    if (DESCENDING == ord) {
1004      // make a copy so that we don't disturb encoded value with ord.
1005      byte[] copy = new byte[rawTermPos - rawStartPos];
1006      System.arraycopy(a, rawStartPos, copy, 0, copy.length);
1007      ord.apply(copy);
1008      return new String(copy, UTF8);
1009    } else {
1010      return new String(a, rawStartPos, rawTermPos - rawStartPos, UTF8);
1011    }
1012  }
1013
1014  /**
1015   * Calculate the expected BlobVar encoded length based on unencoded length.
1016   */
1017  public static int blobVarEncodedLength(int len) {
1018    if (0 == len) return 2; // 1-byte header + 1-byte terminator
1019    else return (int) Math.ceil((len * 8) // 8-bits per input byte
1020      / 7.0) // 7-bits of input data per encoded byte, rounded up
1021      + 1; // + 1-byte header
1022  }
1023
1024  /**
1025   * Calculate the expected BlobVar decoded length based on encoded length.
1026   */
1027  @InterfaceAudience.Private
1028  static int blobVarDecodedLength(int len) {
1029    return ((len - 1) // 1-byte header
1030      * 7) // 7-bits of payload per encoded byte
1031      / 8; // 8-bits per byte
1032  }
1033
1034  /**
1035   * Encode a Blob value using a modified varint encoding scheme.
1036   * <p>
1037   * This format encodes a byte[] value such that no limitations on the input value are imposed. The
1038   * first byte encodes the encoding scheme that follows, {@link #BLOB_VAR}. Each encoded byte
1039   * thereafter consists of a header bit followed by 7 bits of payload. A header bit of '1'
1040   * indicates continuation of the encoding. A header bit of '0' indicates this byte contains the
1041   * last of the payload. An empty input value is encoded as the header byte immediately followed by
1042   * a termination byte {@code 0x00}. This is not ambiguous with the encoded value of {@code []},
1043   * which results in {@code [0x80, 0x00]}.
1044   * </p>
1045   * @return the number of bytes written.
1046   */
1047  public static int encodeBlobVar(PositionedByteRange dst, byte[] val, int voff, int vlen,
1048    Order ord) {
1049    if (null == val) {
1050      return encodeNull(dst, ord);
1051    }
1052    // Empty value is null-terminated. All other values are encoded as 7-bits per byte.
1053    assert dst.getRemaining() >= blobVarEncodedLength(vlen) : "buffer overflow expected.";
1054    final int offset = dst.getOffset(), start = dst.getPosition();
1055    dst.put(BLOB_VAR);
1056    if (0 == vlen) {
1057      dst.put(TERM);
1058    } else {
1059      byte s = 1, t = 0;
1060      for (int i = voff; i < vlen; i++) {
1061        dst.put((byte) (0x80 | t | ((val[i] & 0xff) >>> s)));
1062        if (s < 7) {
1063          t = (byte) (val[i] << (7 - s));
1064          s++;
1065        } else {
1066          dst.put((byte) (0x80 | val[i]));
1067          s = 1;
1068          t = 0;
1069        }
1070      }
1071      if (s > 1) {
1072        dst.put((byte) (0x7f & t));
1073      } else {
1074        dst.getBytes()[offset + dst.getPosition() - 1] =
1075          (byte) (dst.getBytes()[offset + dst.getPosition() - 1] & 0x7f);
1076      }
1077    }
1078    ord.apply(dst.getBytes(), offset + start, dst.getPosition() - start);
1079    return dst.getPosition() - start;
1080  }
1081
1082  /**
1083   * Encode a blob value using a modified varint encoding scheme.
1084   * @return the number of bytes written.
1085   * @see #encodeBlobVar(PositionedByteRange, byte[], int, int, Order)
1086   */
1087  public static int encodeBlobVar(PositionedByteRange dst, byte[] val, Order ord) {
1088    return encodeBlobVar(dst, val, 0, null != val ? val.length : 0, ord);
1089  }
1090
  /**
   * Decode a blob value that was encoded using BlobVar encoding.
   * <p>
   * The header byte is always consumed. An encoded NULL (in either order) yields {@code null}; a
   * header immediately followed by the terminator yields an empty array. Otherwise the encoded
   * bytes are scanned up to and including the first one whose continuation bit is clear, their
   * 7-bit payloads are repacked into whole bytes, and {@code src} is positioned just past the
   * last encoded byte.
   * </p>
   * @param src The source positioned at the header byte of the encoded value.
   * @return the decoded bytes, or {@code null} for an encoded NULL.
   * @see #encodeBlobVar(PositionedByteRange, byte[], int, int, Order)
   */
  public static byte[] decodeBlobVar(PositionedByteRange src) {
    final byte header = src.get();
    if (header == NULL || header == DESCENDING.apply(NULL)) {
      return null;
    }
    assert header == BLOB_VAR || header == DESCENDING.apply(BLOB_VAR);
    // Anything other than the plain BLOB_VAR header is treated as DESCENDING.
    Order ord = BLOB_VAR == header ? ASCENDING : DESCENDING;
    if (src.peek() == ord.apply(TERM)) {
      // skip empty input buffer.
      src.get();
      return new byte[0];
    }
    final int offset = src.getOffset(), start = src.getPosition();
    int end;
    byte[] a = src.getBytes();
    // Walk forward until the order-corrected byte has its high (continuation) bit clear.
    for (end = start; (byte) (ord.apply(a[offset + end]) & 0x80) != TERM; end++)
      ;
    end++; // increment end to 1-past last byte
    // create ret buffer using length of encoded data + 1 (header byte)
    PositionedByteRange ret =
      new SimplePositionedMutableByteRange(blobVarDecodedLength(end - start + 1));
    // s is the right-shift applied to the payload of the byte being read; t holds the
    // high-order bits of the output byte under construction, taken from the previous byte.
    int s = 6;
    byte t = (byte) ((ord.apply(a[offset + start]) << 1) & 0xff);
    for (int i = start + 1; i < end; i++) {
      if (s == 7) {
        // t holds a single bit; the whole 7-bit payload of this byte completes the output byte.
        ret.put((byte) (t | (ord.apply(a[offset + i]) & 0x7f)));
        // This byte is fully consumed, so step to the next one before refilling t below.
        i++;
        // explicitly reset t -- clean up overflow buffer after decoding a full cycle and
        // retain assertion condition below. This happens when the LSB in the last encoded
        // byte is 1. (HBASE-9893)
        t = 0;
      } else {
        // Complete the output byte with the top bits of this byte's payload.
        ret.put((byte) (t | ((ord.apply(a[offset + i]) & 0x7f) >>> s)));
      }
      // The extra i++ above may have stepped onto end; stop before reading past the value.
      if (i == end) break;
      // Carry the payload bits not yet emitted into the high end of the next output byte.
      t = (byte) ((ord.apply(a[offset + i]) << (8 - s)) & 0xff);
      // The shift shrinks by one per byte and wraps from 1 back to 7.
      s = s == 1 ? 7 : s - 1;
    }
    src.setPosition(end);
    assert t == 0 : "Unexpected bits remaining after decoding blob.";
    assert ret.getPosition() == ret.getLength() : "Allocated unnecessarily large return buffer.";
    return ret.getBytes();
  }
1136
1137  /**
1138   * Encode a Blob value as a byte-for-byte copy. BlobCopy encoding in DESCENDING order is NULL
1139   * terminated so as to preserve proper sorting of {@code []} and so it does not support
1140   * {@code 0x00} in the value.
1141   * @return the number of bytes written.
1142   * @throws IllegalArgumentException when {@code ord} is DESCENDING and {@code val} contains a
1143   *                                  {@code 0x00} byte.
1144   */
1145  public static int encodeBlobCopy(PositionedByteRange dst, byte[] val, int voff, int vlen,
1146    Order ord) {
1147    if (null == val) {
1148      encodeNull(dst, ord);
1149      if (ASCENDING == ord) return 1;
1150      else {
1151        // DESCENDING ordered BlobCopy requires a termination bit to preserve
1152        // sort-order semantics of null values.
1153        dst.put(ord.apply(TERM));
1154        return 2;
1155      }
1156    }
1157    // Blobs as final entry in a compound key are written unencoded.
1158    assert dst.getRemaining() >= vlen + (ASCENDING == ord ? 1 : 2);
1159    if (DESCENDING == ord) {
1160      for (int i = 0; i < vlen; i++) {
1161        if (TERM == val[voff + i]) {
1162          throw new IllegalArgumentException("0x00 bytes not permitted in value.");
1163        }
1164      }
1165    }
1166    final int offset = dst.getOffset(), start = dst.getPosition();
1167    dst.put(BLOB_COPY);
1168    dst.put(val, voff, vlen);
1169    // DESCENDING ordered BlobCopy requires a termination bit to preserve
1170    // sort-order semantics of null values.
1171    if (DESCENDING == ord) dst.put(TERM);
1172    ord.apply(dst.getBytes(), offset + start, dst.getPosition() - start);
1173    return dst.getPosition() - start;
1174  }
1175
1176  /**
1177   * Encode a Blob value as a byte-for-byte copy. BlobCopy encoding in DESCENDING order is NULL
1178   * terminated so as to preserve proper sorting of {@code []} and so it does not support
1179   * {@code 0x00} in the value.
1180   * @return the number of bytes written.
1181   * @throws IllegalArgumentException when {@code ord} is DESCENDING and {@code val} contains a
1182   *                                  {@code 0x00} byte.
1183   * @see #encodeBlobCopy(PositionedByteRange, byte[], int, int, Order)
1184   */
1185  public static int encodeBlobCopy(PositionedByteRange dst, byte[] val, Order ord) {
1186    return encodeBlobCopy(dst, val, 0, null != val ? val.length : 0, ord);
1187  }
1188
1189  /**
1190   * Decode a Blob value, byte-for-byte copy.
1191   * @see #encodeBlobCopy(PositionedByteRange, byte[], int, int, Order)
1192   */
1193  public static byte[] decodeBlobCopy(PositionedByteRange src) {
1194    byte header = src.get();
1195    if (header == NULL || header == DESCENDING.apply(NULL)) {
1196      return null;
1197    }
1198    assert header == BLOB_COPY || header == DESCENDING.apply(BLOB_COPY);
1199    Order ord = header == BLOB_COPY ? ASCENDING : DESCENDING;
1200    final int length = src.getRemaining() - (ASCENDING == ord ? 0 : 1);
1201    byte[] ret = new byte[length];
1202    src.get(ret);
1203    ord.apply(ret, 0, ret.length);
1204    // DESCENDING ordered BlobCopy requires a termination bit to preserve
1205    // sort-order semantics of null values.
1206    if (DESCENDING == ord) src.get();
1207    return ret;
1208  }
1209
1210  /**
1211   * Encode a null value.
1212   * @param dst The destination to which encoded digits are written.
1213   * @param ord The {@link Order} to respect while encoding {@code val}.
1214   * @return the number of bytes written.
1215   */
1216  public static int encodeNull(PositionedByteRange dst, Order ord) {
1217    dst.put(ord.apply(NULL));
1218    return 1;
1219  }
1220
1221  /**
1222   * Encode an {@code int8} value using the fixed-length encoding.
1223   * @return the number of bytes written.
1224   * @see #encodeInt64(PositionedByteRange, long, Order)
1225   * @see #decodeInt8(PositionedByteRange)
1226   */
1227  public static int encodeInt8(PositionedByteRange dst, byte val, Order ord) {
1228    final int offset = dst.getOffset(), start = dst.getPosition();
1229    dst.put(FIXED_INT8).put((byte) (val ^ 0x80));
1230    ord.apply(dst.getBytes(), offset + start, 2);
1231    return 2;
1232  }
1233
1234  /**
1235   * Decode an {@code int8} value.
1236   * @see #encodeInt8(PositionedByteRange, byte, Order)
1237   */
1238  public static byte decodeInt8(PositionedByteRange src) {
1239    final byte header = src.get();
1240    assert header == FIXED_INT8 || header == DESCENDING.apply(FIXED_INT8);
1241    Order ord = header == FIXED_INT8 ? ASCENDING : DESCENDING;
1242    return (byte) ((ord.apply(src.get()) ^ 0x80) & 0xff);
1243  }
1244
1245  /**
1246   * Encode an {@code int16} value using the fixed-length encoding.
1247   * @return the number of bytes written.
1248   * @see #encodeInt64(PositionedByteRange, long, Order)
1249   * @see #decodeInt16(PositionedByteRange)
1250   */
1251  public static int encodeInt16(PositionedByteRange dst, short val, Order ord) {
1252    final int offset = dst.getOffset(), start = dst.getPosition();
1253    dst.put(FIXED_INT16).put((byte) ((val >> 8) ^ 0x80)).put((byte) val);
1254    ord.apply(dst.getBytes(), offset + start, 3);
1255    return 3;
1256  }
1257
1258  /**
1259   * Decode an {@code int16} value.
1260   * @see #encodeInt16(PositionedByteRange, short, Order)
1261   */
1262  public static short decodeInt16(PositionedByteRange src) {
1263    final byte header = src.get();
1264    assert header == FIXED_INT16 || header == DESCENDING.apply(FIXED_INT16);
1265    Order ord = header == FIXED_INT16 ? ASCENDING : DESCENDING;
1266    short val = (short) ((ord.apply(src.get()) ^ 0x80) & 0xff);
1267    val = (short) ((val << 8) + (ord.apply(src.get()) & 0xff));
1268    return val;
1269  }
1270
1271  /**
1272   * Encode an {@code int32} value using the fixed-length encoding.
1273   * @return the number of bytes written.
1274   * @see #encodeInt64(PositionedByteRange, long, Order)
1275   * @see #decodeInt32(PositionedByteRange)
1276   */
1277  public static int encodeInt32(PositionedByteRange dst, int val, Order ord) {
1278    final int offset = dst.getOffset(), start = dst.getPosition();
1279    dst.put(FIXED_INT32).put((byte) ((val >> 24) ^ 0x80)).put((byte) (val >> 16))
1280      .put((byte) (val >> 8)).put((byte) val);
1281    ord.apply(dst.getBytes(), offset + start, 5);
1282    return 5;
1283  }
1284
1285  /**
1286   * Decode an {@code int32} value.
1287   * @see #encodeInt32(PositionedByteRange, int, Order)
1288   */
1289  public static int decodeInt32(PositionedByteRange src) {
1290    final byte header = src.get();
1291    assert header == FIXED_INT32 || header == DESCENDING.apply(FIXED_INT32);
1292    Order ord = header == FIXED_INT32 ? ASCENDING : DESCENDING;
1293    int val = (ord.apply(src.get()) ^ 0x80) & 0xff;
1294    for (int i = 1; i < 4; i++) {
1295      val = (val << 8) + (ord.apply(src.get()) & 0xff);
1296    }
1297    return val;
1298  }
1299
1300  /**
1301   * Encode an {@code int64} value using the fixed-length encoding.
1302   * <p>
1303   * This format ensures that all longs sort in their natural order, as they would sort when using
1304   * signed long comparison.
1305   * </p>
1306   * <p>
1307   * All Longs are serialized to an 8-byte, fixed-width sortable byte format. Serialization is
1308   * performed by inverting the integer sign bit and writing the resulting bytes to the byte array
1309   * in big endian order. The encoded value is prefixed by the {@link #FIXED_INT64} header byte.
1310   * This encoding is designed to handle java language primitives and so Null values are NOT
1311   * supported by this implementation.
1312   * </p>
1313   * <p>
1314   * For example:
1315   * </p>
1316   *
1317   * <pre>
1318   * Input:   0x0000000000000005 (5)
1319   * Result:  0x288000000000000005
1320   *
1321   * Input:   0xfffffffffffffffb (-4)
1322   * Result:  0x280000000000000004
1323   *
1324   * Input:   0x7fffffffffffffff (Long.MAX_VALUE)
1325   * Result:  0x28ffffffffffffffff
1326   *
1327   * Input:   0x8000000000000000 (Long.MIN_VALUE)
1328   * Result:  0x287fffffffffffffff
1329   * </pre>
1330   * <p>
1331   * This encoding format, and much of this documentation string, is based on Orderly's
1332   * {@code FixedIntWritableRowKey}.
1333   * </p>
1334   * @return the number of bytes written.
1335   * @see #decodeInt64(PositionedByteRange)
1336   */
1337  public static int encodeInt64(PositionedByteRange dst, long val, Order ord) {
1338    final int offset = dst.getOffset(), start = dst.getPosition();
1339    dst.put(FIXED_INT64).put((byte) ((val >> 56) ^ 0x80)).put((byte) (val >> 48))
1340      .put((byte) (val >> 40)).put((byte) (val >> 32)).put((byte) (val >> 24))
1341      .put((byte) (val >> 16)).put((byte) (val >> 8)).put((byte) val);
1342    ord.apply(dst.getBytes(), offset + start, 9);
1343    return 9;
1344  }
1345
1346  /**
1347   * Decode an {@code int64} value.
1348   * @see #encodeInt64(PositionedByteRange, long, Order)
1349   */
1350  public static long decodeInt64(PositionedByteRange src) {
1351    final byte header = src.get();
1352    assert header == FIXED_INT64 || header == DESCENDING.apply(FIXED_INT64);
1353    Order ord = header == FIXED_INT64 ? ASCENDING : DESCENDING;
1354    long val = (ord.apply(src.get()) ^ 0x80) & 0xff;
1355    for (int i = 1; i < 8; i++) {
1356      val = (val << 8) + (ord.apply(src.get()) & 0xff);
1357    }
1358    return val;
1359  }
1360
1361  /**
1362   * Encode a 32-bit floating point value using the fixed-length encoding. Encoding format is
1363   * described at length in {@link #encodeFloat64(PositionedByteRange, double, Order)}.
1364   * @return the number of bytes written.
1365   * @see #decodeFloat32(PositionedByteRange)
1366   * @see #encodeFloat64(PositionedByteRange, double, Order)
1367   */
1368  public static int encodeFloat32(PositionedByteRange dst, float val, Order ord) {
1369    final int offset = dst.getOffset(), start = dst.getPosition();
1370    int i = Float.floatToIntBits(val);
1371    i ^= ((i >> (Integer.SIZE - 1)) | Integer.MIN_VALUE);
1372    dst.put(FIXED_FLOAT32).put((byte) (i >> 24)).put((byte) (i >> 16)).put((byte) (i >> 8))
1373      .put((byte) i);
1374    ord.apply(dst.getBytes(), offset + start, 5);
1375    return 5;
1376  }
1377
1378  /**
1379   * Decode a 32-bit floating point value using the fixed-length encoding.
1380   * @see #encodeFloat32(PositionedByteRange, float, Order)
1381   */
1382  public static float decodeFloat32(PositionedByteRange src) {
1383    final byte header = src.get();
1384    assert header == FIXED_FLOAT32 || header == DESCENDING.apply(FIXED_FLOAT32);
1385    Order ord = header == FIXED_FLOAT32 ? ASCENDING : DESCENDING;
1386    int val = ord.apply(src.get()) & 0xff;
1387    for (int i = 1; i < 4; i++) {
1388      val = (val << 8) + (ord.apply(src.get()) & 0xff);
1389    }
1390    val ^= (~val >> (Integer.SIZE - 1)) | Integer.MIN_VALUE;
1391    return Float.intBitsToFloat(val);
1392  }
1393
1394  /**
1395   * Encode a 64-bit floating point value using the fixed-length encoding.
1396   * <p>
1397   * This format ensures the following total ordering of floating point values:
1398   * Double.NEGATIVE_INFINITY &lt; -Double.MAX_VALUE &lt; ... &lt; -Double.MIN_VALUE &lt; -0.0 &lt;
1399   * +0.0; &lt; Double.MIN_VALUE &lt; ... &lt; Double.MAX_VALUE &lt; Double.POSITIVE_INFINITY &lt;
1400   * Double.NaN
1401   * </p>
1402   * <p>
1403   * Floating point numbers are encoded as specified in IEEE 754. A 64-bit double precision float
1404   * consists of a sign bit, 11-bit unsigned exponent encoded in offset-1023 notation, and a 52-bit
1405   * significand. The format is described further in the
1406   * <a href="http://en.wikipedia.org/wiki/Double_precision"> Double Precision Floating Point
1407   * Wikipedia page</a>
1408   * </p>
1409   * <p>
1410   * The value of a normal float is -1 <sup>sign bit</sup> &times; 2<sup>exponent - 1023</sup>
1411   * &times; 1.significand
1412   * </p>
1413   * <p>
1414   * The IEE754 floating point format already preserves sort ordering for positive floating point
1415   * numbers when the raw bytes are compared in most significant byte order. This is discussed
1416   * further at
1417   * <a href= "http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm" >
1418   * http://www.cygnus-software.com/papers/comparingfloats/comparingfloats. htm</a>
1419   * </p>
1420   * <p>
1421   * Thus, we need only ensure that negative numbers sort in the the exact opposite order as
1422   * positive numbers (so that say, negative infinity is less than negative 1), and that all
1423   * negative numbers compare less than any positive number. To accomplish this, we invert the sign
1424   * bit of all floating point numbers, and we also invert the exponent and significand bits if the
1425   * floating point number was negative.
1426   * </p>
1427   * <p>
1428   * More specifically, we first store the floating point bits into a 64-bit long {@code l} using
1429   * {@link Double#doubleToLongBits}. This method collapses all NaNs into a single, canonical NaN
1430   * value but otherwise leaves the bits unchanged. We then compute
1431   * </p>
1432   *
1433   * <pre>
1434   * l &circ;= (l &gt;&gt; (Long.SIZE - 1)) | Long.MIN_SIZE
1435   * </pre>
1436   * <p>
1437   * which inverts the sign bit and XOR's all other bits with the sign bit itself. Comparing the raw
1438   * bytes of {@code l} in most significant byte order is equivalent to performing a double
1439   * precision floating point comparison on the underlying bits (ignoring NaN comparisons, as NaNs
1440   * don't compare equal to anything when performing floating point comparisons).
1441   * </p>
1442   * <p>
1443   * The resulting long integer is then converted into a byte array by serializing the long one byte
1444   * at a time in most significant byte order. The serialized integer is prefixed by a single header
1445   * byte. All serialized values are 9 bytes in length.
1446   * </p>
1447   * <p>
1448   * This encoding format, and much of this highly detailed documentation string, is based on
1449   * Orderly's {@code DoubleWritableRowKey}.
1450   * </p>
1451   * @return the number of bytes written.
1452   * @see #decodeFloat64(PositionedByteRange)
1453   */
1454  public static int encodeFloat64(PositionedByteRange dst, double val, Order ord) {
1455    final int offset = dst.getOffset(), start = dst.getPosition();
1456    long lng = Double.doubleToLongBits(val);
1457    lng ^= ((lng >> (Long.SIZE - 1)) | Long.MIN_VALUE);
1458    dst.put(FIXED_FLOAT64).put((byte) (lng >> 56)).put((byte) (lng >> 48)).put((byte) (lng >> 40))
1459      .put((byte) (lng >> 32)).put((byte) (lng >> 24)).put((byte) (lng >> 16))
1460      .put((byte) (lng >> 8)).put((byte) lng);
1461    ord.apply(dst.getBytes(), offset + start, 9);
1462    return 9;
1463  }
1464
1465  /**
1466   * Decode a 64-bit floating point value using the fixed-length encoding.
1467   * @see #encodeFloat64(PositionedByteRange, double, Order)
1468   */
1469  public static double decodeFloat64(PositionedByteRange src) {
1470    final byte header = src.get();
1471    assert header == FIXED_FLOAT64 || header == DESCENDING.apply(FIXED_FLOAT64);
1472    Order ord = header == FIXED_FLOAT64 ? ASCENDING : DESCENDING;
1473    long val = ord.apply(src.get()) & 0xff;
1474    for (int i = 1; i < 8; i++) {
1475      val = (val << 8) + (ord.apply(src.get()) & 0xff);
1476    }
1477    val ^= (~val >> (Long.SIZE - 1)) | Long.MIN_VALUE;
1478    return Double.longBitsToDouble(val);
1479  }
1480
1481  /**
1482   * Returns true when {@code src} appears to be positioned an encoded value, false otherwise.
1483   */
1484  public static boolean isEncodedValue(PositionedByteRange src) {
1485    return isNull(src) || isNumeric(src) || isFixedInt8(src) || isFixedInt16(src)
1486      || isFixedInt32(src) || isFixedInt64(src) || isFixedFloat32(src) || isFixedFloat64(src)
1487      || isText(src) || isBlobCopy(src) || isBlobVar(src);
1488  }
1489
1490  /**
1491   * Return true when the next encoded value in {@code src} is null, false otherwise.
1492   */
1493  public static boolean isNull(PositionedByteRange src) {
1494    return NULL == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1495  }
1496
1497  /**
1498   * Return true when the next encoded value in {@code src} uses Numeric encoding, false otherwise.
1499   * {@code NaN}, {@code +/-Inf} are valid Numeric values.
1500   */
1501  public static boolean isNumeric(PositionedByteRange src) {
1502    byte x = (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1503    return x >= NEG_INF && x <= NAN;
1504  }
1505
1506  /**
1507   * Return true when the next encoded value in {@code src} uses Numeric encoding and is
1508   * {@code Infinite}, false otherwise.
1509   */
1510  public static boolean isNumericInfinite(PositionedByteRange src) {
1511    byte x = (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1512    return NEG_INF == x || POS_INF == x;
1513  }
1514
1515  /**
1516   * Return true when the next encoded value in {@code src} uses Numeric encoding and is
1517   * {@code NaN}, false otherwise.
1518   */
1519  public static boolean isNumericNaN(PositionedByteRange src) {
1520    return NAN == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1521  }
1522
1523  /**
1524   * Return true when the next encoded value in {@code src} uses Numeric encoding and is {@code 0},
1525   * false otherwise.
1526   */
1527  public static boolean isNumericZero(PositionedByteRange src) {
1528    return ZERO == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1529  }
1530
1531  /**
1532   * Return true when the next encoded value in {@code src} uses fixed-width Int8 encoding, false
1533   * otherwise.
1534   */
1535  public static boolean isFixedInt8(PositionedByteRange src) {
1536    return FIXED_INT8
1537        == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1538  }
1539
1540  /**
1541   * Return true when the next encoded value in {@code src} uses fixed-width Int16 encoding, false
1542   * otherwise.
1543   */
1544  public static boolean isFixedInt16(PositionedByteRange src) {
1545    return FIXED_INT16
1546        == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1547  }
1548
1549  /**
1550   * Return true when the next encoded value in {@code src} uses fixed-width Int32 encoding, false
1551   * otherwise.
1552   */
1553  public static boolean isFixedInt32(PositionedByteRange src) {
1554    return FIXED_INT32
1555        == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1556  }
1557
1558  /**
1559   * Return true when the next encoded value in {@code src} uses fixed-width Int64 encoding, false
1560   * otherwise.
1561   */
1562  public static boolean isFixedInt64(PositionedByteRange src) {
1563    return FIXED_INT64
1564        == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1565  }
1566
1567  /**
1568   * Return true when the next encoded value in {@code src} uses fixed-width Float32 encoding, false
1569   * otherwise.
1570   */
1571  public static boolean isFixedFloat32(PositionedByteRange src) {
1572    return FIXED_FLOAT32
1573        == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1574  }
1575
1576  /**
1577   * Return true when the next encoded value in {@code src} uses fixed-width Float64 encoding, false
1578   * otherwise.
1579   */
1580  public static boolean isFixedFloat64(PositionedByteRange src) {
1581    return FIXED_FLOAT64
1582        == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1583  }
1584
1585  /**
1586   * Return true when the next encoded value in {@code src} uses Text encoding, false otherwise.
1587   */
1588  public static boolean isText(PositionedByteRange src) {
1589    return TEXT == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1590  }
1591
1592  /**
1593   * Return true when the next encoded value in {@code src} uses BlobVar encoding, false otherwise.
1594   */
1595  public static boolean isBlobVar(PositionedByteRange src) {
1596    return BLOB_VAR
1597        == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1598  }
1599
1600  /**
1601   * Return true when the next encoded value in {@code src} uses BlobCopy encoding, false otherwise.
1602   */
1603  public static boolean isBlobCopy(PositionedByteRange src) {
1604    return BLOB_COPY
1605        == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1606  }
1607
1608  /**
1609   * Skip {@code buff}'s position forward over one encoded value.
1610   * @return number of bytes skipped.
1611   */
1612  public static int skip(PositionedByteRange src) {
1613    final int start = src.getPosition();
1614    byte header = src.get();
1615    Order ord = (-1 == Integer.signum(header)) ? DESCENDING : ASCENDING;
1616    header = ord.apply(header);
1617
1618    switch (header) {
1619      case NULL:
1620      case NEG_INF:
1621        return 1;
1622      case NEG_LARGE: /* Large negative number: 0x08, ~E, ~M */
1623        skipVaruint64(src, DESCENDING != ord);
1624        skipSignificand(src, DESCENDING != ord);
1625        return src.getPosition() - start;
1626      case NEG_MED_MIN: /* Medium negative number: 0x13-E, ~M */
1627      case NEG_MED_MIN + 0x01:
1628      case NEG_MED_MIN + 0x02:
1629      case NEG_MED_MIN + 0x03:
1630      case NEG_MED_MIN + 0x04:
1631      case NEG_MED_MIN + 0x05:
1632      case NEG_MED_MIN + 0x06:
1633      case NEG_MED_MIN + 0x07:
1634      case NEG_MED_MIN + 0x08:
1635      case NEG_MED_MIN + 0x09:
1636      case NEG_MED_MAX:
1637        skipSignificand(src, DESCENDING != ord);
1638        return src.getPosition() - start;
1639      case NEG_SMALL: /* Small negative number: 0x14, -E, ~M */
1640        skipVaruint64(src, DESCENDING == ord);
1641        skipSignificand(src, DESCENDING != ord);
1642        return src.getPosition() - start;
1643      case ZERO:
1644        return 1;
1645      case POS_SMALL: /* Small positive number: 0x16, ~-E, M */
1646        skipVaruint64(src, DESCENDING != ord);
1647        skipSignificand(src, DESCENDING == ord);
1648        return src.getPosition() - start;
1649      case POS_MED_MIN: /* Medium positive number: 0x17+E, M */
1650      case POS_MED_MIN + 0x01:
1651      case POS_MED_MIN + 0x02:
1652      case POS_MED_MIN + 0x03:
1653      case POS_MED_MIN + 0x04:
1654      case POS_MED_MIN + 0x05:
1655      case POS_MED_MIN + 0x06:
1656      case POS_MED_MIN + 0x07:
1657      case POS_MED_MIN + 0x08:
1658      case POS_MED_MIN + 0x09:
1659      case POS_MED_MAX:
1660        skipSignificand(src, DESCENDING == ord);
1661        return src.getPosition() - start;
1662      case POS_LARGE: /* Large positive number: 0x22, E, M */
1663        skipVaruint64(src, DESCENDING == ord);
1664        skipSignificand(src, DESCENDING == ord);
1665        return src.getPosition() - start;
1666      case POS_INF:
1667        return 1;
1668      case NAN:
1669        return 1;
1670      case FIXED_INT8:
1671        src.setPosition(src.getPosition() + 1);
1672        return src.getPosition() - start;
1673      case FIXED_INT16:
1674        src.setPosition(src.getPosition() + 2);
1675        return src.getPosition() - start;
1676      case FIXED_INT32:
1677        src.setPosition(src.getPosition() + 4);
1678        return src.getPosition() - start;
1679      case FIXED_INT64:
1680        src.setPosition(src.getPosition() + 8);
1681        return src.getPosition() - start;
1682      case FIXED_FLOAT32:
1683        src.setPosition(src.getPosition() + 4);
1684        return src.getPosition() - start;
1685      case FIXED_FLOAT64:
1686        src.setPosition(src.getPosition() + 8);
1687        return src.getPosition() - start;
1688      case TEXT:
1689        // for null-terminated values, skip to the end.
1690        do {
1691          header = ord.apply(src.get());
1692        } while (header != TERM);
1693        return src.getPosition() - start;
1694      case BLOB_VAR:
1695        // read until we find a 0 in the MSB
1696        do {
1697          header = ord.apply(src.get());
1698        } while ((byte) (header & 0x80) != TERM);
1699        return src.getPosition() - start;
1700      case BLOB_COPY:
1701        if (Order.DESCENDING == ord) {
1702          // if descending, read to termination byte.
1703          do {
1704            header = ord.apply(src.get());
1705          } while (header != TERM);
1706          return src.getPosition() - start;
1707        } else {
1708          // otherwise, just skip to the end.
1709          src.setPosition(src.getLength());
1710          return src.getPosition() - start;
1711        }
1712      default:
1713        throw unexpectedHeader(header);
1714    }
1715  }
1716
1717  /**
1718   * Return the number of encoded entries remaining in {@code buff}. The state of {@code buff} is
1719   * not modified through use of this method.
1720   */
1721  public static int length(PositionedByteRange buff) {
1722    PositionedByteRange b =
1723      new SimplePositionedMutableByteRange(buff.getBytes(), buff.getOffset(), buff.getLength());
1724    b.setPosition(buff.getPosition());
1725    int cnt = 0;
1726    for (; isEncodedValue(b); skip(b), cnt++)
1727      ;
1728    return cnt;
1729  }
1730}