001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.util; 019 020import static org.apache.hadoop.hbase.util.Order.ASCENDING; 021import static org.apache.hadoop.hbase.util.Order.DESCENDING; 022 023import java.math.BigDecimal; 024import java.math.BigInteger; 025import java.math.MathContext; 026import java.math.RoundingMode; 027import java.nio.charset.Charset; 028import org.apache.yetus.audience.InterfaceAudience; 029 030/** 031 * Utility class that handles ordered byte arrays. That is, unlike {@link Bytes}, these methods 032 * produce byte arrays which maintain the sort order of the original values. 033 * <h3>Encoding Format summary</h3> 034 * <p> 035 * Each value is encoded as one or more bytes. 
The first byte of the encoding, its meaning, and a
 * terse description of the bytes that follow are given by the following table:
 * </p>
 * <table summary="Encodings">
 * <tr>
 * <th>Content Type</th>
 * <th>Encoding</th>
 * </tr>
 * <tr>
 * <td>NULL</td>
 * <td>0x05</td>
 * </tr>
 * <tr>
 * <td>negative infinity</td>
 * <td>0x07</td>
 * </tr>
 * <tr>
 * <td>negative large</td>
 * <td>0x08, ~E, ~M</td>
 * </tr>
 * <tr>
 * <td>negative medium</td>
 * <td>0x13-E, ~M</td>
 * </tr>
 * <tr>
 * <td>negative small</td>
 * <td>0x14, -E, ~M</td>
 * </tr>
 * <tr>
 * <td>zero</td>
 * <td>0x15</td>
 * </tr>
 * <tr>
 * <td>positive small</td>
 * <td>0x16, ~-E, M</td>
 * </tr>
 * <tr>
 * <td>positive medium</td>
 * <td>0x17+E, M</td>
 * </tr>
 * <tr>
 * <td>positive large</td>
 * <td>0x22, E, M</td>
 * </tr>
 * <tr>
 * <td>positive infinity</td>
 * <td>0x23</td>
 * </tr>
 * <tr>
 * <td>NaN</td>
 * <td>0x26</td>
 * </tr>
 * <tr>
 * <td>fixed-length 32-bit integer</td>
 * <td>0x2b, I</td>
 * </tr>
 * <tr>
 * <td>fixed-length 64-bit integer</td>
 * <td>0x2c, I</td>
 * </tr>
 * <tr>
 * <td>fixed-length 8-bit integer</td>
 * <td>0x29</td>
 * </tr>
 * <tr>
 * <td>fixed-length 16-bit integer</td>
 * <td>0x2a</td>
 * </tr>
 * <tr>
 * <td>fixed-length 32-bit float</td>
 * <td>0x30, F</td>
 * </tr>
 * <tr>
 * <td>fixed-length 64-bit float</td>
 * <td>0x31, F</td>
 * </tr>
 * <tr>
 * <td>TEXT</td>
 * <td>0x34, T</td>
 * </tr>
 * <tr>
 * <td>variable length BLOB</td>
 * <td>0x37, B</td>
 * </tr>
 * <tr>
 * <td>byte-for-byte BLOB</td>
 * <td>0x38, X</td>
 * </tr>
 * </table>
 * <h3>Null Encoding</h3>
 * <p>
 * Each value that is a NULL encodes as a single byte of 0x05.
Since every other value encoding
 * begins with a byte greater than 0x05, this forces NULL values to sort first.
 * </p>
 * <h3>Text Encoding</h3>
 * <p>
 * Each text value begins with a single byte of 0x34 and ends with a single byte of 0x00. There are
 * zero or more intervening bytes that encode the text value. The intervening bytes are chosen so
 * that the encoding will sort in the desired collating order. The intervening bytes may not contain
 * a 0x00 character; the only 0x00 byte allowed in a text encoding is the final byte.
 * </p>
 * <p>
 * The text encoding ends in 0x00 to ensure that, when one of two strings is a prefix of the other,
 * the shorter string sorts first.
 * </p>
 * <h3>Binary Encoding</h3>
 * <p>
 * There are two encoding strategies for binary fields, referred to as "BlobVar" and "BlobCopy".
 * BlobVar is less efficient in both space and encoding time. It has no limitations on the range of
 * encoded values. BlobCopy is a byte-for-byte copy of the input data followed by a termination
 * byte. It is extremely fast to encode and decode. It carries the restriction of not allowing a
 * 0x00 value in the input byte[] as this value is used as the termination byte.
 * </p>
 * <h4>BlobVar</h4>
 * <p>
 * "BlobVar" encodes the input byte[] in a manner similar to a variable length integer encoding. As
 * with the other {@code OrderedBytes} encodings, the first encoded byte is used to indicate what
 * kind of value follows. This header byte is 0x37 for BlobVar encoded values. As with the
 * traditional varint encoding, the most significant bit of each subsequent encoded {@code byte} is
 * used as a continuation marker. The 7 remaining bits contain the 7 most significant bits of the
 * first unencoded byte. The next encoded byte starts with a continuation marker in the MSB.
The
 * least significant bit from the first unencoded byte follows, and the remaining 6 bits contain the
 * 6 MSBs of the second unencoded byte. The encoding continues, encoding 7 bytes on to 8 encoded
 * bytes. The MSB of the final encoded byte contains a termination marker rather than a continuation
 * marker, and any remaining bits from the final input byte. Any trailing bits in the final encoded
 * byte are zeros.
 * </p>
 * <h4>BlobCopy</h4>
 * <p>
 * "BlobCopy" is a simple byte-for-byte copy of the input data. It uses 0x38 as the header byte, and
 * is terminated by 0x00 in the DESCENDING case. This alternative encoding is faster and more
 * space-efficient, but it cannot accept values containing a 0x00 byte in DESCENDING order.
 * </p>
 * <h3>Variable-length Numeric Encoding</h3>
 * <p>
 * Numeric values must be coded so as to sort in numeric order. We assume that numeric values can be
 * both integer and floating point values. Clients must be careful to use inspection methods for
 * encoded values (such as {@link #isNumericInfinite(PositionedByteRange)} and
 * {@link #isNumericNaN(PositionedByteRange)}) to protect against decoding values into objects which
 * do not support these numeric concepts (such as {@link Long} and {@link BigDecimal}).
 * </p>
 * <p>
 * Simplest cases first: If the numeric value is a NaN, then the encoding is a single byte of 0x26.
 * This causes NaN values to sort after every other numeric value.
 * </p>
 * <p>
 * If the numeric value is a negative infinity then the encoding is a single byte of 0x07. Since
 * every other numeric value, NaN included, has a larger initial byte, this encoding ensures that
 * negative infinity will sort prior to every other numeric value.
 * </p>
 * <p>
 * If the numeric value is a positive infinity then the encoding is a single byte of 0x23.
Every
 * other numeric value encoding except NaN (0x26) begins with a smaller byte, ensuring that positive
 * infinity sorts after every numeric value other than NaN. Both 0x23 and 0x26 are also smaller than
 * 0x34, the initial byte of a text value, ensuring that every numeric value sorts before every text
 * value.
 * </p>
 * <p>
 * If the numeric value is exactly zero then it is encoded as a single byte of 0x15. Finite negative
 * values will have initial bytes of 0x08 through 0x14 and finite positive values will have initial
 * bytes of 0x16 through 0x22.
 * </p>
 * <p>
 * For all other (finite, non-zero) numeric values, we compute a mantissa M and an exponent E. The
 * mantissa is a base-100 representation of the value. The exponent E determines where to put the
 * decimal point.
 * </p>
 * <p>
 * Each centimal digit of the mantissa is stored in a byte. If the value of the centimal digit is X
 * (hence X&ge;0 and X&le;99) then the byte value will be 2*X+1 for every byte of the mantissa,
 * except for the last byte which will be 2*X+0. The mantissa must be the minimum number of bytes
 * necessary to represent the value; trailing X==0 digits are omitted. This means that the mantissa
 * will never contain a byte with the value 0x00.
 * </p>
 * <p>
 * If we assume all digits of the mantissa occur to the right of the decimal point, then the
 * exponent E is the power of one hundred by which one must multiply the mantissa to recover the
 * original value.
 * </p>
 * <p>
 * Values are classified as large, medium, or small according to the value of E. If E is 11 or more,
 * the value is large. For E between 0 and 10, the value is medium. For E less than zero, the value
 * is small.
 * </p>
 * <p>
 * Large positive values are encoded as a single byte 0x22 followed by E as a varint and then M.
 * Medium positive values are a single byte of 0x17+E followed by M.
Small positive values are
 * encoded as a single byte 0x16 followed by the ones-complement of the varint for -E followed by M.
 * </p>
 * <p>
 * Small negative values are encoded as a single byte 0x14 followed by -E as a varint and then the
 * ones-complement of M. Medium negative values are encoded as a byte 0x13-E followed by the
 * ones-complement of M. Large negative values consist of the single byte 0x08 followed by the
 * ones-complement of the varint encoding of E followed by the ones-complement of M.
 * </p>
 * <h3>Fixed-length Integer Encoding</h3>
 * <p>
 * All 4-byte integers are serialized to a 5-byte, fixed-width, sortable byte format. All 8-byte
 * integers are serialized to the equivalent 9-byte format. Serialization is performed by writing a
 * header byte, inverting the integer sign bit and writing the resulting bytes to the byte array in
 * big endian order.
 * </p>
 * <h3>Fixed-length Floating Point Encoding</h3>
 * <p>
 * 32-bit and 64-bit floating point numbers are encoded to a 5-byte and 9-byte encoding format,
 * respectively. The format is identical, save for the precision respected in each step of the
 * operation.
 * </p>
 * <p>
 * This format ensures the following total ordering of floating point values:
 * Float.NEGATIVE_INFINITY &lt; -Float.MAX_VALUE &lt; ... &lt; -Float.MIN_VALUE &lt; -0.0 &lt; +0.0
 * &lt; Float.MIN_VALUE &lt; ... &lt; Float.MAX_VALUE &lt; Float.POSITIVE_INFINITY &lt; Float.NaN
 * </p>
 * <p>
 * Floating point numbers are encoded as specified in IEEE 754. A 32-bit single precision float
 * consists of a sign bit, 8-bit unsigned exponent encoded in offset-127 notation, and a 23-bit
 * significand.
The format is described further in the
 * <a href="http://en.wikipedia.org/wiki/Single_precision"> Single Precision Floating Point
 * Wikipedia page</a>
 * </p>
 * <p>
 * The value of a normal float is (-1)<sup>sign bit</sup> × 2<sup>exponent - 127</sup> ×
 * 1.significand
 * </p>
 * <p>
 * The IEEE 754 floating point format already preserves sort ordering for positive floating point
 * numbers when the raw bytes are compared in most significant byte order. This is discussed further
 * at <a href= "http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm">
 * http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm</a>
 * </p>
 * <p>
 * Thus, we need only ensure that negative numbers sort in the exact opposite order as positive
 * numbers (so that, say, negative infinity is less than negative 1), and that all negative numbers
 * compare less than any positive number. To accomplish this, we invert the sign bit of all floating
 * point numbers, and we also invert the exponent and significand bits if the floating point number
 * was negative.
 * </p>
 * <p>
 * More specifically, we first store the floating point bits into a 32-bit int {@code j} using
 * {@link Float#floatToIntBits}. This method collapses all NaNs into a single, canonical NaN value
 * but otherwise leaves the bits unchanged. We then compute
 * </p>
 *
 * <pre>
 * j ^= (j &gt;&gt; (Integer.SIZE - 1)) | Integer.MIN_VALUE
 * </pre>
 * <p>
 * which inverts the sign bit and XORs all other bits with the sign bit itself. Comparing the raw
 * bytes of {@code j} in most significant byte order is equivalent to performing a single precision
 * floating point comparison on the underlying bits (ignoring NaN comparisons, as NaNs don't compare
 * equal to anything when performing floating point comparisons).
 * </p>
 * <p>
 * The resulting integer is then converted into a byte array by serializing the integer one byte at
 * a time in most significant byte order. The serialized integer is prefixed by a single header
 * byte. All serialized 32-bit values are 5 bytes in length; 64-bit values are 9 bytes.
 * </p>
 * <p>
 * {@code OrderedBytes} encodings are heavily influenced by the
 * <a href="http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki">SQLite4 Key Encoding</a>. Slight
 * deviations are made in the interest of order correctness and user extensibility. Fixed-width
 * {@code Long} and {@link Double} encodings are based on implementations from the now defunct
 * Orderly library.
 * </p>
 */
@InterfaceAudience.Public
public class OrderedBytes {

  /*
   * These constants define header bytes used to identify encoded values. Note that the values here
   * are not exhaustive as the Numeric format encodes portions of its value within the header byte.
   * The values listed here are directly applied to persisted data -- DO NOT modify the values
   * specified here. Instead, gaps are placed intentionally between values so that new
   * implementations can be inserted into the total ordering enforced here.
*/
  // NULL marker; the smallest header value declared here.
  private static final byte NULL = 0x05;
  // room for 1 expansion type
  private static final byte NEG_INF = 0x07;
  private static final byte NEG_LARGE = 0x08;
  // medium negative values use a header of NEG_MED_MAX - E (see encodeNumericLarge)
  private static final byte NEG_MED_MIN = 0x09;
  private static final byte NEG_MED_MAX = 0x13;
  private static final byte NEG_SMALL = 0x14;
  private static final byte ZERO = 0x15;
  private static final byte POS_SMALL = 0x16;
  // medium positive values use a header of POS_MED_MIN + E (see encodeNumericLarge)
  private static final byte POS_MED_MIN = 0x17;
  private static final byte POS_MED_MAX = 0x21;
  private static final byte POS_LARGE = 0x22;
  private static final byte POS_INF = 0x23;
  // room for 2 expansion types
  private static final byte NAN = 0x26;
  // room for 2 expansion types
  private static final byte FIXED_INT8 = 0x29;
  private static final byte FIXED_INT16 = 0x2a;
  private static final byte FIXED_INT32 = 0x2b;
  private static final byte FIXED_INT64 = 0x2c;
  // room for 3 expansion types
  private static final byte FIXED_FLOAT32 = 0x30;
  private static final byte FIXED_FLOAT64 = 0x31;
  // room for 2 expansion types
  private static final byte TEXT = 0x34;
  // room for 2 expansion types
  private static final byte BLOB_VAR = 0x37;
  private static final byte BLOB_COPY = 0x38;

  /*
   * The following constant values are used by encoding implementations
   */

  public static final Charset UTF8 = Charset.forName("UTF-8");
  // termination byte; described in the class comment as the terminator of TEXT and BlobCopy values
  private static final byte TERM = 0x00;
  // powers of ten used as thresholds while normalizing a value to find its base-100 exponent
  private static final BigDecimal E8 = BigDecimal.valueOf(1e8);
  private static final BigDecimal E32 = BigDecimal.valueOf(1e32);
  private static final BigDecimal EN2 = BigDecimal.valueOf(1e-2);
  private static final BigDecimal EN10 = BigDecimal.valueOf(1e-10);

  /**
   * Max precision guaranteed to fit into a {@code long}. This is the number of significant decimal
   * digits used as the precision of {@link #DEFAULT_MATH_CONTEXT}.
   * <p>
   * NOTE(review): 31 decimal digits exceed what a {@code long} can hold (about 19 digits) --
   * confirm the intent of the first sentence.
   * </p>
   */
  public static final int MAX_PRECISION = 31;

  /**
   * The context used to normalize {@link BigDecimal} values.
354 */ 355 public static final MathContext DEFAULT_MATH_CONTEXT = 356 new MathContext(MAX_PRECISION, RoundingMode.HALF_UP); 357 358 /** 359 * Creates the standard exception when the encoded header byte is unexpected for the decoding 360 * context. 361 * @param header value used in error message. 362 */ 363 private static IllegalArgumentException unexpectedHeader(byte header) { 364 throw new IllegalArgumentException( 365 "unexpected value in first byte: 0x" + Long.toHexString(header)); 366 } 367 368 /** 369 * Perform unsigned comparison between two long values. Conforms to the same interface as 370 * {@link org.apache.hadoop.hbase.CellComparator}. 371 */ 372 private static int unsignedCmp(long x1, long x2) { 373 int cmp; 374 if ((cmp = (x1 < x2 ? -1 : (x1 == x2 ? 0 : 1))) == 0) return 0; 375 // invert the result when either value is negative 376 if ((x1 < 0) != (x2 < 0)) return -cmp; 377 return cmp; 378 } 379 380 /** 381 * Write a 32-bit unsigned integer to {@code dst} as 4 big-endian bytes. 382 * @return number of bytes written. 383 */ 384 private static int putUint32(PositionedByteRange dst, int val) { 385 dst.put((byte) (val >>> 24)).put((byte) (val >>> 16)).put((byte) (val >>> 8)).put((byte) val); 386 return 4; 387 } 388 389 /** 390 * Encode an unsigned 64-bit unsigned integer {@code val} into {@code dst}. 391 * @param dst The destination to which encoded bytes are written. 392 * @param val The value to write. 393 * @param comp Compliment the encoded value when {@code comp} is true. 394 * @return number of bytes written. 395 */ 396 @InterfaceAudience.Private 397 static int putVaruint64(PositionedByteRange dst, long val, boolean comp) { 398 int w, y, len = 0; 399 final int offset = dst.getOffset(), start = dst.getPosition(); 400 byte[] a = dst.getBytes(); 401 Order ord = comp ? 
DESCENDING : ASCENDING; 402 if (-1 == unsignedCmp(val, 241L)) { 403 dst.put((byte) val); 404 len = dst.getPosition() - start; 405 ord.apply(a, offset + start, len); 406 return len; 407 } 408 if (-1 == unsignedCmp(val, 2288L)) { 409 y = (int) (val - 240); 410 dst.put((byte) (y / 256 + 241)).put((byte) (y % 256)); 411 len = dst.getPosition() - start; 412 ord.apply(a, offset + start, len); 413 return len; 414 } 415 if (-1 == unsignedCmp(val, 67824L)) { 416 y = (int) (val - 2288); 417 dst.put((byte) 249).put((byte) (y / 256)).put((byte) (y % 256)); 418 len = dst.getPosition() - start; 419 ord.apply(a, offset + start, len); 420 return len; 421 } 422 y = (int) val; 423 w = (int) (val >>> 32); 424 if (w == 0) { 425 if (-1 == unsignedCmp(y, 16777216L)) { 426 dst.put((byte) 250).put((byte) (y >>> 16)).put((byte) (y >>> 8)).put((byte) y); 427 len = dst.getPosition() - start; 428 ord.apply(a, offset + start, len); 429 return len; 430 } 431 dst.put((byte) 251); 432 putUint32(dst, y); 433 len = dst.getPosition() - start; 434 ord.apply(a, offset + start, len); 435 return len; 436 } 437 if (-1 == unsignedCmp(w, 256L)) { 438 dst.put((byte) 252).put((byte) w); 439 putUint32(dst, y); 440 len = dst.getPosition() - start; 441 ord.apply(a, offset + start, len); 442 return len; 443 } 444 if (-1 == unsignedCmp(w, 65536L)) { 445 dst.put((byte) 253).put((byte) (w >>> 8)).put((byte) w); 446 putUint32(dst, y); 447 len = dst.getPosition() - start; 448 ord.apply(a, offset + start, len); 449 return len; 450 } 451 if (-1 == unsignedCmp(w, 16777216L)) { 452 dst.put((byte) 254).put((byte) (w >>> 16)).put((byte) (w >>> 8)).put((byte) w); 453 putUint32(dst, y); 454 len = dst.getPosition() - start; 455 ord.apply(a, offset + start, len); 456 return len; 457 } 458 dst.put((byte) 255); 459 putUint32(dst, w); 460 putUint32(dst, y); 461 len = dst.getPosition() - start; 462 ord.apply(a, offset + start, len); 463 return len; 464 } 465 466 /** 467 * Inspect {@code src} for an encoded varuint64 for its length 
in bytes. Preserves the state of 468 * {@code src}. 469 * @param src source buffer 470 * @param comp if true, parse the compliment of the value. 471 * @return the number of bytes consumed by this value. 472 */ 473 @InterfaceAudience.Private 474 static int lengthVaruint64(PositionedByteRange src, boolean comp) { 475 int a0 = (comp ? DESCENDING : ASCENDING).apply(src.peek()) & 0xff; 476 if (a0 <= 240) return 1; 477 if (a0 <= 248) return 2; 478 if (a0 == 249) return 3; 479 if (a0 == 250) return 4; 480 if (a0 == 251) return 5; 481 if (a0 == 252) return 6; 482 if (a0 == 253) return 7; 483 if (a0 == 254) return 8; 484 if (a0 == 255) return 9; 485 throw unexpectedHeader(src.peek()); 486 } 487 488 /** 489 * Skip {@code src} over the encoded varuint64. 490 * @param src source buffer 491 * @param cmp if true, parse the compliment of the value. 492 * @return the number of bytes skipped. 493 */ 494 @InterfaceAudience.Private 495 static int skipVaruint64(PositionedByteRange src, boolean cmp) { 496 final int len = lengthVaruint64(src, cmp); 497 src.setPosition(src.getPosition() + len); 498 return len; 499 } 500 501 /** 502 * Decode a sequence of bytes in {@code src} as a varuint64. Compliment the encoded value when 503 * {@code comp} is true. 504 * @return the decoded value. 505 */ 506 @InterfaceAudience.Private 507 static long getVaruint64(PositionedByteRange src, boolean comp) { 508 assert src.getRemaining() >= lengthVaruint64(src, comp); 509 final long ret; 510 Order ord = comp ? 
DESCENDING : ASCENDING; 511 byte x = src.get(); 512 final int a0 = ord.apply(x) & 0xff, a1, a2, a3, a4, a5, a6, a7, a8; 513 if (-1 == unsignedCmp(a0, 241)) { 514 return a0; 515 } 516 x = src.get(); 517 a1 = ord.apply(x) & 0xff; 518 if (-1 == unsignedCmp(a0, 249)) { 519 return (a0 - 241L) * 256 + a1 + 240; 520 } 521 x = src.get(); 522 a2 = ord.apply(x) & 0xff; 523 if (a0 == 249) { 524 return 2288L + 256 * a1 + a2; 525 } 526 x = src.get(); 527 a3 = ord.apply(x) & 0xff; 528 if (a0 == 250) { 529 return ((long) a1 << 16L) | (a2 << 8) | a3; 530 } 531 x = src.get(); 532 a4 = ord.apply(x) & 0xff; 533 ret = (((long) a1) << 24) | (a2 << 16) | (a3 << 8) | a4; 534 if (a0 == 251) { 535 return ret; 536 } 537 x = src.get(); 538 a5 = ord.apply(x) & 0xff; 539 if (a0 == 252) { 540 return (ret << 8) | a5; 541 } 542 x = src.get(); 543 a6 = ord.apply(x) & 0xff; 544 if (a0 == 253) { 545 return (ret << 16) | (a5 << 8) | a6; 546 } 547 x = src.get(); 548 a7 = ord.apply(x) & 0xff; 549 if (a0 == 254) { 550 return (ret << 24) | (a5 << 16) | (a6 << 8) | a7; 551 } 552 x = src.get(); 553 a8 = ord.apply(x) & 0xff; 554 return (ret << 32) | (((long) a5) << 24) | (a6 << 16) | (a7 << 8) | a8; 555 } 556 557 /** 558 * Strip all trailing zeros to ensure that no digit will be zero and round using our default 559 * context to ensure precision doesn't exceed max allowed. From Phoenix's {@code NumberUtil}. 560 * @return new {@link BigDecimal} instance 561 */ 562 @InterfaceAudience.Private 563 static BigDecimal normalize(BigDecimal val) { 564 return null == val ? null : val.stripTrailingZeros().round(DEFAULT_MATH_CONTEXT); 565 } 566 567 /** 568 * Read significand digits from {@code src} according to the magnitude of {@code e}. 569 * @param src The source from which to read encoded digits. 570 * @param e The magnitude of the first digit read. 571 * @param comp Treat encoded bytes as compliments when {@code comp} is true. 572 * @return The decoded value. 
573 * @throws IllegalArgumentException when read exceeds the remaining length of {@code src}. 574 */ 575 private static BigDecimal decodeSignificand(PositionedByteRange src, int e, boolean comp) { 576 // TODO: can this be made faster? 577 byte[] a = src.getBytes(); 578 final int start = src.getPosition(), offset = src.getOffset(), remaining = src.getRemaining(); 579 Order ord = comp ? DESCENDING : ASCENDING; 580 BigDecimal m = BigDecimal.ZERO; 581 e--; 582 for (int i = 0;; i++) { 583 if (i > remaining) { 584 // we've exceeded this range's window 585 src.setPosition(start); 586 throw new IllegalArgumentException( 587 "Read exceeds range before termination byte found. offset: " + offset + " position: " 588 + (start + i)); 589 } 590 // base-100 digits are encoded as val * 2 + 1 except for the termination digit. 591 m = m.add( // m += 592 new BigDecimal(BigInteger.ONE, e * -2).multiply( // 100 ^ p * [decoded digit] 593 BigDecimal.valueOf((ord.apply(a[offset + start + i]) & 0xff) / 2))); 594 e--; 595 // detect termination digit 596 if ((ord.apply(a[offset + start + i]) & 1) == 0) { 597 src.setPosition(start + i + 1); 598 break; 599 } 600 } 601 return normalize(m); 602 } 603 604 /** 605 * Skip {@code src} over the significand bytes. 606 * @param src The source from which to read encoded digits. 607 * @param comp Treat encoded bytes as compliments when {@code comp} is true. 608 * @return the number of bytes skipped. 609 */ 610 private static int skipSignificand(PositionedByteRange src, boolean comp) { 611 byte[] a = src.getBytes(); 612 final int offset = src.getOffset(), start = src.getPosition(); 613 int i = src.getPosition(); 614 while (((comp ? DESCENDING : ASCENDING).apply(a[offset + i++]) & 1) != 0) 615 ; 616 src.setPosition(i); 617 return i - start; 618 } 619 620 /** 621 * <p> 622 * Encode the small magnitude floating point number {@code val} using the key encoding. The caller 623 * guarantees that 1.0 > abs(val) > 0.0. 
624 * </p> 625 * <p> 626 * A floating point value is encoded as an integer exponent {@code E} and a mantissa {@code M}. 627 * The original value is equal to {@code (M * 100^E)}. {@code E} is set to the smallest value 628 * possible without making {@code M} greater than or equal to 1.0. 629 * </p> 630 * <p> 631 * For this routine, {@code E} will always be zero or negative, since the original value is less 632 * than one. The encoding written by this routine is the ones-complement of the varint of the 633 * negative of {@code E} followed by the mantissa: 634 * 635 * <pre> 636 * Encoding: ~-E M 637 * </pre> 638 * </p> 639 * @param dst The destination to which encoded digits are written. 640 * @param val The value to encode. 641 * @return the number of bytes written. 642 */ 643 private static int encodeNumericSmall(PositionedByteRange dst, BigDecimal val) { 644 // TODO: this can be done faster? 645 // assert 1.0 > abs(val) > 0.0 646 BigDecimal abs = val.abs(); 647 assert BigDecimal.ZERO.compareTo(abs) < 0 && BigDecimal.ONE.compareTo(abs) > 0; 648 byte[] a = dst.getBytes(); 649 boolean isNeg = val.signum() == -1; 650 final int offset = dst.getOffset(), start = dst.getPosition(); 651 int e = 0, d, startM; 652 653 if (isNeg) { /* Small negative number: 0x14, -E, ~M */ 654 dst.put(NEG_SMALL); 655 } else { /* Small positive number: 0x16, ~-E, M */ 656 dst.put(POS_SMALL); 657 } 658 659 // normalize abs(val) to determine E 660 while (abs.compareTo(EN10) < 0) { 661 abs = abs.movePointRight(8); 662 e += 4; 663 } 664 while (abs.compareTo(EN2) < 0) { 665 abs = abs.movePointRight(2); 666 e++; 667 } 668 669 putVaruint64(dst, e, !isNeg); // encode appropriate E value. 670 671 // encode M by peeling off centimal digits, encoding x as 2x+1 672 startM = dst.getPosition(); 673 // TODO: 18 is an arbitrary encoding limit. Reevaluate once we have a better handling of 674 // numeric scale. 
675 for (int i = 0; i < 18 && abs.compareTo(BigDecimal.ZERO) != 0; i++) { 676 abs = abs.movePointRight(2); 677 d = abs.intValue(); 678 dst.put((byte) ((2 * d + 1) & 0xff)); 679 abs = abs.subtract(BigDecimal.valueOf(d)); 680 } 681 // terminal digit should be 2x 682 a[offset + dst.getPosition() - 1] = (byte) (a[offset + dst.getPosition() - 1] & 0xfe); 683 if (isNeg) { 684 // negative values encoded as ~M 685 DESCENDING.apply(a, offset + startM, dst.getPosition() - startM); 686 } 687 return dst.getPosition() - start; 688 } 689 690 /** 691 * Encode the large magnitude floating point number {@code val} using the key encoding. The caller 692 * guarantees that {@code val} will be finite and abs(val) >= 1.0. 693 * <p> 694 * A floating point value is encoded as an integer exponent {@code E} and a mantissa {@code M}. 695 * The original value is equal to {@code (M * 100^E)}. {@code E} is set to the smallest value 696 * possible without making {@code M} greater than or equal to 1.0. 697 * </p> 698 * <p> 699 * Each centimal digit of the mantissa is stored in a byte. If the value of the centimal digit is 700 * {@code X} (hence {@code X>=0} and {@code X<=99}) then the byte value will be {@code 2*X+1} for 701 * every byte of the mantissa, except for the last byte which will be {@code 2*X+0}. The mantissa 702 * must be the minimum number of bytes necessary to represent the value; trailing {@code X==0} 703 * digits are omitted. This means that the mantissa will never contain a byte with the value 704 * {@code 0x00}. 705 * </p> 706 * <p> 707 * If {@code E > 10}, then this routine writes of {@code E} as a varint followed by the mantissa 708 * as described above. Otherwise, if {@code E <= 10}, this routine only writes the mantissa and 709 * leaves the {@code E} value to be encoded as part of the opening byte of the field by the 710 * calling function. 
711 * 712 * <pre> 713 * Encoding: M (if E<=10) 714 * E M (if E>10) 715 * </pre> 716 * </p> 717 * @param dst The destination to which encoded digits are written. 718 * @param val The value to encode. 719 * @return the number of bytes written. 720 */ 721 private static int encodeNumericLarge(PositionedByteRange dst, BigDecimal val) { 722 // TODO: this can be done faster 723 BigDecimal abs = val.abs(); 724 byte[] a = dst.getBytes(); 725 boolean isNeg = val.signum() == -1; 726 final int start = dst.getPosition(), offset = dst.getOffset(); 727 int e = 0, d, startM; 728 729 if (isNeg) { /* Large negative number: 0x08, ~E, ~M */ 730 dst.put(NEG_LARGE); 731 } else { /* Large positive number: 0x22, E, M */ 732 dst.put(POS_LARGE); 733 } 734 735 // normalize abs(val) to determine E 736 while (abs.compareTo(E32) >= 0 && e <= 350) { 737 abs = abs.movePointLeft(32); 738 e += 16; 739 } 740 while (abs.compareTo(E8) >= 0 && e <= 350) { 741 abs = abs.movePointLeft(8); 742 e += 4; 743 } 744 while (abs.compareTo(BigDecimal.ONE) >= 0 && e <= 350) { 745 abs = abs.movePointLeft(2); 746 e++; 747 } 748 749 // encode appropriate header byte and/or E value. 750 if (e > 10) { /* large number, write out {~,}E */ 751 putVaruint64(dst, e, isNeg); 752 } else { 753 if (isNeg) { /* Medium negative number: 0x13-E, ~M */ 754 dst.put(start, (byte) (NEG_MED_MAX - e)); 755 } else { /* Medium positive number: 0x17+E, M */ 756 dst.put(start, (byte) (POS_MED_MIN + e)); 757 } 758 } 759 760 // encode M by peeling off centimal digits, encoding x as 2x+1 761 startM = dst.getPosition(); 762 // TODO: 18 is an arbitrary encoding limit. Reevaluate once we have a better handling of 763 // numeric scale. 
764 for (int i = 0; i < 18 && abs.compareTo(BigDecimal.ZERO) != 0; i++) { 765 abs = abs.movePointRight(2); 766 d = abs.intValue(); 767 dst.put((byte) (2 * d + 1)); 768 abs = abs.subtract(BigDecimal.valueOf(d)); 769 } 770 // terminal digit should be 2x 771 a[offset + dst.getPosition() - 1] = (byte) (a[offset + dst.getPosition() - 1] & 0xfe); 772 if (isNeg) { 773 // negative values encoded as ~M 774 DESCENDING.apply(a, offset + startM, dst.getPosition() - startM); 775 } 776 return dst.getPosition() - start; 777 } 778 779 /** 780 * Encode a numerical value using the variable-length encoding. 781 * @param dst The destination to which encoded digits are written. 782 * @param val The value to encode. 783 * @param ord The {@link Order} to respect while encoding {@code val}. 784 * @return the number of bytes written. 785 */ 786 public static int encodeNumeric(PositionedByteRange dst, long val, Order ord) { 787 return encodeNumeric(dst, BigDecimal.valueOf(val), ord); 788 } 789 790 /** 791 * Encode a numerical value using the variable-length encoding. 792 * @param dst The destination to which encoded digits are written. 793 * @param val The value to encode. 794 * @param ord The {@link Order} to respect while encoding {@code val}. 795 * @return the number of bytes written. 796 */ 797 public static int encodeNumeric(PositionedByteRange dst, double val, Order ord) { 798 if (val == 0.0) { 799 dst.put(ord.apply(ZERO)); 800 return 1; 801 } 802 if (Double.isNaN(val)) { 803 dst.put(ord.apply(NAN)); 804 return 1; 805 } 806 if (val == Double.NEGATIVE_INFINITY) { 807 dst.put(ord.apply(NEG_INF)); 808 return 1; 809 } 810 if (val == Double.POSITIVE_INFINITY) { 811 dst.put(ord.apply(POS_INF)); 812 return 1; 813 } 814 return encodeNumeric(dst, BigDecimal.valueOf(val), ord); 815 } 816 817 /** 818 * Encode a numerical value using the variable-length encoding. 819 * @param dst The destination to which encoded digits are written. 820 * @param val The value to encode. 
821 * @param ord The {@link Order} to respect while encoding {@code val}. 822 * @return the number of bytes written. 823 */ 824 public static int encodeNumeric(PositionedByteRange dst, BigDecimal val, Order ord) { 825 final int len, offset = dst.getOffset(), start = dst.getPosition(); 826 if (null == val) { 827 return encodeNull(dst, ord); 828 } else if (BigDecimal.ZERO.compareTo(val) == 0) { 829 dst.put(ord.apply(ZERO)); 830 return 1; 831 } 832 BigDecimal abs = val.abs(); 833 if (BigDecimal.ONE.compareTo(abs) <= 0) { // abs(v) >= 1.0 834 len = encodeNumericLarge(dst, normalize(val)); 835 } else { // 1.0 > abs(v) >= 0.0 836 len = encodeNumericSmall(dst, normalize(val)); 837 } 838 ord.apply(dst.getBytes(), offset + start, len); 839 return len; 840 } 841 842 /** 843 * Decode a {@link BigDecimal} from {@code src}. Assumes {@code src} encodes a value in Numeric 844 * encoding and is within the valid range of {@link BigDecimal} values. {@link BigDecimal} does 845 * not support {@code NaN} or {@code Infinte} values. 846 * @see #decodeNumericAsDouble(PositionedByteRange) 847 */ 848 private static BigDecimal decodeNumericValue(PositionedByteRange src) { 849 final int e; 850 byte header = src.get(); 851 boolean dsc = -1 == Integer.signum(header); 852 header = dsc ? 
DESCENDING.apply(header) : header; 853 854 if (header == NULL) return null; 855 if (header == NEG_LARGE) { /* Large negative number: 0x08, ~E, ~M */ 856 e = (int) getVaruint64(src, !dsc); 857 return decodeSignificand(src, e, !dsc).negate(); 858 } 859 if (header >= NEG_MED_MIN && header <= NEG_MED_MAX) { 860 /* Medium negative number: 0x13-E, ~M */ 861 e = NEG_MED_MAX - header; 862 return decodeSignificand(src, e, !dsc).negate(); 863 } 864 if (header == NEG_SMALL) { /* Small negative number: 0x14, -E, ~M */ 865 e = (int) -getVaruint64(src, dsc); 866 return decodeSignificand(src, e, !dsc).negate(); 867 } 868 if (header == ZERO) { 869 return BigDecimal.ZERO; 870 } 871 if (header == POS_SMALL) { /* Small positive number: 0x16, ~-E, M */ 872 e = (int) -getVaruint64(src, !dsc); 873 return decodeSignificand(src, e, dsc); 874 } 875 if (header >= POS_MED_MIN && header <= POS_MED_MAX) { 876 /* Medium positive number: 0x17+E, M */ 877 e = header - POS_MED_MIN; 878 return decodeSignificand(src, e, dsc); 879 } 880 if (header == POS_LARGE) { /* Large positive number: 0x22, E, M */ 881 e = (int) getVaruint64(src, dsc); 882 return decodeSignificand(src, e, dsc); 883 } 884 throw unexpectedHeader(header); 885 } 886 887 /** 888 * Decode a primitive {@code double} value from the Numeric encoding. Numeric encoding is based on 889 * {@link BigDecimal}; in the event the encoded value is larger than can be represented in a 890 * {@code double}, this method performs an implicit narrowing conversion as described in 891 * {@link BigDecimal#doubleValue()}. 892 * @throws NullPointerException when the encoded value is {@code NULL}. 893 * @throws IllegalArgumentException when the encoded value is not a Numeric. 894 * @see #encodeNumeric(PositionedByteRange, double, Order) 895 * @see BigDecimal#doubleValue() 896 */ 897 public static double decodeNumericAsDouble(PositionedByteRange src) { 898 // TODO: should an encoded NULL value throw unexpectedHeader() instead? 
899 if (isNull(src)) { 900 throw new NullPointerException("A null value cannot be decoded to a double."); 901 } 902 if (isNumericNaN(src)) { 903 src.get(); 904 return Double.NaN; 905 } 906 if (isNumericZero(src)) { 907 src.get(); 908 return Double.valueOf(0.0); 909 } 910 911 byte header = -1 == Integer.signum(src.peek()) ? DESCENDING.apply(src.peek()) : src.peek(); 912 913 if (header == NEG_INF) { 914 src.get(); 915 return Double.NEGATIVE_INFINITY; 916 } else if (header == POS_INF) { 917 src.get(); 918 return Double.POSITIVE_INFINITY; 919 } else { 920 return decodeNumericValue(src).doubleValue(); 921 } 922 } 923 924 /** 925 * Decode a primitive {@code long} value from the Numeric encoding. Numeric encoding is based on 926 * {@link BigDecimal}; in the event the encoded value is larger than can be represented in a 927 * {@code long}, this method performs an implicit narrowing conversion as described in 928 * {@link BigDecimal#doubleValue()}. 929 * @throws NullPointerException when the encoded value is {@code NULL}. 930 * @throws IllegalArgumentException when the encoded value is not a Numeric. 931 * @see #encodeNumeric(PositionedByteRange, long, Order) 932 * @see BigDecimal#longValue() 933 */ 934 public static long decodeNumericAsLong(PositionedByteRange src) { 935 // TODO: should an encoded NULL value throw unexpectedHeader() instead? 936 if (isNull(src)) throw new NullPointerException(); 937 if (!isNumeric(src)) throw unexpectedHeader(src.peek()); 938 if (isNumericNaN(src)) throw unexpectedHeader(src.peek()); 939 if (isNumericInfinite(src)) throw unexpectedHeader(src.peek()); 940 941 if (isNumericZero(src)) { 942 src.get(); 943 return Long.valueOf(0); 944 } 945 return decodeNumericValue(src).longValue(); 946 } 947 948 /** 949 * Decode a {@link BigDecimal} value from the variable-length encoding. 950 * @throws IllegalArgumentException when the encoded value is not a Numeric. 
951 * @see #encodeNumeric(PositionedByteRange, BigDecimal, Order) 952 */ 953 public static BigDecimal decodeNumericAsBigDecimal(PositionedByteRange src) { 954 if (isNull(src)) { 955 src.get(); 956 return null; 957 } 958 if (!isNumeric(src)) throw unexpectedHeader(src.peek()); 959 if (isNumericNaN(src)) throw unexpectedHeader(src.peek()); 960 if (isNumericInfinite(src)) throw unexpectedHeader(src.peek()); 961 return decodeNumericValue(src); 962 } 963 964 /** 965 * Encode a String value. String encoding is 0x00-terminated and so it does not support 966 * {@code \u0000} codepoints in the value. 967 * @param dst The destination to which the encoded value is written. 968 * @param val The value to encode. 969 * @param ord The {@link Order} to respect while encoding {@code val}. 970 * @return the number of bytes written. 971 * @throws IllegalArgumentException when {@code val} contains a {@code \u0000}. 972 */ 973 public static int encodeString(PositionedByteRange dst, String val, Order ord) { 974 if (null == val) { 975 return encodeNull(dst, ord); 976 } 977 if (val.contains("\u0000")) 978 throw new IllegalArgumentException("Cannot encode String values containing '\\u0000'"); 979 final int offset = dst.getOffset(), start = dst.getPosition(); 980 dst.put(TEXT); 981 // TODO: is there no way to decode into dst directly? 982 dst.put(val.getBytes(UTF8)); 983 dst.put(TERM); 984 ord.apply(dst.getBytes(), offset + start, dst.getPosition() - start); 985 return dst.getPosition() - start; 986 } 987 988 /** 989 * Decode a String value. 990 */ 991 public static String decodeString(PositionedByteRange src) { 992 final byte header = src.get(); 993 if (header == NULL || header == DESCENDING.apply(NULL)) return null; 994 assert header == TEXT || header == DESCENDING.apply(TEXT); 995 Order ord = header == TEXT ? 
ASCENDING : DESCENDING; 996 byte[] a = src.getBytes(); 997 final int offset = src.getOffset(), start = src.getPosition(); 998 final byte terminator = ord.apply(TERM); 999 int rawStartPos = offset + start, rawTermPos = rawStartPos; 1000 for (; a[rawTermPos] != terminator; rawTermPos++) 1001 ; 1002 src.setPosition(rawTermPos - offset + 1); // advance position to TERM + 1 1003 if (DESCENDING == ord) { 1004 // make a copy so that we don't disturb encoded value with ord. 1005 byte[] copy = new byte[rawTermPos - rawStartPos]; 1006 System.arraycopy(a, rawStartPos, copy, 0, copy.length); 1007 ord.apply(copy); 1008 return new String(copy, UTF8); 1009 } else { 1010 return new String(a, rawStartPos, rawTermPos - rawStartPos, UTF8); 1011 } 1012 } 1013 1014 /** 1015 * Calculate the expected BlobVar encoded length based on unencoded length. 1016 */ 1017 public static int blobVarEncodedLength(int len) { 1018 if (0 == len) return 2; // 1-byte header + 1-byte terminator 1019 else return (int) Math.ceil((len * 8) // 8-bits per input byte 1020 / 7.0) // 7-bits of input data per encoded byte, rounded up 1021 + 1; // + 1-byte header 1022 } 1023 1024 /** 1025 * Calculate the expected BlobVar decoded length based on encoded length. 1026 */ 1027 @InterfaceAudience.Private 1028 static int blobVarDecodedLength(int len) { 1029 return ((len - 1) // 1-byte header 1030 * 7) // 7-bits of payload per encoded byte 1031 / 8; // 8-bits per byte 1032 } 1033 1034 /** 1035 * Encode a Blob value using a modified varint encoding scheme. 1036 * <p> 1037 * This format encodes a byte[] value such that no limitations on the input value are imposed. The 1038 * first byte encodes the encoding scheme that follows, {@link #BLOB_VAR}. Each encoded byte 1039 * thereafter consists of a header bit followed by 7 bits of payload. A header bit of '1' 1040 * indicates continuation of the encoding. A header bit of '0' indicates this byte contains the 1041 * last of the payload. 
An empty input value is encoded as the header byte immediately followed by 1042 * a termination byte {@code 0x00}. This is not ambiguous with the encoded value of {@code []}, 1043 * which results in {@code [0x80, 0x00]}. 1044 * </p> 1045 * @return the number of bytes written. 1046 */ 1047 public static int encodeBlobVar(PositionedByteRange dst, byte[] val, int voff, int vlen, 1048 Order ord) { 1049 if (null == val) { 1050 return encodeNull(dst, ord); 1051 } 1052 // Empty value is null-terminated. All other values are encoded as 7-bits per byte. 1053 assert dst.getRemaining() >= blobVarEncodedLength(vlen) : "buffer overflow expected."; 1054 final int offset = dst.getOffset(), start = dst.getPosition(); 1055 dst.put(BLOB_VAR); 1056 if (0 == vlen) { 1057 dst.put(TERM); 1058 } else { 1059 byte s = 1, t = 0; 1060 for (int i = voff; i < vlen; i++) { 1061 dst.put((byte) (0x80 | t | ((val[i] & 0xff) >>> s))); 1062 if (s < 7) { 1063 t = (byte) (val[i] << (7 - s)); 1064 s++; 1065 } else { 1066 dst.put((byte) (0x80 | val[i])); 1067 s = 1; 1068 t = 0; 1069 } 1070 } 1071 if (s > 1) { 1072 dst.put((byte) (0x7f & t)); 1073 } else { 1074 dst.getBytes()[offset + dst.getPosition() - 1] = 1075 (byte) (dst.getBytes()[offset + dst.getPosition() - 1] & 0x7f); 1076 } 1077 } 1078 ord.apply(dst.getBytes(), offset + start, dst.getPosition() - start); 1079 return dst.getPosition() - start; 1080 } 1081 1082 /** 1083 * Encode a blob value using a modified varint encoding scheme. 1084 * @return the number of bytes written. 1085 * @see #encodeBlobVar(PositionedByteRange, byte[], int, int, Order) 1086 */ 1087 public static int encodeBlobVar(PositionedByteRange dst, byte[] val, Order ord) { 1088 return encodeBlobVar(dst, val, 0, null != val ? val.length : 0, ord); 1089 } 1090 1091 /** 1092 * Decode a blob value that was encoded using BlobVar encoding. 
1093 */ 1094 public static byte[] decodeBlobVar(PositionedByteRange src) { 1095 final byte header = src.get(); 1096 if (header == NULL || header == DESCENDING.apply(NULL)) { 1097 return null; 1098 } 1099 assert header == BLOB_VAR || header == DESCENDING.apply(BLOB_VAR); 1100 Order ord = BLOB_VAR == header ? ASCENDING : DESCENDING; 1101 if (src.peek() == ord.apply(TERM)) { 1102 // skip empty input buffer. 1103 src.get(); 1104 return new byte[0]; 1105 } 1106 final int offset = src.getOffset(), start = src.getPosition(); 1107 int end; 1108 byte[] a = src.getBytes(); 1109 for (end = start; (byte) (ord.apply(a[offset + end]) & 0x80) != TERM; end++) 1110 ; 1111 end++; // increment end to 1-past last byte 1112 // create ret buffer using length of encoded data + 1 (header byte) 1113 PositionedByteRange ret = 1114 new SimplePositionedMutableByteRange(blobVarDecodedLength(end - start + 1)); 1115 int s = 6; 1116 byte t = (byte) ((ord.apply(a[offset + start]) << 1) & 0xff); 1117 for (int i = start + 1; i < end; i++) { 1118 if (s == 7) { 1119 ret.put((byte) (t | (ord.apply(a[offset + i]) & 0x7f))); 1120 i++; 1121 // explicitly reset t -- clean up overflow buffer after decoding 1122 // a full cycle and retain assertion condition below. This happens 1123 t = 0; // when the LSB in the last encoded byte is 1. (HBASE-9893) 1124 } else { 1125 ret.put((byte) (t | ((ord.apply(a[offset + i]) & 0x7f) >>> s))); 1126 } 1127 if (i == end) break; 1128 t = (byte) ((ord.apply(a[offset + i]) << (8 - s)) & 0xff); 1129 s = s == 1 ? 7 : s - 1; 1130 } 1131 src.setPosition(end); 1132 assert t == 0 : "Unexpected bits remaining after decoding blob."; 1133 assert ret.getPosition() == ret.getLength() : "Allocated unnecessarily large return buffer."; 1134 return ret.getBytes(); 1135 } 1136 1137 /** 1138 * Encode a Blob value as a byte-for-byte copy. 
BlobCopy encoding in DESCENDING order is NULL 1139 * terminated so as to preserve proper sorting of {@code []} and so it does not support 1140 * {@code 0x00} in the value. 1141 * @return the number of bytes written. 1142 * @throws IllegalArgumentException when {@code ord} is DESCENDING and {@code val} contains a 1143 * {@code 0x00} byte. 1144 */ 1145 public static int encodeBlobCopy(PositionedByteRange dst, byte[] val, int voff, int vlen, 1146 Order ord) { 1147 if (null == val) { 1148 encodeNull(dst, ord); 1149 if (ASCENDING == ord) return 1; 1150 else { 1151 // DESCENDING ordered BlobCopy requires a termination bit to preserve 1152 // sort-order semantics of null values. 1153 dst.put(ord.apply(TERM)); 1154 return 2; 1155 } 1156 } 1157 // Blobs as final entry in a compound key are written unencoded. 1158 assert dst.getRemaining() >= vlen + (ASCENDING == ord ? 1 : 2); 1159 if (DESCENDING == ord) { 1160 for (int i = 0; i < vlen; i++) { 1161 if (TERM == val[voff + i]) { 1162 throw new IllegalArgumentException("0x00 bytes not permitted in value."); 1163 } 1164 } 1165 } 1166 final int offset = dst.getOffset(), start = dst.getPosition(); 1167 dst.put(BLOB_COPY); 1168 dst.put(val, voff, vlen); 1169 // DESCENDING ordered BlobCopy requires a termination bit to preserve 1170 // sort-order semantics of null values. 1171 if (DESCENDING == ord) dst.put(TERM); 1172 ord.apply(dst.getBytes(), offset + start, dst.getPosition() - start); 1173 return dst.getPosition() - start; 1174 } 1175 1176 /** 1177 * Encode a Blob value as a byte-for-byte copy. BlobCopy encoding in DESCENDING order is NULL 1178 * terminated so as to preserve proper sorting of {@code []} and so it does not support 1179 * {@code 0x00} in the value. 1180 * @return the number of bytes written. 1181 * @throws IllegalArgumentException when {@code ord} is DESCENDING and {@code val} contains a 1182 * {@code 0x00} byte. 
1183 * @see #encodeBlobCopy(PositionedByteRange, byte[], int, int, Order) 1184 */ 1185 public static int encodeBlobCopy(PositionedByteRange dst, byte[] val, Order ord) { 1186 return encodeBlobCopy(dst, val, 0, null != val ? val.length : 0, ord); 1187 } 1188 1189 /** 1190 * Decode a Blob value, byte-for-byte copy. 1191 * @see #encodeBlobCopy(PositionedByteRange, byte[], int, int, Order) 1192 */ 1193 public static byte[] decodeBlobCopy(PositionedByteRange src) { 1194 byte header = src.get(); 1195 if (header == NULL || header == DESCENDING.apply(NULL)) { 1196 return null; 1197 } 1198 assert header == BLOB_COPY || header == DESCENDING.apply(BLOB_COPY); 1199 Order ord = header == BLOB_COPY ? ASCENDING : DESCENDING; 1200 final int length = src.getRemaining() - (ASCENDING == ord ? 0 : 1); 1201 byte[] ret = new byte[length]; 1202 src.get(ret); 1203 ord.apply(ret, 0, ret.length); 1204 // DESCENDING ordered BlobCopy requires a termination bit to preserve 1205 // sort-order semantics of null values. 1206 if (DESCENDING == ord) src.get(); 1207 return ret; 1208 } 1209 1210 /** 1211 * Encode a null value. 1212 * @param dst The destination to which encoded digits are written. 1213 * @param ord The {@link Order} to respect while encoding {@code val}. 1214 * @return the number of bytes written. 1215 */ 1216 public static int encodeNull(PositionedByteRange dst, Order ord) { 1217 dst.put(ord.apply(NULL)); 1218 return 1; 1219 } 1220 1221 /** 1222 * Encode an {@code int8} value using the fixed-length encoding. 1223 * @return the number of bytes written. 1224 * @see #encodeInt64(PositionedByteRange, long, Order) 1225 * @see #decodeInt8(PositionedByteRange) 1226 */ 1227 public static int encodeInt8(PositionedByteRange dst, byte val, Order ord) { 1228 final int offset = dst.getOffset(), start = dst.getPosition(); 1229 dst.put(FIXED_INT8).put((byte) (val ^ 0x80)); 1230 ord.apply(dst.getBytes(), offset + start, 2); 1231 return 2; 1232 } 1233 1234 /** 1235 * Decode an {@code int8} value. 
1236 * @see #encodeInt8(PositionedByteRange, byte, Order) 1237 */ 1238 public static byte decodeInt8(PositionedByteRange src) { 1239 final byte header = src.get(); 1240 assert header == FIXED_INT8 || header == DESCENDING.apply(FIXED_INT8); 1241 Order ord = header == FIXED_INT8 ? ASCENDING : DESCENDING; 1242 return (byte) ((ord.apply(src.get()) ^ 0x80) & 0xff); 1243 } 1244 1245 /** 1246 * Encode an {@code int16} value using the fixed-length encoding. 1247 * @return the number of bytes written. 1248 * @see #encodeInt64(PositionedByteRange, long, Order) 1249 * @see #decodeInt16(PositionedByteRange) 1250 */ 1251 public static int encodeInt16(PositionedByteRange dst, short val, Order ord) { 1252 final int offset = dst.getOffset(), start = dst.getPosition(); 1253 dst.put(FIXED_INT16).put((byte) ((val >> 8) ^ 0x80)).put((byte) val); 1254 ord.apply(dst.getBytes(), offset + start, 3); 1255 return 3; 1256 } 1257 1258 /** 1259 * Decode an {@code int16} value. 1260 * @see #encodeInt16(PositionedByteRange, short, Order) 1261 */ 1262 public static short decodeInt16(PositionedByteRange src) { 1263 final byte header = src.get(); 1264 assert header == FIXED_INT16 || header == DESCENDING.apply(FIXED_INT16); 1265 Order ord = header == FIXED_INT16 ? ASCENDING : DESCENDING; 1266 short val = (short) ((ord.apply(src.get()) ^ 0x80) & 0xff); 1267 val = (short) ((val << 8) + (ord.apply(src.get()) & 0xff)); 1268 return val; 1269 } 1270 1271 /** 1272 * Encode an {@code int32} value using the fixed-length encoding. 1273 * @return the number of bytes written. 
1274 * @see #encodeInt64(PositionedByteRange, long, Order) 1275 * @see #decodeInt32(PositionedByteRange) 1276 */ 1277 public static int encodeInt32(PositionedByteRange dst, int val, Order ord) { 1278 final int offset = dst.getOffset(), start = dst.getPosition(); 1279 dst.put(FIXED_INT32).put((byte) ((val >> 24) ^ 0x80)).put((byte) (val >> 16)) 1280 .put((byte) (val >> 8)).put((byte) val); 1281 ord.apply(dst.getBytes(), offset + start, 5); 1282 return 5; 1283 } 1284 1285 /** 1286 * Decode an {@code int32} value. 1287 * @see #encodeInt32(PositionedByteRange, int, Order) 1288 */ 1289 public static int decodeInt32(PositionedByteRange src) { 1290 final byte header = src.get(); 1291 assert header == FIXED_INT32 || header == DESCENDING.apply(FIXED_INT32); 1292 Order ord = header == FIXED_INT32 ? ASCENDING : DESCENDING; 1293 int val = (ord.apply(src.get()) ^ 0x80) & 0xff; 1294 for (int i = 1; i < 4; i++) { 1295 val = (val << 8) + (ord.apply(src.get()) & 0xff); 1296 } 1297 return val; 1298 } 1299 1300 /** 1301 * Encode an {@code int64} value using the fixed-length encoding. 1302 * <p> 1303 * This format ensures that all longs sort in their natural order, as they would sort when using 1304 * signed long comparison. 1305 * </p> 1306 * <p> 1307 * All Longs are serialized to an 8-byte, fixed-width sortable byte format. Serialization is 1308 * performed by inverting the integer sign bit and writing the resulting bytes to the byte array 1309 * in big endian order. The encoded value is prefixed by the {@link #FIXED_INT64} header byte. 1310 * This encoding is designed to handle java language primitives and so Null values are NOT 1311 * supported by this implementation. 
1312 * </p> 1313 * <p> 1314 * For example: 1315 * </p> 1316 * 1317 * <pre> 1318 * Input: 0x0000000000000005 (5) 1319 * Result: 0x288000000000000005 1320 * 1321 * Input: 0xfffffffffffffffb (-4) 1322 * Result: 0x280000000000000004 1323 * 1324 * Input: 0x7fffffffffffffff (Long.MAX_VALUE) 1325 * Result: 0x28ffffffffffffffff 1326 * 1327 * Input: 0x8000000000000000 (Long.MIN_VALUE) 1328 * Result: 0x287fffffffffffffff 1329 * </pre> 1330 * <p> 1331 * This encoding format, and much of this documentation string, is based on Orderly's 1332 * {@code FixedIntWritableRowKey}. 1333 * </p> 1334 * @return the number of bytes written. 1335 * @see #decodeInt64(PositionedByteRange) 1336 */ 1337 public static int encodeInt64(PositionedByteRange dst, long val, Order ord) { 1338 final int offset = dst.getOffset(), start = dst.getPosition(); 1339 dst.put(FIXED_INT64).put((byte) ((val >> 56) ^ 0x80)).put((byte) (val >> 48)) 1340 .put((byte) (val >> 40)).put((byte) (val >> 32)).put((byte) (val >> 24)) 1341 .put((byte) (val >> 16)).put((byte) (val >> 8)).put((byte) val); 1342 ord.apply(dst.getBytes(), offset + start, 9); 1343 return 9; 1344 } 1345 1346 /** 1347 * Decode an {@code int64} value. 1348 * @see #encodeInt64(PositionedByteRange, long, Order) 1349 */ 1350 public static long decodeInt64(PositionedByteRange src) { 1351 final byte header = src.get(); 1352 assert header == FIXED_INT64 || header == DESCENDING.apply(FIXED_INT64); 1353 Order ord = header == FIXED_INT64 ? ASCENDING : DESCENDING; 1354 long val = (ord.apply(src.get()) ^ 0x80) & 0xff; 1355 for (int i = 1; i < 8; i++) { 1356 val = (val << 8) + (ord.apply(src.get()) & 0xff); 1357 } 1358 return val; 1359 } 1360 1361 /** 1362 * Encode a 32-bit floating point value using the fixed-length encoding. Encoding format is 1363 * described at length in {@link #encodeFloat64(PositionedByteRange, double, Order)}. 1364 * @return the number of bytes written. 
1365 * @see #decodeFloat32(PositionedByteRange) 1366 * @see #encodeFloat64(PositionedByteRange, double, Order) 1367 */ 1368 public static int encodeFloat32(PositionedByteRange dst, float val, Order ord) { 1369 final int offset = dst.getOffset(), start = dst.getPosition(); 1370 int i = Float.floatToIntBits(val); 1371 i ^= ((i >> (Integer.SIZE - 1)) | Integer.MIN_VALUE); 1372 dst.put(FIXED_FLOAT32).put((byte) (i >> 24)).put((byte) (i >> 16)).put((byte) (i >> 8)) 1373 .put((byte) i); 1374 ord.apply(dst.getBytes(), offset + start, 5); 1375 return 5; 1376 } 1377 1378 /** 1379 * Decode a 32-bit floating point value using the fixed-length encoding. 1380 * @see #encodeFloat32(PositionedByteRange, float, Order) 1381 */ 1382 public static float decodeFloat32(PositionedByteRange src) { 1383 final byte header = src.get(); 1384 assert header == FIXED_FLOAT32 || header == DESCENDING.apply(FIXED_FLOAT32); 1385 Order ord = header == FIXED_FLOAT32 ? ASCENDING : DESCENDING; 1386 int val = ord.apply(src.get()) & 0xff; 1387 for (int i = 1; i < 4; i++) { 1388 val = (val << 8) + (ord.apply(src.get()) & 0xff); 1389 } 1390 val ^= (~val >> (Integer.SIZE - 1)) | Integer.MIN_VALUE; 1391 return Float.intBitsToFloat(val); 1392 } 1393 1394 /** 1395 * Encode a 64-bit floating point value using the fixed-length encoding. 1396 * <p> 1397 * This format ensures the following total ordering of floating point values: 1398 * Double.NEGATIVE_INFINITY < -Double.MAX_VALUE < ... < -Double.MIN_VALUE < -0.0 < 1399 * +0.0; < Double.MIN_VALUE < ... < Double.MAX_VALUE < Double.POSITIVE_INFINITY < 1400 * Double.NaN 1401 * </p> 1402 * <p> 1403 * Floating point numbers are encoded as specified in IEEE 754. A 64-bit double precision float 1404 * consists of a sign bit, 11-bit unsigned exponent encoded in offset-1023 notation, and a 52-bit 1405 * significand. 
* The format is described further in the
   * <a href="http://en.wikipedia.org/wiki/Double_precision"> Double Precision Floating Point
   * Wikipedia page</a>
   * </p>
   * <p>
   * The value of a normal float is -1 <sup>sign bit</sup> &times; 2<sup>exponent - 1023</sup>
   * &times; 1.significand
   * </p>
   * <p>
   * The IEEE 754 floating point format already preserves sort ordering for positive floating
   * point numbers when the raw bytes are compared in most significant byte order. This is
   * discussed further at
   * <a href="http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm">
   * http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm</a>
   * </p>
   * <p>
   * Thus, we need only ensure that negative numbers sort in the exact opposite order as
   * positive numbers (so that say, negative infinity is less than negative 1), and that all
   * negative numbers compare less than any positive number. To accomplish this, we invert the sign
   * bit of all floating point numbers, and we also invert the exponent and significand bits if the
   * floating point number was negative.
   * </p>
   * <p>
   * More specifically, we first store the floating point bits into a 64-bit long {@code l} using
   * {@link Double#doubleToLongBits}. This method collapses all NaNs into a single, canonical NaN
   * value but otherwise leaves the bits unchanged. We then compute
   * </p>
   *
   * <pre>
   * l ^= (l &gt;&gt; (Long.SIZE - 1)) | Long.MIN_VALUE
   * </pre>
   * <p>
   * which inverts the sign bit and XOR's all other bits with the sign bit itself. Comparing the raw
   * bytes of {@code l} in most significant byte order is equivalent to performing a double
   * precision floating point comparison on the underlying bits (ignoring NaN comparisons, as NaNs
   * don't compare equal to anything when performing floating point comparisons).
1441 * </p> 1442 * <p> 1443 * The resulting long integer is then converted into a byte array by serializing the long one byte 1444 * at a time in most significant byte order. The serialized integer is prefixed by a single header 1445 * byte. All serialized values are 9 bytes in length. 1446 * </p> 1447 * <p> 1448 * This encoding format, and much of this highly detailed documentation string, is based on 1449 * Orderly's {@code DoubleWritableRowKey}. 1450 * </p> 1451 * @return the number of bytes written. 1452 * @see #decodeFloat64(PositionedByteRange) 1453 */ 1454 public static int encodeFloat64(PositionedByteRange dst, double val, Order ord) { 1455 final int offset = dst.getOffset(), start = dst.getPosition(); 1456 long lng = Double.doubleToLongBits(val); 1457 lng ^= ((lng >> (Long.SIZE - 1)) | Long.MIN_VALUE); 1458 dst.put(FIXED_FLOAT64).put((byte) (lng >> 56)).put((byte) (lng >> 48)).put((byte) (lng >> 40)) 1459 .put((byte) (lng >> 32)).put((byte) (lng >> 24)).put((byte) (lng >> 16)) 1460 .put((byte) (lng >> 8)).put((byte) lng); 1461 ord.apply(dst.getBytes(), offset + start, 9); 1462 return 9; 1463 } 1464 1465 /** 1466 * Decode a 64-bit floating point value using the fixed-length encoding. 1467 * @see #encodeFloat64(PositionedByteRange, double, Order) 1468 */ 1469 public static double decodeFloat64(PositionedByteRange src) { 1470 final byte header = src.get(); 1471 assert header == FIXED_FLOAT64 || header == DESCENDING.apply(FIXED_FLOAT64); 1472 Order ord = header == FIXED_FLOAT64 ? ASCENDING : DESCENDING; 1473 long val = ord.apply(src.get()) & 0xff; 1474 for (int i = 1; i < 8; i++) { 1475 val = (val << 8) + (ord.apply(src.get()) & 0xff); 1476 } 1477 val ^= (~val >> (Long.SIZE - 1)) | Long.MIN_VALUE; 1478 return Double.longBitsToDouble(val); 1479 } 1480 1481 /** 1482 * Returns true when {@code src} appears to be positioned an encoded value, false otherwise. 
1483 */ 1484 public static boolean isEncodedValue(PositionedByteRange src) { 1485 return isNull(src) || isNumeric(src) || isFixedInt8(src) || isFixedInt16(src) 1486 || isFixedInt32(src) || isFixedInt64(src) || isFixedFloat32(src) || isFixedFloat64(src) 1487 || isText(src) || isBlobCopy(src) || isBlobVar(src); 1488 } 1489 1490 /** 1491 * Return true when the next encoded value in {@code src} is null, false otherwise. 1492 */ 1493 public static boolean isNull(PositionedByteRange src) { 1494 return NULL == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1495 } 1496 1497 /** 1498 * Return true when the next encoded value in {@code src} uses Numeric encoding, false otherwise. 1499 * {@code NaN}, {@code +/-Inf} are valid Numeric values. 1500 */ 1501 public static boolean isNumeric(PositionedByteRange src) { 1502 byte x = (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1503 return x >= NEG_INF && x <= NAN; 1504 } 1505 1506 /** 1507 * Return true when the next encoded value in {@code src} uses Numeric encoding and is 1508 * {@code Infinite}, false otherwise. 1509 */ 1510 public static boolean isNumericInfinite(PositionedByteRange src) { 1511 byte x = (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1512 return NEG_INF == x || POS_INF == x; 1513 } 1514 1515 /** 1516 * Return true when the next encoded value in {@code src} uses Numeric encoding and is 1517 * {@code NaN}, false otherwise. 1518 */ 1519 public static boolean isNumericNaN(PositionedByteRange src) { 1520 return NAN == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1521 } 1522 1523 /** 1524 * Return true when the next encoded value in {@code src} uses Numeric encoding and is {@code 0}, 1525 * false otherwise. 1526 */ 1527 public static boolean isNumericZero(PositionedByteRange src) { 1528 return ZERO == (-1 == Integer.signum(src.peek()) ? 
DESCENDING : ASCENDING).apply(src.peek()); 1529 } 1530 1531 /** 1532 * Return true when the next encoded value in {@code src} uses fixed-width Int8 encoding, false 1533 * otherwise. 1534 */ 1535 public static boolean isFixedInt8(PositionedByteRange src) { 1536 return FIXED_INT8 1537 == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1538 } 1539 1540 /** 1541 * Return true when the next encoded value in {@code src} uses fixed-width Int16 encoding, false 1542 * otherwise. 1543 */ 1544 public static boolean isFixedInt16(PositionedByteRange src) { 1545 return FIXED_INT16 1546 == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1547 } 1548 1549 /** 1550 * Return true when the next encoded value in {@code src} uses fixed-width Int32 encoding, false 1551 * otherwise. 1552 */ 1553 public static boolean isFixedInt32(PositionedByteRange src) { 1554 return FIXED_INT32 1555 == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1556 } 1557 1558 /** 1559 * Return true when the next encoded value in {@code src} uses fixed-width Int64 encoding, false 1560 * otherwise. 1561 */ 1562 public static boolean isFixedInt64(PositionedByteRange src) { 1563 return FIXED_INT64 1564 == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1565 } 1566 1567 /** 1568 * Return true when the next encoded value in {@code src} uses fixed-width Float32 encoding, false 1569 * otherwise. 1570 */ 1571 public static boolean isFixedFloat32(PositionedByteRange src) { 1572 return FIXED_FLOAT32 1573 == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1574 } 1575 1576 /** 1577 * Return true when the next encoded value in {@code src} uses fixed-width Float64 encoding, false 1578 * otherwise. 1579 */ 1580 public static boolean isFixedFloat64(PositionedByteRange src) { 1581 return FIXED_FLOAT64 1582 == (-1 == Integer.signum(src.peek()) ? 
DESCENDING : ASCENDING).apply(src.peek()); 1583 } 1584 1585 /** 1586 * Return true when the next encoded value in {@code src} uses Text encoding, false otherwise. 1587 */ 1588 public static boolean isText(PositionedByteRange src) { 1589 return TEXT == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1590 } 1591 1592 /** 1593 * Return true when the next encoded value in {@code src} uses BlobVar encoding, false otherwise. 1594 */ 1595 public static boolean isBlobVar(PositionedByteRange src) { 1596 return BLOB_VAR 1597 == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1598 } 1599 1600 /** 1601 * Return true when the next encoded value in {@code src} uses BlobCopy encoding, false otherwise. 1602 */ 1603 public static boolean isBlobCopy(PositionedByteRange src) { 1604 return BLOB_COPY 1605 == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1606 } 1607 1608 /** 1609 * Skip {@code buff}'s position forward over one encoded value. 1610 * @return number of bytes skipped. 1611 */ 1612 public static int skip(PositionedByteRange src) { 1613 final int start = src.getPosition(); 1614 byte header = src.get(); 1615 Order ord = (-1 == Integer.signum(header)) ? 
DESCENDING : ASCENDING; 1616 header = ord.apply(header); 1617 1618 switch (header) { 1619 case NULL: 1620 case NEG_INF: 1621 return 1; 1622 case NEG_LARGE: /* Large negative number: 0x08, ~E, ~M */ 1623 skipVaruint64(src, DESCENDING != ord); 1624 skipSignificand(src, DESCENDING != ord); 1625 return src.getPosition() - start; 1626 case NEG_MED_MIN: /* Medium negative number: 0x13-E, ~M */ 1627 case NEG_MED_MIN + 0x01: 1628 case NEG_MED_MIN + 0x02: 1629 case NEG_MED_MIN + 0x03: 1630 case NEG_MED_MIN + 0x04: 1631 case NEG_MED_MIN + 0x05: 1632 case NEG_MED_MIN + 0x06: 1633 case NEG_MED_MIN + 0x07: 1634 case NEG_MED_MIN + 0x08: 1635 case NEG_MED_MIN + 0x09: 1636 case NEG_MED_MAX: 1637 skipSignificand(src, DESCENDING != ord); 1638 return src.getPosition() - start; 1639 case NEG_SMALL: /* Small negative number: 0x14, -E, ~M */ 1640 skipVaruint64(src, DESCENDING == ord); 1641 skipSignificand(src, DESCENDING != ord); 1642 return src.getPosition() - start; 1643 case ZERO: 1644 return 1; 1645 case POS_SMALL: /* Small positive number: 0x16, ~-E, M */ 1646 skipVaruint64(src, DESCENDING != ord); 1647 skipSignificand(src, DESCENDING == ord); 1648 return src.getPosition() - start; 1649 case POS_MED_MIN: /* Medium positive number: 0x17+E, M */ 1650 case POS_MED_MIN + 0x01: 1651 case POS_MED_MIN + 0x02: 1652 case POS_MED_MIN + 0x03: 1653 case POS_MED_MIN + 0x04: 1654 case POS_MED_MIN + 0x05: 1655 case POS_MED_MIN + 0x06: 1656 case POS_MED_MIN + 0x07: 1657 case POS_MED_MIN + 0x08: 1658 case POS_MED_MIN + 0x09: 1659 case POS_MED_MAX: 1660 skipSignificand(src, DESCENDING == ord); 1661 return src.getPosition() - start; 1662 case POS_LARGE: /* Large positive number: 0x22, E, M */ 1663 skipVaruint64(src, DESCENDING == ord); 1664 skipSignificand(src, DESCENDING == ord); 1665 return src.getPosition() - start; 1666 case POS_INF: 1667 return 1; 1668 case NAN: 1669 return 1; 1670 case FIXED_INT8: 1671 src.setPosition(src.getPosition() + 1); 1672 return src.getPosition() - start; 1673 case 
FIXED_INT16: 1674 src.setPosition(src.getPosition() + 2); 1675 return src.getPosition() - start; 1676 case FIXED_INT32: 1677 src.setPosition(src.getPosition() + 4); 1678 return src.getPosition() - start; 1679 case FIXED_INT64: 1680 src.setPosition(src.getPosition() + 8); 1681 return src.getPosition() - start; 1682 case FIXED_FLOAT32: 1683 src.setPosition(src.getPosition() + 4); 1684 return src.getPosition() - start; 1685 case FIXED_FLOAT64: 1686 src.setPosition(src.getPosition() + 8); 1687 return src.getPosition() - start; 1688 case TEXT: 1689 // for null-terminated values, skip to the end. 1690 do { 1691 header = ord.apply(src.get()); 1692 } while (header != TERM); 1693 return src.getPosition() - start; 1694 case BLOB_VAR: 1695 // read until we find a 0 in the MSB 1696 do { 1697 header = ord.apply(src.get()); 1698 } while ((byte) (header & 0x80) != TERM); 1699 return src.getPosition() - start; 1700 case BLOB_COPY: 1701 if (Order.DESCENDING == ord) { 1702 // if descending, read to termination byte. 1703 do { 1704 header = ord.apply(src.get()); 1705 } while (header != TERM); 1706 return src.getPosition() - start; 1707 } else { 1708 // otherwise, just skip to the end. 1709 src.setPosition(src.getLength()); 1710 return src.getPosition() - start; 1711 } 1712 default: 1713 throw unexpectedHeader(header); 1714 } 1715 } 1716 1717 /** 1718 * Return the number of encoded entries remaining in {@code buff}. The state of {@code buff} is 1719 * not modified through use of this method. 1720 */ 1721 public static int length(PositionedByteRange buff) { 1722 PositionedByteRange b = 1723 new SimplePositionedMutableByteRange(buff.getBytes(), buff.getOffset(), buff.getLength()); 1724 b.setPosition(buff.getPosition()); 1725 int cnt = 0; 1726 for (; isEncodedValue(b); skip(b), cnt++) 1727 ; 1728 return cnt; 1729 } 1730}