001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.juneau.msgpack;
018
019import static org.apache.juneau.commons.utils.IoUtils.*;
020import static org.apache.juneau.commons.utils.ThrowableUtils.*;
021import static org.apache.juneau.msgpack.DataType.*;
022
023import java.io.*;
024
025import org.apache.juneau.parser.*;
026
027/**
028 * Specialized input stream for parsing MessagePack streams.
029 *
030 * <h5 class='section'>Notes:</h5><ul>
031 *    <li class='note'>
032 *       This class is not intended for external use.
033 * </ul>
034 *
035 * <h5 class='section'>See Also:</h5><ul>
036 *    <li class='link'><a class="doclink" href="https://juneau.apache.org/docs/topics/MessagePackBasics">MessagePack Basics</a>
037
038 * </ul>
039 */
040public class MsgPackInputStream extends ParserInputStream {
041
042   // Data type quick-lookup table.
043   // @formatter:off
044   private static final DataType[] TYPES = {
045      /*0x0?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
046      /*0x1?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
047      /*0x2?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
048      /*0x3?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
049      /*0x4?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
050      /*0x5?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
051      /*0x6?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
052      /*0x7?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
053      /*0x8?*/ MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,
054      /*0x9?*/ ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,
055      /*0xA?*/ STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,
056      /*0xB?*/ STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,
057      /*0xC?*/ NULL, INVALID, BOOLEAN, BOOLEAN, BIN, BIN, BIN, EXT, EXT, EXT, FLOAT, DOUBLE, INT, INT, LONG, LONG,
058      /*0xD?*/ INT, INT, INT, LONG, EXT, EXT, EXT, EXT, EXT, STRING, STRING, STRING, ARRAY, ARRAY, MAP, MAP,
059      /*0xE?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
060      /*0xF?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT
061   };
062   // @formatter:on
063   private DataType currentDataType;
064   private long length;
065   private int lastByte;
066   private int extType;
067
068   int pos = 0;
069
070   /**
071    * Constructor.
072    *
073    * @param pipe The parser input.
074    * @throws IOException Thrown by underlying stream.
075    */
076   protected MsgPackInputStream(ParserPipe pipe) throws IOException {
077      super(pipe);
078   }
079
080   /**
081    * Read one byte from the stream.
082    */
083   private int readUInt1() throws IOException {
084      return read();
085   }
086
087   /**
088    * Read two bytes from the stream.
089    */
090   private int readUInt2() throws IOException {
091      return (read() << 8) | read();
092   }
093
094   /**
095    * Read four bytes from the stream.
096    */
097   private long readUInt4() throws IOException {
098      long l = read();
099      l <<= 8;
100      l |= read();
101      l <<= 8;
102      l |= read();
103      l <<= 8;
104      l |= read();
105      return l;
106   }
107
108   /**
109    * Return the extended-format type.
110    * Currently not used.
111    */
112   int getExtType() { return extType; }
113
114   /**
115    * Read a binary field from the stream.
116    */
117   byte[] readBinary() throws IOException {
118      var b = new byte[(int)length];
119      read(b);
120      return b;
121   }
122
123   /**
124    * Read a boolean from the stream.
125    */
126   boolean readBoolean() {
127      return lastByte == TRUE;
128   }
129
130   /**
131    * Reads the data type flag from the stream.
132    *
133    * <p>
134    * This is the byte that indicates what kind of data follows.
135    */
136   DataType readDataType() throws IOException {
137      int i = read();
138      if (i == -1)
139         throw ioex("Unexpected end of file found at position {0}", pos);
140      currentDataType = TYPES[i];
141      switch (currentDataType) {
142         case NULL:
143         case FLOAT: {
144            length = 4;
145            break;
146         }
147         case DOUBLE: {
148            length = 8;
149            break;
150         }
151         case BOOLEAN: {
152            lastByte = i;
153            break;
154         }
155         case INT: {
156            // positive fixnum stores 7-bit positive integer
157            // +--------+
158            // |0XXXXXXX|
159            // +--------+
160            //
161            // negative fixnum stores 5-bit negative integer
162            // +--------+
163            // |111YYYYY|
164            // +--------+
165            //
166            // * 0XXXXXXX is 8-bit unsigned integer
167            // * 111YYYYY is 8-bit signed integer
168            //
169            // uint 8 stores a 8-bit unsigned integer
170            // +--------+--------+
171            // |  0xcc  |ZZZZZZZZ|
172            // +--------+--------+
173            //
174            // uint 16 stores a 16-bit big-endian unsigned integer
175            // +--------+--------+--------+
176            // |  0xcd  |ZZZZZZZZ|ZZZZZZZZ|
177            // +--------+--------+--------+
178            //
179            // uint 32 stores a 32-bit big-endian unsigned integer
180            // +--------+--------+--------+--------+--------+
181            // |  0xce  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|
182            // +--------+--------+--------+--------+--------+
183            //
184            // uint 64 stores a 64-bit big-endian unsigned integer
185            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+
186            // |  0xcf  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|
187            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+
188            //
189            // int 8 stores a 8-bit signed integer
190            // +--------+--------+
191            // |  0xd0  |ZZZZZZZZ|
192            // +--------+--------+
193            //
194            // int 16 stores a 16-bit big-endian signed integer
195            // +--------+--------+--------+
196            // |  0xd1  |ZZZZZZZZ|ZZZZZZZZ|
197            // +--------+--------+--------+
198            //
199            // int 32 stores a 32-bit big-endian signed integer
200            // +--------+--------+--------+--------+--------+
201            // |  0xd2  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|
202            // +--------+--------+--------+--------+--------+
203            //
204            // int 64 stores a 64-bit big-endian signed integer
205            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+
206            // |  0xd3  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|
207            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+
208            lastByte = i;
209            if (i <= POSFIXINT_U)
210               length = 0;
211            else if (i >= NEGFIXINT_L)
212               length = -1;
213            else if (i == INT8 || i == UINT8)
214               length = 1;
215            else if (i == INT16 || i == UINT16)
216               length = 2;
217            else if (i == INT32)
218               length = 4;
219            else
220               length = 0;
221            break;
222         }
223         case LONG: {
224            if (i == UINT32)
225               length = 4;
226            else if (i == INT64 || i == UINT64)
227               length = 8;
228            else
229               length = 0;
230            break;
231         }
232         case STRING: {
233            // fixstr stores a byte array whose length is up to 31 bytes:
234            // +--------+========+
235            // |101XXXXX|  data  |
236            // +--------+========+
237            //
238            // str 8 stores a byte array whose length is up to (2^8)-1 bytes:
239            // +--------+--------+========+
240            // |  0xd9  |YYYYYYYY|  data  |
241            // +--------+--------+========+
242            //
243            // str 16 stores a byte array whose length is up to (2^16)-1 bytes:
244            // +--------+--------+--------+========+
245            // |  0xda  |ZZZZZZZZ|ZZZZZZZZ|  data  |
246            // +--------+--------+--------+========+
247            //
248            // str 32 stores a byte array whose length is up to (2^32)-1 bytes:
249            // +--------+--------+--------+--------+--------+========+
250            // |  0xdb  |AAAAAAAA|AAAAAAAA|AAAAAAAA|AAAAAAAA|  data  |
251            // +--------+--------+--------+--------+--------+========+
252            //
253            // where
254            // * XXXXX is a 5-bit unsigned integer which represents N
255            // * YYYYYYYY is a 8-bit unsigned integer which represents N
256            // * ZZZZZZZZ_ZZZZZZZZ is a 16-bit big-endian unsigned integer which represents N
257            // * AAAAAAAA_AAAAAAAA_AAAAAAAA_AAAAAAAA is a 32-bit big-endian unsigned integer which represents N
258            // * N is the length of data
259            if (i <= FIXSTR_U)
260               length = i & 0x1F;
261            else if (i == STR8)
262               length = readUInt1();
263            else if (i == STR16)
264               length = readUInt2();
265            else
266               length = readUInt4();
267            break;
268         }
269         case ARRAY: {
270            // fixarray stores an array whose length is up to 15 elements:
271            // +--------+~~~~~~~~~~~~~~~~~+
272            // |1001XXXX|    N objects    |
273            // +--------+~~~~~~~~~~~~~~~~~+
274            //
275            // array 16 stores an array whose length is up to (2^16)-1 elements:
276            // +--------+--------+--------+~~~~~~~~~~~~~~~~~+
277            // |  0xdc  |YYYYYYYY|YYYYYYYY|    N objects    |
278            // +--------+--------+--------+~~~~~~~~~~~~~~~~~+
279            //
280            // array 32 stores an array whose length is up to (2^32)-1 elements:
281            // +--------+--------+--------+--------+--------+~~~~~~~~~~~~~~~~~+
282            // |  0xdd  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|    N objects    |
283            // +--------+--------+--------+--------+--------+~~~~~~~~~~~~~~~~~+
284            //
285            // where
286            // * XXXX is a 4-bit unsigned integer which represents N
287            // * YYYYYYYY_YYYYYYYY is a 16-bit big-endian unsigned integer which represents N
288            // * ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ is a 32-bit big-endian unsigned integer which represents N
289            //     N is the size of a array
290            if (i <= FIXARRAY_U)
291               length = i & 0x0F;
292            else if (i == ARRAY16)
293               length = readUInt2();
294            else
295               length = readUInt4();
296            break;
297         }
298         case BIN: {
299            // bin 8 stores a byte array whose length is up to (2^8)-1 bytes:
300            // +--------+--------+========+
301            // |  0xc4  |XXXXXXXX|  data  |
302            // +--------+--------+========+
303            //
304            // bin 16 stores a byte array whose length is up to (2^16)-1 bytes:
305            // +--------+--------+--------+========+
306            // |  0xc5  |YYYYYYYY|YYYYYYYY|  data  |
307            // +--------+--------+--------+========+
308            //
309            // bin 32 stores a byte array whose length is up to (2^32)-1 bytes:
310            // +--------+--------+--------+--------+--------+========+
311            // |  0xc6  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|  data  |
312            // +--------+--------+--------+--------+--------+========+
313            //
314            // where
315            // * XXXXXXXX is a 8-bit unsigned integer which represents N
316            // * YYYYYYYY_YYYYYYYY is a 16-bit big-endian unsigned integer which represents N
317            // * ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ is a 32-bit big-endian unsigned integer which represents N
318            // * N is the length of data
319            if (i == BIN8)
320               length = readUInt1();
321            else if (i == BIN16)
322               length = readUInt2();
323            else
324               length = readUInt4();
325            break;
326         }
327         case EXT: {
328            // fixext 1 stores an integer and a byte array whose length is 1 byte
329            // +--------+--------+--------+
330            // |  0xd4  |  type  |  data  |
331            // +--------+--------+--------+
332            //
333            // fixext 2 stores an integer and a byte array whose length is 2 bytes
334            // +--------+--------+--------+--------+
335            // |  0xd5  |  type  |       data      |
336            // +--------+--------+--------+--------+
337            //
338            // fixext 4 stores an integer and a byte array whose length is 4 bytes
339            // +--------+--------+--------+--------+--------+--------+
340            // |  0xd6  |  type  |                data               |
341            // +--------+--------+--------+--------+--------+--------+
342            //
343            // fixext 8 stores an integer and a byte array whose length is 8 bytes
344            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
345            // |  0xd7  |  type  |                                  data                                 |
346            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
347            //
348            // fixext 16 stores an integer and a byte array whose length is 16 bytes
349            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
350            // |  0xd8  |  type  |                                  data
351            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
352            // +--------+--------+--------+--------+--------+--------+--------+--------+
353            //                               data (cont.)                              |
354            // +--------+--------+--------+--------+--------+--------+--------+--------+
355            //
356            // ext 8 stores an integer and a byte array whose length is up to (2^8)-1 bytes:
357            // +--------+--------+--------+========+
358            // |  0xc7  |XXXXXXXX|  type  |  data  |
359            // +--------+--------+--------+========+
360            //
361            // ext 16 stores an integer and a byte array whose length is up to (2^16)-1 bytes:
362            // +--------+--------+--------+--------+========+
363            // |  0xc8  |YYYYYYYY|YYYYYYYY|  type  |  data  |
364            // +--------+--------+--------+--------+========+
365            //
366            // ext 32 stores an integer and a byte array whose length is up to (2^32)-1 bytes:
367            // +--------+--------+--------+--------+--------+--------+========+
368            // |  0xc9  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|  type  |  data  |
369            // +--------+--------+--------+--------+--------+--------+========+
370            //
371            // where
372            // * XXXXXXXX is a 8-bit unsigned integer which represents N
373            // * YYYYYYYY_YYYYYYYY is a 16-bit big-endian unsigned integer which represents N
374            // * ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ is a big-endian 32-bit unsigned integer which represents N
375            // * N is a length of data
376            // * type is a signed 8-bit signed integer
377            // * type < 0 is reserved for future extension including 2-byte type information
378            if (i == FIXEXT1)
379               length = 1;
380            else if (i == FIXEXT2)
381               length = 2;
382            else if (i == FIXEXT4)
383               length = 4;
384            else if (i == FIXEXT8)
385               length = 8;
386            else if (i == FIXEXT16)
387               length = 16;
388            else if (i == EXT8)
389               length = readUInt1();
390            else if (i == EXT16)
391               length = readUInt2();
392            else if (i == EXT32)
393               length = readUInt4();
394            extType = read();
395
396            break;
397         }
398         case MAP: {
399            // fixmap stores a map whose length is up to 15 elements
400            // +--------+~~~~~~~~~~~~~~~~~+
401            // |1000XXXX|   N*2 objects   |
402            // +--------+~~~~~~~~~~~~~~~~~+
403            //
404            // map 16 stores a map whose length is up to (2^16)-1 elements
405            // +--------+--------+--------+~~~~~~~~~~~~~~~~~+
406            // |  0xde  |YYYYYYYY|YYYYYYYY|   N*2 objects   |
407            // +--------+--------+--------+~~~~~~~~~~~~~~~~~+
408            //
409            // map 32 stores a map whose length is up to (2^32)-1 elements
410            // +--------+--------+--------+--------+--------+~~~~~~~~~~~~~~~~~+
411            // |  0xdf  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|   N*2 objects   |
412            // +--------+--------+--------+--------+--------+~~~~~~~~~~~~~~~~~+
413            //
414            // where
415            // * XXXX is a 4-bit unsigned integer which represents N
416            // * YYYYYYYY_YYYYYYYY is a 16-bit big-endian unsigned integer which represents N
417            // * ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ is a 32-bit big-endian unsigned integer which represents N
418            // * N is the size of a map
419            // * odd elements in objects are keys of a map
420            // * the next element of a key is its associated value
421            if (i <= FIXMAP_U)
422               length = i & 0x0F;
423            else if (i == MAP16)
424               length = readUInt2();
425            else
426               length = readUInt4();
427            break;
428         }
429         default:
430            throw ioex("Invalid flag 0xC1 detected in stream.");
431      }
432      return currentDataType;
433   }
434
435   /**
436    * Read a double from the stream.
437    */
438   double readDouble() throws IOException {
439      return Double.longBitsToDouble(readLong());
440   }
441
442   /**
443    * Read a float from the stream.
444    */
445   float readFloat() throws IOException {
446      return Float.intBitsToFloat(readInt());
447   }
448
449   /**
450    * Read an integer from the stream.
451    */
452   int readInt() throws IOException {
453      if (length == 0)
454         return lastByte;
455      if (length == 1)
456         return read();
457      if (length == 2)
458         return (read() << 8) | read();
459      int i = read();
460      i <<= 8;
461      i |= read();
462      i <<= 8;
463      i |= read();
464      i <<= 8;
465      i |= read();
466      return i;
467   }
468
469   /**
470    * Returns the length value for the field.
471    *
472    * <p>
473    * For ints/floats/bins/strings, this is the number of bytes that the field takes up (minus the data-type flag).
474    * For arrays, it's the number of array entries.
475    * For maps, it's the number of map entries.
476    */
477   long readLength() {
478      return length;
479   }
480
481   /**
482    * Read 64-bit long from the stream.
483    */
484   long readLong() throws IOException {
485      if (length == 4)
486         return readUInt4();
487      long l = read();
488      l <<= 8;
489      l |= read();
490      l <<= 8;
491      l |= read();
492      l <<= 8;
493      l |= read();
494      l <<= 8;
495      l |= read();
496      l <<= 8;
497      l |= read();
498      l <<= 8;
499      l |= read();
500      l <<= 8;
501      l |= read();
502      return l;
503   }
504
505   /**
506    * Read a string from the stream.
507    */
508   String readString() throws IOException {
509      return new String(readBinary(), UTF8);
510   }
511}