001// ***************************************************************************************************************************
002// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.  See the NOTICE file *
003// * distributed with this work for additional information regarding copyright ownership.  The ASF licenses this file        *
004// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance            *
005// * with the License.  You may obtain a copy of the License at                                                              *
006// *                                                                                                                         *
007// *  http://www.apache.org/licenses/LICENSE-2.0                                                                             *
008// *                                                                                                                         *
009// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an  *
010// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the License for the        *
011// * specific language governing permissions and limitations under the License.                                              *
012// ***************************************************************************************************************************
013package org.apache.juneau.msgpack;
014
015import static org.apache.juneau.internal.IOUtils.*;
016import static org.apache.juneau.msgpack.DataType.*;
017
018import java.io.*;
019
020import org.apache.juneau.parser.*;
021
022/**
023 * Specialized input stream for parsing MessagePack streams.
024 * 
025 * <h5 class='section'>Notes:</h5>
026 * <ul class='spaced-list'>
027 *    <li>
028 *       This class is not intended for external use.
029 * </ul>
030 */
031public final class MsgPackInputStream extends InputStream {
032
033   private final ParserPipe pipe;
034   private final InputStream is;
035   private DataType currentDataType;
036   private long length;
037   private int lastByte;
038   private int extType;
039   int pos = 0;
040
041   // Data type quick-lookup table.
042   private static final DataType[] TYPES = new DataType[] {
043      /*0x0?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
044      /*0x1?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
045      /*0x2?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
046      /*0x3?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
047      /*0x4?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
048      /*0x5?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
049      /*0x6?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
050      /*0x7?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
051      /*0x8?*/ MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,MAP,
052      /*0x9?*/ ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,ARRAY,
053      /*0xA?*/ STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,
054      /*0xB?*/ STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,
055      /*0xC?*/ NULL, INVALID, BOOLEAN, BOOLEAN, BIN, BIN, BIN, EXT, EXT, EXT, FLOAT, DOUBLE, INT, INT, LONG, LONG,
056      /*0xD?*/ INT, INT, INT, LONG, EXT, EXT, EXT, EXT, EXT, STRING, STRING, STRING, ARRAY, ARRAY, MAP, MAP,
057      /*0xE?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,
058      /*0xF?*/ INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT,INT
059   };
060
061   /**
062    * Constructor.
063    * 
064    * @param pipe The parser input.
065    * @throws Exception
066    */
067   protected MsgPackInputStream(ParserPipe pipe) throws Exception {
068      this.pipe = pipe;
069      this.is = pipe.getInputStream();
070   }
071
072   @Override /* InputStream */
073   public int read() throws IOException {
074      int i = is.read();
075      if (i > 0)
076         pos++;
077      return i;
078   }
079
080   /**
081    * Reads the data type flag from the stream.
082    * 
083    * <p>
084    * This is the byte that indicates what kind of data follows.
085    */
086   DataType readDataType() throws IOException {
087      int i = read();
088      if (i == -1)
089         throw new IOException("Unexpected end of file found at position " + pos);
090      currentDataType = TYPES[i];
091      switch (currentDataType) {
092         case NULL:
093         case FLOAT: {
094            length = 4;
095            break;
096         }
097         case DOUBLE: {
098            length = 8;
099            break;
100         }
101         case BOOLEAN: {
102            lastByte = i;
103            break;
104         }
105         case INT: {
106            // positive fixnum stores 7-bit positive integer
107            // +--------+
108            // |0XXXXXXX|
109            // +--------+
110            //
111            // negative fixnum stores 5-bit negative integer
112            // +--------+
113            // |111YYYYY|
114            // +--------+
115            //
116            // * 0XXXXXXX is 8-bit unsigned integer
117            // * 111YYYYY is 8-bit signed integer
118            //
119            // uint 8 stores a 8-bit unsigned integer
120            // +--------+--------+
121            // |  0xcc  |ZZZZZZZZ|
122            // +--------+--------+
123            //
124            // uint 16 stores a 16-bit big-endian unsigned integer
125            // +--------+--------+--------+
126            // |  0xcd  |ZZZZZZZZ|ZZZZZZZZ|
127            // +--------+--------+--------+
128            //
129            // uint 32 stores a 32-bit big-endian unsigned integer
130            // +--------+--------+--------+--------+--------+
131            // |  0xce  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|
132            // +--------+--------+--------+--------+--------+
133            //
134            // uint 64 stores a 64-bit big-endian unsigned integer
135            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+
136            // |  0xcf  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|
137            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+
138            //
139            // int 8 stores a 8-bit signed integer
140            // +--------+--------+
141            // |  0xd0  |ZZZZZZZZ|
142            // +--------+--------+
143            //
144            // int 16 stores a 16-bit big-endian signed integer
145            // +--------+--------+--------+
146            // |  0xd1  |ZZZZZZZZ|ZZZZZZZZ|
147            // +--------+--------+--------+
148            //
149            // int 32 stores a 32-bit big-endian signed integer
150            // +--------+--------+--------+--------+--------+
151            // |  0xd2  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|
152            // +--------+--------+--------+--------+--------+
153            //
154            // int 64 stores a 64-bit big-endian signed integer
155            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+
156            // |  0xd3  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|
157            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+
158            lastByte = i;
159            if (i <= POSFIXINT_U)
160               length = 0;
161            else if (i >= NEGFIXINT_L)
162               length = -1;
163            else if (i == INT8 || i == UINT8)
164               length = 1;
165            else if (i == INT16 || i == UINT16)
166               length = 2;
167            else if (i == INT32)
168               length = 4;
169            else
170               length = 0;
171            break;
172         }
173         case LONG: {
174            if (i == UINT32)
175               length = 4;
176            else if (i == INT64 || i == UINT64)
177               length = 8;
178            else
179               length = 0;
180            break;
181         }
182         case STRING:{
183            // fixstr stores a byte array whose length is up to 31 bytes:
184            // +--------+========+
185            // |101XXXXX|  data  |
186            // +--------+========+
187            //
188            // str 8 stores a byte array whose length is up to (2^8)-1 bytes:
189            // +--------+--------+========+
190            // |  0xd9  |YYYYYYYY|  data  |
191            // +--------+--------+========+
192            //
193            // str 16 stores a byte array whose length is up to (2^16)-1 bytes:
194            // +--------+--------+--------+========+
195            // |  0xda  |ZZZZZZZZ|ZZZZZZZZ|  data  |
196            // +--------+--------+--------+========+
197            //
198            // str 32 stores a byte array whose length is up to (2^32)-1 bytes:
199            // +--------+--------+--------+--------+--------+========+
200            // |  0xdb  |AAAAAAAA|AAAAAAAA|AAAAAAAA|AAAAAAAA|  data  |
201            // +--------+--------+--------+--------+--------+========+
202            //
203            // where
204            // * XXXXX is a 5-bit unsigned integer which represents N
205            // * YYYYYYYY is a 8-bit unsigned integer which represents N
206            // * ZZZZZZZZ_ZZZZZZZZ is a 16-bit big-endian unsigned integer which represents N
207            // * AAAAAAAA_AAAAAAAA_AAAAAAAA_AAAAAAAA is a 32-bit big-endian unsigned integer which represents N
208            // * N is the length of data
209            if (i <= FIXSTR_U)
210               length = i & 0x1F;
211            else if (i == STR8)
212               length = readUInt1();
213            else if (i == STR16)
214               length = readUInt2();
215            else
216               length = readUInt4();
217            break;
218         }
219         case ARRAY: {
220            // fixarray stores an array whose length is up to 15 elements:
221            // +--------+~~~~~~~~~~~~~~~~~+
222            // |1001XXXX|    N objects    |
223            // +--------+~~~~~~~~~~~~~~~~~+
224            //
225            // array 16 stores an array whose length is up to (2^16)-1 elements:
226            // +--------+--------+--------+~~~~~~~~~~~~~~~~~+
227            // |  0xdc  |YYYYYYYY|YYYYYYYY|    N objects    |
228            // +--------+--------+--------+~~~~~~~~~~~~~~~~~+
229            //
230            // array 32 stores an array whose length is up to (2^32)-1 elements:
231            // +--------+--------+--------+--------+--------+~~~~~~~~~~~~~~~~~+
232            // |  0xdd  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|    N objects    |
233            // +--------+--------+--------+--------+--------+~~~~~~~~~~~~~~~~~+
234            //
235            // where
236            // * XXXX is a 4-bit unsigned integer which represents N
237            // * YYYYYYYY_YYYYYYYY is a 16-bit big-endian unsigned integer which represents N
238            // * ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ is a 32-bit big-endian unsigned integer which represents N
239            //     N is the size of a array
240            if (i <= FIXARRAY_U)
241               length = i & 0x0F;
242            else if (i == ARRAY16)
243               length = readUInt2();
244            else
245               length = readUInt4();
246            break;
247         }
248         case BIN:{
249            // bin 8 stores a byte array whose length is up to (2^8)-1 bytes:
250            // +--------+--------+========+
251            // |  0xc4  |XXXXXXXX|  data  |
252            // +--------+--------+========+
253            //
254            // bin 16 stores a byte array whose length is up to (2^16)-1 bytes:
255            // +--------+--------+--------+========+
256            // |  0xc5  |YYYYYYYY|YYYYYYYY|  data  |
257            // +--------+--------+--------+========+
258            //
259            // bin 32 stores a byte array whose length is up to (2^32)-1 bytes:
260            // +--------+--------+--------+--------+--------+========+
261            // |  0xc6  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|  data  |
262            // +--------+--------+--------+--------+--------+========+
263            //
264            // where
265            // * XXXXXXXX is a 8-bit unsigned integer which represents N
266            // * YYYYYYYY_YYYYYYYY is a 16-bit big-endian unsigned integer which represents N
267            // * ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ is a 32-bit big-endian unsigned integer which represents N
268            // * N is the length of data
269            if (i == BIN8)
270               length = readUInt1();
271            else if (i == BIN16)
272               length = readUInt2();
273            else
274               length = readUInt4();
275            break;
276         }
277         case EXT:{
278            // fixext 1 stores an integer and a byte array whose length is 1 byte
279            // +--------+--------+--------+
280            // |  0xd4  |  type  |  data  |
281            // +--------+--------+--------+
282            //
283            // fixext 2 stores an integer and a byte array whose length is 2 bytes
284            // +--------+--------+--------+--------+
285            // |  0xd5  |  type  |       data      |
286            // +--------+--------+--------+--------+
287            //
288            // fixext 4 stores an integer and a byte array whose length is 4 bytes
289            // +--------+--------+--------+--------+--------+--------+
290            // |  0xd6  |  type  |                data               |
291            // +--------+--------+--------+--------+--------+--------+
292            //
293            // fixext 8 stores an integer and a byte array whose length is 8 bytes
294            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
295            // |  0xd7  |  type  |                                  data                                 |
296            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
297            //
298            // fixext 16 stores an integer and a byte array whose length is 16 bytes
299            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
300            // |  0xd8  |  type  |                                  data
301            // +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
302            // +--------+--------+--------+--------+--------+--------+--------+--------+
303            //                               data (cont.)                              |
304            // +--------+--------+--------+--------+--------+--------+--------+--------+
305            //
306            // ext 8 stores an integer and a byte array whose length is up to (2^8)-1 bytes:
307            // +--------+--------+--------+========+
308            // |  0xc7  |XXXXXXXX|  type  |  data  |
309            // +--------+--------+--------+========+
310            //
311            // ext 16 stores an integer and a byte array whose length is up to (2^16)-1 bytes:
312            // +--------+--------+--------+--------+========+
313            // |  0xc8  |YYYYYYYY|YYYYYYYY|  type  |  data  |
314            // +--------+--------+--------+--------+========+
315            //
316            // ext 32 stores an integer and a byte array whose length is up to (2^32)-1 bytes:
317            // +--------+--------+--------+--------+--------+--------+========+
318            // |  0xc9  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|  type  |  data  |
319            // +--------+--------+--------+--------+--------+--------+========+
320            //
321            // where
322            // * XXXXXXXX is a 8-bit unsigned integer which represents N
323            // * YYYYYYYY_YYYYYYYY is a 16-bit big-endian unsigned integer which represents N
324            // * ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ is a big-endian 32-bit unsigned integer which represents N
325            // * N is a length of data
326            // * type is a signed 8-bit signed integer
327            // * type < 0 is reserved for future extension including 2-byte type information
328            if (i == FIXEXT1)
329               length = 1;
330            else if (i == FIXEXT2)
331               length = 2;
332            else if (i == FIXEXT4)
333               length = 4;
334            else if (i == FIXEXT8)
335               length = 8;
336            else if (i == FIXEXT16)
337               length = 16;
338            else if (i == EXT8)
339               length = readUInt1();
340            else if (i == EXT16)
341                  length = readUInt2();
342            else if (i == EXT32)
343               length = readUInt4();
344            extType = is.read();
345
346            break;
347         }
348         case MAP:{
349            // fixmap stores a map whose length is up to 15 elements
350            // +--------+~~~~~~~~~~~~~~~~~+
351            // |1000XXXX|   N*2 objects   |
352            // +--------+~~~~~~~~~~~~~~~~~+
353            //
354            // map 16 stores a map whose length is up to (2^16)-1 elements
355            // +--------+--------+--------+~~~~~~~~~~~~~~~~~+
356            // |  0xde  |YYYYYYYY|YYYYYYYY|   N*2 objects   |
357            // +--------+--------+--------+~~~~~~~~~~~~~~~~~+
358            //
359            // map 32 stores a map whose length is up to (2^32)-1 elements
360            // +--------+--------+--------+--------+--------+~~~~~~~~~~~~~~~~~+
361            // |  0xdf  |ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|ZZZZZZZZ|   N*2 objects   |
362            // +--------+--------+--------+--------+--------+~~~~~~~~~~~~~~~~~+
363            //
364            // where
365            // * XXXX is a 4-bit unsigned integer which represents N
366            // * YYYYYYYY_YYYYYYYY is a 16-bit big-endian unsigned integer which represents N
367            // * ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ_ZZZZZZZZ is a 32-bit big-endian unsigned integer which represents N
368            // * N is the size of a map
369            // * odd elements in objects are keys of a map
370            // * the next element of a key is its associated value
371            if (i <= FIXMAP_U)
372               length = i & 0x0F;
373            else if (i == MAP16)
374               length = readUInt2();
375            else
376               length = readUInt4();
377            break;
378         }
379         default:
380            throw new IOException("Invalid flag 0xC1 detected in stream.");
381      }
382      return currentDataType;
383   }
384
385   /**
386    * Returns the length value for the field.
387    * 
388    * <p>
389    * For ints/floats/bins/strings, this is the number of bytes that the field takes up (minus the data-type flag).
390    * For arrays, it's the number of array entries.
391    * For maps, it's the number of map entries.
392    */
393   long readLength() {
394      return length;
395   }
396
397   /**
398    * Read a boolean from the stream.
399    */
400   boolean readBoolean() {
401      return lastByte == TRUE;
402   }
403
404   /**
405    * Read a string from the stream.
406    */
407   String readString() throws IOException {
408      return new String(readBinary(), UTF8);
409   }
410
411   /**
412    * Read a binary field from the stream.
413    */
414   byte[] readBinary() throws IOException {
415      byte[] b = new byte[(int)length];
416      is.read(b);
417      return b;
418   }
419
420   /**
421    * Read an integer from the stream.
422    */
423   int readInt() throws IOException {
424      if (length == 0)
425         return lastByte;
426      if (length == 1)
427         return is.read();
428      if (length == 2)
429         return (is.read() << 8) | is.read();
430      int i = is.read(); i <<= 8; i |= is.read(); i <<= 8; i |= is.read(); i <<= 8; i |= is.read();
431      return i;
432   }
433
434   /**
435    * Read a float from the stream.
436    */
437   float readFloat() throws IOException {
438      return Float.intBitsToFloat(readInt());
439   }
440
441   /**
442    * Read a double from the stream.
443    */
444   double readDouble() throws IOException {
445      return Double.longBitsToDouble(readLong());
446   }
447
448   /**
449    * Read 64-bit long from the stream.
450    */
451   long readLong() throws IOException {
452      if (length == 4)
453         return readUInt4();
454      long l = is.read(); l <<= 8; l |= is.read(); l <<= 8; l |= is.read(); l <<= 8; l |= is.read(); l <<= 8; l |= is.read(); l <<= 8; l |= is.read(); l <<= 8; l |= is.read(); l <<= 8; l |= is.read();
455      return l;
456   }
457
458   /**
459    * Return the extended-format type.
460    * Currently not used.
461    */
462   int getExtType() {
463      return extType;
464   }
465
466   /**
467    * Read one byte from the stream.
468    */
469   private int readUInt1() throws IOException {
470      return is.read();
471   }
472
473   /**
474    * Read two bytes from the stream.
475    */
476   private int readUInt2() throws IOException {
477      return (is.read() << 8) | is.read();
478   }
479
480   /**
481    * Read four bytes from the stream.
482    */
483   private long readUInt4() throws IOException {
484      long l = is.read(); l <<= 8; l |= is.read(); l <<= 8; l |= is.read(); l <<= 8; l |= is.read();
485      return l;
486   }
487
488   /**
489    * Return the current read position in the stream (i.e. number of bytes we've read so far).
490    */
491   int getPosition() {
492      return pos;
493   }
494
495   /**
496    * Returns the pipe that was passed into the constructor.
497    * 
498    * @return The pipe that was passed into the constructor.
499    */
500   public ParserPipe getPipe() {
501      return pipe;
502   }
503}