View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.juneau.junit.bct;
18  
19  import static org.apache.juneau.junit.bct.BctAssertions.*;
20  import static org.apache.juneau.junit.bct.NestedTokenizer.*;
21  import static org.junit.jupiter.api.Assertions.*;
22  
23  import java.util.*;
24  
25  import org.apache.juneau.*;
26  import org.junit.jupiter.api.*;
27  
28  /**
29   * Comprehensive unit tests for {@link NestedTokenizer} class.
30   *
31   * <p>Tests cover all aspects of the state machine parser including:</p>
32   * <ul>
33   *    <li>Simple token parsing</li>
34   *    <li>Nested token structures</li>
35   *    <li>Escape sequence handling</li>
36   *    <li>Deep nesting scenarios</li>
37   *    <li>Edge cases and error conditions</li>
38   *    <li>Token object functionality</li>
39   * </ul>
40   */
41  class NestedTokenizer_Test extends TestBase {
42  
43  	//------------------------------------------------------------------------------------------------------------------
44  	// Basic tokenization tests
45  	//------------------------------------------------------------------------------------------------------------------
46  
47  	@Test void a01_simpleTokens() {
48  		new NestedTokenizer();
49  
50  		// Single token
51  		var tokens = tokenize("foo");
52  		assertList(tokens, token("foo"));
53  
54  		// Multiple tokens
55  		tokens = tokenize("foo,bar,baz");
56  		assertList(tokens, token("foo"), token("bar"), token("baz"));
57  
58  		// Tokens with whitespace
59  		tokens = tokenize("  foo  ,  bar  ,  baz  ");
60  		assertList(tokens, token("foo"), token("bar"), token("baz"));
61  	}
62  
63  	@Test void a02_nestedTokens() {
64  		// Simple nested structure
65  		var tokens = tokenize("foo{a,b}");
66  		assertEquals(1, tokens.size());
67  		assertToken(tokens.get(0), "foo", "a", "b");
68  
69  		// Multiple tokens with nesting
70  		tokens = tokenize("foo{a,b},bar{c,d}");
71  		assertEquals(2, tokens.size());
72  		assertToken(tokens.get(0), "foo", "a", "b");
73  		assertToken(tokens.get(1), "bar", "c", "d");
74  
75  		// Empty nested content
76  		tokens = tokenize("foo{}");
77  		assertEquals(1, tokens.size());
78  		assertToken(tokens.get(0), "foo");
79  	}
80  
81  	@Test void a03_deepNesting() {
82  		// Two levels deep
83  		var tokens = tokenize("root{level1{a,b},level2}");
84  		assertEquals(1, tokens.size());
85  		var root = tokens.get(0);
86  		assertEquals("root", root.getValue());
87  		assertEquals(2, root.getNested().size());
88  		assertToken(root.getNested().get(0), "level1", "a", "b");
89  		assertToken(root.getNested().get(1), "level2");
90  
91  		// Three levels deep
92  		tokens = tokenize("root{level1{level2{a,b}}}");
93  		assertEquals(1, tokens.size());
94  		root = tokens.get(0);
95  		assertEquals("root", root.getValue());
96  		assertEquals(1, root.getNested().size());
97  		var level1 = root.getNested().get(0);
98  		assertEquals("level1", level1.getValue());
99  		assertEquals(1, level1.getNested().size());
100 		assertToken(level1.getNested().get(0), "level2", "a", "b");
101 	}
102 
103 	@Test void a04_escapeSequences() {
104 		// Escaped comma
105 		var tokens = tokenize("foo\\,bar");
106 		assertList(tokens, token("foo,bar"));
107 
108 		// Escaped braces
109 		tokens = tokenize("foo\\{bar\\}");
110 		assertList(tokens, token("foo{bar}"));
111 
112 		// Escaped backslash
113 		tokens = tokenize("foo\\\\bar");
114 		assertList(tokens, token("foo\\bar"));
115 
116 		// Multiple escapes
117 		tokens = tokenize("foo\\,bar\\{baz\\}");
118 		assertList(tokens, token("foo,bar{baz}"));
119 
120 		// Escape in nested content
121 		tokens = tokenize("root{foo\\,bar,baz}");
122 		assertEquals(1, tokens.size());
123 		assertToken(tokens.get(0), "root", "foo,bar", "baz");
124 	}
125 
126 	//------------------------------------------------------------------------------------------------------------------
127 	// Complex scenarios
128 	//------------------------------------------------------------------------------------------------------------------
129 
130 	@Test void b01_complexNestedStructures() {
131 		// Real-world example: user configuration
132 		var tokens = tokenize("user{name,email,address{street,city,zipcode{main,plus4}}},config{timeout,retries}");
133 		assertEquals(2, tokens.size());
134 
135 		// Validate user token
136 		var user = tokens.get(0);
137 		assertEquals("user", user.getValue());
138 		assertEquals(3, user.getNested().size());
139 		assertEquals("name", user.getNested().get(0).getValue());
140 		assertEquals("email", user.getNested().get(1).getValue());
141 
142 		var address = user.getNested().get(2);
143 		assertEquals("address", address.getValue());
144 		assertEquals(3, address.getNested().size());
145 		assertEquals("street", address.getNested().get(0).getValue());
146 		assertEquals("city", address.getNested().get(1).getValue());
147 
148 		var zipcode = address.getNested().get(2);
149 		assertEquals("zipcode", zipcode.getValue());
150 		assertEquals(2, zipcode.getNested().size());
151 		assertEquals("main", zipcode.getNested().get(0).getValue());
152 		assertEquals("plus4", zipcode.getNested().get(1).getValue());
153 
154 		// Validate config token
155 		var config = tokens.get(1);
156 		assertToken(config, "config", "timeout", "retries");
157 	}
158 
159 	@Test void b02_mixedEscapingAndNesting() {
160 		// Escaped characters within nested structures
161 		var tokens = tokenize("data{key\\,name,value\\{test\\}},info{desc\\,important}");
162 		assertEquals(2, tokens.size());
163 
164 		assertToken(tokens.get(0), "data", "key,name", "value{test}");
165 		assertToken(tokens.get(1), "info", "desc,important");
166 	}
167 
168 	@Test void b03_extremeNesting() {
169 		// Very deep nesting
170 		var tokens = tokenize("l1{l2{l3{l4{l5{value}}}}}");
171 		assertEquals(1, tokens.size());
172 
173 		var current = tokens.get(0);
174 		for (int i = 1; i <= 5; i++) {
175 			assertEquals("l" + i, current.getValue());
176 			assertEquals(1, current.getNested().size());
177 			current = current.getNested().get(0);
178 		}
179 		assertEquals("value", current.getValue());
180 		assertFalse(current.hasNested());
181 	}
182 
183 	//------------------------------------------------------------------------------------------------------------------
184 	// Edge cases and error conditions
185 	//------------------------------------------------------------------------------------------------------------------
186 
187 	@Test void c01_edgeCases() {
188 		// Single character
189 		var tokens = tokenize("a");
190 		assertList(tokens, token("a"));
191 
192 		// Just comma
193 		tokens = tokenize(",");
194 		assertEquals(2, tokens.size());
195 		assertEquals("", tokens.get(0).getValue());
196 		assertEquals("", tokens.get(1).getValue());
197 
198 		// Multiple commas
199 		tokens = tokenize("a,,b");
200 		assertEquals(3, tokens.size());
201 		assertEquals("a", tokens.get(0).getValue());
202 		assertEquals("", tokens.get(1).getValue());
203 		assertEquals("b", tokens.get(2).getValue());
204 
205 		// Trailing comma
206 		tokens = tokenize("a,b,");
207 		assertEquals(3, tokens.size());
208 		assertEquals("a", tokens.get(0).getValue());
209 		assertEquals("b", tokens.get(1).getValue());
210 		assertEquals("", tokens.get(2).getValue());
211 
212 		// Leading comma
213 		tokens = tokenize(",a,b");
214 		assertEquals(3, tokens.size());
215 		assertEquals("", tokens.get(0).getValue());
216 		assertEquals("a", tokens.get(1).getValue());
217 		assertEquals("b", tokens.get(2).getValue());
218 	}
219 
220 	@Test void c02_whitespaceHandling() {
221 		// Various whitespace scenarios
222 		var tokens = tokenize("  a  ,  b  ");
223 		assertList(tokens, token("a"), token("b"));
224 
225 		// Tabs and newlines
226 		tokens = tokenize("\ta\t,\nb\n");
227 		assertList(tokens, token("a"), token("b"));
228 
229 		// Whitespace in nested content
230 		tokens = tokenize("root{  a  ,  b  }");
231 		assertToken(tokens.get(0), "root", "a", "b");
232 
233 		// Whitespace around braces
234 		tokens = tokenize("root  {  a,b  }  ,  other");
235 		assertEquals(2, tokens.size());
236 		assertToken(tokens.get(0), "root", "a", "b");
237 		assertToken(tokens.get(1), "other");
238 	}
239 
240 	@Test void c03_errorConditions() {
241 		// Null input
242 		assertThrows(IllegalArgumentException.class, () -> tokenize(null));
243 
244 		// Empty input
245 		assertThrows(IllegalArgumentException.class, () -> tokenize(""));
246 
247 		// Blank input
248 		assertThrows(IllegalArgumentException.class, () -> tokenize("   "));
249 	}
250 
251 	@Test void c04_finalTokenLogic() {
252 		// Test line 136: final token addition logic
253 
254 		// Case 1: Empty final value with trailing comma (lastWasComma = true)
255 		var tokens = tokenize("a,");
256 		assertEquals(2, tokens.size());
257 		assertEquals("a", tokens.get(0).getValue());
258 		assertEquals("", tokens.get(1).getValue()); // Empty token added due to trailing comma
259 
260 		// Case 2: No tokens yet and empty input should create one empty token
261 		// This is handled by error conditions, but let's test a whitespace-only case after comma
262 		tokens = tokenize(",   ");
263 		assertEquals(2, tokens.size());
264 		assertEquals("", tokens.get(0).getValue());
265 		assertEquals("", tokens.get(1).getValue()); // Empty final value but added because of lastWasComma
266 
267 		// Case 3: Non-empty final value should always be added
268 		tokens = tokenize("a,b");
269 		assertEquals(2, tokens.size());
270 		assertEquals("a", tokens.get(0).getValue());
271 		assertEquals("b", tokens.get(1).getValue());
272 
273 		// Case 4: Test with nested content and trailing comma
274 		tokens = tokenize("root{a,},next");
275 		assertEquals(2, tokens.size());
276 		assertToken(tokens.get(0), "root", "a", ""); // Empty token in nested due to trailing comma
277 		assertEquals("next", tokens.get(1).getValue());
278 	}
279 
280 	//------------------------------------------------------------------------------------------------------------------
281 	// Token object tests
282 	//------------------------------------------------------------------------------------------------------------------
283 
284 	@Test void d01_tokenConstruction() {
285 		// Normal construction
286 		var token = new Token("test");
287 		assertEquals("test", token.getValue());
288 		assertFalse(token.hasNested());
289 		assertTrue(token.getNested().isEmpty());
290 
291 		// Null value handling
292 		token = new Token(null);
293 		assertEquals("", token.getValue());
294 		assertFalse(token.hasNested());
295 	}
296 
297 	@Test void d02_tokenEquality() {
298 		// Simple tokens
299 		var token1 = new Token("test");
300 		var token2 = new Token("test");
301 		var token3 = new Token("other");
302 
303 		assertEquals(token1, token2);
304 		assertNotEquals(token1, token3);
305 		assertEquals(token1.hashCode(), token2.hashCode());
306 
307 		// Tokens with nested content
308 		var nested1 = new Token("parent");
309 		nested1.setNested(Arrays.asList(new Token("child1"), new Token("child2")));
310 
311 		var nested2 = new Token("parent");
312 		nested2.setNested(Arrays.asList(new Token("child1"), new Token("child2")));
313 
314 		assertEquals(nested1, nested2);
315 		assertEquals(nested1.hashCode(), nested2.hashCode());
316 
317 		// Different nested content
318 		var nested3 = new Token("parent");
319 		nested3.setNested(Arrays.asList(new Token("child1"), new Token("different")));
320 
321 		assertNotEquals(nested1, nested3);
322 	}
323 
324 	@Test void d06_tokenEqualsEdgeCases() {
325 		// Test line 229: equals() method edge cases
326 		var token = new Token("test");
327 
328 		// Case 1: Self equality
329 		assertEquals(token, token);
330 
331 		// Case 2: Null comparison
332 		assertNotEquals(token, null);
333 
334 		// Case 3: Different object type
335 		assertNotEquals(token, "not a token");
336 		assertNotEquals(token, Integer.valueOf(42));
337 
338 		// Case 4: Different value, same nested (null)
339 		var other = new Token("different");
340 		assertNotEquals(token, other);
341 
342 		// Case 5: Same value, different nested content
343 		var token1 = new Token("same");
344 		var token2 = new Token("same");
345 		token1.setNested(Arrays.asList(new Token("child1")));
346 		token2.setNested(Arrays.asList(new Token("child2")));
347 		assertNotEquals(token1, token2);
348 
349 		// Case 6: Same value, one has nested, other doesn't
350 		var token3 = new Token("same");
351 		var token4 = new Token("same");
352 		token3.setNested(Arrays.asList(new Token("child")));
353 		// token4 has no nested content
354 		assertNotEquals(token3, token4);
355 
356 		// Case 7: Both have null nested
357 		var token5 = new Token("same");
358 		var token6 = new Token("same");
359 		token5.setNested(null);
360 		token6.setNested(null);
361 		assertEquals(token5, token6);
362 	}
363 
364 	@Test void d03_tokenToString() {
365 		// Simple token
366 		var token = new Token("test");
367 		assertEquals("test", token.toString());
368 
369 		// Token with nested content
370 		token = new Token("parent");
371 		token.setNested(Arrays.asList(new Token("child1"), new Token("child2")));
372 		assertEquals("parent{child1,child2}", token.toString());
373 
374 		// Deep nesting
375 		var child = new Token("child");
376 		child.setNested(Arrays.asList(new Token("grandchild")));
377 		token = new Token("parent");
378 		token.setNested(Arrays.asList(child));
379 		assertEquals("parent{child{grandchild}}", token.toString());
380 	}
381 
382 	@Test void d04_tokenNestedAccess() {
383 		var parent = new Token("parent");
384 
385 		// Initially no nested content
386 		assertFalse(parent.hasNested());
387 		assertTrue(parent.getNested().isEmpty());
388 
389 		// Add nested content
390 		parent.setNested(Arrays.asList(new Token("child1"), new Token("child2")));
391 		assertTrue(parent.hasNested());
392 		assertEquals(2, parent.getNested().size());
393 
394 		// Verify unmodifiable
395 		var nested = parent.getNested();
396 		assertThrows(UnsupportedOperationException.class, () -> nested.add(new Token("child3")));
397 	}
398 
399 	@Test void d05_hasNestedEdgeCases() {
400 		// Test line 201: hasNested() method edge cases
401 		var token = new Token("test");
402 
403 		// Case 1: null nested list
404 		token.setNested(null);
405 		assertFalse(token.hasNested()); // Should return false when nested is null
406 
407 		// Case 2: empty nested list
408 		token.setNested(new ArrayList<>());
409 		assertFalse(token.hasNested()); // Should return false when nested is empty
410 
411 		// Case 3: non-empty nested list
412 		token.setNested(Arrays.asList(new Token("child")));
413 		assertTrue(token.hasNested()); // Should return true when nested has content
414 
415 		// Case 4: nested list with multiple items
416 		token.setNested(Arrays.asList(new Token("child1"), new Token("child2")));
417 		assertTrue(token.hasNested()); // Should return true when nested has multiple items
418 	}
419 
420 	//------------------------------------------------------------------------------------------------------------------
421 	// Integration and round-trip tests
422 	//------------------------------------------------------------------------------------------------------------------
423 
424 	@Test void e01_roundTripTests() {
425 		// Simple cases
426 		assertRoundTrip("foo");
427 		assertRoundTrip("foo,bar,baz");
428 
429 		// Nested cases
430 		assertRoundTrip("foo{a,b}");
431 		assertRoundTrip("foo{a,b},bar{c,d}");
432 
433 		// Deep nesting
434 		assertRoundTrip("root{level1{level2{a,b}}}");
435 
436 		// Complex real-world case
437 		assertRoundTrip("user{name,email},config{timeout,retries}");
438 	}
439 
440 	@Test void e02_performanceTest() {
441 		// Test with large input to ensure reasonable performance
442 		var sb = new StringBuilder();
443 		for (int i = 0; i < 1000; i++) {
444 			if (i > 0) sb.append(",");
445 			sb.append("token").append(i);
446 			if (i % 10 == 0) {
447 				sb.append("{nested").append(i).append(",value").append(i).append("}");
448 			}
449 		}
450 
451 		var start = System.currentTimeMillis();
452 		var tokens = tokenize(sb.toString());
453 		var elapsed = System.currentTimeMillis() - start;
454 
455 		assertTrue(tokens.size() > 900); // Should have many tokens
456 		assertTrue(elapsed < 1000); // Should complete within 1 second
457 	}
458 
459 	//------------------------------------------------------------------------------------------------------------------
460 	// Helper methods
461 	//------------------------------------------------------------------------------------------------------------------
462 
463 	/**
464 	 * Creates a simple token for testing.
465 	 */
466 	private Token token(String value) {
467 		return new Token(value);
468 	}
469 
470 	/**
471 	 * Asserts that a token has the expected value and nested tokens.
472 	 */
473 	private void assertToken(Token actual, String expectedValue, String... expectedNested) {
474 		assertEquals(expectedValue, actual.getValue());
475 		if (expectedNested.length == 0) {
476 			assertFalse(actual.hasNested());
477 		} else {
478 			assertTrue(actual.hasNested());
479 			assertEquals(expectedNested.length, actual.getNested().size());
480 			for (int i = 0; i < expectedNested.length; i++) {
481 				assertEquals(expectedNested[i], actual.getNested().get(i).getValue());
482 			}
483 		}
484 	}
485 
486 	/**
487 	 * Tests that parsing and toString are inverse operations.
488 	 */
489 	private void assertRoundTrip(String input) {
490 		var tokens = tokenize(input);
491 		var rebuilt = tokens.stream()
492 			.map(Token::toString)
493 			.collect(java.util.stream.Collectors.joining(","));
494 		assertEquals(input, rebuilt);
495 	}
496 }