001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.juneau.commons.collections;
018
019import static org.apache.juneau.commons.utils.Utils.*;
020
021import java.util.*;
022
023/**
024 * A specialized set for storing and efficiently looking up language keywords.
025 *
026 * <p>
027 * This class provides a lightweight, immutable container optimized for fast keyword lookups using binary search.
028 * Keywords are stored in a sorted array, making lookups O(log n) efficient. This is particularly useful for
029 * parsers, compilers, and syntax highlighters that need to frequently check if identifiers are reserved keywords.
030 *
031 * <h5 class='section'>Features:</h5>
032 * <ul class='spaced-list'>
033 *    <li>Immutable after construction - thread-safe for concurrent reads
034 *    <li>Efficient O(log n) lookups using binary search
035 *    <li>Compact memory footprint using sorted array
036 *    <li>Automatic rejection of null and single-character strings
037 * </ul>
038 *
039 * <h5 class='section'>Implementation Details:</h5>
040 * <p>
041 * Keywords are sorted lexicographically during construction and stored in an internal array. The {@link #contains(String)}
042 * method uses {@link Arrays#binarySearch(Object[], Object)} for efficient lookups. Strings shorter than 2 characters
043 * are automatically rejected without performing a search, as most programming languages don't have single-character
044 * keywords.
045 *
046 * <h5 class='section'>Examples:</h5>
047 * <p class='bjava'>
048 *    <jc>// Create a keyword set for Java reserved words</jc>
049 *    KeywordSet <jv>javaKeywords</jv> = <jk>new</jk> KeywordSet(
050 *       <js>"abstract"</js>, <js>"assert"</js>, <js>"boolean"</js>, <js>"break"</js>, <js>"byte"</js>,
051 *       <js>"case"</js>, <js>"catch"</js>, <js>"char"</js>, <js>"class"</js>, <js>"const"</js>
052 *       <jc>// ... more keywords</jc>
053 *    );
054 *
055 *    <jc>// Check if identifiers are keywords</jc>
056 *    <jk>if</jk> (<jv>javaKeywords</jv>.contains(<js>"class"</js>)) {
057 *       <jc>// Handle keyword</jc>
058 *    }
059 *
060 *    <jc>// Safe handling of edge cases</jc>
061 *    <jsm>assertFalse</jsm>(<jv>javaKeywords</jv>.contains(<jk>null</jk>));     <jc>// Returns false</jc>
062 *    <jsm>assertFalse</jsm>(<jv>javaKeywords</jv>.contains(<js>"a"</js>));      <jc>// Single char - returns false</jc>
063 *    <jsm>assertFalse</jsm>(<jv>javaKeywords</jv>.contains(<js>"myVar"</js>));  <jc>// Not a keyword</jc>
064 *
065 *    <jc>// Use in a parser/lexer</jc>
066 *    <jk>class</jk> Lexer {
067 *       <jk>private static final</jk> KeywordSet KEYWORDS = <jk>new</jk> KeywordSet(
068 *          <js>"if"</js>, <js>"else"</js>, <js>"while"</js>, <js>"for"</js>, <js>"return"</js>
069 *       );
070 *
071 *       <jk>boolean</jk> isKeyword(String token) {
072 *          <jk>return</jk> KEYWORDS.contains(token);
073 *       }
074 *    }
075 * </p>
076 *
077 * <h5 class='section'>Use Cases:</h5>
078 * <ul class='spaced-list'>
079 *    <li>Programming language parsers - checking if tokens are reserved words
080 *    <li>Syntax highlighters - identifying keywords for special formatting
081 *    <li>Code analyzers - distinguishing keywords from identifiers
082 *    <li>Template engines - recognizing template keywords
083 *    <li>Query languages - validating reserved words
084 * </ul>
085 *
086 * <h5 class='section'>Notes:</h5><ul>
087 *    <li class='note'>
088 *       This class is <b>immutable and thread-safe</b> after construction. Multiple threads can safely call
089 *       {@link #contains(String)} concurrently.
090 *    <li class='note'>
091 *       Keywords are compared using exact string matching (case-sensitive). For case-insensitive matching,
092 *       normalize your keywords and input strings to the same case.
093 *    <li class='note'>
094 *       The minimum keyword length is 2 characters. Single-character strings are automatically rejected.
095 *    <li class='note'>
096 *       Consider creating {@link KeywordSet} instances as static final constants to avoid repeated construction.
097 * </ul>
098 *
099 * <h5 class='section'>See Also:</h5><ul>
100 *    <li class='link'><a class="doclink" href="https://juneau.apache.org/docs/topics/JuneauCommonsCollections">Collections Package</a>
101 * </ul>
102 */
103public class KeywordSet {
104   final String[] store;
105
106   /**
107    * Creates a new keyword set with the specified keywords.
108    *
109    * <p>
110    * Keywords are automatically sorted during construction. Duplicate keywords are allowed but provide
111    * no benefit. For best performance, pass unique keywords.
112    *
113    * <h5 class='section'>Example:</h5>
114    * <p class='bjava'>
115    *    <jc>// Create a keyword set for SQL keywords</jc>
116    *    KeywordSet <jv>sql</jv> = <jk>new</jk> KeywordSet(
117    *       <js>"SELECT"</js>, <js>"FROM"</js>, <js>"WHERE"</js>, <js>"INSERT"</js>, <js>"UPDATE"</js>,
118    *       <js>"DELETE"</js>, <js>"CREATE"</js>, <js>"DROP"</js>, <js>"TABLE"</js>, <js>"INDEX"</js>
119    *    );
120    *
121    *    <jc>// Keywords can be passed in any order</jc>
122    *    KeywordSet <jv>keywords</jv> = <jk>new</jk> KeywordSet(<js>"zebra"</js>, <js>"apple"</js>, <js>"banana"</js>);
123    *    <jsm>assertTrue</jsm>(<jv>keywords</jv>.contains(<js>"apple"</js>));  <jc>// Sorted internally</jc>
124    * </p>
125    *
126    * @param keywords The keywords to store. Can be empty but not <jk>null</jk>. Individual keywords can be any non-null string.
127    */
128   public KeywordSet(String...keywords) {
129      this.store = keywords;
130      Arrays.sort(store);
131   }
132
133   /**
134    * Checks if the specified string is a keyword in this set.
135    *
136    * <p>
137    * This method performs an O(log n) binary search on the sorted keyword array. Null strings and
138    * strings with fewer than 2 characters are automatically rejected without performing a search.
139    *
140    * <h5 class='section'>Example:</h5>
141    * <p class='bjava'>
142    *    KeywordSet <jv>keywords</jv> = <jk>new</jk> KeywordSet(<js>"class"</js>, <js>"interface"</js>, <js>"enum"</js>);
143    *
144    *    <jc>// Standard checks</jc>
145    *    <jsm>assertTrue</jsm>(<jv>keywords</jv>.contains(<js>"class"</js>));      <jc>// Keyword exists</jc>
146    *    <jsm>assertFalse</jsm>(<jv>keywords</jv>.contains(<js>"MyClass"</js>));   <jc>// Not a keyword</jc>
147    *
148    *    <jc>// Edge cases handled gracefully</jc>
149    *    <jsm>assertFalse</jsm>(<jv>keywords</jv>.contains(<jk>null</jk>));        <jc>// null returns false</jc>
150    *    <jsm>assertFalse</jsm>(<jv>keywords</jv>.contains(<js>""</js>));          <jc>// Empty string returns false</jc>
151    *    <jsm>assertFalse</jsm>(<jv>keywords</jv>.contains(<js>"a"</js>));         <jc>// Single char returns false</jc>
152    *
153    *    <jc>// Case-sensitive matching</jc>
154    *    <jsm>assertTrue</jsm>(<jv>keywords</jv>.contains(<js>"class"</js>));
155    *    <jsm>assertFalse</jsm>(<jv>keywords</jv>.contains(<js>"CLASS"</js>));     <jc>// Different case</jc>
156    * </p>
157    *
158    * <h5 class='section'>Performance:</h5>
159    * <ul>
160    *    <li>Time complexity: O(log n) using binary search
161    *    <li>Space complexity: O(1) - no additional memory allocated
162    *    <li>Short-circuit: Strings with length &lt; 2 return immediately without searching
163    * </ul>
164    *
165    * @param s The string to check. Can be <jk>null</jk>.
166    * @return <jk>true</jk> if the string exists in this keyword set, <jk>false</jk> if it doesn't exist,
167    *         is <jk>null</jk>, or has fewer than 2 characters.
168    */
169   public boolean contains(String s) {
170      if (s == null || s.length() < 2)
171         return false;
172      return Arrays.binarySearch(store, s) >= 0;
173   }
174
175   /**
176    * Returns a string representation of this keyword set.
177    *
178    * <p>
179    * The format follows the standard Java set convention: <c>"[keyword1, keyword2, ...]"</c>
180    *
181    * @return A string representation of this keyword set.
182    */
183   @Override
184   public String toString() {
185      return Arrays.toString(store);
186   }
187
188   /**
189    * Compares the specified object with this keyword set for equality.
190    *
191    * <p>
192    * Returns <jk>true</jk> if the given object is also a keyword set and contains the same keywords
193    * in the same order. Two keyword sets are equal if their internal arrays are equal.
194    *
195    * @param o Object to be compared for equality with this keyword set.
196    * @return <jk>true</jk> if the specified object is equal to this keyword set.
197    */
198   @Override
199   public boolean equals(Object o) {
200      return (o instanceof KeywordSet o2) && eq(this, o2, (x, y) -> Arrays.equals(x.store, y.store));
201   }
202
203   /**
204    * Returns the hash code value for this keyword set.
205    *
206    * <p>
207    * The hash code is computed from the internal array of keywords using {@link Arrays#hashCode(Object[])}.
208    * This ensures that <c>ks1.equals(ks2)</c> implies that <c>ks1.hashCode()==ks2.hashCode()</c>
209    * for any two keyword sets <c>ks1</c> and <c>ks2</c>, as required by the general contract of
210    * {@link Object#hashCode()}.
211    *
212    * @return The hash code value for this keyword set.
213    */
214   @Override
215   public int hashCode() {
216      return h((Object[])store);
217   }
218}