001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.juneau.commons.collections; 018 019import static org.apache.juneau.commons.utils.Utils.*; 020 021import java.util.*; 022 023/** 024 * A specialized set for storing and efficiently looking up language keywords. 025 * 026 * <p> 027 * This class provides a lightweight, immutable container optimized for fast keyword lookups using binary search. 028 * Keywords are stored in a sorted array, making lookups O(log n) efficient. This is particularly useful for 029 * parsers, compilers, and syntax highlighters that need to frequently check if identifiers are reserved keywords. 030 * 031 * <h5 class='section'>Features:</h5> 032 * <ul class='spaced-list'> 033 * <li>Immutable after construction - thread-safe for concurrent reads 034 * <li>Efficient O(log n) lookups using binary search 035 * <li>Compact memory footprint using sorted array 036 * <li>Automatic rejection of null and single-character strings 037 * </ul> 038 * 039 * <h5 class='section'>Implementation Details:</h5> 040 * <p> 041 * Keywords are sorted lexicographically during construction and stored in an internal array. The {@link #contains(String)} 042 * method uses {@link Arrays#binarySearch(Object[], Object)} for efficient lookups. Strings shorter than 2 characters 043 * are automatically rejected without performing a search, as most programming languages don't have single-character 044 * keywords. 045 * 046 * <h5 class='section'>Examples:</h5> 047 * <p class='bjava'> 048 * <jc>// Create a keyword set for Java reserved words</jc> 049 * KeywordSet <jv>javaKeywords</jv> = <jk>new</jk> KeywordSet( 050 * <js>"abstract"</js>, <js>"assert"</js>, <js>"boolean"</js>, <js>"break"</js>, <js>"byte"</js>, 051 * <js>"case"</js>, <js>"catch"</js>, <js>"char"</js>, <js>"class"</js>, <js>"const"</js> 052 * <jc>// ... more keywords</jc> 053 * ); 054 * 055 * <jc>// Check if identifiers are keywords</jc> 056 * <jk>if</jk> (<jv>javaKeywords</jv>.contains(<js>"class"</js>)) { 057 * <jc>// Handle keyword</jc> 058 * } 059 * 060 * <jc>// Safe handling of edge cases</jc> 061 * <jsm>assertFalse</jsm>(<jv>javaKeywords</jv>.contains(<jk>null</jk>)); <jc>// Returns false</jc> 062 * <jsm>assertFalse</jsm>(<jv>javaKeywords</jv>.contains(<js>"a"</js>)); <jc>// Single char - returns false</jc> 063 * <jsm>assertFalse</jsm>(<jv>javaKeywords</jv>.contains(<js>"myVar"</js>)); <jc>// Not a keyword</jc> 064 * 065 * <jc>// Use in a parser/lexer</jc> 066 * <jk>class</jk> Lexer { 067 * <jk>private static final</jk> KeywordSet KEYWORDS = <jk>new</jk> KeywordSet( 068 * <js>"if"</js>, <js>"else"</js>, <js>"while"</js>, <js>"for"</js>, <js>"return"</js> 069 * ); 070 * 071 * <jk>boolean</jk> isKeyword(String token) { 072 * <jk>return</jk> KEYWORDS.contains(token); 073 * } 074 * } 075 * </p> 076 * 077 * <h5 class='section'>Use Cases:</h5> 078 * <ul class='spaced-list'> 079 * <li>Programming language parsers - checking if tokens are reserved words 080 * <li>Syntax highlighters - identifying keywords for special formatting 081 * <li>Code analyzers - distinguishing keywords from identifiers 082 * <li>Template engines - recognizing template keywords 083 * <li>Query languages - validating reserved words 084 * </ul> 085 * 086 * <h5 class='section'>Notes:</h5><ul> 087 * <li class='note'> 088 * This class is <b>immutable and thread-safe</b> after construction. Multiple threads can safely call 089 * {@link #contains(String)} concurrently. 090 * <li class='note'> 091 * Keywords are compared using exact string matching (case-sensitive). For case-insensitive matching, 092 * normalize your keywords and input strings to the same case. 093 * <li class='note'> 094 * The minimum keyword length is 2 characters. Single-character strings are automatically rejected. 095 * <li class='note'> 096 * Consider creating {@link KeywordSet} instances as static final constants to avoid repeated construction. 097 * </ul> 098 * 099 * <h5 class='section'>See Also:</h5><ul> 100 * <li class='link'><a class="doclink" href="https://juneau.apache.org/docs/topics/JuneauCommonsCollections">Collections Package</a> 101 * </ul> 102 */ 103public class KeywordSet { 104 final String[] store; 105 106 /** 107 * Creates a new keyword set with the specified keywords. 108 * 109 * <p> 110 * Keywords are automatically sorted during construction. Duplicate keywords are allowed but provide 111 * no benefit. For best performance, pass unique keywords. 112 * 113 * <h5 class='section'>Example:</h5> 114 * <p class='bjava'> 115 * <jc>// Create a keyword set for SQL keywords</jc> 116 * KeywordSet <jv>sql</jv> = <jk>new</jk> KeywordSet( 117 * <js>"SELECT"</js>, <js>"FROM"</js>, <js>"WHERE"</js>, <js>"INSERT"</js>, <js>"UPDATE"</js>, 118 * <js>"DELETE"</js>, <js>"CREATE"</js>, <js>"DROP"</js>, <js>"TABLE"</js>, <js>"INDEX"</js> 119 * ); 120 * 121 * <jc>// Keywords can be passed in any order</jc> 122 * KeywordSet <jv>keywords</jv> = <jk>new</jk> KeywordSet(<js>"zebra"</js>, <js>"apple"</js>, <js>"banana"</js>); 123 * <jsm>assertTrue</jsm>(<jv>keywords</jv>.contains(<js>"apple"</js>)); <jc>// Sorted internally</jc> 124 * </p> 125 * 126 * @param keywords The keywords to store. Can be empty but not <jk>null</jk>. Individual keywords can be any non-null string. 127 */ 128 public KeywordSet(String...keywords) { 129 this.store = keywords; 130 Arrays.sort(store); 131 } 132 133 /** 134 * Checks if the specified string is a keyword in this set. 135 * 136 * <p> 137 * This method performs an O(log n) binary search on the sorted keyword array. Null strings and 138 * strings with fewer than 2 characters are automatically rejected without performing a search. 139 * 140 * <h5 class='section'>Example:</h5> 141 * <p class='bjava'> 142 * KeywordSet <jv>keywords</jv> = <jk>new</jk> KeywordSet(<js>"class"</js>, <js>"interface"</js>, <js>"enum"</js>); 143 * 144 * <jc>// Standard checks</jc> 145 * <jsm>assertTrue</jsm>(<jv>keywords</jv>.contains(<js>"class"</js>)); <jc>// Keyword exists</jc> 146 * <jsm>assertFalse</jsm>(<jv>keywords</jv>.contains(<js>"MyClass"</js>)); <jc>// Not a keyword</jc> 147 * 148 * <jc>// Edge cases handled gracefully</jc> 149 * <jsm>assertFalse</jsm>(<jv>keywords</jv>.contains(<jk>null</jk>)); <jc>// null returns false</jc> 150 * <jsm>assertFalse</jsm>(<jv>keywords</jv>.contains(<js>""</js>)); <jc>// Empty string returns false</jc> 151 * <jsm>assertFalse</jsm>(<jv>keywords</jv>.contains(<js>"a"</js>)); <jc>// Single char returns false</jc> 152 * 153 * <jc>// Case-sensitive matching</jc> 154 * <jsm>assertTrue</jsm>(<jv>keywords</jv>.contains(<js>"class"</js>)); 155 * <jsm>assertFalse</jsm>(<jv>keywords</jv>.contains(<js>"CLASS"</js>)); <jc>// Different case</jc> 156 * </p> 157 * 158 * <h5 class='section'>Performance:</h5> 159 * <ul> 160 * <li>Time complexity: O(log n) using binary search 161 * <li>Space complexity: O(1) - no additional memory allocated 162 * <li>Short-circuit: Strings with length < 2 return immediately without searching 163 * </ul> 164 * 165 * @param s The string to check. Can be <jk>null</jk>. 166 * @return <jk>true</jk> if the string exists in this keyword set, <jk>false</jk> if it doesn't exist, 167 * is <jk>null</jk>, or has fewer than 2 characters. 168 */ 169 public boolean contains(String s) { 170 if (s == null || s.length() < 2) 171 return false; 172 return Arrays.binarySearch(store, s) >= 0; 173 } 174 175 /** 176 * Returns a string representation of this keyword set. 177 * 178 * <p> 179 * The format follows the standard Java set convention: <c>"[keyword1, keyword2, ...]"</c> 180 * 181 * @return A string representation of this keyword set. 182 */ 183 @Override 184 public String toString() { 185 return Arrays.toString(store); 186 } 187 188 /** 189 * Compares the specified object with this keyword set for equality. 190 * 191 * <p> 192 * Returns <jk>true</jk> if the given object is also a keyword set and contains the same keywords 193 * in the same order. Two keyword sets are equal if their internal arrays are equal. 194 * 195 * @param o Object to be compared for equality with this keyword set. 196 * @return <jk>true</jk> if the specified object is equal to this keyword set. 197 */ 198 @Override 199 public boolean equals(Object o) { 200 return (o instanceof KeywordSet o2) && eq(this, o2, (x, y) -> Arrays.equals(x.store, y.store)); 201 } 202 203 /** 204 * Returns the hash code value for this keyword set. 205 * 206 * <p> 207 * The hash code is computed from the internal array of keywords using {@link Arrays#hashCode(Object[])}. 208 * This ensures that <c>ks1.equals(ks2)</c> implies that <c>ks1.hashCode()==ks2.hashCode()</c> 209 * for any two keyword sets <c>ks1</c> and <c>ks2</c>, as required by the general contract of 210 * {@link Object#hashCode()}. 211 * 212 * @return The hash code value for this keyword set. 213 */ 214 @Override 215 public int hashCode() { 216 return h((Object[])store); 217 } 218}