View Javadoc
1   /*
2    * Portions of this software was developed by employees of the National Institute
3    * of Standards and Technology (NIST), an agency of the Federal Government and is
4    * being made available as a public service. Pursuant to title 17 United States
5    * Code Section 105, works of NIST employees are not subject to copyright
6    * protection in the United States. This software may be subject to foreign
7    * copyright. Permission in the United States and in foreign countries, to the
8    * extent that NIST may hold copyright, to use, copy, modify, create derivative
9    * works, and distribute this software and its documentation without fee is hereby
10   * granted on a non-exclusive basis, provided that this notice and disclaimer
11   * of warranty appears in all copies.
12   *
13   * THE SOFTWARE IS PROVIDED 'AS IS' WITHOUT ANY WARRANTY OF ANY KIND, EITHER
14   * EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY
15   * THAT THE SOFTWARE WILL CONFORM TO SPECIFICATIONS, ANY IMPLIED WARRANTIES OF
16   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND FREEDOM FROM
17   * INFRINGEMENT, AND ANY WARRANTY THAT THE DOCUMENTATION WILL CONFORM TO THE
18   * SOFTWARE, OR ANY WARRANTY THAT THE SOFTWARE WILL BE ERROR FREE.  IN NO EVENT
19   * SHALL NIST BE LIABLE FOR ANY DAMAGES, INCLUDING, BUT NOT LIMITED TO, DIRECT,
20   * INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES, ARISING OUT OF, RESULTING FROM,
21   * OR IN ANY WAY CONNECTED WITH THIS SOFTWARE, WHETHER OR NOT BASED UPON WARRANTY,
22   * CONTRACT, TORT, OR OTHERWISE, WHETHER OR NOT INJURY WAS SUSTAINED BY PERSONS OR
23   * PROPERTY OR OTHERWISE, AND WHETHER OR NOT LOSS WAS SUSTAINED FROM, OR AROSE OUT
24   * OF THE RESULTS OF, OR USE OF, THE SOFTWARE OR SERVICES PROVIDED HEREUNDER.
25   */
26  
27  /*
28   * This code is based on https://github.com/eclipse-ee4j/jaxb-ri/blob/master/jaxb-ri/core/src/main/java/org/glassfish/jaxb/core/api/impl/NameUtil.java
29   *
30   * Copyright (c) 1997, 2022 Oracle and/or its affiliates. All rights reserved.
31   *
32   * This program and the accompanying materials are made available under the
33   * terms of the Eclipse Distribution License v. 1.0, which is available at
34   * http://www.eclipse.org/org/documents/edl-v10.php.
35   *
36   * SPDX-License-Identifier: BSD-3-Clause
37   */
38  
39  package gov.nist.secauto.metaschema.databind.codegen.impl;
40  
41  import java.util.ArrayList;
42  import java.util.List;
43  import java.util.Locale;
44  
/**
 * Methods that convert strings into various identifier-friendly formats.
 * <p>
 * A string is tokenized into words by skipping punctuation and by detecting
 * changes in character category (upper-case letter, lower-case letter, other
 * letter, digit, other). The resulting words can then be joined as mixed-case
 * or constant-style names.
 */
@SuppressWarnings("PMD")
class NameUtil {
  // the 5-category classification that we use in this code
  // to find word breaks
  protected static final int UPPER_LETTER = 0;
  protected static final int LOWER_LETTER = 1;
  protected static final int OTHER_LETTER = 2;
  protected static final int DIGIT = 3;
  protected static final int OTHER = 4;

  /** The number of character categories; also the row stride of the action table. */
  private static final int CATEGORY_COUNT = 5;

  // action constants. see nextBreak for the meaning
  private static final byte ACTION_CHECK_PUNCT = 0;
  private static final byte ACTION_CHECK_C2 = 1;
  private static final byte ACTION_BREAK = 2;
  private static final byte ACTION_NOBREAK = 3;

  /**
   * Look up table for actions. type0*5+type1 would yield the action to be taken.
   */
  private static final byte[] actionTable = new byte[CATEGORY_COUNT * CATEGORY_COUNT];

  static {
    // initialize the action table for every (previous, next) category pair
    for (int t0 = 0; t0 < CATEGORY_COUNT; t0++) {
      for (int t1 = 0; t1 < CATEGORY_COUNT; t1++) {
        actionTable[t0 * CATEGORY_COUNT + t1] = decideAction(t0, t1);
      }
    }
  }

  /**
   * Determines if the character is one of the punctuation characters that
   * separate words and are dropped from the generated word list.
   * <p>
   * The non-ASCII characters are written as Unicode escapes, since U+00B7
   * (middle dot) and U+0387 (Greek ano teleia) are visually indistinguishable
   * and a literal character depends on the encoding used to compile the file.
   *
   * @param ch
   *          the character to test
   * @return {@code true} if the character is a word-separating punctuation
   *         character, or {@code false} otherwise
   */
  @SuppressWarnings("checkstyle:AvoidEscapedUnicodeCharactersCheck") // look-alike characters
  protected static boolean isPunct(char ch) {
    return ch == '-' || ch == '.' || ch == ':' || ch == '_'
        || ch == '\u00b7' || ch == '\u0387' || ch == '\u06dd' || ch == '\u06de';
  }

  /**
   * Capitalizes the first character of the specified string, and de-capitalize
   * the rest of characters.
   * <p>
   * The string is returned unchanged if it is empty or if its first character is
   * not a lower-case character.
   *
   * @param str
   *          the string to capitalize
   * @return the capitalized string
   */
  public static String capitalize(String str) {
    if (str.isEmpty() || !Character.isLowerCase(str.charAt(0))) {
      return str;
    }
    StringBuilder sb = new StringBuilder(str.length());
    sb.append(String.valueOf(str.charAt(0)).toUpperCase(Locale.ENGLISH));
    sb.append(str.substring(1).toLowerCase(Locale.ENGLISH));
    return sb.toString();
  }

  /**
   * Finds the position of the next word break following {@code start}.
   * <p>
   * Precondition: {@code str.charAt(start)} is not punctuation.
   *
   * @param str
   *          the string to scan
   * @param start
   *          the position of the first character of the current word
   * @return the index of the first character that does not belong to the current
   *         word, or {@code -1} if the word extends to the end of the string
   */
  @SuppressWarnings({
      "checkstyle:MissingSwitchDefaultCheck", // intentional
  })
  private static int nextBreak(String str, int start) {
    int len = str.length();

    char c1 = str.charAt(start);
    int t1 = classify(c1);

    for (int i = start + 1; i < len; i++) {
      // shift (c1,t1) into (c0,t0)
      // char c0 = c1; --- conceptually, but c0 won't be used
      int t0 = t1;

      c1 = str.charAt(i);
      t1 = classify(c1);

      switch (actionTable[t0 * CATEGORY_COUNT + t1]) {
      case ACTION_CHECK_PUNCT:
        // two "other" characters in a row: only punctuation ends the word
        if (isPunct(c1)) {
          return i;
        }
        break;
      case ACTION_CHECK_C2:
        // two upper-case letters in a row: break before the second one only if
        // it starts a new capitalized word, e.g. "XMLParser" -> "XML", "Parser"
        if (i < len - 1) {
          char c2 = str.charAt(i + 1);
          if (Character.isLowerCase(c2)) {
            return i;
          }
        }
        break;
      case ACTION_BREAK:
        return i;
      }
      // ACTION_NOBREAK: keep scanning
    }
    return -1;
  }

  /**
   * Decide the action to be taken given the classification of the preceding
   * character 't0' and the classification of the next character 't1'.
   * <p>
   * The rules are evaluated in order; the first matching rule wins.
   *
   * @param t0
   *          the category of the preceding character
   * @param t1
   *          the category of the next character
   * @return one of the {@code ACTION_*} constants
   */
  private static byte decideAction(int t0, int t1) {
    if (t0 == OTHER && t1 == OTHER) {
      return ACTION_CHECK_PUNCT;
    }
    // entering or leaving a run of digits
    if ((t0 == DIGIT) ^ (t1 == DIGIT)) {
      return ACTION_BREAK;
    }
    // a lower-case run ends at anything that is not lower-case
    if (t0 == LOWER_LETTER && t1 != LOWER_LETTER) {
      return ACTION_BREAK;
    }
    // transition between a letter and a non-letter
    if ((t0 <= OTHER_LETTER) ^ (t1 <= OTHER_LETTER)) {
      return ACTION_BREAK;
    }
    // transition between a cased letter and an "other" letter
    if ((t0 == OTHER_LETTER) ^ (t1 == OTHER_LETTER)) {
      return ACTION_BREAK;
    }
    if (t0 == UPPER_LETTER && t1 == UPPER_LETTER) {
      return ACTION_CHECK_C2;
    }

    return ACTION_NOBREAK;
  }

  /**
   * Classify a character into 5 categories that determine the word break.
   *
   * @param ch
   *          the character
   * @return the categorization, which is one of {@link #UPPER_LETTER},
   *         {@link #LOWER_LETTER}, {@link #OTHER_LETTER}, {@link #DIGIT}, or
   *         {@link #OTHER}
   */
  protected static int classify(char ch) {
    switch (Character.getType(ch)) {
    case Character.UPPERCASE_LETTER:
      return UPPER_LETTER;
    case Character.LOWERCASE_LETTER:
      return LOWER_LETTER;
    case Character.TITLECASE_LETTER:
    case Character.MODIFIER_LETTER:
    case Character.OTHER_LETTER:
      return OTHER_LETTER;
    case Character.DECIMAL_DIGIT_NUMBER:
      return DIGIT;
    default:
      return OTHER;
    }
  }

  /**
   * Tokenizes a string into words and capitalizes the first character of each
   * word.
   * <p>
   * This method uses a change in character type as a splitter of two words. For
   * example, "abc100ghi" will be split into {"Abc", "100", "Ghi"}. Punctuation
   * characters (see {@link #isPunct(char)}) separate words and are dropped.
   * Characters that cannot be part of a Java identifier are escaped.
   *
   * @param str
   *          the string to split into a word list
   * @return the word list, which is empty if the string contains no words
   */
  public static List<String> toWordList(String str) {
    List<String> retval = new ArrayList<>();
    int len = str.length();
    int pos = 0;
    while (pos < len) {
      // Skip punctuation
      while (pos < len && isPunct(str.charAt(pos))) {
        pos++;
      }
      if (pos >= len) {
        break;
      }

      // Find next break and collect word
      int breakPos = nextBreak(str, pos);
      String word = (breakPos == -1) ? str.substring(pos) : str.substring(pos, breakPos);
      retval.add(escape(capitalize(word)));
      if (breakPos == -1) {
        break;
      }
      pos = breakPos;
    }

    // we can't guarantee a valid Java identifier anyway,
    // so there's not much point in rejecting an empty word list.
    return retval;
  }

  /**
   * Joins the words into a single mixed-case name.
   *
   * @param ss
   *          the list of words, which are expected to be capitalized already
   * @param startUpper
   *          if {@code true} the first word is used as-is, otherwise the first
   *          word is converted to lower case
   * @return the joined name, or an empty string if the list is empty
   */
  protected static String toMixedCaseName(List<String> ss, boolean startUpper) {
    StringBuilder sb = new StringBuilder();
    if (!ss.isEmpty()) {
      sb.append(startUpper ? ss.get(0) : ss.get(0).toLowerCase(Locale.ENGLISH));
      for (int i = 1; i < ss.size(); i++) {
        sb.append(ss.get(i));
      }
    }
    return sb.toString();
  }

  /**
   * Joins the words into a single mixed-case variable name.
   * <p>
   * The provided array is not modified.
   *
   * @param ss
   *          the words to join
   * @param startUpper
   *          if {@code true} the first word is used as-is, otherwise the first
   *          word is converted to lower case
   * @param cdrUpper
   *          if {@code true} every word after the first is capitalized using
   *          {@link #capitalize(String)}, otherwise these words are used as-is
   * @return the joined name, or an empty string if the array is empty
   */
  protected static String toMixedCaseVariableName(String[] ss,
      boolean startUpper,
      boolean cdrUpper) {
    StringBuilder sb = new StringBuilder();
    if (ss.length > 0) {
      sb.append(startUpper ? ss[0] : ss[0].toLowerCase(Locale.ENGLISH));
      for (int i = 1; i < ss.length; i++) {
        // capitalize while appending so the caller's array is left untouched
        sb.append(cdrUpper ? capitalize(ss[i]) : ss[i]);
      }
    }
    return sb.toString();
  }

  /**
   * Formats a string into "THIS_KIND_OF_FORMAT_ABC_DEF".
   *
   * @param str
   *          the string to format
   * @return Always return a string but there's no guarantee that the generated
   *         code is a valid Java identifier.
   */
  public String toConstantName(String str) {
    return toConstantName(toWordList(str));
  }

  /**
   * Formats a list of words into "THIS_KIND_OF_FORMAT_ABC_DEF".
   *
   * @param ss
   *          a list of words
   * @return Always return a string but there's no guarantee that the generated
   *         code is a valid Java identifier.
   */
  public String toConstantName(List<String> ss) {
    StringBuilder sb = new StringBuilder();
    if (!ss.isEmpty()) {
      sb.append(ss.get(0).toUpperCase(Locale.ENGLISH));
      for (int i = 1; i < ss.size(); i++) {
        sb.append('_');
        sb.append(ss.get(i).toUpperCase(Locale.ENGLISH));
      }
    }
    return sb.toString();
  }

  /**
   * Escapes characters in the given string that cannot be part of a Java
   * identifier.
   * <p>
   * Each such character is replaced by an underscore followed by the character's
   * code as four lower-case hexadecimal digits (e.g. a space becomes "_0020").
   * The escaped characters will be appended to the given StringBuilder.
   *
   * @param sb
   *          StringBuilder that receives escaped string.
   * @param str
   *          String to be escaped. <code>str.substring(start)</code> will be
   *          escaped and copied to the string builder.
   * @param start
   *          the starting position in the string
   */
  public static void escape(StringBuilder sb, String str, int start) {
    int len = str.length();
    for (int i = start; i < len; i++) {
      char ch = str.charAt(i);
      if (Character.isJavaIdentifierPart(ch)) {
        sb.append(ch);
      } else {
        String hex = Integer.toHexString(ch);
        sb.append('_');
        // left-pad with zeros to a fixed width of four hexadecimal digits
        for (int width = hex.length(); width < 4; width++) {
          sb.append('0');
        }
        sb.append(hex);
      }
    }
  }

  /**
   * Escapes characters that are unusable as Java identifiers by replacing unsafe
   * characters with safe characters.
   *
   * @param str
   *          the string to escape
   * @return the escaped string, or the same string instance if nothing needed to
   *         be escaped
   */
  private static String escape(String str) {
    int len = str.length();
    for (int i = 0; i < len; i++) {
      if (!Character.isJavaIdentifierPart(str.charAt(i))) {
        // copy the safe prefix verbatim, then escape the remainder
        StringBuilder sb = new StringBuilder(str.substring(0, i));
        escape(sb, str, i);
        return sb.toString();
      }
    }
    return str;
  }
}