View Javadoc
1   /*
2    * Portions of this software was developed by employees of the National Institute
3    * of Standards and Technology (NIST), an agency of the Federal Government and is
4    * being made available as a public service. Pursuant to title 17 United States
5    * Code Section 105, works of NIST employees are not subject to copyright
6    * protection in the United States. This software may be subject to foreign
7    * copyright. Permission in the United States and in foreign countries, to the
8    * extent that NIST may hold copyright, to use, copy, modify, create derivative
9    * works, and distribute this software and its documentation without fee is hereby
10   * granted on a non-exclusive basis, provided that this notice and disclaimer
11   * of warranty appears in all copies.
12   *
13   * THE SOFTWARE IS PROVIDED 'AS IS' WITHOUT ANY WARRANTY OF ANY KIND, EITHER
14   * EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY
15   * THAT THE SOFTWARE WILL CONFORM TO SPECIFICATIONS, ANY IMPLIED WARRANTIES OF
16   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND FREEDOM FROM
17   * INFRINGEMENT, AND ANY WARRANTY THAT THE DOCUMENTATION WILL CONFORM TO THE
18   * SOFTWARE, OR ANY WARRANTY THAT THE SOFTWARE WILL BE ERROR FREE.  IN NO EVENT
19   * SHALL NIST BE LIABLE FOR ANY DAMAGES, INCLUDING, BUT NOT LIMITED TO, DIRECT,
20   * INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES, ARISING OUT OF, RESULTING FROM,
21   * OR IN ANY WAY CONNECTED WITH THIS SOFTWARE, WHETHER OR NOT BASED UPON WARRANTY,
22   * CONTRACT, TORT, OR OTHERWISE, WHETHER OR NOT INJURY WAS SUSTAINED BY PERSONS OR
23   * PROPERTY OR OTHERWISE, AND WHETHER OR NOT LOSS WAS SUSTAINED FROM, OR AROSE OUT
24   * OF THE RESULTS OF, OR USE OF, THE SOFTWARE OR SERVICES PROVIDED HEREUNDER.
25   */
26  
27  package gov.nist.secauto.metaschema.databind.io;
28  
29  import com.fasterxml.jackson.core.JsonFactory;
30  import com.fasterxml.jackson.core.format.DataFormatDetector;
31  import com.fasterxml.jackson.core.format.DataFormatMatcher;
32  import com.fasterxml.jackson.core.format.MatchStrength;
33  import com.fasterxml.jackson.dataformat.xml.XmlFactory;
34  import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
35  
36  import gov.nist.secauto.metaschema.core.configuration.DefaultConfiguration;
37  import gov.nist.secauto.metaschema.core.configuration.IConfiguration;
38  import gov.nist.secauto.metaschema.core.util.ObjectUtils;
39  import gov.nist.secauto.metaschema.databind.io.json.JsonFactoryFactory;
40  import gov.nist.secauto.metaschema.databind.io.yaml.impl.YamlFactoryFactory;
41  
42  import java.io.IOException;
43  import java.io.InputStream;
44  import java.net.URL;
45  
46  import edu.umd.cs.findbugs.annotations.NonNull;
47  
48  /**
49   * Provides a means to analyze content to determine what {@link Format} the data
50   * is represented as.
51   */
52  public class FormatDetector {
53  
54    private final DataFormatDetector detector;
55  
56    /**
57     * Construct a new format detector using the default configuration.
58     */
59    public FormatDetector() {
60      this(new DefaultConfiguration<>());
61    }
62  
63    /**
64     * Construct a new format detector using the provided {@code configuration}.
65     *
66     * @param configuration
67     *          the deserialization configuration to use for detection
68     */
69    public FormatDetector(
70        @NonNull IConfiguration<DeserializationFeature<?>> configuration) {
71      this(configuration, newDetectorFactory(configuration));
72    }
73  
74    /**
75     * Construct a new format detector using the provided {@code configuration}.
76     *
77     * @param configuration
78     *          the deserialization configuration to use for detection
79     * @param detectors
80     *          the JSON parser instances to use for format detection
81     */
82    protected FormatDetector(
83        @NonNull IConfiguration<DeserializationFeature<?>> configuration,
84        @NonNull JsonFactory... detectors) {
85      int lookaheadBytes = configuration.get(DeserializationFeature.FORMAT_DETECTION_LOOKAHEAD_LIMIT);
86      this.detector = new DataFormatDetector(detectors)
87          .withMinimalMatch(MatchStrength.INCONCLUSIVE)
88          .withOptimalMatch(MatchStrength.SOLID_MATCH)
89          .withMaxInputLookahead(lookaheadBytes - 1);
90  
91    }
92  
93    @NonNull
94    private static JsonFactory[] newDetectorFactory(@NonNull IConfiguration<DeserializationFeature<?>> config) {
95      JsonFactory[] detectorFactory = new JsonFactory[3];
96      detectorFactory[0] = YamlFactoryFactory.newParserFactoryInstance(config);
97      detectorFactory[1] = JsonFactoryFactory.instance();
98      detectorFactory[2] = new XmlFactory();
99      return detectorFactory;
100   }
101 
102   /**
103    * Analyzes the provided {@code resource} to determine it's format.
104    *
105    * @param resource
106    *          the resource to analyze
107    * @return the analysis result
108    * @throws IOException
109    *           if an error occurred while reading the resource
110    */
111   @NonNull
112   public Result detect(@NonNull URL resource) throws IOException {
113     try (InputStream is = ObjectUtils.notNull(resource.openStream())) {
114       return detect(is);
115     }
116   }
117 
118   /**
119    * Analyzes the data from the provided {@code inputStream} to determine it's
120    * format.
121    *
122    * @param inputStream
123    *          the resource stream to analyze
124    * @return the analysis result
125    * @throws IOException
126    *           if an error occurred while reading the resource
127    */
128   @NonNull
129   public Result detect(@NonNull InputStream inputStream) throws IOException {
130     DataFormatMatcher matcher = detector.findFormat(inputStream);
131     switch (matcher.getMatchStrength()) {
132     case FULL_MATCH:
133     case SOLID_MATCH:
134     case WEAK_MATCH:
135     case INCONCLUSIVE:
136       return new Result(matcher);
137     case NO_MATCH:
138     default:
139       throw new IOException("Unable to identify format");
140     }
141   }
142 
143   public static class Result {
144     @NonNull
145     private final DataFormatMatcher matcher;
146 
147     private Result(@NonNull DataFormatMatcher matcher) {
148       this.matcher = matcher;
149     }
150 
151     /**
152      * Get the detected format.
153      *
154      * @return the format
155      */
156     @NonNull
157     public Format getFormat() {
158       Format retval;
159       String formatName = matcher.getMatchedFormatName();
160       if (YAMLFactory.FORMAT_NAME_YAML.equals(formatName)) {
161         retval = Format.YAML;
162       } else if (JsonFactory.FORMAT_NAME_JSON.equals(formatName)) {
163         retval = Format.JSON;
164       } else if (XmlFactory.FORMAT_NAME_XML.equals(formatName)) {
165         retval = Format.XML;
166       } else {
167         throw new UnsupportedOperationException(String.format("The detected format '%s' is not supported", formatName));
168       }
169       return retval;
170     }
171 
172     /**
173      * Get an {@link InputStream} that can be used to read the analyzed data from
174      * the start.
175      *
176      * @return the stream
177      */
178     @SuppressWarnings("resource")
179     @NonNull
180     public InputStream getDataStream() {
181       return ObjectUtils.notNull(matcher.getDataStream());
182     }
183 
184     // @SuppressWarnings("resource")
185     // @NonNull
186     // public JsonParser getParser() throws IOException {
187     // return ObjectUtils.notNull(matcher.createParserWithMatch());
188     // }
189 
190     /**
191      * Get the strength of the match.
192      *
193      * @return the strength
194      */
195     @NonNull
196     public MatchStrength getMatchStrength() {
197       return ObjectUtils.notNull(matcher.getMatchStrength());
198     }
199   }
200 }