View Javadoc
1   /*
2    * Portions of this software was developed by employees of the National Institute
3    * of Standards and Technology (NIST), an agency of the Federal Government and is
4    * being made available as a public service. Pursuant to title 17 United States
5    * Code Section 105, works of NIST employees are not subject to copyright
6    * protection in the United States. This software may be subject to foreign
7    * copyright. Permission in the United States and in foreign countries, to the
8    * extent that NIST may hold copyright, to use, copy, modify, create derivative
9    * works, and distribute this software and its documentation without fee is hereby
10   * granted on a non-exclusive basis, provided that this notice and disclaimer
11   * of warranty appears in all copies.
12   *
13   * THE SOFTWARE IS PROVIDED 'AS IS' WITHOUT ANY WARRANTY OF ANY KIND, EITHER
14   * EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY
15   * THAT THE SOFTWARE WILL CONFORM TO SPECIFICATIONS, ANY IMPLIED WARRANTIES OF
16   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND FREEDOM FROM
17   * INFRINGEMENT, AND ANY WARRANTY THAT THE DOCUMENTATION WILL CONFORM TO THE
18   * SOFTWARE, OR ANY WARRANTY THAT THE SOFTWARE WILL BE ERROR FREE.  IN NO EVENT
19   * SHALL NIST BE LIABLE FOR ANY DAMAGES, INCLUDING, BUT NOT LIMITED TO, DIRECT,
20   * INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES, ARISING OUT OF, RESULTING FROM,
21   * OR IN ANY WAY CONNECTED WITH THIS SOFTWARE, WHETHER OR NOT BASED UPON WARRANTY,
22   * CONTRACT, TORT, OR OTHERWISE, WHETHER OR NOT INJURY WAS SUSTAINED BY PERSONS OR
23   * PROPERTY OR OTHERWISE, AND WHETHER OR NOT LOSS WAS SUSTAINED FROM, OR AROSE OUT
24   * OF THE RESULTS OF, OR USE OF, THE SOFTWARE OR SERVICES PROVIDED HEREUNDER.
25   */
26  
27  package gov.nist.secauto.metaschema.databind.io;
28  
29  import com.ctc.wstx.stax.WstxInputFactory;
30  import com.fasterxml.jackson.core.JsonParser;
31  import com.fasterxml.jackson.core.JsonToken;
32  import com.fasterxml.jackson.core.io.MergedStream;
33  import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
34  
35  import gov.nist.secauto.metaschema.core.configuration.DefaultConfiguration;
36  import gov.nist.secauto.metaschema.core.configuration.IConfiguration;
37  import gov.nist.secauto.metaschema.core.model.util.JsonUtil;
38  import gov.nist.secauto.metaschema.core.util.ObjectUtils;
39  import gov.nist.secauto.metaschema.databind.IBindingContext;
40  import gov.nist.secauto.metaschema.databind.io.json.JsonFactoryFactory;
41  import gov.nist.secauto.metaschema.databind.io.yaml.impl.YamlFactoryFactory;
42  
43  import org.codehaus.stax2.XMLEventReader2;
44  import org.codehaus.stax2.XMLInputFactory2;
45  
46  import java.io.ByteArrayInputStream;
47  import java.io.IOException;
48  import java.io.InputStream;
49  import java.io.InputStreamReader;
50  import java.io.Reader;
51  import java.nio.charset.Charset;
52  
53  import javax.xml.namespace.QName;
54  import javax.xml.stream.XMLInputFactory;
55  import javax.xml.stream.XMLStreamException;
56  import javax.xml.stream.events.StartElement;
57  
58  import edu.umd.cs.findbugs.annotations.NonNull;
59  import edu.umd.cs.findbugs.annotations.Nullable;
60  
61  /**
62   * Provides a means to analyze content to determine what type of bound data it
63   * contains.
64   */
65  public class ModelDetector {
66    @NonNull
67    private final IBindingContext bindingContext;
68    @NonNull
69    private final IConfiguration<DeserializationFeature<?>> configuration;
70  
71    /**
72     * Construct a new format detector using the default configuration.
73     *
74     * @param bindingContext
75     *          information about how Java classes are bound to Module definitions
76     */
77    public ModelDetector(
78        @NonNull IBindingContext bindingContext) {
79      this(bindingContext, new DefaultConfiguration<>());
80    }
81  
82    /**
83     * Construct a new format detector using the provided {@code configuration}.
84     *
85     * @param bindingContext
86     *          information about how Java classes are bound to Module definitions
87     * @param configuration
88     *          the deserialization configuration
89     */
90    public ModelDetector(
91        @NonNull IBindingContext bindingContext,
92        @NonNull IConfiguration<DeserializationFeature<?>> configuration) {
93      this.bindingContext = bindingContext;
94      this.configuration = configuration;
95    }
96  
97    private int getLookaheadLimit() {
98      return configuration.get(DeserializationFeature.FORMAT_DETECTION_LOOKAHEAD_LIMIT);
99    }
100 
101   @NonNull
102   private IBindingContext getBindingContext() {
103     return bindingContext;
104   }
105 
106   @NonNull
107   private IConfiguration<DeserializationFeature<?>> getConfiguration() {
108     return configuration;
109   }
110 
111   /**
112    * Analyzes the data from the provided {@code inputStream} to determine it's
113    * model.
114    *
115    * @param inputStream
116    *          the resource stream to analyze
117    * @param format
118    *          the expected format of the data to read
119    * @return the analysis result
120    * @throws IOException
121    *           if an error occurred while reading the resource
122    */
123   @NonNull
124   public Result detect(@NonNull InputStream inputStream, @NonNull Format format)
125       throws IOException {
126     byte[] buf = ObjectUtils.notNull(inputStream.readNBytes(getLookaheadLimit()));
127 
128     Class<?> clazz;
129     try (InputStream bis = new ByteArrayInputStream(buf)) {
130       switch (format) {
131       case JSON:
132         clazz = detectModelJsonClass(ObjectUtils.notNull(
133             JsonFactoryFactory.instance().createParser(bis)));
134         break;
135       case YAML:
136         YAMLFactory factory = YamlFactoryFactory.newParserFactoryInstance(getConfiguration());
137         clazz = detectModelJsonClass(ObjectUtils.notNull(factory.createParser(bis)));
138         break;
139       case XML:
140         clazz = detectModelXmlClass(ObjectUtils.notNull(bis));
141         break;
142       default:
143         throw new UnsupportedOperationException(
144             String.format("The format '%s' dataStream not supported", format));
145       }
146     }
147 
148     if (clazz == null) {
149       throw new IllegalStateException(
150           String.format("Detected format '%s', but unable to detect the bound data type", format.name()));
151     }
152 
153     return new Result(clazz, inputStream, buf);
154   }
155 
156   @NonNull
157   private Class<?> detectModelXmlClass(@NonNull InputStream is) throws IOException {
158     QName startElementQName;
159     try {
160       XMLInputFactory2 xmlInputFactory = (XMLInputFactory2) XMLInputFactory.newInstance();
161       assert xmlInputFactory instanceof WstxInputFactory;
162       xmlInputFactory.configureForXmlConformance();
163       xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, false);
164 
165       Reader reader = new InputStreamReader(is, Charset.forName("UTF8"));
166       XMLEventReader2 eventReader = (XMLEventReader2) xmlInputFactory.createXMLEventReader(reader);
167       while (eventReader.hasNext() && !eventReader.peek().isStartElement()) {
168         eventReader.nextEvent();
169       }
170 
171       if (!eventReader.peek().isStartElement()) {
172         throw new IOException("Unable to detect a start element");
173       }
174 
175       StartElement start = eventReader.nextEvent().asStartElement();
176       startElementQName = ObjectUtils.notNull(start.getName());
177     } catch (XMLStreamException ex) {
178       throw new IOException(ex);
179     }
180 
181     Class<?> clazz = getBindingContext().getBoundClassForXmlQName(startElementQName);
182 
183     if (clazz == null) {
184       throw new IOException("Unrecognized element name: " + startElementQName.toString());
185     }
186     return clazz;
187   }
188 
189   @Nullable
190   private Class<?> detectModelJsonClass(@NonNull JsonParser parser) throws IOException {
191     Class<?> retval = null;
192     JsonUtil.advanceAndAssert(parser, JsonToken.START_OBJECT);
193     outer: while (JsonToken.FIELD_NAME.equals(parser.nextToken())) {
194       String name = ObjectUtils.notNull(parser.getCurrentName());
195       if ("$schema".equals(name)) {
196         // do nothing
197         parser.nextToken();
198         // JsonUtil.skipNextValue(parser);
199       } else {
200         retval = getBindingContext().getBoundClassForJsonName(name);
201         break outer;
202       }
203     }
204     return retval;
205   }
206 
207   public static class Result {
208     @NonNull
209     private final Class<?> boundClass;
210     @NonNull
211     private final InputStream dataStream;
212 
213     private Result(
214         @NonNull Class<?> clazz,
215         @NonNull InputStream is,
216         @NonNull byte[] buf) {
217       this.boundClass = clazz;
218       this.dataStream = new MergedStream(null, is, buf, 0, buf.length);
219     }
220 
221     /**
222      * Get the Java class representing the detected bound object.
223      *
224      * @return the Java class
225      */
226     @NonNull
227     public Class<?> getBoundClass() {
228       return boundClass;
229     }
230 
231     /**
232      * Get an {@link InputStream} that can be used to read the analyzed data from
233      * the start.
234      *
235      * @return the stream
236      */
237     @NonNull
238     public InputStream getDataStream() {
239       return dataStream;
240     }
241   }
242 }