001/*
002 * Portions of this software was developed by employees of the National Institute
003 * of Standards and Technology (NIST), an agency of the Federal Government and is
004 * being made available as a public service. Pursuant to title 17 United States
005 * Code Section 105, works of NIST employees are not subject to copyright
006 * protection in the United States. This software may be subject to foreign
007 * copyright. Permission in the United States and in foreign countries, to the
008 * extent that NIST may hold copyright, to use, copy, modify, create derivative
009 * works, and distribute this software and its documentation without fee is hereby
010 * granted on a non-exclusive basis, provided that this notice and disclaimer
011 * of warranty appears in all copies.
012 *
013 * THE SOFTWARE IS PROVIDED 'AS IS' WITHOUT ANY WARRANTY OF ANY KIND, EITHER
014 * EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY
015 * THAT THE SOFTWARE WILL CONFORM TO SPECIFICATIONS, ANY IMPLIED WARRANTIES OF
016 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND FREEDOM FROM
017 * INFRINGEMENT, AND ANY WARRANTY THAT THE DOCUMENTATION WILL CONFORM TO THE
018 * SOFTWARE, OR ANY WARRANTY THAT THE SOFTWARE WILL BE ERROR FREE.  IN NO EVENT
019 * SHALL NIST BE LIABLE FOR ANY DAMAGES, INCLUDING, BUT NOT LIMITED TO, DIRECT,
020 * INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES, ARISING OUT OF, RESULTING FROM,
021 * OR IN ANY WAY CONNECTED WITH THIS SOFTWARE, WHETHER OR NOT BASED UPON WARRANTY,
022 * CONTRACT, TORT, OR OTHERWISE, WHETHER OR NOT INJURY WAS SUSTAINED BY PERSONS OR
023 * PROPERTY OR OTHERWISE, AND WHETHER OR NOT LOSS WAS SUSTAINED FROM, OR AROSE OUT
024 * OF THE RESULTS OF, OR USE OF, THE SOFTWARE OR SERVICES PROVIDED HEREUNDER.
025 */
026
027package gov.nist.secauto.metaschema.databind.io;
028
029import com.ctc.wstx.stax.WstxInputFactory;
030import com.fasterxml.jackson.core.JsonParser;
031import com.fasterxml.jackson.core.JsonToken;
032import com.fasterxml.jackson.core.io.MergedStream;
033import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
034
035import gov.nist.secauto.metaschema.core.configuration.DefaultConfiguration;
036import gov.nist.secauto.metaschema.core.configuration.IConfiguration;
037import gov.nist.secauto.metaschema.core.model.util.JsonUtil;
038import gov.nist.secauto.metaschema.core.util.ObjectUtils;
039import gov.nist.secauto.metaschema.databind.IBindingContext;
040import gov.nist.secauto.metaschema.databind.io.json.JsonFactoryFactory;
041import gov.nist.secauto.metaschema.databind.io.yaml.impl.YamlFactoryFactory;
042
043import org.codehaus.stax2.XMLEventReader2;
044import org.codehaus.stax2.XMLInputFactory2;
045
046import java.io.ByteArrayInputStream;
047import java.io.IOException;
048import java.io.InputStream;
049import java.io.InputStreamReader;
050import java.io.Reader;
051import java.nio.charset.Charset;
052
053import javax.xml.namespace.QName;
054import javax.xml.stream.XMLInputFactory;
055import javax.xml.stream.XMLStreamException;
056import javax.xml.stream.events.StartElement;
057
058import edu.umd.cs.findbugs.annotations.NonNull;
059import edu.umd.cs.findbugs.annotations.Nullable;
060
061/**
062 * Provides a means to analyze content to determine what type of bound data it
063 * contains.
064 */
065public class ModelDetector {
066  @NonNull
067  private final IBindingContext bindingContext;
068  @NonNull
069  private final IConfiguration<DeserializationFeature<?>> configuration;
070
071  /**
072   * Construct a new format detector using the default configuration.
073   *
074   * @param bindingContext
075   *          information about how Java classes are bound to Module definitions
076   */
077  public ModelDetector(
078      @NonNull IBindingContext bindingContext) {
079    this(bindingContext, new DefaultConfiguration<>());
080  }
081
082  /**
083   * Construct a new format detector using the provided {@code configuration}.
084   *
085   * @param bindingContext
086   *          information about how Java classes are bound to Module definitions
087   * @param configuration
088   *          the deserialization configuration
089   */
090  public ModelDetector(
091      @NonNull IBindingContext bindingContext,
092      @NonNull IConfiguration<DeserializationFeature<?>> configuration) {
093    this.bindingContext = bindingContext;
094    this.configuration = configuration;
095  }
096
097  private int getLookaheadLimit() {
098    return configuration.get(DeserializationFeature.FORMAT_DETECTION_LOOKAHEAD_LIMIT);
099  }
100
101  @NonNull
102  private IBindingContext getBindingContext() {
103    return bindingContext;
104  }
105
106  @NonNull
107  private IConfiguration<DeserializationFeature<?>> getConfiguration() {
108    return configuration;
109  }
110
111  /**
112   * Analyzes the data from the provided {@code inputStream} to determine it's
113   * model.
114   *
115   * @param inputStream
116   *          the resource stream to analyze
117   * @param format
118   *          the expected format of the data to read
119   * @return the analysis result
120   * @throws IOException
121   *           if an error occurred while reading the resource
122   */
123  @NonNull
124  public Result detect(@NonNull InputStream inputStream, @NonNull Format format)
125      throws IOException {
126    byte[] buf = ObjectUtils.notNull(inputStream.readNBytes(getLookaheadLimit()));
127
128    Class<?> clazz;
129    try (InputStream bis = new ByteArrayInputStream(buf)) {
130      switch (format) {
131      case JSON:
132        clazz = detectModelJsonClass(ObjectUtils.notNull(
133            JsonFactoryFactory.instance().createParser(bis)));
134        break;
135      case YAML:
136        YAMLFactory factory = YamlFactoryFactory.newParserFactoryInstance(getConfiguration());
137        clazz = detectModelJsonClass(ObjectUtils.notNull(factory.createParser(bis)));
138        break;
139      case XML:
140        clazz = detectModelXmlClass(ObjectUtils.notNull(bis));
141        break;
142      default:
143        throw new UnsupportedOperationException(
144            String.format("The format '%s' dataStream not supported", format));
145      }
146    }
147
148    if (clazz == null) {
149      throw new IllegalStateException(
150          String.format("Detected format '%s', but unable to detect the bound data type", format.name()));
151    }
152
153    return new Result(clazz, inputStream, buf);
154  }
155
156  @NonNull
157  private Class<?> detectModelXmlClass(@NonNull InputStream is) throws IOException {
158    QName startElementQName;
159    try {
160      XMLInputFactory2 xmlInputFactory = (XMLInputFactory2) XMLInputFactory.newInstance();
161      assert xmlInputFactory instanceof WstxInputFactory;
162      xmlInputFactory.configureForXmlConformance();
163      xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, false);
164
165      Reader reader = new InputStreamReader(is, Charset.forName("UTF8"));
166      XMLEventReader2 eventReader = (XMLEventReader2) xmlInputFactory.createXMLEventReader(reader);
167      while (eventReader.hasNext() && !eventReader.peek().isStartElement()) {
168        eventReader.nextEvent();
169      }
170
171      if (!eventReader.peek().isStartElement()) {
172        throw new IOException("Unable to detect a start element");
173      }
174
175      StartElement start = eventReader.nextEvent().asStartElement();
176      startElementQName = ObjectUtils.notNull(start.getName());
177    } catch (XMLStreamException ex) {
178      throw new IOException(ex);
179    }
180
181    Class<?> clazz = getBindingContext().getBoundClassForXmlQName(startElementQName);
182
183    if (clazz == null) {
184      throw new IOException("Unrecognized element name: " + startElementQName.toString());
185    }
186    return clazz;
187  }
188
189  @Nullable
190  private Class<?> detectModelJsonClass(@NonNull JsonParser parser) throws IOException {
191    Class<?> retval = null;
192    JsonUtil.advanceAndAssert(parser, JsonToken.START_OBJECT);
193    outer: while (JsonToken.FIELD_NAME.equals(parser.nextToken())) {
194      String name = ObjectUtils.notNull(parser.getCurrentName());
195      if ("$schema".equals(name)) {
196        // do nothing
197        parser.nextToken();
198        // JsonUtil.skipNextValue(parser);
199      } else {
200        retval = getBindingContext().getBoundClassForJsonName(name);
201        break outer;
202      }
203    }
204    return retval;
205  }
206
207  public static class Result {
208    @NonNull
209    private final Class<?> boundClass;
210    @NonNull
211    private final InputStream dataStream;
212
213    private Result(
214        @NonNull Class<?> clazz,
215        @NonNull InputStream is,
216        @NonNull byte[] buf) {
217      this.boundClass = clazz;
218      this.dataStream = new MergedStream(null, is, buf, 0, buf.length);
219    }
220
221    /**
222     * Get the Java class representing the detected bound object.
223     *
224     * @return the Java class
225     */
226    @NonNull
227    public Class<?> getBoundClass() {
228      return boundClass;
229    }
230
231    /**
232     * Get an {@link InputStream} that can be used to read the analyzed data from
233     * the start.
234     *
235     * @return the stream
236     */
237    @NonNull
238    public InputStream getDataStream() {
239      return dataStream;
240    }
241  }
242}