001/*
002 * Portions of this software was developed by employees of the National Institute
003 * of Standards and Technology (NIST), an agency of the Federal Government and is
004 * being made available as a public service. Pursuant to title 17 United States
005 * Code Section 105, works of NIST employees are not subject to copyright
006 * protection in the United States. This software may be subject to foreign
007 * copyright. Permission in the United States and in foreign countries, to the
008 * extent that NIST may hold copyright, to use, copy, modify, create derivative
009 * works, and distribute this software and its documentation without fee is hereby
010 * granted on a non-exclusive basis, provided that this notice and disclaimer
011 * of warranty appears in all copies.
012 *
013 * THE SOFTWARE IS PROVIDED 'AS IS' WITHOUT ANY WARRANTY OF ANY KIND, EITHER
014 * EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY
015 * THAT THE SOFTWARE WILL CONFORM TO SPECIFICATIONS, ANY IMPLIED WARRANTIES OF
016 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND FREEDOM FROM
017 * INFRINGEMENT, AND ANY WARRANTY THAT THE DOCUMENTATION WILL CONFORM TO THE
018 * SOFTWARE, OR ANY WARRANTY THAT THE SOFTWARE WILL BE ERROR FREE.  IN NO EVENT
019 * SHALL NIST BE LIABLE FOR ANY DAMAGES, INCLUDING, BUT NOT LIMITED TO, DIRECT,
020 * INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES, ARISING OUT OF, RESULTING FROM,
021 * OR IN ANY WAY CONNECTED WITH THIS SOFTWARE, WHETHER OR NOT BASED UPON WARRANTY,
022 * CONTRACT, TORT, OR OTHERWISE, WHETHER OR NOT INJURY WAS SUSTAINED BY PERSONS OR
023 * PROPERTY OR OTHERWISE, AND WHETHER OR NOT LOSS WAS SUSTAINED FROM, OR AROSE OUT
024 * OF THE RESULTS OF, OR USE OF, THE SOFTWARE OR SERVICES PROVIDED HEREUNDER.
025 */
026
027package gov.nist.secauto.metaschema.databind.io;
028
029import com.fasterxml.jackson.core.JsonFactory;
030import com.fasterxml.jackson.core.format.DataFormatDetector;
031import com.fasterxml.jackson.core.format.DataFormatMatcher;
032import com.fasterxml.jackson.core.format.MatchStrength;
033import com.fasterxml.jackson.dataformat.xml.XmlFactory;
034import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
035
036import gov.nist.secauto.metaschema.core.configuration.DefaultConfiguration;
037import gov.nist.secauto.metaschema.core.configuration.IConfiguration;
038import gov.nist.secauto.metaschema.core.util.ObjectUtils;
039import gov.nist.secauto.metaschema.databind.io.json.JsonFactoryFactory;
040import gov.nist.secauto.metaschema.databind.io.yaml.impl.YamlFactoryFactory;
041
042import java.io.IOException;
043import java.io.InputStream;
044import java.net.URL;
045
046import edu.umd.cs.findbugs.annotations.NonNull;
047
048/**
049 * Provides a means to analyze content to determine what {@link Format} the data
050 * is represented as.
051 */
052public class FormatDetector {
053
054  private final DataFormatDetector detector;
055
056  /**
057   * Construct a new format detector using the default configuration.
058   */
059  public FormatDetector() {
060    this(new DefaultConfiguration<>());
061  }
062
063  /**
064   * Construct a new format detector using the provided {@code configuration}.
065   *
066   * @param configuration
067   *          the deserialization configuration to use for detection
068   */
069  public FormatDetector(
070      @NonNull IConfiguration<DeserializationFeature<?>> configuration) {
071    this(configuration, newDetectorFactory(configuration));
072  }
073
074  /**
075   * Construct a new format detector using the provided {@code configuration}.
076   *
077   * @param configuration
078   *          the deserialization configuration to use for detection
079   * @param detectors
080   *          the JSON parser instances to use for format detection
081   */
082  protected FormatDetector(
083      @NonNull IConfiguration<DeserializationFeature<?>> configuration,
084      @NonNull JsonFactory... detectors) {
085    int lookaheadBytes = configuration.get(DeserializationFeature.FORMAT_DETECTION_LOOKAHEAD_LIMIT);
086    this.detector = new DataFormatDetector(detectors)
087        .withMinimalMatch(MatchStrength.INCONCLUSIVE)
088        .withOptimalMatch(MatchStrength.SOLID_MATCH)
089        .withMaxInputLookahead(lookaheadBytes - 1);
090
091  }
092
093  @NonNull
094  private static JsonFactory[] newDetectorFactory(@NonNull IConfiguration<DeserializationFeature<?>> config) {
095    JsonFactory[] detectorFactory = new JsonFactory[3];
096    detectorFactory[0] = YamlFactoryFactory.newParserFactoryInstance(config);
097    detectorFactory[1] = JsonFactoryFactory.instance();
098    detectorFactory[2] = new XmlFactory();
099    return detectorFactory;
100  }
101
102  /**
103   * Analyzes the provided {@code resource} to determine it's format.
104   *
105   * @param resource
106   *          the resource to analyze
107   * @return the analysis result
108   * @throws IOException
109   *           if an error occurred while reading the resource
110   */
111  @NonNull
112  public Result detect(@NonNull URL resource) throws IOException {
113    try (InputStream is = ObjectUtils.notNull(resource.openStream())) {
114      return detect(is);
115    }
116  }
117
118  /**
119   * Analyzes the data from the provided {@code inputStream} to determine it's
120   * format.
121   *
122   * @param inputStream
123   *          the resource stream to analyze
124   * @return the analysis result
125   * @throws IOException
126   *           if an error occurred while reading the resource
127   */
128  @NonNull
129  public Result detect(@NonNull InputStream inputStream) throws IOException {
130    DataFormatMatcher matcher = detector.findFormat(inputStream);
131    switch (matcher.getMatchStrength()) {
132    case FULL_MATCH:
133    case SOLID_MATCH:
134    case WEAK_MATCH:
135    case INCONCLUSIVE:
136      return new Result(matcher);
137    case NO_MATCH:
138    default:
139      throw new IOException("Unable to identify format");
140    }
141  }
142
143  public static class Result {
144    @NonNull
145    private final DataFormatMatcher matcher;
146
147    private Result(@NonNull DataFormatMatcher matcher) {
148      this.matcher = matcher;
149    }
150
151    /**
152     * Get the detected format.
153     *
154     * @return the format
155     */
156    @NonNull
157    public Format getFormat() {
158      Format retval;
159      String formatName = matcher.getMatchedFormatName();
160      if (YAMLFactory.FORMAT_NAME_YAML.equals(formatName)) {
161        retval = Format.YAML;
162      } else if (JsonFactory.FORMAT_NAME_JSON.equals(formatName)) {
163        retval = Format.JSON;
164      } else if (XmlFactory.FORMAT_NAME_XML.equals(formatName)) {
165        retval = Format.XML;
166      } else {
167        throw new UnsupportedOperationException(String.format("The detected format '%s' is not supported", formatName));
168      }
169      return retval;
170    }
171
172    /**
173     * Get an {@link InputStream} that can be used to read the analyzed data from
174     * the start.
175     *
176     * @return the stream
177     */
178    @SuppressWarnings("resource")
179    @NonNull
180    public InputStream getDataStream() {
181      return ObjectUtils.notNull(matcher.getDataStream());
182    }
183
184    // @SuppressWarnings("resource")
185    // @NonNull
186    // public JsonParser getParser() throws IOException {
187    // return ObjectUtils.notNull(matcher.createParserWithMatch());
188    // }
189
190    /**
191     * Get the strength of the match.
192     *
193     * @return the strength
194     */
195    @NonNull
196    public MatchStrength getMatchStrength() {
197      return ObjectUtils.notNull(matcher.getMatchStrength());
198    }
199  }
200}