001/* 002 * Portions of this software was developed by employees of the National Institute 003 * of Standards and Technology (NIST), an agency of the Federal Government and is 004 * being made available as a public service. Pursuant to title 17 United States 005 * Code Section 105, works of NIST employees are not subject to copyright 006 * protection in the United States. This software may be subject to foreign 007 * copyright. Permission in the United States and in foreign countries, to the 008 * extent that NIST may hold copyright, to use, copy, modify, create derivative 009 * works, and distribute this software and its documentation without fee is hereby 010 * granted on a non-exclusive basis, provided that this notice and disclaimer 011 * of warranty appears in all copies. 012 * 013 * THE SOFTWARE IS PROVIDED 'AS IS' WITHOUT ANY WARRANTY OF ANY KIND, EITHER 014 * EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY 015 * THAT THE SOFTWARE WILL CONFORM TO SPECIFICATIONS, ANY IMPLIED WARRANTIES OF 016 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND FREEDOM FROM 017 * INFRINGEMENT, AND ANY WARRANTY THAT THE DOCUMENTATION WILL CONFORM TO THE 018 * SOFTWARE, OR ANY WARRANTY THAT THE SOFTWARE WILL BE ERROR FREE. IN NO EVENT 019 * SHALL NIST BE LIABLE FOR ANY DAMAGES, INCLUDING, BUT NOT LIMITED TO, DIRECT, 020 * INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES, ARISING OUT OF, RESULTING FROM, 021 * OR IN ANY WAY CONNECTED WITH THIS SOFTWARE, WHETHER OR NOT BASED UPON WARRANTY, 022 * CONTRACT, TORT, OR OTHERWISE, WHETHER OR NOT INJURY WAS SUSTAINED BY PERSONS OR 023 * PROPERTY OR OTHERWISE, AND WHETHER OR NOT LOSS WAS SUSTAINED FROM, OR AROSE OUT 024 * OF THE RESULTS OF, OR USE OF, THE SOFTWARE OR SERVICES PROVIDED HEREUNDER. 025 */ 026 027package gov.nist.secauto.metaschema.databind.io; 028 029import com.fasterxml.jackson.core.JsonFactory; 030import com.fasterxml.jackson.core.format.DataFormatDetector; 031import com.fasterxml.jackson.core.format.DataFormatMatcher; 032import com.fasterxml.jackson.core.format.MatchStrength; 033import com.fasterxml.jackson.dataformat.xml.XmlFactory; 034import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; 035 036import gov.nist.secauto.metaschema.core.configuration.DefaultConfiguration; 037import gov.nist.secauto.metaschema.core.configuration.IConfiguration; 038import gov.nist.secauto.metaschema.core.util.ObjectUtils; 039import gov.nist.secauto.metaschema.databind.io.json.JsonFactoryFactory; 040import gov.nist.secauto.metaschema.databind.io.yaml.impl.YamlFactoryFactory; 041 042import java.io.IOException; 043import java.io.InputStream; 044import java.net.URL; 045 046import edu.umd.cs.findbugs.annotations.NonNull; 047 048/** 049 * Provides a means to analyze content to determine what {@link Format} the data 050 * is represented as. 051 */ 052public class FormatDetector { 053 054 private final DataFormatDetector detector; 055 056 /** 057 * Construct a new format detector using the default configuration. 058 */ 059 public FormatDetector() { 060 this(new DefaultConfiguration<>()); 061 } 062 063 /** 064 * Construct a new format detector using the provided {@code configuration}. 065 * 066 * @param configuration 067 * the deserialization configuration to use for detection 068 */ 069 public FormatDetector( 070 @NonNull IConfiguration<DeserializationFeature<?>> configuration) { 071 this(configuration, newDetectorFactory(configuration)); 072 } 073 074 /** 075 * Construct a new format detector using the provided {@code configuration}. 076 * 077 * @param configuration 078 * the deserialization configuration to use for detection 079 * @param detectors 080 * the JSON parser instances to use for format detection 081 */ 082 protected FormatDetector( 083 @NonNull IConfiguration<DeserializationFeature<?>> configuration, 084 @NonNull JsonFactory... detectors) { 085 int lookaheadBytes = configuration.get(DeserializationFeature.FORMAT_DETECTION_LOOKAHEAD_LIMIT); 086 this.detector = new DataFormatDetector(detectors) 087 .withMinimalMatch(MatchStrength.INCONCLUSIVE) 088 .withOptimalMatch(MatchStrength.SOLID_MATCH) 089 .withMaxInputLookahead(lookaheadBytes - 1); 090 091 } 092 093 @NonNull 094 private static JsonFactory[] newDetectorFactory(@NonNull IConfiguration<DeserializationFeature<?>> config) { 095 JsonFactory[] detectorFactory = new JsonFactory[3]; 096 detectorFactory[0] = YamlFactoryFactory.newParserFactoryInstance(config); 097 detectorFactory[1] = JsonFactoryFactory.instance(); 098 detectorFactory[2] = new XmlFactory(); 099 return detectorFactory; 100 } 101 102 /** 103 * Analyzes the provided {@code resource} to determine it's format. 104 * 105 * @param resource 106 * the resource to analyze 107 * @return the analysis result 108 * @throws IOException 109 * if an error occurred while reading the resource 110 */ 111 @NonNull 112 public Result detect(@NonNull URL resource) throws IOException { 113 try (InputStream is = ObjectUtils.notNull(resource.openStream())) { 114 return detect(is); 115 } 116 } 117 118 /** 119 * Analyzes the data from the provided {@code inputStream} to determine it's 120 * format. 121 * 122 * @param inputStream 123 * the resource stream to analyze 124 * @return the analysis result 125 * @throws IOException 126 * if an error occurred while reading the resource 127 */ 128 @NonNull 129 public Result detect(@NonNull InputStream inputStream) throws IOException { 130 DataFormatMatcher matcher = detector.findFormat(inputStream); 131 switch (matcher.getMatchStrength()) { 132 case FULL_MATCH: 133 case SOLID_MATCH: 134 case WEAK_MATCH: 135 case INCONCLUSIVE: 136 return new Result(matcher); 137 case NO_MATCH: 138 default: 139 throw new IOException("Unable to identify format"); 140 } 141 } 142 143 public static class Result { 144 @NonNull 145 private final DataFormatMatcher matcher; 146 147 private Result(@NonNull DataFormatMatcher matcher) { 148 this.matcher = matcher; 149 } 150 151 /** 152 * Get the detected format. 153 * 154 * @return the format 155 */ 156 @NonNull 157 public Format getFormat() { 158 Format retval; 159 String formatName = matcher.getMatchedFormatName(); 160 if (YAMLFactory.FORMAT_NAME_YAML.equals(formatName)) { 161 retval = Format.YAML; 162 } else if (JsonFactory.FORMAT_NAME_JSON.equals(formatName)) { 163 retval = Format.JSON; 164 } else if (XmlFactory.FORMAT_NAME_XML.equals(formatName)) { 165 retval = Format.XML; 166 } else { 167 throw new UnsupportedOperationException(String.format("The detected format '%s' is not supported", formatName)); 168 } 169 return retval; 170 } 171 172 /** 173 * Get an {@link InputStream} that can be used to read the analyzed data from 174 * the start. 175 * 176 * @return the stream 177 */ 178 @SuppressWarnings("resource") 179 @NonNull 180 public InputStream getDataStream() { 181 return ObjectUtils.notNull(matcher.getDataStream()); 182 } 183 184 // @SuppressWarnings("resource") 185 // @NonNull 186 // public JsonParser getParser() throws IOException { 187 // return ObjectUtils.notNull(matcher.createParserWithMatch()); 188 // } 189 190 /** 191 * Get the strength of the match. 192 * 193 * @return the strength 194 */ 195 @NonNull 196 public MatchStrength getMatchStrength() { 197 return ObjectUtils.notNull(matcher.getMatchStrength()); 198 } 199 } 200}