/*
 * Portions of this software was developed by employees of the National Institute
 * of Standards and Technology (NIST), an agency of the Federal Government and is
 * being made available as a public service. Pursuant to title 17 United States
 * Code Section 105, works of NIST employees are not subject to copyright
 * protection in the United States. This software may be subject to foreign
 * copyright. Permission in the United States and in foreign countries, to the
 * extent that NIST may hold copyright, to use, copy, modify, create derivative
 * works, and distribute this software and its documentation without fee is hereby
 * granted on a non-exclusive basis, provided that this notice and disclaimer
 * of warranty appears in all copies.
 *
 * THE SOFTWARE IS PROVIDED 'AS IS' WITHOUT ANY WARRANTY OF ANY KIND, EITHER
 * EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY
 * THAT THE SOFTWARE WILL CONFORM TO SPECIFICATIONS, ANY IMPLIED WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND FREEDOM FROM
 * INFRINGEMENT, AND ANY WARRANTY THAT THE DOCUMENTATION WILL CONFORM TO THE
 * SOFTWARE, OR ANY WARRANTY THAT THE SOFTWARE WILL BE ERROR FREE. IN NO EVENT
 * SHALL NIST BE LIABLE FOR ANY DAMAGES, INCLUDING, BUT NOT LIMITED TO, DIRECT,
 * INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES, ARISING OUT OF, RESULTING FROM,
 * OR IN ANY WAY CONNECTED WITH THIS SOFTWARE, WHETHER OR NOT BASED UPON WARRANTY,
 * CONTRACT, TORT, OR OTHERWISE, WHETHER OR NOT INJURY WAS SUSTAINED BY PERSONS OR
 * PROPERTY OR OTHERWISE, AND WHETHER OR NOT LOSS WAS SUSTAINED FROM, OR AROSE OUT
 * OF THE RESULTS OF, OR USE OF, THE SOFTWARE OR SERVICES PROVIDED HEREUNDER.
 */

package gov.nist.secauto.metaschema.databind.io;

import com.ctc.wstx.stax.WstxInputFactory;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.core.io.MergedStream;
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;

import gov.nist.secauto.metaschema.core.configuration.DefaultConfiguration;
import gov.nist.secauto.metaschema.core.configuration.IConfiguration;
import gov.nist.secauto.metaschema.core.model.util.JsonUtil;
import gov.nist.secauto.metaschema.core.util.ObjectUtils;
import gov.nist.secauto.metaschema.databind.IBindingContext;
import gov.nist.secauto.metaschema.databind.io.json.JsonFactoryFactory;
import gov.nist.secauto.metaschema.databind.io.yaml.impl.YamlFactoryFactory;

import org.codehaus.stax2.XMLEventReader2;
import org.codehaus.stax2.XMLInputFactory2;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;

import javax.xml.namespace.QName;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.StartElement;

import edu.umd.cs.findbugs.annotations.NonNull;
import edu.umd.cs.findbugs.annotations.Nullable;

/**
 * Provides a means to analyze content to determine what type of bound data it
 * contains.
 * <p>
 * Detection reads a bounded number of leading bytes from the input, inspects
 * the first meaningful name found there (the root element for XML, the first
 * non-{@code $schema} field for JSON and YAML), and asks the
 * {@link IBindingContext} which Java class is bound to that name.
 */
public class ModelDetector {
  @NonNull
  private final IBindingContext bindingContext;
  @NonNull
  private final IConfiguration<DeserializationFeature<?>> configuration;

  /**
   * Construct a new model detector using the default configuration.
   *
   * @param bindingContext
   *          information about how Java classes are bound to Module definitions
   */
  public ModelDetector(
      @NonNull IBindingContext bindingContext) {
    this(bindingContext, new DefaultConfiguration<>());
  }

  /**
   * Construct a new model detector using the provided {@code configuration}.
   *
   * @param bindingContext
   *          information about how Java classes are bound to Module definitions
   * @param configuration
   *          the deserialization configuration
   */
  public ModelDetector(
      @NonNull IBindingContext bindingContext,
      @NonNull IConfiguration<DeserializationFeature<?>> configuration) {
    this.bindingContext = bindingContext;
    this.configuration = configuration;
  }

  /**
   * Get the maximum number of bytes that will be read from the input for
   * detection purposes.
   *
   * @return the value configured for
   *         {@link DeserializationFeature#FORMAT_DETECTION_LOOKAHEAD_LIMIT}
   */
  private int getLookaheadLimit() {
    return configuration.get(DeserializationFeature.FORMAT_DETECTION_LOOKAHEAD_LIMIT);
  }

  @NonNull
  private IBindingContext getBindingContext() {
    return bindingContext;
  }

  @NonNull
  private IConfiguration<DeserializationFeature<?>> getConfiguration() {
    return configuration;
  }

  /**
   * Analyzes the data from the provided {@code inputStream} to determine its
   * model.
   * <p>
   * Up to the configured lookahead limit of bytes are consumed from
   * {@code inputStream}. The consumed bytes are retained and replayed by
   * {@link Result#getDataStream()}, so the caller should continue reading from
   * that stream rather than from {@code inputStream} directly.
   *
   * @param inputStream
   *          the resource stream to analyze
   * @param format
   *          the expected format of the data to read
   * @return the analysis result
   * @throws IOException
   *           if an error occurred while reading the resource
   * @throws UnsupportedOperationException
   *           if the provided {@code format} is not handled by this detector
   * @throws IllegalStateException
   *           if no bound class could be determined for JSON or YAML content
   */
  @NonNull
  public Result detect(@NonNull InputStream inputStream, @NonNull Format format)
      throws IOException {
    byte[] buf = ObjectUtils.notNull(inputStream.readNBytes(getLookaheadLimit()));

    Class<?> clazz;
    // parse from an in-memory copy so the lookahead bytes can be replayed later
    try (InputStream bis = new ByteArrayInputStream(buf)) {
      switch (format) {
      case JSON:
        // close the parser so its internal buffers are released
        try (JsonParser parser = ObjectUtils.notNull(
            JsonFactoryFactory.instance().createParser(bis))) {
          clazz = detectModelJsonClass(parser);
        }
        break;
      case YAML:
        YAMLFactory factory = YamlFactoryFactory.newParserFactoryInstance(getConfiguration());
        // the YAML parser is consumed through the generic JsonParser API
        try (JsonParser parser = ObjectUtils.notNull(factory.createParser(bis))) {
          clazz = detectModelJsonClass(parser);
        }
        break;
      case XML:
        clazz = detectModelXmlClass(ObjectUtils.notNull(bis));
        break;
      default:
        throw new UnsupportedOperationException(
            String.format("The format '%s' is not supported", format));
      }
    }

    if (clazz == null) {
      throw new IllegalStateException(
          String.format("Detected format '%s', but unable to detect the bound data type", format.name()));
    }

    return new Result(clazz, inputStream, buf);
  }

  /**
   * Determine the bound class for XML content by locating the first start
   * element and looking up its qualified name in the binding context.
   *
   * @param is
   *          the XML content to analyze
   * @return the bound class associated with the root element name
   * @throws IOException
   *           if the content cannot be parsed, contains no start element, or the
   *           element name is not recognized by the binding context
   */
  @NonNull
  private Class<?> detectModelXmlClass(@NonNull InputStream is) throws IOException {
    QName startElementQName;
    try {
      XMLInputFactory2 xmlInputFactory = (XMLInputFactory2) XMLInputFactory.newInstance();
      assert xmlInputFactory instanceof WstxInputFactory;
      // NOTE(review): DTD and external entity handling are left at whatever
      // configureForXmlConformance() selects; confirm this is acceptable when the
      // content comes from an untrusted source.
      xmlInputFactory.configureForXmlConformance();
      xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, false);

      Reader reader = new InputStreamReader(is, Charset.forName("UTF8"));
      XMLEventReader2 eventReader = (XMLEventReader2) xmlInputFactory.createXMLEventReader(reader);
      try {
        // skip everything (prolog, comments, etc.) that precedes the root element
        while (eventReader.hasNext() && !eventReader.peek().isStartElement()) {
          eventReader.nextEvent();
        }

        // peek() yields null once the events are exhausted, so test hasNext()
        // instead of dereferencing the peeked event
        if (!eventReader.hasNext()) {
          throw new IOException("Unable to detect a start element");
        }

        StartElement start = eventReader.nextEvent().asStartElement();
        startElementQName = ObjectUtils.notNull(start.getName());
      } finally {
        // parsing stops well before the end of the document, so release the
        // reader's resources explicitly
        eventReader.close();
      }
    } catch (XMLStreamException ex) {
      throw new IOException(ex);
    }

    Class<?> clazz = getBindingContext().getBoundClassForXmlQName(startElementQName);

    if (clazz == null) {
      throw new IOException("Unrecognized element name: " + startElementQName.toString());
    }
    return clazz;
  }

  /**
   * Determine the bound class for JSON or YAML content by looking up the first
   * top-level field name, other than {@code $schema}, in the binding context.
   *
   * @param parser
   *          the parser positioned before the start of the top-level object
   * @return the bound class for the first non-{@code $schema} field, or
   *         {@code null} if there is no such field or the binding context does
   *         not recognize its name
   * @throws IOException
   *           if an error occurred while parsing the content
   */
  @Nullable
  private Class<?> detectModelJsonClass(@NonNull JsonParser parser) throws IOException {
    Class<?> retval = null;
    JsonUtil.advanceAndAssert(parser, JsonToken.START_OBJECT);
    while (JsonToken.FIELD_NAME.equals(parser.nextToken())) {
      String name = ObjectUtils.notNull(parser.getCurrentName());
      if ("$schema".equals(name)) {
        // move to the value and skip all of it; skipChildren() is a no-op for a
        // scalar value and consumes the whole structure for an object or array
        parser.nextToken();
        parser.skipChildren();
      } else {
        // only the first candidate field is considered
        retval = getBindingContext().getBoundClassForJsonName(name);
        break;
      }
    }
    return retval;
  }

  /**
   * The outcome of a model detection: the detected bound class together with a
   * stream that provides the analyzed data.
   */
  public static final class Result {
    @NonNull
    private final Class<?> boundClass;
    @NonNull
    private final InputStream dataStream;

    /**
     * Construct a new result.
     *
     * @param clazz
     *          the detected bound class
     * @param is
     *          the original stream, from which the lookahead bytes have already
     *          been consumed
     * @param buf
     *          the lookahead bytes that were consumed from {@code is}
     */
    private Result(
        @NonNull Class<?> clazz,
        @NonNull InputStream is,
        @NonNull byte[] buf) {
      this.boundClass = clazz;
      // serve the buffered lookahead bytes first, then continue with the original stream
      this.dataStream = new MergedStream(null, is, buf, 0, buf.length);
    }

    /**
     * Get the Java class representing the detected bound object.
     *
     * @return the Java class
     */
    @NonNull
    public Class<?> getBoundClass() {
      return boundClass;
    }

    /**
     * Get an {@link InputStream} that can be used to read the analyzed data from
     * the start.
     *
     * @return the stream
     */
    @NonNull
    public InputStream getDataStream() {
      return dataStream;
    }
  }
}