001/*
002 * Portions of this software was developed by employees of the National Institute
003 * of Standards and Technology (NIST), an agency of the Federal Government and is
004 * being made available as a public service. Pursuant to title 17 United States
005 * Code Section 105, works of NIST employees are not subject to copyright
006 * protection in the United States. This software may be subject to foreign
007 * copyright. Permission in the United States and in foreign countries, to the
008 * extent that NIST may hold copyright, to use, copy, modify, create derivative
009 * works, and distribute this software and its documentation without fee is hereby
010 * granted on a non-exclusive basis, provided that this notice and disclaimer
011 * of warranty appears in all copies.
012 *
013 * THE SOFTWARE IS PROVIDED 'AS IS' WITHOUT ANY WARRANTY OF ANY KIND, EITHER
014 * EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY
015 * THAT THE SOFTWARE WILL CONFORM TO SPECIFICATIONS, ANY IMPLIED WARRANTIES OF
016 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND FREEDOM FROM
017 * INFRINGEMENT, AND ANY WARRANTY THAT THE DOCUMENTATION WILL CONFORM TO THE
018 * SOFTWARE, OR ANY WARRANTY THAT THE SOFTWARE WILL BE ERROR FREE.  IN NO EVENT
019 * SHALL NIST BE LIABLE FOR ANY DAMAGES, INCLUDING, BUT NOT LIMITED TO, DIRECT,
020 * INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES, ARISING OUT OF, RESULTING FROM,
021 * OR IN ANY WAY CONNECTED WITH THIS SOFTWARE, WHETHER OR NOT BASED UPON WARRANTY,
022 * CONTRACT, TORT, OR OTHERWISE, WHETHER OR NOT INJURY WAS SUSTAINED BY PERSONS OR
023 * PROPERTY OR OTHERWISE, AND WHETHER OR NOT LOSS WAS SUSTAINED FROM, OR AROSE OUT
024 * OF THE RESULTS OF, OR USE OF, THE SOFTWARE OR SERVICES PROVIDED HEREUNDER.
025 */
026
027package gov.nist.secauto.metaschema.core.model.xml;
028
029import gov.nist.secauto.metaschema.core.model.IModule;
030import gov.nist.secauto.metaschema.core.model.MetaschemaException;
031import gov.nist.secauto.metaschema.core.model.constraint.IConstraintSet;
032import gov.nist.secauto.metaschema.core.model.xml.xmlbeans.METASCHEMADocument;
033import gov.nist.secauto.metaschema.core.model.xml.xmlbeans.MetaschemaImportType;
034import gov.nist.secauto.metaschema.core.util.CollectionUtil;
035import gov.nist.secauto.metaschema.core.util.ObjectUtils;
036
037import org.apache.xmlbeans.XmlException;
038import org.apache.xmlbeans.XmlOptions;
039import org.xml.sax.EntityResolver;
040import org.xml.sax.InputSource;
041import org.xml.sax.SAXException;
042import org.xml.sax.XMLReader;
043
044import java.io.IOException;
045import java.net.URI;
046import java.util.ArrayList;
047import java.util.Collection;
048import java.util.Collections;
049import java.util.Deque;
050import java.util.LinkedHashMap;
051import java.util.List;
052import java.util.Map;
053import java.util.Set;
054
055import javax.xml.XMLConstants;
056import javax.xml.parsers.ParserConfigurationException;
057import javax.xml.parsers.SAXParser;
058import javax.xml.parsers.SAXParserFactory;
059
060import edu.umd.cs.findbugs.annotations.NonNull;
061
062/**
063 * Provides methods to load a Metaschema expressed in XML.
064 * <p>
065 * Loaded Metaschema instances are cached to avoid the need to load them for
066 * every use. Any Metaschema imported is also loaded and cached automatically.
067 */
068public class ModuleLoader
069    extends AbstractLoader<IModule> {
070  private boolean resolveEntities; // = false;
071
072  @NonNull
073  private final Set<IConstraintSet> registeredConstraintSets;
074
075  /**
076   * Construct a new Metaschema loader.
077   */
078  public ModuleLoader() {
079    this(CollectionUtil.emptySet());
080  }
081
082  /**
083   * Construct a new Metaschema loader, which will incorporate the additional
084   * provided constraints into matching loaded definitions.
085   *
086   * @param additionalConstraintSets
087   *          additional constraints to associate with loaded definitions
088   */
089  public ModuleLoader(@NonNull Set<IConstraintSet> additionalConstraintSets) {
090    this.registeredConstraintSets = CollectionUtil.unmodifiableSet(additionalConstraintSets);
091  }
092
093  /**
094   * Get the set of additional constraints associated with this loader.
095   *
096   * @return the set of constraints
097   */
098  @NonNull
099  protected Set<IConstraintSet> getRegisteredConstraintSets() {
100    return registeredConstraintSets;
101  }
102
103  /**
104   * Enable a mode that allows XML entity resolution. This may be needed to parse
105   * some resource files that contain entities. Enabling entity resolution is a
106   * less secure, which requires trust in the resource content being parsed.
107   */
108  public void allowEntityResolution() {
109    resolveEntities = true;
110  }
111
112  /**
113   * Parse the {@code resource} based on the provided {@code xmlObject}.
114   *
115   * @param resource
116   *          the URI of the resource being parsed
117   * @param xmlObject
118   *          the XML beans object to parse
119   * @param importedModules
120   *          previously parsed Metaschema modules imported by the provided
121   *          {@code resource}
122   * @return the parsed resource as a Metaschema module
123   * @throws MetaschemaException
124   *           if an error occurred while parsing the XML beans object
125   */
126  protected IModule newXmlMetaschema(
127      @NonNull URI resource,
128      @NonNull METASCHEMADocument xmlObject,
129      @NonNull List<IModule> importedModules) throws MetaschemaException {
130    IModule retval = new XmlModule(resource, xmlObject, importedModules);
131
132    IConstraintSet.applyConstraintSetToModule(getRegisteredConstraintSets(), retval);
133
134    return retval;
135  }
136
137  @Override
138  protected IModule parseResource(@NonNull URI resource, @NonNull Deque<URI> visitedResources)
139      throws IOException {
140    // parse this Metaschema module
141    METASCHEMADocument xmlObject = parseModule(resource);
142
143    // now check if this Metaschema imports other metaschema
144    int size = xmlObject.getMETASCHEMA().sizeOfImportArray();
145    @NonNull Map<URI, IModule> importedModules;
146    if (size == 0) {
147      importedModules = ObjectUtils.notNull(Collections.emptyMap());
148    } else {
149      try {
150        importedModules = new LinkedHashMap<>();
151        for (MetaschemaImportType imported : xmlObject.getMETASCHEMA().getImportList()) {
152          URI importedResource = URI.create(imported.getHref());
153          importedResource = ObjectUtils.notNull(resource.resolve(importedResource));
154          importedModules.put(importedResource, loadInternal(importedResource, visitedResources));
155        }
156      } catch (MetaschemaException ex) {
157        throw new IOException(ex);
158      }
159    }
160
161    // now create this metaschema
162    Collection<IModule> values = importedModules.values();
163    try {
164      return newXmlMetaschema(resource, xmlObject, new ArrayList<>(values));
165    } catch (MetaschemaException ex) {
166      throw new IOException(ex);
167    }
168  }
169
170  /**
171   * Parse the provided XML resource as a Metaschema module.
172   *
173   * @param resource
174   *          the resource to parse
175   * @return the XMLBeans representation of the Metaschema module
176   * @throws IOException
177   *           if a parsing error occurred
178   */
179  protected METASCHEMADocument parseModule(@NonNull URI resource) throws IOException {
180    METASCHEMADocument metaschemaXml;
181    try {
182      XmlOptions options = new XmlOptions();
183      if (resolveEntities) {
184        SAXParserFactory factory = SAXParserFactory.newInstance();
185
186        try {
187          // factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
188          factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false);
189          factory.setFeature("http://xml.org/sax/features/external-general-entities", true);
190          factory.setFeature("http://xml.org/sax/features/external-parameter-entities", true);
191          SAXParser parser = factory.newSAXParser();
192          parser.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "file"); // ,jar:file
193          XMLReader reader = parser.getXMLReader();
194          reader.setEntityResolver(new EntityResolver() {
195
196            @Override
197            public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
198              return null;
199            }
200
201          });
202          options.setLoadUseXMLReader(reader);
203        } catch (SAXException | ParserConfigurationException ex) {
204          throw new IOException(ex);
205        }
206        // options.setLoadEntityBytesLimit(204800);
207        // options.setLoadUseDefaultResolver();
208        options.setEntityResolver(new EntityResolver() {
209
210          @Override
211          public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
212            String effectiveSystemId = systemId;
213            // TODO: It's very odd that the system id looks like this. Need to investigate.
214            if (effectiveSystemId.startsWith("file://file://")) {
215              effectiveSystemId = effectiveSystemId.substring(14);
216            }
217            URI resolvedSystemId = resource.resolve(effectiveSystemId);
218            return new InputSource(resolvedSystemId.toString());
219          }
220
221        });
222        options.setLoadDTDGrammar(true);
223      }
224      options.setBaseURI(resource);
225      options.setLoadLineNumbers();
226      metaschemaXml = METASCHEMADocument.Factory.parse(resource.toURL(), options);
227    } catch (XmlException ex) {
228      throw new IOException(ex);
229    }
230    return metaschemaXml;
231  }
232
233}