View Javadoc
1   /*
2    * Portions of this software was developed by employees of the National Institute
3    * of Standards and Technology (NIST), an agency of the Federal Government and is
4    * being made available as a public service. Pursuant to title 17 United States
5    * Code Section 105, works of NIST employees are not subject to copyright
6    * protection in the United States. This software may be subject to foreign
7    * copyright. Permission in the United States and in foreign countries, to the
8    * extent that NIST may hold copyright, to use, copy, modify, create derivative
9    * works, and distribute this software and its documentation without fee is hereby
10   * granted on a non-exclusive basis, provided that this notice and disclaimer
11   * of warranty appears in all copies.
12   *
13   * THE SOFTWARE IS PROVIDED 'AS IS' WITHOUT ANY WARRANTY OF ANY KIND, EITHER
14   * EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY
15   * THAT THE SOFTWARE WILL CONFORM TO SPECIFICATIONS, ANY IMPLIED WARRANTIES OF
16   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND FREEDOM FROM
17   * INFRINGEMENT, AND ANY WARRANTY THAT THE DOCUMENTATION WILL CONFORM TO THE
18   * SOFTWARE, OR ANY WARRANTY THAT THE SOFTWARE WILL BE ERROR FREE.  IN NO EVENT
19   * SHALL NIST BE LIABLE FOR ANY DAMAGES, INCLUDING, BUT NOT LIMITED TO, DIRECT,
20   * INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES, ARISING OUT OF, RESULTING FROM,
21   * OR IN ANY WAY CONNECTED WITH THIS SOFTWARE, WHETHER OR NOT BASED UPON WARRANTY,
22   * CONTRACT, TORT, OR OTHERWISE, WHETHER OR NOT INJURY WAS SUSTAINED BY PERSONS OR
23   * PROPERTY OR OTHERWISE, AND WHETHER OR NOT LOSS WAS SUSTAINED FROM, OR AROSE OUT
24   * OF THE RESULTS OF, OR USE OF, THE SOFTWARE OR SERVICES PROVIDED HEREUNDER.
25   */
26  
27  package gov.nist.secauto.metaschema.core.datatype.markup.flexmark;
28  
29  import com.vladsch.flexmark.util.sequence.Escaping;
30  
31  import gov.nist.secauto.metaschema.core.datatype.markup.MarkupLine;
32  import gov.nist.secauto.metaschema.core.datatype.markup.MarkupMultiline;
33  import gov.nist.secauto.metaschema.core.model.util.XmlEventUtil;
34  import gov.nist.secauto.metaschema.core.util.CollectionUtil;
35  import gov.nist.secauto.metaschema.core.util.ObjectUtils;
36  
37  import org.apache.logging.log4j.LogManager;
38  import org.apache.logging.log4j.Logger;
39  import org.codehaus.stax2.XMLEventReader2;
40  
41  import java.util.Set;
42  
43  import javax.xml.namespace.QName;
44  import javax.xml.stream.XMLStreamConstants;
45  import javax.xml.stream.XMLStreamException;
46  import javax.xml.stream.events.Attribute;
47  import javax.xml.stream.events.Characters;
48  import javax.xml.stream.events.StartElement;
49  import javax.xml.stream.events.XMLEvent;
50  
51  import edu.umd.cs.findbugs.annotations.NonNull;
52  
53  public class XmlMarkupParser { // NOPMD - acceptable
54    private static final Logger LOGGER = LogManager.getLogger(XmlMarkupParser.class);
55  
56    @NonNull
57    public static final Set<String> BLOCK_ELEMENTS = ObjectUtils.notNull(
58        Set.of(
59            "h1",
60            "h2",
61            "h3",
62            "h4",
63            "h5",
64            "h6",
65            "ul",
66            "ol",
67            "pre",
68            "hr",
69            "blockquote",
70            "p",
71            "table",
72            "img"));
73  
74    @NonNull
75    private static final XmlMarkupParser SINGLETON = new XmlMarkupParser();
76  
77    @NonNull
78    public static XmlMarkupParser instance() {
79      return SINGLETON;
80    }
81  
82    public MarkupLine parseMarkupline(XMLEventReader2 reader) throws XMLStreamException { // NOPMD - acceptable
83      StringBuilder buffer = new StringBuilder();
84      parseContents(reader, null, buffer);
85      String html = buffer.toString().trim();
86      return html.isEmpty() ? null : MarkupLine.fromHtml(html);
87    }
88  
89    public MarkupMultiline parseMarkupMultiline(XMLEventReader2 reader) throws XMLStreamException {
90      StringBuilder buffer = new StringBuilder();
91      parseToString(reader, buffer);
92      String html = buffer.toString().trim();
93  
94      if (LOGGER.isDebugEnabled()) {
95        LOGGER.debug("XML->HTML: {}", html);
96      }
97      return html.isEmpty() ? null : MarkupMultiline.fromHtml(html);
98    }
99  
100   protected void parseToString(XMLEventReader2 reader, StringBuilder buffer) // NOPMD - acceptable
101       throws XMLStreamException {
102     // if (LOGGER.isDebugEnabled()) {
103     // LOGGER.debug("parseToString(enter): {}",
104     // XmlEventUtil.toString(reader.peek()));
105     // }
106 
107     outer: while (reader.hasNextEvent() && !reader.peek().isEndElement()) {
108       // skip whitespace before the next block element
109       XMLEvent nextEvent = XmlEventUtil.skipWhitespace(reader);
110 
111       // if (LOGGER.isDebugEnabled()) {
112       // LOGGER.debug("parseToString: {}", XmlEventUtil.toString(nextEvent));
113       // }
114 
115       if (nextEvent.isStartElement()) {
116         StartElement start = nextEvent.asStartElement();
117         QName name = start.getName();
118 
119         // Note: the next element is not consumed. The called method is expected to
120         // consume it
121         if (BLOCK_ELEMENTS.contains(name.getLocalPart())) {
122           parseStartElement(reader, start, buffer);
123 
124           // the next event should be the event after the start's END_ELEMENT
125           // assert XmlEventUtil.isNextEventEndElement(reader, name) :
126           // XmlEventUtil.toString(reader.peek());
127         } else {
128           // throw new IllegalStateException();
129           // stop parsing on first unrecognized event
130           break outer;
131         }
132       }
133       // reader.nextEvent();
134 
135       // skip whitespace before the next block element
136       XmlEventUtil.skipWhitespace(reader);
137     }
138 
139     // if (LOGGER.isDebugEnabled()) {
140     // LOGGER.debug("parseToString(exit): {}", reader.peek() != null ?
141     // XmlEventUtil.toString(reader.peek()) : "");
142     // }
143   }
144 
145   private void parseStartElement(XMLEventReader2 reader, StartElement start, StringBuilder buffer)
146       throws XMLStreamException {
147     if (LOGGER.isDebugEnabled()) {
148       LOGGER.debug("parseStartElement(enter): {}", XmlEventUtil.toString(start));
149     }
150 
151     // consume the start event
152     reader.nextEvent();
153 
154     QName name = start.getName();
155     buffer.append('<')
156         .append(name.getLocalPart());
157     for (Attribute attribute : CollectionUtil.toIterable(
158         ObjectUtils.notNull(start.getAttributes()))) {
159       buffer
160           .append(' ')
161           .append(attribute.getName().getLocalPart())
162           .append("=\"")
163           .append(attribute.getValue())
164           .append('"');
165     }
166 
167     XMLEvent next = reader.peek();
168     if (next != null && next.isEndElement()) {
169       buffer.append("/>");
170       // consume end element event
171       reader.nextEvent();
172     } else {
173       buffer.append('>');
174 
175       // parse until the start's END_ELEMENT is reached
176       parseContents(reader, start, buffer);
177 
178       buffer
179           .append("</")
180           .append(name.getLocalPart())
181           .append('>');
182 
183       // the next event should be the start's END_ELEMENT
184       XmlEventUtil.assertNext(reader, XMLStreamConstants.END_ELEMENT, name);
185 
186       // consume the start's END_ELEMENT
187       reader.nextEvent();
188     }
189 
190     if (LOGGER.isDebugEnabled()) {
191       LOGGER.debug("parseStartElement(exit): {}", reader.peek() != null ? XmlEventUtil.toString(reader.peek()) : "");
192     }
193   }
194 
195   private void parseContents(XMLEventReader2 reader, StartElement start, StringBuilder buffer)
196       throws XMLStreamException {
197     // if (LOGGER.isDebugEnabled()) {
198     // LOGGER.debug("parseContents(enter): {}",
199     // XmlEventUtil.toString(reader.peek()));
200     // }
201 
202     XMLEvent event;
203     while (reader.hasNextEvent() && !(event = reader.peek()).isEndElement()) {
204       // // skip whitespace before the next list item
205       // event = XmlEventUtil.skipWhitespace(reader);
206 
207       // if (LOGGER.isDebugEnabled()) {
208       // LOGGER.debug("parseContents(before): {}", XmlEventUtil.toString(event));
209       // }
210 
211       if (event.isStartElement()) {
212         StartElement nextStart = event.asStartElement();
213         // QName nextName = nextStart.getName();
214         parseStartElement(reader, nextStart, buffer);
215 
216         // if (LOGGER.isDebugEnabled()) {
217         // LOGGER.debug("parseContents(after): {}",
218         // XmlEventUtil.toString(reader.peek()));
219         // }
220 
221         // assert XmlEventUtil.isNextEventEndElement(reader, nextName) :
222         // XmlEventUtil.toString(reader.peek());
223 
224         // reader.nextEvent();
225       } else if (event.isCharacters()) {
226         Characters characters = event.asCharacters();
227         buffer.append(Escaping.escapeHtml(characters.getData(), true));
228         reader.nextEvent();
229       }
230     }
231 
232     assert start == null
233         || XmlEventUtil.isEventEndElement(reader.peek(), ObjectUtils.notNull(start.getName())) : XmlEventUtil
234             .generateExpectedMessage(reader.peek(), XMLStreamConstants.END_ELEMENT, start.getName());
235 
236     // if (LOGGER.isDebugEnabled()) {
237     // LOGGER.debug("parseContents(exit): {}", reader.peek() != null ?
238     // XmlEventUtil.toString(reader.peek()) : "");
239     // }
240   }
241 
242 }