001/*
002 * Portions of this software was developed by employees of the National Institute
003 * of Standards and Technology (NIST), an agency of the Federal Government and is
004 * being made available as a public service. Pursuant to title 17 United States
005 * Code Section 105, works of NIST employees are not subject to copyright
006 * protection in the United States. This software may be subject to foreign
007 * copyright. Permission in the United States and in foreign countries, to the
008 * extent that NIST may hold copyright, to use, copy, modify, create derivative
009 * works, and distribute this software and its documentation without fee is hereby
010 * granted on a non-exclusive basis, provided that this notice and disclaimer
011 * of warranty appears in all copies.
012 *
013 * THE SOFTWARE IS PROVIDED 'AS IS' WITHOUT ANY WARRANTY OF ANY KIND, EITHER
014 * EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY
015 * THAT THE SOFTWARE WILL CONFORM TO SPECIFICATIONS, ANY IMPLIED WARRANTIES OF
016 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND FREEDOM FROM
017 * INFRINGEMENT, AND ANY WARRANTY THAT THE DOCUMENTATION WILL CONFORM TO THE
018 * SOFTWARE, OR ANY WARRANTY THAT THE SOFTWARE WILL BE ERROR FREE.  IN NO EVENT
019 * SHALL NIST BE LIABLE FOR ANY DAMAGES, INCLUDING, BUT NOT LIMITED TO, DIRECT,
020 * INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES, ARISING OUT OF, RESULTING FROM,
021 * OR IN ANY WAY CONNECTED WITH THIS SOFTWARE, WHETHER OR NOT BASED UPON WARRANTY,
022 * CONTRACT, TORT, OR OTHERWISE, WHETHER OR NOT INJURY WAS SUSTAINED BY PERSONS OR
023 * PROPERTY OR OTHERWISE, AND WHETHER OR NOT LOSS WAS SUSTAINED FROM, OR AROSE OUT
024 * OF THE RESULTS OF, OR USE OF, THE SOFTWARE OR SERVICES PROVIDED HEREUNDER.
025 */
026
027package gov.nist.secauto.metaschema.core.datatype.markup.flexmark;
028
029import com.vladsch.flexmark.util.sequence.Escaping;
030
031import gov.nist.secauto.metaschema.core.datatype.markup.MarkupLine;
032import gov.nist.secauto.metaschema.core.datatype.markup.MarkupMultiline;
033import gov.nist.secauto.metaschema.core.model.util.XmlEventUtil;
034import gov.nist.secauto.metaschema.core.util.CollectionUtil;
035import gov.nist.secauto.metaschema.core.util.ObjectUtils;
036
037import org.apache.logging.log4j.LogManager;
038import org.apache.logging.log4j.Logger;
039import org.codehaus.stax2.XMLEventReader2;
040
041import java.util.Set;
042
043import javax.xml.namespace.QName;
044import javax.xml.stream.XMLStreamConstants;
045import javax.xml.stream.XMLStreamException;
046import javax.xml.stream.events.Attribute;
047import javax.xml.stream.events.Characters;
048import javax.xml.stream.events.StartElement;
049import javax.xml.stream.events.XMLEvent;
050
051import edu.umd.cs.findbugs.annotations.NonNull;
052
053public class XmlMarkupParser { // NOPMD - acceptable
054  private static final Logger LOGGER = LogManager.getLogger(XmlMarkupParser.class);
055
056  @NonNull
057  public static final Set<String> BLOCK_ELEMENTS = ObjectUtils.notNull(
058      Set.of(
059          "h1",
060          "h2",
061          "h3",
062          "h4",
063          "h5",
064          "h6",
065          "ul",
066          "ol",
067          "pre",
068          "hr",
069          "blockquote",
070          "p",
071          "table",
072          "img"));
073
074  @NonNull
075  private static final XmlMarkupParser SINGLETON = new XmlMarkupParser();
076
077  @NonNull
078  public static XmlMarkupParser instance() {
079    return SINGLETON;
080  }
081
082  public MarkupLine parseMarkupline(XMLEventReader2 reader) throws XMLStreamException { // NOPMD - acceptable
083    StringBuilder buffer = new StringBuilder();
084    parseContents(reader, null, buffer);
085    String html = buffer.toString().trim();
086    return html.isEmpty() ? null : MarkupLine.fromHtml(html);
087  }
088
089  public MarkupMultiline parseMarkupMultiline(XMLEventReader2 reader) throws XMLStreamException {
090    StringBuilder buffer = new StringBuilder();
091    parseToString(reader, buffer);
092    String html = buffer.toString().trim();
093
094    if (LOGGER.isDebugEnabled()) {
095      LOGGER.debug("XML->HTML: {}", html);
096    }
097    return html.isEmpty() ? null : MarkupMultiline.fromHtml(html);
098  }
099
100  protected void parseToString(XMLEventReader2 reader, StringBuilder buffer) // NOPMD - acceptable
101      throws XMLStreamException {
102    // if (LOGGER.isDebugEnabled()) {
103    // LOGGER.debug("parseToString(enter): {}",
104    // XmlEventUtil.toString(reader.peek()));
105    // }
106
107    outer: while (reader.hasNextEvent() && !reader.peek().isEndElement()) {
108      // skip whitespace before the next block element
109      XMLEvent nextEvent = XmlEventUtil.skipWhitespace(reader);
110
111      // if (LOGGER.isDebugEnabled()) {
112      // LOGGER.debug("parseToString: {}", XmlEventUtil.toString(nextEvent));
113      // }
114
115      if (nextEvent.isStartElement()) {
116        StartElement start = nextEvent.asStartElement();
117        QName name = start.getName();
118
119        // Note: the next element is not consumed. The called method is expected to
120        // consume it
121        if (BLOCK_ELEMENTS.contains(name.getLocalPart())) {
122          parseStartElement(reader, start, buffer);
123
124          // the next event should be the event after the start's END_ELEMENT
125          // assert XmlEventUtil.isNextEventEndElement(reader, name) :
126          // XmlEventUtil.toString(reader.peek());
127        } else {
128          // throw new IllegalStateException();
129          // stop parsing on first unrecognized event
130          break outer;
131        }
132      }
133      // reader.nextEvent();
134
135      // skip whitespace before the next block element
136      XmlEventUtil.skipWhitespace(reader);
137    }
138
139    // if (LOGGER.isDebugEnabled()) {
140    // LOGGER.debug("parseToString(exit): {}", reader.peek() != null ?
141    // XmlEventUtil.toString(reader.peek()) : "");
142    // }
143  }
144
145  private void parseStartElement(XMLEventReader2 reader, StartElement start, StringBuilder buffer)
146      throws XMLStreamException {
147    if (LOGGER.isDebugEnabled()) {
148      LOGGER.debug("parseStartElement(enter): {}", XmlEventUtil.toString(start));
149    }
150
151    // consume the start event
152    reader.nextEvent();
153
154    QName name = start.getName();
155    buffer.append('<')
156        .append(name.getLocalPart());
157    for (Attribute attribute : CollectionUtil.toIterable(
158        ObjectUtils.notNull(start.getAttributes()))) {
159      buffer
160          .append(' ')
161          .append(attribute.getName().getLocalPart())
162          .append("=\"")
163          .append(attribute.getValue())
164          .append('"');
165    }
166
167    XMLEvent next = reader.peek();
168    if (next != null && next.isEndElement()) {
169      buffer.append("/>");
170      // consume end element event
171      reader.nextEvent();
172    } else {
173      buffer.append('>');
174
175      // parse until the start's END_ELEMENT is reached
176      parseContents(reader, start, buffer);
177
178      buffer
179          .append("</")
180          .append(name.getLocalPart())
181          .append('>');
182
183      // the next event should be the start's END_ELEMENT
184      XmlEventUtil.assertNext(reader, XMLStreamConstants.END_ELEMENT, name);
185
186      // consume the start's END_ELEMENT
187      reader.nextEvent();
188    }
189
190    if (LOGGER.isDebugEnabled()) {
191      LOGGER.debug("parseStartElement(exit): {}", reader.peek() != null ? XmlEventUtil.toString(reader.peek()) : "");
192    }
193  }
194
195  private void parseContents(XMLEventReader2 reader, StartElement start, StringBuilder buffer)
196      throws XMLStreamException {
197    // if (LOGGER.isDebugEnabled()) {
198    // LOGGER.debug("parseContents(enter): {}",
199    // XmlEventUtil.toString(reader.peek()));
200    // }
201
202    XMLEvent event;
203    while (reader.hasNextEvent() && !(event = reader.peek()).isEndElement()) {
204      // // skip whitespace before the next list item
205      // event = XmlEventUtil.skipWhitespace(reader);
206
207      // if (LOGGER.isDebugEnabled()) {
208      // LOGGER.debug("parseContents(before): {}", XmlEventUtil.toString(event));
209      // }
210
211      if (event.isStartElement()) {
212        StartElement nextStart = event.asStartElement();
213        // QName nextName = nextStart.getName();
214        parseStartElement(reader, nextStart, buffer);
215
216        // if (LOGGER.isDebugEnabled()) {
217        // LOGGER.debug("parseContents(after): {}",
218        // XmlEventUtil.toString(reader.peek()));
219        // }
220
221        // assert XmlEventUtil.isNextEventEndElement(reader, nextName) :
222        // XmlEventUtil.toString(reader.peek());
223
224        // reader.nextEvent();
225      } else if (event.isCharacters()) {
226        Characters characters = event.asCharacters();
227        buffer.append(Escaping.escapeHtml(characters.getData(), true));
228        reader.nextEvent();
229      }
230    }
231
232    assert start == null
233        || XmlEventUtil.isEventEndElement(reader.peek(), ObjectUtils.notNull(start.getName())) : XmlEventUtil
234            .generateExpectedMessage(reader.peek(), XMLStreamConstants.END_ELEMENT, start.getName());
235
236    // if (LOGGER.isDebugEnabled()) {
237    // LOGGER.debug("parseContents(exit): {}", reader.peek() != null ?
238    // XmlEventUtil.toString(reader.peek()) : "");
239    // }
240  }
241
242}