001/*
002 * Portions of this software was developed by employees of the National Institute
003 * of Standards and Technology (NIST), an agency of the Federal Government and is
004 * being made available as a public service. Pursuant to title 17 United States
005 * Code Section 105, works of NIST employees are not subject to copyright
006 * protection in the United States. This software may be subject to foreign
007 * copyright. Permission in the United States and in foreign countries, to the
008 * extent that NIST may hold copyright, to use, copy, modify, create derivative
009 * works, and distribute this software and its documentation without fee is hereby
010 * granted on a non-exclusive basis, provided that this notice and disclaimer
011 * of warranty appears in all copies.
012 *
013 * THE SOFTWARE IS PROVIDED 'AS IS' WITHOUT ANY WARRANTY OF ANY KIND, EITHER
014 * EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY
015 * THAT THE SOFTWARE WILL CONFORM TO SPECIFICATIONS, ANY IMPLIED WARRANTIES OF
016 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND FREEDOM FROM
017 * INFRINGEMENT, AND ANY WARRANTY THAT THE DOCUMENTATION WILL CONFORM TO THE
018 * SOFTWARE, OR ANY WARRANTY THAT THE SOFTWARE WILL BE ERROR FREE.  IN NO EVENT
019 * SHALL NIST BE LIABLE FOR ANY DAMAGES, INCLUDING, BUT NOT LIMITED TO, DIRECT,
020 * INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES, ARISING OUT OF, RESULTING FROM,
021 * OR IN ANY WAY CONNECTED WITH THIS SOFTWARE, WHETHER OR NOT BASED UPON WARRANTY,
022 * CONTRACT, TORT, OR OTHERWISE, WHETHER OR NOT INJURY WAS SUSTAINED BY PERSONS OR
023 * PROPERTY OR OTHERWISE, AND WHETHER OR NOT LOSS WAS SUSTAINED FROM, OR AROSE OUT
024 * OF THE RESULTS OF, OR USE OF, THE SOFTWARE OR SERVICES PROVIDED HEREUNDER.
025 */
026
027package gov.nist.secauto.metaschema.core.datatype.markup;
028
029import com.ctc.wstx.api.WstxOutputProperties;
030import com.ctc.wstx.stax.WstxOutputFactory;
031import com.vladsch.flexmark.formatter.Formatter;
032import com.vladsch.flexmark.html2md.converter.FlexmarkHtmlConverter;
033import com.vladsch.flexmark.parser.Parser;
034import com.vladsch.flexmark.util.ast.Document;
035import com.vladsch.flexmark.util.ast.Node;
036
037import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.AstCollectingVisitor;
038import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.FlexmarkFactory;
039import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.IMarkupVisitor;
040import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.IMarkupWriter;
041import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.InsertAnchorExtension.InsertAnchorNode;
042import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.InsertVisitor;
043import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.MarkupVisitor;
044import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.MarkupXmlEventWriter;
045import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.MarkupXmlStreamWriter;
046import gov.nist.secauto.metaschema.core.util.ObjectUtils;
047
048import org.apache.logging.log4j.LogManager;
049import org.apache.logging.log4j.Logger;
050import org.codehaus.stax2.XMLOutputFactory2;
051import org.codehaus.stax2.XMLStreamWriter2;
052import org.codehaus.stax2.evt.XMLEventFactory2;
053import org.jsoup.Jsoup;
054import org.jsoup.nodes.TextNode;
055import org.jsoup.select.NodeTraversor;
056import org.jsoup.select.NodeVisitor;
057
058import java.io.ByteArrayOutputStream;
059import java.io.IOException;
060import java.nio.charset.StandardCharsets;
061import java.util.List;
062import java.util.function.Predicate;
063import java.util.regex.Pattern;
064import java.util.stream.Stream;
065import java.util.stream.StreamSupport;
066
067import javax.xml.stream.XMLEventWriter;
068import javax.xml.stream.XMLOutputFactory;
069import javax.xml.stream.XMLStreamException;
070import javax.xml.stream.XMLStreamWriter;
071
072import edu.umd.cs.findbugs.annotations.NonNull;
073import edu.umd.cs.findbugs.annotations.Nullable;
074
075public abstract class AbstractMarkupString<TYPE extends AbstractMarkupString<TYPE>>
076    implements IMarkupString<TYPE> {
077  private static final Logger LOGGER = LogManager.getLogger(FlexmarkFactory.class);
078
079  private static final Pattern QUOTE_TAG_REPLACEMENT_PATTERN
080      = Pattern.compile("</?q>");
081
082  //
083  // @NonNull
084  // private static final String DEFAULT_HTML_NS = "http://www.w3.org/1999/xhtml";
085  // @NonNull
086  // private static final String DEFAULT_HTML_PREFIX = "";
087
  /**
   * The Flexmark document that holds the parsed content of this markup string.
   */
  @NonNull
  private final Document document;

  /**
   * Construct a new markup string backed by the provided document.
   *
   * @param document
   *          the document to store; it is kept by reference and later returned
   *          by {@link #getDocument()}
   */
  public AbstractMarkupString(@NonNull Document document) {
    this.document = document;
  }
094
  @Override
  public Document getDocument() {
    // the same document instance that was supplied to the constructor
    return document;
  }
099
100  // @Override
101  // public void writeHtml(@NonNull XMLStreamWriter2 xmlStreamWriter, @NonNull
102  // String namespace)
103  // throws XMLStreamException {
104  //
105  //
106  // IMarkupString<?> markupString = (IMarkupString<>)value;
107  //
108  // MarkupXmlStreamWriter writingVisitor
109  // = new MarkupXmlStreamWriter(namespace, markupString.isBlock());
110  // writingVisitor.visitChildren(getDocument(), xmlStreamWriter);
111  // xmlStreamWriter.flush();
112  // }
113  //
114  // @Override
115  // public void writeHtml(@NonNull OutputStream os, @Nullable String namespace,
116  // @Nullable String
117  // prefix)
118  // throws XMLStreamException {
119  // XMLOutputFactory2 factory = (XMLOutputFactory2)
120  // XMLOutputFactory.newInstance();
121  // assert factory instanceof WstxOutputFactory;
122  // factory.setProperty(WstxOutputProperties.P_OUTPUT_VALIDATE_STRUCTURE, false);
123  // XMLStreamWriter2 xmlStreamWriter = (XMLStreamWriter2)
124  // factory.createXMLStreamWriter(os);
125  //
126  // String effectiveNamespace = namespace == null ? DEFAULT_HTML_NS : namespace;
127  // String effectivePrefix = prefix == null ? DEFAULT_HTML_PREFIX : prefix;
128  // NamespaceContext nsContext =
129  // MergedNsContext.construct(xmlStreamWriter.getNamespaceContext(),
130  // List.of(NamespaceEventImpl.constructNamespace(null, effectivePrefix,
131  // effectiveNamespace)));
132  // xmlStreamWriter.setNamespaceContext(nsContext);
133  //
134  //
135  // writeHtml(xmlStreamWriter, effectiveNamespace);
136  // }
137
138  @NonNull
139  protected static Document parseHtml(@NonNull String html, @NonNull FlexmarkHtmlConverter htmlParser,
140      @NonNull Parser markdownParser) {
141    org.jsoup.nodes.Document document = Jsoup.parse(html);
142
143    // Fix for usnistgov/liboscal-java#5
144    // Caused by not stripping out extra newlines inside HTML tags
145    NodeTraversor.traverse(new NodeVisitor() {
146
147      @Override
148      public void head(org.jsoup.nodes.Node node, int depth) {
149        if (node instanceof TextNode) {
150          TextNode textNode = (TextNode) node;
151
152          org.jsoup.nodes.Node parent = textNode.parent();
153
154          if (!isTag(parent, "code") || !isTag(parent.parent(), "pre")) {
155            node.replaceWith(new TextNode(textNode.text()));
156          }
157        }
158      }
159
160      private boolean isTag(@Nullable org.jsoup.nodes.Node node, @NonNull String tagName) {
161        return node != null && tagName.equals(node.normalName());
162      }
163
164    }, document);
165
166    String markdown = htmlParser.convert(document);
167    assert markdown != null;
168    if (LOGGER.isDebugEnabled()) {
169      LOGGER.debug("html->markdown: {}", markdown);
170    }
171    return parseMarkdown(markdown, markdownParser);
172  }
173
174  @SuppressWarnings("null")
175  @NonNull
176  protected static Document parseMarkdown(@NonNull String markdown, @NonNull Parser parser) {
177    return parser.parse(markdown);
178  }
179
180  @Override
181  public String toXHtml(@NonNull String namespace) throws XMLStreamException, IOException {
182
183    String retval;
184
185    Document document = getDocument();
186    if (document.hasChildren()) {
187
188      XMLOutputFactory2 factory = (XMLOutputFactory2) XMLOutputFactory.newInstance();
189      assert factory instanceof WstxOutputFactory;
190      factory.setProperty(WstxOutputProperties.P_OUTPUT_VALIDATE_STRUCTURE, false);
191      try (ByteArrayOutputStream os = new ByteArrayOutputStream()) {
192        XMLStreamWriter2 xmlStreamWriter = (XMLStreamWriter2) factory.createXMLStreamWriter(os);
193
194        writeXHtml(namespace, ObjectUtils.notNull(xmlStreamWriter));
195
196        xmlStreamWriter.flush();
197        xmlStreamWriter.close();
198        os.flush();
199        retval = ObjectUtils.notNull(os.toString(StandardCharsets.UTF_8));
200      }
201    } else {
202      retval = "";
203    }
204    return retval;
205  }
206
207  @Override
208  public String toHtml() {
209    // String html;
210    // try {
211    // html = toXHtml("");
212    // } catch(RuntimeException ex) {
213    // throw ex;
214    // } catch (Throwable ex) {
215    // throw new RuntimeException(ex);
216    // }
217    // return QUOTE_TAG_REPLACEMENT_PATTERN.matcher(html)
218    // .replaceAll("&quot;");
219    String html = getFlexmarkFactory().getHtmlRenderer().render(getDocument());
220    return ObjectUtils.notNull(QUOTE_TAG_REPLACEMENT_PATTERN.matcher(html)
221        .replaceAll("&quot;"));
222  }
223
224  @Override
225  public String toMarkdown() {
226    return toMarkdown(getFlexmarkFactory().getFormatter());
227  }
228
229  @Override
230  public String toMarkdown(Formatter formatter) {
231    return ObjectUtils.notNull(formatter.render(getDocument()));
232  }
233
234  @Override
235  public void writeXHtml(String namespace, XMLStreamWriter2 streamWriter) throws XMLStreamException {
236    Document document = getDocument();
237    if (document.hasChildren()) {
238      IMarkupWriter<XMLStreamWriter, XMLStreamException> writer = new MarkupXmlStreamWriter(
239          namespace,
240          getFlexmarkFactory().getListOptions(),
241          streamWriter);
242
243      IMarkupVisitor<XMLStreamWriter, XMLStreamException> visitor = new MarkupVisitor<>(isBlock());
244      visitor.visitDocument(document, writer);
245    } else {
246      streamWriter.writeCharacters("");
247    }
248  }
249
250  @Override
251  public void writeXHtml(String namespace, XMLEventFactory2 eventFactory, XMLEventWriter eventWriter)
252      throws XMLStreamException {
253    Document document = getDocument();
254    if (document.hasChildren()) {
255
256      IMarkupWriter<XMLEventWriter, XMLStreamException> writer = new MarkupXmlEventWriter(
257          namespace,
258          getFlexmarkFactory().getListOptions(),
259          eventWriter,
260          eventFactory);
261
262      IMarkupVisitor<XMLEventWriter, XMLStreamException> visitor = new MarkupVisitor<>(isBlock());
263      visitor.visitDocument(getDocument(), writer);
264    } else {
265      eventWriter.add(eventFactory.createSpace(""));
266    }
267
268  }
269
270  @SuppressWarnings("null")
271  @Override
272  public Stream<Node> getNodesAsStream() {
273    return Stream.concat(Stream.of(getDocument()),
274        StreamSupport.stream(getDocument().getDescendants().spliterator(), false));
275  }
276
277  @Override
278  @NonNull
279  public List<InsertAnchorNode> getInserts() {
280    return getInserts(insert -> true);
281  }
282
283  /**
284   * Retrieve all insert statements that are contained within this markup text
285   * that match the provided filter.
286   *
287   * @param filter
288   *          a filter used to identify matching insert statements
289   * @return the matching insert statements
290   */
291  @Override
292  @NonNull
293  public List<InsertAnchorNode> getInserts(@NonNull Predicate<InsertAnchorNode> filter) {
294    InsertVisitor visitor = new InsertVisitor(filter);
295    visitor.visitChildren(getDocument());
296    return visitor.getInserts();
297  }
298
299  @Override
300  public String toString() {
301    return AstCollectingVisitor.asString(getDocument());
302  }
303}