AbstractMarkupString.java

/*
 * Portions of this software was developed by employees of the National Institute
 * of Standards and Technology (NIST), an agency of the Federal Government and is
 * being made available as a public service. Pursuant to title 17 United States
 * Code Section 105, works of NIST employees are not subject to copyright
 * protection in the United States. This software may be subject to foreign
 * copyright. Permission in the United States and in foreign countries, to the
 * extent that NIST may hold copyright, to use, copy, modify, create derivative
 * works, and distribute this software and its documentation without fee is hereby
 * granted on a non-exclusive basis, provided that this notice and disclaimer
 * of warranty appears in all copies.
 *
 * THE SOFTWARE IS PROVIDED 'AS IS' WITHOUT ANY WARRANTY OF ANY KIND, EITHER
 * EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY
 * THAT THE SOFTWARE WILL CONFORM TO SPECIFICATIONS, ANY IMPLIED WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND FREEDOM FROM
 * INFRINGEMENT, AND ANY WARRANTY THAT THE DOCUMENTATION WILL CONFORM TO THE
 * SOFTWARE, OR ANY WARRANTY THAT THE SOFTWARE WILL BE ERROR FREE.  IN NO EVENT
 * SHALL NIST BE LIABLE FOR ANY DAMAGES, INCLUDING, BUT NOT LIMITED TO, DIRECT,
 * INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES, ARISING OUT OF, RESULTING FROM,
 * OR IN ANY WAY CONNECTED WITH THIS SOFTWARE, WHETHER OR NOT BASED UPON WARRANTY,
 * CONTRACT, TORT, OR OTHERWISE, WHETHER OR NOT INJURY WAS SUSTAINED BY PERSONS OR
 * PROPERTY OR OTHERWISE, AND WHETHER OR NOT LOSS WAS SUSTAINED FROM, OR AROSE OUT
 * OF THE RESULTS OF, OR USE OF, THE SOFTWARE OR SERVICES PROVIDED HEREUNDER.
 */

package gov.nist.secauto.metaschema.core.datatype.markup;

import com.ctc.wstx.api.WstxOutputProperties;
import com.ctc.wstx.stax.WstxOutputFactory;
import com.vladsch.flexmark.formatter.Formatter;
import com.vladsch.flexmark.html2md.converter.FlexmarkHtmlConverter;
import com.vladsch.flexmark.parser.Parser;
import com.vladsch.flexmark.util.ast.Document;
import com.vladsch.flexmark.util.ast.Node;

import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.AstCollectingVisitor;
import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.FlexmarkFactory;
import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.IMarkupVisitor;
import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.IMarkupWriter;
import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.InsertAnchorExtension.InsertAnchorNode;
import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.InsertVisitor;
import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.MarkupVisitor;
import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.MarkupXmlEventWriter;
import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.MarkupXmlStreamWriter;
import gov.nist.secauto.metaschema.core.util.ObjectUtils;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.codehaus.stax2.XMLOutputFactory2;
import org.codehaus.stax2.XMLStreamWriter2;
import org.codehaus.stax2.evt.XMLEventFactory2;
import org.jsoup.Jsoup;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.function.Predicate;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

import javax.xml.stream.XMLEventWriter;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;

import edu.umd.cs.findbugs.annotations.NonNull;
import edu.umd.cs.findbugs.annotations.Nullable;

public abstract class AbstractMarkupString<TYPE extends AbstractMarkupString<TYPE>>
    implements IMarkupString<TYPE> {
  private static final Logger LOGGER = LogManager.getLogger(FlexmarkFactory.class);

  private static final Pattern QUOTE_TAG_REPLACEMENT_PATTERN
      = Pattern.compile("</?q>");

  //
  // @NonNull
  // private static final String DEFAULT_HTML_NS = "http://www.w3.org/1999/xhtml";
  // @NonNull
  // private static final String DEFAULT_HTML_PREFIX = "";

  @NonNull
  private final Document document;

  public AbstractMarkupString(@NonNull Document document) {
    this.document = document;
  }

  @Override
  public Document getDocument() {
    return document;
  }

  // @Override
  // public void writeHtml(@NonNull XMLStreamWriter2 xmlStreamWriter, @NonNull
  // String namespace)
  // throws XMLStreamException {
  //
  //
  // IMarkupString<?> markupString = (IMarkupString<>)value;
  //
  // MarkupXmlStreamWriter writingVisitor
  // = new MarkupXmlStreamWriter(namespace, markupString.isBlock());
  // writingVisitor.visitChildren(getDocument(), xmlStreamWriter);
  // xmlStreamWriter.flush();
  // }
  //
  // @Override
  // public void writeHtml(@NonNull OutputStream os, @Nullable String namespace,
  // @Nullable String
  // prefix)
  // throws XMLStreamException {
  // XMLOutputFactory2 factory = (XMLOutputFactory2)
  // XMLOutputFactory.newInstance();
  // assert factory instanceof WstxOutputFactory;
  // factory.setProperty(WstxOutputProperties.P_OUTPUT_VALIDATE_STRUCTURE, false);
  // XMLStreamWriter2 xmlStreamWriter = (XMLStreamWriter2)
  // factory.createXMLStreamWriter(os);
  //
  // String effectiveNamespace = namespace == null ? DEFAULT_HTML_NS : namespace;
  // String effectivePrefix = prefix == null ? DEFAULT_HTML_PREFIX : prefix;
  // NamespaceContext nsContext =
  // MergedNsContext.construct(xmlStreamWriter.getNamespaceContext(),
  // List.of(NamespaceEventImpl.constructNamespace(null, effectivePrefix,
  // effectiveNamespace)));
  // xmlStreamWriter.setNamespaceContext(nsContext);
  //
  //
  // writeHtml(xmlStreamWriter, effectiveNamespace);
  // }

  @NonNull
  protected static Document parseHtml(@NonNull String html, @NonNull FlexmarkHtmlConverter htmlParser,
      @NonNull Parser markdownParser) {
    org.jsoup.nodes.Document document = Jsoup.parse(html);

    // Fix for usnistgov/liboscal-java#5
    // Caused by not stripping out extra newlines inside HTML tags
    NodeTraversor.traverse(new NodeVisitor() {

      @Override
      public void head(org.jsoup.nodes.Node node, int depth) {
        if (node instanceof TextNode) {
          TextNode textNode = (TextNode) node;

          org.jsoup.nodes.Node parent = textNode.parent();

          if (!isTag(parent, "code") || !isTag(parent.parent(), "pre")) {
            node.replaceWith(new TextNode(textNode.text()));
          }
        }
      }

      private boolean isTag(@Nullable org.jsoup.nodes.Node node, @NonNull String tagName) {
        return node != null && tagName.equals(node.normalName());
      }

    }, document);

    String markdown = htmlParser.convert(document);
    assert markdown != null;
    if (LOGGER.isDebugEnabled()) {
      LOGGER.debug("html->markdown: {}", markdown);
    }
    return parseMarkdown(markdown, markdownParser);
  }

  @SuppressWarnings("null")
  @NonNull
  protected static Document parseMarkdown(@NonNull String markdown, @NonNull Parser parser) {
    return parser.parse(markdown);
  }

  @Override
  public String toXHtml(@NonNull String namespace) throws XMLStreamException, IOException {

    String retval;

    Document document = getDocument();
    if (document.hasChildren()) {

      XMLOutputFactory2 factory = (XMLOutputFactory2) XMLOutputFactory.newInstance();
      assert factory instanceof WstxOutputFactory;
      factory.setProperty(WstxOutputProperties.P_OUTPUT_VALIDATE_STRUCTURE, false);
      try (ByteArrayOutputStream os = new ByteArrayOutputStream()) {
        XMLStreamWriter2 xmlStreamWriter = (XMLStreamWriter2) factory.createXMLStreamWriter(os);

        writeXHtml(namespace, ObjectUtils.notNull(xmlStreamWriter));

        xmlStreamWriter.flush();
        xmlStreamWriter.close();
        os.flush();
        retval = ObjectUtils.notNull(os.toString(StandardCharsets.UTF_8));
      }
    } else {
      retval = "";
    }
    return retval;
  }

  @Override
  public String toHtml() {
    // String html;
    // try {
    // html = toXHtml("");
    // } catch(RuntimeException ex) {
    // throw ex;
    // } catch (Throwable ex) {
    // throw new RuntimeException(ex);
    // }
    // return QUOTE_TAG_REPLACEMENT_PATTERN.matcher(html)
    // .replaceAll("&quot;");
    String html = getFlexmarkFactory().getHtmlRenderer().render(getDocument());
    return ObjectUtils.notNull(QUOTE_TAG_REPLACEMENT_PATTERN.matcher(html)
        .replaceAll("&quot;"));
  }

  @Override
  public String toMarkdown() {
    return toMarkdown(getFlexmarkFactory().getFormatter());
  }

  @Override
  public String toMarkdown(Formatter formatter) {
    return ObjectUtils.notNull(formatter.render(getDocument()));
  }

  @Override
  public void writeXHtml(String namespace, XMLStreamWriter2 streamWriter) throws XMLStreamException {
    Document document = getDocument();
    if (document.hasChildren()) {
      IMarkupWriter<XMLStreamWriter, XMLStreamException> writer = new MarkupXmlStreamWriter(
          namespace,
          getFlexmarkFactory().getListOptions(),
          streamWriter);

      IMarkupVisitor<XMLStreamWriter, XMLStreamException> visitor = new MarkupVisitor<>(isBlock());
      visitor.visitDocument(document, writer);
    } else {
      streamWriter.writeCharacters("");
    }
  }

  @Override
  public void writeXHtml(String namespace, XMLEventFactory2 eventFactory, XMLEventWriter eventWriter)
      throws XMLStreamException {
    Document document = getDocument();
    if (document.hasChildren()) {

      IMarkupWriter<XMLEventWriter, XMLStreamException> writer = new MarkupXmlEventWriter(
          namespace,
          getFlexmarkFactory().getListOptions(),
          eventWriter,
          eventFactory);

      IMarkupVisitor<XMLEventWriter, XMLStreamException> visitor = new MarkupVisitor<>(isBlock());
      visitor.visitDocument(getDocument(), writer);
    } else {
      eventWriter.add(eventFactory.createSpace(""));
    }

  }

  @SuppressWarnings("null")
  @Override
  public Stream<Node> getNodesAsStream() {
    return Stream.concat(Stream.of(getDocument()),
        StreamSupport.stream(getDocument().getDescendants().spliterator(), false));
  }

  @Override
  @NonNull
  public List<InsertAnchorNode> getInserts() {
    return getInserts(insert -> true);
  }

  /**
   * Retrieve all insert statements that are contained within this markup text
   * that match the provided filter.
   *
   * @param filter
   *          a filter used to identify matching insert statements
   * @return the matching insert statements
   */
  @Override
  @NonNull
  public List<InsertAnchorNode> getInserts(@NonNull Predicate<InsertAnchorNode> filter) {
    InsertVisitor visitor = new InsertVisitor(filter);
    visitor.visitChildren(getDocument());
    return visitor.getInserts();
  }

  @Override
  public String toString() {
    return AstCollectingVisitor.asString(getDocument());
  }
}