View Javadoc
1   /*
2    * Portions of this software was developed by employees of the National Institute
3    * of Standards and Technology (NIST), an agency of the Federal Government and is
4    * being made available as a public service. Pursuant to title 17 United States
5    * Code Section 105, works of NIST employees are not subject to copyright
6    * protection in the United States. This software may be subject to foreign
7    * copyright. Permission in the United States and in foreign countries, to the
8    * extent that NIST may hold copyright, to use, copy, modify, create derivative
9    * works, and distribute this software and its documentation without fee is hereby
10   * granted on a non-exclusive basis, provided that this notice and disclaimer
11   * of warranty appears in all copies.
12   *
13   * THE SOFTWARE IS PROVIDED 'AS IS' WITHOUT ANY WARRANTY OF ANY KIND, EITHER
14   * EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY
15   * THAT THE SOFTWARE WILL CONFORM TO SPECIFICATIONS, ANY IMPLIED WARRANTIES OF
16   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND FREEDOM FROM
17   * INFRINGEMENT, AND ANY WARRANTY THAT THE DOCUMENTATION WILL CONFORM TO THE
18   * SOFTWARE, OR ANY WARRANTY THAT THE SOFTWARE WILL BE ERROR FREE.  IN NO EVENT
19   * SHALL NIST BE LIABLE FOR ANY DAMAGES, INCLUDING, BUT NOT LIMITED TO, DIRECT,
20   * INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES, ARISING OUT OF, RESULTING FROM,
21   * OR IN ANY WAY CONNECTED WITH THIS SOFTWARE, WHETHER OR NOT BASED UPON WARRANTY,
22   * CONTRACT, TORT, OR OTHERWISE, WHETHER OR NOT INJURY WAS SUSTAINED BY PERSONS OR
23   * PROPERTY OR OTHERWISE, AND WHETHER OR NOT LOSS WAS SUSTAINED FROM, OR AROSE OUT
24   * OF THE RESULTS OF, OR USE OF, THE SOFTWARE OR SERVICES PROVIDED HEREUNDER.
25   */
26  
27  package gov.nist.secauto.metaschema.core.datatype.markup;
28  
29  import com.ctc.wstx.api.WstxOutputProperties;
30  import com.ctc.wstx.stax.WstxOutputFactory;
31  import com.vladsch.flexmark.formatter.Formatter;
32  import com.vladsch.flexmark.html2md.converter.FlexmarkHtmlConverter;
33  import com.vladsch.flexmark.parser.Parser;
34  import com.vladsch.flexmark.util.ast.Document;
35  import com.vladsch.flexmark.util.ast.Node;
36  
37  import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.AstCollectingVisitor;
38  import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.FlexmarkFactory;
39  import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.IMarkupVisitor;
40  import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.IMarkupWriter;
41  import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.InsertAnchorExtension.InsertAnchorNode;
42  import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.InsertVisitor;
43  import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.MarkupVisitor;
44  import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.MarkupXmlEventWriter;
45  import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.MarkupXmlStreamWriter;
46  import gov.nist.secauto.metaschema.core.util.ObjectUtils;
47  
48  import org.apache.logging.log4j.LogManager;
49  import org.apache.logging.log4j.Logger;
50  import org.codehaus.stax2.XMLOutputFactory2;
51  import org.codehaus.stax2.XMLStreamWriter2;
52  import org.codehaus.stax2.evt.XMLEventFactory2;
53  import org.jsoup.Jsoup;
54  import org.jsoup.nodes.TextNode;
55  import org.jsoup.select.NodeTraversor;
56  import org.jsoup.select.NodeVisitor;
57  
58  import java.io.ByteArrayOutputStream;
59  import java.io.IOException;
60  import java.nio.charset.StandardCharsets;
61  import java.util.List;
62  import java.util.function.Predicate;
63  import java.util.regex.Pattern;
64  import java.util.stream.Stream;
65  import java.util.stream.StreamSupport;
66  
67  import javax.xml.stream.XMLEventWriter;
68  import javax.xml.stream.XMLOutputFactory;
69  import javax.xml.stream.XMLStreamException;
70  import javax.xml.stream.XMLStreamWriter;
71  
72  import edu.umd.cs.findbugs.annotations.NonNull;
73  import edu.umd.cs.findbugs.annotations.Nullable;
74  
75  public abstract class AbstractMarkupString<TYPE extends AbstractMarkupString<TYPE>>
76      implements IMarkupString<TYPE> {
77    private static final Logger LOGGER = LogManager.getLogger(FlexmarkFactory.class);
78  
79    private static final Pattern QUOTE_TAG_REPLACEMENT_PATTERN
80        = Pattern.compile("</?q>");
81  
82    //
83    // @NonNull
84    // private static final String DEFAULT_HTML_NS = "http://www.w3.org/1999/xhtml";
85    // @NonNull
86    // private static final String DEFAULT_HTML_PREFIX = "";
87  
88    @NonNull
89    private final Document document;
90  
91    public AbstractMarkupString(@NonNull Document document) {
92      this.document = document;
93    }
94  
95    @Override
96    public Document getDocument() {
97      return document;
98    }
99  
100   // @Override
101   // public void writeHtml(@NonNull XMLStreamWriter2 xmlStreamWriter, @NonNull
102   // String namespace)
103   // throws XMLStreamException {
104   //
105   //
106   // IMarkupString<?> markupString = (IMarkupString<>)value;
107   //
108   // MarkupXmlStreamWriter writingVisitor
109   // = new MarkupXmlStreamWriter(namespace, markupString.isBlock());
110   // writingVisitor.visitChildren(getDocument(), xmlStreamWriter);
111   // xmlStreamWriter.flush();
112   // }
113   //
114   // @Override
115   // public void writeHtml(@NonNull OutputStream os, @Nullable String namespace,
116   // @Nullable String
117   // prefix)
118   // throws XMLStreamException {
119   // XMLOutputFactory2 factory = (XMLOutputFactory2)
120   // XMLOutputFactory.newInstance();
121   // assert factory instanceof WstxOutputFactory;
122   // factory.setProperty(WstxOutputProperties.P_OUTPUT_VALIDATE_STRUCTURE, false);
123   // XMLStreamWriter2 xmlStreamWriter = (XMLStreamWriter2)
124   // factory.createXMLStreamWriter(os);
125   //
126   // String effectiveNamespace = namespace == null ? DEFAULT_HTML_NS : namespace;
127   // String effectivePrefix = prefix == null ? DEFAULT_HTML_PREFIX : prefix;
128   // NamespaceContext nsContext =
129   // MergedNsContext.construct(xmlStreamWriter.getNamespaceContext(),
130   // List.of(NamespaceEventImpl.constructNamespace(null, effectivePrefix,
131   // effectiveNamespace)));
132   // xmlStreamWriter.setNamespaceContext(nsContext);
133   //
134   //
135   // writeHtml(xmlStreamWriter, effectiveNamespace);
136   // }
137 
138   @NonNull
139   protected static Document parseHtml(@NonNull String html, @NonNull FlexmarkHtmlConverter htmlParser,
140       @NonNull Parser markdownParser) {
141     org.jsoup.nodes.Document document = Jsoup.parse(html);
142 
143     // Fix for usnistgov/liboscal-java#5
144     // Caused by not stripping out extra newlines inside HTML tags
145     NodeTraversor.traverse(new NodeVisitor() {
146 
147       @Override
148       public void head(org.jsoup.nodes.Node node, int depth) {
149         if (node instanceof TextNode) {
150           TextNode textNode = (TextNode) node;
151 
152           org.jsoup.nodes.Node parent = textNode.parent();
153 
154           if (!isTag(parent, "code") || !isTag(parent.parent(), "pre")) {
155             node.replaceWith(new TextNode(textNode.text()));
156           }
157         }
158       }
159 
160       private boolean isTag(@Nullable org.jsoup.nodes.Node node, @NonNull String tagName) {
161         return node != null && tagName.equals(node.normalName());
162       }
163 
164     }, document);
165 
166     String markdown = htmlParser.convert(document);
167     assert markdown != null;
168     if (LOGGER.isDebugEnabled()) {
169       LOGGER.debug("html->markdown: {}", markdown);
170     }
171     return parseMarkdown(markdown, markdownParser);
172   }
173 
174   @SuppressWarnings("null")
175   @NonNull
176   protected static Document parseMarkdown(@NonNull String markdown, @NonNull Parser parser) {
177     return parser.parse(markdown);
178   }
179 
180   @Override
181   public String toXHtml(@NonNull String namespace) throws XMLStreamException, IOException {
182 
183     String retval;
184 
185     Document document = getDocument();
186     if (document.hasChildren()) {
187 
188       XMLOutputFactory2 factory = (XMLOutputFactory2) XMLOutputFactory.newInstance();
189       assert factory instanceof WstxOutputFactory;
190       factory.setProperty(WstxOutputProperties.P_OUTPUT_VALIDATE_STRUCTURE, false);
191       try (ByteArrayOutputStream os = new ByteArrayOutputStream()) {
192         XMLStreamWriter2 xmlStreamWriter = (XMLStreamWriter2) factory.createXMLStreamWriter(os);
193 
194         writeXHtml(namespace, ObjectUtils.notNull(xmlStreamWriter));
195 
196         xmlStreamWriter.flush();
197         xmlStreamWriter.close();
198         os.flush();
199         retval = ObjectUtils.notNull(os.toString(StandardCharsets.UTF_8));
200       }
201     } else {
202       retval = "";
203     }
204     return retval;
205   }
206 
207   @Override
208   public String toHtml() {
209     // String html;
210     // try {
211     // html = toXHtml("");
212     // } catch(RuntimeException ex) {
213     // throw ex;
214     // } catch (Throwable ex) {
215     // throw new RuntimeException(ex);
216     // }
217     // return QUOTE_TAG_REPLACEMENT_PATTERN.matcher(html)
218     // .replaceAll("&quot;");
219     String html = getFlexmarkFactory().getHtmlRenderer().render(getDocument());
220     return ObjectUtils.notNull(QUOTE_TAG_REPLACEMENT_PATTERN.matcher(html)
221         .replaceAll("&quot;"));
222   }
223 
224   @Override
225   public String toMarkdown() {
226     return toMarkdown(getFlexmarkFactory().getFormatter());
227   }
228 
229   @Override
230   public String toMarkdown(Formatter formatter) {
231     return ObjectUtils.notNull(formatter.render(getDocument()));
232   }
233 
234   @Override
235   public void writeXHtml(String namespace, XMLStreamWriter2 streamWriter) throws XMLStreamException {
236     Document document = getDocument();
237     if (document.hasChildren()) {
238       IMarkupWriter<XMLStreamWriter, XMLStreamException> writer = new MarkupXmlStreamWriter(
239           namespace,
240           getFlexmarkFactory().getListOptions(),
241           streamWriter);
242 
243       IMarkupVisitor<XMLStreamWriter, XMLStreamException> visitor = new MarkupVisitor<>(isBlock());
244       visitor.visitDocument(document, writer);
245     } else {
246       streamWriter.writeCharacters("");
247     }
248   }
249 
250   @Override
251   public void writeXHtml(String namespace, XMLEventFactory2 eventFactory, XMLEventWriter eventWriter)
252       throws XMLStreamException {
253     Document document = getDocument();
254     if (document.hasChildren()) {
255 
256       IMarkupWriter<XMLEventWriter, XMLStreamException> writer = new MarkupXmlEventWriter(
257           namespace,
258           getFlexmarkFactory().getListOptions(),
259           eventWriter,
260           eventFactory);
261 
262       IMarkupVisitor<XMLEventWriter, XMLStreamException> visitor = new MarkupVisitor<>(isBlock());
263       visitor.visitDocument(getDocument(), writer);
264     } else {
265       eventWriter.add(eventFactory.createSpace(""));
266     }
267 
268   }
269 
270   @SuppressWarnings("null")
271   @Override
272   public Stream<Node> getNodesAsStream() {
273     return Stream.concat(Stream.of(getDocument()),
274         StreamSupport.stream(getDocument().getDescendants().spliterator(), false));
275   }
276 
277   @Override
278   @NonNull
279   public List<InsertAnchorNode> getInserts() {
280     return getInserts(insert -> true);
281   }
282 
283   /**
284    * Retrieve all insert statements that are contained within this markup text
285    * that match the provided filter.
286    *
287    * @param filter
288    *          a filter used to identify matching insert statements
289    * @return the matching insert statements
290    */
291   @Override
292   @NonNull
293   public List<InsertAnchorNode> getInserts(@NonNull Predicate<InsertAnchorNode> filter) {
294     InsertVisitor visitor = new InsertVisitor(filter);
295     visitor.visitChildren(getDocument());
296     return visitor.getInserts();
297   }
298 
299   @Override
300   public String toString() {
301     return AstCollectingVisitor.asString(getDocument());
302   }
303 }