/*
 * Portions of this software was developed by employees of the National Institute
 * of Standards and Technology (NIST), an agency of the Federal Government and is
 * being made available as a public service. Pursuant to title 17 United States
 * Code Section 105, works of NIST employees are not subject to copyright
 * protection in the United States. This software may be subject to foreign
 * copyright. Permission in the United States and in foreign countries, to the
 * extent that NIST may hold copyright, to use, copy, modify, create derivative
 * works, and distribute this software and its documentation without fee is hereby
 * granted on a non-exclusive basis, provided that this notice and disclaimer
 * of warranty appears in all copies.
 *
 * THE SOFTWARE IS PROVIDED 'AS IS' WITHOUT ANY WARRANTY OF ANY KIND, EITHER
 * EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY
 * THAT THE SOFTWARE WILL CONFORM TO SPECIFICATIONS, ANY IMPLIED WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND FREEDOM FROM
 * INFRINGEMENT, AND ANY WARRANTY THAT THE DOCUMENTATION WILL CONFORM TO THE
 * SOFTWARE, OR ANY WARRANTY THAT THE SOFTWARE WILL BE ERROR FREE. IN NO EVENT
 * SHALL NIST BE LIABLE FOR ANY DAMAGES, INCLUDING, BUT NOT LIMITED TO, DIRECT,
 * INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES, ARISING OUT OF, RESULTING FROM,
 * OR IN ANY WAY CONNECTED WITH THIS SOFTWARE, WHETHER OR NOT BASED UPON WARRANTY,
 * CONTRACT, TORT, OR OTHERWISE, WHETHER OR NOT INJURY WAS SUSTAINED BY PERSONS OR
 * PROPERTY OR OTHERWISE, AND WHETHER OR NOT LOSS WAS SUSTAINED FROM, OR AROSE OUT
 * OF THE RESULTS OF, OR USE OF, THE SOFTWARE OR SERVICES PROVIDED HEREUNDER.
 */

package gov.nist.secauto.metaschema.core.datatype.markup;

import com.ctc.wstx.api.WstxOutputProperties;
import com.ctc.wstx.stax.WstxOutputFactory;
import com.vladsch.flexmark.formatter.Formatter;
import com.vladsch.flexmark.html2md.converter.FlexmarkHtmlConverter;
import com.vladsch.flexmark.parser.Parser;
import com.vladsch.flexmark.util.ast.Document;
import com.vladsch.flexmark.util.ast.Node;

import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.AstCollectingVisitor;
import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.FlexmarkFactory;
import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.IMarkupVisitor;
import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.IMarkupWriter;
import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.InsertAnchorExtension.InsertAnchorNode;
import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.InsertVisitor;
import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.MarkupVisitor;
import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.MarkupXmlEventWriter;
import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.MarkupXmlStreamWriter;
import gov.nist.secauto.metaschema.core.util.ObjectUtils;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.codehaus.stax2.XMLOutputFactory2;
import org.codehaus.stax2.XMLStreamWriter2;
import org.codehaus.stax2.evt.XMLEventFactory2;
import org.jsoup.Jsoup;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.function.Predicate;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

import javax.xml.stream.XMLEventWriter;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;

import edu.umd.cs.findbugs.annotations.NonNull;
import edu.umd.cs.findbugs.annotations.Nullable;

/**
 * Base implementation of {@link IMarkupString} that wraps a parsed Flexmark
 * {@link Document} and provides conversions of that document to XHTML, HTML,
 * and Markdown.
 *
 * @param <TYPE>
 *          the concrete markup string type extending this class
 */
public abstract class AbstractMarkupString<TYPE extends AbstractMarkupString<TYPE>>
    implements IMarkupString<TYPE> {
  private static final Logger LOGGER = LogManager.getLogger(AbstractMarkupString.class);

  /**
   * Matches an opening or closing HTML {@code q} tag. Used by
   * {@link #toHtml()} to substitute each tag with a quotation mark entity.
   */
  private static final Pattern QUOTE_TAG_REPLACEMENT_PATTERN
      = Pattern.compile("</?q>");

  @NonNull
  private final Document document;

  /**
   * Construct a new markup string backed by the provided Flexmark document.
   *
   * @param document
   *          the parsed markup content
   */
  public AbstractMarkupString(@NonNull Document document) {
    this.document = document;
  }

  @Override
  public Document getDocument() {
    return document;
  }

  /**
   * Parse HTML-based text into a Flexmark document.
   * <p>
   * The HTML is first parsed using jsoup, whitespace in text nodes is
   * normalized, the result is converted to Markdown using the provided HTML
   * converter, and that Markdown is then parsed with the provided Markdown
   * parser.
   *
   * @param html
   *          the HTML text to parse
   * @param htmlParser
   *          the converter used to turn the HTML into Markdown
   * @param markdownParser
   *          the parser used to parse the intermediate Markdown
   * @return the parsed document
   */
  @NonNull
  protected static Document parseHtml(@NonNull String html, @NonNull FlexmarkHtmlConverter htmlParser,
      @NonNull Parser markdownParser) {
    org.jsoup.nodes.Document document = Jsoup.parse(html);

    // Fix for usnistgov/liboscal-java#5
    // Caused by not stripping out extra newlines inside HTML tags
    NodeTraversor.traverse(new NodeVisitor() {

      @Override
      public void head(org.jsoup.nodes.Node node, int depth) {
        if (node instanceof TextNode) {
          TextNode textNode = (TextNode) node;

          org.jsoup.nodes.Node parent = textNode.parent();

          // leave text directly inside <pre><code> untouched; everywhere else
          // replace the node with its whitespace-normalized text
          if (!isTag(parent, "code") || !isTag(parent.parent(), "pre")) {
            node.replaceWith(new TextNode(textNode.text()));
          }
        }
      }

      private boolean isTag(@Nullable org.jsoup.nodes.Node node, @NonNull String tagName) {
        return node != null && tagName.equals(node.normalName());
      }

    }, document);

    String markdown = htmlParser.convert(document);
    assert markdown != null;
    if (LOGGER.isDebugEnabled()) {
      LOGGER.debug("html->markdown: {}", markdown);
    }
    return parseMarkdown(markdown, markdownParser);
  }

  /**
   * Parse Markdown-based text into a Flexmark document.
   *
   * @param markdown
   *          the Markdown text to parse
   * @param parser
   *          the parser to use
   * @return the parsed document
   */
  @SuppressWarnings("null")
  @NonNull
  protected static Document parseMarkdown(@NonNull String markdown, @NonNull Parser parser) {
    return parser.parse(markdown);
  }

  /**
   * Serialize the markup as XHTML text using the provided XML namespace.
   * <p>
   * An empty string is returned when the underlying document has no children.
   *
   * @param namespace
   *          the XML namespace to use for the generated elements
   * @return the XHTML text, encoded and decoded as UTF-8
   * @throws XMLStreamException
   *           if an error occurred while writing the XML
   * @throws IOException
   *           if an error occurred while closing the intermediate buffer
   */
  @Override
  public String toXHtml(@NonNull String namespace) throws XMLStreamException, IOException {
    if (!getDocument().hasChildren()) {
      return "";
    }

    XMLOutputFactory2 factory = (XMLOutputFactory2) XMLOutputFactory.newInstance();
    assert factory instanceof WstxOutputFactory;
    // NOTE(review): structure validation is disabled presumably because the
    // generated markup is a fragment and not a single-rooted document - confirm
    factory.setProperty(WstxOutputProperties.P_OUTPUT_VALIDATE_STRUCTURE, false);

    try (ByteArrayOutputStream os = new ByteArrayOutputStream()) {
      XMLStreamWriter2 xmlStreamWriter = (XMLStreamWriter2) factory.createXMLStreamWriter(os);

      writeXHtml(namespace, ObjectUtils.notNull(xmlStreamWriter));

      xmlStreamWriter.flush();
      xmlStreamWriter.close();
      os.flush();
      return ObjectUtils.notNull(os.toString(StandardCharsets.UTF_8));
    }
  }

  /**
   * Render the markup as HTML text.
   * <p>
   * The document is rendered using the HTML renderer supplied by
   * {@code getFlexmarkFactory()}, after which every {@code <q>} and
   * {@code </q>} tag in the output is replaced with the {@code &quot;} entity.
   *
   * @return the HTML text
   */
  @Override
  public String toHtml() {
    String html = getFlexmarkFactory().getHtmlRenderer().render(getDocument());
    return ObjectUtils.notNull(QUOTE_TAG_REPLACEMENT_PATTERN.matcher(html)
        .replaceAll("&quot;"));
  }

  /**
   * Render the markup as Markdown text using the formatter supplied by
   * {@code getFlexmarkFactory()}.
   *
   * @return the Markdown text
   */
  @Override
  public String toMarkdown() {
    return toMarkdown(getFlexmarkFactory().getFormatter());
  }

  /**
   * Render the markup as Markdown text using the provided formatter.
   *
   * @param formatter
   *          the formatter used to render the document
   * @return the Markdown text
   */
  @Override
  public String toMarkdown(Formatter formatter) {
    return ObjectUtils.notNull(formatter.render(getDocument()));
  }

  /**
   * Write the markup as XHTML to the provided stream writer.
   * <p>
   * When the underlying document has no children, an empty character sequence
   * is written instead.
   *
   * @param namespace
   *          the XML namespace to use for the generated elements
   * @param streamWriter
   *          the writer to output to
   * @throws XMLStreamException
   *           if an error occurred while writing the XML
   */
  @Override
  public void writeXHtml(String namespace, XMLStreamWriter2 streamWriter) throws XMLStreamException {
    Document document = getDocument();
    if (document.hasChildren()) {
      IMarkupWriter<XMLStreamWriter, XMLStreamException> writer = new MarkupXmlStreamWriter(
          namespace,
          getFlexmarkFactory().getListOptions(),
          streamWriter);

      IMarkupVisitor<XMLStreamWriter, XMLStreamException> visitor = new MarkupVisitor<>(isBlock());
      visitor.visitDocument(document, writer);
    } else {
      streamWriter.writeCharacters("");
    }
  }

  /**
   * Write the markup as XHTML events to the provided event writer.
   * <p>
   * When the underlying document has no children, a single empty space event
   * created by the event factory is added instead.
   *
   * @param namespace
   *          the XML namespace to use for the generated elements
   * @param eventFactory
   *          the factory used to create the XML events
   * @param eventWriter
   *          the writer to output to
   * @throws XMLStreamException
   *           if an error occurred while writing the XML
   */
  @Override
  public void writeXHtml(String namespace, XMLEventFactory2 eventFactory, XMLEventWriter eventWriter)
      throws XMLStreamException {
    Document document = getDocument();
    if (document.hasChildren()) {
      IMarkupWriter<XMLEventWriter, XMLStreamException> writer = new MarkupXmlEventWriter(
          namespace,
          getFlexmarkFactory().getListOptions(),
          eventWriter,
          eventFactory);

      IMarkupVisitor<XMLEventWriter, XMLStreamException> visitor = new MarkupVisitor<>(isBlock());
      visitor.visitDocument(document, writer);
    } else {
      eventWriter.add(eventFactory.createSpace(""));
    }
  }

  /**
   * Get a sequential stream containing the document node followed by all of
   * its descendant nodes.
   *
   * @return the stream of nodes
   */
  @SuppressWarnings("null")
  @Override
  public Stream<Node> getNodesAsStream() {
    Document document = getDocument();
    return Stream.concat(Stream.of(document),
        StreamSupport.stream(document.getDescendants().spliterator(), false));
  }

  /**
   * Retrieve all insert statements that are contained within this markup text.
   *
   * @return the insert statements
   */
  @Override
  @NonNull
  public List<InsertAnchorNode> getInserts() {
    return getInserts(insert -> true);
  }

  /**
   * Retrieve all insert statements that are contained within this markup text
   * that match the provided filter.
   *
   * @param filter
   *          a filter used to identify matching insert statements
   * @return the matching insert statements
   */
  @Override
  @NonNull
  public List<InsertAnchorNode> getInserts(@NonNull Predicate<InsertAnchorNode> filter) {
    InsertVisitor visitor = new InsertVisitor(filter);
    visitor.visitChildren(getDocument());
    return visitor.getInserts();
  }

  /**
   * Produce a textual representation of the underlying document as generated
   * by {@link AstCollectingVisitor#asString(Node)}.
   */
  @Override
  public String toString() {
    return AstCollectingVisitor.asString(getDocument());
  }
}