1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 package gov.nist.secauto.metaschema.core.datatype.markup;
28
29 import com.ctc.wstx.api.WstxOutputProperties;
30 import com.ctc.wstx.stax.WstxOutputFactory;
31 import com.vladsch.flexmark.formatter.Formatter;
32 import com.vladsch.flexmark.html2md.converter.FlexmarkHtmlConverter;
33 import com.vladsch.flexmark.parser.Parser;
34 import com.vladsch.flexmark.util.ast.Document;
35 import com.vladsch.flexmark.util.ast.Node;
36
37 import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.AstCollectingVisitor;
38 import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.FlexmarkFactory;
39 import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.IMarkupVisitor;
40 import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.IMarkupWriter;
41 import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.InsertAnchorExtension.InsertAnchorNode;
42 import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.InsertVisitor;
43 import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.MarkupVisitor;
44 import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.MarkupXmlEventWriter;
45 import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.MarkupXmlStreamWriter;
46 import gov.nist.secauto.metaschema.core.util.ObjectUtils;
47
48 import org.apache.logging.log4j.LogManager;
49 import org.apache.logging.log4j.Logger;
50 import org.codehaus.stax2.XMLOutputFactory2;
51 import org.codehaus.stax2.XMLStreamWriter2;
52 import org.codehaus.stax2.evt.XMLEventFactory2;
53 import org.jsoup.Jsoup;
54 import org.jsoup.nodes.TextNode;
55 import org.jsoup.select.NodeTraversor;
56 import org.jsoup.select.NodeVisitor;
57
58 import java.io.ByteArrayOutputStream;
59 import java.io.IOException;
60 import java.nio.charset.StandardCharsets;
61 import java.util.List;
62 import java.util.function.Predicate;
63 import java.util.regex.Pattern;
64 import java.util.stream.Stream;
65 import java.util.stream.StreamSupport;
66
67 import javax.xml.stream.XMLEventWriter;
68 import javax.xml.stream.XMLOutputFactory;
69 import javax.xml.stream.XMLStreamException;
70 import javax.xml.stream.XMLStreamWriter;
71
72 import edu.umd.cs.findbugs.annotations.NonNull;
73 import edu.umd.cs.findbugs.annotations.Nullable;
74
75 public abstract class AbstractMarkupString<TYPE extends AbstractMarkupString<TYPE>>
76 implements IMarkupString<TYPE> {
77 private static final Logger LOGGER = LogManager.getLogger(FlexmarkFactory.class);
78
79 private static final Pattern QUOTE_TAG_REPLACEMENT_PATTERN
80 = Pattern.compile("</?q>");
81
82
83
84
85
86
87
88 @NonNull
89 private final Document document;
90
91 public AbstractMarkupString(@NonNull Document document) {
92 this.document = document;
93 }
94
95 @Override
96 public Document getDocument() {
97 return document;
98 }
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138 @NonNull
139 protected static Document parseHtml(@NonNull String html, @NonNull FlexmarkHtmlConverter htmlParser,
140 @NonNull Parser markdownParser) {
141 org.jsoup.nodes.Document document = Jsoup.parse(html);
142
143
144
145 NodeTraversor.traverse(new NodeVisitor() {
146
147 @Override
148 public void head(org.jsoup.nodes.Node node, int depth) {
149 if (node instanceof TextNode) {
150 TextNode textNode = (TextNode) node;
151
152 org.jsoup.nodes.Node parent = textNode.parent();
153
154 if (!isTag(parent, "code") || !isTag(parent.parent(), "pre")) {
155 node.replaceWith(new TextNode(textNode.text()));
156 }
157 }
158 }
159
160 private boolean isTag(@Nullable org.jsoup.nodes.Node node, @NonNull String tagName) {
161 return node != null && tagName.equals(node.normalName());
162 }
163
164 }, document);
165
166 String markdown = htmlParser.convert(document);
167 assert markdown != null;
168 if (LOGGER.isDebugEnabled()) {
169 LOGGER.debug("html->markdown: {}", markdown);
170 }
171 return parseMarkdown(markdown, markdownParser);
172 }
173
174 @SuppressWarnings("null")
175 @NonNull
176 protected static Document parseMarkdown(@NonNull String markdown, @NonNull Parser parser) {
177 return parser.parse(markdown);
178 }
179
180 @Override
181 public String toXHtml(@NonNull String namespace) throws XMLStreamException, IOException {
182
183 String retval;
184
185 Document document = getDocument();
186 if (document.hasChildren()) {
187
188 XMLOutputFactory2 factory = (XMLOutputFactory2) XMLOutputFactory.newInstance();
189 assert factory instanceof WstxOutputFactory;
190 factory.setProperty(WstxOutputProperties.P_OUTPUT_VALIDATE_STRUCTURE, false);
191 try (ByteArrayOutputStream os = new ByteArrayOutputStream()) {
192 XMLStreamWriter2 xmlStreamWriter = (XMLStreamWriter2) factory.createXMLStreamWriter(os);
193
194 writeXHtml(namespace, ObjectUtils.notNull(xmlStreamWriter));
195
196 xmlStreamWriter.flush();
197 xmlStreamWriter.close();
198 os.flush();
199 retval = ObjectUtils.notNull(os.toString(StandardCharsets.UTF_8));
200 }
201 } else {
202 retval = "";
203 }
204 return retval;
205 }
206
207 @Override
208 public String toHtml() {
209
210
211
212
213
214
215
216
217
218
219 String html = getFlexmarkFactory().getHtmlRenderer().render(getDocument());
220 return ObjectUtils.notNull(QUOTE_TAG_REPLACEMENT_PATTERN.matcher(html)
221 .replaceAll("""));
222 }
223
224 @Override
225 public String toMarkdown() {
226 return toMarkdown(getFlexmarkFactory().getFormatter());
227 }
228
229 @Override
230 public String toMarkdown(Formatter formatter) {
231 return ObjectUtils.notNull(formatter.render(getDocument()));
232 }
233
234 @Override
235 public void writeXHtml(String namespace, XMLStreamWriter2 streamWriter) throws XMLStreamException {
236 Document document = getDocument();
237 if (document.hasChildren()) {
238 IMarkupWriter<XMLStreamWriter, XMLStreamException> writer = new MarkupXmlStreamWriter(
239 namespace,
240 getFlexmarkFactory().getListOptions(),
241 streamWriter);
242
243 IMarkupVisitor<XMLStreamWriter, XMLStreamException> visitor = new MarkupVisitor<>(isBlock());
244 visitor.visitDocument(document, writer);
245 } else {
246 streamWriter.writeCharacters("");
247 }
248 }
249
250 @Override
251 public void writeXHtml(String namespace, XMLEventFactory2 eventFactory, XMLEventWriter eventWriter)
252 throws XMLStreamException {
253 Document document = getDocument();
254 if (document.hasChildren()) {
255
256 IMarkupWriter<XMLEventWriter, XMLStreamException> writer = new MarkupXmlEventWriter(
257 namespace,
258 getFlexmarkFactory().getListOptions(),
259 eventWriter,
260 eventFactory);
261
262 IMarkupVisitor<XMLEventWriter, XMLStreamException> visitor = new MarkupVisitor<>(isBlock());
263 visitor.visitDocument(getDocument(), writer);
264 } else {
265 eventWriter.add(eventFactory.createSpace(""));
266 }
267
268 }
269
270 @SuppressWarnings("null")
271 @Override
272 public Stream<Node> getNodesAsStream() {
273 return Stream.concat(Stream.of(getDocument()),
274 StreamSupport.stream(getDocument().getDescendants().spliterator(), false));
275 }
276
277 @Override
278 @NonNull
279 public List<InsertAnchorNode> getInserts() {
280 return getInserts(insert -> true);
281 }
282
283
284
285
286
287
288
289
290
291 @Override
292 @NonNull
293 public List<InsertAnchorNode> getInserts(@NonNull Predicate<InsertAnchorNode> filter) {
294 InsertVisitor visitor = new InsertVisitor(filter);
295 visitor.visitChildren(getDocument());
296 return visitor.getInserts();
297 }
298
299 @Override
300 public String toString() {
301 return AstCollectingVisitor.asString(getDocument());
302 }
303 }