1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 package gov.nist.secauto.metaschema.core.datatype.markup.flexmark;
28
29 import com.vladsch.flexmark.ast.AutoLink;
30 import com.vladsch.flexmark.ast.BlockQuote;
31 import com.vladsch.flexmark.ast.Code;
32 import com.vladsch.flexmark.ast.CodeBlock;
33 import com.vladsch.flexmark.ast.FencedCodeBlock;
34 import com.vladsch.flexmark.ast.HardLineBreak;
35 import com.vladsch.flexmark.ast.Heading;
36 import com.vladsch.flexmark.ast.HtmlBlock;
37 import com.vladsch.flexmark.ast.HtmlCommentBlock;
38 import com.vladsch.flexmark.ast.HtmlEntity;
39 import com.vladsch.flexmark.ast.HtmlInline;
40 import com.vladsch.flexmark.ast.Image;
41 import com.vladsch.flexmark.ast.IndentedCodeBlock;
42 import com.vladsch.flexmark.ast.Link;
43 import com.vladsch.flexmark.ast.ListBlock;
44 import com.vladsch.flexmark.ast.ListItem;
45 import com.vladsch.flexmark.ast.MailLink;
46 import com.vladsch.flexmark.ast.OrderedList;
47 import com.vladsch.flexmark.ast.Paragraph;
48 import com.vladsch.flexmark.ast.ParagraphItemContainer;
49 import com.vladsch.flexmark.ast.Text;
50 import com.vladsch.flexmark.ast.TextBase;
51 import com.vladsch.flexmark.ast.ThematicBreak;
52 import com.vladsch.flexmark.ext.escaped.character.EscapedCharacter;
53 import com.vladsch.flexmark.ext.tables.TableBlock;
54 import com.vladsch.flexmark.ext.tables.TableBody;
55 import com.vladsch.flexmark.ext.tables.TableCell;
56 import com.vladsch.flexmark.ext.tables.TableHead;
57 import com.vladsch.flexmark.ext.tables.TableRow;
58 import com.vladsch.flexmark.ext.typographic.TypographicQuotes;
59 import com.vladsch.flexmark.ext.typographic.TypographicSmarts;
60 import com.vladsch.flexmark.parser.ListOptions;
61 import com.vladsch.flexmark.util.ast.Block;
62 import com.vladsch.flexmark.util.ast.Node;
63 import com.vladsch.flexmark.util.sequence.BasedSequence;
64 import com.vladsch.flexmark.util.sequence.Escaping;
65
66 import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.HtmlQuoteTagExtension.DoubleQuoteNode;
67 import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.InsertAnchorExtension.InsertAnchorNode;
68 import gov.nist.secauto.metaschema.core.util.CollectionUtil;
69 import gov.nist.secauto.metaschema.core.util.ObjectUtils;
70
71 import org.apache.commons.text.StringEscapeUtils;
72 import org.jsoup.Jsoup;
73 import org.jsoup.nodes.Attributes;
74 import org.jsoup.nodes.Document;
75 import org.jsoup.select.NodeVisitor;
76
77 import java.net.URI;
78 import java.net.URISyntaxException;
79 import java.util.HashMap;
80 import java.util.LinkedHashMap;
81 import java.util.Map;
82 import java.util.regex.Matcher;
83 import java.util.regex.Pattern;
84
85 import javax.xml.namespace.QName;
86
87 import edu.umd.cs.findbugs.annotations.NonNull;
88 import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
89
90
91
92
93
94
95
96
97
/**
 * Base implementation of {@link IMarkupWriter} that translates flexmark AST
 * nodes into namespace-qualified element, text, and comment write operations
 * against a target of type {@code T}.
 *
 * @param <T>
 *          the type of the target being written to
 * @param <E>
 *          the type of exception the write operations may throw
 */
@SuppressFBWarnings(
    value = "THROWS_METHOD_THROWS_CLAUSE_THROWABLE",
    justification = "Class supports writers that use both Exception and RuntimeException.")
public abstract class AbstractMarkupWriter<T, E extends Throwable>
    implements IMarkupWriter<T, E> {
// Matches a complete entity reference ("&name;") and captures the name
// between the leading '&' and the trailing ';'.
private static final Pattern ENTITY_PATTERN = Pattern.compile("^&([^;]+);$");
// Replacement text for specific entity strings; consulted by
// writeHtmlEntity(String) before falling back to generic HTML unescaping.
private static final Map<String, String> ENTITY_MAP;

static {
  ENTITY_MAP = new HashMap<>();
  // NOTE(review): "&npsb;" looks like a typo for "&nbsp;" (non-breaking
  // space) -- confirm the intended entity before relying on this mapping.
  ENTITY_MAP.put("&npsb;", "&npsb;");
}
119
// Namespace URI used by asQName(String) to qualify element names.
@NonNull
private final String namespace;

// The target that write operations are directed to; exposed via getStream().
@NonNull
private final T stream;

// flexmark list options; used by writeParagraph to detect tight list items.
@NonNull
private final ListOptions options;
128
/**
 * Construct a new writer.
 *
 * @param namespace
 *          the namespace used to qualify written element names
 * @param options
 *          the flexmark list options to consult while writing
 * @param stream
 *          the target to write to; rejected by
 *          {@code ObjectUtils.requireNonNull} when {@code null}
 */
public AbstractMarkupWriter(
    @NonNull String namespace,
    @NonNull ListOptions options,
    T stream) {
  // validate the only unchecked argument before populating any field
  T target = ObjectUtils.requireNonNull(stream);
  this.stream = target;
  this.options = options;
  this.namespace = namespace;
}
134
/**
 * Get the namespace supplied at construction time.
 *
 * @return the namespace used to qualify element names
 */
@NonNull
protected String getNamespace() {
  return namespace;
}
139
140 protected ListOptions getOptions() {
141 return options;
142 }
143
/**
 * Get the target that this writer directs its output to.
 *
 * @return the target supplied at construction time
 */
@NonNull
protected T getStream() {
  return stream;
}
148
149 @Override
150 @NonNull
151 public QName asQName(@NonNull String localName) {
152 return new QName(getNamespace(), localName);
153 }
154
155 protected void visitChildren(
156 @NonNull Node parentNode,
157 @NonNull ChildHandler<T, E> childHandler) throws E {
158 for (Node node : parentNode.getChildren()) {
159 assert node != null;
160 childHandler.accept(node, this);
161 }
162 }
163
164 protected void writePrecedingNewline(@NonNull Block node) throws E {
165 Node prev = node.getPrevious();
166 if (prev != null
167 || !(node.getParent() instanceof com.vladsch.flexmark.util.ast.Document)) {
168 writeText("\n");
169 }
170 }
171
172 protected void writeTrailingNewline(@NonNull Block node) throws E {
173 Node next = node.getNext();
174 if (next != null && !next.isOrDescendantOfType(Block.class)
175 || next == null && !(node.getParent() instanceof com.vladsch.flexmark.util.ast.Document)) {
176 writeText("\n");
177 }
178 }
179
180 @Override
181 public final void writeElement(
182 QName qname,
183 Node node,
184 Map<String, String> attributes,
185 ChildHandler<T, E> childHandler) throws E {
186 if (node.hasChildren()) {
187 writeElementStart(qname, attributes);
188 if (childHandler != null) {
189 visitChildren(node, childHandler);
190 }
191 writeElementEnd(qname);
192 } else {
193 writeEmptyElement(qname, attributes);
194 }
195 }
196
197 @SuppressWarnings({
198 "unchecked",
199 "unused",
200 "PMD.UnusedPrivateMethod"
201 })
202 private void writeHtml(Node node) throws E {
203 Document doc = Jsoup.parse(node.getChars().toString());
204 try {
205 doc.body().traverse(new MarkupNodeVisitor());
206 } catch (NodeVisitorException ex) {
207 throw (E) ex.getCause();
208 }
209 }
210
211 @Override
212 public final void writeText(Text node) throws E {
213 BasedSequence text = node.getChars();
214 Node prev = node.getPrevious();
215 if (prev instanceof HardLineBreak) {
216
217 assert text != null;
218 text = text.trimStart();
219 }
220 assert text != null;
221 writeText(text);
222 }
223
224 @Override
225 public void writeText(@NonNull TextBase node) throws E {
226 StringBuilder buf = new StringBuilder(node.getChars().length());
227 for (Node child : node.getChildren()) {
228 CharSequence chars;
229 if (child instanceof Text) {
230 Text text = (Text) child;
231 chars = text.getChars();
232 } else if (child instanceof EscapedCharacter) {
233 EscapedCharacter ec = (EscapedCharacter) child;
234 chars = ec.getChars().unescape();
235 } else {
236 throw new UnsupportedOperationException("Node type: " + child.getNodeName());
237 }
238 buf.append(chars);
239 }
240 writeText(buf);
241 }
242
243 @Override
244 public void writeHtmlEntity(@NonNull HtmlEntity node) throws E {
245 String text = node.getChars().unescape();
246 assert text != null;
247 writeHtmlEntity(text);
248 }
249
250 @Override
251 public void writeHtmlEntity(@NonNull TypographicSmarts node) throws E {
252 String text = ObjectUtils.requireNonNull(node.getTypographicText());
253 assert text != null;
254 writeHtmlEntity(text);
255 }
256
257 private void writeHtmlEntity(String entityText) throws E {
258 String replacement = ENTITY_MAP.get(entityText);
259 if (replacement != null) {
260 Matcher matcher = ENTITY_PATTERN.matcher(replacement);
261 if (matcher.matches()) {
262 writeHtmlEntityInternal(ObjectUtils.notNull(matcher.group(1)));
263 } else {
264 writeText(replacement);
265 }
266 } else {
267 String value = StringEscapeUtils.unescapeHtml4(entityText);
268 assert value != null;
269 writeText(value);
270 }
271 }
272
/**
 * Write an entity identified by the provided text. This implementation
 * writes the provided text using {@code writeText}.
 *
 * @param text
 *          the entity name captured between '&amp;' and ';'
 * @throws E
 *           if writing fails
 */
protected void writeHtmlEntityInternal(@NonNull String text) throws E {
  writeText(text);
}
276
277 @Override
278 public void writeParagraph(
279 @NonNull Paragraph node,
280 @NonNull ChildHandler<T, E> childHandler) throws E {
281 if (node.getParent() instanceof ParagraphItemContainer && getOptions().isInTightListItem(node)) {
282 if (node.getPrevious() != null) {
283 writeText("\n");
284 }
285 visitChildren(node, childHandler);
286 } else {
287 writePrecedingNewline(node);
288 writeElement("p", node, childHandler);
289 writeTrailingNewline(node);
290 }
291 }
292
293 @Override
294 public void writeLink(
295 @NonNull Link node,
296 @NonNull ChildHandler<T, E> childHandler) throws E {
297 Map<String, String> attributes = new LinkedHashMap<>();
298 String href = Escaping.percentEncodeUrl(node.getUrl().unescape());
299 try {
300 attributes.put("href", new URI(href).toASCIIString());
301 } catch (URISyntaxException ex) {
302 throw new IllegalStateException(ex);
303 }
304
305 if (!node.getTitle().isBlank()) {
306 String title = ObjectUtils.requireNonNull(node.getTitle().unescape());
307 attributes.put("title", title);
308 }
309
310
311 QName qname = asQName("a");
312 writeElementStart(qname, attributes);
313 if (node.hasChildren()) {
314 visitChildren(node, childHandler);
315 } else {
316 writeText("");
317 }
318 writeElementEnd(qname);
319 }
320
321 @Override
322 public void writeLink(@NonNull MailLink node) throws E {
323 Map<String, String> attributes = new LinkedHashMap<>();
324
325 String href = Escaping.percentEncodeUrl(node.getText().unescape());
326 try {
327 attributes.put("href", new URI("mailto:" + href).toASCIIString());
328 } catch (URISyntaxException ex) {
329 throw new IllegalStateException(ex);
330 }
331
332 QName qname = asQName("a");
333 writeElementStart(qname, attributes);
334
335 BasedSequence text = node.getText();
336 writeText(text == null ? "\n" : ObjectUtils.notNull(text.unescape()));
337 writeElementEnd(qname);
338 }
339
340 @Override
341 public void writeLink(@NonNull AutoLink node) throws E {
342 Map<String, String> attributes = new LinkedHashMap<>();
343
344 String href = Escaping.percentEncodeUrl(node.getUrl().unescape());
345 try {
346 attributes.put("href", new URI(href).toASCIIString());
347 } catch (URISyntaxException ex) {
348 throw new IllegalStateException(ex);
349 }
350
351 QName qname = asQName("a");
352 writeElementStart(qname, attributes);
353 writeText(ObjectUtils.notNull(node.getText().unescape()));
354 writeElementEnd(qname);
355 }
356
357 @Override
358 public final void writeTypographicQuotes(
359 TypographicQuotes node,
360 ChildHandler<T, E> childHandler) throws E {
361 if (node instanceof DoubleQuoteNode) {
362 writeElement("q", node, childHandler);
363 } else {
364 String opening = node.getTypographicOpening();
365 if (opening != null && !opening.isEmpty()) {
366 writeHtmlEntity(opening);
367 }
368
369 visitChildren(node, childHandler);
370
371 String closing = node.getTypographicClosing();
372 if (closing != null && !closing.isEmpty()) {
373 writeHtmlEntity(closing);
374 }
375 }
376 }
377
/**
 * Write an inline HTML node by parsing and replaying its source text.
 *
 * @param node
 *          the inline HTML node to write
 * @throws E
 *           if writing fails
 */
@Override
public final void writeInlineHtml(HtmlInline node) throws E {
  writeHtml(node);
}
384
/**
 * Write an HTML block node by parsing and replaying its source text,
 * surrounded by the block newline handling.
 *
 * @param node
 *          the HTML block node to write
 * @throws E
 *           if writing fails
 */
@Override
public final void writeBlockHtml(HtmlBlock node) throws E {
  writePrecedingNewline(node);
  writeHtml(node);
  writeTrailingNewline(node);
}
394
395 @Override
396 public final void writeTable(
397 TableBlock node,
398 ChildHandler<T, E> cellChildHandler) throws E {
399 writePrecedingNewline(node);
400 QName qname = asQName("table");
401 writeElementStart(qname);
402
403 TableHead head = (TableHead) node.getChildOfType(TableHead.class);
404
405 QName theadQName = asQName("thead");
406 if (head != null) {
407 writeText("\n");
408 writeElementStart(theadQName);
409 for (Node childNode : head.getChildren()) {
410 if (childNode instanceof TableRow) {
411 writeTableRow((TableRow) childNode, cellChildHandler);
412 }
413 }
414 writeElementEnd(theadQName);
415 }
416
417 TableBody body = (TableBody) node.getChildOfType(TableBody.class);
418
419 if (body != null) {
420 QName tbodyQName = asQName("tbody");
421 writeText("\n");
422 writeElementStart(tbodyQName);
423 for (Node childNode : body.getChildren()) {
424 if (childNode instanceof TableRow) {
425 writeTableRow((TableRow) childNode, cellChildHandler);
426 }
427 }
428 writeElementEnd(tbodyQName);
429 }
430
431 writeText("\n");
432 writeElementEnd(qname);
433 writeTrailingNewline(node);
434 }
435
436 private void writeTableRow(
437 @NonNull TableRow node,
438 @NonNull ChildHandler<T, E> cellChildHandler) throws E {
439 writeText("\n");
440 QName qname = asQName("tr");
441 writeElementStart(qname);
442
443 for (Node childNode : node.getChildren()) {
444 if (childNode instanceof TableCell) {
445 writeTableCell((TableCell) childNode, cellChildHandler);
446 }
447 }
448
449 writeElementEnd(qname);
450 if (node.getNext() == null) {
451 writeText("\n");
452 }
453 }
454
455 private void writeTableCell(
456 @NonNull TableCell node,
457 @NonNull ChildHandler<T, E> cellChildHandler) throws E {
458 QName qname = node.isHeader() ? asQName("th") : asQName("td");
459
460 Map<String, String> attributes = new LinkedHashMap<>();
461 if (node.getAlignment() != null) {
462 attributes.put("align", ObjectUtils.requireNonNull(node.getAlignment().toString()));
463 }
464
465 writeElementStart(qname, attributes);
466 visitChildren(node, cellChildHandler);
467 writeElementEnd(qname);
468 }
469
470 @Override
471 public void writeImage(
472 @NonNull Image node) throws E {
473 Map<String, String> attributes = new LinkedHashMap<>();
474 String href = ObjectUtils.requireNonNull(Escaping.percentEncodeUrl(node.getUrl().unescape()));
475 try {
476 attributes.put("src", new URI(href).toASCIIString());
477 } catch (URISyntaxException ex) {
478 throw new IllegalStateException(ex);
479 }
480
481 attributes.put("alt", ObjectUtils.requireNonNull(node.getText().toString()));
482
483 if (!node.getTitle().isBlank()) {
484 attributes.put("title", ObjectUtils.requireNonNull(node.getTitle().toString()));
485 }
486
487 writeEmptyElement("img", attributes);
488 }
489
490 @Override
491 public void writeInsertAnchor(@NonNull InsertAnchorNode node) throws E {
492 Map<String, String> attributes = new LinkedHashMap<>();
493 attributes.put("type", ObjectUtils.requireNonNull(node.getType().toString()));
494 attributes.put("id-ref", ObjectUtils.requireNonNull(node.getIdReference().toString()));
495
496 writeElement("insert", node, attributes, null);
497 }
498
499 @Override
500 public void writeHeading(
501 @NonNull Heading node,
502 @NonNull ChildHandler<T, E> childHandler) throws E {
503 writePrecedingNewline(node);
504 int level = node.getLevel();
505
506 QName qname = asQName(ObjectUtils.notNull(String.format("h%d", level)));
507
508 writeElementStart(qname);
509 if (node.hasChildren()) {
510 visitChildren(node, childHandler);
511 } else {
512
513 writeText("");
514 }
515 writeElementEnd(qname);
516 writeTrailingNewline(node);
517 }
518
519
520
521
522
523
524
525
526
527 @NonNull
528 protected static String collapseWhitespace(@NonNull CharSequence text) {
529 StringBuilder sb = new StringBuilder(text.length());
530 int length = text.length();
531 boolean needsSpace = false;
532 for (int i = 0; i < length; i++) {
533 char ch = text.charAt(i);
534
535 if (ch == '\n' || ch == '\r') {
536 if (sb.length() > 0) {
537
538 needsSpace = true;
539 }
540 } else {
541 if (needsSpace) {
542 sb.append(' ');
543 needsSpace = false;
544 }
545 sb.append(ch);
546 }
547 }
548
549 String result = sb.toString();
550 if (result.matches("^[ ]{1,}[^ ].* $")) {
551
552 result = result.substring(1, result.length() - 1);
553 }
554 return ObjectUtils.notNull(result);
555 }
556
557 @Override
558 public void writeCode(Code node, ChildHandler<T, E> childHandler) throws E {
559 QName qname = asQName("code");
560 writeElementStart(qname);
561 visitChildren(node, (child, writer) -> {
562 if (child instanceof Text || child instanceof TextBase) {
563 String text = collapseWhitespace(ObjectUtils.notNull(child.getChars()));
564 writeText(text);
565 } else {
566 childHandler.accept(child, writer);
567 }
568 });
569 writeElementEnd(qname);
570 }
571
572 @Override
573 public final void writeCodeBlock(
574 IndentedCodeBlock node,
575 ChildHandler<T, E> childHandler) throws E {
576 writePrecedingNewline(node);
577 QName preQName = asQName("pre");
578
579 writeElementStart(preQName);
580
581 QName codeQName = asQName("code");
582
583 writeElementStart(codeQName);
584
585 if (node.hasChildren()) {
586 visitChildren(node, childHandler);
587 } else {
588
589 writeText("");
590 }
591
592 writeElementEnd(codeQName);
593
594 writeElementEnd(preQName);
595 writeTrailingNewline(node);
596 }
597
598 @Override
599 public final void writeCodeBlock(
600 FencedCodeBlock node,
601 ChildHandler<T, E> childHandler) throws E {
602 writePrecedingNewline(node);
603 QName preQName = asQName("pre");
604
605 writeElementStart(preQName);
606
607 QName codeQName = asQName("code");
608 Map<String, String> attributes = new LinkedHashMap<>();
609 if (node.getInfo().isNotNull()) {
610 attributes.put("class", "language-" + node.getInfo().unescape());
611 }
612
613 writeElementStart(codeQName, attributes);
614
615 if (node.hasChildren()) {
616 visitChildren(node, childHandler);
617 } else {
618
619 writeText("");
620 }
621
622 writeElementEnd(codeQName);
623
624 writeElementEnd(preQName);
625 writeTrailingNewline(node);
626 }
627
628 @Override
629 public void writeCodeBlock(CodeBlock node, ChildHandler<T, E> childHandler) throws E {
630 String text;
631 if (node.getParent() instanceof IndentedCodeBlock) {
632 text = node.getContentChars().trimTailBlankLines().toString();
633 } else {
634 text = node.getContentChars().toString();
635 }
636 writeText(ObjectUtils.notNull(text));
637 }
638
639 @Override
640 public void writeBlockQuote(BlockQuote node, ChildHandler<T, E> childHandler) throws E {
641 writePrecedingNewline(node);
642 QName qname = asQName("blockquote");
643 writeElementStart(qname);
644
645
646 if (node.hasChildren()) {
647 visitChildren(node, childHandler);
648 } else {
649
650 writeText("\n");
651 }
652
653
654 writeElementEnd(qname);
655 writeTrailingNewline(node);
656 }
657
658 @Override
659 public void writeList(QName qname, ListBlock node, ChildHandler<T, E> listItemHandler) throws E {
660 Map<String, String> attributes = new LinkedHashMap<>();
661 if (node instanceof OrderedList) {
662 OrderedList ol = (OrderedList) node;
663 int start = ol.getStartNumber();
664 if (start != 1) {
665 attributes.put("start", String.valueOf(start));
666 }
667 }
668
669 writePrecedingNewline(node);
670 writeElementStart(qname, attributes);
671
672 visitChildren(node, (child, writer) -> {
673 ListItem item = (ListItem) child;
674 writeListItem(item, listItemHandler);
675 });
676
677 writeElementEnd(qname);
678 writeTrailingNewline(node);
679 }
680
681 @Override
682 public void writeListItem(ListItem node, ChildHandler<T, E> listItemHandler) throws E {
683 QName qname = asQName("li");
684 writePrecedingNewline(node);
685 writeElementStart(qname);
686
687 if (node.hasChildren()) {
688 visitChildren(node, listItemHandler);
689 } else {
690
691 writeText("");
692 }
693 writeElementEnd(qname);
694 writeTrailingNewline(node);
695 }
696
/**
 * Write a hard line break as a {@code br} element followed by a newline.
 *
 * @param node
 *          the hard line break to write
 * @throws E
 *           if writing fails
 */
@Override
public void writeBreak(HardLineBreak node) throws E {
  writeElement("br", node, null);
  writeText("\n");
}
702
/**
 * Write a thematic break as an {@code hr} element surrounded by the block
 * newline handling.
 *
 * @param node
 *          the thematic break to write
 * @throws E
 *           if writing fails
 */
@Override
public void writeBreak(ThematicBreak node) throws E {
  writePrecedingNewline(node);
  writeElement("hr", node, null);
  writeTrailingNewline(node);
}
709
710 @Override
711 public void writeComment(HtmlCommentBlock node) throws E {
712 writePrecedingNewline(node);
713
714 BasedSequence text = node.getChars();
715 text = text.subSequence(4, text.length() - 4);
716 writeComment(ObjectUtils.notNull(text.unescape()));
717 writeTrailingNewline(node);
718
719 }
720
/**
 * Write the provided text as a comment.
 *
 * @param text
 *          the comment content
 * @throws E
 *           if writing fails
 */
protected abstract void writeComment(@NonNull CharSequence text) throws E;
722
/**
 * Unchecked wrapper used to carry a throwable out of the jsoup visitor
 * callbacks; {@code writeHtml} catches it and rethrows its cause.
 */
protected static class NodeVisitorException
    extends IllegalStateException {
  // the serialization version UID
  private static final long serialVersionUID = 1L;

  /**
   * Construct a new exception wrapping the provided cause.
   *
   * @param cause
   *          the throwable raised while visiting a node
   */
  public NodeVisitorException(Throwable cause) {
    super(cause);
  }
}
734
735 private final class MarkupNodeVisitor implements NodeVisitor {
736 @Override
737 public void head(org.jsoup.nodes.Node node, int depth) {
738 if (depth > 0) {
739 try {
740 if (node instanceof org.jsoup.nodes.Element) {
741 org.jsoup.nodes.Element element = (org.jsoup.nodes.Element) node;
742
743 Attributes attributes = element.attributes();
744
745 Map<String, String> attrMap;
746 if (attributes.isEmpty()) {
747 attrMap = CollectionUtil.emptyMap();
748 } else {
749 attrMap = new LinkedHashMap<>();
750 for (org.jsoup.nodes.Attribute attr : attributes) {
751 attrMap.put(attr.getKey(), attr.getValue());
752 }
753 }
754
755 QName qname = asQName(ObjectUtils.notNull(element.tagName()));
756 if (element.childNodes().isEmpty()) {
757 writeEmptyElement(qname, attrMap);
758 } else {
759 writeElementStart(qname, attrMap);
760 }
761 } else if (node instanceof org.jsoup.nodes.TextNode) {
762 org.jsoup.nodes.TextNode text = (org.jsoup.nodes.TextNode) node;
763 writeText(ObjectUtils.requireNonNull(text.text()));
764 }
765 } catch (Throwable ex) {
766 throw new NodeVisitorException(ex);
767 }
768 }
769 }
770
771 @Override
772 public void tail(org.jsoup.nodes.Node node, int depth) {
773 if (depth > 0 && node instanceof org.jsoup.nodes.Element) {
774 org.jsoup.nodes.Element element = (org.jsoup.nodes.Element) node;
775 if (!element.childNodes().isEmpty()) {
776 QName qname = asQName(ObjectUtils.notNull(element.tagName()));
777 try {
778 writeElementEnd(qname);
779 } catch (Throwable ex) {
780 throw new NodeVisitorException(ex);
781 }
782 }
783 }
784 }
785 }
786 }