1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 package gov.nist.secauto.metaschema.core.datatype.markup.flexmark;
28
29 import com.vladsch.flexmark.util.sequence.Escaping;
30
31 import gov.nist.secauto.metaschema.core.datatype.markup.MarkupLine;
32 import gov.nist.secauto.metaschema.core.datatype.markup.MarkupMultiline;
33 import gov.nist.secauto.metaschema.core.model.util.XmlEventUtil;
34 import gov.nist.secauto.metaschema.core.util.CollectionUtil;
35 import gov.nist.secauto.metaschema.core.util.ObjectUtils;
36
37 import org.apache.logging.log4j.LogManager;
38 import org.apache.logging.log4j.Logger;
39 import org.codehaus.stax2.XMLEventReader2;
40
41 import java.util.Set;
42
43 import javax.xml.namespace.QName;
44 import javax.xml.stream.XMLStreamConstants;
45 import javax.xml.stream.XMLStreamException;
46 import javax.xml.stream.events.Attribute;
47 import javax.xml.stream.events.Characters;
48 import javax.xml.stream.events.StartElement;
49 import javax.xml.stream.events.XMLEvent;
50
51 import edu.umd.cs.findbugs.annotations.NonNull;
52
53 public class XmlMarkupParser {
54 private static final Logger LOGGER = LogManager.getLogger(XmlMarkupParser.class);
55
56 @NonNull
57 public static final Set<String> BLOCK_ELEMENTS = ObjectUtils.notNull(
58 Set.of(
59 "h1",
60 "h2",
61 "h3",
62 "h4",
63 "h5",
64 "h6",
65 "ul",
66 "ol",
67 "pre",
68 "hr",
69 "blockquote",
70 "p",
71 "table",
72 "img"));
73
74 @NonNull
75 private static final XmlMarkupParser SINGLETON = new XmlMarkupParser();
76
77 @NonNull
78 public static XmlMarkupParser instance() {
79 return SINGLETON;
80 }
81
82 public MarkupLine parseMarkupline(XMLEventReader2 reader) throws XMLStreamException {
83 StringBuilder buffer = new StringBuilder();
84 parseContents(reader, null, buffer);
85 String html = buffer.toString().trim();
86 return html.isEmpty() ? null : MarkupLine.fromHtml(html);
87 }
88
89 public MarkupMultiline parseMarkupMultiline(XMLEventReader2 reader) throws XMLStreamException {
90 StringBuilder buffer = new StringBuilder();
91 parseToString(reader, buffer);
92 String html = buffer.toString().trim();
93
94 if (LOGGER.isDebugEnabled()) {
95 LOGGER.debug("XML->HTML: {}", html);
96 }
97 return html.isEmpty() ? null : MarkupMultiline.fromHtml(html);
98 }
99
100 protected void parseToString(XMLEventReader2 reader, StringBuilder buffer)
101 throws XMLStreamException {
102
103
104
105
106
107 outer: while (reader.hasNextEvent() && !reader.peek().isEndElement()) {
108
109 XMLEvent nextEvent = XmlEventUtil.skipWhitespace(reader);
110
111
112
113
114
115 if (nextEvent.isStartElement()) {
116 StartElement start = nextEvent.asStartElement();
117 QName name = start.getName();
118
119
120
121 if (BLOCK_ELEMENTS.contains(name.getLocalPart())) {
122 parseStartElement(reader, start, buffer);
123
124
125
126
127 } else {
128
129
130 break outer;
131 }
132 }
133
134
135
136 XmlEventUtil.skipWhitespace(reader);
137 }
138
139
140
141
142
143 }
144
145 private void parseStartElement(XMLEventReader2 reader, StartElement start, StringBuilder buffer)
146 throws XMLStreamException {
147 if (LOGGER.isDebugEnabled()) {
148 LOGGER.debug("parseStartElement(enter): {}", XmlEventUtil.toString(start));
149 }
150
151
152 reader.nextEvent();
153
154 QName name = start.getName();
155 buffer.append('<')
156 .append(name.getLocalPart());
157 for (Attribute attribute : CollectionUtil.toIterable(
158 ObjectUtils.notNull(start.getAttributes()))) {
159 buffer
160 .append(' ')
161 .append(attribute.getName().getLocalPart())
162 .append("=\"")
163 .append(attribute.getValue())
164 .append('"');
165 }
166
167 XMLEvent next = reader.peek();
168 if (next != null && next.isEndElement()) {
169 buffer.append("/>");
170
171 reader.nextEvent();
172 } else {
173 buffer.append('>');
174
175
176 parseContents(reader, start, buffer);
177
178 buffer
179 .append("</")
180 .append(name.getLocalPart())
181 .append('>');
182
183
184 XmlEventUtil.assertNext(reader, XMLStreamConstants.END_ELEMENT, name);
185
186
187 reader.nextEvent();
188 }
189
190 if (LOGGER.isDebugEnabled()) {
191 LOGGER.debug("parseStartElement(exit): {}", reader.peek() != null ? XmlEventUtil.toString(reader.peek()) : "");
192 }
193 }
194
195 private void parseContents(XMLEventReader2 reader, StartElement start, StringBuilder buffer)
196 throws XMLStreamException {
197
198
199
200
201
202 XMLEvent event;
203 while (reader.hasNextEvent() && !(event = reader.peek()).isEndElement()) {
204
205
206
207
208
209
210
211 if (event.isStartElement()) {
212 StartElement nextStart = event.asStartElement();
213
214 parseStartElement(reader, nextStart, buffer);
215
216
217
218
219
220
221
222
223
224
225 } else if (event.isCharacters()) {
226 Characters characters = event.asCharacters();
227 buffer.append(Escaping.escapeHtml(characters.getData(), true));
228 reader.nextEvent();
229 }
230 }
231
232 assert start == null
233 || XmlEventUtil.isEventEndElement(reader.peek(), ObjectUtils.notNull(start.getName())) : XmlEventUtil
234 .generateExpectedMessage(reader.peek(), XMLStreamConstants.END_ELEMENT, start.getName());
235
236
237
238
239
240 }
241
242 }