001/* 002 * Portions of this software was developed by employees of the National Institute 003 * of Standards and Technology (NIST), an agency of the Federal Government and is 004 * being made available as a public service. Pursuant to title 17 United States 005 * Code Section 105, works of NIST employees are not subject to copyright 006 * protection in the United States. This software may be subject to foreign 007 * copyright. Permission in the United States and in foreign countries, to the 008 * extent that NIST may hold copyright, to use, copy, modify, create derivative 009 * works, and distribute this software and its documentation without fee is hereby 010 * granted on a non-exclusive basis, provided that this notice and disclaimer 011 * of warranty appears in all copies. 012 * 013 * THE SOFTWARE IS PROVIDED 'AS IS' WITHOUT ANY WARRANTY OF ANY KIND, EITHER 014 * EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTY 015 * THAT THE SOFTWARE WILL CONFORM TO SPECIFICATIONS, ANY IMPLIED WARRANTIES OF 016 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND FREEDOM FROM 017 * INFRINGEMENT, AND ANY WARRANTY THAT THE DOCUMENTATION WILL CONFORM TO THE 018 * SOFTWARE, OR ANY WARRANTY THAT THE SOFTWARE WILL BE ERROR FREE. IN NO EVENT 019 * SHALL NIST BE LIABLE FOR ANY DAMAGES, INCLUDING, BUT NOT LIMITED TO, DIRECT, 020 * INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES, ARISING OUT OF, RESULTING FROM, 021 * OR IN ANY WAY CONNECTED WITH THIS SOFTWARE, WHETHER OR NOT BASED UPON WARRANTY, 022 * CONTRACT, TORT, OR OTHERWISE, WHETHER OR NOT INJURY WAS SUSTAINED BY PERSONS OR 023 * PROPERTY OR OTHERWISE, AND WHETHER OR NOT LOSS WAS SUSTAINED FROM, OR AROSE OUT 024 * OF THE RESULTS OF, OR USE OF, THE SOFTWARE OR SERVICES PROVIDED HEREUNDER. 025 */ 026 027package gov.nist.secauto.metaschema.core.datatype.markup.flexmark; 028 029import com.vladsch.flexmark.util.sequence.Escaping; 030 031import gov.nist.secauto.metaschema.core.datatype.markup.MarkupLine; 032import gov.nist.secauto.metaschema.core.datatype.markup.MarkupMultiline; 033import gov.nist.secauto.metaschema.core.model.util.XmlEventUtil; 034import gov.nist.secauto.metaschema.core.util.CollectionUtil; 035import gov.nist.secauto.metaschema.core.util.ObjectUtils; 036 037import org.apache.logging.log4j.LogManager; 038import org.apache.logging.log4j.Logger; 039import org.codehaus.stax2.XMLEventReader2; 040 041import java.util.Set; 042 043import javax.xml.namespace.QName; 044import javax.xml.stream.XMLStreamConstants; 045import javax.xml.stream.XMLStreamException; 046import javax.xml.stream.events.Attribute; 047import javax.xml.stream.events.Characters; 048import javax.xml.stream.events.StartElement; 049import javax.xml.stream.events.XMLEvent; 050 051import edu.umd.cs.findbugs.annotations.NonNull; 052 053public class XmlMarkupParser { // NOPMD - acceptable 054 private static final Logger LOGGER = LogManager.getLogger(XmlMarkupParser.class); 055 056 @NonNull 057 public static final Set<String> BLOCK_ELEMENTS = ObjectUtils.notNull( 058 Set.of( 059 "h1", 060 "h2", 061 "h3", 062 "h4", 063 "h5", 064 "h6", 065 "ul", 066 "ol", 067 "pre", 068 "hr", 069 "blockquote", 070 "p", 071 "table", 072 "img")); 073 074 @NonNull 075 private static final XmlMarkupParser SINGLETON = new XmlMarkupParser(); 076 077 @NonNull 078 public static XmlMarkupParser instance() { 079 return SINGLETON; 080 } 081 082 public MarkupLine parseMarkupline(XMLEventReader2 reader) throws XMLStreamException { // NOPMD - acceptable 083 StringBuilder buffer = new StringBuilder(); 084 parseContents(reader, null, buffer); 085 String html = buffer.toString().trim(); 086 return html.isEmpty() ? null : MarkupLine.fromHtml(html); 087 } 088 089 public MarkupMultiline parseMarkupMultiline(XMLEventReader2 reader) throws XMLStreamException { 090 StringBuilder buffer = new StringBuilder(); 091 parseToString(reader, buffer); 092 String html = buffer.toString().trim(); 093 094 if (LOGGER.isDebugEnabled()) { 095 LOGGER.debug("XML->HTML: {}", html); 096 } 097 return html.isEmpty() ? null : MarkupMultiline.fromHtml(html); 098 } 099 100 protected void parseToString(XMLEventReader2 reader, StringBuilder buffer) // NOPMD - acceptable 101 throws XMLStreamException { 102 // if (LOGGER.isDebugEnabled()) { 103 // LOGGER.debug("parseToString(enter): {}", 104 // XmlEventUtil.toString(reader.peek())); 105 // } 106 107 outer: while (reader.hasNextEvent() && !reader.peek().isEndElement()) { 108 // skip whitespace before the next block element 109 XMLEvent nextEvent = XmlEventUtil.skipWhitespace(reader); 110 111 // if (LOGGER.isDebugEnabled()) { 112 // LOGGER.debug("parseToString: {}", XmlEventUtil.toString(nextEvent)); 113 // } 114 115 if (nextEvent.isStartElement()) { 116 StartElement start = nextEvent.asStartElement(); 117 QName name = start.getName(); 118 119 // Note: the next element is not consumed. The called method is expected to 120 // consume it 121 if (BLOCK_ELEMENTS.contains(name.getLocalPart())) { 122 parseStartElement(reader, start, buffer); 123 124 // the next event should be the event after the start's END_ELEMENT 125 // assert XmlEventUtil.isNextEventEndElement(reader, name) : 126 // XmlEventUtil.toString(reader.peek()); 127 } else { 128 // throw new IllegalStateException(); 129 // stop parsing on first unrecognized event 130 break outer; 131 } 132 } 133 // reader.nextEvent(); 134 135 // skip whitespace before the next block element 136 XmlEventUtil.skipWhitespace(reader); 137 } 138 139 // if (LOGGER.isDebugEnabled()) { 140 // LOGGER.debug("parseToString(exit): {}", reader.peek() != null ? 141 // XmlEventUtil.toString(reader.peek()) : ""); 142 // } 143 } 144 145 private void parseStartElement(XMLEventReader2 reader, StartElement start, StringBuilder buffer) 146 throws XMLStreamException { 147 if (LOGGER.isDebugEnabled()) { 148 LOGGER.debug("parseStartElement(enter): {}", XmlEventUtil.toString(start)); 149 } 150 151 // consume the start event 152 reader.nextEvent(); 153 154 QName name = start.getName(); 155 buffer.append('<') 156 .append(name.getLocalPart()); 157 for (Attribute attribute : CollectionUtil.toIterable( 158 ObjectUtils.notNull(start.getAttributes()))) { 159 buffer 160 .append(' ') 161 .append(attribute.getName().getLocalPart()) 162 .append("=\"") 163 .append(attribute.getValue()) 164 .append('"'); 165 } 166 167 XMLEvent next = reader.peek(); 168 if (next != null && next.isEndElement()) { 169 buffer.append("/>"); 170 // consume end element event 171 reader.nextEvent(); 172 } else { 173 buffer.append('>'); 174 175 // parse until the start's END_ELEMENT is reached 176 parseContents(reader, start, buffer); 177 178 buffer 179 .append("</") 180 .append(name.getLocalPart()) 181 .append('>'); 182 183 // the next event should be the start's END_ELEMENT 184 XmlEventUtil.assertNext(reader, XMLStreamConstants.END_ELEMENT, name); 185 186 // consume the start's END_ELEMENT 187 reader.nextEvent(); 188 } 189 190 if (LOGGER.isDebugEnabled()) { 191 LOGGER.debug("parseStartElement(exit): {}", reader.peek() != null ? XmlEventUtil.toString(reader.peek()) : ""); 192 } 193 } 194 195 private void parseContents(XMLEventReader2 reader, StartElement start, StringBuilder buffer) 196 throws XMLStreamException { 197 // if (LOGGER.isDebugEnabled()) { 198 // LOGGER.debug("parseContents(enter): {}", 199 // XmlEventUtil.toString(reader.peek())); 200 // } 201 202 XMLEvent event; 203 while (reader.hasNextEvent() && !(event = reader.peek()).isEndElement()) { 204 // // skip whitespace before the next list item 205 // event = XmlEventUtil.skipWhitespace(reader); 206 207 // if (LOGGER.isDebugEnabled()) { 208 // LOGGER.debug("parseContents(before): {}", XmlEventUtil.toString(event)); 209 // } 210 211 if (event.isStartElement()) { 212 StartElement nextStart = event.asStartElement(); 213 // QName nextName = nextStart.getName(); 214 parseStartElement(reader, nextStart, buffer); 215 216 // if (LOGGER.isDebugEnabled()) { 217 // LOGGER.debug("parseContents(after): {}", 218 // XmlEventUtil.toString(reader.peek())); 219 // } 220 221 // assert XmlEventUtil.isNextEventEndElement(reader, nextName) : 222 // XmlEventUtil.toString(reader.peek()); 223 224 // reader.nextEvent(); 225 } else if (event.isCharacters()) { 226 Characters characters = event.asCharacters(); 227 buffer.append(Escaping.escapeHtml(characters.getData(), true)); 228 reader.nextEvent(); 229 } 230 } 231 232 assert start == null 233 || XmlEventUtil.isEventEndElement(reader.peek(), ObjectUtils.notNull(start.getName())) : XmlEventUtil 234 .generateExpectedMessage(reader.peek(), XMLStreamConstants.END_ELEMENT, start.getName()); 235 236 // if (LOGGER.isDebugEnabled()) { 237 // LOGGER.debug("parseContents(exit): {}", reader.peek() != null ? 238 // XmlEventUtil.toString(reader.peek()) : ""); 239 // } 240 } 241 242}