diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c6166b..d5a0780 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +# v1.1.3 +## 02/10/2015 + +3. [](#bugfix) + * Fixed self-closing tags in HTML5 and ensured that the returned content is compliant with HTML(5) + # v1.1.2 ## 02/10/2015 diff --git a/blueprints.yaml b/blueprints.yaml index 3140c73..d8e3049 100644 --- a/blueprints.yaml +++ b/blueprints.yaml @@ -1,5 +1,5 @@ name: External Links -version: 1.1.2 +version: 1.1.3 description: "This plugin adds small icons to external and mailto links, informing users the link will take them to a new site or open their email client." icon: external-link author: diff --git a/external_links.php b/external_links.php index 887010d..8b26bcf 100644 --- a/external_links.php +++ b/external_links.php @@ -1,6 +1,6 @@ * @author Benjamin Regler * @copyright 2015, Benjamin Regler @@ -87,6 +87,19 @@ public function onPageContentProcessed(Event $event) { if ( $config->get('process', TRUE) ) { $content = $page->getRawContent(); + /** + * Two really good resources on handling DOMDocument with HTML(5) + * correctly. + * + * @see http://stackoverflow.com/questions/3577641/how-do-you-parse-and-process-html-xml-in-php + * @see http://stackoverflow.com/questions/7997936/how-do-you-format-dom-structures-in-php + */ + + // Clear previous errors + if ( libxml_use_internal_errors(TRUE) === TRUE ) { + libxml_clear_errors(); + } + // Create a DOM parser object $dom = new \DOMDocument('1.0', 'UTF-8'); @@ -94,6 +107,9 @@ public function onPageContentProcessed(Event $event) { $dom->preserveWhiteSpace = FALSE; $dom->formatOutput = TRUE; + // Normalize newlines + $content = preg_replace('~\R~u', "\n", $content); + // Parse the HTML using UTF-8 // The @ before the method call suppresses any warnings that // loadHTML might throw because of invalid HTML in the page. 
@@ -183,15 +199,28 @@ public function onPageContentProcessed(Event $event) { } $content = ''; - // Process HTML from DOM document + // Transform DOM document to valid HTML(5) $body = $dom->getElementsByTagName('body')->item(0); foreach ( $body->childNodes as $node ) { - $content .= $dom->saveXML($node); + // Expand empty tags (e.g. <br/> to <br></br>) + if ( ($html = $dom->saveXML($node, LIBXML_NOEMPTYTAG)) !== FALSE ) { + $content .= $html; + } } - // Fix formatting for self-closing tags in HTML5 - $content = preg_replace('~<(area|base(?:font)?|br|col|command|embed|frame|hr|img|input|keygen|link|meta|param|source|track|wbr)(.*?)\s*/>~i', '<$1$2 />', $content); + // Fix formatting for self-closing tags in HTML5 and removing + // encapsulated (uncommented) CDATA blocks in