From 42f092e960bca3788153465228cfbd4687da5104 Mon Sep 17 00:00:00 2001 From: Lucas Satabin Date: Mon, 22 Jan 2024 22:10:09 +0100 Subject: [PATCH] Add support for pretty printing XML --- xml/src/main/scala/fs2/data/xml/Attr.scala | 11 ++ .../fs2/data/xml/internals/Renderer.scala | 141 +++++++++++++++++ xml/src/main/scala/fs2/data/xml/package.scala | 37 ++++- .../scala/fs2/data/xml/XmlRenderTest.scala | 142 +++++++++++++++++- 4 files changed, 324 insertions(+), 7 deletions(-) create mode 100644 xml/src/main/scala/fs2/data/xml/internals/Renderer.scala diff --git a/xml/src/main/scala/fs2/data/xml/Attr.scala b/xml/src/main/scala/fs2/data/xml/Attr.scala index 1c151aa7..f1848cd5 100644 --- a/xml/src/main/scala/fs2/data/xml/Attr.scala +++ b/xml/src/main/scala/fs2/data/xml/Attr.scala @@ -18,4 +18,15 @@ package fs2 package data package xml +import cats.Show +import cats.syntax.all._ + case class Attr(name: QName, value: List[XmlEvent.XmlTexty]) + +object Attr { + + implicit val show: Show[Attr] = Show.show { case Attr(name, value) => + show"""$name="${value.foldMap(_.render)}"""" + } + +} diff --git a/xml/src/main/scala/fs2/data/xml/internals/Renderer.scala b/xml/src/main/scala/fs2/data/xml/internals/Renderer.scala new file mode 100644 index 00000000..b6d03a1b --- /dev/null +++ b/xml/src/main/scala/fs2/data/xml/internals/Renderer.scala @@ -0,0 +1,141 @@ +/* + * Copyright 2024 fs2-data Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package fs2 +package data +package xml +package internals + +import cats.syntax.all._ + +private[xml] class Renderer(collapseEmpty: Boolean, resetOnChunk: Boolean, indent: String, attributeThreshold: Int) + extends Collector.Builder[XmlEvent, String] { + + private val builder = new StringBuilder + + private var level = 0 + + private var newline = false + + private var skipClose = false + + private def indentation(): Unit = + if (newline) { + builder.append('\n') + builder.append(indent * level) + } + + override def +=(chunk: Chunk[XmlEvent]): Unit = { + if (resetOnChunk) + builder.setLength(0) + chunk.foreach { + case e @ (XmlEvent.XmlDecl(_, _, _) | XmlEvent.XmlPI(_, _)) => + indentation() + builder ++= e.show + newline = true + + case XmlEvent.Comment(content) => + newline = true + indentation() + builder ++= "" + + case XmlEvent.StartTag(name, attributes, isEmpty) => + indentation() + val renderedName = name.show + builder ++= show"<$renderedName" + + attributes match { + case a :: as => + val exceedThreshold = as.size > attributeThreshold - 1 + builder ++= show" $a" + as.foreach { a => + if (exceedThreshold) { + builder += '\n' + builder ++= " " * (renderedName.length() + 2) + } else { + builder += ' ' + } + builder ++= a.show + } + case Nil => // do nothing + } + + if (isEmpty && collapseEmpty) { + builder ++= " />" + skipClose = true + } else { + builder += '>' + level += 1 + } + newline = true + + case XmlEvent.EndTag(name) => + level -= 1 + newline = true + if (!skipClose) { + indentation() + builder ++= show"" + } + skipClose = false + + case XmlEvent.XmlString(content, true) => + indentation() + builder ++= show"" + newline = true + + case XmlEvent.XmlString(content, false) => + content.linesIterator.foreach { line => + indentation() + if (newline) + builder ++= line.stripLeading() + else + builder ++= line + newline = true + } + newline = content.matches("^.*\n\\s*$") + + case XmlEvent.StartDocument | XmlEvent.EndDocument => + // do nothing + 
case e => + indentation() + builder ++= e.show + newline = false + } + } + + override def result: String = builder.result() + +} + +private[xml] object Renderer { + + def pipe[F[_]](collapseEmpty: Boolean, indent: String, attributeThreshold: Int): Pipe[F, XmlEvent, String] = + in => + Stream.suspend(Stream.emit(new Renderer(collapseEmpty, true, indent, attributeThreshold))).flatMap { builder => + in.mapChunks { chunk => + builder += chunk + Chunk.singleton(builder.result) + } + + } + +} diff --git a/xml/src/main/scala/fs2/data/xml/package.scala b/xml/src/main/scala/fs2/data/xml/package.scala index 21920d49..6a71d675 100644 --- a/xml/src/main/scala/fs2/data/xml/package.scala +++ b/xml/src/main/scala/fs2/data/xml/package.scala @@ -80,12 +80,39 @@ package object xml { * without additional (or original) whitespace and with empty tags being collapsed to the short self-closed form * if collapseEmpty is true. Preserves chunking, each String in the output will correspond to one event in the input. */ + @deprecated(message = "Use `fs2.data.xml.render.raw() instead.`", since = "fs2-data 1.11.0") def render[F[_]](collapseEmpty: Boolean = true): Pipe[F, XmlEvent, String] = - _.zipWithPrevious.map { - case (_, st: XmlEvent.StartTag) => st.render(collapseEmpty) - case (Some(XmlEvent.StartTag(_, _, true)), XmlEvent.EndTag(_)) if collapseEmpty => "" - case (_, event) => event.show - } + render.raw(collapseEmpty) + + object render { + + /** + * Render the incoming xml events to their string representation. The output will be concise, + * without additional (or original) whitespace and with empty tags being collapsed to the short self-closed form + * if collapseEmpty is true. Preserves chunking, each String in the output will correspond to one event in the input. 
+ */ + def raw[F[_]](collapseEmpty: Boolean = true): Pipe[F, XmlEvent, String] = + _.zipWithPrevious.map { + case (_, st: XmlEvent.StartTag) => st.render(collapseEmpty) + case (Some(XmlEvent.StartTag(_, _, true)), XmlEvent.EndTag(_)) if collapseEmpty => "" + case (_, event) => event.show + } + + /** + * Render the incoming xml events into a prettified string representation. + * _Prettified_ means that nested tags will be indented as per `indent` parameter + * and text data (except for `CDATA`, which remains untouched) is indented to the current + * indentation level after each new line. + * + * This pipe can be used when whitespace characters are not relevant to the application + * and to make it more readable to human beings. + */ + def pretty[F[_]](collapseEmpty: Boolean = true, + indent: String = " ", + attributeThreshold: Int = 3): Pipe[F, XmlEvent, String] = + Renderer.pipe(collapseEmpty, indent, attributeThreshold) + + } val ncNameStart = CharRanges.fromRanges( ('A', 'Z'), diff --git a/xml/src/test/scala/fs2/data/xml/XmlRenderTest.scala b/xml/src/test/scala/fs2/data/xml/XmlRenderTest.scala index b6ac15c9..99f7e052 100644 --- a/xml/src/test/scala/fs2/data/xml/XmlRenderTest.scala +++ b/xml/src/test/scala/fs2/data/xml/XmlRenderTest.scala @@ -24,18 +24,156 @@ object XmlRenderTest extends SimpleIOSuite { test("renders xml with self-closing tags") { val result = - xml"""""".through(render()).compile.string + xml"""""".through(render.raw()).compile.string result.liftTo[IO].map { result => expect.eql("""""", result) } } + test("renders xml with self-closing tags prettily") { + val result = + xml"""""".through(render.pretty()).compile.string + result.liftTo[IO].map { result => + expect.eql( + """ + | + | + |""".stripMargin, + result + ) + } + } + test("renders xml without self-closing tags if disabled") { val result = - xml"""""".through(render(false)).compile.string + xml"""""".through(render.raw(false)).compile.string result.liftTo[IO].map { result =>
expect.eql("""""", result) } } + test("renders xml without self-closing tags prettily") { + val result = + xml"""""" + .through(render.pretty(false)) + .compile + .string + result.liftTo[IO].map { result => + expect.eql( + """ + | + | + | + |""".stripMargin, + result + ) + } + } + + test("renders xml with attributes prettily if below threshold") { + val result = + xml"""""" + .through(render.pretty()) + .compile + .string + result.liftTo[IO].map { result => + expect.eql( + """ + | + | + |""".stripMargin, + result + ) + } + } + + test("renders xml with attributes prettily if above threshold") { + val result = + xml"""""" + .through(render.pretty()) + .compile + .string + result.liftTo[IO].map { result => + expect.eql( + """ + | + | + |""".stripMargin, + result + ) + } + } + + test("renders text prettily") { + val result = + xml"""This is a test. +The text is not originally formatted.""".through(render.pretty()).compile.string + result.liftTo[IO].map { result => + expect.eql( + """ + | + | This is a test. + | The text is not originally formatted. + |""".stripMargin, + result + ) + } + } + + test("renders text with entities prettily") { + val result = + xml"""This is a test. +The text is not originally formatted but contains & and +´ as entities.""".through(render.pretty()).compile.string + result.liftTo[IO].map { result => + expect.eql( + """ + | + | This is a test. + | The text is not originally formatted but contains & and + | ´ as entities. + |""".stripMargin, + result + ) + } + } + + test("renders CDATA as-is") { + val result = + xml"""""".through(render.pretty()).compile.string + result.liftTo[IO].map { result => + expect.eql( + """ + | + | + |""".stripMargin, + result + ) + } + } + + test("renders comments prettily") { + val result = + rawxml"""""" + .through(render.pretty()) + .compile + .string + result.liftTo[IO].map { result => + expect.eql( + """ + | + | + |""".stripMargin, + result + ) + } + } + }