diff --git a/site/documentation/xml/index.md b/site/documentation/xml/index.md
index 4b1b48e48..d646234fc 100644
--- a/site/documentation/xml/index.md
+++ b/site/documentation/xml/index.md
@@ -8,7 +8,7 @@ The `fs2-data-xml` module provides tools to parse XML data in a streaming manner
To create a stream of XML events from an input stream, use the `events` pipe in `fs2.data.xml` package.
-```scala mdoc
+```scala mdoc:height=500
import cats.effect._
import cats.effect.unsafe.implicits.global
@@ -33,14 +33,14 @@ The pipe validates the XML structure while parsing. It reads all the XML element
Namespace can be resolved by using the `namespaceResolver` pipe.
-```scala mdoc
+```scala mdoc:height=500
val nsResolved = stream.through(namespaceResolver[IO])
nsResolved.compile.toList.unsafeRunSync()
```
Using the `referenceResolver` pipe, entity and character references can be resolved. By defaut the standard `xmlEntities` mapping is used, but it can be replaced by any mapping you see fit.
-```scala mdoc
+```scala mdoc:height=500
val entityResolved = stream.through(referenceResolver[IO]())
entityResolved.compile.toList.unsafeRunSync()
```
@@ -49,7 +49,7 @@ entityResolved.compile.toList.unsafeRunSync()
Once entites and namespaces are resolved, the events might be numerous and can be normalized to avoid emitting too many of them. For instance, after reference resolution, consecutive text events can be merged. This is achieved by using the `normalize` pipe.
-```scala mdoc
+```scala mdoc:height=500
val normalized = entityResolved.through(normalize)
normalized.compile.toList.unsafeRunSync()
```
@@ -82,3 +82,30 @@ implicit val eventifier: DocumentEventifier[SomeDocType] = ???
stream.through(documents[IO, SomeDocType])
.through(eventify[IO, SomeDocType])
```
+
+## XML Renderers
+
+Once you have an XML event stream and have selected and transformed what you need from it, you can write the resulting event stream to some storage. This can be achieved using renderers.
+
+For instance, to write the resulting XML stream to a file in raw form (i.e. without trying to format the nested tags and text), you can do:
+
+```scala mdoc:compile-only
+import fs2.io.file.{Files, Flags, Path}
+
+stream
+ .through(render.raw())
+ .through(text.utf8.encode)
+ .through(Files[IO].writeAll(Path("/some/path/to/file.xml"), Flags.Write))
+ .compile
+ .drain
+```
+
+There is also a `pretty()` renderer, which indents inner tags and text using the given indent string.
+
+If you need the rendered XML as a `String` value rather than as a stream, the library also provides `Collector`s:
+
+```scala mdoc
+stream.compile.to(collector.raw()).unsafeRunSync()
+
+stream.compile.to(collector.pretty()).unsafeRunSync()
+```
diff --git a/site/documentation/xml/xpath.md b/site/documentation/xml/xpath.md
index 77c7b0f42..1240b84c2 100644
--- a/site/documentation/xml/xpath.md
+++ b/site/documentation/xml/xpath.md
@@ -84,14 +84,13 @@ The `filter.raw` emits a stream of all matches.
Each match is represented as a nested stream of XML events which must be consumed.
```scala mdoc
-import cats.Show
import cats.effect._
import cats.effect.unsafe.implicits.global
stream
.lift[IO]
.through(filter.raw(path))
- .parEvalMapUnbounded(_.map(Show[XmlEvent].show(_)).compile.foldMonoid)
+ .parEvalMapUnbounded(_.through(render.raw()).compile.foldMonoid)
.compile
.toList
.unsafeRunSync()
@@ -105,7 +104,7 @@ The library offers `filter.collect` to collect each match for any collector.
```scala mdoc
stream
.lift[IO]
- .through(filter.collect(path, collector.show))
+ .through(filter.collect(path, collector.raw()))
.compile
.toList
.unsafeRunSync()
@@ -116,7 +115,7 @@ If you want to have results emitted as early as possible instead of in order, yo
```scala mdoc
stream
.lift[IO]
- .through(filter.collect(path, collector.show, deterministic = false))
+ .through(filter.collect(path, collector.raw(), deterministic = false))
.compile
.toList
.unsafeRunSync()
diff --git a/xml/src/main/scala/fs2/data/xml/Attr.scala b/xml/src/main/scala/fs2/data/xml/Attr.scala
index 1c151aa73..9e606fd95 100644
--- a/xml/src/main/scala/fs2/data/xml/Attr.scala
+++ b/xml/src/main/scala/fs2/data/xml/Attr.scala
@@ -18,4 +18,17 @@ package fs2
package data
package xml
+import cats.Show
+import cats.syntax.all._
+
+import scala.runtime.AbstractFunction2
+
case class Attr(name: QName, value: List[XmlEvent.XmlTexty])
+
+/** Companion of [[Attr]].
+  *
+  * It extends `AbstractFunction2`, so the companion itself can be used as a
+  * `(QName, List[XmlEvent.XmlTexty]) => Attr` function
+  * (NOTE(review): presumably kept for compatibility with code relying on that — confirm).
+  */
+object Attr extends AbstractFunction2[QName, List[XmlEvent.XmlTexty], Attr] {
+
+  /** Renders an attribute as `name="value"`, the value being the concatenation of its rendered parts. */
+  implicit val show: Show[Attr] = Show.show { attr =>
+    val renderedValue = attr.value.foldMap[String](_.render)
+    show"""${attr.name}="$renderedValue""""
+  }
+
+}
diff --git a/xml/src/main/scala/fs2/data/xml/internals/Renderer.scala b/xml/src/main/scala/fs2/data/xml/internals/Renderer.scala
new file mode 100644
index 000000000..5d027ddbf
--- /dev/null
+++ b/xml/src/main/scala/fs2/data/xml/internals/Renderer.scala
@@ -0,0 +1,156 @@
+/*
+ * Copyright 2024 fs2-data Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package fs2
+package data
+package xml
+package internals
+
+import cats.syntax.all._
+
+/** Mutable builder rendering [[XmlEvent]]s into a `String`.
+  *
+  * Events are appended chunk by chunk through `+=` and the text rendered so far is returned by `result`.
+  * The nesting level, the pending line break and the collapsed-tag marker are kept as mutable state
+  * between calls, so that a document can be spread over several chunks.
+  *
+  * @param pretty whether nested content is put on new lines and indented
+  * @param collapseEmpty whether empty tags are rendered in the self-closing form
+  * @param resetOnChunk whether the buffer is cleared before each chunk is rendered, in which case
+  *                     `result` only contains the rendering of the last chunk
+  * @param indent the string repeated once per nesting level in pretty mode
+  * @param attributeThreshold in pretty mode, number of attributes above which each attribute is
+  *                           rendered on its own line
+  */
+private[xml] class Renderer(pretty: Boolean,
+                            collapseEmpty: Boolean,
+                            resetOnChunk: Boolean,
+                            indent: String,
+                            attributeThreshold: Int)
+    extends Collector.Builder[XmlEvent, String] {
+
+  private val builder = new StringBuilder
+
+  // number of currently open (non collapsed) tags
+  private var level = 0
+
+  // whether the next rendered item must start on a new line (only honoured in pretty mode)
+  private var newline = false
+
+  // set when a start tag was rendered self-closed, so that the matching end tag is not rendered
+  private var skipClose = false
+
+  /** In pretty mode, breaks the line and indents to the current level if a line break is pending. */
+  private def indentation(): Unit =
+    if (pretty && newline) {
+      builder.append('\n')
+      builder.append(indent * level)
+    }
+
+  /** Renders all the events of `chunk` and appends the result to the buffer. */
+  override def +=(chunk: Chunk[XmlEvent]): Unit = {
+    if (resetOnChunk)
+      builder.setLength(0)
+    chunk.foreach {
+      case e @ (XmlEvent.XmlDecl(_, _, _) | XmlEvent.XmlPI(_, _)) =>
+        indentation()
+        builder ++= e.show
+        newline = true
+
+      case XmlEvent.Comment(content) =>
+        // in pretty mode a comment always starts on its own line
+        newline = true
+        indentation()
+        builder ++= show"<!--$content-->"
+
+      case XmlEvent.StartTag(name, attributes, isEmpty) =>
+        indentation()
+        val renderedName = name.show
+        builder ++= show"<$renderedName"
+
+        attributes match {
+          case a :: as =>
+            // Attributes are only spread over several lines in pretty mode; the raw rendering
+            // must never contain line breaks that were not part of the events.
+            val exceedThreshold = pretty && as.size > attributeThreshold - 1
+            builder ++= show" $a"
+            as.foreach { a =>
+              if (exceedThreshold) {
+                builder += '\n'
+                // aligns with the first attribute: '<' + name + ' '
+                // NOTE(review): the padding does not include the current indentation, so
+                // attributes of nested tags are not aligned with the first one — confirm intent.
+                builder ++= " " * (renderedName.length() + 2)
+              } else {
+                builder += ' '
+              }
+              builder ++= a.show
+            }
+          case Nil => // do nothing
+        }
+
+        if (isEmpty && collapseEmpty) {
+          if (pretty)
+            builder ++= " />"
+          else
+            builder ++= "/>"
+          // the end tag event that follows must not be rendered
+          skipClose = true
+        } else {
+          builder += '>'
+          level += 1
+        }
+        newline = true
+
+      case XmlEvent.EndTag(name) =>
+        newline = true
+        if (!skipClose) {
+          level -= 1
+          indentation()
+          builder ++= show"</$name>"
+        }
+        skipClose = false
+
+      case XmlEvent.XmlString(content, true) =>
+        // CDATA content is rendered untouched
+        indentation()
+        builder ++= show"<![CDATA[$content]]>"
+        newline = true
+
+      case XmlEvent.XmlString(content, false) if pretty =>
+        content.linesIterator.foreach { line =>
+          if (line.matches("\\s*")) {
+            // empty line, ignore it
+          } else {
+            indentation()
+            if (newline)
+              builder ++= line.stripLeading()
+            else
+              builder ++= line
+            newline = true
+          }
+        }
+        // A line break is only pending if the text ends with a new line followed by optional
+        // whitespace. DOTALL is required so that `.*` spans the preceding lines of the text.
+        newline = content.matches("(?s).*\n\\s*")
+
+      case XmlEvent.StartDocument | XmlEvent.EndDocument =>
+      // do nothing
+      case e =>
+        indentation()
+        builder ++= e.show
+        newline = false
+    }
+  }
+
+  /** The text rendered so far (since the last reset if `resetOnChunk` is set). */
+  override def result: String = builder.result()
+
+}
+
+private[xml] object Renderer {
+
+  /** Creates a pipe rendering each incoming chunk of events as one `String`.
+    *
+    * A new [[Renderer]] is instantiated every time the resulting stream is run and is used for all
+    * of its chunks. The renderer is created with `resetOnChunk` enabled, so every emitted string
+    * only contains the rendering of the corresponding input chunk.
+    */
+  def pipe[F[_]](pretty: Boolean,
+                 collapseEmpty: Boolean,
+                 indent: String,
+                 attributeThreshold: Int): Pipe[F, XmlEvent, String] = { events =>
+    Stream.suspend {
+      val renderer = new Renderer(pretty = pretty,
+                                  collapseEmpty = collapseEmpty,
+                                  resetOnChunk = true,
+                                  indent = indent,
+                                  attributeThreshold = attributeThreshold)
+      events.mapChunks { chunk =>
+        renderer += chunk
+        Chunk.singleton(renderer.result)
+      }
+    }
+  }
+
+}
diff --git a/xml/src/main/scala/fs2/data/xml/package.scala b/xml/src/main/scala/fs2/data/xml/package.scala
index 21920d497..5035c3ab6 100644
--- a/xml/src/main/scala/fs2/data/xml/package.scala
+++ b/xml/src/main/scala/fs2/data/xml/package.scala
@@ -78,14 +78,42 @@ package object xml {
/**
* Render the incoming xml events to their string representation. The output will be concise,
* without additional (or original) whitespace and with empty tags being collapsed to the short self-closed form
- * if collapseEmpty is true. Preserves chunking, each String in the output will correspond to one event in the input.
+ * if collapseEmpty is true.
*/
+  @deprecated(message = "Use `fs2.data.xml.render.raw()` instead.", since = "fs2-data 1.11.0")
def render[F[_]](collapseEmpty: Boolean = true): Pipe[F, XmlEvent, String] =
- _.zipWithPrevious.map {
- case (_, st: XmlEvent.StartTag) => st.render(collapseEmpty)
- case (Some(XmlEvent.StartTag(_, _, true)), XmlEvent.EndTag(_)) if collapseEmpty => ""
- case (_, event) => event.show
- }
+    // kept as a deprecated alias: the rendering itself is done by the raw renderer
+    render.raw(collapseEmpty)
+
+  /** Pipes rendering XML event streams as strings. */
+  object render {
+
+    /**
+      * Renders the incoming XML events to their string representation. The output is concise:
+      * no whitespace is added, the original whitespace is not restored, and empty tags are
+      * collapsed to the short self-closed form if `collapseEmpty` is true.
+      *
+      * @param collapseEmpty Whether empty tags are collapsed in a single self-closing tag
+      */
+    def raw[F[_]](collapseEmpty: Boolean = true): Pipe[F, XmlEvent, String] =
+      Renderer.pipe(pretty = false, collapseEmpty = collapseEmpty, indent = "", attributeThreshold = 0)
+
+    /**
+      * Renders the incoming XML events into a prettified string representation.
+      * _Prettified_ means that nested tags are indented as per the `indent` parameter,
+      * and that text data (except for `CDATA`, which remains untouched) is indented to the
+      * current indentation level after each new line.
+      *
+      * This pipe can be used when whitespace characters are not relevant to the application,
+      * to make the output more readable to human beings.
+      *
+      * @param collapseEmpty Whether empty tags are collapsed in a single self-closing tag
+      * @param indent The indentation string
+      * @param attributeThreshold Number of attributes above which each attribute is rendered on a new line
+      */
+    def pretty[F[_]](collapseEmpty: Boolean = true,
+                     indent: String = " ",
+                     attributeThreshold: Int = 3): Pipe[F, XmlEvent, String] =
+      Renderer.pipe(pretty = true,
+                    collapseEmpty = collapseEmpty,
+                    indent = indent,
+                    attributeThreshold = attributeThreshold)
+
+  }
val ncNameStart = CharRanges.fromRanges(
('A', 'Z'),
@@ -121,6 +149,7 @@ package object xml {
object collector {
/** Renders all events using the `Show` instance and build the result string. */
+ @deprecated(message = "Use `fs2.data.xml.collector.raw(false)` instead", since = "fs2-data 1.11.0")
object show extends Collector[XmlEvent] {
type Out = String
def newBuilder: Collector.Builder[XmlEvent, Out] =
@@ -137,6 +166,31 @@ package object xml {
}
}
+    /** Collects all events into a single string, rendered without any extra formatting.
+      *
+      * @param collapseEmpty Whether empty tags are collapsed in a single self-closing tag
+      */
+    def raw(collapseEmpty: Boolean = true): Collector.Aux[XmlEvent, String] =
+      new Collector[XmlEvent] {
+        type Out = String
+        def newBuilder: Collector.Builder[XmlEvent, Out] =
+          new Renderer(pretty = false,
+                       collapseEmpty = collapseEmpty,
+                       resetOnChunk = false,
+                       indent = "",
+                       attributeThreshold = 0)
+      }
+
+    /** Collects all events into a single string, trying to make it more readable.
+      * This collector should only be used if whitespace is not relevant to the application,
+      * and it results in more human-readable XML.
+      *
+      * @param collapseEmpty Whether empty tags are collapsed in a single self-closing tag
+      * @param indent The indentation string
+      * @param attributeThreshold Number of attributes above which each attribute is rendered on a new line
+      */
+    def pretty(collapseEmpty: Boolean = true,
+               indent: String = " ",
+               attributeThreshold: Int = 3): Collector.Aux[XmlEvent, String] =
+      new Collector[XmlEvent] {
+        type Out = String
+        def newBuilder: Collector.Builder[XmlEvent, Out] =
+          new Renderer(pretty = true,
+                       collapseEmpty = collapseEmpty,
+                       resetOnChunk = false,
+                       indent = indent,
+                       attributeThreshold = attributeThreshold)
+      }
+
}
implicit class XmlInterpolators(val sc: StringContext) extends AnyVal {
diff --git a/xml/src/test/scala/fs2/data/xml/XmlRenderTest.scala b/xml/src/test/scala/fs2/data/xml/XmlRenderTest.scala
index b6ac15c9b..99f7e0525 100644
--- a/xml/src/test/scala/fs2/data/xml/XmlRenderTest.scala
+++ b/xml/src/test/scala/fs2/data/xml/XmlRenderTest.scala
@@ -24,18 +24,156 @@ object XmlRenderTest extends SimpleIOSuite {
test("renders xml with self-closing tags") {
val result =
- xml"""""".through(render()).compile.string
+ xml"""""".through(render.raw()).compile.string
result.liftTo[IO].map { result =>
expect.eql("""""", result)
}
}
+ test("renders xml with self-closing tags prettily") {
+ val result =
+ xml"""""".through(render.pretty()).compile.string
+ result.liftTo[IO].map { result =>
+ expect.eql(
+ """
+ |
+ |
+ |""".stripMargin,
+ result
+ )
+ }
+ }
+
test("renders xml without self-closing tags if disabled") {
val result =
- xml"""""".through(render(false)).compile.string
+ xml"""""".through(render.raw(false)).compile.string
result.liftTo[IO].map { result =>
expect.eql("""""", result)
}
}
+ test("renders xml without self-closing tags prettily") {
+ val result =
+ xml""""""
+ .through(render.pretty(false))
+ .compile
+ .string
+ result.liftTo[IO].map { result =>
+ expect.eql(
+ """
+ |
+ |
+ |
+ |""".stripMargin,
+ result
+ )
+ }
+ }
+
+ test("renders xml with attributes prettily if below threshold") {
+ val result =
+ xml""""""
+ .through(render.pretty())
+ .compile
+ .string
+ result.liftTo[IO].map { result =>
+ expect.eql(
+ """
+ |
+ |
+ |""".stripMargin,
+ result
+ )
+ }
+ }
+
+ test("renders xml with attributes prettily if above threshold") {
+ val result =
+ xml""""""
+ .through(render.pretty())
+ .compile
+ .string
+ result.liftTo[IO].map { result =>
+ expect.eql(
+ """
+ |
+ |
+ |""".stripMargin,
+ result
+ )
+ }
+ }
+
+ test("renders text prettily") {
+ val result =
+ xml"""This is a test.
+The text is not originally formatted.""".through(render.pretty()).compile.string
+ result.liftTo[IO].map { result =>
+ expect.eql(
+ """
+ |
+ | This is a test.
+ | The text is not originally formatted.
+ |""".stripMargin,
+ result
+ )
+ }
+ }
+
+ test("renders text with entities prettily") {
+ val result =
+ xml"""This is a test.
+The text is not originally formatted but contains & and
+´ as entities.""".through(render.pretty()).compile.string
+ result.liftTo[IO].map { result =>
+ expect.eql(
+ """
+ |
+ | This is a test.
+ | The text is not originally formatted but contains & and
+ | ´ as entities.
+ |""".stripMargin,
+ result
+ )
+ }
+ }
+
+ test("renders CDATA as-is") {
+ val result =
+ xml"""""".through(render.pretty()).compile.string
+ result.liftTo[IO].map { result =>
+ expect.eql(
+ """
+ |
+ |
+ |""".stripMargin,
+ result
+ )
+ }
+ }
+
+ test("renders comments prettily") {
+ val result =
+ rawxml""""""
+ .through(render.pretty())
+ .compile
+ .string
+ result.liftTo[IO].map { result =>
+ expect.eql(
+ """
+ |
+ |
+ |""".stripMargin,
+ result
+ )
+ }
+ }
+
}