From b8354b9b904cb012c2f4da3b647cd5a35685f922 Mon Sep 17 00:00:00 2001 From: Lucas Satabin Date: Tue, 23 Jan 2024 20:42:52 +0100 Subject: [PATCH] Add XML renderer documentation --- site/documentation/xml/index.md | 35 ++++++++++++++++--- site/documentation/xml/xpath.md | 7 ++-- .../fs2/data/xml/internals/Renderer.scala | 18 ++++++---- xml/src/main/scala/fs2/data/xml/package.scala | 6 ++-- 4 files changed, 49 insertions(+), 17 deletions(-) diff --git a/site/documentation/xml/index.md b/site/documentation/xml/index.md index 4b1b48e48..d646234fc 100644 --- a/site/documentation/xml/index.md +++ b/site/documentation/xml/index.md @@ -8,7 +8,7 @@ The `fs2-data-xml` module provides tools to parse XML data in a streaming manner To create a stream of XML events from an input stream, use the `events` pipe in `fs2.data.xml` package. -```scala mdoc +```scala mdoc:height=500 import cats.effect._ import cats.effect.unsafe.implicits.global @@ -33,14 +33,14 @@ The pipe validates the XML structure while parsing. It reads all the XML element Namespace can be resolved by using the `namespaceResolver` pipe. -```scala mdoc +```scala mdoc:height=500 val nsResolved = stream.through(namespaceResolver[IO]) nsResolved.compile.toList.unsafeRunSync() ``` Using the `referenceResolver` pipe, entity and character references can be resolved. By defaut the standard `xmlEntities` mapping is used, but it can be replaced by any mapping you see fit. -```scala mdoc +```scala mdoc:height=500 val entityResolved = stream.through(referenceResolver[IO]()) entityResolved.compile.toList.unsafeRunSync() ``` @@ -49,7 +49,7 @@ entityResolved.compile.toList.unsafeRunSync() Once entites and namespaces are resolved, the events might be numerous and can be normalized to avoid emitting too many of them. For instance, after reference resolution, consecutive text events can be merged. This is achieved by using the `normalize` pipe. 
-```scala mdoc +```scala mdoc:height=500 val normalized = entityResolved.through(normalize) normalized.compile.toList.unsafeRunSync() ``` @@ -82,3 +82,30 @@ implicit val eventifier: DocumentEventifier[SomeDocType] = ??? stream.through(documents[IO, SomeDocType]) .through(eventify[IO, SomeDocType]) ``` + +## XML Renderers + +Once you have an XML event stream and have selected and transformed what you need in it, you can write the resulting event stream to some storage. This can be achieved using renderers. + +For instance, to write the resulting XML stream to a file in raw form (i.e. without trying to format the nested tags and text), you can do: + +```scala mdoc:compile-only +import fs2.io.file.{Files, Flags, Path} + +stream + .through(render.raw()) + .through(text.utf8.encode) + .through(Files[IO].writeAll(Path("/some/path/to/file.xml"), Flags.Write)) + .compile + .drain +``` + +There is also a `pretty()` renderer, which indents inner tags and text by the given indent string. + +If you are interested in the String rendering as a value, the library also provides `Collector`s: + +```scala mdoc +stream.compile.to(collector.raw()).unsafeRunSync() + +stream.compile.to(collector.pretty()).unsafeRunSync() +``` diff --git a/site/documentation/xml/xpath.md b/site/documentation/xml/xpath.md index 77c7b0f42..1240b84c2 100644 --- a/site/documentation/xml/xpath.md +++ b/site/documentation/xml/xpath.md @@ -84,14 +84,13 @@ The `filter.raw` emits a stream of all matches. Each match is represented as a nested stream of XML events which must be consumed. ```scala mdoc -import cats.Show import cats.effect._ import cats.effect.unsafe.implicits.global stream .lift[IO] .through(filter.raw(path)) - .parEvalMapUnbounded(_.map(Show[XmlEvent].show(_)).compile.foldMonoid) + .parEvalMapUnbounded(_.through(render.raw()).compile.foldMonoid) .compile .toList .unsafeRunSync() @@ -105,7 +104,7 @@ The library offers `filter.collect` to collect each match for any collector. 
```scala mdoc stream .lift[IO] - .through(filter.collect(path, collector.show)) + .through(filter.collect(path, collector.raw())) .compile .toList .unsafeRunSync() @@ -116,7 +115,7 @@ If you want to have results emitted as early as possible instead of in order, yo ```scala mdoc stream .lift[IO] - .through(filter.collect(path, collector.show, deterministic = false)) + .through(filter.collect(path, collector.raw(), deterministic = false)) .compile .toList .unsafeRunSync() diff --git a/xml/src/main/scala/fs2/data/xml/internals/Renderer.scala b/xml/src/main/scala/fs2/data/xml/internals/Renderer.scala index 8ddc41ef2..5d027ddbf 100644 --- a/xml/src/main/scala/fs2/data/xml/internals/Renderer.scala +++ b/xml/src/main/scala/fs2/data/xml/internals/Renderer.scala @@ -96,9 +96,9 @@ private[xml] class Renderer(pretty: Boolean, newline = true case XmlEvent.EndTag(name) => - level -= 1 newline = true if (!skipClose) { + level -= 1 indentation() builder ++= show"" } @@ -111,12 +111,16 @@ private[xml] class Renderer(pretty: Boolean, case XmlEvent.XmlString(content, false) if pretty => content.linesIterator.foreach { line => - indentation() - if (newline) - builder ++= line.stripLeading() - else - builder ++= line - newline = true + if (line.matches("\\s*")) { + // empty line, ignore it + } else { + indentation() + if (newline) + builder ++= line.stripLeading() + else + builder ++= line + newline = true + } } newline = content.matches("^.*\n\\s*$") diff --git a/xml/src/main/scala/fs2/data/xml/package.scala b/xml/src/main/scala/fs2/data/xml/package.scala index e0563c3bd..5035c3ab6 100644 --- a/xml/src/main/scala/fs2/data/xml/package.scala +++ b/xml/src/main/scala/fs2/data/xml/package.scala @@ -167,7 +167,7 @@ package object xml { } /** Renders all events without extra formatting. 
*/ - def raw(collapseEmpty: Boolean = true): Collector[XmlEvent] = + def raw(collapseEmpty: Boolean = true): Collector.Aux[XmlEvent, String] = new Collector[XmlEvent] { type Out = String def newBuilder: Collector.Builder[XmlEvent, Out] = @@ -182,7 +182,9 @@ package object xml { * @param indent THe indentation string * @param attributeThreshold Number of attributes above which each attribute is rendered on a new line */ - def pretty(collapseEmpty: Boolean = true, indent: String = " ", attributeThreshold: Int = 3): Collector[XmlEvent] = + def pretty(collapseEmpty: Boolean = true, + indent: String = " ", + attributeThreshold: Int = 3): Collector.Aux[XmlEvent, String] = new Collector[XmlEvent] { type Out = String def newBuilder: Collector.Builder[XmlEvent, Out] =