From 014d803a584f4161d4213ddf5b400a5320558c5a Mon Sep 17 00:00:00 2001 From: Olivier Nizet Date: Fri, 23 Aug 2024 09:26:16 +0200 Subject: [PATCH 01/10] Support of `svg` tag element --- .../Expressions/HtmlDomExpression.cs | 1 + .../Expressions/Image/SvgExpression.cs | 105 ++++++++++++++++++ test/HtmlToOpenXml.Tests/ImgTests.cs | 10 ++ test/HtmlToOpenXml.Tests/Resources/kiwi.svg | 30 +++++ 4 files changed, 146 insertions(+) create mode 100644 src/Html2OpenXml/Expressions/Image/SvgExpression.cs create mode 100644 test/HtmlToOpenXml.Tests/Resources/kiwi.svg diff --git a/src/Html2OpenXml/Expressions/HtmlDomExpression.cs b/src/Html2OpenXml/Expressions/HtmlDomExpression.cs index a60c603..44ec6bc 100644 --- a/src/Html2OpenXml/Expressions/HtmlDomExpression.cs +++ b/src/Html2OpenXml/Expressions/HtmlDomExpression.cs @@ -67,6 +67,7 @@ private static Dictionary> InitKnownTa { TagNames.Strong, el => new PhrasingElementExpression((IHtmlElement) el, new Bold()) }, { TagNames.Sub, el => new PhrasingElementExpression((IHtmlElement) el, new VerticalTextAlignment() { Val = VerticalPositionValues.Subscript }) }, { TagNames.Sup, el => new PhrasingElementExpression((IHtmlElement) el, new VerticalTextAlignment() { Val = VerticalPositionValues.Superscript }) }, + { TagNames.Svg, el => new SvgExpression((AngleSharp.Svg.Dom.ISvgSvgElement) el) }, { TagNames.Table, el => new TableExpression((IHtmlTableElement) el) }, { TagNames.Time, el => new PhrasingElementExpression((IHtmlElement) el) }, { TagNames.U, el => new PhrasingElementExpression((IHtmlElement) el, new Underline() { Val = UnderlineValues.Single }) }, diff --git a/src/Html2OpenXml/Expressions/Image/SvgExpression.cs b/src/Html2OpenXml/Expressions/Image/SvgExpression.cs new file mode 100644 index 0000000..b408227 --- /dev/null +++ b/src/Html2OpenXml/Expressions/Image/SvgExpression.cs @@ -0,0 +1,105 @@ +/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved + * + * This source is subject to the Microsoft Permissive License. + * Please see the License.txt file for more information. + * All other rights reserved. + * + * THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY + * KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A + * PARTICULAR PURPOSE. + */ +using AngleSharp.Svg.Dom; +using DocumentFormat.OpenXml; +using DocumentFormat.OpenXml.Packaging; +using DocumentFormat.OpenXml.Wordprocessing; +using DocumentFormat.OpenXml.Office2019.Drawing.SVG; +using System.Text; + +using a = DocumentFormat.OpenXml.Drawing; +using pic = DocumentFormat.OpenXml.Drawing.Pictures; +using wp = DocumentFormat.OpenXml.Drawing.Wordprocessing; +using AngleSharp.Text; + +namespace HtmlToOpenXml.Expressions; + +/// +/// Process the parsing of a svg element. +/// +sealed class SvgExpression(ISvgSvgElement node) : ImageExpressionBase(node) +{ + /// Specify that local BLIP compression setting overrides the document default compression setting. + /// + const string UseLocalDpi = "{28A0092B-C50C-407E-A947-70E740481C1C}"; + private readonly ISvgSvgElement svgNode = node; + + + protected override Drawing? CreateDrawing(ParsingContext context) + { + var imgPart = context.MainPart.AddImagePart(ImagePartType.Svg); + using var stream = new System.IO.MemoryStream(Encoding.UTF8.GetBytes(svgNode.OuterHtml), writable: false); + imgPart.FeedData(stream); + var imagePartId = context.MainPart.GetIdOfPart(imgPart); + + Size preferredSize = Size.Empty; + var width = Unit.Parse(svgNode.GetAttribute("width")); + var height = Unit.Parse(svgNode.GetAttribute("height")); + if (width.IsValid && height.IsValid) + preferredSize = new Size(width.ValueInPx, height.ValueInPx); + + var (imageObjId, drawingObjId) = IncrementDrawingObjId(context); + + long widthInEmus = new Unit(UnitMetric.Pixel, preferredSize.Width).ValueInEmus; + long heightInEmus = new Unit(UnitMetric.Pixel, preferredSize.Height).ValueInEmus; + + string? title = svgNode.QuerySelector("title")?.TextContent?.CollapseAndStrip() ?? "Picture " + imageObjId; + string? description = svgNode.QuerySelector("desc")?.TextContent?.CollapseAndStrip() ?? string.Empty; + + var img = new Drawing( + new wp.Inline( + new wp.Extent() { Cx = widthInEmus, Cy = heightInEmus }, + new wp.EffectExtent() { LeftEdge = 0L, TopEdge = 0L, RightEdge = 0L, BottomEdge = 0L }, + new wp.DocProperties() { Id = drawingObjId, Name = title, Description = description }, + new wp.NonVisualGraphicFrameDrawingProperties + { + GraphicFrameLocks = new a.GraphicFrameLocks() { NoChangeAspect = true } + }, + new a.Graphic( + new a.GraphicData( + new pic.Picture( + new pic.NonVisualPictureProperties { + NonVisualDrawingProperties = new pic.NonVisualDrawingProperties() { + Id = imageObjId, Name = title + }, + NonVisualPictureDrawingProperties = new() + //new a.PictureLocks() { NoChangeAspect = true, NoChangeArrowheads = true }) + }, + new pic.BlipFill( + new a.Blip( + new a.BlipExtensionList( + new a.BlipExtension( + new DocumentFormat.OpenXml.Office2010.Drawing.UseLocalDpi() { Val = false } + ) { Uri = UseLocalDpi }, + /* Extra Blip extension for SVG support */ + new a.BlipExtension(new SVGBlip { Embed = imagePartId }) { + Uri = "{96DAC541-7B7A-43D3-8B79-37D633B846F1}" + }) + ) { Embed = imagePartId, CompressionState = a.BlipCompressionValues.Print }, + new a.Stretch( + new a.FillRectangle()) + ), + new pic.ShapeProperties( + new a.Transform2D( + new a.Offset() { X = 0L, Y = 0L }, + new a.Extents() { Cx = widthInEmus, Cy = heightInEmus }), + new a.PresetGeometry( + new a.AdjustValueList() + ) { Preset = a.ShapeTypeValues.Rectangle }) + ) + ) { Uri = "http://schemas.openxmlformats.org/drawingml/2006/picture" }) + ) { DistanceFromTop = (UInt32Value)0U, DistanceFromBottom = (UInt32Value)0U, DistanceFromLeft = (UInt32Value)0U, DistanceFromRight = (UInt32Value)0U } + ); + + return img; + } +} \ No newline at end of file diff --git a/test/HtmlToOpenXml.Tests/ImgTests.cs b/test/HtmlToOpenXml.Tests/ImgTests.cs index 2b13f28..ac4a7f4 100644 --- a/test/HtmlToOpenXml.Tests/ImgTests.cs +++ b/test/HtmlToOpenXml.Tests/ImgTests.cs @@ -93,6 +93,16 @@ public async Task FileSystem_LocalImage_WithSpaceInName_ShouldSucceed() AssertIsImg(elements.First()); } + [Test] + public void Svg_ReturnsImage() + { + var elements = converter.Parse(ResourceHelper.GetString("Resources.kiwi.svg")); + Assert.That(elements, Has.Count.EqualTo(1)); + var drawing = AssertIsImg(elements[0]); + Assert.That(drawing.Inline!.DocProperties?.Name?.Value, Is.EqualTo("Illustration of a Kiwi")); + Assert.That(drawing.Inline!.DocProperties?.Description?.Value, Is.EqualTo("Kiwi (/ˈkiːwiː/ KEE-wee)[4] are flightless birds endemic to New Zealand of the order Apterygiformes.")); + } + [Test(Description = "Reading local file containing a space in the name")] public async Task RemoteImage_WithBaseUri_ShouldSucceed() { diff --git a/test/HtmlToOpenXml.Tests/Resources/kiwi.svg b/test/HtmlToOpenXml.Tests/Resources/kiwi.svg new file mode 100644 index 0000000..9a5b8dd --- /dev/null +++ b/test/HtmlToOpenXml.Tests/Resources/kiwi.svg @@ -0,0 +1,30 @@ + + + + + + Illustration of a Kiwi + + + Kiwi (/ˈkiːwiː/ KEE-wee)[4] are flightless birds endemic to New Zealand of the order Apterygiformes. + + + + From 7f81613a711392a5ecd8f5df09858f274342c005 Mon Sep 17 00:00:00 2001 From: Olivier Nizet Date: Fri, 23 Aug 2024 09:26:44 +0200 Subject: [PATCH 02/10] Detect Xml/Svg image and read for its preferred size --- src/Html2OpenXml/IO/ImageHeader.cs | 31 +++++++++++++++++-- .../ImageFormats/ImageHeaderTests.cs | 2 ++ 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/src/Html2OpenXml/IO/ImageHeader.cs b/src/Html2OpenXml/IO/ImageHeader.cs index 762c9c9..858a923 100755 --- a/src/Html2OpenXml/IO/ImageHeader.cs +++ b/src/Html2OpenXml/IO/ImageHeader.cs @@ -18,6 +18,7 @@ using System.IO; using System.Linq; using System.Text; +using System.Xml.XPath; namespace HtmlToOpenXml.IO; @@ -29,7 +30,7 @@ public static class ImageHeader // https://en.wikipedia.org/wiki/List_of_file_signatures #pragma warning disable CS1591 // Missing XML comment for publicly visible type or member - public enum FileType { Unrecognized, Bitmap, Gif, Png, Jpeg, Emf } + public enum FileType { Unrecognized, Bitmap, Gif, Png, Jpeg, Emf, Xml } #pragma warning restore CS1591 // Missing XML comment for publicly visible type or member private static readonly byte[] pngSignatureBytes = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]; @@ -41,7 +42,8 @@ public enum FileType { Unrecognized, Bitmap, Gif, Png, Jpeg, Emf } { Encoding.UTF8.GetBytes("GIF89a"), FileType.Gif }, // animated gif { pngSignatureBytes, FileType.Png }, { new byte[] { 0xff, 0xd8 }, FileType.Jpeg }, - { new byte[] { 0x1, 0, 0, 0 }, FileType.Emf } + { new byte[] { 0x1, 0, 0, 0 }, FileType.Emf }, + { Encoding.UTF8.GetBytes(" @@ -53,6 +54,7 @@ public void PngSof2_ReturnsImageSize() [TestCase("Resources.html2openxml.gif", ExpectedResult = ImageHeader.FileType.Gif)] [TestCase("Resources.html2openxml.jpg", ExpectedResult = ImageHeader.FileType.Jpeg)] [TestCase("Resources.html2openxml.png", ExpectedResult = ImageHeader.FileType.Png)] + [TestCase("Resources.kiwi.svg", ExpectedResult = ImageHeader.FileType.Xml)] public ImageHeader.FileType GuessFormat_ReturnsFileType(string resourceName) { using var imageStream = ResourceHelper.GetStream(resourceName); From f6617888e3e2e603cbac3ec353ead601540af2ac Mon Sep 17 00:00:00 2001 From: Olivier Nizet Date: Fri, 23 Aug 2024 11:40:30 +0200 Subject: [PATCH 03/10] Rationalise object declaration --- src/Html2OpenXml/Expressions/Image/SvgExpression.cs | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/Html2OpenXml/Expressions/Image/SvgExpression.cs b/src/Html2OpenXml/Expressions/Image/SvgExpression.cs index b408227..3132c1b 100644 --- a/src/Html2OpenXml/Expressions/Image/SvgExpression.cs +++ b/src/Html2OpenXml/Expressions/Image/SvgExpression.cs @@ -28,9 +28,6 @@ namespace HtmlToOpenXml.Expressions; /// sealed class SvgExpression(ISvgSvgElement node) : ImageExpressionBase(node) { - /// Specify that local BLIP compression setting overrides the document default compression setting. - /// - const string UseLocalDpi = "{28A0092B-C50C-407E-A947-70E740481C1C}"; private readonly ISvgSvgElement svgNode = node; @@ -72,19 +69,14 @@ sealed class SvgExpression(ISvgSvgElement node) : ImageExpressionBase(node) Id = imageObjId, Name = title }, NonVisualPictureDrawingProperties = new() - //new a.PictureLocks() { NoChangeAspect = true, NoChangeArrowheads = true }) }, new pic.BlipFill( new a.Blip( new a.BlipExtensionList( - new a.BlipExtension( - new DocumentFormat.OpenXml.Office2010.Drawing.UseLocalDpi() { Val = false } - ) { Uri = UseLocalDpi }, - /* Extra Blip extension for SVG support */ new a.BlipExtension(new SVGBlip { Embed = imagePartId }) { Uri = "{96DAC541-7B7A-43D3-8B79-37D633B846F1}" }) - ) { Embed = imagePartId, CompressionState = a.BlipCompressionValues.Print }, + ) { Embed = imagePartId /* ideally, that should be a png representation of the png */ }, new a.Stretch( new a.FillRectangle()) ), From e1003bb0e01767f51ba0c69c7f425541428f8da7 Mon Sep 17 00:00:00 2001 From: Olivier Nizet Date: Fri, 23 Aug 2024 09:26:16 +0200 Subject: [PATCH 04/10] Support of `svg` tag element --- .../Expressions/HtmlDomExpression.cs | 1 + .../Expressions/Image/SvgExpression.cs | 105 ++++++++++++++++++ test/HtmlToOpenXml.Tests/ImgTests.cs | 10 ++ test/HtmlToOpenXml.Tests/Resources/kiwi.svg | 30 +++++ 4 files changed, 146 insertions(+) create mode 100644 src/Html2OpenXml/Expressions/Image/SvgExpression.cs create mode 100644 test/HtmlToOpenXml.Tests/Resources/kiwi.svg diff --git a/src/Html2OpenXml/Expressions/HtmlDomExpression.cs b/src/Html2OpenXml/Expressions/HtmlDomExpression.cs index a60c603..44ec6bc 100644 --- a/src/Html2OpenXml/Expressions/HtmlDomExpression.cs +++ b/src/Html2OpenXml/Expressions/HtmlDomExpression.cs @@ -67,6 +67,7 @@ private static Dictionary> InitKnownTa { TagNames.Strong, el => new PhrasingElementExpression((IHtmlElement) el, new Bold()) }, { TagNames.Sub, el => new PhrasingElementExpression((IHtmlElement) el, new VerticalTextAlignment() { Val = VerticalPositionValues.Subscript }) }, { TagNames.Sup, el => new PhrasingElementExpression((IHtmlElement) el, new VerticalTextAlignment() { Val = VerticalPositionValues.Superscript }) }, + { TagNames.Svg, el => new SvgExpression((AngleSharp.Svg.Dom.ISvgSvgElement) el) }, { TagNames.Table, el => new TableExpression((IHtmlTableElement) el) }, { TagNames.Time, el => new PhrasingElementExpression((IHtmlElement) el) }, { TagNames.U, el => new PhrasingElementExpression((IHtmlElement) el, new Underline() { Val = UnderlineValues.Single }) }, diff --git a/src/Html2OpenXml/Expressions/Image/SvgExpression.cs b/src/Html2OpenXml/Expressions/Image/SvgExpression.cs new file mode 100644 index 0000000..b408227 --- /dev/null +++ b/src/Html2OpenXml/Expressions/Image/SvgExpression.cs @@ -0,0 +1,105 @@ +/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved + * + * This source is subject to the Microsoft Permissive License. + * Please see the License.txt file for more information. + * All other rights reserved. + * + * THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY + * KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A + * PARTICULAR PURPOSE. + */ +using AngleSharp.Svg.Dom; +using DocumentFormat.OpenXml; +using DocumentFormat.OpenXml.Packaging; +using DocumentFormat.OpenXml.Wordprocessing; +using DocumentFormat.OpenXml.Office2019.Drawing.SVG; +using System.Text; + +using a = DocumentFormat.OpenXml.Drawing; +using pic = DocumentFormat.OpenXml.Drawing.Pictures; +using wp = DocumentFormat.OpenXml.Drawing.Wordprocessing; +using AngleSharp.Text; + +namespace HtmlToOpenXml.Expressions; + +/// +/// Process the parsing of a svg element. +/// +sealed class SvgExpression(ISvgSvgElement node) : ImageExpressionBase(node) +{ + /// Specify that local BLIP compression setting overrides the document default compression setting. + /// + const string UseLocalDpi = "{28A0092B-C50C-407E-A947-70E740481C1C}"; + private readonly ISvgSvgElement svgNode = node; + + + protected override Drawing? CreateDrawing(ParsingContext context) + { + var imgPart = context.MainPart.AddImagePart(ImagePartType.Svg); + using var stream = new System.IO.MemoryStream(Encoding.UTF8.GetBytes(svgNode.OuterHtml), writable: false); + imgPart.FeedData(stream); + var imagePartId = context.MainPart.GetIdOfPart(imgPart); + + Size preferredSize = Size.Empty; + var width = Unit.Parse(svgNode.GetAttribute("width")); + var height = Unit.Parse(svgNode.GetAttribute("height")); + if (width.IsValid && height.IsValid) + preferredSize = new Size(width.ValueInPx, height.ValueInPx); + + var (imageObjId, drawingObjId) = IncrementDrawingObjId(context); + + long widthInEmus = new Unit(UnitMetric.Pixel, preferredSize.Width).ValueInEmus; + long heightInEmus = new Unit(UnitMetric.Pixel, preferredSize.Height).ValueInEmus; + + string? title = svgNode.QuerySelector("title")?.TextContent?.CollapseAndStrip() ?? "Picture " + imageObjId; + string? description = svgNode.QuerySelector("desc")?.TextContent?.CollapseAndStrip() ?? string.Empty; + + var img = new Drawing( + new wp.Inline( + new wp.Extent() { Cx = widthInEmus, Cy = heightInEmus }, + new wp.EffectExtent() { LeftEdge = 0L, TopEdge = 0L, RightEdge = 0L, BottomEdge = 0L }, + new wp.DocProperties() { Id = drawingObjId, Name = title, Description = description }, + new wp.NonVisualGraphicFrameDrawingProperties + { + GraphicFrameLocks = new a.GraphicFrameLocks() { NoChangeAspect = true } + }, + new a.Graphic( + new a.GraphicData( + new pic.Picture( + new pic.NonVisualPictureProperties { + NonVisualDrawingProperties = new pic.NonVisualDrawingProperties() { + Id = imageObjId, Name = title + }, + NonVisualPictureDrawingProperties = new() + //new a.PictureLocks() { NoChangeAspect = true, NoChangeArrowheads = true }) + }, + new pic.BlipFill( + new a.Blip( + new a.BlipExtensionList( + new a.BlipExtension( + new DocumentFormat.OpenXml.Office2010.Drawing.UseLocalDpi() { Val = false } + ) { Uri = UseLocalDpi }, + /* Extra Blip extension for SVG support */ + new a.BlipExtension(new SVGBlip { Embed = imagePartId }) { + Uri = "{96DAC541-7B7A-43D3-8B79-37D633B846F1}" + }) + ) { Embed = imagePartId, CompressionState = a.BlipCompressionValues.Print }, + new a.Stretch( + new a.FillRectangle()) + ), + new pic.ShapeProperties( + new a.Transform2D( + new a.Offset() { X = 0L, Y = 0L }, + new a.Extents() { Cx = widthInEmus, Cy = heightInEmus }), + new a.PresetGeometry( + new a.AdjustValueList() + ) { Preset = a.ShapeTypeValues.Rectangle }) + ) + ) { Uri = "http://schemas.openxmlformats.org/drawingml/2006/picture" }) + ) { DistanceFromTop = (UInt32Value)0U, DistanceFromBottom = (UInt32Value)0U, DistanceFromLeft = (UInt32Value)0U, DistanceFromRight = (UInt32Value)0U } + ); + + return img; + } +} \ No newline at end of file diff --git a/test/HtmlToOpenXml.Tests/ImgTests.cs b/test/HtmlToOpenXml.Tests/ImgTests.cs index d3a62d7..1350a94 100644 --- a/test/HtmlToOpenXml.Tests/ImgTests.cs +++ b/test/HtmlToOpenXml.Tests/ImgTests.cs @@ -94,6 +94,16 @@ public async Task FileSystem_LocalImage_WithSpaceInName_ShouldSucceed() AssertIsImg(mainPart, elements.First()); } + [Test] + public void Svg_ReturnsImage() + { + var elements = converter.Parse(ResourceHelper.GetString("Resources.kiwi.svg")); + Assert.That(elements, Has.Count.EqualTo(1)); + var drawing = AssertIsImg(elements[0]); + Assert.That(drawing.Inline!.DocProperties?.Name?.Value, Is.EqualTo("Illustration of a Kiwi")); + Assert.That(drawing.Inline!.DocProperties?.Description?.Value, Is.EqualTo("Kiwi (/ˈkiːwiː/ KEE-wee)[4] are flightless birds endemic to New Zealand of the order Apterygiformes.")); + } + [Test(Description = "Reading local file containing a space in the name")] public async Task RemoteImage_WithBaseUri_ShouldSucceed() { diff --git a/test/HtmlToOpenXml.Tests/Resources/kiwi.svg b/test/HtmlToOpenXml.Tests/Resources/kiwi.svg new file mode 100644 index 0000000..9a5b8dd --- /dev/null +++ b/test/HtmlToOpenXml.Tests/Resources/kiwi.svg @@ -0,0 +1,30 @@ + + + + + + Illustration of a Kiwi + + + Kiwi (/ˈkiːwiː/ KEE-wee)[4] are flightless birds endemic to New Zealand of the order Apterygiformes. + + + + From f45d12262878c00994fbeb488629ff98fa659159 Mon Sep 17 00:00:00 2001 From: Olivier Nizet Date: Fri, 23 Aug 2024 09:26:44 +0200 Subject: [PATCH 05/10] Detect Xml/Svg image and read for its preferred size --- src/Html2OpenXml/IO/ImageHeader.cs | 31 +++++++++++++++++-- .../ImageFormats/ImageHeaderTests.cs | 2 ++ 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/src/Html2OpenXml/IO/ImageHeader.cs b/src/Html2OpenXml/IO/ImageHeader.cs index 762c9c9..858a923 100755 --- a/src/Html2OpenXml/IO/ImageHeader.cs +++ b/src/Html2OpenXml/IO/ImageHeader.cs @@ -18,6 +18,7 @@ using System.IO; using System.Linq; using System.Text; +using System.Xml.XPath; namespace HtmlToOpenXml.IO; @@ -29,7 +30,7 @@ public static class ImageHeader // https://en.wikipedia.org/wiki/List_of_file_signatures #pragma warning disable CS1591 // Missing XML comment for publicly visible type or member - public enum FileType { Unrecognized, Bitmap, Gif, Png, Jpeg, Emf } + public enum FileType { Unrecognized, Bitmap, Gif, Png, Jpeg, Emf, Xml } #pragma warning restore CS1591 // Missing XML comment for publicly visible type or member private static readonly byte[] pngSignatureBytes = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]; @@ -41,7 +42,8 @@ public enum FileType { Unrecognized, Bitmap, Gif, Png, Jpeg, Emf } { Encoding.UTF8.GetBytes("GIF89a"), FileType.Gif }, // animated gif { pngSignatureBytes, FileType.Png }, { new byte[] { 0xff, 0xd8 }, FileType.Jpeg }, - { new byte[] { 0x1, 0, 0, 0 }, FileType.Emf } + { new byte[] { 0x1, 0, 0, 0 }, FileType.Emf }, + { Encoding.UTF8.GetBytes(" @@ -53,6 +54,7 @@ public void PngSof2_ReturnsImageSize() [TestCase("Resources.html2openxml.gif", ExpectedResult = ImageHeader.FileType.Gif)] [TestCase("Resources.html2openxml.jpg", ExpectedResult = ImageHeader.FileType.Jpeg)] [TestCase("Resources.html2openxml.png", ExpectedResult = ImageHeader.FileType.Png)] + [TestCase("Resources.kiwi.svg", ExpectedResult = ImageHeader.FileType.Xml)] public ImageHeader.FileType GuessFormat_ReturnsFileType(string resourceName) { using var imageStream = ResourceHelper.GetStream(resourceName); From cb9f91916d9a904d30edc4902da149473d07f77d Mon Sep 17 00:00:00 2001 From: Olivier Nizet Date: Fri, 23 Aug 2024 11:40:30 +0200 Subject: [PATCH 06/10] Rationalise object declaration --- src/Html2OpenXml/Expressions/Image/SvgExpression.cs | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/Html2OpenXml/Expressions/Image/SvgExpression.cs b/src/Html2OpenXml/Expressions/Image/SvgExpression.cs index b408227..3132c1b 100644 --- a/src/Html2OpenXml/Expressions/Image/SvgExpression.cs +++ b/src/Html2OpenXml/Expressions/Image/SvgExpression.cs @@ -28,9 +28,6 @@ namespace HtmlToOpenXml.Expressions; /// sealed class SvgExpression(ISvgSvgElement node) : ImageExpressionBase(node) { - /// Specify that local BLIP compression setting overrides the document default compression setting. - /// - const string UseLocalDpi = "{28A0092B-C50C-407E-A947-70E740481C1C}"; private readonly ISvgSvgElement svgNode = node; @@ -72,19 +69,14 @@ sealed class SvgExpression(ISvgSvgElement node) : ImageExpressionBase(node) Id = imageObjId, Name = title }, NonVisualPictureDrawingProperties = new() - //new a.PictureLocks() { NoChangeAspect = true, NoChangeArrowheads = true }) }, new pic.BlipFill( new a.Blip( new a.BlipExtensionList( - new a.BlipExtension( - new DocumentFormat.OpenXml.Office2010.Drawing.UseLocalDpi() { Val = false } - ) { Uri = UseLocalDpi }, - /* Extra Blip extension for SVG support */ new a.BlipExtension(new SVGBlip { Embed = imagePartId }) { Uri = "{96DAC541-7B7A-43D3-8B79-37D633B846F1}" }) - ) { Embed = imagePartId, CompressionState = a.BlipCompressionValues.Print }, + ) { Embed = imagePartId /* ideally, that should be a png representation of the png */ }, new a.Stretch( new a.FillRectangle()) ), From da46504d79596160e4b5660131d33e1db8e955f3 Mon Sep 17 00:00:00 2001 From: Olivier Nizet Date: Sat, 21 Sep 2024 22:09:24 +0200 Subject: [PATCH 07/10] Fix after rebase --- test/HtmlToOpenXml.Tests/BodyTests.cs | 2 +- test/HtmlToOpenXml.Tests/ImgTests.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/HtmlToOpenXml.Tests/BodyTests.cs b/test/HtmlToOpenXml.Tests/BodyTests.cs index 187e2f2..706556d 100644 --- a/test/HtmlToOpenXml.Tests/BodyTests.cs +++ b/test/HtmlToOpenXml.Tests/BodyTests.cs @@ -72,7 +72,7 @@ public async Task WithGoBackBookmark_ShouldBeAfterAppendedOutput() Assert.That(goBackBookmark, Is.Not.Null); HtmlConverter converter = new HtmlConverter(mainPart); - await converter.ParseHtml("

Placeholder

"); + await converter.ParseBody("

Placeholder

"); Assert.That(mainPart.Document.Body!.LastChild, Is.TypeOf()); var paragrahs = mainPart.Document.Body!.Elements(); diff --git a/test/HtmlToOpenXml.Tests/ImgTests.cs b/test/HtmlToOpenXml.Tests/ImgTests.cs index 1350a94..9cc6300 100644 --- a/test/HtmlToOpenXml.Tests/ImgTests.cs +++ b/test/HtmlToOpenXml.Tests/ImgTests.cs @@ -99,7 +99,7 @@ public void Svg_ReturnsImage() { var elements = converter.Parse(ResourceHelper.GetString("Resources.kiwi.svg")); Assert.That(elements, Has.Count.EqualTo(1)); - var drawing = AssertIsImg(elements[0]); + var drawing = AssertIsImg(mainPart, elements[0]); Assert.That(drawing.Inline!.DocProperties?.Name?.Value, Is.EqualTo("Illustration of a Kiwi")); Assert.That(drawing.Inline!.DocProperties?.Description?.Value, Is.EqualTo("Kiwi (/ˈkiːwiː/ KEE-wee)[4] are flightless birds endemic to New Zealand of the order Apterygiformes.")); } From 393f44f4ef90d305f77ccf14d5a06c0052914016 Mon Sep 17 00:00:00 2001 From: Olivier Nizet Date: Sun, 22 Sep 2024 20:31:40 +0200 Subject: [PATCH 08/10] Minor fixup --- src/Html2OpenXml/Expressions/Image/SvgExpression.cs | 2 +- test/HtmlToOpenXml.Tests/ImgTests.cs | 10 ---------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/src/Html2OpenXml/Expressions/Image/SvgExpression.cs b/src/Html2OpenXml/Expressions/Image/SvgExpression.cs index 3132c1b..13376f7 100644 --- a/src/Html2OpenXml/Expressions/Image/SvgExpression.cs +++ b/src/Html2OpenXml/Expressions/Image/SvgExpression.cs @@ -76,7 +76,7 @@ sealed class SvgExpression(ISvgSvgElement node) : ImageExpressionBase(node) new a.BlipExtension(new SVGBlip { Embed = imagePartId }) { Uri = "{96DAC541-7B7A-43D3-8B79-37D633B846F1}" }) - ) { Embed = imagePartId /* ideally, that should be a png representation of the png */ }, + ) { Embed = imagePartId /* ideally, that should be a png representation of the svg */ }, new a.Stretch( new a.FillRectangle()) ), diff --git a/test/HtmlToOpenXml.Tests/ImgTests.cs b/test/HtmlToOpenXml.Tests/ImgTests.cs index 96850d1..9cc6300 100644 --- a/test/HtmlToOpenXml.Tests/ImgTests.cs +++ b/test/HtmlToOpenXml.Tests/ImgTests.cs @@ -104,16 +104,6 @@ public void Svg_ReturnsImage() Assert.That(drawing.Inline!.DocProperties?.Description?.Value, Is.EqualTo("Kiwi (/ˈkiːwiː/ KEE-wee)[4] are flightless birds endemic to New Zealand of the order Apterygiformes.")); } - [Test] - public void Svg_ReturnsImage() - { - var elements = converter.Parse(ResourceHelper.GetString("Resources.kiwi.svg")); - Assert.That(elements, Has.Count.EqualTo(1)); - var drawing = AssertIsImg(elements[0]); - Assert.That(drawing.Inline!.DocProperties?.Name?.Value, Is.EqualTo("Illustration of a Kiwi")); - Assert.That(drawing.Inline!.DocProperties?.Description?.Value, Is.EqualTo("Kiwi (/ˈkiːwiː/ KEE-wee)[4] are flightless birds endemic to New Zealand of the order Apterygiformes.")); - } - [Test(Description = "Reading local file containing a space in the name")] public async Task RemoteImage_WithBaseUri_ShouldSucceed() { From 950bb3ebff19ce662612f3fe59260b1e6202903c Mon Sep 17 00:00:00 2001 From: Olivier Nizet Date: Sun, 22 Sep 2024 21:16:29 +0200 Subject: [PATCH 09/10] Improve testing --- .../Expressions/Image/SvgExpression.cs | 3 +- test/HtmlToOpenXml.Tests/ImgTests.cs | 32 ++++++++++--------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/src/Html2OpenXml/Expressions/Image/SvgExpression.cs b/src/Html2OpenXml/Expressions/Image/SvgExpression.cs index 13376f7..0a58e98 100644 --- a/src/Html2OpenXml/Expressions/Image/SvgExpression.cs +++ b/src/Html2OpenXml/Expressions/Image/SvgExpression.cs @@ -57,8 +57,7 @@ sealed class SvgExpression(ISvgSvgElement node) : ImageExpressionBase(node) new wp.Extent() { Cx = widthInEmus, Cy = heightInEmus }, new wp.EffectExtent() { LeftEdge = 0L, TopEdge = 0L, RightEdge = 0L, BottomEdge = 0L }, new wp.DocProperties() { Id = drawingObjId, Name = title, Description = description }, - new wp.NonVisualGraphicFrameDrawingProperties - { + new wp.NonVisualGraphicFrameDrawingProperties { GraphicFrameLocks = new a.GraphicFrameLocks() { NoChangeAspect = true } }, new a.Graphic( diff --git a/test/HtmlToOpenXml.Tests/ImgTests.cs b/test/HtmlToOpenXml.Tests/ImgTests.cs index 9cc6300..64d97ae 100644 --- a/test/HtmlToOpenXml.Tests/ImgTests.cs +++ b/test/HtmlToOpenXml.Tests/ImgTests.cs @@ -15,12 +15,14 @@ namespace HtmlToOpenXml.Tests [TestFixture] public class ImgTests : HtmlConverterTestBase { - [Test] - public void AbsoluteUri_ReturnsDrawing_WithDownloadedData() + [TestCase("https://www.w3schools.com/tags/smiley.gif", "image/gif")] + [TestCase("https://dev.w3.org/SVG/tools/svgweb/samples/svg-files/helloworld.svg", "image/svg+xml")] + public void AbsoluteUri_ReturnsDrawing_WithDownloadedData(string imageUri, string contentType) { - var elements = converter.Parse(@"Smiley face"); + var elements = converter.Parse(@$"Smiley face"); Assert.That(elements, Has.Count.EqualTo(1)); - AssertIsImg(mainPart, elements[0]); + var (_, imagePart) = AssertIsImg(mainPart, elements[0]); + Assert.That(imagePart.ContentType, Is.EqualTo(contentType)); } [Test] @@ -58,7 +60,6 @@ public void ManualProvisioning_ReturnsDrawing_WithProvidedData() AssertIsImg(mainPart, elements[0]); } - [TestCase("Smiley face", Description = "Empty image")] [TestCase("", Description = "Unsupported protocol")] [TestCase("", Description = "Relative url without providing BaseImagerUri")] public void IgnoreImage_ShouldBeIgnored(string html) @@ -95,13 +96,14 @@ public async Task FileSystem_LocalImage_WithSpaceInName_ShouldSucceed() } [Test] - public void Svg_ReturnsImage() + public void SvgNode_ReturnsImage() { var elements = converter.Parse(ResourceHelper.GetString("Resources.kiwi.svg")); Assert.That(elements, Has.Count.EqualTo(1)); - var drawing = AssertIsImg(mainPart, elements[0]); + var (drawing, imagePart) = AssertIsImg(mainPart, elements[0]); Assert.That(drawing.Inline!.DocProperties?.Name?.Value, Is.EqualTo("Illustration of a Kiwi")); Assert.That(drawing.Inline!.DocProperties?.Description?.Value, Is.EqualTo("Kiwi (/ˈkiːwiː/ KEE-wee)[4] are flightless birds endemic to New Zealand of the order Apterygiformes.")); + Assert.That(imagePart.ContentType, Is.EqualTo("image/svg+xml")); } [Test(Description = "Reading local file containing a space in the name")] @@ -201,21 +203,21 @@ public async Task ParseIntoDocumentPart_ReturnsImageParentedToPart (Type openXml AssertThatOpenXmlDocumentIsValid(); } - private static Drawing AssertIsImg (OpenXmlPartContainer container, OpenXmlElement paragraph) + private static (Drawing, ImagePart) AssertIsImg (OpenXmlPartContainer container, OpenXmlElement paragraph) { var run = paragraph.GetFirstChild(); Assert.That(run, Is.Not.Null); - var img = run.GetFirstChild(); - Assert.That(img, Is.Not.Null); - Assert.That(img.Inline?.Graphic?.GraphicData, Is.Not.Null); - var pic = img.Inline.Graphic.GraphicData.GetFirstChild(); + var drawing = run.GetFirstChild(); + Assert.That(drawing, Is.Not.Null); + Assert.That(drawing.Inline?.Graphic?.GraphicData, Is.Not.Null); + var pic = drawing.Inline.Graphic.GraphicData.GetFirstChild(); Assert.That(pic?.BlipFill?.Blip?.Embed, Is.Not.Null); var imagePartId = pic.BlipFill.Blip.Embed.Value; Assert.That(imagePartId, Is.Not.Null); - var part = container.GetPartById(imagePartId); - Assert.That(part, Is.TypeOf(typeof(ImagePart))); - return img; + var imagePart = container.GetPartById(imagePartId); + Assert.That(imagePart, Is.TypeOf(typeof(ImagePart))); + return (drawing, (ImagePart) imagePart); } } } \ No newline at end of file From cc63ee9688e4e13e3c0ef91cdfa9d4a45e66532a Mon Sep 17 00:00:00 2001 From: Olivier Nizet Date: Mon, 23 Sep 2024 14:28:00 +0200 Subject: [PATCH 10/10] Handle SVG image in an unified code --- examples/Demo/Program.cs | 7 +------ .../Expressions/Image/ImageExpression.cs | 18 +++++++++++++++-- .../Expressions/Image/SvgExpression.cs | 19 +++++++++++++----- src/Html2OpenXml/IO/ImagePrefetcher.cs | 20 ++++++++++--------- src/Html2OpenXml/Primitives/HtmlImageInfo.cs | 11 +++++++--- 5 files changed, 50 insertions(+), 25 deletions(-) diff --git a/examples/Demo/Program.cs b/examples/Demo/Program.cs index e8b7539..47c0124 100644 --- a/examples/Demo/Program.cs +++ b/examples/Demo/Program.cs @@ -15,7 +15,7 @@ static class Program static async Task Main(string[] args) { const string filename = "test.docx"; - string html = ResourceHelper.GetString("Resources.Document.html"); + string html = ResourceHelper.GetString("Resources.AdvancedTable.html"); if (File.Exists(filename)) File.Delete(filename); using (MemoryStream generatedDocument = new MemoryStream()) @@ -39,14 +39,9 @@ static async Task Main(string[] args) } HtmlConverter converter = new HtmlConverter(mainPart); - // HeaderPart headerPart = mainPart.AddNewPart(); - //FooterPart footerPart = mainPart.AddNewPart(); converter.RenderPreAsTable = true; Body body = mainPart.Document.Body; - await converter.ParseHeader(@" - Red Dot"); - await converter.ParseBody(html); mainPart.Document.Save(); diff --git a/src/Html2OpenXml/Expressions/Image/ImageExpression.cs b/src/Html2OpenXml/Expressions/Image/ImageExpression.cs index be9bb9c..a4b61c3 100644 --- a/src/Html2OpenXml/Expressions/Image/ImageExpression.cs +++ b/src/Html2OpenXml/Expressions/Image/ImageExpression.cs @@ -11,8 +11,11 @@ */ using System; using System.Threading; +using AngleSharp.Dom; using AngleSharp.Html.Dom; +using AngleSharp.Svg.Dom; using DocumentFormat.OpenXml; +using DocumentFormat.OpenXml.Packaging; using DocumentFormat.OpenXml.Wordprocessing; using HtmlToOpenXml.IO; @@ -57,14 +60,24 @@ class ImageExpression(IHtmlImageElement node) : ImageExpressionBase(node) preferredSize.Height = imgNode.DisplayHeight; } - var (imageObjId, drawingObjId) = IncrementDrawingObjId(context); - HtmlImageInfo? iinfo = context.ImageLoader.Download(src, CancellationToken.None) .ConfigureAwait(false).GetAwaiter().GetResult(); if (iinfo == null) return null; + if (iinfo.TypeInfo == ImagePartType.Svg) + { + var imagePart = context.HostingPart.GetPartById(iinfo.ImagePartId); + using var stream = imagePart.GetStream(System.IO.FileMode.Open); + using var sreader = new System.IO.StreamReader(stream); + imgNode.Insert(AdjacentPosition.AfterBegin, sreader.ReadToEnd()); + + var svgNode = imgNode.FindChild(); + if (svgNode is null) return null; + return SvgExpression.CreateSvgDrawing(context, svgNode, iinfo.ImagePartId, preferredSize); + } + if (preferredSize.IsEmpty) { preferredSize = iinfo.Size; @@ -78,6 +91,7 @@ class ImageExpression(IHtmlImageElement node) : ImageExpressionBase(node) long widthInEmus = new Unit(UnitMetric.Pixel, preferredSize.Width).ValueInEmus; long heightInEmus = new Unit(UnitMetric.Pixel, preferredSize.Height).ValueInEmus; + var (imageObjId, drawingObjId) = IncrementDrawingObjId(context); var img = new Drawing( new wp.Inline( new wp.Extent() { Cx = widthInEmus, Cy = heightInEmus }, diff --git a/src/Html2OpenXml/Expressions/Image/SvgExpression.cs b/src/Html2OpenXml/Expressions/Image/SvgExpression.cs index 0a58e98..7bb9a7f 100644 --- a/src/Html2OpenXml/Expressions/Image/SvgExpression.cs +++ b/src/Html2OpenXml/Expressions/Image/SvgExpression.cs @@ -37,18 +37,27 @@ sealed class SvgExpression(ISvgSvgElement node) : ImageExpressionBase(node) using var stream = new System.IO.MemoryStream(Encoding.UTF8.GetBytes(svgNode.OuterHtml), writable: false); imgPart.FeedData(stream); var imagePartId = context.MainPart.GetIdOfPart(imgPart); + return CreateSvgDrawing(context, svgNode, imagePartId, Size.Empty); + } - Size preferredSize = Size.Empty; + internal static Drawing CreateSvgDrawing(ParsingContext context, ISvgSvgElement svgNode, string imagePartId, Size preferredSize) + { var width = Unit.Parse(svgNode.GetAttribute("width")); var height = Unit.Parse(svgNode.GetAttribute("height")); + long widthInEmus, heightInEmus; if (width.IsValid && height.IsValid) - preferredSize = new Size(width.ValueInPx, height.ValueInPx); + { + widthInEmus = width.ValueInEmus; + heightInEmus = height.ValueInEmus; + } + else + { + widthInEmus = new Unit(UnitMetric.Pixel, preferredSize.Width).ValueInEmus; + heightInEmus = new Unit(UnitMetric.Pixel, preferredSize.Height).ValueInEmus; + } var (imageObjId, drawingObjId) = IncrementDrawingObjId(context); - long widthInEmus = new Unit(UnitMetric.Pixel, preferredSize.Width).ValueInEmus; - long heightInEmus = new Unit(UnitMetric.Pixel, preferredSize.Height).ValueInEmus; - string? title = svgNode.QuerySelector("title")?.TextContent?.CollapseAndStrip() ?? "Picture " + imageObjId; string? description = svgNode.QuerySelector("desc")?.TextContent?.CollapseAndStrip() ?? string.Empty; diff --git a/src/Html2OpenXml/IO/ImagePrefetcher.cs b/src/Html2OpenXml/IO/ImagePrefetcher.cs index 93cf951..9ff6dfc 100644 --- a/src/Html2OpenXml/IO/ImagePrefetcher.cs +++ b/src/Html2OpenXml/IO/ImagePrefetcher.cs @@ -110,7 +110,6 @@ public ImagePrefetcher(T hostingPart, IWebRequest resourceLoader) if (response?.Content == null) return null; - HtmlImageInfo info = new HtmlImageInfo(src); using (response) { // For requested url with no filename, we need to read the media mime type if provided @@ -123,16 +122,19 @@ public ImagePrefetcher(T hostingPart, IWebRequest resourceLoader) } var ipart = hostingPart.AddImagePart(type); + Size originalSize; using (var outputStream = ipart.GetStream(FileMode.Create)) { response.Content.CopyTo(outputStream); outputStream.Seek(0L, SeekOrigin.Begin); - info.Size = GetImageSize(outputStream); + originalSize = GetImageSize(outputStream); } - info.ImagePartId = hostingPart.GetIdOfPart(ipart); - return info; + return new HtmlImageInfo(src, hostingPart.GetIdOfPart(ipart)) { + TypeInfo = type, + Size = originalSize + }; } } @@ -143,7 +145,7 @@ public ImagePrefetcher(T hostingPart, IWebRequest resourceLoader) { if (DataUri.TryCreate(src, out var dataUri)) { - Size size; + Size originalSize; knownContentType.TryGetValue(dataUri!.Mime, out PartTypeInfo type); var ipart = hostingPart.AddImagePart(type); using (var outputStream = ipart.GetStream(FileMode.Create)) @@ -151,12 +153,12 @@ public ImagePrefetcher(T hostingPart, IWebRequest resourceLoader) outputStream.Write(dataUri.Data, 0, dataUri.Data.Length); outputStream.Seek(0L, SeekOrigin.Begin); - size = GetImageSize(outputStream); + originalSize = GetImageSize(outputStream); } - return new HtmlImageInfo(src) { - ImagePartId = hostingPart.GetIdOfPart(ipart), - Size = size + return new HtmlImageInfo(src, hostingPart.GetIdOfPart(ipart)) { + TypeInfo = type, + Size = originalSize }; } diff --git a/src/Html2OpenXml/Primitives/HtmlImageInfo.cs b/src/Html2OpenXml/Primitives/HtmlImageInfo.cs index 7ecda1f..4ffca08 100755 --- a/src/Html2OpenXml/Primitives/HtmlImageInfo.cs +++ b/src/Html2OpenXml/Primitives/HtmlImageInfo.cs @@ -16,7 +16,7 @@ namespace HtmlToOpenXml; /// /// Represents an image and its metadata. /// -sealed class HtmlImageInfo(string source) +sealed class HtmlImageInfo(string source, string partId) { /// /// The URI identifying this cached image information. @@ -26,12 +26,17 @@ sealed class HtmlImageInfo(string source) /// /// The Unique identifier of the ImagePart in the . /// - public string? ImagePartId { get; set; } + public string ImagePartId { get; set; } = partId; /// - /// Gets or sets the size of the image + /// Gets or sets the original size of the image. /// public Size Size { get; set; } + + /// + /// Gets the content type of the image. + /// + public PartTypeInfo TypeInfo { get; set; } } ///