Documentation errata.

- Conformance status. - OPAM instructions. - Packaging. - Fixed various typos.
aantron · Jan 15, 2016 · 6bdec86 · 6bdec86
1 parent 22351e2
commit 6bdec86
Show file tree

Hide file tree

Showing 13 changed files with 102 additions and 45 deletions.
diff --git a/.gitignore b/.gitignore
@@ -5,4 +5,5 @@ test/coverage
 doc/markup.odocl
 doc/html
 doc/publish
+doc/*.zip
 opam
diff --git a/Makefile b/Makefile
@@ -1,4 +1,5 @@
 LIB := markup
+VERSION := 0.5
 
 if_package = ! ocamlfind query $(1) > /dev/null 2> /dev/null || ( $(2) )
 
@@ -118,13 +119,7 @@ docs-odocl :
 PUBLISH := doc/publish
 
 .PHONY : publish-docs
-publish-docs : docs
-	@test $(OCAML_VERSION) -ne 402 \
-		|| (echo "\nocamldoc is broken in 4.02" && false)
-	@ocamlfind query lwt.unix > /dev/null 2> /dev/null \
-		|| (echo "\nLwt not installed" && false)
-	@ocamlfind query lambdasoup > /dev/null 2> /dev/null \
-		|| (echo "\nLambda Soup not installed" && false)
+publish-docs : check-doc-prereqs docs
 	rm -rf $(PUBLISH)
 	mkdir -p $(PUBLISH)
 	cd $(PUBLISH) \
@@ -135,6 +130,22 @@ publish-docs : docs
 		&& git commit -m 'Markup.ml documentation.' \
 		&& git push -uf github master:gh-pages
 
+DOC_ZIP := doc/$(LIB)-$(VERSION)-doc.zip
+
+.PHONY : package-docs
+package-docs : check-doc-prereqs docs
+	rm -f $(DOC_ZIP)
+	zip -9 $(DOC_ZIP) $(HTML)/*
+
+.PHONY : check-doc-prereqs
+check-doc-prereqs :
+	@test $(OCAML_VERSION) -ne 402 \
+		|| (echo "\nocamldoc is broken in 4.02" && false)
+	@ocamlfind query lwt.unix > /dev/null 2> /dev/null \
+		|| (echo "\nLwt not installed" && false)
+	@ocamlfind query lambdasoup > /dev/null 2> /dev/null \
+		|| (echo "\nLambda Soup not installed" && false)
+
 need_package = \
 	ocamlfind query $(1) > /dev/null 2> /dev/null \
 		|| echo "Missing package '$(1)' (opam install $(2))"
@@ -193,5 +204,5 @@ uninstall :
 .PHONY : clean
 clean :
 	$(OCAMLBUILD) -clean
-	rm -rf bisect*.out $(COVERAGE) $(HTML) $(PUBLISH) opam
+	rm -rf bisect*.out $(COVERAGE) $(HTML) $(PUBLISH) $(DOC_ZIP) opam
 	cd $(DEP_TEST_DIR) && $(OCAMLBUILD) -clean
diff --git a/README.md b/README.md
@@ -82,13 +82,14 @@ let report =
     count := !count + 1;
     if !count >= 10 then raise_notrace Exit
 
-string "some xml" |> parse_xml ~report |> drain
+string "some xml" |> parse_xml ~report |> signals |> drain
 
 (* Load HTML into a custom document tree data type. *)
 type html = Text of string | Element of string * html list
 
 file "some_file"
 |> parse_html
+|> signals
 |> tree
   ~text:(fun ss -> Text (String.concat "" ss))
   ~element:(fun (_, name) _ children -> Element (name, children))
@@ -151,19 +152,12 @@ opam install lwt cohttp lambdasoup markup
 
 ## Installing
 
-Until Markup.ml is added to OPAM, the easiest way to install it is by cloning
-this repository, then running
+Simply
 
 ```sh
-make install
+opam install markup
 ```
 
-in the cloned directory. This will use OPAM to pin Markup.ml, install the
-dependency Uutf, then build and install Markup.ml. If you want to use the module
-`Markup_lwt`, check that Lwt is installed before installing Markup.ml.
-
-To remove the pin later, run `make uninstall`.
-
 ## Documentation
 
 The interface of Markup.ml is three modules [`Markup`][Markup],

diff --git a/doc/postprocess.ml b/doc/postprocess.ml
@@ -229,7 +229,8 @@ let clean_up_content soup =
   uncolor "constructor" "ASYNCHRONOUS";
   uncolor "constructor" "Pervasives";
   uncolor "constructor" "Lwt_io";
-  uncolor "keyword" "false"
+  uncolor "keyword" "false";
+  uncolor "keyword" "parser"
 
 let add_with_type soup type_name =
   let extra =

diff --git a/doc/style.css b/doc/style.css
@@ -183,6 +183,12 @@ h2 {
     line-height: 1;
 }
 
+h3 {
+    font-size: 100%;
+    margin-top: 1.5em;
+    margin-bottom: 1.5em;
+}
+
 body > pre:first-of-type {
     margin-top: 1.5em;
 }

diff --git a/src/markup.mli b/src/markup.mli
@@ -77,7 +77,9 @@ val write_xml  : signal stream -> char stream
     Markup.ml is developed on {{:https://github.com/aantron/markup.ml} GitHub}
     and distributed under the
     {{:https://github.com/aantron/markup.ml/blob/master/doc/LICENSE}
-    BSD license}. This documentation is for version 0.5 of the library. *)
+    BSD license}. This documentation is for version 0.5 of the library.
+    Documentation for older versions can be found on the
+    {{:https://github.com/aantron/markup.ml/releases} releases page}. *)
 
 
 
@@ -193,7 +195,7 @@ sig
   val decode :
     ?report:(location -> Error.t -> unit) -> t ->
     (char, 's) stream -> (int, 's) stream
-  (** Applies a decoder to byte stream. Illegal input byte sequences result in
+  (** Applies a decoder to a byte stream. Illegal input byte sequences result in
       calls to the error handler [~report] with error kind [`Decoding_error].
       The illegal bytes are then skipped, and zero or more U+FFFD replacement
       characters are emitted. The default handler ignores errors.
@@ -308,9 +310,9 @@ val signal_to_string : [< signal ] -> string
 (** {2 Parsers} *)
 
 type 's parser
-(** A ['s parser] is a thin wrapper around a [(signal, 's) stream] that supports
-    access to additional information that is not carried directly in the stream,
-    such as source locations. *)
+(** An ['s parser] is a thin wrapper around a [(signal, 's) stream] that
+    supports access to additional information that is not carried directly in
+    the stream, such as source locations. *)
 
 val signals : 's parser -> (signal, 's) stream
 (** Converts a parser to its underlying signal stream. *)
@@ -761,3 +763,66 @@ val kstream : ('a, _) stream -> 'a Kstream.t
 val of_kstream : 'a Kstream.t -> ('a, _) stream
 
 (**/**)
+
+
+
+(** {2 Conformance status}
+
+    The HTML parser seeks to implement section 8 of the HTML5 specification.
+    That section describes a parser, part of a full-blown user agent, that is
+    building up a DOM representation of an HTML document. Markup.ml is neither
+    inherently part of a user agent, nor does it build up a DOM representation.
+    With respect to section 8 of HTML5, Markup.ml is concerned with only the
+    syntax. When that section requires that the user agent perform an action,
+    Markup.ml emits enough information for a hypothetical user agent based on it
+    to be able to decide to perform this action. Likewise, Markup.ml seeks to
+    emit enough information for a hypothetical user agent to build up a
+    conforming DOM.
+
+    The XML parser seeks to be a non-validating implementation of the XML and
+    Namespaces in XML specifications.
+
+    The rest of this section lists known deviations from HTML5, XML, and
+    Namespaces in XML. Some of these deviations are meant to be corrected in
+    future versions of Markup.ml, while others will probably remain. The latter
+    satisfy some or all of the following properties:
+
+    - They require non-local adjustment, especially of past nodes. For example,
+      adjusting the start signal of the root node mid-way through the signal
+      stream is difficult for a one-pass parser.
+    - They are minor. Users implementing less than a conforming browser
+      typically don't care about them, and they typically have to do with
+      obscure error recovery.
+    - They can easily be corrected by code written over Markup.ml that builds up
+      a DOM or maintains other auxiliary data structures during parsing.
+
+    {3 To be corrected}
+
+    - XML: There is no attribute value normalization.
+    - HTML: The {e adoption agency algorithm} is not implemented, because it
+      requires non-local adjustments.
+    - HTML: {e foster parenting} is not implemented, because it requires
+      non-local adjustments.
+    - HTML: Quirks mode is not honored. This affects the interaction between
+      automatic closing of [p] elements and opening of [table] elements.
+    - HTML: The parser ignores the {e head element pointer}.
+    - HTML: The parser ignores the {e form element pointer}.
+    - HTML: The parser ignores interactions between [form] and [template].
+    - HTML: The form translation for [isindex] is completely ignored. [isindex]
+      is handled as an unknown element.
+
+    {3 To remain}
+
+    - HTML: Except when detecting encodings, the parser does not try to read
+      [<meta>] tags for encoding declarations. The user of Markup.ml should read
+      these, if necessary. They are part of the emitted signal stream.
+    - HTML: [noscript] elements are always parsed, as are [script] elements. For
+      conforming behavior, if the user of Markup.ml "supports scripts," the user
+      should serialize the content of [noscript] to a [`Text] signal using
+      [write_html].
+    - HTML: Elements such as [title] that belong in [head], but are found
+      between [head] and [body], are not moved into [head].
+    - HTML: [<html>] tags found in the body do not have their attributes added
+      to the [`Start_element "html"] signal emitted at the beginning of the
+      document.
+*)
diff --git a/test/test_encoding.ml b/test/test_encoding.ml
@@ -9,8 +9,6 @@ open Kstream
 open Stream_io
 open Encoding
 
-(* TODO Test exception pass-through. *)
-
 let ok = wrong_k "failed"
 
 let test_ucs_4 (f : Encoding.t) name s1 s2 bad_bytes =

diff --git a/test/test_html_parser.ml b/test/test_html_parser.ml
@@ -114,7 +114,6 @@ let tests = [
         1, 31, S  `End_element;
         1, 31, S  `End_element]);
 
-  (* TODO Document deviation for non-iframe srcdoc documents. *)
   ("html.parser.no-doctype" >:: fun _ ->
     expect ~prefix:true "<title>foo</title>"
       [ 1,  1, S (start_element "html");
@@ -357,7 +356,6 @@ let tests = [
         1, 18, S  `End_element;
         1, 18, S  `End_element]);
 
-  (* TODO It is strange that the <plaintext> tag always causes a parse error. *)
   ("html.parser.plaintext" >:: fun _ ->
     expect "<p><plaintext>foo</plaintext></p>"
       [ 1,  1, S (start_element "html");
@@ -413,7 +411,6 @@ let tests = [
         1, 40, S  `End_element;
         1, 40, S  `End_element]);
 
-  (* TODO Test condition in EOF case, likewise HTML case. *)
   ("html.parser.truncated-body" >:: fun _ ->
     expect "<body>"
       [ 1,  1, S (start_element "html");
@@ -459,8 +456,6 @@ let tests = [
         1, 22, S  `End_element;
         1, 29, S  `End_element]);
 
-  (* TODO Don't double-report errors on the same start tag. *)
-  (* TODO Change the location of implied start tags? *)
   ("html.parser.reconstruct-active-formatting-elements" >:: fun _ ->
     expect "<p><em><strong>foo<p>bar"
       [ 1,  1, S (start_element "html");
@@ -533,7 +528,6 @@ let tests = [
       [ 1,  1, E (`Bad_token ("U+0000", "foreign content", "null"));
         1,  1, S (`Text ["\xef\xbf\xbdfoo"])];
 
-    (* TODO Throttle `Bad_content. *)
     expect ~context:(Some (`Fragment "body")) "<table>\x00foo</table>"
       [ 1,  1, S (start_element "table");
         1,  8, E (`Bad_token ("U+0000", "table", "null"));

diff --git a/test/test_html_writer.ml b/test/test_html_writer.ml
@@ -5,10 +5,6 @@ open OUnit2
 open Test_support
 open Common
 
-(* TODO Test qnames for non-SVG,MathML,HTML elements. *)
-(* TODO Attribute qnames. *)
-(* TODO Test xmlns, xlink, and xml namespaces in the parser as well. *)
-
 let expect id signals strings =
   let _, iterate, ended = expect_strings id strings in
 
@@ -93,7 +89,6 @@ let tests = [
        S "id"; S "=\""; S "foo<>&quot;&amp;&nbsp;"; S "\""; S ">";
        S "</"; S "p"; S ">"]);
 
-  (* TODO Implement self-closing foreign elements. *)
   ("html.writer.foreign-element" >:: fun _ ->
     expect "foreign element"
       [`Start_element ((svg_ns, "use"), [(xlink_ns, "href"), "#foo"]);

diff --git a/test/test_support.ml b/test/test_support.ml
@@ -42,7 +42,6 @@ let expect_error :
     sprintf "no error\nexpected \"%s\"" (Error.to_string ~location:l error)
     |> assert_failure
 
-(* TODO Rename id to label. *)
 let expect_sequence ?(prefix = false) id to_string sequence =
   let assert_failure s = assert_failure (id ^ "\n" ^ s) in
 

diff --git a/test/test_xml_parser.ml b/test/test_xml_parser.ml
@@ -368,5 +368,3 @@ let tests = [
                    (xmlns_ns, "c"), "baz"]));
         1,  1, S  `End_element])
 ]
-
-(* TODO Test fragment argument. *)
diff --git a/test/test_xml_tokenizer.ml b/test/test_xml_tokenizer.ml
@@ -5,8 +5,6 @@ open OUnit2
 open Test_support
 open Common
 
-(* TODO Test exception pass-through in integration. *)
-
 let xml_decl version encoding standalone =
   `Xml {version; encoding; standalone}
 

diff --git a/test/test_xml_writer.ml b/test/test_xml_writer.ml
@@ -150,7 +150,6 @@ let tests = [
          S "xmlns:a"; S "=\""; S "other_ns"; S "\""; S "/>";
          S "</"; S "foo"; S ">"];
 
-      (* TODO Do this test in both orders. *)
       expect "shadowing resolution"
         [`Start_element (("", "foo"),
           [(xmlns_ns, "a"), "some_ns";
@@ -185,5 +184,3 @@ let tests = [
          S "<"; S "foo"; S " ";
          S "xmlns:a"; S "=\""; S "other_ns"; S "\""; S "/>"])
 ]
-
-(* TODO Ill-formed signal sequences. *)