Skip to content

Commit

Permalink
Documentation errata.
Browse files Browse the repository at this point in the history
- Conformance status.
- OPAM instructions.
- Packaging.
- Fixed various typos.
  • Loading branch information
aantron committed Jan 15, 2016
1 parent 22351e2 commit 6bdec86
Show file tree
Hide file tree
Showing 13 changed files with 102 additions and 45 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ test/coverage
doc/markup.odocl
doc/html
doc/publish
doc/*.zip
opam
27 changes: 19 additions & 8 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
LIB := markup
VERSION := 0.5

# $(call if_package,PKG,CMD): runs CMD only when `ocamlfind query PKG`
# succeeds. When PKG is not found, the negated query is already true, so CMD is
# skipped and the whole expression still exits successfully.
if_package = ! ocamlfind query $(1) > /dev/null 2> /dev/null || ( $(2) )

Expand Down Expand Up @@ -118,13 +119,7 @@ docs-odocl :
PUBLISH := doc/publish

.PHONY : publish-docs
publish-docs : docs
@test $(OCAML_VERSION) -ne 402 \
|| (echo "\nocamldoc is broken in 4.02" && false)
@ocamlfind query lwt.unix > /dev/null 2> /dev/null \
|| (echo "\nLwt not installed" && false)
@ocamlfind query lambdasoup > /dev/null 2> /dev/null \
|| (echo "\nLambda Soup not installed" && false)
publish-docs : check-doc-prereqs docs
rm -rf $(PUBLISH)
mkdir -p $(PUBLISH)
cd $(PUBLISH) \
Expand All @@ -135,6 +130,22 @@ publish-docs : docs
&& git commit -m 'Markup.ml documentation.' \
&& git push -uf github master:gh-pages

# Path of the documentation archive, named after the library and its version.
DOC_ZIP := doc/$(LIB)-$(VERSION)-doc.zip

# Packs the files found directly under $(HTML) into $(DOC_ZIP). Any previous
# archive is removed first so the zip is created from scratch rather than
# updated in place. `-9` asks zip for its highest compression level.
# NOTE(review): $(HTML) is defined outside this view; presumably the directory
# produced by the `docs` target - confirm.
.PHONY : package-docs
package-docs : check-doc-prereqs docs
rm -f $(DOC_ZIP)
zip -9 $(DOC_ZIP) $(HTML)/*

# Guard target for documentation builds. Fails with a message when
# $(OCAML_VERSION) equals 402, or when ocamlfind cannot find lwt.unix or
# lambdasoup. Each check is `condition || (echo ... && false)`, so a failing
# condition prints the reason and then makes the recipe line fail.
.PHONY : check-doc-prereqs
check-doc-prereqs :
@test $(OCAML_VERSION) -ne 402 \
|| (echo "\nocamldoc is broken in 4.02" && false)
@ocamlfind query lwt.unix > /dev/null 2> /dev/null \
|| (echo "\nLwt not installed" && false)
@ocamlfind query lambdasoup > /dev/null 2> /dev/null \
|| (echo "\nLambda Soup not installed" && false)

# $(call need_package,FINDLIB_NAME,OPAM_NAME): prints a hint naming the OPAM
# package to install when `ocamlfind query FINDLIB_NAME` fails. The fallback is
# a plain `echo`, so a missing package is reported but does not fail the recipe.
need_package = \
ocamlfind query $(1) > /dev/null 2> /dev/null \
|| echo "Missing package '$(1)' (opam install $(2))"
Expand Down Expand Up @@ -193,5 +204,5 @@ uninstall :
.PHONY : clean
clean :
$(OCAMLBUILD) -clean
rm -rf bisect*.out $(COVERAGE) $(HTML) $(PUBLISH) opam
rm -rf bisect*.out $(COVERAGE) $(HTML) $(PUBLISH) $(DOC_ZIP) opam
cd $(DEP_TEST_DIR) && $(OCAMLBUILD) -clean
14 changes: 4 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,14 @@ let report =
count := !count + 1;
if !count >= 10 then raise_notrace Exit
string "some xml" |> parse_xml ~report |> drain
string "some xml" |> parse_xml ~report |> signals |> drain
(* Load HTML into a custom document tree data type. *)
type html = Text of string | Element of string * html list
file "some_file"
|> parse_html
|> signals
|> tree
~text:(fun ss -> Text (String.concat "" ss))
~element:(fun (_, name) _ children -> Element (name, children))
Expand Down Expand Up @@ -151,19 +152,12 @@ opam install lwt cohttp lambdasoup markup

## Installing

Until Markup.ml is added to OPAM, the easiest way to install it is by cloning
this repository, then running
Simply

```sh
make install
opam install markup
```

in the cloned directory. This will use OPAM to pin Markup.ml, install the
dependency Uutf, then build and install Markup.ml. If you want to use the module
`Markup_lwt`, check that Lwt is installed before installing Markup.ml.

To remove the pin later, run `make uninstall`.

## Documentation

The interface of Markup.ml is three modules [`Markup`][Markup],
Expand Down
3 changes: 2 additions & 1 deletion doc/postprocess.ml
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,8 @@ let clean_up_content soup =
uncolor "constructor" "ASYNCHRONOUS";
uncolor "constructor" "Pervasives";
uncolor "constructor" "Lwt_io";
uncolor "keyword" "false"
uncolor "keyword" "false";
uncolor "keyword" "parser"

let add_with_type soup type_name =
let extra =
Expand Down
6 changes: 6 additions & 0 deletions doc/style.css
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,12 @@ h2 {
line-height: 1;
}

/* Third-level headings: same font size as the surrounding text, set apart
   only by equal vertical margins above and below. */
h3 {
font-size: 100%;
margin-top: 1.5em;
margin-bottom: 1.5em;
}

body > pre:first-of-type {
margin-top: 1.5em;
}
Expand Down
75 changes: 70 additions & 5 deletions src/markup.mli
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,9 @@ val write_xml : signal stream -> char stream
Markup.ml is developed on {{:https://github.com/aantron/markup.ml} GitHub}
and distributed under the
{{:https://github.com/aantron/markup.ml/blob/master/doc/LICENSE}
BSD license}. This documentation is for version 0.5 of the library. *)
BSD license}. This documentation is for version 0.5 of the library.
Documentation for older versions can be found on the
{{:https://github.com/aantron/markup.ml/releases} releases page}. *)



Expand Down Expand Up @@ -193,7 +195,7 @@ sig
val decode :
?report:(location -> Error.t -> unit) -> t ->
(char, 's) stream -> (int, 's) stream
(** Applies a decoder to byte stream. Illegal input byte sequences result in
(** Applies a decoder to a byte stream. Illegal input byte sequences result in
calls to the error handler [~report] with error kind [`Decoding_error].
The illegal bytes are then skipped, and zero or more U+FFFD replacement
characters are emitted. The default handler ignores errors.
Expand Down Expand Up @@ -308,9 +310,9 @@ val signal_to_string : [< signal ] -> string
(** {2 Parsers} *)

type 's parser
(** A ['s parser] is a thin wrapper around a [(signal, 's) stream] that supports
access to additional information that is not carried directly in the stream,
such as source locations. *)
(** An ['s parser] is a thin wrapper around a [(signal, 's) stream] that
supports access to additional information that is not carried directly in
the stream, such as source locations. *)

val signals : 's parser -> (signal, 's) stream
(** Converts a parser to its underlying signal stream. *)
Expand Down Expand Up @@ -761,3 +763,66 @@ val kstream : ('a, _) stream -> 'a Kstream.t
val of_kstream : 'a Kstream.t -> ('a, _) stream

(**/**)



(** {2 Conformance status}
The HTML parser seeks to implement section 8 of the HTML5 specification.
That section describes a parser, part of a full-blown user agent, that is
building up a DOM representation of an HTML document. Markup.ml is neither
inherently part of a user agent, nor does it build up a DOM representation.
With respect to section 8 of HTML5, Markup.ml is concerned with only the
syntax. When that section requires that the user agent perform an action,
Markup.ml emits enough information for a hypothetical user agent based on it
to be able to decide to perform this action. Likewise, Markup.ml seeks to
emit enough information for a hypothetical user agent to build up a
conforming DOM.
The XML parser seeks to be a non-validating implementation of the XML and
Namespaces in XML specifications.
The rest of this section lists known deviations from HTML5, XML, and
Namespaces in XML. Some of these deviations are meant to be corrected in
future versions of Markup.ml, while others will probably remain. The latter
satisfy some or all of the following properties:
- They require non-local adjustment, especially of past nodes. For example,
adjusting the start signal of the root node mid-way through the signal
stream is difficult for a one-pass parser.
- They are minor. Users implementing less than a conforming browser
typically don't care about them, and they typically have to do with
obscure error recovery.
- They can easily be corrected by code written over Markup.ml that builds up
a DOM or maintains other auxiliary data structures during parsing.
{3 To be corrected}
- XML: There is no attribute value normalization.
- HTML: The {e adoption agency algorithm} is not implemented, because it
requires non-local adjustments.
- HTML: {e foster parenting} is not implemented, because it requires
non-local adjustments.
- HTML: Quirks mode is not honored. This affects the interaction between
automatic closing of [p] elements and opening of [table] elements.
- HTML: The parser ignores the {e head element pointer}.
- HTML: The parser ignores the {e form element pointer}.
- HTML: The parser ignores interactions between [form] and [template].
- HTML: The form translation for [isindex] is completely ignored. [isindex]
is handled as an unknown element.
{3 To remain}
- HTML: Except when detecting encodings, the parser does not try to read
[<meta>] tags for encoding declarations. The user of Markup.ml should read
these, if necessary. They are part of the emitted signal stream.
- HTML: [noscript] elements are always parsed, as are [script] elements. For
conforming behavior, if the user of Markup.ml "supports scripts," the user
should serialize the content of [noscript] to a [`Text] signal using
[write_html].
- HTML: Elements such as [title] that belong in [head], but are found
between [head] and [body], are not moved into [head].
- HTML: [<html>] tags found in the body do not have their attributes added
to the [`Start_element "html"] signal emitted at the beginning of the
document.
*)
2 changes: 0 additions & 2 deletions test/test_encoding.ml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@ open Kstream
open Stream_io
open Encoding

(* TODO Test exception pass-through. *)

let ok = wrong_k "failed"

let test_ucs_4 (f : Encoding.t) name s1 s2 bad_bytes =
Expand Down
6 changes: 0 additions & 6 deletions test/test_html_parser.ml
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,6 @@ let tests = [
1, 31, S `End_element;
1, 31, S `End_element]);

(* TODO Document deviation for non-iframe srcdoc documents. *)
("html.parser.no-doctype" >:: fun _ ->
expect ~prefix:true "<title>foo</title>"
[ 1, 1, S (start_element "html");
Expand Down Expand Up @@ -357,7 +356,6 @@ let tests = [
1, 18, S `End_element;
1, 18, S `End_element]);

(* TODO It is strange that the <plaintext> tag always causes a parse error. *)
("html.parser.plaintext" >:: fun _ ->
expect "<p><plaintext>foo</plaintext></p>"
[ 1, 1, S (start_element "html");
Expand Down Expand Up @@ -413,7 +411,6 @@ let tests = [
1, 40, S `End_element;
1, 40, S `End_element]);

(* TODO Test condition in EOF case, likewise HTML case. *)
("html.parser.truncated-body" >:: fun _ ->
expect "<body>"
[ 1, 1, S (start_element "html");
Expand Down Expand Up @@ -459,8 +456,6 @@ let tests = [
1, 22, S `End_element;
1, 29, S `End_element]);

(* TODO Don't double-report errors on the same start tag. *)
(* TODO Change the location of implied start tags? *)
("html.parser.reconstruct-active-formatting-elements" >:: fun _ ->
expect "<p><em><strong>foo<p>bar"
[ 1, 1, S (start_element "html");
Expand Down Expand Up @@ -533,7 +528,6 @@ let tests = [
[ 1, 1, E (`Bad_token ("U+0000", "foreign content", "null"));
1, 1, S (`Text ["\xef\xbf\xbdfoo"])];

(* TODO Throttle `Bad_content. *)
expect ~context:(Some (`Fragment "body")) "<table>\x00foo</table>"
[ 1, 1, S (start_element "table");
1, 8, E (`Bad_token ("U+0000", "table", "null"));
Expand Down
5 changes: 0 additions & 5 deletions test/test_html_writer.ml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,6 @@ open OUnit2
open Test_support
open Common

(* TODO Test qnames for non-SVG,MathML,HTML elements. *)
(* TODO Attribute qnames. *)
(* TODO Test xmlns, xlink, and xml namespaces in the parser as well. *)

let expect id signals strings =
let _, iterate, ended = expect_strings id strings in

Expand Down Expand Up @@ -93,7 +89,6 @@ let tests = [
S "id"; S "=\""; S "foo<>&quot;&amp;&nbsp;"; S "\""; S ">";
S "</"; S "p"; S ">"]);

(* TODO Implement self-closing foreign elements. *)
("html.writer.foreign-element" >:: fun _ ->
expect "foreign element"
[`Start_element ((svg_ns, "use"), [(xlink_ns, "href"), "#foo"]);
Expand Down
1 change: 0 additions & 1 deletion test/test_support.ml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ let expect_error :
sprintf "no error\nexpected \"%s\"" (Error.to_string ~location:l error)
|> assert_failure

(* TODO Rename id to label. *)
let expect_sequence ?(prefix = false) id to_string sequence =
let assert_failure s = assert_failure (id ^ "\n" ^ s) in

Expand Down
2 changes: 0 additions & 2 deletions test/test_xml_parser.ml
Original file line number Diff line number Diff line change
Expand Up @@ -368,5 +368,3 @@ let tests = [
(xmlns_ns, "c"), "baz"]));
1, 1, S `End_element])
]

(* TODO Test fragment argument. *)
2 changes: 0 additions & 2 deletions test/test_xml_tokenizer.ml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@ open OUnit2
open Test_support
open Common

(* TODO Test exception pass-through in integration. *)

let xml_decl version encoding standalone =
`Xml {version; encoding; standalone}

Expand Down
3 changes: 0 additions & 3 deletions test/test_xml_writer.ml
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,6 @@ let tests = [
S "xmlns:a"; S "=\""; S "other_ns"; S "\""; S "/>";
S "</"; S "foo"; S ">"];

(* TODO Do this test in both orders. *)
expect "shadowing resolution"
[`Start_element (("", "foo"),
[(xmlns_ns, "a"), "some_ns";
Expand Down Expand Up @@ -185,5 +184,3 @@ let tests = [
S "<"; S "foo"; S " ";
S "xmlns:a"; S "=\""; S "other_ns"; S "\""; S "/>"])
]

(* TODO Ill-formed signal sequences. *)

0 comments on commit 6bdec86

Please sign in to comment.