From 04492710b2bb542f88ea96d3f188a7ac216c95d2 Mon Sep 17 00:00:00 2001 From: Kevin Hendricks Date: Thu, 12 Feb 2015 14:42:29 -0500 Subject: [PATCH] Remove well_formed.cc, update Makefile.am, one parser.c fix --- .gitignore | 10 ++++++ Makefile.am | 3 +- examples/well_formed.cc | 73 ----------------------------------------- src/parser.c | 3 +- 4 files changed, 12 insertions(+), 77 deletions(-) delete mode 100644 examples/well_formed.cc diff --git a/.gitignore b/.gitignore index 020d6813..90672468 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,16 @@ *.swo *.swn +#emacs editor leftovers +*.*~ + +#diff leftovers +*.orig + +# gtest pieces +gtest +gtest-1.7.0 + # Other build artifacts /Debug /visualc/Debug diff --git a/Makefile.am b/Makefile.am index b8922b09..59cc2bb1 100644 --- a/Makefile.am +++ b/Makefile.am @@ -97,7 +97,7 @@ gumbo_test_DEPENDENCIES += check-local gumbo_test_LDADD += gtest/lib/libgtest.la gtest/lib/libgtest_main.la endif -noinst_PROGRAMS = clean_text find_links get_title positions_of_class benchmark serialize prettyprint well_formed +noinst_PROGRAMS = clean_text find_links get_title positions_of_class benchmark serialize prettyprint LDADD = libgumbo.la AM_CPPFLAGS = -I"$(srcdir)/src" @@ -108,4 +108,3 @@ positions_of_class_SOURCES = examples/positions_of_class.cc benchmark_SOURCES = benchmarks/benchmark.cc serialize_SOURCES = examples/serialize.cc prettyprint_SOURCES = examples/prettyprint.cc -well_formed_SOURCES = examples/well_formed.cc diff --git a/examples/well_formed.cc b/examples/well_formed.cc deleted file mode 100644 index bcf4cc8c..00000000 --- a/examples/well_formed.cc +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright 2015 Kevin B. Hendricks, Stratford, Ontario, All Rights Reserved. -// loosely based on a greatly simplified version of BeautifulSoup4 decode() routine -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Author: Kevin Hendricks -// - -#include -#include -#include -#include - -#include "gumbo.h" -#include "error.h" -#include "parser.h" -#include "string_buffer.h" - -int main(int argc, char** argv) { - if (argc != 2) { - std::cout << "well_formed \n"; - exit(EXIT_FAILURE); - } - const char* filename = argv[1]; - - std::ifstream in(filename, std::ios::in | std::ios::binary); - if (!in) { - std::cout << "File " << filename << " not found!\n"; - exit(EXIT_FAILURE); - } - - std::string contents; - in.seekg(0, std::ios::end); - contents.resize(in.tellg()); - in.seekg(0, std::ios::beg); - in.read(&contents[0], contents.size()); - in.close(); - - fprintf(stdout, "%s", contents.c_str()); - - GumboOptions options = kGumboDefaultOptions; - options.stop_on_first_error = false; - - GumboOutput* output = gumbo_parse_with_options(&options, contents.data(), contents.length()); - - GumboParser parser; - parser._options = &options; - const GumboVector* errors = &output->errors; - for (int i=0; i< errors->length; ++i) { - GumboError* er = static_cast(errors->data[i]); - unsigned int linenum = er->position.line; - unsigned int colnum = er->position.column; - unsigned int typenum = er->type; - GumboStringBuffer text; - gumbo_string_buffer_init(&parser, &text); - gumbo_error_to_string(&parser, er, &text); - std::string errmsg(text.data, text.length); - fprintf(stdout, "line: %d col: %d type %d %s\n", linenum, colnum, typenum, errmsg.c_str()); - gumbo_string_buffer_destroy(&parser, &text); - gumbo_print_caret_diagnostic(&parser, er, contents.c_str()); - } - gumbo_destroy_output(&options, output); -} diff --git a/src/parser.c b/src/parser.c index a4507556..004639dc 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1322,12 +1322,11 @@ static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) { // Like has_an_element_in_scope, but restricts the expected qualified name to a // range of possible qualified names instead of just a single one. static bool has_an_element_in_scope_with_tagname(GumboParser* parser, gumbo_tagset qualset) { - bool found = has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET), + return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE) }); - return found; } // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-list-item-scope