diff --git a/.rubocop.yml b/.rubocop.yml new file mode 100644 index 0000000..6e22451 --- /dev/null +++ b/.rubocop.yml @@ -0,0 +1,35 @@ +require: + - rubocop-performance + - rubocop-rake + +AllCops: + NewCops: enable + TargetRubyVersion: 3.0 + +Metrics/MethodLength: + Max: 30 + +Metrics/ClassLength: + Max: 200 + +Metrics/ModuleLength: + Max: 200 + +Metrics/BlockLength: + Max: 50 + +Gemspec/DevelopmentDependencies: + EnforcedStyle: gemspec + +# TODO: Enable these cops after fixing the issues +Metrics/CyclomaticComplexity: + Enabled: false + +Metrics/PerceivedComplexity: + Enabled: false + +Metrics/AbcSize: + Enabled: false + +Style/Documentation: + Enabled: false diff --git a/Gemfile b/Gemfile index fdc9cbb..bccc7f9 100644 --- a/Gemfile +++ b/Gemfile @@ -1,3 +1,5 @@ +# frozen_string_literal: true + source 'https://rubygems.org' # Declare your gem's dependencies in whatever.gemspec. diff --git a/Gemfile.lock b/Gemfile.lock index b89805f..e4c0cd9 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -57,6 +57,11 @@ GEM unicode-display_width (>= 2.4.0, < 3.0) rubocop-ast (1.31.3) parser (>= 3.3.1.0) + rubocop-performance (1.21.0) + rubocop (>= 1.48.1, < 2.0) + rubocop-ast (>= 1.31.1, < 2.0) + rubocop-rake (0.6.0) + rubocop (~> 1.0) ruby-progressbar (1.13.0) strscan (3.1.0) thor (0.20.3) @@ -73,6 +78,8 @@ DEPENDENCIES rspec rspec-collection_matchers rubocop + rubocop-performance + rubocop-rake BUNDLED WITH - 2.1.4 + 2.5.11 diff --git a/Rakefile b/Rakefile index f34a48c..cffdd09 100644 --- a/Rakefile +++ b/Rakefile @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'rspec/core/rake_task' RSpec::Core::RakeTask.new(:spec) diff --git a/html2text.gemspec b/html2text.gemspec index 9213100..4385634 100644 --- a/html2text.gemspec +++ b/html2text.gemspec @@ -1,4 +1,6 @@ -$:.push File.expand_path('lib', __dir__) +# frozen_string_literal: true + +$LOAD_PATH.push File.expand_path('lib', __dir__) # Maintain your gem's version: require 'html2text/version' @@ -13,9 +15,9 @@ Gem::Specification.new do |s| s.summary = 'Convert HTML into plain text.' s.description = 'A Ruby component to convert HTML into a plain text format.' s.license = 'MIT' + s.required_ruby_version = '>= 3.0' s.files = Dir['lib/**/*', 'LICENSE.md', 'README.md', 'CHANGELOG.md'] - s.test_files = Dir['spec/**/*'] s.add_dependency 'nokogiri', ['>= 1.0', '< 2.0'] @@ -25,4 +27,8 @@ Gem::Specification.new do |s| s.add_development_dependency 'rspec' s.add_development_dependency 'rspec-collection_matchers' s.add_development_dependency 'rubocop' + s.add_development_dependency 'rubocop-performance' + s.add_development_dependency 'rubocop-rake' + + s.metadata['rubygems_mfa_required'] = 'true' end diff --git a/lib/html2text.rb b/lib/html2text.rb index 7abc7d3..a332387 100644 --- a/lib/html2text.rb +++ b/lib/html2text.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'nokogiri' class Html2Text @@ -10,7 +12,7 @@ def initialize(doc) def self.convert(html) html = html.to_s - if is_office_document?(html) + if office_document?(html) # Emulate the CSS rendering of Office documents html = html.gsub('

', '
') .gsub(' ', '
') @@ -58,15 +60,15 @@ def remove_leading_and_trailing_whitespace(text) end end - output.join('') + output.join end - private - - def self.is_office_document?(text) + private_class_method def self.office_document?(text) text.include?('urn:schemas-microsoft-com:office') end + private + def remove_unnecessary_empty_lines(text) text.gsub(/\n\n\n*/im, "\n\n") end @@ -99,7 +101,7 @@ def iterate_over(node) end output << suffix_whitespace(node) - output = output.compact.join('') || '' + output = output.compact.join || '' unless node.name.nil? if node.name.downcase == 'a' @@ -112,6 +114,7 @@ def iterate_over(node) output end + # rubocop:disable Lint/DuplicateBranch def prefix_whitespace(node) case node.name.downcase when 'hr' @@ -140,7 +143,9 @@ def prefix_whitespace(node) '- ' end end + # rubocop:enable Lint/DuplicateBranch + # rubocop:disable Lint/DuplicateBranch def suffix_whitespace(node) case node.name.downcase when 'h1', 'h2', 'h3', 'h4', 'h5', 'h6' @@ -164,6 +169,7 @@ def suffix_whitespace(node) end end end + # rubocop:enable Lint/DuplicateBranch # links are returned in [text](link) format def wrap_link(node, output) @@ -208,9 +214,9 @@ def wrap_link(node, output) def image_text(node) if node.attribute('title') - '[' + node.attribute('title').to_s + ']' + "[#{node.attribute('title')}]" elsif node.attribute('alt') - '[' + node.attribute('alt').to_s + ']' + "[#{node.attribute('alt')}]" else '' end @@ -224,7 +230,7 @@ def next_node_name(node) next_node = next_node.next_sibling end - return unless next_node && next_node.element? + return unless next_node&.element? next_node.name.downcase end @@ -241,7 +247,7 @@ def previous_node_name(node) previous_node = previous_node.previous_sibling end - return unless previous_node && previous_node.element? + return unless previous_node&.element? previous_node.name.downcase end diff --git a/lib/html2text/version.rb b/lib/html2text/version.rb index 464f127..3a6a6a4 100644 --- a/lib/html2text/version.rb +++ b/lib/html2text/version.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + class Html2Text VERSION = '0.3.1' end diff --git a/spec/examples_spec.rb b/spec/examples_spec.rb index c0e0010..0330eeb 100644 --- a/spec/examples_spec.rb +++ b/spec/examples_spec.rb @@ -1,13 +1,15 @@ +# frozen_string_literal: true + require 'spec_helper' describe Html2Text do describe '#convert' do let(:text) { Html2Text.convert(html) } - examples = Dir[File.dirname(__FILE__) + '/examples/*.html'] + examples = Dir["#{File.dirname(__FILE__)}/examples/*.html"] examples.each do |filename| - context "#{filename}" do + context filename.to_s do let(:html) { File.read(filename) } let(:text_file) { filename.sub('.html', '.txt') } let(:expected) { Html2Text.fix_newlines(File.read(text_file)) } @@ -18,11 +20,7 @@ it 'converts to text' do # Write the output if it failed, for easier comparison - unless text.eql?(expected) - File.open(filename.sub('.html', '.output'), 'w') do |fp| - fp.write(text) - end - end + File.write(filename.sub('.html', '.output'), text) unless text.eql?(expected) # Quick check, don't try to generate a 500kb+ diff, # which can halt the rspec for minutes+ diff --git a/spec/html2text_spec.rb b/spec/html2text_spec.rb index 1d28b25..516d64d 100644 --- a/spec/html2text_spec.rb +++ b/spec/html2text_spec.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'spec_helper' describe Html2Text do diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index a7cdbb6..e2acb5a 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'rspec' require 'rspec/collection_matchers'