Skip to content

Commit

Permalink
Fix remaining issues
Browse files Browse the repository at this point in the history
  • Loading branch information
mscrivo committed Jun 7, 2024
1 parent 2588cf5 commit 6363861
Show file tree
Hide file tree
Showing 10 changed files with 82 additions and 20 deletions.
35 changes: 35 additions & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# RuboCop configuration.

# RuboCop extension gems loaded in addition to the core cops.
require:
  - rubocop-performance
  - rubocop-rake

AllCops:
  # Opt in to cops introduced by newer RuboCop releases.
  NewCops: enable
  TargetRubyVersion: 3.0

# Size limits for methods, classes, modules and blocks.
Metrics/MethodLength:
  Max: 30

Metrics/ClassLength:
  Max: 200

Metrics/ModuleLength:
  Max: 200

Metrics/BlockLength:
  Max: 50

# Development dependencies are declared in the gemspec.
Gemspec/DevelopmentDependencies:
  EnforcedStyle: gemspec

# TODO: Enable these cops after fixing the issues
Metrics/CyclomaticComplexity:
  Enabled: false

Metrics/PerceivedComplexity:
  Enabled: false

Metrics/AbcSize:
  Enabled: false

Style/Documentation:
  Enabled: false
2 changes: 2 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# frozen_string_literal: true

source 'https://rubygems.org'

# Declare your gem's dependencies in whatever.gemspec.
Expand Down
9 changes: 8 additions & 1 deletion Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ GEM
unicode-display_width (>= 2.4.0, < 3.0)
rubocop-ast (1.31.3)
parser (>= 3.3.1.0)
rubocop-performance (1.21.0)
rubocop (>= 1.48.1, < 2.0)
rubocop-ast (>= 1.31.1, < 2.0)
rubocop-rake (0.6.0)
rubocop (~> 1.0)
ruby-progressbar (1.13.0)
strscan (3.1.0)
thor (0.20.3)
Expand All @@ -73,6 +78,8 @@ DEPENDENCIES
rspec
rspec-collection_matchers
rubocop
rubocop-performance
rubocop-rake

BUNDLED WITH
2.1.4
2.5.11
2 changes: 2 additions & 0 deletions Rakefile
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# frozen_string_literal: true

require 'rspec/core/rake_task'

RSpec::Core::RakeTask.new(:spec)
Expand Down
10 changes: 8 additions & 2 deletions html2text.gemspec
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
$:.push File.expand_path('lib', __dir__)
# frozen_string_literal: true

$LOAD_PATH.push File.expand_path('lib', __dir__)

# Maintain your gem's version:
require 'html2text/version'
Expand All @@ -13,9 +15,9 @@ Gem::Specification.new do |s|
s.summary = 'Convert HTML into plain text.'
s.description = 'A Ruby component to convert HTML into a plain text format.'
s.license = 'MIT'
s.required_ruby_version = '>= 3.0'

s.files = Dir['lib/**/*', 'LICENSE.md', 'README.md', 'CHANGELOG.md']
s.test_files = Dir['spec/**/*']

s.add_dependency 'nokogiri', ['>= 1.0', '< 2.0']

Expand All @@ -25,4 +27,8 @@ Gem::Specification.new do |s|
s.add_development_dependency 'rspec'
s.add_development_dependency 'rspec-collection_matchers'
s.add_development_dependency 'rubocop'
s.add_development_dependency 'rubocop-performance'
s.add_development_dependency 'rubocop-rake'

s.metadata['rubygems_mfa_required'] = 'true'
end
26 changes: 16 additions & 10 deletions lib/html2text.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# frozen_string_literal: true

require 'nokogiri'

class Html2Text
Expand All @@ -10,7 +12,7 @@ def initialize(doc)
def self.convert(html)
html = html.to_s

if is_office_document?(html)
if office_document?(html)
# Emulate the CSS rendering of Office documents
html = html.gsub('<p class=MsoNormal>', '<br>')
.gsub('<o:p>&nbsp;</o:p>', '<br>')
Expand Down Expand Up @@ -58,15 +60,15 @@ def remove_leading_and_trailing_whitespace(text)
end
end

output.join('')
output.join
end

private

def self.is_office_document?(text)
private_class_method def self.office_document?(text)
text.include?('urn:schemas-microsoft-com:office')
end

private

def remove_unnecessary_empty_lines(text)
text.gsub(/\n\n\n*/im, "\n\n")
end
Expand Down Expand Up @@ -99,7 +101,7 @@ def iterate_over(node)
end
output << suffix_whitespace(node)

output = output.compact.join('') || ''
output = output.compact.join || ''

unless node.name.nil?
if node.name.downcase == 'a'
Expand All @@ -112,6 +114,7 @@ def iterate_over(node)
output
end

# rubocop:disable Lint/DuplicateBranch
def prefix_whitespace(node)
case node.name.downcase
when 'hr'
Expand Down Expand Up @@ -140,7 +143,9 @@ def prefix_whitespace(node)
'- '
end
end
# rubocop:enable Lint/DuplicateBranch

# rubocop:disable Lint/DuplicateBranch
def suffix_whitespace(node)
case node.name.downcase
when 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'
Expand All @@ -164,6 +169,7 @@ def suffix_whitespace(node)
end
end
end
# rubocop:enable Lint/DuplicateBranch

# links are returned in [text](link) format
def wrap_link(node, output)
Expand Down Expand Up @@ -208,9 +214,9 @@ def wrap_link(node, output)

def image_text(node)
if node.attribute('title')
'[' + node.attribute('title').to_s + ']'
"[#{node.attribute('title')}]"
elsif node.attribute('alt')
'[' + node.attribute('alt').to_s + ']'
"[#{node.attribute('alt')}]"
else
''
end
Expand All @@ -224,7 +230,7 @@ def next_node_name(node)
next_node = next_node.next_sibling
end

return unless next_node && next_node.element?
return unless next_node&.element?

next_node.name.downcase
end
Expand All @@ -241,7 +247,7 @@ def previous_node_name(node)
previous_node = previous_node.previous_sibling
end

return unless previous_node && previous_node.element?
return unless previous_node&.element?

previous_node.name.downcase
end
Expand Down
2 changes: 2 additions & 0 deletions lib/html2text/version.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# frozen_string_literal: true

# Html2Text namespace; this file contributes only the gem's version constant.
class Html2Text
  # Current gem version string.
  # NOTE(review): html2text.gemspec requires this file, presumably to set the
  # gem's version from this constant - confirm against the gemspec.
  VERSION = '0.3.1'
end
12 changes: 5 additions & 7 deletions spec/examples_spec.rb
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
# frozen_string_literal: true

require 'spec_helper'

describe Html2Text do
describe '#convert' do
let(:text) { Html2Text.convert(html) }

examples = Dir[File.dirname(__FILE__) + '/examples/*.html']
examples = Dir["#{File.dirname(__FILE__)}/examples/*.html"]

examples.each do |filename|
context "#{filename}" do
context filename.to_s do
let(:html) { File.read(filename) }
let(:text_file) { filename.sub('.html', '.txt') }
let(:expected) { Html2Text.fix_newlines(File.read(text_file)) }
Expand All @@ -18,11 +20,7 @@

it 'converts to text' do
# Write the output if it failed, for easier comparison
unless text.eql?(expected)
File.open(filename.sub('.html', '.output'), 'w') do |fp|
fp.write(text)
end
end
File.write(filename.sub('.html', '.output'), text) unless text.eql?(expected)

# Quick check, don't try to generate a 500kb+ diff,
# which can halt the rspec for minutes+
Expand Down
2 changes: 2 additions & 0 deletions spec/html2text_spec.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# frozen_string_literal: true

require 'spec_helper'

describe Html2Text do
Expand Down
2 changes: 2 additions & 0 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# frozen_string_literal: true

require 'rspec'
require 'rspec/collection_matchers'

Expand Down

0 comments on commit 6363861

Please sign in to comment.