Skip to content

Commit

Permalink
Fix naming
Browse files Browse the repository at this point in the history
  • Loading branch information
vinayak-mehta committed Dec 26, 2024
1 parent ccd5497 commit 86a78eb
Show file tree
Hide file tree
Showing 25 changed files with 289 additions and 443 deletions.
1 change: 0 additions & 1 deletion .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ max-line-length = 120
max-complexity = 10
docstring-convention = numpy
per-file-ignores = tests/*:B950,D100,D102,D103,D104,D401,D101,D200,S101,S106,F403,F405,F841
pypdf_table_extraction/*:D100,D103,D104,F401,W0611,C0114 docs/*:D100,D101
__init__.py:D100,D103,D104,F401
__version__.py:D100
__main__.py:D100,D103
Expand Down
8 changes: 4 additions & 4 deletions .github/ISSUE_TEMPLATE/bug_report.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@ labels: bug
assignees: ""
---

<!-- Please read the filing issues section of the contributor's guide first: https://pypdf-table-extraction.readthedocs.io/en/latest/dev/contributing.html#filing-issues -->
<!-- Please read the filing issues section of the contributor's guide first: https://camelot-py.readthedocs.io/en/latest/dev/contributing.html#filing-issues -->

**Describe the bug**

<!-- A clear and concise description of what the bug is. -->

**Steps to reproduce the bug**

<!-- Steps used to install `pypdf_table_extraction`:
<!-- Steps used to install `camelot`:
1. Add step here (you can add more steps too) -->

<!-- Steps to be used to reproduce behavior:
Expand All @@ -26,7 +26,7 @@ assignees: ""

**Code**

<!-- Add the pypdf_table_extraction code snippet that you used. -->
<!-- Add the camelot code snippet that you used. -->

```
Expand All @@ -48,7 +48,7 @@ assignees: ""
- Numpy version:
- OpenCV version:
- Ghostscript version:
- pypdf_table_extraction version:
- camelot version:

**Additional context**

Expand Down
2 changes: 1 addition & 1 deletion CODE_OF_CONDUCT.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ representative at an online or offline event.
## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at
https://github.com/py-pdf/pypdf_table_extraction/issues .
https://github.com/camelot-dev/camelot/issues .
All complaints will be reviewed and investigated promptly and fairly.

All community leaders are obligated to respect the privacy and security of the
Expand Down
3 changes: 1 addition & 2 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
MIT License

Copyright (c) 2024 pypdf_table_extraction Developers
Copyright (c) 2019-2023 Camelot Developers
Copyright (c) 2019-2024 Camelot Developers
Copyright (c) 2018-2019 Peeply Private Ltd (Singapore)

Permission is hereby granted, free of charge, to any person obtaining a copy
Expand Down
2 changes: 0 additions & 2 deletions camelot/__main__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
"""Initialize pypdf_table_extraction, formerly known as Camelot."""

__all__ = ("main",)


Expand Down
2 changes: 1 addition & 1 deletion camelot/backends/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""pypdf_table_extraction offers multiple backends to convert PDFs to images so they can be analyzed by OpenCV."""
"""Camelot offers multiple backends to convert PDFs to images so they can be analyzed by OpenCV."""

from .image_conversion import ImageConversionBackend
8 changes: 2 additions & 6 deletions camelot/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,14 @@

import click


try:
import matplotlib.pyplot as plt
except ImportError:
_HAS_MPL = False
else:
_HAS_MPL = True

from . import __version__
from . import plot
from . import read_pdf

from . import __version__, plot, read_pdf

logger = logging.getLogger("camelot")
logger.setLevel(logging.INFO)
Expand Down Expand Up @@ -89,7 +85,7 @@ def set_config(self, key, value):
)
@click.pass_context
def cli(ctx, *args, **kwargs):
"""pypdf_table_extraction: PDF Table Extraction for Humans."""
"""Camelot: PDF Table Extraction for Humans."""
ctx.obj = Config()
for key, value in kwargs.items():
ctx.obj.set_config(key, value)
Expand Down
30 changes: 12 additions & 18 deletions camelot/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,26 +7,20 @@
from pathlib import Path
from typing import Any

from pdfminer.layout import LTChar
from pdfminer.layout import LTImage
from pdfminer.layout import LTTextLineHorizontal
from pdfminer.layout import LTTextLineVertical
from pypdf import PdfReader
from pypdf import PdfWriter
from pdfminer.layout import LTChar, LTImage, LTTextLineHorizontal, LTTextLineVertical
from pypdf import PdfReader, PdfWriter
from pypdf._utils import StrByteType

from .core import TableList
from .parsers import Hybrid
from .parsers import Lattice
from .parsers import Network
from .parsers import Stream
from .utils import TemporaryDirectory
from .utils import download_url
from .utils import get_image_char_and_text_objects
from .utils import get_page_layout
from .utils import get_rotation
from .utils import is_url

from .parsers import Hybrid, Lattice, Network, Stream
from .utils import (
TemporaryDirectory,
download_url,
get_image_char_and_text_objects,
get_page_layout,
get_rotation,
is_url,
)

PARSERS = {
"lattice": Lattice,
Expand Down Expand Up @@ -219,7 +213,7 @@ def parse(
A dict of `pdfminer.layout.LAParams
<https://pdfminersix.readthedocs.io/en/latest/reference/composable.html#laparams>`_ kwargs.
kwargs : dict
See pypdf_table_extraction.read_pdf kwargs.
See camelot.read_pdf kwargs.
Returns
-------
Expand Down
2 changes: 1 addition & 1 deletion camelot/parsers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""pypdf_table_extraction offers multiple methods to parse pdfs and reconstruct the tables."""
"""Camelot offers multiple methods to parse pdfs and reconstruct the tables."""

from .hybrid import Hybrid
from .lattice import Lattice
Expand Down
14 changes: 8 additions & 6 deletions camelot/parsers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@
import pandas as pd

from ..core import Table
from ..utils import bbox_from_str
from ..utils import compute_accuracy
from ..utils import compute_whitespace
from ..utils import get_table_index
from ..utils import text_in_bbox
from ..utils import (
bbox_from_str,
compute_accuracy,
compute_whitespace,
get_table_index,
text_in_bbox,
)


class BaseParser:
Expand Down Expand Up @@ -123,7 +125,7 @@ def _document_has_no_text(self):
if self.images:
warnings.warn(
f"{rootname} is image-based, "
"pypdf_table_extraction only works on text-based pages.",
"camelot only works on text-based pages.",
stacklevel=1,
)
else:
Expand Down
37 changes: 16 additions & 21 deletions camelot/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,35 +14,30 @@
from itertools import groupby
from operator import itemgetter
from pathlib import Path
from typing import Any
from typing import Callable
from typing import Any, Callable
from urllib.parse import urlparse as parse_url
from urllib.parse import uses_netloc
from urllib.parse import uses_params
from urllib.parse import uses_relative
from urllib.request import Request
from urllib.request import urlopen
from urllib.parse import uses_netloc, uses_params, uses_relative
from urllib.request import Request, urlopen

import numpy as np
from pdfminer.converter import PDFPageAggregator
from pdfminer.layout import LAParams
from pdfminer.layout import LTAnno
from pdfminer.layout import LTChar
from pdfminer.layout import LTContainer
from pdfminer.layout import LTImage
from pdfminer.layout import LTItem
from pdfminer.layout import LTTextLine
from pdfminer.layout import LTTextLineHorizontal
from pdfminer.layout import LTTextLineVertical
from pdfminer.layout import (
LAParams,
LTAnno,
LTChar,
LTContainer,
LTImage,
LTItem,
LTTextLine,
LTTextLineHorizontal,
LTTextLineVertical,
)
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdfinterp import PDFPageInterpreter
from pdfminer.pdfinterp import PDFResourceManager
from pdfminer.pdfpage import PDFPage
from pdfminer.pdfpage import PDFTextExtractionNotAllowed
from pdfminer.pdfinterp import PDFPageInterpreter, PDFResourceManager
from pdfminer.pdfpage import PDFPage, PDFTextExtractionNotAllowed
from pdfminer.pdfparser import PDFParser
from pypdf._utils import StrByteType


_VALID_URLS = set(uses_relative + uses_netloc + uses_params)
_VALID_URLS.discard("")

Expand Down
Binary file added docs/_static/camelot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file removed docs/_static/pypdf-table-extraction.png
Binary file not shown.
24 changes: 12 additions & 12 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,45 +3,45 @@
API Reference
=============

.. module:: pypdf_table_extraction
.. module:: camelot

Main Interface
--------------
.. autofunction:: pypdf_table_extraction.read_pdf
.. autofunction:: camelot.read_pdf

Lower-Level Classes
-------------------

.. autoclass:: pypdf_table_extraction.handlers.PDFHandler
.. autoclass:: camelot.handlers.PDFHandler
:inherited-members:

.. autoclass:: pypdf_table_extraction.parsers.Stream
.. autoclass:: camelot.parsers.Stream
:inherited-members:

.. autoclass:: pypdf_table_extraction.parsers.Lattice
.. autoclass:: camelot.parsers.Lattice
:inherited-members:

.. autoclass:: pypdf_table_extraction.parsers.Network
.. autoclass:: camelot.parsers.Network
:inherited-members:

.. autoclass:: pypdf_table_extraction.parsers.Hybrid
.. autoclass:: camelot.parsers.Hybrid
:inherited-members:

Lower-Lower-Level Classes
-------------------------

.. autoclass:: pypdf_table_extraction.core.TableList
.. autoclass:: camelot.core.TableList
:inherited-members:

.. autoclass:: pypdf_table_extraction.core.Table
.. autoclass:: camelot.core.Table
:inherited-members:

.. autoclass:: pypdf_table_extraction.core.Cell
.. autoclass:: camelot.core.Cell

Plotting
--------

.. autofunction:: pypdf_table_extraction.plot
.. autofunction:: camelot.plot

.. autoclass:: pypdf_table_extraction.plotting.PlotMethods
.. autoclass:: camelot.plotting.PlotMethods
:inherited-members:
29 changes: 14 additions & 15 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,13 @@

import camelot


# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# sys.path.insert(0, os.path.abspath('..'))

# Insert pypdf_table_extraction's path into sys.path.
# Insert camelot's path into sys.path.
sys.path.insert(0, os.path.abspath("../camelot"))
sys.path.insert(0, os.path.abspath("_themes"))

Expand Down Expand Up @@ -61,9 +60,9 @@
master_doc = "index"

# General information about the project.
project = "pypdf_table_extraction"
copyright = "2024, pypdf_table_extraction Developers"
author = "pypdf_table_extraction Developers"
project = "Camelot"
copyright = "2024, Camelot Developers"
author = "Camelot Developers"

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
Expand Down Expand Up @@ -137,7 +136,7 @@
# further. For a list of options available for each theme, see the
# documentation.
html_theme_options = {
"repository_url": "https://github.com/py-pdf/pypdf_table_extraction",
"repository_url": "https://github.com/camelot-dev/camelot",
"repository_branch": "main",
"path_to_docs": "/docs",
"use_repository_button": True,
Expand All @@ -159,7 +158,7 @@
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#
html_logo = "_static/pypdf-table-extraction.png"
html_logo = "_static/camelot.png"

# The name of an image file (relative to this directory) to use as a favicon of
# the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
Expand Down Expand Up @@ -244,7 +243,7 @@
# html_search_scorer = 'scorer.js'

# Output file base name for HTML help builder.
htmlhelp_basename = "pypdf_table_extraction-doc"
htmlhelp_basename = "camelot-doc"

# -- Options for LaTeX output ---------------------------------------------

Expand All @@ -269,8 +268,8 @@
latex_documents = [
(
master_doc,
"pypdf-table-extraction.tex",
"pypdf-table-extraction Documentation",
"camelot.tex",
"Camelot Documentation",
"Vinayak Mehta",
"manual",
),
Expand Down Expand Up @@ -316,8 +315,8 @@
man_pages = [
(
master_doc,
"pypdf_table_extraction",
"pypdf_table_extraction Documentation",
"Camelot",
"Camelot Documentation",
[author],
1,
)
Expand All @@ -336,10 +335,10 @@
texinfo_documents = [
(
master_doc,
"pypdf_table_extraction",
"pypdf_table_extraction Documentation",
"Camelot",
"Camelot Documentation",
author,
"pypdf_table_extraction",
"Camelot",
"PDF Table Extraction for Humans.",
"Miscellaneous",
),
Expand Down
Loading

0 comments on commit 86a78eb

Please sign in to comment.