From b92b682604714e061de55d89b27bddca441b45c1 Mon Sep 17 00:00:00 2001 From: Sangjoon Bob Lee Date: Fri, 3 Jan 2025 20:06:12 -0500 Subject: [PATCH 1/9] chore: re-cookiecut, add authors, etc --- .codecov.yml | 14 + .codespell/ignore_lines.txt | 2 + .codespell/ignore_words.txt | 11 + .github/ISSUE_TEMPLATE/release_checklist.md | 44 +++- .github/pull_request_template.md | 22 -- .../workflows/build-wheel-release-upload.yml | 10 +- .github/workflows/check-news-item.yml | 6 +- .../matrix-and-codecov-on-merge-to-main.yml | 21 ++ .github/workflows/publish-docs-on-release.yml | 12 + .github/workflows/tests-on-pr.yml | 42 +-- .gitignore | 102 +++----- .isort.cfg | 5 + AUTHORS.rst | 10 + MANIFEST.in | 10 +- environment.yml | 6 + paper.bib | 241 ------------------ paper.md | 183 ------------- 17 files changed, 174 insertions(+), 567 deletions(-) create mode 100644 .codecov.yml create mode 100644 .codespell/ignore_lines.txt create mode 100644 .codespell/ignore_words.txt delete mode 100644 .github/pull_request_template.md create mode 100644 .github/workflows/matrix-and-codecov-on-merge-to-main.yml create mode 100644 .github/workflows/publish-docs-on-release.yml create mode 100644 .isort.cfg create mode 100644 AUTHORS.rst create mode 100644 environment.yml delete mode 100644 paper.bib delete mode 100644 paper.md diff --git a/.codecov.yml b/.codecov.yml new file mode 100644 index 0000000..4af5eb2 --- /dev/null +++ b/.codecov.yml @@ -0,0 +1,14 @@ +coverage: + status: + project: # more options at https://docs.codecov.com/docs/commit-status + default: + target: auto # use the coverage from the base commit, fail if coverage is lower + threshold: 0% # allow the coverage to drop by + +comment: + layout: " diff, flags, files" + behavior: default + require_changes: false + require_base: false # [true :: must have a base report to post] + require_head: false # [true :: must have a head report to post] + hide_project_coverage: false # [true :: only show coverage on the git diff aka patch 
coverage] diff --git a/.codespell/ignore_lines.txt b/.codespell/ignore_lines.txt new file mode 100644 index 0000000..07fa7c8 --- /dev/null +++ b/.codespell/ignore_lines.txt @@ -0,0 +1,2 @@ +;; Please include filenames and explanations for each ignored line. +;; See https://docs.openverse.org/meta/codespell.html for docs. diff --git a/.codespell/ignore_words.txt b/.codespell/ignore_words.txt new file mode 100644 index 0000000..9757d7c --- /dev/null +++ b/.codespell/ignore_words.txt @@ -0,0 +1,11 @@ +;; Please include explanations for each ignored word (lowercase). +;; See https://docs.openverse.org/meta/codespell.html for docs. + +;; abbreviation for "materials" often used in a journal title +mater + +;; alternative use of socioeconomic +socio-economic + +;; Frobenius norm used in np.linalg.norm +fro diff --git a/.github/ISSUE_TEMPLATE/release_checklist.md b/.github/ISSUE_TEMPLATE/release_checklist.md index c93803d..fa94779 100644 --- a/.github/ISSUE_TEMPLATE/release_checklist.md +++ b/.github/ISSUE_TEMPLATE/release_checklist.md @@ -6,15 +6,41 @@ labels: "release" assignees: "" --- -### Release checklist for GitHub contributors +### PyPI/GitHub rc-release preparation checklist: - [ ] All PRs/issues attached to the release are merged. - [ ] All the badges on the README are passing. -- [ ] Locally rendered documentation contains all appropriate pages, including - API references (check no modules are missing), tutorials, and other human - written text is up-to-date with any changes in the code. -- [ ] Installation instructions in the README, documentation and on the website - are updated and tested -- [ ] Successfully run any tutorial examples or do functional testing in some - other way. -- [ ] Grammar and writing quality have been checked (no typos). +- [ ] License information is verified as correct. If you are unsure, please comment below. 
+- [ ] Locally rendered documentation contains all appropriate pages, including API references (check no modules are + missing), tutorials, and other human-written text is up-to-date with any changes in the code. +- [ ] Installation instructions in the README, documentation, and the website (e.g., diffpy.org) are updated. +- [ ] Successfully run any tutorial examples or do functional testing with the latest Python version. +- [ ] Grammar and writing quality are checked (no typos). +- [ ] Run `pip install build twine`, then run `python -m build` and `twine check dist/*` to ensure that the package can be built and is correctly formatted for PyPI release. + +Please mention @sbillinge here when you are ready for PyPI/GitHub release. Include any additional comments necessary, such as version information and details about the pre-release here: + +### PyPI/GitHub full-release preparation checklist: + +- [ ] Create a new conda environment and install the rc from PyPI (`pip install cifkit==??`) +- [ ] License information on PyPI is correct. +- [ ] Docs are deployed successfully to `https://www.diffpy.org/`. +- [ ] Successfully run all tests, tutorial examples or do functional testing. + +Please let @sbillinge know that all checks are done and the package is ready for full release. + +### conda-forge release preparation checklist: + + + +- [ ] Ensure that the full release has appeared on PyPI successfully. +- [ ] New package dependencies listed in `conda.txt` and `test.txt` are added to `meta.yaml` in the feedstock. +- [ ] Close any open issues on the feedstock. Reach out to @bobleesj if you have questions. +- [ ] Tag @sbillinge and @bobleesj for conda-forge release. + +### Post-release checklist + + + +- [ ] Run tutorial examples and conduct functional testing using the installation guide in the README. Attach screenshots/results as comments. +- [ ] Documentation (README, tutorials, API references, and websites) is deployed without broken links or missing figures. 
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md deleted file mode 100644 index 249700b..0000000 --- a/.github/pull_request_template.md +++ /dev/null @@ -1,22 +0,0 @@ -## What type of PR is this? (check all applicable) - -- [ ] Refactor -- [ ] Feature -- [ ] Bug Fix -- [ ] Optimization -- [ ] Documentation Update - -## Description (Screenshots, files, etc) - -## Checklist - -- [ ] Are the tests passing? -- [ ] If it's a new feature, have tests been written? -- [ ] Have you added the `.rst` news file? - -## Added to documentation? - -- [ ] README.md -- [ ] Official documentation -- [ ] Google Codelab -- [ ] No documentation needed diff --git a/.github/workflows/build-wheel-release-upload.yml b/.github/workflows/build-wheel-release-upload.yml index c773346..88c2d0f 100644 --- a/.github/workflows/build-wheel-release-upload.yml +++ b/.github/workflows/build-wheel-release-upload.yml @@ -1,18 +1,18 @@ -name: Release (GitHub/PyPI) +name: Release (GitHub/PyPI) and Deploy Docs on: workflow_dispatch: push: tags: - - '*' + - '*' # Trigger on all tags initially, but tag and release privilege are verified in _build-wheel-release-upload.yml jobs: release: - permissions: - contents: write - uses: bobleesj/release-scripts/.github/workflows/_build-wheel-release-upload.yml@v0 + uses: Billingegroup/release-scripts/.github/workflows/_build-wheel-release-upload.yml@v0 with: project: cifkit + c_extension: false + github_admin_username: bobleesj secrets: PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} PAT_TOKEN: ${{ secrets.PAT_TOKEN }} diff --git a/.github/workflows/check-news-item.yml b/.github/workflows/check-news-item.yml index a58322a..20b162c 100644 --- a/.github/workflows/check-news-item.yml +++ b/.github/workflows/check-news-item.yml @@ -2,9 +2,11 @@ name: Check for News on: pull_request_target: + branches: + - main jobs: - build: - uses: Billingegroup/release-scripts/.github/workflows/_check-news-item.yml@{{ VERSION/v0 }} + check-news-item: + uses: 
Billingegroup/release-scripts/.github/workflows/_check-news-item.yml@v0 with: project: cifkit diff --git a/.github/workflows/matrix-and-codecov-on-merge-to-main.yml b/.github/workflows/matrix-and-codecov-on-merge-to-main.yml new file mode 100644 index 0000000..abdd53e --- /dev/null +++ b/.github/workflows/matrix-and-codecov-on-merge-to-main.yml @@ -0,0 +1,21 @@ +name: CI + +on: + push: + branches: + - main + release: + types: + - prereleased + - published + workflow_dispatch: + +jobs: + matrix-coverage: + uses: Billingegroup/release-scripts/.github/workflows/_matrix-and-codecov-on-merge-to-main.yml@v0 + with: + project: cifkit + c_extension: false + headless: false + secrets: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/.github/workflows/publish-docs-on-release.yml b/.github/workflows/publish-docs-on-release.yml new file mode 100644 index 0000000..1b9d578 --- /dev/null +++ b/.github/workflows/publish-docs-on-release.yml @@ -0,0 +1,12 @@ +name: Deploy Documentation on Release + +on: + workflow_dispatch: + +jobs: + docs: + uses: Billingegroup/release-scripts/.github/workflows/_publish-docs-on-release.yml@v0 + with: + project: cifkit + c_extension: false + headless: false diff --git a/.github/workflows/tests-on-pr.yml b/.github/workflows/tests-on-pr.yml index 3b523ec..778132b 100644 --- a/.github/workflows/tests-on-pr.yml +++ b/.github/workflows/tests-on-pr.yml @@ -1,40 +1,18 @@ -name: CI +name: Tests on PR on: push: branches: - main pull_request: + workflow_dispatch: jobs: - build-linux: - runs-on: ubuntu-latest - strategy: - max-parallel: 5 - matrix: - python-version: ["3.10", "3.11", "3.12"] - - steps: - - uses: actions/checkout@v4 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - - name: Create virtual environment and install dependencies - run: | - python -m venv venv - source venv/bin/activate - pip install . 
- - - name: Test with pytest and generate coverage report - run: | - source venv/bin/activate - pip install pytest pytest-cov - python -m pytest -m "not pyvista" --cov=./ --cov-report=xml - - - name: Upload coverage reports to Codecov - uses: codecov/codecov-action@v4.0.1 - with: - token: ${{ secrets.CODECOV_TOKEN }} + tests-on-pr: + uses: Billingegroup/release-scripts/.github/workflows/_tests-on-pr.yml@v0 + with: + project: cifkit + c_extension: false + headless: false + secrets: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/.gitignore b/.gitignore index 15085fb..d418364 100644 --- a/.gitignore +++ b/.gitignore @@ -2,21 +2,13 @@ __pycache__/ *.py[cod] *$py.class -.DS_Store - - -# Test file -test.ipynb # C extensions *.so -nodemon.json -debug/ -.ruff_cache - # Distribution / packaging .Python +env/ build/ develop-eggs/ dist/ @@ -28,12 +20,14 @@ lib64/ parts/ sdist/ var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ +venv/ *.egg-info/ .installed.cfg *.egg +bin/ +temp/ +tags/ +errors.err # PyInstaller # Usually these files are written by a python script from a template @@ -44,89 +38,55 @@ share/python-wheels/ # Installer logs pip-log.txt pip-delete-this-directory.txt +MANIFEST # Unit test / coverage reports htmlcov/ .tox/ -.nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml -*.cover -*.py,cover +*,cover .hypothesis/ -.pytest_cache/ -cover/ # Translations *.mo *.pot +# Mr Developer +.mr.developer.cfg +.project +.pydevproject + # Django stuff: *.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy # Sphinx documentation -docs/_build/ +docs/build/ +docs/source/generated/ + +# pytest +.pytest_cache/ # PyBuilder target/ -# Jupyter Notebook -.ipynb_checkpoints - -# pyenv -.python-version - -# celery beat schedule file -celerybeat-schedule - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project 
settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ +# Editor files +# mac +.DS_Store +*~ -# pytype static type analyzer -.pytype/ +# vim +*.swp +*.swo -# Cython debug symbols -cython_debug/ +# pycharm +.idea/ -# VS Code settings +# VSCode .vscode/ + +# Ipython Notebook +.ipynb_checkpoints diff --git a/.isort.cfg b/.isort.cfg new file mode 100644 index 0000000..7ce0fb1 --- /dev/null +++ b/.isort.cfg @@ -0,0 +1,5 @@ +[settings] +# Keep import statement below line_length character limit +line_length = 115 +multi_line_output = 3 +include_trailing_comma = True diff --git a/AUTHORS.rst b/AUTHORS.rst new file mode 100644 index 0000000..19bc24f --- /dev/null +++ b/AUTHORS.rst @@ -0,0 +1,10 @@ +Authors +======= + +Billinge Group and community contributors. + +Contributors +------------ + +For a list of contributors, visit +https://github.com/bobleesj/cifkit/graphs/contributors diff --git a/MANIFEST.in b/MANIFEST.in index 83f4018..f1a78ee 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,5 +2,11 @@ graft src graft tests graft requirements -recursive-exclude src *.pyc *.pyo __pycache__/ -recursive-exclude tests *.pyc *.pyo __pycache__/ +include AUTHORS.rst LICENSE*.rst README.rst + +# Exclude all bytecode files and __pycache__ directories +global-exclude *.py[cod] # Exclude all .pyc, .pyo, and .pyd files. +global-exclude .DS_Store # Exclude Mac filesystem artifacts. +global-exclude __pycache__ # Exclude Python cache directories. +global-exclude .git* # Exclude git files and directories. +global-exclude .idea # Exclude PyCharm project settings. 
diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..4e053a2 --- /dev/null +++ b/environment.yml @@ -0,0 +1,6 @@ +name: cifkit +channels: + - conda-forge +dependencies: + - python=3 + - pip diff --git a/paper.bib b/paper.bib deleted file mode 100644 index 6b9dfdc..0000000 --- a/paper.bib +++ /dev/null @@ -1,241 +0,0 @@ - -@article{tyvanchuk_crystal_2024, - title = {The crystal and electronic structure of \textit{{RE}}23Co6.7In20.3 (\textit{{RE}} = {Gd}–{Tm}, {Lu}): A new structure type based on intergrowth of {AlB}2- and {CsCl}-type related slabs}, - volume = {976}, - issn = {0925-8388}, - url = {https://www.sciencedirect.com/science/article/pii/S0925838823045449}, - doi = {10.1016/j.jallcom.2023.173241}, - shorttitle = {The crystal and electronic structure of \textit{{RE}}23Co6.7In20.3 (\textit{{RE}} = {Gd}–{Tm}, {Lu})}, - abstract = {New ternary rare-earth indides {RE}23Co6.7In20.3 ({RE} = {Gd}–{Tm}, {Lu}) have been synthesized by arc-melting the elements under argon and subsequent annealing at 870 K for 1200 h. Single-crystal X-ray diffraction revealed Er23Co6.7In20.3 to crystallize in a new structure type in {oP}100, space group Pbam and Wyckoff sequence h11g13da with a = 23.203(5), b = 28.399(5), c = 3.5306(6) Å. The crystal structures of {RE}23Co6.7In20.3 ({RE} = Tb, Ho, Er and Tm) were determined from single crystal and powder X-ray diffraction data and further investigated by {DFT} methods. The compounds belong to a large family of ternary rare-earth indides with intergrowth of the {AlB}2- and {CsCl}-type related slabs. In the Er23Co6.7In20.3 structure, four types of fragments {REIn} and {RET} of {CsCl}-type, as well as {RET}2 and {REIn}2 of {AlB}2-type, are present simultaneously. 
A simple Python tool was developed to determine the coordination number for each crystallographic site with various methods and tested on the complex structure of {RE}23Co6.7In20.3.}, - pages = {173241}, - journaltitle = {Journal of Alloys and Compounds}, - shortjournal = {Journal of Alloys and Compounds}, - author = {Tyvanchuk, Yuriy and Babizhetskyy, Volodymyr and Baran, Stanisław and Szytuła, Andrzej and Smetana, Volodymyr and Lee, Sangjoon and Oliynyk, Anton O. and Mudring, Anja-Verena}, - urldate = {2024-08-29}, - date = {2024-03-05}, - keywords = {Bonding, Electronic structure, Indide, Intermetallics, Rare earth}, - file = {ScienceDirect Snapshot:/Users/imac/Zotero/storage/I9T7BI7X/S0925838823045449.html:text/html}, -} - -@article{lee_machine_2024, - title = {Machine learning descriptors in materials chemistry used in multiple experimentally validated studies: Oliynyk elemental property dataset}, - volume = {53}, - issn = {2352-3409}, - url = {https://www.data-in-brief.com/article/S2352-3409(24)00149-5/fulltext}, - doi = {10.1016/j.dib.2024.110178}, - shorttitle = {Machine learning descriptors in materials chemistry used in multiple experimentally validated studies}, - journaltitle = {Data in Brief}, - shortjournal = {Data in Brief}, - author = {Lee, Sangjoon and Chen, Clio and Garcia, Griheydi and Oliynyk, Anton}, - urldate = {2024-08-29}, - date = {2024-04-01}, - note = {Publisher: Elsevier}, - keywords = {Feature engineering, Machine learning, Materials chemistry, Materials informatics}, - file = {Full Text PDF:/Users/imac/Zotero/storage/LT3CRPZS/Lee et al. 
- 2024 - Machine learning descriptors in materials chemistr.pdf:application/pdf}, -} - -@article{barua_interpretable_2024, - title = {Interpretable Machine Learning Model on Thermal Conductivity Using Publicly Available Datasets and Our Internal Lab Dataset}, - volume = {36}, - issn = {0897-4756}, - url = {https://doi.org/10.1021/acs.chemmater.4c01696}, - doi = {10.1021/acs.chemmater.4c01696}, - abstract = {Machine learning ({ML}), a subdiscipline of artificial intelligence studies, has gained importance in predicting or suggesting efficient thermoelectric materials. Previous {ML} studies have used different literature sources or density functional theory calculations as input. In this work, we develop a {ML} pipeline trained with multivariable inputs on a massive public dataset of ∼200,000 data utilizing a high-performance computing cluster to predict the thermal conductivity (κ) using four test sets: three publicly available datasets and a dataset built using previously published data from our own group. By taking advantage of this massive dataset, our model presents an opportunity to further expand the understanding of the selection of features with various thermoelectric materials. Among the several supervised {ML} models implemented, the {eXtreme} Gradient Boosting algorithm ({XGBoost}) turned out to be the best method during the 5-fold cross-validation method, with their averaged evaluation coefficients of R2 = 0.96, root mean squared error ({RMSE}) = 0.38 W m−1K−1, and mean absolute error ({MAE}) = 0.23 W m−1K−1. Additionally, with the aid of feature selection and importance analysis, useful chemical features were chosen that ultimately led to reasonably good accuracy in the series of test sets measured as per the evaluation coefficients of R2, {RMSE}, and {MAE}, with values ranging from 0.72 to 0.89, 0.52 to 1.08, and 0.40 to 0.66 W m−1K−1, respectively. Checking the worst outliers led to the discovery of some errors in the literature. 
Postmodel prediction, the {SHapley} Additive {exPlanations} ({SHAP}) algorithm was implemented on the {XGBoost} model to analyze the features that were the key drivers for the model’s decisions. Overall, the developed interpretable methodology produces the prediction of κ of a large variety of materials through the influence of chemical and physical property features. The conclusions drawn apply to the research and applications of thermoelectric and heat insulation materials.}, - pages = {7089--7100}, - number = {14}, - journaltitle = {Chemistry of Materials}, - shortjournal = {Chem. Mater.}, - author = {Barua, Nikhil K. and Hall, Evan and Cheng, Yifei and Oliynyk, Anton O. and Kleinke, Holger}, - urldate = {2024-08-29}, - date = {2024-07-23}, - note = {Publisher: American Chemical Society}, - file = {Full Text PDF:/Users/imac/Zotero/storage/UQ6UBCJS/Barua et al. - 2024 - Interpretable Machine Learning Model on Thermal Co.pdf:application/pdf}, -} - -@article{larsen_atomic_2017, - title = {The atomic simulation environment—a Python library for working with atoms}, - volume = {29}, - issn = {0953-8984}, - url = {https://dx.doi.org/10.1088/1361-648X/aa680e}, - doi = {10.1088/1361-648X/aa680e}, - abstract = {The atomic simulation environment ({ASE}) is a software package written in the Python programming language with the aim of setting up, steering, and analyzing atomistic simulations. In {ASE}, tasks are fully scripted in Python. The powerful syntax of Python combined with the {NumPy} array library make it possible to perform very complex simulation tasks. For example, a sequence of calculations may be performed with the use of a simple ‘for-loop’ construction. Calculations of energy, forces, stresses and other quantities are performed through interfaces to many external electronic structure codes or force fields using a uniform interface. 
On top of this calculator interface, {ASE} provides modules for performing many standard simulation tasks such as structure optimization, molecular dynamics, handling of constraints and performing nudged elastic band calculations.}, - pages = {273002}, - number = {27}, - journaltitle = {Journal of Physics: Condensed Matter}, - shortjournal = {J. Phys.: Condens. Matter}, - author = {Larsen, Ask Hjorth and Mortensen, Jens Jørgen and Blomqvist, Jakob and Castelli, Ivano E. and Christensen, Rune and Dułak, Marcin and Friis, Jesper and Groves, Michael N. and Hammer, Bjørk and Hargus, Cory and Hermes, Eric D. and Jennings, Paul C. and Jensen, Peter Bjerre and Kermode, James and Kitchin, John R. and Kolsbjerg, Esben Leonhard and Kubal, Joseph and Kaasbjerg, Kristen and Lysgaard, Steen and Maronsson, Jón Bergmann and Maxson, Tristan and Olsen, Thomas and Pastewka, Lars and Peterson, Andrew and Rostgaard, Carsten and Schiøtz, Jakob and Schütt, Ole and Strange, Mikkel and Thygesen, Kristian S. and Vegge, Tejs and Vilhelmsen, Lasse and Walter, Michael and Zeng, Zhenhua and Jacobsen, Karsten W.}, - urldate = {2024-08-29}, - date = {2017-06}, - langid = {english}, - note = {Publisher: {IOP} Publishing}, - file = {IOP Full Text PDF:/Users/imac/Zotero/storage/R2HBZEV6/Larsen et al. - 2017 - The atomic simulation environment—a Python library.pdf:application/pdf}, -} - -@article{hall_crystallographic_1991, - title = {The crystallographic information file ({CIF}): a new standard archive file for crystallography}, - volume = {47}, - issn = {1600-5724}, - url = {https://onlinelibrary.wiley.com/doi/abs/10.1107/S010876739101067X}, - doi = {10.1107/S010876739101067X}, - shorttitle = {The crystallographic information file ({CIF})}, - pages = {655--685}, - number = {6}, - journaltitle = {Acta Crystallographica Section A}, - author = {Hall, S. R. and Allen, F. H. and Brown, I. 
D.}, - urldate = {2024-08-29}, - date = {1991}, - langid = {english}, - note = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1107/S010876739101067X}, - file = {Full Text PDF:/Users/imac/Zotero/storage/QU4JZMZE/Hall et al. - 1991 - The crystallographic information file (CIF) a new.pdf:application/pdf;Snapshot:/Users/imac/Zotero/storage/Z8KVTFL6/S010876739101067X.html:text/html}, -} - -@article{ong_python_2013, - title = {Python Materials Genomics (pymatgen): A robust, open-source python library for materials analysis}, - volume = {68}, - issn = {0927-0256}, - url = {https://www.sciencedirect.com/science/article/pii/S0927025612006295}, - doi = {10.1016/j.commatsci.2012.10.028}, - shorttitle = {Python Materials Genomics (pymatgen)}, - abstract = {We present the Python Materials Genomics (pymatgen) library, a robust, open-source Python library for materials analysis. A key enabler in high-throughput computational materials science efforts is a robust set of software tools to perform initial setup for the calculations (e.g., generation of structures and necessary input files) and post-calculation analysis to derive useful material properties from raw calculated data. The pymatgen library aims to meet these needs by (1) defining core Python objects for materials data representation, (2) providing a well-tested set of structure and thermodynamic analyses relevant to many applications, and (3) establishing an open platform for researchers to collaboratively develop sophisticated analyses of materials data obtained both from first principles calculations and experiments. The pymatgen library also provides convenient tools to obtain useful materials data via the Materials Project’s {REpresentational} State Transfer ({REST}) Application Programming Interface ({API}). 
As an example, using pymatgen’s interface to the Materials Project’s {RESTful} {API} and phasediagram package, we demonstrate how the phase and electrochemical stability of a recently synthesized material, Li4SnS4, can be analyzed using a minimum of computing resources. We find that Li4SnS4 is a stable phase in the Li–Sn–S phase diagram (consistent with the fact that it can be synthesized), but the narrow range of lithium chemical potentials for which it is predicted to be stable would suggest that it is not intrinsically stable against typical electrodes used in lithium-ion batteries.}, - pages = {314--319}, - journaltitle = {Computational Materials Science}, - shortjournal = {Computational Materials Science}, - author = {Ong, Shyue Ping and Richards, William Davidson and Jain, Anubhav and Hautier, Geoffroy and Kocher, Michael and Cholia, Shreyas and Gunter, Dan and Chevrier, Vincent L. and Persson, Kristin A. and Ceder, Gerbrand}, - urldate = {2024-08-29}, - date = {2013-02-01}, - keywords = {Design, High-throughput, Materials, Project, Thermodynamics}, - file = {Full Text:/Users/imac/Zotero/storage/B8ALJEE7/Ong et al. - 2013 - Python Materials Genomics (pymatgen) A robust, op.pdf:application/pdf;ScienceDirect Snapshot:/Users/imac/Zotero/storage/QMNM7QY4/S0927025612006295.html:text/html}, -} - -@article{waroquiers_chemenv_2020, - title = {{ChemEnv}: a fast and robust coordination environment identification tool}, - volume = {76}, - issn = {2052-5206}, - url = {https://journals.iucr.org/b/issues/2020/04/00/lo5066/}, - doi = {10.1107/S2052520620007994}, - shorttitle = {{ChemEnv}}, - abstract = {Coordination or local environments have been used to describe, analyze and understand crystal structures for more than a century. Here, a new tool called {ChemEnv}, which can identify coordination environments in a fast and robust manner, is presented. 
In contrast to previous tools, the assessment of the coordination environments is not biased by small distortions of the crystal structure. Its robust and fast implementation enables the analysis of large databases of structures. The code is available open source within the pymatgen package and the software can also be used through a web app available on http://crystaltoolkit.org through the Materials Project.}, - pages = {683--695}, - number = {4}, - journaltitle = {Acta Crystallographica Section B: Structural Science, Crystal Engineering and Materials}, - shortjournal = {Acta Cryst B}, - author = {Waroquiers, D. and George, J. and Horton, M. and Schenk, S. and Persson, K. A. and Rignanese, G.-M. and Gonze, X. and Hautier, G.}, - urldate = {2024-10-28}, - date = {2020-08-01}, - langid = {english}, - note = {Publisher: International Union of Crystallography}, - file = {Full Text PDF:/Users/imac/Zotero/storage/F3I5JFSQ/Waroquiers et al. - 2020 - ChemEnv a fast and robust coordination environmen.pdf:application/pdf}, -} - -@article{sullivan_pyvista_2019, - title = {{PyVista}: 3D plotting and mesh analysis through a streamlined interface for the Visualization Toolkit ({VTK})}, - volume = {4}, - issn = {2475-9066}, - url = {https://joss.theoj.org/papers/10.21105/joss.01450}, - doi = {10.21105/joss.01450}, - shorttitle = {{PyVista}}, - abstract = {Sullivan et al., (2019). {PyVista}: 3D plotting and mesh analysis through a streamlined interface for the Visualization Toolkit ({VTK}). Journal of Open Source Software, 4(37), 1450, https://doi.org/10.21105/joss.01450}, - pages = {1450}, - number = {37}, - journaltitle = {Journal of Open Source Software}, - author = {Sullivan, C. 
Bane and Kaszynski, Alexander A.}, - urldate = {2024-10-28}, - date = {2019-05-19}, - langid = {english}, - file = {Full Text PDF:/Users/imac/Zotero/storage/L2H42VVR/Sullivan and Kaszynski - 2019 - PyVista 3D plotting and mesh analysis through a s.pdf:application/pdf}, -} - -@article{wojdyr_gemmi_2022, - title = {{GEMMI}: A library for structural biology}, - volume = {7}, - rights = {http://creativecommons.org/licenses/by/4.0/}, - issn = {2475-9066}, - url = {https://joss.theoj.org/papers/10.21105/joss.04200}, - doi = {10.21105/joss.04200}, - shorttitle = {{GEMMI}}, - abstract = {{GEMMI} is a cross-platform library, accompanied by a set of small programs, developed primarily for use in the field of macromolecular crystallography ({MX}). Parts of this library are useful also in structural bioinformatics and in chemical crystallography.}, - pages = {4200}, - number = {73}, - journaltitle = {Journal of Open Source Software}, - shortjournal = {{JOSS}}, - author = {Wojdyr, Marcin}, - urldate = {2024-10-28}, - date = {2022-05-04}, - langid = {english}, - file = {Wojdyr - 2022 - GEMMI A library for structural biology.pdf:/Users/imac/Zotero/storage/UBQA3VQV/Wojdyr - 2022 - GEMMI A library for structural biology.pdf:application/pdf}, -} - -@article{harris_array_2020, - title = {Array programming with {NumPy}}, - volume = {585}, - rights = {2020 The Author(s)}, - issn = {1476-4687}, - url = {https://www.nature.com/articles/s41586-020-2649-2}, - doi = {10.1038/s41586-020-2649-2}, - abstract = {Array programming provides a powerful, compact and expressive syntax for accessing, manipulating and operating on data in vectors, matrices and higher-dimensional arrays. {NumPy} is the primary array programming library for the Python language. It has an essential role in research analysis pipelines in fields as diverse as physics, chemistry, astronomy, geoscience, biology, psychology, materials science, engineering, finance and economics. 
For example, in astronomy, {NumPy} was an important part of the software stack used in the discovery of gravitational waves1 and in the first imaging of a black hole2. Here we review how a few fundamental array concepts lead to a simple and powerful programming paradigm for organizing, exploring and analysing scientific data. {NumPy} is the foundation upon which the scientific Python ecosystem is constructed. It is so pervasive that several projects, targeting audiences with specialized needs, have developed their own {NumPy}-like interfaces and array objects. Owing to its central position in the ecosystem, {NumPy} increasingly acts as an interoperability layer between such array computation libraries and, together with its application programming interface ({API}), provides a flexible framework to support the next decade of scientific and industrial analysis.}, - pages = {357--362}, - number = {7825}, - journaltitle = {Nature}, - author = {Harris, Charles R. and Millman, K. Jarrod and van der Walt, Stéfan J. and Gommers, Ralf and Virtanen, Pauli and Cournapeau, David and Wieser, Eric and Taylor, Julian and Berg, Sebastian and Smith, Nathaniel J. and Kern, Robert and Picus, Matti and Hoyer, Stephan and van Kerkwijk, Marten H. and Brett, Matthew and Haldane, Allan and del Río, Jaime Fernández and Wiebe, Mark and Peterson, Pearu and Gérard-Marchant, Pierre and Sheppard, Kevin and Reddy, Tyler and Weckesser, Warren and Abbasi, Hameer and Gohlke, Christoph and Oliphant, Travis E.}, - urldate = {2024-10-28}, - date = {2020-09}, - langid = {english}, - note = {Publisher: Nature Publishing Group}, - keywords = {Computational neuroscience, Computational science, Computer science, Software, Solar physics}, - file = {Full Text PDF:/Users/imac/Zotero/storage/KEQSGBA9/Harris et al. 
- 2020 - Array programming with NumPy.pdf:application/pdf}, -} - -@article{virtanen_scipy_2020, - title = {{SciPy} 1.0: fundamental algorithms for scientific computing in Python}, - volume = {17}, - rights = {2020 The Author(s)}, - issn = {1548-7105}, - url = {https://www.nature.com/articles/s41592-019-0686-2}, - doi = {10.1038/s41592-019-0686-2}, - shorttitle = {{SciPy} 1.0}, - abstract = {{SciPy} is an open-source scientific computing library for the Python programming language. Since its initial release in 2001, {SciPy} has become a de facto standard for leveraging scientific algorithms in Python, with over 600 unique code contributors, thousands of dependent packages, over 100,000 dependent repositories and millions of downloads per year. In this work, we provide an overview of the capabilities and development practices of {SciPy} 1.0 and highlight some recent technical developments.}, - pages = {261--272}, - number = {3}, - journaltitle = {Nature Methods}, - shortjournal = {Nat Methods}, - author = {Virtanen, Pauli and Gommers, Ralf and Oliphant, Travis E. and Haberland, Matt and Reddy, Tyler and Cournapeau, David and Burovski, Evgeni and Peterson, Pearu and Weckesser, Warren and Bright, Jonathan and van der Walt, Stéfan J. and Brett, Matthew and Wilson, Joshua and Millman, K. Jarrod and Mayorov, Nikolay and Nelson, Andrew R. J. and Jones, Eric and Kern, Robert and Larson, Eric and Carey, C. J. and Polat, İlhan and Feng, Yu and Moore, Eric W. and {VanderPlas}, Jake and Laxalde, Denis and Perktold, Josef and Cimrman, Robert and Henriksen, Ian and Quintero, E. A. and Harris, Charles R. and Archibald, Anne M. and Ribeiro, Antônio H. and Pedregosa, Fabian and van Mulbregt, Paul}, - urldate = {2024-10-28}, - date = {2020-03}, - langid = {english}, - note = {Publisher: Nature Publishing Group}, - keywords = {Biophysical chemistry, Computational biology and bioinformatics, Technology}, - file = {Full Text PDF:/Users/imac/Zotero/storage/LQLV3NQJ/Virtanen et al. 
- 2020 - SciPy 1.0 fundamental algorithms for scientific c.pdf:application/pdf}, -} - -@article{hunter_matplotlib_2007, - title = {Matplotlib: A 2D Graphics Environment}, - volume = {9}, - issn = {1558-366X}, - url = {https://ieeexplore.ieee.org/document/4160265}, - doi = {10.1109/MCSE.2007.55}, - shorttitle = {Matplotlib}, - abstract = {Matplotlib is a 2D graphics package used for Python for application development, interactive scripting,and publication-quality image generation across user interfaces and operating systems}, - pages = {90--95}, - number = {3}, - journaltitle = {Computing in Science \& Engineering}, - author = {Hunter, John D.}, - urldate = {2024-10-28}, - date = {2007-05}, - note = {Conference Name: Computing in Science \& Engineering}, - keywords = {application development, Computer languages, Equations, Graphical user interfaces, Graphics, Image generation, Interpolation, Operating systems, Packaging, Programming profession, Python, scientific programming, scripting languages, User interfaces}, - file = {IEEE Xplore Abstract Record:/Users/imac/Zotero/storage/HBZMAHB8/4160265.html:text/html}, -} - -@misc{jaffal_composition_2024, - title = {Composition and structure analyzer/featurizer for explainable machine-learning models to predict solid state structures}, - url = {https://chemrxiv.org/engage/chemrxiv/article-details/670aa269cec5d6c142f3b11a}, - doi = {10.26434/chemrxiv-2024-rrbhc}, - abstract = {Traditional and non-classical machine learning models for solid-state structure prediction have predominantly relied on compositional features (derived from properties of constituent elements) to predict the existence of structure and its properties. However, the lack of structural information can be a source of suboptimal property mapping and increased predictive uncertainty. To address the challenge, we introduce a strategy that generates and combines both compositional and structural features with minimal programming expertise required. 
Our approach utilizes open-source, interactive Python programs named Composition Analyzer Featurizer ({CAF}) and Structure Analyzer Featurizer ({SAF}). {CAF} generates numerical compositional features from a list of formulas provided in an Excel file, while {SAF} extracts numerical structural features from a .cif file by generating a supercell. 133 features from {CAF} and 94 features from {SAF} were used either individually or in combination to cluster nine structure types in equiatomic {AB} intermetallics. The performance was comparable to those with features state-of-the art featurizers in advanced machine learning models. Our {SAF}+{CAF} features provided a cost-efficient and reliable solution, even with the {PLS}-{DA} method, where a significant fraction of the most contributing features were the same as those identified in the more computationally intensive {XGBoost} models.}, - publisher = {{ChemRxiv}}, - author = {Jaffal, Emil and Lee, Sangjoon and Shiryaev, Danila and Vtorov, Alex and Barua, Nikhil and Kleinke, Holger and Oliynyk, Anton}, - urldate = {2024-10-28}, - date = {2024-10-15}, - langid = {english}, - keywords = {crystal structure, feature engineering, machine learning, materials infomatics, software}, - file = {Full Text PDF:/Users/imac/Zotero/storage/XTFPKIMX/Jaffal et al. - 2024 - Composition and structure analyzerfeaturizer for .pdf:application/pdf}, -} diff --git a/paper.md b/paper.md deleted file mode 100644 index 7a707cd..0000000 --- a/paper.md +++ /dev/null @@ -1,183 +0,0 @@ ---- -title: - "cifkit: A Python package for coordination geometry and atomic site analysis" -tags: - - Python - - CIF - - crystallography - - materials science - - solid state chemistry - - crystal structure - - machine learning - -authors: - - name: Sangjoon Lee - orcid: 0000-0002-2367-3932 - corresponding: true - affiliation: 1 - - name: Anton O. 
Oliynyk - orcid: 0000-0003-0732-7340 - affiliation: "2, 3" -affiliations: - - name: - Department of Applied Physics and Applied Mathematics, Columbia - University, New York, NY 10027, United States - index: 1 - - name: - Department of Chemistry, Hunter College, City University of New York, New - York, NY 10065, United States - index: 2 - - name: - Ph.D. Program in Chemistry, The Graduate Center of the City University of - New York, New York, NY 10016, United States - index: 3 -date: 29 August 2024 -bibliography: paper.bib ---- - -# Summary - -`cifkit` provides higher-level functions and properties for coordination -geometry and atomic site analysis from .cif files, which are standard file -formats for storing crystallographic data such as atomic fractional coordinates, -symmetry operations, and unit cell dimensions. Designed for functionalities -demanded by experimental synthesists, cifkit has been used as a backend for -Python applications that automate crystal structure analysis, enabling the -extraction of physics-based features crucial for understanding geometric -configurations and identifying irregularities. `cifkit` offers functions such as -plotting a coordination geometry-based polyhedron from each site, calculating bond -fractions, determining atomic mixing information, and sorting .cif files based -on a set of attributes. - -# Statement of need - -In solid-state chemistry and materials science, the Crystallographic Information -File (CIF) [@hall_crystallographic_1991] is the primary file format for storing -and distributing crystal structure information. Open-source Python packages for -reading, editing, and creating CIF files include Python Materials Genomics -(pymatgen) [@ong_python_2013] and the Atomic Simulation Environment (ASE) -[@larsen_atomic_2017]. 
Pymatgen offers advanced functionalities such as -generating electronic structure properties, phase diagrams, and implementing -coordination environment identification through ChemEnv -[@waroquiers_chemenv_2020]. ASE provides a comprehensive suite of tools for -generating and running atomistic simulations. - -`cifkit` distinguishes itself from existing libraries by offering higher-level -functions and variables that allow solid-state synthesists to obtain intuitive -and measurable properties of interest. It facilitates the visualization -of coordination geometry from each site using four coordination determination -methods and extracts physics-based features like volume and packing efficiency, -which are crucial for structural analysis in machine learning tasks. Moreover, -`cifkit` extracts atomic mixing information at the bond pair level, tasks that -would otherwise require extensive manual effort using GUI-based tools like -VESTA, Diamond, and CrystalMaker. These functions can be further developed -on-demand, as demonstrated by `cifkit`'s ability to extract coordination -geometry information based on four coordination number determination methods for -a newly discovered phase [@tyvanchuk_crystal_2024]. - -`cifkit` further enhances its utility by providing functions for sorting, -preprocessing, and analyzing the distribution of underlying CIF files. It -systematically addresses common issues in CIF files from databases, such as -incorrect loop values and missing fractional coordinates, by standardizing and -filtering out ill-formatted files. The package also preprocesses atomic site -labels, transforming labels such as 'M1' into 'Fe1' in files with atomic mixing for -enhanced visualization and pattern matching. Beyond preprocessing, `cifkit` -offers functionalities to copy, move, and sort files based on attributes such as -coordination numbers, space groups, unit cells, and shortest distances. 
It -excels in visualizing and cataloging CIF files, organizing them by supercell -size, tags, coordination numbers, elements, and atomic mixing. - -# Examples - -`cifkit` is designed to minimize reliance on API documentation for users with -limited programming experience and no background in computational materials -science or chemistry. By simplifying user interactions while maintaining robust -functionality, `cifkit` enables a broader range of scientists to leverage -computational tools for complex tasks—such as extracting geometry-based -polyhedra descriptors from atomic sites. As shown in Figure 1, `cifkit` provides -a higher-level function to visualize the atomic site coordination geometry from a single .cif file -using the `Cif` object. It also provides an overview of multiple .cif files through the the `CifEnsemble` object. -The full installation process can be executed via a Jupyter notebook, accessible through the Google Colab URL -provided in the official documentation. - -![Atomic site coordination geometry from a single .cif file (left) and distribution of coordination numbers obtained from an ensemble of .cif files (right)](docs/assets/img/ErCoIn-histogram-combined.png) - -```python ->>> from cifkit import Cif, Example - -# Initialize with the .cif file path ->>> cif = Cif(Example.Er10Co9In20_file_path) - -# Plot polyhedron from the site element of In1 ->>> cif.plot_polyhedron("In1") - -# Atomic mixing information ->>> cif.site_mixing_type # full occupancy, full_occupancy_atomic_mixing, etc. - -# Determine coordination numbers based on four methods: ->>> cif.CN_max_gap_per_site -{ - "In1": { - "dist_by_shortest_dist": {"max_gap": 0.306, "CN": 14}, - "dist_by_CIF_radius_sum": {"max_gap": 0.39, "CN": 14}, - "dist_by_CIF_radius_refined_sum": {"max_gap": 0.341, "CN": 12}, - "dist_by_Pauling_radius_sum": {"max_gap": 0.398, "CN": 14}, - }, - ... 
- "Rh2": { - "dist_by_shortest_dist": {"max_gap": 0.31, "CN": 9}, - "dist_by_CIF_radius_sum": {"max_gap": 0.324, "CN": 9}, - "dist_by_CIF_radius_refined_sum": {"max_gap": 0.397, "CN": 9}, - "dist_by_Pauling_radius_sum": {"max_gap": 0.380, "CN": 9}, - }, -} -``` - -For processing a large number of .cif files, you may use `CifEnsemble`: - -```python ->>> from cifkit import CifEnsemble, Example - -# Initialize with the folder path containing .cif files ->>> ensemble = CifEnsemble(Example.ErCoIn_big_folder_path) - -# Filter .cif by formula(s) ->>> ensemble.filter_by_formulas(["LaRu2Ge2"]) - -# Filter .cif by site mixing type(s) ->>> ensemble.filter_by_site_mixing_types(["deficiency_without_atomic_mixing"]) - -# Filter .cif by coordination number(s) ->>> ensemble.filter_by_CN_min_dist_method_containing([14]) -``` - -# Applications - -`cifkit` has been used for research conducted at academic and national -laboratories for crystal structure analysis and machine learning studies. CIF -Bond Analyzer (CBA) utilizes `cifkit` to extract coordination geometry -information for a newly discovered phase [@tyvanchuk_crystal_2024]. The -Structure Analysis/Featurizer (SAF) employs `cifkit` to construct and extract -physics-based geometric features for binary and ternary compounds -[@jaffal_composition_2024]. Furthermore, geometric features generated with -`cifkit` are being incorporated into a follow-up study on thermoelectric -materials [@barua_interpretable_2024], building upon the compositional -properties explored in [@lee_machine_2024]. - -# Acknowledgement - -We acknowledge the initial testing done by Nishant Yadav, Siddha Sankalpa Sethi, -and Arnab Dutta from the Indian Institute of Technology, Kharagpur. We also -thank Emil Jaffal, Danila Shiryaev, and Alex Vtorov from CUNY Hunter College for -their testing efforts. We acknowledge Fabian Zills for his recommendations on -Python tooling. 
- -We thank the developers of the following dependencies: - -- gemmi [@wojdyr_gemmi_2022]: .cif parsing and space group operations -- matplotlib [@hunter_matplotlib_2007]: visualization of histograms -- numpy [@harris_array_2020]: angle conversion, linear algebra -- pyvista [@sullivan_pyvista_2019]: visualization of polyhedra -- scipy [@virtanen_scipy_2020]: minimization function to refine of CIF radius - -# References From 939ddbccba46c5791a2a7adfe77b79addc379db4 Mon Sep 17 00:00:00 2001 From: Sangjoon Bob Lee Date: Fri, 3 Jan 2025 20:23:22 -0500 Subject: [PATCH 2/9] fix: apply pre-commit with v0.1.0 scikit-package --- .codespell/ignore_words.txt | 9 ++ .github/ISSUE_TEMPLATE/release_checklist.md | 40 ++++-- .../workflows/build-wheel-release-upload.yml | 2 +- .github/workflows/check-news-item.yml | 2 +- .isort.cfg | 2 +- .pre-commit-config.yaml | 43 ++++-- .prettierrc | 4 +- CODE_OF_CONDUCT.rst | 133 ++++++++++++++++++ README.md | 127 ----------------- mkdocs.yml | 2 +- pyproject.toml | 48 ++++--- requirements/build.txt | 0 src/cifkit/__init__.py | 27 +++- src/cifkit/coordination/composition.py | 4 +- src/cifkit/coordination/connection.py | 4 +- src/cifkit/coordination/filter.py | 8 +- src/cifkit/coordination/geometry.py | 17 +-- src/cifkit/coordination/method.py | 19 +-- src/cifkit/coordination/site.py | 4 +- src/cifkit/data/mendeleeve_handler.py | 5 +- src/cifkit/data/radius.py | 3 +- src/cifkit/data/radius_handler.py | 24 +--- src/cifkit/data/radius_optimization.py | 12 +- src/cifkit/figures/histogram.py | 12 +- src/cifkit/figures/polyhedron.py | 8 +- src/cifkit/models/cif.py | 79 ++++------- src/cifkit/models/cif_ensemble.py | 95 ++++--------- src/cifkit/occupancy/mixing.py | 17 +-- src/cifkit/preprocessors/environment.py | 12 +- src/cifkit/preprocessors/environment_util.py | 4 +- src/cifkit/preprocessors/error.py | 8 +- src/cifkit/preprocessors/format.py | 61 +++----- src/cifkit/preprocessors/supercell.py | 4 +- src/cifkit/utils/bond_pair.py | 3 +- 
src/cifkit/utils/cif_editor.py | 7 +- src/cifkit/utils/cif_parser.py | 56 +++----- src/cifkit/utils/error_messages.py | 24 +--- src/cifkit/utils/folder.py | 28 ++-- src/cifkit/utils/formula.py | 7 +- src/cifkit/utils/sort.py | 3 +- src/cifkit/utils/string_parser.py | 7 +- src/cifkit/utils/unit.py | 7 +- src/cifkit/version.py | 22 +++ test-cifkit.ipynb | 55 -------- tests/conftest.py | 4 +- tests/core/coordination/test_composition.py | 9 +- tests/core/coordination/test_filter.py | 4 +- tests/core/coordination/test_geometry.py | 4 +- tests/core/coordination/test_site.py | 4 +- tests/core/data/test_radius_handler.py | 4 +- tests/core/models/test_cif.py | 13 +- tests/core/models/test_cif_ensemble.py | 31 +--- tests/core/occupancy/test_mixing.py | 16 +-- tests/core/preprocessors/test_environment.py | 6 +- tests/core/preprocessors/test_format.py | 12 +- tests/core/preprocessors/test_supercell.py | 4 +- tests/core/util/test_cif_parser.py | 35 ++--- tests/core/util/test_folder.py | 8 +- tests/core/util/test_random.py | 9 +- tests/core/util/test_unit.py | 4 +- tests/test_version.py | 10 ++ 61 files changed, 498 insertions(+), 737 deletions(-) create mode 100644 CODE_OF_CONDUCT.rst delete mode 100644 README.md create mode 100644 requirements/build.txt delete mode 100644 test-cifkit.ipynb create mode 100644 tests/test_version.py diff --git a/.codespell/ignore_words.txt b/.codespell/ignore_words.txt index 9757d7c..f5df16a 100644 --- a/.codespell/ignore_words.txt +++ b/.codespell/ignore_words.txt @@ -9,3 +9,12 @@ socio-economic ;; Frobenius norm used in np.linalg.norm fro + +;; indide is chemical name +indide + +;; used as a date +nd + +;; chemical +te diff --git a/.github/ISSUE_TEMPLATE/release_checklist.md b/.github/ISSUE_TEMPLATE/release_checklist.md index fa94779..b278b12 100644 --- a/.github/ISSUE_TEMPLATE/release_checklist.md +++ b/.github/ISSUE_TEMPLATE/release_checklist.md @@ -10,37 +10,51 @@ assignees: "" - [ ] All PRs/issues attached to the release are merged. 
- [ ] All the badges on the README are passing. -- [ ] License information is verified as correct. If you are unsure, please comment below. -- [ ] Locally rendered documentation contains all appropriate pages, including API references (check no modules are - missing), tutorials, and other human-written text is up-to-date with any changes in the code. -- [ ] Installation instructions in the README, documentation, and the website (e.g., diffpy.org) are updated. -- [ ] Successfully run any tutorial examples or do functional testing with the latest Python version. +- [ ] License information is verified as correct. If you are unsure, please + comment below. +- [ ] Locally rendered documentation contains all appropriate pages, including + API references (check no modules are missing), tutorials, and other + human-written text is up-to-date with any changes in the code. +- [ ] Installation instructions in the README, documentation, and the website + (e.g., diffpy.org) are updated. +- [ ] Successfully run any tutorial examples or do functional testing with the + latest Python version. - [ ] Grammar and writing quality are checked (no typos). -- [ ] Install `pip install build twine`, run `python -m build` and `twine check dist/*` to ensure that the package can be built and is correctly formatted for PyPI release. +- [ ] Install `pip install build twine`, run `python -m build` and + `twine check dist/*` to ensure that the package can be built and is + correctly formatted for PyPI release. -Please mention @sbillinge here when you are ready for PyPI/GitHub release. Include any additional comments necessary, such as version information and details about the pre-release here: +Please mention @sbillinge here when you are ready for PyPI/GitHub release. 
+Include any additional comments necessary, such as version information and +details about the pre-release here: ### PyPI/GitHub full-release preparation checklist: -- [ ] Create a new conda environment and install the rc from PyPI (`pip install <package-name>==??`) +- [ ] Create a new conda environment and install the rc from PyPI + (`pip install <package-name>==??`) - [ ] License information on PyPI is correct. - [ ] Docs are deployed successfully to `https://www.diffpy.org/`. - [ ] Successfully run all tests, tutorial examples or do functional testing. -Please let @sbillinge know that all checks are done and the package is ready for full release. +Please let @sbillinge know that all checks are done and the package is ready for +full release. ### conda-forge release preparation checklist: - [ ] Ensure that the full release has appeared on PyPI successfully. -- [ ] New package dependencies listed in `conda.txt` and `test.txt` are added to `meta.yaml` in the feedstock. -- [ ] Close any open issues on the feedstock. Reach out to @bobleesj if you have questions. +- [ ] New package dependencies listed in `conda.txt` and `test.txt` are added to + `meta.yaml` in the feedstock. +- [ ] Close any open issues on the feedstock. Reach out to @bobleesj if you have + questions. - [ ] Tag @sbillinge and @bobleesj for conda-forge release. ### Post-release checklist -- [ ] Run tutorial examples and conduct functional testing using the installation guide in the README. Attach screenshots/results as comments. -- [ ] Documentation (README, tutorials, API references, and websites) is deployed without broken links or missing figures. +- [ ] Run tutorial examples and conduct functional testing using the + installation guide in the README. Attach screenshots/results as comments. +- [ ] Documentation (README, tutorials, API references, and websites) is + deployed without broken links or missing figures. 
diff --git a/.github/workflows/build-wheel-release-upload.yml b/.github/workflows/build-wheel-release-upload.yml index 88c2d0f..3733d4a 100644 --- a/.github/workflows/build-wheel-release-upload.yml +++ b/.github/workflows/build-wheel-release-upload.yml @@ -4,7 +4,7 @@ on: workflow_dispatch: push: tags: - - '*' # Trigger on all tags initially, but tag and release privilege are verified in _build-wheel-release-upload.yml + - "*" # Trigger on all tags initially, but tag and release privilege are verified in _build-wheel-release-upload.yml jobs: release: diff --git a/.github/workflows/check-news-item.yml b/.github/workflows/check-news-item.yml index 20b162c..fcb0b78 100644 --- a/.github/workflows/check-news-item.yml +++ b/.github/workflows/check-news-item.yml @@ -3,7 +3,7 @@ name: Check for News on: pull_request_target: branches: - - main + - main jobs: check-news-item: diff --git a/.isort.cfg b/.isort.cfg index 7ce0fb1..15177f1 100644 --- a/.isort.cfg +++ b/.isort.cfg @@ -1,5 +1,5 @@ [settings] # Keep import statement below line_length character limit -line_length = 115 +line_length = 90 multi_line_output = 3 include_trailing_comma = True diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 06a72bc..2caee67 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,8 +1,17 @@ default_language_version: - python: python3 + python: python3 +ci: + autofix_commit_msg: | + [pre-commit.ci] auto fixes from pre-commit hooks + autofix_prs: true + autoupdate_branch: "pre-commit-autoupdate" + autoupdate_commit_msg: "[pre-commit.ci] pre-commit autoupdate" + autoupdate_schedule: monthly + skip: [no-commit-to-branch] + submodules: false repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 + rev: v4.6.0 hooks: - id: check-yaml - id: end-of-file-fixer @@ -13,8 +22,12 @@ repos: - id: check-merge-conflict - id: check-toml - id: check-added-large-files + - repo: https://github.com/psf/black + rev: 24.4.2 + hooks: + - id: black - repo: 
https://github.com/pycqa/flake8 - rev: 7.1.1 + rev: 7.0.0 hooks: - id: flake8 - repo: https://github.com/pycqa/isort @@ -23,11 +36,11 @@ repos: - id: isort args: ["--profile", "black"] - repo: https://github.com/kynan/nbstripout - rev: 0.8.1 + rev: 0.7.1 hooks: - id: nbstripout - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 + rev: v4.4.0 hooks: - id: no-commit-to-branch name: Prevent Commit to Main Branch @@ -36,6 +49,20 @@ repos: - repo: https://github.com/codespell-project/codespell rev: v2.3.0 hooks: - - id: codespell - additional_dependencies: - - tomli + - id: codespell + additional_dependencies: + - tomli + # prettier - multi formatter for .json, .yml, and .md files + - repo: https://github.com/pre-commit/mirrors-prettier + rev: f12edd9c7be1c20cfa42420fd0e6df71e42b51ea # frozen: v4.0.0-alpha.8 + hooks: + - id: prettier + additional_dependencies: + - "prettier@^3.2.4" + # docformatter - PEP 257 compliant docstring formatter + - repo: https://github.com/s-weigand/docformatter + rev: 5757c5190d95e5449f102ace83df92e7d3b06c6c + hooks: + - id: docformatter + additional_dependencies: [tomli] + args: [--in-place, --config, ./pyproject.toml] diff --git a/.prettierrc b/.prettierrc index b09ab5e..cfae7e2 100644 --- a/.prettierrc +++ b/.prettierrc @@ -1,4 +1,4 @@ { - "proseWrap": "always", - "printWidth": 80 + "proseWrap": "always", + "printWidth": 80 } diff --git a/CODE_OF_CONDUCT.rst b/CODE_OF_CONDUCT.rst new file mode 100644 index 0000000..ff9c356 --- /dev/null +++ b/CODE_OF_CONDUCT.rst @@ -0,0 +1,133 @@ +===================================== + Contributor Covenant Code of Conduct +===================================== + +Our Pledge +---------- + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, 
socio-economic status, +nationality, personal appearance, race, caste, color, religion, or sexual +identity and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +Our Standards +------------- + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the overall + community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or advances of + any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email address, + without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +Enforcement Responsibilities +---------------------------- + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +Scope +----- + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. 
+Examples of representing our community include using an official email address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +Enforcement +----------- + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +sb2896@columbia.edu. All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +Enforcement Guidelines +---------------------- + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +1. Correction +**************** + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +2. Warning +************* + +**Community Impact**: A violation through a single incident or series of +actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or permanent +ban. + +3. Temporary Ban +****************** + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. 
No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +4. Permanent Ban +****************** + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within the +community. + +Attribution +----------- + +This Code of Conduct is adapted from the `Contributor Covenant <https://www.contributor-covenant.org/version/2/1/code_of_conduct.html>`_. + +Community Impact Guidelines were inspired by `Mozilla's code of conduct enforcement ladder <https://github.com/mozilla/diversity>`_. + +For answers to common questions about this code of conduct, see the `FAQ <https://www.contributor-covenant.org/faq>`_. `Translations are available <https://www.contributor-covenant.org/translations>`_ diff --git a/README.md b/README.md deleted file mode 100644 index d71a861..0000000 --- a/README.md +++ /dev/null @@ -1,127 +0,0 @@ -# cifkit - -[![CI](https://github.com/bobleesj/cifkit/actions/workflows/tests-on-pr.yml/badge.svg?branch=main)](https://github.com/bobleesj/cifkit/actions/workflows/tests-on-pr.yml) -[![codecov](https://codecov.io/gh/bobleesj/cifkit/graph/badge.svg?token=AN2YAC337A)](https://codecov.io/gh/bobleesj/cifkit) -![Python - Version](https://img.shields.io/pypi/pyversions/cifkit) -[![PyPi version](https://img.shields.io/pypi/v/cifkit.svg)](https://pypi.python.org/pypi/cifkit) - - - -![Logo light mode](docs/assets/img/logo-black.png#gh-light-mode-only "cifkit logo light") -![Logo dark mode](docs/assets/img/logo-color.png#gh-dark-mode-only "cifkit logo dark") - -`cifkit` is designed to provide a set of fully-tested utility functions and -variables for handling large 
datasets, on the order of tens of thousands, of -`.cif` files. - -## Features: - -`cifkit` provides higher-level functions in just a few lines of code. 
- -- **Coordination geometry** - `cifkit` provides functions for visualing - coordination geometry from each site and extracts physics-based features like - volume and packing efficiency in each polyhedron. -- **Atomic mixing** - `cifkit` extracts atomic mixing information at the bond - pair level—tasks that would otherwise require extensive manual effort using - GUI-based tools like VESTA, Diamond, and CrystalMaker. -- **Filter** - `cifkit` offers features for preprocessing. It systematically - addresses common issues in CIF files from databases, such as incorrect loop - values and missing fractional coordinates, by standardizing and filtering out - ill-formatted files. It also preprocesses atomic site labels, transforming - labels such as 'M1' to 'Fe1' in files with atomic mixing. -- **Sort** - `cifkit` allows you to copy, move, and sort `.cif` files based on - attributes such as coordination numbers, space groups, unit cells, shortest - distances, elements, and more. - -### Example usage 1 - coordination geometry - -The example below uses `cifkit` to visualize the polyhedron generated from each -atomic site based on the coordination number geometry. - -```python -from cifkit import Cif - -cif = Cif("your_cif_file_path") -site_labels = cif.site_labels - -# Loop through each site label -for label in site_labels: - # Dipslay each polyhedron, .png saved for each label - cif.plot_polyhedron(label, is_displayed=True) -``` - -![Polyhedron generation](docs/assets/img/ErCoIn-polyhedron.png) - -### Example Usage 2 - sort - -The following example generates a distribution of structure. 
- -```python -from cifkit import CifEnsemble - -ensemble = CifEnsemble("your_folder_path_containing_cif_files") -ensemble.generate_structure_histogram() -``` - -![structure distribution](docs/assets/img/histogram-structure.png) - -Basde on your visual histogram above, you can copy and move .cif files based on -specific attributes: - -```python -# Return file paths matching structures either Co1.75Ge or CoIn2 -ensemble.filter_by_structures(["Co1.75Ge", "CoIn2"]) - -# Return file path matching CeAl2Ga2 -ensemble.filter_by_structures("CeAl2Ga2") -``` - -To learn more, please read the official documentation here: -https://bobleesj.github.io/cifkit. - -## Quotes - -Here is a quote illustrating how `cifkit` addresses one of the challenges -mentioned above. - -> "I am building an X-Ray diffraction analysis (XRD) pattern visualization -> script for my lab using `pymatgen`. I feel like `cifkit` integrated really -> well into my existing stable of libraries, while surpassing some alternatives -> in preprocessing and parsing. For example, it was often unclear at what stage -> an error occurred—whether during pre-processing with `CifParser`, or XRD plot -> generation with `diffraction.core` in `pymatgen`. The pre-processing logic in -> `cifkit` was communicated clearly, both in documentation and in actual -> outputs, allowing me to catch errors in my data before it was used in my -> visualizations. I now use `cifkit` by default for processing CIFs before they -> pass through the rest of my pipeline." - Alex Vtorov ` - -## Documentation - -- [Official documentation](https://bobleesj.github.io/cifkit) -- [Contribution guide](https://github.com/bobleesj/cifkit/blob/main/CONTRIBUTING.md) -- [MIT license](https://github.com/bobleesj/cifkit/blob/main/LICENSE) - -## How to contribute - -Here is how you can contribute to the `cifkit` project if you found it helpful: - -- Star the repository on GitHub and recommend it to your colleagues who might - find `cifkit` helpful as well. 
- [![Star GitHub repository](https://img.shields.io/github/stars/bobleesj/cifkit.svg?style=social)](https://github.com/bobleesj/cifkit/stargazers) -- Create a new issue for any bugs or feature requests - [here](https://github.com/bobleesj/cifkit/issues) -- Fork the repository and consider contributing changes via a pull request. - [![Fork GitHub repository](https://img.shields.io/github/forks/bobleesj/cifkit?style=social)](https://github.com/bobleesj/cifkit/fork). - Check out - [CONTRIBUTING.md](https://github.com/bobleesj/cifkit/blob/main/CONTRIBUTING.md) - for instructions. -- If you have any suggestions or need further clarification on how to use - `cifkit`, please reach out to Bob Lee - ([@bobleesj](https://github.com/bobleesj)). - -## To render documentation - -```bash -pip install -r requirements/docs.txt -mkdocs serve -``` diff --git a/mkdocs.yml b/mkdocs.yml index 6eeff87..48b5e15 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -60,6 +60,6 @@ plugins: paths: [src] options: show_source: false - show_root_heading: true # Show module names as headings + show_root_heading: true # Show module names as headings docstring_style: numpy - mkdocs-jupyter diff --git a/pyproject.toml b/pyproject.toml index e7cef03..c407852 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,13 +5,22 @@ build-backend = "setuptools.build_meta" [project] name = "cifkit" dynamic=['version', 'dependencies'] +authors = [ + { name="Sangjoon Lee", email="bobleesj@gmail.com" }, +] +maintainers = [ + { name="Sangjoon Lee", email="bobleesj@gmail.com" }, +] +description = "A Python package for coordination geometry and atomic site analysis." 
+keywords = ["cif", "solid-state", "high-throughput", "crystallography"] +readme = "README.rst" requires-python = ">=3.11, <3.14" classifiers = [ 'Development Status :: 5 - Production/Stable', 'Environment :: Console', 'Intended Audience :: Developers', 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: MIT License', + 'License :: OSI Approved :: BSD License', 'Operating System :: MacOS :: MacOS X', 'Operating System :: Microsoft :: Windows', 'Operating System :: POSIX', @@ -22,17 +31,10 @@ classifiers = [ 'Topic :: Scientific/Engineering :: Physics', 'Topic :: Scientific/Engineering :: Chemistry', ] -description = "A Python package for coordination geometry and atomic site analysis" -authors = [{name = "Sangjoon Lee", email = "bobleesj@gmail.com"}] -readme = "README.md" -license = {file = "LICENSE"} -keywords = ["cif", "solid-state", "high-throughput", "crystallography"] [project.urls] -Documentation = "https://bobleesj.github.io/cifkit" -Repository = "https://github.com/bobleesj/cifkit" -Issues = "https://github.com/bobleesj/cifkit/issues" -Changelog = "https://github.com/bobleesj/cifkit/blob/main/CHANGELOG" +Homepage = "https://github.com/bobleesj/cifkit/" +Issues = "https://github.com/bobleesj/cifkit/issues/" [tool.setuptools-git-versioning] enabled = true @@ -41,32 +43,38 @@ dev_template = "{tag}" dirty_template = "{tag}" [tool.setuptools.packages.find] -where = ["src"] +where = ["src"] # list of folders that contain the packages (["."] by default) +include = ["*"] # package names should match these glob patterns (["*"] by default) +exclude = [] # exclude packages matching these glob patterns (empty by default) +namespaces = false # to disable scanning PEP 420 namespaces (true by default) [tool.setuptools.dynamic] dependencies = {file = ["requirements/pip.txt"]} [tool.codespell] -ignore-words-list = "indide,nd,te,caf,mater" -skip = "*.cif" +exclude-file = ".codespell/ignore_lines.txt" +ignore-words = ".codespell/ignore_words.txt" +skip = 
"*.cif,*.dat" [tool.black] -line-length = 79 +line-length = 90 include = '\.pyi?$' exclude = ''' /( - \.git\ + \.git + | \.hg | \.mypy_cache | \.tox | \.venv - | _build/ + | \.rst + | \.txt + | _build | buck-out | build | dist + + # The following are specific to Black, you probably don't want those. | blib2to3 - | tests/data/ - | hooks/post_gen_project.py$ - | .*\.py$ - | .*\.toml$ + | tests/data )/ ''' diff --git a/requirements/build.txt b/requirements/build.txt new file mode 100644 index 0000000..e69de29 diff --git a/src/cifkit/__init__.py b/src/cifkit/__init__.py index c15ca83..921f508 100644 --- a/src/cifkit/__init__.py +++ b/src/cifkit/__init__.py @@ -1,5 +1,30 @@ +#!/usr/bin/env python +############################################################################## +# +# (c) 2025 Sangjoon Lee. +# All rights reserved. +# +# File coded by: Sangjoon Lee, Anton Oliynyk, and community contributors. +# +# See GitHub contributions for a more detailed list of contributors. +# https://github.com/bobleesj/cifkit/graphs/contributors +# +# See LICENSE.rst for license information. 
+# +############################################################################## +"""Python package for doing science.""" + +# package version +from cifkit.version import __version__ + from .data.example import Example from .models.cif import Cif from .models.cif_ensemble import CifEnsemble -__all__ = ["Cif", "CifEnsemble", "Example"] +# silence the pyflakes syntax checker +assert __version__ or True +assert Example or True +assert Cif or True +assert CifEnsemble or True + +# End of file diff --git a/src/cifkit/coordination/composition.py b/src/cifkit/coordination/composition.py index 74b8b4e..25a127e 100644 --- a/src/cifkit/coordination/composition.py +++ b/src/cifkit/coordination/composition.py @@ -39,9 +39,7 @@ def get_bond_counts( # Sort by Mendeeleve if sorted_by_mendeleev: - sorted_bond_pair = bond_pair.order_tuple_pair_by_mendeleev( - pair - ) + sorted_bond_pair = bond_pair.order_tuple_pair_by_mendeleev(pair) else: sorted_bond_pair = tuple(sorted((ref_element, conn_element))) diff --git a/src/cifkit/coordination/connection.py b/src/cifkit/coordination/connection.py index e82fd02..1698f11 100644 --- a/src/cifkit/coordination/connection.py +++ b/src/cifkit/coordination/connection.py @@ -1,6 +1,4 @@ -def get_CN_connections_by_best_methods( - best_methods, connections: dict -) -> dict: +def get_CN_connections_by_best_methods(best_methods, connections: dict) -> dict: """Retrieve connections limited by the number of vertices (CN) for each label.""" CN_connections = {} diff --git a/src/cifkit/coordination/filter.py b/src/cifkit/coordination/filter.py index e3b2789..afbd15a 100644 --- a/src/cifkit/coordination/filter.py +++ b/src/cifkit/coordination/filter.py @@ -41,9 +41,7 @@ def find_best_polyhedron(max_gaps_per_label, connections): continue # Move to the next method # Returns non if there is any error - polyhedron_metrics = compute_polyhedron_metrics( - polyhedron_points, hull - ) + polyhedron_metrics = compute_polyhedron_metrics(polyhedron_points, hull) # If 
there is no metrics, then skip the method if polyhedron_metrics is None: @@ -71,9 +69,7 @@ def get_CN_connections_by_min_dist_method(max_gaps_per_label, connections): CN_by_shortest_dist = {} for label, methods_info in max_gaps_per_label.items(): # Access the 'dist_by_shortest_dist' method and get the 'CN' value - CN_by_shortest_dist[label] = methods_info["dist_by_shortest_dist"][ - "CN" - ] + CN_by_shortest_dist[label] = methods_info["dist_by_shortest_dist"]["CN"] CN_connections: dict = {} # Iterate through each label and number of connections diff --git a/src/cifkit/coordination/geometry.py b/src/cifkit/coordination/geometry.py index b788069..1752c7b 100644 --- a/src/cifkit/coordination/geometry.py +++ b/src/cifkit/coordination/geometry.py @@ -9,7 +9,6 @@ def get_polyhedron_coordinates_labels( """Return a list of Cartesian coordinates and labels. The central atom is the last index. - """ conn_data = connections[label] polyhedron_points = [conn[3] for conn in conn_data] @@ -51,9 +50,7 @@ def compute_polyhedron_metrics(polyhedron_points, hull): number_of_vertices = len(polyhedron_points) face_centers = np.mean(polyhedron_points[hull.simplices], axis=1) - distances_to_faces = np.linalg.norm( - face_centers - central_atom_coord, axis=1 - ) + distances_to_faces = np.linalg.norm(face_centers - central_atom_coord, axis=1) shortest_distance_to_face = np.min(distances_to_faces) edge_centers = np.array( @@ -62,16 +59,12 @@ def compute_polyhedron_metrics(polyhedron_points, hull): for edge in edges ] ) - distances_to_edges = np.linalg.norm( - edge_centers - central_atom_coord, axis=1 - ) + distances_to_edges = np.linalg.norm(edge_centers - central_atom_coord, axis=1) shortest_distance_to_edge = np.min(distances_to_edges) radius_of_inscribed_sphere = shortest_distance_to_face - volume_of_inscribed_sphere = ( - 4 / 3 * np.pi * radius_of_inscribed_sphere**3 - ) + volume_of_inscribed_sphere = 4 / 3 * np.pi * radius_of_inscribed_sphere**3 packing_efficiency = 
volume_of_inscribed_sphere / hull.volume @@ -94,9 +87,7 @@ def compute_polyhedron_metrics(polyhedron_points, hull): return None -def compute_center_of_mass_and_distance( - polyhedron_points, hull, central_atom_coord -): +def compute_center_of_mass_and_distance(polyhedron_points, hull, central_atom_coord): """Calculate the center of mass of a polyhedron and the distance from the center of mass to a given point.""" center_of_mass = np.mean(polyhedron_points[hull.vertices, :], axis=0) diff --git a/src/cifkit/coordination/method.py b/src/cifkit/coordination/method.py index 8631c61..b7f2f26 100644 --- a/src/cifkit/coordination/method.py +++ b/src/cifkit/coordination/method.py @@ -6,7 +6,7 @@ def compute_CN_max_gap_per_site( all_labels_connections, is_radius_data_available: bool, site_mixing_type: str, -) -> dict[str: dict[str: dict[str: float]]]: +) -> dict[str : dict[str : dict[str:float]]]: use_all_methods = False if is_radius_data_available and site_mixing_type == "full_occupancy": @@ -72,9 +72,7 @@ def compute_CN_max_gap_per_site( ) # Compute normalized distances - norm_dist_by_min_dist = compute_normalized_value( - pair_dist, shortest_dist - ) + norm_dist_by_min_dist = compute_normalized_value(pair_dist, shortest_dist) # Store distances if use_all_methods: distances = { @@ -97,13 +95,8 @@ def compute_CN_max_gap_per_site( abs(norm_distance - previous_values[method]), 3, ) - if ( - current_gap - > max_gaps_per_label[ref_label][method]["max_gap"] - ): - max_gaps_per_label[ref_label][method][ - "max_gap" - ] = current_gap + if current_gap > max_gaps_per_label[ref_label][method]["max_gap"]: + max_gaps_per_label[ref_label][method]["max_gap"] = current_gap max_gaps_per_label[ref_label][method]["CN"] = i previous_values[method] = norm_distance @@ -133,8 +126,6 @@ def get_rad_sum_value( if method_name not in rad_sum_data: raise KeyError(f"Method {method_name} not found in rad_sum") if key not in rad_sum_data[method_name]: - raise KeyError( - f"Key {key} not found in method 
{method_name} of rad_sum" - ) + raise KeyError(f"Key {key} not found in method {method_name} of rad_sum") return rad_sum_data[method_name][key] diff --git a/src/cifkit/coordination/site.py b/src/cifkit/coordination/site.py index eb4e983..da2c698 100644 --- a/src/cifkit/coordination/site.py +++ b/src/cifkit/coordination/site.py @@ -14,8 +14,8 @@ def get_min_distance_pair( def get_min_distance_pair_per_site_label( connections: dict, ) -> list[tuple[tuple[str, str], float]]: - """Return a list of tuples containing element pairs and the minimum distance - from each site label in the loop.""" + """Return a list of tuples containing element pairs and the minimum + distance from each site label in the loop.""" element_pairs = [] # Iterate over each pair and their list of distances for ref_label, pair_data in connections.items(): diff --git a/src/cifkit/data/mendeleeve_handler.py b/src/cifkit/data/mendeleeve_handler.py index c80a67d..f8cbc35 100644 --- a/src/cifkit/data/mendeleeve_handler.py +++ b/src/cifkit/data/mendeleeve_handler.py @@ -8,13 +8,10 @@ def get_mendeleev_nums_from_pair_tuple( """Parse Mendeleev number for each label in the tuple. If no number is found, default to 0 for that element. 
- """ # Parse the first and second elements first_element = string_parser.get_atom_type_from_label(label_pair_tuple[0]) - second_element = string_parser.get_atom_type_from_label( - label_pair_tuple[1] - ) + second_element = string_parser.get_atom_type_from_label(label_pair_tuple[1]) mendeleev_numbers = get_mendeleev_numbers() # Get Mendeleev number for the first element, default to 0 if not found first_mendeleev_num = mendeleev_numbers.get(first_element, 0) diff --git a/src/cifkit/data/radius.py b/src/cifkit/data/radius.py index 9275a2c..e251150 100644 --- a/src/cifkit/data/radius.py +++ b/src/cifkit/data/radius.py @@ -74,8 +74,7 @@ def get_radius_data() -> dict: "Zr": [1.553, 1.597], } data: dict = { - k: {"CIF_radius": v[0], "Pauling_radius_CN12": v[1]} - for k, v in rad_data.items() + k: {"CIF_radius": v[0], "Pauling_radius_CN12": v[1]} for k, v in rad_data.items() } return data diff --git a/src/cifkit/data/radius_handler.py b/src/cifkit/data/radius_handler.py index 80579c8..bc1a31c 100644 --- a/src/cifkit/data/radius_handler.py +++ b/src/cifkit/data/radius_handler.py @@ -6,15 +6,12 @@ def get_is_radius_data_available(elements: list[str]) -> bool: - """Check if both CIF and Pauling radius data are available for each element in - the list.""" + """Check if both CIF and Pauling radius data are available for each element + in the list.""" data = get_radius_data() for element in elements: element_data = data.get(element, {}) - if not ( - "CIF_radius" in element_data - and "Pauling_radius_CN12" in element_data - ): + if not ("CIF_radius" in element_data and "Pauling_radius_CN12" in element_data): return False return True @@ -48,12 +45,8 @@ def get_radius_values_per_element( for element in elements: combined_radii[element] = { "CIF_radius": CIF_pauling_rad[element]["CIF_radius"], - "CIF_radius_refined": float( - np.round(CIF_refined_rad.get(element), 3) - ), - "Pauling_radius_CN12": CIF_pauling_rad[element][ - "Pauling_radius_CN12" - ], + "CIF_radius_refined": 
float(np.round(CIF_refined_rad.get(element), 3)), + "Pauling_radius_CN12": CIF_pauling_rad[element]["Pauling_radius_CN12"], } return round_dict_values(combined_radii) @@ -80,14 +73,11 @@ def compute_radius_sum( elem_j = elements[j] # Element pair label, e.g., A-B or A-A - pair_label = ( - f"{elem_i}-{elem_j}" if i != j else f"{elem_i}-{elem_i}" - ) + pair_label = f"{elem_i}-{elem_j}" if i != j else f"{elem_i}-{elem_i}" # Sum radii for each radius type pair_distances["CIF_radius_sum"][pair_label] = round( - radius_values[elem_i]["CIF_radius"] - + radius_values[elem_j]["CIF_radius"], + radius_values[elem_i]["CIF_radius"] + radius_values[elem_j]["CIF_radius"], 3, ) pair_distances["CIF_radius_refined_sum"][pair_label] = round( diff --git a/src/cifkit/data/radius_optimization.py b/src/cifkit/data/radius_optimization.py index a2c7ff6..cda2c84 100644 --- a/src/cifkit/data/radius_optimization.py +++ b/src/cifkit/data/radius_optimization.py @@ -14,22 +14,20 @@ def generate_adjacent_pairs( # Binary -> [('In', 'Rh')] # Ternary -> [('In', 'Rh'), ('Rh', 'U')] - label_to_pair = [ - (elements[i], elements[i + 1]) for i in range(len(elements) - 1) - ] + label_to_pair = [(elements[i], elements[i + 1]) for i in range(len(elements) - 1)] return label_to_pair def objective(params, original_radii: list[float]) -> list[float]: - """Calculate the objective function value,which is the sum of squared percent - differences between original and refined radii.""" + """Calculate the objective function value,which is the sum of squared + percent differences between original and refined radii.""" return np.sum(((original_radii - params) / original_radii) ** 2) def constraint(params, index_pair: tuple[int, int], shortest_distance: dict): - """Enforce that the sum of the radii of the pair does not exceed the shortest - allowed distance between them.""" + """Enforce that the sum of the radii of the pair does not exceed the + shortest allowed distance between them.""" i, j = index_pair return 
shortest_distance - (params[i] + params[j]) diff --git a/src/cifkit/figures/histogram.py b/src/cifkit/figures/histogram.py index 25d0177..6520581 100644 --- a/src/cifkit/figures/histogram.py +++ b/src/cifkit/figures/histogram.py @@ -131,24 +131,20 @@ def plot_histogram(attribute, stats, dir_path, display, output_dir): "key_data_type": "string", }, } - generate_histogram( - histogram["data"], histogram["settings"], display, output_dir - ) + generate_histogram(histogram["data"], histogram["settings"], display, output_dir) # Make a default folder if the output folder is not provided def generate_histogram(data, settings, display, output_dir: str) -> None: - """Generate a histogram from a dictionary of data and save it to a specified - directory.""" + """Generate a histogram from a dictionary of data and save it to a + specified directory.""" plt.figure(figsize=(10, 6)) # Create a new figure for each histogram if settings.get("key_data_type") == "float": # If keys are supposed to be numeric but are strings, convert them - data = { - float(key): data[key] for key in sorted(data.keys(), key=float) - } + data = {float(key): data[key] for key in sorted(data.keys(), key=float)} if settings.get("key_data_type") == "int": data = {int(key): data[key] for key in sorted(data.keys(), key=int)} diff --git a/src/cifkit/figures/polyhedron.py b/src/cifkit/figures/polyhedron.py index 33a02ac..a14eded 100644 --- a/src/cifkit/figures/polyhedron.py +++ b/src/cifkit/figures/polyhedron.py @@ -126,9 +126,7 @@ def plot( plotter.add_mesh(poly_data, color="aqua", opacity=0.5, show_edges=True) plotter.show() - """ - Output - """ + """Output.""" # Determine the output directory based on provided path if not output_dir: @@ -146,8 +144,6 @@ def plot( + ".png" ) save_path = os.path.join(output_dir, plot_filename) - """ - Save - """ + """Save.""" # Save the screenshot plotter.screenshot(save_path) diff --git a/src/cifkit/models/cif.py b/src/cifkit/models/cif.py index 65e0477..4538e84 100644 --- 
a/src/cifkit/models/cif.py +++ b/src/cifkit/models/cif.py @@ -83,9 +83,7 @@ def wrapper(self, *args, **kwargs): class Cif: - def __init__( - self, file_path: str, is_formatted=False, logging_enabled=False - ) -> None: + def __init__(self, file_path: str, is_formatted=False, logging_enabled=False) -> None: """Initializes an object from a .cif file. Parameters @@ -238,15 +236,13 @@ def _parse_cif_data(self): self.bond_pairs_sorted_by_mendeleev = get_pairs_sorted_by_mendeleev( self.unique_elements ) - self.site_label_pairs_sorted_by_mendeleev = ( - get_pairs_sorted_by_mendeleev(self.site_labels) + self.site_label_pairs_sorted_by_mendeleev = get_pairs_sorted_by_mendeleev( + self.site_labels ) self.site_mixing_type = get_site_mixing_type( self.site_labels, self.atom_site_info ) - self.is_radius_data_available = get_is_radius_data_available( - self.unique_elements - ) + self.is_radius_data_available = get_is_radius_data_available(self.unique_elements) self.mixing_info_per_label_pair = get_mixing_type_per_pair_dict( self.site_labels, self.site_label_pairs, self.atom_site_info ) @@ -299,8 +295,8 @@ def compute_connections(self, cutoff_radius=10.0) -> None: self._shortest_distance = get_shortest_distance(self.connections) # Shortest distance per bond pair - self._shortest_bond_pair_distance = ( - get_shortest_distance_per_bond_pair(self.connections_flattened) + self._shortest_bond_pair_distance = get_shortest_distance_per_bond_pair( + self.connections_flattened ) # Shortest distance per site @@ -327,22 +323,17 @@ def compute_connections(self, cutoff_radius=10.0) -> None: # Find the best methods self._CN_best_methods = find_best_polyhedron( - self.CN_max_gap_per_site, self.connections ) # Get CN connections by the best methods - self._CN_connections_by_best_methods = ( - get_CN_connections_by_best_methods( - self.CN_best_methods, self.connections - ) + self._CN_connections_by_best_methods = get_CN_connections_by_best_methods( + self.CN_best_methods, self.connections ) # Get 
CN connections by the best methods - self._CN_connections_by_min_dist_method = ( - get_CN_connections_by_min_dist_method( - self.CN_max_gap_per_site, self.connections - ) + self._CN_connections_by_min_dist_method = get_CN_connections_by_min_dist_method( + self.CN_max_gap_per_site, self.connections ) # Bond counts self._CN_bond_count_by_min_dist_method = get_bond_counts( @@ -353,19 +344,15 @@ def compute_connections(self, cutoff_radius=10.0) -> None: ) # Bond counts sorted by mendeleev - self._CN_bond_count_by_min_dist_method_sorted_by_mendeleev = ( - get_bond_counts( - self.unique_elements, - self.CN_connections_by_min_dist_method, - sorted_by_mendeleev=True, - ) + self._CN_bond_count_by_min_dist_method_sorted_by_mendeleev = get_bond_counts( + self.unique_elements, + self.CN_connections_by_min_dist_method, + sorted_by_mendeleev=True, ) - self._CN_bond_count_by_best_methods_sorted_by_mendeleev = ( - get_bond_counts( - self.unique_elements, - self.CN_connections_by_best_methods, - sorted_by_mendeleev=True, - ) + self._CN_bond_count_by_best_methods_sorted_by_mendeleev = get_bond_counts( + self.unique_elements, + self.CN_connections_by_best_methods, + sorted_by_mendeleev=True, ) # Bond fractions @@ -378,15 +365,11 @@ def compute_connections(self, cutoff_radius=10.0) -> None: # Bond fractions sorted by Mendeleev self._CN_bond_fractions_by_min_dist_method_sorted_by_mendeleev = ( - get_bond_fractions( - self.CN_bond_count_by_min_dist_method_sorted_by_mendeleev - ) + get_bond_fractions(self.CN_bond_count_by_min_dist_method_sorted_by_mendeleev) ) - self._CN_bond_fractions_by_best_methods_sorted_by_mendeleev = ( - get_bond_fractions( - self.CN_bond_count_by_best_methods_sorted_by_mendeleev - ) + self._CN_bond_fractions_by_best_methods_sorted_by_mendeleev = get_bond_fractions( + self.CN_bond_count_by_best_methods_sorted_by_mendeleev ) # Unique CN @@ -402,24 +385,14 @@ def compute_connections(self, cutoff_radius=10.0) -> None: self.CN_connections_by_min_dist_method ) - 
self._CN_avg_by_best_methods = compute_avg_CN( - self.CN_connections_by_best_methods - ) + self._CN_avg_by_best_methods = compute_avg_CN(self.CN_connections_by_best_methods) # Max CN - self._CN_max_by_min_dist_method = max( - self.CN_unique_values_by_min_dist_method - ) - self._CN_max_by_best_methods = max( - self.CN_unique_values_by_best_methods - ) + self._CN_max_by_min_dist_method = max(self.CN_unique_values_by_min_dist_method) + self._CN_max_by_best_methods = max(self.CN_unique_values_by_best_methods) # Min CN - self._CN_min_by_min_dist_method = min( - self.CN_unique_values_by_min_dist_method - ) - self._CN_min_by_best_methods = min( - self.CN_unique_values_by_best_methods - ) + self._CN_min_by_min_dist_method = min(self.CN_unique_values_by_min_dist_method) + self._CN_min_by_best_methods = min(self.CN_unique_values_by_best_methods) @property @ensure_connections diff --git a/src/cifkit/models/cif_ensemble.py b/src/cifkit/models/cif_ensemble.py index b82bb91..bee7b77 100644 --- a/src/cifkit/models/cif_ensemble.py +++ b/src/cifkit/models/cif_ensemble.py @@ -57,9 +57,7 @@ def __init__( # Process each file, handling exceptions that may occur self.logging_enabled = logging_enabled - file_paths = get_file_paths( - cif_dir_path, add_nested_files=add_nested_files - ) + file_paths = get_file_paths(cif_dir_path, add_nested_files=add_nested_files) self.dir_path = cif_dir_path if preprocess: @@ -70,9 +68,7 @@ def __init__( move_files_based_on_errors(cif_dir_path, file_paths) # Initialize new files after ill-formatted files are moved - self.file_paths = get_file_paths( - cif_dir_path, add_nested_files=add_nested_files - ) + self.file_paths = get_file_paths(cif_dir_path, add_nested_files=add_nested_files) self.file_count = len(self.file_paths) print(f"Initializing {self.file_count} Cif objects...") @@ -83,8 +79,7 @@ def __init__( ] else: self.cifs: list[Cif] = [ - Cif(file_path, is_formatted=True) - for file_path in self.file_paths + Cif(file_path, is_formatted=True) for 
file_path in self.file_paths ] print("Finished initialization!") @@ -336,9 +331,7 @@ def _filter_by_single_value(self, property_name: str, values: list): return cif_file_paths # With sets - def _filter_contains_any( - self, property_name: str, values: list - ) -> set[str]: + def _filter_contains_any(self, property_name: str, values: list) -> set[str]: cif_file_paths = set() for cif in self.cifs: property_value: str = getattr(cif, property_name) @@ -346,9 +339,7 @@ def _filter_contains_any( cif_file_paths.add(cif.file_path) return cif_file_paths - def _filter_exact_match( - self, property_name: str, values: list - ) -> set[str]: + def _filter_exact_match(self, property_name: str, values: list) -> set[str]: cif_file_paths = set() for cif in self.cifs: property_value: str = getattr(cif, property_name) @@ -388,33 +379,17 @@ def filter_by_elements_exact_matching(self, values: list[str]) -> set[str]: Filter by CN """ - def filter_by_CN_min_dist_method_containing( - self, values: list[int] - ) -> set[str]: - return self._filter_contains_any( - "CN_unique_values_by_min_dist_method", values - ) + def filter_by_CN_min_dist_method_containing(self, values: list[int]) -> set[str]: + return self._filter_contains_any("CN_unique_values_by_min_dist_method", values) - def filter_by_CN_min_dist_method_exact_matching( - self, values: list[int] - ) -> set[str]: - return self._filter_exact_match( - "CN_unique_values_by_min_dist_method", values - ) + def filter_by_CN_min_dist_method_exact_matching(self, values: list[int]) -> set[str]: + return self._filter_exact_match("CN_unique_values_by_min_dist_method", values) - def filter_by_CN_best_methods_containing( - self, values: list[int] - ) -> set[str]: - return self._filter_contains_any( - "CN_unique_values_by_best_methods", values - ) + def filter_by_CN_best_methods_containing(self, values: list[int]) -> set[str]: + return self._filter_contains_any("CN_unique_values_by_best_methods", values) - def filter_by_CN_best_methods_exact_matching( 
- self, values: list[int] - ) -> set[str]: - return self._filter_exact_match( - "CN_unique_values_by_best_methods", values - ) + def filter_by_CN_best_methods_exact_matching(self, values: list[int]) -> set[str]: + return self._filter_exact_match("CN_unique_values_by_best_methods", values) def _filter_by_range( self, property: str, min: float | int, max: float | int @@ -432,22 +407,16 @@ def _filter_by_range( def filter_by_min_distance( self, min_distance: float, max_distance: float ) -> set[str]: - return self._filter_by_range( - "shortest_distance", min_distance, max_distance - ) + return self._filter_by_range("shortest_distance", min_distance, max_distance) - def filter_by_supercell_count( - self, min_count: int, max_count: int - ) -> set[str]: + def filter_by_supercell_count(self, min_count: int, max_count: int) -> set[str]: return self._filter_by_range( "supercell_atom_count", min_count, max_count, ) - def move_cif_files( - self, file_paths: set[str], to_directory_path: str - ) -> None: + def move_cif_files(self, file_paths: set[str], to_directory_path: str) -> None: """Move a set of CIF files to a destination directory. Parameters @@ -468,9 +437,7 @@ def move_cif_files( """ move_files(to_directory_path, list(file_paths)) - def copy_cif_files( - self, file_paths: set[str], to_directory_path: str - ) -> None: + def copy_cif_files(self, file_paths: set[str], to_directory_path: str) -> None: """Copy a set of CIF files to a destination directory. Parameters @@ -565,9 +532,7 @@ def generate_tag_histogram(self, display=False, output_dir=None): output_dir, ) - def generate_space_group_number_histogram( - self, display=False, output_dir=None - ): + def generate_space_group_number_histogram(self, display=False, output_dir=None): """Generate a histogram of the 'space_group_number' property from CIF files. 
@@ -592,9 +557,7 @@ def generate_space_group_number_histogram( output_dir, ) - def generate_space_group_name_histogram( - self, display=False, output_dir=None - ): + def generate_space_group_name_histogram(self, display=False, output_dir=None): """Generate a histogram of the 'space_group_name' property from CIF files. @@ -619,9 +582,7 @@ def generate_space_group_name_histogram( output_dir, ) - def generate_supercell_size_histogram( - self, display=False, output_dir=None - ): + def generate_supercell_size_histogram(self, display=False, output_dir=None): """Generate a histogram of the 'supercell_count' property from CIF files. @@ -671,9 +632,7 @@ def generate_elements_histogram(self, display=False, output_dir=None): output_dir, ) - def generate_CN_by_min_dist_method_histogram( - self, display=False, output_dir=None - ): + def generate_CN_by_min_dist_method_histogram(self, display=False, output_dir=None): """Generate a histogram of the 'CN_by_min' property from CIF files. This method creates a histogram based on the 'CN_by_min' statistics of @@ -697,9 +656,7 @@ def generate_CN_by_min_dist_method_histogram( output_dir, ) - def generate_CN_by_best_methods_histogram( - self, display=False, output_dir=None - ): + def generate_CN_by_best_methods_histogram(self, display=False, output_dir=None): """Generate a histogram of the 'CN_by_best_methods' property from CIF files. @@ -724,9 +681,7 @@ def generate_CN_by_best_methods_histogram( output_dir, ) - def generate_composition_type_histogram( - self, display=False, output_dir=None - ): + def generate_composition_type_histogram(self, display=False, output_dir=None): """Generate a histogram of the 'composition_type' property from CIF files. 
@@ -751,9 +706,7 @@ def generate_composition_type_histogram( output_dir, ) - def generate_site_mixing_type_histogram( - self, display=False, output_dir=None - ): + def generate_site_mixing_type_histogram(self, display=False, output_dir=None): """Generate a histogram of the 'site_mixing_type' property from CIF files. diff --git a/src/cifkit/occupancy/mixing.py b/src/cifkit/occupancy/mixing.py index 86a77a9..6ee88ca 100644 --- a/src/cifkit/occupancy/mixing.py +++ b/src/cifkit/occupancy/mixing.py @@ -30,9 +30,7 @@ def get_site_mixing_type(site_labels: list[str], atom_site_info: dict) -> str: """Get file-level atomic site mixing info.""" is_full_occupancy = True - coord_occupancy_sum = compute_coord_occupancy_sum( - site_labels, atom_site_info - ) + coord_occupancy_sum = compute_coord_occupancy_sum(site_labels, atom_site_info) # Now check summed occupancies for _, occupancy_sum in coord_occupancy_sum.items(): @@ -62,9 +60,7 @@ def get_mixing_type_per_pair_dict( site_labels: list[str], label_pairs: list[str], atom_site_info: dict ): """Return a dictionary, alphabetically sorted pair.""" - coord_occupancy_sum = compute_coord_occupancy_sum( - site_labels, atom_site_info - ) + coord_occupancy_sum = compute_coord_occupancy_sum(site_labels, atom_site_info) # Store categorizy per pair atom_site_pair_dict = {} @@ -117,17 +113,16 @@ def get_mixing_type_per_pair_dict( # Check 1. One of the labels is deficient # Check 2. Both labels are not atomic mixed if (is_first_label_site_deficient or is_second_label_deficient) and ( - not is_first_label_atomic_mixed - and not is_second_label_atomic_mixed + not is_first_label_atomic_mixed and not is_second_label_atomic_mixed ): atom_site_pair_dict[pair] = "deficiency_without_atomic_mixing" # Check 1. Both labels are not deficient # Check 2. 
At least one label is atomic mixed # Assign "2" for "full_occupancy_atomic_mixing" - if ( - not is_first_label_site_deficient and not is_second_label_deficient - ) and (is_first_label_atomic_mixed or is_second_label_atomic_mixed): + if (not is_first_label_site_deficient and not is_second_label_deficient) and ( + is_first_label_atomic_mixed or is_second_label_atomic_mixed + ): atom_site_pair_dict[pair] = "full_occupancy_atomic_mixing" # Assign "1" for "deficiency_with_atomic_mixing" diff --git a/src/cifkit/preprocessors/environment.py b/src/cifkit/preprocessors/environment.py index 12c0379..efc71a7 100644 --- a/src/cifkit/preprocessors/environment.py +++ b/src/cifkit/preprocessors/environment.py @@ -96,11 +96,9 @@ def get_nearest_dists_per_site( return dist_dict, dist_set -def get_most_connected_point_per_site( - label: str, dist_dict: dict, dist_set: set -): - """Identify the reference point with the highest number of connections within - the 50 shortest distances from a set of distances.""" +def get_most_connected_point_per_site(label: str, dist_dict: dict, dist_set: set): + """Identify the reference point with the highest number of connections + within the 50 shortest distances from a set of distances.""" sorted_unique_dists = sorted(dist_set) # Get the 30 shortest distances shortest_dists = sorted_unique_dists[:50] @@ -144,9 +142,7 @@ def remove_duplicate_connections(connections): unique_list = [] for item in value: # The tuple representing the endpoint coordinates is item[3] - coords = tuple( - item[3] - ) # Need to convert list to tuple to use it in a set + coords = tuple(item[3]) # Need to convert list to tuple to use it in a set if coords not in seen: seen.add(coords) unique_list.append(item) diff --git a/src/cifkit/preprocessors/environment_util.py b/src/cifkit/preprocessors/environment_util.py index e4042b6..40b786f 100644 --- a/src/cifkit/preprocessors/environment_util.py +++ b/src/cifkit/preprocessors/environment_util.py @@ -6,8 +6,8 @@ def 
flat_site_connections( site_connections: dict, ) -> list[tuple[tuple[str, str], float]]: - """Transform site connections into a sorted list of tuples, each containing a - pair of alphabetically distance.""" + """Transform site connections into a sorted list of tuples, each containing + a pair of alphabetically distance.""" flattened_points = [] for site_label, connections in site_connections.items(): for connection in connections: diff --git a/src/cifkit/preprocessors/error.py b/src/cifkit/preprocessors/error.py index dc57d16..b912739 100644 --- a/src/cifkit/preprocessors/error.py +++ b/src/cifkit/preprocessors/error.py @@ -55,13 +55,9 @@ def move_files_based_on_errors(dir_path, file_paths): else: error_type = "error_others" - make_directory_and_move( - file_path, error_directories[error_type], filename - ) + make_directory_and_move(file_path, error_directories[error_type], filename) num_files_moved[error_type] += 1 - print( - f"File {filename} moved to '{error_type}' due to: {error_message}" - ) + print(f"File {filename} moved to '{error_type}' due to: {error_message}") # Display the number of files moved to each folder print("\nSUMMARY") diff --git a/src/cifkit/preprocessors/format.py b/src/cifkit/preprocessors/format.py index bfda52d..0e6ae94 100644 --- a/src/cifkit/preprocessors/format.py +++ b/src/cifkit/preprocessors/format.py @@ -4,10 +4,10 @@ def preprocess_label_element_loop_values(file_path: str) -> None: """Modify the atomic label site text in a .cif file. - .cif files may have the atomic labels in symbolic forms such as "M1" and some - also have two elements provided such as "In1,Co3B". Each case is handled with - specific examples demonstrated in the source and test code. - + .cif files may have the atomic labels in symbolic forms such as "M1" + and some also have two elements provided such as "In1,Co3B". Each + case is handled with specific examples demonstrated in the source + and test code. 
""" is_cif_file_updated = False cif_block = cif_parser.get_cif_block(file_path) @@ -22,14 +22,11 @@ def preprocess_label_element_loop_values(file_path: str) -> None: for line in content_lines: line = line.strip() site_label, atom_type_symbol = line.split()[:2] - atom_type_from_label = string_parser.get_atom_type_from_label( - site_label - ) + atom_type_from_label = string_parser.get_atom_type_from_label(site_label) unique_elements = cif_parser.get_unique_elements_from_loop(loop_values) - """ - Type 8. - Ex) 1817279.cif + """Type 8. Ex) 1817279.cif. + In1,Co3B Co 4 c 0.75 0.25 0.59339 0.07(3) -> Co13B Co 4 c 0.75 0.25 0.59339 0.07(3) @@ -69,9 +66,7 @@ def preprocess_label_element_loop_values(file_path: str) -> None: and site_label[2].isalpha() ): # Uppercase the last character - modified_label = ( - site_label[0] + site_label[1] + site_label[2].upper() - ) + modified_label = site_label[0] + site_label[1] + site_label[2].upper() # Modify the label line = line.replace(site_label, modified_label) # Modify the line @@ -83,7 +78,6 @@ def preprocess_label_element_loop_values(file_path: str) -> None: M1 Th 4 a 0 0 0 0.99 -> ThM1 Th 4 a 0 0 0 0.99 - """ if ( len(site_label) == 2 @@ -112,9 +106,7 @@ def preprocess_label_element_loop_values(file_path: str) -> None: and site_label[-2].isdigit() and site_label[-3].isalpha() ): - new_label = site_label.replace( - atom_type_from_label, atom_type_symbol - ) + new_label = site_label.replace(atom_type_from_label, atom_type_symbol) line = line.replace(site_label, new_label) # Modify the line is_cif_file_updated = True @@ -127,9 +119,7 @@ def preprocess_label_element_loop_values(file_path: str) -> None: """ if len(site_label) == 1 and site_label[-1].isalpha(): - new_label = site_label.replace( - atom_type_from_label, atom_type_symbol - ) + new_label = site_label.replace(atom_type_from_label, atom_type_symbol) line = line.replace(site_label, new_label) is_cif_file_updated = True @@ -148,9 +138,7 @@ def 
preprocess_label_element_loop_values(file_path: str) -> None: if site_label.lower() not in atom_type_symbol.lower(): # print(atom_type_label.lower(), atom_type_symbol.lower()) # Do not use get_atom_type since replace the entire label - new_label = site_label.replace( - site_label, atom_type_symbol - ) + new_label = site_label.replace(site_label, atom_type_symbol) line = line.replace(site_label, new_label) is_cif_file_updated = True @@ -169,13 +157,8 @@ def preprocess_label_element_loop_values(file_path: str) -> None: and site_label[2].isdigit() ): first_two_label_characters = site_label[0] + site_label[1] - if ( - first_two_label_characters.lower() - == atom_type_symbol.lower() - ): - modified_label = ( - site_label[0] + site_label[1].lower() + site_label[2] - ) + if first_two_label_characters.lower() == atom_type_symbol.lower(): + modified_label = site_label[0] + site_label[1].lower() + site_label[2] line = line.replace(site_label, modified_label) is_cif_file_updated = True @@ -195,13 +178,8 @@ def preprocess_label_element_loop_values(file_path: str) -> None: and site_label[3].isalpha() ): first_two_label_characters = site_label[0] + site_label[1] - if ( - first_two_label_characters.lower() - != atom_type_symbol.lower() - ): - modified_label = ( - atom_type_symbol + site_label[2] + site_label[3] - ) + if first_two_label_characters.lower() != atom_type_symbol.lower(): + modified_label = atom_type_symbol + site_label[2] + site_label[3] line = line.replace(site_label, modified_label) is_cif_file_updated = True @@ -219,10 +197,7 @@ def preprocess_label_element_loop_values(file_path: str) -> None: and site_label[2].isdigit() ): first_two_label_characters = site_label[0] + site_label[1] - if ( - first_two_label_characters.lower() - != atom_type_symbol.lower() - ): + if first_two_label_characters.lower() != atom_type_symbol.lower(): modified_label = atom_type_symbol + site_label[2] line = line.replace(site_label, modified_label) is_cif_file_updated = True @@ -236,9 
+211,7 @@ def preprocess_label_element_loop_values(file_path: str) -> None: ( start_index, end_index, - ) = cif_parser.get_start_end_line_indexes( - file_path, "_atom_site_occupancy" - ) + ) = cif_parser.get_start_end_line_indexes(file_path, "_atom_site_occupancy") # Replace the specific section in original_lines with modified_lines original_lines[start_index:end_index] = modified_lines diff --git a/src/cifkit/preprocessors/supercell.py b/src/cifkit/preprocessors/supercell.py index f1d8884..8686d98 100644 --- a/src/cifkit/preprocessors/supercell.py +++ b/src/cifkit/preprocessors/supercell.py @@ -74,8 +74,8 @@ def get_unitcell_coords_after_sym_operations_per_label( atom_site_fracs: tuple[float, float, float], atom_site_label: str, ) -> list[tuple[float, float, float, str]]: - """Generate a list of coordinates for each atom site after applying symmetry - operations.""" + """Generate a list of coordinates for each atom site after applying + symmetry operations.""" symmetry_operations = find_symmetry_operations(block) if symmetry_operations is not None: diff --git a/src/cifkit/utils/bond_pair.py b/src/cifkit/utils/bond_pair.py index c8c7631..278fed4 100644 --- a/src/cifkit/utils/bond_pair.py +++ b/src/cifkit/utils/bond_pair.py @@ -17,7 +17,8 @@ def get_bond_pairs(labels: list[str]) -> set[tuple[str, str]]: def get_pairs_sorted_by_mendeleev( labels: list[str], ) -> set[tuple[str, str]]: - """Generate all unique pairs, each tuple sorted by the Mendeleeve number.""" + """Generate all unique pairs, each tuple sorted by the Mendeleeve + number.""" pairs = get_bond_pairs(labels) sorted_pairs = {order_tuple_pair_by_mendeleev(pair) for pair in pairs} diff --git a/src/cifkit/utils/cif_editor.py b/src/cifkit/utils/cif_editor.py index 33dfba8..832833d 100644 --- a/src/cifkit/utils/cif_editor.py +++ b/src/cifkit/utils/cif_editor.py @@ -13,14 +13,11 @@ def remove_author_loop(file_path: str) -> None: caused by a wrongly formatted author block. This is a common issue in PCD files. 
- """ ( start_index, end_index, - ) = cif_parser.get_start_end_line_indexes( - file_path, "_publ_author_address" - ) + ) = cif_parser.get_start_end_line_indexes(file_path, "_publ_author_address") with open(file_path, "r") as f: original_lines = f.readlines() @@ -36,7 +33,6 @@ def add_hashtag_in_first_line(file_path: str): """ICSD files start with (C) which causes parsing issues with gemmi. If that is the case, add a # before (C) to fix the parsing issue. - """ # First, check if the file exists and is a CIF file if not os.path.exists(file_path) or not file_path.endswith(".cif"): @@ -61,7 +57,6 @@ def edit_cif_file_based_on_db(file_path: str): PCD: Remove author loop and preprocess label element loop values ICSD: Add a hashtag in the first line - """ db_source = get_cif_db_source(file_path) if db_source == "ICSD": diff --git a/src/cifkit/utils/cif_parser.py b/src/cifkit/utils/cif_parser.py index 06d47ec..354ff79 100644 --- a/src/cifkit/utils/cif_parser.py +++ b/src/cifkit/utils/cif_parser.py @@ -35,8 +35,7 @@ def get_unitcell_lengths( ] lengths = [ - get_string_to_formatted_float(block.find_value(key)) - for key in keys_lengths + get_string_to_formatted_float(block.find_value(key)) for key in keys_lengths ] return lengths @@ -53,10 +52,7 @@ def get_unitcell_angles_rad( "_cell_angle_gamma", ] - angles = [ - get_string_to_formatted_float(block.find_value(key)) - for key in keys_angles - ] + angles = [get_string_to_formatted_float(block.find_value(key)) for key in keys_angles] return unit.get_radians_from_degrees(angles) @@ -81,7 +77,6 @@ def get_loop_values(block: Block) -> list[Column]: """Retrieve a list of predefined loop tags for atomic site description. If a tag is not found, None is inserted in its place in the list. 
- """ loop_tags = get_loop_tags() @@ -100,7 +95,8 @@ def get_unique_label_count(loop_values: list) -> int: def get_unique_elements_from_loop(loop_values: list) -> set[str]: - """Return a list of alphabetically sorted unique elements from loop values.""" + """Return a list of alphabetically sorted unique elements from loop + values.""" num_atom_labels = get_unique_label_count(loop_values) unique_elements = set() for i in range(num_atom_labels): @@ -157,9 +153,7 @@ def get_loop_value_dict( return loop_value_dict -def get_start_end_line_indexes( - file_path: str, start_keyword: str -) -> tuple[int, int]: +def get_start_end_line_indexes(file_path: str, start_keyword: str) -> tuple[int, int]: """Find the starting and ending indexes of the lines in atom_site_loop.""" with open(file_path, "r") as f: @@ -186,13 +180,10 @@ def get_start_end_line_indexes( def get_line_content_from_tag(file_path: str, start_keyword: str) -> list[str]: """Returns a list containing file content with starting keyword. - This function only appropriate for PCD format for removing the author - section. - + This function only appropriate for PCD format for removing the + author section. """ - start_index, end_index = get_start_end_line_indexes( - file_path, start_keyword - ) + start_index, end_index = get_start_end_line_indexes(file_path, start_keyword) if start_index is None or end_index is None: return None @@ -235,9 +226,8 @@ def get_unique_formulas_structures_weights_s_groups( ) -> tuple[set[str], set[str], set[float], set[int], set[str]]: """Find all unique structures, formulas, weights, space groups. - This function requires no initialization and should be more efficient in - analyzing and filtering a dataset. - + This function requires no initialization and should be more + efficient in analyzing and filtering a dataset. 
""" formulas = set() structures = set() @@ -293,8 +283,8 @@ def get_tag_from_third_line(file_path: str, db_source="PCD") -> str: def parse_atom_site_occupancy_info(file_path: str) -> dict: - """Parse atom site loop information including element, occupancy, fractional - coordinates, multiplicity, and wyckoff symbol.""" + """Parse atom site loop information including element, occupancy, + fractional coordinates, multiplicity, and wyckoff symbol.""" block = get_cif_block(file_path) loop_vals = get_loop_values(block) label_count = len(loop_vals[0]) @@ -304,32 +294,20 @@ def parse_atom_site_occupancy_info(file_path: str) -> dict: for i in range(label_count): # Safely extract data, assuming the possibility of None values in columns atom_site_label = loop_vals[0][i] if loop_vals[0] else None - element = ( - strip_numbers_and_symbols(loop_vals[1][i]) - if loop_vals[1] - else None - ) + element = strip_numbers_and_symbols(loop_vals[1][i]) if loop_vals[1] else None symmetry_multiplicity = int(loop_vals[2][i]) if loop_vals[2] else None wyckoff_symbol = loop_vals[3][i] if loop_vals[3] else None x_frac_coord = ( - get_string_to_formatted_float(loop_vals[4][i]) - if loop_vals[4] - else None + get_string_to_formatted_float(loop_vals[4][i]) if loop_vals[4] else None ) y_frac_coord = ( - get_string_to_formatted_float(loop_vals[5][i]) - if loop_vals[5] - else None + get_string_to_formatted_float(loop_vals[5][i]) if loop_vals[5] else None ) z_frac_coord = ( - get_string_to_formatted_float(loop_vals[6][i]) - if loop_vals[6] - else None + get_string_to_formatted_float(loop_vals[6][i]) if loop_vals[6] else None ) site_occupancy = ( - get_string_to_formatted_float(loop_vals[7][i]) - if loop_vals[7] - else None + get_string_to_formatted_float(loop_vals[7][i]) if loop_vals[7] else None ) parsed_data[atom_site_label] = { diff --git a/src/cifkit/utils/error_messages.py b/src/cifkit/utils/error_messages.py index 8112c47..d5222f1 100644 --- a/src/cifkit/utils/error_messages.py +++ 
b/src/cifkit/utils/error_messages.py @@ -5,26 +5,16 @@ class GeneralError(Enum): INVALID_TYPE = "The formula/label must be a string." EMPTY_STRING_INPUT = "The formula/label cannot be empty." - NON_ALPHABETIC_START = ( - "The first character must be alphabetic after trimming." - ) - NON_MATCHING_ELEMENT = ( - "No matching element was parsed from the site label." - ) + NON_ALPHABETIC_START = "The first character must be alphabetic after trimming." + NON_MATCHING_ELEMENT = "No matching element was parsed from the site label." INVALID_CIF_BLOCK = "The CIF block should not be None." class CifParserError(Enum): - INVALID_LOOP_TAGS = ( - "The returned loop tags do not match the expected tags." - ) + INVALID_LOOP_TAGS = "The returned loop tags do not match the expected tags." DUPLICATE_LABELS = "The file contains duplicate atom site labels." - INVALID_PARSED_ELEMENT = ( - "The element was not correctly parsed from the site label." - ) - SYMMETRY_OPERATION_ERROR = ( - "An error occurred while processing symmetry operation" - ) + INVALID_PARSED_ELEMENT = "The element was not correctly parsed from the site label." + SYMMETRY_OPERATION_ERROR = "An error occurred while processing symmetry operation" MISSING_COORDINATES = "Missing atomic coordinates" MISSING_LOOP_VALUES = "Wrong number of values in loop" @@ -35,6 +25,4 @@ class FileError(Enum): class OccupancyError(Enum): - INVALID_MIXING_TYPE = ( - "The file cannot be categorized into one of the 4 mixing types." - ) + INVALID_MIXING_TYPE = "The file cannot be categorized into one of the 4 mixing types." 
diff --git a/src/cifkit/utils/folder.py b/src/cifkit/utils/folder.py index ea9babe..c902f9a 100644 --- a/src/cifkit/utils/folder.py +++ b/src/cifkit/utils/folder.py @@ -16,20 +16,14 @@ def get_file_count(dir_path: str, ext=".cif") -> int: return len(glob.glob(os.path.join(dir_path, f"*{ext}"))) -def get_file_paths( - dir_path: str, ext=".cif", add_nested_files=False -) -> list[str]: +def get_file_paths(dir_path: str, ext=".cif", add_nested_files=False) -> list[str]: """Return a list of file paths with a given extension from a directory.""" if add_nested_files: # Traverse through directory and subdirectories files_list = [] for root, dirs, files in os.walk(dir_path): files_list.extend( - [ - os.path.join(root, file) - for file in files - if file.endswith(ext) - ] + [os.path.join(root, file) for file in files if file.endswith(ext)] ) return files_list else: @@ -65,36 +59,32 @@ def check_file_not_empty(file_path: str) -> bool: """Check if the specified file is not empty.""" if os.path.getsize(file_path) == 0: # Using enum value and formatting it with file_path - raise ValueError( - FileError.FILE_IS_EMPTY.value.format(file_path=file_path) - ) + raise ValueError(FileError.FILE_IS_EMPTY.value.format(file_path=file_path)) return True def move_files(to_directory: str, file_path_list: list[str]) -> None: - """Move files to another folder, creating the folder if it doesn't exist.""" + """Move files to another folder, creating the folder if it doesn't + exist.""" # Ensure the destination directory exists os.makedirs(to_directory, exist_ok=True) # Move each file in the list for file_path in file_path_list: - dest_file_path = os.path.join( - to_directory, os.path.basename(file_path) - ) + dest_file_path = os.path.join(to_directory, os.path.basename(file_path)) # Move file to new directory shutil.move(file_path, dest_file_path) def copy_files(to_directory: str, file_path_list: list[str]) -> None: - """Copy files to another folder, creating the folder if it doesn't exist.""" + 
"""Copy files to another folder, creating the folder if it doesn't + exist.""" # Ensure the destination directory exists os.makedirs(to_directory, exist_ok=True) # Copy each file in the list for file_path in file_path_list: # Construct full destination path - dest_file_path = os.path.join( - to_directory, os.path.basename(file_path) - ) + dest_file_path = os.path.join(to_directory, os.path.basename(file_path)) # Copy file to new directory shutil.copy(file_path, dest_file_path) diff --git a/src/cifkit/utils/formula.py b/src/cifkit/utils/formula.py index 27296e7..b304399 100644 --- a/src/cifkit/utils/formula.py +++ b/src/cifkit/utils/formula.py @@ -17,7 +17,8 @@ def get_validated_formula_label(formula: str) -> str: def get_parsed_formula(formula: str) -> list[tuple[str, str]]: - """Return a list of tuples, each tuple containing an element and its index.""" + """Return a list of tuples, each tuple containing an element and its + index.""" trimmed_formula = get_validated_formula_label(formula) pattern = r"([A-Z][a-z]*)(\d*\.?\d*)" elements = re.findall(pattern, trimmed_formula) @@ -43,9 +44,7 @@ def get_normalized_formula(formula: str, demical_places=3) -> str: else: normalized_index = float(element_index) / index_sum - normalized_formula_parts.append( - f"{element}{normalized_index:.{demical_places}f}" - ) + normalized_formula_parts.append(f"{element}{normalized_index:.{demical_places}f}") # Join all parts into one string for the normalized formula normalized_formula = "".join(normalized_formula_parts) diff --git a/src/cifkit/utils/sort.py b/src/cifkit/utils/sort.py index 793f74a..778bf35 100644 --- a/src/cifkit/utils/sort.py +++ b/src/cifkit/utils/sort.py @@ -4,8 +4,7 @@ def sort_element_pair_tuples( """Alphabetically sort the pair tuple of elements.""" # First, sort the elements within each tuple alp_sorted_tuples = [ - ((min(a, b), max(a, b)), distance) - for (a, b), distance in element_pair_tuples + ((min(a, b), max(a, b)), distance) for (a, b), distance in 
element_pair_tuples ] # Priotize alphabetic sort, and sort by distance diff --git a/src/cifkit/utils/string_parser.py b/src/cifkit/utils/string_parser.py index d3fee64..d02506d 100644 --- a/src/cifkit/utils/string_parser.py +++ b/src/cifkit/utils/string_parser.py @@ -27,11 +27,7 @@ def get_string_to_formatted_float(str_value: str) -> float: """Remove parentheses from a value string and convert to float.""" str_value = str_value.strip() - return ( - float(str_value.split("(")[0]) - if "(" in str_value - else float(str_value) - ) + return float(str_value.split("(")[0]) if "(" in str_value else float(str_value) def trim_string(formula: str) -> str: @@ -48,6 +44,5 @@ def strip_numbers_and_symbols(value: str) -> str: """Removes all digits and '+' and '-' characters from the input string. Some ICSD, COD have charges in atomic site element e.g. "Fe0+". - """ return re.sub(r"[\d\+\-]", "", value) diff --git a/src/cifkit/utils/unit.py b/src/cifkit/utils/unit.py index 1bc6bff..e778f6e 100644 --- a/src/cifkit/utils/unit.py +++ b/src/cifkit/utils/unit.py @@ -17,8 +17,8 @@ def fractional_to_cartesian( cell_lengths: list[float], cell_angles_rad: list[float], ) -> list[float]: - """Convert fractional coordinates to Cartesian coordinates using cell lengths - and angles.""" + """Convert fractional coordinates to Cartesian coordinates using cell + lengths and angles.""" alpha, beta, gamma = cell_angles_rad # Calculate the components of the transformation matrix @@ -64,7 +64,6 @@ def round_dict_values(dict, precision=3): if dict is None: return None rounded_dict = { - k: round(v, precision) if isinstance(v, float) else v - for k, v in dict.items() + k: round(v, precision) if isinstance(v, float) else v for k, v in dict.items() } return rounded_dict diff --git a/src/cifkit/version.py b/src/cifkit/version.py index 5c0a266..1bb2142 100644 --- a/src/cifkit/version.py +++ b/src/cifkit/version.py @@ -1,3 +1,25 @@ +#!/usr/bin/env python 
+############################################################################## +# +# (c) 2025 Sangjoon Lee. +# All rights reserved. +# +# File coded by: Sangjoon Lee, Anton Oliynyk, and community contributors. +# +# See GitHub contributions for a more detailed list of contributors. +# https://github.com/bobleesj/cifkit/graphs/contributors +# +# See LICENSE.rst for license information. +# +############################################################################## +"""Definition of __version__.""" + +# We do not use the other three variables, but can be added back if needed. +# __all__ = ["__date__", "__git_commit__", "__timestamp__", "__version__"] + +# obtain version information from importlib.metadata import version __version__ = version("cifkit") + +# End of file diff --git a/test-cifkit.ipynb b/test-cifkit.ipynb deleted file mode 100644 index 9d87bb1..0000000 --- a/test-cifkit.ipynb +++ /dev/null @@ -1,55 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from cifkit import CifEnsemble, Cif\n", - "\n", - "ensemble_CCDC = CifEnsemble(\"tests/data/cif/sources/CCDC\")\n", - "ensemble_COD = CifEnsemble(\"tests/data/cif/sources/COD\")\n", - "ensemble_ICSD = CifEnsemble(\"tests/data/cif/sources/ICSD\")\n", - "ensemble_MP = CifEnsemble(\"tests/data/cif/sources/MP\")\n", - "ensemble_PCD = CifEnsemble(\"tests/data/cif/sources/PCD\")\n", - "ensemble_MS = CifEnsemble(\"tests/data/cif/sources/MS\")\n", - "\n", - "print(\"ensemble_CCDC:\", ensemble_CCDC.CN_unique_values_by_best_methods)\n", - "print(\"ensemble_COD:\", ensemble_COD.CN_unique_values_by_best_methods)\n", - "print(\"ensemble_ICSD:\", ensemble_ICSD.CN_unique_values_by_best_methods)\n", - "print(\"ensemble_MP:\", ensemble_MP.CN_unique_values_by_best_methods)\n", - "print(\"ensemble_PCD:\", ensemble_PCD.CN_unique_values_by_best_methods)\n", - "print(\"ensemble_MS:\", ensemble_MS.CN_unique_values_by_best_methods)\n" - ] - }, - { - 
"cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "cifkit_env", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.7" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/tests/conftest.py b/tests/conftest.py index 0e9267e..7d9cc66 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -258,9 +258,7 @@ def max_gaps_per_label_URhIn(): @pytest.fixture(scope="module") -def CN_connections_by_min_dist_URhIn( - max_gaps_per_label_URhIn, connections_URhIn -): +def CN_connections_by_min_dist_URhIn(max_gaps_per_label_URhIn, connections_URhIn): return filter.get_CN_connections_by_min_dist_method( max_gaps_per_label_URhIn, connections_URhIn ) diff --git a/tests/core/coordination/test_composition.py b/tests/core/coordination/test_composition.py index 517a34b..93be8f0 100644 --- a/tests/core/coordination/test_composition.py +++ b/tests/core/coordination/test_composition.py @@ -19,9 +19,7 @@ def test_get_bond_counts(CN_connections_by_min_dist_URhIn): } elements = {"In", "Rh", "U"} - assert ( - get_bond_counts(elements, CN_connections_by_min_dist_URhIn) == expected - ) + assert get_bond_counts(elements, CN_connections_by_min_dist_URhIn) == expected @pytest.mark.slow @@ -68,10 +66,7 @@ def test_get_bond_fraction(bond_counts_CN): @pytest.mark.slow def test_get_coordination_numbers(CN_connections_by_min_dist_URhIn): expected = {"In1": 14, "Rh1": 9, "Rh2": 9, "U1": 11} - assert ( - count_connections_per_site(CN_connections_by_min_dist_URhIn) - == expected - ) + assert count_connections_per_site(CN_connections_by_min_dist_URhIn) == expected def test_get_average_coordination_number( diff --git 
a/tests/core/coordination/test_filter.py b/tests/core/coordination/test_filter.py index 21196d7..4a7ac0b 100644 --- a/tests/core/coordination/test_filter.py +++ b/tests/core/coordination/test_filter.py @@ -36,9 +36,7 @@ def test_find_best_polyhedron(max_gaps_per_label_URhIn, connections_URhIn): # Iterate over the keys and check only the specified fields for key, expected in expected_values.items(): assert result[key]["method_used"] == expected["method_used"] - assert ( - result[key]["number_of_vertices"] == expected["number_of_vertices"] - ) + assert result[key]["number_of_vertices"] == expected["number_of_vertices"] assert ( result[key]["distance_from_avg_point_to_center"] == expected["distance_from_avg_point_to_center"] diff --git a/tests/core/coordination/test_geometry.py b/tests/core/coordination/test_geometry.py index 735bb05..470a9e4 100644 --- a/tests/core/coordination/test_geometry.py +++ b/tests/core/coordination/test_geometry.py @@ -14,9 +14,7 @@ def test_get_polyhedron_coordinates_labels( ( polyhedron_points, vertex_labels, - ) = get_polyhedron_coordinates_labels( - CN_connections_by_min_dist_URhIn, "U1" - ) + ) = get_polyhedron_coordinates_labels(CN_connections_by_min_dist_URhIn, "U1") assert vertex_labels == [ "Rh1", diff --git a/tests/core/coordination/test_site.py b/tests/core/coordination/test_site.py index 298a00d..2c20035 100644 --- a/tests/core/coordination/test_site.py +++ b/tests/core/coordination/test_site.py @@ -2,8 +2,6 @@ def test_get_min_distance_pair(connections_URhIn): - """ - Return the shortest distance - """ + """Return the shortest distance.""" min_dist_tuple = get_min_distance_pair(connections_URhIn) assert min_dist_tuple == (("In", "Rh"), 2.697) diff --git a/tests/core/data/test_radius_handler.py b/tests/core/data/test_radius_handler.py index a51354f..6d53727 100644 --- a/tests/core/data/test_radius_handler.py +++ b/tests/core/data/test_radius_handler.py @@ -33,9 +33,7 @@ def test_compute_radius_sum(radius_data_URhIn, 
radius_sum_data_URhIn): # Assert each element and sub-element individually for element, radii in expected.items(): for key, value in radii.items(): - assert combined_radii[element][key] == pytest.approx( - value, abs=0.001 - ) + assert combined_radii[element][key] == pytest.approx(value, abs=0.001) @pytest.mark.parametrize( diff --git a/tests/core/models/test_cif.py b/tests/core/models/test_cif.py index f75aa3f..72c1b00 100644 --- a/tests/core/models/test_cif.py +++ b/tests/core/models/test_cif.py @@ -162,10 +162,7 @@ def test_init_with_log(caplog): with caplog.at_level(logging.INFO): cif = Cif(file_path, logging_enabled=True) assert "Preprocessing tests/data/cif/URhIn.cif" in caplog.text - assert ( - "Parsing .cif and generating supercell for URhIn.cif" - in caplog.text - ) + assert "Parsing .cif and generating supercell for URhIn.cif" in caplog.text cif.compute_connections() assert "Computing pair distances for URhIn.cif" in caplog.text @@ -607,9 +604,7 @@ def test_init_atomic_mixing_deficiency_without_atomic_mixing(): def print_connected_points(all_labels_connections): - """ - Utility function for printing connections per site label - """ + """Utility function for printing connections per site label.""" for label, connections in all_labels_connections.items(): print(f"\nAtom site {label}:") for ( @@ -625,8 +620,8 @@ def print_connected_points(all_labels_connections): def test_init_atomic_mixing(): file_path = "tests/data/cif/atomic_mixing/261241.cif" cif = Cif(file_path) - polyhedron_points, vertex_labels = ( - cif.get_polyhedron_labels_by_CN_best_methods("CoM1") + polyhedron_points, vertex_labels = cif.get_polyhedron_labels_by_CN_best_methods( + "CoM1" ) assert len(polyhedron_points) == 13 assert len(vertex_labels) == 13 diff --git a/tests/core/models/test_cif_ensemble.py b/tests/core/models/test_cif_ensemble.py index 7d9d5e9..5725537 100644 --- a/tests/core/models/test_cif_ensemble.py +++ b/tests/core/models/test_cif_ensemble.py @@ -78,13 +78,8 @@ def 
test_distances_supercell_size(cif_ensemble_test: CifEnsemble): ] ) - assert ( - set(cif_ensemble_test.minimum_distances) == expected_minimum_distances - ) - assert ( - set(cif_ensemble_test.supercell_atom_counts) - == expected_supercell_atom_counts - ) + assert set(cif_ensemble_test.minimum_distances) == expected_minimum_distances + assert set(cif_ensemble_test.supercell_atom_counts) == expected_supercell_atom_counts """ @@ -133,9 +128,7 @@ def test_filter_by_value(cif_ensemble_test: CifEnsemble): } # Site mixing types - assert cif_ensemble_test.filter_by_site_mixing_types( - ["full_occupancy"] - ) == { + assert cif_ensemble_test.filter_by_site_mixing_types(["full_occupancy"]) == { "tests/data/cif/ensemble_test/300169.cif", "tests/data/cif/ensemble_test/300170.cif", "tests/data/cif/ensemble_test/260171.cif", @@ -181,9 +174,7 @@ def test_filter_by_elements(cif_ensemble_test): "tests/data/cif/ensemble_test/300169.cif", } - assert cif_ensemble_test.filter_by_elements_exact_matching( - ["Ge", "Ru", "La"] - ) == { + assert cif_ensemble_test.filter_by_elements_exact_matching(["Ge", "Ru", "La"]) == { "tests/data/cif/ensemble_test/300169.cif", } @@ -217,9 +208,7 @@ def test_filter_by_CN_dist_method_containing( def test_filter_by_CN_dist_method_exact_matching( cif_ensemble_test: CifEnsemble, ): - assert cif_ensemble_test.filter_by_CN_min_dist_method_exact_matching( - [14] - ) == { + assert cif_ensemble_test.filter_by_CN_min_dist_method_exact_matching([14]) == { "tests/data/cif/ensemble_test/260171.cif", "tests/data/cif/ensemble_test/250709.cif", "tests/data/cif/ensemble_test/250697.cif", @@ -241,10 +230,7 @@ def test_filter_by_CN_best_methods_containing( def test_filter_by_CN_best_methods_exact_matching( cif_ensemble_test: CifEnsemble, ): - assert ( - cif_ensemble_test.filter_by_CN_best_methods_exact_matching([10]) - == set() - ) + assert cif_ensemble_test.filter_by_CN_best_methods_exact_matching([10]) == set() """# @@ -262,7 +248,6 @@ def 
test_filter_by_CN_best_methods_exact_matching( {'Ge1': 5, 'Ru1': 12, 'Ce1': 16} """ - """ Test filter by value """ @@ -313,9 +298,7 @@ def test_move_files(tmp_path: Path, cif_ensemble_test: CifEnsemble): cif_ensemble_test.move_cif_files(file_paths, dest_dir_str) assert get_file_count(dest_dir_str) == initial_file_count - len(file_paths) - cif_ensemble_test.move_cif_files( - set(get_file_paths(dest_dir_str)), initial_dir_path - ) + cif_ensemble_test.move_cif_files(set(get_file_paths(dest_dir_str)), initial_dir_path) assert get_file_count(initial_dir_path) == initial_file_count diff --git a/tests/core/occupancy/test_mixing.py b/tests/core/occupancy/test_mixing.py index b7c337c..0f964c5 100644 --- a/tests/core/occupancy/test_mixing.py +++ b/tests/core/occupancy/test_mixing.py @@ -8,10 +8,6 @@ get_site_mixing_type, ) -""" -Test atomic mixing at the file level -""" - @pytest.mark.fast def test_full_occupancy(cif_URhIn): @@ -24,27 +20,21 @@ def test_full_occupancy(cif_URhIn): @pytest.mark.fast def test_deficiency_without_atomic_mixing(): cif = Cif("tests/data/cif/occupancy/527000.cif") - file_mixing_type = get_site_mixing_type( - cif.site_labels, cif.atom_site_info - ) + file_mixing_type = get_site_mixing_type(cif.site_labels, cif.atom_site_info) assert file_mixing_type == "deficiency_without_atomic_mixing" @pytest.mark.fast def test_full_occupancy_atomic_mixing(): cif = Cif("tests/data/cif/occupancy/529848.cif") - file_mixing_type = get_site_mixing_type( - cif.site_labels, cif.atom_site_info - ) + file_mixing_type = get_site_mixing_type(cif.site_labels, cif.atom_site_info) assert file_mixing_type == "full_occupancy_atomic_mixing" @pytest.mark.fast def test_deficiency_and_atomic_mixing(): cif = Cif("tests/data/cif/occupancy/554324.cif") - file_mixing_type = get_site_mixing_type( - cif.site_labels, cif.atom_site_info - ) + file_mixing_type = get_site_mixing_type(cif.site_labels, cif.atom_site_info) assert file_mixing_type == "deficiency_atomic_mixing" diff --git 
a/tests/core/preprocessors/test_environment.py b/tests/core/preprocessors/test_environment.py index b8eaa75..60dcf1f 100644 --- a/tests/core/preprocessors/test_environment.py +++ b/tests/core/preprocessors/test_environment.py @@ -5,10 +5,8 @@ def assert_minimum_distance(label, connections_dict, expected_min_distance): - """ - Asserts that the minimum distance for a given label in the - connections dictionary matches the expected minimum distance. - """ + """Asserts that the minimum distance for a given label in the connections + dictionary matches the expected minimum distance.""" connections = connections_dict.get(label, []) # Check if there are any connections, and calculate the minimum distance diff --git a/tests/core/preprocessors/test_format.py b/tests/core/preprocessors/test_format.py index edbd4a5..8dca3f4 100644 --- a/tests/core/preprocessors/test_format.py +++ b/tests/core/preprocessors/test_format.py @@ -156,15 +156,9 @@ def test_preprocess_cif_file_on_label_element_type_mixed(): assert len(lines) == 13 assert lines[0].strip() == "Er7 Er 16 h 0.06284 0.06662 0.39495 1" assert lines[9].strip() == "Er13A Er 4 c 0.75 0.25 0.14542 0.83(2)" - assert ( - lines[10].strip() == "In13B In 4 c 0.75 0.25 0.14542 0.17(2)" - ) - assert ( - lines[11].strip() == "In13A In 4 c 0.75 0.25 0.59339 0.93(3)" - ) - assert ( - lines[12].strip() == "Co13B Co 4 c 0.75 0.25 0.59339 0.07(3)" - ) + assert lines[10].strip() == "In13B In 4 c 0.75 0.25 0.14542 0.17(2)" + assert lines[11].strip() == "In13A In 4 c 0.75 0.25 0.59339 0.93(3)" + assert lines[12].strip() == "Co13B Co 4 c 0.75 0.25 0.59339 0.07(3)" # Type 9 if filename == "1200981.cif": diff --git a/tests/core/preprocessors/test_supercell.py b/tests/core/preprocessors/test_supercell.py index 5306619..8fc993b 100644 --- a/tests/core/preprocessors/test_supercell.py +++ b/tests/core/preprocessors/test_supercell.py @@ -166,9 +166,7 @@ def test_get_unit_cell_coordinates(cif_block_URhIn, unitcell_points_URhIn): coordinates = 
get_unitcell_coords_for_all_labels(cif_block_URhIn) # Flatten the coordinates for comparison with the expected - coordinates_set = set( - tuple(coord) for sublist in coordinates for coord in sublist - ) + coordinates_set = set(tuple(coord) for sublist in coordinates for coord in sublist) # Compare the two sets assert coordinates_set == unitcell_points_URhIn diff --git a/tests/core/util/test_cif_parser.py b/tests/core/util/test_cif_parser.py index b85ae20..90142a3 100644 --- a/tests/core/util/test_cif_parser.py +++ b/tests/core/util/test_cif_parser.py @@ -47,9 +47,7 @@ def test_get_loop_tags(): "_atom_site_occupancy", ] - assert ( - get_loop_tags() == expected_tags - ), CifParserError.INVALID_LOOP_TAGS.value + assert get_loop_tags() == expected_tags, CifParserError.INVALID_LOOP_TAGS.value def test_get_loop_values(cif_block_URhIn): @@ -109,16 +107,12 @@ def test_get_atom_labels(loop_values_URhIn): def test_get_label_occupancy_coordinates(loop_values_URhIn): - label, occupacny, coordinates = get_label_occupancy_coordinates( - loop_values_URhIn, 0 - ) + label, occupacny, coordinates = get_label_occupancy_coordinates(loop_values_URhIn, 0) assert label == "In1" assert occupacny == 1.0 assert coordinates == (0.2505, 0.0, 0.5) - label, occupacny, coordinates = get_label_occupancy_coordinates( - loop_values_URhIn, 1 - ) + label, occupacny, coordinates = get_label_occupancy_coordinates(loop_values_URhIn, 1) assert label == "U1" assert occupacny == 1.0 assert coordinates == (0.5925, 0.0, 0.0) @@ -149,9 +143,7 @@ def test_get_start_end_line_indexes(): def test_get_line_content_from_tag(file_path_URhIn): - content_lines = get_line_content_from_tag( - file_path_URhIn, "_atom_site_occupancy" - ) + content_lines = get_line_content_from_tag(file_path_URhIn, "_atom_site_occupancy") assert len(content_lines) == 4 assert content_lines[0].strip() == "In1 In 3 g 0.2505 0 0.5 1" @@ -274,10 +266,8 @@ def test_get_parsed_atom_site_occupancy_info_ICSD(file_path_ICSD_formatted): def 
test_get_parsed_atom_site_occupancy_info_with_braket(): - """ - Er7 Er 16 h 0.06284 0.06662 0.39495 1 - Co13B Co 4 c 0.75 0.25 0.59339 0.07(3) - `""" + """Er7 Er 16 h 0.06284 0.06662 0.39495 1 Co13B Co 4 c 0.75 0.25 0.59339 + 0.07(3) `""" file_path = "tests/data/cif/cif_parser/1814810.cif" atom_site_info = parse_atom_site_occupancy_info(file_path) @@ -307,19 +297,12 @@ def test_get_parsed_atom_site_occupancy_info_with_braket(): def test_check_unique_atom_site_labels(file_path_URhIn): check_unique_atom_site_labels(file_path_URhIn) - duplicate_labels_file_path = ( - "tests/data/cif/bad_cif_format/duplicate_labels.cif" - ) + duplicate_labels_file_path = "tests/data/cif/bad_cif_format/duplicate_labels.cif" with pytest.raises(ValueError) as e: check_unique_atom_site_labels(duplicate_labels_file_path) assert str(e.value) == "The file contains duplicate atom site labels." - unparsable_file_path = ( - "tests/data/cif/bad_cif_format/label_element_different.cif" - ) + unparsable_file_path = "tests/data/cif/bad_cif_format/label_element_different.cif" with pytest.raises(ValueError) as e: check_unique_atom_site_labels(unparsable_file_path) - assert ( - str(e.value) - == "The element was not correctly parsed from the site label." - ) + assert str(e.value) == "The element was not correctly parsed from the site label." 
diff --git a/tests/core/util/test_folder.py b/tests/core/util/test_folder.py index 6fdef6d..6a3a6ef 100644 --- a/tests/core/util/test_folder.py +++ b/tests/core/util/test_folder.py @@ -103,9 +103,7 @@ def test_check_file_not_empty(tmp_path): empty_file.touch() # Create an empty file with pytest.raises(ValueError) as e: check_file_not_empty(str(empty_file)) - assert str(e.value) == FileError.FILE_IS_EMPTY.value.format( - file_path=empty_file - ) + assert str(e.value) == FileError.FILE_IS_EMPTY.value.format(file_path=empty_file) def test_move_files(tmp_path, cif_folder_path_test, file_paths_test): @@ -140,9 +138,7 @@ def test_copy_files(tmp_path, cif_folder_path_test, file_paths_test): destination_files = get_file_paths(str(dest_dir)) # Extract basenames and sort to ensure order does not affect comparison - source_basenames = sorted( - [Path(file_path).name for file_path in source_files] - ) + source_basenames = sorted([Path(file_path).name for file_path in source_files]) destination_basenames = sorted( [Path(file_path).name for file_path in destination_files] ) diff --git a/tests/core/util/test_random.py b/tests/core/util/test_random.py index 2b2c5eb..8dea020 100644 --- a/tests/core/util/test_random.py +++ b/tests/core/util/test_random.py @@ -12,10 +12,5 @@ def test_generate_random_numbers(): assert all(low <= x <= high for x in int_results) # Test type and lengths - assert ( - all(isinstance(x, int) for x in int_results) and len(int_results) == 10 - ) - assert ( - all(isinstance(x, float) for x in float_results) - and len(float_results) == 10 - ) + assert all(isinstance(x, int) for x in int_results) and len(int_results) == 10 + assert all(isinstance(x, float) for x in float_results) and len(float_results) == 10 diff --git a/tests/core/util/test_unit.py b/tests/core/util/test_unit.py index a1975b6..87e0229 100644 --- a/tests/core/util/test_unit.py +++ b/tests/core/util/test_unit.py @@ -62,9 +62,7 @@ def test_round_dict_values(): "float2": 2.71828, "int": 1, 
"string": "test", - "nested_dict": { - "float": 9.8765 - }, # Note: Nested dictionaries are not processed. + "nested_dict": {"float": 9.8765}, # Note: Nested dictionaries are not processed. } expected_dict = { "float1": 3.142, diff --git a/tests/test_version.py b/tests/test_version.py new file mode 100644 index 0000000..bcdc34d --- /dev/null +++ b/tests/test_version.py @@ -0,0 +1,10 @@ +"""Unit tests for __version__.py.""" + +import cifkit + + +def test_package_version(): + """Ensure the package version is defined and not set to the initial + placeholder.""" + assert hasattr(cifkit, "__version__") + assert cifkit.__version__ != "0.0.0" From 3724921f2bb52e5a40b20561f22849d74242e5f0 Mon Sep 17 00:00:00 2001 From: Sangjoon Bob Lee Date: Fri, 3 Jan 2025 20:26:02 -0500 Subject: [PATCH 3/9] chore: fix authors and update doi in citation --- AUTHORS.rst | 3 ++- CITATION.cff | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/AUTHORS.rst b/AUTHORS.rst index 19bc24f..28fe6f0 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -1,7 +1,8 @@ Authors ======= -Billinge Group and community contributors. +Sangjoon Lee +Anton Oliynyk Contributors ------------ diff --git a/CITATION.cff b/CITATION.cff index d9d2a6e..8e7660b 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -6,9 +6,8 @@ authors: given-names: Sangjoon orcid: "https://orcid.org/0000-0002-2367-3932" cff-version: 1.2.0 -doi: 10.5281/zenodo.12784260 +doi: 10.21105/joss.07205 message: >- If you use cifkit, please consider citing. 
title: >- cifkit: A Python package for coordination geometry and atomic site analysis -url: https://doi.org/10.21105/joss.07205 From fec16864038bc72b1a804dc80014fc83e03d2359 Mon Sep 17 00:00:00 2001 From: Sangjoon Bob Lee Date: Fri, 3 Jan 2025 20:27:11 -0500 Subject: [PATCH 4/9] chore: add news for this branch --- news/scikit-package-0.1.0.rst | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 news/scikit-package-0.1.0.rst diff --git a/news/scikit-package-0.1.0.rst b/news/scikit-package-0.1.0.rst new file mode 100644 index 0000000..2cba134 --- /dev/null +++ b/news/scikit-package-0.1.0.rst @@ -0,0 +1,23 @@ +**Added:** + +* Standardize `cifkit` using `scikit-package` v0.1.0 including `docformatter`, `prettier`, and `codespell` for code formatting and linting. + +**Changed:** + +* + +**Deprecated:** + +* + +**Removed:** + +* + +**Fixed:** + +* + +**Security:** + +* From 6b773efb33d33988f01018064a3a09e78f6807cc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 4 Jan 2025 01:27:41 +0000 Subject: [PATCH 5/9] [pre-commit.ci] auto fixes from pre-commit hooks --- news/scikit-package-0.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/news/scikit-package-0.1.0.rst b/news/scikit-package-0.1.0.rst index 2cba134..7c84ebf 100644 --- a/news/scikit-package-0.1.0.rst +++ b/news/scikit-package-0.1.0.rst @@ -1,6 +1,6 @@ **Added:** -* Standardize `cifkit` using `scikit-package` v0.1.0 including `docformatter`, `prettier`, and `codespell` for code formatting and linting. +* Standardize `cifkit` using `scikit-package` v0.1.0 including `docformatter`, `prettier`, and `codespell` for code formatting and linting.
**Changed:** From 664c1512f5e634194bef8aa1dfa3b3e2461dd317 Mon Sep 17 00:00:00 2001 From: Sangjoon Bob Lee Date: Fri, 3 Jan 2025 20:31:45 -0500 Subject: [PATCH 6/9] chore: add readme.md back and remove not needed github workflows --- .github/ISSUE_TEMPLATE/release_checklist.md | 8 +- .../matrix-and-codecov-on-merge-to-main.yml | 21 --- .github/workflows/publish-docs-on-release.yml | 12 -- .github/workflows/tests-on-pr.yml | 40 ++++-- README.md | 127 ++++++++++++++++++ 5 files changed, 161 insertions(+), 47 deletions(-) delete mode 100644 .github/workflows/matrix-and-codecov-on-merge-to-main.yml delete mode 100644 .github/workflows/publish-docs-on-release.yml create mode 100644 README.md diff --git a/.github/ISSUE_TEMPLATE/release_checklist.md b/.github/ISSUE_TEMPLATE/release_checklist.md index b278b12..7e4a00c 100644 --- a/.github/ISSUE_TEMPLATE/release_checklist.md +++ b/.github/ISSUE_TEMPLATE/release_checklist.md @@ -24,7 +24,7 @@ assignees: "" `twine check dist/*` to ensure that the package can be built and is correctly formatted for PyPI release. -Please mention @sbillinge here when you are ready for PyPI/GitHub release. +Please mention @bobleesj here when you are ready for PyPI/GitHub release. Include any additional comments necessary, such as version information and details about the pre-release here: @@ -36,19 +36,19 @@ details about the pre-release here: - [ ] Docs are deployed successfully to `https://www.diffpy.org/`. - [ ] Successfully run all tests, tutorial examples or do functional testing. -Please let @sbillinge know that all checks are done and the package is ready for +Please let @bobleesj know that all checks are done and the package is ready for full release. ### conda-forge release preparation checklist: - + - [ ] Ensure that the full release has appeared on PyPI successfully. - [ ] New package dependencies listed in `conda.txt` and `test.txt` are added to `meta.yaml` in the feedstock. - [ ] Close any open issues on the feedstock. 
Reach out to @bobleesj if you have questions. -- [ ] Tag @sbillinge and @bobleesj for conda-forge release. +- [ ] Tag @bobleesj for conda-forge release. ### Post-release checklist diff --git a/.github/workflows/matrix-and-codecov-on-merge-to-main.yml b/.github/workflows/matrix-and-codecov-on-merge-to-main.yml deleted file mode 100644 index abdd53e..0000000 --- a/.github/workflows/matrix-and-codecov-on-merge-to-main.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: CI - -on: - push: - branches: - - main - release: - types: - - prereleased - - published - workflow_dispatch: - -jobs: - matrix-coverage: - uses: Billingegroup/release-scripts/.github/workflows/_matrix-and-codecov-on-merge-to-main.yml@v0 - with: - project: cifkit - c_extension: false - headless: false - secrets: - CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/.github/workflows/publish-docs-on-release.yml b/.github/workflows/publish-docs-on-release.yml deleted file mode 100644 index 1b9d578..0000000 --- a/.github/workflows/publish-docs-on-release.yml +++ /dev/null @@ -1,12 +0,0 @@ -name: Deploy Documentation on Release - -on: - workflow_dispatch: - -jobs: - docs: - uses: Billingegroup/release-scripts/.github/workflows/_publish-docs-on-release.yml@v0 - with: - project: cifkit - c_extension: false - headless: false diff --git a/.github/workflows/tests-on-pr.yml b/.github/workflows/tests-on-pr.yml index 778132b..38479ad 100644 --- a/.github/workflows/tests-on-pr.yml +++ b/.github/workflows/tests-on-pr.yml @@ -1,18 +1,38 @@ -name: Tests on PR +name: CI on: push: branches: - main pull_request: - workflow_dispatch: jobs: - tests-on-pr: - uses: Billingegroup/release-scripts/.github/workflows/_tests-on-pr.yml@v0 - with: - project: cifkit - c_extension: false - headless: false - secrets: - CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + build-linux: + runs-on: ubuntu-latest + strategy: + max-parallel: 5 + matrix: + python-version: ["3.10", "3.11", "3.12"] + + steps: + - uses: actions/checkout@v4 + + - name: Set up 
Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Create virtual environment and install dependencies + run: | + python -m venv venv + source venv/bin/activate + pip install . + - name: Test with pytest and generate coverage report + run: | + source venv/bin/activate + pip install pytest pytest-cov + python -m pytest -m "not pyvista" --cov=./ --cov-report=xml + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v4.0.1 + with: + token: ${{ secrets.CODECOV_TOKEN }} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..d71a861 --- /dev/null +++ b/README.md @@ -0,0 +1,127 @@ +# cifkit + +[![CI](https://github.com/bobleesj/cifkit/actions/workflows/tests-on-pr.yml/badge.svg?branch=main)](https://github.com/bobleesj/cifkit/actions/workflows/tests-on-pr.yml) +[![codecov](https://codecov.io/gh/bobleesj/cifkit/graph/badge.svg?token=AN2YAC337A)](https://codecov.io/gh/bobleesj/cifkit) +![Python - Version](https://img.shields.io/pypi/pyversions/cifkit) +[![PyPi version](https://img.shields.io/pypi/v/cifkit.svg)](https://pypi.python.org/pypi/cifkit) + + + +![Logo light mode](docs/assets/img/logo-black.png#gh-light-mode-only "cifkit logo light") +![Logo dark mode](docs/assets/img/logo-color.png#gh-dark-mode-only "cifkit logo dark") + +`cifkit` is designed to provide a set of fully-tested utility functions and +variables for handling large datasets, on the order of tens of thousands, of +`.cif` files. + +## Features: + +`cifkit` provides higher-level functions in just a few lines of code. + +- **Coordination geometry** - `cifkit` provides functions for visualizing + coordination geometry from each site and extracts physics-based features like + volume and packing efficiency in each polyhedron.
+- **Atomic mixing** - `cifkit` extracts atomic mixing information at the bond + pair level—tasks that would otherwise require extensive manual effort using + GUI-based tools like VESTA, Diamond, and CrystalMaker. +- **Filter** - `cifkit` offers features for preprocessing. It systematically + addresses common issues in CIF files from databases, such as incorrect loop + values and missing fractional coordinates, by standardizing and filtering out + ill-formatted files. It also preprocesses atomic site labels, transforming + labels such as 'M1' to 'Fe1' in files with atomic mixing. +- **Sort** - `cifkit` allows you to copy, move, and sort `.cif` files based on + attributes such as coordination numbers, space groups, unit cells, shortest + distances, elements, and more. + +### Example usage 1 - coordination geometry + +The example below uses `cifkit` to visualize the polyhedron generated from each +atomic site based on the coordination number geometry. + +```python +from cifkit import Cif + +cif = Cif("your_cif_file_path") +site_labels = cif.site_labels + +# Loop through each site label +for label in site_labels: + # Display each polyhedron, .png saved for each label + cif.plot_polyhedron(label, is_displayed=True) +``` + +![Polyhedron generation](docs/assets/img/ErCoIn-polyhedron.png) + +### Example Usage 2 - sort + +The following example generates a distribution of structure.
+ +```python +from cifkit import CifEnsemble + +ensemble = CifEnsemble("your_folder_path_containing_cif_files") +ensemble.generate_structure_histogram() +``` + +![structure distribution](docs/assets/img/histogram-structure.png) + +Based on your visual histogram above, you can copy and move .cif files based on +specific attributes: + +```python +# Return file paths matching structures either Co1.75Ge or CoIn2 +ensemble.filter_by_structures(["Co1.75Ge", "CoIn2"]) + +# Return file path matching CeAl2Ga2 +ensemble.filter_by_structures("CeAl2Ga2") +``` + +To learn more, please read the official documentation here: +https://bobleesj.github.io/cifkit. + +## Quotes + +Here is a quote illustrating how `cifkit` addresses one of the challenges +mentioned above. + +> "I am building an X-Ray diffraction analysis (XRD) pattern visualization +> script for my lab using `pymatgen`. I feel like `cifkit` integrated really +> well into my existing stable of libraries, while surpassing some alternatives +> in preprocessing and parsing. For example, it was often unclear at what stage +> an error occurred—whether during pre-processing with `CifParser`, or XRD plot +> generation with `diffraction.core` in `pymatgen`. The pre-processing logic in +> `cifkit` was communicated clearly, both in documentation and in actual +> outputs, allowing me to catch errors in my data before it was used in my +> visualizations. I now use `cifkit` by default for processing CIFs before they +> pass through the rest of my pipeline." - Alex Vtorov + +## Documentation + +- [Official documentation](https://bobleesj.github.io/cifkit) +- [Contribution guide](https://github.com/bobleesj/cifkit/blob/main/CONTRIBUTING.md) +- [MIT license](https://github.com/bobleesj/cifkit/blob/main/LICENSE) + +## How to contribute + +Here is how you can contribute to the `cifkit` project if you found it helpful: + +- Star the repository on GitHub and recommend it to your colleagues who might + find `cifkit` helpful as well.
+ [![Star GitHub repository](https://img.shields.io/github/stars/bobleesj/cifkit.svg?style=social)](https://github.com/bobleesj/cifkit/stargazers) +- Create a new issue for any bugs or feature requests + [here](https://github.com/bobleesj/cifkit/issues) +- Fork the repository and consider contributing changes via a pull request. + [![Fork GitHub repository](https://img.shields.io/github/forks/bobleesj/cifkit?style=social)](https://github.com/bobleesj/cifkit/fork). + Check out + [CONTRIBUTING.md](https://github.com/bobleesj/cifkit/blob/main/CONTRIBUTING.md) + for instructions. +- If you have any suggestions or need further clarification on how to use + `cifkit`, please reach out to Bob Lee + ([@bobleesj](https://github.com/bobleesj)). + +## To render documentation + +```bash +pip install -r requirements/docs.txt +mkdocs serve +``` From 8390f40ae525c27992492fd59137cdd736827f1f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 4 Jan 2025 01:32:03 +0000 Subject: [PATCH 7/9] [pre-commit.ci] auto fixes from pre-commit hooks --- .github/workflows/tests-on-pr.yml | 38 +++++++++++++++---------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/.github/workflows/tests-on-pr.yml b/.github/workflows/tests-on-pr.yml index 38479ad..553c0b8 100644 --- a/.github/workflows/tests-on-pr.yml +++ b/.github/workflows/tests-on-pr.yml @@ -15,24 +15,24 @@ jobs: python-version: ["3.10", "3.11", "3.12"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} - - name: Create virtual environment and install dependencies - run: | - python -m venv venv - source venv/bin/activate - pip install . 
- - name: Test with pytest and generate coverage report - run: | - source venv/bin/activate - pip install pytest pytest-cov - python -m pytest -m "not pyvista" --cov=./ --cov-report=xml - - name: Upload coverage reports to Codecov - uses: codecov/codecov-action@v4.0.1 - with: - token: ${{ secrets.CODECOV_TOKEN }} \ No newline at end of file + - name: Create virtual environment and install dependencies + run: | + python -m venv venv + source venv/bin/activate + pip install . + - name: Test with pytest and generate coverage report + run: | + source venv/bin/activate + pip install pytest pytest-cov + python -m pytest -m "not pyvista" --cov=./ --cov-report=xml + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v4.0.1 + with: + token: ${{ secrets.CODECOV_TOKEN }} From 51d317ab07998f57d3c6bd60f2b8d52d585d8206 Mon Sep 17 00:00:00 2001 From: Sangjoon Bob Lee Date: Fri, 3 Jan 2025 20:33:27 -0500 Subject: [PATCH 8/9] ci: update python verison to 3.13 for cifkit --- .github/workflows/tests-on-pr.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests-on-pr.yml b/.github/workflows/tests-on-pr.yml index 553c0b8..bbf5ffd 100644 --- a/.github/workflows/tests-on-pr.yml +++ b/.github/workflows/tests-on-pr.yml @@ -12,7 +12,7 @@ jobs: strategy: max-parallel: 5 matrix: - python-version: ["3.10", "3.11", "3.12"] + python-version: ["3.11", "3.11", "3.13"] steps: - uses: actions/checkout@v4 From d0200b461c2040e77e682b3cd02cb663e580f616 Mon Sep 17 00:00:00 2001 From: Sangjoon Bob Lee Date: Fri, 3 Jan 2025 20:36:12 -0500 Subject: [PATCH 9/9] ci: fix 3.11 to 3.12 in tests on pr --- .github/workflows/tests-on-pr.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests-on-pr.yml b/.github/workflows/tests-on-pr.yml index bbf5ffd..5144367 100644 --- a/.github/workflows/tests-on-pr.yml +++ b/.github/workflows/tests-on-pr.yml @@ -12,7 +12,7 @@ jobs: strategy: max-parallel: 5 matrix: - 
python-version: ["3.11", "3.11", "3.13"] + python-version: ["3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4