Skip to content

Commit

Permalink
Merge branch 'release/0.2.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
dev-zero committed May 5, 2020
2 parents a8db682 + 0e598cd commit f8b785b
Show file tree
Hide file tree
Showing 16 changed files with 2,580 additions and 12 deletions.
2 changes: 2 additions & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[run]
parallel = True
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
## [0.2.0] - 2020-05-05

* add cp2k_bs2csv with support for CP2K v8+ and an API
* add xyz_restart_cleaner

## [0.1.4] - 2020-04-03

* updated project urls
Expand Down
18 changes: 15 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
# cp2k-output-tools

[![Build Status](https://github.com/cp2k/cp2k-output-tools/workflows/tests/badge.svg)](https://github.com/dev-zero/cp2k-output-tools/actions) [![codecov](https://codecov.io/gh/dev-zero/cp2k-output-tools/branch/develop/graph/badge.svg)](https://codecov.io/gh/dev-zero/cp2k-output-tools) [![PyPI](https://img.shields.io/pypi/pyversions/cp2k-output-tools)](https://pypi.org/project/cp2k-output-tools/)
[![Build Status](https://github.com/cp2k/cp2k-output-tools/workflows/tests/badge.svg)](https://github.com/cp2k/cp2k-output-tools/actions) [![codecov](https://codecov.io/gh/cp2k/cp2k-output-tools/branch/develop/graph/badge.svg)](https://codecov.io/gh/cp2k/cp2k-output-tools) [![PyPI](https://img.shields.io/pypi/pyversions/cp2k-output-tools)](https://pypi.org/project/cp2k-output-tools/)

Modular CP2K output file parsers, mostly in the form of regular expressions.
Modular CP2K output file parsers, mostly in the form of regular expressions plus other tools to mangle various CP2K output:

* `cp2kparse` ... parse CP2K output files (for restart & input files look at the [cp2k-input-tools](https://github.com/cp2k/cp2k-input-tools) project)
* `xyz_restart_cleaner` ... when restarts occur during an MD run, you may end up with duplicated frames in the trajectory; this tool filters them out
* `cp2k_bs2csv` ... convert a CP2K band structure file to multiple (one-per-set) CSV files for easier plotting. There is also an API available if you need to import band structure data into your application.

## Requirements

Expand All @@ -12,7 +16,7 @@ Modular CP2K output file parsers, mostly in the form of regular expressions.
For development: https://poetry.eustace.io/ https://pytest.org/


## Usage
## Usage: cp2kparse

There is a simple command-line interface `cp2kparse`:

Expand Down Expand Up @@ -493,6 +497,14 @@ with open("calc.out", "r") as fhandle:
print(match.values)
```

## Usage: xyz_restart_cleaner

```console
$ xyz_restart_cleaner orig_trajectory.xyz new_trajectory.xyz
found restart point @1, dropping 1 frames, flushing 1
flushing remaining 2 frames
```

## Development

```console
Expand Down
2 changes: 1 addition & 1 deletion cp2k_output_tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "0.1.4"
__version__ = "0.2.0"

__all__ = ["builtin_matchers", "parse_iter"]

Expand Down
192 changes: 192 additions & 0 deletions cp2k_output_tools/bandstructure.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
"""
Convert the CP2K band structure output to CSV files
"""

__all__ = ["SpecialPoint", "Point", "set_gen"]

import re
import argparse
from dataclasses import dataclass
from typing import List, Optional
import itertools


@dataclass
class SpecialPoint:
    """A special point entry parsed from the content of a band structure set."""

    # number printed for this special point in the file
    number: int
    # label of the special point (a single whitespace-free token in the file)
    name: str
    # the three coordinate values given for the point, in the parsers' a/b/c group order
    a: float
    b: float
    c: float


@dataclass
class Point:
    """A single k-point entry (for one spin) together with its band values."""

    # the three k-point coordinate values as read from the file
    a: float
    b: float
    c: float
    # band values of this k-point, in the order they appear in the file
    bands: List[float]
    # spin number printed for this entry
    spin: int
    # optional weight; only the CP2K v8+ parser fills this in, and only when the
    # file provides a value after the coordinates
    weight: Optional[float] = None


# Pattern for one set in the older band structure format (set_gen() falls back to
# it when SET_MATCH8 finds nothing). It matches a header line of the form
#   SET: <setnr>  TOTAL POINTS: <totalpoints>
# and lazily captures everything after that line as `content`, stopping right
# before the newline of the next line that contains " SET", or at the end of the
# input (re.MULTILINE is not set, so `$` does not match at every line end).
SET_MATCH = re.compile(
r"""
[ ]*
SET: [ ]* (?P<setnr>\d+) [ ]*
TOTAL [ ] POINTS: [ ]* (?P<totalpoints>\d+) [ ]*
\n
(?P<content>
[\s\S]*?(?=\n.*?[ ] SET|$) # match everything until next 'SET' or EOL
)
""",
re.VERBOSE,
)

# Pattern for a special point line in the older format:
#   POINT <number> <name> <a> <b> <c>
# The fields are separated by one or more spaces. Name and coordinates are captured
# as raw non-whitespace tokens; conversion to float happens in _specialpoints_gen().
SPOINTS_MATCH = re.compile(
r"""
[ ]*
POINT [ ]+ (?P<number>\d+) [ ]+ (?P<name>\S+) [ ]+ (?P<a>\S+) [ ]+ (?P<b>\S+) [ ]+ (?P<c>\S+)
""",
re.VERBOSE,
)

# Pattern for one k-point entry in the older format. It matches the header line
#   Nr. <nr>  Spin <spin>  K-Point <a> <b> <c>
# followed by a line holding a single integer (`npoints`; presumably the number of
# values that follow -- TODO confirm against CP2K output), and then lazily captures
# the remaining text as `bands`, stopping right before the newline of the next line
# that contains " Nr", or at the end of the input.
# The `nr` and `npoints` groups are captured but not used by _points_gen().
POINTS_MATCH = re.compile(
r"""
[ ]*
Nr\. [ ]+ (?P<nr>\d+) [ ]+
Spin [ ]+ (?P<spin>\d+) [ ]+
K-Point [ ]+ (?P<a>\S+) [ ]+ (?P<b>\S+) [ ]+ (?P<c>\S+) [ ]*
\n
[ ]* (?P<npoints>\d+) [ ]* \n
(?P<bands>
[\s\S]*?(?=\n.*?[ ] Nr|$) # match everything until next 'Nr.' or EOL
)
""",
re.VERBOSE,
)


def _specialpoints_gen(content):
    """
    Yield a SpecialPoint for every special point line SPOINTS_MATCH finds in `content`.

    `content` is the text of one set (older file format). The point number is
    converted to int and the three coordinates to float; the name is kept as-is.
    """
    for found in SPOINTS_MATCH.finditer(content):
        number = int(found["number"])
        label = found["name"]
        coords = [float(found[axis]) for axis in ("a", "b", "c")]
        yield SpecialPoint(number, label, *coords)


def _points_gen(content):
    """
    Yield a Point for every k-point entry POINTS_MATCH finds in `content`.

    `content` is the text of one set (older file format). All whitespace-separated
    tokens of the captured band block are converted to float and stored as the
    point's bands. No weight is passed, so Point.weight keeps its default of None.
    """
    for found in POINTS_MATCH.finditer(content):
        a, b, c = (float(found[axis]) for axis in ("a", "b", "c"))
        band_values = list(map(float, found["bands"].split()))
        spin = int(found["spin"])
        yield Point(a, b, c, band_values, spin)


# Pattern for one set in the CP2K v8+ band structure format (tried first by
# set_gen()). It matches a header of the form
#   # Set <setnr>: <n> special points, <totalpoints> k-points, <n> bands
# (the special point and band counts are matched but not captured) and lazily
# captures the text after it as `content`, stopping right before the newline of
# the next line that contains "# Set", or at the end of the input.
SET_MATCH8 = re.compile(
r"""
\#\ Set\ (?P<setnr>\d+):\ \d+\ special\ points,\ (?P<totalpoints>\d+)\ k-points,\ \d+\ bands \s*
(?P<content>
[\s\S]*?(?=\n.*?\#\ Set|$) # match everything until next 'SET' or EOL
)
""",
re.VERBOSE,
)


# Pattern for a special point line in the CP2K v8+ format:
#   #  Special point <number>  <a> <b> <c>  <name>
# Unlike the older format matched by SPOINTS_MATCH, the name comes after the
# coordinates here. All fields are separated by arbitrary whitespace.
SPOINTS_MATCH8 = re.compile(
r"""
\#\s+ Special\ point\ (?P<number>\d+) \s+ (?P<a>\S+) \s+ (?P<b>\S+) \s+ (?P<c>\S+) \s+ (?P<name>\S+)
""",
re.VERBOSE,
)


# Pattern for one k-point entry in the CP2K v8+ format. It matches a header line
#   #  Point <nr>  Spin <spin>:  <a> <b> <c> [<weight>]
# where the trailing `weight` token is optional (the group is None when absent),
# followed by the column header line
#   #   Band   Energy [eV]   Occupation
# and then lazily captures the rows below it as `bands`, stopping right before the
# newline of the next line that contains "#  Point", or at the end of the input.
# The `nr` group is captured but not used by _points_gen8().
POINTS_MATCH8 = re.compile(
r"""
\#\ \ Point\ (?P<nr>\d+)\s+ Spin\ (?P<spin>\d+): \s+ (?P<a>\S+) \s+ (?P<b>\S+) \s+ (?P<c>\S+) [ ]* ((?P<weight>\S+) [ ]*)? \n
\#\ \ \ Band \s+ Energy\ \[eV\] \s+ Occupation \s*
(?P<bands>
[\s\S]*?(?=\n.*?\#\ \ Point|$) # match everything until next '# Point' or EOL
)
""",
re.VERBOSE,
)


def _points_gen8(content):
    """
    Yield a Point for every k-point entry POINTS_MATCH8 finds in `content`.

    `content` is the text of one set (CP2K v8+ file format). The weight is
    optional in the file: when the pattern did not capture one, Point.weight
    is set to None.
    """
    for found in POINTS_MATCH8.finditer(content):
        raw_weight = found["weight"]
        # the optional group yields None when the header line carries no weight
        weight = None if raw_weight is None else float(raw_weight)

        tokens = found["bands"].split()

        a, b, c = (float(found[axis]) for axis in ("a", "b", "c"))
        # the rows hold three columns (Band, Energy [eV], Occupation), so every
        # third token starting at index 1 is an energy value
        energies = list(map(float, tokens[1::3]))

        yield Point(a=a, b=b, c=c, bands=energies, weight=weight, spin=int(found["spin"]))


def _specialpoints_gen8(content):
    """
    Yield a SpecialPoint for every special point line SPOINTS_MATCH8 finds in `content`.

    `content` is the text of one set (CP2K v8+ file format). The point number is
    converted to int and the three coordinates to float; the name is kept as-is.
    """
    for found in SPOINTS_MATCH8.finditer(content):
        number = int(found["number"])
        label = found["name"]
        coords = [float(found[axis]) for axis in ("a", "b", "c")]
        yield SpecialPoint(number, label, *coords)


def set_gen(content):
    """
    Iterate over the sets contained in the text of a CP2K band structure file.

    The CP2K v8+ format is tried first; if its set pattern does not match anywhere
    in `content`, the older format is used instead. The choice is made once for
    the whole input.

    Yields one tuple per set:
        (set number as int, total number of points as int,
         generator of SpecialPoint, generator of Point)

    Both generators are lazy and parse the set's content only when iterated.
    """
    # try with the CP2K v8+ regex first
    v8_matches = SET_MATCH8.finditer(content)
    first = next(v8_matches, None)

    if first is None:
        # if nothing could be found, fallback to the older format
        set_matches = SET_MATCH.finditer(content)
        parse_specialpoints, parse_points = _specialpoints_gen, _points_gen
    else:
        # put the match consumed by the probe back in front of the remaining ones
        set_matches = itertools.chain([first], v8_matches)
        parse_specialpoints, parse_points = _specialpoints_gen8, _points_gen8

    for found in set_matches:
        setnr = int(found["setnr"])
        totalpoints = int(found["totalpoints"])
        yield (setnr, totalpoints, parse_specialpoints(found["content"]), parse_points(found["content"]))


def cp2k_bs2csv():
    """
    Command-line entry point: write one CSV file per set of a CP2K band structure file.

    Reads the band structure file named on the command line, iterates over its
    sets with set_gen() and, for every set, writes a file whose name is built from
    the --output-pattern option (fields available to the pattern: `bsfile`, the
    input file object, and `setnr`). Each output line holds the three k-point
    coordinates followed by the band values, separated by single spaces. The
    special points of every set are only printed to stdout, not written to the file.
    """
    parser = argparse.ArgumentParser(
        description="""
        Convert the input from the given input file handle and write
        CSV output files based on the given pattern.
        """,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "bsfile", metavar="<bandstructure-file>", type=argparse.FileType("r"), help="the band structure file generated by CP2K"
    )
    parser.add_argument(
        "-p", "--output-pattern", help="The output pattern for the different set files", default="{bsfile.name}.set-{setnr}.csv"
    )
    args = parser.parse_args()

    # argparse.FileType opens the file while parsing the arguments but never closes
    # it, so read everything inside a context manager to release the handle. The
    # closed file object still provides `.name` for the output pattern below.
    with args.bsfile as bsfile:
        content = bsfile.read()

    for setnr, totalpoints, specialpoints, points in set_gen(content):
        filename = args.output_pattern.format(bsfile=args.bsfile, setnr=setnr)

        with open(filename, "w") as csvout:
            print(f"writing point set {filename} (total number of k-points: {totalpoints})")
            print("with the following special points:")

            for point in specialpoints:
                print(f" {point.name:>8}: {point.a:10.8f} / {point.b:10.8f} / {point.c:10.8f}")

            for point in points:
                # one line per k-point: coordinates first, then all band values
                row = (point.a, point.b, point.c, *point.bands)
                csvout.write(" ".join(f"{value:10.8f}" for value in row) + "\n")
Loading

0 comments on commit f8b785b

Please sign in to comment.