Source code for scythe.dft
from typing import Union, Iterable, Tuple, List
from scythe.utils.grouping import preprocess_paths, group_by_postfix
from scythe.base import BaseExtractor
from dfttopif import files_to_pif
from operator import itemgetter
import itertools
import os
# List of files that are known to the VASP parser
_vasp_file_names = ["outcar", "incar", "chgcar", "wavecar", "wavcar", "oszicar", "ibzcar",
"kpoints", "doscar", "poscar", "contcar", "vasp_run.xml", "xdatcar"]
class DFTExtractor(BaseExtractor):
    """Extract metadata from Density Functional Theory calculation results

    Uses the `dfttopif <https://github.com/CitrineInformatics/pif-dft>`_ parser to extract
    metadata from each file
    """

    def __init__(self, quality_report=False):
        """Initialize the extractor

        Args:
            quality_report (bool): Whether to generate a quality report
        """
        self.quality_report = quality_report

    def group(self, files: Union[str, List[str]], directories: List[str] = None,
              context: dict = None):
        """Assemble the input files into groups that may each be one calculation

        VASP groups are produced first. Every file that landed in a VASP group is then
        excluded before the remaining files are grouped as potential PWSCF calculations.

        Args:
            files (str or [str]): Path(s) of the files to be grouped
            directories ([str]): Not used by this extractor
            context (dict): Not used by this extractor
        Yields:
            Groups of file paths: first the groups from the VASP grouping logic,
            then the groups from the PWSCF grouping logic
        """
        # Convert paths into standardized form
        candidates = set(preprocess_paths(files))

        # VASP grouping logic. The matched files are only recorded here and removed once
        # the generator is exhausted, so the set is never resized while the grouping
        # helper could still be iterating over it.
        matched_as_vasp = set()
        for vasp_group in self._group_vasp(candidates):
            matched_as_vasp.update(vasp_group)
            yield vasp_group

        # Remove all files matched as VASP from the matchable files
        candidates.difference_update(matched_as_vasp)

        # PWSCF files are not removed from the pool, as the PWSCF grouping is not reliable
        yield from self._group_pwscf(candidates)

    def _group_vasp(self, files: Iterable[str]) -> Iterable[Tuple[str, ...]]:
        """Find groupings of files associated with VASP calculations

        Delegates to ``group_by_postfix`` with the list of known VASP file names, which
        is expected to group files sharing the same postfix (e.g., "OUTCAR.1" and
        "INCAR.1" are grouped together)

        Args:
            files ([str]): List of files to be grouped
        Yields:
            ((files)): List of VASP files from the same calculation
        """
        yield from group_by_postfix(files, _vasp_file_names)

    def _group_pwscf(self, files: Iterable[str]) -> Iterable[List[str]]:
        """Assemble groups of files that are potentially PWSCF calculations

        Args:
            files ([str]): List of files to be grouped
        Yields:
            ([str]): Groups of potential-pwscf files, one list per directory
        """
        # For now, we just group files by directory
        # TODO (lw): Find files that have PWSCF flags in them
        # TODO (lw): Read PWSCF input files to know the save directory
        # groupby only merges adjacent items, hence the sort on (directory, path) first
        dir_and_path = sorted((os.path.dirname(path), path) for path in files)
        for _, members in itertools.groupby(dir_and_path, key=itemgetter(0)):
            yield [path for _, path in members]

    def extract(self, group: Iterable[str], context: dict = None):
        """Parse one group of files with dfttopif

        Args:
            group ([str]): Paths of the files that belong to a single calculation
            context (dict): Not used by this extractor
        Returns:
            The result of ``as_dictionary()`` on the object returned by ``files_to_pif``
        """
        parsed = files_to_pif(group, quality_report=self.quality_report)
        return parsed.as_dictionary()

    def implementors(self):
        """List the implementors of this extractor

        Returns:
            ([str]): Names and contact addresses of the implementors
        """
        return ['Logan Ward <lward@anl.gov>']

    def version(self):
        """Report the version of this extractor

        Returns:
            (str): Version string
        """
        return '0.0.1'