# Source code for scythe.dft

from typing import Union, Iterable, Tuple, List
from scythe.utils.grouping import preprocess_paths, group_by_postfix
from scythe.base import BaseExtractor
from dfttopif import files_to_pif
from operator import itemgetter
import itertools
import os


# List of files that are known to the VASP parser
_vasp_file_names = ["outcar", "incar", "chgcar", "wavecar", "wavcar", "oszicar", "ibzcar",
                    "kpoints", "doscar", "poscar", "contcar", "vasp_run.xml", "xdatcar"]


class DFTExtractor(BaseExtractor):
    """Extract metadata from Density Functional Theory calculation results

    Uses the `dfttopif <https://github.com/CitrineInformatics/pif-dft>`_ parser
    to extract metadata from each file
    """

    def __init__(self, quality_report=False):
        """Initialize the extractor

        Args:
            quality_report (bool): Whether to generate a quality report
        """
        self.quality_report = quality_report

    def group(self, files: Union[str, List[str]], directories: List[str] = None,
              context: dict = None):
        """Split the input files into groups that may each be one calculation

        Groups found by the VASP logic are yielded first. Every file that
        appeared in a VASP group is then excluded, and the remaining files are
        passed to the PWSCF logic.

        Args:
            files (str or [str]): Path(s) of the files to be grouped
            directories ([str]): Not used by this implementation
            context (dict): Not used by this implementation
        Yields:
            Groups of file paths, as produced by ``_group_vasp`` and then
            ``_group_pwscf``
        """
        # Convert paths into standardized form
        unmatched = set(preprocess_paths(files))

        # VASP grouping logic. The matched files are recorded in a separate
        # set rather than removed from ``unmatched`` right away, so that the
        # collection handed to the grouping generator is never modified while
        # that generator is still being consumed.
        vasp_matched = set()
        for vasp_group in self._group_vasp(unmatched):
            vasp_matched.update(vasp_group)
            yield vasp_group

        # Remove all files matched as VASP from the matchable files
        unmatched -= vasp_matched

        # PWSCF-matched files are not removed, as the PWSCF group is not reliable
        yield from self._group_pwscf(unmatched)

    def _group_vasp(self, files: Iterable[str]) -> Iterable[Tuple[str, ...]]:
        """Find groupings of files associated with VASP calculations

        Find files that start with the name "OUTCAR" (not case sensitive) and groups those files
        together with any file that share the same postfix (e.g., "OUTCAR.1" and "INCAR.1" are
        grouped together)

        Args:
            files ([str]): List of files to be grouped
        Yields:
            ((files)): List of VASP files from the same calculation
        """
        # The matching itself is delegated to ``group_by_postfix`` using the
        # module-level list of VASP file names
        yield from group_by_postfix(files, _vasp_file_names)

    def _group_pwscf(self, files: Iterable[str]) -> Iterable[List[str]]:
        """Assemble groups of files that are potentially PWSCF calculations

        Args:
            files ([str]): List of files to be grouped
        Yields:
            ([str]): Groups of potential-pwscf files, one list per directory
        """
        # For now, we just group files by directory
        #  TODO (lw): Find files that have PWSCF flags in them
        #  TODO (lw): Read PWSCF input files to know the save directory
        # ``groupby`` only merges adjacent items, so the (directory, path)
        # pairs are sorted first to bring each directory's files together
        dir_and_path = sorted((os.path.dirname(path), path) for path in files)
        for _directory, members in itertools.groupby(dir_and_path, key=itemgetter(0)):
            yield [member[1] for member in members]

    def extract(self, group: Iterable[str], context: dict = None):
        """Extract metadata from one group of files

        Args:
            group ([str]): Paths of the files to be parsed together
            context (dict): Not used by this implementation
        Returns:
            Result of calling ``as_dictionary()`` on the object returned by
            ``files_to_pif`` for this group
        """
        pif = files_to_pif(group, quality_report=self.quality_report)
        return pif.as_dictionary()

    def implementors(self):
        """List the people who implemented this extractor

        Returns:
            ([str]): Names and email addresses of the implementors
        """
        return ['Logan Ward <lward@anl.gov>']

    def version(self):
        """Report the version of this extractor

        Returns:
            (str): Version string
        """
        return '0.0.1'