Source code for ipyvasp.core.parser

__all__ = ["Vasprun", "Vaspout", "minify_vasprun", "xml2dict","read"]

import re
from io import StringIO
from itertools import islice, chain, product
from collections.abc import Iterable
import xml.etree.ElementTree as ET
from pathlib import Path

import numpy as np

from . import serializer


class DataSource:
    """Base class for all data sources. It provides a common interface to access data from different sources.
    Subclass it to get data from a source and implement the abstract methods."""

    def __init__(self, path, skipk=None):
        self._path = Path(path).absolute()
        self._skipk = (
            skipk if isinstance(skipk, (int, np.integer)) else self._read_skipk()
        )
        if not self._path.is_file():
            raise FileNotFoundError("File: '{}'' does not exist!".format(path))
        self._summary = self.get_summary()  # summary data is read only once

    def __repr__(self):
        return f"{self.__class__.__name__}({str(self.path)})"

    @property
    def path(self):
        return self._path

    @property
    def summary(self):
        return self._summary

    @property
    def bands(self):
        if not hasattr(self, "_bands"):
            from ..bsdos import Bands

            self._bands = Bands(self)

        return self._bands  # keep same instance to avoid data loss

    @property
    def dos(self):
        if not hasattr(self, "_dos"):
            from ..bsdos import DOS

            self._dos = DOS(self)

        return self._dos  # keep same instance to avoid data loss

    @property
    def poscar(self):
        if not hasattr(self, "_poscar"):
            from ..lattice import POSCAR

            self._poscar = POSCAR(data=self.get_structure())

        return self._poscar  # keep same instance to avoid data loss

    def get_evals_dataframe(self, spins=[0], bands=[0], atoms=None, orbs=None):
        """Initialize and return an EvalsDataFrame instance, with the given parameters."""
        from ..evals_dataframe import EvalsDataFrame

        return EvalsDataFrame(self, spins=spins, bands=bands, atoms=atoms, orbs=orbs)

    # Following methods should be implemented in a subclass
    def get_summary(self):
        raise NotImplementedError(
            "`get_summary` should be implemented in a subclass. See Vasprun.get_summary as example."
        )

    def get_structure(self):
        raise NotImplementedError(
            "`get_structure` should be implemented in a subclass. See Vasprun.get_structure as example."
        )
    
    def _read_skipk(self):
        raise NotImplementedError(
            "`_read_skipk` should be implemented in a subclass. See Vasprun._read_skipk as example."
        )
    
    def get_skipk(self):
        "Returns the number of first few k-points to skip in band structure plot in case of HSE calculation."
        return self._skipk
    
    def set_skipk(self, skipk):
        "Set the number of first few k-points to skip in band structure plot in case of HSE calculation."
        if isinstance(skipk, (int, np.integer)):
            self._skipk = skipk
        else:
            raise TypeError("skipk should be an integer to skip first few random kpoints.")
        
    def get_kpoints(self):
        raise NotImplementedError(
            "`get_kpoints` should be implemented in a subclass. See Vasprun.get_kpoints as example."
        )

    def get_evals(self, *args, **kwargs):
        raise NotImplementedError(
            "`get_evals` should be implemented in a subclass. See Vasprun.get_evals as example."
        )

    def get_dos(self, *args, **kwargs):
        raise NotImplementedError(
            "`get_dos` should be implemented in a subclass. See Vasprun.get_dos as example."
        )

    def get_forces(self):
        raise NotImplementedError(
            "`get_forces` should be implemented in a subclass. See Vasprun.get_forces as example."
        )

    def get_scsteps(self):
        raise NotImplementedError(
            "`get_scsteps` should be implemented in a subclass. See Vasprun.get_scsteps as example."
        )


[docs] class Vaspout(DataSource): "Read data from vaspout.h5 file on demand." # These methods are accessible from parent class, but need here for including in sphinx documentation get_evals_dataframe = DataSource.get_evals_dataframe poscar = DataSource.poscar dos = DataSource.dos bands = DataSource.bands def __init__(self, path, skipk=None): # super().__init__(path, skipk) raise NotImplementedError("Vaspout is not implemented yet.")
[docs] def read(file, start_match, stop_match=r'\n', nth_match=1, skip_last=False,apply=None): """Reads a part of the file between start_match and stop_match and returns a generator. It is lazy and fast. `start_match` and `stop_match`(default is end of same line) are regular expressions. `nth_match` is the number of occurence of start_match to start reading. `skip_last` is used to determine whether to keep or skip last line. `apply` should be None or `func` to transform each captured line. """ if "|" in start_match: raise ValueError( "start_match should be a single match, so '|' character is not allowed." ) with Path(file).open("r") as f: lines = islice(f, None) # this is fast matched = False n_start = 1 for line in lines: if re.search(start_match, line, flags=re.DOTALL): if nth_match != n_start: n_start += 1 else: matched = True if matched and re.search( stop_match, line, flags=re.DOTALL ): # avoid stop before start matched = False if not skip_last: yield apply(line) if callable(apply) else line break # stop reading if matched: # should be after break to handle last line above yield apply(line) if callable(apply) else line
[docs] class Vasprun(DataSource): "Reads vasprun.xml file lazily. It reads only the required data from the file when a plot or data access is requested." # These methods are accessible from parent class, but need here for including in sphinx documentation get_evals_dataframe = DataSource.get_evals_dataframe poscar = DataSource.poscar dos = DataSource.dos bands = DataSource.bands def __init__(self, path="./vasprun.xml", skipk=None): super().__init__(path, skipk)
[docs] def read(self, start_match, stop_match=r'\n', nth_match=1, skip_last=False,apply=None): """Reads a part of the file between start_match and stop_match and returns a generator. It is lazy and fast. `start_match` and `stop_match`(default is end of same line) are regular expressions. `nth_match` is the number of occurence of start_match to start reading. `skip_last` is used to determine whether to keep or skip last line. `apply` should be None or `func` to transform each captured line. """ kws = {k:v for k,v in locals().items() if k !='self'} return read(self.path,**kws)
def _read_skipk(self): "Returns the number of k-points to skip in band structure plot in case of HSE calculation." weights = np.fromiter( ( v.text for v in ET.fromstringlist( self.read("<varray.*weights", "</varray") ).iter("v") ), dtype=float, ) return len([w for w in weights if w != weights[-1]])
[docs] def get_summary(self): "Returns summary data of calculation." incar = { v.attrib["name"]: v.text for v in ET.fromstringlist(self.read("<incar", "</incar")).iter("i") } info_dict = {"SYSTEM": incar.get("SYSTEM", None)} # some people don't write system in incar info_dict["ISPIN"] = int( ET.fromstring(next(self.read("<i.*ISPIN", "</i>"))).text ) info_dict["NKPTS"] = len( [ v for v in ET.fromstringlist( self.read("<varray.*kpoint", "</varray") ).iter("v") ] ) _nbands = [*self.read("<i.*NBANDS[\"\']", "</i>"),*self.read("<i.*NBANDS[\"\']", "</i>",nth_match=2)] info_dict["NBANDS"] = int( # Bad initializations of NBANDS, read last one ET.fromstring(_nbands[-1]).text ) info_dict["NELECTS"] = int( float(ET.fromstring(next(self.read("<i.*NELECT", "</i>"))).text) ) # Beacuse they are poorly writtern as float, come on VASP! info_dict["LSORBIT"] = ( True if "t" in ET.fromstring(next(self.read("<i.*LSORBIT", "</i>"))).text.lower() else False ) info_dict["EFERMI"] = float( ET.fromstring(next(chain( self.read("<i.*efermi", "</i>"), # not always there but this is correct one self.read("<i.*EFERMI", "</i>") # zero, always there in start ))).text ) info_dict["NEDOS"] = int( ET.fromstring(next(self.read("<i.*NEDOS", "</i>"))).text ) # Getting ions is kind of terrbile, but it works atominfo = ET.fromstringlist(self.read("<atominfo", "</atominfo")) info_dict["NIONS"] = [int(atom.text) for atom in atominfo.iter("atoms")][0] types = [int(_type.text) for _type in atominfo.iter("types")][0] elems = [rc[0].text.strip() for rc in atominfo.iter("rc")] _inds = [int(a) for a in elems[-types:]] inds = np.cumsum([0, *_inds]).astype(int) names = [] # for keeping order, do not use unique or set for elem in elems[:-types]: if elem not in names: names.append(elem) info_dict["types"] = { name: range(inds[i], inds[i + 1]) for i, name in enumerate(names) } if info_dict["SYSTEM"] is None: info_dict["SYSTEM"] = "".join(names) # Getting projection fields fields = [ f.replace("field", "").strip(" ></\n") for f in self.read("<partial", "<set") if "field" in f ] # poor formatting again if fields: *others, sets_lines = [ r for r in self.read("<partial>", "</partial>") if "spin " in r ] # Least worse way to read sets info_dict["NSETS"] = int(re.findall("(\d+)", sets_lines)[0]) info_dict["orbs"] = [f for f in fields if "energy" not in f] info_dict["incar"] = incar # Just at end return serializer.Dict2Data(info_dict)
[docs] def get_structure(self): "Returns a structure object including types, basis, rec_basis and positions." arrays = np.array( [ [float(i) for i in v.split()[1:4]] for v in self.read("<structure.*finalpos", "select|</structure") if "<v>" in v ] ) # Stop at selctive dynamics if exists info = self.summary return serializer.PoscarData( { "SYSTEM": info.SYSTEM, "basis": arrays[:3], "rec_basis": arrays[3:6], "positions": arrays[6:], "types": info.types, "metadata": { "comment": "Exported from vasprun.xml", "cartesian": False, "scale": 1, }, } )
[docs] def get_kpoints(self): "Returns k-points data including kpoints, coords, weights and rec_basis in which coords are calculated." kpoints = np.array( [ [float(i) for i in v.text.split()] for v in ET.fromstringlist( self.read("<varray.*kpoint", "</varray") ).iter("v") ] )[self._skipk :] weights = np.fromiter( ( v.text for v in ET.fromstringlist( self.read("<varray.*weights", "</varray") ).iter("v") ), dtype=float, )[self._skipk :] # Get rec_basis to make cartesian coordinates rec_basis = np.array( [ [float(i) for i in v.split()[1:4]] for v in self.read("<structure.*finalpos", "select|</structure") if "<v>" in v ] )[ 3:6 ] # could be selective dynamics there coords = kpoints.dot(rec_basis) # cartesian coordinates kpath = np.cumsum([0, *np.linalg.norm(coords[1:] - coords[:-1], axis=1)]) kpath = ( kpath / kpath[-1] ) # normalized kpath to see the band structure of all materials in same scale return serializer.Dict2Data( { "kpoints": kpoints, "coords": coords, "kpath": kpath, "weights": weights, "rec_basis": rec_basis, } )
[docs] def get_dos(self, elim=None, ezero=None, atoms=None, orbs=None, spins=None): """ Returns energy, total and integrated dos of the calculation. If atoms and orbs are specified, then partial dos are included too. Parameters ---------- elim : tuple, energy range to be returned. Default is None, which returns full range. `elim` is applied around `ezero` if specified, otherwise around VBM. ezero : float, energy reference to be used. Default is None, which uses VBM as reference. ezero is ignored if elim is not specified. In output data, ezero would be VBM or ezero itself if specified. atoms : list/tuple/range, indices of atoms to be returned. Default is None, which does not return ionic projections. orbs : list/tuple/range, indices of orbitals to be returned. Default is None, which does not return orbitals. spins : list/tuple/range of spin sets indices to pick. If None, spin set will be picked 0 or [0,1] if spin-polarized. Default is None. Returns ------- Dict2Data : object which includes `energy`,'tdos` and `idos` as attributes, and includes `pdos` if atoms and orbs specified. Shape of arrays a is (spins, [atoms, orbs], energy). """ ds = (r.split()[1:4] for r in self.read("<total>", "</total>") if "<r>" in r) ds = (d for dd in ds for d in dd) # flatten ds = np.fromiter(ds, dtype=float) if ds.size: ds = ds.reshape(-1, self.summary.NEDOS, 3) # E, total, integrated else: raise ValueError("No dos data found in the file!") en, total, integrad = ds.transpose((2, 0, 1)) vbm = float( en[(integrad < self.summary.NELECTS) & (integrad > 0)].max() ) # second condition is important cbm = float(en[(integrad > self.summary.NELECTS) & (integrad > 0)].min()) zero = vbm grid_range = range(en.shape[1]) # default range full if elim is not None: if (not isinstance(elim, (list, tuple))) and (len(elim) != 2): raise TypeError("elim should be a tuple of length 2") if ezero is not None: if not isinstance(ezero, (int, np.integer, float)): raise TypeError("ezero should be a float or integer") zero = ezero _max = ( np.max(np.where(en - zero <= np.max(elim))[1]) + 1 ) # +1 to make range inclusive _min = np.min(np.where(en - zero >= np.min(elim))[1]) en = en[:, _min:_max] total = total[:, _min:_max] integrad = integrad[:, _min:_max] grid_range = range(_min, _max) out = { "energy": en, "tdos": total, "idos": integrad, "evc": (vbm, cbm), "ezero": zero, "elim": elim, "spins": list(range(en.shape[0])), } if atoms and orbs: if not list( self.read("<partial", "") ): # no partial dos, hust check that line raise ValueError("No partial dos found in the file!") which_spins = tuple(range(en.shape[0])) if spins is not None: if not isinstance(spins, (list, tuple, range)): raise TypeError( "spins should be a list, tuple or range got {}".format( type(spins) ) ) for s in spins: if (not isinstance(s, (int, np.integer))) and ( s not in range(self.summary.NSETS) ): raise TypeError( f"spins should be a tuple/list/range of positive integers in {list(range(self.summary.NSETS))}" ) which_spins = spins gen = ( r.strip(" \n/<>r") for r in self.read(f"<partial>", f"</partial>") if "<r>" in r ) # stripping is must here to ensure that we get only numbers old_shape = ( self.summary.NIONS, self.summary.NSETS, self.summary.NEDOS, len(self.summary.orbs) + 1, ) data = gen2numpy( gen, old_shape, slices=[atoms, which_spins, grid_range, [o + 1 for o in orbs]], dtype=float, ) # No need to pick up the first column as it is the energy out["pdos"] = data.transpose((1, 0, 3, 2)) # (spins, atoms, orbitsl,energy) out["atoms"] = atoms out["orbs"] = orbs out["spins"] = which_spins elif (atoms, orbs) != (None, None): raise ValueError("atoms and orbs should be specified together") out["shape"] = "(spin, [atoms, orbitals], energy)" return serializer.Dict2Data(out)
def _get_spin_set(self, spin, bands, atoms, orbs, sys_info): "sys_info is the summary data of calculation, used to get the shape of the array." if not hasattr(sys_info, "orbs"): raise ValueError( f"The file {self.path!r} does not have projected orbitals!" ) shape = (sys_info.NKPTS, sys_info.NBANDS, sys_info.NIONS, len(sys_info.orbs)) slices = [range(self._skipk, sys_info.NKPTS), bands, atoms, orbs] if not list(self.read(f"spin{spin+1}", "")): raise ValueError( f"Given {spin} index for spin is larger than available in the file!" ) gen = ( r.strip(" \n/<>r") for r in self.read(f"spin{spin+1}", f"spin{spin+2}|</projected") if "<r>" in r ) # stripping is must here to ensure that we get only numbers return gen2numpy(gen, shape, slices).transpose( (2, 3, 0, 1) ) # (atoms, orbs, kpts, bands)
[docs] def get_evals( self, elim=None, ezero=None, atoms=None, orbs=None, spins=None, bands=None ): """ Returns eigenvalues and occupations of the calculation. If atoms and orbs are specified, then orbitals are included too. Parameters ---------- elim : tuple, energy range to be returned. Default is None, which returns all eigenvalues. `elim` is applied around `ezero` if specified, otherwise around VBM. ezero : float, energy reference to be used. Default is None, which uses VBM as reference. ezero is ignored if elim is not specified. In output data, ezero would be VBM or ezero itself if specified. atoms : list/tuple/range, indices of atoms to be returned. Default is None, which does not return ionic projections. orbs : list/tuple/range, indices of orbitals to be returned. Default is None, which does not return orbitals. spins : list/tuple/range of spin sets indices to pick. If None, spin set will be picked 0 or [0,1] if spin-polarized. Default is None. bands : list/tuple/range, indices of bands to pick. overrides elim. Useful to load same bands range for spin up and down channels. Plotting classes automatically handle this for spin up and down channels. Returns ------- Dict2Data object which includes `evals` and `occs` as attributes, and `pros` if atoms and orbs specified. Shape arrays is (spin, [atoms, orbitals], kpts, bands) """ info = self.summary bands_range = range(info.NBANDS) ev = ( r.split()[1:3] for r in self.read(f"<eigenvalues>", f"</eigenvalues>") if "<r>" in r ) ev = (e for es in ev for e in es) # flatten ev = np.fromiter(ev, float) if ev.size: # if not empty ev = ev.reshape((-1, info.NKPTS, info.NBANDS, 2))[ :, self._skipk :, :, : ] # shape is (NSPIN, NKPTS, NBANDS, 2) else: raise ValueError("No eigenvalues found in this file!") evals, occs = ev[:, :, :, 0], ev[:, :, :, 1] sv, kv, ev = np.where( evals == evals[occs > 0.5].max() ) # more than half filled indices sc, kc, ec = np.where( evals == evals[occs < 0.5].min() ) # less than half filled indices evc, kvc, zero = (0.0, 0.0), (0, 0), 0 # default values are empty if sv.size and sc.size: # if both are not empty evc = (float(evals[sv[0], kv[0], ev[0]]), float(evals[sc[0], kc[0], ec[0]])) zero = evc[0] # default value of ezero # svc = (sv[0],sc[0]) # spin indices, implement later if needed kvc = tuple(sorted(product(kv, kc), key=lambda K: np.ptp(K)))[ 0 ] # bring closer points first by sorting and take closest ones if ezero is not None: if not isinstance(ezero, (int, np.integer, float)): raise TypeError("ezero should be a float or integer") zero = ezero if bands: if not isinstance(bands, (list, tuple, range)): raise TypeError( "bands should be a list, tuple or range got {}".format(type(bands)) ) for b in bands: if (not isinstance(b, (int, np.integer))) and (b < 0): raise TypeError( "bands should be a tuple/list/range of positive integers" ) _bands = list(bands) evals = evals[:, :, _bands] occs = occs[:, :, _bands] bands_range = _bands elif elim: if (not isinstance(elim, (list, tuple))) and (len(elim) != 2): raise TypeError("elim should be a tuple of length 2") idx_max = np.max(np.where(evals - zero <= np.max(elim))[2]) + 1 idx_min = np.min(np.where(evals - zero >= np.min(elim))[2]) evals = evals[:, :, idx_min:idx_max] occs = occs[:, :, idx_min:idx_max] bands_range = range(idx_min, idx_max) which_spins = tuple( range(evals.shape[0]) ) # which spin set to pick, defult is what is available out = { "evals": evals, "occs": occs, "ezero": zero, "elim": elim, "evc": evc, "kvc": kvc, "bands": bands_range, "spins": which_spins, } if atoms and orbs: if spins is not None: if not isinstance(spins, (list, tuple, range)): raise TypeError( f"spins should be a list, tuple or range got {type(spins)}" ) for s in spins: if (not isinstance(s, (int, np.integer))) and ( s not in range(self.summary.NSETS) ): raise TypeError( f"spins should be a tuple/list/range of positive integers in {list(range(self.summary.NSETS))}" ) which_spins = spins pros = [] for ws in which_spins: pros.append(self._get_spin_set(ws, bands_range, atoms, orbs, info)) out["pros"] = np.array(pros) # (spins, atoms, orbitals, kpoints, bands) out["atoms"] = atoms out["orbs"] = orbs out["spins"] = which_spins elif (atoms, orbs) != (None, None): raise ValueError("atoms and orbs should be passed together") out["shape"] = "(spins, [atoms, orbitals], kpoints, bands)" return serializer.Dict2Data(out)
[docs] def get_forces(self): "Reads force on each ion from vasprun.xml" node = ET.fromstringlist(self.read("<varray.*forces", "</varray>")) return np.array([[float(i) for i in v.text.split()] for v in node.iter("v")])
[docs] def get_scsteps(self): "Reads all self-consistent steps from vasprun.xml" node = ET.fromstringlist( [ "<steps>", *self.read("<.*scstep", "<structure>", skip_last=True), "</steps>", ] ) # poor formatting steps = [] for e in node.iter("energy"): _d = {} for i in e.iter("i"): if "_en" in i.attrib["name"]: _d[i.attrib["name"]] = float(i.text) steps.append(_d) if steps: arrays = {k: [] for k in steps[0].keys()} for step in steps: for k, v in step.items(): arrays[k].append(v) return serializer.Dict2Data({k: np.array(v) for k, v in arrays.items()})
[docs] def minify(self): "Removes partial dos and projected eigenvalues data from large vasprun.xml to make it smaller in size to save on disk." path = self.path.parent / "mini-vasprun.xml" lines_1 = self.read("xml", "<partial", skip_last=True) lines_2 = islice(self.read("</partial", "<projected", skip_last=True), 1, None) lines_3 = islice(self.read("</projected", "</xml"), 1, None) text = "".join(chain(lines_1, lines_2, lines_3)) with path.open("w") as f: f.write(text) print("Minified vasprun.xml saved at {}".format(str(path)))
[docs] def xml2dict(xmlnode_or_filepath): """Convert xml node or xml file content to dictionary. All output text is in string format, so further processing is required to convert into data types/split etc. Each node has ``tag,text,attr,nodes`` attributes. Every text element can be accessed via ``xml2dict()['nodes'][index]['nodes'][index]...`` tree which makes it simple. """ if isinstance(xmlnode_or_filepath, str): node = ET.parse(xmlnode_or_filepath).getroot() else: node = xmlnode_or_filepath text = node.text.strip() if node.text else "" nodes = [xml2dict(child) for child in list(node)] return {"tag": node.tag, "text": text, "attr": node.attrib, "nodes": nodes}
def gen2numpy( gen, shape, slices, raw: bool = False, dtype=float, delimiter="\s+", include: str = None, exclude: str = "#", fix_format: bool = True, ): """Convert a generator of text lines to numpy array while excluding comments, given matches and empty lines. Data is sliced and reshaped as per given shape and slices. It is very efficient for large data files to fetch only required data. Parameters ---------- gen : generator Typical example is `with open('file.txt') as f: gen = itertools.islice(f,0,None)` shape : tuple Tuple or list of integers. Given shape of data to be read. Last item is considered to be columns. User should keep track of empty lines and excluded lines. slices : tuple Tuple or list of integers or range or -1. Given slices of data to be read along each dimension. Last item is considered to be columns. raw : bool Returns raw data for quick visualizing and determining shape such as columns, if True. dtype : object Data type of numpy array to be returned. Default is float. delimiter : str Delimiter of data in text file. include : str Match to include in each line to be read. If None, all lines are included. exclude : str Match to exclude in each line to be read. If None, no lines are excluded. fix_format : bool If True, it will fix the format of data in each line. It is useful when data is not properly formatted. like 0.500-0.700 -> 0.500 -0.700 Returns ------- ndarray numpy array of given shape and dtype. """ if not isinstance(shape, (list, tuple)): raise TypeError(f"shape must be a list/tuple of size of dimensions.") if not isinstance(slices, (list, tuple)): raise TypeError(f"slices must be a list/tuple of size of dimensions.") if len(shape) != len(slices): raise ValueError(f"shape and slices must be of same size.") for sh in shape: if (not isinstance(sh, (int, np.integer))) or (sh < 1): raise TypeError( f"Each item in shape must be integer of size of that dimension, at least 1." ) for idx, sli in enumerate(slices): if not isinstance(sli, (list, tuple, range, int, np.integer)): raise TypeError( f"Expect -1 to pick all data or list/tuple/range to slice data in a dimension, got {sli}" ) if isinstance(sli, (int, np.integer)) and (sli != -1): raise TypeError( f"Expect -1 to pick all data or list/tuple/range to slice data in a dimension, got {sli}" ) if isinstance(sli, (list, tuple, range)) and not sli: raise TypeError(f"Expect non-empty items in slices, got {type(sli)}") # Verify order, index and values in take if isinstance(sli, (list, tuple, range)): if not all(isinstance(i, (int, np.integer)) for i in sli): raise TypeError(f"Expect integers in a slice, got {sli}") if not all(i >= 0 for i in sli): raise ValueError(f"Expect positive integers in a slice, got {sli}") if not all(i < shape[idx] for i in sli): raise ValueError( f"Some indices in slice {sli} are out of bound for dimension {idx} of size {shape[idx]}" ) if not all(a < b for a, b in zip(sli[:-1], sli[1:])): # Check order raise ValueError(f"Expect increasing order in a slice, got {sli}") new_shape = tuple( [ len(sli) if isinstance(sli, (list, tuple, range)) else N for sli, N in zip(slices, shape) ] ) count = int(np.prod(new_shape)) # include columns here for overall data count. # Process generator if include: gen = (l for l in gen if re.search(include, l)) if exclude: gen = (l for l in gen if not re.search(exclude, l)) gen = (l for l in gen if l.strip()) # remove empty lines gen = islice( gen, 0, int(np.prod(shape[:-1])) ) # Discard lines after required data def fold_dim(gen, take, N): if take == -1: yield from gen # return does not work here. j = 0 group = () for i, line in enumerate(gen, start=1): if j in take: group = chain(group, (line,)) j = j + 1 if i % N == 0: j = 0 yield group group = () def flatten(gen): for g in gen: if isinstance(g, Iterable) and not isinstance(g, str): yield from flatten(g) else: yield g # Slice data, but we keep columns here for now, should return raw data if asked. # reverse order to pick innermost first but leave columns for take, N in zip(slices[:-1][::-1], shape[:-1][::-1]): gen = fold_dim(gen, take, N) else: # flatter on success gen = flatten(gen) # Negative connected digits to avoid fix after slicing to reduce time. if fix_format: gen = (re.sub(r"(\d)-(\d)", r"\1 -\2", l) for l in gen) if raw: return "".join(gen) # lines already have '\n' at the end. # Split columns and flatten after escaped from raw return. gen = (item for line in gen for item in line.replace(delimiter, " ").split()) gen = flatten(fold_dim(gen, slices[-1], shape[-1])) # columns data = np.fromiter(gen, dtype=dtype, count=count) return data.reshape(new_shape)
[docs] def minify_vasprun(path: str): "Minify vasprun.xml file by removing projected data." return Vasprun(path).minify()
def export_outcar(path=None): "Read potential at ionic sites from OUTCAR file." if path is None: path = "./OUTCAR" P = Path(path) if not P.is_file(): raise FileNotFoundError("{} does not exist!".format(path)) # Raeding it with P.open("r") as f: lines = f.readlines() # Processing for i, l in enumerate(lines): if "NIONS" in l: N = int(l.split()[-1]) nlines = np.ceil(N / 5).astype(int) if "electrostatic" in l: start_index = i + 3 stop_index = start_index + nlines if "fractional" in l: first = i + 1 if "vectors are now" in l: b_first = i + 5 # Data manipulation # Potential data = lines[start_index:stop_index] initial = np.loadtxt(StringIO("".join(data[:-1]))).reshape((-1)) last = np.loadtxt(StringIO(data[-1])) pot_arr = np.hstack([initial, last]).reshape((-1, 2)) pot_arr[:, 0] = pot_arr[:, 0] - 1 # Ion index fixing # Nearest neighbors pos = lines[first : first + N] pos_arr = np.loadtxt(StringIO("\n".join(pos))) pos_arr[pos_arr > 0.98] = pos_arr[pos_arr > 0.98] - 1 # Fixing outer layers # positions and potential pos_pot = np.hstack([pos_arr, pot_arr[:, 1:]]) basis = np.loadtxt(StringIO("".join(lines[b_first : b_first + 3]))) final_dict = { "ion_pot": pot_arr, "positions": pos_arr, "site_pot": pos_pot, "basis": basis[:, :3], "rec_basis": basis[:, 3:], } return serializer.OutcarData(final_dict) def export_locpot(path: str = None, data_set: str = 0): """ Returns Data from LOCPOT and similar structure files like CHG/PARCHG etc. Loads only single set based on what is given in data_set argument. Parameters ---------- locpot : str, path/to/LOCPOT or similar stuructured file like CHG. LOCPOT is auto picked in CWD. data_set : int, 0 for electrostatic data, 1 for magnetization data if ISPIN = 2. If non-colinear calculations, 1,2,3 will pick Mx,My,Mz data sets respectively. Only one data set is loaded, so you should know what you are loading. Returns ------- GridData : ``ipyvasp.serializer.GridData`` object with 3D volumetric data set loaded as attribute 'values'. Exceptions ---------- Would raise index error if magnetization density set is not present in case ``data_set > 0``. .. note:: Read `vaspwiki-CHGCAR <https://www.vasp.at/wiki/index.php/CHGCAR>`_ for more info on what data sets are available corresponding to different calculations. """ path = Path(path or "./LOCPOT") if not path.is_file(): raise FileNotFoundError("File {!r} does not exist!".format(str(path))) if data_set < 0 or data_set > 3: raise ValueError( f"`data_set` should be 0 (for electrostatic),1 (for M or Mx),2 (for My),3 (for Mz)! Got {data_set}" ) # data fixing after reading islice from file. def fix_data(islice_gen, shape): try: new_gen = (float(l) for line in islice_gen for l in line.split()) COUNT = np.prod(shape).astype(int) data = np.fromiter( new_gen, dtype=float, count=COUNT ) # Count is must for performance # data written on LOCPOT is in shape of (NGz,NGy,NGx) N_reshape = [shape[2], shape[1], shape[0]] data = data.reshape(N_reshape).transpose([2, 1, 0]) return data except: if data_set == 0: raise ValueError("File {!r} may not be in proper format!".format(path)) else: raise IndexError( "Magnetization density may not be present in {!r}!".format(path) ) # Reading File with path.open("r") as f: lines = [] f.seek(0) for i in range(8): lines.append(f.readline()) N = sum([int(v) for v in lines[6].split()]) f.seek(0) poscar = [] for _ in range(N + 8): poscar.append(f.readline()) f.readline() # Empty one Nxyz = [int(v) for v in f.readline().split()] # Grid line read nlines = np.ceil(np.prod(Nxyz) / 5).astype(int) # islice is faster generator for reading potential pot_dict = {} if data_set == 0: pot_dict.update({"values": fix_data(islice(f, nlines), Nxyz)}) ignore_set = 0 # Pointer already ahead. else: ignore_set = nlines # Needs to move pointer to magnetization # reading Magnetization if True ignore_n = np.ceil(N / 5).astype(int) + 1 # Some kind of useless data if data_set == 1: print( "Note: data_set = 1 picks Mx for non-colinear case, and M for ISPIN = 2." ) start = ignore_n + ignore_set pot_dict.update( {"values": fix_data(islice(f, start, start + nlines), Nxyz)} ) elif data_set == 2: start = 2 * ignore_n + nlines + ignore_set pot_dict.update( {"values": fix_data(islice(f, start, start + nlines), Nxyz)} ) elif data_set == 3: start = 3 * ignore_n + 2 * nlines + ignore_set pot_dict.update( {"values": fix_data(islice(f, start, start + nlines), Nxyz)} ) # Read Info from .._lattice import export_poscar # Keep inside to avoid import loop poscar_data = export_poscar(content="\n".join(p.strip() for p in poscar)) final_dict = dict( SYSTEM=poscar_data.SYSTEM, path=str(path), **pot_dict, poscar=poscar_data ) return serializer.GridData(final_dict)