__all__ = ["Vasprun", "Vaspout", "minify_vasprun", "xml2dict","read"]
import re
from io import StringIO
from itertools import islice, chain, product
from collections.abc import Iterable
import xml.etree.ElementTree as ET
from pathlib import Path
import numpy as np
from . import serializer
class DataSource:
"""Base class for all data sources. It provides a common interface to access data from different sources.
Subclass it to get data from a source and implement the abstract methods."""
def __init__(self, path, skipk=None):
self._path = Path(path).absolute()
self._skipk = (
skipk if isinstance(skipk, (int, np.integer)) else self._read_skipk()
)
if not self._path.is_file():
raise FileNotFoundError("File: '{}'' does not exist!".format(path))
self._summary = self.get_summary() # summary data is read only once
def __repr__(self):
return f"{self.__class__.__name__}({str(self.path)})"
@property
def path(self):
return self._path
@property
def summary(self):
return self._summary
@property
def bands(self):
if not hasattr(self, "_bands"):
from ..bsdos import Bands
self._bands = Bands(self)
return self._bands # keep same instance to avoid data loss
@property
def dos(self):
if not hasattr(self, "_dos"):
from ..bsdos import DOS
self._dos = DOS(self)
return self._dos # keep same instance to avoid data loss
@property
def poscar(self):
if not hasattr(self, "_poscar"):
from ..lattice import POSCAR
self._poscar = POSCAR(data=self.get_structure())
return self._poscar # keep same instance to avoid data loss
def get_evals_dataframe(self, spins=[0], bands=[0], atoms=None, orbs=None):
"""Initialize and return an EvalsDataFrame instance, with the given parameters."""
from ..evals_dataframe import EvalsDataFrame
return EvalsDataFrame(self, spins=spins, bands=bands, atoms=atoms, orbs=orbs)
# Following methods should be implemented in a subclass
def get_summary(self):
raise NotImplementedError(
"`get_summary` should be implemented in a subclass. See Vasprun.get_summary as example."
)
def get_structure(self):
raise NotImplementedError(
"`get_structure` should be implemented in a subclass. See Vasprun.get_structure as example."
)
def _read_skipk(self):
raise NotImplementedError(
"`_read_skipk` should be implemented in a subclass. See Vasprun._read_skipk as example."
)
def get_skipk(self):
"Returns the number of first few k-points to skip in band structure plot in case of HSE calculation."
return self._skipk
def set_skipk(self, skipk):
"Set the number of first few k-points to skip in band structure plot in case of HSE calculation."
if isinstance(skipk, (int, np.integer)):
self._skipk = skipk
else:
raise TypeError("skipk should be an integer to skip first few random kpoints.")
def get_kpoints(self):
raise NotImplementedError(
"`get_kpoints` should be implemented in a subclass. See Vasprun.get_kpoints as example."
)
def get_evals(self, *args, **kwargs):
raise NotImplementedError(
"`get_evals` should be implemented in a subclass. See Vasprun.get_evals as example."
)
def get_dos(self, *args, **kwargs):
raise NotImplementedError(
"`get_dos` should be implemented in a subclass. See Vasprun.get_dos as example."
)
def get_forces(self):
raise NotImplementedError(
"`get_forces` should be implemented in a subclass. See Vasprun.get_forces as example."
)
def get_scsteps(self):
raise NotImplementedError(
"`get_scsteps` should be implemented in a subclass. See Vasprun.get_scsteps as example."
)
[docs]
class Vaspout(DataSource):
"Read data from vaspout.h5 file on demand."
# These methods are accessible from parent class, but need here for including in sphinx documentation
get_evals_dataframe = DataSource.get_evals_dataframe
poscar = DataSource.poscar
dos = DataSource.dos
bands = DataSource.bands
def __init__(self, path, skipk=None):
# super().__init__(path, skipk)
raise NotImplementedError("Vaspout is not implemented yet.")
[docs]
def read(file, start_match, stop_match=r'\n', nth_match=1, skip_last=False,apply=None):
"""Reads a part of the file between start_match and stop_match and returns a generator. It is lazy and fast.
`start_match` and `stop_match`(default is end of same line) are regular expressions. `nth_match` is the number of occurence of start_match to start reading.
`skip_last` is used to determine whether to keep or skip last line.
`apply` should be None or `func` to transform each captured line.
"""
if "|" in start_match:
raise ValueError(
"start_match should be a single match, so '|' character is not allowed."
)
with Path(file).open("r") as f:
lines = islice(f, None) # this is fast
matched = False
n_start = 1
for line in lines:
if re.search(start_match, line, flags=re.DOTALL):
if nth_match != n_start:
n_start += 1
else:
matched = True
if matched and re.search(
stop_match, line, flags=re.DOTALL
): # avoid stop before start
matched = False
if not skip_last:
yield apply(line) if callable(apply) else line
break # stop reading
if matched: # should be after break to handle last line above
yield apply(line) if callable(apply) else line
[docs]
class Vasprun(DataSource):
"Reads vasprun.xml file lazily. It reads only the required data from the file when a plot or data access is requested."
# These methods are accessible from parent class, but need here for including in sphinx documentation
get_evals_dataframe = DataSource.get_evals_dataframe
poscar = DataSource.poscar
dos = DataSource.dos
bands = DataSource.bands
def __init__(self, path="./vasprun.xml", skipk=None):
super().__init__(path, skipk)
[docs]
def read(self, start_match, stop_match=r'\n', nth_match=1, skip_last=False,apply=None):
"""Reads a part of the file between start_match and stop_match and returns a generator. It is lazy and fast.
`start_match` and `stop_match`(default is end of same line) are regular expressions. `nth_match` is the number of occurence of start_match to start reading.
`skip_last` is used to determine whether to keep or skip last line.
`apply` should be None or `func` to transform each captured line.
"""
kws = {k:v for k,v in locals().items() if k !='self'}
return read(self.path,**kws)
def _read_skipk(self):
"Returns the number of k-points to skip in band structure plot in case of HSE calculation."
weights = np.fromiter(
(
v.text
for v in ET.fromstringlist(
self.read("<varray.*weights", "</varray")
).iter("v")
),
dtype=float,
)
return len([w for w in weights if w != weights[-1]])
[docs]
def get_summary(self):
"Returns summary data of calculation."
incar = {
v.attrib["name"]: v.text
for v in ET.fromstringlist(self.read("<incar", "</incar")).iter("i")
}
info_dict = {"SYSTEM": incar.get("SYSTEM", None)} # some people don't write system in incar
info_dict["ISPIN"] = int(
ET.fromstring(next(self.read("<i.*ISPIN", "</i>"))).text
)
info_dict["NKPTS"] = len(
[
v
for v in ET.fromstringlist(
self.read("<varray.*kpoint", "</varray")
).iter("v")
]
)
_nbands = [*self.read("<i.*NBANDS[\"\']", "</i>"),*self.read("<i.*NBANDS[\"\']", "</i>",nth_match=2)]
info_dict["NBANDS"] = int( # Bad initializations of NBANDS, read last one
ET.fromstring(_nbands[-1]).text
)
info_dict["NELECTS"] = int(
float(ET.fromstring(next(self.read("<i.*NELECT", "</i>"))).text)
) # Beacuse they are poorly writtern as float, come on VASP!
info_dict["LSORBIT"] = (
True
if "t" in ET.fromstring(next(self.read("<i.*LSORBIT", "</i>"))).text.lower()
else False
)
info_dict["EFERMI"] = float(
ET.fromstring(next(chain(
self.read("<i.*efermi", "</i>"), # not always there but this is correct one
self.read("<i.*EFERMI", "</i>") # zero, always there in start
))).text
)
info_dict["NEDOS"] = int(
ET.fromstring(next(self.read("<i.*NEDOS", "</i>"))).text
)
# Getting ions is kind of terrbile, but it works
atominfo = ET.fromstringlist(self.read("<atominfo", "</atominfo"))
info_dict["NIONS"] = [int(atom.text) for atom in atominfo.iter("atoms")][0]
types = [int(_type.text) for _type in atominfo.iter("types")][0]
elems = [rc[0].text.strip() for rc in atominfo.iter("rc")]
_inds = [int(a) for a in elems[-types:]]
inds = np.cumsum([0, *_inds]).astype(int)
names = [] # for keeping order, do not use unique or set
for elem in elems[:-types]:
if elem not in names:
names.append(elem)
info_dict["types"] = {
name: range(inds[i], inds[i + 1]) for i, name in enumerate(names)
}
if info_dict["SYSTEM"] is None:
info_dict["SYSTEM"] = "".join(names)
# Getting projection fields
fields = [
f.replace("field", "").strip(" ></\n")
for f in self.read("<partial", "<set")
if "field" in f
] # poor formatting again
if fields:
*others, sets_lines = [
r for r in self.read("<partial>", "</partial>") if "spin " in r
] # Least worse way to read sets
info_dict["NSETS"] = int(re.findall("(\d+)", sets_lines)[0])
info_dict["orbs"] = [f for f in fields if "energy" not in f]
info_dict["incar"] = incar # Just at end
return serializer.Dict2Data(info_dict)
[docs]
def get_structure(self):
"Returns a structure object including types, basis, rec_basis and positions."
arrays = np.array(
[
[float(i) for i in v.split()[1:4]]
for v in self.read("<structure.*finalpos", "select|</structure")
if "<v>" in v
]
) # Stop at selctive dynamics if exists
info = self.summary
return serializer.PoscarData(
{
"SYSTEM": info.SYSTEM,
"basis": arrays[:3],
"rec_basis": arrays[3:6],
"positions": arrays[6:],
"types": info.types,
"metadata": {
"comment": "Exported from vasprun.xml",
"cartesian": False,
"scale": 1,
},
}
)
[docs]
def get_kpoints(self):
"Returns k-points data including kpoints, coords, weights and rec_basis in which coords are calculated."
kpoints = np.array(
[
[float(i) for i in v.text.split()]
for v in ET.fromstringlist(
self.read("<varray.*kpoint", "</varray")
).iter("v")
]
)[self._skipk :]
weights = np.fromiter(
(
v.text
for v in ET.fromstringlist(
self.read("<varray.*weights", "</varray")
).iter("v")
),
dtype=float,
)[self._skipk :]
# Get rec_basis to make cartesian coordinates
rec_basis = np.array(
[
[float(i) for i in v.split()[1:4]]
for v in self.read("<structure.*finalpos", "select|</structure")
if "<v>" in v
]
)[
3:6
] # could be selective dynamics there
coords = kpoints.dot(rec_basis) # cartesian coordinates
kpath = np.cumsum([0, *np.linalg.norm(coords[1:] - coords[:-1], axis=1)])
kpath = (
kpath / kpath[-1]
) # normalized kpath to see the band structure of all materials in same scale
return serializer.Dict2Data(
{
"kpoints": kpoints,
"coords": coords,
"kpath": kpath,
"weights": weights,
"rec_basis": rec_basis,
}
)
[docs]
def get_dos(self, elim=None, ezero=None, atoms=None, orbs=None, spins=None):
"""
Returns energy, total and integrated dos of the calculation. If atoms and orbs are specified, then partial dos are included too.
Parameters
----------
elim : tuple, energy range to be returned. Default is None, which returns full range. `elim` is applied around `ezero` if specified, otherwise around VBM.
ezero : float, energy reference to be used. Default is None, which uses VBM as reference. ezero is ignored if elim is not specified. In output data, ezero would be VBM or ezero itself if specified.
atoms : list/tuple/range, indices of atoms to be returned. Default is None, which does not return ionic projections.
orbs : list/tuple/range, indices of orbitals to be returned. Default is None, which does not return orbitals.
spins : list/tuple/range of spin sets indices to pick. If None, spin set will be picked 0 or [0,1] if spin-polarized. Default is None.
Returns
-------
Dict2Data : object which includes `energy`,'tdos` and `idos` as attributes, and includes `pdos` if atoms and orbs specified. Shape of arrays a is (spins, [atoms, orbs], energy).
"""
ds = (r.split()[1:4] for r in self.read("<total>", "</total>") if "<r>" in r)
ds = (d for dd in ds for d in dd) # flatten
ds = np.fromiter(ds, dtype=float)
if ds.size:
ds = ds.reshape(-1, self.summary.NEDOS, 3) # E, total, integrated
else:
raise ValueError("No dos data found in the file!")
en, total, integrad = ds.transpose((2, 0, 1))
vbm = float(
en[(integrad < self.summary.NELECTS) & (integrad > 0)].max()
) # second condition is important
cbm = float(en[(integrad > self.summary.NELECTS) & (integrad > 0)].min())
zero = vbm
grid_range = range(en.shape[1]) # default range full
if elim is not None:
if (not isinstance(elim, (list, tuple))) and (len(elim) != 2):
raise TypeError("elim should be a tuple of length 2")
if ezero is not None:
if not isinstance(ezero, (int, np.integer, float)):
raise TypeError("ezero should be a float or integer")
zero = ezero
_max = (
np.max(np.where(en - zero <= np.max(elim))[1]) + 1
) # +1 to make range inclusive
_min = np.min(np.where(en - zero >= np.min(elim))[1])
en = en[:, _min:_max]
total = total[:, _min:_max]
integrad = integrad[:, _min:_max]
grid_range = range(_min, _max)
out = {
"energy": en,
"tdos": total,
"idos": integrad,
"evc": (vbm, cbm),
"ezero": zero,
"elim": elim,
"spins": list(range(en.shape[0])),
}
if atoms and orbs:
if not list(
self.read("<partial", "")
): # no partial dos, hust check that line
raise ValueError("No partial dos found in the file!")
which_spins = tuple(range(en.shape[0]))
if spins is not None:
if not isinstance(spins, (list, tuple, range)):
raise TypeError(
"spins should be a list, tuple or range got {}".format(
type(spins)
)
)
for s in spins:
if (not isinstance(s, (int, np.integer))) and (
s not in range(self.summary.NSETS)
):
raise TypeError(
f"spins should be a tuple/list/range of positive integers in {list(range(self.summary.NSETS))}"
)
which_spins = spins
gen = (
r.strip(" \n/<>r")
for r in self.read(f"<partial>", f"</partial>")
if "<r>" in r
) # stripping is must here to ensure that we get only numbers
old_shape = (
self.summary.NIONS,
self.summary.NSETS,
self.summary.NEDOS,
len(self.summary.orbs) + 1,
)
data = gen2numpy(
gen,
old_shape,
slices=[atoms, which_spins, grid_range, [o + 1 for o in orbs]],
dtype=float,
) # No need to pick up the first column as it is the energy
out["pdos"] = data.transpose((1, 0, 3, 2)) # (spins, atoms, orbitsl,energy)
out["atoms"] = atoms
out["orbs"] = orbs
out["spins"] = which_spins
elif (atoms, orbs) != (None, None):
raise ValueError("atoms and orbs should be specified together")
out["shape"] = "(spin, [atoms, orbitals], energy)"
return serializer.Dict2Data(out)
def _get_spin_set(self, spin, bands, atoms, orbs, sys_info):
"sys_info is the summary data of calculation, used to get the shape of the array."
if not hasattr(sys_info, "orbs"):
raise ValueError(
f"The file {self.path!r} does not have projected orbitals!"
)
shape = (sys_info.NKPTS, sys_info.NBANDS, sys_info.NIONS, len(sys_info.orbs))
slices = [range(self._skipk, sys_info.NKPTS), bands, atoms, orbs]
if not list(self.read(f"spin{spin+1}", "")):
raise ValueError(
f"Given {spin} index for spin is larger than available in the file!"
)
gen = (
r.strip(" \n/<>r")
for r in self.read(f"spin{spin+1}", f"spin{spin+2}|</projected")
if "<r>" in r
) # stripping is must here to ensure that we get only numbers
return gen2numpy(gen, shape, slices).transpose(
(2, 3, 0, 1)
) # (atoms, orbs, kpts, bands)
[docs]
def get_evals(
self, elim=None, ezero=None, atoms=None, orbs=None, spins=None, bands=None
):
"""
Returns eigenvalues and occupations of the calculation. If atoms and orbs are specified, then orbitals are included too.
Parameters
----------
elim : tuple, energy range to be returned. Default is None, which returns all eigenvalues. `elim` is applied around `ezero` if specified, otherwise around VBM.
ezero : float, energy reference to be used. Default is None, which uses VBM as reference. ezero is ignored if elim is not specified. In output data, ezero would be VBM or ezero itself if specified.
atoms : list/tuple/range, indices of atoms to be returned. Default is None, which does not return ionic projections.
orbs : list/tuple/range, indices of orbitals to be returned. Default is None, which does not return orbitals.
spins : list/tuple/range of spin sets indices to pick. If None, spin set will be picked 0 or [0,1] if spin-polarized. Default is None.
bands : list/tuple/range, indices of bands to pick. overrides elim. Useful to load same bands range for spin up and down channels. Plotting classes automatically handle this for spin up and down channels.
Returns
-------
Dict2Data object which includes `evals` and `occs` as attributes, and `pros` if atoms and orbs specified. Shape arrays is (spin, [atoms, orbitals], kpts, bands)
"""
info = self.summary
bands_range = range(info.NBANDS)
ev = (
r.split()[1:3]
for r in self.read(f"<eigenvalues>", f"</eigenvalues>")
if "<r>" in r
)
ev = (e for es in ev for e in es) # flatten
ev = np.fromiter(ev, float)
if ev.size: # if not empty
ev = ev.reshape((-1, info.NKPTS, info.NBANDS, 2))[
:, self._skipk :, :, :
] # shape is (NSPIN, NKPTS, NBANDS, 2)
else:
raise ValueError("No eigenvalues found in this file!")
evals, occs = ev[:, :, :, 0], ev[:, :, :, 1]
sv, kv, ev = np.where(
evals == evals[occs > 0.5].max()
) # more than half filled indices
sc, kc, ec = np.where(
evals == evals[occs < 0.5].min()
) # less than half filled indices
evc, kvc, zero = (0.0, 0.0), (0, 0), 0 # default values are empty
if sv.size and sc.size: # if both are not empty
evc = (float(evals[sv[0], kv[0], ev[0]]), float(evals[sc[0], kc[0], ec[0]]))
zero = evc[0] # default value of ezero
# svc = (sv[0],sc[0]) # spin indices, implement later if needed
kvc = tuple(sorted(product(kv, kc), key=lambda K: np.ptp(K)))[
0
] # bring closer points first by sorting and take closest ones
if ezero is not None:
if not isinstance(ezero, (int, np.integer, float)):
raise TypeError("ezero should be a float or integer")
zero = ezero
if bands:
if not isinstance(bands, (list, tuple, range)):
raise TypeError(
"bands should be a list, tuple or range got {}".format(type(bands))
)
for b in bands:
if (not isinstance(b, (int, np.integer))) and (b < 0):
raise TypeError(
"bands should be a tuple/list/range of positive integers"
)
_bands = list(bands)
evals = evals[:, :, _bands]
occs = occs[:, :, _bands]
bands_range = _bands
elif elim:
if (not isinstance(elim, (list, tuple))) and (len(elim) != 2):
raise TypeError("elim should be a tuple of length 2")
idx_max = np.max(np.where(evals - zero <= np.max(elim))[2]) + 1
idx_min = np.min(np.where(evals - zero >= np.min(elim))[2])
evals = evals[:, :, idx_min:idx_max]
occs = occs[:, :, idx_min:idx_max]
bands_range = range(idx_min, idx_max)
which_spins = tuple(
range(evals.shape[0])
) # which spin set to pick, defult is what is available
out = {
"evals": evals,
"occs": occs,
"ezero": zero,
"elim": elim,
"evc": evc,
"kvc": kvc,
"bands": bands_range,
"spins": which_spins,
}
if atoms and orbs:
if spins is not None:
if not isinstance(spins, (list, tuple, range)):
raise TypeError(
f"spins should be a list, tuple or range got {type(spins)}"
)
for s in spins:
if (not isinstance(s, (int, np.integer))) and (
s not in range(self.summary.NSETS)
):
raise TypeError(
f"spins should be a tuple/list/range of positive integers in {list(range(self.summary.NSETS))}"
)
which_spins = spins
pros = []
for ws in which_spins:
pros.append(self._get_spin_set(ws, bands_range, atoms, orbs, info))
out["pros"] = np.array(pros) # (spins, atoms, orbitals, kpoints, bands)
out["atoms"] = atoms
out["orbs"] = orbs
out["spins"] = which_spins
elif (atoms, orbs) != (None, None):
raise ValueError("atoms and orbs should be passed together")
out["shape"] = "(spins, [atoms, orbitals], kpoints, bands)"
return serializer.Dict2Data(out)
[docs]
def get_forces(self):
"Reads force on each ion from vasprun.xml"
node = ET.fromstringlist(self.read("<varray.*forces", "</varray>"))
return np.array([[float(i) for i in v.text.split()] for v in node.iter("v")])
[docs]
def get_scsteps(self):
"Reads all self-consistent steps from vasprun.xml"
node = ET.fromstringlist(
[
"<steps>",
*self.read("<.*scstep", "<structure>", skip_last=True),
"</steps>",
]
) # poor formatting
steps = []
for e in node.iter("energy"):
_d = {}
for i in e.iter("i"):
if "_en" in i.attrib["name"]:
_d[i.attrib["name"]] = float(i.text)
steps.append(_d)
if steps:
arrays = {k: [] for k in steps[0].keys()}
for step in steps:
for k, v in step.items():
arrays[k].append(v)
return serializer.Dict2Data({k: np.array(v) for k, v in arrays.items()})
[docs]
def minify(self):
"Removes partial dos and projected eigenvalues data from large vasprun.xml to make it smaller in size to save on disk."
path = self.path.parent / "mini-vasprun.xml"
lines_1 = self.read("xml", "<partial", skip_last=True)
lines_2 = islice(self.read("</partial", "<projected", skip_last=True), 1, None)
lines_3 = islice(self.read("</projected", "</xml"), 1, None)
text = "".join(chain(lines_1, lines_2, lines_3))
with path.open("w") as f:
f.write(text)
print("Minified vasprun.xml saved at {}".format(str(path)))
[docs]
def xml2dict(xmlnode_or_filepath):
"""Convert xml node or xml file content to dictionary.
All output text is in string format, so further processing is required to convert into data types/split etc.
Each node has ``tag,text,attr,nodes`` attributes. Every text element can be accessed via
``xml2dict()['nodes'][index]['nodes'][index]...`` tree which makes it simple.
"""
if isinstance(xmlnode_or_filepath, str):
node = ET.parse(xmlnode_or_filepath).getroot()
else:
node = xmlnode_or_filepath
text = node.text.strip() if node.text else ""
nodes = [xml2dict(child) for child in list(node)]
return {"tag": node.tag, "text": text, "attr": node.attrib, "nodes": nodes}
def gen2numpy(
gen,
shape,
slices,
raw: bool = False,
dtype=float,
delimiter="\s+",
include: str = None,
exclude: str = "#",
fix_format: bool = True,
):
"""Convert a generator of text lines to numpy array while excluding comments, given matches and empty lines.
Data is sliced and reshaped as per given shape and slices. It is very efficient for large data files to fetch only required data.
Parameters
----------
gen : generator
Typical example is `with open('file.txt') as f: gen = itertools.islice(f,0,None)`
shape : tuple
Tuple or list of integers. Given shape of data to be read. Last item is considered to be columns.
User should keep track of empty lines and excluded lines.
slices : tuple
Tuple or list of integers or range or -1. Given slices of data to be read along each dimension.
Last item is considered to be columns.
raw : bool
Returns raw data for quick visualizing and determining shape such as columns, if True.
dtype : object
Data type of numpy array to be returned. Default is float.
delimiter : str
Delimiter of data in text file.
include : str
Match to include in each line to be read. If None, all lines are included.
exclude : str
Match to exclude in each line to be read. If None, no lines are excluded.
fix_format : bool
If True, it will fix the format of data in each line. It is useful when data is not properly formatted. like 0.500-0.700 -> 0.500 -0.700
Returns
-------
ndarray
numpy array of given shape and dtype.
"""
if not isinstance(shape, (list, tuple)):
raise TypeError(f"shape must be a list/tuple of size of dimensions.")
if not isinstance(slices, (list, tuple)):
raise TypeError(f"slices must be a list/tuple of size of dimensions.")
if len(shape) != len(slices):
raise ValueError(f"shape and slices must be of same size.")
for sh in shape:
if (not isinstance(sh, (int, np.integer))) or (sh < 1):
raise TypeError(
f"Each item in shape must be integer of size of that dimension, at least 1."
)
for idx, sli in enumerate(slices):
if not isinstance(sli, (list, tuple, range, int, np.integer)):
raise TypeError(
f"Expect -1 to pick all data or list/tuple/range to slice data in a dimension, got {sli}"
)
if isinstance(sli, (int, np.integer)) and (sli != -1):
raise TypeError(
f"Expect -1 to pick all data or list/tuple/range to slice data in a dimension, got {sli}"
)
if isinstance(sli, (list, tuple, range)) and not sli:
raise TypeError(f"Expect non-empty items in slices, got {type(sli)}")
# Verify order, index and values in take
if isinstance(sli, (list, tuple, range)):
if not all(isinstance(i, (int, np.integer)) for i in sli):
raise TypeError(f"Expect integers in a slice, got {sli}")
if not all(i >= 0 for i in sli):
raise ValueError(f"Expect positive integers in a slice, got {sli}")
if not all(i < shape[idx] for i in sli):
raise ValueError(
f"Some indices in slice {sli} are out of bound for dimension {idx} of size {shape[idx]}"
)
if not all(a < b for a, b in zip(sli[:-1], sli[1:])): # Check order
raise ValueError(f"Expect increasing order in a slice, got {sli}")
new_shape = tuple(
[
len(sli) if isinstance(sli, (list, tuple, range)) else N
for sli, N in zip(slices, shape)
]
)
count = int(np.prod(new_shape)) # include columns here for overall data count.
# Process generator
if include:
gen = (l for l in gen if re.search(include, l))
if exclude:
gen = (l for l in gen if not re.search(exclude, l))
gen = (l for l in gen if l.strip()) # remove empty lines
gen = islice(
gen, 0, int(np.prod(shape[:-1]))
) # Discard lines after required data
def fold_dim(gen, take, N):
if take == -1:
yield from gen # return does not work here.
j = 0
group = ()
for i, line in enumerate(gen, start=1):
if j in take:
group = chain(group, (line,))
j = j + 1
if i % N == 0:
j = 0
yield group
group = ()
def flatten(gen):
for g in gen:
if isinstance(g, Iterable) and not isinstance(g, str):
yield from flatten(g)
else:
yield g
# Slice data, but we keep columns here for now, should return raw data if asked.
# reverse order to pick innermost first but leave columns
for take, N in zip(slices[:-1][::-1], shape[:-1][::-1]):
gen = fold_dim(gen, take, N)
else: # flatter on success
gen = flatten(gen)
# Negative connected digits to avoid fix after slicing to reduce time.
if fix_format:
gen = (re.sub(r"(\d)-(\d)", r"\1 -\2", l) for l in gen)
if raw:
return "".join(gen) # lines already have '\n' at the end.
# Split columns and flatten after escaped from raw return.
gen = (item for line in gen for item in line.replace(delimiter, " ").split())
gen = flatten(fold_dim(gen, slices[-1], shape[-1])) # columns
data = np.fromiter(gen, dtype=dtype, count=count)
return data.reshape(new_shape)
[docs]
def minify_vasprun(path: str):
"Minify vasprun.xml file by removing projected data."
return Vasprun(path).minify()
def export_outcar(path=None):
"Read potential at ionic sites from OUTCAR file."
if path is None:
path = "./OUTCAR"
P = Path(path)
if not P.is_file():
raise FileNotFoundError("{} does not exist!".format(path))
# Raeding it
with P.open("r") as f:
lines = f.readlines()
# Processing
for i, l in enumerate(lines):
if "NIONS" in l:
N = int(l.split()[-1])
nlines = np.ceil(N / 5).astype(int)
if "electrostatic" in l:
start_index = i + 3
stop_index = start_index + nlines
if "fractional" in l:
first = i + 1
if "vectors are now" in l:
b_first = i + 5
# Data manipulation
# Potential
data = lines[start_index:stop_index]
initial = np.loadtxt(StringIO("".join(data[:-1]))).reshape((-1))
last = np.loadtxt(StringIO(data[-1]))
pot_arr = np.hstack([initial, last]).reshape((-1, 2))
pot_arr[:, 0] = pot_arr[:, 0] - 1 # Ion index fixing
# Nearest neighbors
pos = lines[first : first + N]
pos_arr = np.loadtxt(StringIO("\n".join(pos)))
pos_arr[pos_arr > 0.98] = pos_arr[pos_arr > 0.98] - 1 # Fixing outer layers
# positions and potential
pos_pot = np.hstack([pos_arr, pot_arr[:, 1:]])
basis = np.loadtxt(StringIO("".join(lines[b_first : b_first + 3])))
final_dict = {
"ion_pot": pot_arr,
"positions": pos_arr,
"site_pot": pos_pot,
"basis": basis[:, :3],
"rec_basis": basis[:, 3:],
}
return serializer.OutcarData(final_dict)
def export_locpot(path: str = None, data_set: str = 0):
"""
Returns Data from LOCPOT and similar structure files like CHG/PARCHG etc. Loads only single set based on what is given in data_set argument.
Parameters
----------
locpot : str, path/to/LOCPOT or similar stuructured file like CHG. LOCPOT is auto picked in CWD.
data_set : int, 0 for electrostatic data, 1 for magnetization data if ISPIN = 2. If non-colinear calculations, 1,2,3 will pick Mx,My,Mz data sets respectively. Only one data set is loaded, so you should know what you are loading.
Returns
-------
GridData : ``ipyvasp.serializer.GridData`` object with 3D volumetric data set loaded as attribute 'values'.
Exceptions
----------
Would raise index error if magnetization density set is not present in case ``data_set > 0``.
.. note::
Read `vaspwiki-CHGCAR <https://www.vasp.at/wiki/index.php/CHGCAR>`_ for more info on what data sets are available corresponding to different calculations.
"""
path = Path(path or "./LOCPOT")
if not path.is_file():
raise FileNotFoundError("File {!r} does not exist!".format(str(path)))
if data_set < 0 or data_set > 3:
raise ValueError(
f"`data_set` should be 0 (for electrostatic),1 (for M or Mx),2 (for My),3 (for Mz)! Got {data_set}"
)
# data fixing after reading islice from file.
def fix_data(islice_gen, shape):
try:
new_gen = (float(l) for line in islice_gen for l in line.split())
COUNT = np.prod(shape).astype(int)
data = np.fromiter(
new_gen, dtype=float, count=COUNT
) # Count is must for performance
# data written on LOCPOT is in shape of (NGz,NGy,NGx)
N_reshape = [shape[2], shape[1], shape[0]]
data = data.reshape(N_reshape).transpose([2, 1, 0])
return data
except:
if data_set == 0:
raise ValueError("File {!r} may not be in proper format!".format(path))
else:
raise IndexError(
"Magnetization density may not be present in {!r}!".format(path)
)
# Reading File
with path.open("r") as f:
lines = []
f.seek(0)
for i in range(8):
lines.append(f.readline())
N = sum([int(v) for v in lines[6].split()])
f.seek(0)
poscar = []
for _ in range(N + 8):
poscar.append(f.readline())
f.readline() # Empty one
Nxyz = [int(v) for v in f.readline().split()] # Grid line read
nlines = np.ceil(np.prod(Nxyz) / 5).astype(int)
# islice is faster generator for reading potential
pot_dict = {}
if data_set == 0:
pot_dict.update({"values": fix_data(islice(f, nlines), Nxyz)})
ignore_set = 0 # Pointer already ahead.
else:
ignore_set = nlines # Needs to move pointer to magnetization
# reading Magnetization if True
ignore_n = np.ceil(N / 5).astype(int) + 1 # Some kind of useless data
if data_set == 1:
print(
"Note: data_set = 1 picks Mx for non-colinear case, and M for ISPIN = 2."
)
start = ignore_n + ignore_set
pot_dict.update(
{"values": fix_data(islice(f, start, start + nlines), Nxyz)}
)
elif data_set == 2:
start = 2 * ignore_n + nlines + ignore_set
pot_dict.update(
{"values": fix_data(islice(f, start, start + nlines), Nxyz)}
)
elif data_set == 3:
start = 3 * ignore_n + 2 * nlines + ignore_set
pot_dict.update(
{"values": fix_data(islice(f, start, start + nlines), Nxyz)}
)
# Read Info
from .._lattice import export_poscar # Keep inside to avoid import loop
poscar_data = export_poscar(content="\n".join(p.strip() for p in poscar))
final_dict = dict(
SYSTEM=poscar_data.SYSTEM, path=str(path), **pot_dict, poscar=poscar_data
)
return serializer.GridData(final_dict)