https://github.com/deepmodeling/deepks-kit
Tip revision: 940b9a28c6e9423f34a8fd31fc3139d00df5388e authored by Yixiao Chen on 09 December 2021, 05:47:50 UTC
Merge pull request #12 from ouqi0711/master
Merge pull request #12 from ouqi0711/master
Tip revision: 940b9a2
convert_xyz.py
import os
import numpy as np
from glob import glob
BOHR = 0.52917721092
ELEMENTS = ['X', # Ghost
'H' , 'He', 'Li', 'Be', 'B' , 'C' , 'N' , 'O' , 'F' , 'Ne',
'Na', 'Mg', 'Al', 'Si', 'P' , 'S' , 'Cl', 'Ar', 'K' , 'Ca',
'Sc', 'Ti', 'V' , 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn',
'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Y' , 'Zr',
'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn',
'Sb', 'Te', 'I' , 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd',
'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb',
'Lu', 'Hf', 'Ta', 'W' , 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg',
'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac', 'Th',
'Pa', 'U' , 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm',
'Md', 'No', 'Lr', 'Rf', 'Db', 'Sg', 'Bh', 'Hs', 'Mt', 'Ds',
'Rg', 'Cn', 'Nh', 'Fl', 'Mc', 'Lv', 'Ts', 'Og',
]
CHARGES = dict(((x,i) for i,x in enumerate(ELEMENTS)))
def parse_xyz(filename):
with open(filename) as fp:
natom = int(fp.readline())
comments = fp.readline().strip()
atom_str = fp.readlines()
atom_list = [a.split() for a in atom_str if a.strip()]
elements = [a[0] for a in atom_list]
coords = np.array([a[1:] for a in atom_list], dtype=float)
return natom, comments, elements, coords
def parse_unit(rawunit):
if isinstance(rawunit, str):
try:
unit = float(rawunit)
except ValueError:
if rawunit.upper().startswith(('B', 'AU')):
unit = BOHR
else: #unit[:3].upper() == 'ANG':
unit = 1.
else:
unit = rawunit
return unit
def load_array(file):
ext = os.path.splitext(file)[-1]
if "npy" in ext:
return np.load(file)
elif "npz" in ext:
raise NotImplementedError
else:
try:
arr = np.loadtxt(file)
except ValueError:
arr = np.loadtxt(file, dtype=str)
return arr
def load_glob(pattern):
[fn] = glob(pattern)
return load_array(fn)
def load_system(xyz_file):
base, ext = os.path.splitext(xyz_file)
assert ext == '.xyz'
natom, _, ele, coord = parse_xyz(xyz_file)
try:
energy = load_glob(f"{base}.energy*").reshape(1)
except:
energy = None
try:
force = load_glob(f"{base}.force*").reshape(natom, 3)
except:
force = None
try:
dm = load_glob(f"{base}.dm*")
nao = np.sqrt(dm.size).astype(int)
dm = dm.reshape(nao, nao)
except:
dm = None
return ele, coord, energy, force, dm
def dump_systems(xyz_files, dump_dir, unit="Bohr", ext_type=False):
print(f"saving to {dump_dir} ... ", end="", flush=True)
os.makedirs(dump_dir, exist_ok=True)
if not xyz_files:
print("empty list! did nothing")
return
unit = parse_unit(unit)
a_ele, a_coord, a_energy, a_force, a_dm = map(np.array,
zip(*[load_system(fl) for fl in xyz_files]))
a_coord /= unit
if ext_type:
ele = a_ele[0]
assert all(e == ele for e in a_ele), "element type for each xyz file has to be the same"
np.savetxt(os.path.join(dump_dir, "type.raw"), ele, fmt="%s")
np.save(os.path.join(dump_dir, "coord.npy"), a_coord)
else:
a_chg = [[[CHARGES[e]] for e in ele] for ele in a_ele]
a_atom = np.concatenate([a_chg, a_coord], axis=-1)
np.save(os.path.join(dump_dir, "atom.npy"), a_atom)
if not all(ene is None for ene in a_energy):
assert not any(ele is None for ele in a_energy)
np.save(os.path.join(dump_dir, "energy.npy"), a_energy)
if not all(ff is None for ff in a_force):
assert not any(ff is None for ff in a_force)
a_force *= unit
np.save(os.path.join(dump_dir, "force.npy"), a_force)
if not all(dm is None for dm in a_dm):
assert not any(dm is None for dm in a_dm)
np.save(os.path.join(dump_dir, "dm.npy"), a_dm)
print(f"finished", flush=True)
return
def main(xyz_files, dump_dir=".", group_size=-1, group_prefix="sys", unit="Bohr", ext_type=False):
if isinstance(xyz_files, str):
xyz_files = [xyz_files]
if group_size <= 0:
dump_systems(xyz_files, dump_dir, unit=unit, ext_type=ext_type)
return
ns = len(xyz_files)
ngroup = np.ceil(ns / group_size).astype(int)
nd = max(len(str(ngroup)), 2)
for i in range(ngroup):
dump_systems(xyz_files[i*group_size:(i+1)*group_size],
os.path.join(dump_dir, f"{group_prefix}.{i:0>{nd}d}"),
unit=unit, ext_type=ext_type)
return
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(
description="convert .xyz files and corresponding properties "
"into systems with .npy files grouped in folders.",
argument_default=argparse.SUPPRESS)
parser.add_argument("xyz_files", metavar='FILE', nargs="+",
help="input xyz files")
parser.add_argument("-d", "--dump-dir",
help="directory of dumped system, default is current dir")
parser.add_argument("-U", "--unit",
help="length unit used to save npy files (assume xyz in Angstrom)")
parser.add_argument("-G", "--group-size", type=int,
help="if positive, split data into sub systems with given size, default: -1")
parser.add_argument("-P", "--group-prefix",
help=r"save sub systems with given prefix as `$dump_dir/$prefix.ii`, default: sys")
parser.add_argument("-T", "--ext-type", action="store_true",
help="if set, save the element type into separete `type.raw` file")
args = parser.parse_args()
main(**vars(args))