Cell Extraction

Dependencies

/// script requires-python = ">=3.10" dependencies = [ "a2c-ase @ git+https://github.com/abhijeetgangan/a2c_ase.git", "numpy", "pymatgen", "tqdm", "mace-torch", "matplotlib", ] ///

Sodium Crystallization from Amorphous Phase - Cell Extraction Example

This example demonstrates how to extract crystallizable subcells from an amorphous sodium structure, relax them using the MACE machine learning potential, and analyze the resulting space group distribution. This is a simplified workflow that focuses on the cell extraction and analysis steps without running the full melt-quench MD simulation.

Setup and Imports¶

We'll use MACE for accurate energy and force calculations, pymatgen for space group analysis, and matplotlib for visualization.

In [1]:

Copied!





import os
from collections import Counter
from pathlib import Path

import matplotlib
import matplotlib.pyplot as plt
from ase.io import read
from mace.calculators.foundations_models import mace_mp  # type: ignore
from pymatgen.io.ase import AseAtomsAdaptor
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
from tqdm import tqdm

from a2c_ase.runner import relax_unit_cell
from a2c_ase.utils import extract_crystallizable_subcells

# Set matplotlib backend for CI
if os.getenv("CI") is not None:
    matplotlib.use("Agg")
import os
from collections import Counter
from pathlib import Path

import matplotlib
import matplotlib.pyplot as plt
from ase.io import read
from mace.calculators.foundations_models import mace_mp  # type: ignore
from pymatgen.io.ase import AseAtomsAdaptor
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
from tqdm import tqdm

from a2c_ase.runner import relax_unit_cell
from a2c_ase.utils import extract_crystallizable_subcells

# Set matplotlib backend for CI
if os.getenv("CI") is not None:
    matplotlib.use("Agg")

/opt/hostedtoolcache/Python/3.10.18/x64/lib/python3.10/site-packages/e3nn/o3/_wigner.py:10: UserWarning: Environment variable TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD detected, since the`weights_only` argument was not explicitly passed to `torch.load`, forcing weights_only=False.
  _Jd, _W3j_flat, _W3j_indices = torch.load(os.path.join(os.path.dirname(__file__), 'constants.pt'))

cuequivariance or cuequivariance_torch is not available. Cuequivariance acceleration will be disabled.

Configuration¶

Define paths and simulation parameters. In CI mode, we use reduced parameters for faster testing.

In [2]:

Copied!





IS_CI = os.getenv("CI") is not None

# Get the path to the data file relative to this script
try:
    script_dir = Path(__file__).parent
except NameError:
    script_dir = Path.cwd()

# Search for data file in multiple locations
for parent in [script_dir.parent, script_dir, script_dir.parent.parent]:
    data_file = parent / "data" / "Na_2000.xyz"
    if data_file.exists():
        break
else:
    msg = f"Could not find data/Na_2000.xyz. Run from project root. CWD: {Path.cwd()}"
    raise FileNotFoundError(msg) from None

# Relaxation parameters
max_iter = 20 if IS_CI else 200  # Maximum optimization steps
fmax = 0.01 if IS_CI else 0.05  # Force convergence criterion in eV/Å

if IS_CI:
    print("Running in CI mode with reduced parameters for fast testing")
IS_CI = os.getenv("CI") is not None

# Get the path to the data file relative to this script
try:
    script_dir = Path(__file__).parent
except NameError:
    script_dir = Path.cwd()

# Search for data file in multiple locations
for parent in [script_dir.parent, script_dir, script_dir.parent.parent]:
    data_file = parent / "data" / "Na_2000.xyz"
    if data_file.exists():
        break
else:
    msg = f"Could not find data/Na_2000.xyz. Run from project root. CWD: {Path.cwd()}"
    raise FileNotFoundError(msg) from None

# Relaxation parameters
max_iter = 20 if IS_CI else 200  # Maximum optimization steps
fmax = 0.01 if IS_CI else 0.05  # Force convergence criterion in eV/Å

if IS_CI:
    print("Running in CI mode with reduced parameters for fast testing")

Running in CI mode with reduced parameters for fast testing

Step 1: Load Amorphous Structure¶

Load the pre-generated amorphous sodium structure from the data directory. This structure was obtained from a melt-quench simulation.

In [3]:

Copied!

amorphous_atoms = read(data_file, index="0")
print(f"Loaded structure with {len(amorphous_atoms)} atoms")
print(f"Cell dimensions: {amorphous_atoms.cell.lengths()}")  # type: ignore
amorphous_atoms = read(data_file, index="0")
print(f"Loaded structure with {len(amorphous_atoms)} atoms")
print(f"Cell dimensions: {amorphous_atoms.cell.lengths()}")  # type: ignore

Loaded structure with 2000 atoms
Cell dimensions: [42.3 42.3 42.3]

Step 2: Extract Crystallizable Subcells¶

Search for periodic subcells within the amorphous structure that could represent crystalline unit cells. The algorithm uses a grid-based search to identify regions with translational symmetry.

In [4]:

Copied!





crystallizable_cells = extract_crystallizable_subcells(
    atoms=amorphous_atoms,  # type: ignore
    d_frac=0.15,  # Grid spacing as fraction of cell dimensions
    n_min=2,  # Minimum grid divisions per dimension
    n_max=12,  # Maximum grid divisions per dimension
    cubic_only=False,  # Allow non-cubic structures
    allowed_atom_counts=None,  # No restriction on number of atoms per cell
)

print(f"Found {len(crystallizable_cells)} crystallizable cells")

crystallizable_cells = extract_crystallizable_subcells(
    atoms=amorphous_atoms,  # type: ignore
    d_frac=0.15,  # Grid spacing as fraction of cell dimensions
    n_min=2,  # Minimum grid divisions per dimension
    n_max=12,  # Maximum grid divisions per dimension
    cubic_only=False,  # Allow non-cubic structures
    allowed_atom_counts=None,  # No restriction on number of atoms per cell
)

print(f"Found {len(crystallizable_cells)} crystallizable cells")

Created 214 subcells from amorphous structure
Subcells kept after filtering: 214
Found 214 crystallizable cells

Step 3: Initialize MACE Calculator¶

In [5]:

Copied!

device = "cpu" if IS_CI else "cuda"
calculator = mace_mp(model="small-omat-0", device=device, dtype="float32")
print(f"MACE calculator initialized on {device}")
device = "cpu" if IS_CI else "cuda"
calculator = mace_mp(model="small-omat-0", device=device, dtype="float32")
print(f"MACE calculator initialized on {device}")

Using model under Academic Software License (ASL) license, see https://github.com/gabor1/ASL 
 To use this model you accept the terms of the license.
Using Materials Project MACE for MACECalculator with /home/runner/.cache/mace/maceomat0smallmodel
Using float32 for MACECalculator, which is faster but less accurate. Recommended for MD. Use float64 for geometry optimization.
Using head omat_pbe out of ['omat_pbe']
Default dtype float32 does not match model dtype float64, converting models to float32.
MACE calculator initialized on cpu

/opt/hostedtoolcache/Python/3.10.18/x64/lib/python3.10/site-packages/mace/calculators/mace.py:197: UserWarning: Environment variable TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD detected, since the`weights_only` argument was not explicitly passed to `torch.load`, forcing weights_only=False.
  torch.load(f=model_path, map_location=device)

Step 4: Relax Candidate Structures¶

For each extracted subcell, we perform a full structure relaxation (both atomic positions and cell parameters) using the FIRE optimizer. This identifies the energetically favorable crystalline structures.

In [6]:

Copied!





space_groups = []
relaxed_cells = []

print("Relaxing structures...")
for atoms in tqdm(crystallizable_cells[:20] if IS_CI else crystallizable_cells):
    relaxed_cell, log_dict = relax_unit_cell(
        atoms=atoms,
        calculator=calculator,
        max_iter=max_iter,
        fmax=fmax,
        verbose=False,
    )

    # Convert to pymatgen and get space group
    adaptor = AseAtomsAdaptor()
    structure = adaptor.get_structure(relaxed_cell)
    sga = SpacegroupAnalyzer(structure, symprec=0.1)
    space_group = sga.get_space_group_symbol()

    space_groups.append(space_group)
    relaxed_cells.append(relaxed_cell)

print(f"Successfully relaxed {len(relaxed_cells)} structures")
space_groups = []
relaxed_cells = []

print("Relaxing structures...")
for atoms in tqdm(crystallizable_cells[:20] if IS_CI else crystallizable_cells):
    relaxed_cell, log_dict = relax_unit_cell(
        atoms=atoms,
        calculator=calculator,
        max_iter=max_iter,
        fmax=fmax,
        verbose=False,
    )

    # Convert to pymatgen and get space group
    adaptor = AseAtomsAdaptor()
    structure = adaptor.get_structure(relaxed_cell)
    sga = SpacegroupAnalyzer(structure, symprec=0.1)
    space_group = sga.get_space_group_symbol()

    space_groups.append(space_group)
    relaxed_cells.append(relaxed_cell)

print(f"Successfully relaxed {len(relaxed_cells)} structures")

Relaxing structures...

  0%|          | 0/20 [00:00<?, ?it/s]

  5%|▌         | 1/20 [00:03<01:13,  3.86s/it]

 10%|█         | 2/20 [00:07<01:08,  3.82s/it]

 15%|█▌        | 3/20 [00:11<01:04,  3.80s/it]

 20%|██        | 4/20 [00:15<01:00,  3.78s/it]

 25%|██▌       | 5/20 [00:18<00:54,  3.62s/it]

 30%|███       | 6/20 [00:22<00:52,  3.75s/it]

 35%|███▌      | 7/20 [00:26<00:48,  3.69s/it]

 40%|████      | 8/20 [00:29<00:45,  3.76s/it]

 45%|████▌     | 9/20 [00:34<00:42,  3.89s/it]

 50%|█████     | 10/20 [00:38<00:39,  3.97s/it]

 55%|█████▌    | 11/20 [00:42<00:35,  3.95s/it]

 60%|██████    | 12/20 [00:46<00:31,  3.91s/it]

 65%|██████▌   | 13/20 [00:50<00:28,  4.04s/it]

 70%|███████   | 14/20 [00:54<00:23,  3.95s/it]

 75%|███████▌  | 15/20 [00:57<00:19,  3.80s/it]

 80%|████████  | 16/20 [01:00<00:14,  3.64s/it]

 85%|████████▌ | 17/20 [01:04<00:11,  3.70s/it]

 90%|█████████ | 18/20 [01:08<00:07,  3.61s/it]

 95%|█████████▌| 19/20 [01:11<00:03,  3.62s/it]

100%|██████████| 20/20 [01:15<00:00,  3.72s/it]

100%|██████████| 20/20 [01:15<00:00,  3.78s/it]

Successfully relaxed 20 structures

Step 5: Analyze Space Group Distribution¶

Analyze the symmetry of relaxed structures to understand what crystal structures are accessible from the amorphous precursor. For sodium, we expect to see body-centered cubic (bcc, space group Im-3m) as the most stable phase at ambient conditions.

In [7]:

Copied!





space_group_counts = Counter(space_groups)
sorted_groups = sorted(space_group_counts.items(), key=lambda x: x[1], reverse=True)

# Create visualization
fig, ax = plt.subplots(figsize=(9, 6))
labels = [sg[0] for sg in sorted_groups]
counts = [sg[1] for sg in sorted_groups]

ax.bar(range(len(labels)), counts)
ax.set_xticks(range(len(labels)))
ax.set_xticklabels(labels, rotation=45, ha="right")
ax.set_xlabel("Space Group")
ax.set_ylabel("Count")
ax.set_title("Space Group Distribution of Relaxed Structures")
ax.grid(axis="y", alpha=0.3)

plt.tight_layout()

# Save plot (skip display in CI)
if not IS_CI:
    output_file = script_dir / "space_group_distribution.png"
    plt.savefig(output_file, dpi=300, bbox_inches="tight")
    print(f"\nPlot saved to {output_file}")
    plt.show()
else:
    plt.close()  # Clean up in CI
space_group_counts = Counter(space_groups)
sorted_groups = sorted(space_group_counts.items(), key=lambda x: x[1], reverse=True)

# Create visualization
fig, ax = plt.subplots(figsize=(9, 6))
labels = [sg[0] for sg in sorted_groups]
counts = [sg[1] for sg in sorted_groups]

ax.bar(range(len(labels)), counts)
ax.set_xticks(range(len(labels)))
ax.set_xticklabels(labels, rotation=45, ha="right")
ax.set_xlabel("Space Group")
ax.set_ylabel("Count")
ax.set_title("Space Group Distribution of Relaxed Structures")
ax.grid(axis="y", alpha=0.3)

plt.tight_layout()

# Save plot (skip display in CI)
if not IS_CI:
    output_file = script_dir / "space_group_distribution.png"
    plt.savefig(output_file, dpi=300, bbox_inches="tight")
    print(f"\nPlot saved to {output_file}")
    plt.show()
else:
    plt.close()  # Clean up in CI

Results Summary¶

Print detailed statistics about the space group distribution. This helps identify the most frequently occurring crystal structures.

In [8]:

Copied!





print("\nSpace Group Statistics:")
for sg, count in sorted_groups:
    percentage = count / len(space_groups) * 100
    print(f"{sg}: {count} structures ({percentage:>5.1f}%)")
print(f"Total: {len(space_groups)} unique structures analyzed")
print("\nSpace Group Statistics:")
for sg, count in sorted_groups:
    percentage = count / len(space_groups) * 100
    print(f"{sg}: {count} structures ({percentage:>5.1f}%)")
print(f"Total: {len(space_groups)} unique structures analyzed")

Space Group Statistics:
P1: 20 structures (100.0%)
Total: 20 unique structures analyzed