Source code for pyccapt.calibration.clustering.isosurface

import numpy as np
import pyvista as pv



[docs]
def bin_vectors_from_distance(dist, bin_values, mode='distance'):
    """
    Create a set of grid vectors to be used in nD binning. The bounds are calculated
    such that they don't go beyond the size of the dataset.

    Parameters:
        dist (numpy.ndarray): The distance variable to be binned. One column per dimension.
                              It is the generalized distance.
        bin_values (list or numpy.ndarray): The bin 'distance' per bin in either a distance metric or a count.
                                             Non-isometric bins are possible.
        mode (str): Mode can be 'distance' (constant distance) or 'count' (constant count). Default is 'distance'.

    Returns:
        tuple:
            - bin_centers (list of numpy.ndarray): The bin centers of each bin.
            - bin_edges (list of numpy.ndarray): The edges of each bin.
    """
    if mode not in ['distance', 'count']:
        raise ValueError("Mode must be 'distance' or 'count'.")

    is_constant_count = mode == 'count'
    is_constant_distance = mode == 'distance'
    num_dim = len(bin_values)
    dist = np.array(dist).reshape(-1, num_dim)
    if dist.shape[1] != num_dim:
        raise ValueError("Dimensions of distance variable and bin variable must match.")
    if is_constant_count and num_dim != 1:
        raise ValueError("Constant count mode is only available for 1D binning.")

    bin_centers = []
    bin_edges = []

    # Constant bin distance interval
    if is_constant_distance:
        for dim in range(num_dim):
            # Generate raw bin vector
            bin_vector_raw = np.linspace(0, 10000 * bin_values[dim], 10001)
            bin_vector_raw = np.concatenate((-np.flip(bin_vector_raw[1:]), bin_vector_raw))

            # Filter bin centers within the distance range
            centers = bin_vector_raw[
                (bin_vector_raw >= dist[:, dim].min() - bin_values[dim])
                & (bin_vector_raw <= dist[:, dim].max() + bin_values[dim])
            ]
            bin_centers.append(centers)

            # Calculate bin edges
            edges = (centers[1:] + centers[:-1]) / 2
            edges = np.concatenate(
                ([centers[0] - (centers[1] - centers[0]) / 2], edges, [centers[-1] + (centers[-1] - centers[-2]) / 2])
            )
            bin_edges.append(edges)

    # Constant bin count interval
    elif is_constant_count:
        dist = np.sort(dist.flatten())
        idx_edge = np.arange(0, len(dist), bin_values[0])

        # Handle remainder
        if idx_edge[-1] < len(dist):
            idx_edge = np.append(idx_edge, len(dist))

        idx_cent = np.round((idx_edge[1:] + idx_edge[:-1]) / 2).astype(int)
        centers = dist[idx_cent]
        edges = dist[idx_edge]

        # Adjust edges to avoid creating extra bins
        edges[0] -= 0.0001
        edges[-1] += 0.0001

        bin_centers.append(centers)
        bin_edges.append(edges)

    return bin_centers, bin_edges



import numpy as np



[docs]
def pos_to_voxel(data, grid_vec, species=None):
    """
        Creates a voxelization of the data in 'pos' based on the bin centers in 'grid_vec'
        for the atoms/ions in the specified species.

        Parameters:
            data (pyccapt DataFrame): The data to be voxelized. when input species is given, ranges must be allocated.
    %          A decomposed DataFrame file is also possible. Use range_to_pyccapt to decompose the data.
            grid_vec (list of numpy.ndarray): Grid vectors for the voxel grid. These are the bin centers.
            species (list, str, or numpy.ndarray, optional): The species to filter by. Can be:
                                                             - List of species names (e.g., ['Fe', 'Mn']).
                                                             - Boolean array matching the length of `pos`.
                                                             - None, to include all atoms/ions.

        Returns:
            numpy.ndarray: A 3D array representing the voxelized data.
    """
    # Ensure `pos` is a numpy array
    if hasattr(data, "columns"):  # Assume pandas.DataFrame
        pos_array = np.array([data["x (nm)"], data["y (nm)"], data["z (nm)"]]).T
        element_col = data.columns.get_loc("element") if "element" in data.columns else None
    else:
        pos_array = np.array(data)

    # Check for species filtering
    if species is not None:
        if isinstance(species, list) and (element_col):
            if element_col:
                species_mask = data['element'].isin(species)
            else:
                raise ValueError("Invalid species filter or table format.")
        elif isinstance(species, np.ndarray) and species.dtype == bool:
            species_mask = species
        else:
            raise ValueError("Species must be a list, boolean array, or None.")

        pos_array = pos_array[species_mask]

    # Calculate bin sizes and edge vectors
    bin_sizes = [grid_vec[d][1] - grid_vec[d][0] for d in range(3)]
    edge_vec = [np.concatenate(([grid_vec[d][0] - bin_sizes[d] / 2], grid_vec[d] + bin_sizes[d] / 2)) for d in range(3)]

    # Determine voxel indices
    loc = np.empty((pos_array.shape[0], 3), dtype=int)
    for d in range(3):
        loc[:, d] = np.digitize(pos_array[:, d], edge_vec[d]) - 1  # Adjust for 0-based indexing

    # Calculate the voxel grid size
    grid_size = np.maximum(np.max(loc, axis=0) + 1, [len(e) - 1 for e in edge_vec])

    # Count atoms in each voxel
    vox = np.zeros(grid_size, dtype=int)
    for i in range(loc.shape[0]):
        vox[tuple(loc[i])] += 1

    return vox




[docs]
def isosurface(gridVec, data, isovalue):
    """
    Extract isosurface using pyvista for a custom 3D grid.

    Parameters:
        gridVec (list of np.ndarray): List of 3 arrays representing the grid points in x, y, and z.
        data (np.ndarray): 3D scalar field (same shape as the meshgrid defined by gridVec).
        isovalue (float): Scalar value to extract the isosurface.

    Returns:
        pyvista.PolyData: Isosurface with faces and vertices.
    """
    # Create a pyvista structured grid
    x, y, z = np.meshgrid(gridVec[0], gridVec[1], gridVec[2], indexing='ij')
    grid = pv.StructuredGrid(x, y, z)
    grid.point_data["values"] = data.flatten()

    # Extract the isosurface
    isosurf = grid.contour([isovalue])  # Pass isovalue as a list for compatibility
    return isosurf



if __name__ == "__main__":
    import pandas as pd

    # Test the function
    def generate_atom_dataset(num_atoms):
        """
        Generate a dataset with atom positions and random element assignment.

        Parameters:
            num_atoms (int): Number of atoms to generate (100 to 10M).

        Returns:
            pd.DataFrame: A DataFrame with columns `x(nm)`, `y(nm)`, `z(nm)`, `element`.
        """
        # Generate random positions in nanometers (x, y, z)
        positions = np.random.uniform(0, 100, (num_atoms, 3))  # Example range: [0, 100] nm

        # Assign elements randomly with 80% Al and 20% Fe
        elements = np.random.choice(["Al", "Fe"], size=num_atoms, p=[0.8, 0.2])

        # Create the DataFrame
        df = pd.DataFrame(positions, columns=["x (nm)", "y (nm)", "z (nm)"])
        df["element"] = elements

        return df

    # Example usage
    num_atoms = 1_000  # Set the desired dataset size
    data = generate_atom_dataset(num_atoms)

    # make pandas dataframe
    bin_values = [1, 1, 1]  # nm
    bin_centers, bin_edges = bin_vectors_from_distance(
        [data['x (nm)'].to_numpy(), data['y (nm)'].to_numpy(), data['z (nm)'].to_numpy()], bin_values, mode='distance'
    )

    grid_vec = np.array(bin_centers)
    vox = pos_to_voxel(data, grid_vec)
    voxIon = pos_to_voxel(data, grid_vec, species=['Fe'])
    conc = np.divide(voxIon, vox, out=np.zeros_like(voxIon, dtype=float), where=vox != 0)

    print("Concentration value range:", conc.min(), conc.max())
    iso_value = (conc.max() + conc.min()) / 2
    isosurf = isosurface(grid_vec, conc, isovalue=iso_value)

    import matplotlib.pyplot as plt

    # # Visualize using pyvista
    # plotter = pv.Plotter()
    # plotter.add_mesh(isosurf, color="blue", opacity=0.6)
    # plotter.show()

    import plotly.graph_objects as go

    # Extract Al atom positions
    al_positions = data[data["element"] == "Al"][["x (nm)", "y (nm)", "z (nm)"]].to_numpy()

    # Extract vertices and faces from the isosurface
    vertices = isosurf.points
    faces = isosurf.faces.reshape(-1, 4)[:, 1:]  # Faces have a leading count

    # Create the scatter plot for Al atoms
    scatter = go.Scatter3d(
        x=al_positions[:, 0],
        y=al_positions[:, 1],
        z=al_positions[:, 2],
        mode="markers",
        marker=dict(size=2, color="red", opacity=0.5),
        name="Al Atoms",
    )

    # Create the mesh for the isosurface
    mesh = go.Mesh3d(
        x=vertices[:, 0],
        y=vertices[:, 1],
        z=vertices[:, 2],
        i=faces[:, 0],
        j=faces[:, 1],
        k=faces[:, 2],
        opacity=0.6,
        alphahull=5,
        color="blue",
        name="Fe Isosurface",
    )

    # Combine and plot
    fig = go.Figure(data=[scatter, mesh])
    fig.update_layout(
        scene=dict(xaxis_title="X (nm)", yaxis_title="Y (nm)", zaxis_title="Z (nm)"),
        title="Scatter Plot of Al Atoms with Fe Isosurface",
    )

    fig.show()