# Source code for pyccapt.calibration.data_tools.merge_range

import numpy as np



# [docs]
def merge_by_range(data_df, range_df, full=False):
    """
    Attach range information to every data row whose 'mc (Da)' value lies inside a range.

    A row matches a range when ``mc_low <= mc (Da) <= mc_up`` (both bounds inclusive).
    If several ranges contain the same value, the first one in ``range_df`` order is used.
    Rows that match no range receive default values: NaN for 'name' and 'ion', and with
    ``full=True`` additionally 'black' for 'color', 'noise' for 'element' and NaN for the
    remaining columns.

    The comparison is vectorized through a boolean matrix of shape
    (len(range_df), len(data_df)), so memory use grows with the product of both lengths.

    Parameters:
        data_df (pd.DataFrame): The dataframe containing the data to be merged. Must
            contain the column 'mc (Da)'. It is not modified; a copy is returned.
        range_df (pd.DataFrame): The dataframe containing the range bounds 'mc_low' and
            'mc_up' plus the columns to attach. May be empty, in which case every data
            row receives the default values.
        full (bool): If True, the columns 'name', 'ion', 'mass', 'mc', 'mc_low', 'mc_up',
            'color', 'element', 'complex', 'isotope' and 'charge' are attached.
            If False, only 'name' and 'ion' are attached. Default is False.

    Returns:
        pd.DataFrame: A copy of ``data_df`` with the range columns attached.

    Raises:
        KeyError: If 'mc (Da)' is missing from ``data_df`` or a required column is
            missing from a non-empty ``range_df``.
    """
    mc_values = data_df['mc (Da)'].to_numpy()
    n_rows = len(mc_values)
    has_ranges = len(range_df) > 0

    if has_ranges:
        # Column vectors of the bounds broadcast against the row vector of mc values,
        # giving a (n_ranges, n_rows) matrix: mask[i, j] is True when row j is in range i.
        lower = range_df['mc_low'].to_numpy()[:, None]
        upper = range_df['mc_up'].to_numpy()[:, None]
        mask = (lower <= mc_values) & (upper >= mc_values)

        # argmax returns the first True per data row, or 0 when the column is all False,
        # so the mask is looked up again to separate real matches from that fallback 0.
        matched_idx = mask.argmax(axis=0)
        valid_matches = mask[matched_idx, np.arange(n_rows)]
    else:
        # argmax over an empty axis raises ValueError; with no ranges nothing can match.
        matched_idx = None
        valid_matches = np.zeros(n_rows, dtype=bool)

    merged_df = data_df.copy()

    # Values used for rows without a matching range. The list-valued entries are
    # broadcast by np.where, so unmatched rows receive the contained scalar.
    default_values = {
        'name': np.nan,
        'ion': np.nan,
        'mass': np.nan,
        'mc': np.nan,
        'mc_low': np.nan,
        'mc_up': np.nan,
        'color': 'black',
        'element': ['noise'],
        'complex': [np.nan],
        'isotope': [np.nan],
        'charge': np.nan,
    }

    if full:
        columns = ['name', 'ion', 'mass', 'mc', 'mc_low', 'mc_up', 'color',
                   'element', 'complex', 'isotope', 'charge']
    else:
        columns = ['name', 'ion']

    for col in columns:
        if has_ranges:
            candidates = range_df[col].to_numpy()[matched_idx]
        else:
            # Placeholder that is never selected because valid_matches is all False.
            candidates = np.empty(n_rows, dtype=object)
        merged_df[col] = np.where(valid_matches, candidates, default_values[col])

    return merged_df