Source code for pyccapt.calibration.data_tools.merge_range

import numpy as np


def merge_by_range(data_df, range_df, full=False):
    """
    Attach range information to each row of ``data_df`` based on its 'mc (Da)' value.

    A data row matches a range when ``mc_low <= mc (Da) <= mc_up`` (both bounds
    inclusive). If several ranges contain the value, the first one in
    ``range_df`` row order is used. Rows that match no range (including every
    row when ``range_df`` has no rows) receive the default values listed below.
    The comparison is vectorized and builds a boolean array of shape
    ``(len(range_df), len(data_df))``.

    Parameters:
        data_df (pd.DataFrame): The dataframe containing the data to be merged.
            Must have an 'mc (Da)' column. It is not modified.
        range_df (pd.DataFrame): The dataframe containing the range values
            'mc_low' and 'mc_up', plus every column that is copied over
            ('name' and 'ion', or all eleven range columns when ``full`` is True).
        full (bool): If True, the merged dataframe will contain all columns
            from the range_df. Default is False (only 'name' and 'ion').

    Returns:
        pd.DataFrame: A copy of ``data_df`` with the range columns attached.
        Unmatched rows get NaN for 'name', 'ion', 'mass', 'mc', 'mc_low',
        'mc_up', 'complex', 'isotope' and 'charge', 'black' for 'color' and
        'noise' for 'element'.
    """
    data_mc = data_df['mc (Da)'].to_numpy()
    n_rows = len(data_mc)
    has_ranges = len(range_df) > 0

    if has_ranges:
        # Column vectors broadcast against the data row vector, giving one
        # mask row per range and one mask column per data point.
        lower = range_df['mc_low'].to_numpy()[:, None]
        upper = range_df['mc_up'].to_numpy()[:, None]
        mask = (lower <= data_mc) & (upper >= data_mc)

        # argmax returns the first True per data point (or 0 when there is
        # none), so the result is only meaningful where valid_matches is True.
        matched_idx = mask.argmax(axis=0)
        valid_matches = mask.any(axis=0)
    else:
        # argmax over zero ranges would raise ValueError; with nothing to
        # match against, every data point is simply unmatched.
        matched_idx = None
        valid_matches = np.zeros(n_rows, dtype=bool)

    # Create merged dataframe
    merged_df = data_df.copy()

    # Default values for no matches. The single-element lists broadcast inside
    # np.where, so unmatched rows receive the bare element ('noise' / NaN).
    default_values = {
        'name': np.nan,
        'ion': np.nan,
        'mass': np.nan,
        'mc': np.nan,
        'mc_low': np.nan,
        'mc_up': np.nan,
        'color': 'black',
        'element': ['noise'],
        'complex': [np.nan],
        'isotope': [np.nan],
        'charge': np.nan,
    }

    # The dict is declared in the output column order used for the full merge.
    columns = list(default_values) if full else ['name', 'ion']

    for col in columns:
        range_values = range_df[col].to_numpy()
        if has_ranges:
            candidates = range_values[matched_idx]
        else:
            # Placeholder that is never selected (valid_matches is all False);
            # it only keeps the result dtype consistent with the matched path.
            candidates = np.empty(n_rows, dtype=range_values.dtype)
        merged_df[col] = np.where(valid_matches, candidates, default_values[col])

    return merged_df