# Keys that occur in both frames.
common_ids = set(df1['id']).intersection(df2['id'])

# Keep only rows whose key is shared, then order both frames by key and
# renumber them so that row i of one frame lines up with row i of the other.
# NOTE: the comparison below is positional, so this alignment relies on each
# id appearing the same number of times in both frames.
df1_common = (
    df1.loc[df1['id'].isin(common_ids)]
    .sort_values(by='id')
    .reset_index(drop=True)
)
df2_common = (
    df2.loc[df2['id'].isin(common_ids)]
    .sort_values(by='id')
    .reset_index(drop=True)
)

# Columns present in both frames, in df1's column order; the key is excluded.
common_columns = [
    col
    for col in df1.columns
    if col != 'id' and col in df2.columns
]

# For every shared column, print an element-wise mask that is True where the
# two frames hold different values.
for col in common_columns:
    print(f"Differences in column '{col}':")
    print(df1_common[col].values != df2_common[col].values)
def _normalized_text(values: pd.Series) -> pd.Series:
    """Return *values* as stripped, upper-cased strings; missing values become ''."""
    return values.fillna('').astype(str).str.strip().str.upper()


def merge_compare(dataframe_1: pd.DataFrame, dataframe_2: pd.DataFrame) -> pd.DataFrame:
    """Inner-join two frames on ``'id'`` and flag, per shared column, whether values match.

    Columns that exist in both inputs (other than ``'id'``) appear in the
    result twice, suffixed ``_df1`` and ``_df2``. For each such column an
    extra boolean column ``<column>_match`` is appended.

    Values are compared by their string form after filling missing values
    with ``''``, stripping surrounding whitespace and upper-casing. As a
    consequence a missing value matches an empty string, and numbers are
    compared textually (``30`` and ``30.0`` do not match).

    Args:
        dataframe_1: Left frame; must contain an ``'id'`` column.
        dataframe_2: Right frame; must contain an ``'id'`` column.

    Returns:
        The merged frame with the ``*_match`` columns appended in the order
        the shared columns appear in ``dataframe_1``.
    """
    # Suffixes are only applied to column names that collide, i.e. exactly
    # the shared non-key columns compared below.
    merged = pd.merge(
        dataframe_1, dataframe_2,
        on='id',
        how='inner',
        suffixes=('_df1', '_df2'),
    )

    # Walk dataframe_1's columns instead of iterating a set intersection:
    # set order for strings varies between interpreter runs, which made the
    # order of the *_match columns non-deterministic.
    headers_2 = set(dataframe_2.columns)
    comparison_columns = [
        col for col in dataframe_1.columns
        if col != 'id' and col in headers_2
    ]

    for col in comparison_columns:
        merged[f'{col}_match'] = (
            _normalized_text(merged[f'{col}_df1'])
            == _normalized_text(merged[f'{col}_df2'])
        )
    return merged
# Align DataFrames on the key
df1_common = df1[df1['id'].isin(common_ids)].sort_values('id').reset_index(drop=True)
df2_common = df2[df2['id'].isin(common_ids)].sort_values('id').reset_index(drop=True)
# Find common columns (headers)
common_columns = [col for col in df1.columns if col in df2.columns and col != 'id']
# Compare all matching columns
for col in common_columns:
    print(f"Differences in column '{col}':")
    print(df1_common[col].values != df2_common[col].values)
[/code]
[code]Differences in column 'name':
[False False]
Differences in column 'age':
[False  True]
[/code]
[b]2. Сравните с использованием Pandas Merge[/b]
[code]def merge_compare(dataframe_1: pd.DataFrame, dataframe_2: pd.DataFrame) -> pd.DataFrame:
    # Merge on 'id' with suffixes to distinguish columns
    merged = pd.merge(
        dataframe_1, dataframe_2,
        on='id',
        how='inner',
        suffixes=('_df1', '_df2')
    )
    # Find common columns (excluding 'id')
    headers_1 = set(dataframe_1.columns)
    headers_2 = set(dataframe_2.columns)
    comparison_columns = [col for col in headers_1 & headers_2 if col != 'id']
    # For each common column, add a boolean column showing if values match
    for col in comparison_columns:
        merged[f'{col}_match'] = (
            merged[f'{col}_df1'].fillna('').astype(str).str.strip().str.upper() ==
            merged[f'{col}_df2'].fillna('').astype(str).str.strip().str.upper()
        )
    return merged
[/code]
[code]>>> merge_compare(df1, df2)
   id name_df1  age_df1 name_df2  age_df2  name_match  age_match
0   2      Bob       30      Bob       30        True       True
1   3  Charlie       35  Charlie       36        True      False
[/code]
[list]
[*]В чем разница между этими двумя методами?
[*]В каких случаях использования каждый из них подходит лучше?