Код: Выделить всё
import pandas as pd
def _cells_differ(left, right):
    """Return True when two cell values should be reported as different.

    Two missing scalars (NaN, None, NaT, pd.NA) count as equal, because a
    plain ``!=`` reports NaN as different from itself and cannot be
    evaluated in a boolean context for pd.NA.
    """
    # Only scalars are tested for missingness: pd.isna() on a list-like
    # cell returns an array, which cannot be used as a single condition.
    left_missing = pd.api.types.is_scalar(left) and pd.isna(left)
    right_missing = pd.api.types.is_scalar(right) and pd.isna(right)
    if left_missing or right_missing:
        return not (left_missing and right_missing)
    return bool(left != right)


def compare_rows(df):
    """Compare rows pairwise and collect the columns whose values differ.

    Rows are taken in non-overlapping pairs by position: (0, 1), (2, 3),
    and so on. If the frame has an odd number of rows, the last row has no
    partner and is ignored.

    Args:
        df: The input Pandas DataFrame.

    Returns:
        A list with one dict per compared pair, in row order. Each dict maps
        a column name to a ``(first_row_value, second_row_value)`` tuple for
        every column that differs; the dict is empty when the pair is
        identical. An empty or single-row frame yields an empty list.
    """
    columns = list(df.columns)
    diff_list = []
    # Step of 2 so that each row takes part in exactly one comparison.
    for i in range(0, len(df) - 1, 2):
        row1 = df.iloc[i]
        row2 = df.iloc[i + 1]
        diff_list.append({
            col: (row1[col], row2[col])
            for col in columns
            if _cells_differ(row1[col], row2[col])
        })
    return diff_list
# Load the rows of the most recent batch with Spark, then convert the Spark
# DataFrame to pandas so the rows can be compared pairwise.
# `table` is a placeholder name; the original query was missing the FROM
# keyword in the outer SELECT.
strSQL = """select * from table
where batch_id = (select max(batch_id)
from table)"""
df_spark = spark.sql(strSQL)
df = df_spark.toPandas()
# compare_rows returns a list of dicts (one per compared pair of rows).
diff_df = compare_rows(df)
# One output row per compared pair; columns that did not differ in a given
# pair are left empty (NaN) in that row.
df = pd.DataFrame.from_dict(diff_df)
# NOTE(review): each populated cell holds a (first, second) tuple — confirm
# the Excel writer in use accepts tuple cell values, or convert them to
# strings before exporting.
df.to_excel('your_excel_file.xlsx', sheet_name='Sheet1', index=False)
Что я хочу:
Подробнее здесь: https://stackoverflow.com/questions/795 ... -eachother