def _split_tasks(cell):
    """Return the distinct, non-empty task labels in a comma-separated cell.

    Non-string cells (e.g. ``None`` or ``NaN`` standing in for a missing
    value) and blank strings yield an empty list. Labels are stripped of
    surrounding whitespace and kept in order of first appearance.
    """
    if not isinstance(cell, str):
        return []
    labels = (part.strip() for part in cell.split(','))
    # dict.fromkeys de-duplicates while keeping first-seen order, which makes
    # the resulting link order reproducible (iterating a set of str is not).
    return list(dict.fromkeys(label for label in labels if label))


def generate_links_and_nodes(dataframe):
    """Build index-based Sankey inputs from the 'q10', 'q3' and 'q11' columns.

    For every row, each task in 'q10' is linked to each task in 'q3', and
    each task in 'q3' is linked to each task in 'q11'. A task is never linked
    to itself. Blank or missing cells contribute no tasks, so they never
    produce an empty-label node.

    Args:
        dataframe: DataFrame with columns 'q10', 'q3' and 'q11', each cell
            holding a comma-separated string of task labels (or a blank /
            missing value).

    Returns:
        A tuple ``(sources, targets, values, unique_nodes)`` where
        ``unique_nodes`` is the sorted list of labels that take part in at
        least one link, ``sources`` and ``targets`` are parallel lists of
        indices into ``unique_nodes``, and ``values`` holds a weight of 1 for
        every link. All four lists are empty when no link can be formed.
    """
    cleaned_links = []
    for _, row in dataframe.iterrows():
        q10_tasks = _split_tasks(row['q10'])
        q3_tasks = _split_tasks(row['q3'])
        q11_tasks = _split_tasks(row['q11'])
        # First the q10 -> q3 links, then the q3 -> q11 links of this row.
        for left, right in ((q10_tasks, q3_tasks), (q3_tasks, q11_tasks)):
            cleaned_links.extend(
                (source, target)
                for source in left
                for target in right
                if source != target
            )

    # Collect unique nodes and give each one a stable integer index.
    unique_nodes = sorted({label for link in cleaned_links for label in link})
    node_indices = {node: i for i, node in enumerate(unique_nodes)}

    # Map sources and targets to node indices.
    sources = [node_indices[source] for source, _ in cleaned_links]
    targets = [node_indices[target] for _, target in cleaned_links]
    values = [1] * len(cleaned_links)  # Default weight of 1 for each link
    return sources, targets, values, unique_nodes
# Derive the index-based link lists and the node labels from the data frame.
sources, targets, values, nodes = generate_links_and_nodes(df)

# Assemble the Sankey figure: one trace with a node spec and a link spec.
fig = go.Figure(
    data=[
        go.Sankey(
            node={
                "pad": 25,
                "thickness": 70,
                "line": {"color": "black", "width": 0.5},
                "label": nodes,  # Only sub-tasks are shown
            },
            link={
                "source": sources,
                "target": targets,
                "value": values,
            },
        )
    ]
)

Пример данных, возвращаемых запросами. Это результаты из моей базы данных, полученные так: df_q10 = pd.read_sql_query(query_q10, conn)
df_q3 = pd.read_sql_query(query_q3, conn)
df_q11 = pd.read_sql_query(query_q11, conn)
q3
0 T4.2
1 T4.2, T4.3, T4.4
2 T2.3
3 T2.2
4 T6.3
5 T6.3
6 T6.3
7 T4.1, T4.2
8 T1.3
9 T1.2
10 T1.3
11 T1.3
12 T7.3
13 T2.3
14 T2.1
q10
0
1
2
3
4 T6.2
5 T6.2
6
7 T1.1, T3.1, T3.2, T4.4, T5.1
8
9
10
11
12 T7.1
13 T2.1, T2.2, T2.4, T3.2
14
q11
0
1 T1.1, T1.3, T3.1, T3.2
2
3
4
5
6
7 T1.1, T1.3, T3.1, T3.2
8
9
10
11
12 T7.2
13
14
Подробнее здесь: https://stackoverflow.com/questions/792 ... hon-plotly
Мобильная версия