Сейчас я использую многопроцессорную обработку (multiprocessing), чтобы ускорить сравнение подграфов, но оно всё равно занимает довольно много времени. Какие оптимизации можно внести в этот код?
Код: Выделить всё
def multiprocess_sub_graphs(graph):
    """
    Process all pairs of same-country subgraphs in parallel using multiprocessing.

    Nodes are grouped by ``node[1]``; within each group the induced subgraph
    is split into connected components, and every unordered pair of
    components becomes one task for ``process_sub_graphs``.

    Args:
        graph: Graph object exposing ``nodes()`` and ``subgraph()``. Each
            node must be indexable, with ``node[1]`` hashable and used as
            the grouping (country) key.
            NOTE(review): ``process_sub_graphs`` treats ``node[1]`` as a
            mapping holding ``'all_names'`` -- confirm the real node layout.

    Returns:
        list: Every non-``None`` result of ``process_sub_graphs``, in
        completion order (not task order).
    """
    # Group nodes by country in a single pass instead of rescanning the
    # whole node list once per country.
    nodes_by_country = {}
    for node in graph.nodes():
        nodes_by_country.setdefault(node[1], []).append(node)

    # Generate tasks: one per unordered pair of components within a country.
    tasks = []
    for nodes_of_country in nodes_by_country.values():
        sub_graphs_nodes = [
            list(c)
            for c in sorted(
                connected_components(graph.subgraph(nodes_of_country)), key=len
            )
        ]
        tasks.extend(combinations(sub_graphs_nodes, 2))

    # Nothing to compare: do not pay for starting worker processes.
    if not tasks:
        return []

    # Process tasks in parallel
    with Pool() as pool:
        result_edges = pool.imap_unordered(process_sub_graphs, tasks, chunksize=6)
        # imap_unordered is lazy and leaving the ``with`` block terminates
        # the pool, so the results have to be collected before exiting it.
        return [x for x in result_edges if x is not None]
def name_pairs(names_a, names_b):
    """
    Lazily yield every ``(name1, name2)`` combination of the two inputs.

    Args:
        names_a: Iterable supplying the first element of each pair.
        names_b: Iterable supplying the second element; it is iterated once
            per element of ``names_a``, so it must be re-iterable.

    Yields:
        tuple: ``(name1, name2)`` with ``name1`` from ``names_a`` and
        ``name2`` from ``names_b``, ``names_a`` varying slowest.
    """
    for name1 in names_a:
        for name2 in names_b:
            yield name1, name2
def process_sub_graphs(args):
    """
    Compare two subgraphs to find matching edges based on node attributes.

    Args:
        args: A ``(subgraph1_nodes, subgraph2_nodes)`` pair, packed into one
            argument so the function can be mapped over a task list. Each
            element is a sequence of nodes where ``node[1]`` is a mapping
            that may contain an ``'all_names'`` iterable.

    Returns:
        ``(subgraph1_nodes[0], subgraph2_nodes[0])`` as soon as one name from
        each side scores above 92 with ``partial_match``; ``None`` when no
        pair does or when either side has no names.
    """
    subgraph1_nodes, subgraph2_nodes = args

    # Collect the distinct names on each side so every name pair is scored
    # at most once.
    names_a = {
        name
        for node1 in subgraph1_nodes
        for name in node1[1].get('all_names', [])
    }
    names_b = {
        name
        for node2 in subgraph2_nodes
        for name in node2[1].get('all_names', [])
    }

    # No names on one side means there are no pairs to score.
    if not names_a or not names_b:
        return None

    # Find matching nodes
    for first_name, second_name in name_pairs(names_a, names_b):
        if partial_match(first_name, second_name) > 92:
            # Return an edge between the first nodes of each subgraph
            return subgraph1_nodes[0], subgraph2_nodes[0]
    return None
Код: Выделить всё
# Minimal sample graph: four integer node ids, each carrying a "country"
# label and an "all_names" list as node attributes.
_sample_nodes = [
    (1, dict(country="A", all_names=["Alice"])),
    (2, dict(country="A", all_names=["Alicia"])),
    (3, dict(country="B", all_names=["Alicia"])),
    (4, dict(country="B", all_names=["Robert"])),
]
graph = nx.Graph()
graph.add_nodes_from(_sample_nodes)
Спасибо.
Подробнее здесь: https://stackoverflow.com/questions/792 ... processing