Я реализовал алгоритмы Лувена и Лейдена без агрегирования графов. Моя реализация в Лейдене с параметром разрешения гаммы, установленным на 0,5, похоже, работает неправильно. Моя программа сначала помещает два узла в сообщество, а затем удаляет один узел из того же сообщества, причем изменение значения функции CPM оба раза является положительным. Похоже, что алгоритм Лейдена должен работать не так.
Графики присутствуют в подпапке данных папки моего проекта в виде текстовых файлов с целыми числами, представляющими узлы, и строками, представляющими ребра. Тот, с которым я сейчас работаю, присутствует в файле с именем «example_graph_changed.txt». Это показано ниже:
У меня есть класс для загрузки графиков (с именем GraphLoader) и интерфейс Graph. GraphLoader присутствует в пакете util в папке моего проекта, а интерфейс Graph присутствует в пакете Graph вместе со всеми другими созданными мной классами.
GraphLoader:
package util;
import java.io.File;
import java.util.HashSet;
import java.util.Scanner;
import java.util.Set;
public class GraphLoader {
/**
* Loads graph with data from a file.
* The file should consist of lines with 2 integers each, corresponding
* to a "from" vertex and a "to" vertex.
*/
public static void loadGraph(graph.Graph g, String filename) {
Set seen = new HashSet();
Scanner sc;
try {
sc = new Scanner(new File(filename));
} catch (Exception e) {
e.printStackTrace();
return;
}
// Iterate over the lines in the file, adding new
// vertices as they are found and connecting them with edges.
while (sc.hasNextInt()) {
int v1 = sc.nextInt();
int v2 = sc.nextInt();
if (!seen.contains(v1)) {
g.addVertex(v1);
seen.add(v1);
}
if (!seen.contains(v2)) {
g.addVertex(v2);
seen.add(v2);
}
g.addEdge(v1, v2);
}
sc.close();
}
}
package graph;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
public interface Graph {
/* Creates a vertex with the given number. */
public void addVertex(int num);
/* Creates an edge from the first vertex to the second. */
public void addEdge(int i1, int i2);
/* Finds the egonet centered at a given node. */
public Graph getEgonet(int center);
/* Returns all SCCs in a directed graph */
public List getSCCs();
/* Return the graph's connections in a readable format.
* The keys in this HashMap are the vertices in the graph.
* The values are the nodes that are reachable via a directed
* edge from the corresponding key.
* The returned representation ignores edge weights and
* multi-edges. */
public HashMap exportGraph();
}
Ниже показан класс, реализующий интерфейс Graph, который я использую для фактического хранения графика как объекта:
package graph;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
public class ComDetGraph implements Graph {
//data structures to store nodes and edges
private HashMap nodeMap;
private HashSet edges;
public ComDetGraph () {
this.nodeMap = new HashMap();
this.edges = new HashSet();
}
//methods to expose properties of the graph
public int getNumVertices() {
return nodeMap.values().size();
}
public int getNumEdges() {
return edges.size();
}
public HashMap getNodeMap() {
return nodeMap;
}
public HashSet getEdges() {
return edges;
}
//methods to add vertices and edges
@Override
public void addVertex(int num) {
ComDetNode n = nodeMap.get(num);
if (n == null) {
n = new ComDetNode(num);
nodeMap.put(num, n);
}
}
@Override
public void addEdge(int i1, int i2) {
ComDetNode n1 = nodeMap.get(i1);
ComDetNode n2 = nodeMap.get(i2);
if (n1 == null) {
throw new IllegalArgumentException("Node with value " + i1 +
" does not exist in graph");
}
if (n2 == null) {
throw new IllegalArgumentException("Node with value " + i2 +
" does not exist in graph");
}
ComDetEdge edge = new ComDetEdge(n1, n2, 1);
edges.add(edge);
n1.addNeighbor(n2);
n2.addNeighbor(n1);
}
@Override
public Graph getEgonet(int center) {
// TODO Auto-generated method stub
return null;
}
@Override
public List getSCCs() {
// TODO Auto-generated method stub
return null;
}
@Override
public HashMap exportGraph() {
// TODO Auto-generated method stub
return null;
}
}
Я создал классы ComDetNode и ComDetEdge для хранения узлов и ребер в виде объектов.
ComDetNode:
package graph;
import java.util.HashSet;
import java.util.Objects;
public class ComDetNode {
/*the node object has an integer value and a set of nodes as its
neighbors*/
private Integer value;
private HashSet neighbors;
//constructor
public ComDetNode(Integer value) {
this.value = value;
this.neighbors = new HashSet();
}
//getters and setters
public Integer getValue() {
return value;
}
public void setValue(Integer value) {
this.value = value;
}
public HashSet getNeighbors() {
return neighbors;
}
public void addNeighbor (ComDetNode nbr) {
neighbors.add(nbr);
}
/*since many methods will be removing and adding these nodes to
* sets and maps, these methods are overridden to ensure removal
* and addition can be carried out without the object hashCodes needing
* to match exactly*/
@Override
public int hashCode() {
return Objects.hash(value);
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
ComDetNode other = (ComDetNode) obj;
return Objects.equals(value, other.value);
}
@Override
public String toString() {
return "" + value + "";
}
}
package graph;
public class ComDetEdge {
//starting and ending nodes of the edge
private ComDetNode start;
private ComDetNode end;
public ComDetEdge (ComDetNode start, ComDetNode end, int weight) {
this.start = start;
this.end = end;
}
//getter for the end node
public ComDetNode getEndNode() {
return end;
}
@Override
public String toString() {
return start + "->" + end;
}
}
У меня также есть класс для хранения разделов графа, как это определено в контексте алгоритмов Лувена и Лейдена:
package graph;
import java.util.HashMap;
import java.util.HashSet;
public class Partition {
/*data structures to ensure quick retrieval of community ID for a given node
* and quick retrieval of set of nodes in a community for a given community
* ID*/
private HashMap nodeIDMap;
private HashMap iDNodeSetMap;
public Partition() {
nodeIDMap = new HashMap();
iDNodeSetMap = new HashMap();
}
//getters
public HashMap getNodeIDMap() {
return nodeIDMap;
}
public HashMap getIDNodeSetMap() {
return iDNodeSetMap;
}
//getters for community ID and node set
public int getComID (ComDetNode node) {
return nodeIDMap.get(node);
}
public HashSet getNodeSet (int id) {
return iDNodeSetMap.get(id);
}
}
Наконец, у меня есть класс, в котором я написал функции для использования в алгоритмах Лувена и Лейдена, а также основную функцию для их запуска.
package graph;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import util.GraphLoader;
public class ComDet {
/*method to create a singleton partition of the graph i.e. store every
* node in its own separate community*/
public Partition singletonPartition (ComDetGraph g) {
Partition partition = new Partition();
for (ComDetNode node : g.getNodeMap().values()) {
/*placing nodes and community IDs in one of the maps in the
* Partition object*/
partition.getNodeIDMap().put(node, node.getValue());
//a node set to store just the current node
HashSet newNodeSet = new HashSet();
newNodeSet.add(node);
//storing the community IDs along with corresponding node sets
partition.getIDNodeSetMap().put(node.getValue(), newNodeSet);
}
return partition;
}
//method to create a singleton partition of a community
/*exactly the same as above except only for a community and not an entire
graph*/
private Partition singletonPartitionForComm (HashSet comm) {
Partition partition = new Partition();
for (ComDetNode node : comm) {
partition.getNodeIDMap().put(node, node.getValue());
HashSet newNodeSet = new HashSet();
newNodeSet.add(node);
partition.getIDNodeSetMap().put(node.getValue(), newNodeSet);
}
return partition;
}
//function to calculate modularity for the community C
public double q (ComDetGraph g, Partition partition, int C, double gamma) {
int numEdges = g.getNumEdges();
double q_C = 0;
double int_comp = 0;
double out_comp = 0;
/*calculating the internal edges of the community and the total number
of edges for each node in the community*/
for (ComDetNode n : partition.getIDNodeSetMap().get(C)) {
for (ComDetNode nbr : n.getNeighbors()) {
if (partition.getIDNodeSetMap().get(C).contains(nbr)) {
++int_comp;
}
++out_comp;
}
}
//normalization and other arithmetic operations
double nEC = (double) (2 * numEdges);
int_comp /= nEC;
out_comp *= out_comp;
out_comp /= (Math.pow((2 * numEdges), 2));
out_comp *= gamma;
q_C = int_comp - out_comp;
return q_C;
}
//Calculating Constant Potts Model (CPM) function value for community C
//Similar to above
public double cpm (Partition partition, int C, double gamma) {
double cpm = 0;
double int_comp = 0;
double out_comp = 0;
//expected number of edges in C using CPM
int n_c = partition.getIDNodeSetMap().get(C).size();
double n_c_2 = (double) n_c * ((n_c - 1) / 2);
out_comp *= (gamma * n_c_2);
//actual number of edges in C
for (ComDetNode n : partition.getIDNodeSetMap().get(C)) {
for (ComDetNode nbr : n.getNeighbors()) {
if (partition.getIDNodeSetMap().get(C).contains(nbr)) {
++int_comp;
}
}
}
cpm = int_comp - out_comp;
return cpm;
}
/*Calculating change in modularity on moving node i from community C to
* community D*/
public double del_Q
(ComDetGraph g, Partition partition, ComDetNode i, int C, int D, double gamma) {
int numEdges = g.getNumEdges();
double step1output = 0.0;
double step2output = 0.0;
int deg_i_orig = i.getNeighbors().size();
int deg_i_C = deg_i_orig;
int deg_i_D = deg_i_orig;
int sum_deg_C = 0;
int sum_deg_D = 0;
double del_Q = 0;
//calculating degrees of node i in communities C and D
for (ComDetNode nbr : i.getNeighbors()) {
if (!partition.getIDNodeSetMap().get(C).contains(nbr)) {
--deg_i_C;
}
}
for (ComDetNode nbr : i.getNeighbors()) {
if (!partition.getIDNodeSetMap().get(D).contains(nbr)) {
--deg_i_D;
}
}
//calculating sums of degrees of nodes in communities C and D
for (ComDetNode n : partition.getIDNodeSetMap().get(C)) {
sum_deg_C += n.getNeighbors().size();
}
for (ComDetNode n : partition.getIDNodeSetMap().get(D)) {
sum_deg_D += n.getNeighbors().size();
}
/*if node i is being moved from community C to D, its contribution to
* the sum of degrees of nodes in D must be considered*/
if (C != D) {
sum_deg_D += i.getNeighbors().size();
}
//normalization and other arithmetic
step1output = (((double) -1 / numEdges) * (deg_i_C)) +
((deg_i_orig / (2 * Math.pow(numEdges, 2)))
* sum_deg_C * gamma);
step2output = (((double) 1 / numEdges) * (deg_i_D)) -
((deg_i_orig / (2 * Math.pow(numEdges, 2)))
* sum_deg_D * gamma);
del_Q = step1output + step2output;
return del_Q;
}
//Calculating change in CPM on moving node i from community C to community D
public double del_CPM
(Partition partition, ComDetNode i, int C, int D, double gamma) {
double delta_C = 0.0;
double delta_D = 0.0;
int deg_i_orig = i.getNeighbors().size();
int deg_i_C = deg_i_orig;
int deg_i_D = deg_i_orig;
int n_c = partition.getIDNodeSetMap().get(C).size();
int n_d = partition.getIDNodeSetMap().get(D).size();
double del_CPM = 0;
//degrees of node i in C and D
for (ComDetNode nbr : i.getNeighbors()) {
if (!partition.getIDNodeSetMap().get(C).contains(nbr)) {
--deg_i_C;
}
}
for (ComDetNode nbr : i.getNeighbors()) {
if (!partition.getIDNodeSetMap().get(D).contains(nbr)) {
--deg_i_D;
}
}
//changes in CPM for moving i out of C and into D
delta_C = (double) -deg_i_C + (gamma * (n_c - 1));
delta_D = (double) deg_i_D - (gamma * n_d);
del_CPM = delta_C + delta_D;
return del_CPM;
}
/*Calculating change in modularity on moving node i from community C to
the empty community*/
public double del_Q_empty
(ComDetGraph g, Partition partition, ComDetNode i, int C, double gamma) {
int numEdges = g.getNumEdges();
int deg_i_orig = i.getNeighbors().size();
int deg_i_C = deg_i_orig;
int sum_deg_C = 0;
double del_Q = 0;
//degree of i in C and sum of degrees of C
for (ComDetNode nbr : i.getNeighbors()) {
if (!partition.getIDNodeSetMap().get(C).contains(nbr)) {
--deg_i_C;
}
}
for (ComDetNode n : partition.getIDNodeSetMap().get(C)) {
sum_deg_C += n.getNeighbors().size();
}
//change in modularity for moving i out of C
double del_Q_1 = (((double) -1 / numEdges) * (deg_i_C)) +
((deg_i_orig / (2 * Math.pow(numEdges, 2))) *
sum_deg_C * gamma);
//change in modularity for moving i into the empty community
double del_Q_2_1 = ((double) -gamma / 2);
double del_Q_2_2 = ((double) deg_i_orig / numEdges);
double del_Q_2 = del_Q_2_1 * Math.pow(del_Q_2_2, 2);
del_Q = del_Q_1 + del_Q_2;
return del_Q;
}
/*Calculating change in CPM on moving the node i from the community C to
* the empty community*/
public double del_CPM_empty
(Partition partition, ComDetNode i, int C, double gamma) {
int deg_i_orig = i.getNeighbors().size();
int deg_i_C = deg_i_orig;
int n_c = partition.getIDNodeSetMap().get(C).size();
double del_CPM = 0;
//degree of i in C
for (ComDetNode nbr : i.getNeighbors()) {
if (!partition.getIDNodeSetMap().get(C).contains(nbr)) {
--deg_i_C;
}
}
//change in CPM
del_CPM = (double) deg_i_C - (gamma * n_c);
del_CPM += gamma;
return del_CPM;
}
//Modularity for a certain partition of the graph
public double hLvn (ComDetGraph g, Partition partition, double gamma) {
double h = 0.0;
//sum of modularities for each community
for (int C : partition.getIDNodeSetMap().keySet()) {
h += q(g, partition, C, gamma);
}
return h;
}
//CPM for a certain partition of the graph
public double hLdn (Partition partition, double gamma) {
double h = 0.0;
//sum of CPM values for each community
for (int C : partition.getIDNodeSetMap().keySet()) {
h += cpm(partition, C, gamma);
}
return h;
}
//local move for the Louvain algorithm
public Partition moveNodes
(ComDetGraph g, Partition partition, double gamma) {
double h_old = 0.0;
do {
//calculating modularity of old partition
h_old = hLvn(g, partition, gamma);
//randomizing order of nodes
List nodes = new ArrayList(g.getNodeMap().values());
Collections.shuffle(nodes);
for (ComDetNode node : nodes) {
//max value of change in modularity
double max_del_H = Double.NEGATIVE_INFINITY;
int nC = partition.getNodeIDMap().get(node);
/*map to store the change in modularity for each community
we might move this node to*/
HashMap comm_to_del_H = new HashMap();
//moving this node into each of its neighbor's communities
for (ComDetNode nbr : node.getNeighbors()) {
int C = partition.getNodeIDMap().get(nbr);
/*ensuring communities with the same change in modularity as
a community already contained in the map aren't added*/
if (!comm_to_del_H.containsKey(C)) {
/*computing change in modularity for this community,
* storing it and updating the max value
*/
double del_H = del_Q(g, partition, node, nC, C, gamma);
comm_to_del_H.put(C, del_H);
max_del_H = Math.max(del_H, max_del_H);
}
}
/*computing change in modularity for moving this node into
* the empty community, storing it and updating max*/
double del_H_empty = del_Q_empty(g, partition, node, nC, gamma);
comm_to_del_H.put(Integer.MIN_VALUE, del_H_empty);
max_del_H = Math.max(del_H_empty, max_del_H);
int newCom = Integer.MIN_VALUE;
/*only if the change in modularity is positive will we perform
* a local move*/
if (max_del_H > 0) {
//find community corresponding to max change in modularity
for (int C : comm_to_del_H.keySet()) {
if (comm_to_del_H.get(C) == max_del_H) {
newCom = C;
break;
}
}
//if empty community, set its ID to a new value
if (newCom == Integer.MIN_VALUE) {
newCom = g.getNumVertices();
++newCom;
}
/*remove node from its old community and add it to its new
one*/
partition.getNodeIDMap().put(node, newCom);
partition.getIDNodeSetMap().get(nC).remove(node);
if (partition.getIDNodeSetMap().get(nC).isEmpty()) {
partition.getIDNodeSetMap().remove(nC);
}
if (newCom == (g.getNumVertices() + 1)) {
HashSet emptyPlOne =
new HashSet();
emptyPlOne.add(node);
partition.getIDNodeSetMap().
put(newCom, emptyPlOne);
} else {
partition.getIDNodeSetMap().get(newCom).add(node);
}
}
}
} while (hLvn(g, partition, gamma) > h_old);
return partition;
}
//local move for the Leiden algorithm
public Partition moveNodesFast (ComDetGraph g, Partition partition, double gamma) {
/*randomizing order of nodes, creating queue and set for dealing with
the smart local move procedure*/
List nodes =
new ArrayList(g.getNodeMap().values());
Collections.shuffle(nodes, new Random(123));
LinkedList nodeLinked = new LinkedList(nodes);
HashSet nodesNew = new HashSet(nodes);
do {
ComDetNode node = nodeLinked.remove();
nodesNew.remove(node);
//max value of change in CPM
double max_del_CPM = Double.NEGATIVE_INFINITY;
int nC = partition.getNodeIDMap().get(node);
//map to store communities and change in CPM
HashMap comm_to_del_CPM = new HashMap();
/*iterating over neighbors to store changes in CPM and find max
change in CPM*/
for (ComDetNode nbr : node.getNeighbors()) {
int C = partition.getNodeIDMap().get(nbr);
if (!comm_to_del_CPM.containsKey(C)) {
double del_CPM = del_CPM(partition, node, nC, C, gamma);
comm_to_del_CPM.put(C, del_CPM);
max_del_CPM = Math.max(max_del_CPM, del_CPM);
}
}
//doing the same as above for empty community
double del_CPM_empty = del_CPM_empty(partition, node, nC, gamma);
comm_to_del_CPM.put(Integer.MIN_VALUE, del_CPM_empty);
max_del_CPM = Math.max(max_del_CPM, del_CPM_empty);
int newCom = Integer.MIN_VALUE;
//moving only if change in CPM > 0
if (max_del_CPM > 0) {
//finding appropriate community with max change in CPM
for (int C : comm_to_del_CPM.keySet()) {
if (comm_to_del_CPM.get(C) == max_del_CPM) {
newCom = C;
break;
}
}
if (newCom == Integer.MIN_VALUE) {
newCom = g.getNumVertices();
++newCom;
}
//removing node from old community and moving it to the new one
partition.getNodeIDMap().put(node, newCom);
partition.getIDNodeSetMap().get(nC).remove(node);
if (partition.getIDNodeSetMap().get(nC).isEmpty()) {
partition.getIDNodeSetMap().remove(nC);
}
if (newCom == (g.getNumVertices() + 1)) {
HashSet emptyPlOne =
new HashSet();
emptyPlOne.add(node);
partition.getIDNodeSetMap().
put(newCom, emptyPlOne);
} else {
partition.getIDNodeSetMap().get(newCom).add(node);
}
/*adding neighbors not in the new community to the queue and set
to visit later*/
for (ComDetNode nbr : node.getNeighbors()) {
int nbrCom = partition.getNodeIDMap().get(nbr);
if (nbrCom != newCom) {
nodeLinked.add(node);
nodesNew.add(node);
}
}
}
} while (!nodeLinked.isEmpty());
return partition;
}
//Leiden algorithm partition refinement
public Partition refinePartition
(ComDetGraph g, Partition partition, double gamma) {
Partition p_refined = singletonPartition(g);
//merging those subsets of the community that cause an optimum change in
//CPM
for (HashSet comm :
partition.getIDNodeSetMap().values()) {
Partition p_comm = singletonPartitionForComm(comm);
p_refined = mergeNodeSubsets(g, p_comm, comm, gamma);
}
return p_refined;
}
public Partition mergeNodeSubsets
(ComDetGraph g, Partition p, HashSet comm, double gamma) {
HashSet R = new HashSet();
for (ComDetNode n : comm) {
HashSet nSingle = new HashSet();
nSingle.add(n);
//if well-connected then add to the set for further processing
if (checkGammaConnected(nSingle, comm, gamma)) {
R.add(n);
}
}
//randomize order
ArrayList R_rnd = new ArrayList(R);
Collections.shuffle(R_rnd, new Random(123));
for (ComDetNode node : R_rnd) {
int C = p.getNodeIDMap().get(node);
//if singleton then process further
if (p.getIDNodeSetMap().get(C).size() == 1) {
HashMap T = new HashMap();
//find well-connected communities
for (Integer Co : p.getIDNodeSetMap().keySet()) {
HashSet com = p.getIDNodeSetMap().get(Co);
if (checkGammaConnected(com, comm, gamma)) {
T.put(Co, com);
}
}
HashMap comm_to_prob = new HashMap();
//find probability of move causing an optimum change in CPM
for (Integer Co : T.keySet()) {
if (del_CPM(p, node, C, Co, gamma) >= 0) {
double exponent = 10 * del_CPM(p, node, C, Co, gamma);
double prob = Math.exp(exponent);
comm_to_prob.put(Co, prob);
} else {
comm_to_prob.put(Co, 0.0);
}
}
//randomly select community to move to with probabilities
//proportional to change in CPM
int com = randomWeightedSelection(comm_to_prob);
p.getNodeIDMap().put(node, com);
p.getIDNodeSetMap().get(C).remove(node);
if (p.getIDNodeSetMap().get(C).isEmpty()) {
p.getIDNodeSetMap().remove(C);
}
p.getIDNodeSetMap().get(com).add(node);
}
}
return p;
}
//randomly select community with probability proportional to the change in
//modularity
private int randomWeightedSelection(HashMap comm_to_prob) {
double totalWt = 0;
//find total weight for the set of communities
for (Integer Co : comm_to_prob.keySet()) {
totalWt += comm_to_prob.get(Co);
}
//randomly calculate a weight
double rnd = Math.random() * totalWt;
double commWeight = 0;
//return community with weight above randomly calculated weight
for (Integer Co : comm_to_prob.keySet()) {
commWeight += comm_to_prob.get(Co);
if (commWeight >= rnd) {
return Co;
}
}
return 0;
}
//check if a subset of a set of nodes is well-connected to the rest of the
//set
private boolean checkGammaConnected
(HashSet T, HashSet S, double gamma) {
double S_not_T_edges = 0.0;
for (ComDetNode node : T) {
for (ComDetNode nbr : node.getNeighbors()) {
if (S.contains(nbr) && !T.contains(nbr)) {
S_not_T_edges++;
}
}
}
double secondTerm = gamma * T.size() * (S.size() - T.size());
return S_not_T_edges >= secondTerm;
}
//calculate optimum partition using the Louvain algorithm
public Partition louvain (ComDetGraph g, Partition partition, double gamma) {
double q_prev = hLvn(g, partition, gamma);
Partition p_new = moveNodes(g, partition, gamma);
if ((hLvn(g, p_new, gamma) - q_prev) >= 0.001) {
louvain(g, p_new, gamma);
}
return p_new;
}
//calculate optimum partition using the Leiden algorithm
public Partition leiden (ComDetGraph g, Partition partition, double gamma) {
double cpm_prev = hLdn(partition, gamma);
Partition p_new = moveNodesFast(g, partition, gamma);
if ((hLdn(p_new, gamma) - cpm_prev) >= 0.001) {
leiden(g, p_new, gamma);
}
return p_new;
}
public static void main(String[] args) {
//load graph and calculate final partition using Leiden algorithm
ComDet cd = new ComDet();
ComDetGraph g = new ComDetGraph();
GraphLoader.loadGraph(g, "data/example_graph_changed.txt");
Partition sP = cd.singletonPartition(g);
Partition finalPart = cd.leiden(g, sP, 0.5);
System.out.println(finalPart.getIDNodeSetMap());
}
}
Узел с номером 10 перемещается в сообщество с узлом с номером 15, а в более поздней итерации внешнего цикла функции moveNodesFast() выводится из того же сообщества. Я не понимаю, почему это происходит. Я явно где-то допустил ошибку, но поскольку формула CPM, насколько я понимаю, верна, я понятия не имею, почему программа вычисляет прибавку для локального хода и прибавку для обратного того же самого локального хода.
Я реализовал алгоритмы Лувена и Лейдена без агрегирования графов. Моя реализация в Лейдене с параметром разрешения гаммы, установленным на 0,5, похоже, работает неправильно. Моя программа сначала помещает два узла в сообщество, а затем удаляет один узел из того же сообщества, причем изменение значения функции CPM оба раза является положительным. Похоже, что алгоритм Лейдена должен работать не так. Графики присутствуют в подпапке данных папки моего проекта в виде текстовых файлов с целыми числами, представляющими узлы, и строками, представляющими ребра. Тот, с которым я сейчас работаю, присутствует в файле с именем «example_graph_changed.txt». Это показано ниже: [code]1 2 1 3 1 4 2 3 2 4 3 4 3 6 4 8 6 5 6 7 6 8 8 5 8 7 8 9 7 9 7 10 9 10 9 13 10 11 10 13 10 14 10 15 13 11 13 12 11 12 11 14 11 15 11 16 14 16 [/code] У меня есть класс для загрузки графиков (с именем GraphLoader) и интерфейс Graph. GraphLoader присутствует в пакете util в папке моего проекта, а интерфейс Graph присутствует в пакете Graph вместе со всеми другими созданными мной классами. GraphLoader:[code]package util;
public class GraphLoader { /** * Loads graph with data from a file. * The file should consist of lines with 2 integers each, corresponding * to a "from" vertex and a "to" vertex. */ public static void loadGraph(graph.Graph g, String filename) { Set seen = new HashSet(); Scanner sc; try { sc = new Scanner(new File(filename)); } catch (Exception e) { e.printStackTrace(); return; } // Iterate over the lines in the file, adding new // vertices as they are found and connecting them with edges. while (sc.hasNextInt()) { int v1 = sc.nextInt(); int v2 = sc.nextInt(); if (!seen.contains(v1)) { g.addVertex(v1); seen.add(v1); } if (!seen.contains(v2)) { g.addVertex(v2); seen.add(v2); } g.addEdge(v1, v2); }
public interface Graph { /* Creates a vertex with the given number. */ public void addVertex(int num);
/* Creates an edge from the first vertex to the second. */ public void addEdge(int i1, int i2);
/* Finds the egonet centered at a given node. */ public Graph getEgonet(int center);
/* Returns all SCCs in a directed graph */ public List getSCCs();
/* Return the graph's connections in a readable format. * The keys in this HashMap are the vertices in the graph. * The values are the nodes that are reachable via a directed * edge from the corresponding key. * The returned representation ignores edge weights and * multi-edges. */ public HashMap exportGraph(); } [/code] Ниже показан класс, реализующий интерфейс Graph, который я использую для фактического хранения графика как объекта: [code]package graph;
//data structures to store nodes and edges private HashMap nodeMap; private HashSet edges;
public ComDetGraph () { this.nodeMap = new HashMap(); this.edges = new HashSet(); }
//methods to expose properties of the graph public int getNumVertices() { return nodeMap.values().size(); }
public int getNumEdges() { return edges.size(); }
public HashMap getNodeMap() { return nodeMap; }
public HashSet getEdges() { return edges; }
//methods to add vertices and edges @Override public void addVertex(int num) { ComDetNode n = nodeMap.get(num); if (n == null) { n = new ComDetNode(num); nodeMap.put(num, n); } }
@Override public void addEdge(int i1, int i2) { ComDetNode n1 = nodeMap.get(i1); ComDetNode n2 = nodeMap.get(i2);
if (n1 == null) { throw new IllegalArgumentException("Node with value " + i1 + " does not exist in graph"); } if (n2 == null) { throw new IllegalArgumentException("Node with value " + i2 + " does not exist in graph"); }
/*the node object has an integer value and a set of nodes as its neighbors*/ private Integer value; private HashSet neighbors;
//constructor public ComDetNode(Integer value) { this.value = value; this.neighbors = new HashSet(); }
//getters and setters public Integer getValue() { return value; }
public void setValue(Integer value) { this.value = value; }
public HashSet getNeighbors() { return neighbors; }
public void addNeighbor (ComDetNode nbr) { neighbors.add(nbr); }
/*since many methods will be removing and adding these nodes to * sets and maps, these methods are overridden to ensure removal * and addition can be carried out without the object hashCodes needing * to match exactly*/ @Override public int hashCode() { return Objects.hash(value); }
@Override public boolean equals(Object obj) { if (this == obj) return true; if (obj == null) return false; if (getClass() != obj.getClass()) return false; ComDetNode other = (ComDetNode) obj; return Objects.equals(value, other.value); }
@Override public String toString() { return "" + value + ""; }
} [/code] ComDetEdge: [code]package graph;
public class ComDetEdge {
//starting and ending nodes of the edge private ComDetNode start; private ComDetNode end;
public ComDetEdge (ComDetNode start, ComDetNode end, int weight) { this.start = start; this.end = end; }
//getter for the end node public ComDetNode getEndNode() { return end; }
/*data structures to ensure quick retrieval of community ID for a given node * and quick retrieval of set of nodes in a community for a given community * ID*/ private HashMap nodeIDMap; private HashMap iDNodeSetMap;
public Partition() { nodeIDMap = new HashMap(); iDNodeSetMap = new HashMap(); }
//getters public HashMap getNodeIDMap() { return nodeIDMap; }
public HashMap getIDNodeSetMap() { return iDNodeSetMap; }
//getters for community ID and node set public int getComID (ComDetNode node) { return nodeIDMap.get(node); }
public HashSet getNodeSet (int id) { return iDNodeSetMap.get(id); }
} [/code] Наконец, у меня есть класс, в котором я написал функции для использования в алгоритмах Лувена и Лейдена, а также основную функцию для их запуска. [code]package graph;
/*method to create a singleton partition of the graph i.e. store every * node in its own separate community*/ public Partition singletonPartition (ComDetGraph g) { Partition partition = new Partition(); for (ComDetNode node : g.getNodeMap().values()) { /*placing nodes and community IDs in one of the maps in the * Partition object*/ partition.getNodeIDMap().put(node, node.getValue());
//a node set to store just the current node HashSet newNodeSet = new HashSet(); newNodeSet.add(node);
//storing the community IDs along with corresponding node sets partition.getIDNodeSetMap().put(node.getValue(), newNodeSet); } return partition; }
//method to create a singleton partition of a community /*exactly the same as above except only for a community and not an entire graph*/ private Partition singletonPartitionForComm (HashSet comm) { Partition partition = new Partition(); for (ComDetNode node : comm) { partition.getNodeIDMap().put(node, node.getValue()); HashSet newNodeSet = new HashSet(); newNodeSet.add(node); partition.getIDNodeSetMap().put(node.getValue(), newNodeSet); } return partition; }
//function to calculate modularity for the community C public double q (ComDetGraph g, Partition partition, int C, double gamma) {
/*calculating the internal edges of the community and the total number of edges for each node in the community*/ for (ComDetNode n : partition.getIDNodeSetMap().get(C)) { for (ComDetNode nbr : n.getNeighbors()) { if (partition.getIDNodeSetMap().get(C).contains(nbr)) { ++int_comp; } ++out_comp; } }
//Calculating Constant Potts Model (CPM) function value for community C //Similar to above public double cpm (Partition partition, int C, double gamma) {
//expected number of edges in C using CPM int n_c = partition.getIDNodeSetMap().get(C).size(); double n_c_2 = (double) n_c * ((n_c - 1) / 2);
out_comp *= (gamma * n_c_2);
//actual number of edges in C for (ComDetNode n : partition.getIDNodeSetMap().get(C)) { for (ComDetNode nbr : n.getNeighbors()) { if (partition.getIDNodeSetMap().get(C).contains(nbr)) { ++int_comp; } } }
cpm = int_comp - out_comp;
return cpm; }
/*Calculating change in modularity on moving node i from community C to * community D*/ public double del_Q (ComDetGraph g, Partition partition, ComDetNode i, int C, int D, double gamma) {
int numEdges = g.getNumEdges(); double step1output = 0.0; double step2output = 0.0; int deg_i_orig = i.getNeighbors().size(); int deg_i_C = deg_i_orig; int deg_i_D = deg_i_orig; int sum_deg_C = 0; int sum_deg_D = 0; double del_Q = 0;
//calculating degrees of node i in communities C and D for (ComDetNode nbr : i.getNeighbors()) { if (!partition.getIDNodeSetMap().get(C).contains(nbr)) { --deg_i_C; } }
for (ComDetNode nbr : i.getNeighbors()) { if (!partition.getIDNodeSetMap().get(D).contains(nbr)) { --deg_i_D; } }
//calculating sums of degrees of nodes in communities C and D for (ComDetNode n : partition.getIDNodeSetMap().get(C)) { sum_deg_C += n.getNeighbors().size(); }
for (ComDetNode n : partition.getIDNodeSetMap().get(D)) { sum_deg_D += n.getNeighbors().size(); }
/*if node i is being moved from community C to D, its contribution to * the sum of degrees of nodes in D must be considered*/ if (C != D) { sum_deg_D += i.getNeighbors().size(); }
//Calculating change in CPM on moving node i from community C to community D public double del_CPM (Partition partition, ComDetNode i, int C, int D, double gamma) {
double delta_C = 0.0; double delta_D = 0.0; int deg_i_orig = i.getNeighbors().size(); int deg_i_C = deg_i_orig; int deg_i_D = deg_i_orig; int n_c = partition.getIDNodeSetMap().get(C).size(); int n_d = partition.getIDNodeSetMap().get(D).size(); double del_CPM = 0;
//degrees of node i in C and D for (ComDetNode nbr : i.getNeighbors()) { if (!partition.getIDNodeSetMap().get(C).contains(nbr)) { --deg_i_C; } }
for (ComDetNode nbr : i.getNeighbors()) { if (!partition.getIDNodeSetMap().get(D).contains(nbr)) { --deg_i_D; } }
//changes in CPM for moving i out of C and into D delta_C = (double) -deg_i_C + (gamma * (n_c - 1)); delta_D = (double) deg_i_D - (gamma * n_d);
del_CPM = delta_C + delta_D;
return del_CPM; }
/*Calculating change in modularity on moving node i from community C to the empty community*/ public double del_Q_empty (ComDetGraph g, Partition partition, ComDetNode i, int C, double gamma) {
int numEdges = g.getNumEdges(); int deg_i_orig = i.getNeighbors().size(); int deg_i_C = deg_i_orig; int sum_deg_C = 0; double del_Q = 0;
//degree of i in C and sum of degrees of C for (ComDetNode nbr : i.getNeighbors()) { if (!partition.getIDNodeSetMap().get(C).contains(nbr)) { --deg_i_C; } }
for (ComDetNode n : partition.getIDNodeSetMap().get(C)) { sum_deg_C += n.getNeighbors().size(); }
//change in modularity for moving i out of C double del_Q_1 = (((double) -1 / numEdges) * (deg_i_C)) + ((deg_i_orig / (2 * Math.pow(numEdges, 2))) * sum_deg_C * gamma);
//change in modularity for moving i into the empty community double del_Q_2_1 = ((double) -gamma / 2); double del_Q_2_2 = ((double) deg_i_orig / numEdges); double del_Q_2 = del_Q_2_1 * Math.pow(del_Q_2_2, 2); del_Q = del_Q_1 + del_Q_2;
return del_Q; }
/*Calculating change in CPM on moving the node i from the community C to * the empty community*/ public double del_CPM_empty (Partition partition, ComDetNode i, int C, double gamma) {
int deg_i_orig = i.getNeighbors().size(); int deg_i_C = deg_i_orig; int n_c = partition.getIDNodeSetMap().get(C).size(); double del_CPM = 0;
//degree of i in C for (ComDetNode nbr : i.getNeighbors()) { if (!partition.getIDNodeSetMap().get(C).contains(nbr)) { --deg_i_C; } }
//Modularity for a certain partition of the graph public double hLvn (ComDetGraph g, Partition partition, double gamma) {
double h = 0.0;
//sum of modularities for each community for (int C : partition.getIDNodeSetMap().keySet()) { h += q(g, partition, C, gamma); }
return h; }
//CPM for a certain partition of the graph public double hLdn (Partition partition, double gamma) {
double h = 0.0;
//sum of CPM values for each community for (int C : partition.getIDNodeSetMap().keySet()) { h += cpm(partition, C, gamma); }
return h; }
//local move for the Louvain algorithm public Partition moveNodes (ComDetGraph g, Partition partition, double gamma) {
double h_old = 0.0;
do {
//calculating modularity of old partition h_old = hLvn(g, partition, gamma);
//randomizing order of nodes List nodes = new ArrayList(g.getNodeMap().values()); Collections.shuffle(nodes);
for (ComDetNode node : nodes) {
//max value of change in modularity double max_del_H = Double.NEGATIVE_INFINITY; int nC = partition.getNodeIDMap().get(node);
/*map to store the change in modularity for each community we might move this node to*/ HashMap comm_to_del_H = new HashMap();
//moving this node into each of its neighbor's communities for (ComDetNode nbr : node.getNeighbors()) { int C = partition.getNodeIDMap().get(nbr);
/*ensuring communities with the same change in modularity as a community already contained in the map aren't added*/ if (!comm_to_del_H.containsKey(C)) {
/*computing change in modularity for this community, * storing it and updating the max value */ double del_H = del_Q(g, partition, node, nC, C, gamma); comm_to_del_H.put(C, del_H); max_del_H = Math.max(del_H, max_del_H); } }
/*computing change in modularity for moving this node into * the empty community, storing it and updating max*/ double del_H_empty = del_Q_empty(g, partition, node, nC, gamma); comm_to_del_H.put(Integer.MIN_VALUE, del_H_empty); max_del_H = Math.max(del_H_empty, max_del_H);
int newCom = Integer.MIN_VALUE;
/*only if the change in modularity is positive will we perform * a local move*/ if (max_del_H > 0) {
//find community corresponding to max change in modularity for (int C : comm_to_del_H.keySet()) { if (comm_to_del_H.get(C) == max_del_H) { newCom = C; break; } }
//if empty community, set its ID to a new value if (newCom == Integer.MIN_VALUE) { newCom = g.getNumVertices(); ++newCom; }
/*remove node from its old community and add it to its new one*/ partition.getNodeIDMap().put(node, newCom); partition.getIDNodeSetMap().get(nC).remove(node); if (partition.getIDNodeSetMap().get(nC).isEmpty()) { partition.getIDNodeSetMap().remove(nC); } if (newCom == (g.getNumVertices() + 1)) { HashSet emptyPlOne = new HashSet(); emptyPlOne.add(node); partition.getIDNodeSetMap(). put(newCom, emptyPlOne); } else { partition.getIDNodeSetMap().get(newCom).add(node); } } }
} while (hLvn(g, partition, gamma) > h_old);
return partition;
}
//local move for the Leiden algorithm public Partition moveNodesFast (ComDetGraph g, Partition partition, double gamma) {
/*randomizing order of nodes, creating queue and set for dealing with the smart local move procedure*/ List nodes = new ArrayList(g.getNodeMap().values()); Collections.shuffle(nodes, new Random(123)); LinkedList nodeLinked = new LinkedList(nodes); HashSet nodesNew = new HashSet(nodes);
do { ComDetNode node = nodeLinked.remove(); nodesNew.remove(node);
//max value of change in CPM double max_del_CPM = Double.NEGATIVE_INFINITY; int nC = partition.getNodeIDMap().get(node);
//map to store communities and change in CPM HashMap comm_to_del_CPM = new HashMap();
/*iterating over neighbors to store changes in CPM and find max change in CPM*/ for (ComDetNode nbr : node.getNeighbors()) { int C = partition.getNodeIDMap().get(nbr); if (!comm_to_del_CPM.containsKey(C)) { double del_CPM = del_CPM(partition, node, nC, C, gamma); comm_to_del_CPM.put(C, del_CPM); max_del_CPM = Math.max(max_del_CPM, del_CPM); } }
//doing the same as above for empty community double del_CPM_empty = del_CPM_empty(partition, node, nC, gamma); comm_to_del_CPM.put(Integer.MIN_VALUE, del_CPM_empty); max_del_CPM = Math.max(max_del_CPM, del_CPM_empty);
int newCom = Integer.MIN_VALUE;
//moving only if change in CPM > 0 if (max_del_CPM > 0) {
//finding appropriate community with max change in CPM for (int C : comm_to_del_CPM.keySet()) { if (comm_to_del_CPM.get(C) == max_del_CPM) { newCom = C; break; } } if (newCom == Integer.MIN_VALUE) { newCom = g.getNumVertices(); ++newCom; }
//removing node from old community and moving it to the new one partition.getNodeIDMap().put(node, newCom); partition.getIDNodeSetMap().get(nC).remove(node); if (partition.getIDNodeSetMap().get(nC).isEmpty()) { partition.getIDNodeSetMap().remove(nC); } if (newCom == (g.getNumVertices() + 1)) { HashSet emptyPlOne = new HashSet(); emptyPlOne.add(node); partition.getIDNodeSetMap(). put(newCom, emptyPlOne); } else { partition.getIDNodeSetMap().get(newCom).add(node); }
/*adding neighbors not in the new community to the queue and set to visit later*/ for (ComDetNode nbr : node.getNeighbors()) { int nbrCom = partition.getNodeIDMap().get(nbr); if (nbrCom != newCom) { nodeLinked.add(node); nodesNew.add(node); } }
//merging those subsets of the community that cause an optimum change in //CPM for (HashSet comm : partition.getIDNodeSetMap().values()) { Partition p_comm = singletonPartitionForComm(comm); p_refined = mergeNodeSubsets(g, p_comm, comm, gamma); }
//randomly select community with probability proportional to the change in //modularity private int randomWeightedSelection(HashMap comm_to_prob) {
double totalWt = 0;
//find total weight for the set of communities for (Integer Co : comm_to_prob.keySet()) { totalWt += comm_to_prob.get(Co); }
//randomly calculate a weight double rnd = Math.random() * totalWt;
double commWeight = 0;
//return community with weight above randomly calculated weight for (Integer Co : comm_to_prob.keySet()) { commWeight += comm_to_prob.get(Co); if (commWeight >= rnd) { return Co; } }
return 0; }
//check if a subset of a set of nodes is well-connected to the rest of the //set private boolean checkGammaConnected (HashSet T, HashSet S, double gamma) {
double S_not_T_edges = 0.0;
for (ComDetNode node : T) { for (ComDetNode nbr : node.getNeighbors()) { if (S.contains(nbr) && !T.contains(nbr)) { S_not_T_edges++; } } }
public static void main(String[] args) { //load graph and calculate final partition using Leiden algorithm ComDet cd = new ComDet(); ComDetGraph g = new ComDetGraph(); GraphLoader.loadGraph(g, "data/example_graph_changed.txt"); Partition sP = cd.singletonPartition(g); Partition finalPart = cd.leiden(g, sP, 0.5); System.out.println(finalPart.getIDNodeSetMap());
}
} [/code] Узел с номером 10 перемещается в сообщество с узлом с номером 15, а в более поздней итерации внешнего цикла функции moveNodesFast() выводится из того же сообщества. Я не понимаю, почему это происходит. Я явно где-то допустил ошибку, но поскольку формула CPM, насколько я понимаю, верна, я понятия не имею, почему программа вычисляет прибавку для локального хода и прибавку для обратного того же самого локального хода.