community
If a graph is a description of some domain, the graph can be divided into many subgraphs. When the connections within each subgraph are as dense as possible, and the connections between subgraphs are as sparse as possible, such a subgraph can be called a community.
Community discovery algorithm
There are many community discovery algorithms, such as LPA, HANP, SLPA, and Louvain. Different algorithms have different effects in dividing communities. Louvain algorithm is a community discovery algorithm based on modularity. The algorithm performs well in both efficiency and effectiveness, and can discover a hierarchical community structure. Its optimization goal is to maximize the modularity of the entire community network.
Modularity
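Modularity is a measure to evaluate the quality of a community partition of a network. Its physical meaning is the difference between the fraction of edge weight that falls inside the communities and the fraction that would be expected if the edges were placed at random while keeping every node's degree. Its value range is [−1/2, 1). Formally, $Q = \frac{1}{2m}\sum_{i,j}\left[A_{ij} - \frac{k_i k_j}{2m}\right]\delta(c_i, c_j)$, where $A_{ij}$ is the weight of the edge between nodes $i$ and $j$, $k_i$ is the sum of the weights of the edges attached to node $i$, $m$ is the total edge weight of the graph, and $\delta(c_i, c_j)$ is 1 when $i$ and $j$ belong to the same community and 0 otherwise. It can be simply understood as the weight of the edges within a community minus the weight expected from the total weight of all the edges connected to the nodes of that community; the undirected, unweighted case is easier to understand: it is the number of edges within the community compared against the total degree of the nodes within the community.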
Louvain algorithm
Algorithm flow:
1. Initially, each vertex is regarded as a community, and the number of communities is the same as the number of vertices.
2. For each vertex in turn, tentatively move it into the community of each of its adjacent vertices and calculate the resulting modularity gain. If the largest gain is greater than 0, put the vertex into the community of the adjacent vertex that gives that gain; otherwise leave the vertex where it is.
3. Iterate the second step until the algorithm is stable, that is, the communities to which all vertices belong no longer change.
4. Compress all nodes in each community into one new node: the weights of the edges inside the community become the weight of a self-loop on the new node, and the weights of the edges between two communities become the weight of the edge between the corresponding new nodes.
5. Repeat steps 1-3 until the algorithm is stable.
# coding=utf-8
import collections
import random
def load_graph(path):
    """Load an undirected weighted graph from a whitespace-separated edge list.

    Each non-blank line has the form ``v_i v_j [w]``: two integer vertex ids
    followed by an optional float weight. Blank lines are skipped, and a line
    without a weight column is given the weight 1.0. Every edge is stored in
    both directions, so a later line for the same pair overwrites the weight.

    Args:
        path: Path of the edge-list file.

    Returns:
        A ``collections.defaultdict(dict)`` mapping
        ``vertex id -> {neighbor id: weight}``.

    Raises:
        IndexError: If a non-blank line holds only one field.
        ValueError: If a vertex id or a weight cannot be parsed as a number.
    """
    G = collections.defaultdict(dict)
    with open(path) as text:
        for line in text:
            fields = line.split()
            if not fields:
                # Blank or whitespace-only line (e.g. a trailing newline).
                continue
            v_i = int(fields[0])
            v_j = int(fields[1])
            # Unweighted edge lists have no third column.
            w = float(fields[2]) if len(fields) > 2 else 1.0
            G[v_i][v_j] = w
            G[v_j][v_i] = w
    return G
class Vertex:
    """Record kept for one vertex of the graph being clustered.

    The constructor only stores its arguments:

    * ``_vid``   -- id of the vertex
    * ``_cid``   -- id of the community the vertex is assigned to
    * ``_nodes`` -- the ``nodes`` collection, stored as given (not copied)
    * ``_kin``   -- weight of the edges inside the vertex (defaults to 0)
    """

    def __init__(self, vid, cid, nodes, k_in=0):
        self._vid, self._cid = vid, cid
        self._nodes = nodes
        # Weight of the edges inside this vertex.
        self._kin = k_in
class Louvain():
    """Louvain-style community detection on an adjacency mapping.

    The graph is a mapping ``{vid: {neighbor_vid: weight}}``. ``execute``
    alternates ``first_stage`` (moving vertices between communities) and
    ``second_stage`` (collapsing each community into one vertex) until a
    first stage moves nothing, then returns the communities as sets of the
    original vertex ids.
    """

    def __init__(self, G):
        """Start with every vertex of ``G`` in its own community.

        Args:
            G: Adjacency mapping ``{vid: {neighbor_vid: weight}}``. Vertex ids
                must be mutually comparable, because each edge is counted once
                through the ``neighbor > vid`` test.
        """
        self._G = G
        # Number of edges (each pair counted once); never updated afterwards.
        self._m = 0
        # Community id -> set of ids of the vertices currently in it.
        self._cid_vertices = {}
        # Vertex id -> the Vertex instance holding its state.
        self._vid_vertex = {}
        for vid, neighbors in self._G.items():
            self._cid_vertices[vid] = {vid}
            self._vid_vertex[vid] = Vertex(vid, vid, {vid})
            self._m += sum(1 for neighbor in neighbors if neighbor > vid)

    def first_stage(self):
        """Move vertices to neighbouring communities until no move helps.

        Vertices are visited in a random order. Each vertex is moved to the
        neighbouring community with the largest positive gain, and the sweep
        is repeated until a full pass moves nothing.

        Returns:
            True if at least one vertex changed community, otherwise False.
        """
        mod_inc = False  # whether any vertex moved during this stage
        # random.shuffle works in place, so the keys must be materialised into
        # the very list that is iterated; shuffling a temporary copy would
        # leave the visiting order untouched.
        visit_sequence = list(self._G.keys())
        random.shuffle(visit_sequence)
        while True:
            can_stop = True  # stays True when a whole pass moves nothing
            for v_vid in visit_sequence:
                vertex = self._vid_vertex[v_vid]
                v_cid = vertex._cid
                neighbors = self._G[v_vid]
                k_v = sum(neighbors.values()) + vertex._kin
                cid_Q = {}  # candidate community id -> gain of moving there
                for w_vid in neighbors:
                    w_cid = self._vid_vertex[w_vid]._cid
                    if w_cid in cid_Q:
                        continue
                    members = self._cid_vertices[w_cid]
                    tot = sum(
                        sum(self._G[k].values()) + self._vid_vertex[k]._kin
                        for k in members
                    )
                    if w_cid == v_cid:
                        # Do not count the vertex itself in its own community.
                        tot -= k_v
                    k_v_in = sum(w for k, w in neighbors.items() if k in members)
                    # Only the sign and ordering of the gain matter, so the
                    # constant factor 1/(2*self._m) is left out.
                    # NOTE(review): the usual modularity-gain expression
                    # divides by 2*m here; the original denominator is kept --
                    # confirm which one is intended.
                    cid_Q[w_cid] = k_v_in - k_v * tot / self._m
                if not cid_Q:
                    # A vertex without neighbours has nowhere to move.
                    continue
                # max() keeps the first of equal gains, like a stable sort.
                cid, max_delta_Q = max(cid_Q.items(), key=lambda item: item[1])
                if max_delta_Q > 0.0 and cid != v_cid:
                    vertex._cid = cid
                    self._cid_vertices[cid].add(v_vid)
                    self._cid_vertices[v_cid].remove(v_vid)
                    can_stop = False
                    mod_inc = True
            if can_stop:
                break
        return mod_inc

    def second_stage(self):
        """Collapse every non-empty community into a single vertex.

        Replaces ``_cid_vertices``, ``_vid_vertex`` and ``_G`` with the
        aggregated graph: one vertex per community (keyed by the community
        id) and one edge per pair of communities with non-zero total weight.
        """
        cid_vertices = {}
        vid_vertex = {}
        for cid, vertices in self._cid_vertices.items():
            if not vertices:
                continue
            new_vertex = Vertex(cid, cid, set())
            for vid in vertices:
                member = self._vid_vertex[vid]
                new_vertex._nodes.update(member._nodes)
                new_vertex._kin += member._kin
                for k, v in self._G[vid].items():
                    if k in vertices:
                        # An edge stored in both directions is met once from
                        # each endpoint, so add half of its weight each time.
                        new_vertex._kin += v / 2.0
            cid_vertices[cid] = {cid}
            vid_vertex[cid] = new_vertex

        G = collections.defaultdict(dict)
        for cid1, vertices1 in self._cid_vertices.items():
            if not vertices1:
                continue
            for cid2, vertices2 in self._cid_vertices.items():
                # Handle each unordered pair once, from the smaller id.
                if cid2 <= cid1 or not vertices2:
                    continue
                edge_weight = 0.0
                for vid in vertices1:
                    for k, v in self._G[vid].items():
                        if k in vertices2:
                            edge_weight += v
                if edge_weight != 0:
                    G[cid1][cid2] = edge_weight
                    G[cid2][cid1] = edge_weight

        self._cid_vertices = cid_vertices
        self._vid_vertex = vid_vertex
        self._G = G

    def get_communities(self):
        """Return the current communities.

        Returns:
            A list with one set per non-empty community, each holding the ids
            collected in the ``_nodes`` of the community's vertices.
        """
        communities = []
        for vertices in self._cid_vertices.values():
            if not vertices:
                continue
            c = set()
            for vid in vertices:
                c.update(self._vid_vertex[vid]._nodes)
            communities.append(c)
        return communities

    def execute(self):
        """Run both stages repeatedly and return the final communities.

        Returns:
            The list of sets produced by ``get_communities`` once a first
            stage no longer moves any vertex.
        """
        while self.first_stage():
            self.second_stage()
        return self.get_communities()
if __name__ == '__main__':
    # Read the edge list, cluster it, and collect the resulting communities.
    G = load_graph('s.txt')
    algorithm = Louvain(G)
    communities = algorithm.execute()
    # Print the communities from the largest to the smallest; communities of
    # equal size keep the order in which they were returned.
    communities = sorted(communities, key=len, reverse=True)
    count = 0
    for count, communitie in enumerate(communities, start=1):
        print("社区", count, " ", communitie)
Community division and visualization with NetworkX and the community (python-louvain) package
installation
The community module is provided by the python-louvain package; install it together with NetworkX using pip:
pip install python-louvain
pip install networkx
use
Best division
community.best_partition(graph, partition=None, weight='weight', resolution=1.0)
Compute the partition of the graph nodes which maximises the modularity (or try…) using the Louvain heuristics.
This is the partition of highest modularity, i.e. the highest partition of the dendrogram generated by the Louvain algorithm.
import community
import networkx as nx
import matplotlib.pyplot as plt
# Better with karate_graph() as defined in the networkx example:
# Erdos-Renyi graphs don't have true community structure.
G = nx.erdos_renyi_graph(30, 0.05)

# First compute the best partition (a mapping node -> community id).
partition = community.best_partition(G)

# Drawing: one draw call per community, coloured by its running index
# divided by the number of communities.
community_ids = set(partition.values())
size = float(len(community_ids))
pos = nx.spring_layout(G)
count = 0.
for com in community_ids:
    count += 1.
    list_nodes = [node for node, node_com in partition.items()
                  if node_com == com]
    shade = str(count / size)
    nx.draw_networkx_nodes(G, pos, list_nodes, node_size=20,
                           node_color=shade)

nx.draw_networkx_edges(G, pos, alpha=0.5)
plt.show()