The Louvain method for community detection

叁歲伎倆 2022-06-02 11:38 274阅读 0赞

The Louvain method for community detection

Louvain method是一个非重叠社团发现的算法,该方法具有较快的执行效率。

对应的paper地址:http://arxiv.org/abs/0803.0476

Paper作者对算法的介绍网址:

http://perso.uclouvain.be/vincent.blondel/research/louvain.html

算法代码下载地址(只有c++版本和matlab版本,没有java版本):

https://sites.google.com/site/findcommunities/

找到c++版本时,想改成java版本,无奈发现c++版本里的数据结构略显复杂,失去耐心,就打算自己写一个java版本出来,结果自己写的不仅运行时间长,而且模块度值与论文中的模块度值也有一定的差距。

于是google到一个别人已经实现的java版本的代码

http://wiki.cns.iu.edu/display/CISHELL/Louvain+Community+Detection

除了实现Louvain方法之外,他还实现了一个slm算法,并将这几个算法实现打包成jar文件,源码可以在下面的网址中下载到

http://www.ludowaltman.nl/slm/

算法的总体思想是:

1.刚开始时,每一个节点形成一个小的簇

2.通过模块度函数值优化,将每一个节点归到‘最好’的簇当中去。该步骤直到所有的节点所属的簇不再变化才停止

3.将网络图进行reduce,把一个簇内的所有节点抽象成一个节点,看抽象之后的网络图是否还有优化的可能性。如果有,对抽象之后的图,继续进行第2步。

经过我对其代码的精简,Louvain算法代码如下:

ModularityOptimizer.java

[java] view plain copy

  1. /**
  2. * ModularityOptimizer
  3. *
  4. * @author Ludo Waltman
  5. * @author Nees Jan van Eck
  6. * @version 1.2.0, 05/14/14
  7. */
  8. import java.io.BufferedReader;
  9. import java.io.BufferedWriter;
  10. import java.io.FileReader;
  11. import java.io.FileWriter;
  12. import java.io.IOException;
  13. import java.util.Arrays;
  14. import java.util.Random;
  15. public class ModularityOptimizer
  16. {
  17. public static void main(String[] args) throws IOException
  18. {
  19. boolean update;
  20. double modularity, maxModularity, resolution, resolution2;
  21. int algorithm, i, j, modularityFunction, nClusters, nIterations, nRandomStarts;
  22. int[] cluster;
  23. long beginTime, endTime;
  24. Network network;
  25. Random random;
  26. String inputFileName, outputFileName;
  27. inputFileName = “fb.txt”;
  28. outputFileName = “communities.txt”;
  29. modularityFunction = 1;
  30. resolution = 1.0;
  31. algorithm = 1;
  32. nRandomStarts = 10;
  33. nIterations = 3;
  34. System.out.println(“Modularity Optimizer version 1.2.0 by Ludo Waltman and Nees Jan van Eck”);
  35. System.out.println();
  36. System.out.println(“Reading input file…”);
  37. System.out.println();
  38. network = readInputFile(inputFileName, modularityFunction);
  39. System.out.format(“Number of nodes: %d%n”, network.getNNodes());
  40. System.out.format(“Number of edges: %d%n”, network.getNEdges() / 2);
  41. System.out.println();
  42. System.out.println(“Running “ + ((algorithm == 1) ? “Louvain algorithm” : ((algorithm == 2) ? “Louvain algorithm with multilevel refinement” : “smart local moving algorithm”)) + “…”);
  43. System.out.println();
  44. resolution2 = ((modularityFunction == 1) ? (resolution / network.getTotalEdgeWeight()) : resolution);
  45. beginTime = System.currentTimeMillis();
  46. cluster = null;
  47. nClusters = -1;
  48. maxModularity = Double.NEGATIVE_INFINITY;
  49. random = new Random(100);
  50. for (i = 0; i < nRandomStarts; i++)
  51. {
  52. if (nRandomStarts > 1)
  53. System.out.format(“Random start: %d%n”, i + 1);
  54. network.initSingletonClusters(); //网络初始化,每个节点一个簇
  55. j = 0;
  56. update = true;
  57. do
  58. {
  59. if (nIterations > 1)
  60. System.out.format(“Iteration: %d%n”, j + 1);
  61. if (algorithm == 1)
  62. update = network.runLouvainAlgorithm(resolution2, random);
  63. j++;
  64. modularity = network.calcQualityFunction(resolution2);
  65. if (nIterations > 1)
  66. System.out.format(“Modularity: %.4f%n”, modularity);
  67. }
  68. while ((j < nIterations) && update);
  69. if (modularity > maxModularity)
  70. {
  71. cluster = network.getClusters();
  72. nClusters = network.getNClusters();
  73. maxModularity = modularity;
  74. }
  75. if (nRandomStarts > 1)
  76. {
  77. if (nIterations == 1)
  78. System.out.format(“Modularity: %.4f%n”, modularity);
  79. System.out.println();
  80. }
  81. }
  82. endTime = System.currentTimeMillis();
  83. if (nRandomStarts == 1)
  84. {
  85. if (nIterations > 1)
  86. System.out.println();
  87. System.out.format(“Modularity: %.4f%n”, maxModularity);
  88. }
  89. else
  90. System.out.format(“Maximum modularity in %d random starts: %.4f%n”, nRandomStarts, maxModularity);
  91. System.out.format(“Number of communities: %d%n”, nClusters);
  92. System.out.format(“Elapsed time: %d seconds%n”, Math.round((endTime - beginTime) / 1000.0));
  93. System.out.println();
  94. System.out.println(“Writing output file…”);
  95. System.out.println();
  96. writeOutputFile(outputFileName, cluster);
  97. }
  98. private static Network readInputFile(String fileName, int modularityFunction) throws IOException
  99. {
  100. BufferedReader bufferedReader;
  101. double[] edgeWeight1, edgeWeight2, nodeWeight;
  102. int i, j, nEdges, nLines, nNodes;
  103. int[] firstNeighborIndex, neighbor, nNeighbors, node1, node2;
  104. Network network;
  105. String[] splittedLine;
  106. bufferedReader = new BufferedReader(new FileReader(fileName));
  107. nLines = 0;
  108. while (bufferedReader.readLine() != null)
  109. nLines++;
  110. bufferedReader.close();
  111. bufferedReader = new BufferedReader(new FileReader(fileName));
  112. node1 = new int[nLines];
  113. node2 = new int[nLines];
  114. edgeWeight1 = new double[nLines];
  115. i = -1;
  116. for (j = 0; j < nLines; j++)
  117. {
  118. splittedLine = bufferedReader.readLine().split(“\t”);
  119. node1[j] = Integer.parseInt(splittedLine[0]);
  120. if (node1[j] > i)
  121. i = node1[j];
  122. node2[j] = Integer.parseInt(splittedLine[1]);
  123. if (node2[j] > i)
  124. i = node2[j];
  125. edgeWeight1[j] = (splittedLine.length > 2) ? Double.parseDouble(splittedLine[2]) : 1;
  126. }
  127. nNodes = i + 1;
  128. bufferedReader.close();
  129. nNeighbors = new int[nNodes];
  130. for (i = 0; i < nLines; i++)
  131. if (node1[i] < node2[i])
  132. {
  133. nNeighbors[node1[i]]++;
  134. nNeighbors[node2[i]]++;
  135. }
  136. firstNeighborIndex = new int[nNodes + 1];
  137. nEdges = 0;
  138. for (i = 0; i < nNodes; i++)
  139. {
  140. firstNeighborIndex[i] = nEdges;
  141. nEdges += nNeighbors[i];
  142. }
  143. firstNeighborIndex[nNodes] = nEdges;
  144. neighbor = new int[nEdges];
  145. edgeWeight2 = new double[nEdges];
  146. Arrays.fill(nNeighbors, 0);
  147. for (i = 0; i < nLines; i++)
  148. if (node1[i] < node2[i])
  149. {
  150. j = firstNeighborIndex[node1[i]] + nNeighbors[node1[i]];
  151. neighbor[j] = node2[i];
  152. edgeWeight2[j] = edgeWeight1[i];
  153. nNeighbors[node1[i]]++;
  154. j = firstNeighborIndex[node2[i]] + nNeighbors[node2[i]];
  155. neighbor[j] = node1[i];
  156. edgeWeight2[j] = edgeWeight1[i];
  157. nNeighbors[node2[i]]++;
  158. }
  159. {
  160. nodeWeight = new double[nNodes];
  161. for (i = 0; i < nEdges; i++)
  162. nodeWeight[neighbor[i]] += edgeWeight2[i];
  163. network = new Network(nNodes, firstNeighborIndex, neighbor, edgeWeight2, nodeWeight);
  164. }
  165. return network;
  166. }
  167. private static void writeOutputFile(String fileName, int[] cluster) throws IOException
  168. {
  169. BufferedWriter bufferedWriter;
  170. int i;
  171. bufferedWriter = new BufferedWriter(new FileWriter(fileName));
  172. for (i = 0; i < cluster.length; i++)
  173. {
  174. bufferedWriter.write(Integer.toString(cluster[i]));
  175. bufferedWriter.newLine();
  176. }
  177. bufferedWriter.close();
  178. }
  179. }

Network.java

[java] view plain copy

  1. /**
  2. * Network
  3. *
  4. * @author LudoWaltman
  5. * @author Nees Janvan Eck
  6. * @version 1.2.0,05/14/14
  7. */
  8. import java.io.Serializable;
  9. import java.util.Random;
  10. public class Network implements Cloneable, Serializable
  11. {
  12. private staticfinal long serialVersionUID = 1;
  13. private intnNodes;
  14. private int[]firstNeighborIndex;
  15. private int[]neighbor;
  16. private double[]edgeWeight;
  17. private double[]nodeWeight;
  18. private intnClusters;
  19. private int[]cluster;
  20. private double[]clusterWeight;
  21. private int[]nNodesPerCluster;
  22. private int[][]nodePerCluster;
  23. private booleanclusteringStatsAvailable;
  24. publicNetwork(int nNodes, int[] firstNeighborIndex, int[] neighbor, double[]edgeWeight, double[] nodeWeight)
  25. {
  26. this(nNodes,firstNeighborIndex, neighbor, edgeWeight, nodeWeight, null);
  27. }
  28. publicNetwork(int nNodes, int[] firstNeighborIndex, int[] neighbor, double[]edgeWeight, double[] nodeWeight, int[] cluster)
  29. {
  30. int i,nEdges;
  31. this.nNodes= nNodes;
  32. this.firstNeighborIndex = firstNeighborIndex;
  33. this.neighbor = neighbor;
  34. if(edgeWeight == null)
  35. {
  36. nEdges =neighbor.length;
  37. this.edgeWeight = new double[nEdges];
  38. for (i =0; i < nEdges; i++)
  39. this.edgeWeight[i] = 1;
  40. }
  41. else
  42. this.edgeWeight = edgeWeight;
  43. if(nodeWeight == null)
  44. {
  45. this.nodeWeight = new double[nNodes];
  46. for (i =0; i < nNodes; i++)
  47. this.nodeWeight[i] = 1;
  48. }
  49. else
  50. this.nodeWeight = nodeWeight;
  51. }
  52. public intgetNNodes()
  53. {
  54. returnnNodes;
  55. }
  56. public intgetNEdges()
  57. {
  58. returnneighbor.length;
  59. }
  60. public doublegetTotalEdgeWeight()
  61. {
  62. doubletotalEdgeWeight;
  63. int i;
  64. totalEdgeWeight = 0;
  65. for (i = 0;i < neighbor.length; i++)
  66. totalEdgeWeight += edgeWeight[i];
  67. returntotalEdgeWeight;
  68. }
  69. public double[] getEdgeWeights()
  70. {
  71. returnedgeWeight;
  72. }
  73. public double[]getNodeWeights()
  74. {
  75. returnnodeWeight;
  76. }
  77. public intgetNClusters()
  78. {
  79. returnnClusters;
  80. }
  81. public int[]getClusters()
  82. {
  83. returncluster;
  84. }
  85. public voidinitSingletonClusters()
  86. {
  87. int i;
  88. nClusters =nNodes;
  89. cluster =new int[nNodes];
  90. for (i = 0;i < nNodes; i++)
  91. cluster[i] = i;
  92. deleteClusteringStats();
  93. }
  94. public voidmergeClusters(int[] newCluster)
  95. {
  96. int i, j, k;
  97. if (cluster== null)
  98. return;
  99. i = 0;
  100. for (j = 0;j < nNodes; j++)
  101. {
  102. k =newCluster[cluster[j]];
  103. if (k> i)
  104. i =k;
  105. cluster[j] = k;
  106. }
  107. nClusters =i + 1;
  108. deleteClusteringStats();
  109. }
  110. public NetworkgetReducedNetwork()
  111. {
  112. double[]reducedNetworkEdgeWeight1, reducedNetworkEdgeWeight2;
  113. int i, j, k,l, m, reducedNetworkNEdges1, reducedNetworkNEdges2;
  114. int[]reducedNetworkNeighbor1, reducedNetworkNeighbor2;
  115. NetworkreducedNetwork;
  116. if (cluster== null)
  117. returnnull;
  118. if(!clusteringStatsAvailable)
  119. calcClusteringStats();
  120. reducedNetwork = new Network();
  121. reducedNetwork.nNodes = nClusters;
  122. reducedNetwork.firstNeighborIndex = new int[nClusters + 1];
  123. reducedNetwork.nodeWeight = new double[nClusters];
  124. reducedNetworkNeighbor1 = new int[neighbor.length];
  125. reducedNetworkEdgeWeight1 = new double[edgeWeight.length];
  126. reducedNetworkNeighbor2 = new int[nClusters - 1];
  127. reducedNetworkEdgeWeight2 = new double[nClusters];
  128. reducedNetworkNEdges1 = 0;
  129. for (i = 0;i < nClusters; i++)
  130. {
  131. reducedNetworkNEdges2 = 0;
  132. for (j = 0; j <nodePerCluster[i].length; j++)
  133. {
  134. k =nodePerCluster[i][j]; //k是簇i中第j个节点的id
  135. for(l = firstNeighborIndex[k]; l < firstNeighborIndex[k + 1]; l++)
  136. {
  137. m = cluster[neighbor[l]]; //m是k的在l位置的邻居节点所属的簇id
  138. if (m != i)
  139. {
  140. if (reducedNetworkEdgeWeight2[m] == 0)
  141. {
  142. reducedNetworkNeighbor2[reducedNetworkNEdges2] = m;
  143. reducedNetworkNEdges2++;
  144. }
  145. reducedNetworkEdgeWeight2[m] += edgeWeight[l];
  146. }
  147. }
  148. reducedNetwork.nodeWeight[i] += nodeWeight[k];
  149. }
  150. for (j =0; j < reducedNetworkNEdges2; j++)
  151. {
  152. reducedNetworkNeighbor1[reducedNetworkNEdges1 + j] = reducedNetworkNeighbor2[j];
  153. reducedNetworkEdgeWeight1[reducedNetworkNEdges1 + j] =reducedNetworkEdgeWeight2[reducedNetworkNeighbor2[j]];
  154. reducedNetworkEdgeWeight2[reducedNetworkNeighbor2[j]] = 0;
  155. }
  156. reducedNetworkNEdges1 += reducedNetworkNEdges2;
  157. reducedNetwork.firstNeighborIndex[i + 1] = reducedNetworkNEdges1;
  158. }
  159. reducedNetwork.neighbor = new int[reducedNetworkNEdges1];
  160. reducedNetwork.edgeWeight = new double[reducedNetworkNEdges1];
  161. System.arraycopy(reducedNetworkNeighbor1, 0, reducedNetwork.neighbor, 0,reducedNetworkNEdges1);
  162. System.arraycopy(reducedNetworkEdgeWeight1, 0,reducedNetwork.edgeWeight, 0, reducedNetworkNEdges1);
  163. returnreducedNetwork;
  164. }
  165. public doublecalcQualityFunction(double resolution)
  166. {
  167. doublequalityFunction, totalEdgeWeight;
  168. int i, j, k;
  169. if (cluster== null)
  170. returnDouble.NaN;
  171. if(!clusteringStatsAvailable)
  172. calcClusteringStats();
  173. qualityFunction = 0;
  174. totalEdgeWeight = 0;
  175. for (i = 0;i < nNodes; i++)
  176. {
  177. j =cluster[i];
  178. for (k =firstNeighborIndex[i]; k < firstNeighborIndex[i + 1]; k++)
  179. {
  180. if(cluster[neighbor[k]] == j)
  181. qualityFunction += edgeWeight[k];
  182. totalEdgeWeight += edgeWeight[k];
  183. }
  184. }
  185. for (i = 0;i < nClusters; i++)
  186. qualityFunction -= clusterWeight[i] * clusterWeight[i] * resolution;
  187. qualityFunction /= totalEdgeWeight;
  188. returnqualityFunction;
  189. }
  190. public booleanrunLocalMovingAlgorithm(double resolution)
  191. {
  192. returnrunLocalMovingAlgorithm(resolution, new Random());
  193. }
  194. public booleanrunLocalMovingAlgorithm(double resolution, Random random)
  195. {
  196. booleanupdate;
  197. doublemaxQualityFunction, qualityFunction;
  198. double[]clusterWeight, edgeWeightPerCluster;
  199. intbestCluster, i, j, k, l, nNeighboringClusters, nStableNodes, nUnusedClusters;
  200. int[]neighboringCluster, newCluster, nNodesPerCluster, nodeOrder, unusedCluster;
  201. if ((cluster== null) || (nNodes == 1))
  202. returnfalse;
  203. update =false;
  204. clusterWeight = new double[nNodes];
  205. nNodesPerCluster = new int[nNodes];
  206. for (i = 0;i < nNodes; i++)
  207. {
  208. clusterWeight[cluster[i]] += nodeWeight[i];
  209. nNodesPerCluster[cluster[i]]++;
  210. }
  211. nUnusedClusters = 0;
  212. unusedCluster = new int[nNodes];
  213. for (i = 0;i < nNodes; i++)
  214. if(nNodesPerCluster[i] == 0)
  215. {
  216. unusedCluster[nUnusedClusters] = i;
  217. nUnusedClusters++;
  218. }
  219. nodeOrder =new int[nNodes];
  220. for (i = 0;i < nNodes; i++)
  221. nodeOrder[i] = i;
  222. for (i = 0;i < nNodes; i++)
  223. {
  224. j = random.nextInt(nNodes);
  225. k =nodeOrder[i];
  226. nodeOrder[i] = nodeOrder[j];
  227. nodeOrder[j] = k;
  228. }
  229. edgeWeightPerCluster = new double[nNodes];
  230. neighboringCluster = new int[nNodes - 1];
  231. nStableNodes = 0;
  232. i = 0;
  233. do
  234. {
  235. j =nodeOrder[i];
  236. nNeighboringClusters = 0;
  237. for (k =firstNeighborIndex[j]; k < firstNeighborIndex[j + 1]; k++)
  238. {
  239. l =cluster[neighbor[k]];
  240. if(edgeWeightPerCluster[l] == 0)
  241. {
  242. neighboringCluster[nNeighboringClusters] = l;
  243. nNeighboringClusters++;
  244. }
  245. edgeWeightPerCluster[l]+= edgeWeight[k];
  246. }
  247. clusterWeight[cluster[j]] -= nodeWeight[j];
  248. nNodesPerCluster[cluster[j]]—;
  249. if(nNodesPerCluster[cluster[j]] == 0)
  250. {
  251. unusedCluster[nUnusedClusters] = cluster[j];
  252. nUnusedClusters++;
  253. }
  254. bestCluster = -1;
  255. maxQualityFunction = 0;
  256. for (k =0; k < nNeighboringClusters; k++)
  257. {
  258. l =neighboringCluster[k];
  259. qualityFunction = edgeWeightPerCluster[l] - nodeWeight[j] *clusterWeight[l] * resolution;
  260. if((qualityFunction > maxQualityFunction) || ((qualityFunction ==maxQualityFunction) && (l < bestCluster)))
  261. {
  262. bestCluster = l;
  263. maxQualityFunction = qualityFunction;
  264. }
  265. edgeWeightPerCluster[l] = 0;
  266. }
  267. if(maxQualityFunction == 0)
  268. {
  269. bestCluster = unusedCluster[nUnusedClusters - 1];
  270. nUnusedClusters—;
  271. }
  272. clusterWeight[bestCluster] += nodeWeight[j];
  273. nNodesPerCluster[bestCluster]++;
  274. if(bestCluster == cluster[j])
  275. nStableNodes++;
  276. else
  277. {
  278. cluster[j] = bestCluster;
  279. nStableNodes = 1;
  280. update = true;
  281. }
  282. i = (i< nNodes - 1) ? (i + 1) : 0;
  283. }
  284. while(nStableNodes < nNodes); //优化步骤是直到所有的点都稳定下来才结束
  285. newCluster =new int[nNodes];
  286. nClusters =0;
  287. for (i = 0;i < nNodes; i++)
  288. if(nNodesPerCluster[i] > 0)
  289. {
  290. newCluster[i] = nClusters;
  291. nClusters++;
  292. }
  293. for (i = 0;i < nNodes; i++)
  294. cluster[i] = newCluster[cluster[i]];
  295. deleteClusteringStats();
  296. returnupdate;
  297. }
  298. public booleanrunLouvainAlgorithm(double resolution)
  299. {
  300. return runLouvainAlgorithm(resolution,new Random());
  301. }
  302. public booleanrunLouvainAlgorithm(double resolution, Random random)
  303. {
  304. booleanupdate, update2;
  305. NetworkreducedNetwork;
  306. if ((cluster== null) || (nNodes == 1))
  307. returnfalse;
  308. update =runLocalMovingAlgorithm(resolution, random);
  309. if(nClusters < nNodes)
  310. {
  311. reducedNetwork = getReducedNetwork();
  312. reducedNetwork.initSingletonClusters();
  313. update2= reducedNetwork.runLouvainAlgorithm(resolution, random);
  314. if(update2)
  315. {
  316. update = true;
  317. mergeClusters(reducedNetwork.getClusters());
  318. }
  319. }
  320. deleteClusteringStats();
  321. return update;
  322. }
  323. privateNetwork()
  324. {
  325. }
  326. private voidcalcClusteringStats()
  327. {
  328. int i, j;
  329. clusterWeight = new double[nClusters];
  330. nNodesPerCluster = new int[nClusters];
  331. nodePerCluster = new int[nClusters][];
  332. for (i = 0;i < nNodes; i++)
  333. {
  334. clusterWeight[cluster[i]] += nodeWeight[i];
  335. nNodesPerCluster[cluster[i]]++;
  336. }
  337. for (i = 0;i < nClusters; i++)
  338. {
  339. nodePerCluster[i] = new int[nNodesPerCluster[i]];
  340. nNodesPerCluster[i] = 0;
  341. }
  342. for (i = 0;i < nNodes; i++)
  343. {
  344. j =cluster[i];
  345. nodePerCluster[j][nNodesPerCluster[j]] = i;
  346. nNodesPerCluster[j]++;
  347. }
  348. clusteringStatsAvailable = true;
  349. }
  350. private voiddeleteClusteringStats()
  351. {
  352. clusterWeight = null;
  353. nNodesPerCluster = null;
  354. nodePerCluster = null;
  355. clusteringStatsAvailable = false;
  356. }
  357. }

算法的效果

对karate数据集

Center

分簇结果

Center 1

在facebook,4039个数据集上的结果

Center 2

下载的java 版本代码不好理解,尤其是 firstNeighborIndex 等数组表示什么意思等,于是我借鉴它,实现了一个新的版本,这里面的代码容易理解一些,并且给里面的变量、函数以及关键步骤都加了一些注释

而且在代码中增加了输出分簇之后的网络图的功能,输出一个gml文件,输入网络的节点和边信息都保留了,除此之外,加上了分簇的结果,即每一个节点属于哪一个簇,在node中加了一个type字段来表示,这样就可以在gephi中直接依据type字段来查看分割好的社团。

与下载的不一致的是,我们的输入文件第一行需要给出网络中节点的数量

以下是程序的代码

DataSet.java

[java] view plain copy

  1. packagecommunitydetection;
  2. importjava.io.*;
  3. importjava.util.*;
  4. public classDataSet {
  5. double weight[][];
  6. LinkedList neighborlist[];
  7. double totalEdgeWeights = 0;
  8. int nodecount;
  9. double nodeweight[];
  10. DataSet(String filename) throws IOException
  11. {
  12. BufferedReader reader = newBufferedReader(new FileReader(filename));
  13. String line = reader.readLine();
  14. nodecount = Integer.parseInt(line); //读文件,文件第一行为节点的数量
  15. weight = new double[nodecount][nodecount];
  16. neighborlist = new LinkedList[nodecount];
  17. for(int i=0 ;i < nodecount;i++)
  18. neighborlist[i] = newLinkedList();
  19. nodeweight = new double[nodecount];
  20. for(int i=0;i<nodecount;i++)
  21. nodeweight[i] = 0;
  22. while((line = reader.readLine())!=null)
  23. {
  24. String args[] =line.split(“\t”);
  25. int node1 =Integer.parseInt(args[0]);
  26. int node2 = Integer.parseInt(args[1]);
  27. if(node1 != node2)
  28. {
  29. neighborlist[node1].add(node2);
  30. neighborlist[node2].add(node1);
  31. }
  32. if(args.length > 2)
  33. {
  34. double we =Double.parseDouble(args[2]);
  35. weight[node1][node2] = we;
  36. weight[node2][node1] = we;
  37. totalEdgeWeights += we;
  38. nodeweight[node1] += we;
  39. if(node2 != node1)
  40. nodeweight[node2] += we;
  41. }
  42. else
  43. {
  44. weight[node1][node2] = 1;
  45. weight[node2][node1] = 1;
  46. totalEdgeWeights += 1;
  47. nodeweight[node1] += 1;
  48. if(node2 != node1)
  49. nodeweight[node2] += 1;
  50. }
  51. }
  52. reader.close();
  53. }
  54. }

NetWork.java

[java] view plain copy

  1. package communitydetection;
  2. import java.util.*;
  3. import java.io.*;
  4. public class NetWork {
  5. double weight[][]; //图中两节点的连接边的权重
  6. LinkedListneighborlist[]; //每个节点的邻居节点有哪些
  7. doublenodeweight[]; //每个节点的权值
  8. intnodecount; //图中节点的总数量
  9. intcluster[]; //记录每个节点属于哪一个簇
  10. intclustercount; //簇的总数量
  11. doubleclusterweight[]; //每一个簇的权重
  12. booleanclusteringStatsAvailable ; //当前的网络信息是否完全
  13. intnodecountsPercluster[]; //每个簇有多少个节点
  14. intnodePercluster[][]; //nodePercluster[i][j]表示簇i中第j个节点的id
  15. NetWork(Stringfilename) throws IOException
  16. {
  17. DataSetds = new DataSet(filename); //ds是用来读取输入文件内容的
  18. weight= ds.weight;
  19. neighborlist= ds.neighborlist;
  20. nodecount= ds.nodecount;
  21. nodeweight= ds.nodeweight;
  22. initSingletonClusters();
  23. }
  24. NetWork()
  25. {
  26. }
  27. publicdouble getTotalEdgeWeight() //获取网络图中所有的边的权值之和, 返回的结果实际上是2倍
  28. { //因为每一条边被计算了2次
  29. doubletotalEdgeWeight;
  30. int i;
  31. totalEdgeWeight = 0;
  32. for (i= 0; i < nodecount; i++)
  33. {
  34. for(intj=0;j<neighborlist[i].size();j++)
  35. {
  36. int neighborid =(Integer)neighborlist[i].get(j);
  37. totalEdgeWeight +=weight[i][neighborid];
  38. }
  39. }
  40. returntotalEdgeWeight;
  41. }
  42. publicvoid initSingletonClusters() //给每个节点指派一个簇
  43. {
  44. int i;
  45. clustercount = nodecount;
  46. cluster = new int[nodecount];
  47. for (i= 0; i < nodecount; i++)
  48. cluster[i] = i;
  49. deleteClusteringStats();
  50. }
  51. privatevoid calcClusteringStats() //统计好每个簇有多少个节点,以及每个簇中的节点都有哪些
  52. {
  53. int i,j;
  54. clusterweight = new double[clustercount];
  55. nodecountsPercluster = new int[clustercount];
  56. nodePercluster = new int[clustercount][];
  57. for (i= 0; i < nodecount; i++)
  58. {
  59. //计算每一个簇的权值,簇的权值是其中的节点的权值之和
  60. clusterweight[cluster[i]] += nodeweight[i];
  61. nodecountsPercluster[cluster[i]]++;
  62. }
  63. for (i= 0; i < clustercount; i++)
  64. {
  65. nodePercluster[i] = new int[nodecountsPercluster[i]];
  66. nodecountsPercluster[i] = 0;
  67. }
  68. for (i= 0; i < nodecount; i++)
  69. {
  70. j= cluster[i]; //j是簇编号, 记录每一个簇中的第几个节点的id
  71. nodePercluster[j][nodecountsPercluster[j]] = i;
  72. nodecountsPercluster[j]++;
  73. }
  74. clusteringStatsAvailable = true;
  75. }
  76. privatevoid deleteClusteringStats()
  77. {
  78. clusterweight = null;
  79. nodecountsPercluster = null;
  80. nodePercluster = null;
  81. clusteringStatsAvailable = false;
  82. }
  83. publicint[] getClusters()
  84. {
  85. returncluster;
  86. }
  87. publicdouble calcQualityFunction(double resolution) //计算模块度的值, 如果所有边的权重之和为m
  88. { //那么resolution就是1/(2m)
  89. doublequalityFunction, totalEdgeWeight;
  90. int i,j, k;
  91. if(cluster == null)
  92. return Double.NaN;
  93. if(!clusteringStatsAvailable)
  94. calcClusteringStats();
  95. qualityFunction = 0;
  96. totalEdgeWeight = 0;
  97. for (i= 0; i < nodecount; i++)
  98. {
  99. j= cluster[i];
  100. for( k=0;k<neighborlist[i].size();k++)
  101. {
  102. int neighborid =(Integer)neighborlist[i].get(k);
  103. if(cluster[neighborid] == j)
  104. qualityFunction += weight[i][neighborid];
  105. totalEdgeWeight += weight[i][neighborid];
  106. } //最终的totalEdgeWeight也是
  107. //图中所有边权重之和的2倍
  108. }
  109. for (i= 0; i < clustercount; i++)
  110. qualityFunction -= clusterweight[i] * clusterweight[i] * resolution;
  111. qualityFunction /= totalEdgeWeight;
  112. returnqualityFunction;
  113. }
  114. public intgetNClusters()
  115. {
  116. returnclustercount;
  117. }
  118. public voidmergeClusters(int[] newCluster) //newcluster是reduced之后的reducednetwork中的所属簇信息
  119. {
  120. int i,j, k;
  121. if(cluster == null)
  122. return;
  123. i = 0;
  124. for (j= 0; j < nodecount; j++)
  125. {
  126. k= newCluster[cluster[j]]; //reducednetwork中的newcluster的一个下标相当于
  127. if(k > i) //reduce之前的网络中的每一个簇
  128. i = k;
  129. cluster[j] = k;
  130. }
  131. clustercount = i + 1;
  132. deleteClusteringStats();
  133. }
  134. publicNetWork getReducedNetwork() //将整个网络进行reduce操作
  135. {
  136. double[] reducedNetworkEdgeWeight2;
  137. int i,j, k, l, m,reducedNetworkNEdges2;
  138. int[] reducedNetworkNeighbor2;
  139. NetWork reducedNetwork;
  140. if (cluster== null)
  141. return null;
  142. if(!clusteringStatsAvailable)
  143. calcClusteringStats();
  144. reducedNetwork = new NetWork();
  145. reducedNetwork.nodecount = clustercount; //reduce之后,原来的一个簇就是现在的一个节点
  146. reducedNetwork.neighborlist = new LinkedList[clustercount];
  147. for(i=0;i<clustercount;i++)
  148. reducedNetwork.neighborlist[i] = newLinkedList();
  149. reducedNetwork.nodeweight = new double[clustercount];
  150. reducedNetwork.weight = new double[clustercount][clustercount];
  151. reducedNetworkNeighbor2 = new int[clustercount -1];
  152. reducedNetworkEdgeWeight2 = new double[clustercount];
  153. for (i= 0; i < clustercount; i++)
  154. {
  155. reducedNetworkNEdges2 = 0;
  156. for (j = 0; j < nodePercluster[i].length; j++)
  157. {
  158. k = nodePercluster[i][j]; //k是原来的簇i中第j个节点的id
  159. for( l=0;l<neighborlist[k].size();l++)
  160. {
  161. int nodeid =(Integer)neighborlist[k].get(l);
  162. m = cluster[nodeid];
  163. if( m != i) //reduce之前簇i与簇m相连,应为它们之间有边存在,edge(k,nodeid)
  164. {
  165. if(reducedNetworkEdgeWeight2[m]== 0)
  166. {
  167. //以前的簇邻居变成了现在的节点邻居
  168. reducedNetworkNeighbor2[reducedNetworkNEdges2]= m;
  169. reducedNetworkNEdges2++;
  170. //reducedNetworkEdges记录在新的图中新的节点i(原来的簇i)有多少个邻居
  171. }
  172. reducedNetworkEdgeWeight2[m]+= weight[k][nodeid];
  173. //新的节点i与新的邻居节点m的之间的边权重的更新
  174. }
  175. }
  176. reducedNetwork.nodeweight[i] += nodeweight[k];
  177. //现在的节点i是以前的簇i, 以前的节点k在以前的簇i中,所以现在的节点i的权重包含以前节点k的权重
  178. }
  179. for (j = 0; j < reducedNetworkNEdges2; j++)
  180. {
  181. reducedNetwork.neighborlist[i].add(reducedNetworkNeighbor2[j]);
  182. reducedNetwork.weight[i][reducedNetworkNeighbor2[j]] =reducedNetworkEdgeWeight2[reducedNetworkNeighbor2[j]];
  183. reducedNetworkEdgeWeight2[reducedNetworkNeighbor2[j]] = 0;
  184. // =0是为了数组复用,不然每次都要开辟新的数组存放与新的邻居节点之间的边的权值
  185. }
  186. }
  187. returnreducedNetwork;
  188. }
  189. publicboolean runLocalMovingAlgorithm(double resolution)
  190. {
  191. returnrunLocalMovingAlgorithm(resolution, new Random());
  192. }
  193. //将每一个节点移入到’最好’的簇当中去
  194. publicboolean runLocalMovingAlgorithm(double resolution, Random random)
  195. {
  196. boolean update;
  197. doublemaxQualityFunction, qualityFunction;
  198. double[] clusterWeight, edgeWeightPerCluster;
  199. intbestCluster, i, j, k, l, nNeighboringClusters, nStableNodes, nUnusedClusters;
  200. int[]neighboringCluster, newCluster, nNodesPerCluster, nodeOrder, unusedCluster;
  201. if((cluster == null) || (nodecount == 1))
  202. return false;
  203. update= false;
  204. clusterWeight = new double[nodecount];
  205. nNodesPerCluster = new int[nodecount]; //记录每一个簇中有多少个节点
  206. for (i = 0; i < nodecount; i++)
  207. {
  208. clusterWeight[cluster[i]] += nodeweight[i];
  209. nNodesPerCluster[cluster[i]]++;
  210. }
  211. nUnusedClusters = 0;
  212. unusedCluster = new int[nodecount]; //统计在整个过程当中,哪些簇一个节点也没有
  213. for (i= 0; i <nodecount; i++) //这些簇在之后会被消除
  214. if(nNodesPerCluster[i] == 0)
  215. {
  216. unusedCluster[nUnusedClusters] = i;
  217. nUnusedClusters++;
  218. }
  219. nodeOrder = new int[nodecount];
  220. for (i= 0; i < nodecount; i++)
  221. nodeOrder[i] = i;
  222. for (i= 0; i < nodecount; i++) //nodeOrder将节点顺序打乱,防止由于顺序问题得不到最好的结果
  223. {
  224. j= random.nextInt(nodecount);
  225. k= nodeOrder[i];
  226. nodeOrder[i] = nodeOrder[j];
  227. nodeOrder[j] = k;
  228. }
  229. edgeWeightPerCluster = new double[nodecount];
  230. neighboringCluster = new int[nodecount -1];
  231. nStableNodes = 0;
  232. i = 0;
  233. do
  234. {
  235. j= nodeOrder[i]; //j是某一个节点的编号
  236. nNeighboringClusters = 0;
  237. for(k = 0; k<neighborlist[j].size();k++)
  238. {
  239. int nodeid =(Integer)neighborlist[j].get(k); //nodeid是j的一个邻居节点的编号
  240. l = cluster[nodeid]; //l是nodeid所属的簇编号
  241. if(edgeWeightPerCluster[l] == 0) //统计与节点j相连的簇有哪些
  242. {
  243. neighboringCluster[nNeighboringClusters] = l;
  244. nNeighboringClusters++;
  245. }
  246. edgeWeightPerCluster[l] +=weight[j][nodeid];
  247. //edgeWeightperCluster[l]记录的是如果将节点j加入到簇l当中,簇l内部的边权值的增量大小
  248. }
  249. //节点j之前所属的簇做相应的变更
  250. clusterWeight[cluster[j]] -= nodeweight[j];
  251. nNodesPerCluster[cluster[j]]—;
  252. if(nNodesPerCluster[cluster[j]] == 0)
  253. {
  254. unusedCluster[nUnusedClusters] = cluster[j];
  255. nUnusedClusters++;
  256. }
  257. bestCluster = -1; //最好的加入的簇下标
  258. maxQualityFunction = 0;
  259. for (k = 0; k < nNeighboringClusters; k++)
  260. {
  261. l = neighboringCluster[k];
  262. qualityFunction = edgeWeightPerCluster[l] - nodeweight[j] *clusterWeight[l] * resolution;
  263. if ((qualityFunction > maxQualityFunction) || ((qualityFunction ==maxQualityFunction) && (l < bestCluster)))
  264. {
  265. bestCluster = l;
  266. maxQualityFunction = qualityFunction;
  267. }
  268. edgeWeightPerCluster[l] = 0;
  269. // =0是为了数组复用,
  270. }
  271. if(maxQualityFunction == 0) //无论到哪一簇都不会有提升
  272. {
  273. bestCluster = unusedCluster[nUnusedClusters - 1];
  274. nUnusedClusters—;
  275. }
  276. clusterWeight[bestCluster] += nodeweight[j];
  277. nNodesPerCluster[bestCluster]++; //最佳簇的节点数量+1
  278. if(bestCluster == cluster[j])
  279. nStableNodes++; //还在原来的簇当中,表示该节点是稳定的,稳定节点的数量+1
  280. else
  281. {
  282. cluster[j] = bestCluster;
  283. nStableNodes = 1;
  284. update = true; //能移动到新的簇当中去,然后需要重新考虑每个节点是否稳定
  285. }
  286. i= (i < nodecount - 1) ? (i + 1) : 0;
  287. }
  288. while(nStableNodes < nodecount); //优化步骤是直到所有的点都稳定下来才结束
  289. newCluster = new int[nodecount];
  290. clustercount = 0;
  291. for (i= 0; i < nodecount; i++)
  292. if(nNodesPerCluster[i] > 0)
  293. { //统计以前的簇编号还有多少能用,然后从0开始重新对它们编号
  294. newCluster[i] = clustercount;
  295. clustercount++;
  296. }
  297. for (i= 0; i < nodecount; i++)
  298. cluster[i] = newCluster[cluster[i]];
  299. deleteClusteringStats();
  300. returnupdate;
  301. }
  302. publicboolean runLouvainAlgorithm(double resolution)
  303. {
  304. returnrunLouvainAlgorithm(resolution, new Random());
  305. }
  306. publicboolean runLouvainAlgorithm(double resolution, Random random)
  307. {
  308. boolean update, update2;
  309. NetWork reducedNetwork;
  310. if((cluster == null) || (nodecount == 1))
  311. return false;
  312. update= runLocalMovingAlgorithm(resolution, random);
  313. //update表示是否有节点变动,即是否移动到了新的簇当中去
  314. if(clustercount < nodecount) //簇的数量小于节点的数量,说明可以进行reduce操作,ruduce不会改变
  315. { //modularity的值
  316. reducedNetwork = getReducedNetwork();
  317. reducedNetwork.initSingletonClusters();
  318. update2 = reducedNetwork.runLouvainAlgorithm(resolution, random);
  319. //update2表示在reduce之后的网络中是否有节点移动到新的簇当中去
  320. if(update2)
  321. {
  322. update = true;
  323. mergeClusters(reducedNetwork.getClusters());
  324. //有变动的话,至少是簇的顺序变掉了,或者是簇的数量减少了
  325. }
  326. }
  327. deleteClusteringStats();
  328. returnupdate;
  329. }
  330. publicvoid generategml() throws IOException
  331. {
  332. BufferedWriter writer = newBufferedWriter(new FileWriter(“generated.gml”));
  333. writer.append(“graph\n”);
  334. writer.append(“[\n”);
  335. for(int i=0;i<nodecount;i++)
  336. {
  337. writer.append(“ node\n”);
  338. writer.append(“ [\n”);
  339. writer.append(“ id “+i+”\n”);
  340. writer.append(“ type “+cluster[i]+”\n”);
  341. writer.append(“ ]\n”);
  342. }
  343. for(int i=0;i<nodecount;i++)
  344. for(int j=i+1;j<nodecount;j++)
  345. {
  346. if(weight[i][j] != 0)
  347. {
  348. writer.append(“ edge\n”);
  349. writer.append(“ [\n”);
  350. writer.append(“ source “+i+”\n”);
  351. writer.append(“ target “+j+”\n”);
  352. writer.append(“ ]\n”);
  353. }
  354. }
  355. writer.append(“]\n”);
  356. writer.close();
  357. }
  358. }

Main.java

[java] view plain copy

  1. package communitydetection;
  2. import java.io.*;
  3. import java.util.*;
  4. public class Main {
  5. staticvoid detect() throws IOException
  6. {
  7. NetWork network;
  8. Stringfilename = “karate_club_network.txt”;
  9. doublemodularity, resolution, maxModularity;
  10. doublebeginTime, endTime;
  11. int[]cluster;
  12. intnRandomStarts = 5;
  13. intnIterations = 3;
  14. network= new NetWork(filename);
  15. resolution= 1.0/network.getTotalEdgeWeight();
  16. beginTime= System.currentTimeMillis();
  17. cluster = null;
  18. int nClusters= -1;
  19. inti,j;
  20. maxModularity = Double.NEGATIVE_INFINITY;
  21. Randomrandom = new Random(100);
  22. for (i= 0; i < nRandomStarts; i++)
  23. {
  24. if( (nRandomStarts > 1))
  25. System.out.format(“Random start: %d%n”, i + 1);
  26. network.initSingletonClusters(); //网络初始化,每个节点一个簇
  27. j= 0;
  28. boolean update = true; //update表示网络是否有节点移动
  29. do
  30. {
  31. if ( (nIterations > 1))
  32. System.out.format(“Iteration: %d%n”, j + 1);
  33. update = network.runLouvainAlgorithm(resolution, random);
  34. j++;
  35. modularity = network.calcQualityFunction(resolution);
  36. if ((nIterations > 1))
  37. System.out.format(“Modularity: %.4f%n”, modularity);
  38. }
  39. while ((j < nIterations) && update);
  40. if(modularity > maxModularity)
  41. {
  42. cluster = network.getClusters();
  43. nClusters = network.getNClusters();
  44. maxModularity = modularity;
  45. }
  46. if((nRandomStarts > 1))
  47. {
  48. if (nIterations == 1)
  49. System.out.format(“Modularity: %.4f%n”, modularity);
  50. System.out.println();
  51. }
  52. }
  53. endTime = System.currentTimeMillis();
  54. network.generategml();
  55. if(nRandomStarts == 1)
  56. {
  57. if(nIterations > 1)
  58. System.out.println();
  59. System.out.format(“Modularity: %.4f%n”, maxModularity);
  60. }
  61. else
  62. System.out.format(“Maximum modularity in %d random starts:%.4f%n”, nRandomStarts, maxModularity);
  63. System.out.format(“Number of communities: %d%n”, nClusters);
  64. System.out.format(“Elapsed time: %f seconds%n”, (endTime -beginTime) / 1000.0);
  65. System.out.println();
  66. System.out.println(“Writingoutput file…”);
  67. System.out.println();
  68. // writeOutputFile(“communities.txt”, cluster);
  69. }
  70. //将每一个节点所属的簇的下标写到文件当中去
  71. privatestatic void writeOutputFile(String fileName, int[] cluster) throws IOException
  72. {
  73. BufferedWriter bufferedWriter;
  74. int i;
  75. bufferedWriter = new BufferedWriter(new FileWriter(fileName));
  76. for (i= 0; i < cluster.length; i++)
  77. {
  78. bufferedWriter.write(Integer.toString(cluster[i]));
  79. bufferedWriter.newLine();
  80. }
  81. bufferedWriter.close();
  82. }
  83. publicstatic void main(String args[]) throws IOException
  84. {
  85. detect();
  86. }
  87. }

算法运行结果

Karate结果

Center 3

分簇结果

Center 4

生成的gml文件

Center 5

导入到Gephi中,按照节点的type分割的结果

Center 6

Center 7

Facebook上4039个数据的结果

Center 8

将生成的gml文件导入到Gephi中,并依据节点的type分割的结果

Center 9

Center 10

发表评论

表情:
评论列表 (有 0 条评论,274人围观)

还没有评论,来说两句吧...

相关阅读