SNAP Library 2.1, Developer Reference  2013-09-25 10:47:25
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
cliques.cpp
Go to the documentation of this file.
00001 #include "stdafx.h"
00002 #include "cliques.h"
00003 
00005 // TCommunity implementation
00006 void TCliqueOverlap::GetRelativeComplement(const THashSet<TInt>& A, const THashSet<TInt>& B, THashSet<TInt>& Complement) {
00007   for (THashSet<TInt>::TIter it=A.BegI(); it<A.EndI(); it++) {
00008                 const int nId = it.GetKey();
00009                 if (!B.IsKey(nId)) Complement.AddKey(nId);
00010         }
00011 }
00012 
00013 void TCliqueOverlap::GetIntersection(const THashSet<TInt>& A, const THashSet<TInt>& B, THashSet<TInt>& C) {
00014         if (A.Len() < B.Len()) {
00015                 for (THashSetKeyI<TInt> it=A.BegI(); it<A.EndI(); it++) 
00016                         if (B.IsKey(it.GetKey())) C.AddKey(it.GetKey());
00017         } else {
00018                 for (THashSetKeyI<TInt> it=B.BegI(); it<B.EndI(); it++) 
00019                         if (A.IsKey(it.GetKey())) C.AddKey(it.GetKey());
00020         }
00021 }
00022 
00023 int TCliqueOverlap::Intersection(const THashSet<TInt>& A, const THashSet<TInt>& B) {
00024         int n = 0;
00025         if (A.Len() < B.Len()) {
00026                 for (THashSetKeyI<TInt> it=A.BegI(); it<A.EndI(); it++) 
00027                         if (B.IsKey(it.GetKey())) n++;
00028         } else {
00029                 for (THashSetKeyI<TInt> it=B.BegI(); it<B.EndI(); it++) 
00030                         if (A.IsKey(it.GetKey())) n++;
00031         }
00032         return n;
00033 }
00034 
00035 void TCliqueOverlap::GetNbrs(int NId, THashSet<TInt>& Nbrs) const{
00036         TUNGraph::TNodeI node = m_G->GetNI(NId);
00037         int deg = node.GetDeg();
00038         for (int i=0; i<deg; i++) Nbrs.AddKey(node.GetNbrNId(i));
00039 }
00040 
00041 int TCliqueOverlap::GetNodeIdWithMaxDeg(const THashSet<TInt>& Set) const{
00042         int id = -1;
00043         int maxDeg = -1;
00044         //
00045         for (THashSetKeyI<TInt> it=Set.BegI(); it<Set.EndI(); it++) {
00046                 int nId = it.GetKey();
00047                 int deg = m_G->GetNI(nId).GetDeg();
00048                 if (maxDeg < deg) { maxDeg=deg; id=nId; }
00049         }
00050         return id;
00051 }
00052 
00053 int TCliqueOverlap::MaxNbrsInCANDNodeId(const THashSet<TInt>& SUBG, const THashSet<TInt>& CAND) const{
00054         int id = -1;
00055         int maxIntersection = -1;
00056         //
00057         for (THashSetKeyI<TInt> it=SUBG.BegI(); it<SUBG.EndI(); it++) {
00058                 int nId = it.GetKey();
00059                 TUNGraph::TNodeI nIt = m_G->GetNI(nId);
00060                 int deg = nIt.GetDeg();
00061                 //
00062                 int curIntersection = 0;
00063                 for (int i=0; i<deg; i++) {
00064                         int nbrId = nIt.GetNbrNId(i);
00065                         if (CAND.IsKey(nbrId)) curIntersection++;
00066                 }
00067                 //
00068                 if (maxIntersection < curIntersection) { maxIntersection=curIntersection; id=nId; }
00069         }
00070         return id;
00071 }
00072 
00073 void TCliqueOverlap::Expand(const THashSet<TInt>& SUBG, THashSet<TInt>& CAND) {
00074         if (SUBG.Len()==0) { if (m_Q.Len() >= m_minMaxCliqueSize) { m_Q.Pack(); m_maxCliques->Add(m_Q); } return; }
00075         if (CAND.Len()==0) return;
00076         //Get u that maximaze CAND intersection with neighbours of vertex u
00077         int u = MaxNbrsInCANDNodeId(SUBG, CAND);
00078         //Get neighbours of node u
00079         THashSet<TInt> nbrsU;
00080         GetNbrs(u, nbrsU);
00081         //Get relative complement of nbrsU in CAND
00082         THashSet<TInt> EXT;
00083         GetRelativeComplement(CAND, nbrsU, EXT);
00084         while(EXT.Len() != 0) {
00085                 int q = GetNodeIdWithMaxDeg(EXT);
00086                 //
00087                 m_Q.Add(q);
00088                 //
00089                 THashSet<TInt> nbrsQ;
00090                 GetNbrs(q, nbrsQ);
00091                 //
00092                 THashSet<TInt> SUBGq;
00093                 GetIntersection(SUBG, nbrsQ, SUBGq);
00094                 //
00095                 THashSet<TInt> CANDq;
00096                 GetIntersection(CAND, nbrsQ, CANDq);
00097                 //
00098                 Expand(SUBGq, CANDq);
00099                 //
00100                 CAND.DelKey(q);
00101                 m_Q.DelLast();
00102                 //
00103                 EXT.Clr();
00104                 GetRelativeComplement(CAND, nbrsU, EXT);
00105         }
00106 }
00107 
00108 void TCliqueOverlap::GetMaximalCliques(const PUNGraph& G, int MinMaxCliqueSize, TVec<TIntV>& MaxCliques) {
00109         if (G->GetNodes() == 0) return;
00110         //
00111         m_G = G;
00112         m_minMaxCliqueSize = MinMaxCliqueSize;
00113         m_maxCliques =& MaxCliques;
00114         m_Q.Clr();
00115         //
00116         THashSet<TInt> SUBG;
00117         THashSet<TInt> CAND;
00118         for (TUNGraph::TNodeI NI=m_G->BegNI(); NI<m_G->EndNI(); NI++) {
00119                 TInt nId = NI.GetId();
00120                 SUBG.AddKey(nId);
00121                 CAND.AddKey(nId);
00122         }
00123         //
00124         Expand(SUBG, CAND);
00125 }
00126 
00127 void TCliqueOverlap::CalculateOverlapMtx(const TVec<TIntV>& MaxCliques, int MinNodeOverlap, TVec<TIntV>& OverlapMtx) {
00128         OverlapMtx.Clr();
00129         //
00130         int n = MaxCliques.Len();
00131         //Convert max cliques to HashSets
00132                 TVec<THashSet<TInt> > cliques;
00133         for (int i=0; i<n; i++) {
00134                 const int len = MaxCliques[i].Len();
00135     cliques.Add();
00136     if (len < MinNodeOverlap) { continue; }
00137                 THashSet<TInt>& set = cliques.Last();  set.Gen(len);
00138     for (int j=0; j<len; j++) { set.AddKey(MaxCliques[i][j]); }
00139         }
00140         //Init clique clique overlap matrix
00141         n = cliques.Len();
00142         OverlapMtx.Gen(n);
00143         for (int i=0; i<n; i++) OverlapMtx[i].Gen(n);
00144         //Calculate clique clique overlap matrix
00145   for (int i=0; i<n; i++) {
00146     OverlapMtx[i][i] = cliques[i].Len();
00147     for (int j=i+1; j<n; j++) {
00148       OverlapMtx[i][j] = Intersection(cliques[i], cliques[j]); }
00149   }
00150 }
00151 
00152 PUNGraph TCliqueOverlap::CalculateOverlapMtx(const TVec<TIntV>& MaxCliques, int MinNodeOverlap) {
00153         const int n = MaxCliques.Len();
00154   //Convert max cliques to HashSets
00155         TVec<THashSet<TInt> > cliques;
00156         for (int i=0; i<n; i++) {
00157                 const int len = MaxCliques[i].Len();
00158     cliques.Add();
00159     if (len < MinNodeOverlap) { continue; }
00160                 THashSet<TInt>& set = cliques.Last();  set.Gen(len);
00161     for (int j=0; j<len; j++) { set.AddKey(MaxCliques[i][j]); }
00162         }
00163         //Init clique clique overlap matrix
00164         PUNGraph OverlapMtx = TUNGraph::New();
00165   for (int i=0; i < n; i++) {
00166     OverlapMtx->AddNode(i); }
00167         //Calculate clique clique overlap matrix
00168   for (int i=0; i<n; i++) {
00169     for (int j=i+1; j<n; j++) {
00170       if (Intersection(cliques[i], cliques[j]) >= MinNodeOverlap) {
00171         OverlapMtx->AddEdge(i,j); }
00172     }
00173   }
00174   return OverlapMtx;
00175 }
00176 
00177 void TCliqueOverlap::GetOverlapCliques(const TVec<TIntV>& OverlapMtx, int MinNodeOverlap, TVec<TIntV>& CliqueIdVV) {
00178         int n = OverlapMtx.Len();
00179         for (int i=0; i<n; i++) {
00180                 bool isCommunity = false;
00181                 for (int j=i+1; j<n; j++) {
00182                         if (OverlapMtx[i][j] >= MinNodeOverlap) {
00183                                 if (! isCommunity) {
00184                                         TIntV v; v.Add(i);
00185                                         CliqueIdVV.Add(v);
00186                                         isCommunity=true;
00187                                 }
00188                                 CliqueIdVV.Last().Add(j);
00189                         }
00190                 }
00191         }
00192 }
00193 
00194 void TCliqueOverlap::GetOverlapCliques(const TVec<TIntV>& OverlapMtx, const TVec<TIntV>& MaxCliques, double MinOverlapFrac, TVec<TIntV>& CliqueIdVV) {
00195         int n = OverlapMtx.Len();
00196         for(int i=0; i<n; i++){
00197                 bool isCommunity = false;
00198                 int size1 = MaxCliques[i].Len();
00199                 for(int j=i+1; j<n; j++){
00200                         int size2 = MaxCliques[j].Len();
00201                         double ratio = OverlapMtx[i][j];
00202                         if(size1 < size2) ratio /= size1;
00203                         else ratio /= size2;
00204                         if(ratio >= MinOverlapFrac){
00205                                 if(!isCommunity){
00206                                         TIntV v; v.Add(i);
00207                                         CliqueIdVV.Add(v);
00208                                         isCommunity=true;
00209                                 }
00210                                 CliqueIdVV.Last().Add(j);
00211                         }
00212                 }
00213         }
00214 }
00215 
00217 void TCliqueOverlap::GetMaxCliques(const PUNGraph& G, int MinMaxCliqueSize, TVec<TIntV>& MaxCliques) {
00218   TCliqueOverlap CO;
00219   MaxCliques.Clr(false);
00220   CO.GetMaximalCliques(G, MinMaxCliqueSize, MaxCliques);
00221 }
00222 
00224 void TCliqueOverlap::GetCPMCommunities(const PUNGraph& G, int MinMaxCliqueSize, TVec<TIntV>& NIdCmtyVV) {
00225   printf("Clique Percolation Method\n");
00226   TExeTm ExeTm;
00227   TVec<TIntV> MaxCliques;
00228   TCliqueOverlap::GetMaxCliques(G, MinMaxCliqueSize, MaxCliques);
00229   // op RS 2012/05/15, commented out next line, a parameter is missing,
00230   //   creating a warning on OS X
00231   // printf("...%d cliques found\n");
00232   // get clique overlap matrix (graph)
00233   PUNGraph OverlapGraph = TCliqueOverlap::CalculateOverlapMtx(MaxCliques, MinMaxCliqueSize-1);
00234   printf("...overlap matrix (%d, %d)\n", G->GetNodes(), G->GetEdges());
00235   // connected components are communities
00236   TCnComV CnComV;
00237   TSnap::GetWccs(OverlapGraph, CnComV);
00238   NIdCmtyVV.Clr(false);
00239   TIntSet CmtySet;
00240   for (int c = 0; c < CnComV.Len(); c++) {
00241     CmtySet.Clr(false);
00242     for (int i = 0; i <CnComV[c].Len(); i++) {
00243       const TIntV& CliqueNIdV = MaxCliques[CnComV[c][i]];
00244       CmtySet.AddKeyV(CliqueNIdV);
00245     }
00246     NIdCmtyVV.Add();
00247     CmtySet.GetKeyV(NIdCmtyVV.Last());
00248     NIdCmtyVV.Last().Sort();
00249   }
00250   printf("done [%s].\n", ExeTm.GetStr());
00251 }