SNAP Library 2.1, Developer Reference
2013-09-25 10:47:25
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
00001 #include "stdafx.h" 00002 #include "cliques.h" 00003 00005 // TCommunity implementation 00006 void TCliqueOverlap::GetRelativeComplement(const THashSet<TInt>& A, const THashSet<TInt>& B, THashSet<TInt>& Complement) { 00007 for (THashSet<TInt>::TIter it=A.BegI(); it<A.EndI(); it++) { 00008 const int nId = it.GetKey(); 00009 if (!B.IsKey(nId)) Complement.AddKey(nId); 00010 } 00011 } 00012 00013 void TCliqueOverlap::GetIntersection(const THashSet<TInt>& A, const THashSet<TInt>& B, THashSet<TInt>& C) { 00014 if (A.Len() < B.Len()) { 00015 for (THashSetKeyI<TInt> it=A.BegI(); it<A.EndI(); it++) 00016 if (B.IsKey(it.GetKey())) C.AddKey(it.GetKey()); 00017 } else { 00018 for (THashSetKeyI<TInt> it=B.BegI(); it<B.EndI(); it++) 00019 if (A.IsKey(it.GetKey())) C.AddKey(it.GetKey()); 00020 } 00021 } 00022 00023 int TCliqueOverlap::Intersection(const THashSet<TInt>& A, const THashSet<TInt>& B) { 00024 int n = 0; 00025 if (A.Len() < B.Len()) { 00026 for (THashSetKeyI<TInt> it=A.BegI(); it<A.EndI(); it++) 00027 if (B.IsKey(it.GetKey())) n++; 00028 } else { 00029 for (THashSetKeyI<TInt> it=B.BegI(); it<B.EndI(); it++) 00030 if (A.IsKey(it.GetKey())) n++; 00031 } 00032 return n; 00033 } 00034 00035 void TCliqueOverlap::GetNbrs(int NId, THashSet<TInt>& Nbrs) const{ 00036 TUNGraph::TNodeI node = m_G->GetNI(NId); 00037 int deg = node.GetDeg(); 00038 for (int i=0; i<deg; i++) Nbrs.AddKey(node.GetNbrNId(i)); 00039 } 00040 00041 int TCliqueOverlap::GetNodeIdWithMaxDeg(const THashSet<TInt>& Set) const{ 00042 int id = -1; 00043 int maxDeg = -1; 00044 // 00045 for (THashSetKeyI<TInt> it=Set.BegI(); it<Set.EndI(); it++) { 00046 int nId = it.GetKey(); 00047 int deg = m_G->GetNI(nId).GetDeg(); 00048 if (maxDeg < deg) { maxDeg=deg; id=nId; } 00049 } 00050 return id; 00051 } 00052 00053 int TCliqueOverlap::MaxNbrsInCANDNodeId(const THashSet<TInt>& SUBG, const THashSet<TInt>& CAND) const{ 00054 int id = -1; 00055 int maxIntersection = -1; 00056 // 00057 for (THashSetKeyI<TInt> it=SUBG.BegI(); it<SUBG.EndI(); it++) { 00058 int nId = it.GetKey(); 00059 TUNGraph::TNodeI nIt = m_G->GetNI(nId); 00060 int deg = nIt.GetDeg(); 00061 // 00062 int curIntersection = 0; 00063 for (int i=0; i<deg; i++) { 00064 int nbrId = nIt.GetNbrNId(i); 00065 if (CAND.IsKey(nbrId)) curIntersection++; 00066 } 00067 // 00068 if (maxIntersection < curIntersection) { maxIntersection=curIntersection; id=nId; } 00069 } 00070 return id; 00071 } 00072 00073 void TCliqueOverlap::Expand(const THashSet<TInt>& SUBG, THashSet<TInt>& CAND) { 00074 if (SUBG.Len()==0) { if (m_Q.Len() >= m_minMaxCliqueSize) { m_Q.Pack(); m_maxCliques->Add(m_Q); } return; } 00075 if (CAND.Len()==0) return; 00076 //Get u that maximaze CAND intersection with neighbours of vertex u 00077 int u = MaxNbrsInCANDNodeId(SUBG, CAND); 00078 //Get neighbours of node u 00079 THashSet<TInt> nbrsU; 00080 GetNbrs(u, nbrsU); 00081 //Get relative complement of nbrsU in CAND 00082 THashSet<TInt> EXT; 00083 GetRelativeComplement(CAND, nbrsU, EXT); 00084 while(EXT.Len() != 0) { 00085 int q = GetNodeIdWithMaxDeg(EXT); 00086 // 00087 m_Q.Add(q); 00088 // 00089 THashSet<TInt> nbrsQ; 00090 GetNbrs(q, nbrsQ); 00091 // 00092 THashSet<TInt> SUBGq; 00093 GetIntersection(SUBG, nbrsQ, SUBGq); 00094 // 00095 THashSet<TInt> CANDq; 00096 GetIntersection(CAND, nbrsQ, CANDq); 00097 // 00098 Expand(SUBGq, CANDq); 00099 // 00100 CAND.DelKey(q); 00101 m_Q.DelLast(); 00102 // 00103 EXT.Clr(); 00104 GetRelativeComplement(CAND, nbrsU, EXT); 00105 } 00106 } 00107 00108 void TCliqueOverlap::GetMaximalCliques(const PUNGraph& G, int MinMaxCliqueSize, TVec<TIntV>& MaxCliques) { 00109 if (G->GetNodes() == 0) return; 00110 // 00111 m_G = G; 00112 m_minMaxCliqueSize = MinMaxCliqueSize; 00113 m_maxCliques =& MaxCliques; 00114 m_Q.Clr(); 00115 // 00116 THashSet<TInt> SUBG; 00117 THashSet<TInt> CAND; 00118 for (TUNGraph::TNodeI NI=m_G->BegNI(); NI<m_G->EndNI(); NI++) { 00119 TInt nId = NI.GetId(); 00120 SUBG.AddKey(nId); 00121 CAND.AddKey(nId); 00122 } 00123 // 00124 Expand(SUBG, CAND); 00125 } 00126 00127 void TCliqueOverlap::CalculateOverlapMtx(const TVec<TIntV>& MaxCliques, int MinNodeOverlap, TVec<TIntV>& OverlapMtx) { 00128 OverlapMtx.Clr(); 00129 // 00130 int n = MaxCliques.Len(); 00131 //Convert max cliques to HashSets 00132 TVec<THashSet<TInt> > cliques; 00133 for (int i=0; i<n; i++) { 00134 const int len = MaxCliques[i].Len(); 00135 cliques.Add(); 00136 if (len < MinNodeOverlap) { continue; } 00137 THashSet<TInt>& set = cliques.Last(); set.Gen(len); 00138 for (int j=0; j<len; j++) { set.AddKey(MaxCliques[i][j]); } 00139 } 00140 //Init clique clique overlap matrix 00141 n = cliques.Len(); 00142 OverlapMtx.Gen(n); 00143 for (int i=0; i<n; i++) OverlapMtx[i].Gen(n); 00144 //Calculate clique clique overlap matrix 00145 for (int i=0; i<n; i++) { 00146 OverlapMtx[i][i] = cliques[i].Len(); 00147 for (int j=i+1; j<n; j++) { 00148 OverlapMtx[i][j] = Intersection(cliques[i], cliques[j]); } 00149 } 00150 } 00151 00152 PUNGraph TCliqueOverlap::CalculateOverlapMtx(const TVec<TIntV>& MaxCliques, int MinNodeOverlap) { 00153 const int n = MaxCliques.Len(); 00154 //Convert max cliques to HashSets 00155 TVec<THashSet<TInt> > cliques; 00156 for (int i=0; i<n; i++) { 00157 const int len = MaxCliques[i].Len(); 00158 cliques.Add(); 00159 if (len < MinNodeOverlap) { continue; } 00160 THashSet<TInt>& set = cliques.Last(); set.Gen(len); 00161 for (int j=0; j<len; j++) { set.AddKey(MaxCliques[i][j]); } 00162 } 00163 //Init clique clique overlap matrix 00164 PUNGraph OverlapMtx = TUNGraph::New(); 00165 for (int i=0; i < n; i++) { 00166 OverlapMtx->AddNode(i); } 00167 //Calculate clique clique overlap matrix 00168 for (int i=0; i<n; i++) { 00169 for (int j=i+1; j<n; j++) { 00170 if (Intersection(cliques[i], cliques[j]) >= MinNodeOverlap) { 00171 OverlapMtx->AddEdge(i,j); } 00172 } 00173 } 00174 return OverlapMtx; 00175 } 00176 00177 void TCliqueOverlap::GetOverlapCliques(const TVec<TIntV>& OverlapMtx, int MinNodeOverlap, TVec<TIntV>& CliqueIdVV) { 00178 int n = OverlapMtx.Len(); 00179 for (int i=0; i<n; i++) { 00180 bool isCommunity = false; 00181 for (int j=i+1; j<n; j++) { 00182 if (OverlapMtx[i][j] >= MinNodeOverlap) { 00183 if (! isCommunity) { 00184 TIntV v; v.Add(i); 00185 CliqueIdVV.Add(v); 00186 isCommunity=true; 00187 } 00188 CliqueIdVV.Last().Add(j); 00189 } 00190 } 00191 } 00192 } 00193 00194 void TCliqueOverlap::GetOverlapCliques(const TVec<TIntV>& OverlapMtx, const TVec<TIntV>& MaxCliques, double MinOverlapFrac, TVec<TIntV>& CliqueIdVV) { 00195 int n = OverlapMtx.Len(); 00196 for(int i=0; i<n; i++){ 00197 bool isCommunity = false; 00198 int size1 = MaxCliques[i].Len(); 00199 for(int j=i+1; j<n; j++){ 00200 int size2 = MaxCliques[j].Len(); 00201 double ratio = OverlapMtx[i][j]; 00202 if(size1 < size2) ratio /= size1; 00203 else ratio /= size2; 00204 if(ratio >= MinOverlapFrac){ 00205 if(!isCommunity){ 00206 TIntV v; v.Add(i); 00207 CliqueIdVV.Add(v); 00208 isCommunity=true; 00209 } 00210 CliqueIdVV.Last().Add(j); 00211 } 00212 } 00213 } 00214 } 00215 00217 void TCliqueOverlap::GetMaxCliques(const PUNGraph& G, int MinMaxCliqueSize, TVec<TIntV>& MaxCliques) { 00218 TCliqueOverlap CO; 00219 MaxCliques.Clr(false); 00220 CO.GetMaximalCliques(G, MinMaxCliqueSize, MaxCliques); 00221 } 00222 00224 void TCliqueOverlap::GetCPMCommunities(const PUNGraph& G, int MinMaxCliqueSize, TVec<TIntV>& NIdCmtyVV) { 00225 printf("Clique Percolation Method\n"); 00226 TExeTm ExeTm; 00227 TVec<TIntV> MaxCliques; 00228 TCliqueOverlap::GetMaxCliques(G, MinMaxCliqueSize, MaxCliques); 00229 // op RS 2012/05/15, commented out next line, a parameter is missing, 00230 // creating a warning on OS X 00231 // printf("...%d cliques found\n"); 00232 // get clique overlap matrix (graph) 00233 PUNGraph OverlapGraph = TCliqueOverlap::CalculateOverlapMtx(MaxCliques, MinMaxCliqueSize-1); 00234 printf("...overlap matrix (%d, %d)\n", G->GetNodes(), G->GetEdges()); 00235 // connected components are communities 00236 TCnComV CnComV; 00237 TSnap::GetWccs(OverlapGraph, CnComV); 00238 NIdCmtyVV.Clr(false); 00239 TIntSet CmtySet; 00240 for (int c = 0; c < CnComV.Len(); c++) { 00241 CmtySet.Clr(false); 00242 for (int i = 0; i <CnComV[c].Len(); i++) { 00243 const TIntV& CliqueNIdV = MaxCliques[CnComV[c][i]]; 00244 CmtySet.AddKeyV(CliqueNIdV); 00245 } 00246 NIdCmtyVV.Add(); 00247 CmtySet.GetKeyV(NIdCmtyVV.Last()); 00248 NIdCmtyVV.Last().Sort(); 00249 } 00250 printf("done [%s].\n", ExeTm.GetStr()); 00251 }