SNAP Library , User Reference  2013-01-07 14:03:36
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
cmty.cpp
Go to the documentation of this file.
00001 
00002 // Community detection algorithms
00003 namespace TSnap {
00004 
00005 
00006 namespace TSnapDetail {
00007 // GIRVAN-NEWMAN algorithm
00008 //      1. The betweenness of all existing edges in the network is calculated first.
00009 //      2. The edge with the highest betweenness is removed.
00010 //      3. The betweenness of all edges affected by the removal is recalculated.
00011 //      4. Steps 2 and 3 are repeated until no edges remain.
00012 //  Girvan M. and Newman M. E. J., Community structure in social and biological networks, Proc. Natl. Acad. Sci. USA 99, 7821-7826 (2002)
00013 // Keep removing edges from Graph until one of the connected components of Graph splits into two.
00014 void CmtyGirvanNewmanStep(PUNGraph& Graph, TIntV& Cmty1, TIntV& Cmty2) {
00015   TIntPrFltH BtwEH;
00016   TBreathFS<PUNGraph> BFS(Graph);
00017   Cmty1.Clr(false);  Cmty2.Clr(false);
00018   while (true) {
00019     TSnap::GetBetweennessCentr(Graph, BtwEH);
00020     BtwEH.SortByDat(false);
00021     if (BtwEH.Empty()) { return; }
00022     const int NId1 = BtwEH.GetKey(0).Val1;
00023     const int NId2 = BtwEH.GetKey(0).Val2;
00024     Graph->DelEdge(NId1, NId2);
00025     BFS.DoBfs(NId1, true, false, NId2, TInt::Mx);
00026     if (BFS.GetHops(NId1, NId2) == -1) { // two components
00027       TSnap::GetNodeWcc(Graph, NId1, Cmty1);
00028       TSnap::GetNodeWcc(Graph, NId2, Cmty2);
00029       return;
00030     }
00031   }
00032 }
00033 
00034 // Connected components of a graph define clusters
00035 // OutDegH and OrigEdges stores node degrees and number of edges in the original graph
00036 double _GirvanNewmanGetModularity(const PUNGraph& G, const TIntH& OutDegH, const int& OrigEdges, TCnComV& CnComV) {
00037   TSnap::GetWccs(G, CnComV); // get communities
00038   double Mod = 0;
00039   for (int c = 0; c < CnComV.Len(); c++) {
00040     const TIntV& NIdV = CnComV[c]();
00041     double EIn=0, EEIn=0;
00042     for (int i = 0; i < NIdV.Len(); i++) {
00043       TUNGraph::TNodeI NI = G->GetNI(NIdV[i]);
00044       EIn += NI.GetOutDeg();
00045       EEIn += OutDegH.GetDat(NIdV[i]);
00046     }
00047     Mod += (EIn-EEIn*EEIn/(2.0*OrigEdges));
00048   }
00049   if (Mod == 0) { return 0; }
00050   else { return Mod/(2.0*OrigEdges); }
00051 }
00052 
00053 } // namespace TSnapDetail
00054 
00055 // Maximum modularity clustering by Girvan-Newman algorithm (slow)
00056 //  Girvan M. and Newman M. E. J., Community structure in social and biological networks, Proc. Natl. Acad. Sci. USA 99, 7821-7826 (2002)
00057 double CommunityGirvanNewman(PUNGraph& Graph, TCnComV& CmtyV) {
00058   TIntH OutDegH;
00059   const int NEdges = Graph->GetEdges();
00060   for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00061     OutDegH.AddDat(NI.GetId(), NI.GetOutDeg());
00062   }
00063   double BestQ = -1; // modularity
00064   TCnComV CurCmtyV;
00065   CmtyV.Clr();
00066   TIntV Cmty1, Cmty2;
00067   while (true) {
00068     TSnapDetail::CmtyGirvanNewmanStep(Graph, Cmty1, Cmty2);
00069     const double Q = TSnapDetail::_GirvanNewmanGetModularity(Graph, OutDegH, NEdges, CurCmtyV);
00070     //printf("current modularity: %f\n", Q);
00071     if (Q > BestQ) {
00072       BestQ = Q; 
00073       CmtyV.Swap(CurCmtyV);
00074     }
00075     if (Cmty1.Len()==0 || Cmty2.Len() == 0) { break; }
00076   }
00077   return BestQ;
00078 }
00079 
00080 namespace TSnapDetail {
00084 class TCNMQMatrix {
00085 private:
00086   struct TCmtyDat {
00087     double DegFrac;
00088     TIntFltH NIdQH;
00089     int MxQId;
00090     TCmtyDat() : MxQId(-1) { }
00091     TCmtyDat(const double& NodeDegFrac, const int& OutDeg) : 
00092       DegFrac(NodeDegFrac), NIdQH(OutDeg), MxQId(-1) { }
00093     void AddQ(const int& NId, const double& Q) { NIdQH.AddDat(NId, Q);
00094       if (MxQId==-1 || NIdQH[MxQId]<Q) { MxQId=NIdQH.GetKeyId(NId); } }
00095     void UpdateMaxQ() { MxQId=-1; 
00096       for (int i = -1; NIdQH.FNextKeyId(i); ) { 
00097         if (MxQId==-1 || NIdQH[MxQId]< NIdQH[i]) { MxQId=i; } } }
00098     void DelLink(const int& K) { const int NId=GetMxQNId(); 
00099       NIdQH.DelKey(K); if (NId==K) { UpdateMaxQ(); }  }
00100     int GetMxQNId() const { return NIdQH.GetKey(MxQId); }
00101     double GetMxQ() const { return NIdQH[MxQId]; }
00102   };
00103 private:
00104   THash<TInt, TCmtyDat> CmtyQH;
00105   TVec<TFltIntIntTr> MxQHeap;
00106   TUnionFind CmtyIdUF;
00107   double Q;
00108 public:
00109   TCNMQMatrix(const PUNGraph& Graph) : CmtyQH(Graph->GetNodes()), 
00110     MxQHeap(Graph->GetNodes(), 0), CmtyIdUF(Graph->GetNodes()) { Init(Graph); }
00111   void Init(const PUNGraph& Graph) {
00112     const double M = 0.5/Graph->GetEdges(); // 1/2m
00113     Q = 0.0;
00114     for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00115       CmtyIdUF.Add(NI.GetId());
00116       const int OutDeg = NI.GetOutDeg();
00117       if (OutDeg == 0) { continue; }
00118       TCmtyDat& Dat = CmtyQH.AddDat(NI.GetId(), TCmtyDat(M * OutDeg, OutDeg));
00119       for (int e = 0; e < NI.GetOutDeg(); e++) {
00120         const int DstNId = NI.GetOutNId(e);
00121         const double DstMod = 2 * M * (1.0 - OutDeg * Graph->GetNI(DstNId).GetOutDeg() * M);
00122         Dat.AddQ(DstNId, DstMod);
00123       }
00124       Q += -1.0*TMath::Sqr(OutDeg*M);
00125       if (NI.GetId() < Dat.GetMxQNId()) {
00126         MxQHeap.Add(TFltIntIntTr(Dat.GetMxQ(), NI.GetId(), Dat.GetMxQNId())); }
00127     }
00128     MxQHeap.MakeHeap();
00129   }
00130   TFltIntIntTr FindMxQEdge() {
00131     while (true) {
00132       if (MxQHeap.Empty()) { break; }
00133       const TFltIntIntTr TopQ = MxQHeap.PopHeap();
00134       if (! CmtyQH.IsKey(TopQ.Val2) || ! CmtyQH.IsKey(TopQ.Val3)) { continue; }
00135       if (TopQ.Val1!=CmtyQH.GetDat(TopQ.Val2).GetMxQ() && TopQ.Val1!=CmtyQH.GetDat(TopQ.Val3).GetMxQ()) { continue; }
00136       return TopQ;
00137     }
00138     return TFltIntIntTr(-1, -1, -1);
00139   }
00140   bool MergeBestQ() {
00141     const TFltIntIntTr TopQ = FindMxQEdge();
00142     if (TopQ.Val1 <= 0.0) { return false; }
00143     // joint communities
00144     const int I = TopQ.Val3;
00145     const int J = TopQ.Val2;
00146     CmtyIdUF.Union(I, J); // join
00147     Q += TopQ.Val1;
00148     TCmtyDat& DatJ = CmtyQH.GetDat(J);
00149     { TCmtyDat& DatI = CmtyQH.GetDat(I);
00150     DatI.DelLink(J);  DatJ.DelLink(I);
00151     for (int i = -1; DatJ.NIdQH.FNextKeyId(i); ) {
00152       const int K = DatJ.NIdQH.GetKey(i);
00153       TCmtyDat& DatK = CmtyQH.GetDat(K);
00154       double NewQ = DatJ.NIdQH[i];
00155       if (DatI.NIdQH.IsKey(K)) { NewQ = NewQ+DatI.NIdQH.GetDat(K);  DatK.DelLink(I); }     // K connected to I and J
00156       else { NewQ = NewQ-2*DatI.DegFrac*DatK.DegFrac; }  // K connected to J not I
00157       DatJ.AddQ(K, NewQ);
00158       DatK.AddQ(J, NewQ);
00159       MxQHeap.PushHeap(TFltIntIntTr(NewQ, TMath::Mn(J,K), TMath::Mx(J,K)));
00160     }
00161     for (int i = -1; DatI.NIdQH.FNextKeyId(i); ) {
00162       const int K = DatI.NIdQH.GetKey(i);
00163       if (! DatJ.NIdQH.IsKey(K)) { // K connected to I not J
00164         TCmtyDat& DatK = CmtyQH.GetDat(K);
00165         const double NewQ = DatI.NIdQH[i]-2*DatJ.DegFrac*DatK.DegFrac; 
00166         DatJ.AddQ(K, NewQ);
00167         DatK.DelLink(I);
00168         DatK.AddQ(J, NewQ);
00169         MxQHeap.PushHeap(TFltIntIntTr(NewQ, TMath::Mn(J,K), TMath::Mx(J,K)));
00170       }
00171     } 
00172     DatJ.DegFrac += DatI.DegFrac; }
00173     if (DatJ.NIdQH.Empty()) { CmtyQH.DelKey(J); } // isolated community (done)
00174     CmtyQH.DelKey(I);
00175     return true;
00176   }
00177   static double CmtyCMN(const PUNGraph& Graph, TCnComV& CmtyV) {
00178     TCNMQMatrix QMatrix(Graph);
00179     // maximize modularity
00180     while (QMatrix.MergeBestQ()) { }
00181     // reconstruct communities
00182     THash<TInt, TIntV> IdCmtyH;
00183     for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00184       IdCmtyH.AddDat(QMatrix.CmtyIdUF.Find(NI.GetId())).Add(NI.GetId()); 
00185     }
00186     CmtyV.Gen(IdCmtyH.Len());
00187     for (int j = 0; j < IdCmtyH.Len(); j++) {
00188       CmtyV[j].NIdV.Swap(IdCmtyH[j]);
00189     }
00190     return QMatrix.Q;
00191   }
00192 };
00193 
00194 } // namespace TSnapDetail
00195 
00196 double CommunityCNM(const PUNGraph& Graph, TCnComV& CmtyV) {
00197   return TSnapDetail::TCNMQMatrix::CmtyCMN(Graph, CmtyV);
00198 }
00199 
00200 }; //namespace TSnap