SNAP Library 2.1, Developer Reference
2013-09-25 10:47:25
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
Local-Spectral-Clustering statistics of a given Graph. More...
#include <ncp.h>
Classes | |
class | TCutInfo |
class | TNodeSweep |
Public Member Functions | |
TLocClustStat (const double &_Alpha, const int &_KMin, const int &_KMax, const double &_KFac, const int &_Coverage, const double &_SizeFrac) | |
void | Save (TSOut &SOut) const |
void | Clr () |
void | SetGraph (const PUNGraph &GraphPt) |
void | Run (const PUNGraph &Graph, const bool &SaveAllSweeps=false, const bool &SaveAllCond=false, const bool &SaveBestNodesAtK=false) |
void | AddBagOfWhiskers () |
void | AddCut (const TIntV &NIdV) |
void | AddCut (const int &ClustSz, const double &Phi) |
void | AddToBestCutH (const PUNGraph &Graph, const TIntV &NIdV, const bool &AddBestCond=true) |
double | FindBestCut (const int &Nodes, const TIntSet &TabuNIdSet, int &BestCutId) const |
const TCutInfo & | FindBestCut (const int &Nodes=-1) const |
double | FindBestCut (const int &Nodes, TIntV &ClustNIdV) const |
int | FindBestCut (const int &Nodes, int &Vol, int &Cut, double &Phi) const |
const TCutInfo & | GetBestCut () const |
int | GetCuts () const |
const TCutInfo & | GetKNodeCut (const int &Nodes) const |
const TCutInfo & | GetCutN (const int &N) const |
int | BestWhiskNodes () const |
int | BestWhiskEdges () const |
double | BestWhiskPhi () const |
TFltPr | GetBestWhisk () const |
const TFltPrV & | GetBagOfWhiskersV () const |
void | GetCurveStat (TFltPrV &MeanV, TFltPrV &MedV, TFltPrV &Dec1V, TFltPrV &MinV, TFltPrV &MaxV) const |
void | GetBoltzmanCurveStat (const TFltV &TempV, TVec< TFltPrV > &NcpV) const |
TStr | ParamStr () const |
void | PlotNCP (const TStr &OutFNm, TStr Desc=TStr()) const |
void | PlotNCPModul (const TStr &OutFNm, TStr Desc=TStr()) const |
void | PlotNcpTop10 (const TStr &OutFNm, TStr Desc, const int &TakeMinN) const |
void | PlotPhiInOut (const TStr &OutFNm, TStr Desc=TStr()) const |
void | PlotBestClustDens (TStr OutFNm, TStr Desc=TStr()) const |
void | PlotNCPScatter (const TStr &OutFNm, TStr Desc=TStr()) const |
void | PlotPhiDistr (const int &CmtySz, const TStr &OutFNm, TStr Desc=TStr()) const |
void | PlotBoltzmanCurve (const TStr &OutFNm, TStr Desc=TStr()) const |
void | ImposeNCP (const TLocClustStat &LcStat2, TStr OutFNm, TStr Desc, TStr Desc1, TStr Desc2) const |
void | ImposeNCP (const TLocClustStat &LcStat2, const TLocClustStat &LcStat3, TStr OutFNm, TStr Desc, TStr Desc1, TStr Desc2, TStr Desc3) const |
void | SaveTxtInfo (const TStr &OutFNmPref, const TStr &Desc, const bool &SetMaxAt1) const |
Static Public Member Functions | |
static void | BagOfWhiskers (const PUNGraph &Graph, TFltPrV &SizePhiV, TFltPr &BestWhisk) |
static void | BagOfWhiskers2 (const PUNGraph &Graph, TFltPrV &SizePhiV) |
static void | MakeExpBins (const TFltPrV &ValV, TFltPrV &NewV) |
static void | MakeExpBins (const TFltV &ValV, TFltV &NewV) |
Public Attributes | |
TVec< TNodeSweep > | SweepsV |
THash< TInt, TFltV > | SizePhiH |
THash< TInt, TCutInfo > | BestCutH |
TFltPrV | BagOfWhiskerV |
TFltPr | BestWhisk |
TCutInfo | BestCut |
TIntSet | SizeBucketSet |
Static Public Attributes | |
static double | BinFactor = 1.01 |
static int | TakeValAt |
Private Attributes | |
TFlt | Alpha |
TFlt | SizeFrac |
TFlt | KFac |
TInt | KMin |
TInt | KMax |
TInt | Coverage |
PUNGraph | Graph |
TLocClustStat::TLocClustStat | ( | const double & | _Alpha, |
const int & | _KMin, | ||
const int & | _KMax, | ||
const double & | _KFac, | ||
const int & | _Coverage, | ||
const double & | _SizeFrac | ||
) |
void TLocClustStat::AddBagOfWhiskers | ( | ) |
Definition at line 404 of file ncp.cpp.
References BagOfWhiskers(), BagOfWhiskerV, BestWhisk, and Graph.
Referenced by TLocClust::PlotNCP().
{ BagOfWhiskers(Graph, BagOfWhiskerV, BestWhisk); // slow but exact //BagOfWhiskers2(Graph, BagOfWhiskerV); }
void TLocClustStat::AddCut | ( | const TIntV & | NIdV | ) |
Definition at line 410 of file ncp.cpp.
References THash< TKey, TDat, THashFunc >::AddDat(), TLocClust::GetCutStat(), TUNGraph::GetEdges(), Graph, TVec< TVal, TSizeTy >::Len(), and SizePhiH.
{ int Vol, Cut; double Phi; TLocClust::GetCutStat(Graph, NIdV, Vol, Cut, Phi, Graph->GetEdges()); SizePhiH.AddDat(NIdV.Len()).Add(Phi); }
void TLocClustStat::AddCut | ( | const int & | ClustSz, |
const double & | Phi | ||
) |
Definition at line 417 of file ncp.cpp.
References TVec< TVal, TSizeTy >::Add(), THash< TKey, TDat, THashFunc >::AddDat(), and SizePhiH.
void TLocClustStat::AddToBestCutH | ( | const PUNGraph & | Graph, |
const TIntV & | NIdV, | ||
const bool & | AddBestCond = true |
||
) |
Definition at line 422 of file ncp.cpp.
References THash< TKey, TDat, THashFunc >::AddDat(), BestCutH, TLocClustStat::TCutInfo::GetCutSz(), THash< TKey, TDat, THashFunc >::GetDat(), TLocClustStat::TCutInfo::GetEdges(), TUNGraph::GetEdges(), TLocClustStat::TCutInfo::GetModular(), TLocClustStat::TCutInfo::GetNodes(), TLocClustStat::TCutInfo::GetPhi(), THash< TKey, TDat, THashFunc >::IsKey(), and TVec< TVal, TSizeTy >::Len().
{ const int K = NIdV.Len(); TCutInfo CutInfo(Graph, NIdV, true); printf("new: %d\t%d\t%d\t%f\t%f\n", CutInfo.GetNodes(), CutInfo.GetEdges(), CutInfo.GetCutSz(), CutInfo.GetPhi(), CutInfo.GetModular(Graph->GetEdges())); if (! BestCutH.IsKey(K)) { BestCutH.AddDat(K, CutInfo); return; } TCutInfo& CurCut = BestCutH.GetDat(K); // keep the cluster with best conductance if (AddBestCond && CurCut.GetPhi() > CutInfo.GetPhi()) { CurCut=CutInfo; } // keep the cluster with best modularity //const int E = Graph->GetEdges(); //if (! AddBestCond && CurCut.GetModular(E) < CutInfo.GetModular(E)) { CurCut=CutInfo; } }
void TLocClustStat::BagOfWhiskers | ( | const PUNGraph & | Graph, |
TFltPrV & | SizePhiV, | ||
TFltPr & | BestWhisk | ||
) | [static] |
Definition at line 901 of file ncp.cpp.
References TVec< TVal, TSizeTy >::Add(), THash< TKey, TDat, THashFunc >::AddDat(), THash< TKey, TDat, THashFunc >::AddKey(), THashSet< TKey, THashFunc >::AddKey(), TVec< TVal, TSizeTy >::Clr(), TVec< TVal, TSizeTy >::Empty(), TVec< TVal, TSizeTy >::Gen(), TSnap::Get1CnCom(), THash< TKey, TDat, THashFunc >::GetDat(), THash< TKey, TDat, THashFunc >::GetKey(), TUNGraph::GetNI(), TUNGraph::TNodeI::GetOutDeg(), TExeTm::GetStr(), THash< TKey, TDat, THashFunc >::IsKey(), THashSet< TKey, THashFunc >::IsKey(), THash< TKey, TDat, THashFunc >::Len(), TVec< TVal, TSizeTy >::Len(), THashSet< TKey, THashFunc >::Len(), TMath::Mn(), TVec< TVal, TSizeTy >::Sort(), THash< TKey, TDat, THashFunc >::SortByKey(), and TInt::Val.
Referenced by AddBagOfWhiskers().
{ TCnComV Cn1ComV; TSnap::Get1CnCom(Graph, Cn1ComV); TIntPrV SzVolV; int MxSize=0; if (Cn1ComV.Empty()) { printf("** No bridges\n"); SizePhiV.Clr(); return; } // Graph->SaveEdgeList("g-vol.txt"); TGraphViz::Plot(Graph, gvlNeato, "g-vol.gif"); Fail; } IAssert(vol <= sz*(sz-1)); printf(" 1-connected components: %d\n", Cn1ComV.Len()); MaxWhisk = TFltPr(1,1); for (int c = 0; c < Cn1ComV.Len(); c++) { const TIntV& NIdV = Cn1ComV[c].NIdV; const int sz = NIdV.Len(); if (sz < 2) { continue; } int vol = 0; // volume is the size of degrees for (int n = 0; n < sz; n++) { vol += Graph->GetNI(NIdV[n]).GetOutDeg(); } SzVolV.Add(TIntPr(sz, vol)); MxSize += sz; if (1.0/double(vol) < MaxWhisk.Val2) { MaxWhisk=TFltPr(NIdV.Len(), 1.0/double(vol)); } } SzVolV.Sort(false); // compose clusters THash<TInt, TIntSet> ItemSetH(MxSize, true); THash<TInt, TInt> VolH(MxSize, true); THash<TInt, TFlt> CostH(MxSize, true); ItemSetH.AddKey(0); VolH.AddKey(0); TExeTm ExeTm; // exact up to size 1000 for (int size = 2; size <= TMath::Mn(MxSize, 1000); size++) { for (int item = 0; item <SzVolV.Len(); item++) { const int smallSz = size-SzVolV[item].Val1; if (ItemSetH.IsKey(smallSz)) { const TIntSet& SmallSet = ItemSetH.GetDat(smallSz); if (SmallSet.IsKey(item)) { continue; } const int SmallVol = VolH.GetDat(smallSz); // combine smaller nad new solution const double curCost = CostH.IsKey(size) ? double(CostH.GetDat(size)) : double(10e10); const double newCost = double(SmallSet.Len()+1) / double(SmallVol+SzVolV[item].Val2); if (curCost < newCost) { continue; } VolH.AddDat(size, SmallVol+SzVolV[item].Val2); ItemSetH.AddDat(size, SmallSet); ItemSetH.GetDat(size).AddKey(item); CostH.AddDat(size, newCost); } } if (VolH.IsKey(size) && size%100==0) { printf("\rSize: %d/%d: vol: %d, items: %d/%d [%s]", size, MxSize, VolH.GetDat(size).Val, ItemSetH.GetDat(size).Len(), SzVolV.Len(), ExeTm.GetStr()); } } // add sizes larger than 1000 printf("\nAdding sizes > 1000 nodes..."); int partSz=0; double partVol=0.0; for (int i = 0; i < SzVolV.Len(); i++) { partSz += SzVolV[i].Val1(); partVol += SzVolV[i].Val2(); if (partSz < 1000) { continue; } const double curCost = CostH.IsKey(partSz) ? double(CostH.GetDat(partSz)) : double(10e10); const double partPhi = double(i+1)/partVol; if (partPhi < curCost) { CostH.AddDat(partSz, partPhi); } } VolH.SortByKey(); CostH.SortByKey(); SizePhiV.Gen(CostH.Len(), 0); SizePhiV.Add(TFltPr(1, 1)); for (int s = 0; s < CostH.Len(); s++) { const int size = CostH.GetKey(s); const double cond = CostH[s]; //double(ItemSetH.GetDat(size).Len())/double(VolH[s]); SizePhiV.Add(TFltPr(size, cond)); } printf("done\n"); }
void TLocClustStat::BagOfWhiskers2 | ( | const PUNGraph & | Graph, |
TFltPrV & | SizePhiV | ||
) | [static] |
Definition at line 977 of file ncp.cpp.
References TVec< TVal, TSizeTy >::Add(), THash< TKey, TDat, THashFunc >::AddDat(), TVec< TVal, TSizeTy >::Clr(), TVec< TVal, TSizeTy >::Empty(), TVec< TVal, TSizeTy >::Gen(), TSnap::Get1CnCom(), THash< TKey, TDat, THashFunc >::GetDat(), THash< TKey, TDat, THashFunc >::GetKey(), TUNGraph::GetNI(), TUNGraph::TNodeI::GetOutDeg(), THash< TKey, TDat, THashFunc >::IsKey(), THash< TKey, TDat, THashFunc >::Len(), TVec< TVal, TSizeTy >::Len(), SizePhiH, TVec< TVal, TSizeTy >::Sort(), and THash< TKey, TDat, THashFunc >::SortByKey().
{ TCnComV Cn1ComV; TSnap::Get1CnCom(Graph, Cn1ComV); TIntPrV SzVolV; int MxSize=0, TotVol=0; if (Cn1ComV.Empty()) { printf("** No bridges\n"); SizePhiV.Clr(); return; } printf(" 1-connected components: %d\n", Cn1ComV.Len()); for (int c = 0; c < Cn1ComV.Len(); c++) { const TIntV& NIdV = Cn1ComV[c].NIdV; const int sz = NIdV.Len(); if (sz < 2) { continue; } int vol = 0; // volume is the size of degrees for (int n = 0; n < sz; n++) { vol += Graph->GetNI(NIdV[n]).GetOutDeg(); } SzVolV.Add(TIntPr(sz, vol)); MxSize += sz; TotVol += vol; } printf(" Total size: %d\t Total vol: %d\n", MxSize, TotVol); SzVolV.Sort(false); // compose clusters THash<TFlt, TFlt> SizePhiH(MxSize, true); for (int i = 0; i < SzVolV.Len(); i++) { const int Sz = SzVolV[i].Val1(); const double Phi = 1.0/double(SzVolV[i].Val2()); if ((! SizePhiH.IsKey(Sz)) || SizePhiH.GetDat(Sz) > Phi) { SizePhiH.AddDat(Sz, Phi); } } double partSz=0.0, partVol=0.0; for (int i = 0; i < SzVolV.Len(); i++) { partSz += SzVolV[i].Val1(); partVol += SzVolV[i].Val2(); const double partPhi = double(i+1)/partVol; if ((! SizePhiH.IsKey(partSz)) || partPhi < SizePhiH.GetDat(partSz)) { SizePhiH.AddDat(partSz, partPhi); } } SizePhiV.Gen(SizePhiH.Len()+1, 0); SizePhiV.Add(TFltPr(1, 1)); SizePhiH.SortByKey(); for (int s = 0; s < SizePhiH.Len(); s++) { SizePhiV.Add(TFltPr(SizePhiH.GetKey(s), SizePhiH[s])); } }
int TLocClustStat::BestWhiskEdges | ( | ) | const [inline] |
Definition at line 212 of file ncp.h.
References BestWhisk, TMath::Round(), TFlt::Val, and TPair< TVal1, TVal2 >::Val2.
{ return (int)TMath::Round(1.0/BestWhisk.Val2.Val)/2-1; }
int TLocClustStat::BestWhiskNodes | ( | ) | const [inline] |
double TLocClustStat::BestWhiskPhi | ( | ) | const [inline] |
void TLocClustStat::Clr | ( | ) |
Definition at line 293 of file ncp.cpp.
References BagOfWhiskerV, BestCutH, THash< TKey, TDat, THashFunc >::Clr(), TVec< TVal, TSizeTy >::Clr(), SizePhiH, and SweepsV.
Referenced by Run().
{ SweepsV.Clr(false); // node ids and conductances for each run of local clustering SizePhiH.Clr(false); // conductances at cluster size K BestCutH.Clr(false); // best cut (min conductance) at size K BagOfWhiskerV.Clr(false); // (Size, Conductance) of bag of whiskers clusters }
double TLocClustStat::FindBestCut | ( | const int & | Nodes, |
const TIntSet & | TabuNIdSet, | ||
int & | BestCutId | ||
) | const |
Definition at line 469 of file ncp.cpp.
References TVec< TVal, TSizeTy >::Empty(), IAssert, THashSet< TKey, THashFunc >::IsKey(), TLocClustStat::TNodeSweep::Len(), TVec< TVal, TSizeTy >::Len(), TLocClustStat::TNodeSweep::NId(), TLocClustStat::TNodeSweep::Phi(), and SweepsV.
Referenced by FindBestCut(), PlotNcpTop10(), and PlotPhiInOut().
{ double bestPhi = 1; BestCutId = -1; bool Tabu; IAssert(! SweepsV.Empty()); for (int c = 0; c < SweepsV.Len(); c++) { const TNodeSweep& Sweep = SweepsV[c]; if (Sweep.Len() < Nodes) { continue; } if (Sweep.Phi(Nodes-1) > bestPhi) { continue; } Tabu = false; for (int i = 0; i < Nodes; i++) { if (TabuNIdSet.IsKey(Sweep.NId(i))) { Tabu=true; break; } } if (Tabu) { continue; } bestPhi = Sweep.Phi(Nodes-1); BestCutId = c; } return bestPhi; }
const TLocClustStat::TCutInfo & TLocClustStat::FindBestCut | ( | const int & | Nodes = -1 | ) | const |
Definition at line 436 of file ncp.cpp.
References BestCutH, THash< TKey, TDat, THashFunc >::Empty(), THash< TKey, TDat, THashFunc >::GetDat(), IAssert, THash< TKey, TDat, THashFunc >::IsKey(), and THash< TKey, TDat, THashFunc >::Len().
{ double bestPhi = 1; int CutId = -1; if (Nodes > 0) { IAssert(BestCutH.IsKey(Nodes)); return BestCutH.GetDat(Nodes); } else { for (int n = 0; n < BestCutH.Len(); n++) { if (BestCutH[n].GetPhi() <= bestPhi) { bestPhi = BestCutH[n].GetPhi(); CutId = n; } } IAssert(CutId != -1); IAssert(! BestCutH[CutId].CutNIdV.Empty()); return BestCutH[CutId]; } }
double TLocClustStat::FindBestCut | ( | const int & | Nodes, |
TIntV & | ClustNIdV | ||
) | const |
Definition at line 454 of file ncp.cpp.
References TLocClustStat::TCutInfo::CutNIdV, FindBestCut(), and TLocClustStat::TCutInfo::GetPhi().
{ const TCutInfo& Cut = FindBestCut(Nodes); ClustNIdV = Cut.CutNIdV; return Cut.GetPhi(); }
int TLocClustStat::FindBestCut | ( | const int & | Nodes, |
int & | Vol, | ||
int & | Cut, | ||
double & | Phi | ||
) | const |
Definition at line 460 of file ncp.cpp.
References FindBestCut(), TLocClustStat::TCutInfo::GetCutSz(), TLocClustStat::TCutInfo::GetNodes(), TLocClustStat::TCutInfo::GetPhi(), and TLocClustStat::TCutInfo::GetVol().
{ const TCutInfo& CutInfo = FindBestCut(Nodes); Vol = CutInfo.GetVol(); Cut = CutInfo.GetCutSz(); Phi = CutInfo.GetPhi(); return CutInfo.GetNodes(); }
const TFltPrV& TLocClustStat::GetBagOfWhiskersV | ( | ) | const [inline] |
const TCutInfo& TLocClustStat::GetBestCut | ( | ) | const [inline] |
TFltPr TLocClustStat::GetBestWhisk | ( | ) | const [inline] |
void TLocClustStat::GetBoltzmanCurveStat | ( | const TFltV & | TempV, |
TVec< TFltPrV > & | NcpV | ||
) | const |
Definition at line 529 of file ncp.cpp.
References TVec< TVal, TSizeTy >::Add(), THash< TKey, TDat, THashFunc >::Empty(), TVec< TVal, TSizeTy >::Gen(), THash< TKey, TDat, THashFunc >::GetKey(), IAssert, THash< TKey, TDat, THashFunc >::Len(), TVec< TVal, TSizeTy >::Len(), MakeExpBins(), SizePhiH, TVec< TVal, TSizeTy >::Swap(), and TInt::Val.
Referenced by PlotBoltzmanCurve().
{ IAssert(! SizePhiH.Empty()); // stores conductances of all little clusters NcpV.Gen(TempV.Len()); TFltPrV BucketV; for (int t = 0; t < TempV.Len(); t++) { const double T = TempV[t]; for (int i = 0; i < SizePhiH.Len(); i++) { const double X = SizePhiH.GetKey(i).Val; IAssert(X >= 1.0); const TFltV& PhiV = SizePhiH[i]; double V = 0.0, SumW = 0.0; for (int j = 0; j < PhiV.Len(); j++) { V += PhiV[j] * exp(-PhiV[j]/T); SumW += exp(-PhiV[j]/T); } //V /= PhiV.Len(); V /= SumW; NcpV[t].Add(TFltPr(X, V)); } TLocClustStat::MakeExpBins(NcpV[t], BucketV); NcpV[t].Swap(BucketV); // normalize to start at (1,1) //for (int i = 0; i < NcpV[t].Len(); i++) { // NcpV[t][i].Val2 /= NcpV[t][0].Val2; //} } }
void TLocClustStat::GetCurveStat | ( | TFltPrV & | MeanV, |
TFltPrV & | MedV, | ||
TFltPrV & | Dec1V, | ||
TFltPrV & | MinV, | ||
TFltPrV & | MaxV | ||
) | const |
Definition at line 490 of file ncp.cpp.
References TMom::Add(), TVec< TVal, TSizeTy >::Add(), BestCutH, TVec< TVal, TSizeTy >::Clr(), TMom::Def(), THash< TKey, TDat, THashFunc >::Empty(), TMom::GetDecile(), THash< TKey, TDat, THashFunc >::GetKey(), TMom::GetMean(), TMom::GetMedian(), TMom::GetMn(), TMom::GetMx(), IAssert, THash< TKey, TDat, THashFunc >::Len(), TVec< TVal, TSizeTy >::Len(), MakeExpBins(), SizePhiH, TVec< TVal, TSizeTy >::Sort(), TVec< TVal, TSizeTy >::Swap(), and TInt::Val.
Referenced by ImposeNCP(), PlotBoltzmanCurve(), and PlotNCP().
{ TFltPrV BucketV; MeanV.Clr(false); MedV.Clr(false); Dec1V.Clr(false); MinV.Clr(false); MaxV.Clr(false); if (! SizePhiH.Empty()) { // stores conductances of all little clusters const THash<TInt, TFltV>& KvH = SizePhiH; for (int i = 0; i < KvH.Len(); i++) { const double X = KvH.GetKey(i).Val; IAssert(X >= 1.0); const TFltV& YVec = KvH[i]; TMom Mom; for (int j = 0; j < YVec.Len(); j++) { Mom.Add(YVec[j]); } Mom.Def(); /*IAssert(Mom.GetMean()>=0 && Mom.GetMean()<=1); IAssert(Mom.GetMedian()>=0 && Mom.GetMedian()<=1); IAssert(Mom.GetDecile(1)>=0 && Mom.GetDecile(1)<=1); IAssert(Mom.GetMn()>=0 && Mom.GetMn()<=1); IAssert(Mom.GetMx()>=0 && Mom.GetMx()<=1);*/ MeanV.Add(TFltPr(X, Mom.GetMean())); MedV.Add(TFltPr(X, Mom.GetMedian())); Dec1V.Add(TFltPr(X, Mom.GetDecile(1))); MinV.Add(TFltPr(X, Mom.GetMn())); MaxV.Add(TFltPr(X, Mom.GetMx())); } MeanV.Sort(); MedV.Sort(); Dec1V.Sort(); MinV.Sort(); MaxV.Sort(); // create log buckets (makes plots smaller and smoother) TLocClustStat::MakeExpBins(MeanV, BucketV); MeanV.Swap(BucketV); TLocClustStat::MakeExpBins(MedV, BucketV); MedV.Swap(BucketV); TLocClustStat::MakeExpBins(Dec1V, BucketV); Dec1V.Swap(BucketV); TLocClustStat::MakeExpBins(MinV, BucketV); MinV.Swap(BucketV); TLocClustStat::MakeExpBins(MaxV, BucketV); MaxV.Swap(BucketV); } else { //const int GEdges = Graph->GetEdges(); for (int i = 0; i < BestCutH.Len(); i++) { MinV.Add(TFltPr(BestCutH.GetKey(i).Val, BestCutH[i].GetPhi())); } MinV.Sort(); TLocClustStat::MakeExpBins(MinV, BucketV); MinV.Swap(BucketV); } }
const TCutInfo& TLocClustStat::GetCutN | ( | const int & | N | ) | const [inline] |
int TLocClustStat::GetCuts | ( | ) | const [inline] |
const TCutInfo& TLocClustStat::GetKNodeCut | ( | const int & | Nodes | ) | const [inline] |
Definition at line 208 of file ncp.h.
References BestCutH, and THash< TKey, TDat, THashFunc >::GetDat().
Referenced by SaveTxtInfo().
void TLocClustStat::ImposeNCP | ( | const TLocClustStat & | LcStat2, |
TStr | OutFNm, | ||
TStr | Desc, | ||
TStr | Desc1, | ||
TStr | Desc2 | ||
) | const |
Definition at line 786 of file ncp.cpp.
References TVec< TVal, TSizeTy >::Add(), TGnuPlot::AddPlot(), BagOfWhiskerV, BestWhisk, TStr::CStr(), TStr::Empty(), TVec< TVal, TSizeTy >::Empty(), TStr::Fmt(), GetCurveStat(), TUNGraph::GetEdges(), TUNGraph::GetNodes(), gpsLog10XY, gpwLines, gpwPoints, Graph, ParamStr(), TGnuPlot::SavePng(), TGnuPlot::SetScale(), and TGnuPlot::SetXYLabel().
Referenced by TLocClust::PlotNCP().
{ if (Desc.Empty()) { Desc = OutFNm; } TFltPrV MeanV1, MedV1, Dec1V1, MinV1, MaxV1; TFltPrV MeanV2, MedV2, Dec1V2, MinV2, MaxV2; GetCurveStat(MeanV1, MedV1, Dec1V1, MinV1, MaxV1); LcStat2.GetCurveStat(MeanV2, MedV2, Dec1V2, MinV2, MaxV2); // plot TGnuPlot GP("ncp."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr())); if (! MinV1.Empty()) { GP.AddPlot(MinV1, gpwLines, TStr::Fmt("%s MIN (%d, %d)", Desc1.CStr(), Graph->GetNodes(), Graph->GetEdges()), "lw 1"); } if (! MinV2.Empty()) { GP.AddPlot(MinV2, gpwLines, TStr::Fmt("%s MIN (%d, %d)", Desc2.CStr(), LcStat2.Graph->GetNodes(), LcStat2.Graph->GetEdges()), "lw 1"); } if (! BagOfWhiskerV.Empty()) { GP.AddPlot(BagOfWhiskerV, gpwLines, Desc1+" whiskers", "lw 1"); TFltPrV BestWhiskV; BestWhiskV.Add(BestWhisk); GP.AddPlot(BestWhiskV, gpwPoints, Desc1+" Best whisker", "pt 5 ps 2"); } if (! LcStat2.BagOfWhiskerV.Empty()) { GP.AddPlot(LcStat2.BagOfWhiskerV, gpwLines, Desc2+" whiskers", "lw 1"); TFltPrV BestWhiskV; BestWhiskV.Add(LcStat2.BestWhisk); GP.AddPlot(BestWhiskV, gpwPoints, Desc2+" Best whisker", "pt 5 ps 2"); } GP.SetScale(gpsLog10XY); GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)"); //GP.SetXRange(1, Graph->GetNodes()/2); GP.SavePng(); }
void TLocClustStat::ImposeNCP | ( | const TLocClustStat & | LcStat2, |
const TLocClustStat & | LcStat3, | ||
TStr | OutFNm, | ||
TStr | Desc, | ||
TStr | Desc1, | ||
TStr | Desc2, | ||
TStr | Desc3 | ||
) | const |
Definition at line 810 of file ncp.cpp.
References TVec< TVal, TSizeTy >::Add(), TGnuPlot::AddPlot(), BagOfWhiskerV, BestWhisk, TStr::CStr(), TStr::Empty(), TVec< TVal, TSizeTy >::Empty(), TStr::Fmt(), GetCurveStat(), TUNGraph::GetNodes(), gpsLog10XY, gpwLines, gpwPoints, Graph, ParamStr(), TGnuPlot::SavePng(), TGnuPlot::SetScale(), TGnuPlot::SetXRange(), and TGnuPlot::SetXYLabel().
{ if (Desc.Empty()) { Desc = OutFNm; } TFltPrV MeanV1, MedV1, Dec1V1, MinV1, MaxV1; TFltPrV MeanV2, MedV2, Dec1V2, MinV2, MaxV2; TFltPrV MeanV3, MedV3, Dec1V3, MinV3, MaxV3; GetCurveStat(MeanV1, MedV1, Dec1V1, MinV1, MaxV1); LcStat2.GetCurveStat(MeanV2, MedV2, Dec1V2, MinV2, MaxV2); LcStat3.GetCurveStat(MeanV3, MedV3, Dec1V3, MinV3, MaxV3); // plot TGnuPlot GP("phiTR."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr())); if (! MinV1.Empty()) { GP.AddPlot(MinV1, gpwLines, Desc1+" MIN", "lw 1"); } if (! MinV2.Empty()) { GP.AddPlot(MinV2, gpwLines, Desc2+" MIN", "lw 1"); } if (! MinV3.Empty()) { GP.AddPlot(MinV3, gpwLines, Desc3+" MIN", "lw 1"); } if (! BagOfWhiskerV.Empty()) { GP.AddPlot(BagOfWhiskerV, gpwLines, Desc1+" whiskers", "lw 1"); TFltPrV BestWhiskV; BestWhiskV.Add(BestWhisk); GP.AddPlot(BestWhiskV, gpwPoints, Desc1+" Best whisker", "pt 5 ps 2"); } if (! LcStat2.BagOfWhiskerV.Empty()) { GP.AddPlot(LcStat2.BagOfWhiskerV, gpwLines, Desc2+" whiskers", "lw 1"); TFltPrV BestWhiskV; BestWhiskV.Add(LcStat2.BestWhisk); GP.AddPlot(BestWhiskV, gpwPoints, Desc2+" Best whisker", "pt 5 ps 2"); } if (! LcStat3.BagOfWhiskerV.Empty()) { GP.AddPlot(LcStat3.BagOfWhiskerV, gpwLines, Desc3+" whiskers", "lw 1"); TFltPrV BestWhiskV; BestWhiskV.Add(LcStat3.BestWhisk); GP.AddPlot(BestWhiskV, gpwPoints, Desc3+" Best whisker", "pt 5 ps 2"); } GP.SetScale(gpsLog10XY); GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)"); GP.SetXRange(1, Graph->GetNodes()/2); GP.SavePng(); }
void TLocClustStat::MakeExpBins | ( | const TFltPrV & | ValV, |
TFltPrV & | NewV | ||
) | [static] |
Definition at line 1020 of file ncp.cpp.
References TVec< TVal, TSizeTy >::Add(), BinFactor, TVec< TVal, TSizeTy >::Clr(), TVec< TVal, TSizeTy >::Empty(), TVec< TVal, TSizeTy >::Gen(), TVec< TVal, TSizeTy >::Last(), TVec< TVal, TSizeTy >::Len(), TFlt::Mx, and TPair< TVal1, TVal2 >::Val1.
Referenced by GetBoltzmanCurveStat(), GetCurveStat(), PlotBestClustDens(), TLocClust::PlotCutDistr(), PlotNCPModul(), TLocClust::PlotPhiDistr(), and TLocClust::PlotVolDistr().
{ if (ValV.Empty()) { NewV.Clr(false); return; } NewV.Gen(1000, 0); double PrevBPos = 1, BPos = 1; int i = 0; while (BPos <= ValV.Last().Val1) { //if (TakeValAt == 1) { // while (i < ValV.Len() && ValV[i].Val1 <= BPos) { i++; } // NewV.Add(ValV[i-1]); } //else if (TakeValAt == 2) { int MinI=-1; double MinCnt=TFlt::Mx; while (i < ValV.Len() && ValV[i].Val1 <= BPos) { if (ValV[i].Val2 < MinCnt) { MinCnt=ValV[i].Val2; MinI=i; } i++; } if (MinI>=0 && MinI<ValV.Len()) { NewV.Add(ValV[MinI]); } PrevBPos = (uint) floor(BPos); BPos *= BinFactor; if (floor(BPos) == PrevBPos) { BPos = PrevBPos + 1; } } NewV.Add(ValV.Last()); }
void TLocClustStat::MakeExpBins | ( | const TFltV & | ValV, |
TFltV & | NewV | ||
) | [static] |
Definition at line 1042 of file ncp.cpp.
References TVec< TVal, TSizeTy >::Add(), BinFactor, TVec< TVal, TSizeTy >::Clr(), TVec< TVal, TSizeTy >::Empty(), TVec< TVal, TSizeTy >::Gen(), TVec< TVal, TSizeTy >::Last(), TVec< TVal, TSizeTy >::Len(), and TFlt::Mx.
{ if (ValV.Empty()) { NewV.Clr(false); return; } NewV.Gen(1000, 0); double PrevBPos = 1, BPos = 1; int i = 1; NewV.Add(ValV[0]); while (BPos <= ValV.Len()) { int MinI=-1; double MinCnt=TFlt::Mx; while (i < ValV.Len() && i <= BPos) { if (ValV[i] < MinCnt) { MinCnt=ValV[i]; MinI=i; } i++; } if (MinI>=0 && MinI<ValV.Len()) { NewV.Add(ValV[MinI]); } PrevBPos = (uint) floor(BPos); BPos *= BinFactor; if (floor(BPos) == PrevBPos) { BPos = PrevBPos + 1; } } NewV.Add(ValV.Last()); }
TStr TLocClustStat::ParamStr | ( | ) | const |
Definition at line 555 of file ncp.cpp.
References Alpha, Coverage, TPt< TRec >::Empty(), TStr::Fmt(), TUNGraph::GetEdges(), TInt::GetMegaStr(), TUNGraph::GetNodes(), Graph, KFac, KMax, KMin, and SizeFrac.
Referenced by ImposeNCP(), PlotBestClustDens(), PlotBoltzmanCurve(), PlotNCP(), PlotNCPModul(), PlotNCPScatter(), PlotNcpTop10(), PlotPhiDistr(), PlotPhiInOut(), and SaveTxtInfo().
{ if (Graph.Empty()) { return TStr::Fmt("A=%g, K=%d-%g-%s, Cvr=%d, SzFrc=%g", Alpha(), KMin(), KFac(), TInt::GetMegaStr(KMax()).CStr(), Coverage(), SizeFrac()); } else { return TStr::Fmt("A=%g, K=%d-%g-%s, Cvr=%d, SzFrc=%g G(%d, %d)", Alpha(), KMin(), KFac(), TInt::GetMegaStr(KMax()).CStr(), Coverage(), SizeFrac(), Graph->GetNodes(), Graph->GetEdges()); } }
void TLocClustStat::PlotBestClustDens | ( | TStr | OutFNm, |
TStr | Desc = TStr() |
||
) | const |
Definition at line 689 of file ncp.cpp.
References TVec< TVal, TSizeTy >::Add(), TGnuPlot::AddCmd(), TGnuPlot::AddPlot(), BestCutH, TStr::CStr(), TStr::Empty(), TStr::Fmt(), THash< TKey, TDat, THashFunc >::GetKey(), gpsLog10XY, gpwLines, THash< TKey, TDat, THashFunc >::Len(), MakeExpBins(), ParamStr(), TGnuPlot::RunGnuPlot(), TGnuPlot::SavePng(), TGnuPlot::SetScale(), TGnuPlot::SetXYLabel(), and TInt::Val.
{ if (Desc.Empty()) { Desc = OutFNm; } const int len = BestCutH.Len(); TFltPrV CutV(len, 0), EdgesV(len, 0), PhiV(len,0); for (int i = 0; i < BestCutH.Len(); i++) { const double size = BestCutH.GetKey(i).Val; CutV.Add(TFltPr(size, BestCutH[i].GetCutSz())); EdgesV.Add(TFltPr(size, BestCutH[i].GetEdges())); PhiV.Add(TFltPr(size, BestCutH[i].GetPhi())); } TGnuPlot GP("cutEdges."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr())); TFltPrV NewV; TLocClustStat::MakeExpBins(EdgesV, NewV); GP.AddPlot(NewV, gpwLines, "Edges inside"); TLocClustStat::MakeExpBins(CutV, NewV); GP.AddPlot(NewV, gpwLines, "Cut edges"); TLocClustStat::MakeExpBins(PhiV, NewV); GP.AddPlot(NewV, gpwLines, "Conductance"); GP.SetXYLabel("Cluster size", "Edges"); GP.SetScale(gpsLog10XY); GP.AddCmd("set logscale xyy2 10"); GP.AddCmd("set y2label \"Conductance\""); GP.SavePng(); system(TStr(TStr("replace_all.py cutEdges.")+OutFNm+".plt \"title \\\"Conductance\" \"axis x1y2 title \\\"Conductance\"").CStr()); GP.RunGnuPlot(); }
void TLocClustStat::PlotBoltzmanCurve | ( | const TStr & | OutFNm, |
TStr | Desc = TStr() |
||
) | const |
Definition at line 747 of file ncp.cpp.
References TVec< TVal, TSizeTy >::Add(), THash< TKey, TDat, THashFunc >::AddDat(), TGnuPlot::AddPlot(), TStr::CStr(), TStr::Fmt(), GetBoltzmanCurveStat(), GetCurveStat(), TUNGraph::GetEdges(), THash< TKey, TDat, THashFunc >::GetKey(), TUNGraph::GetNodes(), TVec< TFlt >::GetV(), gpsLog, gpsLog10XY, gpwLines, Graph, THash< TKey, TDat, THashFunc >::Len(), TVec< TVal, TSizeTy >::Len(), ParamStr(), TGnuPlot::PlotValCntH(), TGnuPlot::PlotValV(), TMath::Round(), TGnuPlot::SavePng(), TGnuPlot::SetScale(), TGnuPlot::SetXYLabel(), and SizePhiH.
Referenced by TLocClust::PlotNCP().
{ TFltPrV MeanV1, MedV1, Dec1V1, MinV1, MaxV1; GetCurveStat(MeanV1, MedV1, Dec1V1, MinV1, MaxV1); TVec<TFltPrV> NcpV; //const TFltV TempV = TFltV::GetV(0.05, 0.01, 0.1, 10, 100, 1000); const TFltV TempV = TFltV::GetV(0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.5, 1); GetBoltzmanCurveStat(TempV, NcpV); TGnuPlot GP("ncp."+OutFNm+"-B", TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr())); GP.AddPlot(MinV1, gpwLines, TStr::Fmt("%s MIN (%d, %d)", Desc.CStr(), Graph->GetNodes(), Graph->GetEdges()), "lw 1"); GP.AddPlot(MeanV1, gpwLines, "Avg", "lw 1"); GP.AddPlot(MedV1, gpwLines, "Median", "lw 1"); GP.AddPlot(Dec1V1, gpwLines, "Decile-1", "lw 1"); for (int t = 0; t < TempV.Len(); t++) { GP.AddPlot(NcpV[t], gpwLines, TStr::Fmt("Temp %g", TempV[t]()), "lw 1"); } GP.SetScale(gpsLog10XY); GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)"); GP.SavePng(); // TFltPrV SzNClustV; int kCnt=1; for (int i = 0; i < SizePhiH.Len(); i++) { const int K = SizePhiH.GetKey(i); const TFltV& PhiV = SizePhiH[i]; SzNClustV.Add(TFltPr(K, PhiV.Len())); if (K>2 && (pow(10.0, (int)log10((double)K))==K || (K >=10 && K<=100 && K%10==0) || (K >=100 && K<=1000 && K%100==0))) { THash<TFlt, TFlt> CntH; for (int p = 0; p < PhiV.Len(); p++) { CntH.AddDat(TMath::Round(log10(PhiV[p].Val),1)) += 1; } TGnuPlot::PlotValCntH(CntH, TStr::Fmt("ncp.%s-c%02d", OutFNm.CStr(), kCnt++), TStr::Fmt("%s. K=%d, NPieces=%d, %s", Desc.CStr(), K, PhiV.Len(), ParamStr().CStr()), "log_10 {/Symbol \\F} (conductance)", TStr::Fmt("Number of pieces of such conductance, K=%d, NPieces=%d)", K, PhiV.Len())); } } TGnuPlot::PlotValV(SzNClustV, "ncp."+OutFNm+"-ClSz", TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()), "k (cluster size)", "c(k) (number of extracted clusters)", gpsLog); }
void TLocClustStat::PlotNCP | ( | const TStr & | OutFNm, |
TStr | Desc = TStr() |
||
) | const |
Definition at line 564 of file ncp.cpp.
References TVec< TVal, TSizeTy >::Add(), TGnuPlot::AddPlot(), BagOfWhiskerV, BestWhisk, TStr::CStr(), TStr::Empty(), TVec< TVal, TSizeTy >::Empty(), TStr::Fmt(), GetCurveStat(), TUNGraph::GetNodes(), gpsLog10XY, gpwLines, gpwPoints, Graph, ParamStr(), TGnuPlot::SavePng(), TGnuPlot::SetScale(), TGnuPlot::SetXRange(), and TGnuPlot::SetXYLabel().
{ if (Desc.Empty()) { Desc = OutFNm; } TFltPrV MeanV, MedV, Dec1V, MinV, MaxV; GetCurveStat(MeanV, MedV, Dec1V, MinV, MaxV); TGnuPlot GP("ncp."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr())); if (! MaxV.Empty()) { GP.AddPlot((MaxV), gpwLines, "MAX"); } if (! MedV.Empty()) { GP.AddPlot((MedV), gpwLines, "MEDIAN"); } if (! MeanV.Empty()) { GP.AddPlot((MeanV), gpwLines, "MEAN"); } if (! Dec1V.Empty()) { GP.AddPlot((Dec1V), gpwLines, "1-st DECILE"); } if (! MinV.Empty()) { GP.AddPlot((MinV), gpwLines, "MIN"); //GP.AddPlot(MinV, gpwLines, "smooth MIN", "lw 2 smooth bezier"); } if (! BagOfWhiskerV.Empty()) { GP.AddPlot(BagOfWhiskerV, gpwLines, "Whiskers", "lw 1"); TFltPrV BestWhiskV; BestWhiskV.Add(TFltPr(BestWhisk)); GP.AddPlot(BestWhiskV, gpwPoints, "Best whisker", "pt 5 ps 2"); } GP.SetScale(gpsLog10XY); GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)"); GP.SetXRange(1, Graph->GetNodes()/2); GP.SavePng(); }
void TLocClustStat::PlotNCPModul | ( | const TStr & | OutFNm, |
TStr | Desc = TStr() |
||
) | const |
Definition at line 589 of file ncp.cpp.
References TVec< TVal, TSizeTy >::Add(), TGnuPlot::AddPlot(), BestCutH, TStr::CStr(), TStr::Empty(), TVec< TVal, TSizeTy >::Empty(), TStr::Fmt(), TUNGraph::GetEdges(), THash< TKey, TDat, THashFunc >::GetKey(), TUNGraph::GetNodes(), gpsLog10XY, gpwLines, Graph, THash< TKey, TDat, THashFunc >::Len(), MakeExpBins(), ParamStr(), TGnuPlot::SavePng(), TGnuPlot::SetScale(), TGnuPlot::SetXRange(), TGnuPlot::SetXYLabel(), TVec< TVal, TSizeTy >::Sort(), TVec< TVal, TSizeTy >::Swap(), and TInt::Val.
{ if (Desc.Empty()) { Desc = OutFNm; } TFltPrV MinV, BucketV; const int GEdges = Graph->GetEdges(); for (int i = 0; i < BestCutH.Len(); i++) { MinV.Add(TFltPr(BestCutH.GetKey(i).Val, BestCutH[i].GetModular(GEdges))); } MinV.Sort(); TLocClustStat::MakeExpBins(MinV, BucketV); MinV.Swap(BucketV); TGnuPlot GP("ncpMod."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr())); if (! MinV.Empty()) { GP.AddPlot((MinV), gpwLines, "MIN"); } GP.SetScale(gpsLog10XY); GP.SetXYLabel("k (number of nodes in the cluster)", "Q (modularity)"); GP.SetXRange(1, Graph->GetNodes()/2); GP.SavePng(); }
void TLocClustStat::PlotNCPScatter | ( | const TStr & | OutFNm, |
TStr | Desc = TStr() |
||
) | const |
Definition at line 715 of file ncp.cpp.
References THashSet< TKey, THashFunc >::AddKey(), TGnuPlot::AddPlot(), TStr::CStr(), THash< TKey, TDat, THashFunc >::Empty(), TStr::Empty(), TStr::Fmt(), THash< TKey, TDat, THashFunc >::GetKey(), THashSet< TKey, THashFunc >::GetKeyV(), TUNGraph::GetNodes(), gpsLog10XY, gpwPoints, Graph, IAssertR, THash< TKey, TDat, THashFunc >::Len(), TVec< TVal, TSizeTy >::Len(), ParamStr(), TMath::Round(), TGnuPlot::SavePng(), TGnuPlot::SetScale(), TGnuPlot::SetXRange(), TGnuPlot::SetXYLabel(), and SizePhiH.
{ if (Desc.Empty()) { Desc = OutFNm; } THashSet<TFltPr> PhiSzH; IAssertR(! SizePhiH.Empty(), "Set SaveAllCond=true in TLocClustStat::Run()"); for (int k = 0; k < SizePhiH.Len(); k++) { const int K = SizePhiH.GetKey(k); const TFltV& PhiV = SizePhiH[k]; for (int p = 0; p < PhiV.Len(); p++) { PhiSzH.AddKey(TFltPr(K, TMath::Round(PhiV[p], 3))); } } TFltPrV PhiSzV; PhiSzH.GetKeyV(PhiSzV); TGnuPlot GP("ncpScatter."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr())); GP.AddPlot(PhiSzV, gpwPoints, "", "pt 5 ps 0.2"); GP.SetScale(gpsLog10XY); GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)"); GP.SetXRange(1, Graph->GetNodes()/2); GP.SavePng(); }
void TLocClustStat::PlotNcpTop10 | ( | const TStr & | OutFNm, |
TStr | Desc, | ||
const int & | TakeMinN | ||
) | const |
Definition at line 607 of file ncp.cpp.
References TVec< TVal, TSizeTy >::Add(), TGnuPlot::AddPlot(), BinFactor, TStr::CStr(), TStr::Empty(), FindBestCut(), TStr::Fmt(), TUNGraph::GetNodes(), gpsLog10XY, gpwLines, Graph, TVec< TVal, TSizeTy >::Len(), ParamStr(), TGnuPlot::SavePng(), TGnuPlot::SetScale(), TGnuPlot::SetXRange(), TGnuPlot::SetXYLabel(), and SweepsV.
{ if (Desc.Empty()) { Desc = OutFNm; } const double BinFactor = 1.05; double BinPos=1; int PrevBPos=1, CurBPos=1, CutId; bool AddSome; TVec<TFltPrV> Curves(TakeMinN); while (true) { PrevBPos = CurBPos; BinPos *= BinFactor; // do exponential binning CurBPos = (int) floor(BinPos); if (CurBPos == PrevBPos) { CurBPos=PrevBPos+1; BinPos=CurBPos; } const int Nodes = CurBPos; TIntSet TabuNIdSet(Graph->GetNodes()); AddSome = false; for (int t = 0; t < TakeMinN; t++) { const double Phi = FindBestCut(Nodes, TabuNIdSet, CutId); if (CutId == -1) { break; } Curves[t].Add(TFltPr(Nodes, Phi)); for (int n = 0; n < Nodes; n++) { TabuNIdSet.AddKey(SweepsV[CutId].NId(n)); } AddSome = true; } if (! AddSome) { break; } } TGnuPlot GP("ncpTop."+OutFNm, TStr::Fmt("%s. Top disjoint clusters. Take:%d, %s", Desc.CStr(), TakeMinN, ParamStr().CStr())); for (int i = 0; i < Curves.Len(); i++) { GP.AddPlot(Curves[i], gpwLines, TStr::Fmt("MIN %d", i+1), "lw 1"); } //if (! BagOfWhiskerV.Empty()) { GP.AddPlot(BagOfWhiskerV, gpwLines, " Whiskers", "lw 2"); } GP.SetScale(gpsLog10XY); GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)"); GP.SetXRange(1, Graph->GetNodes()/2); GP.SavePng(); }
void TLocClustStat::PlotPhiDistr | ( | const int & | CmtySz, |
const TStr & | OutFNm, | ||
TStr | Desc = TStr() |
||
) | const |
Definition at line 735 of file ncp.cpp.
References THash< TKey, TDat, THashFunc >::AddDat(), TStr::CStr(), THash< TKey, TDat, THashFunc >::Empty(), TStr::Fmt(), THash< TKey, TDat, THashFunc >::GetDat(), IAssert, ParamStr(), TGnuPlot::PlotValCntH(), TMath::Round(), and SizePhiH.
{ IAssert(! SizePhiH.Empty()); const TFltV& PhiV = SizePhiH.GetDat(CmtySz); THash<TFlt, TInt> PhiCntH; for (int i = 0; i < PhiV.Len(); i++) { const double Buck = TMath::Round(PhiV[i], 3); PhiCntH.AddDat(Buck) += 1; } TGnuPlot::PlotValCntH(PhiCntH, TStr::Fmt("phiDistr%03d.%s", CmtySz, OutFNm.CStr()), TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()), "{/Symbol \\F} (conductance)", "Count"); }
void TLocClustStat::PlotPhiInOut | ( | const TStr & | OutFNm, |
TStr | Desc = TStr() |
||
) | const |
Definition at line 643 of file ncp.cpp.
References TGnuPlot::AddPlot(), Alpha, BestCutH, Coverage, TStr::CStr(), TLocClustStat::TCutInfo::CutNIdV, THash< TKey, TDat, THashFunc >::Empty(), TPt< TRec >::Empty(), FindBestCut(), TStr::Fmt(), TLocClustStat::TCutInfo::GetCutSz(), TLocClustStat::TCutInfo::GetEdges(), TLocClustStat::TCutInfo::GetNodes(), TUNGraph::GetNodes(), TLocClustStat::TCutInfo::GetPhi(), TSnap::GetSubGraph(), TLocClustStat::TCutInfo::GetVol(), gpsLog10XY, gpwLines, Graph, IAssert, KFac, KMax, THash< TKey, TDat, THashFunc >::Len(), ParamStr(), TMath::Round(), Run(), TGnuPlot::SavePng(), TGnuPlot::SetScale(), TGnuPlot::SetXRange(), TGnuPlot::SetXYLabel(), and SizeFrac.
{ IAssert(! BestCutH.Empty() && ! Graph.Empty()); TFltPrV PhiInV, PhiBoundV, PhiRatV; FILE *F = fopen(TStr::Fmt("phiInOut.%s-all.tab", OutFNm.CStr()).CStr(), "wt"); fprintf(F, "#Nodes\tEdges\tVol\tCut\tPhi\tInNodes\tInEdges\tInVol\tInCut\tInPhi\n"); TLocClustStat ClustStat2(Alpha, 1, KMax, KFac, Coverage, SizeFrac); const double IdxFact = 1.05; int curIdx=1, prevIdx=1; while (curIdx <= BestCutH.Len()) { const TCutInfo& CutInfo = BestCutH[curIdx-1]; if (CutInfo.GetNodes() > 1) { PUNGraph ClustG = TSnap::GetSubGraph(Graph, CutInfo.CutNIdV); ClustStat2.Run(ClustG); const TCutInfo& InCut = ClustStat2.FindBestCut(-1); PhiInV.Add(TFltPr(CutInfo.GetNodes(), InCut.GetPhi())); PhiBoundV.Add(TFltPr(CutInfo.GetNodes(), CutInfo.GetPhi())); PhiRatV.Add(TFltPr(CutInfo.GetNodes(), InCut.GetPhi()/CutInfo.GetPhi())); fprintf(F, "%d\t%d\t%d\t%d\t%f\t%d\t%d\t%d\t%d\t%f\n", CutInfo.GetNodes(), CutInfo.GetEdges(), CutInfo.GetVol(), CutInfo.GetCutSz(), CutInfo.GetPhi(), InCut.GetNodes(), InCut.GetEdges(), InCut.GetVol(), InCut.GetCutSz(), InCut.GetPhi()); fflush(F); } prevIdx = curIdx; curIdx = (int) TMath::Round(double(curIdx)*IdxFact); if (prevIdx == curIdx) { curIdx++; } } fclose(F); { TGnuPlot GP("phiInOut."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr())); GP.AddPlot(PhiBoundV, gpwLines, "CUT conductance", "lw 1"); GP.AddPlot(PhiInV, gpwLines, "INSIDE conductance", "lw 1"); //GP.AddPlot(PhiRatV, gpwLines, "RATIO (inside/boundary)", "lw 1"); GP.SetXRange(1, Graph->GetNodes()/2); GP.SetScale(gpsLog10XY); GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)"); //GP.AddCmd("set logscale xyy2 10"); //GP.AddCmd("set y2label \"Conductance ratio (inside/boundary -- higher better)\""); GP.SavePng(); } //system(TStr(TStr("replace_all.py phiInOut.")+OutFNm+".plt \"title \\\"RATIO (inside/boundary)\" \"axis x1y2 title \\\"RATIO (inside/boundary)\"").CStr()); //GP.RunGnuPlot(); { TGnuPlot GP("phiInOutRat."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr())); GP.AddPlot(PhiRatV, gpwLines, "RATIO (Inside / Boundary)", "lw 1"); GP.SetXRange(1, Graph->GetNodes()/2); GP.SetScale(gpsLog10XY); GP.SetXYLabel("Nodes", "Conductance ratio (inside/boundary) -- higher better"); GP.SavePng(); } }
void TLocClustStat::Run | ( | const PUNGraph & | Graph, |
const bool & | SaveAllSweeps = false , |
||
const bool & | SaveAllCond = false , |
||
const bool & | SaveBestNodesAtK = false |
||
) |
pow(1.1, cnt)
Definition at line 300 of file ncp.cpp.
References TVec< TVal, TSizeTy >::Add(), THash< TKey, TDat, THashFunc >::AddDat(), THashSet< TKey, THashFunc >::AddKey(), Alpha, BestCut, BestCutH, TLocClust::BestCutNodes(), BinFactor, Clr(), TVec< TVal, TSizeTy >::Clr(), THashSet< TKey, THashFunc >::Clr(), Coverage, TLocClustStat::TCutInfo::CutNIdV, TLocClustStat::TCutInfo::CutSz, TLocClustStat::TCutInfo::Edges, THashSet< TKey, THashFunc >::Empty(), TLocClust::FindBestCut(), TExeTm::GetCurTm(), TLocClust::GetCut(), TLocClust::GetCutPhi(), TLocClust::GetCutVol(), THash< TKey, TDat, THashFunc >::GetDat(), TUNGraph::GetEdges(), TSnap::GetMxWcc(), TLocClust::GetNIdV(), TUNGraph::GetNodes(), TLocClust::GetPhi(), TLocClustStat::TCutInfo::GetPhi(), TLocClust::GetPhiV(), TUNGraph::GetRndNId(), TExeTm::GetStr(), TVec< TVal, TSizeTy >::GetSubValV(), TLocClust::GetVol(), Graph, IAssert, THash< TKey, TDat, THashFunc >::IsKey(), THashSet< TKey, THashFunc >::IsKey(), KFac, KMax, KMin, TLocClust::Len(), THash< TKey, TDat, THashFunc >::Len(), Mega, TFlt::Mx, TMath::Round(), SizeBucketSet, SizeFrac, SizePhiH, THash< TKey, TDat, THashFunc >::Sort(), TVec< TVal, TSizeTy >::Sort(), THash< TKey, TDat, THashFunc >::SortByKey(), SweepsV, TExeTm::Tick(), and TLocClust::Verbose.
Referenced by TLocClust::PlotNCP(), and PlotPhiInOut().
{ Graph = TSnap::GetMxWcc(_Graph); const int Nodes = Graph->GetNodes(); const int Edges = Graph->GetEdges(); printf("\nLocal clustering: Graph(%d, %d)\n", Nodes, Edges); printf(" Alpha: %g\n", Alpha()); printf(" K: %d -- %g -- %dm\n", KMin(), KFac(), int(KMax/Mega(1))); printf(" Coverage: %d\n", Coverage()); printf(" SizeFrac: %g\n\n", SizeFrac()); TExeTm TotTm; Clr(); TLocClust Clust(Graph, Alpha); BestCut.CutNIdV.Clr(false); BestCut.CutSz=-1; BestCut.Edges=-1; double BestPhi = TFlt::Mx; int prevK=-1; bool NextDone=false; if (SaveBestNodesAtK) { // fill buckets (only store nodes in clusters for sizes in SizeBucketSet) SizeBucketSet.Clr(); double PrevBPos = 1, BPos = 1; while (BPos <= Mega(100)) { PrevBPos = (uint) floor(BPos); BPos *= BinFactor; if (floor(BPos) == PrevBPos) { BPos = PrevBPos + 1; } SizeBucketSet.AddKey(int(floor(BPos) - 1)); } } for (int K = KMin, cnt=1; K < KMax; K = int(KFac * double(K))+1, cnt++) { if (K == prevK) { K++; } prevK = K; const int Runs = 2 + int(Coverage * floor(double(Graph->GetEdges()) / double(K))); printf("%d] K: %d, %d runs\n", cnt+1, K, Runs); if (NextDone) { break; } // done if (K+1 > 2*Graph->GetEdges()) { K = Graph->GetEdges(); NextDone=true; } //if (K+1 > Graph->GetEdges()) { K = Graph->GetEdges(); NextDone=true; } TExeTm ExeTm, IterTm; double MeanSz=0.0, MeanVol=0.0, Count=0.0; for (int run = 0; run < Runs; run++) { const int SeedNId = Graph->GetRndNId(); IterTm.Tick(); Clust.FindBestCut(SeedNId, K, SizeFrac); const int Sz = Clust.BestCutNodes(); const int Vol = Clust.GetCutVol(); const double Phi = TMath::Round(Clust.GetCutPhi(), 4); if (Sz == 0 || Vol == 0 || Phi == 0) { continue; } MeanSz+=Sz; MeanVol+=Vol; Count+= 1; if (SaveAllSweeps) { // save the full cut set and conductances for all trials SweepsV.Add(TNodeSweep(SeedNId, Clust.GetNIdV(), Clust.GetPhiV())); } int SAtBestPhi=-1; for (int s = 0; s < Clust.Len(); s++) { const int size = s+1; const int cut = Clust.GetCut(s); const int edges = (Clust.GetVol(s)-cut)/2; const double phi = Clust.GetPhi(s); if (( Clust.GetPhi(s) != double(cut)/double(2*edges+cut))) { continue; } // more than half of the edges IAssert((Clust.GetVol(s) - cut) % 2 == 0); IAssert(phi == double(cut)/double(2*edges+cut)); IAssert(phi >= 1.0/double((1+s)*s+1)); // TCutInfo CutInfo(size, edges, cut); Clust.GetNIdV().GetSubValV(0, size-1, CutInfo.CutNIdV); //double MxFrac=0, AvgFrac=0, MedianFrac=0, Pct9Frac=0, Flake=0; //CutInfo.GetFracDegOut(Graph, MxFrac, AvgFrac, MedianFrac, Pct9Frac, Flake); //const double phi = MxFrac; if (BestPhi >= phi) { BestPhi = phi; BestCut = TCutInfo(size, edges, cut); SAtBestPhi = s; } //bool TAKE=false; if (! BestCutH.IsKey(size)) { TAKE=true; } //else { BestCutH.GetDat(size).GetFracDegOut(Graph, MxFrac, AvgFrac, MedianFrac, Pct9Frac, Flake); if (MxFrac >= phi) { TAKE = true; } } // if (TAKE) { if (! BestCutH.IsKey(size) || BestCutH.GetDat(size).GetPhi() >= phi) { //new best cut (size, edges inside and nodes) BestCutH.AddDat(size, TCutInfo(size, edges, cut)); // for every size store best cut (NIds inside the cut) if (SaveBestNodesAtK) { // store node ids in best community for each size k if (! SizeBucketSet.Empty() && ! SizeBucketSet.IsKey(size)) { continue; } // only save best clusters at SizeBucketSet Clust.GetNIdV().GetSubValV(0, size-1, BestCutH.GetDat(size).CutNIdV); } } if (SaveAllCond) { // for every size store all conductances SizePhiH.AddDat(size).Add(phi); } } if (SAtBestPhi != -1) { // take nodes in best cluster const int size = SAtBestPhi+1; Clust.GetNIdV().GetSubValV(0, size-1, BestCut.CutNIdV); } if (TLocClust::Verbose) { printf("."); if (run % 50 == 0) { printf("\r %d / %d \r", run, Runs); } } } if (TLocClust::Verbose) { printf("\r %d / %d: %s \n", Runs, Runs, ExeTm.GetStr()); } MeanSz/=Count; MeanVol/=Count; printf(" Graph(%d, %d) ", Nodes, Edges); printf(" mean: sz: %.2f vol: %.2f [%s] %s\n", MeanSz, MeanVol, ExeTm.GetStr(), TExeTm::GetCurTm()); } SizePhiH.SortByKey(); for (int k = 0; k < SizePhiH.Len(); k++) { SizePhiH[k].Sort(); } SweepsV.Sort(); BestCutH.SortByKey(); printf("DONE. Graph(%d, %d): Total run time: %s\n\n", Nodes, Edges, TotTm.GetStr()); }
void TLocClustStat::Save | ( | TSOut & | SOut | ) | const |
Definition at line 280 of file ncp.cpp.
References Alpha, Coverage, KFac, KMax, KMin, TInt::Save(), TFlt::Save(), and SizeFrac.
{ // parameters Alpha.Save(SOut); SizeFrac.Save(SOut); KFac.Save(SOut); KMin.Save(SOut); KMax.Save(SOut); Coverage.Save(SOut); // cluster stat //SweepsV.Save(SOut); //SizePhiH.Save(SOut); }
void TLocClustStat::SaveTxtInfo | ( | const TStr & | OutFNmPref, |
const TStr & | Desc, | ||
const bool & | SetMaxAt1 | ||
) | const |
Definition at line 841 of file ncp.cpp.
References TVec< TVal, TSizeTy >::Add(), BestCutH, TStr::CStr(), TLocClustStat::TCutInfo::CutSz, TLocClustStat::TCutInfo::Edges, TStr::Fmt(), TLocClustStat::TCutInfo::GetCutRatio(), TUNGraph::GetEdges(), TLocClustStat::TCutInfo::GetExpansion(), TLocClustStat::TCutInfo::GetExpEdgesIn(), TLocClustStat::TCutInfo::GetFracDegOut(), TLocClustStat::TCutInfo::GetIntDens(), GetKNodeCut(), TLocClustStat::TCutInfo::GetModRat(), TLocClustStat::TCutInfo::GetModular(), TUNGraph::GetNodes(), TLocClustStat::TCutInfo::GetNormCut(), TLocClustStat::TCutInfo::GetPhi(), TExeTm::GetStr(), gpsLog10XY, gpwLines, gpwPoints, Graph, THash< TKey, TDat, THashFunc >::IsKey(), TVec< TVal, TSizeTy >::Len(), THashSet< TKey, THashFunc >::Len(), TMath::Mx(), TLocClustStat::TCutInfo::Nodes, ParamStr(), and SizeBucketSet.
Referenced by TLocClust::PlotNCP().
{ printf("Save text info..."); TExeTm ExeTm; const int GNodes = Graph->GetNodes(); const int GEdges = Graph->GetEdges(); TVec<TFltV> ColV(17); double MxFrac=0, AvgFrac=0, MedianFrac=0, Pct9Frac=0, Flake=0; // double PrevBPos = 1, BPos = 1; for (int i = 0; i < SizeBucketSet.Len(); i++) { if ( !BestCutH.IsKey(SizeBucketSet[i])) { continue; } const TLocClustStat::TCutInfo& C = GetKNodeCut(SizeBucketSet[i]); C.GetFracDegOut(Graph, MxFrac, AvgFrac, MedianFrac, Pct9Frac, Flake); ColV[0].Add(C.Nodes()); ColV[1].Add(C.Edges()); ColV[2].Add(C.CutSz()); ColV[3].Add(C.GetPhi()); ColV[4].Add(C.GetExpansion()); ColV[5].Add(C.GetIntDens()); ColV[6].Add(C.GetCutRatio(GNodes)); ColV[7].Add(C.GetNormCut(GEdges)); ColV[8].Add(MxFrac); ColV[9].Add(AvgFrac); ColV[10].Add(MedianFrac); ColV[11].Add(Pct9Frac); ColV[12].Add(Flake); ColV[13].Add(double(2.0*C.Edges)); ColV[14].Add(C.GetExpEdgesIn(GEdges)); ColV[15].Add(C.GetModular(GEdges)); ColV[16].Add(C.GetModRat(GEdges)); printf("."); } // normalize so that maximum value is at 1 if (SetMaxAt1) { for (int c = 1; c < ColV.Len(); c++) { double MaxVal=1e-10; for (int r = 0; r < ColV[c].Len(); r++) { MaxVal = TMath::Mx(MaxVal, ColV[c][r]()); } for (int r = 0; r < ColV[c].Len(); r++) { ColV[c][r] /= MaxVal; } } } // save const TStr DatFNm = TStr::Fmt("ncp.%s.INFO.tab", OutFNmPref.CStr()); FILE *F = fopen(DatFNm.CStr(), "wt"); fprintf(F, "# %s %s\n", Desc.CStr(), ParamStr().CStr()); fprintf(F, "#N_inside\tE_inside\tE_across\tConductance\tExpansion\tIntDensity\tCutRatio\tNormCut\tMx_FracDegOut\tAvg_FDO\tMedian_FDO\t90Pct_FDO\tFlake_FDO\tVolume\tExpVolume\tModularity\tModRatio\n"); for (int r = 0; r < ColV[0].Len(); r++) { fprintf(F, "%g", ColV[0][r]()); for (int c = 1; c < ColV.Len(); c++) { fprintf(F, "\t%g", ColV[c][r]()); } fprintf(F, "\n"); } fclose(F); printf("[%s]\n", ExeTm.GetStr()); TGnuPlot GP(TStr::Fmt("ncp.%s.All", OutFNmPref.CStr()), TStr::Fmt("%s %s", Desc.CStr(), ParamStr().CStr())); GP.AddPlot(DatFNm, 1, 4, gpwLines, "Conductance", "lw 2"); GP.AddPlot(DatFNm, 1, 5, gpwPoints, "Expansion", "pt 3"); GP.AddPlot(DatFNm, 1, 6, gpwPoints, "Internal Density", "pt 5 ps 0.8"); GP.AddPlot(DatFNm, 1, 7, gpwPoints, "Cut Ratio", "pt 6"); GP.AddPlot(DatFNm, 1, 8, gpwPoints, "Normalized Cut", "pt 7"); GP.AddPlot(DatFNm, 1, 9, gpwPoints, "Maximum FDO", "pt 9"); GP.AddPlot(DatFNm, 1, 10, gpwPoints, "Avg FDO", "pt 11"); GP.AddPlot(DatFNm, 1, 13, gpwPoints, "Flake FDO", "pt 13"); GP.SetScale(gpsLog10XY); GP.SetXYLabel("k (number of nodes in the cluster)", "Normalized community score (lower is better)"); GP.SavePng(); }
void TLocClustStat::SetGraph | ( | const PUNGraph & | GraphPt | ) | [inline] |
TFlt TLocClustStat::Alpha [private] |
Definition at line 178 of file ncp.h.
Referenced by ParamStr(), PlotPhiInOut(), Run(), and Save().
Definition at line 186 of file ncp.h.
Referenced by AddBagOfWhiskers(), Clr(), GetBagOfWhiskersV(), ImposeNCP(), and PlotNCP().
Definition at line 188 of file ncp.h.
Referenced by GetBestCut(), and Run().
Definition at line 185 of file ncp.h.
Referenced by AddToBestCutH(), Clr(), FindBestCut(), GetCurveStat(), GetCutN(), GetCuts(), GetKNodeCut(), PlotBestClustDens(), PlotNCPModul(), PlotPhiInOut(), Run(), and SaveTxtInfo().
Definition at line 187 of file ncp.h.
Referenced by AddBagOfWhiskers(), BestWhiskEdges(), BestWhiskNodes(), BestWhiskPhi(), GetBestWhisk(), ImposeNCP(), and PlotNCP().
double TLocClustStat::BinFactor = 1.01 [static] |
Definition at line 127 of file ncp.h.
Referenced by MakeExpBins(), PlotNcpTop10(), and Run().
TInt TLocClustStat::Coverage [private] |
Definition at line 179 of file ncp.h.
Referenced by ParamStr(), PlotPhiInOut(), Run(), and Save().
PUNGraph TLocClustStat::Graph [private] |
Definition at line 180 of file ncp.h.
Referenced by AddBagOfWhiskers(), AddCut(), ImposeNCP(), ParamStr(), PlotBoltzmanCurve(), PlotNCP(), PlotNCPModul(), PlotNCPScatter(), PlotNcpTop10(), PlotPhiInOut(), Run(), SaveTxtInfo(), and SetGraph().
TFlt TLocClustStat::KFac [private] |
Definition at line 178 of file ncp.h.
Referenced by ParamStr(), PlotPhiInOut(), Run(), and Save().
TInt TLocClustStat::KMax [private] |
Definition at line 179 of file ncp.h.
Referenced by ParamStr(), PlotPhiInOut(), Run(), and Save().
TInt TLocClustStat::KMin [private] |
Definition at line 179 of file ncp.h.
Referenced by ParamStr(), Run(), and Save().
Definition at line 189 of file ncp.h.
Referenced by Run(), and SaveTxtInfo().
TFlt TLocClustStat::SizeFrac [private] |
Definition at line 178 of file ncp.h.
Referenced by ParamStr(), PlotPhiInOut(), Run(), and Save().
Definition at line 184 of file ncp.h.
Referenced by AddCut(), BagOfWhiskers2(), Clr(), GetBoltzmanCurveStat(), GetCurveStat(), PlotBoltzmanCurve(), PlotNCPScatter(), PlotPhiDistr(), and Run().
Definition at line 183 of file ncp.h.
Referenced by Clr(), FindBestCut(), PlotNcpTop10(), and Run().
int TLocClustStat::TakeValAt [static] |