SNAP Library , User Reference  2013-01-07 14:03:36
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
TLocClustStat Class Reference

#include <ncp.h>

List of all members.

Classes

class  TCutInfo
class  TNodeSweep

Public Member Functions

 TLocClustStat (const double &_Alpha, const int &_KMin, const int &_KMax, const double &_KFac, const int &_Coverage, const double &_SizeFrac)
void Save (TSOut &SOut) const
void Clr ()
void SetGraph (const PUNGraph &GraphPt)
void Run (const PUNGraph &Graph, const bool &SaveAllSweeps=false, const bool &SaveAllCond=false, const bool &SaveBestNodesAtK=false)
void AddBagOfWhiskers ()
void AddCut (const TIntV &NIdV)
void AddCut (const int &ClustSz, const double &Phi)
void AddToBestCutH (const PUNGraph &Graph, const TIntV &NIdV, const bool &AddBestCond=true)
double FindBestCut (const int &Nodes, const TIntSet &TabuNIdSet, int &BestCutId) const
const TCutInfoFindBestCut (const int &Nodes=-1) const
double FindBestCut (const int &Nodes, TIntV &ClustNIdV) const
int FindBestCut (const int &Nodes, int &Vol, int &Cut, double &Phi) const
const TCutInfoGetBestCut () const
int GetCuts () const
const TCutInfoGetKNodeCut (const int &Nodes) const
const TCutInfoGetCutN (const int &N) const
int BestWhiskNodes () const
int BestWhiskEdges () const
double BestWhiskPhi () const
TFltPr GetBestWhisk () const
const TFltPrVGetBagOfWhiskersV () const
void GetCurveStat (TFltPrV &MeanV, TFltPrV &MedV, TFltPrV &Dec1V, TFltPrV &MinV, TFltPrV &MaxV) const
void GetBoltzmanCurveStat (const TFltV &TempV, TVec< TFltPrV > &NcpV) const
TStr ParamStr () const
void PlotNCP (const TStr &OutFNm, TStr Desc=TStr()) const
void PlotNCPModul (const TStr &OutFNm, TStr Desc=TStr()) const
void PlotNcpTop10 (const TStr &OutFNm, TStr Desc, const int &TakeMinN) const
void PlotPhiInOut (const TStr &OutFNm, TStr Desc=TStr()) const
void PlotBestClustDens (TStr OutFNm, TStr Desc=TStr()) const
void PlotNCPScatter (const TStr &OutFNm, TStr Desc=TStr()) const
void PlotPhiDistr (const int &CmtySz, const TStr &OutFNm, TStr Desc=TStr()) const
void PlotBoltzmanCurve (const TStr &OutFNm, TStr Desc=TStr()) const
void ImposeNCP (const TLocClustStat &LcStat2, TStr OutFNm, TStr Desc, TStr Desc1, TStr Desc2) const
void ImposeNCP (const TLocClustStat &LcStat2, const TLocClustStat &LcStat3, TStr OutFNm, TStr Desc, TStr Desc1, TStr Desc2, TStr Desc3) const
void SaveTxtInfo (const TStr &OutFNmPref, const TStr &Desc, const bool &SetMaxAt1) const

Static Public Member Functions

static void BagOfWhiskers (const PUNGraph &Graph, TFltPrV &SizePhiV, TFltPr &BestWhisk)
static void BagOfWhiskers2 (const PUNGraph &Graph, TFltPrV &SizePhiV)
static void MakeExpBins (const TFltPrV &ValV, TFltPrV &NewV)
static void MakeExpBins (const TFltV &ValV, TFltV &NewV)

Public Attributes

TVec< TNodeSweepSweepsV
THash< TInt, TFltVSizePhiH
THash< TInt, TCutInfoBestCutH
TFltPrV BagOfWhiskerV
TFltPr BestWhisk
TCutInfo BestCut
TIntSet SizeBucketSet

Static Public Attributes

static double BinFactor = 1.01
static int TakeValAt

Private Attributes

TFlt Alpha
TFlt SizeFrac
TFlt KFac
TInt KMin
TInt KMax
TInt Coverage
PUNGraph Graph

Detailed Description

Local-Spectral-Clustering statistics of a given Graph.

Definition at line 125 of file ncp.h.


Constructor & Destructor Documentation

TLocClustStat::TLocClustStat ( const double &  _Alpha,
const int &  _KMin,
const int &  _KMax,
const double &  _KFac,
const int &  _Coverage,
const double &  _SizeFrac 
)

Definition at line 275 of file ncp.cpp.

                                                 : Alpha(_Alpha), SizeFrac(_SizeFrac), KFac(_KFac),
  KMin(_KMin), KMax(_KMax), Coverage(_Coverage) {
}

Member Function Documentation

Definition at line 404 of file ncp.cpp.

                                     {
  BagOfWhiskers(Graph, BagOfWhiskerV, BestWhisk); // slow but exact
  //BagOfWhiskers2(Graph, BagOfWhiskerV);
}
void TLocClustStat::AddCut ( const TIntV NIdV)

Definition at line 410 of file ncp.cpp.

                                            {
  int Vol, Cut; double Phi;
  TLocClust::GetCutStat(Graph, NIdV, Vol, Cut, Phi, Graph->GetEdges());
  SizePhiH.AddDat(NIdV.Len()).Add(Phi);
}
void TLocClustStat::AddCut ( const int &  ClustSz,
const double &  Phi 
)

Definition at line 417 of file ncp.cpp.

                                                                {
  SizePhiH.AddDat(ClustSz).Add(Phi);
}
void TLocClustStat::AddToBestCutH ( const PUNGraph Graph,
const TIntV NIdV,
const bool &  AddBestCond = true 
)

Definition at line 422 of file ncp.cpp.

                                                                                                   {
  const int K = NIdV.Len();
  TCutInfo CutInfo(Graph, NIdV, true);
  printf("new: %d\t%d\t%d\t%f\t%f\n", CutInfo.GetNodes(), CutInfo.GetEdges(), 
    CutInfo.GetCutSz(), CutInfo.GetPhi(), CutInfo.GetModular(Graph->GetEdges()));
  if (! BestCutH.IsKey(K)) { BestCutH.AddDat(K, CutInfo);  return; }
  TCutInfo& CurCut = BestCutH.GetDat(K);
  // keep the cluster with best conductance
  if (AddBestCond && CurCut.GetPhi() > CutInfo.GetPhi()) { CurCut=CutInfo; }
  // keep the cluster with best modularity
  //const int E = Graph->GetEdges();  
  //if (! AddBestCond && CurCut.GetModular(E) < CutInfo.GetModular(E)) { CurCut=CutInfo; }
}
void TLocClustStat::BagOfWhiskers ( const PUNGraph Graph,
TFltPrV SizePhiV,
TFltPr BestWhisk 
) [static]

Definition at line 901 of file ncp.cpp.

                                                                                            {
  TCnComV Cn1ComV;
  TSnap::Get1CnCom(Graph, Cn1ComV);
  TIntPrV SzVolV;
  int MxSize=0;
  if (Cn1ComV.Empty()) { printf("** No bridges\n"); SizePhiV.Clr();  return; }
  //  Graph->SaveEdgeList("g-vol.txt");  TGraphViz::Plot(Graph, gvlNeato, "g-vol.gif");  Fail; } IAssert(vol <= sz*(sz-1));
  printf("  1-connected components: %d\n", Cn1ComV.Len());
  MaxWhisk = TFltPr(1,1);
  for (int c = 0; c < Cn1ComV.Len(); c++) {
    const TIntV& NIdV = Cn1ComV[c].NIdV;
    const int sz = NIdV.Len();
    if (sz < 2) { continue; }
    int vol = 0; // volume is the size of degrees
    for (int n = 0; n < sz; n++) {
      vol += Graph->GetNI(NIdV[n]).GetOutDeg(); }
    SzVolV.Add(TIntPr(sz, vol));
    MxSize += sz;
    if (1.0/double(vol) < MaxWhisk.Val2) { MaxWhisk=TFltPr(NIdV.Len(), 1.0/double(vol)); }
  }
  SzVolV.Sort(false);
  // compose clusters
  THash<TInt, TIntSet> ItemSetH(MxSize, true);
  THash<TInt, TInt> VolH(MxSize, true);
  THash<TInt, TFlt> CostH(MxSize, true);
  ItemSetH.AddKey(0);  VolH.AddKey(0);
  TExeTm ExeTm;
  // exact up to size 1000
  for (int size = 2; size <= TMath::Mn(MxSize, 1000); size++) {
    for (int item = 0; item <SzVolV.Len(); item++) {
      const int smallSz = size-SzVolV[item].Val1;
      if (ItemSetH.IsKey(smallSz)) {
        const TIntSet& SmallSet = ItemSetH.GetDat(smallSz);
        if (SmallSet.IsKey(item)) { continue; }
        const int SmallVol = VolH.GetDat(smallSz);
        // combine smaller nad new solution
        const double curCost = CostH.IsKey(size) ? double(CostH.GetDat(size)) : double(10e10);
        const double newCost = double(SmallSet.Len()+1) / double(SmallVol+SzVolV[item].Val2);
        if (curCost < newCost) { continue; }
        VolH.AddDat(size, SmallVol+SzVolV[item].Val2);
        ItemSetH.AddDat(size, SmallSet);
        ItemSetH.GetDat(size).AddKey(item);
        CostH.AddDat(size, newCost);
      }
    }
    if (VolH.IsKey(size) && size%100==0) {
      printf("\rSize: %d/%d: vol: %d,  items: %d/%d [%s]", size, MxSize, VolH.GetDat(size).Val,
        ItemSetH.GetDat(size).Len(), SzVolV.Len(), ExeTm.GetStr());
    }
  }
  // add sizes larger than 1000
  printf("\nAdding sizes > 1000 nodes...");
  int partSz=0; double partVol=0.0;
  for (int i = 0; i < SzVolV.Len(); i++) {
    partSz += SzVolV[i].Val1();
    partVol += SzVolV[i].Val2();
    if (partSz < 1000) { continue; }
    const double curCost = CostH.IsKey(partSz) ? double(CostH.GetDat(partSz)) : double(10e10);
    const double partPhi = double(i+1)/partVol;
    if (partPhi < curCost) {
      CostH.AddDat(partSz, partPhi);
    }
  }
  VolH.SortByKey();
  CostH.SortByKey();
  SizePhiV.Gen(CostH.Len(), 0);
  SizePhiV.Add(TFltPr(1, 1));
  for (int s = 0; s < CostH.Len(); s++) {
    const int size = CostH.GetKey(s);
    const double cond = CostH[s]; //double(ItemSetH.GetDat(size).Len())/double(VolH[s]);
    SizePhiV.Add(TFltPr(size, cond));
  }
  printf("done\n");
}
void TLocClustStat::BagOfWhiskers2 ( const PUNGraph Graph,
TFltPrV SizePhiV 
) [static]

Definition at line 977 of file ncp.cpp.

                                                                           {
  TCnComV Cn1ComV;
  TSnap::Get1CnCom(Graph, Cn1ComV);
  TIntPrV SzVolV;
  int MxSize=0, TotVol=0;
  if (Cn1ComV.Empty()) { printf("** No bridges\n");  SizePhiV.Clr();  return; }
  printf("  1-connected components: %d\n", Cn1ComV.Len());
  for (int c = 0; c < Cn1ComV.Len(); c++) {
    const TIntV& NIdV = Cn1ComV[c].NIdV;
    const int sz = NIdV.Len();
    if (sz < 2) { continue; }
    int vol = 0; // volume is the size of degrees
    for (int n = 0; n < sz; n++) {
      vol += Graph->GetNI(NIdV[n]).GetOutDeg(); }
    SzVolV.Add(TIntPr(sz, vol));
    MxSize += sz;  TotVol += vol;
  }
  printf("  Total size: %d\t Total vol: %d\n", MxSize, TotVol);
  SzVolV.Sort(false);
  // compose clusters
  THash<TFlt, TFlt> SizePhiH(MxSize, true);
  for (int i = 0; i < SzVolV.Len(); i++) {
    const int Sz = SzVolV[i].Val1();
    const double Phi = 1.0/double(SzVolV[i].Val2());
    if ((! SizePhiH.IsKey(Sz)) || SizePhiH.GetDat(Sz) > Phi) {
      SizePhiH.AddDat(Sz, Phi);  }
  }
  double partSz=0.0, partVol=0.0;
  for (int i = 0; i < SzVolV.Len(); i++) {
    partSz += SzVolV[i].Val1();
    partVol += SzVolV[i].Val2();
    const double partPhi = double(i+1)/partVol;
    if ((! SizePhiH.IsKey(partSz)) || partPhi < SizePhiH.GetDat(partSz)) {
      SizePhiH.AddDat(partSz, partPhi); }
  }
  SizePhiV.Gen(SizePhiH.Len()+1, 0);
  SizePhiV.Add(TFltPr(1, 1));
  SizePhiH.SortByKey();
  for (int s = 0; s < SizePhiH.Len(); s++) {
    SizePhiV.Add(TFltPr(SizePhiH.GetKey(s), SizePhiH[s]));
  }
}
int TLocClustStat::BestWhiskEdges ( ) const [inline]

Definition at line 212 of file ncp.h.

{ return (int)TMath::Round(1.0/BestWhisk.Val2.Val)/2-1; }
int TLocClustStat::BestWhiskNodes ( ) const [inline]

Definition at line 211 of file ncp.h.

{ return int(BestWhisk.Val1.Val); }
double TLocClustStat::BestWhiskPhi ( ) const [inline]

Definition at line 213 of file ncp.h.

{ return BestWhisk.Val2; }

Definition at line 293 of file ncp.cpp.

                        {
  SweepsV.Clr(false);      // node ids and conductances for each run of local clustering
  SizePhiH.Clr(false);     // conductances at cluster size K
  BestCutH.Clr(false);     // best cut (min conductance) at size K
  BagOfWhiskerV.Clr(false); // (Size, Conductance) of bag of whiskers clusters
}
double TLocClustStat::FindBestCut ( const int &  Nodes,
const TIntSet TabuNIdSet,
int &  BestCutId 
) const

Definition at line 469 of file ncp.cpp.

                                                                                                   {
  double bestPhi = 1;
  BestCutId = -1;
  bool Tabu;
  IAssert(! SweepsV.Empty());
  for (int c = 0; c < SweepsV.Len(); c++) {
    const TNodeSweep& Sweep = SweepsV[c];
    if (Sweep.Len() < Nodes) { continue; }
    if (Sweep.Phi(Nodes-1) > bestPhi) { continue; }
    Tabu = false;
    for (int i = 0; i < Nodes; i++) {
      if (TabuNIdSet.IsKey(Sweep.NId(i))) { Tabu=true; break; }
    }
    if (Tabu) { continue; }
    bestPhi = Sweep.Phi(Nodes-1);
    BestCutId = c;
  }
  return bestPhi;
}
const TLocClustStat::TCutInfo & TLocClustStat::FindBestCut ( const int &  Nodes = -1) const

Definition at line 436 of file ncp.cpp.

                                                                              {
  double bestPhi = 1;
  int CutId = -1;
  if (Nodes > 0) {
    IAssert(BestCutH.IsKey(Nodes));
    return BestCutH.GetDat(Nodes);
  } else {
    for (int n = 0; n < BestCutH.Len(); n++) {
      if (BestCutH[n].GetPhi() <= bestPhi) {
        bestPhi = BestCutH[n].GetPhi();  CutId = n; }
    }
    IAssert(CutId != -1);
    IAssert(! BestCutH[CutId].CutNIdV.Empty());
    return BestCutH[CutId];
  }
}
double TLocClustStat::FindBestCut ( const int &  Nodes,
TIntV ClustNIdV 
) const

Definition at line 454 of file ncp.cpp.

                                                                          {
  const TCutInfo& Cut = FindBestCut(Nodes);
  ClustNIdV = Cut.CutNIdV;
  return Cut.GetPhi();
}
int TLocClustStat::FindBestCut ( const int &  Nodes,
int &  Vol,
int &  Cut,
double &  Phi 
) const

Definition at line 460 of file ncp.cpp.

                                                                                      {
  const TCutInfo& CutInfo = FindBestCut(Nodes);
  Vol = CutInfo.GetVol();
  Cut = CutInfo.GetCutSz();
  Phi = CutInfo.GetPhi();
  return CutInfo.GetNodes();
}
const TFltPrV& TLocClustStat::GetBagOfWhiskersV ( ) const [inline]

Definition at line 215 of file ncp.h.

{ return BagOfWhiskerV; }
const TCutInfo& TLocClustStat::GetBestCut ( ) const [inline]

Definition at line 206 of file ncp.h.

{ return BestCut; } // overall best cut
TFltPr TLocClustStat::GetBestWhisk ( ) const [inline]

Definition at line 214 of file ncp.h.

{ return BestWhisk; }
void TLocClustStat::GetBoltzmanCurveStat ( const TFltV TempV,
TVec< TFltPrV > &  NcpV 
) const

Definition at line 529 of file ncp.cpp.

                                                                                      {
  IAssert(! SizePhiH.Empty()); // stores conductances of all little clusters
  NcpV.Gen(TempV.Len());
  TFltPrV BucketV;
  for (int t = 0; t < TempV.Len(); t++) {
    const double T = TempV[t];
    for (int i = 0; i < SizePhiH.Len(); i++) {
      const double X = SizePhiH.GetKey(i).Val;  IAssert(X >= 1.0);
      const TFltV& PhiV = SizePhiH[i];
      double V = 0.0, SumW = 0.0;
      for (int j = 0; j < PhiV.Len(); j++) { 
        V += PhiV[j] * exp(-PhiV[j]/T); 
        SumW += exp(-PhiV[j]/T); 
      }
      //V /= PhiV.Len();
      V /= SumW;
      NcpV[t].Add(TFltPr(X, V));
    }
    TLocClustStat::MakeExpBins(NcpV[t], BucketV);  NcpV[t].Swap(BucketV);
    // normalize to start at (1,1)
    //for (int i = 0; i < NcpV[t].Len(); i++) {
    //  NcpV[t][i].Val2 /= NcpV[t][0].Val2;
    //}
  }
}
void TLocClustStat::GetCurveStat ( TFltPrV MeanV,
TFltPrV MedV,
TFltPrV Dec1V,
TFltPrV MinV,
TFltPrV MaxV 
) const

Definition at line 490 of file ncp.cpp.

                                                                                                                  {
  TFltPrV BucketV;
  MeanV.Clr(false); MedV.Clr(false); Dec1V.Clr(false); MinV.Clr(false); MaxV.Clr(false);
  if (! SizePhiH.Empty()) { // stores conductances of all little clusters
    const THash<TInt, TFltV>& KvH = SizePhiH; 
    for (int i = 0; i < KvH.Len(); i++) {
      const double X = KvH.GetKey(i).Val;  IAssert(X >= 1.0);
      const TFltV& YVec = KvH[i];
      TMom Mom;
      for (int j = 0; j < YVec.Len(); j++) { Mom.Add(YVec[j]); }
      Mom.Def();
      /*IAssert(Mom.GetMean()>=0 && Mom.GetMean()<=1);
      IAssert(Mom.GetMedian()>=0 && Mom.GetMedian()<=1);
      IAssert(Mom.GetDecile(1)>=0 && Mom.GetDecile(1)<=1);
      IAssert(Mom.GetMn()>=0 && Mom.GetMn()<=1);
      IAssert(Mom.GetMx()>=0 && Mom.GetMx()<=1);*/
      MeanV.Add(TFltPr(X, Mom.GetMean()));
      MedV.Add(TFltPr(X, Mom.GetMedian()));
      Dec1V.Add(TFltPr(X, Mom.GetDecile(1)));
      MinV.Add(TFltPr(X, Mom.GetMn()));
      MaxV.Add(TFltPr(X, Mom.GetMx()));
    }
    MeanV.Sort(); MedV.Sort();  Dec1V.Sort();  MinV.Sort();  MaxV.Sort();
    // create log buckets (makes plots smaller and smoother)
    TLocClustStat::MakeExpBins(MeanV, BucketV);  MeanV.Swap(BucketV);
    TLocClustStat::MakeExpBins(MedV, BucketV);  MedV.Swap(BucketV);
    TLocClustStat::MakeExpBins(Dec1V, BucketV);  Dec1V.Swap(BucketV);
    TLocClustStat::MakeExpBins(MinV, BucketV);  MinV.Swap(BucketV);
    TLocClustStat::MakeExpBins(MaxV, BucketV);  MaxV.Swap(BucketV);
  } else {
    //const int GEdges = Graph->GetEdges();
    for (int i = 0; i < BestCutH.Len(); i++) {
      MinV.Add(TFltPr(BestCutH.GetKey(i).Val, BestCutH[i].GetPhi()));
    }
    MinV.Sort();
    TLocClustStat::MakeExpBins(MinV, BucketV);  MinV.Swap(BucketV);
  }
}
const TCutInfo& TLocClustStat::GetCutN ( const int &  N) const [inline]

Definition at line 209 of file ncp.h.

{ return BestCutH[N]; }
int TLocClustStat::GetCuts ( ) const [inline]

Definition at line 207 of file ncp.h.

{ return BestCutH.Len(); }
const TCutInfo& TLocClustStat::GetKNodeCut ( const int &  Nodes) const [inline]

Definition at line 208 of file ncp.h.

{ return BestCutH.GetDat(Nodes); }
void TLocClustStat::ImposeNCP ( const TLocClustStat LcStat2,
TStr  OutFNm,
TStr  Desc,
TStr  Desc1,
TStr  Desc2 
) const

Definition at line 786 of file ncp.cpp.

                                                                                                                {
  if (Desc.Empty()) { Desc = OutFNm; }
  TFltPrV MeanV1, MedV1, Dec1V1, MinV1, MaxV1;
  TFltPrV MeanV2, MedV2, Dec1V2, MinV2, MaxV2;
  GetCurveStat(MeanV1, MedV1, Dec1V1, MinV1, MaxV1);
  LcStat2.GetCurveStat(MeanV2, MedV2, Dec1V2, MinV2, MaxV2);
  // plot
  TGnuPlot GP("ncp."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
  if (! MinV1.Empty()) { GP.AddPlot(MinV1, gpwLines, TStr::Fmt("%s MIN (%d, %d)", Desc1.CStr(), Graph->GetNodes(), Graph->GetEdges()), "lw 1"); }
  if (! MinV2.Empty()) { GP.AddPlot(MinV2, gpwLines, TStr::Fmt("%s MIN (%d, %d)", Desc2.CStr(), LcStat2.Graph->GetNodes(), LcStat2.Graph->GetEdges()), "lw 1"); }
  if (! BagOfWhiskerV.Empty()) { 
    GP.AddPlot(BagOfWhiskerV, gpwLines, Desc1+" whiskers", "lw 1"); 
    TFltPrV BestWhiskV;  BestWhiskV.Add(BestWhisk);
    GP.AddPlot(BestWhiskV, gpwPoints, Desc1+" Best whisker", "pt 5 ps 2"); }
  if (! LcStat2.BagOfWhiskerV.Empty()) {
    GP.AddPlot(LcStat2.BagOfWhiskerV, gpwLines, Desc2+" whiskers", "lw 1"); 
    TFltPrV BestWhiskV;  BestWhiskV.Add(LcStat2.BestWhisk);
    GP.AddPlot(BestWhiskV, gpwPoints, Desc2+" Best whisker", "pt 5 ps 2"); }
  GP.SetScale(gpsLog10XY);
  GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
  //GP.SetXRange(1, Graph->GetNodes()/2);
  GP.SavePng();
}
void TLocClustStat::ImposeNCP ( const TLocClustStat LcStat2,
const TLocClustStat LcStat3,
TStr  OutFNm,
TStr  Desc,
TStr  Desc1,
TStr  Desc2,
TStr  Desc3 
) const

Definition at line 810 of file ncp.cpp.

                                                                                                                                                          {
  if (Desc.Empty()) { Desc = OutFNm; }
  TFltPrV MeanV1, MedV1, Dec1V1, MinV1, MaxV1;
  TFltPrV MeanV2, MedV2, Dec1V2, MinV2, MaxV2;
  TFltPrV MeanV3, MedV3, Dec1V3, MinV3, MaxV3;
  GetCurveStat(MeanV1, MedV1, Dec1V1, MinV1, MaxV1);
  LcStat2.GetCurveStat(MeanV2, MedV2, Dec1V2, MinV2, MaxV2);
  LcStat3.GetCurveStat(MeanV3, MedV3, Dec1V3, MinV3, MaxV3);
  // plot
  TGnuPlot GP("phiTR."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
  if (! MinV1.Empty()) { GP.AddPlot(MinV1, gpwLines, Desc1+" MIN", "lw 1"); }
  if (! MinV2.Empty()) { GP.AddPlot(MinV2, gpwLines, Desc2+" MIN", "lw 1"); }
  if (! MinV3.Empty()) { GP.AddPlot(MinV3, gpwLines, Desc3+" MIN", "lw 1"); }
  if (! BagOfWhiskerV.Empty()) { 
    GP.AddPlot(BagOfWhiskerV, gpwLines, Desc1+" whiskers", "lw 1"); 
    TFltPrV BestWhiskV;  BestWhiskV.Add(BestWhisk);
    GP.AddPlot(BestWhiskV, gpwPoints, Desc1+" Best whisker", "pt 5 ps 2"); }
  if (! LcStat2.BagOfWhiskerV.Empty()) {
    GP.AddPlot(LcStat2.BagOfWhiskerV, gpwLines, Desc2+" whiskers", "lw 1"); 
    TFltPrV BestWhiskV;  BestWhiskV.Add(LcStat2.BestWhisk);
    GP.AddPlot(BestWhiskV, gpwPoints, Desc2+" Best whisker", "pt 5 ps 2"); }
  if (! LcStat3.BagOfWhiskerV.Empty()) { 
    GP.AddPlot(LcStat3.BagOfWhiskerV, gpwLines, Desc3+" whiskers", "lw 1"); 
    TFltPrV BestWhiskV;  BestWhiskV.Add(LcStat3.BestWhisk);
    GP.AddPlot(BestWhiskV, gpwPoints, Desc3+" Best whisker", "pt 5 ps 2"); }
  GP.SetScale(gpsLog10XY);
  GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
  GP.SetXRange(1, Graph->GetNodes()/2);
  GP.SavePng();
}
void TLocClustStat::MakeExpBins ( const TFltPrV ValV,
TFltPrV NewV 
) [static]

Definition at line 1020 of file ncp.cpp.

                                                                  {
  if (ValV.Empty()) { NewV.Clr(false); return; }
  NewV.Gen(1000, 0);
  double PrevBPos = 1, BPos = 1;
  int i = 0;
  while (BPos <= ValV.Last().Val1) {
    //if (TakeValAt == 1) {
    //  while (i < ValV.Len() && ValV[i].Val1 <= BPos) { i++; }
    //  NewV.Add(ValV[i-1]); }
    //else if (TakeValAt == 2) {
    int MinI=-1;  double MinCnt=TFlt::Mx;
    while (i < ValV.Len() && ValV[i].Val1 <= BPos) {
      if (ValV[i].Val2 < MinCnt) { MinCnt=ValV[i].Val2; MinI=i; } i++; }
    if (MinI>=0 && MinI<ValV.Len()) {
      NewV.Add(ValV[MinI]); }
    PrevBPos = (uint) floor(BPos);
    BPos *= BinFactor;
    if (floor(BPos) == PrevBPos) { BPos = PrevBPos + 1; }
  }
  NewV.Add(ValV.Last());
}
void TLocClustStat::MakeExpBins ( const TFltV ValV,
TFltV NewV 
) [static]

Definition at line 1042 of file ncp.cpp.

                                                              {
  if (ValV.Empty()) { NewV.Clr(false); return; }
  NewV.Gen(1000, 0);
  double PrevBPos = 1, BPos = 1;
  int i = 1;
  NewV.Add(ValV[0]);
  while (BPos <= ValV.Len()) {
    int MinI=-1;  double MinCnt=TFlt::Mx;
    while (i < ValV.Len() && i <= BPos) {
      if (ValV[i] < MinCnt) { MinCnt=ValV[i]; MinI=i; } i++; }
    if (MinI>=0 && MinI<ValV.Len()) {
      NewV.Add(ValV[MinI]); }
    PrevBPos = (uint) floor(BPos);
    BPos *= BinFactor;
    if (floor(BPos) == PrevBPos) { BPos = PrevBPos + 1; }
  }
  NewV.Add(ValV.Last());
}

Definition at line 555 of file ncp.cpp.

                                   {
  if (Graph.Empty()) {
    return TStr::Fmt("A=%g, K=%d-%g-%s, Cvr=%d, SzFrc=%g", Alpha(), KMin(), KFac(), TInt::GetMegaStr(KMax()).CStr(), Coverage(), SizeFrac()); }
  else {
    return TStr::Fmt("A=%g, K=%d-%g-%s, Cvr=%d, SzFrc=%g G(%d, %d)", Alpha(), KMin(), KFac(), TInt::GetMegaStr(KMax()).CStr(), Coverage(), SizeFrac(),
      Graph->GetNodes(), Graph->GetEdges());
  }
}
void TLocClustStat::PlotBestClustDens ( TStr  OutFNm,
TStr  Desc = TStr() 
) const

Definition at line 689 of file ncp.cpp.

                                                                  {
  if (Desc.Empty()) { Desc = OutFNm; }
  const int len = BestCutH.Len();
  TFltPrV CutV(len, 0), EdgesV(len, 0), PhiV(len,0);
  for (int i = 0; i < BestCutH.Len(); i++) {
    const double size = BestCutH.GetKey(i).Val;
    CutV.Add(TFltPr(size, BestCutH[i].GetCutSz()));
    EdgesV.Add(TFltPr(size, BestCutH[i].GetEdges()));
    PhiV.Add(TFltPr(size, BestCutH[i].GetPhi()));
  }
  TGnuPlot GP("cutEdges."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
  TFltPrV NewV;  TLocClustStat::MakeExpBins(EdgesV, NewV);
  GP.AddPlot(NewV, gpwLines, "Edges inside");
  TLocClustStat::MakeExpBins(CutV, NewV);
  GP.AddPlot(NewV, gpwLines, "Cut edges");
  TLocClustStat::MakeExpBins(PhiV, NewV);
  GP.AddPlot(NewV, gpwLines, "Conductance");
  GP.SetXYLabel("Cluster size", "Edges"); GP.SetScale(gpsLog10XY);
  GP.AddCmd("set logscale xyy2 10");
  GP.AddCmd("set y2label \"Conductance\"");
  GP.SavePng();
  system(TStr(TStr("replace_all.py cutEdges.")+OutFNm+".plt \"title \\\"Conductance\" \"axis x1y2 title \\\"Conductance\"").CStr());
  GP.RunGnuPlot();
}
void TLocClustStat::PlotBoltzmanCurve ( const TStr OutFNm,
TStr  Desc = TStr() 
) const

Definition at line 747 of file ncp.cpp.

                                                                         {
  TFltPrV MeanV1, MedV1, Dec1V1, MinV1, MaxV1;
  GetCurveStat(MeanV1, MedV1, Dec1V1, MinV1, MaxV1);
  TVec<TFltPrV> NcpV;
  //const TFltV TempV = TFltV::GetV(0.05, 0.01, 0.1, 10, 100, 1000);
  const TFltV TempV = TFltV::GetV(0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.5, 1);
  GetBoltzmanCurveStat(TempV, NcpV);
  TGnuPlot GP("ncp."+OutFNm+"-B", TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));  
  GP.AddPlot(MinV1, gpwLines, TStr::Fmt("%s MIN (%d, %d)", Desc.CStr(), Graph->GetNodes(), Graph->GetEdges()), "lw 1");
  GP.AddPlot(MeanV1, gpwLines, "Avg", "lw 1");
  GP.AddPlot(MedV1, gpwLines, "Median", "lw 1");
  GP.AddPlot(Dec1V1, gpwLines, "Decile-1", "lw 1");
  for (int t = 0; t < TempV.Len(); t++) {
    GP.AddPlot(NcpV[t], gpwLines, TStr::Fmt("Temp %g", TempV[t]()), "lw 1");
  }
  GP.SetScale(gpsLog10XY);
  GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
  GP.SavePng();
  //
  TFltPrV SzNClustV;
  int kCnt=1;
  for (int i = 0; i < SizePhiH.Len(); i++) {
    const int K = SizePhiH.GetKey(i);
    const TFltV& PhiV = SizePhiH[i];
    SzNClustV.Add(TFltPr(K, PhiV.Len()));
    if (K>2 && (pow(10.0, (int)log10((double)K))==K || (K >=10 && K<=100 && K%10==0) || (K >=100 && K<=1000 && K%100==0))) {
      THash<TFlt, TFlt> CntH;
      for (int p = 0; p < PhiV.Len(); p++) {
        CntH.AddDat(TMath::Round(log10(PhiV[p].Val),1)) += 1;
      }
      TGnuPlot::PlotValCntH(CntH, TStr::Fmt("ncp.%s-c%02d", OutFNm.CStr(), kCnt++), TStr::Fmt("%s. K=%d, NPieces=%d, %s", 
        Desc.CStr(), K, PhiV.Len(), ParamStr().CStr()), "log_10 {/Symbol \\F} (conductance)", 
        TStr::Fmt("Number of pieces of such conductance, K=%d, NPieces=%d)", K, PhiV.Len()));
    }
  }
  TGnuPlot::PlotValV(SzNClustV, "ncp."+OutFNm+"-ClSz", TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()),
    "k (cluster size)", "c(k) (number of extracted clusters)", gpsLog);
}
void TLocClustStat::PlotNCP ( const TStr OutFNm,
TStr  Desc = TStr() 
) const

Definition at line 564 of file ncp.cpp.

                                                               {
  if (Desc.Empty()) { Desc = OutFNm; }
  TFltPrV MeanV, MedV, Dec1V, MinV, MaxV;
  GetCurveStat(MeanV, MedV, Dec1V, MinV, MaxV);
  TGnuPlot GP("ncp."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
  if (! MaxV.Empty()) { GP.AddPlot((MaxV), gpwLines, "MAX"); }
  if (! MedV.Empty()) { GP.AddPlot((MedV), gpwLines, "MEDIAN"); }
  if (! MeanV.Empty()) { GP.AddPlot((MeanV), gpwLines, "MEAN"); }
  if (! Dec1V.Empty()) { GP.AddPlot((Dec1V), gpwLines, "1-st DECILE"); }
  if (! MinV.Empty()) { 
    GP.AddPlot((MinV), gpwLines, "MIN"); 
    //GP.AddPlot(MinV, gpwLines, "smooth MIN", "lw 2 smooth bezier");
  }
  if (! BagOfWhiskerV.Empty()) {
    GP.AddPlot(BagOfWhiskerV, gpwLines, "Whiskers", "lw 1"); 
    TFltPrV BestWhiskV;  BestWhiskV.Add(TFltPr(BestWhisk));
    GP.AddPlot(BestWhiskV, gpwPoints, "Best whisker", "pt 5 ps 2");
  }
  GP.SetScale(gpsLog10XY);
  GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
  GP.SetXRange(1, Graph->GetNodes()/2);
  GP.SavePng();
}
void TLocClustStat::PlotNCPModul ( const TStr OutFNm,
TStr  Desc = TStr() 
) const

Definition at line 589 of file ncp.cpp.

                                                                    {
  if (Desc.Empty()) { Desc = OutFNm; }
  TFltPrV MinV, BucketV;
  const int GEdges = Graph->GetEdges();
  for (int i = 0; i < BestCutH.Len(); i++) {
    MinV.Add(TFltPr(BestCutH.GetKey(i).Val, BestCutH[i].GetModular(GEdges))); }
  MinV.Sort();
  TLocClustStat::MakeExpBins(MinV, BucketV);  MinV.Swap(BucketV);
  TGnuPlot GP("ncpMod."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
  if (! MinV.Empty()) { 
    GP.AddPlot((MinV), gpwLines, "MIN"); }
  GP.SetScale(gpsLog10XY);
  GP.SetXYLabel("k (number of nodes in the cluster)", "Q (modularity)");
  GP.SetXRange(1, Graph->GetNodes()/2);
  GP.SavePng();
}
void TLocClustStat::PlotNCPScatter ( const TStr OutFNm,
TStr  Desc = TStr() 
) const

Definition at line 715 of file ncp.cpp.

                                                                      {
  if (Desc.Empty()) { Desc = OutFNm; }
  THashSet<TFltPr> PhiSzH;
  IAssertR(! SizePhiH.Empty(), "Set SaveAllCond=true in TLocClustStat::Run()");
  for (int k = 0; k < SizePhiH.Len(); k++) {
    const int K = SizePhiH.GetKey(k);
    const TFltV& PhiV = SizePhiH[k];
    for (int p = 0; p < PhiV.Len(); p++) {
      PhiSzH.AddKey(TFltPr(K, TMath::Round(PhiV[p], 3))); }
  }
  TFltPrV PhiSzV;  PhiSzH.GetKeyV(PhiSzV);
  TGnuPlot GP("ncpScatter."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
  GP.AddPlot(PhiSzV, gpwPoints, "", "pt 5 ps 0.2");
  GP.SetScale(gpsLog10XY);
  GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
  GP.SetXRange(1, Graph->GetNodes()/2);
  GP.SavePng();
}
void TLocClustStat::PlotNcpTop10 ( const TStr OutFNm,
TStr  Desc,
const int &  TakeMinN 
) const

Definition at line 607 of file ncp.cpp.

                                                                                         {
  if (Desc.Empty()) { Desc = OutFNm; }
  const double BinFactor = 1.05;
  double BinPos=1;
  int PrevBPos=1, CurBPos=1, CutId;
  bool AddSome;
  TVec<TFltPrV> Curves(TakeMinN);
  while (true) {
    PrevBPos = CurBPos;
    BinPos *= BinFactor; // do exponential binning
    CurBPos = (int) floor(BinPos);
    if (CurBPos == PrevBPos) { CurBPos=PrevBPos+1;  BinPos=CurBPos; }
    const int Nodes = CurBPos;
    TIntSet TabuNIdSet(Graph->GetNodes());
    AddSome = false;
    for (int t = 0; t < TakeMinN; t++) {
      const double Phi = FindBestCut(Nodes, TabuNIdSet, CutId);
      if (CutId == -1) { break; }
      Curves[t].Add(TFltPr(Nodes, Phi));
      for (int n = 0; n < Nodes; n++) {
        TabuNIdSet.AddKey(SweepsV[CutId].NId(n)); }
      AddSome = true;
    }
    if (! AddSome) { break; }
  }
  TGnuPlot GP("ncpTop."+OutFNm, TStr::Fmt("%s. Top disjoint clusters. Take:%d, %s", Desc.CStr(), TakeMinN, ParamStr().CStr()));
  for (int i = 0; i < Curves.Len(); i++) {
    GP.AddPlot(Curves[i], gpwLines, TStr::Fmt("MIN %d", i+1), "lw 1"); }
  //if (! BagOfWhiskerV.Empty()) { GP.AddPlot(BagOfWhiskerV, gpwLines, " Whiskers", "lw 2"); }
  GP.SetScale(gpsLog10XY);
  GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
  GP.SetXRange(1, Graph->GetNodes()/2);
  GP.SavePng();
}
void TLocClustStat::PlotPhiDistr ( const int &  CmtySz,
const TStr OutFNm,
TStr  Desc = TStr() 
) const

Definition at line 735 of file ncp.cpp.

                                                                                       {
  IAssert(! SizePhiH.Empty());
  const TFltV& PhiV = SizePhiH.GetDat(CmtySz);
  THash<TFlt, TInt> PhiCntH;
  for (int i = 0; i < PhiV.Len(); i++) {
    const double Buck =  TMath::Round(PhiV[i], 3);
    PhiCntH.AddDat(Buck) += 1;
  }
  TGnuPlot::PlotValCntH(PhiCntH, TStr::Fmt("phiDistr%03d.%s", CmtySz, OutFNm.CStr()), 
    TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()), "{/Symbol \\F} (conductance)",  "Count");
}
void TLocClustStat::PlotPhiInOut ( const TStr OutFNm,
TStr  Desc = TStr() 
) const

Definition at line 643 of file ncp.cpp.

                                                                    {
  IAssert(! BestCutH.Empty() && ! Graph.Empty());
  TFltPrV PhiInV, PhiBoundV, PhiRatV;
  FILE *F = fopen(TStr::Fmt("phiInOut.%s-all.tab", OutFNm.CStr()).CStr(), "wt");
  fprintf(F, "#Nodes\tEdges\tVol\tCut\tPhi\tInNodes\tInEdges\tInVol\tInCut\tInPhi\n");
  TLocClustStat ClustStat2(Alpha, 1, KMax, KFac, Coverage, SizeFrac);
  const double IdxFact = 1.05;
  int curIdx=1, prevIdx=1;
  while (curIdx <= BestCutH.Len()) {
    const TCutInfo& CutInfo = BestCutH[curIdx-1];
    if (CutInfo.GetNodes() > 1) {
      PUNGraph ClustG = TSnap::GetSubGraph(Graph, CutInfo.CutNIdV);
      ClustStat2.Run(ClustG);
      const TCutInfo& InCut = ClustStat2.FindBestCut(-1);
      PhiInV.Add(TFltPr(CutInfo.GetNodes(), InCut.GetPhi()));
      PhiBoundV.Add(TFltPr(CutInfo.GetNodes(), CutInfo.GetPhi()));
      PhiRatV.Add(TFltPr(CutInfo.GetNodes(), InCut.GetPhi()/CutInfo.GetPhi()));
      fprintf(F, "%d\t%d\t%d\t%d\t%f\t%d\t%d\t%d\t%d\t%f\n", CutInfo.GetNodes(), CutInfo.GetEdges(), CutInfo.GetVol(),
        CutInfo.GetCutSz(), CutInfo.GetPhi(),  InCut.GetNodes(), InCut.GetEdges(), InCut.GetVol(), InCut.GetCutSz(), InCut.GetPhi());
      fflush(F);
    }
    prevIdx = curIdx;
    curIdx = (int) TMath::Round(double(curIdx)*IdxFact);
    if (prevIdx == curIdx) { curIdx++; }
  }
  fclose(F);
  { TGnuPlot GP("phiInOut."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
  GP.AddPlot(PhiBoundV, gpwLines, "CUT conductance", "lw 1");
  GP.AddPlot(PhiInV, gpwLines, "INSIDE conductance", "lw 1");
  //GP.AddPlot(PhiRatV, gpwLines, "RATIO (inside/boundary)", "lw 1");
  GP.SetXRange(1, Graph->GetNodes()/2);  GP.SetScale(gpsLog10XY);
  GP.SetXYLabel("k (number of nodes in the cluster)", "{/Symbol \\F} (conductance)");
  //GP.AddCmd("set logscale xyy2 10");
  //GP.AddCmd("set y2label \"Conductance ratio (inside/boundary -- higher better)\"");
  GP.SavePng(); }
  //system(TStr(TStr("replace_all.py phiInOut.")+OutFNm+".plt \"title \\\"RATIO (inside/boundary)\" \"axis x1y2 title \\\"RATIO (inside/boundary)\"").CStr());
  //GP.RunGnuPlot();
  { TGnuPlot GP("phiInOutRat."+OutFNm, TStr::Fmt("%s. %s", Desc.CStr(), ParamStr().CStr()));
  GP.AddPlot(PhiRatV, gpwLines, "RATIO (Inside / Boundary)", "lw 1");
  GP.SetXRange(1, Graph->GetNodes()/2);  GP.SetScale(gpsLog10XY);
  GP.SetXYLabel("Nodes", "Conductance ratio (inside/boundary) -- higher better");
  GP.SavePng(); }
}
void TLocClustStat::Run ( const PUNGraph Graph,
const bool &  SaveAllSweeps = false,
const bool &  SaveAllCond = false,
const bool &  SaveBestNodesAtK = false 
)

pow(1.1, cnt)

Definition at line 300 of file ncp.cpp.

                                                                                                                                {
  Graph = TSnap::GetMxWcc(_Graph);
  const int Nodes = Graph->GetNodes();
  const int Edges = Graph->GetEdges();
  printf("\nLocal clustering: Graph(%d, %d)\n", Nodes, Edges);
  printf("  Alpha:    %g\n", Alpha());
  printf("  K: %d -- %g -- %dm\n", KMin(), KFac(), int(KMax/Mega(1)));
  printf("  Coverage: %d\n", Coverage());
  printf("  SizeFrac: %g\n\n", SizeFrac());
  TExeTm TotTm;
  Clr();
  TLocClust Clust(Graph, Alpha);
  BestCut.CutNIdV.Clr(false); 
  BestCut.CutSz=-1;  BestCut.Edges=-1;
  double BestPhi = TFlt::Mx;
  int prevK=-1;
  bool NextDone=false;
  if (SaveBestNodesAtK) { // fill buckets (only store nodes in clusters for sizes in SizeBucketSet)
    SizeBucketSet.Clr();
    double PrevBPos = 1, BPos = 1;
    while (BPos <= Mega(100)) {
      PrevBPos = (uint) floor(BPos);
      BPos *= BinFactor;
      if (floor(BPos) == PrevBPos) { BPos = PrevBPos + 1; }
      SizeBucketSet.AddKey(int(floor(BPos) - 1));
    }
  }
  for (int K = KMin, cnt=1; K < KMax; K = int(KFac * double(K))+1, cnt++) {
    if (K == prevK) { K++; } prevK = K;
    const int Runs = 2 + int(Coverage  * floor(double(Graph->GetEdges()) / double(K)));
    printf("%d] K: %d, %d runs\n", cnt+1, K, Runs);
    if (NextDone) { break; } // done
    if (K+1 > 2*Graph->GetEdges()) { K = Graph->GetEdges(); NextDone=true; }
    //if (K+1 > Graph->GetEdges()) { K = Graph->GetEdges(); NextDone=true; }
    TExeTm ExeTm, IterTm;
    double MeanSz=0.0, MeanVol=0.0, Count=0.0;
    for (int run = 0; run < Runs; run++) {
      const int SeedNId = Graph->GetRndNId();  IterTm.Tick();
      Clust.FindBestCut(SeedNId, K, SizeFrac);
      const int Sz = Clust.BestCutNodes();
      const int Vol = Clust.GetCutVol();
      const double Phi = TMath::Round(Clust.GetCutPhi(), 4);
      if (Sz == 0 || Vol == 0 || Phi == 0) { continue; }
      MeanSz+=Sz;  MeanVol+=Vol;  Count+= 1;
      if (SaveAllSweeps) { // save the full cut set and conductances for all trials
        SweepsV.Add(TNodeSweep(SeedNId, Clust.GetNIdV(), Clust.GetPhiV())); }
      int SAtBestPhi=-1;
      for (int s = 0; s < Clust.Len(); s++) {
        const int size = s+1;
        const int cut = Clust.GetCut(s);
        const int edges = (Clust.GetVol(s)-cut)/2;
        const double phi = Clust.GetPhi(s);
        if (( Clust.GetPhi(s) != double(cut)/double(2*edges+cut))) { continue; } // more than half of the edges
        IAssert((Clust.GetVol(s) - cut) % 2 == 0);
        IAssert(phi == double(cut)/double(2*edges+cut));
        IAssert(phi >= 1.0/double((1+s)*s+1));
        // TCutInfo CutInfo(size, edges, cut); Clust.GetNIdV().GetSubValV(0, size-1, CutInfo.CutNIdV);
        //double MxFrac=0, AvgFrac=0, MedianFrac=0, Pct9Frac=0, Flake=0;
        //CutInfo.GetFracDegOut(Graph, MxFrac, AvgFrac, MedianFrac, Pct9Frac, Flake);
        //const double phi = MxFrac;
        if (BestPhi >= phi) {
          BestPhi = phi;
          BestCut = TCutInfo(size, edges, cut);
          SAtBestPhi = s;
        }
        //bool TAKE=false;  if (! BestCutH.IsKey(size)) { TAKE=true; }
        //else { BestCutH.GetDat(size).GetFracDegOut(Graph, MxFrac, AvgFrac, MedianFrac, Pct9Frac, Flake);  if (MxFrac >= phi) { TAKE = true; } }
        // if (TAKE) {
        if (! BestCutH.IsKey(size) || BestCutH.GetDat(size).GetPhi() >= phi) { //new best cut (size, edges inside and nodes)
          BestCutH.AddDat(size, TCutInfo(size, edges, cut));  // for every size store best cut (NIds inside the cut)
          if (SaveBestNodesAtK) { // store node ids in best community for each size k
            if (! SizeBucketSet.Empty() && ! SizeBucketSet.IsKey(size)) { continue; } // only save best clusters at SizeBucketSet
            Clust.GetNIdV().GetSubValV(0, size-1, BestCutH.GetDat(size).CutNIdV); }
        }
        if (SaveAllCond) { // for every size store all conductances
          SizePhiH.AddDat(size).Add(phi); }
      }
      if (SAtBestPhi != -1) { // take nodes in best cluster
        const int size = SAtBestPhi+1;
        Clust.GetNIdV().GetSubValV(0, size-1, BestCut.CutNIdV); 
      }
      if (TLocClust::Verbose) {
        printf(".");
        if (run % 50 == 0) {
          printf("\r                                                   %d / %d \r", run, Runs); }
      }
    }
    if (TLocClust::Verbose) {
      printf("\r  %d / %d: %s                                                   \n", Runs, Runs, ExeTm.GetStr()); 
    }
    MeanSz/=Count;  MeanVol/=Count;
    printf("  Graph(%d, %d)  ", Nodes, Edges);
    printf("       mean:  sz: %.2f  vol: %.2f [%s] %s\n", MeanSz, MeanVol, ExeTm.GetStr(), TExeTm::GetCurTm());
  }
  SizePhiH.SortByKey();
  for (int k = 0; k < SizePhiH.Len(); k++) { 
    SizePhiH[k].Sort(); }
  SweepsV.Sort();
  BestCutH.SortByKey();
  printf("DONE. Graph(%d, %d): Total run time: %s\n\n", Nodes, Edges, TotTm.GetStr());
}
void TLocClustStat::Save ( TSOut SOut) const

Definition at line 280 of file ncp.cpp.

                                          {
  // parameters
  Alpha.Save(SOut);
  SizeFrac.Save(SOut);
  KFac.Save(SOut);
  KMin.Save(SOut);
  KMax.Save(SOut);
  Coverage.Save(SOut);
  // cluster stat
  //SweepsV.Save(SOut);
  //SizePhiH.Save(SOut);
}
void TLocClustStat::SaveTxtInfo ( const TStr OutFNmPref,
const TStr Desc,
const bool &  SetMaxAt1 
) const

Definition at line 841 of file ncp.cpp.

                                                                                                     {
  printf("Save text info...");
  TExeTm ExeTm;
  const int GNodes = Graph->GetNodes();
  const int GEdges = Graph->GetEdges();
  TVec<TFltV> ColV(17);
  double MxFrac=0, AvgFrac=0, MedianFrac=0, Pct9Frac=0, Flake=0;
  // double PrevBPos = 1, BPos = 1;
  for (int i = 0; i < SizeBucketSet.Len(); i++) {
    if ( !BestCutH.IsKey(SizeBucketSet[i])) { continue; }
    const TLocClustStat::TCutInfo& C = GetKNodeCut(SizeBucketSet[i]);
    C.GetFracDegOut(Graph, MxFrac, AvgFrac, MedianFrac, Pct9Frac, Flake);
    ColV[0].Add(C.Nodes());  ColV[1].Add(C.Edges()); 
    ColV[2].Add(C.CutSz());  ColV[3].Add(C.GetPhi());
    ColV[4].Add(C.GetExpansion());       ColV[5].Add(C.GetIntDens()); 
    ColV[6].Add(C.GetCutRatio(GNodes));  ColV[7].Add(C.GetNormCut(GEdges));
    ColV[8].Add(MxFrac);       ColV[9].Add(AvgFrac); 
    ColV[10].Add(MedianFrac);  ColV[11].Add(Pct9Frac);  ColV[12].Add(Flake);
    ColV[13].Add(double(2.0*C.Edges));  ColV[14].Add(C.GetExpEdgesIn(GEdges));
    ColV[15].Add(C.GetModular(GEdges)); ColV[16].Add(C.GetModRat(GEdges));
    printf(".");
  }
  // normalize so that maximum value is at 1
  if (SetMaxAt1) {
    for (int c = 1; c < ColV.Len(); c++) {
      double MaxVal=1e-10;
      for (int r = 0; r < ColV[c].Len(); r++) { MaxVal = TMath::Mx(MaxVal, ColV[c][r]()); }
      for (int r = 0; r < ColV[c].Len(); r++) { ColV[c][r] /= MaxVal; }
    }
  }
  // save
  const TStr DatFNm = TStr::Fmt("ncp.%s.INFO.tab", OutFNmPref.CStr());
  FILE *F = fopen(DatFNm.CStr(), "wt");
  fprintf(F, "# %s %s\n", Desc.CStr(), ParamStr().CStr());
  fprintf(F, "#N_inside\tE_inside\tE_across\tConductance\tExpansion\tIntDensity\tCutRatio\tNormCut\tMx_FracDegOut\tAvg_FDO\tMedian_FDO\t90Pct_FDO\tFlake_FDO\tVolume\tExpVolume\tModularity\tModRatio\n");
  for (int r = 0; r < ColV[0].Len(); r++) {
    fprintf(F, "%g", ColV[0][r]());
    for (int c = 1; c < ColV.Len(); c++) {
      fprintf(F, "\t%g", ColV[c][r]()); }
    fprintf(F, "\n");
  }
  fclose(F);
  printf("[%s]\n", ExeTm.GetStr());
  TGnuPlot GP(TStr::Fmt("ncp.%s.All", OutFNmPref.CStr()), TStr::Fmt("%s %s", 
    Desc.CStr(), ParamStr().CStr()));
  GP.AddPlot(DatFNm, 1, 4, gpwLines, "Conductance", "lw 2");
  GP.AddPlot(DatFNm, 1, 5, gpwPoints, "Expansion", "pt 3");
  GP.AddPlot(DatFNm, 1, 6, gpwPoints, "Internal Density", "pt 5 ps 0.8");
  GP.AddPlot(DatFNm, 1, 7, gpwPoints, "Cut Ratio", "pt 6");
  GP.AddPlot(DatFNm, 1, 8, gpwPoints, "Normalized Cut", "pt 7");
  GP.AddPlot(DatFNm, 1, 9, gpwPoints, "Maximum FDO", "pt 9");
  GP.AddPlot(DatFNm, 1, 10, gpwPoints, "Avg FDO", "pt 11");
  GP.AddPlot(DatFNm, 1, 13, gpwPoints, "Flake FDO", "pt 13");
  GP.SetScale(gpsLog10XY);
  GP.SetXYLabel("k (number of nodes in the cluster)", "Normalized community score (lower is better)");
  GP.SavePng();
}
void TLocClustStat::SetGraph ( const PUNGraph GraphPt) [inline]

Definition at line 194 of file ncp.h.

{ Graph=GraphPt; }

Member Data Documentation

Definition at line 178 of file ncp.h.

Definition at line 186 of file ncp.h.

Definition at line 188 of file ncp.h.

Definition at line 185 of file ncp.h.

Definition at line 187 of file ncp.h.

double TLocClustStat::BinFactor = 1.01 [static]

Definition at line 127 of file ncp.h.

Definition at line 179 of file ncp.h.

Definition at line 180 of file ncp.h.

Definition at line 178 of file ncp.h.

Definition at line 179 of file ncp.h.

Definition at line 179 of file ncp.h.

Definition at line 189 of file ncp.h.

Definition at line 178 of file ncp.h.

Definition at line 184 of file ncp.h.

Definition at line 183 of file ncp.h.

Definition at line 128 of file ncp.h.


The documentation for this class was generated from the following files: