SNAP Library , User Reference  2013-01-07 14:03:36
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
kronecker.cpp
Go to the documentation of this file.
00001 #include "stdafx.h"
00002 #include "kronecker.h"
00003 
00005 // Kronecker Graphs
00006 const double TKronMtx::NInf = -DBL_MAX;
00007 TRnd TKronMtx::Rnd = TRnd(0);
00008 
00009 TKronMtx::TKronMtx(const TFltV& SeedMatrix) : SeedMtx(SeedMatrix) {
00010   MtxDim = (int) sqrt((double)SeedMatrix.Len());
00011   IAssert(MtxDim*MtxDim == SeedMtx.Len());
00012 }
00013 
00014 void TKronMtx::SaveTxt(const TStr& OutFNm) const {
00015   FILE *F = fopen(OutFNm.CStr(), "wt");
00016   for (int i = 0; i < GetDim(); i++) {
00017     for (int j = 0; j < GetDim(); j++) {
00018       if (j > 0) fprintf(F, "\t");
00019       fprintf(F, "%f", At(i,j)); }
00020     fprintf(F, "\n");
00021   }
00022   fclose(F);
00023 }
00024 
00025 TKronMtx& TKronMtx::operator = (const TKronMtx& Kronecker) {
00026   if (this != &Kronecker){
00027     MtxDim=Kronecker.MtxDim;
00028     SeedMtx=Kronecker.SeedMtx;
00029   }
00030   return *this;
00031 }
00032 
00033 bool TKronMtx::IsProbMtx() const {
00034   for (int i = 0; i < Len(); i++) {
00035     if (At(i) < 0.0 || At(i) > 1.0) return false;
00036   }
00037   return true;
00038 }
00039 
00040 void TKronMtx::SetRndMtx(const int& PrmMtxDim, const double& MinProb) {
00041   MtxDim = PrmMtxDim;
00042   SeedMtx.Gen(MtxDim*MtxDim);
00043   for (int p = 0; p < SeedMtx.Len(); p++) {
00044     do {
00045       SeedMtx[p] = TKronMtx::Rnd.GetUniDev();
00046     } while (SeedMtx[p] < MinProb);
00047   }
00048 }
00049 
00050 void TKronMtx::SetEpsMtx(const double& Eps1, const double& Eps0, const int& Eps1Val, const int& Eps0Val) {
00051   for (int i = 0; i < Len(); i++) {
00052     double& Val = At(i);
00053     if (Val == Eps1Val) Val = double(Eps1);
00054     else if (Val == Eps0Val) Val = double(Eps0);
00055   }
00056 }
00057 
00058 // scales parameter values to allow Edges
00059 void TKronMtx::SetForEdges(const int& Nodes, const int& Edges) {
00060   const int KronIter = GetKronIter(Nodes);
00061   const double EZero = pow((double) Edges, 1.0/double(KronIter));
00062   const double Factor = EZero / GetMtxSum();
00063   for (int i = 0; i < Len(); i++) {
00064     At(i) *= Factor;
00065     if (At(i) > 1) { At(i) = 1; }
00066   }
00067 }
00068 
00069 void TKronMtx::AddRndNoise(const double& SDev) {
00070   Dump("before");
00071   double NewVal;
00072   int c =0;
00073   for (int i = 0; i < Len(); i++) {
00074     for(c = 0; ((NewVal = At(i)*Rnd.GetNrmDev(1, SDev, 0.8, 1.2)) < 0.01 || NewVal>0.99) && c <1000; c++) { }
00075     if (c < 999) { At(i) = NewVal; } else { printf("XXXXX\n"); }
00076   }
00077   Dump("after");
00078 }
00079 
00080 TStr TKronMtx::GetMtxStr() const {
00081   TChA ChA("[");
00082   for (int i = 0; i < Len(); i++) {
00083     ChA += TStr::Fmt("%g", At(i));
00084     if ((i+1)%GetDim()==0 && (i+1<Len())) { ChA += "; "; }
00085     else if (i+1<Len()) { ChA += ", "; }
00086   }
00087   ChA += "]";
00088   return TStr(ChA);
00089 }
00090 
00091 void TKronMtx::ToOneMinusMtx() {
00092   for (int i = 0; i < Len(); i++) {
00093     IAssert(At(i) >= 0.0 && At(i) <= 1.0);
00094     At(i) = 1.0 - At(i);
00095   }
00096 }
00097 
00098 void TKronMtx::GetLLMtx(TKronMtx& LLMtx) {
00099   LLMtx.GenMtx(MtxDim);
00100   for (int i = 0; i < Len(); i++) {
00101     if (At(i) != 0.0) { LLMtx.At(i) = log(At(i)); }
00102     else { LLMtx.At(i) = NInf; }
00103   }
00104 }
00105 
00106 void TKronMtx::GetProbMtx(TKronMtx& ProbMtx) {
00107   ProbMtx.GenMtx(MtxDim);
00108   for (int i = 0; i < Len(); i++) {
00109     if (At(i) != NInf) { ProbMtx.At(i) = exp(At(i)); }
00110     else { ProbMtx.At(i) = 0.0; }
00111   }
00112 }
00113 
00114 void TKronMtx::Swap(TKronMtx& KronMtx) {
00115   ::Swap(MtxDim, KronMtx.MtxDim);
00116   SeedMtx.Swap(KronMtx.SeedMtx);
00117 }
00118 
00119 int TKronMtx::GetNodes(const int& NIter) const {
00120   return (int) pow(double(GetDim()), double(NIter));
00121 }
00122 
00123 int TKronMtx::GetEdges(const int& NIter) const {
00124   return (int) pow(double(GetMtxSum()), double(NIter));
00125 }
00126 
00127 int TKronMtx::GetKronIter(const int& Nodes) const {
00128   return (int) ceil(log(double(Nodes)) / log(double(GetDim()))); // upper bound
00129   //return (int) TMath::Round(log(double(Nodes)) / log(double(GetDim()))); // round to nearest power
00130 }
00131 
00132 int TKronMtx::GetNZeroK(const PNGraph& Graph) const {
00133  return GetNodes(GetKronIter(Graph->GetNodes()));
00134 }
00135 
00136 double TKronMtx::GetEZero(const int& Edges, const int& KronIters) const {
00137   return pow((double) Edges, 1.0/double(KronIters));
00138 }
00139 
00140 double TKronMtx::GetMtxSum() const {
00141   double Sum = 0;
00142   for (int i = 0; i < Len(); i++) {
00143     Sum += At(i); }
00144   return Sum;
00145 }
00146 
00147 double TKronMtx::GetRowSum(const int& RowId) const {
00148   double Sum = 0;
00149   for (int c = 0; c < GetDim(); c++) {
00150     Sum += At(RowId, c); }
00151   return Sum;
00152 }
00153 
00154 double TKronMtx::GetColSum(const int& ColId) const {
00155   double Sum = 0;
00156   for (int r = 0; r < GetDim(); r++) {
00157     Sum += At(r, ColId); }
00158   return Sum;
00159 }
00160 
00161 double TKronMtx::GetEdgeProb(int NId1, int NId2, const int& NKronIters) const {
00162   double Prob = 1.0;
00163   for (int level = 0; level < NKronIters; level++) {
00164     Prob *= At(NId1 % MtxDim, NId2 % MtxDim);
00165     if (Prob == 0.0) { return 0.0; }
00166     NId1 /= MtxDim;  NId2 /= MtxDim;
00167   }
00168   return Prob;
00169 }
00170 
00171 double TKronMtx::GetNoEdgeProb(int NId1, int NId2, const int& NKronIters) const {
00172   return 1.0 - GetEdgeProb(NId1, NId2, NKronIters);
00173 }
00174 
00175 double TKronMtx::GetEdgeLL(int NId1, int NId2, const int& NKronIters) const {
00176   double LL = 0.0;
00177   for (int level = 0; level < NKronIters; level++) {
00178     const double& LLVal = At(NId1 % MtxDim, NId2 % MtxDim);
00179     if (LLVal == NInf) return NInf;
00180     LL += LLVal;
00181     NId1 /= MtxDim;  NId2 /= MtxDim;
00182   }
00183   return LL;
00184 }
00185 
00186 double TKronMtx::GetNoEdgeLL(int NId1, int NId2, const int& NKronIters) const {
00187   return log(1.0 - exp(GetEdgeLL(NId1, NId2, NKronIters)));
00188 }
00189 
00190 // 2nd order Taylor approximation log(1-x) ~ -x - 0.5x^2
00191 double TKronMtx::GetApxNoEdgeLL(int NId1, int NId2, const int& NKronIters) const {
00192   const double EdgeLL = GetEdgeLL(NId1, NId2, NKronIters);
00193   return -exp(EdgeLL) - 0.5*exp(2*EdgeLL);
00194 }
00195 
00196 bool TKronMtx::IsEdgePlace(int NId1, int NId2, const int& NKronIters, const double& ProbTresh) const {
00197   double Prob = 1.0;
00198   for (int level = 0; level < NKronIters; level++) {
00199     Prob *= At(NId1 % MtxDim, NId2 % MtxDim);
00200     if (ProbTresh > Prob) { return false; }
00201     NId1 /= MtxDim;  NId2 /= MtxDim;
00202   }
00203   return true;
00204 }
00205 
00206 // deriv a*log(x) = a/x
00207 double TKronMtx::GetEdgeDLL(const int& ParamId, int NId1, int NId2, const int& NKronIters) const {
00208   const int ThetaX = ParamId % GetDim();
00209   const int ThetaY = ParamId / GetDim();
00210   int ThetaCnt = 0;
00211   for (int level = 0; level < NKronIters; level++) {
00212     if ((NId1 % MtxDim) == ThetaX && (NId2 % MtxDim) == ThetaY) {
00213       ThetaCnt++; }
00214     NId1 /= MtxDim;  NId2 /= MtxDim;
00215   }
00216   return double(ThetaCnt) / exp(At(ParamId));
00217 }
00218 
00219 // deriv log(1-x^a*y^b..) = -x'/(1-x) = (-a*x^(a-1)*y^b..) / (1-x^a*y^b..)
00220 double TKronMtx::GetNoEdgeDLL(const int& ParamId, int NId1, int NId2, const int& NKronIters) const {
00221   const int& ThetaX = ParamId % GetDim();
00222   const int& ThetaY = ParamId / GetDim();
00223   int ThetaCnt = 0;
00224   double DLL = 0, LL = 0;
00225   for (int level = 0; level < NKronIters; level++) {
00226     const int X = NId1 % MtxDim;
00227     const int Y = NId2 % MtxDim;
00228     const double LVal = At(X, Y);
00229     if (X == ThetaX && Y == ThetaY) {
00230       if (ThetaCnt != 0) { DLL += LVal; }
00231       ThetaCnt++;
00232     } else { DLL += LVal; }
00233     LL += LVal;
00234     NId1 /= MtxDim;  NId2 /= MtxDim;
00235   }
00236   return -ThetaCnt*exp(DLL) / (1.0 - exp(LL));
00237 }
00238 
00239 // 2nd order Taylor approximation log(1-x) ~ -x - 0.5x^2
00240 double TKronMtx::GetApxNoEdgeDLL(const int& ParamId, int NId1, int NId2, const int& NKronIters) const {
00241   const int& ThetaX = ParamId % GetDim();
00242   const int& ThetaY = ParamId / GetDim();
00243   int ThetaCnt = 0;
00244   double DLL = 0;//, LL = 0;
00245   for (int level = 0; level < NKronIters; level++) {
00246     const int X = NId1 % MtxDim;
00247     const int Y = NId2 % MtxDim;
00248     const double LVal = At(X, Y); IAssert(LVal > NInf);
00249     if (X == ThetaX && Y == ThetaY) {
00250       if (ThetaCnt != 0) { DLL += LVal; }
00251       ThetaCnt++;
00252     } else { DLL += LVal; }
00253     //LL += LVal;
00254     NId1 /= MtxDim;  NId2 /= MtxDim;
00255   }
00256   //return -ThetaCnt*exp(DLL)*(1.0 + exp(LL)); // -x'/(1+x) WRONG!
00257   // deriv = -(ax^(a-1)*y^b..) - a*x^(2a-1)*y^2b..
00258   //       = - (ax^(a-1)*y^b..) - a*x*(x^(a-1)*y^b..)^2
00259   return -ThetaCnt*exp(DLL) - ThetaCnt*exp(At(ThetaX, ThetaY)+2*DLL);
00260 }
00261 
00262 uint TKronMtx::GetNodeSig(const double& OneProb) {
00263   uint Sig = 0;
00264   for (int i = 0; i < (int)(8*sizeof(uint)); i++) {
00265     if (TKronMtx::Rnd.GetUniDev() < OneProb) {
00266       Sig |= (1u<<i); }
00267   }
00268   return Sig;
00269 }
00270 
00271 double TKronMtx::GetEdgeProb(const uint& NId1Sig, const uint& NId2Sig, const int& NIter) const {
00272   Assert(GetDim() == 2);
00273   double Prob = 1.0;
00274   for (int i = 0; i < NIter; i++) {
00275     const uint Mask = (1u<<i);
00276     const uint Bit1 = NId1Sig & Mask;
00277     const uint Bit2 = NId2Sig & Mask;
00278     Prob *= At(int(Bit1!=0), int(Bit2!=0));
00279   }
00280   return Prob;
00281 }
00282 
00283 PNGraph TKronMtx::GenThreshGraph(const double& Thresh) const {
00284   PNGraph Graph = TNGraph::New();
00285   for (int i = 0; i < GetDim(); i++) {
00286     Graph->AddNode(i); }
00287   for (int r = 0; r < GetDim(); r++) {
00288     for (int c = 0; c < GetDim(); c++) {
00289       if (At(r, c) >= Thresh) { Graph->AddEdge(r, c); }
00290     }
00291   }
00292   return Graph;
00293 }
00294 
00295 PNGraph TKronMtx::GenRndGraph(const double& RndFact) const {
00296   PNGraph Graph = TNGraph::New();
00297   for (int i = 0; i < GetDim(); i++) {
00298     Graph->AddNode(i); }
00299   for (int r = 0; r < GetDim(); r++) {
00300     for (int c = 0; c < GetDim(); c++) {
00301       if (RndFact * At(r, c) >= TKronMtx::Rnd.GetUniDev()) { Graph->AddEdge(r, c); }
00302     }
00303   }
00304   return Graph;
00305 }
00306 
00307 int TKronMtx::GetKronIter(const int& GNodes, const int& SeedMtxSz) {
00308   return (int) ceil(log(double(GNodes)) / log(double(SeedMtxSz)));
00309 }
00310 
00311 // slow but exaxt procedure (we flip all O(N^2) edges)
00312 PNGraph TKronMtx::GenKronecker(const TKronMtx& SeedMtx, const int& NIter, const bool& IsDir, const int& Seed) {
00313   const TKronMtx& SeedGraph = SeedMtx;
00314   const int NNodes = SeedGraph.GetNodes(NIter);
00315   printf("  Kronecker: %d nodes, %s...\n", NNodes, IsDir ? "Directed":"UnDirected");
00316   PNGraph Graph = TNGraph::New(NNodes, -1);
00317   TExeTm ExeTm;
00318   TRnd Rnd(Seed);
00319   int edges = 0;
00320   for (int node1 = 0; node1 < NNodes; node1++) {
00321     Graph->AddNode(node1); }
00322   if (IsDir) {
00323     for (int node1 = 0; node1 < NNodes; node1++) {
00324       for (int node2 = 0; node2 < NNodes; node2++) {
00325         if (SeedGraph.IsEdgePlace(node1, node2, NIter, Rnd.GetUniDev())) {
00326           Graph->AddEdge(node1, node2);
00327           edges++;
00328         }
00329       }
00330       if (node1 % 1000 == 0) printf("\r...%dk, %dk", node1/1000, edges/1000);
00331     }
00332   } else {
00333     for (int node1 = 0; node1 < NNodes; node1++) {
00334       for (int node2 = node1; node2 < NNodes; node2++) {
00335         if (SeedGraph.IsEdgePlace(node1, node2, NIter, Rnd.GetUniDev())) {
00336           Graph->AddEdge(node1, node2);
00337           Graph->AddEdge(node2, node1);
00338           edges++;
00339         }
00340       }
00341       if (node1 % 1000 == 0) printf("\r...%dk, %dk", node1/1000, edges/1000);
00342     }
00343   }
00344   printf("\r             %d edges [%s]\n", Graph->GetEdges(), ExeTm.GetTmStr());
00345   return Graph;
00346 }
00347 
00348 // use RMat like recursive descent to quickly generate a Kronecker graph
00349 PNGraph TKronMtx::GenFastKronecker(const TKronMtx& SeedMtx, const int& NIter, const bool& IsDir, const int& Seed) {
00350   const TKronMtx& SeedGraph = SeedMtx;
00351   const int MtxDim = SeedGraph.GetDim();
00352   const double MtxSum = SeedGraph.GetMtxSum();
00353   const int NNodes = SeedGraph.GetNodes(NIter);
00354   const int NEdges = SeedGraph.GetEdges(NIter);
00355   //const double DiagEdges = NNodes * pow(SeedGraph.At(0,0), double(NIter));
00356   //const int NEdges = (int) TMath::Round(((pow(double(SeedGraph.GetMtxSum()), double(NIter)) - DiagEdges) /2.0));
00357   printf("  FastKronecker: %d nodes, %d edges, %s...\n", NNodes, NEdges, IsDir ? "Directed":"UnDirected");
00358   PNGraph Graph = TNGraph::New(NNodes, -1);
00359   TRnd Rnd(Seed);
00360   TExeTm ExeTm;
00361   // prepare cell probability vector
00362   TVec<TFltIntIntTr> ProbToRCPosV; // row, col position
00363   double CumProb = 0.0;
00364   for (int r = 0; r < MtxDim; r++) {
00365     for (int c = 0; c < MtxDim; c++) {
00366       const double Prob = SeedGraph.At(r, c);
00367       if (Prob > 0.0) {
00368         CumProb += Prob;
00369         ProbToRCPosV.Add(TFltIntIntTr(CumProb/MtxSum, r, c));
00370       }
00371     }
00372   }
00373   // add nodes
00374   for (int i = 0; i < NNodes; i++) {
00375     Graph->AddNode(i); }
00376   // add edges
00377   int Rng, Row, Col, Collision=0, n = 0;
00378   for (int edges = 0; edges < NEdges; ) {
00379     Rng=NNodes;  Row=0;  Col=0;
00380     for (int iter = 0; iter < NIter; iter++) {
00381       const double& Prob = Rnd.GetUniDev();
00382       n = 0; while(Prob > ProbToRCPosV[n].Val1) { n++; }
00383       const int MtxRow = ProbToRCPosV[n].Val2;
00384       const int MtxCol = ProbToRCPosV[n].Val3;
00385       Rng /= MtxDim;
00386       Row += MtxRow * Rng;
00387       Col += MtxCol * Rng;
00388     }
00389     if (! Graph->IsEdge(Row, Col)) { // allow self-loops
00390       Graph->AddEdge(Row, Col);  edges++;
00391       if (! IsDir) {
00392         if (Row != Col) Graph->AddEdge(Col, Row);
00393         edges++;
00394       }
00395     } else { Collision++; }
00396     //if (edges % 1000 == 0) printf("\r...%dk", edges/1000);
00397   }
00398   //printf("             %d edges [%s]\n", Graph->GetEdges(), ExeTm.GetTmStr());
00399   printf("             collisions: %d (%.4f)\n", Collision, Collision/(double)Graph->GetEdges());
00400   return Graph;
00401 }
00402 
00403 // use RMat like recursive descent to quickly generate a Kronecker graph
00404 PNGraph TKronMtx::GenFastKronecker(const TKronMtx& SeedMtx, const int& NIter, const int& Edges, const bool& IsDir, const int& Seed) {
00405   const TKronMtx& SeedGraph = SeedMtx;
00406   const int MtxDim = SeedGraph.GetDim();
00407   const double MtxSum = SeedGraph.GetMtxSum();
00408   const int NNodes = SeedGraph.GetNodes(NIter);
00409   const int NEdges = Edges;
00410   //const double DiagEdges = NNodes * pow(SeedGraph.At(0,0), double(NIter));
00411   //const int NEdges = (int) TMath::Round(((pow(double(SeedGraph.GetMtxSum()), double(NIter)) - DiagEdges) /2.0));
00412   printf("  RMat Kronecker: %d nodes, %d edges, %s...\n", NNodes, NEdges, IsDir ? "Directed":"UnDirected");
00413   PNGraph Graph = TNGraph::New(NNodes, -1);
00414   TRnd Rnd(Seed);
00415   TExeTm ExeTm;
00416   // prepare cell probability vector
00417   TVec<TFltIntIntTr> ProbToRCPosV; // row, col position
00418   double CumProb = 0.0;
00419   for (int r = 0; r < MtxDim; r++) {
00420     for (int c = 0; c < MtxDim; c++) {
00421       const double Prob = SeedGraph.At(r, c);
00422       if (Prob > 0.0) {
00423         CumProb += Prob;
00424         ProbToRCPosV.Add(TFltIntIntTr(CumProb/MtxSum, r, c));
00425       }
00426     }
00427   }
00428   // add nodes
00429   for (int i = 0; i < NNodes; i++) {
00430     Graph->AddNode(i); }
00431   // add edges
00432   int Rng, Row, Col, Collision=0, n = 0;
00433   for (int edges = 0; edges < NEdges; ) {
00434     Rng=NNodes;  Row=0;  Col=0;
00435     for (int iter = 0; iter < NIter; iter++) {
00436       const double& Prob = Rnd.GetUniDev();
00437       n = 0; while(Prob > ProbToRCPosV[n].Val1) { n++; }
00438       const int MtxRow = ProbToRCPosV[n].Val2;
00439       const int MtxCol = ProbToRCPosV[n].Val3;
00440       Rng /= MtxDim;
00441       Row += MtxRow * Rng;
00442       Col += MtxCol * Rng;
00443     }
00444     if (! Graph->IsEdge(Row, Col)) { // allow self-loops
00445       Graph->AddEdge(Row, Col);  edges++;
00446       if (! IsDir) {
00447         if (Row != Col) Graph->AddEdge(Col, Row);
00448         edges++;
00449       }
00450     } else { Collision++; }
00451     //if (edges % 1000 == 0) printf("\r...%dk", edges/1000);
00452   }
00453   //printf("             %d edges [%s]\n", Graph->GetEdges(), ExeTm.GetTmStr());
00454   printf("             collisions: %d (%.4f)\n", Collision, Collision/(double)Graph->GetEdges());
00455   return Graph;
00456 }
00457 
00458 PNGraph TKronMtx::GenDetKronecker(const TKronMtx& SeedMtx, const int& NIter, const bool& IsDir) {
00459   const TKronMtx& SeedGraph = SeedMtx;
00460   const int NNodes = SeedGraph.GetNodes(NIter);
00461   printf("  Deterministic Kronecker: %d nodes, %s...\n", NNodes, IsDir ? "Directed":"UnDirected");
00462   PNGraph Graph = TNGraph::New(NNodes, -1);
00463   TExeTm ExeTm;
00464   int edges = 0;
00465   for (int node1 = 0; node1 < NNodes; node1++) { Graph->AddNode(node1); }
00466 
00467   for (int node1 = 0; node1 < NNodes; node1++) {
00468     for (int node2 = 0; node2 < NNodes; node2++) {
00469       if (SeedGraph.IsEdgePlace(node1, node2, NIter, Rnd.GetUniDev())) {
00470         Graph->AddEdge(node1, node2);
00471         edges++;
00472       }
00473     }
00474     if (node1 % 1000 == 0) printf("\r...%dk, %dk", node1/1000, edges/1000);
00475   }
00476   return Graph;
00477 }
00478 
00479 void TKronMtx::PlotCmpGraphs(const TKronMtx& SeedMtx, const PNGraph& Graph, const TStr& FNmPref, const TStr& Desc) {
00480   const int KronIters = SeedMtx.GetKronIter(Graph->GetNodes());
00481   PNGraph KronG, WccG;
00482   const bool FastGen = true;
00483   if (FastGen) { KronG = TKronMtx::GenFastKronecker(SeedMtx, KronIters, true, 0); }
00484   else { KronG = TKronMtx::GenKronecker(SeedMtx, KronIters, true, 0); }
00485   TSnap::DelZeroDegNodes(KronG);
00486   WccG = TSnap::GetMxWcc(KronG);
00487   const TStr Desc1 = TStr::Fmt("%s", Desc.CStr());
00488   TGStatVec GS(tmuNodes, TFSet() | gsdInDeg | gsdOutDeg | gsdWcc | gsdHops | gsdScc | gsdClustCf | gsdSngVec | gsdSngVal);
00489   //gsdHops
00490   //gsWccHops, gsdSngVal, gsdSngVec, gsdClustCf
00491   GS.Add(Graph, TSecTm(1), TStr::Fmt("GRAPH  G(%d, %d)", Graph->GetNodes(), Graph->GetEdges()));
00492   GS.Add(KronG, TSecTm(2), TStr::Fmt("KRONECKER  K(%d, %d)", KronG->GetNodes(), KronG->GetEdges()));
00493   GS.Add(WccG, TSecTm(3),  TStr::Fmt("KRONECKER  wccK(%d, %d)", WccG->GetNodes(), WccG->GetEdges()));
00494   const TStr Style = "linewidth 1 pointtype 6 pointsize 1";
00495   GS.ImposeDistr(gsdInDeg, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
00496   GS.ImposeDistr(gsdInDeg, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
00497   GS.ImposeDistr(gsdOutDeg, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
00498   GS.ImposeDistr(gsdOutDeg, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
00499   GS.ImposeDistr(gsdHops, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
00500   GS.ImposeDistr(gsdClustCf, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
00501   GS.ImposeDistr(gsdClustCf, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
00502   GS.ImposeDistr(gsdSngVal, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
00503   GS.ImposeDistr(gsdSngVal, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
00504   GS.ImposeDistr(gsdSngVec, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
00505   GS.ImposeDistr(gsdSngVec, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
00506   GS.ImposeDistr(gsdWcc, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
00507   GS.ImposeDistr(gsdWcc, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
00508   GS.ImposeDistr(gsdScc, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
00509   GS.ImposeDistr(gsdScc, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
00510 //    typedef enum { distrUndef, distrInDeg, distrOutDeg, distrWcc, distrScc,
00511 //  distrHops, distrWccHops, distrSngVal, distrSngVec, distrClustCf, distrMx } TGraphDistr;*/
00512 }
00513 
00514 void TKronMtx::PlotCmpGraphs(const TKronMtx& SeedMtx1, const TKronMtx& SeedMtx2, const PNGraph& Graph, const TStr& FNmPref, const TStr& Desc) {
00515   const int KronIters1 = SeedMtx1.GetKronIter(Graph->GetNodes());
00516   const int KronIters2 = SeedMtx2.GetKronIter(Graph->GetNodes());
00517   PNGraph KronG1, KronG2;
00518   const bool FastGen = true;
00519   if (FastGen) {
00520     KronG1 = TKronMtx::GenFastKronecker(SeedMtx1, KronIters1, true, 0);
00521     KronG2 = TKronMtx::GenFastKronecker(SeedMtx2, KronIters2, false, 0); } 
00522   else {
00523     KronG1 = TKronMtx::GenKronecker(SeedMtx1, KronIters1, true, 0);
00524     KronG2 = TKronMtx::GenKronecker(SeedMtx2, KronIters2, true, 0);  }
00525   TSnap::DelZeroDegNodes(KronG1);
00526   TSnap::DelZeroDegNodes(KronG2);
00527   const TStr Desc1 = TStr::Fmt("%s", Desc.CStr());
00528   TGStatVec GS(tmuNodes, TFSet() | gsdInDeg | gsdOutDeg | gsdWcc | gsdScc | gsdHops | gsdClustCf | gsdSngVec | gsdSngVal | gsdTriadPart);
00529   //gsdHops
00530   //gsWccHops, gsdSngVal, gsdSngVec, gsdClustCf
00531   GS.Add(Graph, TSecTm(1), TStr::Fmt("GRAPH  G(%d, %d)", Graph->GetNodes(), Graph->GetEdges()));
00532   GS.Add(KronG1, TSecTm(2), TStr::Fmt("KRONECKER1  K(%d, %d) %s", KronG1->GetNodes(), KronG1->GetEdges(), SeedMtx1.GetMtxStr().CStr()));
00533   GS.Add(KronG2, TSecTm(3),  TStr::Fmt("KRONECKER2  K(%d, %d) %s", KronG2->GetNodes(), KronG2->GetEdges(), SeedMtx2.GetMtxStr().CStr()));
00534   const TStr Style = "linewidth 1 pointtype 6 pointsize 1";
00535   // raw data
00536   GS.ImposeDistr(gsdInDeg, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
00537   GS.ImposeDistr(gsdOutDeg, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
00538   GS.ImposeDistr(gsdHops, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
00539   GS.ImposeDistr(gsdClustCf, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
00540   GS.ImposeDistr(gsdSngVal, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
00541   GS.ImposeDistr(gsdSngVec, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
00542   GS.ImposeDistr(gsdWcc, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
00543   GS.ImposeDistr(gsdScc, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
00544   GS.ImposeDistr(gsdTriadPart, FNmPref, Desc1, false, false, gpwLinesPoints, Style);
00545   // smooth
00546   GS.ImposeDistr(gsdInDeg, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
00547   GS.ImposeDistr(gsdOutDeg, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
00548   GS.ImposeDistr(gsdClustCf, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
00549   GS.ImposeDistr(gsdScc, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
00550   GS.ImposeDistr(gsdWcc, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
00551   GS.ImposeDistr(gsdSngVec, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
00552   GS.ImposeDistr(gsdSngVal, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
00553   GS.ImposeDistr(gsdTriadPart, FNmPref+"-B", Desc1, true, false, gpwLinesPoints, Style);
00554 }
00555 
00556 void TKronMtx::PlotCmpGraphs(const TVec<TKronMtx>& SeedMtxV, const PNGraph& Graph, const TStr& FNmPref, const TStr& Desc) {
00557   const TStr Desc1 = TStr::Fmt("%s", Desc.CStr());
00558   TGStatVec GS(tmuNodes, TFSet() | gsdInDeg | gsdOutDeg | gsdWcc | gsdScc | gsdHops | gsdClustCf | gsdSngVec | gsdSngVal);
00559   GS.Add(Graph, TSecTm(1), TStr::Fmt("GRAPH  G(%d, %d)", Graph->GetNodes(), Graph->GetEdges()));
00560   //gsdHops
00561   //gsWccHops, gsdSngVal, gsdSngVec, gsdClustCf
00562   for (int m = 0; m < SeedMtxV.Len(); m++) {
00563     const int KronIters = SeedMtxV[m].GetKronIter(Graph->GetNodes());
00564     PNGraph KronG1 = TKronMtx::GenFastKronecker(SeedMtxV[m], KronIters, true, 0);
00565     printf("*** K(%d, %d) n0=%d\n", KronG1->GetNodes(), KronG1->GetEdges(), SeedMtxV[m].GetDim());
00566     TSnap::DelZeroDegNodes(KronG1);
00567     printf(" del zero deg K(%d, %d) n0=%d\n", KronG1->GetNodes(), KronG1->GetEdges(), m);
00568     GS.Add(KronG1, TSecTm(m+2), TStr::Fmt("K(%d, %d) n0^k=%d n0=%d", KronG1->GetNodes(), KronG1->GetEdges(), SeedMtxV[m].GetNZeroK(Graph), SeedMtxV[m].GetDim()));
00569     // plot after each Kronecker is done
00570     const TStr Style = "linewidth 1 pointtype 6 pointsize 1";
00571     GS.ImposeDistr(gsdInDeg, FNmPref, Desc1, false, false, gpwLines, Style);
00572     GS.ImposeDistr(gsdInDeg, FNmPref+"-B", Desc1, true, false, gpwLines, Style);
00573     GS.ImposeDistr(gsdOutDeg, FNmPref, Desc1, false, false, gpwLines, Style);
00574     GS.ImposeDistr(gsdOutDeg, FNmPref+"-B", Desc1, true, false, gpwLines, Style);
00575     GS.ImposeDistr(gsdHops, FNmPref, Desc1, false, false, gpwLines, Style);
00576     GS.ImposeDistr(gsdClustCf, FNmPref, Desc1, false, false, gpwLines, Style);
00577     GS.ImposeDistr(gsdClustCf, FNmPref+"-B", Desc1, true, false, gpwLines, Style);
00578     GS.ImposeDistr(gsdSngVal, FNmPref, Desc1, false, false, gpwLines, Style);
00579     GS.ImposeDistr(gsdSngVal, FNmPref+"-B", Desc1, true, false, gpwLines, Style);
00580     GS.ImposeDistr(gsdSngVec, FNmPref, Desc1, false, false, gpwLines, Style);
00581     GS.ImposeDistr(gsdSngVec, FNmPref+"-B", Desc1, true, false, gpwLines, Style);
00582     GS.ImposeDistr(gsdWcc, FNmPref, Desc1, false, false, gpwLines, Style);
00583     GS.ImposeDistr(gsdWcc, FNmPref+"-B", Desc1, true, false, gpwLines, Style);
00584     GS.ImposeDistr(gsdScc, FNmPref, Desc1, false, false, gpwLines, Style);
00585     GS.ImposeDistr(gsdScc, FNmPref+"-B", Desc1, true, false, gpwLines, Style);
00586   }
00587   //    typedef enum { distrUndef, distrInDeg, distrOutDeg, distrWcc, distrScc,
00588   //  distrHops, distrWccHops, distrSngVal, distrSngVec, distrClustCf, distrMx } TGraphDistr;*/
00589 }
00590 
00591 void TKronMtx::KronMul(const TKronMtx& Left, const TKronMtx& Right, TKronMtx& Result) {
00592   const int LDim = Left.GetDim();
00593   const int RDim = Right.GetDim();
00594   Result.GenMtx(LDim * RDim);
00595   for (int r1 = 0; r1 < LDim; r1++) {
00596     for (int c1 = 0; c1 < LDim; c1++) {
00597       const double& Val = Left.At(r1, c1);
00598       for (int r2 = 0; r2 < RDim; r2++) {
00599         for (int c2 = 0; c2 < RDim; c2++) {
00600           Result.At(r1*RDim+r2, c1*RDim+c2) = Val * Right.At(r2, c2);
00601         }
00602       }
00603     }
00604   }
00605 }
00606 
00607 void TKronMtx::KronSum(const TKronMtx& Left, const TKronMtx& Right, TKronMtx& Result) {
00608   const int LDim = Left.GetDim();
00609   const int RDim = Right.GetDim();
00610   Result.GenMtx(LDim * RDim);
00611   for (int r1 = 0; r1 < LDim; r1++) {
00612     for (int c1 = 0; c1 < LDim; c1++) {
00613       const double& Val = Left.At(r1, c1);
00614       for (int r2 = 0; r2 < RDim; r2++) {
00615         for (int c2 = 0; c2 < RDim; c2++) {
00616           if (Val == NInf || Right.At(r2, c2) == NInf) {
00617             Result.At(r1*RDim+r2, c1*RDim+c2) = NInf; }
00618           else {
00619             Result.At(r1*RDim+r2, c1*RDim+c2) = Val + Right.At(r2, c2); }
00620         }
00621       }
00622     }
00623   }
00624 }
00625 
00626 void TKronMtx::KronPwr(const TKronMtx& KronMtx, const int& NIter, TKronMtx& OutMtx) {
00627   OutMtx = KronMtx;
00628   TKronMtx NewOutMtx;
00629   for (int iter = 0; iter < NIter; iter++) {
00630     KronMul(OutMtx, KronMtx, NewOutMtx);
00631     NewOutMtx.Swap(OutMtx);
00632   }
00633 
00634 }
00635 
00636 void TKronMtx::Dump(const TStr& MtxNm, const bool& Sort) const {
00637   /*printf("%s: %d x %d\n", MtxNm.Empty()?"Mtx":MtxNm.CStr(), GetDim(), GetDim());
00638   for (int r = 0; r < GetDim(); r++) {
00639     for (int c = 0; c < GetDim(); c++) { printf("  %8.2g", At(r, c)); }
00640     printf("\n");
00641   }*/
00642   if (! MtxNm.Empty()) printf("%s\n", MtxNm.CStr());
00643   double Sum=0.0;
00644   TFltV ValV = SeedMtx;
00645   if (Sort) { ValV.Sort(false); }
00646   for (int i = 0; i < ValV.Len(); i++) {
00647     printf("  %10.4g", ValV[i]());
00648     Sum += ValV[i];
00649     if ((i+1) % GetDim() == 0) { printf("\n"); }
00650   }
00651   printf(" (sum:%.4f)\n", Sum);
00652 }
00653 
00654 // average difference in the parameters
00655 double TKronMtx::GetAvgAbsErr(const TKronMtx& Kron1, const TKronMtx& Kron2) {
00656   TFltV P1 = Kron1.GetMtx();
00657   TFltV P2 = Kron2.GetMtx();
00658   IAssert(P1.Len() == P2.Len());
00659   P1.Sort();  P2.Sort();
00660   double delta = 0.0;
00661   for (int i = 0; i < P1.Len(); i++) {
00662     delta += fabs(P1[i] - P2[i]);
00663   }
00664   return delta/P1.Len();
00665 }
00666 
00667 // average L2 difference in the parameters
00668 double TKronMtx::GetAvgFroErr(const TKronMtx& Kron1, const TKronMtx& Kron2) {
00669   TFltV P1 = Kron1.GetMtx();
00670   TFltV P2 = Kron2.GetMtx();
00671   IAssert(P1.Len() == P2.Len());
00672   P1.Sort();  P2.Sort();
00673   double delta = 0.0;
00674   for (int i = 0; i < P1.Len(); i++) {
00675     delta += pow(P1[i] - P2[i], 2);
00676   }
00677   return sqrt(delta/P1.Len());
00678 }
00679 
00680 // get matrix from matlab matrix notation
00681 TKronMtx TKronMtx::GetMtx(TStr MatlabMtxStr) {
00682   TStrV RowStrV, ColStrV;
00683   MatlabMtxStr.ChangeChAll(',', ' ');
00684   MatlabMtxStr.SplitOnAllCh(';', RowStrV);  IAssert(! RowStrV.Empty());
00685   RowStrV[0].SplitOnWs(ColStrV);    IAssert(! ColStrV.Empty());
00686   const int Rows = RowStrV.Len();
00687   const int Cols = ColStrV.Len();
00688   IAssert(Rows == Cols);
00689   TKronMtx Mtx(Rows);
00690   for (int r = 0; r < Rows; r++) {
00691     RowStrV[r].SplitOnWs(ColStrV);
00692     IAssert(ColStrV.Len() == Cols);
00693     for (int c = 0; c < Cols; c++) {
00694       Mtx.At(r, c) = (double) ColStrV[c].GetFlt(); }
00695   }
00696   return Mtx;
00697 }
00698 
00699 TKronMtx TKronMtx::GetRndMtx(const int& Dim, const double& MinProb) {
00700   TKronMtx Mtx;
00701   Mtx.SetRndMtx(Dim, MinProb);
00702   return Mtx;
00703 }
00704 
00705 TKronMtx TKronMtx::GetInitMtx(const int& Dim, const int& Nodes, const int& Edges) {
00706   const double MxParam = 0.8+TKronMtx::Rnd.GetUniDev()/5.0;
00707   const double MnParam = 0.2-TKronMtx::Rnd.GetUniDev()/5.0;
00708   const double Step = (MxParam-MnParam) / (Dim*Dim-1);
00709   TFltV ParamV(Dim*Dim);
00710   if (Dim == 1) { ParamV.PutAll(0.5); } // random graph
00711   else {
00712     for (int p = 0; p < ParamV.Len(); p++) {
00713       ParamV[p] = MxParam - p*Step; }
00714   }
00715   //IAssert(ParamV[0]==MxParam && ParamV.Last()==MnParam);
00716   TKronMtx Mtx(ParamV);
00717   Mtx.SetForEdges(Nodes, Edges);
00718   return Mtx;
00719 }
00720 
00721 TKronMtx TKronMtx::GetInitMtx(const TStr& MtxStr, const int& Dim, const int& Nodes, const int& Edges) {
00722   TKronMtx Mtx(Dim);
00723   if (TCh::IsNum(MtxStr[0])) { Mtx = TKronMtx::GetMtx(MtxStr); }
00724   else if (MtxStr[0] == 'r') { Mtx = TKronMtx::GetRndMtx(Dim, 0.1); }
00725   else if (MtxStr[0] == 'a') {
00726     const double Prob = TKronMtx::Rnd.GetUniDev();
00727     if (Prob < 0.4) {
00728       Mtx = TKronMtx::GetInitMtx(Dim, Nodes, Edges); }
00729     else { // interpolate so that there are in the corners 0.9, 0.5, 0.1, 0.5
00730       const double Max = 0.9+TKronMtx::Rnd.GetUniDev()/10.0;
00731       const double Min = 0.1-TKronMtx::Rnd.GetUniDev()/10.0;
00732       const double Med = (Max-Min)/2.0;
00733       Mtx.At(0,0)      = Max;       Mtx.At(0,Dim-1) = Med;
00734       Mtx.At(Dim-1, 0) = Med;  Mtx.At(Dim-1, Dim-1) = Min;
00735       for (int i = 1; i < Dim-1; i++) {
00736         Mtx.At(i,i) = Max - double(i)*(Max-Min)/double(Dim-1);
00737         Mtx.At(i, 0) = Mtx.At(0, i) = Max - double(i)*(Max-Med)/double(Dim-1);
00738         Mtx.At(i, Dim-1) = Mtx.At(Dim-1, i) = Med - double(i)*(Med-Min)/double(Dim-1);
00739       }
00740       for (int i = 1; i < Dim-1; i++) {
00741         for (int j = 1; j < Dim-1; j++) {
00742           if (i >= j) { continue; }
00743           Mtx.At(i,j) = Mtx.At(j,i) = Mtx.At(i,i) - (j-i)*(Mtx.At(i,i)-Mtx.At(i,Dim-1))/(Dim-i-1);
00744         }
00745       }
00746       Mtx.AddRndNoise(0.1);
00747     }
00748   } else { FailR("Wrong mtx: matlab str, or random (r), or all (a)"); }
00749   Mtx.SetForEdges(Nodes, Edges);
00750   return Mtx;
00751 }
00752 
00753 TKronMtx TKronMtx::GetMtxFromNm(const TStr& MtxNm) {
00754   if (MtxNm == "3chain") return TKronMtx::GetMtx("1 1 0; 1 1 1; 0 1 1");
00755   else if (MtxNm == "4star") return TKronMtx::GetMtx("1 1 1 1; 1 1 0 0 ; 1 0 1 0; 1 0 0 1");
00756   else if (MtxNm == "4chain") return TKronMtx::GetMtx("1 1 0 0; 1 1 1 0 ; 0 1 1 1; 0 0 1 1");
00757   else if (MtxNm == "4square") return TKronMtx::GetMtx("1 1 0 1; 1 1 1 0 ; 0 1 1 1; 1 0 1 1");
00758   else if (MtxNm == "5star") return TKronMtx::GetMtx("1 1 1 1 1; 1 1 0 0 0; 1 0 1 0 0; 1 0 0 1 0; 1 0 0 0 1");
00759   else if (MtxNm == "6star") return TKronMtx::GetMtx("1 1 1 1 1 1; 1 1 0 0 0 0; 1 0 1 0 0 0; 1 0 0 1 0 0; 1 0 0 0 1 0; 1 0 0 0 0 1");
00760   else if (MtxNm == "7star") return TKronMtx::GetMtx("1 1 1 1 1 1 1; 1 1 0 0 0 0 0; 1 0 1 0 0 0 0; 1 0 0 1 0 0 0; 1 0 0 0 1 0 0; 1 0 0 0 0 1 0; 1 0 0 0 0 0 1");
00761   else if (MtxNm == "5burst") return TKronMtx::GetMtx("1 1 1 1 0; 1 1 0 0 0; 1 0 1 0 0; 1 0 0 1 1; 0 0 0 1 1");
00762   else if (MtxNm == "7burst") return TKronMtx::GetMtx("1 0 0 1 0 0 0; 0 1 0 1 0 0 0; 0 0 1 1 0 0 0; 1 1 1 1 1 0 0; 0 0 0 1 1 1 1; 0 0 0 0 1 1 0; 0 0 0 0 1 0 1");
00763   else if (MtxNm == "7cross") return TKronMtx::GetMtx("1 0 0 1 0 0 0; 0 1 0 1 0 0 0; 0 0 1 1 0 0 0; 1 1 1 1 1 0 0; 0 0 0 1 1 1 0; 0 0 0 0 1 1 1; 0 0 0 0 0 1 1");
00764   FailR(TStr::Fmt("Unknow matrix: '%s'", MtxNm.CStr()).CStr());
00765   return TKronMtx();
00766 }
00767 
00768 TKronMtx TKronMtx::LoadTxt(const TStr& MtxFNm) {
00769   PSs Ss = TSs::LoadTxt(ssfTabSep, MtxFNm);
00770   IAssertR(Ss->GetXLen() == Ss->GetYLen(), "Not a square matrix");
00771   IAssert(Ss->GetYLen() == Ss->GetXLen());
00772   TKronMtx Mtx(Ss->GetYLen());
00773   for (int r = 0; r < Ss->GetYLen(); r++) {
00774     for (int c = 0; c < Ss->GetXLen(); c++) {
00775       Mtx.At(r, c) = (double) Ss->At(c, r).GetFlt(); }
00776   }
00777   return Mtx;
00778 }
00779 
00780 
00782 // Kronecker Log Likelihood
00783 TKroneckerLL::TKroneckerLL(const PNGraph& GraphPt, const TFltV& ParamV, const double& PermPSwapNd): PermSwapNodeProb(PermPSwapNd) {
00784   InitLL(GraphPt, TKronMtx(ParamV));
00785 }
00786 
00787 TKroneckerLL::TKroneckerLL(const PNGraph& GraphPt, const TKronMtx& ParamMtx, const double& PermPSwapNd) : PermSwapNodeProb(PermPSwapNd) {
00788   InitLL(GraphPt, ParamMtx);
00789 }
00790 
00791 TKroneckerLL::TKroneckerLL(const PNGraph& GraphPt, const TKronMtx& ParamMtx, const TIntV& NodeIdPermV, const double& PermPSwapNd) : PermSwapNodeProb(PermPSwapNd) {
00792   InitLL(GraphPt, ParamMtx);
00793   NodePerm = NodeIdPermV;
00794   SetIPerm(NodePerm);
00795 }
00796 
00797 PKroneckerLL TKroneckerLL::New(const PNGraph& GraphPt, const TKronMtx& ParamMtx, const double& PermPSwapNd) {
00798   return new TKroneckerLL(GraphPt, ParamMtx, PermPSwapNd);
00799 }
00800 
00801 PKroneckerLL TKroneckerLL::New(const PNGraph& GraphPt, const TKronMtx& ParamMtx, const TIntV& NodeIdPermV, const double& PermPSwapNd) {
00802   return new TKroneckerLL(GraphPt, ParamMtx, NodeIdPermV, PermPSwapNd);
00803 }
00804 
00805 void TKroneckerLL::SetPerm(const char& PermId) {
00806   if (PermId == 'o') { SetOrderPerm(); }
00807   else if (PermId == 'd') { SetDegPerm(); }
00808   else if (PermId == 'r') { SetRndPerm(); }
00809   else if (PermId == 'b') { SetBestDegPerm(); }
00810   else FailR("Unknown permutation type (o,d,r)");
00811 }
00812 
00813 void TKroneckerLL::SetOrderPerm() {
00814   NodePerm.Gen(Nodes, 0);
00815   for (int i = 0; i < Graph->GetNodes(); i++) {
00816     NodePerm.Add(i); }
00817   SetIPerm(NodePerm);
00818 }
00819 
00820 void TKroneckerLL::SetRndPerm() {
00821   NodePerm.Gen(Nodes, 0);
00822   for (int i = 0; i < Graph->GetNodes(); i++) {
00823     NodePerm.Add(i); }
00824   NodePerm.Shuffle(TKronMtx::Rnd);
00825   SetIPerm(NodePerm);
00826 }
00827 
00828 void TKroneckerLL::SetDegPerm() {
00829   TIntPrV DegNIdV;
00830   for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00831     DegNIdV.Add(TIntPr(NI.GetDeg(), NI.GetId()));
00832   }
00833   DegNIdV.Sort(false);
00834   NodePerm.Gen(DegNIdV.Len(), 0);
00835   for (int i = 0; i < DegNIdV.Len(); i++) {
00836     NodePerm.Add(DegNIdV[i].Val2);
00837   }
00838   SetIPerm(NodePerm);
00839 }
00840 
00842 void TKroneckerLL::SetBestDegPerm() {
00843   NodePerm.Gen(Nodes);
00844   const int NZero = ProbMtx.GetDim();
00845   TFltIntPrV DegV(Nodes), CDegV(Nodes);
00846   TFltV Row(NZero);
00847   TFltV Col(NZero);
00848   for(int i = 0; i < NZero; i++) {
00849           for(int j = 0; j < NZero; j++) {
00850                   Row[i] += ProbMtx.At(i, j);
00851                   Col[i] += ProbMtx.At(j, i);
00852           }
00853   }
00854 
00855   for(int i = 0; i < Nodes; i++) {
00856           TNGraph::TNodeI NodeI = Graph->GetNI(i);
00857           int NId = i;
00858           double RowP = 1.0, ColP = 1.0;
00859           for(int j = 0; j < KronIters; j++) {
00860                   int Bit = NId % NZero;
00861                   RowP *= Row[Bit];             ColP *= Col[Bit];
00862                   NId /= NZero;
00863           }
00864           CDegV[i] = TFltIntPr(RowP + ColP, i);
00865           DegV[i] = TFltIntPr(NodeI.GetDeg(), i);
00866   }
00867   DegV.Sort(false);             CDegV.Sort(false);
00868   for(int i = 0; i < Nodes; i++) {
00869           NodePerm[DegV[i].Val2] = CDegV[i].Val2;
00870   }
00871   SetIPerm(NodePerm);
00872 }
00873 
00875 void TKroneckerLL::SetIPerm(const TIntV& Perm) {
00876         InvertPerm.Gen(Perm.Len());
00877         for (int i = 0; i < Perm.Len(); i++) {
00878                 InvertPerm[Perm[i]] = i;
00879         }
00880 }
00881 
00882 void TKroneckerLL::SetGraph(const PNGraph& GraphPt) {
00883   Graph = GraphPt;
00884   bool NodesOk = true;
00885   // check that nodes IDs are {0,1,..,Nodes-1}
00886   for (int nid = 0; nid < Graph->GetNodes(); nid++) {
00887     if (! Graph->IsNode(nid)) { NodesOk=false; break; } }
00888   if (! NodesOk) {
00889     TIntV NIdV;  GraphPt->GetNIdV(NIdV);
00890     Graph = TSnap::GetSubGraph(GraphPt, NIdV, true);
00891     for (int nid = 0; nid < Graph->GetNodes(); nid++) {
00892       IAssert(Graph->IsNode(nid)); }
00893   }
00894   Nodes = Graph->GetNodes();
00895   IAssert(LLMtx.GetDim() > 1 && LLMtx.Len() == ProbMtx.Len());
00896   KronIters = (int) ceil(log(double(Nodes)) / log(double(ProbMtx.GetDim())));
00897   // edge vector (for swap-edge permutation proposal)
00898 //  if (PermSwapNodeProb < 1.0) { /// !!!!! MYUNGHWAN, CHECK! WHY IS THIS COMMENTED OUT
00899     GEdgeV.Gen(Graph->GetEdges(), 0);
00900     for (TNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
00901       if (EI.GetSrcNId() != EI.GetDstNId()) {
00902         GEdgeV.Add(TIntTr(EI.GetSrcNId(), EI.GetDstNId(), -1));
00903       }
00904     }
00905 //  }
00906 
00907   RealNodes = Nodes;
00908   RealEdges = Graph->GetEdges();
00909   LEdgeV = TIntTrV();
00910   LSelfEdge = 0;
00911 }
00912 
00913 
00914 void TKroneckerLL::AppendIsoNodes() {
00915   Nodes = (int) pow((double)ProbMtx.GetDim(), KronIters);
00916   // add nodes until filling the Kronecker graph model
00917   for (int nid = Graph->GetNodes(); nid < Nodes; nid++) {
00918           Graph->AddNode(nid);
00919   }
00920 }
00921 
00923 void TKroneckerLL::RestoreGraph(const bool RestoreNodes) {
00924         //      remove from Graph
00925         int NId1, NId2;
00926         for (int e = 0; e < LEdgeV.Len(); e++) {
00927         NId1 = LEdgeV[e].Val1;  NId2 = LEdgeV[e].Val2;
00928                 Graph->DelEdge(NId1, NId2);
00929 //              GEdgeV.DelIfIn(LEdgeV[e]);
00930         }
00931         if(LEdgeV.Len() - LSelfEdge)
00932                 GEdgeV.Del(GEdgeV.Len() - LEdgeV.Len() + LSelfEdge, GEdgeV.Len() - 1);
00933         LEdgeV.Clr();
00934         LSelfEdge = 0;
00935 
00936         if(RestoreNodes) {
00937                 for(int i = Graph->GetNodes()-1; i >= RealNodes; i--) {
00938                         Graph->DelNode(i);
00939                 }
00940         }
00941 }
00942 
00943 double TKroneckerLL::GetFullGraphLL() const {
00944   // the number of times a seed matrix element appears in
00945   // the full kronecker adjacency matrix after KronIter
00946   // kronecker multiplications
00947   double ElemCnt = 1;
00948   const double dim = LLMtx.GetDim();
00949   // count number of times x appears in the full kronecker matrix
00950   for (int i = 1; i < KronIters; i++) {
00951     ElemCnt = dim*dim*ElemCnt + TMath::Power(dim, 2*i);
00952   }
00953   return ElemCnt * LLMtx.GetMtxSum();
00954 }
00955 
00956 double TKroneckerLL::GetFullRowLL(int RowId) const {
00957   double RowLL = 0.0;
00958   const int MtxDim = LLMtx.GetDim();
00959   for (int level = 0; level < KronIters; level++) {
00960     RowLL += LLMtx.GetRowSum(RowId % MtxDim);
00961     RowId /= MtxDim;
00962   }
00963   return RowLL;
00964 }
00965 
00966 double TKroneckerLL::GetFullColLL(int ColId) const {
00967   double ColLL = 0.0;
00968   const int MtxDim = LLMtx.GetDim();
00969   for (int level = 0; level < KronIters; level++) {
00970     ColLL += LLMtx.GetColSum(ColId % MtxDim);
00971     ColId /= MtxDim;
00972   }
00973   return ColLL;
00974 }
00975 
00976 double TKroneckerLL::GetEmptyGraphLL() const {
00977   double LL = 0;
00978   for (int NId1 = 0; NId1 < LLMtx.GetNodes(KronIters); NId1++) {
00979     for (int NId2 = 0; NId2 < LLMtx.GetNodes(KronIters); NId2++) {
00980       LL = LL + LLMtx.GetNoEdgeLL(NId1, NId2, KronIters);
00981     }
00982   }
00983   return LL;
00984 }
00985 
00986 // 2nd prder Taylor approximation, log(1-x) ~ -x - 0.5x^2
00987 double TKroneckerLL::GetApxEmptyGraphLL() const {
00988   double Sum=0.0, SumSq=0.0;
00989   for (int i = 0; i < ProbMtx.Len(); i++) {
00990     Sum += ProbMtx.At(i);
00991     SumSq += TMath::Sqr(ProbMtx.At(i));
00992   }
00993   return -pow(Sum, KronIters) - 0.5*pow(SumSq, KronIters);
00994 }
00995 
00996 void TKroneckerLL::InitLL(const TFltV& ParamV) {
00997   InitLL(TKronMtx(ParamV));
00998 }
00999 
01000 void TKroneckerLL::InitLL(const TKronMtx& ParamMtx) {
01001   IAssert(ParamMtx.IsProbMtx());
01002   ProbMtx = ParamMtx;
01003   ProbMtx.GetLLMtx(LLMtx);
01004   LogLike = TKronMtx::NInf;
01005   if (GradV.Len() != ProbMtx.Len()) {
01006     GradV.Gen(ProbMtx.Len()); }
01007   GradV.PutAll(0.0);
01008 }
01009 
01010 void TKroneckerLL::InitLL(const PNGraph& GraphPt, const TKronMtx& ParamMtx) {
01011   IAssert(ParamMtx.IsProbMtx());
01012   ProbMtx = ParamMtx;
01013   ProbMtx.GetLLMtx(LLMtx);
01014   SetGraph(GraphPt);
01015   LogLike = TKronMtx::NInf;
01016   if (GradV.Len() != ProbMtx.Len()) {
01017     GradV.Gen(ProbMtx.Len()); }
01018   GradV.PutAll(0.0);
01019 }
01020 
01021 // exact graph log-likelihood, takes O(N^2 + E)
01022 double TKroneckerLL::CalcGraphLL() {
01023   LogLike = GetEmptyGraphLL(); // takes O(N^2)
01024   for (int nid = 0; nid < Nodes; nid++) {
01025     const TNGraph::TNodeI Node = Graph->GetNI(nid);
01026     const int SrcNId = NodePerm[nid];
01027     for (int e = 0; e < Node.GetOutDeg(); e++) {
01028       const int DstNId = NodePerm[Node.GetOutNId(e)];
01029       LogLike = LogLike - LLMtx.GetNoEdgeLL(SrcNId, DstNId, KronIters)
01030         + LLMtx.GetEdgeLL(SrcNId, DstNId, KronIters);
01031     }
01032   }
01033   return LogLike;
01034 }
01035 
01036 // approximate graph log-likelihood, takes O(E + N_0)
01037 double TKroneckerLL::CalcApxGraphLL() {
01038   LogLike = GetApxEmptyGraphLL(); // O(N_0)
01039   for (int nid = 0; nid < Nodes; nid++) {
01040     const TNGraph::TNodeI Node = Graph->GetNI(nid);
01041     const int SrcNId = NodePerm[nid];
01042     for (int e = 0; e < Node.GetOutDeg(); e++) {
01043       const int DstNId = NodePerm[Node.GetOutNId(e)];
01044       LogLike = LogLike - LLMtx.GetApxNoEdgeLL(SrcNId, DstNId, KronIters)
01045         + LLMtx.GetEdgeLL(SrcNId, DstNId, KronIters);
01046     }
01047   }
01048   return LogLike;
01049 }
01050 
01051 // Used in TKroneckerLL::SwapNodesLL: DeltaLL if we
01052 // add the node to the matrix (node gets/creates all
01053 // of its in- and out-edges).
01054 // Zero is for the empty row/column (isolated node)
01055 double TKroneckerLL::NodeLLDelta(const int& NId) const {
01056   if (! Graph->IsNode(NId)) { return 0.0; } // zero degree node
01057   double Delta = 0.0;
01058   const TNGraph::TNodeI Node = Graph->GetNI(NId);
01059   // out-edges
01060   const int SrcRow = NodePerm[NId];
01061   for (int e = 0; e < Node.GetOutDeg(); e++) {
01062     const int DstCol = NodePerm[Node.GetOutNId(e)];
01063     Delta += - LLMtx.GetApxNoEdgeLL(SrcRow, DstCol, KronIters)
01064       + LLMtx.GetEdgeLL(SrcRow, DstCol, KronIters);
01065   }
01066   //in-edges
01067   const int SrcCol = NodePerm[NId];
01068   for (int e = 0; e < Node.GetInDeg(); e++) {
01069     const int DstRow = NodePerm[Node.GetInNId(e)];
01070     Delta += - LLMtx.GetApxNoEdgeLL(DstRow, SrcCol, KronIters)
01071       + LLMtx.GetEdgeLL(DstRow, SrcCol, KronIters);
01072   }
01073   // double counted self-edge
01074   if (Graph->IsEdge(NId, NId)) {
01075     Delta += + LLMtx.GetApxNoEdgeLL(SrcRow, SrcCol, KronIters)
01076       - LLMtx.GetEdgeLL(SrcRow, SrcCol, KronIters);
01077     IAssert(SrcRow == SrcCol);
01078   }
01079   return Delta;
01080 }
01081 
01082 // swapping two nodes, only need to go over two rows and columns
01083 double TKroneckerLL::SwapNodesLL(const int& NId1, const int& NId2) {
01084   // subtract old LL (remove nodes)
01085   LogLike = LogLike - NodeLLDelta(NId1) - NodeLLDelta(NId2);
01086   const int PrevId1 = NodePerm[NId1], PrevId2 = NodePerm[NId2];
01087   // double-counted edges
01088   if (Graph->IsEdge(NId1, NId2)) {
01089     LogLike += - LLMtx.GetApxNoEdgeLL(PrevId1, PrevId2, KronIters)
01090       + LLMtx.GetEdgeLL(PrevId1, PrevId2, KronIters); }
01091   if (Graph->IsEdge(NId2, NId1)) {
01092     LogLike += - LLMtx.GetApxNoEdgeLL(PrevId2, PrevId1, KronIters)
01093       + LLMtx.GetEdgeLL(PrevId2, PrevId1, KronIters); }
01094   // swap
01095   NodePerm.Swap(NId1, NId2);
01096   InvertPerm.Swap(NodePerm[NId1], NodePerm[NId2]);
01097   // add new LL (add nodes)
01098   LogLike = LogLike + NodeLLDelta(NId1) + NodeLLDelta(NId2);
01099   const int NewId1 = NodePerm[NId1], NewId2 = NodePerm[NId2];
01100   // correct for double-counted edges
01101   if (Graph->IsEdge(NId1, NId2)) {
01102     LogLike += + LLMtx.GetApxNoEdgeLL(NewId1, NewId2, KronIters)
01103       - LLMtx.GetEdgeLL(NewId1, NewId2, KronIters); }
01104   if (Graph->IsEdge(NId2, NId1)) {
01105     LogLike += + LLMtx.GetApxNoEdgeLL(NewId2, NewId1, KronIters)
01106       - LLMtx.GetEdgeLL(NewId2, NewId1, KronIters); }
01107   return LogLike;
01108 }
01109 
01110 // metropolis sampling from P(permutation|graph)
01111 bool TKroneckerLL::SampleNextPerm(int& NId1, int& NId2) {
01112   // pick 2 uniform nodes and swap
01113   if (TKronMtx::Rnd.GetUniDev() < PermSwapNodeProb) {
01114     NId1 = TKronMtx::Rnd.GetUniDevInt(Nodes);
01115     NId2 = TKronMtx::Rnd.GetUniDevInt(Nodes);
01116     while (NId2 == NId1) { NId2 = TKronMtx::Rnd.GetUniDevInt(Nodes); }
01117   } else {
01118     // pick uniform edge and swap endpoints (slow as it moves around high degree nodes)
01119     const int e = TKronMtx::Rnd.GetUniDevInt(GEdgeV.Len());
01120     NId1 = GEdgeV[e].Val1;  NId2 = GEdgeV[e].Val2;
01121   }
01122   const double U = TKronMtx::Rnd.GetUniDev();
01123   const double OldLL = LogLike;
01124   const double NewLL = SwapNodesLL(NId1, NId2);
01125   const double LogU = log(U);
01126   if (LogU > NewLL - OldLL) { // reject
01127     LogLike = OldLL;
01128     NodePerm.Swap(NId2, NId1); //swap back
01129         InvertPerm.Swap(NodePerm[NId2], NodePerm[NId1]); // swap back
01130     return false;
01131   }
01132   return true; // accept new sample
01133 }
01134 
01135 // exact gradient of an empty graph, O(N^2)
01136 double TKroneckerLL::GetEmptyGraphDLL(const int& ParamId) const {
01137   double DLL = 0.0;
01138   for (int NId1 = 0; NId1 < Nodes; NId1++) {
01139     for (int NId2 = 0; NId2 < Nodes; NId2++) {
01140       DLL += LLMtx.GetNoEdgeDLL(ParamId, NodePerm[NId1], NodePerm[NId2], KronIters);
01141     }
01142   }
01143   return DLL;
01144 }
01145 
01146 // approx gradient, using 2nd order Taylor approximation, O(N_0^2)
01147 double TKroneckerLL::GetApxEmptyGraphDLL(const int& ParamId) const {
01148   double Sum=0.0, SumSq=0.0;
01149   for (int i = 0; i < ProbMtx.Len(); i++) {
01150     Sum += ProbMtx.At(i);
01151     SumSq += TMath::Sqr(ProbMtx.At(i));
01152   }
01153   // d/dx -sum(x_i) - 0.5sum(x_i^2) = d/dx sum(theta)^k - 0.5 sum(theta^2)^k
01154   return -KronIters*pow(Sum, KronIters-1) - KronIters*pow(SumSq, KronIters-1)*ProbMtx.At(ParamId);
01155 }
01156 
01157 // exact graph gradient, runs O(N^2)
01158 const TFltV& TKroneckerLL::CalcGraphDLL() {
01159   for (int ParamId = 0; ParamId < LLMtx.Len(); ParamId++) {
01160     double DLL = 0.0;
01161     for (int NId1 = 0; NId1 < Nodes; NId1++) {
01162       for (int NId2 = 0; NId2 < Nodes; NId2++) {
01163         if (Graph->IsEdge(NId1, NId2)) {
01164           DLL += LLMtx.GetEdgeDLL(ParamId, NodePerm[NId1], NodePerm[NId2], KronIters);
01165         } else {
01166           DLL += LLMtx.GetNoEdgeDLL(ParamId, NodePerm[NId1], NodePerm[NId2], KronIters);
01167         }
01168       }
01169     }
01170     GradV[ParamId] = DLL;
01171   }
01172   return GradV;
01173 }
01174 
01175 // slow (but correct) approximate gradient, runs O(N^2)
01176 const TFltV& TKroneckerLL::CalcFullApxGraphDLL() {
01177   for (int ParamId = 0; ParamId < LLMtx.Len(); ParamId++) {
01178     double DLL = 0.0;
01179     for (int NId1 = 0; NId1 < Nodes; NId1++) {
01180       for (int NId2 = 0; NId2 < Nodes; NId2++) {
01181         if (Graph->IsEdge(NId1, NId2)) {
01182           DLL += LLMtx.GetEdgeDLL(ParamId, NodePerm[NId1], NodePerm[NId2], KronIters);
01183         } else {
01184           DLL += LLMtx.GetApxNoEdgeDLL(ParamId, NodePerm[NId1], NodePerm[NId2], KronIters);
01185         }
01186       }
01187     }
01188     GradV[ParamId] = DLL;
01189   }
01190   return GradV;
01191 }
01192 
01193 // fast approximate gradient, runs O(E)
01194 const TFltV& TKroneckerLL::CalcApxGraphDLL() {
01195   for (int ParamId = 0; ParamId < LLMtx.Len(); ParamId++) {
01196     double DLL = GetApxEmptyGraphDLL(ParamId);
01197     for (int nid = 0; nid < Nodes; nid++) {
01198       const TNGraph::TNodeI Node = Graph->GetNI(nid);
01199       const int SrcNId = NodePerm[nid];
01200       for (int e = 0; e < Node.GetOutDeg(); e++) {
01201         const int DstNId = NodePerm[Node.GetOutNId(e)];
01202         DLL = DLL - LLMtx.GetApxNoEdgeDLL(ParamId, SrcNId, DstNId, KronIters)
01203           + LLMtx.GetEdgeDLL(ParamId, SrcNId, DstNId, KronIters);
01204       }
01205     }
01206     GradV[ParamId] = DLL;
01207   }
01208   return GradV;
01209 }
01210 
01211 // Used in TKroneckerLL::UpdateGraphDLL: DeltaDLL if we
01212 // add the node to the empty matrix/graph (node
01213 // gets/creates all of its in- and out-edges).
01214 double TKroneckerLL::NodeDLLDelta(const int ParamId, const int& NId) const {
01215   if (! Graph->IsNode(NId)) { return 0.0; } // zero degree node
01216   double Delta = 0.0;
01217   const TNGraph::TNodeI Node = Graph->GetNI(NId);
01218   const int SrcRow = NodePerm[NId];
01219   for (int e = 0; e < Node.GetOutDeg(); e++) {
01220     const int DstCol = NodePerm[Node.GetOutNId(e)];
01221     Delta += - LLMtx.GetApxNoEdgeDLL(ParamId, SrcRow, DstCol, KronIters)
01222       + LLMtx.GetEdgeDLL(ParamId, SrcRow, DstCol, KronIters);
01223   }
01224   const int SrcCol = NodePerm[NId];
01225   for (int e = 0; e < Node.GetInDeg(); e++) {
01226     const int DstRow = NodePerm[Node.GetInNId(e)];
01227     Delta += - LLMtx.GetApxNoEdgeDLL(ParamId, DstRow, SrcCol, KronIters)
01228       + LLMtx.GetEdgeDLL(ParamId, DstRow, SrcCol, KronIters);
01229   }
01230   // double counter self-edge
01231   if (Graph->IsEdge(NId, NId)) {
01232     Delta += + LLMtx.GetApxNoEdgeDLL(ParamId, SrcRow, SrcCol, KronIters)
01233       - LLMtx.GetEdgeDLL(ParamId, SrcRow, SrcCol, KronIters);
01234     IAssert(SrcRow == SrcCol);
01235   }
01236   return Delta;
01237 }
01238 
01239 // given old DLL and new permutation, efficiently updates the DLL
01240 // permutation is new, but DLL is old
01241 void TKroneckerLL::UpdateGraphDLL(const int& SwapNId1, const int& SwapNId2) {
01242   for (int ParamId = 0; ParamId < LLMtx.Len(); ParamId++) {
01243     // permutation before the swap (swap back to previous position)
01244     NodePerm.Swap(SwapNId1, SwapNId2);
01245     // subtract old DLL
01246     TFlt& DLL = GradV[ParamId];
01247     DLL = DLL - NodeDLLDelta(ParamId, SwapNId1) - NodeDLLDelta(ParamId, SwapNId2);
01248     // double-counted edges
01249     const int PrevId1 = NodePerm[SwapNId1], PrevId2 = NodePerm[SwapNId2];
01250     if (Graph->IsEdge(SwapNId1, SwapNId2)) {
01251       DLL += - LLMtx.GetApxNoEdgeDLL(ParamId, PrevId1, PrevId2, KronIters)
01252         + LLMtx.GetEdgeDLL(ParamId, PrevId1, PrevId2, KronIters); }
01253     if (Graph->IsEdge(SwapNId2, SwapNId1)) {
01254       DLL += - LLMtx.GetApxNoEdgeDLL(ParamId, PrevId2, PrevId1, KronIters)
01255         + LLMtx.GetEdgeDLL(ParamId, PrevId2, PrevId1, KronIters); }
01256     // permutation after the swap (restore the swap)
01257     NodePerm.Swap(SwapNId1, SwapNId2);
01258     // add new DLL
01259     DLL = DLL + NodeDLLDelta(ParamId, SwapNId1) + NodeDLLDelta(ParamId, SwapNId2);
01260     const int NewId1 = NodePerm[SwapNId1], NewId2 = NodePerm[SwapNId2];
01261     // double-counted edges
01262     if (Graph->IsEdge(SwapNId1, SwapNId2)) {
01263       DLL += + LLMtx.GetApxNoEdgeDLL(ParamId, NewId1, NewId2, KronIters)
01264         - LLMtx.GetEdgeDLL(ParamId, NewId1, NewId2, KronIters); }
01265     if (Graph->IsEdge(SwapNId2, SwapNId1)) {
01266       DLL += + LLMtx.GetApxNoEdgeDLL(ParamId, NewId2, NewId1, KronIters)
01267         - LLMtx.GetEdgeDLL(ParamId, NewId2, NewId1, KronIters); }
01268   }
01269 }
01270 
01271 void TKroneckerLL::SampleGradient(const int& WarmUp, const int& NSamples, double& AvgLL, TFltV& AvgGradV) {
01272   printf("SampleGradient: %s (%s warm-up):", TInt::GetMegaStr(NSamples).CStr(), TInt::GetMegaStr(WarmUp).CStr());
01273   int NId1=0, NId2=0, NAccept=0;
01274   TExeTm ExeTm1;
01275   if (WarmUp > 0) {
01276     CalcApxGraphLL();
01277     for (int s = 0; s < WarmUp; s++) { SampleNextPerm(NId1, NId2); }
01278     printf("  warm-up:%s,", ExeTm1.GetTmStr());  ExeTm1.Tick();
01279   }
01280   CalcApxGraphLL(); // re-calculate LL (due to numerical errors)
01281   CalcApxGraphDLL();
01282   AvgLL = 0;
01283   AvgGradV.Gen(LLMtx.Len());  AvgGradV.PutAll(0.0);
01284   printf("  sampl");
01285   for (int s = 0; s < NSamples; s++) {
01286     if (SampleNextPerm(NId1, NId2)) { // new permutation
01287       UpdateGraphDLL(NId1, NId2);  NAccept++; }
01288     for (int m = 0; m < LLMtx.Len(); m++) { AvgGradV[m] += GradV[m]; }
01289     AvgLL += GetLL();
01290   }
01291   printf("ing");
01292   AvgLL = AvgLL / double(NSamples);
01293   for (int m = 0; m < LLMtx.Len(); m++) {
01294     AvgGradV[m] = AvgGradV[m] / double(NSamples); }
01295   printf(":%s (%.0f/s), accept %.1f%%\n", ExeTm1.GetTmStr(), double(NSamples)/ExeTm1.GetSecs(),
01296     double(100*NAccept)/double(NSamples));
01297 }
01298 
01299 double TKroneckerLL::GradDescent(const int& NIter, const double& LrnRate, double MnStep, double MxStep, const int& WarmUp, const int& NSamples) {
01300   printf("\n----------------------------------------------------------------------\n");
01301   printf("Fitting graph on %d nodes, %d edges\n", Graph->GetNodes(), Graph->GetEdges());
01302   printf("Kron iters:  %d (== %d nodes)\n\n", KronIters(), ProbMtx.GetNodes(KronIters()));
01303   TExeTm IterTm, TotalTm;
01304   double OldLL=-1e10, CurLL=0;
01305   const double EZero = pow((double) Graph->GetEdges(), 1.0/double(KronIters));
01306   TFltV CurGradV, LearnRateV(GetParams()), LastStep(GetParams());
01307   LearnRateV.PutAll(LrnRate);
01308   TKronMtx NewProbMtx = ProbMtx;
01309 
01310   if(DebugMode) {  
01311           LLV.Gen(NIter, 0);
01312           MtxV.Gen(NIter, 0);
01313   }
01314 
01315   for (int Iter = 0; Iter < NIter; Iter++) {
01316     printf("%03d] ", Iter);
01317     SampleGradient(WarmUp, NSamples, CurLL, CurGradV);
01318     for (int p = 0; p < GetParams(); p++) {
01319       LearnRateV[p] *= 0.95;
01320       if (Iter < 1) {
01321         while (fabs(LearnRateV[p]*CurGradV[p]) > MxStep) { LearnRateV[p] *= 0.95; }
01322         while (fabs(LearnRateV[p]*CurGradV[p]) < 0.02) { LearnRateV[p] *= (1.0/0.95); } // move more
01323       } else {
01324         // set learn rate so that move for each parameter is inside the [MnStep, MxStep]
01325         while (fabs(LearnRateV[p]*CurGradV[p]) > MxStep) { LearnRateV[p] *= 0.95; printf(".");}
01326         while (fabs(LearnRateV[p]*CurGradV[p]) < MnStep) { LearnRateV[p] *= (1.0/0.95); printf("*");}
01327         if (MxStep > 3*MnStep) { MxStep *= 0.95; }
01328       }
01329       NewProbMtx.At(p) = ProbMtx.At(p) + LearnRateV[p]*CurGradV[p];
01330       if (NewProbMtx.At(p) > 0.9999) { NewProbMtx.At(p)=0.9999; }
01331       if (NewProbMtx.At(p) < 0.0001) { NewProbMtx.At(p)=0.0001; }
01332     }
01333     printf("  trueE0: %.2f (%d),  estE0: %.2f (%d),  ERR: %f\n", EZero, Graph->GetEdges(),
01334       ProbMtx.GetMtxSum(), ProbMtx.GetEdges(KronIters), fabs(EZero-ProbMtx.GetMtxSum()));
01335     printf("  currLL: %.4f, deltaLL: %.4f\n", CurLL, CurLL-OldLL); // positive is good
01336     for (int p = 0; p < GetParams(); p++) {
01337       printf("    %d]  %f  <--  %f + %9f   Grad: %9.1f   Rate: %g\n", p, NewProbMtx.At(p),
01338         ProbMtx.At(p), (double)(LearnRateV[p]*CurGradV[p]), CurGradV[p](), LearnRateV[p]());
01339     }
01340     if (Iter+1 < NIter) { // skip last update
01341       ProbMtx = NewProbMtx;  ProbMtx.GetLLMtx(LLMtx); }
01342     OldLL=CurLL;
01343     printf("\n");  fflush(stdout);
01344 
01345         if(DebugMode) {  
01346                 LLV.Add(CurLL);
01347                 MtxV.Add(NewProbMtx);
01348         }
01349   }
01350   printf("TotalExeTm: %s %g\n", TotalTm.GetStr(), TotalTm.GetSecs());
01351   ProbMtx.Dump("FITTED PARAMS", false);
01352   return CurLL;
01353 }
01354 
01355 double TKroneckerLL::GradDescent2(const int& NIter, const double& LrnRate, double MnStep, double MxStep, const int& WarmUp, const int& NSamples) {
01356   printf("\n----------------------------------------------------------------------\n");
01357   printf("GradDescent2\n");
01358   printf("Fitting graph on %d nodes, %d edges\n", Graph->GetNodes(), Graph->GetEdges());
01359   printf("Skip moves that make likelihood smaller\n");
01360   printf("Kron iters:  %d (== %d nodes)\n\n", KronIters(), ProbMtx.GetNodes(KronIters()));
01361   TExeTm IterTm, TotalTm;
01362   double CurLL=0, NewLL=0;
01363   const double EZero = pow((double) Graph->GetEdges(), 1.0/double(KronIters));
01364   TFltV CurGradV, NewGradV, LearnRateV(GetParams()), LastStep(GetParams());
01365   LearnRateV.PutAll(LrnRate);
01366   TKronMtx NewProbMtx=ProbMtx, CurProbMtx=ProbMtx;
01367   bool GoodMove = false;
01368   // Start
01369   for (int Iter = 0; Iter < NIter; Iter++) {
01370     printf("%03d] ", Iter);
01371     if (! GoodMove) { SampleGradient(WarmUp, NSamples, CurLL, CurGradV); }
01372     CurProbMtx = ProbMtx;
01373     // update parameters
01374     for (int p = 0; p < GetParams(); p++) {
01375       while (fabs(LearnRateV[p]*CurGradV[p]) > MxStep) { LearnRateV[p] *= 0.95; printf(".");}
01376       while (fabs(LearnRateV[p]*CurGradV[p]) < MnStep) { LearnRateV[p] *= (1.0/0.95); printf("*");}
01377       NewProbMtx.At(p) = CurProbMtx.At(p) + LearnRateV[p]*CurGradV[p];
01378       if (NewProbMtx.At(p) > 0.9999) { NewProbMtx.At(p)=0.9999; }
01379       if (NewProbMtx.At(p) < 0.0001) { NewProbMtx.At(p)=0.0001; }
01380       LearnRateV[p] *= 0.95;
01381     }
01382     printf("  ");
01383     ProbMtx=NewProbMtx;  ProbMtx.GetLLMtx(LLMtx);
01384     SampleGradient(WarmUp, NSamples, NewLL, NewGradV);
01385     if (NewLL > CurLL) { // accept the move
01386       printf("== Good move:\n");
01387       printf("  trueE0: %.2f (%d),  estE0: %.2f (%d),  ERR: %f\n", EZero, Graph->GetEdges(),
01388         ProbMtx.GetMtxSum(), ProbMtx.GetEdges(KronIters), fabs(EZero-ProbMtx.GetMtxSum()));
01389       printf("  currLL: %.4f  deltaLL: %.4f\n", CurLL, NewLL-CurLL); // positive is good
01390       for (int p = 0; p < GetParams(); p++) {
01391         printf("    %d]  %f  <--  %f + %9f   Grad: %9.1f   Rate: %g\n", p, NewProbMtx.At(p),
01392           CurProbMtx.At(p), (double)(LearnRateV[p]*CurGradV[p]), CurGradV[p](), LearnRateV[p]()); }
01393       CurLL = NewLL;
01394       CurGradV = NewGradV;
01395       GoodMove = true;
01396     } else {
01397       printf("** BAD move:\n");
01398       printf("  *trueE0: %.2f (%d),  estE0: %.2f (%d),  ERR: %f\n", EZero, Graph->GetEdges(),
01399         ProbMtx.GetMtxSum(), ProbMtx.GetEdges(KronIters), fabs(EZero-ProbMtx.GetMtxSum()));
01400       printf("  *curLL:  %.4f  deltaLL: %.4f\n", CurLL, NewLL-CurLL); // positive is good
01401       for (int p = 0; p < GetParams(); p++) {
01402         printf("   b%d]  %f  <--  %f + %9f   Grad: %9.1f   Rate: %g\n", p, NewProbMtx.At(p),
01403           CurProbMtx.At(p), (double)(LearnRateV[p]*CurGradV[p]), CurGradV[p](), LearnRateV[p]()); }
01404       // move to old position
01405       ProbMtx = CurProbMtx;  ProbMtx.GetLLMtx(LLMtx);
01406       GoodMove = false;
01407     }
01408     printf("\n");  fflush(stdout);
01409   }
01410   printf("TotalExeTm: %s %g\n", TotalTm.GetStr(), TotalTm.GetSecs());
01411   ProbMtx.Dump("FITTED PARAMS\n", false);
01412   return CurLL;
01413 }
01414 
01416 // filling in random edges for KronEM
01417 void TKroneckerLL::SetRandomEdges(const int& NEdges, const bool isDir) {
01418         int count = 0, added = 0, collision = 0;
01419         const int MtxDim = ProbMtx.GetDim();
01420         const double MtxSum = ProbMtx.GetMtxSum();
01421         TVec<TFltIntIntTr> ProbToRCPosV; // row, col position
01422         double CumProb = 0.0;
01423 
01424         for(int r = 0; r < MtxDim; r++) {
01425                 for(int c = 0; c < MtxDim; c++) {
01426                         const double Prob = ProbMtx.At(r, c);
01427                         if (Prob > 0.0) {
01428                                 CumProb += Prob;
01429                                 ProbToRCPosV.Add(TFltIntIntTr(CumProb/MtxSum, r, c));
01430                         }
01431                 }
01432         }
01433 
01434         int Rng, Row, Col, n, NId1, NId2;
01435         while(added < NEdges) {
01436                 Rng = Nodes;    Row = 0;        Col = 0;
01437                 for (int iter = 0; iter < KronIters; iter++) {
01438                         const double& Prob = TKronMtx::Rnd.GetUniDev();
01439                         n = 0; while(Prob > ProbToRCPosV[n].Val1) { n++; }
01440                         const int MtxRow = ProbToRCPosV[n].Val2;
01441                         const int MtxCol = ProbToRCPosV[n].Val3;
01442                         Rng /= MtxDim;
01443                         Row += MtxRow * Rng;
01444                         Col += MtxCol * Rng;
01445                 }
01446 
01447                 count++;
01448 
01449                 NId1 = InvertPerm[Row]; NId2 = InvertPerm[Col];
01450 
01451                 //      Check conflicts
01452                 if(EMType != kronEdgeMiss && IsObsEdge(NId1, NId2)) {
01453                         continue;
01454                 }
01455 
01456                 if (! Graph->IsEdge(NId1, NId2)) {
01457                         Graph->AddEdge(NId1, NId2);
01458                         if(NId1 != NId2)        { GEdgeV.Add(TIntTr(NId1, NId2, LEdgeV.Len())); }
01459                         else { LSelfEdge++; }
01460                         LEdgeV.Add(TIntTr(NId1, NId2, GEdgeV.Len()-1));
01461                         added++;
01462                         if (! isDir) {
01463                                 if (NId1 != NId2) {
01464                                    Graph->AddEdge(NId2, NId1);
01465                                    GEdgeV.Add(TIntTr(NId2, NId1, LEdgeV.Len()));
01466                                    LEdgeV.Add(TIntTr(NId2, NId1, GEdgeV.Len()-1));
01467                                    added++;
01468                                 }
01469                         }
01470                 } else { collision ++; }
01471         }
01472 //      printf("total = %d / added = %d / collision = %d\n", count, added, collision);
01473 }
01474 
01475 // sampling setup for KronEM
01476 void TKroneckerLL::MetroGibbsSampleSetup(const int& WarmUp) {
01477         double alpha = log(ProbMtx.GetMtxSum()) / log(double(ProbMtx.GetDim()));
01478         int NId1 = 0, NId2 = 0;
01479         int NMissing;
01480 
01481         RestoreGraph(false);
01482         if(EMType == kronEdgeMiss) {
01483                 CalcApxGraphLL();
01484                 for (int s = 0; s < WarmUp; s++)        SampleNextPerm(NId1, NId2);
01485         }
01486 
01487         if(EMType == kronFutureLink) {
01488                 NMissing = (int) (pow(ProbMtx.GetMtxSum(), KronIters) - pow(double(RealNodes), alpha));
01489         } else if(EMType == kronEdgeMiss) {
01490                 NMissing = MissEdges;
01491         } else {
01492                 NMissing = (int) (pow(ProbMtx.GetMtxSum(), KronIters) * (1.0 - pow(double(RealNodes) / double(Nodes), 2)));
01493         }
01494         NMissing = (NMissing < 1) ? 1 : NMissing;
01495 
01496         SetRandomEdges(NMissing, true);
01497 
01498         CalcApxGraphLL();
01499         for (int s = 0; s < WarmUp; s++)        SampleNextPerm(NId1, NId2);
01500 }
01501 
01502 // Metropolis-Hastings steps for KronEM
01503 void TKroneckerLL::MetroGibbsSampleNext(const int& WarmUp, const bool DLLUpdate) {
01504         int NId1 = 0, NId2 = 0, hit = 0, GId = 0;
01505         TIntTr EdgeToRemove, NewEdge;
01506         double RndAccept;
01507 
01508         if(LEdgeV.Len()) {
01509                 for(int i = 0; i < WarmUp; i++) {
01510                         hit = TKronMtx::Rnd.GetUniDevInt(LEdgeV.Len());
01511 
01512                         NId1 = LEdgeV[hit].Val1;        NId2 = LEdgeV[hit].Val2;
01513                         GId = LEdgeV[hit].Val3;
01514                         SetRandomEdges(1, true);
01515                         NewEdge = LEdgeV.Last();
01516 
01517                         RndAccept = (1.0 - exp(LLMtx.GetEdgeLL(NewEdge.Val1, NewEdge.Val2, KronIters))) / (1.0 - exp(LLMtx.GetEdgeLL(NId1, NId2, KronIters)));
01518                         RndAccept = (RndAccept > 1.0) ? 1.0 : RndAccept;
01519 
01520                         if(TKronMtx::Rnd.GetUniDev() > RndAccept) { //  reject
01521                                 Graph->DelEdge(NewEdge.Val1, NewEdge.Val2);
01522                                 if(NewEdge.Val1 != NewEdge.Val2) {      GEdgeV.DelLast();       }
01523                                 else {  LSelfEdge--;    }
01524                                 LEdgeV.DelLast();
01525                         } else {        //      accept
01526                                 Graph->DelEdge(NId1, NId2);
01527                                 LEdgeV[hit] = LEdgeV.Last();
01528                                 LEdgeV.DelLast();
01529                                 if(NId1 == NId2) {
01530                                         LSelfEdge--;
01531                                         if(NewEdge.Val1 != NewEdge.Val2) {
01532                                                 GEdgeV[GEdgeV.Len()-1].Val3 = hit;
01533                                         }
01534                                 } else {
01535                                         IAssertR(GEdgeV.Last().Val3 >= 0, "Invalid indexing");
01536 
01537                                         GEdgeV[GId] = GEdgeV.Last();
01538                                         if(NewEdge.Val1 != NewEdge.Val2) {
01539                                                 GEdgeV[GId].Val3 = hit;
01540                                         }
01541                                         LEdgeV[GEdgeV[GId].Val3].Val3 = GId;
01542                                         GEdgeV.DelLast();
01543                                 }
01544 
01545                         LogLike += LLMtx.GetApxNoEdgeLL(EdgeToRemove.Val1, EdgeToRemove.Val2, KronIters) - LLMtx.GetEdgeLL(EdgeToRemove.Val1, EdgeToRemove.Val2, KronIters);
01546                         LogLike += -LLMtx.GetApxNoEdgeLL(NewEdge.Val1, NewEdge.Val2, KronIters) + LLMtx.GetEdgeLL(NewEdge.Val1, NewEdge.Val2, KronIters);
01547 
01548                                 if(DLLUpdate) {
01549                                         for (int p = 0; p < LLMtx.Len(); p++) {
01550                                                 GradV[p] += LLMtx.GetApxNoEdgeDLL(p, EdgeToRemove.Val1, EdgeToRemove.Val2, KronIters) - LLMtx.GetEdgeDLL(p, EdgeToRemove.Val1, EdgeToRemove.Val2, KronIters);
01551                                                 GradV[p] += -LLMtx.GetApxNoEdgeDLL(p, NewEdge.Val1, NewEdge.Val2, KronIters) + LLMtx.GetEdgeDLL(p, NewEdge.Val1, NewEdge.Val2, KronIters);
01552                                         }
01553                                 }
01554                         }
01555                 }
01556         }
01557 
01558 //      CalcApxGraphLL();
01559         for (int s = 0; s < WarmUp; s++) {
01560                 if(SampleNextPerm(NId1, NId2)) {
01561                         if(DLLUpdate)   UpdateGraphDLL(NId1, NId2);
01562                 }
01563         }
01564 }
01565 
01566 // E-step in KronEM
01567 void TKroneckerLL::RunEStep(const int& GibbsWarmUp, const int& WarmUp, const int& NSamples, TFltV& LLV, TVec<TFltV>& DLLV) {
01568         TExeTm ExeTm, TotalTm;
01569         LLV.Gen(NSamples, 0);
01570         DLLV.Gen(NSamples, 0);
01571 
01572         ExeTm.Tick();
01573         for(int i = 0; i < 2; i++)      MetroGibbsSampleSetup(WarmUp);
01574         printf("  Warm-Up [%u] : %s\n", WarmUp, ExeTm.GetTmStr());
01575         CalcApxGraphLL();
01576         for(int i = 0; i < GibbsWarmUp; i++)    MetroGibbsSampleNext(10, false);
01577         printf("  Gibbs Warm-Up [%u] : %s\n", GibbsWarmUp, ExeTm.GetTmStr());
01578 
01579         ExeTm.Tick();
01580         CalcApxGraphLL();
01581         CalcApxGraphDLL();
01582         for(int i = 0; i < NSamples; i++) {
01583                 MetroGibbsSampleNext(50, false);
01584 
01585                 LLV.Add(LogLike);
01586                 DLLV.Add(GradV);
01587 
01588                 int OnePercent = (i+1) % (NSamples / 10);
01589                 if(OnePercent == 0) {
01590                         int TenPercent = ((i+1) / (NSamples / 10)) * 10;
01591                         printf("  %3u%% done : %s\n", TenPercent, ExeTm.GetTmStr());
01592                 }
01593         }
01594 }
01595 
01596 // M-step in KronEM
01597 double TKroneckerLL::RunMStep(const TFltV& LLV, const TVec<TFltV>& DLLV, const int& GradIter, const double& LrnRate, double MnStep, double MxStep) {
01598         TExeTm IterTm, TotalTm;
01599         double OldLL=LogLike, CurLL=0;
01600         const double alpha = log(double(RealEdges)) / log(double(RealNodes));
01601         const double EZero = pow(double(ProbMtx.GetDim()), alpha);
01602 
01603         TFltV CurGradV(GetParams()), LearnRateV(GetParams()), LastStep(GetParams());
01604         LearnRateV.PutAll(LrnRate);
01605 
01606         TKronMtx NewProbMtx = ProbMtx;
01607         const int NSamples = LLV.Len();
01608         const int ReCalcLen = NSamples / 10;
01609 
01610         for (int s = 0; s < LLV.Len(); s++) {
01611                 CurLL += LLV[s];
01612                 for(int p = 0; p < GetParams(); p++) { CurGradV[p] += DLLV[s][p]; }
01613         }
01614         CurLL /= NSamples;
01615         for(int p = 0; p < GetParams(); p++) { CurGradV[p] /= NSamples; }
01616 
01617         double MaxLL = CurLL;
01618         TKronMtx MaxProbMtx = ProbMtx;
01619         TKronMtx OldProbMtx = ProbMtx;
01620 
01621         for (int Iter = 0; Iter < GradIter; Iter++) {
01622                 printf("    %03d] ", Iter+1);
01623                 IterTm.Tick();
01624 
01625                 for (int p = 0; p < GetParams(); p++) {
01626                         if (Iter < 1) {
01627                                 while (fabs(LearnRateV[p]*CurGradV[p]) > MxStep) { LearnRateV[p] *= 0.95; }
01628                                 while (fabs(LearnRateV[p]*CurGradV[p]) < 5 * MnStep) { LearnRateV[p] *= (1.0/0.95); } // move more
01629                         } else {
01630                         // set learn rate so that move for each parameter is inside the [MnStep, MxStep]
01631                                 while (fabs(LearnRateV[p]*CurGradV[p]) > MxStep) { LearnRateV[p] *= 0.95; printf(".");}
01632                                 while (fabs(LearnRateV[p]*CurGradV[p]) < MnStep) { LearnRateV[p] *= (1.0/0.95); printf("*");}
01633                                 if (MxStep > 3*MnStep) { MxStep *= 0.95; }
01634                         }
01635                         NewProbMtx.At(p) = ProbMtx.At(p) + LearnRateV[p]*CurGradV[p];
01636                         if (NewProbMtx.At(p) > 0.9999) { NewProbMtx.At(p)=0.9999; }
01637                         if (NewProbMtx.At(p) < 0.0001) { NewProbMtx.At(p)=0.0001; }
01638                         LearnRateV[p] *= 0.95;
01639                 }
01640                 printf("  trueE0: %.2f (%u from %u),  estE0: %.2f (%u from %u),  ERR: %f\n", EZero, RealEdges(), RealNodes(), ProbMtx.GetMtxSum(), Graph->GetEdges(), Graph->GetNodes(), fabs(EZero-ProbMtx.GetMtxSum()));
01641                 printf("      currLL: %.4f, deltaLL: %.4f\n", CurLL, CurLL-OldLL); // positive is good
01642                 for (int p = 0; p < GetParams(); p++) {
01643                         printf("      %d]  %f  <--  %f + %9f   Grad: %9.1f   Rate: %g\n", p, NewProbMtx.At(p),
01644                         ProbMtx.At(p), (double)(LearnRateV[p]*CurGradV[p]), CurGradV[p](), LearnRateV[p]());
01645                 }
01646 
01647                 OldLL=CurLL;
01648                 if(Iter == GradIter - 1) {
01649                         break;
01650                 }
01651 
01652                 CurLL = 0;
01653                 CurGradV.PutAll(0.0);
01654                 TFltV OneDLL;
01655 
01656                 CalcApxGraphLL();
01657                 CalcApxGraphDLL();
01658 
01659                 for(int s = 0; s < NSamples; s++) {
01660                         ProbMtx = OldProbMtx;  ProbMtx.GetLLMtx(LLMtx);
01661                         MetroGibbsSampleNext(10, true);
01662                         ProbMtx = NewProbMtx;  ProbMtx.GetLLMtx(LLMtx);
01663                         if(s % ReCalcLen == ReCalcLen/2) {
01664                                 CurLL += CalcApxGraphLL();
01665                                 OneDLL = CalcApxGraphDLL();
01666                         } else {
01667                                 CurLL += LogLike;
01668                                 OneDLL = GradV;
01669                         }
01670                         for(int p = 0; p < GetParams(); p++) {
01671                                 CurGradV[p] += OneDLL[p];
01672                         }
01673                 }
01674                 CurLL /= NSamples;
01675 
01676                 if(MaxLL < CurLL) {
01677                         MaxLL = CurLL;  MaxProbMtx = ProbMtx;
01678                 }
01679 
01680                 printf("    Time: %s\n", IterTm.GetTmStr());
01681                 printf("\n");  fflush(stdout);
01682         }
01683         ProbMtx = MaxProbMtx;   ProbMtx.GetLLMtx(LLMtx);
01684 
01685         printf("    FinalLL : %f,   TotalExeTm: %s\n", MaxLL, TotalTm.GetTmStr());
01686         ProbMtx.Dump("    FITTED PARAMS", false);
01687 
01688         return MaxLL;
01689 }
01690 
01691 // KronEM
01692 void TKroneckerLL::RunKronEM(const int& EMIter, const int& GradIter, double LrnRate, double MnStep, double MxStep, const int& GibbsWarmUp, const int& WarmUp, const int& NSamples, const TKronEMType& Type, const int& NMissing) {
01693         printf("\n----------------------------------------------------------------------\n");
01694         printf("Fitting graph on %d nodes, %d edges\n", int(RealNodes), int(RealEdges));
01695         printf("Kron iters:  %d (== %d nodes)\n\n", KronIters(), ProbMtx.GetNodes(KronIters()));
01696 
01697         TFltV LLV(NSamples);
01698         TVec<TFltV> DLLV(NSamples);
01699         //int count = 0;
01700 
01701         EMType = Type;
01702         MissEdges = NMissing;
01703         AppendIsoNodes();
01704         SetRndPerm();
01705 
01706         if(DebugMode) {
01707                 LLV.Gen(EMIter, 0);
01708                 MtxV.Gen(EMIter, 0);
01709         }
01710 
01711         for(int i = 0; i < EMIter; i++) {
01712                 printf("\n----------------------------------------------------------------------\n");
01713                 printf("%03d EM-iter] E-Step\n", i+1);
01714                 RunEStep(GibbsWarmUp, WarmUp, NSamples, LLV, DLLV);
01715                 printf("\n\n");
01716 
01717                 printf("%03d EM-iter] M-Step\n", i+1);
01718                 double CurLL = RunMStep(LLV, DLLV, GradIter, LrnRate, MnStep, MxStep);
01719                 printf("\n\n");
01720 
01721                 if(DebugMode) {
01722                         LLV.Add(CurLL);
01723                         MtxV.Add(ProbMtx);
01724                 }
01725         }
01726 
01727         RestoreGraph();
01728 }
01729 
01730 
01731 
01732 void GetMinMax(const TFltPrV& XYValV, double& Min, double& Max, const bool& ResetMinMax) {
01733   if (ResetMinMax) { Min = TFlt::Mx;  Max = TFlt::Mn; }
01734   for (int i = 0; i < XYValV.Len(); i++) {
01735     Min = TMath::Mn(Min, XYValV[i].Val2.Val);
01736     Max = TMath::Mx(Max, XYValV[i].Val2.Val);
01737   }
01738 }
01739 
01740 void PlotGrad(const TFltPrV& EstLLV, const TFltPrV& TrueLLV, const TVec<TFltPrV>& GradVV, const TFltPrV& AcceptV, const TStr& OutFNm, const TStr& Desc) {
01741   double Min, Max, Min1, Max1;
01742   // plot log-likelihood
01743   { TGnuPlot GP("sLL-"+OutFNm, TStr::Fmt("Log-likelihood (avg 1k samples). %s", Desc.CStr()), true);
01744   GP.AddPlot(EstLLV, gpwLines, "Esimated LL", "linewidth 1");
01745   if (! TrueLLV.Empty()) { GP.AddPlot(TrueLLV, gpwLines, "TRUE LL", "linewidth 1"); }
01746   //GetMinMax(EstLLV, Min, Max, true);  GetMinMax(TrueLLV, Min, Max, false);
01747   //GP.SetYRange((int)floor(Min-1), (int)ceil(Max+1));
01748   GP.SetXYLabel("Sample Index (time)", "Log-likelihood");
01749   GP.SavePng(); }
01750   // plot accept
01751   { TGnuPlot GP("sAcc-"+OutFNm, TStr::Fmt("Pct. accepted rnd moves (over 1k samples). %s", Desc.CStr()), true);
01752   GP.AddPlot(AcceptV, gpwLines, "Pct accepted swaps", "linewidth 1");
01753   GP.SetXYLabel("Sample Index (time)", "Pct accept permutation swaps");
01754   GP.SavePng(); }
01755   // plot grads
01756   TGnuPlot GPAll("sGradAll-"+OutFNm, TStr::Fmt("Gradient (avg 1k samples). %s", Desc.CStr()), true);
01757   GetMinMax(GradVV[0], Min1, Max1, true);
01758   for (int g = 0; g < GradVV.Len(); g++) {
01759     GPAll.AddPlot(GradVV[g], gpwLines, TStr::Fmt("param %d", g+1), "linewidth 1");
01760     GetMinMax(GradVV[g], Min1, Max1, false);
01761     TGnuPlot GP(TStr::Fmt("sGrad%02d-", g+1)+OutFNm, TStr::Fmt("Gradient (avg 1k samples). %s", Desc.CStr()), true);
01762     GP.AddPlot(GradVV[g], gpwLines, TStr::Fmt("param id %d", g+1), "linewidth 1");
01763     GetMinMax(GradVV[g], Min, Max, true);
01764     GP.SetYRange((int)floor(Min-1), (int)ceil(Max+1));
01765     GP.SetXYLabel("Sample Index (time)", "Gradient");
01766     GP.SavePng();
01767   }
01768   GPAll.SetYRange((int)floor(Min1-1), (int)ceil(Max1+1));
01769   GPAll.SetXYLabel("Sample Index (time)", "Gradient");
01770   GPAll.SavePng();
01771 }
01772 
01773 void PlotAutoCorrelation(const TFltV& ValV, const int& MaxK, const TStr& OutFNm, const TStr& Desc) {
01774   double Avg=0.0, Var=0.0;
01775   for (int i = 0; i < ValV.Len(); i++) { Avg += ValV[i]; }
01776   Avg /= (double) ValV.Len();
01777   for (int i = 0; i < ValV.Len(); i++) { Var += TMath::Sqr(ValV[i]-Avg); }
01778   TFltPrV ACorrV;
01779   for (int k = 0; k < TMath::Mn(ValV.Len(), MaxK); k++) {
01780     double corr = 0.0;
01781     for (int i = 0; i < ValV.Len() - k; i++) {
01782       corr += (ValV[i]-Avg)*(ValV[i+k]-Avg);
01783     }
01784     ACorrV.Add(TFltPr(k, corr/Var));
01785   }
01786   // plot grads
01787   TGnuPlot GP("sAutoCorr-"+OutFNm, TStr::Fmt("AutoCorrelation (%d samples). %s", ValV.Len(), Desc.CStr()), true);
01788   GP.AddPlot(ACorrV, gpwLines, "", "linewidth 1");
01789   GP.SetXYLabel("Lag, k", "Autocorrelation, r_k");
01790   GP.SavePng();
01791 }
01792 
01793 // sample permutations and plot the current gradient and log-likelihood as the function
01794 // of the number of samples
01795 TFltV TKroneckerLL::TestSamplePerm(const TStr& OutFNm, const int& WarmUp, const int& NSamples, const TKronMtx& TrueMtx, const bool& DoPlot) {
01796   printf("Sample permutations: %s (warm-up: %s)\n", TInt::GetMegaStr(NSamples).CStr(), TInt::GetMegaStr(WarmUp).CStr());
01797   int NId1=0, NId2=0, NAccept=0;
01798   TExeTm ExeTm;
01799   const int PlotLen = NSamples/1000+1;
01800   double TrueLL=-1, AvgLL=0.0;
01801   TFltV AvgGradV(GetParams());
01802   TFltPrV TrueLLV(PlotLen, 0); // true log-likelihood (under the correct permutation)
01803   TFltPrV EstLLV(PlotLen, 0);  // estiamted log-likelihood (averaged over last 1k permutation)
01804   TFltPrV AcceptV;             // sample acceptance ratio
01805   TFltV SampleLLV(NSamples, 0);
01806   TVec<TFltPrV> GradVV(GetParams());
01807   for (int g = 0; g < GetParams(); g++) {
01808     GradVV[g].Gen(PlotLen, 0); }
01809   if (! TrueMtx.Empty()) {
01810     TIntV PermV=NodePerm;  TKronMtx CurMtx=ProbMtx;  ProbMtx.Dump();
01811     InitLL(TrueMtx);  SetOrderPerm();  CalcApxGraphLL();  printf("TrueLL: %f\n", LogLike());
01812     TrueLL=LogLike;  InitLL(CurMtx); NodePerm=PermV;
01813   }
01814   CalcApxGraphLL();
01815   printf("LogLike at start:       %f\n", LogLike());
01816   if (WarmUp > 0) {
01817     EstLLV.Add(TFltPr(0, LogLike));
01818     if (TrueLL != -1) { TrueLLV.Add(TFltPr(0, TrueLL)); }
01819     for (int s = 0; s < WarmUp; s++) { SampleNextPerm(NId1, NId2); }
01820     printf("  warm-up:%s,", ExeTm.GetTmStr());  ExeTm.Tick();
01821   }
01822   printf("LogLike afterm warm-up: %f\n", LogLike());
01823   CalcApxGraphLL(); // re-calculate LL (due to numerical errors)
01824   CalcApxGraphDLL();
01825   EstLLV.Add(TFltPr(WarmUp, LogLike));
01826   if (TrueLL != -1) { TrueLLV.Add(TFltPr(WarmUp, TrueLL)); }
01827   printf("  recalculated:         %f\n", LogLike());
01828   // start sampling
01829   printf("  sampling (average per 1000 samples)\n");
01830   TVec<TFltV> SamplVV(5);
01831   for (int s = 0; s < NSamples; s++) {
01832     if (SampleNextPerm(NId1, NId2)) { // new permutation
01833       UpdateGraphDLL(NId1, NId2);  NAccept++; }
01834     for (int m = 0; m < AvgGradV.Len(); m++) { AvgGradV[m] += GradV[m]; }
01835     AvgLL += GetLL();
01836     SampleLLV.Add(GetLL());
01837     /*SamplVV[0].Add(GetLL()); // gives worse autocoreelation than the avg below
01838     SamplVV[1].Add(GradV[0]);
01839     SamplVV[2].Add(GradV[1]);
01840     SamplVV[3].Add(GradV[2]);
01841     SamplVV[4].Add(GradV[3]);*/
01842     if (s > 0 && s % 1000 == 0) {
01843       printf(".");
01844       for (int g = 0; g < AvgGradV.Len(); g++) {
01845         GradVV[g].Add(TFltPr(WarmUp+s, AvgGradV[g] / 1000.0)); }
01846       EstLLV.Add(TFltPr(WarmUp+s, AvgLL / 1000.0));
01847       if (TrueLL != -1) { TrueLLV.Add(TFltPr(WarmUp+s, TrueLL)); }
01848       AcceptV.Add(TFltPr(WarmUp+s, NAccept/1000.0));
01849       // better (faster decaying) autocorrelation when one takes avg. of 1000 consecutive samples
01850       /*SamplVV[0].Add(AvgLL);
01851       SamplVV[1].Add(AvgGradV[0]);
01852       SamplVV[2].Add(AvgGradV[1]);
01853       SamplVV[3].Add(AvgGradV[2]);
01854       SamplVV[4].Add(AvgGradV[3]); //*/
01855       if (s % 100000 == 0 && DoPlot) {
01856         const TStr Desc = TStr::Fmt("P(NodeSwap)=%g. Nodes: %d, Edges: %d, Params: %d, WarmUp: %s, Samples: %s", PermSwapNodeProb(),
01857           Graph->GetNodes(), Graph->GetEdges(), GetParams(), TInt::GetMegaStr(WarmUp).CStr(), TInt::GetMegaStr(NSamples).CStr());
01858         PlotGrad(EstLLV, TrueLLV, GradVV, AcceptV, OutFNm, Desc);
01859         for (int n = 0; n < SamplVV.Len(); n++) {
01860           PlotAutoCorrelation(SamplVV[n], 1000, TStr::Fmt("%s-n%d", OutFNm.CStr(), n), Desc); }
01861         printf("  samples: %d, time: %s, samples/s: %.1f\n", s, ExeTm.GetTmStr(), double(s+1)/ExeTm.GetSecs());
01862       }
01863       AvgLL = 0;  AvgGradV.PutAll(0);  NAccept=0;
01864     }
01865   }
01866   if (DoPlot) {
01867     const TStr Desc = TStr::Fmt("P(NodeSwap)=%g. Nodes: %d, Edges: %d, Params: %d, WarmUp: %s, Samples: %s", PermSwapNodeProb(),
01868       Graph->GetNodes(), Graph->GetEdges(), GetParams(), TInt::GetMegaStr(WarmUp).CStr(), TInt::GetMegaStr(NSamples).CStr());
01869     PlotGrad(EstLLV, TrueLLV, GradVV, AcceptV, OutFNm, Desc);
01870     for (int n = 0; n < SamplVV.Len(); n++) {
01871       PlotAutoCorrelation(SamplVV[n], 1000, TStr::Fmt("%s-n%d", OutFNm.CStr(), n), Desc); }
01872   }
01873   return SampleLLV; // seems to work better for potential scale reduction plot
01874 }
01875 
01876 void McMcGetAvgAvg(const TFltV& AvgJV, double& AvgAvg) {
01877   AvgAvg = 0.0;
01878   for (int j = 0; j < AvgJV.Len(); j++) {
01879     AvgAvg += AvgJV[j]; }
01880   AvgAvg /= AvgJV.Len();
01881 }
01882 
01883 void McMcGetAvgJ(const TVec<TFltV>& ChainLLV, TFltV& AvgJV) {
01884   for (int j = 0; j < ChainLLV.Len(); j++) {
01885     const TFltV& ChainV = ChainLLV[j];
01886     double Avg = 0;
01887     for (int i = 0; i < ChainV.Len(); i++) {
01888       Avg += ChainV[i];
01889     }
01890     AvgJV.Add(Avg/ChainV.Len());
01891   }
01892 }
01893 
01894 // calculates the chain potential scale reduction (see Gelman Bayesian Statistics book)
01895 double TKroneckerLL::CalcChainR2(const TVec<TFltV>& ChainLLV) {
01896   const double J = ChainLLV.Len();
01897   const double K = ChainLLV[0].Len();
01898   TFltV AvgJV;    McMcGetAvgJ(ChainLLV, AvgJV);
01899   double AvgAvg;  McMcGetAvgAvg(AvgJV, AvgAvg);
01900   IAssert(AvgJV.Len() == ChainLLV.Len());
01901   double InChainVar=0, OutChainVar=0;
01902   // between chain var
01903   for (int j = 0; j < AvgJV.Len(); j++) {
01904     OutChainVar += TMath::Sqr(AvgJV[j] - AvgAvg); }
01905   OutChainVar = OutChainVar * (K/double(J-1));
01906   printf("*** %g chains of len %g\n", J, K);
01907   printf("  ** between chain var: %f\n", OutChainVar);
01908   //within chain variance
01909   for (int j = 0; j < AvgJV.Len(); j++) {
01910     const TFltV& ChainV = ChainLLV[j];
01911     for (int k = 0; k < ChainV.Len(); k++) {
01912       InChainVar += TMath::Sqr(ChainV[k] - AvgJV[j]); }
01913   }
01914   InChainVar = InChainVar * 1.0/double(J*(K-1));
01915   printf("  ** within chain var: %f\n", InChainVar);
01916   const double PostVar = (K-1)/K * InChainVar + 1.0/K * OutChainVar;
01917   printf("  ** posterior var: %f\n", PostVar);
01918   const double ScaleRed = sqrt(PostVar/InChainVar);
01919   printf("  ** scale reduction (< 1.2): %f\n\n", ScaleRed);
01920   return ScaleRed;
01921 }
01922 
01923 // Gelman-Rubin-Brooks plot: how does potential scale reduction chainge with chain length
01924 void TKroneckerLL::ChainGelmapRubinPlot(const TVec<TFltV>& ChainLLV, const TStr& OutFNm, const TStr& Desc) {
01925   TFltPrV LenR2V; // how does potential scale reduction chainge with chain length
01926   TVec<TFltV> SmallLLV(ChainLLV.Len());
01927   const int K = ChainLLV[0].Len();
01928   const int Buckets=1000;
01929   const int BucketSz = K/Buckets;
01930   for (int b = 1; b < Buckets; b++) {
01931     const int End = TMath::Mn(BucketSz*b, K-1);
01932     for (int c = 0; c < ChainLLV.Len(); c++) {
01933       ChainLLV[c].GetSubValV(0, End, SmallLLV[c]); }
01934     LenR2V.Add(TFltPr(End, TKroneckerLL::CalcChainR2(SmallLLV)));
01935   }
01936   LenR2V.Add(TFltPr(K, TKroneckerLL::CalcChainR2(ChainLLV)));
01937   TGnuPlot::PlotValV(LenR2V, TStr::Fmt("gelman-%s", OutFNm.CStr()), TStr::Fmt("%s. %d chains of len %d. BucketSz: %d.",
01938     Desc.CStr(), ChainLLV.Len(), ChainLLV[0].Len(), BucketSz), "Chain length", "Potential scale reduction");
01939 }
01940 
01941 // given a Kronecker graph generate from TrueParam, try to recover the parameters
01942 TFltQu TKroneckerLL::TestKronDescent(const bool& DoExact, const bool& TruePerm, double LearnRate, const int& WarmUp, const int& NSamples, const TKronMtx& TrueParam) {
01943   printf("Test gradient descent on a synthetic kronecker graphs:\n");
01944   if (DoExact) { printf("  -- Exact gradient calculations\n"); }
01945   else { printf("  -- Approximate gradient calculations\n"); }
01946   if (TruePerm) { printf("  -- No permutation sampling (use true permutation)\n"); }
01947   else { printf("  -- Sample permutations (start with degree permutation)\n"); }
01948   TExeTm IterTm;
01949   int Iter;
01950   double OldLL=0, MyLL=0, AvgAbsErr, AbsSumErr;
01951   TFltV MyGradV, SDevV;
01952   TFltV LearnRateV(GetParams());  LearnRateV.PutAll(LearnRate);
01953   if (TruePerm) {
01954     SetOrderPerm();
01955   }
01956   else {
01957     /*printf("SET EIGEN VECTOR PERMUTATIONS\n");
01958     TFltV LeftSV, RightSV;
01959     TGSvd::GetSngVec(Graph, LeftSV, RightSV);
01960     TFltIntPrV V;
01961     for (int v=0; v<LeftSV.Len();v++) { V.Add(TFltIntPr(LeftSV[v], v)); }
01962     V.Sort(false);
01963     NodePerm.Gen(Nodes, 0);
01964     for (int v=0; v < V.Len();v++) { NodePerm.Add(V[v].Val2); } //*/
01965     //printf("RANDOM PERMUTATION\n");   SetRndPerm();
01966     printf("DEGREE  PERMUTATION\n");  SetDegPerm();
01967   }
01968   for (Iter = 0; Iter < 100; Iter++) {
01969     if (TruePerm) {
01970       // don't sample over permutations
01971       if (DoExact) { CalcGraphDLL();  CalcGraphLL(); } // slow, O(N^2)
01972       else { CalcApxGraphDLL();  CalcApxGraphLL(); }   // fast
01973       MyLL = LogLike;  MyGradV = GradV;
01974     } else {
01975       printf(".");
01976       // sample over permutations (approximate calculations)
01977       SampleGradient(WarmUp, NSamples, MyLL, MyGradV);
01978     }
01979     printf("%d] LL: %g, ", Iter, MyLL);
01980     AvgAbsErr = TKronMtx::GetAvgAbsErr(ProbMtx, TrueParam);
01981     AbsSumErr = fabs(ProbMtx.GetMtxSum() - TrueParam.GetMtxSum());
01982     printf("  avgAbsErr: %.4f, absSumErr: %.4f, newLL: %.2f, deltaLL: %.2f\n", AvgAbsErr, AbsSumErr, MyLL, OldLL-MyLL);
01983     for (int p = 0; p < GetParams(); p++) {
01984       // set learn rate so that move for each parameter is inside the [0.01, 0.1]
01985       LearnRateV[p] *= 0.9;
01986       //printf("%d: rate: %f   delta:%f\n", p, LearnRateV[p], fabs(LearnRateV[p]*MyGradV[p]));
01987       while (fabs(LearnRateV[p]*MyGradV[p]) > 0.1) { LearnRateV[p] *= 0.9; }
01988       //printf("   rate: %f   delta:%f\n", LearnRateV[p], fabs(LearnRateV[p]*MyGradV[p]));
01989       while (fabs(LearnRateV[p]*MyGradV[p]) < 0.001) { LearnRateV[p] *= (1.0/0.9); }
01990       //printf("   rate: %f   delta:%f\n", LearnRateV[p], fabs(LearnRateV[p]*MyGradV[p]));
01991       printf("    %d]  %f  <--  %f + %f    lrnRate:%g\n", p, ProbMtx.At(p) + LearnRateV[p]*MyGradV[p],
01992         ProbMtx.At(p), (double)(LearnRateV[p]*MyGradV[p]), LearnRateV[p]());
01993       ProbMtx.At(p) = ProbMtx.At(p) + LearnRateV[p]*MyGradV[p];
01994       // box constraints
01995       if (ProbMtx.At(p) > 0.99) { ProbMtx.At(p)=0.99; }
01996       if (ProbMtx.At(p) < 0.01) { ProbMtx.At(p)=0.01; }
01997     }
01998     ProbMtx.GetLLMtx(LLMtx);  OldLL = MyLL;
01999     if (AvgAbsErr < 0.01) { printf("***CONVERGED!\n");  break; }
02000     printf("\n");  fflush(stdout);
02001   }
02002   TrueParam.Dump("True  Thetas", true);
02003   ProbMtx.Dump("Final Thetas", true);
02004   printf("  AvgAbsErr: %f\n  AbsSumErr: %f\n  Iterations: %d\n", AvgAbsErr, AbsSumErr, Iter);
02005   printf("Iteration run time: %s, sec: %g\n\n", IterTm.GetTmStr(), IterTm.GetSecs());
02006   return TFltQu(AvgAbsErr, AbsSumErr, Iter, IterTm.GetSecs());
02007 }
02008 
02009 void PlotTrueAndEst(const TStr& OutFNm, const TStr& Desc, const TStr& YLabel, const TFltPrV& EstV, const TFltPrV& TrueV) {
02010   TGnuPlot GP(OutFNm, Desc.CStr(), true);
02011   GP.AddPlot(EstV, gpwLinesPoints, YLabel, "linewidth 1 pointtype 6 pointsize 1");
02012   if (! TrueV.Empty()) { GP.AddPlot(TrueV, gpwLines, "TRUE"); }
02013   GP.SetXYLabel("Gradient descent iterations", YLabel);
02014   GP.SavePng();
02015 }
02016 
02017 void TKroneckerLL::GradDescentConvergence(const TStr& OutFNm, const TStr& Desc1, const bool& SamplePerm, const int& NIters,
02018  double LearnRate, const int& WarmUp, const int& NSamples, const int& AvgKGraphs, const TKronMtx& TrueParam) {
02019   TExeTm IterTm;
02020   int Iter;
02021   double OldLL=0, MyLL=0, AvgAbsErr=0, AbsSumErr=0;
02022   TFltV MyGradV, SDevV;
02023   TFltV LearnRateV(GetParams());  LearnRateV.PutAll(LearnRate);
02024   TFltPrV EZeroV, DiamV, Lambda1V, Lambda2V, AvgAbsErrV, AvgLLV;
02025   TFltPrV TrueEZeroV, TrueDiamV, TrueLambda1V, TrueLambda2V, TrueLLV;
02026   TFltV SngValV;  TSnap::GetSngVals(Graph, 2, SngValV);  SngValV.Sort(false);
02027   const double TrueEZero = pow((double) Graph->GetEdges(), 1.0/double(KronIters));
02028   const double TrueEffDiam = TSnap::GetAnfEffDiam(Graph, false, 10);
02029   const double TrueLambda1 = SngValV[0];
02030   const double TrueLambda2 = SngValV[1];
02031   if (! TrueParam.Empty()) {
02032     const TKronMtx CurParam = ProbMtx;  ProbMtx.Dump();
02033     InitLL(TrueParam);  SetOrderPerm();  CalcApxGraphLL(); printf("TrueLL: %f\n", LogLike());
02034     OldLL = LogLike;  InitLL(CurParam);
02035   }
02036   const double TrueLL = OldLL;
02037   if (! SamplePerm) { SetOrderPerm(); } else { SetDegPerm(); }
02038   for (Iter = 0; Iter < NIters; Iter++) {
02039     if (! SamplePerm) {
02040       // don't sample over permutations
02041       CalcApxGraphDLL();  CalcApxGraphLL();   // fast
02042       MyLL = LogLike;  MyGradV = GradV;
02043     } else {
02044       // sample over permutations (approximate calculations)
02045       SampleGradient(WarmUp, NSamples, MyLL, MyGradV);
02046     }
02047     double SumDiam=0, SumSngVal1=0, SumSngVal2=0;
02048     for (int trial = 0; trial < AvgKGraphs; trial++) {
02049       // generate kronecker graph
02050       PNGraph KronGraph = TKronMtx::GenFastKronecker(ProbMtx, KronIters, true, 0); // approx
02051       //PNGraph KronGraph = TKronMtx::GenKronecker(ProbMtx, KronIters, true, 0); // true
02052       SngValV.Clr(true);  TSnap::GetSngVals(KronGraph, 2, SngValV);  SngValV.Sort(false);
02053       SumDiam += TSnap::GetAnfEffDiam(KronGraph, false, 10);
02054       SumSngVal1 += SngValV[0];  SumSngVal2 += SngValV[1];
02055     }
02056     // how good is the current fit
02057     AvgLLV.Add(TFltPr(Iter, MyLL));
02058     EZeroV.Add(TFltPr(Iter, ProbMtx.GetMtxSum()));
02059     DiamV.Add(TFltPr(Iter, SumDiam/double(AvgKGraphs)));
02060     Lambda1V.Add(TFltPr(Iter, SumSngVal1/double(AvgKGraphs)));
02061     Lambda2V.Add(TFltPr(Iter, SumSngVal2/double(AvgKGraphs)));
02062     TrueLLV.Add(TFltPr(Iter, TrueLL));
02063     TrueEZeroV.Add(TFltPr(Iter, TrueEZero));
02064     TrueDiamV.Add(TFltPr(Iter, TrueEffDiam));
02065     TrueLambda1V.Add(TFltPr(Iter, TrueLambda1));
02066     TrueLambda2V.Add(TFltPr(Iter, TrueLambda2));
02067     if (Iter % 10 == 0) {
02068       const TStr Desc = TStr::Fmt("%s. Iter: %d, G(%d, %d)  K(%d, %d)", Desc1.Empty()?OutFNm.CStr():Desc1.CStr(),
02069         Iter, Graph->GetNodes(), Graph->GetEdges(), ProbMtx.GetNodes(KronIters), ProbMtx.GetEdges(KronIters));
02070       PlotTrueAndEst("LL."+OutFNm, Desc, "Average LL", AvgLLV, TrueLLV);
02071       PlotTrueAndEst("E0."+OutFNm, Desc, "E0 (expected number of edges)", EZeroV, TrueEZeroV);
02072       PlotTrueAndEst("Diam."+OutFNm+"-Diam", Desc, "Effective diameter", DiamV, TrueDiamV);
02073       PlotTrueAndEst("Lambda1."+OutFNm, Desc, "Lambda 1", Lambda1V, TrueLambda1V);
02074       PlotTrueAndEst("Lambda2."+OutFNm, Desc, "Lambda 2", Lambda2V, TrueLambda2V);
02075       if (! TrueParam.Empty()) {
02076         PlotTrueAndEst("AbsErr."+OutFNm, Desc, "Average Absolute Error", AvgAbsErrV, TFltPrV()); }
02077     }
02078     if (! TrueParam.Empty()) {
02079       AvgAbsErr = TKronMtx::GetAvgAbsErr(ProbMtx, TrueParam);
02080       AvgAbsErrV.Add(TFltPr(Iter, AvgAbsErr));
02081     } else { AvgAbsErr = 1.0; }
02082     // update parameters
02083     AbsSumErr = fabs(ProbMtx.GetMtxSum() - TrueEZero);
02084     // update parameters
02085     for (int p = 0; p < GetParams(); p++) {
02086       LearnRateV[p] *= 0.99;
02087       while (fabs(LearnRateV[p]*MyGradV[p]) > 0.1) { LearnRateV[p] *= 0.99; printf(".");}
02088       while (fabs(LearnRateV[p]*MyGradV[p]) < 0.002) { LearnRateV[p] *= (1.0/0.95); printf("*");}
02089       printf("    %d]  %f  <--  %f + %9f   Grad: %9.1f,  Rate:%g\n", p, ProbMtx.At(p) + LearnRateV[p]*MyGradV[p],
02090         ProbMtx.At(p), (double)(LearnRateV[p]*MyGradV[p]), MyGradV[p](), LearnRateV[p]());
02091       ProbMtx.At(p) = ProbMtx.At(p) + LearnRateV[p]*MyGradV[p];
02092       // box constraints
02093       if (ProbMtx.At(p) > 1.0) { ProbMtx.At(p)=1.0; }
02094       if (ProbMtx.At(p) < 0.001) { ProbMtx.At(p)=0.001; }
02095     }
02096     printf("%d] LL: %g, ", Iter, MyLL);
02097     printf("  avgAbsErr: %.4f, absSumErr: %.4f, newLL: %.2f, deltaLL: %.2f\n", AvgAbsErr, AbsSumErr, MyLL, OldLL-MyLL);
02098     if (AvgAbsErr < 0.001) { printf("***CONVERGED!\n");  break; }
02099     printf("\n");  fflush(stdout);
02100     ProbMtx.GetLLMtx(LLMtx);  OldLL = MyLL;
02101   }
02102   TrueParam.Dump("True  Thetas", true);
02103   ProbMtx.Dump("Final Thetas", true);
02104   printf("  AvgAbsErr: %f\n  AbsSumErr: %f\n  Iterations: %d\n", AvgAbsErr, AbsSumErr, Iter);
02105   printf("Iteration run time: %s, sec: %g\n\n", IterTm.GetTmStr(), IterTm.GetSecs());
02106 }
02107 
02108 // given true N0, fit the parameters, get likelihood and calculate BIC (MDL), plot n0 vs. BIC
02109 void TKroneckerLL::TestBicCriterion(const TStr& OutFNm, const TStr& Desc1, const PNGraph& G, const int& GradIters,
02110  double LearnRate, const int& WarmUp, const int& NSamples, const int& TrueN0) {
02111   TFltPrV BicV, MdlV, LLV;
02112   const double rndGP = G->GetEdges()/TMath::Sqr(double(G->GetNodes()));
02113   const double RndGLL = G->GetEdges()*log(rndGP )+ (TMath::Sqr(double(G->GetNodes()))-G->GetEdges())*log(1-rndGP);
02114   LLV.Add(TFltPr(1, RndGLL));
02115   BicV.Add(TFltPr(1, -RndGLL + 0.5*TMath::Sqr(1)*log(TMath::Sqr(G->GetNodes()))));
02116   MdlV.Add(TFltPr(1, -RndGLL + 32*TMath::Sqr(1)+2*(log((double)1)+log((double)G->GetNodes()))));
02117   for (int NZero = 2; NZero < 10; NZero++) {
02118     const TKronMtx InitKronMtx = TKronMtx::GetInitMtx(NZero, G->GetNodes(), G->GetEdges());
02119     InitKronMtx.Dump("INIT PARAM", true);
02120     TKroneckerLL KronLL(G, InitKronMtx);
02121     KronLL.SetPerm('d'); // degree perm
02122     const double LastLL = KronLL.GradDescent(GradIters, LearnRate, 0.001, 0.01, WarmUp, NSamples);
02123     LLV.Add(TFltPr(NZero, LastLL));
02124     BicV.Add(TFltPr(NZero, -LastLL + 0.5*TMath::Sqr(NZero)*log(TMath::Sqr(G->GetNodes()))));
02125     MdlV.Add(TFltPr(NZero, -LastLL + 32*TMath::Sqr(NZero)+2*(log((double)NZero)+log((double)KronLL.GetKronIters()))));
02126     { TGnuPlot GP("LL-"+OutFNm, Desc1);
02127     GP.AddPlot(LLV, gpwLinesPoints, "Log-likelihood", "linewidth 1 pointtype 6 pointsize 2");
02128     GP.SetXYLabel("NZero", "Log-Likelihood");  GP.SavePng(); }
02129     { TGnuPlot GP("BIC-"+OutFNm, Desc1);
02130     GP.AddPlot(BicV, gpwLinesPoints, "BIC", "linewidth 1 pointtype 6 pointsize 2");
02131     GP.SetXYLabel("NZero", "BIC");  GP.SavePng(); }
02132     { TGnuPlot GP("MDL-"+OutFNm, Desc1);
02133     GP.AddPlot(MdlV, gpwLinesPoints, "MDL", "linewidth 1 pointtype 6 pointsize 2");
02134     GP.SetXYLabel("NZero", "MDL");  GP.SavePng(); }
02135   }
02136 }
02137 
02138 void TKroneckerLL::TestGradDescent(const int& KronIters, const int& KiloSamples, const TStr& Permutation) {
02139   const TStr OutFNm = TStr::Fmt("grad-%s%d-%dk", Permutation.CStr(), KronIters, KiloSamples);
02140   TKronMtx KronParam = TKronMtx::GetMtx("0.8 0.6; 0.6 0.4");
02141   PNGraph Graph  = TKronMtx::GenFastKronecker(KronParam, KronIters, true, 0);
02142   TKroneckerLL KronLL(Graph, KronParam);
02143   TVec<TFltV> GradVV(4), SDevVV(4);  TFltV XValV;
02144   int NId1 = 0, NId2 = 0, NAccept = 0;
02145   TVec<TMom> GradMomV(4);
02146   TExeTm ExeTm;
02147   if (Permutation == "r") KronLL.SetRndPerm();
02148   else if (Permutation == "d") KronLL.SetDegPerm();
02149   else if (Permutation == "o") KronLL.SetOrderPerm();
02150   else FailR("Unknown permutation (r,d,o)");
02151   KronLL.CalcApxGraphLL();
02152   KronLL.CalcApxGraphDLL();
02153   for (int s = 0; s < 1000*KiloSamples; s++) {
02154     if (KronLL.SampleNextPerm(NId1, NId2)) { // new permutation
02155       KronLL.UpdateGraphDLL(NId1, NId2);  NAccept++; }
02156     if (s > 50000) { //warm up period
02157       for (int m = 0; m < 4; m++) { GradVV[m].Add(KronLL.GradV[m]); }
02158       if ((s+1) % 1000 == 0) {
02159         printf(".");
02160         for (int m = 0; m < 4; m++) { GradVV[m].Add(KronLL.GradV[m]); }
02161         XValV.Add((s+1));
02162         if ((s+1) % 100000 == 0) {
02163           TGnuPlot GP(OutFNm, TStr::Fmt("Gradient vs. samples. %d nodes, %d edges", Graph->GetNodes(), Graph->GetEdges()), true);
02164           for (int g = 0; g < GradVV.Len(); g++) {
02165             GP.AddPlot(XValV, GradVV[g], gpwLines, TStr::Fmt("grad %d", g)); }
02166           GP.SetXYLabel("sample index","log Gradient");
02167           GP.SavePng();
02168         }
02169       }
02170     }
02171   }
02172   printf("\n");
02173   for (int m = 0; m < 4; m++) {
02174     GradMomV[m].Def();
02175     printf("grad %d: mean: %12f  sDev: %12f  median: %12f\n", m,
02176       GradMomV[m].GetMean(), GradMomV[m].GetSDev(), GradMomV[m].GetMedian());
02177   }
02178 }
02179 /*
02180 // sample over permutations
02181 void TKroneckerLL::GradDescent(const double& LearnRate, const int& WarmUp, const int& NSamples, const int& NIter) {
02182   TFltV GradV, SDevV;
02183   double AvgLL;
02184   for (int Iter = 0; Iter < 100; Iter++) {
02185     //SampleGradient(WarmUp, NSamples, AvgLL, GradV, SDevV, true);
02186     SampleGradient(WarmUp, NSamples, AvgLL, GradV);
02187     for (int theta = 0; theta < GetParams(); theta++) {
02188       printf("%d]  %f  <--  %f + %f\n", theta, ProbMtx.At(theta) + LearnRate*GradV[theta], ProbMtx.At(theta), LearnRate*GradV[theta]);
02189       ProbMtx.At(theta) = ProbMtx.At(theta) + LearnRate*GradV[theta];
02190       // box constraints
02191       if (ProbMtx.At(theta) > 0.99) ProbMtx.At(theta)=0.99;
02192       if (ProbMtx.At(theta) < 0.01) ProbMtx.At(theta)=0.01;
02193     }
02194     ProbMtx.GetLLMtx(LLMtx);
02195   }
02196   ProbMtx.Dump("Final Thetas");
02197 }
02198 */
02199 
02200 
02202 // Add Noise to Graph
02204 int TKronNoise::RemoveNodeNoise(PNGraph& Graph, const int& NNodes, const bool Random) {
02205         IAssert(NNodes > 0 && NNodes < (Graph->GetNodes() / 2));
02206 
02207         int i = 0;
02208         TIntV ShufflePerm;
02209         Graph->GetNIdV(ShufflePerm);
02210         if(Random) {
02211                 ShufflePerm.Shuffle(TKronMtx::Rnd);
02212                 for(i = 0; i < NNodes; i++) {
02213                         Graph->DelNode(int(ShufflePerm[i]));
02214                 }
02215         } else {
02216                 for(i = 0; i < NNodes; i++) {
02217                         Graph->DelNode(int(ShufflePerm[ShufflePerm.Len() - 1 - i]));
02218                 }
02219         }
02220 
02221         return Graph->GetNodes();
02222 }
02223 
02224 int TKronNoise::RemoveNodeNoise(PNGraph& Graph, const double& Rate, const bool Random) {
02225         IAssert(Rate > 0 && Rate < 0.5);
02226         return TKronNoise::RemoveNodeNoise(Graph, (int) floor(Rate * double(Graph->GetNodes())), Random);
02227 }
02228 
02229 int TKronNoise::FlipEdgeNoise(PNGraph& Graph, const int& NEdges, const bool Random) {
02230         IAssert(NEdges > 0 && NEdges < Graph->GetEdges());
02231 
02232         const int Nodes = Graph->GetNodes();
02233         const int Edges = Graph->GetEdges();
02234         int Src, Dst;
02235 
02236         TIntV NIdV, TempV;
02237         TIntPrV ToAdd, ToDel;
02238         Graph->GetNIdV(NIdV);
02239 
02240         ToAdd.Gen(NEdges / 2, 0);
02241         for(int i = 0; i < NEdges / 2; i++) {
02242                 Src = NIdV[TKronMtx::Rnd.GetUniDevInt(Nodes)];
02243                 Dst = NIdV[TKronMtx::Rnd.GetUniDevInt(Nodes)];
02244                 if(Graph->IsEdge(Src, Dst)) {   i--;    continue;       }
02245 
02246                 ToAdd.Add(TIntPr(Src, Dst));
02247         }
02248 
02249         ToDel.Gen(Edges, 0);
02250         for(TNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
02251                 ToDel.Add(TIntPr(EI.GetSrcNId(), EI.GetDstNId()));
02252         }
02253         ToDel.Shuffle(TKronMtx::Rnd);
02254 
02255         for(int i = 0; i < NEdges / 2; i++) {
02256                 Graph->DelEdge(ToDel[i].Val1, ToDel[i].Val2);
02257                 Graph->AddEdge(ToAdd[i].Val1, ToAdd[i].Val2);
02258         }
02259 
02260         return Graph->GetEdges();
02261 }
02262 
02263 int TKronNoise::FlipEdgeNoise(PNGraph& Graph, const double& Rate, const bool Random) {
02264         IAssert(Rate > 0 && Rate < 0.5);
02265         return TKronNoise::FlipEdgeNoise(Graph, (int) floor(Rate * double(Graph->GetEdges())), Random);
02266 }
02267 
02268 int TKronNoise::RemoveEdgeNoise(PNGraph& Graph, const int& NEdges) {
02269         IAssert(NEdges > 0 && NEdges < Graph->GetEdges());
02270 
02271         TIntPrV ToDel;
02272 
02273         ToDel.Gen(Graph->GetEdges(), 0);
02274         for(TNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
02275                 if(EI.GetSrcNId() != EI.GetDstNId()) {
02276                         ToDel.Add(TIntPr(EI.GetSrcNId(), EI.GetDstNId()));
02277                 }
02278         }
02279         ToDel.Shuffle(TKronMtx::Rnd);
02280 
02281         for(int i = 0; i < NEdges; i++) {
02282                 Graph->DelEdge(ToDel[i].Val1, ToDel[i].Val2);
02283         }
02284 
02285         return Graph->GetEdges();
02286 }
02287 
02288 int TKronNoise::RemoveEdgeNoise(PNGraph& Graph, const double& Rate) {
02289         IAssert(Rate > 0 && Rate < 0.5);
02290         return TKronNoise::RemoveEdgeNoise(Graph, (int) floor(Rate * double(Graph->GetEdges())));
02291 }
02292 
02293 
02294 
02296 // Kronecker Log Likelihood Maximization
02297 void TKronMaxLL::SetPerm(const char& PermId) {
02298   if (PermId == 'o') KronLL.SetOrderPerm();
02299   else if (PermId == 'd') KronLL.SetDegPerm();
02300   else if (PermId == 'r') KronLL.SetRndPerm();
02301   else FailR("Unknown permutation type (o,d,r)");
02302 }
02303 
02304 /* void TKronMaxLL::EvalNewEdgeP(const TKronMtx& ProbMtx) {
02305   ProbMtx.Dump("\n***EVAL:");
02306   for (int i = 0; i < ProbMtx.Len(); i++) {
02307     // parameters are out of bounds
02308     if (ProbMtx.At(i) < 0.05 || ProbMtx.At(i) > 0.95) {
02309       TFltV MxDerivV(ProbMtx.Len()); MxDerivV.PutAll(1e5);
02310       FEvalH.AddDat(ProbMtx, TFEval(-1e10, MxDerivV));
02311       return;
02312     }
02313   }
02314   double AvgLL;
02315   TFltV GradV, SDevV;
02316   KronLL.InitLL(ProbMtx); // set current point
02317   //KronLL.SampleGradient(WarmUp, NSamples, AvgLL, GradV, SDevV, true); //sample the gradient
02318   KronLL.SampleGradient(WarmUp, NSamples, AvgLL, GradV);
02319   FEvalH.AddDat(ProbMtx, TFEval(AvgLL, GradV));
02320 }
02321 
02322 double TKronMaxLL::GetLL(const TFltV& ThetaV) {
02323   TKronMtx ProbMtx;  RoundTheta(ThetaV, ProbMtx);
02324   if (! FEvalH.IsKey(ProbMtx)) {
02325     EvalNewEdgeP(ProbMtx);
02326   }
02327   return FEvalH.GetDat(ProbMtx).LogLike;
02328 }
02329 
02330 void TKronMaxLL::GetDLL(const TFltV& ThetaV, TFltV& GradV) {
02331   TKronMtx ProbMtx;  RoundTheta(ThetaV, ProbMtx);
02332   if (! FEvalH.IsKey(ProbMtx)) {
02333     EvalNewEdgeP(ProbMtx);
02334   }
02335   GradV = FEvalH.GetDat(ProbMtx).GradV;
02336 }
02337 
02338 double TKronMaxLL::GetDLL(const TFltV& ThetaV, const int& ParamId) {
02339   TKronMtx ProbMtx;  RoundTheta(ThetaV, ProbMtx);
02340   if (! FEvalH.IsKey(ProbMtx)) {
02341     EvalNewEdgeP(ProbMtx);
02342   }
02343   return FEvalH.GetDat(ProbMtx).GradV[ParamId];
02344 }
02345 void TKronMaxLL::MaximizeLL(const int& NWarmUp, const int& Samples) {
02346   WarmUp = NWarmUp;
02347   NSamples = Samples;
02348   TConjGrad<TFunc> ConjGrad(KronLL.GetProbMtx().GetMtx(), TFunc(this));
02349   //TConjGrad<TLogBarFunc> ConjGrad(KronLL.GetEdgeP().GetV(), TLogBarFunc(this, 0.1));
02350   ConjGrad.ConjGradMin(0.1);
02351 }*/
02352 
02353 // round to 3 decimal places
02354 void TKronMaxLL::RoundTheta(const TFltV& ThetaV, TFltV& NewThetaV) {
02355   NewThetaV.Gen(ThetaV.Len());
02356   for (int i = 0; i < ThetaV.Len(); i++) {
02357     NewThetaV[i] = TMath::Round(ThetaV[i], 3); }
02358 }
02359 
02360 // round to 3 decimal places
02361 void TKronMaxLL::RoundTheta(const TFltV& ThetaV, TKronMtx& Kronecker) {
02362   Kronecker.GenMtx((int)sqrt((double)ThetaV.Len()));
02363   for (int i = 0; i < ThetaV.Len(); i++) {
02364     Kronecker.At(i) = TMath::Round(ThetaV[i], 3); }
02365 }
02366 
02367 void TKronMaxLL::Test() {
02368   TKronMtx::PutRndSeed(1);
02369   TKronMtx KronParam = TKronMtx::GetMtx("0.8 0.7; 0.6 0.5");
02370   PNGraph Graph  = TKronMtx::GenFastKronecker(KronParam, 8, true, 1);
02371 
02372   TKronMaxLL KronMaxLL(Graph, TFltV::GetV(0.9, 0.7, 0.5, 0.3));
02373   KronMaxLL.SetPerm('d');
02374   //KronMaxLL.MaximizeLL(10000, 50000);
02375   /*TKroneckerLL KronLL(Graph, *TKronMtx::GetMtx("0.9 0.7; 0.5 0.3"));
02376   KronLL.SetDegPerm();
02377   KronLL.GradDescent(0.005/double(Graph->GetNodes()));*/
02378 }
02379 
02381 // Kronecker Phase Plot
02382 /*
02383 void TKronPhasePlot::SaveMatlab(const TStr& OutFNm) const {
02384   FILE *F = fopen(OutFNm.CStr(), "wt");
02385   fprintf(F, "#Take last graph stats\n");
02386   fprintf(F, "#i\tAlpha\tBeta\tNodes\tNonZNodes\tEdges\tWccNodes\tWccEdges\tDiam\tEffDiam\tWccDiam\tWccEffDiam\t1StEigVal\tMxEigVal\n");
02387   for (int i = 0 ; i < PhaseV.Len(); i++) {
02388     const TPhasePoint& Point = PhaseV[i];
02389     const TGrowthStat& GrowthStat = Point.GrowthStat;
02390     const PGraphStat& FirstGrowth = GrowthStat[0];
02391     const PGraphStat& LastGrowth = GrowthStat.Last();
02392     fprintf(F, "%d\t%g\t%g\t", i, Point.Alpha, Point.Beta);
02393     fprintf(F, "%d\t%d\t%d\t", LastGrowth->Nodes, LastGrowth->Edges, LastGrowth->NonZNodes);
02394     fprintf(F, "%d\t%d\t", LastGrowth->WccNodes, LastGrowth->WccEdges);
02395     fprintf(F, "%f\t%f\t%f\t%f\t", LastGrowth->FullDiam, LastGrowth->EffDiam, LastGrowth->FullWccDiam, LastGrowth->EffWccDiam);
02396     //fprintf(F, "%f\t%f", FirstGrowth.MxEigVal, LastGrowth.MxEigVal);
02397     fprintf(F, "\n");
02398   }
02399   fclose(F);
02400 }
02401 
02402 void TKronPhasePlot::KroneckerPhase(const TStr& MtxId, const int& MxIter,
02403  const double& MnAlpha, const double& MxAlpha, const double& AlphaStep,
02404  const double& MnBeta, const double& MxBeta, const double& BetaStep,
02405  const TStr& FNmPref) {
02406   TKronPhasePlot PhasePlot;
02407   TExeTm KronTm;
02408   int AlphaCnt=0, BetaCnt=0;
02409   for (double Alpha = MnAlpha; (Alpha-1e-6) <= MxAlpha; Alpha += AlphaStep) {
02410     AlphaCnt++;  BetaCnt = 0;
02411     printf("\n\n****A:%g***********************************************************************", Alpha);
02412     for (double Beta = MnBeta; (Beta-1e-6) <= MxBeta; Beta += BetaStep) {
02413       printf("\n\n==A[%d]:%g====B[%d]:%g=====================================================\n", AlphaCnt, Alpha, BetaCnt, Beta);
02414       BetaCnt++;
02415       TGrowthStat GrowthStat;
02416       PNGraph Graph;
02417       // run Kronecker
02418       TFullRectMtx SeedMtx = TFullRectMtx::GetMtxFromNm(MtxId);
02419       SeedMtx.Epsilonize(Alpha, Beta);
02420       for (int iter = 1; iter < MxIter + 1; iter++) {
02421         printf("%2d] at %s\n", iter, TExeTm::GetCurTm().CStr());
02422         Graph = PNGraph();  KronTm.Tick();
02423         Graph = SeedMtx.GenRMatKronecker(iter, false, 0);
02424         GrowthStat.Add(Graph, TNodeTm(iter));
02425         if (KronTm.GetSecs() > 30 * 60) {
02426           printf("*** TIME LIMIT [%s]\n", KronTm.GetTmStr().CStr());  break; }
02427       }
02428       const TStr Desc = TStr::Fmt("%s. Alpha:%g. Beta:%g", MtxId.CStr(), Alpha, Beta);
02429       const TStr FNmPref1 = TStr::Fmt("%s.a%02d.b%02d", FNmPref.CStr(), AlphaCnt, BetaCnt);
02430       TGPlot::PlotDegDist(Graph, FNmPref1, Desc, false, true, true);
02431       TGPlot::PlotWccDist(Graph, FNmPref1, Desc);
02432       TGPlot::PlotSccDist(Graph, FNmPref1, Desc);
02433       GrowthStat.PlotAll(FNmPref1, Desc);
02434       GrowthStat.SaveTxt(FNmPref1, Desc);
02435       PhasePlot.PhaseV.Add(TPhasePoint(Alpha, Beta, GrowthStat));
02436     }
02437     {TFOut FOut(TStr::Fmt("phase.%s.bin", FNmPref.CStr()));
02438     PhasePlot.Save(FOut); }
02439   }
02440 }
02441 */
02442 /*void TKroneckerLL::SetRndThetas() {
02443   ProbMtx.Dump("TRUE parameters");
02444   TFltV RndV;
02445   double SumRnd = 0.0;
02446   for (int i = 0; i < ProbMtx.Len(); i++) {
02447     RndV.Add(0.1+TKronMtx::Rnd.GetUniDev());
02448     SumRnd += RndV.Last();
02449   }
02450   RndV.Sort(false);
02451   for (int i = 0; i < ProbMtx.Len(); i++) { ProbMtx.At(i) = RndV[i]; }
02452   ProbMtx.Dump("Random parameters");
02453   const double EdgePSum = pow(Graph->GetEdges(), 1.0/KronIters);
02454   bool Repeat = true;
02455   while (Repeat) {
02456     const double Scale = EdgePSum / SumRnd;
02457     Repeat=false;  SumRnd = 0.0;
02458     for (int i = 0; i < ProbMtx.Len(); i++) {
02459       ProbMtx.At(i) = ProbMtx.At(i)*Scale;
02460       if (ProbMtx.At(i) > 0.95) { ProbMtx.At(i)=0.95; Repeat=true; }
02461       SumRnd += ProbMtx.At(i);
02462     }
02463   }
02464   ProbMtx.Dump("INIT parameters");
02465   ProbMtx.GetLLMtx(LLMtx);
02466 }*/
02467 
02468 /*
02469 void TKroneckerLL::TestLL() {
02470   TExeTm ExeTm;
02471   // approximation to empty graph log-likelihood
02472   */
02473   /*{ PNGraph Graph = TNGraph::New();
02474   for (uint i = 0; i < TMath::Pow2(4); i++) { Graph->AddNode(i); } //8k nodes
02475   PKronecker KronParam = TKronMtx::GetMtx("0.8 0.6; 0.7 0.3");
02476   TKroneckerLL KronLL(Graph, KronParam);
02477   printf("\nNodes: %d\n", KronLL.GetNodes());
02478   printf("Full Graph LL:               %f\n", KronLL.GetFullGraphLL());
02479   printf("Empty Graph Exact LL:        %f\n", KronLL.GetEmptyGraphLL());
02480   printf("Empty Approx x=log(1-x) LL:  %f\n", KronLL.GetApxEmptyGraphLL());
02481   printf("Empty Sample LL (100/node):  %f\n", KronLL.GetSampleEmptyGraphLL(Graph->GetNodes() * 100));
02482   KronLL.SetOrderPerm();
02483   printf("\nEdge    prob: %f, LL: %f\n", KronParam->GetEdgeProb(0,0,8), log(KronParam->GetEdgeProb(0,0,8)));
02484   printf("No Edge prob: %f, LL: %f\n", KronParam->GetNoEdgeProb(0,0,8), log(KronParam->GetNoEdgeProb(0,0,8)));
02485   printf("Empty Graph  LL:     %f\n", KronLL.CalcGraphLL());
02486   printf("Apx Empty Graph  LL: %f\n", KronLL.CalcApxGraphLL());
02487   Graph->AddEdge(0, 0);
02488   printf("add 1 edge.  LL:     %f\n", KronLL.CalcGraphLL());
02489   printf("Apx add 1 edge.  LL: %f\n", KronLL.CalcApxGraphLL()); }
02490   */
02491 
02492   // log-likelihood versus different Kronecker parameters
02493   /*{ PKronecker KronParam = TKronMtx::GetMtx("0.9 0.6; 0.6 0.2");
02494   PNGraph Graph = TKronMtx::GenKronecker(KronParam, 10, true, 10);
02495   TVec<PKronecker> ParamV;
02496   ParamV.Add(KronParam);
02497   ParamV.Add(TKronMtx::GetMtx("0.6 0.6; 0.6 0.5")); // sum = 2.3
02498   //ParamV.Add(TKronMtx::GetMtx("0.9 0.9; 0.4 0.1")); // sum = 2.3
02499   //ParamV.Add(TKronMtx::GetMtx("0.8 0.7; 0.6 0.2")); // sum = 2.3
02500   ParamV.Add(TKronMtx::GetMtx("0.9 0.9; 0.6 0.2")); // sum = 2.6
02501   for (int i = 0; i < ParamV.Len(); i++) {
02502     ParamV[i]->Dump();
02503     TKroneckerLL KronLL(Graph, ParamV[i]);
02504     for (int k = 0; k < 3; k++) {
02505       if (k==0) { KronLL.SetOrderPerm();  printf("Order permutation:\n"); }
02506       if (k==1) { KronLL.SetDegPerm();  printf("Degree permutation:\n"); }
02507       if (k==2) { KronLL.SetRndPerm();  printf("Random permutation:\n"); }
02508       const double LL = KronLL.CalcGraphLL(), aLL = KronLL.CalcApxGraphLL();
02509       printf("  Exact  Graph LL:  %f\n", LL);
02510       printf("  Approx Graph LL:  %f\n", aLL);
02511       printf("  error          :  %.12f\n", -fabs(LL-aLL)/LL);
02512     }
02513   } }
02514   */
02515   // exact vs. approximate log-likelihood
02516   /*{ PKronecker KronParam = TKronMtx::GetMtx("0.9 0.6; 0.6 0.2");
02517   PNGraph Graph = TKronMtx::GenFastKronecker(KronParam, 16, true, 0);
02518   TKroneckerLL KronLL(Graph, KronParam);
02519   TMom ExactLL, ApxLL;
02520   printf("Random permutation:\n");
02521   for (int i = 0; i < 100; i++) {
02522     KronLL.SetRndPerm();
02523     //ExactLL.Add(KronLL.CalcGraphLL());
02524     ApxLL.Add(KronLL.CalcApxGraphLL());
02525     //printf("  Exact  Graph LL:  %f\n", ExactLL.GetVal(ExactLL.GetVals()-1));
02526     printf("  Approx Graph LL:  %f\n", ApxLL.GetVal(ApxLL.GetVals()-1));
02527   }
02528   ExactLL.Def();  ApxLL.Def();
02529   //printf("EXACT:   %f  (%f)\n", ExactLL.GetMean(), ExactLL.GetSDev());
02530   printf("APPROX:  %f  (%f)\n", ApxLL.GetMean(), ApxLL.GetSDev());
02531   KronLL.SetOrderPerm();
02532   printf("Order permutation:\n");
02533   printf("  Exact  Graph LL:  %f\n", KronLL.CalcGraphLL());
02534   printf("  Approx Graph LL:  %f\n", KronLL.CalcApxGraphLL());
02535   }
02536   */
02537 
02538   // start from random permultation and sort it using bubble sort
02539   // compare the end result with ordered permutation
02540   //PKronecker KronParam = TKronMtx::GetMtx("0.9 0.6; 0.6 0.2");
02541   //PNGraph Graph = TKronMtx::GenFastKronecker(KronParam, 10, true, 1);
02542   /*PKronecker KronParam = TKronMtx::GetMtx("0.9 0.7; 0.9 0.5");
02543   PNGraph Graph = TKronMtx::GenFastKronecker(KronParam, 6, true, 2);
02544   TGAlg::SaveFullMtx(Graph, "kron32.tab");
02545 
02546   TKroneckerLL KronLL(Graph, KronParam);
02547   KronLL.SetOrderPerm();
02548   KronLL.LogLike = KronLL.CalcApxGraphLL();
02549   printf("  Approx Graph LL:   %f\n", KronLL.CalcApxGraphLL());
02550   printf("  swap 1-20:         %f\n", KronLL.SwapNodesLL(1, 20));
02551   printf("  swap 20-30:        %f\n", KronLL.SwapNodesLL(20, 30));
02552   printf("  swap 30-1:         %f\n", KronLL.SwapNodesLL(1, 30));
02553   printf("  swap 20-30:        %f\n", KronLL.SwapNodesLL(30, 20));
02554   IAssert(KronLL.GetPerm().IsSorted());
02555   KronLL.SetRndPerm();
02556   KronLL.LogLike = KronLL.CalcApxGraphLL();
02557   for (int i = 0; i < 1000000; i++) {
02558     const int nid1 = TInt::Rnd.GetUniDevInt(KronLL.Nodes);
02559     const int nid2 = TInt::Rnd.GetUniDevInt(KronLL.Nodes);
02560     printf("%3d]  swap LL:     %f\n", i, KronLL.SwapNodesLL(nid1, nid2));
02561   }
02562   printf("***   approx LL:   %f\n", KronLL.CalcApxGraphLL());
02563   printf("***    exact LL:   %f\n", KronLL.CalcGraphLL());
02564   */
02565   /*ExeTm.Tick();
02566   // bubble sort
02567   for (int i = 0; i < Graph->GetNodes()-1; i++) {
02568     for (int j = 1; j < Graph->GetNodes(); j++) {
02569       if (KronLL.GetPerm()[j-1] > KronLL.GetPerm()[j]) {
02570         const double oldLL = KronLL.GetLL();
02571         const double newLL = KronLL.SwapNodesLL(j-1, j);
02572         //const double trueLL = KronLL.CalcApxGraphLL();
02573         //printf("swap %3d - %3d:   old: %f   new: %f  true:%f\n",
02574         //    KronLL.GetPerm()[j-1], KronLL.GetPerm()[j], oldLL, newLL, trueLL);
02575       }
02576     }
02577   }
02578   //for (int i = 0; i < 100000; i++) {
02579   //  KronLL.SwapNodesLL(TInt::Rnd.GetUniDevInt(TMath::Pow2(16)), TInt::Rnd.GetUniDevInt(TMath::Pow2(16))); }
02580   printf("\nPermutation is %s\n", KronLL.GetPerm().IsSorted()? "SORTED" : "NOT SORTED");
02581   printf("  Swap   Graph LL:   %f\n", KronLL.GetLL());
02582   printf("  Approx Graph LL:   %f\n", KronLL.CalcApxGraphLL());
02583   KronLL.SetOrderPerm();
02584   printf("  Order  Graph LL:   %f\n\n", KronLL.CalcApxGraphLL());
02585   printf("Permutation is %s\n", KronLL.GetPerm().IsSorted()? "SORTED" : "NOT SORTED");
02586   printf("time: %f\n", ExeTm.GetSecs());
02587   */
02588   // evaluate the derivatives
02589   /*{ PNGraph Graph = TNGraph::New();
02590   TKronMtx KronParam = TKronMtx::GetMtx("0.8 0.4; 0.4 0.2");
02591   //for (uint i = 0; i < TMath::Pow2(4); i++) { Graph->AddNode(i); } //8k nodes
02592   Graph = TKronMtx::GenFastKronecker(KronParam, 8, true, 2); //TGAlg::SaveFullMtx(Graph, "kron16.txt");
02593   TKroneckerLL KronLL(Graph, KronParam);
02594   KronLL.SetOrderPerm();
02595   printf("\nNodes: %d\n", KronLL.GetNodes());
02596   printf("Full  Graph Exact LL:        %f\n", KronLL.GetFullGraphLL());
02597   printf("Empty Graph Exact LL:        %f\n", KronLL.GetEmptyGraphLL());
02598   printf("Empty Approx LL:             %f\n", KronLL.GetApxEmptyGraphLL());
02599   printf("Exact Graph LL:   %f\n", KronLL.CalcGraphLL());
02600   printf("Apx   Graph LL:   %f\n\n", KronLL.CalcApxGraphLL());
02601   // derivatives
02602   printf("Empty graph Exact DLL:           %f\n", KronLL.GetEmptyGraphDLL(0));
02603   printf("Empty graph Apx   DLL:           %f\n", KronLL.GetApxEmptyGraphDLL(0));
02604   printf("Theta0    edge(0,1) DLL:      %f\n", KronLL.LLMtx.GetEdgeDLL(0, 0, 1, 4));
02605   printf("Theta0 NO edge(0,1) DLL:      %f\n", KronLL.LLMtx.GetNoEdgeDLL(0, 0, 1, 4));
02606   printf("Theta0 NO edge(0,1) DLL:      %f\n", KronLL.LLMtx.GetApxNoEdgeDLL(0, 0, 1, 4));
02607   KronLL.CalcGraphDLL();  printf("Exact Theta0 DLL:");  KronLL.GetDLL(0);
02608   KronLL.CalcApxGraphDLL();  printf("Apx   Theta0 DLL:");  KronLL.GetDLL(0);
02609   KronLL.CalcFullApxGraphDLL();  printf("Apx   Theta0 DLL:");  KronLL.GetDLL(0);
02610   // swap
02611   */
02612   /*for (int i = 0; i < 100; i++) {
02613     const int A = TInt::Rnd.GetUniDevInt(KronLL.Nodes);
02614     KronLL.NodePerm.Swap(i, A);
02615     //KronLL.UpdateGraphDLL(i, A); printf("Fast  Theta0 DLL:");  KronLL.GetDLL(0);
02616     KronLL.CalcApxGraphDLL();  printf("Apx   Theta0 DLL:");  KronLL.GetDLL(0);
02617     //KronLL.CalcFullApxGraphDLL();  printf("Apx   Theta0 DLL:");  KronLL.GetDLL(0);
02618     //KronLL.CalcGraphDLL();  printf("Exact Theta0 DLL:");  KronLL.GetDLL(0);
02619     printf("\n");
02620   } */
02621   //}
02622 //}
02623 
02624 /*void TKroneckerLL::SampleGradient(const int& WarmUp, const int& NSamples, double& AvgLL, TFltV& AvgGradV, TFltV& SDevV, const bool& Plot) {
02625   printf("Samples: %s (warm-up: %s)\n", TInt::GetMegaStr(NSamples).CStr(), TInt::GetMegaStr(WarmUp).CStr());
02626   int NId1 = 0, NId2 = 0;
02627   TExeTm ExeTm;
02628   CalcApxGraphLL();
02629   for (int s = 0; s < WarmUp; s++) {
02630     SampleNextPerm(NId1, NId2); }
02631   printf("  warm-up:%s", ExeTm.GetTmStr());  ExeTm.Tick();
02632   CalcApxGraphLL();
02633   CalcApxGraphDLL();
02634   AvgLL = 0;
02635   TVec<TMom> DLLMomV(LLMtx.Len());
02636   for (int s = 0; s < NSamples; s++) {
02637     if (SampleNextPerm(NId1, NId2)) { // new permutation
02638       UpdateGraphDLL(NId1, NId2);
02639     }
02640     AvgLL += GetLL();
02641     for (int m = 0; m < LLMtx.Len(); m++) { DLLMomV[m].Add(GradV[m]); }
02642   }
02643   AvgLL = AvgLL / (NSamples*Nodes);
02644   // plot gradients over sampling time
02645   if (Plot) {
02646     TVec<TFltV> FltVV(LLMtx.Len()+1);
02647     for (int s = 0; s < DLLMomV[0].GetVals(); s += 1000) {
02648       for (int m = 0; m < LLMtx.Len(); m++) { FltVV[m].Add(DLLMomV[m].GetVal(s)); }
02649       FltVV.Last().Add(s); }
02650     const TStr FNm = TFile::GetUniqueFNm(TStr::Fmt("grad%dW%sS%s-#.png", KronIters, TInt::GetMegaStr(WarmUp).CStr(), TInt::GetMegaStr(NSamples).CStr()));
02651     TGnuPlot GP(FNm.GetFMid(), TStr::Fmt("Gradient vs. Sample Index. Nodes: %d, WarmUp: %s, Samples: %s, Avg LL: %f", Nodes,
02652       TInt::GetMegaStr(WarmUp).CStr(), TInt::GetMegaStr(NSamples).CStr(), AvgLL), true);
02653     for (int m = 0; m < LLMtx.Len(); m++) {
02654       GP.AddPlot(FltVV.Last(), FltVV[m], gpwLines, TStr::Fmt("Grad %d", m+1), "linewidth 5"); }
02655     GP.SetXYLabel("Sample Index (time)", "Log-likelihood gradient");
02656     GP.SavePng();
02657   }
02658   // average gradients
02659   printf("  sampling:%s\n", ExeTm.GetTmStr());
02660   printf("  AverageLL: %f\n", AvgLL);
02661   printf("Gradients:\n");
02662   AvgGradV.Gen(LLMtx.Len());
02663   SDevV.Gen(LLMtx.Len());
02664   for (int m = 0; m < LLMtx.Len(); m++) {
02665     DLLMomV[m].Def();
02666     AvgGradV[m] = DLLMomV[m].GetMean() / (Nodes*Nodes);
02667     SDevV[m] = DLLMomV[m].GetSDev() / (Nodes*Nodes);
02668     printf("  %d]  mean: %16f    sDev: %16f\n", m, AvgGradV[m], SDevV[m]);
02669   }
02670 }
02671 
02672 void TKronMaxLL::TFunc::FDeriv(const TFltV& Point, TFltV& GradV) {
02673   CallBack->GetDLL(Point, GradV);
02674   for (int i = 0; i < GradV.Len(); i++) { GradV[i] = -GradV[i]; }
02675 }
02676 
02677 double TKronMaxLL::TLogBarFunc::FVal(const TFltV& Point) {
02678   // log-likelihood
02679   const double LogLL = CallBack->GetLL(Point);
02680   // log-barrier
02681   const double MinBarrier = 0.05;
02682   const double MaxBarrier = 0.95;
02683   const double T1 = 1.0/T;
02684   double Barrier = 0.0;
02685   for (int i = 0; i < Point.Len(); i++) {
02686     if(Point[i].Val > MinBarrier && Point[i].Val < MaxBarrier) {
02687       Barrier += - T1 * (log(Point[i]-MinBarrier) + log(MaxBarrier-Point[i])); //log-barrier
02688     } else { Barrier = 1e5; }
02689   }
02690   IAssert(Barrier > 0.0);
02691   printf("barrrier: %f\n", Barrier);
02692   return -LogLL + Barrier; // minus LL since we want to maximize it
02693 }
02694 
02695 void TKronMaxLL::TLogBarFunc::FDeriv(const TFltV& Point, TFltV& DerivV) {
02696   // derivative of log-likelihood
02697   CallBack->GetDLL(Point, DerivV);
02698   // derivative of log barrier
02699   const double MinBarrier = 0.05;
02700   const double MaxBarrier = 0.95;
02701   const double T1 = 1.0/T;
02702   for (int i = 0; i < Point.Len(); i++) {
02703     DerivV[i] = - DerivV[i] + (- T1*(1.0/(Point[i]-MinBarrier) - 1.0/(MaxBarrier-Point[i])));
02704   }
02705 }
02706 */