SNAP Library , User Reference
2013-01-07 14:03:36
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
00001 #include "stdafx.h" 00002 #include "mag.h" 00003 00004 TRnd TMAGNodeSimple::Rnd = TRnd(0); 00005 TRnd TMAGNodeBern::Rnd = TRnd(0); 00006 TRnd TMAGNodeBeta::Rnd = TRnd(0); 00007 00008 00010 // MAG affinity matrix 00011 00012 const double TMAGAffMtx::NInf = -DBL_MAX; 00013 00014 TMAGAffMtx::TMAGAffMtx(const TFltV& SeedMatrix) : SeedMtx(SeedMatrix) { 00015 MtxDim = (int) sqrt((double)SeedMatrix.Len()); 00016 IAssert(MtxDim*MtxDim == SeedMtx.Len()); 00017 } 00018 00019 TMAGAffMtx& TMAGAffMtx::operator = (const TMAGAffMtx& Kronecker) { 00020 if (this != &Kronecker){ 00021 MtxDim=Kronecker.MtxDim; 00022 SeedMtx=Kronecker.SeedMtx; 00023 } 00024 return *this; 00025 } 00026 00027 bool TMAGAffMtx::IsProbMtx() const { 00028 for (int i = 0; i < Len(); i++) { 00029 if (At(i) < 0.0 || At(i) > 1.0) return false; 00030 } 00031 return true; 00032 } 00033 00034 void TMAGAffMtx::SetRndMtx(TRnd& Rnd, const int& PrmMtxDim, const double& MinProb) { 00035 MtxDim = PrmMtxDim; 00036 SeedMtx.Gen(MtxDim*MtxDim); 00037 for (int p = 0; p < SeedMtx.Len(); p++) { 00038 do { 00039 SeedMtx[p] = Rnd.GetUniDev(); 00040 } while (SeedMtx[p] < MinProb); 00041 } 00042 } 00043 00044 void TMAGAffMtx::SetEpsMtx(const double& Eps1, const double& Eps0, const int& Eps1Val, const int& Eps0Val) { 00045 for (int i = 0; i < Len(); i++) { 00046 double& Val = At(i); 00047 if (Val == Eps1Val) Val = double(Eps1); 00048 else if (Val == Eps0Val) Val = double(Eps0); 00049 } 00050 } 00051 00052 void TMAGAffMtx::AddRndNoise(TRnd& Rnd, const double& SDev) { 00053 Dump("before"); 00054 double NewVal; 00055 int c =0; 00056 for (int i = 0; i < Len(); i++) { 00057 for(c = 0; ((NewVal = At(i)*Rnd.GetNrmDev(1, SDev, 0.8, 1.2)) < 0.01 || NewVal>0.99) && c <1000; c++) { } 00058 if (c < 999) { At(i) = NewVal; } else { printf("XXXXX\n"); } 00059 } 00060 Dump("after"); 00061 } 00062 00063 TStr TMAGAffMtx::GetMtxStr() const { 00064 TChA ChA("["); 00065 for (int i = 0; i < Len(); i++) { 00066 ChA += TStr::Fmt("%g", At(i)); 00067 if ((i+1)%GetDim()==0 && (i+1<Len())) { ChA += "; "; } 00068 else if (i+1<Len()) { ChA += " "; } 00069 } 00070 ChA += "]"; 00071 return TStr(ChA); 00072 } 00073 00074 void TMAGAffMtx::GetLLMtx(TMAGAffMtx& LLMtx) { 00075 LLMtx.GenMtx(MtxDim); 00076 for (int i = 0; i < Len(); i++) { 00077 if (At(i) != 0.0) { LLMtx.At(i) = log(At(i)); } 00078 else { LLMtx.At(i) = NInf; } 00079 } 00080 } 00081 00082 void TMAGAffMtx::GetProbMtx(TMAGAffMtx& ProbMtx) { 00083 ProbMtx.GenMtx(MtxDim); 00084 for (int i = 0; i < Len(); i++) { 00085 if (At(i) != NInf) { ProbMtx.At(i) = exp(At(i)); } 00086 else { ProbMtx.At(i) = 0.0; } 00087 } 00088 } 00089 00090 void TMAGAffMtx::Swap(TMAGAffMtx& Mtx) { 00091 ::Swap(MtxDim, Mtx.MtxDim); 00092 SeedMtx.Swap(Mtx.SeedMtx); 00093 } 00094 00095 double TMAGAffMtx::GetMtxSum() const { 00096 double Sum = 0; 00097 for (int i = 0; i < Len(); i++) { 00098 Sum += At(i); } 00099 return Sum; 00100 } 00101 00102 double TMAGAffMtx::GetRowSum(const int& RowId) const { 00103 double Sum = 0; 00104 for (int c = 0; c < GetDim(); c++) { 00105 Sum += At(RowId, c); } 00106 return Sum; 00107 } 00108 00109 double TMAGAffMtx::GetColSum(const int& ColId) const { 00110 double Sum = 0; 00111 for (int r = 0; r < GetDim(); r++) { 00112 Sum += At(r, ColId); } 00113 return Sum; 00114 } 00115 00116 double TMAGAffMtx::Normalize() { 00117 double Sum = GetMtxSum(); 00118 if(Sum == 0) { 00119 return 0; 00120 } 00121 00122 for(int i = 0; i < Len(); i++) { 00123 At(i) = At(i) / Sum; 00124 } 00125 return Sum; 00126 } 00127 00128 void TMAGAffMtx::Dump(const TStr& MtxNm, const bool& Sort) const { 00129 /*printf("%s: %d x %d\n", MtxNm.Empty()?"Mtx":MtxNm.CStr(), GetDim(), GetDim()); 00130 for (int r = 0; r < GetDim(); r++) { 00131 for (int c = 0; c < GetDim(); c++) { printf(" %8.2g", At(r, c)); } 00132 printf("\n"); 00133 }*/ 00134 if (! MtxNm.Empty()) printf("%s\n", MtxNm.CStr()); 00135 double Sum=0.0; 00136 TFltV ValV = SeedMtx; 00137 if (Sort) { ValV.Sort(false); } 00138 for (int i = 0; i < ValV.Len(); i++) { 00139 printf(" %10.4g", ValV[i]()); 00140 Sum += ValV[i]; 00141 if ((i+1) % GetDim() == 0) { printf("\n"); } 00142 } 00143 printf(" (sum:%.4f)\n", Sum); 00144 } 00145 00146 // average difference in the parameters 00147 double TMAGAffMtx::GetAvgAbsErr(const TMAGAffMtx& Mtx1, const TMAGAffMtx& Mtx2) { 00148 TFltV P1 = Mtx1.GetMtx(); 00149 TFltV P2 = Mtx2.GetMtx(); 00150 IAssert(P1.Len() == P2.Len()); 00151 P1.Sort(); P2.Sort(); 00152 double delta = 0.0; 00153 for (int i = 0; i < P1.Len(); i++) { 00154 delta += fabs(P1[i] - P2[i]); 00155 } 00156 return delta/P1.Len(); 00157 } 00158 00159 // average L2 difference in the parameters 00160 double TMAGAffMtx::GetAvgFroErr(const TMAGAffMtx& Mtx1, const TMAGAffMtx& Mtx2) { 00161 TFltV P1 = Mtx1.GetMtx(); 00162 TFltV P2 = Mtx2.GetMtx(); 00163 IAssert(P1.Len() == P2.Len()); 00164 P1.Sort(); P2.Sort(); 00165 double delta = 0.0; 00166 for (int i = 0; i < P1.Len(); i++) { 00167 delta += pow(P1[i] - P2[i], 2); 00168 } 00169 return sqrt(delta/P1.Len()); 00170 } 00171 00172 // get matrix from matlab matrix notation 00173 TMAGAffMtx TMAGAffMtx::GetMtx(TStr MatlabMtxStr) { 00174 TStrV RowStrV, ColStrV; 00175 MatlabMtxStr.ChangeChAll(',', ' '); 00176 MatlabMtxStr.SplitOnAllCh(';', RowStrV); IAssert(! RowStrV.Empty()); 00177 RowStrV[0].SplitOnWs(ColStrV); IAssert(! ColStrV.Empty()); 00178 const int Rows = RowStrV.Len(); 00179 const int Cols = ColStrV.Len(); 00180 IAssert(Rows == Cols); 00181 TMAGAffMtx Mtx(Rows); 00182 for (int r = 0; r < Rows; r++) { 00183 RowStrV[r].SplitOnWs(ColStrV); 00184 IAssert(ColStrV.Len() == Cols); 00185 for (int c = 0; c < Cols; c++) { 00186 Mtx.At(r, c) = (double) ColStrV[c].GetFlt(); } 00187 } 00188 return Mtx; 00189 } 00190 00191 TMAGAffMtx TMAGAffMtx::GetRndMtx(TRnd& Rnd, const int& Dim, const double& MinProb) { 00192 TMAGAffMtx Mtx; 00193 Mtx.SetRndMtx(Rnd, Dim, MinProb); 00194 return Mtx; 00195 } 00196 00197 00199 // Simple MAG node attributes (Same Bernoulli for every attribute) 00200 00201 void TMAGNodeSimple::AttrGen(TIntVV& AttrVV, const int& NNodes) { 00202 IAssert(Dim > 0); 00203 AttrVV.Gen(NNodes, Dim); 00204 AttrVV.PutAll(0); 00205 00206 for(int i = 0; i < NNodes; i++) { 00207 for(int l = 0; l < Dim; l++) { 00208 if((TMAGNodeSimple::Rnd).GetUniDev() > Mu) { 00209 AttrVV.At(i, l) = 1; 00210 } 00211 } 00212 } 00213 } 00214 00215 void TMAGNodeSimple::LoadTxt(const TStr& InFNm) { 00216 FILE *fp = fopen(InFNm.CStr(), "r"); 00217 IAssertR(fp != NULL, "File does not exist: " + InFNm); 00218 00219 char buf[128]; 00220 char *token; 00221 TStr TokenStr; 00222 TFlt Val; 00223 00224 token = strtok(buf, "&"); 00225 token = strtok(token, " \t"); 00226 TokenStr = TStr(token); 00227 Mu = TokenStr.GetFlt(Val); 00228 00229 fclose(fp); 00230 } 00231 00232 void TMAGNodeSimple::SaveTxt(TStrV& OutStrV) const { 00233 OutStrV.Gen(Dim, 0); 00234 00235 for(int i = 0; i < Dim; i++) { 00236 OutStrV.Add(TStr::Fmt("%f", double(Mu))); 00237 } 00238 } 00239 00240 00242 // MAG node attributes with (different) Bernoulli 00243 00244 TMAGNodeBern& TMAGNodeBern::operator=(const TMAGNodeBern& Dist) { 00245 MuV = Dist.MuV; 00246 Dim = Dist.Dim; 00247 return (*this); 00248 } 00249 00250 void TMAGNodeBern::AttrGen(TIntVV& AttrVV, const int& NNodes) { 00251 IAssert(Dim > 0); 00252 AttrVV.Gen(NNodes, Dim); 00253 AttrVV.PutAll(0); 00254 00255 for(int i = 0; i < NNodes; i++) { 00256 for(int l = 0; l < Dim; l++) { 00257 if((TMAGNodeBern::Rnd).GetUniDev() > MuV[l]) { 00258 AttrVV.At(i, l) = 1; 00259 } 00260 } 00261 } 00262 } 00263 00264 void TMAGNodeBern::LoadTxt(const TStr& InFNm) { 00265 FILE *fp = fopen(InFNm.CStr(), "r"); 00266 IAssertR(fp != NULL, "File does not exist: " + InFNm); 00267 00268 Dim = 0; 00269 MuV.Gen(10, 0); 00270 00271 char buf[128]; 00272 char *token; 00273 TStr TokenStr; 00274 TFlt Val; 00275 00276 while(fgets(buf, sizeof(buf), fp) != NULL) { 00277 token = strtok(buf, "&"); 00278 token = strtok(token, " \t"); 00279 TokenStr = TStr(token); 00280 MuV.Add(TokenStr.GetFlt(Val)); 00281 } 00282 Dim = MuV.Len(); 00283 00284 fclose(fp); 00285 } 00286 00287 void TMAGNodeBern::SaveTxt(TStrV& OutStrV) const { 00288 OutStrV.Gen(Dim, 0); 00289 00290 for(int i = 0; i < Dim; i++) { 00291 OutStrV.Add(TStr::Fmt("%f", double(MuV[i]))); 00292 } 00293 } 00294 00295 00297 // MAG node attributes with Beta + Bernoulli family 00298 00299 TMAGNodeBeta& TMAGNodeBeta::operator=(const TMAGNodeBeta& Dist) { 00300 AlphaV = Dist.AlphaV; 00301 BetaV = Dist.BetaV; 00302 Dim = Dist.Dim; 00303 MuV = Dist.MuV; 00304 Dirty = Dist.Dirty; 00305 return (*this); 00306 } 00307 00308 void TMAGNodeBeta::SetBeta(const int& Attr, const double& Alpha, const double& Beta) { 00309 IAssert(Attr < Dim); 00310 AlphaV[Attr] = Alpha; 00311 BetaV[Attr] = Beta; 00312 Dirty = true; 00313 } 00314 00315 void TMAGNodeBeta::SetBetaV(const TFltV& _AlphaV, const TFltV& _BetaV) { 00316 IAssert(_AlphaV.Len() == _BetaV.Len()); 00317 AlphaV = _AlphaV; 00318 BetaV = _BetaV; 00319 Dim = _AlphaV.Len(); 00320 Dirty = true; 00321 } 00322 00323 void TMAGNodeBeta::AttrGen(TIntVV& AttrVV, const int& NNodes) { 00324 IAssert(Dim > 0); 00325 AttrVV.Gen(NNodes, Dim); 00326 AttrVV.PutAll(0); 00327 00328 // printf("AlphaV = %d, BetaV = %d, MuV = %d\n", AlphaV.Len(), BetaV.Len(), MuV.Len()); 00329 00330 for(int i = 0; i < NNodes; i++) { 00331 for(int l = 0; l < Dim; l++) { 00332 double x = TMAGNodeBeta::Rnd.GetGammaDev((int)AlphaV[l]); 00333 double y = TMAGNodeBeta::Rnd.GetGammaDev((int)BetaV[l]); 00334 MuV[l] = x / (x + y); 00335 if((TMAGNodeBeta::Rnd).GetUniDev() > MuV[l]) { 00336 AttrVV.At(i, l) = 1; 00337 } 00338 } 00339 } 00340 Dirty = false; 00341 } 00342 00343 void TMAGNodeBeta::LoadTxt(const TStr& InFNm) { 00344 FILE *fp = fopen(InFNm.CStr(), "r"); 00345 IAssertR(fp != NULL, "File does not exist: " + InFNm); 00346 00347 Dim = 0; 00348 AlphaV.Gen(10, 0); 00349 BetaV.Gen(10, 0); 00350 00351 char buf[128]; 00352 char *token; 00353 TStr TokenStr; 00354 TFlt Val; 00355 00356 while(fgets(buf, sizeof(buf), fp) != NULL) { 00357 token = strtok(buf, "&"); 00358 00359 token = strtok(token, " \t"); 00360 TokenStr = TStr(token); 00361 AlphaV.Add(TokenStr.GetFlt(Val)); 00362 00363 token = strtok(NULL, " \t"); 00364 TokenStr = TStr(token); 00365 BetaV.Add(TokenStr.GetFlt(Val)); 00366 00367 Dim++; 00368 } 00369 00370 fclose(fp); 00371 } 00372 00373 void TMAGNodeBeta::SaveTxt(TStrV& OutStrV) const { 00374 OutStrV.Gen(Dim, 0); 00375 00376 for(int i = 0; i < Dim; i++) { 00377 OutStrV.Add(TStr::Fmt("%f %f", double(AlphaV[i]), double(BetaV[i]))); 00378 } 00379 } 00380 00381 00383 // MAGFit with Bernoulli node attributes 00384 00385 void TMAGFitBern::SetGraph(const PNGraph& GraphPt) { 00386 Graph = GraphPt; 00387 bool NodesOk = true; 00388 // check that nodes IDs are {0,1,..,Nodes-1} 00389 for (int nid = 0; nid < Graph->GetNodes(); nid++) { 00390 if (! Graph->IsNode(nid)) { NodesOk=false; break; } } 00391 if (! NodesOk) { 00392 TIntV NIdV; GraphPt->GetNIdV(NIdV); 00393 Graph = TSnap::GetSubGraph(GraphPt, NIdV, true); 00394 for (int nid = 0; nid < Graph->GetNodes(); nid++) { 00395 IAssert(Graph->IsNode(nid)); } 00396 } 00397 } 00398 00399 void TMAGFitBern::SetPhiVV(const TIntVV& AttrVV, const int KnownIds) { 00400 const int NNodes = Param.GetNodes(); 00401 const int NAttrs = Param.GetAttrs(); 00402 00403 PhiVV.Gen(NNodes, NAttrs); 00404 KnownVV.Gen(NNodes, NAttrs); 00405 00406 for(int l = 0; l < NAttrs; l++) { 00407 for(int i = 0; i < NNodes; i++) { 00408 if(int(AttrVV(i, l)) == 0) { 00409 PhiVV(i, l) = 0.9999; 00410 } else { 00411 PhiVV(i, l) = 0.0001; 00412 } 00413 } 00414 00415 if(l < KnownIds) { 00416 KnownVV.PutY(l, true); 00417 } else { 00418 KnownVV.PutY(l, false); 00419 } 00420 } 00421 } 00422 00423 void TMAGFitBern::SaveTxt(const TStr& FNm) { 00424 const int NNodes = Param.GetNodes(); 00425 const int NAttrs = Param.GetAttrs(); 00426 const TFltV MuV = GetMuV(); 00427 TMAGAffMtxV MtxV; 00428 Param.GetMtxV(MtxV); 00429 00430 FILE *fp = fopen(FNm.GetCStr(), "w"); 00431 for(int l = 0; l < NAttrs; l++) { 00432 fprintf(fp, "%.4f\t", double(MuV[l])); 00433 for(int row = 0; row < 2; row++) { 00434 for(int col = 0; col < 2; col++) { 00435 fprintf(fp, " %.4f", double(MtxV[l].At(row, col))); 00436 } 00437 fprintf(fp, (row == 0) ? ";" : "\n"); 00438 } 00439 } 00440 fclose(fp); 00441 00442 fp = fopen((FNm + "f").CStr(), "w"); 00443 for(int i = 0; i < NNodes; i++) { 00444 for(int l = 0; l < NAttrs; l++) { 00445 fprintf(fp, "%f ", double(PhiVV(i, l))); 00446 } 00447 fprintf(fp, "\n"); 00448 } 00449 fclose(fp); 00450 } 00451 00452 void TMAGFitBern::Init(const TFltV& MuV, const TMAGAffMtxV& AffMtxV) { 00453 TMAGNodeBern DistParam(MuV); 00454 Param.SetNodeAttr(DistParam); 00455 Param.SetMtxV(AffMtxV); 00456 00457 const int NNodes = Param.GetNodes(); 00458 const int NAttrs = Param.GetAttrs(); 00459 00460 PhiVV.Gen(NNodes, NAttrs); 00461 KnownVV.Gen(NNodes, NAttrs); 00462 KnownVV.PutAll(false); 00463 } 00464 00465 #if 0 00466 void TMAGFitBern::PerturbInit(const TFltV& MuV, const TMAGAffMtxV& AffMtxV, const double& PerturbRate) { 00467 IAssert(PerturbRate < 1.0); 00468 00469 TFltV InitMuV = MuV; //InitMuV.PutAll(0.5); 00470 TMAGNodeBern DistParam(InitMuV); 00471 Param.SetMtxV(AffMtxV); 00472 TRnd& Rnd = TMAGNodeBern::Rnd; 00473 TMAGAffMtxV PerturbMtxV = AffMtxV; 00474 00475 const int NNodes = Param.GetNodes(); 00476 const int NAttrs = Param.GetAttrs(); 00477 00478 for(int l = 0; l < NAttrs; l++) { 00479 double Mu = MuV[l] + PerturbRate * (Rnd.GetUniDev() - 0.5) * 2; 00480 // double Mu = 0.5; 00481 if(Mu < 0.01) { Mu = 0.01; } 00482 if(Mu > 0.99) { Mu = 0.99; } 00483 DistParam.SetMu(l, Mu); 00484 // PhiVV.PutY(l, MuV[l] + Perturb); 00485 TMAGAffMtx AffMtx(AffMtxV[l]); 00486 for(int p = 0; p < 4; p++) { 00487 AffMtx.At(p) += PerturbRate * (Rnd.GetUniDev() - 0.5) * 2; 00488 if(AffMtx.At(p) < 0.05) { AffMtx.At(p) = 0.05; } 00489 if(AffMtx.At(p) > 0.95) { AffMtx.At(p) = 0.95; } 00490 } 00491 AffMtx.At(0, 1) = AffMtx.At(1, 0); 00492 PerturbMtxV[l] = AffMtx; 00493 } 00494 // NormalizeAffMtxV(PerturbMtxV); 00495 00496 printf("\n"); 00497 for(int l = 0; l < NAttrs; l++) { 00498 printf("Mu = %.3f ", DistParam.GetMu(l)); 00499 printf("AffMtx = %s\n", PerturbMtxV[l].GetMtxStr().GetCStr()); 00500 } 00501 Param.SetMtxV(PerturbMtxV); 00502 Param.SetNodeAttr(DistParam); 00503 00504 PhiVV.Gen(NNodes, NAttrs); 00505 KnownVV.Gen(NNodes, NAttrs); 00506 KnownVV.PutAll(false); 00507 } 00508 #endif // 0 00509 00510 void TMAGFitBern::RandomInit(const TFltV& MuV, const TMAGAffMtxV& AffMtxV, const int& Seed) { 00511 TRnd& Rnd = TMAGNodeBern::Rnd; 00512 Rnd.PutSeed(Seed); 00513 00514 TFltV InitMuV = MuV; InitMuV.PutAll(0.5); 00515 TMAGNodeBern DistParam(InitMuV); 00516 Param.SetMtxV(AffMtxV); 00517 00518 const int NNodes = Param.GetNodes(); 00519 const int NAttrs = Param.GetAttrs(); 00520 00521 PhiVV.Gen(NNodes, NAttrs); 00522 KnownVV.Gen(NNodes, NAttrs); 00523 KnownVV.PutAll(false); 00524 00525 for(int i = 0; i < NNodes; i++) { 00526 for(int l = 0; l < NAttrs; l++) { 00527 PhiVV.At(i, l) = Rnd.GetUniDev(); 00528 // PhiVV.At(i, l) = 0.5; 00529 } 00530 } 00531 00532 TMAGAffMtxV RndMtxV = AffMtxV; 00533 for(int l = 0; l < NAttrs; l++) { 00534 for(int p = 0; p < 4; p++) { 00535 RndMtxV[l].At(p) = TMAGNodeBern::Rnd.GetUniDev(); 00536 if(RndMtxV[l].At(p) < 0.1) { RndMtxV[l].At(p) = 0.1; } 00537 if(RndMtxV[l].At(p) > 0.9) { RndMtxV[l].At(p) = 0.9; } 00538 } 00539 RndMtxV[l].At(0, 1) = RndMtxV[l].At(1, 0); 00540 } 00541 00542 printf("\n"); 00543 for(int l = 0; l < NAttrs; l++) { 00544 printf("AffMtx = %s\n", RndMtxV[l].GetMtxStr().GetCStr()); 00545 } 00546 Param.SetMtxV(RndMtxV); 00547 Param.SetNodeAttr(DistParam); 00548 } 00549 00550 const double TMAGFitBern::GetInCoeff(const int& i, const int& j, const int& l, const int& A, const TMAGAffMtx& Theta) const { 00551 return (PhiVV.At(i, l) * Theta.At(0, A) + (1.0 - PhiVV.At(i, l)) * Theta.At(1, A)); 00552 } 00553 00554 const double TMAGFitBern::GetOutCoeff(const int& i, const int& j, const int& l, const int& A, const TMAGAffMtx& Theta) const { 00555 return (PhiVV.At(j, l) * Theta.At(A, 0) + (1.0 - PhiVV.At(j, l)) * Theta.At(A, 1)); 00556 } 00557 00558 const double TMAGFitBern::GetAvgInCoeff(const int& i, const int& AId, const int& A, const TMAGAffMtx& Theta) const { 00559 const int NNodes = Param.GetNodes(); 00560 const double Mu_l = AvgPhiV[AId] / double(NNodes); 00561 return (Mu_l * Theta.At(0, A) + (1.0 - Mu_l) * Theta.At(1, A)); 00562 } 00563 00564 const double TMAGFitBern::GetAvgOutCoeff(const int& i, const int& AId, const int& A, const TMAGAffMtx& Theta) const { 00565 const int NNodes = Param.GetNodes(); 00566 const double Mu_l = AvgPhiV[AId] / double(NNodes); 00567 return (Mu_l * Theta.At(A, 0) + (1.0 - Mu_l) * Theta.At(A, 1)); 00568 } 00569 00570 const double TMAGFitBern::GetProbPhi(const int& NId1, const int& NId2, const int& AId, const int& Attr1, const int& Attr2) const { 00571 double Prob1 = (Attr1 == 0) ? double(PhiVV.At(NId1, AId)) : (1.0 - PhiVV.At(NId1, AId)); 00572 double Prob2 = (Attr2 == 0) ? double(PhiVV.At(NId2, AId)) : (1.0 - PhiVV.At(NId2, AId)); 00573 return (Prob1 * Prob2); 00574 } 00575 00576 const double TMAGFitBern::GetProbMu(const int& NId1, const int& NId2, const int& AId, const int& Attr1, const int& Attr2, const bool Left, const bool Right) const { 00577 TMAGNodeBern DistParam = Param.GetNodeAttr(); 00578 // double Mu = DistParam.GetMu(AId); 00579 double Mu = AvgPhiV[AId] / double(Param.GetNodes()); 00580 double Prob1 = (Left) ? double(PhiVV.At(NId1, AId)) : double(Mu); 00581 double Prob2 = (Right)? double(PhiVV.At(NId2, AId)) : double(Mu); 00582 Prob1 = (Attr1 == 0) ? Prob1 : 1.0 - Prob1; 00583 Prob2 = (Attr2 == 0) ? Prob2 : 1.0 - Prob2; 00584 return (Prob1 * Prob2); 00585 } 00586 00587 const double TMAGFitBern::GetThetaLL(const int& NId1, const int& NId2, const int& AId) const { 00588 double LL = 0.0; 00589 const TMAGAffMtx& Mtx = Param.GetMtx(AId); 00590 for(int A1 = 0; A1 < 2; A1++) { 00591 for(int A2 = 0; A2 < 2; A2++) { 00592 LL += GetProbPhi(NId1, NId2, AId, A1, A2) * Mtx.At(A1, A2); 00593 } 00594 } 00595 return log(LL); 00596 } 00597 00598 const double TMAGFitBern::GetAvgThetaLL(const int& NId1, const int& NId2, const int& AId, const bool Left, const bool Right) const { 00599 double LL = 0.0; 00600 const TMAGAffMtx& Mtx = Param.GetMtx(AId); 00601 for(int A1 = 0; A1 < 2; A1++) { 00602 for(int A2 = 0; A2 < 2; A2++) { 00603 LL += GetProbMu(NId1, NId2, AId, A1, A2, Left, Right) * Mtx.At(A1, A2); 00604 } 00605 } 00606 return log(LL); 00607 } 00608 00609 const double TMAGFitBern::GetSqThetaLL(const int& NId1, const int& NId2, const int& AId) const { 00610 double LL = 0.0; 00611 const TMAGAffMtx& Mtx = Param.GetMtx(AId); 00612 for(int A1 = 0; A1 < 2; A1++) { 00613 for(int A2 = 0; A2 < 2; A2++) { 00614 LL += GetProbPhi(NId1, NId2, AId, A1, A2) * Mtx.At(A1, A2) * Mtx.At(A1, A2); 00615 } 00616 } 00617 return log(LL); 00618 } 00619 00620 const double TMAGFitBern::GetAvgSqThetaLL(const int& NId1, const int& NId2, const int& AId, const bool Left, const bool Right) const { 00621 double LL = 0.0; 00622 const TMAGAffMtx& Mtx = Param.GetMtx(AId); 00623 for(int A1 = 0; A1 < 2; A1++) { 00624 for(int A2 = 0; A2 < 2; A2++) { 00625 LL += GetProbMu(NId1, NId2, AId, A1, A2, Left, Right) * Mtx.At(A1, A2) * Mtx.At(A1, A2); 00626 } 00627 } 00628 return log(LL); 00629 } 00630 00631 const double TMAGFitBern::GetProdLinWeight(const int& NId1, const int& NId2) const { 00632 const int NAttrs = Param.GetAttrs(); 00633 double LL = 0.0; 00634 00635 for(int l = 0; l < NAttrs; l++) { 00636 LL += GetThetaLL(NId1, NId2, l); 00637 } 00638 // return LL; 00639 return LL + log(NormConst); 00640 } 00641 00642 const double TMAGFitBern::GetAvgProdLinWeight(const int& NId1, const int& NId2, const bool Left, const bool Right) const { 00643 const int NAttrs = Param.GetAttrs(); 00644 double LL = 0.0; 00645 00646 for(int l = 0; l < NAttrs; l++) { 00647 LL += GetAvgThetaLL(NId1, NId2, l, Left, Right); 00648 } 00649 // return LL; 00650 return LL + log(NormConst); 00651 } 00652 00653 const double TMAGFitBern::GetProdSqWeight(const int& NId1, const int& NId2) const { 00654 const int NAttrs = Param.GetAttrs(); 00655 double LL = 0.0; 00656 00657 for(int l = 0; l < NAttrs; l++) { 00658 LL += GetSqThetaLL(NId1, NId2, l); 00659 } 00660 // return LL; 00661 return LL + 2 * log(NormConst); 00662 } 00663 00664 const double TMAGFitBern::GetAvgProdSqWeight(const int& NId1, const int& NId2, const bool Left, const bool Right) const { 00665 const int NAttrs = Param.GetAttrs(); 00666 double LL = 0.0; 00667 00668 for(int l = 0; l < NAttrs; l++) { 00669 LL += GetAvgSqThetaLL(NId1, NId2, l, Left, Right); 00670 } 00671 // return LL; 00672 return LL + 2 * log(NormConst); 00673 } 00674 00675 const double LogSumExp(const double LogVal1, const double LogVal2) { 00676 double MaxExp = (LogVal1 > LogVal2) ? LogVal1 : LogVal2; 00677 double Sum = exp(LogVal1 - MaxExp) + exp(LogVal2 - MaxExp); 00678 return (log(Sum) + MaxExp); 00679 } 00680 00681 const double LogSumExp(const TFltV& LogValV) { 00682 const int Len = LogValV.Len(); 00683 double MaxExp = -DBL_MAX; 00684 00685 for(int i = 0; i < Len; i++) { 00686 if(MaxExp < LogValV[i]) { MaxExp = LogValV[i]; } 00687 } 00688 00689 double Sum = 0.0; 00690 for(int i = 0; i < Len; i++) { 00691 Sum += exp(LogValV[i] - MaxExp); 00692 } 00693 00694 return (log(Sum) + MaxExp); 00695 } 00696 00697 const double LogSumExp(const double *LogValArray, const int Len) { 00698 TFltV TmpV(Len); 00699 for(int i = 0; i < Len; i++) { TmpV[i] = LogValArray[i]; } 00700 return LogSumExp(TmpV); 00701 } 00702 00703 const double TMAGFitBern::GradPhiMI(const double& x, const int& NId, const int& AId, const double& Lambda, const double& DeltaQ, const TFltVV& CntVV) { 00704 const int NAttrs = CntVV.GetYDim(); 00705 double Grad = DeltaQ - log(x) + log(1.0-x); 00706 00707 for(int l = 0; l < NAttrs; l++) { 00708 if(l == AId) { continue; } 00709 const double C0 = PhiVV(NId, l); 00710 const double C1 = 1.0 - C0; 00711 Grad -= Lambda * C0 * log(CntVV(0, l) + C0 * x); 00712 Grad -= Lambda * C1 * log(CntVV(1, l) + C1 * x); 00713 Grad += Lambda * C0 * log(CntVV(2, l) + C0 * (1-x)); 00714 Grad += Lambda * C1 * log(CntVV(3, l) + C1 * (1-x)); 00715 Grad -= Lambda * log(CntVV(0, l) + CntVV(1, l) + x); 00716 Grad += Lambda * log(CntVV(2, l) + CntVV(3, l) + (1-x)); 00717 } 00718 00719 return Grad; 00720 } 00721 00722 const double TMAGFitBern::ObjPhiMI(const double& x, const int& NId, const int& AId, const double& Lambda, const double& Q0, const double& Q1, const TFltVV& CntVV) { 00723 const int NAttrs = CntVV.GetYDim(); 00724 double Val = x*(Q0 - log(x)) + (1-x)*(Q1 - log(1.0-x)); 00725 00726 for(int l = 0; l < NAttrs; l++) { 00727 if(l == AId) { continue; } 00728 const double C0 = PhiVV(NId, l); 00729 const double C1 = 1.0 - C0; 00730 Val -= Lambda * (CntVV(0, l) + C0 * x) * log(CntVV(0, l) + C0 * x); 00731 Val -= Lambda * (CntVV(1, l) + C1 * x) * log(CntVV(1, l) + C1 * x); 00732 Val -= Lambda * (CntVV(2, l) + C0 * (1-x)) * log(CntVV(2, l) + C0 * (1-x)); 00733 Val -= Lambda * (CntVV(3, l) + C1 * (1-x)) * log(CntVV(3, l) + C1 * (1-x)); 00734 Val += Lambda * (CntVV(0, l) + CntVV(1, l) + x) * log(CntVV(0, l) + CntVV(1, l) + x); 00735 Val += Lambda * (CntVV(2, l) + CntVV(3, l) + 1 - x) * log(CntVV(2, l) + CntVV(3, l) + (1-x)); 00736 00737 if(!(CntVV(0, l) > 0)) printf("CntVV(0, %d) = %.2f\n", l, double(CntVV(0, l))); 00738 if(!(CntVV(1, l) > 0)) printf("CntVV(1, %d) = %.2f\n", l, double(CntVV(1, l))); 00739 if(!(CntVV(2, l) > 0)) printf("CntVV(2, %d) = %.2f\n", l, double(CntVV(2, l))); 00740 if(!(CntVV(3, l) > 0)) printf("CntVV(3, %d) = %.2f\n", l, double(CntVV(3, l))); 00741 } 00742 00743 return Val; 00744 } 00745 00746 const double TMAGFitBern::GetEstNoEdgeLL(const int& NId, const int& AId) const { 00747 // const int NNodes = Param.GetNodes(); 00748 // const int NAttrs = Param.GetAttrs(); 00749 00750 TMAGNodeBern DistParam = Param.GetNodeAttr(); 00751 double LL = 0.0; 00752 00753 return LL; 00754 } 00755 00756 const double TMAGFitBern::UpdatePhi(const int& NId, const int& AId, double& Phi) { 00757 TMAGAffMtx LLTheta, Theta = Param.GetMtx(AId); 00758 TMAGAffMtx SqTheta(Theta); 00759 const int NNodes = Param.GetNodes(); 00760 // const int NAttrs = Param.GetAttrs(); 00761 Theta.GetLLMtx(LLTheta); 00762 TMAGNodeBern DistParam = Param.GetNodeAttr(); 00763 const double Mu = DistParam.GetMu(AId); 00764 00765 for(int i = 0; i < Theta.Len(); i++) { 00766 SqTheta.At(i) = SqTheta.At(i) * SqTheta.At(i); 00767 } 00768 00769 // Using log-sum-exp trick 00770 double EdgeQ[2], NonEdgeQ[2], MaxExp[2]; 00771 TFltV NonEdgeLLV[2]; 00772 for(int i = 0; i < 2; i++) { 00773 EdgeQ[i] = 0.0; 00774 NonEdgeQ[i] = 0.0; 00775 MaxExp[i] = -DBL_MAX; 00776 NonEdgeLLV[i].Gen(4 * NNodes, 0); 00777 } 00778 00779 for(int j = 0; j < NNodes; j++) { 00780 if(j == NId) { continue; } 00781 00782 if(Graph->IsEdge(NId, j)) { 00783 EdgeQ[0] += GetOutCoeff(NId, j, AId, 0, LLTheta); 00784 EdgeQ[1] += GetOutCoeff(NId, j, AId, 1, LLTheta); 00785 } else { 00786 double LinW = GetProdLinWeight(NId, j) - GetThetaLL(NId, j, AId); 00787 double SqW = GetProdSqWeight(NId, j) - GetSqThetaLL(NId, j, AId); 00788 00789 for(int i = 0; i < 2; i++) { 00790 NonEdgeLLV[i].Add(LinW + log(GetOutCoeff(NId, j, AId, i, Theta))); 00791 NonEdgeLLV[i].Add(SqW + log(GetOutCoeff(NId, j, AId, i, SqTheta)) + log(0.5)); 00792 } 00793 } 00794 00795 if(Graph->IsEdge(j, NId)) { 00796 EdgeQ[0] += GetInCoeff(j, NId, AId, 0, LLTheta); 00797 EdgeQ[1] += GetInCoeff(j, NId, AId, 1, LLTheta); 00798 } else { 00799 double LinW = GetProdLinWeight(j, NId) - GetThetaLL(j, NId, AId); 00800 double SqW = GetProdSqWeight(j, NId) - GetSqThetaLL(j, NId, AId); 00801 00802 for(int i = 0; i < 2; i++) { 00803 NonEdgeLLV[i].Add(LinW + log(GetInCoeff(j, NId, AId, i, Theta))); 00804 NonEdgeLLV[i].Add(SqW + log(GetInCoeff(j, NId, AId, i, SqTheta)) + log(0.5)); 00805 } 00806 } 00807 } 00808 00809 NonEdgeQ[0] = LogSumExp(NonEdgeLLV[0]); 00810 NonEdgeQ[1] = LogSumExp(NonEdgeLLV[1]); 00811 00812 double Q[2]; 00813 Q[0] = log(Mu) + EdgeQ[0] - exp(NonEdgeQ[0]); 00814 Q[1] = log(1.0 - Mu) + EdgeQ[1] - exp(NonEdgeQ[1]); 00815 // double Q = Q1 - Q0; 00816 // printf(" [Phi_{%d}{%d}] :: Q0 = %f, Q1 = %f\n", NId, AId, Q0, Q1); 00817 00818 // Phi = 1.0 / (1.0 + exp(Q)); 00819 Phi = Q[0] - LogSumExp(Q, 2); 00820 Phi = exp(Phi); 00821 00822 return Phi - PhiVV.At(NId, AId); 00823 } 00824 00825 00826 const double TMAGFitBern::UpdatePhiMI(const double& Lambda, const int& NId, const int& AId, double& Phi) { 00827 TMAGAffMtx LLTheta, Theta = Param.GetMtx(AId); 00828 TMAGAffMtx SqTheta(Theta); 00829 const int NNodes = Param.GetNodes(); 00830 const int NAttrs = Param.GetAttrs(); 00831 Theta.GetLLMtx(LLTheta); 00832 TMAGNodeBern DistParam = Param.GetNodeAttr(); 00833 const double Mu = DistParam.GetMu(AId); 00834 00835 for(int i = 0; i < Theta.Len(); i++) { 00836 SqTheta.At(i) = SqTheta.At(i) * SqTheta.At(i); 00837 } 00838 00839 // Using log-sum-exp trick 00840 double EdgeQ[2], NonEdgeQ[2], MaxExp[2]; 00841 TFltV NonEdgeLLV[2]; 00842 TFltVV CntVV(4, NAttrs); CntVV.PutAll(0.0); 00843 for(int i = 0; i < 2; i++) { 00844 EdgeQ[i] = 0.0; 00845 NonEdgeQ[i] = 0.0; 00846 MaxExp[i] = -DBL_MAX; 00847 NonEdgeLLV[i].Gen(4 * NNodes, 0); 00848 } 00849 00850 for(int j = 0; j < NNodes; j++) { 00851 if(j == NId) { continue; } 00852 00853 for(int l = 0; l < NAttrs; l++) { 00854 if(l == AId) { continue; } 00855 CntVV(0, l) = CntVV(0, l) + PhiVV(j, AId) * PhiVV(j, l); 00856 CntVV(1, l) = CntVV(1, l) + PhiVV(j, AId) * (1.0-PhiVV(j, l)); 00857 CntVV(2, l) = CntVV(2, l) + (1.0-PhiVV(j, AId)) * PhiVV(j, l); 00858 CntVV(3, l) = CntVV(3, l) + (1.0-PhiVV(j, AId)) * (1.0-PhiVV(j, l)); 00859 } 00860 00861 if(Graph->IsEdge(NId, j)) { 00862 EdgeQ[0] += GetOutCoeff(NId, j, AId, 0, LLTheta); 00863 EdgeQ[1] += GetOutCoeff(NId, j, AId, 1, LLTheta); 00864 } else { 00865 double LinW = GetProdLinWeight(NId, j) - GetThetaLL(NId, j, AId); 00866 double SqW = GetProdSqWeight(NId, j) - GetSqThetaLL(NId, j, AId); 00867 00868 for(int i = 0; i < 2; i++) { 00869 NonEdgeLLV[i].Add(LinW + log(GetOutCoeff(NId, j, AId, i, Theta))); 00870 NonEdgeLLV[i].Add(SqW + log(GetOutCoeff(NId, j, AId, i, SqTheta)) + log(0.5)); 00871 } 00872 } 00873 00874 if(Graph->IsEdge(j, NId)) { 00875 EdgeQ[0] += GetInCoeff(j, NId, AId, 0, LLTheta); 00876 EdgeQ[1] += GetInCoeff(j, NId, AId, 1, LLTheta); 00877 } else { 00878 double LinW = GetProdLinWeight(j, NId) - GetThetaLL(j, NId, AId); 00879 double SqW = GetProdSqWeight(j, NId) - GetSqThetaLL(j, NId, AId); 00880 00881 for(int i = 0; i < 2; i++) { 00882 NonEdgeLLV[i].Add(LinW + log(GetInCoeff(j, NId, AId, i, Theta))); 00883 NonEdgeLLV[i].Add(SqW + log(GetInCoeff(j, NId, AId, i, SqTheta)) + log(0.5)); 00884 } 00885 } 00886 } 00887 00888 NonEdgeQ[0] = LogSumExp(NonEdgeLLV[0]); 00889 NonEdgeQ[1] = LogSumExp(NonEdgeLLV[1]); 00890 00891 double Q[2]; 00892 Q[0] = log(Mu) + EdgeQ[0] - exp(NonEdgeQ[0]); 00893 Q[1] = log(1.0 - Mu) + EdgeQ[1] - exp(NonEdgeQ[1]); 00894 double DeltaQ = Q[0] - Q[1]; 00895 00896 // double x[] = {0.1, 0.3, 0.5, 0.7, 0.9}; 00897 double x[] = {PhiVV(NId, AId)}; 00898 // for(int n = 0; n < 5; n++) { 00899 for(int n = 0; n < 1; n++) { 00900 // double LrnRate = 0.0002; 00901 double LrnRate = 0.001; 00902 for(int step = 0; step < 200; step++) { 00903 double Grad = GradPhiMI(x[n], NId, AId, Lambda, DeltaQ, CntVV); 00904 if(Grad > 0.0) { x[n] += LrnRate; } 00905 else { x[n] -= LrnRate; } 00906 if(x[n] > 0.9999) { x[n] = 0.9999; } 00907 if(x[n] < 0.0001) { x[n] = 0.0001; } 00908 LrnRate *= 0.995; 00909 } 00910 } 00911 00912 double MaxVal = -DBL_MAX; 00913 int MaxX = -1; 00914 // for(int n = 0; n < 5; n++) { 00915 for(int n = 0; n < 1; n++) { 00916 double Val = ObjPhiMI(x[n], NId, AId, Lambda, Q[0], Q[1], CntVV); 00917 if(Val > MaxVal) { 00918 MaxVal = Val; 00919 MaxX = n; 00920 } 00921 } 00922 IAssert(MaxX >= 0); 00923 00924 Phi = x[MaxX]; 00925 00926 return Phi - PhiVV.At(NId, AId); 00927 } 00928 00929 00930 const double TMAGFitBern::UpdateApxPhiMI(const double& Lambda, const int& NId, const int& AId, double& Phi, TFltVV& ProdVV) { 00931 TMAGAffMtx LLTheta, Theta = Param.GetMtx(AId); 00932 const int NNodes = Param.GetNodes(); 00933 const int NAttrs = Param.GetAttrs(); 00934 Theta.GetLLMtx(LLTheta); 00935 TMAGNodeBern DistParam = Param.GetNodeAttr(); 00936 const double Mu = DistParam.GetMu(AId); 00937 00938 TMAGAffMtx SqTheta(Theta); 00939 for(int i = 0; i < Theta.Len(); i++) { 00940 SqTheta.At(i) = SqTheta.At(i) * SqTheta.At(i); 00941 } 00942 00943 TFltV ProdV; ProdVV.GetRow(NId, ProdV); 00944 ProdV[0] -= GetAvgThetaLL(NId, NId, AId, true, false); 00945 ProdV[1] -= GetAvgThetaLL(NId, NId, AId, false, true); 00946 ProdV[2] -= log(2.0) + GetAvgSqThetaLL(NId, NId, AId, true, false); 00947 ProdV[3] -= log(2.0) + GetAvgSqThetaLL(NId, NId, AId, false, true); 00948 00949 // Using log-sum-exp trick 00950 double EdgeQ[2], MaxExp[2]; 00951 TFltV NonEdgeLLV[2]; 00952 TFltVV CntVV(4, NAttrs); CntVV.PutAll(0.0); 00953 for(int i = 0; i < 2; i++) { 00954 EdgeQ[i] = 0.0; 00955 MaxExp[i] = -DBL_MAX; 00956 NonEdgeLLV[i].Gen(4 * NNodes, 0); 00957 } 00958 00959 for(int F = 0; F < 2; F++) { 00960 NonEdgeLLV[F].Add(ProdV[0] + log(GetAvgOutCoeff(NId, AId, F, Theta))); 00961 NonEdgeLLV[F].Add(ProdV[1] + log(GetAvgInCoeff(NId, AId, F, Theta))); 00962 NonEdgeLLV[F].Add(ProdV[2] + log(GetAvgOutCoeff(NId, AId, F, SqTheta))); 00963 NonEdgeLLV[F].Add(ProdV[3] + log(GetAvgInCoeff(NId, AId, F, SqTheta))); 00964 } 00965 EdgeQ[0] = -(NNodes - 1) * exp(LogSumExp(NonEdgeLLV[0])); 00966 EdgeQ[1] = -(NNodes - 1) * exp(LogSumExp(NonEdgeLLV[1])); 00967 00968 00969 for(int l = 0; l < NAttrs; l++) { 00970 if(l == AId) { continue; } 00971 int BgId = (AId > l) ? AId : l; 00972 int SmId = (AId + l) - BgId; 00973 int SmL = (l < AId) ? 1 : 0; 00974 BgId *= 4; 00975 CntVV(0, l) = AvgPhiPairVV(SmId, BgId) - PhiVV(NId, AId) * PhiVV(NId, l); 00976 CntVV(1+SmL, l) = AvgPhiPairVV(SmId, BgId+1+SmL) - PhiVV(NId, AId) * (1.0-PhiVV(NId, l)); 00977 CntVV(2-SmL, l) = AvgPhiPairVV(SmId, BgId+2-SmL) - (1.0-PhiVV(NId, AId)) * PhiVV(NId, l); 00978 CntVV(3, l) = AvgPhiPairVV(SmId, BgId+3) - (1.0-PhiVV(NId, AId)) * (1.0-PhiVV(NId, l)); 00979 } 00980 00981 TNGraph::TNodeI NI = Graph->GetNI(NId); 00982 for(int d = 0; d < NI.GetOutDeg(); d++) { 00983 int Out = NI.GetOutNId(d); 00984 if(NId == Out) { continue; } 00985 double LinW = GetProdLinWeight(NId, Out) - GetThetaLL(NId, Out, AId); 00986 double SqW = GetProdSqWeight(NId, Out) - GetSqThetaLL(NId, Out, AId); 00987 00988 for(int F = 0; F < 2; F++) { 00989 EdgeQ[F] += GetOutCoeff(NId, Out, AId, F, LLTheta); 00990 EdgeQ[F] += exp(LinW + log(GetOutCoeff(NId, Out, AId, F, Theta))); 00991 EdgeQ[F] += 0.5 * exp(SqW + log(GetOutCoeff(NId, Out, AId, F, SqTheta))); 00992 } 00993 } 00994 for(int d = 0; d < NI.GetInDeg(); d++) { 00995 int In = NI.GetInNId(d); 00996 if(NId == In) { continue; } 00997 double LinW = GetProdLinWeight(In, NId) - GetThetaLL(In, NId, AId); 00998 double SqW = GetProdSqWeight(In, NId) - GetSqThetaLL(In, NId, AId); 00999 01000 for(int F = 0; F < 2; F++) { 01001 EdgeQ[F] += GetInCoeff(In, NId, AId, F, LLTheta); 01002 EdgeQ[F] += exp(LinW + log(GetInCoeff(In, NId, AId, F, Theta))); 01003 EdgeQ[F] += 0.5 * exp(SqW + log(GetInCoeff(In, NId, AId, F, SqTheta))); 01004 } 01005 } 01006 01007 EdgeQ[0] += log(Mu); 01008 EdgeQ[1] += log(1.0 - Mu); 01009 double DeltaQ = EdgeQ[0] - EdgeQ[1]; 01010 // printf("(%d, %d) :: Q[0] = %f, Q[1] = %f\n", NId, AId, EdgeQ[0], EdgeQ[1]); 01011 01012 // double x[] = {0.1, 0.3, 0.5, 0.7, 0.9}; 01013 double x[] = {PhiVV(NId, AId)}; 01014 TFltV ObjValV; ObjValV.Gen(60, 0); 01015 // for(int n = 0; n < 5; n++) { 01016 for(int n = 0; n < 1; n++) { 01017 // double LrnRate = 0.0002; 01018 double LrnRate = 0.001; 01019 for(int step = 0; step < 50; step++) { 01020 // for(int step = 0; step < 10; step++) { 01021 double Grad = GradPhiMI(x[n], NId, AId, Lambda, DeltaQ, CntVV); 01022 if(Grad > 0.0) { x[n] += LrnRate; } 01023 else { x[n] -= LrnRate; } 01024 if(x[n] > 0.9999) { x[n] = 0.9999; } 01025 if(x[n] < 0.0001) { x[n] = 0.0001; } 01026 if(x[n] == 0.9999 || x[n] == 0.0001) { 01027 break; 01028 } 01029 LrnRate *= 0.995; 01030 } 01031 ObjValV.Add(x[n]); 01032 // ObjValV.Add(PhiVV(NId, AId)); 01033 } 01034 01035 double MaxVal = -DBL_MAX; 01036 int MaxX = -1; 01037 // for(int n = 0; n < 5; n++) { 01038 for(int n = 0; n < ObjValV.Len(); n++) { 01039 double Val = ObjPhiMI(ObjValV[n], NId, AId, Lambda, EdgeQ[0], EdgeQ[1], CntVV); 01040 if(Val > MaxVal) { 01041 MaxVal = Val; 01042 MaxX = n; 01043 } else if(MaxX < 0) { 01044 printf("(%d, %d) : %f Q[0] = %f Q[1] = %f Val = %f\n", NId, AId, double(x[n]), double(EdgeQ[0]), double(EdgeQ[1]), Val); 01045 } 01046 } 01047 IAssert(MaxX >= 0); 01048 01049 Phi = ObjValV[MaxX]; 01050 01051 return Phi - PhiVV.At(NId, AId); 01052 } 01053 01054 01055 01056 double TMAGFitBern::DoEStepOneIter(const TFltV& TrueMuV, TFltVV& NewPhiVV, const double& Lambda) { 01057 const int NNodes = Param.GetNodes(); 01058 const int NAttrs = Param.GetAttrs(); 01059 double MaxDelta = 0, L1 = 0; 01060 double Val; 01061 TFltIntIntTrV NewVal; 01062 int RndCount = 0; 01063 // double OldMI = 0.0, NewMI = 0.0; 01064 TFltV MuV(NAttrs); MuV.PutAll(0.0); 01065 TIntV NIndV(NNodes), AIndV(NAttrs); 01066 01067 // Update Phi 01068 /* 01069 for(int i = 0; i < NNodes; i++) { NIndV[i] = i; } 01070 for(int l = 0; l < NAttrs; l++) { AIndV[l] = l; } 01071 if(Randomized) { 01072 NIndV.Shuffle(TMAGNodeBern::Rnd); 01073 AIndV.Shuffle(TMAGNodeBern::Rnd); 01074 } 01075 */ 01076 01077 NewVal.Gen(NAttrs * 2); 01078 for(int i = 0; i < NNodes; i++) { 01079 // const int NId = NIndV[i]%NNodes; 01080 for(int l = 0; l < NAttrs * 2; l++) { 01081 const int NId = TMAGNodeBern::Rnd.GetUniDevInt(NNodes); 01082 const int AId = TMAGNodeBern::Rnd.GetUniDevInt(NAttrs); 01083 // const int AId = AIndV[l]%NAttrs; 01084 // double Delta = UpdatePhi(NId, AId, Val); 01085 double Delta = 0.0; 01086 if(KnownVV(NId, AId)) { 01087 Val = PhiVV.At(NId, AId); 01088 } else { 01089 Delta = UpdatePhiMI(Lambda, NId, AId, Val); 01090 } 01091 01092 // PhiVV.At(NId, AId) = Val; 01093 NewVal[l] = TFltIntIntTr(Val, NId, AId); 01094 01095 // MuV[AId] = MuV[AId] + Val; 01096 if(fabs(Delta) > MaxDelta) { 01097 MaxDelta = fabs(Delta); 01098 } 01099 if(Val > 0.3 && Val < 0.7) { RndCount++; } 01100 } 01101 01102 for(int l = 0; l < NAttrs * 2; l++) { 01103 const int NId = NewVal[l].Val2; 01104 const int AId = NewVal[l].Val3; 01105 PhiVV.At(NId, AId) = NewVal[l].Val1; 01106 } 01107 } 01108 for(int i = 0; i < NNodes; i++) { 01109 for(int l = 0; l < NAttrs; l++) { 01110 MuV[l] = MuV[l] + PhiVV.At(i, l); 01111 } 01112 } 01113 for(int l = 0; l < NAttrs; l++) { 01114 MuV[l] = MuV[l] / double(NNodes); 01115 } 01116 01117 TFltV SortMuV = MuV; 01118 double Avg = 0.0; 01119 SortMuV.Sort(false); 01120 for(int l = 0; l < NAttrs; l++) { 01121 printf(" F[%d] = %.3f", l, double(MuV[l])); 01122 Avg += SortMuV[l]; 01123 L1 += fabs(TrueMuV[l] - SortMuV[l]); 01124 } 01125 printf("\n"); 01126 printf(" Rnd = %d(%.3f)", RndCount, double(RndCount) / double(NNodes * NAttrs)); 01127 printf(" Avg = %.3f\n", Avg / double(NAttrs)); 01128 L1 /= double(NAttrs); 01129 01130 return L1; 01131 } 01132 01133 01134 double TMAGFitBern::DoEStepApxOneIter(const TFltV& TrueMuV, TFltVV& NewPhiVV, const double& Lambda) { 01135 const int NNodes = Param.GetNodes(); 01136 const int NAttrs = Param.GetAttrs(); 01137 double MaxDelta = 0, L1 = 0; 01138 double Val; 01139 TFltIntIntTrV NewVal; 01140 int RndCount = 0; 01141 // double OldMI = 0.0, NewMI = 0.0; 01142 TFltV MuV(NAttrs); MuV.PutAll(0.0); 01143 TFltVV ProdVV(NNodes, 4); ProdVV.PutAll(0.0); 01144 TIntV NIndV(NNodes), AIndV(NAttrs); 01145 01146 // Update Phi 01147 /* 01148 for(int i = 0; i < NNodes; i++) { NIndV[i] = i; } 01149 for(int l = 0; l < NAttrs; l++) { AIndV[l] = l; } 01150 if(Randomized) { 01151 NIndV.Shuffle(TMAGNodeBern::Rnd); 01152 AIndV.Shuffle(TMAGNodeBern::Rnd); 01153 } 01154 */ 01155 01156 AvgPhiV.Gen(NAttrs); AvgPhiV.PutAll(0.0); 01157 AvgPhiPairVV.Gen(NAttrs, 4*NAttrs); AvgPhiPairVV.PutAll(0.0); 01158 for(int i = 0; i < NNodes; i++) { 01159 for(int l = 0; l < NAttrs; l++) { 01160 for(int p = l+1; p < NAttrs; p++) { 01161 int index = 4 * p; 01162 AvgPhiPairVV(l, index) += PhiVV(i, l) * PhiVV(i, p); 01163 AvgPhiPairVV(l, index+1) += PhiVV(i, l) * (1.0-PhiVV(i, p)); 01164 AvgPhiPairVV(l, index+2) += (1.0-PhiVV(i, l)) * PhiVV(i, p); 01165 AvgPhiPairVV(l, index+3) += (1.0-PhiVV(i, l)) * (1.0-PhiVV(i, p)); 01166 } 01167 AvgPhiV[l] += PhiVV(i, l); 01168 } 01169 } 01170 for(int i = 0; i < NNodes; i++) { 01171 ProdVV(i, 0) = GetAvgProdLinWeight(i, i, true, false); 01172 ProdVV(i, 1) = GetAvgProdLinWeight(i, i, false, true); 01173 ProdVV(i, 2) = GetAvgProdSqWeight(i, i, true, false); 01174 ProdVV(i, 3) = GetAvgProdSqWeight(i, i, false, true); 01175 } 01176 01177 const int Iter = 3; 01178 int NId; 01179 01180 NewVal.Gen(NAttrs * Iter); 01181 for(int i = 0; i < NNodes * Iter; i++) { 01182 for(int l = 0; l < NAttrs; l++) { 01183 const int AId = TMAGNodeBern::Rnd.GetUniDevInt(NAttrs); 01184 double Delta = 0.0; 01185 if(KnownVV(NId, AId)) { 01186 Val = PhiVV.At(NId, AId); 01187 } else { 01188 Delta = UpdateApxPhiMI(Lambda, NId, AId, Val, ProdVV); 01189 } 01190 01191 // PhiVV.At(NId, AId) = Val; 01192 NewVal[l] = TFltIntIntTr(Val, NId, AId); 01193 01194 // MuV[AId] = MuV[AId] + Val; 01195 if(fabs(Delta) > MaxDelta) { 01196 MaxDelta = fabs(Delta); 01197 } 01198 if(Val > 0.3 && Val < 0.7) { RndCount++; } 01199 } 01200 01201 for(int l = 0; l < NAttrs; l++) { 01202 const int NId = NewVal[l].Val2; 01203 const int AId = NewVal[l].Val3; 01204 01205 ProdVV(NId, 0) -= GetAvgThetaLL(NId, NId, AId, true, false); 01206 ProdVV(NId, 1) -= GetAvgThetaLL(NId, NId, AId, false, true); 01207 ProdVV(NId, 2) -= GetAvgSqThetaLL(NId, NId, AId, true, false); 01208 ProdVV(NId, 3) -= GetAvgSqThetaLL(NId, NId, AId, false, true); 01209 for(int p = 0; p < NAttrs; p++) { 01210 if(p > AId) { 01211 int index = 4 * p; 01212 AvgPhiPairVV(AId, index) -= PhiVV(NId, AId) * PhiVV(NId, p); 01213 AvgPhiPairVV(AId, index+1) -= PhiVV(NId, AId) * (1.0-PhiVV(NId, p)); 01214 AvgPhiPairVV(AId, index+2) -= (1.0-PhiVV(NId, AId)) * PhiVV(NId, p); 01215 AvgPhiPairVV(AId, index+3) -= (1.0-PhiVV(NId, AId)) * (1.0-PhiVV(NId, p)); 01216 } else if (p < AId) { 01217 int index = 4 * AId; 01218 AvgPhiPairVV(p, index) -= PhiVV(NId, p) * PhiVV(NId, AId); 01219 AvgPhiPairVV(p, index+1) -= PhiVV(NId, p) * (1.0-PhiVV(NId, AId)); 01220 AvgPhiPairVV(p, index+2) -= (1.0-PhiVV(NId, p)) * PhiVV(NId, AId); 01221 AvgPhiPairVV(p, index+3) -= (1.0-PhiVV(NId, p)) * (1.0-PhiVV(NId, AId)); 01222 } 01223 } 01224 AvgPhiV[AId] -= PhiVV(NId, AId); 01225 01226 PhiVV.At(NId, AId) = NewVal[l].Val1; 01227 01228 ProdVV(NId, 0) += GetAvgThetaLL(NId, NId, AId, true, false); 01229 ProdVV(NId, 1) += GetAvgThetaLL(NId, NId, AId, false, true); 01230 ProdVV(NId, 2) += GetAvgSqThetaLL(NId, NId, AId, true, false); 01231 ProdVV(NId, 3) += GetAvgSqThetaLL(NId, NId, AId, false, true); 01232 for(int p = 0; p < NAttrs; p++) { 01233 if(p > AId) { 01234 int index = 4 * p; 01235 AvgPhiPairVV(AId, index) += PhiVV(NId, AId) * PhiVV(NId, p); 01236 AvgPhiPairVV(AId, index+1) += PhiVV(NId, AId) * (1.0-PhiVV(NId, p)); 01237 AvgPhiPairVV(AId, index+2) += (1.0-PhiVV(NId, AId)) * PhiVV(NId, p); 01238 AvgPhiPairVV(AId, index+3) += (1.0-PhiVV(NId, AId)) * (1.0-PhiVV(NId, p)); 01239 } else if (p < AId) { 01240 int index = 4 * AId; 01241 AvgPhiPairVV(p, index) += PhiVV(NId, p) * PhiVV(NId, AId); 01242 AvgPhiPairVV(p, index+1) += PhiVV(NId, p) * (1.0-PhiVV(NId, AId)); 01243 AvgPhiPairVV(p, index+2) += (1.0-PhiVV(NId, p)) * PhiVV(NId, AId); 01244 AvgPhiPairVV(p, index+3) += (1.0-PhiVV(NId, p)) * (1.0-PhiVV(NId, AId)); 01245 } 01246 } 01247 AvgPhiV[AId] += PhiVV(NId, AId); 01248 } 01249 } 01250 01251 for(int l = 0; l < NAttrs; l++) { 01252 MuV[l] = AvgPhiV[l] / double(NNodes); 01253 } 01254 01255 TFltV SortMuV = MuV; 01256 double Avg = 0.0; 01257 // SortMuV.Sort(false); 01258 for(int l = 0; l < NAttrs; l++) { 01259 printf(" F[%d] = %.3f", l, double(MuV[l])); 01260 Avg += SortMuV[l]; 01261 // L1 += fabs(TrueMuV[l] - SortMuV[l]); 01262 } 01263 printf("\n"); 01264 printf(" Rnd = %d(%.3f)", RndCount, double(RndCount) / double(NNodes * NAttrs)); 01265 printf(" Avg = %.3f\n", Avg / double(NAttrs)); 01266 // printf(" Linf = %f\n", MaxDelta); 01267 // L1 /= double(NAttrs); 01268 01269 return L1; 01270 } 01271 01272 double TMAGFitBern::DoEStep(const TFltV& TrueMuV, const int& NIter, double& LL, const double& Lambda) { 01273 const int NNodes = Param.GetNodes(); 01274 const int NAttrs = Param.GetAttrs(); 01275 TFltVV NewPhiVV(NNodes, NAttrs); 01276 // double MI; 01277 01278 TFltV Delta(NIter); 01279 01280 for(int i = 0; i < NIter; i++) { 01281 TExeTm IterTm; 01282 printf("EStep iteration : %d\n", (i+1)); 01283 if(ESpeedUp) { 01284 Delta[i] = DoEStepApxOneIter(TrueMuV, NewPhiVV, Lambda); 01285 } else { 01286 Delta[i] = DoEStepOneIter(TrueMuV, NewPhiVV, Lambda); 01287 } 01288 // PhiVV = NewPhiVV; 01289 01290 printf(" (Time = %s)\n", IterTm.GetTmStr()); 01291 } 01292 printf("\n"); 01293 01294 NewPhiVV.Clr(); 01295 01296 return Delta.Last(); 01297 } 01298 01299 const double TMAGFitBern::UpdateMu(const int& AId) { 01300 const int NNodes = Param.GetNodes(); 01301 TMAGNodeBern DistParam = Param.GetNodeAttr(); 01302 const double OldMu = DistParam.GetMu(AId); 01303 double NewMu = 0.0; 01304 01305 for(int i = 0; i < NNodes; i++) { 01306 NewMu += PhiVV.At(i, AId); 01307 } 01308 AvgPhiV[AId] = NewMu; 01309 NewMu /= double(NNodes); 01310 01311 printf(" [Posterior Mu] = %.4f\n", NewMu); 01312 01313 double Delta = fabs(NewMu - OldMu); 01314 DistParam.SetMu(AId, NewMu); 01315 Param.SetNodeAttr(DistParam); 01316 01317 return Delta; 01318 } 01319 01320 const void TMAGFitBern::GradAffMtx(const int& AId, const TFltVV& ProdVV, const TFltVV& SqVV, const TMAGAffMtx& CurMtx, TFltV& GradV) { 01321 const int NNodes = Param.GetNodes(); 01322 // const int NAttrs = Param.GetAttrs(); 01323 GradV.PutAll(0.0); 01324 01325 for(int i = 0; i < NNodes; i++) { 01326 for(int j = 0; j < NNodes; j++) { 01327 double Prod = ProdVV(i, j) - GetThetaLL(i, j, AId); 01328 double Sq = SqVV(i, j) - GetSqThetaLL(i, j, AId); 01329 01330 for(int p = 0; p < 4; p++) { 01331 int Ai = p / 2; 01332 int Aj = p % 2; 01333 double Prob = GetProbPhi(i, j, AId, Ai, Aj); 01334 if(Graph->IsEdge(i, j)) { 01335 GradV[p] += Prob / CurMtx.At(p); 01336 } else { 01337 GradV[p] -= Prob * exp(Prod); 01338 GradV[p] -= Prob * exp(Sq) * CurMtx.At(p); 01339 } 01340 } 01341 } 01342 } 01343 } 01344 01345 01346 const void TMAGFitBern::GradApxAffMtx(const int& AId, const TFltVV& ProdVV, const TFltVV& SqVV, const TMAGAffMtx& CurMtx, TFltV& GradV) { 01347 const int NNodes = Param.GetNodes(); 01348 // const int NAttrs = Param.GetAttrs(); 01349 // const int NSq = NNodes * (NNodes - 1); 01350 GradV.PutAll(0.0); 01351 01352 TFltV LogSumV; 01353 for(int p = 0; p < 4; p++) { 01354 int Ai = p / 2; 01355 int Aj = p % 2; 01356 LogSumV.Gen(NNodes * 4, 0); 01357 01358 for(int i = 0; i < NNodes; i++) { 01359 const double LProd = ProdVV(i, 0) - GetAvgThetaLL(i, i, AId, true, false); 01360 const double LSq = SqVV(i, 0) - GetAvgSqThetaLL(i, i, AId, true, false); 01361 const double RProd = ProdVV(i, 1) - GetAvgThetaLL(i, i, AId, false, true); 01362 const double RSq = SqVV(i, 1) - GetAvgSqThetaLL(i, i, AId, false, true); 01363 01364 LogSumV.Add(LProd + log(GetProbMu(i, i, AId, Ai, Aj, true, false))); 01365 LogSumV.Add(LSq + log(GetProbMu(i, i, AId, Ai, Aj, true, false)) + log(CurMtx.At(p))); 01366 LogSumV.Add(RProd + log(GetProbMu(i, i, AId, Ai, Aj, false, true))); 01367 LogSumV.Add(RSq + log(GetProbMu(i, i, AId, Ai, Aj, false, true)) + log(CurMtx.At(p))); 01368 } 01369 double LogSum = LogSumExp(LogSumV); 01370 GradV[p] -= (NNodes - 1) * 0.5 * exp(LogSum); 01371 } 01372 01373 for(TNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) { 01374 const int NId1 = EI.GetSrcNId(); 01375 const int NId2 = EI.GetDstNId(); 01376 const double ProdOne = GetProdLinWeight(NId1, NId2) - GetThetaLL(NId1, NId2, AId); 01377 const double SqOne = GetProdSqWeight(NId1, NId2) - GetSqThetaLL(NId1, NId2, AId); 01378 01379 for(int p = 0; p < 4; p++) { 01380 int Ai = p / 2; 01381 int Aj = p % 2; 01382 double Prob = GetProbPhi(NId1, NId2, AId, Ai, Aj); 01383 GradV[p] += Prob / CurMtx.At(p); 01384 GradV[p] += Prob * exp(ProdOne); 01385 GradV[p] += Prob * exp(SqOne) * CurMtx.At(p); 01386 } 01387 } 01388 01389 #if 0 01390 const double Prod = ProdVV(0, 0) - GetAvgThetaLL(0, 0, AId, false, false); 01391 const double Sq = SqVV(0, 0) - GetAvgSqThetaLL(0, 0, AId, false, false); 01392 for(int p = 0; p < 4; p++) { 01393 int Ai = p / 2; 01394 int Aj = p % 2; 01395 GradV[p] -= NSq * exp(Prod) * GetProbMu(0, 0, AId, Ai, Aj, false, false); 01396 GradV[p] -= NSq * exp(Sq) * GetProbMu(0, 0, AId, Ai, Aj, false, false) * CurMtx.At(p); 01397 } 01398 01399 for(TNGraph::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) { 01400 const int NId1 = EI.GetSrcNId(); 01401 const int NId2 = EI.GetDstNId(); 01402 const double ProdOne = GetProdLinWeight(NId1, NId2) - GetThetaLL(NId1, NId2, AId); 01403 const double SqOne = GetProdSqWeight(NId1, NId2) - GetSqThetaLL(NId1, NId2, AId); 01404 01405 for(int p = 0; p < 4; p++) { 01406 int Ai = p / 2; 01407 int Aj = p % 2; 01408 double Prob = GetProbPhi(NId1, NId2, AId, Ai, Aj); 01409 // GradV[p] += Prob / CurMtx.At(p); 01410 // GradV[p] += Prob * exp(ProdOne); 01411 // GradV[p] += Prob * exp(SqOne) * CurMtx.At(p); 01412 } 01413 } 01414 #endif 01415 } 01416 01417 01418 const double TMAGFitBern::UpdateAffMtx(const int& AId, const double& LrnRate, const double& MaxGrad, const double& Lambda, TFltVV& ProdVV, TFltVV& SqVV, TMAGAffMtx& NewMtx) { 01419 double Delta = 0.0; 01420 // const int NNodes = Param.GetNodes(); 01421 // const int NAttrs = Param.GetAttrs(); 01422 TMAGAffMtx AffMtx = Param.GetMtx(AId); 01423 01424 TFltV GradV(4); 01425 TFltV HessV(4); 01426 if(MSpeedUp) { 01427 GradApxAffMtx(AId, ProdVV, SqVV, NewMtx, GradV); 01428 } else { 01429 GradAffMtx(AId, ProdVV, SqVV, NewMtx, GradV); 01430 } 01431 01432 double Ratio = 1.0; 01433 for(int p = 0; p < 4; p++) { 01434 if(fabs(Ratio * LrnRate * GradV[p]) > MaxGrad) { 01435 Ratio = MaxGrad / fabs(LrnRate * GradV[p]); 01436 } 01437 } 01438 01439 for(int p = 0; p < 4; p++) { 01440 GradV[p] *= (Ratio * LrnRate); 01441 NewMtx.At(p) = AffMtx.At(p) + GradV[p]; 01442 // if(NewMtx.At(p) > 0.9999) { NewMtx.At(p) = 0.9999; } 01443 if(NewMtx.At(p) < 0.0001) { NewMtx.At(p) = 0.0001; } 01444 } 01445 01446 printf(" [Attr = %d]\n", AId); 01447 printf(" %s + [%f, %f; %f %f] -----> %s\n", (AffMtx.GetMtxStr()).GetCStr(), double(GradV[0]), double(GradV[1]), double(GradV[2]), double(GradV[3]), (NewMtx.GetMtxStr()).GetCStr()); 01448 01449 // Param.SetMtx(AId, NewMtx); 01450 return Delta; 01451 } 01452 01453 01454 void TMAGFitBern::NormalizeAffMtxV(TMAGAffMtxV& MtxV, const bool UseMu) { 01455 const int NNodes = Param.GetNodes(); 01456 const int NAttrs = MtxV.Len(); 01457 TFltV MuV = GetMuV(); 01458 double Product = 1.0, ExpEdge = NNodes * (NNodes - 1); 01459 01460 TFltV SumV(NAttrs), EdgeSumV(NAttrs); 01461 SumV.PutAll(0.0); EdgeSumV.PutAll(0.0); 01462 for(int l = 0; l < NAttrs; l++) { 01463 double Mu = (UseMu) ? double(MuV[l]) : (AvgPhiV[l] / double(NNodes)); 01464 EdgeSumV[l] += Mu * Mu * MtxV[l].At(0, 0); 01465 EdgeSumV[l] += Mu * (1.0-Mu) * MtxV[l].At(0, 1); 01466 EdgeSumV[l] += Mu * (1.0-Mu) * MtxV[l].At(1, 0); 01467 EdgeSumV[l] += (1.0-Mu) * (1.0-Mu) * MtxV[l].At(1, 1); 01468 SumV[l] = SumV[l] + MtxV[l].At(0, 0); 01469 SumV[l] = SumV[l] + MtxV[l].At(0, 1); 01470 SumV[l] = SumV[l] + MtxV[l].At(1, 0); 01471 SumV[l] = SumV[l] + MtxV[l].At(1, 1); 01472 Product *= SumV[l]; 01473 ExpEdge *= EdgeSumV[l]; 01474 } 01475 ExpEdge = Graph->GetEdges() / ExpEdge; 01476 NormConst *= Product; 01477 // NormConst = ExpEdge; 01478 Product = 1.0; 01479 // Product = pow(Product * ExpEdge, 1.0 / double(NAttrs)); 01480 01481 for(int l = 0; l < NAttrs; l++) { 01482 for(int p = 0; p < 4; p++) { 01483 MtxV[l].At(p) = MtxV[l].At(p) * Product / SumV[l]; 01484 // MtxV[l].At(p) = MtxV[l].At(p) * Product / MtxV[l].At(0, 0); 01485 // MtxV[l].At(p) = MtxV[l].At(p) * Product; 01486 // if(MtxV[l].At(p) > 0.9999) { MtxV[l].At(p) = 0.9999; } 01487 // if(MtxV[l].At(p) < 0.0001) { MtxV[l].At(p) = 0.0001; } 01488 } 01489 } 01490 } 01491 01492 void TMAGFitBern::UnNormalizeAffMtxV(TMAGAffMtxV& MtxV, const bool UseMu) { 01493 const int NNodes = Param.GetNodes(); 01494 const int NAttrs = MtxV.Len(); 01495 TFltIntPrV MaxEntV(NAttrs); 01496 TFltV MuV = GetMuV(); 01497 NormalizeAffMtxV(MtxV, UseMu); 01498 01499 double ExpEdge = NNodes * (NNodes - 1); 01500 for(int l = 0; l < NAttrs; l++) { 01501 double Mu = MuV[l]; 01502 double EdgeSum = Mu * Mu * MtxV[l].At(0, 0); 01503 EdgeSum += Mu * (1.0-Mu) * MtxV[l].At(0, 1); 01504 EdgeSum += Mu * (1.0-Mu) * MtxV[l].At(1, 0); 01505 EdgeSum += (1.0-Mu) * (1.0-Mu) * MtxV[l].At(1, 1); 01506 ExpEdge *= EdgeSum; 01507 } 01508 NormConst = double(Graph->GetEdges()) / ExpEdge; 01509 // NormConst *= ExpEdge; 01510 01511 for(int l = 0; l < NAttrs; l++) { 01512 MaxEntV[l] = TFltIntPr(-1, l); 01513 for(int p = 0; p < 4; p++) { 01514 if(MaxEntV[l].Val1 < MtxV[l].At(p)) { MaxEntV[l].Val1 = MtxV[l].At(p); } 01515 } 01516 } 01517 MaxEntV.Sort(false); 01518 01519 for(int l = 0; l < NAttrs; l++) { 01520 int CurId = MaxEntV[l].Val2; 01521 double Factor = pow(NormConst, 1.0 / double(NAttrs - l)); 01522 double MaxFactor = 0.9999 / MaxEntV[l].Val1; 01523 Factor = (Factor > MaxFactor) ? MaxFactor : Factor; 01524 NormConst = NormConst / Factor; 01525 01526 for(int p = 0; p < 4; p++) { 01527 MtxV[CurId].At(p) = MtxV[CurId].At(p) * Factor; 01528 } 01529 } 01530 } 01531 01532 const void TMAGFitBern::PrepareUpdateAffMtx(TFltVV& ProdVV, TFltVV& SqVV) { 01533 const int NNodes = Param.GetNodes(); 01534 ProdVV.Gen(NNodes, NNodes); 01535 SqVV.Gen(NNodes, NNodes); 01536 01537 for(int i = 0; i < NNodes; i++) { 01538 for(int j = 0; j < NNodes; j++) { 01539 ProdVV(i, j) = GetProdLinWeight(i, j); 01540 SqVV(i, j) = GetProdSqWeight(i, j); 01541 } 01542 } 01543 } 01544 01545 const void TMAGFitBern::PrepareUpdateApxAffMtx(TFltVV& ProdVV, TFltVV& SqVV) { 01546 const int NNodes = Param.GetNodes(); 01547 ProdVV.Gen(NNodes, 2); 01548 SqVV.Gen(NNodes, 2); 01549 01550 for(int i = 0; i < NNodes; i++) { 01551 ProdVV(i, 0) = GetAvgProdLinWeight(i, i, true, false); 01552 ProdVV(i, 1) = GetAvgProdLinWeight(i, i, false, true); 01553 SqVV(i, 0) = GetAvgProdSqWeight(i, i, true, false); 01554 SqVV(i, 1) = GetAvgProdSqWeight(i, i, false, true); 01555 } 01556 } 01557 01558 const double TMAGFitBern::UpdateAffMtxV(const int& GradIter, const double& LrnRate, const double& MaxGrad, const double& Lambda, const int& NReal) { 01559 const int NNodes = Param.GetNodes(); 01560 const int NAttrs = Param.GetAttrs(); 01561 const TMAGNodeBern DistParam = Param.GetNodeAttr(); 01562 const TFltV MuV = DistParam.GetMuV(); 01563 double Delta = 0.0; 01564 double DecLrnRate = LrnRate, DecMaxGrad = MaxGrad; 01565 01566 TFltVV ProdVV(NNodes, NNodes), SqVV(NNodes, NNodes); 01567 TMAGAffMtxV NewMtxV, OldMtxV; 01568 Param.GetMtxV(OldMtxV); 01569 Param.GetMtxV(NewMtxV); 01570 01571 for(int g = 0; g < GradIter; g++) { 01572 if(MSpeedUp) { 01573 PrepareUpdateApxAffMtx(ProdVV, SqVV); 01574 } else { 01575 PrepareUpdateAffMtx(ProdVV, SqVV); 01576 } 01577 01578 printf(" [Grad step = %d]\n", (g+1)); 01579 // for(int l = 0; l < NAttrs; l++) { 01580 for(int l = NReal; l < NAttrs; l++) { 01581 UpdateAffMtx(l, DecLrnRate, DecMaxGrad, Lambda, ProdVV, SqVV, NewMtxV[l]); 01582 Param.SetMtxV(NewMtxV); 01583 } 01584 DecLrnRate *= 0.97; 01585 DecMaxGrad *= 0.97; 01586 01587 printf("\n"); 01588 NormalizeAffMtxV(NewMtxV, true); 01589 Param.SetMtxV(NewMtxV); 01590 } 01591 NormalizeAffMtxV(NewMtxV, true); 01592 01593 printf( "\nFinal\n"); 01594 for(int l = 0; l < NAttrs; l++) { 01595 printf(" ["); 01596 for(int p = 0; p < 4; p++) { 01597 // NewMtxV[l].At(p) = NewMtxV[l].At(p) * Product / SumV[l]; 01598 Delta += fabs(OldMtxV[l].At(p) - NewMtxV[l].At(p)); 01599 printf(" %.4f ", double(NewMtxV[l].At(p))); 01600 } 01601 printf("]\n"); 01602 } 01603 Param.SetMtxV(NewMtxV); 01604 ProdVV.Clr(); SqVV.Clr(); 01605 return Delta; 01606 } 01607 01608 void TMAGFitBern::DoMStep(const int& GradIter, const double& LrnRate, const double& MaxGrad, const double& Lambda, const int& NReal) { 01609 // const int NNodes = Param.GetNodes(); 01610 const int NAttrs = Param.GetAttrs(); 01611 double MuDelta = 0.0, AffMtxDelta = 0.0; 01612 01613 TExeTm ExeTm; 01614 01615 printf("\n"); 01616 AvgPhiV.Gen(NAttrs); AvgPhiV.PutAll(0.0); 01617 for(int l = 0; l < NAttrs; l++) { 01618 // printf(" [Attr = %d]\n", l); 01619 MuDelta += UpdateMu(l); 01620 } 01621 printf("\n"); 01622 01623 printf(" == Update Theta\n"); 01624 AffMtxDelta += UpdateAffMtxV(GradIter, LrnRate, MaxGrad, Lambda, NReal); 01625 printf("\n"); 01626 printf("Elpased time = %s\n", ExeTm.GetTmStr()); 01627 printf("\n"); 01628 } 01629 01630 void TMAGFitBern::DoEMAlg(const int& NStep, const int& NEstep, const int& NMstep, const double& LrnRate, const double& MaxGrad, const double& Lambda, const double& ReInit, const int& NReal) { 01631 const int NNodes = Param.GetNodes(); 01632 const int NAttrs = Param.GetAttrs(); 01633 TIntV IndexV; 01634 double LL; 01635 // double MuDist, MtxDist; 01636 01637 MuHisV.Gen(NStep + 1, 0); 01638 MtxHisV.Gen(NStep + 1, 0); 01639 LLHisV.Gen(NStep + 1, 0); 01640 01641 printf("--------------------------------------------\n"); 01642 printf("Before EM Iteration\n"); 01643 printf("--------------------------------------------\n"); 01644 01645 TMAGAffMtxV InitMtxV; 01646 TMAGNodeBern NodeAttr = Param.GetNodeAttr(); 01647 Param.GetMtxV(InitMtxV); 01648 TFltV InitMuV = NodeAttr.GetMuV(); 01649 01650 for(int i = 0; i < NNodes; i++) { 01651 for(int l = 0; l < NAttrs; l++) { 01652 if(! KnownVV(i, l)) { 01653 PhiVV.At(i, l) = TMAGNodeBern::Rnd.GetUniDev(); 01654 } 01655 } 01656 } 01657 01658 if(Debug) { 01659 double LL = ComputeApxLL(); 01660 MuHisV.Add(InitMuV); 01661 MtxHisV.Add(InitMtxV); 01662 LLHisV.Add(LL); 01663 } 01664 01665 NormalizeAffMtxV(InitMtxV, true); 01666 Param.SetMtxV(InitMtxV); 01667 01668 for(int n = 0; n < NStep; n++) { 01669 printf("--------------------------------------------\n"); 01670 printf("EM Iteration : %d\n", (n+1)); 01671 printf("--------------------------------------------\n"); 01672 01673 NodeAttr = Param.GetNodeAttr(); 01674 for(int i = 0; i < NNodes; i++) { 01675 for(int l = 0; l < NAttrs; l++) { 01676 if(!KnownVV(i, l) && TMAGNodeBern::Rnd.GetUniDev() < ReInit) { 01677 PhiVV.At(i, l) = TMAGNodeBern::Rnd.GetUniDev(); 01678 } 01679 } 01680 } 01681 DoEStep(InitMuV, NEstep, LL, Lambda); 01682 Param.GetMtxV(InitMtxV); 01683 // NormalizeAffMtxV(InitMtxV); 01684 Param.SetMtxV(InitMtxV); 01685 DoMStep(NMstep, LrnRate, MaxGrad, Lambda, NReal); 01686 01687 printf("\n"); 01688 01689 if(Debug) { 01690 double LL = ComputeApxLL(); 01691 MuHisV.Add(InitMuV); 01692 MtxHisV.Add(InitMtxV); 01693 LLHisV.Add(LL); 01694 printf(" ApxLL = %.2f (Const = %f)\n", LL, double(NormConst)); 01695 } 01696 01697 } 01698 Param.GetMtxV(InitMtxV); 01699 UnNormalizeAffMtxV(InitMtxV, true); 01700 Param.SetMtxV(InitMtxV); 01701 } 01702 01703 void TMAGFitBern::MakeCCDF(const TFltPrV& RawV, TFltPrV& CcdfV) { 01704 double Total = 0.0; 01705 CcdfV.Gen(RawV.Len(), 0); 01706 01707 for(int i = 0; i < RawV.Len(); i++) { 01708 if(RawV[i].Val2 <= 0) { continue; } 01709 Total += RawV[i].Val2; 01710 CcdfV.Add(RawV[i]); 01711 IAssert(RawV[i].Val2 > 0); 01712 } 01713 for(int i = 1; i < CcdfV.Len(); i++) { 01714 CcdfV[i].Val2 += CcdfV[i-1].Val2; 01715 } 01716 01717 for(int i = CcdfV.Len() - 1; i > 0; i--) { 01718 CcdfV[i].Val2 = (Total - CcdfV[i-1].Val2) ; 01719 if(CcdfV[i].Val2 <= 0) { printf("CCDF = %f\n", double(CcdfV[i].Val2));} 01720 IAssert(CcdfV[i].Val2 > 0); 01721 } 01722 CcdfV[0].Val2 = Total; 01723 // CcdfV[0].Val2 = 1.0; 01724 } 01725 01726 01727 void TMAGFitBern::PlotProperties(const TStr& FNm) { 01728 const int NNodes = Param.GetNodes(); 01729 const int NAttrs = Param.GetAttrs(); 01730 TMAGParam<TMAGNodeBern> MAGGen(NNodes, NAttrs); 01731 TMAGNodeBern MAGNode = Param.GetNodeAttr(); 01732 MAGGen.SetNodeAttr(MAGNode); 01733 TMAGAffMtxV MtxV; Param.GetMtxV(MtxV); 01734 MAGGen.SetMtxV(MtxV); 01735 01736 PNGraph TrG = new TNGraph; 01737 *TrG = *Graph; 01738 01739 TIntVV AttrVV(NNodes, NAttrs); 01740 for(int i = 0; i < NNodes; i++) { 01741 for(int j = 0; j < NAttrs; j++) { 01742 if(PhiVV(i, j) > TMAGNodeBern::Rnd.GetUniDev()) AttrVV(i, j) = 0; 01743 else AttrVV(i, j) = 1; 01744 } 01745 } 01746 PNGraph MAG = MAGGen.GenMAG(AttrVV, true, 10000); 01747 // PNGraph MAG = MAGGen.GenAttrMAG(AttrVV, true, 10000); 01748 printf("%d edges created for MAG...\n", MAG->GetEdges()); 01749 01750 TSnap::DelZeroDegNodes(TrG); 01751 TSnap::DelZeroDegNodes(MAG); 01752 01753 TGStatVec GS(tmuNodes, TFSet() | gsdInDeg | gsdOutDeg | gsdWcc | gsdHops | gsdClustCf | gsdSngVec | gsdSngVal | gsdTriadPart); 01754 01755 TGnuPlot InDegP(FNm + "-InDeg"), OutDegP(FNm + "-OutDeg"), SvalP(FNm + "-Sval"), SvecP(FNm + "-Svec"), WccP(FNm + "-Wcc"), HopP(FNm + "-Hop"), TriadP(FNm + "-Triad"), CcfP(FNm + "-Ccf");; 01756 01757 InDegP.SetXYLabel("Degree", "# of nodes"); 01758 OutDegP.SetXYLabel("Degree", "# of nodes"); 01759 SvalP.SetXYLabel("Rank", "Singular value"); 01760 SvecP.SetXYLabel("Rank", "Primary SngVec component"); 01761 WccP.SetXYLabel("Size of component", "# of components"); 01762 CcfP.SetXYLabel("Degree", "Clustering coefficient"); 01763 HopP.SetXYLabel("Hops", "# of node pairs"); 01764 TriadP.SetXYLabel("# of triads", "# of participating nodes"); 01765 01766 InDegP.SetScale(gpsLog10XY); InDegP.AddCmd("set key top right"); 01767 OutDegP.SetScale(gpsLog10XY); OutDegP.AddCmd("set key top right"); 01768 SvalP.SetScale(gpsLog10XY); SvalP.AddCmd("set key top right"); 01769 SvecP.SetScale(gpsLog10XY); SvecP.AddCmd("set key top right"); 01770 CcfP.SetScale(gpsLog10XY); CcfP.AddCmd("set key top right"); 01771 HopP.SetScale(gpsLog10XY); HopP.AddCmd("set key top right"); 01772 TriadP.SetScale(gpsLog10XY); TriadP.AddCmd("set key top right"); 01773 InDegP.ShowGrid(false); 01774 OutDegP.ShowGrid(false); 01775 SvalP.ShowGrid(false); 01776 SvecP.ShowGrid(false); 01777 CcfP.ShowGrid(false); 01778 HopP.ShowGrid(false); 01779 TriadP.ShowGrid(false); 01780 01781 const TStr Style[2] = {"lt 1 lw 3 lc rgb 'black'", "lt 2 lw 3 lc rgb 'red'"}; 01782 const TStr Name[2] = {"Real", "MAG"}; 01783 GS.Add(Graph, TSecTm(1), "Real Graph"); 01784 GS.Add(MAG, TSecTm(2), "MAG"); 01785 01786 TFltPrV InDegV, OutDegV, SvalV, SvecV, HopV, WccV, CcfV, TriadV; 01787 for(int i = 0; i < GS.Len(); i++) { 01788 MakeCCDF(GS.At(i)->GetDistr(gsdInDeg), InDegV); 01789 MakeCCDF(GS.At(i)->GetDistr(gsdOutDeg), OutDegV); 01790 SvalV = GS.At(i)->GetDistr(gsdSngVal); 01791 SvecV = GS.At(i)->GetDistr(gsdSngVec); 01792 MakeCCDF(GS.At(i)->GetDistr(gsdClustCf), CcfV); 01793 HopV = GS.At(i)->GetDistr(gsdHops); 01794 MakeCCDF(GS.At(i)->GetDistr(gsdTriadPart), TriadV); 01795 01796 InDegP.AddPlot(InDegV, gpwLines, Name[i], Style[i]); 01797 OutDegP.AddPlot(OutDegV, gpwLines, Name[i], Style[i]); 01798 SvalP.AddPlot(SvalV, gpwLines, Name[i], Style[i]); 01799 SvecP.AddPlot(SvecV, gpwLines, Name[i], Style[i]); 01800 CcfP.AddPlot(CcfV, gpwLines, Name[i], Style[i]); 01801 HopP.AddPlot(HopV, gpwLines, Name[i], Style[i]); 01802 TriadP.AddPlot(TriadV, gpwLines, Name[i], Style[i]); 01803 } 01804 01805 InDegP.SaveEps(30); 01806 OutDegP.SaveEps(30); 01807 SvalP.SaveEps(30); 01808 SvecP.SaveEps(30); 01809 CcfP.SaveEps(30); 01810 HopP.SaveEps(30); 01811 TriadP.SaveEps(30); 01812 } 01813 01814 void TMAGFitBern::CountAttr(TFltV& EstMuV) const { 01815 const int NNodes = PhiVV.GetXDim(); 01816 const int NAttrs = PhiVV.GetYDim(); 01817 EstMuV.Gen(NAttrs); 01818 EstMuV.PutAll(0.0); 01819 01820 for(int l = 0; l < NAttrs; l++) { 01821 for(int i = 0; i < NNodes; i++) { 01822 EstMuV[l] = EstMuV[l] + PhiVV(i, l); 01823 } 01824 EstMuV[l] = EstMuV[l] / double(NNodes); 01825 } 01826 } 01827 01828 void TMAGFitBern::SortAttrOrdering(const TFltV& TrueMuV, TIntV& IndexV) const { 01829 const int NAttrs = TrueMuV.Len(); 01830 // const int NNodes = PhiVV.GetXDim(); 01831 TFltV EstMuV, SortedTrueMuV, SortedEstMuV, TrueIdxV, EstIdxV; 01832 IndexV.Gen(NAttrs); 01833 TrueIdxV.Gen(NAttrs); 01834 EstIdxV.Gen(NAttrs); 01835 01836 for(int l = 0; l < NAttrs; l++) { 01837 TrueIdxV[l] = l; 01838 EstIdxV[l] = l; 01839 } 01840 01841 CountAttr(EstMuV); 01842 SortedTrueMuV = TrueMuV; 01843 SortedEstMuV = EstMuV; 01844 for(int i = 0; i < NAttrs; i++) { 01845 if(SortedTrueMuV[i] > 0.5) { SortedTrueMuV[i] = 1.0 - SortedTrueMuV[i]; } 01846 if(SortedEstMuV[i] > 0.5) { SortedEstMuV[i] = 1.0 - SortedEstMuV[i]; } 01847 } 01848 01849 for(int i = 0; i < NAttrs; i++) { 01850 for(int j = i+1; j < NAttrs; j++) { 01851 if(SortedTrueMuV[i] < SortedTrueMuV[j]) { 01852 SortedTrueMuV.Swap(i, j); 01853 TrueIdxV.Swap(i, j); 01854 } 01855 if(SortedEstMuV[i] < SortedEstMuV[j]) { 01856 EstIdxV.Swap((int)SortedEstMuV[i], (int)SortedEstMuV[j]); 01857 SortedEstMuV.Swap(i, j); 01858 } 01859 } 01860 } 01861 01862 for(int l = 0; l < NAttrs; l++) { 01863 IndexV[l] = (int)TrueIdxV[(int)EstIdxV[l]]; 01864 } 01865 } 01866 01867 const bool TMAGFitBern::NextPermutation(TIntV& IndexV) const { 01868 const int NAttrs = IndexV.Len(); 01869 int Pos = NAttrs - 1; 01870 while(Pos > 0) { 01871 if(IndexV[Pos-1] < IndexV[Pos]) { 01872 break; 01873 } 01874 Pos--; 01875 } 01876 if(Pos == 0) { 01877 return false; 01878 } 01879 01880 int Val = NAttrs, NewPos = -1; 01881 for(int i = Pos; i < NAttrs; i++) { 01882 if(IndexV[i] > IndexV[Pos - 1] && IndexV[i] < Val) { 01883 NewPos = i; 01884 Val = IndexV[i]; 01885 } 01886 } 01887 IndexV[NewPos] = IndexV[Pos - 1]; 01888 IndexV[Pos - 1] = Val; 01889 01890 TIntV SubIndexV; 01891 IndexV.GetSubValV(Pos, NAttrs - 1, SubIndexV); 01892 SubIndexV.Sort(true); 01893 for(int i = Pos; i < NAttrs; i++) { 01894 IndexV[i] = SubIndexV[i - Pos]; 01895 } 01896 01897 return true; 01898 } 01899 01900 const double TMAGFitBern::ComputeJointOneLL(const TIntVV& AttrVV) const { 01901 double LL = 0.0; 01902 const int NNodes = Param.GetNodes(); 01903 const int NAttrs = Param.GetAttrs(); 01904 TMAGAffMtxV MtxV(NAttrs); Param.GetMtxV(MtxV); 01905 const TMAGNodeBern NodeAttr = Param.GetNodeAttr(); 01906 const TFltV MuV = NodeAttr.GetMuV(); 01907 01908 for(int l = 0; l < NAttrs; l++) { 01909 for(int i = 0; i < MtxV[l].Len(); i++) { 01910 MtxV[l].At(i) = log(MtxV[l].At(i)); 01911 } 01912 } 01913 01914 for(int i = 0; i < NNodes; i++) { 01915 for(int l = 0; l < NAttrs; l++) { 01916 if(AttrVV.At(i, l) == 0) { 01917 LL += log(MuV[l]); 01918 } else { 01919 LL += log(1.0 - MuV[l]); 01920 } 01921 } 01922 01923 for(int j = 0; j < NNodes; j++) { 01924 if(i == j) { continue; } 01925 01926 double ProbLL = 0.0; 01927 for(int l = 0; l < NAttrs; l++) { 01928 ProbLL += MtxV[l].At(AttrVV.At(i, l), AttrVV.At(j, l)); 01929 } 01930 01931 if(Graph->IsEdge(i, j)) { 01932 LL += ProbLL; 01933 } else { 01934 LL += log(1-exp(ProbLL)); 01935 } 01936 } 01937 } 01938 01939 return LL; 01940 } 01941 01942 const double TMAGFitBern::ComputeJointAdjLL(const TIntVV& AttrVV) const { 01943 double LL = 0.0; 01944 const int NNodes = Param.GetNodes(); 01945 const int NAttrs = Param.GetAttrs(); 01946 TMAGAffMtxV MtxV(NAttrs); Param.GetMtxV(MtxV); 01947 const TMAGNodeBern NodeAttr = Param.GetNodeAttr(); 01948 const TFltV MuV = NodeAttr.GetMuV(); 01949 01950 for(int l = 0; l < NAttrs; l++) { 01951 for(int i = 0; i < MtxV[l].Len(); i++) { 01952 MtxV[l].At(i) = log(MtxV[l].At(i)); 01953 } 01954 } 01955 01956 for(int i = 0; i < NNodes; i++) { 01957 for(int j = 0; j < NNodes; j++) { 01958 if(i == j) { continue; } 01959 01960 double ProbLL = 0.0; 01961 for(int l = 0; l < NAttrs; l++) { 01962 ProbLL += MtxV[l].At(AttrVV.At(i, l), AttrVV.At(j, l)); 01963 } 01964 01965 if(Graph->IsEdge(i, j)) { 01966 LL += ProbLL; 01967 } else { 01968 LL += log(1-exp(ProbLL)); 01969 } 01970 } 01971 } 01972 01973 return LL; 01974 } 01975 01976 const double TMAGFitBern::ComputeJointLL(int NSample) const { 01977 double LL = 0.0; 01978 const int NNodes = Param.GetNodes(); 01979 const int NAttrs = Param.GetAttrs(); 01980 01981 TRnd Rnd(2000); 01982 TIntVV AttrVV(NNodes, NAttrs); 01983 int count = 0; 01984 for(int s = 0; s < NSample; s++) { 01985 for(int i = 0; i < NNodes; i++) { 01986 for(int l = 0; l < NAttrs; l++) { 01987 01988 if(Rnd.GetUniDev() <= PhiVV(i, l)) { 01989 AttrVV.At(i, l) = 0; 01990 } else { 01991 AttrVV.At(i, l) = 1; 01992 } 01993 01994 if(PhiVV(i, l) > 0.05 && PhiVV(i, l) < 0.95) count++; 01995 } 01996 } 01997 01998 LL += ComputeJointOneLL(AttrVV); 01999 } 02000 AttrVV.Clr(); 02001 02002 return LL / double(NSample); 02003 } 02004 02005 const double TMAGFitBern::ComputeApxLL() const { 02006 double LL = 0.0; 02007 // double LLSelf = 0.0; 02008 const int NNodes = Param.GetNodes(); 02009 const int NAttrs = Param.GetAttrs(); 02010 TMAGNodeBern NodeAttr = Param.GetNodeAttr(); 02011 TFltV MuV = NodeAttr.GetMuV(); 02012 TMAGAffMtxV LLMtxV(NAttrs); 02013 02014 for(int l = 0; l < NAttrs; l++) { 02015 for(int i = 0; i < NNodes; i++) { 02016 LL += PhiVV(i, l) * log(MuV[l]); 02017 LL += (1.0 - PhiVV(i, l)) * log(1.0 - MuV[l]); 02018 LL -= PhiVV(i, l) * log(PhiVV(i, l)); 02019 LL -= (1.0 - PhiVV(i, l)) * log(1.0 - PhiVV(i, l)); 02020 } 02021 TMAGAffMtx Theta = Param.GetMtx(l); 02022 Theta.GetLLMtx(LLMtxV[l]); 02023 } 02024 02025 for(int i = 0; i < NNodes; i++) { 02026 for(int j = 0; j < NNodes; j++) { 02027 if(i == j) { continue; } 02028 02029 if(Graph->IsEdge(i, j)) { 02030 for(int l = 0; l < NAttrs; l++) { 02031 LL += GetProbPhi(i, j, l, 0, 0) * LLMtxV[l].At(0, 0); 02032 LL += GetProbPhi(i, j, l, 0, 1) * LLMtxV[l].At(0, 1); 02033 LL += GetProbPhi(i, j, l, 1, 0) * LLMtxV[l].At(1, 0); 02034 LL += GetProbPhi(i, j, l, 1, 1) * LLMtxV[l].At(1, 1); 02035 } 02036 LL += log(NormConst); 02037 } else { 02038 LL += log(1-exp(GetProdLinWeight(i, j))); 02039 } 02040 } 02041 } 02042 02043 return LL; 02044 } 02045 02046 const double TMAGFitBern::ComputeApxAdjLL() const { 02047 double LL = 0.0; 02048 // double LLSelf = 0.0; 02049 const int NNodes = Param.GetNodes(); 02050 const int NAttrs = Param.GetAttrs(); 02051 TMAGNodeBern NodeAttr = Param.GetNodeAttr(); 02052 TFltV MuV = NodeAttr.GetMuV(); 02053 MuV.PutAll(0.0); 02054 TMAGAffMtxV LLMtxV(NAttrs); 02055 double TotalEdge = 0.0; 02056 for(int l = 0; l < NAttrs; l++) { 02057 TMAGAffMtx Theta = Param.GetMtx(l); 02058 Theta.GetLLMtx(LLMtxV[l]); 02059 } 02060 02061 for(int i = 0; i < NNodes; i++) { 02062 for(int j = 0; j < NNodes; j++) { 02063 if(i == j) { continue; } 02064 02065 if(Graph->IsEdge(i, j)) { 02066 for(int l = 0; l < NAttrs; l++) { 02067 LL += GetProbPhi(i, j, l, 0, 0) * LLMtxV[l].At(0, 0); 02068 LL += GetProbPhi(i, j, l, 0, 1) * LLMtxV[l].At(0, 1); 02069 LL += GetProbPhi(i, j, l, 1, 0) * LLMtxV[l].At(1, 0); 02070 LL += GetProbPhi(i, j, l, 1, 1) * LLMtxV[l].At(1, 1); 02071 } 02072 } else { 02073 LL += log(1-exp(GetProdLinWeight(i, j))); 02074 } 02075 02076 double TempLL = 1.0; 02077 for(int l = 0; l < NAttrs; l++) { 02078 int Ai = (double(PhiVV(i, l)) > 0.5) ? 0 : 1; 02079 int Aj = (double(PhiVV(j, l)) > 0.5) ? 0 : 1; 02080 TempLL *= Param.GetMtx(l).At(Ai, Aj); 02081 } 02082 if(TMAGNodeBern::Rnd.GetUniDev() < TempLL) { 02083 TotalEdge += 1.0; 02084 } 02085 } 02086 } 02087 02088 return LL; 02089 } 02090 02091 const double TMAGFitBern::ComputeMI(const TIntVV& AttrV, const int AId1, const int AId2) { 02092 const int NNodes = AttrV.GetXDim(); 02093 double MI = 0.0; 02094 double Cor = 0.0; 02095 02096 TFltVV Pxy(2,2); 02097 TFltV Px(2), Py(2); 02098 Pxy.PutAll(0.0); 02099 Px.PutAll(0.0); 02100 Py.PutAll(0.0); 02101 02102 for(int i = 0; i < NNodes; i++) { 02103 int X = AttrV(i, AId1); 02104 int Y = AttrV(i, AId2); 02105 Pxy(X, Y) = Pxy(X, Y) + 1; 02106 Px[X] = Px[X] + 1; 02107 Py[Y] = Py[Y] + 1; 02108 Cor += double(X * Y); 02109 } 02110 02111 for(int x = 0; x < 2; x++) { 02112 for(int y = 0; y < 2; y++) { 02113 MI += Pxy(x, y) / double(NNodes) * (log(Pxy(x, y).Val) - log(Px[x].Val) - log(Py[y].Val) + log((double)NNodes)); 02114 } 02115 } 02116 02117 return MI; 02118 } 02119 02120 const double TMAGFitBern::ComputeMI(const TFltVV& AttrV, const int AId1, const int AId2) { 02121 const int NNodes = AttrV.GetXDim(); 02122 double MI = 0.0; 02123 double Cor = 0.0; 02124 02125 TFltVV Pxy(2,2); 02126 TFltV Px(2), Py(2); 02127 Pxy.PutAll(0.0); 02128 Px.PutAll(0.0); 02129 Py.PutAll(0.0); 02130 02131 for(int i = 0; i < NNodes; i++) { 02132 double X = AttrV(i, AId1); 02133 double Y = AttrV(i, AId2); 02134 Pxy(0, 0) = Pxy(0, 0) + X * Y; 02135 Pxy(0, 1) = Pxy(0, 1) + X * (1 - Y); 02136 Pxy(1, 0) = Pxy(1, 0) + (1 - X) * Y; 02137 Pxy(1, 1) = (i+1) - Pxy(0, 0) - Pxy(0, 1) - Pxy(1, 0); 02138 Px[0] = Px[0] + X; 02139 Py[0] = Py[0] + Y; 02140 Cor += double((1-X) * (1-Y)); 02141 } 02142 Px[1] = NNodes - Px[0]; 02143 Py[1] = NNodes - Py[0]; 02144 02145 for(int x = 0; x < 2; x++) { 02146 for(int y = 0; y < 2; y++) { 02147 MI += Pxy(x, y) / double(NNodes) * (log(Pxy(x, y)) - log(Px[x]) - log(Py[y]) + log(double(NNodes))); 02148 } 02149 } 02150 02151 return MI; 02152 } 02153 02154 const double TMAGFitBern::ComputeMI(const TIntVV& AttrV) { 02155 // const int NNodes = AttrV.GetXDim(); 02156 const int NAttrs = AttrV.GetYDim(); 02157 double MI = 0.0; 02158 02159 for(int l = 0; l < NAttrs; l++) { 02160 for(int k = l+1; k < NAttrs; k++) { 02161 MI += ComputeMI(AttrV, l, k); 02162 } 02163 } 02164 02165 return MI; 02166 } 02167 02168 const double TMAGFitBern::ComputeMI(const TFltVV& AttrV) { 02169 // const int NNodes = AttrV.GetXDim(); 02170 const int NAttrs = AttrV.GetYDim(); 02171 double MI = 0.0; 02172 02173 for(int l = 0; l < NAttrs; l++) { 02174 for(int k = l+1; k < NAttrs; k++) { 02175 MI += ComputeMI(AttrV, l, k); 02176 } 02177 } 02178 02179 return MI; 02180 }