SNAP Library , User Reference  2013-01-07 14:03:36
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
gio.h
Go to the documentation of this file.
00001 
00002 // Loading and saving graphs from/to various file formats.
00003 namespace TSnap {
00004 
00006 template <class PGraph> PGraph LoadEdgeList(const TStr& InFNm, const int& SrcColId=0, const int& DstColId=1);
00008 template <class PGraph> PGraph LoadEdgeList(const TStr& InFNm, const int& SrcColId, const int& DstColId, const char& Separator);
00010 template <class PGraph> PGraph LoadEdgeListStr(const TStr& InFNm, const int& SrcColId=0, const int& DstColId=1);
00012 template <class PGraph> PGraph LoadEdgeListStr(const TStr& InFNm, const int& SrcColId, const int& DstColId, TStrHash<TInt>& StrToNIdH);
00014 template <class PGraph> PGraph LoadConnList(const TStr& InFNm);
00016 template <class PGraph> PGraph LoadConnListStr(const TStr& InFNm, TStrHash<TInt>& StrToNIdH);
00017 
00019 
00021 template <class PGraph> PGraph LoadPajek(const TStr& InFNm);
00023 PNGraph LoadDyNet(const TStr& FNm);
00025 TVec<PNGraph> LoadDyNetGraphV(const TStr& FNm);
00026 
00027 //TODO: Sparse/Dense adjacency matrix which values we threshold at Thresh to obtain an adjacency matrix.
00028 //template <class PGraph> PGraph LoadAdjMtx(const TStr& FNm, const int Thresh);
00029 //TODO: Load from a GML file format (http://en.wikipedia.org/wiki/Graph_Modelling_Language)
00030 //template <class PGraph> PGraph LoadGml(const TStr& FNm, const int Thresh);
00031 
00032 
00034 template <class PGraph> void SaveEdgeList(const PGraph& Graph, const TStr& OutFNm, const TStr& Desc=TStr());
00036 template <class PGraph> void SavePajek(const PGraph& Graph, const TStr& OutFNm);
00038 template <class PGraph> void SavePajek(const PGraph& Graph, const TStr& OutFNm, const TIntStrH& NIdColorH);
00040 template <class PGraph> void SavePajek(const PGraph& Graph, const TStr& OutFNm, const TIntStrH& NIdColorH, const TIntStrH& NIdLabelH);
00042 template <class PGraph> void SavePajek(const PGraph& Graph, const TStr& OutFNm, const TIntStrH& NIdColorH, const TIntStrH& NIdLabelH, const TIntStrH& EIdColorH);
00044 template <class PGraph> void SaveMatlabSparseMtx(const PGraph& Graph, const TStr& OutFNm);
00046 
00049 template<class PGraph> void SaveGViz(const PGraph& Graph, const TStr& OutFNm, const TStr& Desc=TStr(), const bool& NodeLabels=false, const TIntStrH& NIdColorH=TIntStrH());
00051 
00054 template<class PGraph> void SaveGViz(const PGraph& Graph, const TStr& OutFNm, const TStr& Desc, const TIntStrH& NIdLabelH);
00055 
00056 //TODO:  Save to a GML file format (http://en.wikipedia.org/wiki/Graph_Modelling_Language)
00057 //template <class PGraph> SaveGml(const PGraph& Graph, const TStr& OutFNm, const TStr& Desc);
00058 
00060 // Implementation
00061 
00063 
00067 template <class PGraph>
00068 PGraph LoadEdgeList(const TStr& InFNm, const int& SrcColId, const int& DstColId) {
00069   TSsParser Ss(InFNm, ssfWhiteSep, true, true, true);
00070   PGraph Graph = PGraph::TObj::New();
00071   int SrcNId, DstNId;
00072   while (Ss.Next()) {
00073     if (! Ss.GetInt(SrcColId, SrcNId) || ! Ss.GetInt(DstColId, DstNId)) { continue; }
00074     if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); }
00075     if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); }
00076     Graph->AddEdge(SrcNId, DstNId);
00077   }
00078   Graph->Defrag();
00079   return Graph;
00080 }
00081 
00083 
00087 template <class PGraph>
00088 PGraph LoadEdgeList(const TStr& InFNm, const int& SrcColId, const int& DstColId, const char& Separator) {
00089   TSsParser Ss(InFNm, Separator);
00090   PGraph Graph = PGraph::TObj::New();
00091   int SrcNId, DstNId;
00092   while (Ss.Next()) {
00093     if (! Ss.GetInt(SrcColId, SrcNId) || ! Ss.GetInt(DstColId, DstNId)) { continue; }
00094     if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); }
00095     if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); }
00096     Graph->AddEdge(SrcNId, DstNId);
00097   }
00098   Graph->Defrag();
00099   return Graph;
00100 }
00101 
00103 
00108 template <class PGraph>
00109 PGraph LoadEdgeListStr(const TStr& InFNm, const int& SrcColId, const int& DstColId) {
00110   TSsParser Ss(InFNm, ssfWhiteSep);
00111   PGraph Graph = PGraph::TObj::New();
00112   TStrHash<TInt> StrToNIdH(Mega(1), true); // hash-table mapping strings to integer node ids
00113   while (Ss.Next()) {
00114     const int SrcNId = StrToNIdH.AddKey(Ss[SrcColId]);
00115     const int DstNId = StrToNIdH.AddKey(Ss[DstColId]);
00116     if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); }
00117     if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); }
00118     Graph->AddEdge(SrcNId, DstNId);
00119   }
00120   Graph->Defrag();
00121   return Graph;
00122 }
00123 
00125 
00131 template <class PGraph>
00132 PGraph LoadEdgeListStr(const TStr& InFNm, const int& SrcColId, const int& DstColId, TStrHash<TInt>& StrToNIdH) {
00133   TSsParser Ss(InFNm, ssfWhiteSep);
00134   PGraph Graph = PGraph::TObj::New();
00135   while (Ss.Next()) {
00136     const int SrcNId = StrToNIdH.AddKey(Ss[SrcColId]);
00137     const int DstNId = StrToNIdH.AddKey(Ss[DstColId]);
00138     if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); }
00139     if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); }
00140     Graph->AddEdge(SrcNId, DstNId);
00141   }
00142   Graph->Defrag();
00143   return Graph;
00144 }
00145 
00147 
00151 template <class PGraph>
00152 PGraph LoadConnList(const TStr& InFNm) {
00153   TSsParser Ss(InFNm, ssfWhiteSep, true, true, true);
00154   PGraph Graph = PGraph::TObj::New();
00155   while (Ss.Next()) {
00156     if (! Ss.IsInt(0)) { continue; }
00157     const int SrcNId = Ss.GetInt(0);
00158     if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); }
00159     for (int dst = 1; dst < Ss.Len(); dst++) {
00160       const int DstNId = Ss.GetInt(dst);
00161       if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); }
00162       Graph->AddEdge(SrcNId, DstNId);
00163     }
00164   }
00165   Graph->Defrag();
00166   return Graph;
00167 }
00168 
00170 
00175 template <class PGraph> 
00176 PGraph LoadConnListStr(const TStr& InFNm, TStrHash<TInt>& StrToNIdH) {
00177   TSsParser Ss(InFNm, ssfWhiteSep, true, true, true);
00178   PGraph Graph = PGraph::TObj::New();
00179   while (Ss.Next()) {
00180     const int SrcNId = StrToNIdH.AddDatId(Ss[0]);
00181     if (! Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); }
00182     for (int dst = 1; dst < Ss.Len(); dst++) {
00183       const int DstNId = StrToNIdH.AddDatId(Ss[dst]);
00184       if (! Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); }
00185       Graph->AddEdge(SrcNId, DstNId);
00186     }
00187   }
00188   Graph->Defrag();
00189   return Graph;
00190 }
00191 
00192 template <class PGraph>
00193 PGraph LoadPajek(const TStr& InFNm) {
00194   PGraph Graph = PGraph::TObj::New();
00195   TSsParser Ss(InFNm, ssfSpaceSep, true, true, true);
00196   while ((Ss.Len()==0 || strstr(Ss[0], "*vertices") == NULL) && ! Ss.Eof()) {
00197     Ss.Next();  Ss.ToLc(); }
00198   // nodes
00199   bool EdgeList = true;
00200   EAssert(strstr(Ss[0], "*vertices") != NULL);
00201   while (Ss.Next()) {
00202     Ss.ToLc();
00203     if (Ss.Len()>0 && Ss[0][0] == '%') { continue; } // comment
00204     if (strstr(Ss[0], "*arcslist")!=NULL || strstr(Ss[0],"*edgeslist")!=NULL) { EdgeList=false; break; } 
00205     if (strstr(Ss[0], "*arcs")!=NULL || strstr(Ss[0],"*edges")!=NULL) { break; } // arcs are directed, edges are undirected
00206     Graph->AddNode(Ss.GetInt(0));
00207   }
00208   // edges
00209   while (Ss.Next()) {
00210     if (Ss.Len()>0 && Ss[0][0] == '%') { continue; } // comment
00211     if (Ss.Len()>0 && Ss[0][0] == '*') { break; }
00212     if (EdgeList) {
00213       // <source> <destination> <weight>
00214       if (Ss.Len() >= 3 && Ss.IsInt(0) && Ss.IsInt(1)) {
00215         Graph->AddEdge(Ss.GetInt(0), Ss.GetInt(1)); }
00216     } else {
00217       // <source> <destination1> <destination2> <destination3> ...
00218       const int SrcNId = Ss.GetInt(0);
00219       for (int i = 1; i < Ss.Len(); i++) {
00220         Graph->AddEdge(SrcNId, Ss.GetInt(i)); }
00221     }
00222   }
00223   return Graph;
00224 }
00225 
00226 template <class PGraph>
00227 void SaveEdgeList(const PGraph& Graph, const TStr& OutFNm, const TStr& Desc) {
00228   FILE *F = fopen(OutFNm.CStr(), "wt");
00229   if (HasGraphFlag(typename PGraph::TObj, gfDirected)) { fprintf(F, "# Directed graph: %s \n", OutFNm.CStr()); } 
00230   else { fprintf(F, "# Undirected graph (each unordered pair of nodes is saved once): %s\n", OutFNm.CStr()); }
00231   if (! Desc.Empty()) { fprintf(F, "# %s\n", Desc.CStr()); }
00232   fprintf(F, "# Nodes: %d Edges: %d\n", Graph->GetNodes(), Graph->GetEdges());
00233   if (HasGraphFlag(typename PGraph::TObj, gfDirected)) { fprintf(F, "# FromNodeId\tToNodeId\n"); }
00234   else { fprintf(F, "# NodeId\tNodeId\n"); }
00235   for (typename PGraph::TObj::TEdgeI ei = Graph->BegEI(); ei < Graph->EndEI(); ei++) {
00236     fprintf(F, "%d\t%d\n", ei.GetSrcNId(), ei.GetDstNId());
00237   }
00238   fclose(F);
00239 }
00240 
00241 template <class PGraph>
00242 void SavePajek(const PGraph& Graph, const TStr& OutFNm) {
00243   TIntH NIdToIdH(Graph->GetNodes(), true);
00244   FILE *F = fopen(OutFNm.CStr(), "wt");
00245   fprintf(F, "*Vertices %d\n", Graph->GetNodes());
00246   int i = 0;
00247   for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, i++) {
00248     fprintf(F, "%d  \"%d\" ic Red fos 10\n", i+1, NI.GetId()); // ic: internal color, fos: font size
00249     NIdToIdH.AddDat(NI.GetId(), i+1);
00250   }
00251   if (HasGraphFlag(typename PGraph::TObj, gfDirected)) {
00252     fprintf(F, "*Arcs %d\n", Graph->GetEdges()); } // arcs are directed, edges are undirected
00253   else {
00254     fprintf(F, "*Edges %d\n", Graph->GetEdges());
00255   }
00256   for (typename PGraph::TObj::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
00257     const int SrcNId = NIdToIdH.GetDat(EI.GetSrcNId());
00258     const int DstNId = NIdToIdH.GetDat(EI.GetDstNId());
00259     fprintf(F, "%d %d %d c Black\n", SrcNId, DstNId, 1); // width=1
00260   }
00261   fclose(F);
00262 }
00263 
00266 template <class PGraph>
00267 void SavePajek(const PGraph& Graph, const TStr& OutFNm, const TIntStrH& NIdColorH) {
00268   TIntH NIdToIdH(Graph->GetNodes(), true);
00269   FILE *F = fopen(OutFNm.CStr(), "wt");
00270   fprintf(F, "*Vertices %d\n", Graph->GetNodes());
00271   int i = 0;
00272   for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, i++) {
00273     fprintf(F, "%d  \"%d\" ic %s fos 10\n", i+1, NI.GetId(),
00274       NIdColorH.IsKey(NI.GetId()) ? NIdColorH.GetDat(NI.GetId()).CStr() : "Red");
00275     NIdToIdH.AddDat(NI.GetId(), i+1);
00276   }
00277   if (HasGraphFlag(typename PGraph::TObj, gfDirected)) {
00278     fprintf(F, "*Arcs %d\n", Graph->GetEdges()); } // arcs are directed, edges are undirected
00279   else {
00280     fprintf(F, "*Edges %d\n", Graph->GetEdges());
00281   }
00282   for (typename PGraph::TObj::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
00283     const int SrcNId = NIdToIdH.GetDat(EI.GetSrcNId());
00284     const int DstNId = NIdToIdH.GetDat(EI.GetDstNId());
00285     fprintf(F, "%d %d %d c Black\n", SrcNId, DstNId, 1);
00286   }
00287   fclose(F);
00288 }
00289 
00293 template <class PGraph>
00294 void SavePajek(const PGraph& Graph, const TStr& OutFNm, const TIntStrH& NIdColorH, const TIntStrH& NIdLabelH) {
00295   TIntH NIdToIdH(Graph->GetNodes(), true);
00296   FILE *F = fopen(OutFNm.CStr(), "wt");
00297   fprintf(F, "*Vertices %d\n", Graph->GetNodes());
00298   int i = 0;
00299   for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, i++) {
00300     fprintf(F, "%d  \"%s\" ic %s fos 10\n", i+1,
00301       NIdLabelH.IsKey(NI.GetId()) ? NIdLabelH.GetDat(NI.GetId()).CStr() : TStr::Fmt("%d", NI.GetId()).CStr(),
00302       NIdColorH.IsKey(NI.GetId()) ? NIdColorH.GetDat(NI.GetId()).CStr() : "Red");
00303     NIdToIdH.AddDat(NI.GetId(), i+1);
00304   }
00305   if (HasGraphFlag(typename PGraph::TObj, gfDirected)) {
00306     fprintf(F, "*Arcs %d\n", Graph->GetEdges()); } // arcs are directed, edges are undirected
00307   else {
00308     fprintf(F, "*Edges %d\n", Graph->GetEdges());
00309   }
00310   for (typename PGraph::TObj::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
00311     const int SrcNId = NIdToIdH.GetDat(EI.GetSrcNId());
00312     const int DstNId = NIdToIdH.GetDat(EI.GetDstNId());
00313     fprintf(F, "%d %d %d c Black\n", SrcNId, DstNId, 1);
00314   }
00315   fclose(F);
00316 }
00317 
00322 template <class PGraph>
00323 void SavePajek(const PGraph& Graph, const TStr& OutFNm, const TIntStrH& NIdColorH, const TIntStrH& NIdLabelH, const TIntStrH& EIdColorH) {
00324   CAssert(HasGraphFlag(typename PGraph::TObj, gfMultiGraph)); // network needs to have edge ids
00325   TIntH NIdToIdH(Graph->GetNodes(), true);
00326   FILE *F = fopen(OutFNm.CStr(), "wt");
00327   fprintf(F, "*Vertices %d\n", Graph->GetNodes());
00328   int i = 0;
00329   for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, i++) {
00330     fprintf(F, "%d  \"%s\" ic %s fos 10\n", i+1,
00331       NIdLabelH.IsKey(NI.GetId()) ? NIdLabelH.GetDat(NI.GetId()).CStr() : TStr::Fmt("%d", NI.GetId()).CStr(),
00332       NIdColorH.IsKey(NI.GetId()) ? NIdColorH.GetDat(NI.GetId()).CStr() : "Red");
00333     NIdToIdH.AddDat(NI.GetId(), i+1);
00334   }
00335   if (HasGraphFlag(typename PGraph::TObj, gfDirected)) {
00336     fprintf(F, "*Arcs %d\n", Graph->GetEdges()); } // arcs are directed, edges are undirected
00337   else {
00338     fprintf(F, "*Edges %d\n", Graph->GetEdges());
00339   }
00340   for (typename PGraph::TObj::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
00341     const int SrcNId = NIdToIdH.GetDat(EI.GetSrcNId());
00342     const int DstNId = NIdToIdH.GetDat(EI.GetDstNId());
00343     fprintf(F, "%d %d 1 c %s\n", SrcNId, DstNId, 1,
00344       EIdColorH.IsKey(EI.GetId()) ? EIdColorH.GetDat(EI.GetId()).CStr() : "Black");
00345   }
00346   fclose(F);
00347 }
00348 
00350 template <class PGraph>
00351 void SaveMatlabSparseMtx(const PGraph& Graph, const TStr& OutFNm) {
00352   FILE *F = fopen(OutFNm.CStr(), "wt");
00353   TIntSet NIdSet(Graph->GetNodes()); // so that
00354   for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00355     NIdSet.AddKey(NI.GetId());
00356   }
00357   for (typename PGraph::TObj::TEdgeI EI = Graph->BegEI(); EI < Graph->EndEI(); EI++) {
00358     const int Src = NIdSet.GetKeyId(EI.GetSrcNId())+1;
00359     const int Dst = NIdSet.GetKeyId(EI.GetDstNId())+1;
00360     fprintf(F, "%d\t%d\t1\n", Src, Dst);
00361     if (! HasGraphFlag(typename PGraph::TObj, gfDirected) && Src!=Dst) {
00362       fprintf(F, "%d\t%d\t1\n", Dst, Src);
00363     }
00364   }
00365   fclose(F);
00366 }
00367 
00368 template<class PGraph>
00369 void SaveGViz(const PGraph& Graph, const TStr& OutFNm, const TStr& Desc, const bool& NodeLabels, const TIntStrH& NIdColorH) {
00370   const bool IsDir = HasGraphFlag(typename PGraph::TObj, gfDirected);
00371   FILE *F = fopen(OutFNm.CStr(), "wt");
00372   if (! Desc.Empty()) fprintf(F, "/*****\n%s\n*****/\n\n", Desc.CStr());
00373   if (IsDir) { fprintf(F, "digraph G {\n"); } else { fprintf(F, "graph G {\n"); }
00374   fprintf(F, "  graph [splines=false overlap=false]\n"); //size=\"12,10\" ratio=fill
00375   // node  [width=0.3, height=0.3, label=\"\", style=filled, color=black]
00376   // node  [shape=box, width=0.3, height=0.3, label=\"\", style=filled, fillcolor=red]
00377   fprintf(F, "  node  [shape=ellipse, width=0.3, height=0.3%s]\n", NodeLabels?"":", label=\"\"");
00378   // node colors
00379   //for (int i = 0; i < NIdColorH.Len(); i++) {
00380   for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00381     if (NIdColorH.IsKey(NI.GetId())) {
00382       fprintf(F, "  %d [style=filled, fillcolor=\"%s\"];\n", NI.GetId(), NIdColorH.GetDat(NI.GetId()).CStr()); }
00383     else {
00384       fprintf(F, "  %d ;\n", NI.GetId());
00385     }
00386   }
00387   // edges
00388   for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00389     if (NI.GetOutDeg()==0 && NI.GetInDeg()==0 && !NIdColorH.IsKey(NI.GetId())) {
00390       fprintf(F, "%d;\n", NI.GetId()); }
00391     else {
00392       for (int e = 0; e < NI.GetOutDeg(); e++) {
00393         if (! IsDir && NI.GetId() > NI.GetOutNId(e)) { continue; }
00394         fprintf(F, "  %d %s %d;\n", NI.GetId(), IsDir?"->":"--", NI.GetOutNId(e));
00395       }
00396     }
00397   }
00398   if (! Desc.Empty()) {
00399     fprintf(F, "  label = \"\\n%s\\n\";", Desc.CStr());
00400     fprintf(F, "  fontsize=24;\n");
00401   }
00402   fprintf(F, "}\n");
00403   fclose(F);
00404 }
00405 
00406 template<class PGraph>
00407 void SaveGViz(const PGraph& Graph, const TStr& OutFNm, const TStr& Desc, const TIntStrH& NIdLabelH) {
00408   const bool IsDir = Graph->HasFlag(gfDirected);
00409   FILE *F = fopen(OutFNm.CStr(), "wt");
00410   if (! Desc.Empty()) fprintf(F, "/*****\n%s\n*****/\n\n", Desc.CStr());
00411   if (IsDir) { fprintf(F, "digraph G {\n"); } else { fprintf(F, "graph G {\n"); }
00412   fprintf(F, "  graph [splines=true overlap=false]\n"); //size=\"12,10\" ratio=fill
00413   fprintf(F, "  node  [shape=ellipse, width=0.3, height=0.3]\n");
00414   // node colors
00415   //for (int i = 0; i < NodeLabelH.Len(); i++) {
00416   for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00417     fprintf(F, "  %d [label=\"%s\"];\n", NI.GetId(), NIdLabelH.GetDat(NI.GetId()).CStr());
00418 }
00419   // edges
00420   for (typename PGraph::TObj::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
00421     if (NI.GetOutDeg()==0 && NI.GetInDeg()==0 && ! NIdLabelH.IsKey(NI.GetId())) {
00422       fprintf(F, "%d;\n", NI.GetId()); }
00423     else {
00424       for (int e = 0; e < NI.GetOutDeg(); e++) {
00425         if (! IsDir && NI.GetId() > NI.GetOutNId(e)) { continue; }
00426         fprintf(F, "  %d %s %d;\n", NI.GetId(), IsDir?"->":"--", NI.GetOutNId(e));
00427       }
00428     }
00429   }
00430   if (! Desc.Empty()) {
00431     fprintf(F, "  label = \"\\n%s\\n\";", Desc.CStr());
00432     fprintf(F, "  fontsize=24;\n");
00433   }
00434   fprintf(F, "}\n");
00435   fclose(F);
00436 }
00437 
00438 } // namespace TSnap