SNAP Library 2.1, Developer Reference
2013-09-25 10:47:25
SNAP, a general-purpose, high-performance system for analysis and manipulation of large networks
|
00001 #include "bd.h" 00002 00004 // Url 00005 typedef enum {usUndef, usHttp, usOther} TUrlScheme; 00006 00007 ClassTPV(TUrl, PUrl, TUrlV)//{ 00008 private: 00009 static const TStr UrlHttpPrefixStr; 00010 static const TStr UrlHttpAbsPrefixStr; 00011 TUrlScheme Scheme; 00012 TStr UrlStr, RelUrlStr, BaseUrlStr; 00013 TStr SchemeNm, HostNm; 00014 TStr PortStr, PathStr, SearchStr, FragIdStr; 00015 int PortN; 00016 TStrV PathSegV; 00017 TStr IpNum; 00018 TStr FinalUrlStr, FinalHostNm; 00019 TStr HttpRqStr; 00020 void GetAbs(const TStr& AbsUrlStr); 00021 void GetAbsFromBase(const TStr& RelUrlStr, const TStr& BaseUrlStr); 00022 UndefDefaultCopyAssign(TUrl); 00023 public: 00024 TUrl(const TStr& _RelUrlStr, const TStr& _BaseUrlStr=TStr()); 00025 static PUrl New(const TStr& RelUrlStr, const TStr& BaseUrlStr=TStr()){ 00026 return PUrl(new TUrl(RelUrlStr, BaseUrlStr));} 00027 ~TUrl(){} 00028 TUrl(TSIn&){Fail;} 00029 static PUrl Load(TSIn&){Fail; return NULL;} 00030 void Save(TSOut&){Fail;} 00031 00032 bool IsOk(const TUrlScheme _Scheme=usUndef) const { 00033 if (_Scheme==usUndef){return Scheme!=usUndef;} 00034 else {return Scheme==_Scheme;}} 00035 TUrlScheme GetScheme(){return Scheme;} 00036 TStr GetUrlStr() const {return UrlStr;} 00037 TStr GetRelUrlStr() const {return RelUrlStr;} 00038 bool IsBaseUrl(){return !BaseUrlStr.Empty();} 00039 TStr GetBaseUrlStr() const {return BaseUrlStr;} 00040 TStr GetSchemeNm() const {EAssert(IsOk()); return SchemeNm;} 00041 TStr GetHostNm() const {EAssert(IsOk()); return HostNm;} 00042 TStr GetDmNm(const int& MxDmSegs=-1) const; 00043 bool IsPortOk() const { EAssert(IsOk()); return (PortN > 0); } 00044 TStr GetPortStr() const {EAssert(IsOk()); return PortStr;} 00045 int GetPortN() const {EAssert(IsOk()&&(PortN!=-1)); return PortN;} 00046 TStr GetPathStr() const {EAssert(IsOk()); return PathStr;} 00047 int GetPathSegs() const {return PathSegV.Len();} 00048 TStr GetPathSeg(const int& PathSegN) const {return PathSegV[PathSegN];} 00049 TStr 
GetSearchStr() const {EAssert(IsOk()); return SearchStr;} 00050 TStr GetFragIdStr() const {EAssert(IsOk()); return FragIdStr;} 00051 00052 bool IsIpNum() const {return !IpNum.Empty();} 00053 void PutIpNum(const TStr& _IpNum){IpNum=_IpNum;} 00054 TStr GetIpNum() const {EAssert(IsIpNum()); return IpNum;} 00055 TStr GetIpNumOrHostNm() const {return IsIpNum() ? GetIpNum() : GetHostNm();} 00056 00057 bool IsDefFinalUrl() const { 00058 EAssert(IsOk(usHttp)); return !FinalUrlStr.Empty();} 00059 TStr GetFinalUrlStr() const { 00060 EAssert(IsDefFinalUrl()); return FinalUrlStr;} 00061 TStr GetAsFinalUrlStr() const { 00062 if (IsDefFinalUrl()){return FinalUrlStr;} else {return UrlStr;}} 00063 TStr GetFinalHostNm() const { 00064 EAssert(IsDefFinalUrl()); return FinalHostNm;} 00065 TStr GetAsFinalHostNm() const { 00066 if (IsDefFinalUrl()){return FinalHostNm;} else {return HostNm;}} 00067 void DefUrlAsFinal(){ 00068 EAssert(IsOk(usHttp)); EAssert(!IsDefFinalUrl()); 00069 FinalUrlStr=UrlStr; FinalHostNm=HostNm;} 00070 void DefFinalUrl(const TStr& _FinalHostNm); 00071 00072 void PutHttpRqStr(const TStr& _HttpRqStr){HttpRqStr=_HttpRqStr;} 00073 TStr GetHttpRqStr() const {return HttpRqStr;} 00074 bool IsHttpRqStr() const {return !HttpRqStr.Empty();} 00075 void ChangeHttpRqStr(const TStr& SrcStr, const TStr& DstStr){ 00076 HttpRqStr.ChangeStr(SrcStr, DstStr);} 00077 00078 bool IsInHost(const TStr& _HostNm) const { 00079 EAssert(IsOk()); return HostNm.GetUc().IsSuffix(_HostNm.GetUc());} 00080 bool IsInPath(const TStr& _PathStr) const { 00081 EAssert(IsOk()); return PathStr.GetUc().IsPrefix(_PathStr.GetUc());} 00082 void ToLcPath(); 00083 00084 static bool IsAbs(const TStr& UrlStr); 00085 static bool IsScript(const TStr& UrlStr); 00086 static bool IsSite(const TStr& UrlStr); 00087 00088 static PUrl GetUrlFromShortcut(const TStr& ShortcutUrlStr, 00089 const TStr& DfHostNmPrefix, const TStr& DfHostNmSufix); 00090 static TStr GetUrlSearchStr(const TStr& Str); 00091 static TStr 
EncodeUrlStr(const TStr& Str){return GetUrlSearchStr(Str);} 00092 static TStr DecodeUrlStr(const TStr& UrlStr); 00093 static TStr GetDocStrFromUrlStr(const TStr& UrlStr, const int& Copies=1); 00094 static TStr GetTopDownDocNm( 00095 const TStr& UrlStr, const int& MxLen=-1, const bool& HostOnlyP=false); 00096 }; 00097 typedef TPair<TInt, PUrl> TIdUrlPr; 00098 typedef TQQueue<TIdUrlPr> TIdUrlPrQ; 00099 typedef THash<TInt, PUrl> TIdToUrlH; 00100 00102 // Url-Environment 00103 ClassTP(TUrlEnv, PUrlEnv)//{ 00104 private: 00105 TStr BaseUrlStr; 00106 TStrV KeyNmV; 00107 TStrStrVH KeyNmToValH; 00108 public: 00109 TUrlEnv(): 00110 KeyNmV(), KeyNmToValH(10){} 00111 TUrlEnv(const TUrlEnv& UrlEnv): 00112 KeyNmV(UrlEnv.KeyNmV), KeyNmToValH(UrlEnv.KeyNmToValH){} 00113 static PUrlEnv New(){return new TUrlEnv();} 00114 static PUrlEnv New(const TStr& BaseUrlStr, 00115 const TStr& KeyNm1=TStr(), const TStr& ValStr1=TStr(), 00116 const TStr& KeyNm2=TStr(), const TStr& ValStr2=TStr(), 00117 const TStr& KeyNm3=TStr(), const TStr& ValStr3=TStr(), 00118 const TStr& KeyNm4=TStr(), const TStr& ValStr4=TStr()){ 00119 PUrlEnv UrlEnv=New(); 00120 UrlEnv->PutBaseUrlStr(BaseUrlStr); 00121 if (!KeyNm1.Empty()){UrlEnv->AddKeyVal(KeyNm1, ValStr1);} 00122 if (!KeyNm2.Empty()){UrlEnv->AddKeyVal(KeyNm2, ValStr2);} 00123 if (!KeyNm3.Empty()){UrlEnv->AddKeyVal(KeyNm3, ValStr3);} 00124 if (!KeyNm4.Empty()){UrlEnv->AddKeyVal(KeyNm4, ValStr4);} 00125 return UrlEnv;} 00126 ~TUrlEnv(){} 00127 TUrlEnv(TSIn& SIn): KeyNmV(SIn), KeyNmToValH(SIn){} 00128 static PUrlEnv Load(TSIn& SIn){return new TUrlEnv(SIn);} 00129 void Save(TSOut& SOut){KeyNmV.Save(SOut); KeyNmToValH.Save(SOut);} 00130 00131 TUrlEnv& operator=(const TUrlEnv& Env){ 00132 if (this!=&Env){KeyNmV=Env.KeyNmV; KeyNmToValH=Env.KeyNmToValH;} 00133 return *this;} 00134 00135 // base url 00136 void PutBaseUrlStr(const TStr& _BaseUrlStr){BaseUrlStr=_BaseUrlStr;} 00137 TStr GetBaseUrlStr() const {return BaseUrlStr;} 00138 00139 // adding key-value 00140 
void AddKeyVal(const TStr& KeyNm, const TStr& ValStr){ 00141 if (!IsKey(KeyNm)){KeyNmV.Add(KeyNm); KeyNmToValH.AddKey(KeyNm);} 00142 KeyNmToValH.GetDat(KeyNm).Clr(); 00143 KeyNmToValH.GetDat(KeyNm).Add(ValStr);} 00144 void AddToKeyVal(const TStr& KeyNm, const TStr& ValStr){ 00145 if (!IsKey(KeyNm)){KeyNmV.Add(KeyNm); KeyNmToValH.AddKey(KeyNm);} 00146 KeyNmToValH.GetDat(KeyNm).Add(ValStr);} 00147 00148 // key retrieval 00149 bool Empty() const {return KeyNmV.Empty();} 00150 int GetKeys() const {return KeyNmV.Len();} 00151 bool IsKey(const TStr& KeyNm) const {return KeyNmV.SearchForw(KeyNm)!=-1;} 00152 int GetKeyN(const TStr& KeyNm) const {return KeyNmV.SearchForw(KeyNm);} 00153 TStr GetKeyNm(const int& KeyN) const {return KeyNmV[KeyN];} 00154 00155 // value retrieval 00156 int GetVals(const int& KeyN) const { 00157 return KeyNmToValH.GetDat(KeyNmV[KeyN]).Len();} 00158 int GetVals(const TStr& KeyNm) const { 00159 return KeyNmToValH.GetDat(KeyNm).Len();} 00160 TStr GetVal(const int& KeyN, const int& ValN=0) const { 00161 return KeyNmToValH.GetDat(KeyNmV[KeyN])[ValN];} 00162 TStr GetVal(const TStr& KeyNm, const int& ValN=0, const TStr& DfVal="") const { 00163 if (KeyNmToValH.IsKey(KeyNm)){ 00164 return KeyNmToValH.GetDat(KeyNm)[ValN];} 00165 else {return DfVal;}} 00166 00167 // full-url-string 00168 TStr GetFullUrlStr() const; 00169 00170 static PUrlEnv MkClone(const PUrlEnv& UrlEnv); 00171 }; 00172