SNAP Library 2.1, Developer Reference
2013-09-25 10:47:25
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
#include <html.h>
Public Member Functions | |
THtmlTok () | |
THtmlTok (const THtmlLxSym &_Sym) | |
THtmlTok (const THtmlLxSym &_Sym, const TStr &_Str) | |
THtmlTok (const THtmlLxSym &_Sym, const TStr &_Str, const THtmlLx::TArgNmValV &_ArgNmValV) | |
THtmlTok (TSIn &) | |
void | Save (TSOut &) |
THtmlTok & | operator= (const THtmlTok &) |
THtmlLxSym | GetSym () const |
TStr | GetStr () const |
TStr | GetFullStr () const |
bool | IsArg (const TStr &ArgNm) const |
TStr | GetArg (const TStr &ArgNm) const |
TStr | GetArg (const TStr &ArgNm, const TStr &DfArgVal) const |
bool | IsUrlTok (TStr &RelUrlStr) const |
bool | IsRedirUrlTok () const |
void | SaveTxt (const PSOut &SOut, const bool &TxtMode=true) |
Static Public Member Functions | |
static PHtmlTok | Load (TSIn &) |
static bool | IsBreakTag (const TStr &TagNm) |
static bool | IsBreakTok (const PHtmlTok &Tok) |
static bool | IsHTag (const TStr &TagNm, int &HTagN) |
static PHtmlTok | GetHTok (const bool &IsBTag, const int &HTagN) |
Static Public Attributes | |
static const TStr | ATagNm = "<A>" |
static const TStr | AreaTagNm = "<AREA>" |
static const TStr | BrTagNm = "<BR>" |
static const TStr | CardTagNm = "<CARD>" |
static const TStr | CenterTagNm = "<CENTER>" |
static const TStr | FrameTagNm = "<FRAME>" |
static const TStr | H1TagNm = "<H1>" |
static const TStr | H2TagNm = "<H2>" |
static const TStr | H3TagNm = "<H3>" |
static const TStr | H4TagNm = "<H4>" |
static const TStr | H5TagNm = "<H5>" |
static const TStr | H6TagNm = "<H6>" |
static const TStr | ImgTagNm = "<IMG>" |
static const TStr | LiTagNm = "<LI>" |
static const TStr | MetaTagNm = "<META>" |
static const TStr | PTagNm = "<P>" |
static const TStr | UlTagNm = "<UL>" |
static const TStr | TitleTagNm = "<TITLE>" |
static const TStr | TitleETagNm = "</TITLE>" |
static const TStr | AltArgNm = "ALT" |
static const TStr | HRefArgNm = "HREF" |
static const TStr | SrcArgNm = "SRC" |
static const TStr | TitleArgNm = "TITLE" |
static const TStr | HttpEquivArgNm = "HTTP-EQUIV" |
Private Attributes | |
TCRef | CRef |
THtmlLxSym | Sym |
TStr | Str |
THtmlLx::TArgNmValV | ArgNmValV |
Friends | |
class | TPt< THtmlTok > |
THtmlTok::THtmlTok | ( | ) | [inline] |
THtmlTok::THtmlTok | ( | const THtmlLxSym & | _Sym | ) | [inline] |
THtmlTok::THtmlTok | ( | const THtmlLxSym & | _Sym, |
const TStr & | _Str | ||
) | [inline] |
THtmlTok::THtmlTok | ( | const THtmlLxSym & | _Sym, |
const TStr & | _Str, | ||
const THtmlLx::TArgNmValV & | _ArgNmValV | ||
) | [inline] |
THtmlTok::THtmlTok | ( | TSIn & | ) | [inline] |
TStr THtmlTok::GetArg | ( | const TStr & | ArgNm | ) | const [inline] |
Definition at line 207 of file html.h.
Referenced by IsRedirUrlTok(), and IsUrlTok().
{
  // Find the entry keyed by ArgNm and hand back its value (Dat).
  // The index is used as-is: there is no check for the -1 that IsArg()
  // treats as "absent", so callers test IsArg() first (as IsUrlTok() and
  // IsRedirUrlTok() do).
  const int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
  return ArgNmValV[ArgN].Dat;
}
TStr THtmlTok::GetArg | ( | const TStr & | ArgNm, |
const TStr & | DfArgVal | ||
) | const [inline] |
TStr THtmlTok::GetFullStr | ( | ) | const |
Definition at line 628 of file html.cpp.
References ArgNmValV, GetStr(), TStr::GetSubStr(), hsyBTag, hsyETag, TStr::Len(), TVec< TVal, TSizeTy >::Len(), Str, and Sym.
Referenced by SaveTxt().
{
  // Only two kinds of token are rebuilt: begin tags that carry at least one
  // argument, and end tags. Everything else is returned exactly as stored.
  const bool IsArgBTag=(Sym==hsyBTag)&&(ArgNmValV.Len()>0);
  if (!IsArgBTag&&!(Sym==hsyETag)){
    return GetStr();
  }

  TChA OutChA;
  if (IsArgBTag){
    // Start from the stored tag text cut by GetSubStr(0, Len()-2); the
    // closing '>' is re-appended after the arguments.
    // NOTE(review): presumably this strips just the trailing '>' -- confirm
    // against TStr::GetSubStr's bound convention.
    OutChA+=Str.GetSubStr(0, Str.Len()-2);
    // Each argument is written as: space, Key, '=', Dat in double quotes.
    const int Args=ArgNmValV.Len();
    for (int ArgN=0; ArgN<Args; ArgN++){
      OutChA+=' ';
      OutChA+=ArgNmValV[ArgN].Key;
      OutChA+='=';
      OutChA+='"';
      OutChA+=ArgNmValV[ArgN].Dat;
      OutChA+='"';
    }
    OutChA+='>';
  } else {
    // End tag: prefix "</" and append the stored text from index 1 onward.
    OutChA+='<';
    OutChA+='/';
    OutChA+=Str.GetSubStr(1, Str.Len()-1);
  }
  return OutChA;
}
PHtmlTok THtmlTok::GetHTok | ( | const bool & | IsBTag, |
const int & | HTagN | ||
) | [static] |
Definition at line 762 of file html.cpp.
References Fail, H1TagNm, H2TagNm, H3TagNm, H4TagNm, H5TagNm, H6TagNm, hsyBTag, hsyETag, and THtmlTok().
Referenced by THtmlHldV::THtmlHldV().
{
  // Begin-tag or end-tag symbol, as requested by the caller.
  THtmlLxSym TagSym=hsyETag;
  if (IsBTag){
    TagSym=hsyBTag;
  }

  // Map the heading level (1..6) to its tag-name constant; any other level
  // reaches Fail.
  TStr TagNm;
  switch (HTagN){
    case 1: TagNm=H1TagNm; break;
    case 2: TagNm=H2TagNm; break;
    case 3: TagNm=H3TagNm; break;
    case 4: TagNm=H4TagNm; break;
    case 5: TagNm=H5TagNm; break;
    case 6: TagNm=H6TagNm; break;
    default: Fail;
  }

  // Wrap a freshly allocated token in the smart-pointer return type.
  return PHtmlTok(new THtmlTok(TagSym, TagNm));
}
TStr THtmlTok::GetStr | ( | ) | const [inline] |
Definition at line 203 of file html.h.
Referenced by GetFullStr(), IsRedirUrlTok(), and IsUrlTok().
{
  // Returns the token's stored string (the Str member) by value.
  return this->Str;
}
THtmlLxSym THtmlTok::GetSym | ( | ) | const [inline] |
Definition at line 202 of file html.h.
Referenced by IsRedirUrlTok(), and IsUrlTok().
{
  // Returns the token's lexical symbol (the Sym member).
  return this->Sym;
}
bool THtmlTok::IsArg | ( | const TStr & | ArgNm | ) | const [inline] |
Definition at line 205 of file html.h.
Referenced by IsRedirUrlTok(), and IsUrlTok().
{
  // An argument is present when the search over the argument vector yields
  // anything other than -1.
  const int ArgN=ArgNmValV.SearchForw(TStrKd(ArgNm));
  return ArgN!=-1;
}
bool THtmlTok::IsBreakTag | ( | const TStr & | TagNm | ) | [static] |
Definition at line 726 of file html.cpp.
References THash< TKey, TDat, THashFunc >::AddKey(), THash< TKey, TDat, THashFunc >::IsKey(), and THash< TKey, TDat, THashFunc >::Len().
Referenced by IsBreakTok().
{
  // Tag names treated as "break" tags, in the order they are registered.
  // NOTE(review): "<HEAD>" appears twice in this list; the second entry looks
  // redundant -- confirm before removing it.
  static const char* const BreakTagNmV[]={
    "<H1>", "<H2>", "<H3>", "<H4>", "<H5>", "<H6>",
    "<BR>", "<HR>", "<P>", "<DL>", "<UL>", "<OL>",
    "<LI>", "<DT>", "<DD>", "<HEAD>", "<TITLE>", "<META>",
    "<SCRIPT>", "<HEAD>", "<BODY>"
  };

  // The lookup table is a function-local static that is filled on the first
  // call, i.e. while it is still empty.
  static TStrH BreakTagNmH(50);
  if (BreakTagNmH.Len()==0){
    const int TagNms=int(sizeof(BreakTagNmV)/sizeof(BreakTagNmV[0]));
    for (int TagNmN=0; TagNmN<TagNms; TagNmN++){
      BreakTagNmH.AddKey(TStr(BreakTagNmV[TagNmN]));
    }
  }

  // Plain membership test against the registered names.
  return BreakTagNmH.IsKey(TagNm);
}
bool THtmlTok::IsBreakTok | ( | const PHtmlTok & | Tok | ) | [static] |
Definition at line 744 of file html.cpp.
References hsyBTag, hsyETag, and IsBreakTag().
Referenced by THtmlHldV::THtmlHldV().
{
  // Only begin tags and end tags can be break tokens.
  const THtmlLxSym TokSym=Tok->GetSym();
  if ((TokSym!=hsyBTag)&&(TokSym!=hsyETag)){
    return false;
  }
  // For tags, the decision is made on the tag name alone.
  return IsBreakTag(Tok->GetStr());
}
bool THtmlTok::IsHTag | ( | const TStr & | TagNm, |
int & | HTagN | ||
) | [static] |
Definition at line 752 of file html.cpp.
References TStr::Len().
Referenced by THtmlHldV::THtmlHldV().
{
  // Heading level found in TagNm, or -1 when TagNm is not "<H1>".."<H6>".
  int Level=-1;

  // Shape check first: exactly four characters of the form "<H?>". The
  // length test must come before any indexing.
  const bool IsHShaped=
    (TagNm.Len()==4)&&(TagNm[0]=='<')&&(TagNm[1]=='H')&&(TagNm[3]=='>');
  if (IsHShaped){
    // The third character has to be a digit between '1' and '6'.
    const char Ch=TagNm[2];
    if (('1'<=Ch)&&(Ch<='6')){
      Level=Ch-'0';
    }
  }

  // HTagN receives the level on success and -1 on every failure path.
  HTagN=Level;
  return Level!=-1;
}
bool THtmlTok::IsRedirUrlTok | ( | ) | const |
Definition at line 676 of file html.cpp.
References GetArg(), GetStr(), GetSym(), TStr::GetUc(), hsyBTag, HttpEquivArgNm, IsArg(), and MetaTagNm.
{
  // A redirect token is a begin tag ...
  if (!(GetSym()==hsyBTag)){
    return false;
  }
  // ... named <META> that carries an HTTP-EQUIV argument ...
  const TStr TagNm=GetStr();
  if (!(TagNm==MetaTagNm)){
    return false;
  }
  if (!IsArg(HttpEquivArgNm)){
    return false;
  }
  // ... whose value, compared via its GetUc() form, is "REFRESH", and which
  // also has a CONTENT argument.
  const TStr EquivUcStr=GetArg(HttpEquivArgNm).GetUc();
  return (EquivUcStr=="REFRESH")&&IsArg("CONTENT");
}
bool THtmlTok::IsUrlTok | ( | TStr & | RelUrlStr | ) | const |
Definition at line 648 of file html.cpp.
References AreaTagNm, ATagNm, TStr::Empty(), FrameTagNm, GetArg(), GetStr(), TStr::GetSubStr(), GetSym(), TStr::GetUc(), HRefArgNm, hsyBTag, HttpEquivArgNm, ImgTagNm, IsArg(), TStr::Len(), MetaTagNm, TStr::SplitOnStr(), and SrcArgNm.
{
  // Only begin tags can carry a URL.
  if (!(GetSym()==hsyBTag)){
    return false;
  }
  const TStr TagNm=GetStr();

  // <A> and <AREA>: the URL is the HREF argument.
  if (((TagNm==ATagNm)||(TagNm==AreaTagNm))&&IsArg(HRefArgNm)){
    RelUrlStr=GetArg(HRefArgNm);
    return true;
  }

  // <FRAME> and <IMG>: the URL is the SRC argument.
  if (((TagNm==FrameTagNm)||(TagNm==ImgTagNm))&&IsArg(SrcArgNm)){
    RelUrlStr=GetArg(SrcArgNm);
    return true;
  }

  // <META HTTP-EQUIV="REFRESH" CONTENT="...URL=...">: the URL is whatever
  // follows "URL=" inside CONTENT.
  if ((TagNm==MetaTagNm)&&IsArg(HttpEquivArgNm)){
    const TStr EquivUcStr=GetArg(HttpEquivArgNm).GetUc();
    if (!((EquivUcStr=="REFRESH")&&IsArg("CONTENT"))){
      return false;
    }
    TStr ContentStr=GetArg("CONTENT");
    TStr LeftStr;
    TStr RightStr;
    TStr UrlEqStr="URL=";
    // The split runs on the GetUc() copy of CONTENT, but the URL itself is
    // sliced out of the original string, so its characters are untouched.
    ContentStr.GetUc().SplitOnStr(LeftStr, UrlEqStr, RightStr);
    const int UrlBChN=LeftStr.Len()+UrlEqStr.Len();
    RelUrlStr=ContentStr.GetSubStr(UrlBChN, ContentStr.Len());
    // RelUrlStr has been overwritten at this point even when the result is
    // an empty string and the function reports false.
    return !RelUrlStr.Empty();
  }

  // Any other tag, or a matching tag without the required argument.
  return false;
}
static PHtmlTok THtmlTok::Load | ( | TSIn & | ) | [inline, static] |
void THtmlTok::Save | ( | TSOut & | ) | [inline] |
void THtmlTok::SaveTxt | ( | const PSOut & | SOut, |
const bool & | TxtMode = true | ||
) |
Definition at line 691 of file html.cpp.
References GetFullStr(), THtmlLx::GetSymStr(), TSOut::PutStr(), and Sym.
{
  // Outside text mode, each token is preceded by its symbol name and a
  // single space.
  if (!TxtMode){
    SOut->PutStr(THtmlLx::GetSymStr(Sym));
    SOut->PutStr(" ");
  }
  // In both modes the full token text follows, terminated by one space.
  SOut->PutStr(GetFullStr());
  SOut->PutStr(" ");
}
const TStr THtmlTok::AltArgNm = "ALT" [static] |
const TStr THtmlTok::AreaTagNm = "<AREA>" [static] |
Definition at line 218 of file html.h.
Referenced by IsUrlTok().
THtmlLx::TArgNmValV THtmlTok::ArgNmValV [private] |
Definition at line 186 of file html.h.
Referenced by GetFullStr().
const TStr THtmlTok::ATagNm = "<A>" [static] |
Definition at line 217 of file html.h.
Referenced by IsUrlTok().
const TStr THtmlTok::BrTagNm = "<BR>" [static] |
const TStr THtmlTok::CardTagNm = "<CARD>" [static] |
const TStr THtmlTok::CenterTagNm = "<CENTER>" [static] |
TCRef THtmlTok::CRef [private] |
const TStr THtmlTok::FrameTagNm = "<FRAME>" [static] |
Definition at line 222 of file html.h.
Referenced by IsUrlTok().
const TStr THtmlTok::H1TagNm = "<H1>" [static] |
const TStr THtmlTok::H2TagNm = "<H2>" [static] |
const TStr THtmlTok::H3TagNm = "<H3>" [static] |
const TStr THtmlTok::H4TagNm = "<H4>" [static] |
const TStr THtmlTok::H5TagNm = "<H5>" [static] |
const TStr THtmlTok::H6TagNm = "<H6>" [static] |
const TStr THtmlTok::HRefArgNm = "HREF" [static] |
Definition at line 238 of file html.h.
Referenced by IsUrlTok().
const TStr THtmlTok::HttpEquivArgNm = "HTTP-EQUIV" [static] |
Definition at line 241 of file html.h.
Referenced by IsRedirUrlTok(), and IsUrlTok().
const TStr THtmlTok::ImgTagNm = "<IMG>" [static] |
Definition at line 229 of file html.h.
Referenced by IsUrlTok().
const TStr THtmlTok::LiTagNm = "<LI>" [static] |
const TStr THtmlTok::MetaTagNm = "<META>" [static] |
Definition at line 231 of file html.h.
Referenced by IsRedirUrlTok(), and IsUrlTok().
const TStr THtmlTok::PTagNm = "<P>" [static] |
const TStr THtmlTok::SrcArgNm = "SRC" [static] |
Definition at line 239 of file html.h.
Referenced by IsUrlTok().
TStr THtmlTok::Str [private] |
Definition at line 185 of file html.h.
Referenced by GetFullStr().
THtmlLxSym THtmlTok::Sym [private] |
Definition at line 184 of file html.h.
Referenced by GetFullStr(), and SaveTxt().
const TStr THtmlTok::TitleArgNm = "TITLE" [static] |
const TStr THtmlTok::TitleETagNm = "</TITLE>" [static] |
const TStr THtmlTok::TitleTagNm = "<TITLE>" [static] |
const TStr THtmlTok::UlTagNm = "<UL>" [static] |