SNAP Library, User Reference
2013-01-07 14:03:36
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
|
#include <html.h>
Public Types | |
typedef TStrKdV | TArgNmValV |
Public Member Functions | |
THtmlLx (const PSIn &_SIn, const bool &_DoParseArg=true) | |
THtmlLx & | operator= (const THtmlLx &) |
void | PutCh (const char &_Ch) |
void | PutStr (const TStr &Str) |
THtmlLxSym | GetSym () |
PHtmlTok | GetTok (const bool &DoUc=true) |
TStr | GetPreSpaceStr () const |
int | GetArgs () const |
TStr | GetArgNm (const int &ArgN) const |
TStr | GetArgVal (const int &ArgN) const |
bool | IsArg (const TStr &ArgNm) const |
TStr | GetArg (const TStr &ArgNm, const TStr &DfArgVal=TStr()) const |
void | PutArg (const TStr &ArgNm, const TStr &ArgVal) |
TStr | GetFullBTagStr () const |
void | MoveToStrOrEof (const TStr &Str) |
void | MoveToBTagOrEof (const TStr &TagNm) |
void | MoveToBTag2OrEof (const TStr &TagNm1, const TStr &TagNm2) |
void | MoveToBTag3OrEof (const TStr &TagNm1, const TStr &TagNm2, const TStr &TagNm3) |
void | MoveToBTagOrETagOrEof (const TStr &BTagNm, const TStr &ETagNm) |
void | MoveToBTagArgOrEof (const TStr &TagNm, const TStr &ArgNm, const TStr &ArgVal) |
void | MoveToBTagArg2OrEof (const TStr &TagNm, const TStr &ArgNm1, const TStr &ArgVal1, const TStr &ArgNm2, const TStr &ArgVal2, const bool &AndOpP=true) |
void | MoveToBTagOrEof (const TStr &TagNm1, const TStr &ArgNm1, const TStr &ArgVal1, const TStr &TagNm2, const TStr &ArgNm2, const TStr &ArgVal2) |
void | MoveToETagOrEof (const TStr &TagNm) |
TStr | GetTextOnlyStrToEof () |
TStr | GetStrToBTag (const TStr &TagNm, const bool &TxtOnlyP=false) |
TStr | GetStrToBTag (const TStr &TagNm, const TStr &ArgNm, const TStr &ArgVal, const bool &TxtOnlyP=false) |
TStr | GetStrToETag (const TStr &TagNm, const bool &TxtOnlyP=false) |
TStr | GetStrToETag2 (const TStr &TagNm1, const TStr &TagNm2, const bool &TxtOnlyP=false) |
TStr | GetStrInTag (const TStr &TagNm, const bool &TxtOnlyP=false) |
TStr | GetHRefBeforeStr (const TStr &Str) |
bool | IsGetBTag (const TStr &TagNm) |
bool | IsGetETag (const TStr &TagNm) |
Static Public Member Functions | |
static TStr | GetSymStr (const THtmlLxSym &Sym) |
static TStr | GetEscapedStr (const TChA &ChA) |
static TStr | GetAsciiStr (const TChA &ChA, const char &GenericCh='_') |
static void | GetTokStrV (const TStr &Str, TStrV &TokStrV) |
static TStr | GetNoTag (const TStr &Str) |
Public Attributes | |
THtmlLxSym | Sym |
int | SymBChX |
int | SymEChX |
TChA | ChA |
TChA | UcChA |
TChA | SymChA |
int | PreSpaces |
TChA | PreSpaceChA |
TArgNmValV | ArgNmValV |
Private Member Functions | |
void | GetCh () |
void | GetEscCh () |
void | GetMetaTag () |
void | GetTag () |
Private Attributes | |
PSIn | SIn |
TSIn & | RSIn |
bool | DoParseArg |
TChA | ChStack |
char | Ch |
int | ChX |
bool | EscCh |
TChA | EscChA |
TChA | ArgNm |
TChA | ArgVal |
Static Private Attributes | |
static THtmlLxChDef | ChDef |
typedef TStrKdV THtmlLx::TArgNmValV |
THtmlLx::THtmlLx | ( | const PSIn & | _SIn, |
const bool & | _DoParseArg = true |
||
) | [inline] |
TStr THtmlLx::GetArg | ( | const TStr & | ArgNm, |
const TStr & | DfArgVal = TStr() |
||
) | const [inline] |
TStr THtmlLx::GetArgNm | ( | const int & | ArgN | ) | const [inline] |
int THtmlLx::GetArgs | ( | ) | const [inline] |
TStr THtmlLx::GetArgVal | ( | const int & | ArgN | ) | const [inline] |
TStr THtmlLx::GetAsciiStr | ( | const TChA & | ChA, |
const char & | GenericCh = '_' |
||
) | [static] |
void THtmlLx::GetCh | ( | ) | [inline, private] |
TStr THtmlLx::GetEscapedStr | ( | const TChA & | ChA | ) | [static] |
Definition at line 568 of file html.cpp.
{
  // Returns a copy of ChA in which the five markup-special characters
  // (double quote, ampersand, apostrophe, '<' and '>') are replaced by their
  // named character references; every other character is copied unchanged.
  TChA EscapedChA;
  for (int ChN=0; ChN<ChA.Len(); ChN++){
    char Ch=ChA[ChN];
    switch (Ch){
      case '"': EscapedChA+="&quot;"; break;
      case '&': EscapedChA+="&amp;"; break;
      case '\'': EscapedChA+="&apos;"; break;
      case '<': EscapedChA+="&lt;"; break;
      case '>': EscapedChA+="&gt;"; break;
      default: EscapedChA+=Ch;
    }
  }
  // The accumulated TChA is returned through the function's TStr return type.
  return EscapedChA;
}
void THtmlLx::GetEscCh | ( | ) | [private] |
Definition at line 195 of file html.cpp.
{
  // Reads the next character with GetCh() and, if it is '&', tries to resolve
  // an escape sequence of the form "&#<digits>" or "&<name>;". EscCh records
  // whether the character just read was '&'.
  // NOTE(review): PutCh/PutStr are defined elsewhere; presumably they hand
  // characters back to the lexer for re-reading - confirm against html.h.
  GetCh();
  EscCh=(Ch=='&');
  if (!EscCh){return;}

  // Start collecting the escape text with the '&' itself.
  EscChA.Clr();
  EscChA.AddCh(Ch);
  GetCh();

  if (Ch=='#'){
    // Numeric form: '#' has to be followed by at least one decimal digit.
    EscChA.AddCh(Ch);
    GetCh();
    if ((Ch<'0')||('9'<Ch)){
      // No digit follows "&#": give '#' and '&' back, in that order.
      PutCh('#'); PutCh('&');
      return;
    }
    while (('0'<=Ch)&&(Ch<='9')){
      EscChA.AddCh(Ch);
      GetCh();
    }
    // A terminating ';' is consumed when present but is not required here.
    if (Ch==';'){GetCh();}
    PutStr(ChDef.GetEscStr(EscChA));
    return;
  }

  // Named form: must start with an ASCII letter.
  const bool IsNameStart=
   (('a'<=Ch)&&(Ch<='z'))||(('A'<=Ch)&&(Ch<='Z'));
  if (!IsNameStart){
    // '&' followed by neither '#' nor a letter.
    PutCh('&');
    return;
  }
  // Letters and digits extend the name.
  while ((('A'<=Ch)&&(Ch<='Z'))||(('a'<=Ch)&&(Ch<='z'))||(('0'<=Ch)&&(Ch<='9'))){
    EscChA.AddCh(Ch);
    GetCh();
  }
  if (Ch==';'){
    // Properly terminated name: substitute what ChDef.GetEscStr yields.
    GetCh();
    PutStr(ChDef.GetEscStr(EscChA));
  } else {
    // Unterminated name: put the collected text back as it was read.
    PutStr(EscChA);
  }
}
TStr THtmlLx::GetFullBTagStr | ( | ) | const |
TStr THtmlLx::GetHRefBeforeStr | ( | const TStr & | Str | ) |
void THtmlLx::GetMetaTag | ( | ) | [private] |
TStr THtmlLx::GetNoTag | ( | const TStr & | Str | ) | [static] |
TStr THtmlLx::GetPreSpaceStr | ( | ) | const [inline] |
Definition at line 132 of file html.h.
{
  // PreSpaces is the number of white-space characters GetSym counted ahead of
  // the current symbol; the string is produced by TStr::GetSpaceStr from that
  // count (presumably that many blanks - confirm against TStr).
  const TStr PreSpaceStr=TStr::GetSpaceStr(PreSpaces);
  return PreSpaceStr;
}
TStr THtmlLx::GetStrInTag | ( | const TStr & | TagNm, |
const bool & | TxtOnlyP = false |
||
) |
Definition at line 525 of file html.cpp.
{
  // Step 1: advance the lexer with MoveToBTagOrEof(TagNm).
  MoveToBTagOrEof(TagNm);
  // Step 2: return whatever GetStrToETag yields for the same tag name;
  // TxtOnlyP is passed through unchanged.
  const TStr InTagStr=GetStrToETag(TagNm, TxtOnlyP);
  return InTagStr;
}
TStr THtmlLx::GetStrToBTag | ( | const TStr & | TagNm, |
const bool & | TxtOnlyP = false |
||
) |
TStr THtmlLx::GetStrToBTag | ( | const TStr & | TagNm, |
const TStr & | ArgNm, | ||
const TStr & | ArgVal, | ||
const bool & | TxtOnlyP = false |
||
) |
TStr THtmlLx::GetStrToETag | ( | const TStr & | TagNm, |
const bool & | TxtOnlyP = false |
||
) |
TStr THtmlLx::GetStrToETag2 | ( | const TStr & | TagNm1, |
const TStr & | TagNm2, | ||
const bool & | TxtOnlyP = false |
||
) |
THtmlLxSym THtmlLx::GetSym | ( | ) |
Definition at line 277 of file html.cpp.
{
  // Scans the next symbol from the input, stores its type in Sym and returns
  // it. Side effects visible here: ChA/UcChA receive the symbol text (UcChA
  // upper-cased through ChDef.GetUc for alphabetic symbols), PreSpaces and
  // PreSpaceChA describe the white-space skipped before the symbol,
  // ArgNmValV is cleared (GetTag may refill it), and SymBChX/SymEChX are set
  // from ChX at the start and end of the symbol.

  // prepare symbol descriptions
  ChA.Clr(); UcChA.Clr();
  PreSpaces=0; PreSpaceChA.Clr();
  ArgNmValV.Clr();
  // skip white-space
  while (ChDef.IsSpace(Ch)){
    // white-space seen while ChX is still 0 is not recorded
    if (ChX>0){PreSpaceChA+=Ch; PreSpaces++;}
    GetEscCh();
  }
  // parse symbol
  SymChA.Clr(); SymChA+=Ch; SymBChX=ChX;
  switch (ChDef.GetChTy(Ch)){
    case hlctAlpha:
      Sym=hsyStr;
      forever{
        do {
          ChA.AddCh(Ch); UcChA.AddCh(ChDef.GetUc(Ch)); GetEscCh();
        } while (ChDef.IsAlNum(Ch));
        if (Ch=='.'){
          // a '.' stays inside the symbol only when an alphanumeric follows
          GetCh();
          if (ChDef.IsAlNum(Ch)){ChA.AddCh('.'); UcChA.AddCh('.');}
          else {PutCh(Ch); Ch='.'; break;}
        } else {break;}
      }
      break;
    case hlctNum:
      Sym=hsyNum;
      forever{
        do {
          ChA.AddCh(Ch); UcChA.AddCh(Ch); GetEscCh();
        } while (ChDef.IsNum(Ch));
        if (Ch=='.'){
          GetCh();
          if (ChDef.IsAlNum(Ch)){ChA.AddCh('.'); UcChA.AddCh('.');}
          else {PutCh(Ch); Ch='.'; break;}
        } else if (ChDef.IsAlpha(Ch)){
          // a letter directly after digits turns the symbol into a string
          Sym=hsyStr;
        } else {
          break;
        }
      }
      break;
    case hlctSym:
      Sym=hsySSym; ChA.AddCh(Ch); UcChA.AddCh(Ch); GetEscCh();
      // a leading '.' followed by an alphanumeric starts a string symbol
      if ((ChA.LastCh()=='.')&&(ChDef.IsAlNum(Ch))){
        Sym=hsyStr;
        do {
          ChA.AddCh(Ch); UcChA.AddCh(ChDef.GetUc(Ch)); GetEscCh();
        } while (ChDef.IsAlNum(Ch));
      }
      break;
    case hlctLTag:
      if (EscCh){
        // the character came from an escape sequence: plain symbol, not a tag
        Sym=hsySSym; ChA.AddCh(Ch); UcChA.AddCh(Ch); GetEscCh();
      } else {
        GetCh();
        if (Ch=='!'){GetCh(); GetMetaTag();} else {GetTag();}
      }
      break;
    case hlctRTag:
      // Escaped or not, this character is always a plain symbol here; the
      // original if (EscCh)/else had two identical branches.
      Sym=hsySSym; ChA.AddCh(Ch); UcChA.AddCh(Ch); GetEscCh();
      break;
    case hlctEof: Sym=hsyEof; break;
    default: Sym=hsyUndef; GetEscCh();
  }
  // set symbol last-character-position
  SymEChX=ChX-1;
  // delete last character
  if (!SymChA.Empty()){SymChA.Pop();}
  // return symbol
  return Sym;
}
TStr THtmlLx::GetSymStr | ( | const THtmlLxSym & | Sym | ) | [static] |
Definition at line 553 of file html.cpp.
{
  // Translates a symbol type into its short printable name. A value outside
  // the listed enumerators goes through the Fail macro and then yields an
  // empty TStr.
  const char* SymNm="";
  switch (Sym){
    case hsyUndef: SymNm="Undef"; break;
    case hsyStr:   SymNm="Str";   break;
    case hsyNum:   SymNm="Num";   break;
    case hsySSym:  SymNm="SSym";  break;
    case hsyUrl:   SymNm="Url";   break;
    case hsyBTag:  SymNm="BTag";  break;
    case hsyETag:  SymNm="ETag";  break;
    case hsyMTag:  SymNm="MTag";  break;
    case hsyEof:   SymNm="Eof";   break;
    default:
      Fail; return TStr();
  }
  return SymNm;
}
void THtmlLx::GetTag | ( | ) | [private] |
Definition at line 236 of file html.cpp.
{
  // Parses a begin or end tag. GetSym calls this after it has consumed the
  // opening '<', so Ch holds the first character behind it. Sets Sym to
  // hsyETag or hsyBTag, stores "<NAME>" in UcChA and ChA (name upper-cased
  // via ChDef.GetUc) and, when DoParseArg is set, appends one (name, value)
  // pair to ArgNmValV for every "name=value" argument found.

  // tag kind: a leading '/' marks an end tag
  if (Ch=='/'){
    Sym=hsyETag;
    GetCh();
  } else {
    Sym=hsyBTag;
  }

  // tag name; ':' is accepted as a name character
  UcChA.AddCh('<');
  while (ChDef.IsAlNum(Ch)||(Ch==':')){
    UcChA.AddCh(ChDef.GetUc(Ch));
    GetCh();
  }
  UcChA.AddCh('>');
  ChA=UcChA;

  if (!DoParseArg){
    // arguments not requested: skip to the closing '>' or end of input
    while ((Ch!='>')&&(Ch!=TCh::EofCh)){GetCh();}
  } else {
    while ((Ch!='>')&&(Ch!=TCh::EofCh)){
      // skip everything up to the next letter, '>' or end of input
      while ((!ChDef.IsAlpha(Ch))&&(Ch!='>')&&(Ch!=TCh::EofCh)){GetCh();}
      if (!ChDef.IsAlpha(Ch)){continue;}

      // argument name (upper-cased); '-' is accepted inside the name
      ArgNm.Clr(); ArgVal.Clr();
      while (ChDef.IsAlNum(Ch)||(Ch=='-')){
        ArgNm.AddCh(ChDef.GetUc(Ch));
        GetCh();
      }
      while (ChDef.IsWs(Ch)){GetCh();}

      // a name that is not followed by '=' is not recorded
      if (Ch!='='){continue;}
      GetCh();
      while (ChDef.IsWs(Ch)){GetCh();}

      if ((Ch=='"')||(Ch=='\'')){
        // quoted value: runs to the matching quote, '>' or end of input;
        // end-of-line characters inside the quotes are dropped
        const char QuoteCh=Ch;
        GetCh();
        while ((Ch!=TCh::EofCh)&&(Ch!=QuoteCh)&&(Ch!='>')){
          if (!ChDef.IsEoln(Ch)){ArgVal.AddCh(Ch);}
          GetCh();
        }
        if (Ch==QuoteCh){GetCh();}
      } else {
        // unquoted value: runs to white-space, '>' or end of input
        while ((!ChDef.IsWs(Ch))&&(Ch!='>')&&(Ch!=TCh::EofCh)){
          ArgVal.AddCh(Ch);
          GetCh();
        }
      }
      ArgNmValV.Add(TStrKd(ArgNm, ArgVal));
    }
  }

  // step past the closing '>' unless the input ended inside the tag
  if (Ch!=TCh::EofCh){GetEscCh();}
}
PHtmlTok THtmlLx::GetTok | ( | const bool & | DoUc = true | ) |
void THtmlLx::GetTokStrV | ( | const TStr & | Str, |
TStrV & | TokStrV | ||
) | [static] |
bool THtmlLx::IsArg | ( | const TStr & | ArgNm | ) | const [inline] |
bool THtmlLx::IsGetBTag | ( | const TStr & | TagNm | ) |
bool THtmlLx::IsGetETag | ( | const TStr & | TagNm | ) |
void THtmlLx::MoveToBTag2OrEof | ( | const TStr & | TagNm1, |
const TStr & | TagNm2 | ||
) |
void THtmlLx::MoveToBTag3OrEof | ( | const TStr & | TagNm1, |
const TStr & | TagNm2, | ||
const TStr & | TagNm3 | ||
) |
void THtmlLx::MoveToBTagArg2OrEof | ( | const TStr & | TagNm, |
const TStr & | ArgNm1, | ||
const TStr & | ArgVal1, | ||
const TStr & | ArgNm2, | ||
const TStr & | ArgVal2, | ||
const bool & | AndOpP = true |
||
) |
Definition at line 410 of file html.cpp.
{
  // Reads symbols until end of input or until a begin tag named TagNm is
  // found whose arguments satisfy the two (name, value) conditions:
  // both of them when AndOpP is true, at least one of them otherwise.
  forever {
    GetSym();
    if (Sym==hsyEof){break;}
    // only begin tags with the requested name are candidates
    if (!((Sym==hsyBTag)&&(UcChA==TagNm))){continue;}

    bool Matched=(IsArg(ArgNm1))&&(GetArg(ArgNm1)==ArgVal1);
    // The second condition decides the outcome exactly when
    //   AND mode and the first condition held, or
    //   OR mode and the first condition failed,
    // i.e. when AndOpP equals the first result; otherwise the first result
    // already is the answer (same short-circuiting as && / ||).
    if (AndOpP==Matched){
      Matched=(IsArg(ArgNm2))&&(GetArg(ArgNm2)==ArgVal2);
    }
    if (Matched){break;}
  }
}
void THtmlLx::MoveToBTagArgOrEof | ( | const TStr & | TagNm, |
const TStr & | ArgNm, | ||
const TStr & | ArgVal | ||
) |
void THtmlLx::MoveToBTagOrEof | ( | const TStr & | TagNm | ) |
void THtmlLx::MoveToBTagOrETagOrEof | ( | const TStr & | BTagNm, |
const TStr & | ETagNm | ||
) |
void THtmlLx::MoveToETagOrEof | ( | const TStr & | TagNm | ) |
void THtmlLx::MoveToStrOrEof | ( | const TStr & | Str | ) |
void THtmlLx::PutArg | ( | const TStr & | ArgNm, |
const TStr & | ArgVal | ||
) | [inline] |
void THtmlLx::PutCh | ( | const char & | _Ch | ) | [inline] |
void THtmlLx::PutStr | ( | const TStr & | Str | ) | [inline] |
TChA THtmlLx::ArgNm [private] |
TChA THtmlLx::ArgVal [private] |
char THtmlLx::Ch [private] |
THtmlLxChDef THtmlLx::ChDef [static, private] |
TChA THtmlLx::ChStack [private] |
int THtmlLx::ChX [private] |
bool THtmlLx::DoParseArg [private] |
bool THtmlLx::EscCh [private] |
TChA THtmlLx::EscChA [private] |
TSIn& THtmlLx::RSIn [private] |
PSIn THtmlLx::SIn [private] |
int THtmlLx::SymBChX |
int THtmlLx::SymEChX |