SNAP Library , User Reference  2013-01-07 14:03:36
SNAP, a general purpose, high performance system for analysis and manipulation of large networks
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
TStrUtil Class Reference

#include <util.h>

List of all members.

Static Public Member Functions

static TChA & GetXmlTagVal (TXmlLx &XmlLx, const TChA &TagNm)
static void GetXmlTagNmVal (TXmlLx &XmlLx, TChA &TagNm, TChA &TagVal)
static bool GetXmlTagNmVal2 (TXmlLx &XmlLx, TChA &TagNm, TChA &TagVal, const bool &TakeTagNms)
static TChA GetDomNm (const TChA &UrlChA)
static TChA GetDomNm2 (const TChA &UrlChA)
static TChA GetWebsiteNm (const TChA &UrlChA)
static bool GetNormalizedUrl (const TChA &UrlIn, const TChA &BaseUrl, TChA &UrlOut)
 Quick URL normalization: Remove ending /, /index.html, etc. and strip starting www.
static bool StripEnd (const TChA &Str, const TChA &SearchStr, TChA &NewStr)
static TChA GetShorStr (const TChA &LongStr, const int MaxLen=50)
static TChA GetCleanStr (const TChA &ChA)
static TChA GetCleanWrdStr (const TChA &ChA)
static int CountWords (const char *CStr)
static int CountWords (const TChA &ChA)
static int CountWords (const TChA &ChA, const TStrHash< TInt > &StopWordH)
static int SplitWords (TChA &ChA, TVec< char * > &WrdV, const bool &SplitOnWs=true)
static int SplitOnCh (TChA &ChA, TVec< char * > &WrdV, const char &Ch, const bool &SkipEmpty=false)
static int SplitLines (TChA &ChA, TVec< char * > &LineV, const bool &SkipEmpty=false)
static int SplitSentences (TChA &ChA, TVec< char * > &SentenceV)
static void RemoveHtmlTags (const TChA &HtmlStr, TChA &TextStr)
static bool IsLatinStr (const TChA &Str, const double &MinAlFrac)
static void GetWIdV (const TStrHash< TInt > &StrH, const char *CStr, TIntV &WIdV)
static void GetAddWIdV (TStrHash< TInt > &StrH, const char *CStr, TIntV &WIdV)
static bool GetTmFromStr (const char *TmStr, TSecTm &Tm)
 Parses time in many different text formats. See source code for details.
static TStr GetStdName (TStr AuthorName)
 Puts person's name (first middle last) in a standard form: <last_name>_<first name initial>
static void GetStdNameV (TStr AuthorNames, TStrV &StdNameV)
 Splits a list of people's names.

Detailed Description

String helper functions and utilities. Quick and dirty!

Definition at line 34 of file util.h.

Member Function Documentation

int TStrUtil::CountWords ( const char *  CStr) [static]

Definition at line 393 of file util.cpp.

  int WrdCnt = 1;
  for (const char *c = CStr; *c; c++) {
    if (TCh::IsWs(*c)) { WrdCnt++; }
  return WrdCnt;
int TStrUtil::CountWords ( const TChA &  ChA) [static]

Definition at line 389 of file util.cpp.

  return CountWords(ChA.CStr());
int TStrUtil::CountWords ( const TChA &  ChA,
const TStrHash< TInt > &  StopWordH 
) [static]

Definition at line 401 of file util.cpp.

  TChA Tmp;
  TVec<char *> WrdV;
  SplitWords(Tmp, WrdV);
  int SWordCnt = 0;
  for (int w = 0; w < WrdV.Len(); w++) {
    if (StopWordH.IsKey(WrdV[w])) { SWordCnt++; }
  return WrdV.Len() - SWordCnt;
void TStrUtil::GetAddWIdV ( TStrHash< TInt > &  StrH,
const char *  CStr,
TIntV &  WIdV 
) [static]

Definition at line 552 of file util.cpp.

  TChA ChA(CStr);
  TVec<char *> WrdV;
  TInt WId;
  TStrUtil::SplitWords(ChA, WrdV);
  for (int w = 0; w < WrdV.Len(); w++) {
TChA TStrUtil::GetCleanStr ( const TChA &  ChA) [static]

Definition at line 372 of file util.cpp.

  char *b = (char *) ChA.CStr();
  while (*b && ! TCh::IsAlNum(*b)) { b++; }
  if (*b == 0) { return TChA(); }
  TChA OutChA(ChA.Len());
  char *e = b;
  bool ws=false;
  while (*e) {
    while (*e && TCh::IsWs(*e)) { e++; ws=true; }
    if (! *e) { break; }
    if (ws) { OutChA.AddCh(' '); ws=false; }
  return OutChA;
TChA TStrUtil::GetCleanWrdStr ( const TChA &  ChA) [static]

Definition at line 350 of file util.cpp.

  char *b = (char *) ChA.CStr();
  while (*b && ! TCh::IsAlNum(*b)) { b++; }
  if (*b == 0) { return TChA(); }
  TChA OutChA(ChA.Len());
  char *e = b, tmp;
  while (*e) {
    b = e;
    while (*e && (TCh::IsAlNum(*e) || ((*e=='\'' || *e=='-') && TCh::IsAlNum(*(e+1))))) { e++; }
    if (b < e) {
      tmp = *e; *e=0;
      OutChA += b;  OutChA.AddCh(' ');
      *e = tmp;
    while (*e && ! TCh::IsAlNum(*e)) { e++; }
    if (! *e) { break; }
  OutChA.DelLastCh();  OutChA.ToLc();
  return OutChA;
TChA TStrUtil::GetDomNm ( const TChA &  UrlChA) [static]

Definition at line 187 of file util.cpp.

  int EndSlash = UrlChA.SearchCh('/', 7)-1; // skip starting http://
  if (EndSlash > 0) {
    const int BegSlash = UrlChA.SearchChBack('/', EndSlash);
    if (BegSlash > 0) { return UrlChA.GetSubStr(BegSlash+1, EndSlash).ToLc(); }
    else { return UrlChA.GetSubStr(0, UrlChA.SearchCh('/', 0)-1).ToLc(); }
  } else {
    if (UrlChA.IsPrefix("http://")) { return UrlChA.GetSubStr(7, UrlChA.Len()-1).ToLc(); }
    EndSlash = UrlChA.SearchCh('/', 0);
    if (EndSlash > 0) { return UrlChA.GetSubStr(0, EndSlash-1).ToLc(); }
    else { return TChA(UrlChA).ToLc(); }
TChA TStrUtil::GetDomNm2 ( const TChA &  UrlChA) [static]

Definition at line 201 of file util.cpp.

  TChA Dom = GetDomNm(UrlChA);
  if (Dom.IsPrefix("www.")) { return Dom.GetSubStr(4, TInt::Mx); }
  else { return Dom; }
bool TStrUtil::GetNormalizedUrl ( const TChA &  UrlIn,
const TChA &  BaseUrl,
TChA &  UrlOut 
) [static]

Quick URL normalization: Remove ending /, /index.html, etc. and strip starting www.

Definition at line 306 of file util.cpp.

  UrlOut = UrlIn;
  if (StripEnd(UrlIn, "/", UrlOut)) {}
  else if (StripEnd(UrlIn, "/index.html", UrlOut)) {}
  else if (StripEnd(UrlIn, "/index.htm", UrlOut)) {}
  else if (StripEnd(UrlIn, "/index.php", UrlOut)) {}
  if (! (UrlOut.IsPrefix("http://") || UrlOut.IsPrefix("ftp://"))) {
    // if UrlIn is relative url, try combine it with BaseUrl
    if (UrlIn.Empty() || ! (BaseUrl.IsPrefix("http://") || BaseUrl.IsPrefix("ftp://"))) {
      //printf("** Bad URL: base:'%s' url:'%s'\n", BaseUrl.CStr(), UrlIn.CStr());
      return false; }
    TChA Out;
    if (! GetNormalizedUrl(BaseUrl, TChA(), Out)) { return false; }
    if (UrlIn[0] != '/') { Out.AddCh('/'); }
    Out += UrlOut;
    UrlOut = Out;
  // http://www. --> http://
  if (UrlOut.IsPrefix("http://www.")) {
    UrlOut = TChA("http://") + UrlOut.GetSubStr(11, TInt::Mx);
  return true;
TChA TStrUtil::GetShorStr ( const TChA &  LongStr,
const int  MaxLen = 50 
) [static]

Definition at line 342 of file util.cpp.

  if (LongStr.Len() < MaxLen) { return LongStr; }
  TChA Str = LongStr.GetSubStr(0, MaxLen-1);
  Str += "...";
  return Str;
TStr TStrUtil::GetStdName ( TStr  AuthorName) [static]

Puts person's name (first middle last) in a standard form: <last_name>_<first name initial>

Definition at line 621 of file util.cpp.

  TStr StdName;
  AuthorName.ChangeChAll('\n', ' ');
  AuthorName.ChangeChAll('.', ' ');
  // if there is a number in the name, remove it and everything after it
  int i, pos = 0;
  while (pos<AuthorName.Len() && (AuthorName[pos]!='#' && !TCh::IsNum(AuthorName[pos]))) {
    pos++; }
  if (pos < AuthorName.Len()) {
    AuthorName = AuthorName.GetSubStr(0, pos-1).ToTrunc(); }
  if (AuthorName.Empty()) { return TStr::GetNullStr(); }

  // replace everything after '('
  int b = AuthorName.SearchCh('(');
  if (b != -1) {
    AuthorName = AuthorName.GetSubStr(0, b-1).ToTrunc(); }
  // skip if contains ')'
  if (AuthorName .SearchCh(')')!=-1) { return TStr::GetNullStr(); }
  // skip if it is not a name
  if (AuthorName .SearchStr("figures")!=-1 || AuthorName .SearchStr("macros")!=-1
   || AuthorName .SearchStr("univ")!=-1 || AuthorName .SearchStr("institute")!=-1) {
    return TStr::GetNullStr();
  // remove all non-letters (latex tags, ...)
  TChA NewName;
  for (i = 0; i < AuthorName.Len(); i++) {
    const char Ch = AuthorName[i];
    if (TCh::IsAlpha(Ch) || TCh::IsWs(Ch) || Ch=='-') { NewName += Ch; }
  StdName = NewName;  StdName.ToTrunc();
  TStrV AuthNmV; StdName.SplitOnWs(AuthNmV);
  // too short -- not a name
  if (! AuthNmV.Empty() && AuthNmV.Last() == "jr") AuthNmV.DelLast();
  if (AuthNmV.Len() < 2) return TStr::GetNullStr();

  const TStr LastNm = AuthNmV.Last();
  if (! TCh::IsAlpha(LastNm[0]) || LastNm.Len() == 1) return TStr::GetNullStr();

  return TStr::Fmt("%s_%c", LastNm.CStr(), AuthNmV[0][0]);
void TStrUtil::GetStdNameV ( TStr  AuthorNames,
TStrV &  StdNameV 
) [static]

Splits a list of people's names.

Definition at line 664 of file util.cpp.

  AuthorNames.ChangeChAll('\n', ' ');
  // split into author names
  TStrV AuthV, TmpV, Tmp2V;
  // split on 'and'
  AuthorNames.SplitOnStr(" and ", TmpV);
  int i;
  for (i = 0; i < TmpV.Len(); i++) {
    TmpV[i].SplitOnAllCh(',', Tmp2V);  AuthV.AddV(Tmp2V); }
  // split on '&'
  TmpV = AuthV;  AuthV.Clr();
  for (i = 0; i < TmpV.Len(); i++) {
    TmpV[i].SplitOnAllCh('&', Tmp2V);  AuthV.AddV(Tmp2V); }
  // split on ','
  TmpV = AuthV;  AuthV.Clr();
  for (i = 0; i < TmpV.Len(); i++) {
    TmpV[i].SplitOnAllCh(',', Tmp2V);  AuthV.AddV(Tmp2V); }
  // split on ';'
  TmpV = AuthV;  AuthV.Clr();
  for (i = 0; i < TmpV.Len(); i++) {
    TmpV[i].SplitOnAllCh(';', Tmp2V);  AuthV.AddV(Tmp2V); }
  // standardize names
  //printf("\n*** %s\n", AuthorNames.CStr());
  for (i = 0; i < AuthV.Len(); i++) {
    TStr StdName = GetStdName(AuthV[i]);
    if (! StdName.Empty()) {
      //printf("\t%s  ==>  %s\n", AuthV[i].CStr(), StdName.CStr());
bool TStrUtil::GetTmFromStr ( const char *  TmStr,
TSecTm &  Tm 
) [static]

Parses time in many different text formats. See source code for details.

Definition at line 571 of file util.cpp.

  static TStrV MonthV1, MonthV2;
  if (MonthV1.Empty()) {
    TStr("january|february|march|april|may|june|july|august|september|october|november|december").SplitOnAllCh('|', MonthV1);
    TStr("jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec").SplitOnAllCh('|', MonthV2);
  TChA Tmp(TmStr);
  TVec<char *> WrdV;
  const char* End = Tmp.CStr()+Tmp.Len();
  int Col = -1, Cols=0;
  for (char *b = Tmp.CStr(); b <End; ) {
    while (*b && ! (*b==' ' || *b=='-' || *b==':' || *b==',')) { b++; }
    if (*b==':') { if(Col==-1) { Col=WrdV.Len(); } Cols++;  }
    *b=0; b++;
    while (*b && (*b==' ' || *b=='-' || *b==':' || *b==',')) { b++; }
  if (Cols == 2) {
    if (Col+1 >= WrdV.Len()) { return false; }
  if (Col<1) { return false; }
  const int Hr = atoi(WrdV[Col-1]);
  const int Min = atoi(WrdV[Col]);
  WrdV.Del(Col);  WrdV.Del(Col-1);
  if (WrdV.Len() != 3) { return false; }
  int y=0,m=1,d=2, Mon=-1;
  if (TCh::IsAlpha(WrdV[0][0])) {
    y=2; m=0; d=1;
  } else if (TCh::IsAlpha(WrdV[1][0])) {
    y=2; m=1; d=0;
  } else if (TCh::IsAlpha(WrdV[2][0])) {
    y=0; m=2; d=1;
  } else {
    y=0; m=1; d=2;
    Mon = atoi(WrdV[m]);
  int Day = atoi(WrdV[d]);
  if (Mon <= 0) { Mon = MonthV1.SearchForw(WrdV[m])+1; }
  if (Mon <= 0) { Mon = MonthV2.SearchForw(WrdV[m])+1; }
  if (Mon == 0) { return false; }
  int Year = atoi(WrdV[y]);
  if (Day > Year) { ::Swap(Day, Year); }
  //printf("%d-%02d-%02d  %02d:%02d\n", Year, Mon, Day, Hr, Min);
  Tm = TSecTm(Year, Mon, Day, Hr, Min, 0);
  return true;
TChA TStrUtil::GetWebsiteNm ( const TChA &  UrlChA) [static]

Definition at line 218 of file util.cpp.

  TChA DomNm = TStrUtil::GetDomNm2(PostUrlStr);
  if (DomNm == "") {
    return PostUrlStr.GetSubStr(7, GetNthOccurence(PostUrlStr, 2, '&')-1);
  // For these websites take the domain name and 1st directory:
  if (DomNm=="" || DomNm=="" || DomNm=="" || DomNm=="" || DomNm==""
    || DomNm=="" || DomNm=="" || DomNm=="" || DomNm==""
    || DomNm=="" || DomNm=="" || DomNm=="" || DomNm==""
    || DomNm=="" || DomNm=="" || DomNm=="" || DomNm==""
    || DomNm=="" || DomNm=="" || DomNm==""
    || DomNm=="" || DomNm=="" || DomNm=="") {
      return PostUrlStr.GetSubStr(7, GetNthOccurence(PostUrlStr, 4)-1);
  if (DomNm == "") {
    if (PostUrlStr.IsPrefix("")) {
      const int Url = PostUrlStr.SearchStr(";url=");
      if (Url != -1) {
        return GetWebsiteNm(PostUrlStr.GetSubStr(Url+5, PostUrlStr.SearchCh('&', Url+5))); }
    } else {
      return PostUrlStr.GetSubStr(7, GetNthOccurence(PostUrlStr, 4)-1); }
  // For these websites take the domain name and 2 directories:
  if (PostUrlStr.IsPrefix("") || PostUrlStr.IsPrefix("")
    || PostUrlStr.IsPrefix("") || PostUrlStr.IsPrefix("")) {
    return PostUrlStr.GetSubStr(7, GetNthOccurence(PostUrlStr, 5)-1);
  // ~3/361711640
  if (DomNm=="") {
    return PostUrlStr.GetSubStr(7, GetNthOccurence(PostUrlStr, 5)-1);
  if (DomNm=="") {
    return PostUrlStr.GetSubStr(7, GetNthOccurence(PostUrlStr, 5)-1);
  if (DomNm=="") { // redirect
    const int UrlPos = PostUrlStr.SearchStr("&url=");
    if (UrlPos != -1) {
      return GetWebsiteNm(PostUrlStr.GetSubStr(UrlPos+5, PostUrlStr.SearchCh('&', UrlPos+5))); }
  if (DomNm == "") { // redirect
    const int Http2 = PostUrlStr.SearchStr("/http://");
    if (Http2!=-1) {
      return GetWebsiteNm(PostUrlStr.GetSubStr(Http2+1, PostUrlStr.Len()-1)); }
  if (DomNm.IsSuffix("")) {
    const int Http2 = PostUrlStr.SearchStr("/*");
    if (Http2!=-1) {
      return GetWebsiteNm(PostUrlStr.GetSubStr(Http2+9, PostUrlStr.Len()-1)); }
  return DomNm;
void TStrUtil::GetWIdV ( const TStrHash< TInt > &  StrH,
const char *  CStr,
TIntV &  WIdV 
) [static]

Definition at line 538 of file util.cpp.

  const int NotWId = -1;
  TChA ChA(CStr);
  TVec<char *> WrdV;
  TInt WId;
  TStrUtil::SplitWords(ChA, WrdV);
  for (int w = 0; w < WrdV.Len(); w++) {
    if (StrH.IsKeyGetDat(WrdV[w], WId)) { WIdV.Add(WId); }
    else { WIdV.Add(NotWId); }
void TStrUtil::GetXmlTagNmVal ( TXmlLx &  XmlLx,
TChA &  TagNm,
TChA &  TagVal 
) [static]

Definition at line 149 of file util.cpp.

  EAssertR(XmlLx.GetSym() == xsySTag, TagNm);
  TagNm = XmlLx.TagNm;
  const TXmlLxSym NextSym = XmlLx.GetSym();
  TagVal = XmlLx.TxtChA;
  if (NextSym == xsyStr) {
    EAssertR(XmlLx.GetSym() == xsyETag, TagNm);
  } else {
    EAssertR(NextSym == xsyETag, TagNm); // empty tag
    //printf("  token: %s empty! %s\n", XmlLx.TagNm.CStr(), XmlLx.GetFPosStr().CStr());
bool TStrUtil::GetXmlTagNmVal2 ( TXmlLx &  XmlLx,
TChA &  TagNm,
TChA &  TagVal,
const bool &  TakeTagNms 
) [static]

Definition at line 163 of file util.cpp.

  if (XmlLx.GetSym() != xsySTag) {
    return false; }
  TagNm = XmlLx.TagNm;
  //const TXmlLxSym NextSym = XmlLx.GetSym();
  while (XmlLx.Sym != xsyETag || XmlLx.TagNm != TagNm.CStr()) {
    if (TakeTagNms) {
      TagVal += XmlLx.TxtChA; }
    else if (XmlLx.Sym == xsyStr) {
      TagVal += XmlLx.TxtChA; }
  return true;
  //if (NextSym == xsyStr) {
  //  EAssertR(XmlLx.GetSym() == xsyETag, TagNm);
  //} else {
  //  EAssertR(NextSym == xsyETag, TagNm); // empty tag
  //  printf("  token: %s empty! %s\n", XmlLx.TagNm.CStr(), XmlLx.GetFPosStr().CStr());
TChA & TStrUtil::GetXmlTagVal ( TXmlLx &  XmlLx,
const TChA &  TagNm 
) [static]

Definition at line 132 of file util.cpp.

  static TChA TagVal;
  EAssertR(XmlLx.GetSym() == xsySTag, TagNm);
  EAssertR(TagNm == XmlLx.TagNm.CStr(), TagNm);
  const TXmlLxSym NextSym = XmlLx.GetSym();
  TagVal = XmlLx.TxtChA;
  if (NextSym == xsyStr) {
    EAssertR(XmlLx.GetSym() == xsyETag, TagNm);
  } else {
    EAssertR(NextSym == xsyETag, TagNm); // empty tag
    //printf("  token: %s empty! %s\n", XmlLx.TagNm.CStr(), XmlLx.GetFPosStr().CStr());
  EAssertR(XmlLx.TagNm == TagNm, TagNm);
  return TagVal;
bool TStrUtil::IsLatinStr ( const TChA &  Str,
const double &  MinAlFrac 
) [static]

Definition at line 527 of file util.cpp.

  int AlNumCnt=0, ChCnt=0;
  for (const char *c = Str.CStr(); *c; c++) {
    if (TCh::IsWs(*c)) { continue; }
    if (*c > 0 && TCh::IsAlNum(*c)) { AlNumCnt++; }
  if (double(AlNumCnt)/double(ChCnt) > MinAlFrac) { return true; }
  return false;
void TStrUtil::RemoveHtmlTags ( const TChA &  HtmlStr,
TChA &  TextStr 
) [static]

Definition at line 481 of file util.cpp.

  char *StrB, *StrE;
  // use full page html: skip till <body>
  //PageHtmlStr = "<script fdsfs>  fsdfsd </script> jure";
  /*if (UseFullHtml) {
    StrB = PageHtmlStr.CStr();
    StrE = StrB+PageHtmlStr.Len();
    char * NewB = strstr(StrB, "<body>");
    if (NewB != NULL) { StrB = NewB+6; }
    char * NewE = strstr(StrB, "body>");
    if (NewE != NULL) {
      while (true) {
        char *E=strstr(NewE+4, "body>");
        if (E == NULL) { break; }  NewE = E; }
      StrE = NewE;
  } else {  // only extracted post html*/
  StrB = (char *) HtmlStr.CStr();
  StrE = (char *) StrB+HtmlStr.Len(); //}
  for (char *e = StrB; e < StrE; ) {
    char* b = e;
    while (e<StrE && *e != '<') { e++; }
    // copy text
    char tmp=*e;  *e = 0;
    TextStr+= b; TextStr.AddCh(' ');  *e = tmp;
    if (e >= StrE) { return; }
    // if start of a comment: skip
    if (e[1]=='!' && e[2]=='-' && e[3]=='-') { // comment
      e += 3;
      while(e<StrE && !(*(e-2)=='-' && *(e-1)=='-' && *e=='>')) { e++; }
      e++;  continue;
    // if "<script" then skip
    if (e[1]=='s' && e[2]=='c' && e[3]=='r' && e[4]=='i' && e[5]=='p' && e[6]=='t') {
      e += 5;
      while(e<StrE && !(*(e-6)=='s' && *(e-5)=='c' && *(e-4)=='r' && *(e-3)=='i' && *(e-2)=='p' && *(e-1)=='t' && *e=='>')) { e++; }
      e++;  continue;
    // skip to end of tag
    while (e < StrE && *e != '>') { e++; }
    if (e>=StrE) { return; }
int TStrUtil::SplitLines ( TChA &  ChA,
TVec< char * > &  LineV,
const bool &  SkipEmpty = false 
) [static]

Definition at line 439 of file util.cpp.

  bool IsChs=false;
  for (char *c = (char *) ChA.CStr(); *c; c++) {
    if (*c == '\n') {
      if (c > ChA.CStr() && *(c-1)=='\r') { *(c-1)=0; } // \r\n
      if (SkipEmpty) {
        if (IsChs) { LineV.Add(c+1); }
      } else {
    } else {
  return LineV.Len();
int TStrUtil::SplitOnCh ( TChA &  ChA,
TVec< char * > &  WrdV,
const char &  Ch,
const bool &  SkipEmpty = false 
) [static]

Definition at line 425 of file util.cpp.

  for (char *c = (char *) ChA.CStr(); *c; c++) {
    if (*c == Ch) {
      *c = 0;
      if (SkipEmpty && ! WrdV.Empty() && strlen(WrdV.Last()) == 0) { WrdV.DelLast(); }
  if (SkipEmpty && ! WrdV.Empty() && strlen(WrdV.Last()) == 0) { WrdV.DelLast(); }
  return WrdV.Len();
int TStrUtil::SplitSentences ( TChA &  ChA,
TVec< char * > &  SentenceV 
) [static]

Definition at line 460 of file util.cpp.

  const char *B = ChA.CStr();
  const char *E = B+ChA.Len();
  char *c = (char *) B;
  while (*c && TCh::IsWs(*c)) { c++; }
  if (*c) { SentenceV.Add(c); } else { return 0; }
  for (; c < E; c++) {
    if (c<E && (*c == '.' || *c == '!' || *c == '?') && ! TCh::IsAlNum(*(c+1))) { // end of sentence
      if (c<E && *(c+1)=='"') { *c='"';  c++; } // blah." --> blah"
      if (c>=E) { continue; }
      *c=0;  c++;
      char *e = c-1;
      while (e>B && *e!='"' && ! TCh::IsAlNum(*e)) { *e=0; e--; } // skip trailing non-alpha-num chars
      while (c<E && ! (TCh::IsAlNum(*c) || (*c=='"' && TCh::IsAlNum(*(c+1))))) { c++; } // sentence starts with AlNum or "AlNum
      if (c<E) { SentenceV.Add(c); }
  return SentenceV.Len();
int TStrUtil::SplitWords ( TChA &  ChA,
TVec< char * > &  WrdV,
const bool &  SplitOnWs = true 
) [static]

Definition at line 412 of file util.cpp.

  for (char *c = (char *) ChA.CStr(); *c; c++) {
    if ((SplitOnWs && *c == ' ') || (! SplitOnWs && ! TCh::IsAlNum(*c))) {
      *c = 0;
      if (! WrdV.Empty() && strlen(WrdV.Last()) == 0) { WrdV.DelLast(); }
  return WrdV.Len();
bool TStrUtil::StripEnd ( const TChA &  Str,
const TChA &  SearchStr,
TChA &  NewStr 
) [static]

Definition at line 331 of file util.cpp.

  const int StrLen = Str.Len();
  const int SearchStrLen = SearchStr.Len();
  if (StrLen < SearchStrLen) { return false; }
  for (int i = 0; i < SearchStrLen; i++) {
    if (Str[StrLen-i-1] != SearchStr[SearchStrLen-i-1]) { return false; }
  NewStr = Str.GetSubStr(0, StrLen-SearchStrLen-1);
  return true;

The documentation for this class was generated from the following files: