(gdb) backtrace #0 0x00002aaaae17f1f9 in tag_parser (tag=0x2
, len=-1216936334, back_tag=0x7fffffc91ac7 "") at getandParseWithC_pl_05fd.xs:52 #1 0x00002aaaae17f762 in parser (url=0x4cd9780 "openpolytechnic.ac.nz/ftp/linux/sunsite/docs/faqs/ftp-faq", doc=0x2aaab7770010 "HTTP/1.1 200 OK\r\nDate: Wed, 12 Oct 2005 01:16:33 GMT\r\nServer: Apache/2.0.54 (Unix) mod_ssl/2.0.54 OpenSSL/0.9.7g DAV/2\r\nLast-Modified: Sun, 29 May 2005 11:11:07 GMT\r\nETag: \"193b68-1ead-d3e728c0\"\r\nAcce"..., buf=0x153efac0 "openpolytechnic U\nac U\nnz U\nftp U\nlinux U\nsunsite U\ndocs U\nfaqs U\nftp U\nfaq U\nftp P\nhowto P\nftp P\nfile P\ntransfer P\nprotocol P\nis P\na P\nclient P\nserver P\ntcp P\nprotocol P\nthat P\nallows P\na P\nuser P\nto"..., blen=16321) at getandParseWithC_pl_05fd.xs:176 #2 0x00002aaaae17fb4a in MyParser (url=0x4cd9780 "openpolytechnic.ac.nz/ftp/linux/sunsite/docs/faqs/ftp-faq", page=0x2aaab7770010 "HTTP/1.1 200 OK\r\nDate: Wed, 12 Oct 2005 01:16:33 GMT\r\nServer: Apache/2.0.54 (Unix) mod_ssl/2.0.54 OpenSSL/0.9.7g DAV/2\r\nLast-Modified: Sun, 29 May 2005 11:11:07 GMT\r\nETag: \"193b68-1ead-d3e728c0\"\r\nAcce"..., len=8160) at getandParseWithC_pl_05fd.xs:310 #3 0x00002aaaae180dcb in XS_main_MyParser (my_perl=0x505010, cv=0xb36240) at getandParseWithC_pl_05fd.c:400 #4 0x000000398d69b67e in Perl_pp_entersub () from /usr/lib64/perl5/5.8.6/x86_64-linux-thread-multi/CORE/libperl.so #5 0x000000398d67f3cd in Perl_runops_debug () from /usr/lib64/perl5/5.8.6/x86_64-linux-thread-multi/CORE/libperl.so #6 0x000000398d639dbe in perl_run () from /usr/lib64/perl5/5.8.6/x86_64-linux-thread-multi/CORE/libperl.so #7 0x0000000000401a01 in main () ####
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
/* Tag classes returned by tag_parser(); 0 means "not a tag of interest". */
#define PTAG_B      1
#define PTAG_I      2
#define PTAG_H      3
#define PTAG_TITLE  4
#define PTAG_SCRIPT 5

/* Bits kept in parser()'s tag_flag word while the matching element is open. */
#define _TITLE_TAG  0x0001
#define _B_TAG      0x0004
#define _H_TAG      0x0008
#define _I_TAG      0x0010

/*
 * ASCII-only character classification.  These are macros, so the argument
 * may be evaluated several times: pass side-effect-free expressions only.
 */
#define xl_isdigit(c)     (('0' <= (c)) && ((c) <= '9'))
#define xl_islower(c)     (('a' <= (c)) && ((c) <= 'z'))
#define xl_isupper(c)     (('A' <= (c)) && ((c) <= 'Z'))
#define xl_isindexable(c) (xl_islower(c) || xl_isupper(c) || xl_isdigit(c))

/*
 * Adds the 'a' - 'A' offset to `c` IN PLACE, so `c` must be a modifiable
 * lvalue.  No range check is done: callers test xl_isupper(c) first.
 */
#define xl_tolower(c)     ((c) += ('a' - 'A'))
char* parser_init(char* doc)
{
char *p;
if (strncasecmp(doc, "HTTP/", 5))
return NULL;
for (p = doc; (*p != ' ')&&(*p); p++);
if (*p == '\0')
return NULL;
if (atoi(p) != 200)
return NULL;
p = strstr(p, "\\r\\n\\r\\n");
if (p == NULL)
return NULL;
return p+4;
}
/*
 * Returns nonzero when the `nlen`-character element name `name` occurs at
 * tag[pos] (ASCII case-insensitively) and is immediately followed by a
 * whitespace character, with every byte examined lying inside tag[0..len-1].
 */
static int tag_name_at(const char *tag, int len, int pos,
                       const char *name, int nlen)
{
    if (pos + nlen >= len)
        return 0;
    if (strncasecmp(tag + pos, name, (size_t)nlen) != 0)
        return 0;
    /* <ctype.h> functions require a value representable as unsigned char. */
    return isspace((unsigned char)tag[pos + nlen]) != 0;
}

/*
 * Classifies the HTML tag whose text starts at `tag` (the byte right after
 * the '<').
 *
 * tag:      tag text; the element name must be followed by whitespace to be
 *           recognised (parser() turns the closing '>' into a space before
 *           calling).
 * len:      number of bytes at `tag` that may be examined; a value <= 0 makes
 *           the function return 0 without touching `tag`.
 * back_tag: if not NULL, receives 1 when the tag starts with '/' (closing
 *           tag) and 0 otherwise.
 *
 * Returns PTAG_B for b/strong, PTAG_I for i/em, PTAG_H for h1..h6,
 * PTAG_TITLE for title, PTAG_SCRIPT for script, and 0 for anything else.
 */
int tag_parser(char* tag, int len, char* back_tag)
{
    int i = 0;

    if (back_tag != NULL)
        *back_tag = 0;
    if (tag == NULL || len <= 0)
        return 0;

    if (tag[0] == '/')
    {
        if (back_tag != NULL)
            *back_tag = 1;
        i = 1;
    }

    if (tag_name_at(tag, len, i, "b", 1) || tag_name_at(tag, len, i, "strong", 6))
        return PTAG_B;
    if (tag_name_at(tag, len, i, "i", 1) || tag_name_at(tag, len, i, "em", 2))
        return PTAG_I;
    if (tag_name_at(tag, len, i, "title", 5))
        return PTAG_TITLE;
    if (tag_name_at(tag, len, i, "script", 6))
        return PTAG_SCRIPT;

    /* Headings: 'h' + one digit 1..6 + whitespace. */
    if ((i + 2 < len)
        && ((tag[i] == 'h') || (tag[i] == 'H'))
        && (tag[i + 1] >= '1') && (tag[i + 1] <= '6')
        && isspace((unsigned char)tag[i + 2]))
        return PTAG_H;

    return 0;
}
/*
 * Bit-flag helpers.  For set/unset, `flags` must be a modifiable lvalue and
 * is updated in place; check yields the bits of `mask` present in `flags`
 * (nonzero when at least one is set).
 */
#define xlbit_set(flags, mask)   ((flags) |= (mask))
#define xlbit_unset(flags, mask) ((flags) &= ~(mask))
#define xlbit_check(flags, mask) ((flags) & (mask))
/*
 * Appends the `wlen`-byte word followed by the `slen`-byte suffix at *pos and
 * advances *pos.  Returns 0 on success, or -1 without writing anything when
 * the bytes would not leave room in buf[0..blen-1] for the terminating NUL.
 */
static int parser_emit(char *buf, int blen, char **pos,
                       const char *word, size_t wlen,
                       const char *suffix, size_t slen)
{
    size_t used = (size_t)(*pos - buf);
    size_t room = (size_t)(blen - 1);   /* last byte is reserved for '\0' */

    if (wlen > room - used || slen > room - used - wlen)
        return -1;
    memcpy(*pos, word, wlen);
    memcpy(*pos + wlen, suffix, slen);
    *pos += wlen + slen;
    return 0;
}

/*
 * Extracts the indexable words (runs of ASCII letters and digits, lowercased)
 * from `url` and from the body of the HTTP response in `doc`, writing one
 * "word CONTEXT\n" line per word into `buf`.
 *
 * CONTEXT is "U" for words taken from the URL.  For words from the page it is
 * the concatenation, in this order, of 'B' (inside b/strong), 'H' (inside
 * h1..h6), 'I' (inside i/em) and 'T' (inside title), or "P" when none of
 * those elements is open.  Text inside <script> elements and inside tags is
 * skipped.
 *
 * url:  NUL-terminated URL; its uppercase ASCII letters are lowercased in
 *       place.  A NULL url contributes no words.
 * doc:  NUL-terminated raw HTTP response; modified in place (uppercase
 *       letters in indexed words are lowercased, and each '>' that closes a
 *       tag is overwritten with a space so tag_parser() sees a
 *       whitespace-terminated element name).
 * buf:  output buffer of `blen` bytes; NUL-terminated on success.
 *
 * Returns, encoded in the pointer-typed return value for compatibility with
 * existing callers:
 *   NULL       - `doc` is NULL or parser_init() rejected it;
 *   (char *)-1 - `buf` is NULL, `blen` <= 0, or the output does not fit;
 *   otherwise  - the number of bytes written to `buf`, excluding the NUL.
 */
char* parser(char* url, char* doc, char* buf, int blen)
{
    char *p, *purl, *word;
    char *pbuf = buf;
    char *ptag = NULL;          /* position of the most recent '<' */
    char back_tag = 0;
    char intag = 0, inscript = 0;
    char suffix[8];
    size_t slen;
    unsigned tag_flag = 0;
    int ret;

    if (doc == NULL)
        return NULL;
    p = parser_init(doc);
    if (p == NULL)
        return NULL;
    if (buf == NULL || blen <= 0)
        return (char *)(intptr_t)-1;

    /* parsing URL */
    purl = (url != NULL) ? url : p + strlen(p);   /* NULL url: nothing to scan */
    while (*purl != '\0')
    {
        if (!xl_isindexable(*purl))
        {
            purl++;
            continue;
        }
        word = purl;
        while (xl_isindexable(*purl))
        {
            if (xl_isupper(*purl))
                xl_tolower(*purl);
            purl++;
        }
        if (parser_emit(buf, blen, &pbuf, word, (size_t)(purl - word), " U\n", 3) != 0)
            return (char *)(intptr_t)-1;
    }

    /* parsing page */
    while (*p != '\0')
    {
        if (*p == '<')
        {
            ptag = p;
            intag = 1;
            p++;
            continue;
        }
        if (*p == '>')
        {
            /*
             * Only a '>' that closes a previously seen '<' ends a tag.  A
             * stray '>' in plain text is skipped: there is no tag start to
             * hand to tag_parser().
             */
            if (intag)
            {
                *p = ' ';
                ret = tag_parser(ptag + 1, (int)(p - ptag), &back_tag);
                switch (ret)
                {
                case PTAG_B:
                    if (back_tag == 0)
                        xlbit_set(tag_flag, _B_TAG);
                    else
                        xlbit_unset(tag_flag, _B_TAG);
                    break;
                case PTAG_I:
                    if (back_tag == 0)
                        xlbit_set(tag_flag, _I_TAG);
                    else
                        xlbit_unset(tag_flag, _I_TAG);
                    break;
                case PTAG_H:
                    if (back_tag == 0)
                        xlbit_set(tag_flag, _H_TAG);
                    else
                        xlbit_unset(tag_flag, _H_TAG);
                    break;
                case PTAG_TITLE:
                    if (back_tag == 0)
                        xlbit_set(tag_flag, _TITLE_TAG);
                    else
                        xlbit_unset(tag_flag, _TITLE_TAG);
                    break;
                case PTAG_SCRIPT:
                    inscript = (back_tag == 0) ? 1 : 0;
                    break;
                default:
                    break;
                }
                intag = 0;
            }
            p++;
            continue;
        }
        if (!xl_isindexable(*p) || inscript || intag)
        {
            p++;
            continue;
        }

        word = p;
        while (xl_isindexable(*p))
        {
            if (xl_isupper(*p))
                xl_tolower(*p);
            p++;
        }

        /* Build " <context>\n": at most ' ' + "BHIT" + '\n' = 6 bytes. */
        slen = 0;
        suffix[slen++] = ' ';
        if (xlbit_check(tag_flag, _B_TAG))
            suffix[slen++] = 'B';
        if (xlbit_check(tag_flag, _H_TAG))
            suffix[slen++] = 'H';
        if (xlbit_check(tag_flag, _I_TAG))
            suffix[slen++] = 'I';
        if (xlbit_check(tag_flag, _TITLE_TAG))
            suffix[slen++] = 'T';
        if (tag_flag == 0)
            suffix[slen++] = 'P';
        suffix[slen++] = '\n';

        if (parser_emit(buf, blen, &pbuf, word, (size_t)(p - word), suffix, slen) != 0)
            return (char *)(intptr_t)-1;
    }

    *pbuf = '\0';
    return (char *)(intptr_t)(pbuf - buf);
}