in reply to Re: some help with inline C
in thread some help with inline C
As i said before, parser is a code that my research group has been using for some time with out problems.(though everyone else is a C guru, not perl) I suspect there is an issue with how my code is interacting with parser (or how the perl vars are passed into this C code)I will include the parser code for comleteness :(gdb) backtrace #0 0x00002aaaae17f1f9 in tag_parser (tag=0x2 <Address 0x2 out of boun +ds>, len=-1216936334, back_tag=0x7fffffc91ac7 "") at getandParseWithC_pl_05fd.xs:52 #1 0x00002aaaae17f762 in parser (url=0x4cd9780 "openpolytechnic.ac.nz +/ftp/linux/sunsite/docs/faqs/ftp-faq", doc=0x2aaab7770010 "HTTP/1.1 200 OK\r\nDate: Wed, 12 Oct 2005 01:1 +6:33 GMT\r\nServer: Apache/2.0.54 (Unix) mod_ssl/2.0.54 OpenSSL/0.9.7 +g DAV/2\r\nLast-Modified: Sun, 29 May 2005 11:11:07 GMT\r\nETag: \"19 +3b68-1ead-d3e728c0\"\r\nAcce"..., buf=0x153efac0 "openpolytechnic U\nac U\nnz U\nftp U\nlinux U\nsun +site U\ndocs U\nfaqs U\nftp U\nfaq U\nftp P\nhowto P\nftp P\nfile P\n +transfer P\nprotocol P\nis P\na P\nclient P\nserver P\ntcp P\nprotoco +l P\nthat P\nallows P\na P\nuser P\nto"..., blen=16321) at getandPars +eWithC_pl_05fd.xs:176 #2 0x00002aaaae17fb4a in MyParser (url=0x4cd9780 "openpolytechnic.ac. +nz/ftp/linux/sunsite/docs/faqs/ftp-faq", page=0x2aaab7770010 "HTTP/1.1 200 OK\r\nDate: Wed, 12 Oct 2005 01: +16:33 GMT\r\nServer: Apache/2.0.54 (Unix) mod_ssl/2.0.54 OpenSSL/0.9. +7g DAV/2\r\nLast-Modified: Sun, 29 May 2005 11:11:07 GMT\r\nETag: \"1 +93b68-1ead-d3e728c0\"\r\nAcce"..., len=8160) at getandParseWithC_pl_0 +5fd.xs:310 #3 0x00002aaaae180dcb in XS_main_MyParser (my_perl=0x505010, cv=0xb36 +240) at getandParseWithC_pl_05fd.c:400 #4 0x000000398d69b67e in Perl_pp_entersub () from /usr/lib64/perl5/5.8.6/x86_64-linux-thread-multi/CORE/libperl. +so #5 0x000000398d67f3cd in Perl_runops_debug () from /usr/lib64/perl5/5.8.6/x86_64-linux-thread-multi/CORE/libperl. +so #6 0x000000398d639dbe in perl_run () from /usr/lib64/perl5/5.8.6/x86_ +64-linux-thread-multi/CORE/libperl.so #7 0x0000000000401a01 in main ()
#include <stdio.h> #include <stdlib.h> #include <string.h> #define PTAG_B 1 #define PTAG_I 2 #define PTAG_H 3 #define PTAG_TITLE 4 #define PTAG_SCRIPT 5 #define _TITLE_TAG 0x0001 #define _B_TAG 0x0004 #define _H_TAG 0x0008 #define _I_TAG 0x0010 #define xl_isdigit(c) (((c) >= '0') && ((c) <= '9')) #define xl_islower(c) (((c) >= 'a') && ((c) <= 'z')) #define xl_isupper(c) (((c) >= 'A') && ((c) <= 'Z')) #define xl_isindexable(c) (xl_isdigit(c) || xl_islower(c) || xl_isuppe +r(c)) #define xl_tolower(c) ((c) += 'a' - 'A') char* parser_init(char* doc) { char *p; if (strncasecmp(doc, "HTTP/", 5)) return NULL; for (p = doc; (*p != ' ')&&(*p); p++); if (*p == '\0') return NULL; if (atoi(p) != 200) return NULL; p = strstr(p, "\\r\\n\\r\\n"); if (p == NULL) return NULL; return p+4; } int tag_parser(char* tag, int len, char* back_tag) { int i = 0; if (tag[0] == '/') { *back_tag = 1; i++; } else *back_tag = 0; switch (tag[i]) { case 'b': case 'B': case 'i': case 'I': if (!isspace(tag[i+1])) return 0; if ((tag[i] == 'b') || (tag[i] == 'B')) return PTAG_B; return PTAG_I; case 'e': case 'E': i++; if (((tag[i]=='m')||(tag[i]=='M')) && (isspace(tag[i+1]))) return PTAG_I; return 0; case 'h': case 'H': i++; if (((tag[i]>='1')&&(tag[i]<='6')) && (isspace(tag[i+1]))) return PTAG_H; return 0; case 't': case 'T': i++; if ((0==strncasecmp(tag+i, "itle", 4)) && (isspace(tag[i+4]))) return PTAG_TITLE; return 0; case 's': case 'S': i++; if ((0==strncasecmp(tag+i, "trong", 5)) && (isspace(tag[i+5])) +) return PTAG_B; if ((0==strncasecmp(tag+i, "cript", 5)) && (isspace(tag[i+5])) +) return PTAG_SCRIPT; return 0; default: break; } return 0; } #define xlbit_set(__b1, __b2) ((__b1) |= (__b2)) #define xlbit_unset(__b1, __b2) ((__b1) &= ~(__b2)) #define xlbit_check(__b1, __b2) ((__b1)&(__b2)) char* parser(char* url, char* doc, char* buf, int blen) { char *p, *purl, *word, *ptag, *pbuf; char ch, back_tag, intag, inscript; unsigned tag_flag; int ret; p = parser_init(doc); if (p == NULL) return 0; pbuf = buf; /* parsing URL */ purl = url; while (*purl != '\0') { if (!xl_isindexable(*purl)) { purl++; continue; } word = purl; while (xl_isindexable(*purl)) { if (xl_isupper(*purl)) xl_tolower(*purl); purl++; } ch = *purl; *purl = '\0'; if (pbuf-buf+purl-word+3 > blen-1) return -1; sprintf(pbuf, "%s U\\n", word); pbuf += (purl-word)+3; *purl = ch; } /* parsing page */ tag_flag = 0; intag = 0; inscript = 0; while (*p != '\0') { if (!xl_isindexable(*p)) { if (*p != '>') { if (*p == '<') { ptag = p; intag = 1; } p++; continue; } *p = ' '; ret = tag_parser(ptag+1, p-ptag, &back_tag); switch (ret) { case PTAG_B: if (back_tag == 0) xlbit_set(tag_flag, _B_TAG); else xlbit_unset(tag_flag, _B_TAG); break; case PTAG_I: if (back_tag == 0) xlbit_set(tag_flag, _I_TAG); else xlbit_unset(tag_flag, _I_TAG); break; case PTAG_H: if (back_tag == 0) xlbit_set(tag_flag, _H_TAG); else xlbit_unset(tag_flag, _H_TAG); break; case PTAG_TITLE: if (back_tag == 0) xlbit_set(tag_flag, _TITLE_TAG); else xlbit_unset(tag_flag, _TITLE_TAG); break; case PTAG_SCRIPT: if (back_tag == 0) inscript = 1; else inscript = 0; default: break; } intag = 0; p++; continue; } if (inscript || intag) { p++; continue; } word = p; while (xl_isindexable(*p)) { if (xl_isupper(*p)) xl_tolower(*p); p++; } ch = *p; *p = '\0'; if (pbuf-buf+p-word+1 > blen-1) return -1; sprintf(pbuf, "%s ", word); pbuf += (p-word)+1; if (xlbit_check(tag_flag, _B_TAG)) { if (pbuf-buf+1> blen-1) return -1; *pbuf = 'B'; pbuf++; } if (xlbit_check(tag_flag, _H_TAG)) { if (pbuf-buf+1> blen-1) return -1; *pbuf = 'H'; pbuf++; } if (xlbit_check(tag_flag, _I_TAG)) { if (pbuf-buf+1> blen-1) return -1; *pbuf = 'I'; pbuf++; } if (xlbit_check(tag_flag, _TITLE_TAG)) { if (pbuf-buf+1> blen-1) return -1; *pbuf = 'T'; pbuf++; } if (tag_flag == 0) { if (pbuf-buf+1> blen-1) return -1; *pbuf = 'P'; pbuf++; } if (pbuf-buf+1> blen-1) return -1; *pbuf = '\\n'; pbuf++; *p = ch; } *pbuf = '\0'; return pbuf-buf; }
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^3: some help with inline C
by ikegami (Patriarch) on Oct 17, 2007 at 18:15 UTC | |
|
Re^3: some help with inline C
by syphilis (Archbishop) on Oct 18, 2007 at 10:50 UTC |