# Determine the B (perl guts) class of a value.  Accepts either a reference
# or a plain scalar (which is referenced on the fly).  Non-PVMG values are
# reported by their B::class name directly; a PVMG is narrowed to "PV" when
# its POK ("string is valid") flag is set, and anything else is fatal —
# deliberately, so unexpected flag combinations get noticed.
sub guess_perl_type {
    my $self = shift;
    my $node = shift;
    # svref_2object wants a reference; wrap non-refs before handing them over
    my $obj = B::svref_2object(ref($node) ? $node : \$node);
    my $class = B::class($obj);
    return $class unless $class eq "PVMG";
    my $flags = $obj->flagspv;
    return "PV" if $flags =~ "POK";    # flag string contains POK -> string-ish scalar
    die "what type is $flags?";
}

####

#!/usr/bin/perl

# XML::SAX::ExpatNB - push-style ("non-blocking") SAX parsing on top of
# XML::SAX::Expat, exposing XML::Parser's parse_start/parse_more/parse_done
# incremental interface so the caller controls the read loop.
package XML::SAX::ExpatNB;

use base qw/XML::SAX::Expat/;

use strict;
use warnings;

use XML::Parser ();
use Carp qw/croak/;

# Create the underlying XML::Parser object once and cache it on the instance.
sub _really_create_parser {
    my $p = shift;
    my $opt = shift;
    $p->{_xml_parser_obj} ||= $p->SUPER::_create_parser($opt);
}

# Overridden so the superclass machinery is handed the ExpatNB object
# instead of constructing a fresh (blocking) parser of its own.
sub _create_parser {
    my $p = shift;
    $p->_expat_obj;
}

# Accessor for the ExpatNB object (the result of ->parse_start below).
sub _expat_obj {
    my $p = shift;
    $p->{_expat_nb_obj} = shift if @_;
    $p->{_expat_nb_obj};
}

# Accessor for the cached XML::Parser object.
sub _parser_obj {
    my $p = shift;
    $p->{_xml_parser_obj} = shift if @_;
    $p->{_xml_parser_obj};
}

# SAX entry point.  Only string sources are supported — the whole point of
# this subclass is that the caller fetches the data and feeds it in chunks.
sub parse {
    my $p = shift;
    my $opts = $p->get_options(@_);
    if ($p->{Parent}){
        return $p->{Parent}->parse($opts);
    } else {
        if (defined $opts->{Source}{String}){
            return $p->_parse_string($opts->{Source}{String});
        } else {
            croak "The only thing I know how to parse is a string. You have to fetch me the data yourself.";
        }
    }
}

# Feed another chunk of XML.  Delegates to parse_string — NOTE(review):
# parse_string is not defined in this package; presumably inherited from
# the SAX base class — confirm against XML::SAX::Base.
sub parse_more {
    my $p = shift;
    $p->parse_string(@_);
}

# Hand one chunk to the ExpatNB object, implicitly starting an incremental
# parse if one is not already in progress.
sub _parse_string {
    my $p = shift;
    my $xml = shift;
    $p->parse_start unless $p->{_parsing};
    $p->_expat_obj->parse_more($xml);
}

# Begin an incremental parse; fatal if one is already in progress.
sub parse_start {
    my $p = shift;
    my $opt = shift;
    croak "Can't parse_start - Already started" if $p->{_parsing};
    $p->{_parsing} = 1;
    $p->_really_create_parser($opt);
    # parse_start on the XML::Parser object yields the ExpatNB object
    $p->_expat_obj($p->_parser_obj->parse_start);
}

# Finish the incremental parse; fatal unless one is in progress.
sub parse_done {
    my $p = shift;
    croak "Can't parse_done - Havn't started parsing" unless $p->{_parsing};
    undef $p->{_parsing};
    $p->_expat_obj->parse_done;
}

__PACKAGE__

__END__

=pod

=head1 NAME

XML::SAX::ExpatNB - XML::SAX::Expat subclass for nonblocking parsing.
=head1 SYNOPSIS use XML::SAX::ExpatNB; # don't do this, use XML::SAX::ParserFactory my $p = XML::SAX::ExpatNB->new( Handler => MyHandler->new ); $p->parse_start; while (){ $p->parse_more($_); # or $p->parse_string($_); } $p->parse_done; =head1 DESCRIPTION Most XML parsers give a callback interface within an encapsulated loop. That is, you call $p->parse_whatever($whatever); And eventually, when C<$whatever> is depleted by the parser, C<< $p->parse >> will return. Sometimes you don't want the parser to control the loop for you. For example, if you need to retrieve your XML in chunks in a funny way, you might need to do something like my $doc = ''; while (defined(my $buffer = get_more_xml())) { $doc .= $buffer; } $p->parse_string($doc); which is not very convenient, or efficient. You could use L to tie a filehandle which does this for you, but that only works some of the time (for example, say you have two inputs coming in simultaneously). L solves this by providing three methods: =over 4 =item parse_start =item parse_more =item parse_done =back This interface lets you move the loop to outside the parser, retaining control. The callbacks are executed in the same manner, just that now, when there is nothing left to parse, instead of taking more data from a source on its own, the parser returns control to you. $p->parse_start; # you can omit this - parse_start will # be called automatically as needed while(defined(my $buffer = get_more_xml())) { $p->parse_more($buffer); } $p->parse_done; This module is a subclass of L which is to L as L is to L itself.
=cut

####

Initially, Log2Backtrack() will put the cursor here:
This is a sample line of text which we will move around
^

Log2Right() will then put the cursor here
This is a sample line of text which we will move around
^

Log2Left() will afterwards put the cursor here
This is a sample line of text which we will move around
^

Log2Backtrack() at this point will return the cursor to here
This is a sample line of text which we will move around
^

And now, Log2Right() will move the cursor to
This is a sample line of text which we will move around
^

####

" NOTE(review): the lhs key sequences of these mappings were lost in
" formatting — each map needs a key, e.g.  map <F2> :call Log2Left()<CR>
map :call Log2Left()
map :call Log2Backtrack()
map :call Log2Right()

####

" Binary-search cursor movement along the current line.  Per-buffer state:
" a [left, right] column window and a stack of the narrowing moves taken,
" so Log2Backtrack() can undo the most recent one.

" Reset the window if the cursor moved since our last call (user went
" elsewhere), otherwise keep narrowing where we left off.
function Log2InitState()
    if !exists("b:log_2_cursor") || b:log_2_cursor != col(".") || b:log_2_line != line(".")
        let b:log_2_cursor = col(".")
        let b:log_2_line = line(".")
        let b:log_2_left = 0
        let b:log_2_right = strlen(getline(b:log_2_line))
        let b:log_2_call_stack = ""
    endif
endfunction

" Jump to the midpoint of the current [left, right] window (plus offset a:i).
function Log2MoveIt(i)
    let b:log_2_cursor = a:i + b:log_2_left + ( (b:log_2_right - b:log_2_left) / 2 )
    call cursor(b:log_2_line, b:log_2_cursor)
endfunction

" Record a one-character move tag on the per-buffer stack.
function Log2StackPush (val)
    let b:log_2_call_stack = b:log_2_call_stack . a:val
endfunction

" Pop and return the most recent move tag ("" when the stack is empty).
function Log2StackPop ()
    let ret = strpart(b:log_2_call_stack, strlen(b:log_2_call_stack)-1, 1)
    let b:log_2_call_stack = substitute(b:log_2_call_stack, ".$", "", "")
    return ret
endfunction

" Narrow the window to its left half and move to the new midpoint.
function Log2Left()
    call Log2InitState()
    if b:log_2_right - b:log_2_left > 1
        let b:log_2_right = b:log_2_cursor
        call Log2StackPush("l")
        call Log2MoveIt(0)
    endif
endfunction

" Narrow the window to its right half and move to the new midpoint.
function Log2Right()
    call Log2InitState()
    if b:log_2_right - b:log_2_left > 1
        let b:log_2_left = b:log_2_cursor
        call Log2StackPush("r")
        call Log2MoveIt(1)
    endif
endfunction

" Undo the last narrowing by widening the window back out on the popped
" side (clamped to the line), or reset the window if the stack is empty.
function Log2Backtrack()
    call Log2InitState()
    let popped = Log2StackPop()
    if popped == "r"
        let b:log_2_left = b:log_2_left - (b:log_2_right - b:log_2_left)
        if b:log_2_left < 0
            let b:log_2_left = 0
        endif
    elseif popped == "l"
        let b:log_2_right = b:log_2_right + ( b:log_2_right - b:log_2_left)
        " BUGFIX: clamp the *right* edge to the line length; the original
        " tested b:log_2_left and assigned the length into b:log_2_line
        " (the line-number variable), corrupting the state.
        if b:log_2_right > strlen(getline(b:log_2_line))
            let b:log_2_right = strlen(getline(b:log_2_line))
        endif
    else
        let b:log_2_left = 0
        let b:log_2_right = strlen(getline(b:log_2_line))
    endif
    call Log2MoveIt(0)
endfunction

####

#!/usr/bin/perl

{
    package MySchema;
    use base qw/Class::DBI::Schema::SQL::Translator/;

    sub dbh {
        DBI->connect_cached( ... ); # mine comes from a config module
    }
}

{
    package MyTable;
    use base qw/Class::DBI::Schema::SQL::Translator::Table/;

    sub schema_class { "MySchema" }
}

{
    # this should really be a file
    package Foo::Schema;
    use base qw/MyTable/;
    use Class::DBI::Schema::SQL::Translator; # for CAPS_SUBS

    sub init {
        my $self = shift->SUPER::init(@_);

        $self->add_fields(
            # these guys create SQL::Translator::Schema::Field
            # objects, or transform them. Prototypes make them
            # sort of like SQL
            ID,
            STRING name => 30,
            UNIQUE INTEGER "blah",
        );

        return $self; # SQL::Translator objects are Class::Base based
    }
}

# elsewhere

use Foo::Schema; # import creates an instance
# ...
or
# my $table = Foo::Schema->instance; # because ->isa("Class::Singleton")

# creating the schema singleton will cause it to register with the global schema
# and stub the CDBI class with an AUTOLOAD.. more on this several lines down

# the fun part:
# just use CDBI

Foo->create({ name => "bob" });

# some explanation:

# AUTOLOAD causes the schema object to create the CDBI class,
# by calling $schema->make_cdbi_class.

# make_cdbi_class will do some standard CDBI stuff.

# it will also make a db_Main if Foo::Schema->schema_class (the global schema)
# ->can("dbh")

# Foo::Schema->cdbi_class == Foo, the ::Schema suffix is removed.

# if Foo::Schema->cdbi_class->db_Main->tables doesn't contain
# Foo::Schema->cdbi_class->moniker, it uses SQL::Translator to create the table
# in Foo->db_Main

# the Class::DBI::Schema::SQL::Translator::Table init checks for
# ->can("SUB"), so you can say

package Foo::Schema;
sub FIELDS { ID, STRING blah => $length, };

# or even

use constant FIELDS => [ ID, STRING blah => $length ];

# but i have to do this for relationships too.

####

#!/usr/bin/perl

# Sketch of an exception class: a compile-time FIXME marker is left in
# place of a real interface declaration (see the prove transcript below,
# where Devel::FIXME emits it once and then stays quiet).
package MyClass::Exception;
use base qw/Error/;
use base qw/MyClass::Message/;
# FIXME use interface;
...

####

• [syeeda:~/src] nothingmuch % prove t
t/exception-api....# FIXME: use interface; at /Users/nothingmuch/src/lib/MyClass/Exception.pm line 4
t/exception-api....ok
All tests successful.
Files=1, Tests=10, 5 wallclock secs ( 1.71 cusr + 0.24 csys = 1.95 CPU)

####

• [syeeda:~/src] nothingmuch % touch /Users/nothingmuch/src/lib/MyClass/Exception.pm
• [syeeda:~/src] nothingmuch % prove t
t/exception-api....ok
All tests successful.
Files=1, Tests=10, 5 wallclock secs ( 1.71 cusr + 0.24 csys = 1.95 CPU)

####

#!/usr/bin/perl

# Example rules file: returns a list of rule coderefs, each called with the
# FIXME object as its argument; DROP suppresses the reminder.
use Date::Manip;
use Time::Piece;
use File::Find::Rule;

# Turn a Date::Manip-parsable phrase (e.g. "yesterday") into a Time::Piece.
sub parse_date { Time::Piece->strptime(ParseDate(shift), "%Y%m%d%T") }

(
    sub { # don't remind to fix code which I'm not responsible for
        my $self = shift;
        DROP if $self->{file} =~ m{
            /usr/.*lib          # not mine, 5.9.1 tree
            | /.*Library/Perl   # not mine, OSX stock
            | support           # [patched] support code, test for manually
        }x;
    },
    sub { # don't remind to fix files which i've been working on recently
        my $self = shift;
        DROP unless File::Find::Rule->
            file->
            mtime( "<=" . parse_date("yesterday")->epoch )->
            in( $self->{file} );
    },
);

####

PERL5OPT=-MDevel::FIXME::Rules::PerlFile

####

#!/usr/bin/perl

package Devel::FIXME;

use 5.008_000; # needs FH interface to var

use strict;
use warnings;

use Exporter;
use Scalar::Util qw/reftype/;
use List::Util qw/first/;
use Carp qw/carp croak/;

our @EXPORT = qw/FIXME/;
our @EXPORT_OK = qw/SHOUT DROP CONT/;
our %EXPORT_TAGS = ( "constants" => \@EXPORT_OK );

our $VERSION = 0.01;

# some constants for rules
sub CONT () { 0 };
sub SHOUT () { 1 };
sub DROP () { 2 };

my %lock; # to prevent recursion

our %rets; # return value cache
our $cur; # the current file, used in an eval
our $err; # the current error, for rethrowal
our $inited; # whether the code ref was installed in @INC, and all

{ my $anon = ''; open my $fh, "<", \$anon or die $!; close $fh; } # otherwise perlio require stuff breaks

# Class method: scan $0 and everything already in %INC for FIXME comments,
# then install the @INC hook.  Idempotent — guarded by $inited.
sub init {
    my $pkg = shift;
    unless($inited){
        $pkg->readfile($_) for ($0, sort grep { $_ ne __FILE__ } (values %INC)); # readfile on everything loaded, but not us (we don't want to match our own docs)
        $pkg->install_inc;
    }
    $inited = 1;
}

our $carprec = 0;

# Put a coderef at the head of @INC that really loads each required file
# (via a recursion-locked eval'd require), scans its source for FIXME
# comments, and hands require back a stub that yields the original module's
# return value (or rethrows its compilation error).
sub install_inc {
    my $pkg = shift;

    unshift @INC, sub { # YUCK! but tying %INC didn't work, and source filters are applied per caller. XS for source filter purposes is yucki/er/
        my $self = shift;
        my $file = shift;

        return undef if $lock{$file}; # if we're already processing the file, then we're in the eval several lines down. return.
        local $lock{$file} = 1; # set the lock

        unless (ref $INC[0] and $INC[0] == $self){ # if this happens, some stuff won't be filtered. It shouldn't happen often though.
            local @INC = grep { !ref or $_ != $self } @INC; # make sure we don't recurse when carp loads it's various innards, it causes a mess
            carp "FIXME's magic sub is no longer first in \@INC";
        }

        # create some perl code that gives back the return value of the original package, and thus looks like you're really requiring the same thing
        my $buffer = "\${ delete \$Devel::FIXME::rets{q{$file}} };"; # return what the last module returned. I don't know why it doesn't work without deref

        # really load the file
        local $cur = $file;
        my $ret = eval 'require $Devel::FIXME::cur'; # require always evaluates the return from an evalfile in scalar context
        ($err = "$@\n") =~ s/\nCompilation failed in require at \(eval \d+\)(?:\[.*?\])? line 1\.\n//s;
        $buffer = 'die $Devel::FIXME::err' if $@; # rethrow this way for the sake of shutting up base

        # save the return value so that the original require can have it
        $rets{$file} = \$ret;

        # look for FIXME comments
        $pkg->readfile($INC{$file}) if ($INC{$file});

        open my $fh, "<", \$buffer;
        $fh; # empty stub like thing. Simply returns the last thing the real file did.
    };
}

# Scan one file line by line, creating (and rule-filtering) a FIXME object
# for every '# FIXME <text>' comment found.  Silently skips non-files.
sub readfile {
    my $pkg = shift;
    my $file = shift;
    return unless -f $file;

    open my $src, "<", $file or die "couldn't open $file: $!";
    local $_;
    while(<$src>){
        $pkg->FIXME(
            text => "$1",
            line => $.,
            file => $file,
        ) if /#\s*FIXME\s+(.*)$/; # if a file matches the comment, emit a warning.
    } continue { last if eof $src }; # is this a platform bug on OSX?
    close $src;
}

sub eval { # evaluate the rules, one by one
    my $self = shift;
    foreach my $rule ($self->rules){
        my $action = &$rule($self);
        if ($action == SHOUT){ # if the rule said to shout, we shout and stop
            return $self->shout;
        } elsif ($action == DROP){ # if the rule says to drop, we stop
            return undef;
        }
        # otherwise we keep looping through the rules
    }
    $self->shout; # and shout if there are no more rules left.
}

sub rules { }; # shout by default

sub shout { # generate a pretty string and send it to STDERR
    my $self = shift;
    warn("# FIXME: $self->{text} at $self->{file} line $self->{line}.\n");
}

sub new { # an object per FIXME statement
    my $pkg = shift;

    # accept a hashref, a lone text string, or key/value pairs
    my %args;
    if (@_ == 1){ # if we only have one arg
        if (ref $_[0] and reftype($_[0]) eq 'HASH'){ # and it's a hash ref, then we take the hashref to be our args
            %args = %{ $_[0] };
        } else { # if it's one arg and not a hashref, then it's our text
            %args = ( text => $_[0] );
        }
    } elsif (@_ % 2 == 0){ # if there's an even number of arguments, they are key value pairs
        %args = @_;
    } else { # if the argument list is anything else we complain
        croak "Invalid arguments";
    }

    my $self = bless \%args, $pkg;

    # fill in some defaults
    $self->{package} ||= (caller(1))[0];
    $self->{file} ||= (caller(1))[1];
    $self->{line} ||= (caller(1))[2];

    $self->{script} ||= $0;
    $self->{time} ||= localtime;

    $self;
}

sub import { # export \&FIXME to our caller, /and/ generate a message if there is one to generate
    my $pkg = $_[0];
    $pkg->init;

    # a recognised export name (or no/many args) means a normal Exporter
    # import; a single unrecognised string is treated as FIXME text
    if (@_ == 1 or @_ > 2 or (@_ == 2 and first { $_[1] eq $_ or $_[1] eq "&$_" } @EXPORT_OK, map { ":$_" } keys %EXPORT_TAGS)){
        shift;
        local $Exporter::ExportLevel = $Exporter::ExportLevel + 1;
        $pkg->Exporter::import(@_);
    } else {
        goto \&FIXME;
    }
}

sub FIXME { # generate a method
    my $pkg = __PACKAGE__;
    $pkg = shift if UNIVERSAL::can($_[0],"isa") and $_[0]->isa(__PACKAGE__); # it's a method or function, we don't care
    $pkg->new(@_)->eval;
}

*msg = \&FIXME; # booya.
__PACKAGE__ __END__ =pod =head1 NAME FIXME - Semi intelligent, pending issue reminder system. =head1 SYNOPSIS this($code)->isa("broken"); # FIXME what shall we fix? =head1 DESCRIPTION Usually we're too busy to fix things like circular refs, edge cases and things when we're spewing code into the editor. A common thing to do is to make a comment saying ... # FIXME I hope someone finds this comment somewhere in your code, and then search through your sources for occurrences of I every now and then. This works, pretty much, until your code base grows considerably, and you have too many FIXMEs to prioritise properly. The solution for me was to create this package, which gives the author an "intelligent" way of being reminded. =head1 DECLARATION INTERFACE There are several ways to get your code fixed by you in some indeterminate time in the future. The first is a sort-of source filter based compile time fix, which does not affect shipped code. $code; # FIXME broken That's it. The most reasonable way to get it to work is to set the environment variable I, so that it contains C<-MFIXME>. The second interface is a compile time, somewhat more explicit way of emitting messages. use Devel::FIXME "broken"; This can be repeated for additional messages as needed. This is useful if you want your FIXMEs to break as you test it with a perl tree that doesn't have FIXME.pm in it. The third, and probably most problematic is a runtime, explicit way of emitting messages: $code; FIXME("broken"); This relies on FIXME to have been imported into the current namespace, which is probably not always the case. Provided you know FIXME is loaded I in the running perl interpreter, you can use a fully qualified version: $code; Devel::FIXME::FIXME("broken"); or if you feel that's not pretty enough: $code; Devel::FIXME->msg("broken"); # or $code; Devel::FIXME::msg("broken"); But do use the former two methods instead.
=head1 OUTPUT FILTERING =head2 Rationale There are some problems with simply grepping for occurrences of I: =over 4 =item * It's messy - you get a bajillion lines, if your source tree is big enough. =item * You need context, which can be provided for, but is a bit of work (and adds to the clutter). =item * You (well I anyway) forget to do it. And no, cron is not perfect for this. =back The solution to the first two problems is to make the reporting smart, so that it decides which FIXMEs are printed and which aren't. The solution to the last problem is to have it happen automatically whenever the source code in question is used. =head2 Principle The way FIXMEs are filtered is similar to how a firewall filters packets. Each FIXME statement is considered as it is found, by iterating through some rules, which ultimately decide whether to print the statement or not. This may sound a bit overkill, but I think it's useful. What it means is that you can get reminded of FIXMEs in source files that are more than a week old, or when your release schedule reaches feature freeze, or if your program is in the stable tree of your source management repository, or whatever. =head2 Practice Currently the FIXMEs are filtered by calling the class method C, and evaluating the subroutine references that are returned, as methods on the fixme object. In the future, a witty little grammar will be written, to allow you to do the trivial things concisely, and use perl code for more complicated rules, in a config file that is global to your settings. =head1 BUGS If I had a nickel for every bug you could find in this module, I would have zero or more nickels. =head1 COPYRIGHT & LICENSE Same as perl. =head1 AUTHOR Yuval Kogman #### =head1 GETTING A HANDLE ON IO =head2 Introduction This tutorial will delve into the naughtier, uglier parts of POSIX centric I/O.
Herein are covered the nasty details of the various calls, and the layers they belong to, the different modes of input and output, and the combination of these aspects into practical examples. The tutorial will not attempt to explain the various ways to obtain the actual handles, except as necessary for specific examples. Its aim is to tutor on the various styles of I/O, that is, to manipulate given handles, in more specialized ways, in the hope that when the kind of interaction with a handle a user wants to attain is known, finding out how to get such a handle will be easy by using the reference (L, L, L, L, L, L). The main focus of the tutorial will be simplicity. Thereafter will come robustness, and then performance. What this means is that I will not systematically append C to every line of example, because I find that distracting. I will also not resort to ugly constructs to gain a little throughput. I think impure examples hinder my ability to convey my ideas clearly. Nuff said, on to the intro. We start with tiny baby steps, and then start striding forward. =head2 What is a filehandle? We'll start by covering the perl specific data type, which abstracts a stream of data. The filehandle. If you already think you know what you're doing, skip onwards a bit. This is really basic stuff. Perl's filehandles are points through which data is moved. You can refer to them by name, or by storing them in a variable. The abstraction focuses around a metaphor of a sort of port hole, or pipe end, which your software can ask the OS to take data from and move it elsewhere, or put data on for your software to read. Data is moved via these orifices in chunks, coming out of or going into a normal variable, as a string. For example, let's say we've opened a file: open my $fh, "/some/file"; This stores a reference to a filehandle in the variable C<$fh>, which will grant you access to the data inside C. Perl allows us to ask for data to come out of filehandles in useful ways.
Let's say we wanted a single line from C to be stored in the variable. my $var = <$fh>; But wait, how do we know which line will come out of C<$fh>? Well, the answer is "the next one". Filehandles are stream oriented. Data will arrive serially, and you can nibble at it, slowly progressing through the stream of data, till it ends. Specifically, handles having to do with files will have an implicit cursor, working behind the scenes, marking the point in the file which the handle is currently at. =head2 Plumbing your handles To move data in and out of file handles you use system calls. We'll start with the two most basic calls there are, L and L, which are available in perl as the builtin functions C and C. Their interfaces are pretty straightforward. Here is a subset of their functionality: sysread $fh, $variable_data_will_be_read_to, $how_much_data_to_read; C takes a filehandle as its first argument, a variable as its second, and a number as its third, and reads as many bytes as are specified in the number, from the handle, into the variable. syswrite $fh, $data_to_write; C will take a filehandle as its first argument, and a string as its second argument, and write the data from the string, to the filehandle. We already know of a way data can be put on a filehandle for us, which was telling the OS what file we'd like it to come from. Writing is just as flexible. The next section discusses ways of telling the OS not only what data is moved around, but where it will go. =head2 Directing data, a conceptual introduction Now that we've a hopefully firm grasp on how data enters and exits your software through handles, let's discuss its movement, specifically, where it goes. The most common use for filehandles is for storing and retrieving data in files. We've already seen opening for reading. We can also write to a file: open my $fh, ">", "/some/file"; The C<< > >> argument will tell C that we want to write to the file (and also to erase its contents first).
When C<$fh> is opened for writing, we simply use C or one of its derivatives on it. But handles are not limited to just files. They can also be sockets, allowing the transfer of data between two unrelated processes, on possibly two different machines. A web server, for example, reads and writes on handles, receiving and sending data to browsers. Handles can be used as pipes to other processes, like to child processes using L or processes in a shell pipeline. The latter case is interesting, because it is done implicitly: cat file | tr a-z A-Z > file.uppercase That command will ask L to read the file C, and then print it to its I. The standard output is a handle that you would normally output data to. What "normally" means in this context will be explained soon. Then L reads data from its I, converts the data, and writes it to I standard output. It does this a chunk at a time. The shell redirect is perhaps the most interesting part: instead of L's STDOUT being connected to the terminal, where the user can read the data, the shell connected L's STDOUT to a handle of its own, which is opened to C. I hope that the example fulfilled its purpose in demonstrating the flexibility of the concept of piping data around through file handles. =head2 The going gets tough Now that we've covered the conceptual basics, lets look in greater detail at the most simple type of handle there is - a single purposed, non seekable, blocking handle. What single purposed means is that it can either read, or write. Not both. What seekable means, is that you can use C to change the cursor position for the file the handle abstracts. Not all handles abstract files, and thus not all handles have cursors. The ones that don't work more simply. A blocking handle refers to the type of semantics the system calls on the handle will work in. Non-seekable handles are implemented in terms of a buffer. The operating system associates some scratch space for it.
As data comes into the buffer from somewhere (it could be your software writing to it, or somebody else if you're on the reading side), it is accumulated in that buffer. When data is read from the handle, it is taken from the buffer. What happens when there is not enough space in the buffer to write anymore? Or not enough data in the buffer to be read? This is where the blocking semantics of this kind of handle comes in. I'm oversimplifying, but basically, if the writing side wants to write a chunk of data that is too big for the space in the buffer, the operating system simply makes it wait with the write till the reading side asks for some data to come out. As data exits the buffer, more space is cleared out, and the writing can continue. Eventually all the data will be written to the buffer, and the write system call that the writing side executed will return. The same goes for reading: the read system call will simply wait until the data that was asked for has been made available. The state in which an operating system puts a process that is waiting for an IO call to complete is referred to as 'blocked'. When a process is blocked it leaves the hardware resources free for other processes to use. Blocking IO has an interesting property, in that it balances resource allocation in a pipeline. Let's say, for example, that you ran this line of shell: cat file.gz | gzip -d | tr a-z A-Z L is doing very little work. It's a simple loop. It reads from the file, and writes to STDOUT. The data that L is getting, on the other hand, is processed more extensively. L performs some complex calculation on the data that enters it, and outputs derived data after this calculation. Then, finally, L performs simple actions, that while more complex than L, they are dwarfed by L. So what happens is that L will read some data, and then write some data, and then read some more data, and write some more data, until the buffer is full, and its write will block.
All this time L's and L's read calls were blocking. Eventually L's read will return, allowing it to do its job, and finally emit data to L. It will turn out that most of the time L will use up CPU time, while L will spend most of its time blocking in write calls, and L will spend most of its time blocking in read calls, but will need some time for calculation too, otherwise L's writes will block. Plan (not really in order): blocking, nonseekable handles, and their conventions: fatal errors, SIGPIPE, etc. promote fault tolerant behavior by default UNIX pipelining mantra explain when blocking is not good, and continue with a single purpose, non seekable handle as used in a select loop to avoid it. mention epoll/kqueue, and perl interfaces thereof. Mention Event/POE as more powerful multiplexing solutions. multiplexing, with a threading approach, as an alternative to select. and a non blocking approach, including SIGIO, nb vs. select, reliability, and latency versus blocking & selected IO. When not to use nonblocking buffering, stdio vs syscalls, different functions, perlio Touch seekable handles briefly, and explain the semantics of blocking and so on as far as file io is concerned, mention files, and discuss that not all things in the filesystem are files: devices (char and block), named pipes, unix domain sockets... Sockets Introduce non stream handles, and discuss the implementations of socket IO, and its multilayered nature, the relationship between streams and datagrams. Implications of networking envs. discuss IO on shared handles discuss accept() on shared sockets in a preforked env appendix: faux IO: open ref, perlio layers and ties