in reply to Bit handling in Perl

The first part can be done this way:

#! perl -slw
# Convert CSV rows of 0/1 flags into packed binary records, one bit per field.
#
# Each input line is one record. With the 'b*' template, pack() turns every
# '0'/'1' character into a single bit (ascending bit order within each byte),
# so an 880-field row becomes a 110-byte string.
#
# The -l switch makes print append "\n" after every record. Remove the 'l'
# from the shebang line if the records should not be newline-terminated.
use strict;

# The output is raw bytes; prevent any newline/encoding translation on STDOUT.
binmode STDOUT;

while ( my $line = <DATA> ) {
    # Keep only the bit characters. Deleting just ',' and "\n" is not enough:
    # pack 'b*' looks at the low bit of *every* character it is given, so a
    # stray space, "\r" or other separator would silently become a bit.
    $line =~ tr/01//cd;

    # A blank line (e.g. a trailing newline at end of input) is not a record.
    next unless length $line;

    print pack 'b*', $line;
}

__DATA__
1,0,0,1,0,0,1,1,0,0,0,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,1,0,0,0,0,1,0,0,1,1,0,1,0,0,0,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0,1,1,1,1,1,0,0,1,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,1,0,1,0,0,0,1,0,1,0,0,1,0,0,1,0,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,1,0,0,1,0,1,0,0,0,1,0,0,0,1,0,0,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1,0,1,0,1,0,1,1,1,1,0,1,1,0,1,0,0,1,0,1,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,1,1,0,1,0,0,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,0,1,0,0,0,1,0,1,1,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,0,0,0,1,1,1,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,0,1,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,0,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1,1,1,1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,1,0,1,0,1,1,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,0,1,0,0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,1,0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,1,1,1,0,1,1,0,1,1,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,0,0,1,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,1,1,1,0,0,0,0,1,1,1,0,1,0,1,1,1,0,0,1,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0,1,0,0,1,0,1,0,0,0,1,0,1,0,1,1,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,1,0,1,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,1,0,0,0,1,0,1,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,1,0,1,0,0,1,0,0,1,1,1,1
0,1,0,1,1,1,1,1,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,1,1,1,1,0,1,0,0,1,1,1,1,0,0,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,1,1,1,1,0,1,0,1,0,1,1,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,1,1,1,0,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,0,1,1,1,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,1,0,0,1,0,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,1,1,1,0,0,1,0,1,1,0,0,0,1,1,1,0,1,0,0,1,1,1,1,0,0,1,0,1,1,1,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,1,1,1,0,0,1,1,0,0,0,0,1,0,1,0,0,1,1,1,1,1,0,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,0,1,0,0,0,1,1,1,1,0,1,0,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,1,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,0,1,1,1,0,1,0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,0,0,1,1,0,1,0,0,1,0,1,0,1,1,0,1,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1,1,0,1,1,1,0,1,0,1,1,1,1,0,1,0,1,0,1,1,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,0,0,0,1,0,1,1,1,1,0,0,1,0,0,1,1,1,0,0,1,1,1,0,1,1,1,0,0,0,1,0,0,1,1,1,1,1,1,1,0,0,1,0,1,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,1,0,1,0,1,1,1,0,1,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,0,1,1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,1,0,0,1,0,1,1,0,0,1,1,0,1,1,0,1,0,0,1,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,1,1,1,0,1,0,0,1,1,0,0,0,1,0,1,1,1,0,1,0,1,1,1,0,1,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,0,1,0,1,0,1

That assumes you want each binary record terminated with a newline. (Remove the 'l' from the shebang line if not.)

For the second part, you'll have to describe the format of the second file in more detail.


With the rise and rise of 'Social' network sites: 'Computers are making people easier to use everyday'
Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error.
"Science is about questioning the status quo. Questioning authority".
In the absence of evidence, opinion is indistinguishable from prejudice.

Replies are listed 'Best First'.
Re^2: Bit handling in Perl
by vaidhy_m (Novice) on Oct 11, 2014 at 05:30 UTC

    Thank you BrowserUk! I am sorry if I was not clear earlier. My first file has 880 columns of 1/0 running through many rows, as many as 50 million. My second file has a single row of 880 columns. I want to compare every bit in file 2 to the corresponding bit in a row of file 1, calculate a value based on the comparison for that row, and repeat the process for all rows.

    I have both files in csv format that are huge and I want them converted to binary such that every column in my file will be a bit and not a byte in the binary file. Won't pack convert each column into bytes? As the column holds only 1 or 0 I want them in one bit. I want the comparison made and values calculated on the converted binary files.

    I hope this makes sense.

      Won't pack convert each column into bytes?

      No. When pack is used with the 'b' template it converts (packs!) each 0 or 1 in the input string to a single bit.

      So for your 880 field CSV, the output is a 110 byte string.

      Once you've converted both files to binary format, you can compare two records (count the number of bits set in both strings) using:

      # AND the two records together (a bitwise string AND, assuming both
      # operands are packed strings), then let unpack's %32 checksum over the
      # 'b*' bit string add up the 1-bits that survive.
      my $sharedBits   = $record1 & $record2;
      my $bitsInCommon = unpack '%32b*', $sharedBits;

      With the rise and rise of 'Social' network sites: 'Computers are making people easier to use everyday'
      Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error.
      "Science is about questioning the status quo. Questioning authority".
      In the absence of evidence, opinion is indistinguishable from prejudice.

        I tried to find the size of a variable after storing a 0 in it, and again after storing the packed equivalent, and the size doubled! Please find my code below. When I assigned $a=0 I got 24 bytes, and after packing it I got 48 bytes.

        #! perl -slw
        # Compare the packed payload of a single bit with the memory footprint
        # of the Perl scalar that holds it.
        use strict;
        use Devel::Size qw(size);

        # A lexical is used instead of the global $a, which sort() reserves.
        my $packed = pack 'b*', '0';

        # length() is the number of bytes pack produced, i.e. what would be
        # written to a file.
        print "Length of packed data is " . length($packed) . " bytes";

        # size() measures the whole scalar as Devel::Size sees it, not just the
        # payload. No explicit "\n" here: the -l switch already appends one,
        # and adding another printed a blank line after the message.
        print "Size of scalar is " . size($packed) . " bytes";