#!/usr/bin/perl # # parse_bates_numbers_v2.pl use strict; use warnings; use English qw( -no_match_vars ); BEGIN { my $bates_number_pattern = qr{ ^ # ( Prefix ) ( Number ) (?: ( XYZ\s\d{2,3}(?:\sST)? ) \s ( \d{8} ) | ( XYZ\s[UV]\s\d{1,3} ) \s ( \d{8} ) | ( XYZ\s\d{3} ) ( \d{8} ) | ( XYZ ) \s ( \d{7,8} ) | ( ABC-M- ) ( \d{7} ) | ( ABCD- ) ( \d{8} ) | ( ) ( \d{11} ) ) $ }x; '12345678901' =~ $bates_number_pattern or die "Invalid Bates number pattern:\n$bates_number_pattern\n"; $#LAST_MATCH_END % 2 == 0 or die "Invalid number of parentheses in pattern: $#LAST_MATCH_END\n"; sub parse_bates_number { my $bates_number = shift; $bates_number =~ $bates_number_pattern or die "Invalid Bates number: $bates_number\n"; no strict 'refs'; return map { $$_ } grep { defined $LAST_MATCH_START[$_] } ( 1 .. $#LAST_MATCH_START ); } } while (my $bates_number = ) { chomp $bates_number; my ($prefix, $number) = parse_bates_number($bates_number); printf "%-20s %-10s %12.0f\n", $bates_number, $prefix, $number; } exit 0; __END__ XYZ 123 00000123 XYZ 123 00000456 XYZ 123 00654321 XYZ 12 ST 00123456 XYZ 123 ST 00654321 XYZ U 123 00123456 XYZ U 12 00654321 XYZ V 1 00123456 XYZ 12300654321 XYZ 00123456 XYZ 0654321 ABC-M-0123456 ABCD-00654321 00000123456 99999999999 BOGUS99