#!/usr/bin/perl
use 5.018;
use strict;
use warnings;

#1149716

=head1 DESCRIPTION

I would like to extract a piece of data from one field that has multiple
fields in it. The original field is a long description that usually
contains a #F123456, #123456, #123-F123456, #123-123456, or #12AB-123456
in it.

This data floats around from left to right and there should be whitespace
before the #. Also, the end of the data is either whitespace, or the end
of the field.

=cut

# Sample descriptions: some carry an embedded "#..." identifier, some do not.
my @data = (
    "TRAY HINGED PLSTC 20 CAV #F32473",
    "BOX HSC,35-3/4X17-1/4 X 50-1/2 SIMULATOR TALL BOX",
    "PAD, FOAM, 24 X 24 X 1/4 #16193 112 SHEETS PER ROLL, ORDER IN FULL ROLLS",
    "PKG LIST,ASST ARM,RAD,300 #F37784",
    "PAD, TOP CAP RE17-30048 #F30121 CORRUGATED ASSEMBLY, 22-7/8 X 21-1/8 X 4-3/4",
    "foo bar #379460 best F11",
    "F1234 SIMULATION",
);

# Return the first embedded identifier (including its leading '#') found in
# the given description, or nothing (undef in scalar context) when the
# description is undefined or carries no identifier.
#
# An identifier is '#', an optional "123-" / "12AB-" style prefix, optional
# capital letters, then digits. It must be delimited by whitespace or by the
# start/end of the field, so a token such as "#1abc" is rejected.
sub extract_id {
    my ($description) = @_;

    return unless defined $description;

    if (
        $description =~ m{
            (?<!\S)                   # whitespace or start of field before
            (?<id>
                \#
                (?: [0-9A-Z]+ - )?    # optional prefix ending in a dash
                [A-Z]* [0-9]+         # optional letters, then the digits
            )
            (?!\S)                    # whitespace or end of field after
        }x
      )
    {
        return $+{id};
    }

    return;
}

for my $raw (@data) {

    # Work on a copy so @data itself is left untouched; drop every newline
    # (trailing or embedded) in one pass.
    my $description = $raw =~ tr/\n//dr;

    my $id = extract_id($description);

    if ( defined $id ) {
        say "\n\$data matches regex\n";
        $description =~ s/ +/ /g;    # clean up excess spaces
        say "$description \n";
        say "\tID: $id\n";           # the piece of data we were after
    }
    else {
        say "\n\t The data, $description, does NOT MATCH\n";
    }
}