Depending on how the file was created, it may not be sufficient to parse each line simply by splitting on semicolons (for example, a quoted field may itself contain a semicolon). If it is a well-formed CSV file, you should use one of the CSV modules, such as Text::CSV, to manage parsing the file. Consider:
use strict;
use warnings;
use Text::CSV qw{};

# Demonstration: group the rows of a semicolon-separated file by the value
# of their first column (the country name) and print one section per
# country, each section repeating the header row.

# Create a test file
my $fname = 'delme.csv';

open my $fOut, '>', $fname or die "Can't create $fname: $!\n";
print $fOut <<'TESTDATA';
country_name;region;city;type;area;lat;long;etc
Albanië;Albanië;Berat;Hotel;Berati;40,9999;19,99999;Meer;ok;0;
Albanië;Albanië;Berat;Hotel;Castle Park;45,769969;19,9999;Meer;ok;0;
Albanië;Albanië;Berat;Hotel;Mangalemi;40,709999;19,959999;Meer;ok;0;
Andorra;Andorra;El Serrat;Hotel;Subi;43,658607;5,568623;;ok;0;
Andorra;Andorra;El Serrat;Hotel;Tristan;42,618507;1,538923;;ok;0;
Andorra;Andorra;El Tarter;Hotel;Del;42,580340;1,648919;;ok;0;
België;Antwerpen;Antwerpen;Bezienswaardigheid;Boerentoren (Kbc Toren);51,2000;4,399000;;ok;0;
België;Antwerpen;Antwerpen;Bed&Breakfast;Borze;51,2200;4,399000;;ok;0;
België;Antwerpen;Antwerpen;Bed&Breakfast;Boulevard ;51,220000;4,399000;;ok;0;
België;Antwerpen;Antwerpen;Appartement;Britselei 37;51,220000;4,399000;;ok;0;
Engeland;Groot Londen;Londen;Museum;Tate Britain;51,5200;-0,126236;;ok;0;
Engeland;Groot Londen;Londen;Museum;Tate Modern;51,500200;-0,126236;;ok;0;
Engeland;Groot Londen;Londen;Hotel;Testing;51,500200;-0,126236;;ok;0;
Engeland;Groot Londen;Londen;Bezienswaardigheid;Thames (rivier);51,0200;-0,126236;;ok;0;
Engeland;Groot Londen;Londen;Hotel;The Archery London;51,500200;-0,126236;;ok;0;
Finland;Finland;Ylläsjärvi;Hotel;Yllasrinne;67,526750;24,275630;;ok;0;
Finland;Finland;Ypäjä;Bezienswaardigheid;Loimijoki Golf;0,805300;23,271400;;ok;0
Finland;Finland;Ytteresse;Vakantiepark;Solhaga;0,630700;22,962500;;ok;0
Finland;Lapland;Hetta;Hotel;Hetan Majatalo;6,34700;23,633800;;ok;0
Frankrijk;Alsace (Elzas);Rouffach;Hotel;Au Relais D'Alsace;7,959680;7,29970;;ok;0
Frankrijk;Alsace (Elzas);Rouffach;Hotel;Château d Isenbourg;7,959680;7,29970;;ok;0
Frankrijk;Alsace (Elzas);Ruederbach;Vakantiehuis;Au Cheval Blanc;47,561440;7,26840;;ok;0
Frankrijk;Alsace (Elzas);Sausheim;Camping;Le fary;4,787300;7,36390;;ok;0;
TESTDATA
# Buffered write errors only surface at close, so check it.
close $fOut or die "Can't close $fname: $!\n";

# Now for the sample interesting code
open my $fIn, '<', $fname or die "Can't open $fname: $!\n";

my $csv = Text::CSV->new({binary => 1, sep_char => ';'})
    or die "Can't create CSV parser: " . Text::CSV->error_diag() . "\n";

# The first record is the header row; it is repeated above each group.
my $header = $csv->getline($fIn)
    or die "Can't read header row from $fname\n";
my @columns = @{$header};

# Country name (first field) => list of row array refs, in file order.
my %byCountry;
while (my $row = $csv->getline($fIn)) {
    push @{$byCountry{$row->[0]}}, $row;
}

# getline returns undef both at end of file and on a parse failure;
# without this check a malformed record would silently truncate the data.
$csv->eof
    or die "Parse error in $fname: " . $csv->error_diag() . "\n";
close $fIn;

for my $country (sort keys %byCountry) {
    print "--- $country.csv---\n";
    $csv->print(*STDOUT, \@columns);
    print "\n";

    for my $row (@{$byCountry{$country}}) {
        $csv->print(*STDOUT, $row);
        print "\n";
    }
}
Prints (in part):
--- Albanië.csv---
country_name;region;city;type;area;lat;long;etc
Albanië;Albanië;Berat;Hotel;Berati;40,9999;19,99999;Meer;ok;0;
Albanië;Albanië;Berat;Hotel;"Castle Park";45,769969;19,9999;Meer;ok;0;
Albanië;Albanië;Berat;Hotel;Mangalemi;40,709999;19,959999;Meer;ok;0;
--- Andorra.csv---
country_name;region;city;type;area;lat;long;etc
Andorra;Andorra;"El Serrat";Hotel;Subi;43,658607;5,568623;;ok;0;
...
Finland;Finland;Ypäjä;Bezienswaardigheid;"Loimijoki Golf";0,805300;23,271400;;ok;0
Finland;Finland;Ytteresse;Vakantiepark;Solhaga;0,630700;22,962500;;ok;0
Finland;Lapland;Hetta;Hotel;"Hetan Majatalo";6,34700;23,633800;;ok;0
--- Frankrijk.csv---
country_name;region;city;type;area;lat;long;etc
Frankrijk;"Alsace (Elzas)";Rouffach;Hotel;"Au Relais D'Alsace";7,959680;7,29970;;ok;0
Frankrijk;"Alsace (Elzas)";Rouffach;Hotel;"Château d Isenbourg";7,959680;7,29970;;ok;0
Frankrijk;"Alsace (Elzas)";Ruederbach;Vakantiehuis;"Au Cheval Blanc";47,561440;7,26840;;ok;0
Frankrijk;"Alsace (Elzas)";Sausheim;Camping;"Le fary";4,787300;7,36390;;ok;0;
In reply to Re: AWK? Split one file in seperate files based on country
by GrandFather
in thread AWK? Split one file in seperate files based on country
by Janwhatever
| For: | Use: |
|------|------|
| & | &amp;amp; |
| < | &amp;lt; |
| > | &amp;gt; |
| [ | &amp;#91; |
| ] | &amp;#93; |