# Reads operon records from the __DATA__ section and prints one line per
# operon listing its "name|id" members as "name (id)", sorted by name.
#
# Each record is expected to have exactly four whitespace-separated fields:
#
#     <operon> <field2> <field3> <name|id,name|id,...>
#
# The second and third fields (a number and forward/reverse in the sample
# data) are matched but not used.  Lines that do not have this shape are
# skipped.  That is why the "####" marker and the text kept after it in the
# DATA section (which reads like this script's output for the records above)
# do not affect the result.
#
# NOTE: the "yhbC" entry in the sample data carries a soft hyphen (U+00AD)
# before the "|"; it is preserved as found.
use strict;
use warnings;

my %operons;    # operon name => { sub-operon name => gene id }

# Read from the DATA handle.  An empty "while ()" condition is always true in
# Perl and never assigns a line, so the records would never be consumed.
LINE:
while ( my $line = <DATA> ) {
    chomp $line;

    my ( $operon, $targets ) = $line =~ m{
        ^ (\S+)         # operon name
        \s+ \S+         # second field, unused
        \s+ \S+         # third field, unused
        \s+ (\S+)       # comma-separated list of name|id entries
        $
    }x
        or next LINE;

    # Register the operon even if its list yields no usable entries, so it
    # still appears in the output.
    $operons{$operon} ||= {};

    ENTRY:
    for my $entry ( split /,/, $targets ) {

        # Split at the first "|": the text before it is the key, the text
        # after it (possibly empty, as in "mbhA|") is the value.
        my ( $sub_operon, $gene ) = $entry =~ /([^|]*)\|(.*)/
            or next ENTRY;    # no "|" in this entry: nothing to store

        $operons{$operon}{$sub_operon} = $gene;
    }
}

for my $operon ( sort keys %operons ) {
    my $members = $operons{$operon};
    my @parts   = map { "$_ ($members->{$_})" } sort keys %{$members};
    print "$operon: ", join( ', ', @parts ), "\n";
}

__DATA__
malS 1 forward malS|b3571,
malT 1 forward malT|b3418,
malXY 2 forward malX|b1621,malY|b1622,
malZ 1 forward malZ|b0403,
manA 1 forward manA|b1613,
manXYZ 3 forward manX|b1817,manY|b1818,manZ|b1819,
map-glnD-dapD 3 reverse dapD|b0166,glnD|b0167,map|b0168,
marC 1 reverse marC|b1529,
marRAB 3 forward marA|b1531,marB|b1532,marR|b1530,
mbhA 1 forward mbhA|,
mcrA 1 forward mcrA|b1159,
mcrBC 2 reverse mcrB|b4346,mcrC|b4345,
mdaB 1 forward mdaB|b3028,
mdh 1 reverse mdh|b3236,
mdlAB 2 forward mdlA|b0448,mdlB|b0449,
mdoB 1 reverse mdoB|b4359,
mdoC 1 reverse mdoC|b1047,
mdoD 1 forward mdoD|b1424,
mdoGH 2 forward mdoG|b1048,mdoH|b1049,
mdtABCD-baeSR 6 forward baeR|b2079,baeS|b2078,mdtA|b2074,mdtB|b2075,mdtC|b2076,mdtD|b2077,
mdtEF 2 forward mdtE|b3513,mdtF|b3514,
mdtG 1 reverse mdtG|b1053,
mdtH 1 reverse mdtH|b1065,
mdtJI 2 reverse mdtI|b1599,mdtJ|b1600,
mdtK 1 forward mdtK|b1663,
mdtL 1 forward mdtL|b3710,
mdtM-yjiN 2 reverse mdtM|b4337,yjiN|b4336,
mdtNOP 3 reverse mdtN|b4082,mdtO|b4081,mdtP|b4080,
mdtQ 1 reverse mdtQ|b2138,
melAB 2 forward melA|b4119,melB|b4120,
melR 1 reverse melR|b4118,
menA 1 reverse menA|b3930,
menFD-yfbB-menBCE 6 reverse menB|b2262,menC|b2261,menD|b2264,menE|b2260,menF|b2265,yfbB|b2263,
metA 1 forward metA|b4013,
metBL 2 forward metB|b3939,metL|b3940,
metC 1 forward metC|b3008,
metE 1 forward metE|b3829,
metF 1 forward metF|b3941,
metG 1 forward metG|b2114,
metH 1 forward metH|b4019,
metJ 1 reverse metJ|b3938,
metK 1 forward metK|b2942,
metNIQ 3 reverse metI|b0198,metN|b0199,metQ|b0197,
metR 1 reverse metR|b3828,
metT-leuW-glnUW-metU-glnVX 7 reverse glnU|b0670,glnV|b0665,glnW|b0668,glnX|b0664,leuW|b0672,metT|b0673,metU|b0666,
metY-yhbC-nusA-infB-rbfA-truB-rpsO-pnp 8 reverse infB|b3168,metY|b3171,nusA|b3169,pnp|b3164,rbfA|b3167,rpsO|b3165,truB|b3166,yhbC­|b3170,
metZWV 3 forward metV|b2816,metW|b2815,metZ|b2814,
mfd 1 reverse mfd|b1114,
mglBAC 3 reverse mglA|b2149,mglB|b2150,mglC|b2148,
mgrB 1 reverse mgrB|b1826,
mgsA 1 reverse mgsA|b0963,
mgtA 1 forward mgtA|b4242,
####
malS: malS (b3571)
malT: malT (b3418)
malXY: malX (b1621), malY (b1622)
malZ: malZ (b0403)
manA: manA (b1613)
manXYZ: manX (b1817), manY (b1818), manZ (b1819)
map-glnD-dapD: dapD (b0166), glnD (b0167), map (b0168)
marC: marC (b1529)
marRAB: marA (b1531), marB (b1532), marR (b1530)
mbhA: mbhA ()
mcrA: mcrA (b1159)
mcrBC: mcrB (b4346), mcrC (b4345)
mdaB: mdaB (b3028)
mdh: mdh (b3236)
mdlAB: mdlA (b0448), mdlB (b0449)
mdoB: mdoB (b4359)
mdoC: mdoC (b1047)
mdoD: mdoD (b1424)
mdoGH: mdoG (b1048), mdoH (b1049)
mdtABCD-baeSR: baeR (b2079), baeS (b2078), mdtA (b2074), mdtB (b2075), mdtC (b2076), mdtD (b2077)
mdtEF: mdtE (b3513), mdtF (b3514)
mdtG: mdtG (b1053)
mdtH: mdtH (b1065)
mdtJI: mdtI (b1599), mdtJ (b1600)
mdtK: mdtK (b1663)
mdtL: mdtL (b3710)
mdtM-yjiN: mdtM (b4337), yjiN (b4336)
mdtNOP: mdtN (b4082), mdtO (b4081), mdtP (b4080)
mdtQ: mdtQ (b2138)
melAB: melA (b4119), melB (b4120)
melR: melR (b4118)
menA: menA (b3930)
menFD-yfbB-menBCE: menB (b2262), menC (b2261), menD (b2264), menE (b2260), menF (b2265), yfbB (b2263)
metA: metA (b4013)
metBL: metB (b3939), metL (b3940)
metC: metC (b3008)
metE: metE (b3829)
metF: metF (b3941)
metG: metG (b2114)
metH: metH (b4019)
metJ: metJ (b3938)
metK: metK (b2942)
metNIQ: metI (b0198), metN (b0199), metQ (b0197)
metR: metR (b3828)
metT-leuW-glnUW-metU-glnVX: glnU (b0670), glnV (b0665), glnW (b0668), glnX (b0664), leuW (b0672), metT (b0673), metU (b0666)
metY-yhbC-nusA-infB-rbfA-truB-rpsO-pnp: infB (b3168), metY (b3171), nusA (b3169), pnp (b3164), rbfA (b3167), rpsO (b3165), truB (b3166), yhbC­ (b3170)
metZWV: metV (b2816), metW (b2815), metZ (b2814)
mfd: mfd (b1114)
mglBAC: mglA (b2149), mglB (b2150), mglC (b2148)
mgrB: mgrB (b1826)
mgsA: mgsA (b0963)
mgtA: mgtA (b4242)