D:\>perl -ne "if (m/^[[:xdigit:]]{4,} - [[:xdigit:]]{4,}/) { s/ - /../; s/ : /; /; print }" unichist.pl > unichist_Blocks.txt
D:\>web get http://unicode.org/Public/UNIDATA/Blocks.txt | perl -ne "print if m/^[[:xdigit:]]{4,}\.\.[[:xdigit:]]{4,}/" > UNIDATA_Blocks.txt
D:\>wc -l unichist_Blocks.txt UNIDATA_Blocks.txt
149 unichist_Blocks.txt
197 UNIDATA_Blocks.txt
346 total
D:\>diff unichist_Blocks.txt UNIDATA_Blocks.txt
1,4c1,4
< 0000..007F; Basic Latin ASCII
< 0080..00FF; Latin-1
< 0100..017F; Latin Extended A
< 0180..024F; Latin Extended B
---
> 0000..007F; Basic Latin
> 0080..00FF; Latin-1 Supplement
> 0100..017F; Latin Extended-A
> 0180..024F; Latin Extended-B
8c8
< 0370..03FF; Greek
---
> 0370..03FF; Greek and Coptic
17c17,18
< 07C0..07FF; N'Ko
---
> 07C0..07FF; NKo
> 0800..083F; Samaritan
21c22
< 0A80..0A8F; Gujarati
---
> 0A80..0AFF; Gujarati
37c38
< 1400..167F; Canadian Syllabics
---
> 1400..167F; Unified Canadian Aboriginal Syllabics
45c46,47
< 1800..18FF; Mongolian
---
> 1800..18AF; Mongolian
> 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
50c52,53
< 1A00..1AFF; Buginese
---
> 1A00..1A1F; Buginese
> 1A20..1AAF; Tai Tham
51a55,58
> 1B80..1BBF; Sundanese
> 1C00..1C4F; Lepcha
> 1C50..1C7F; Ol Chiki
> 1CD0..1CFF; Vedic Extensions
74,75c81,82
< 27C0..27EF; Miscellaneous Mathematical Symbols A
< 27F0..27FF; Supplemental Arrows A
---
> 27C0..27EF; Miscellaneous Mathematical Symbols-A
> 27F0..27FF; Supplemental Arrows-A
77,78c84,85
< 2900..297F; Supplemental Arrows B
< 2980..29FF; Miscellaneous Mathematical Symbols B
---
> 2900..297F; Supplemental Arrows-B
> 2980..29FF; Miscellaneous Mathematical Symbols-B
82c89
< 2C60..2C7F; Latin Extended C
---
> 2C60..2C7F; Latin Extended-C
86a94
> 2DE0..2DFF; Cyrillic Extended-A
104c112
< 4E00..9FBF; CJK Unified Ideographs
---
> 4E00..9FFF; CJK Unified Ideographs
106a115,118
> A4D0..A4FF; Lisu
> A500..A63F; Vai
> A640..A69F; Cyrillic Extended-B
> A6A0..A6FF; Bamum
108c120
< A720..A7FF; Latin Extended D
---
> A720..A7FF; Latin Extended-D
110c122,133
< A840..A87F; Phags-Pa
---
> A830..A83F; Common Indic Number Forms
> A840..A87F; Phags-pa
> A880..A8DF; Saurashtra
> A8E0..A8FF; Devanagari Extended
> A900..A92F; Kayah Li
> A930..A95F; Rejang
> A960..A97F; Hangul Jamo Extended-A
> A980..A9DF; Javanese
> AA00..AA5F; Cham
> AA60..AA7F; Myanmar Extended-A
> AA80..AADF; Tai Viet
> ABC0..ABFF; Meetei Mayek
112,113c135,138
< D800..D8FF; High Surrogate Area
< DC00..DFFF; Low Surrogate Area
---
> D7B0..D7FF; Hangul Jamo Extended-B
> D800..DB7F; High Surrogates
> DB80..DBFF; High Private Use Surrogates
> DC00..DFFF; Low Surrogates
117c142
< FB50..FDFF; Arabic Presentation Forms A
---
> FB50..FDFF; Arabic Presentation Forms-A
123c148
< FE70..FEFF; Arabic Presentation Forms B
---
> FE70..FEFF; Arabic Presentation Forms-B
129a155,158
> 10190..101CF; Ancient Symbols
> 101D0..101FF; Phaistos Disc
> 10280..1029F; Lycian
> 102A0..102DF; Carian
137a167
> 10840..1085F; Imperial Aramaic
138a169
> 10920..1093F; Lydian
139a171,177
> 10A60..10A7F; Old South Arabian
> 10B00..10B3F; Avestan
> 10B40..10B5F; Inscriptional Parthian
> 10B60..10B7F; Inscriptional Pahlavi
> 10C00..10C4F; Old Turkic
> 10E60..10E7F; Rumi Numeral Symbols
> 11080..110CF; Kaithi
141a180
> 13000..1342F; Egyptian Hieroglyphs
144c183
< 1D200..1D24F; Ancient Greek Musical
---
> 1D200..1D24F; Ancient Greek Musical Notation
147a187,190
> 1F000..1F02F; Mahjong Tiles
> 1F030..1F09F; Domino Tiles
> 1F100..1F1FF; Enclosed Alphanumeric Supplement
> 1F200..1F2FF; Enclosed Ideographic Supplement
148a192
> 2A700..2B73F; CJK Unified Ideographs Extension C
149a194,197
> E0000..E007F; Tags
> E0100..E01EF; Variation Selectors Supplement
> F0000..FFFFF; Supplementary Private Use Area-A
> 100000..10FFFF; Supplementary Private Use Area-B
D:\>egrep "Egyptian Hieroglyphs|Mahjong|Domino" unichist_Blocks.txt
D:\>