in reply to Contemplating some set comparison tasks
I'm not sure exactly what you want as result but perhaps you want DISTINCT ON.
Slurping that list above in a table 'setcomp' with a single column 'c', you'd get something like:
#!/bin/sh t=setcomp echo " -- drop table if exists $t; -- enable if necessary create table $t as select * from (values ('0000002D9D62AEBE1E0E9DB6C4C4C7C16A163D2C|2f214516cdcab089e83f3e509 +4928fe9611f2f51'), ('000000A9E47BD385A0A3685AA12C2DB6FD727A20|2adeac692d450c54f8830014e +e6cbe3a958c1e60'), ('00000142988AFA836117B1B572FAE4713F200567|04bb7bbed62376f9aaec15fe6 +f18b89b27a4c3d8'), ('00000142988AFA836117B1B572FAE4713F200567|6935a8fc967a6ffc20be0f07f +2bb4a46072a397e'), ('00000142988AFA836117B1B572FAE4713F200567|8c88f4f3c4b1aff760a026759 +ae807af6c40e015'), ('00000142988AFA836117B1B572FAE4713F200567|974c820f53aded6d6e57ca8de +2c33206e2b5f439'), ('00000142988AFA836117B1B572FAE4713F200567|b05be3e17bb9987ffb368696e +e916dd9b9c2f9b3'), ('000001BCBC3B7C8C6E5FC59B686D3568132D218C|0d4c09539f42165bb8b1ab890 +fe6dc3d3ca838b3'), ('000001BCBC3B7C8C6E5FC59B686D3568132D218C|9fd421d4e020788100c289d21 +e4b9297acaaff62'), ('000001BCBC3B7C8C6E5FC59B686D3568132D218C|d09565280ebae0a37ca9385bc +39c0a777a446554'), ('000001E4975FA18878DF5C0989024327FBE1F4DF|55b8ece03f4935f9be667e332 +d52f7db3e17b809'), ('000001EF1880189B7DE7C15E971105EB6707DE83|cd15550344b5b9c2785a13ef9 +583015f267ad667'), ('000002F2D7CB4D4B548ADC623F559683D6F59258|36bed8bdb6d66fb67f409166f +5db64b02199812f'), ('0000034C9033333F8F58D9C7A64800F509962F3A|3c4b0a3c1acf6e03111805a0d +8b4e879df112b7a'), ('000003682106A4CB4F9D3B1B6E5C08820FCFD1B2|cd15550344b5b9c2785a13ef9 +583015f267ad667'), ('00000368B9CFE1B4CF9F3D38F3EFD82840BA280D|50edd315b9217345b1728c38b +02657df42043197'), ('000003A16A5A1C6CCDDBE548E85261422489A458|691845459c0ad35b28cce4dff +c0e3ee8912fb0f5'), ('0000046FD530A338D03422C7D0D16A9EE087ECD9|13e213f346ce624e9be99b356 +ab9125af563a375'), ('0000046FD530A338D03422C7D0D16A9EE087ECD9|67c0da2da88a23a803733cea9 +51e84974b34d029'), ('00000472E2B96A6CD0CBE8614779C5C8197BB42D|0c5e6cdb06c52160ded398d17 +392246269165e0a') ) as f(c) ; " | psql -qX echo " -- count all (20 rows) select count(*) from $t; -- show key count where more than 1: select key, count(*) from ( select substring(c, 1, 40) as key , substring(c, 42, 40) as source from $t ) as f group by key having count(*) > 1 ; -- show source count where more than 1: select source, count(*) from ( select substring(c, 1, 40) as key , substring(c, 42, 40) as source from $t ) as f group by source having count(*) > 1 ; -- distinct on key (13 rows) select distinct on (key) * from ( select substring(c, 1, 40) as key , substring(c, 42, 40) as source from $t ) as f ; " | psql -Xq
... with output:
$ ./pm-1096794.sh count ------- 20 (1 row) key | count ------------------------------------------+------- 0000046FD530A338D03422C7D0D16A9EE087ECD9 | 2 000001BCBC3B7C8C6E5FC59B686D3568132D218C | 3 00000142988AFA836117B1B572FAE4713F200567 | 5 (3 rows) source | count ------------------------------------------+------- cd15550344b5b9c2785a13ef9583015f267ad667 | 2 (1 row) key | source ------------------------------------------+--------------------------- +--------------- 0000002D9D62AEBE1E0E9DB6C4C4C7C16A163D2C | 2f214516cdcab089e83f3e5094 +928fe9611f2f51 000000A9E47BD385A0A3685AA12C2DB6FD727A20 | 2adeac692d450c54f8830014ee +6cbe3a958c1e60 00000142988AFA836117B1B572FAE4713F200567 | 04bb7bbed62376f9aaec15fe6f +18b89b27a4c3d8 000001BCBC3B7C8C6E5FC59B686D3568132D218C | 0d4c09539f42165bb8b1ab890f +e6dc3d3ca838b3 000001E4975FA18878DF5C0989024327FBE1F4DF | 55b8ece03f4935f9be667e332d +52f7db3e17b809 000001EF1880189B7DE7C15E971105EB6707DE83 | cd15550344b5b9c2785a13ef95 +83015f267ad667 000002F2D7CB4D4B548ADC623F559683D6F59258 | 36bed8bdb6d66fb67f409166f5 +db64b02199812f 0000034C9033333F8F58D9C7A64800F509962F3A | 3c4b0a3c1acf6e03111805a0d8 +b4e879df112b7a 000003682106A4CB4F9D3B1B6E5C08820FCFD1B2 | cd15550344b5b9c2785a13ef95 +83015f267ad667 00000368B9CFE1B4CF9F3D38F3EFD82840BA280D | 50edd315b9217345b1728c38b0 +2657df42043197 000003A16A5A1C6CCDDBE548E85261422489A458 | 691845459c0ad35b28cce4dffc +0e3ee8912fb0f5 0000046FD530A338D03422C7D0D16A9EE087ECD9 | 13e213f346ce624e9be99b356a +b9125af563a375 00000472E2B96A6CD0CBE8614779C5C8197BB42D | 0c5e6cdb06c52160ded398d173 +92246269165e0a (13 rows)
(Perhaps you already have 'key' and 'source' in separate columns so that you don't need the subselect.)
Is that last SQL statement the action you want performed (reducing the initial 20 rows to 13 unique key values)?
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^2: Contemplating some set comparison tasks
by dwhite20899 (Friar) on Aug 12, 2014 at 15:11 UTC | |
by erix (Prior) on Aug 12, 2014 at 15:20 UTC |