in reply to Re^11: subtraction issue
in thread subtraction issue

... As promised, my manual manipulation of FixedPoint (arbitrary length strings of digits) ...

package FixedPoint; # simplistic module for adding and halving stringified # new, add, halve; no negatives # new() does NOT properly handle a floating point argument that defaul +t-stringifies to scientific notation use warnings; use strict; sub new($;$) { my $class = shift; my $val = '' . (shift || '0.0'); my $self = bless \$val, $class; $$self .= '.0' if $self->dotpos() >= length($$self); return $self; } sub copy($) { my $orig = shift; my $val = '' . $$orig; my $self = bless \$val, ref($orig); $$self .= '.0' if $self->dotpos() >= length($$self); return $self; } sub dotpos($) { my $val = ${$_[0]}; my $dot = index($val, '.'); $dot = index($val.='.0','.') if $dot<0; return $dot; } sub ilen($) { # the length of the integer portion my $v = ${$_[0]}; # just the value; don't want side effects $v =~ s/^0+(?!\.)|(?<!\.)0+$//g; # strip leading zeroes (exce +pt 0.) and trailing zeroes (except .0); return dotpos(\$v) || 1; # if dotpos==0, there's an implied + 0 before the dot } sub flen($) { # the length of the fractional portion my $v = ${$_[0]}; # just the value; do not affect $self $v =~ s/^0+(?!\.)|(?<!\.)0+$//g; # strip leading zeroes (exce +pt 0.) and trailing zeroes (except .0); my $dot = dotpos(\$v); return 0 if $dot>=length($v); # no fractional length if the dot +is imaginary return length($v)-$dot-1; } sub lengths($) { my $self = shift; sprintf '<<%d,%d>>', $self->ilen(), $self->flen(); } sub halve($) { # in-place division my $self = shift; my $rem = 0; my $pos = 0; my $dot = $self->dotpos; if($dot<0) { $$self .= '.0'; $dot = $self->dotpos; } if(0==$dot) { substr $$self, 0, 0, '0'; # precede ".xxx" with "0" => "0.xx +x" ++$dot; } while ( $pos < length($$self) ) { next if $pos eq $dot; my $d = substr($$self, $pos, 1); $d += 10*$rem; my $h = int($d / 2); $rem = $d % 2; substr $$self, $pos, 1, $h; if(0){ local $, = "\t"; local $\ = $/; print $pos, $d, $h, $rem,,$$self; } $$self .= '0' if $rem && ($pos+1 == length($$self)); } continue { ++$pos; } $$self =~ s/^0+(?!\.)|(?<!\.)0+$//g; # strip leading zeroes ( +except 0.) and trailing zeroes (except .0); } sub add($) { # in-place addition my($self, $addend) = @_; my $sdot = $self->dotpos; my $adot = $addend->dotpos; my $delt = 0; if($sdot >= length($$self)) { $$self .= ".0"; $sdot = $self->i +len; } if($adot >= length($$addend)) { $$addend .= ".0"; $adot = $addend- +>ilen; } # line up fixed points if($sdot < $adot) { $delt = $adot - $sdot; substr $$self, 0, 0, '0'x$delt; $sdot = $self->dotpos; } elsif ($adot < $sdot ) { $delt = $sdot - $adot; substr $$addend, 0, 0, '0'x$delt; $adot = $addend->dotpos; } # line up end of fractions my $sf = $self->flen; my $af = $addend->flen; if($sf<$af) { # self fract is shorter $$self .= '0' x ($af-$sf); $sf = $self->flen; } elsif ($af<$sf) { $$addend .= '0' x ($sf-$af); $af = $addend->flen; } #print join "\n", 'add', # 'self :'.$$self.':'.$sdot.':'.$sf, # 'addend:'.$$addend.':'.$adot.':'.$af; die "add: could not get the lengths the same:\n\t>>$$self<<\n\t>>$ +$addend<<\n" if length($$self)!=length($$addend); my $l = length($$self); my $c = 0; for my $p (1 .. $l) { next if $sdot == ($l-$p); my $s = substr $$self, -$p, 1; my $a = substr $$addend, -$p, 1; my $total = $s + $a + $c; substr($$self, -$p, 1) = $total % 10; $c = int($total/10); } substr $$self, 0, 0, $c if $c; #print join "\n", '', 'answer:'.$$self; return $self; } 1; package main; # ULP: 2**-52 my $ulp = FixedPoint->new(1); $ulp->halve for 1..52; # make 1+2**-52 my $x = FixedPoint->new(1)->add($ulp); print "\n"; printf "%s\n", '-'x(52+2); printf "%-8s = %s\n%-8s = %s\n", 'ulp(1)', $$ulp, '1+ulp(1)', $$x; printf "%s\n", '-'x(52+2); # bring it down to the smallest normal double plus one ULP # = (1+2**-52) * 2**-1022 = 2**-1022 + 2**-1074 for(1..1022) { $x->halve; printf "%s\n", $$x if 0; } # details on the FixedPoint: print "\n", '-'x40, "\n"; printf "(1+ulp(1))*2**-1022 = \n%s\n", $$x; print "\n"; printf "FixedPoint: %-16d = Digits before the fixed decimal point\n", +$x->ilen(); printf "FixedPoint: %-16d = Total digits the fixed decimal point\n", $ +x->flen(); my $p = 0; $$x =~ m/[1-9]/g ; $p = pos($$x); $p -= $x->ilen()+1; printf "FixedPoint: %-16d = Location of first significant digit (nth d +igit after decimal point)\n", $p; printf "FixedPoint: %-16d = Number of significant digits\n", $x->flen( +) - $p + 1; use POSIX qw(floor ceil log10 log2); use Data::IEEE754::Tools qw(:ulp :constants :floatingpoint); my $tiny = nextUp( POS_NORM_SMALLEST ); print "\n"; printf "Double: %30.16e\n", $tiny; printf "HexFloat: %30s\n", to_hex_floatingpoint($tiny); printf "DecFloat: %30s\n", to_dec_floatingpoint($tiny); print "\n"; printf "Math: %-16d = should start at ceil(-log10(x))-th digit a +fter decimal point\n", my $d = ceil(-log10($tiny)); printf "Math: %-16d = total number of digits after decimal shoul +d be the lowest power of two = ceil(-log2(ulp(x)))\n", my $e = ceil(- +log2(ulp($tiny))); printf "Math: %-16d = Number of significant digits", $e - $d + 1 +;
__RESULTS__ ------------------------------------------------------ ulp(1) = 0.0000000000000002220446049250313080847263336181640625 1+ulp(1) = 1.0000000000000002220446049250313080847263336181640625 ------------------------------------------------------ ---------------------------------------- (1+ulp(1))*2**-1022 = 0.00000000000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000000000000000000000000000000000000000000 +000000000000000000000000000000002225073858507201877155878558578948240 +788008848683704195613130031211968860399600696529790429221262885863903 +701367028190801717129607271191035512722741317515219905574004313880456 +780323337753988163917738732895924607422927011307805381339708165336129 +644744952978952121897909078385258336590185178961879988515042751478263 +607602168043622031129270045483207396484571310391222596393560832244062 +389690727689018671705454927517398658932481040173822832825124579506565 +573819103800864691161582871998970864729322144979697154670672039979199 +080916034762598038599542473984767886118009507251154376238960371621517 +172981601154460435953128432540644193864532490538913779568091580479240 +509922741385427494262054264040883983691918741817298779334027924276754 +4565229087538682506419718265533447265625 FixedPoint: 1 = Digits before the fixed decimal point FixedPoint: 1074 = Total digits the fixed decimal point FixedPoint: 308 = Location of first significant digit (nt +h digit after decimal point) FixedPoint: 767 = Number of significant digits Double: 2.2250738585072019e-308 HexFloat: +0x1.0000000000001p-1022 DecFloat: +0d1.0000000000000002p-1022 Math: 308 = should start at ceil(-log10(x))-th digi +t after decimal point Math: 1074 = total number of digits after decimal sh +ould be the lowest power of two = ceil(-log2(ulp(x))) Math: 767 = Number of significant digits

So what I said poorly as "it takes n fixed-point decimal digits to exactly represent an n-bit fixed-point fractional binary number" could have been better phrased: "if the smallest power of two exactly represented in a floating-point number is -n, it takes n decimal digits after the fixed decimal point to exactly represent that same number." In the example of nextUp(1), the smallest power of two is -52, so it takes 52 decimal digits after the fixed decimal point to exactly represent it. In the example of nextUp(POS_NORM_SMALLEST) = 2**-1022 + 2**-1074, it takes 1074 digits after the fixed decimal point to exactly represent it.

Thanks for this interesting diversion.

update: removed duplicate end-code tag