Re^12: subtraction issue

... As promised, my manual manipulation of FixedPoint (arbitrary length strings of digits) ...

package FixedPoint;
# simplistic module for adding and halving stringified
# new, add, halve; no negatives
# new() does NOT properly handle a floating point argument that defaul
+t-stringifies to scientific notation

use warnings;
use strict;


sub new($;$) {
    my $class = shift;
    my $val = '' . (shift || '0.0');
    my $self = bless \$val, $class;
    $$self .= '.0' if $self->dotpos() >= length($$self);
    return $self;
}

sub copy($) {
    my $orig = shift;
    my $val = '' . $$orig;
    my $self = bless \$val, ref($orig);
    $$self .= '.0' if $self->dotpos() >= length($$self);
    return $self;
}

sub dotpos($) {
    my $val = ${$_[0]};
    my $dot = index($val, '.');
    $dot = index($val.='.0','.') if $dot<0;
    return $dot;
}

sub ilen($) {   # the length of the integer portion
    my $v = ${$_[0]}; # just the value; don't want side effects
    $v =~ s/^0+(?!\.)|(?<!\.)0+$//g;      # strip leading zeroes (exce
+pt 0.) and trailing zeroes (except .0);
    return dotpos(\$v) || 1;        # if dotpos==0, there's an implied
+ 0 before the dot
}

sub flen($) {   # the length of the fractional portion
    my $v = ${$_[0]}; # just the value; do not affect $self
    $v =~ s/^0+(?!\.)|(?<!\.)0+$//g;      # strip leading zeroes (exce
+pt 0.) and trailing zeroes (except .0);
    my $dot = dotpos(\$v);
    return 0 if $dot>=length($v);   # no fractional length if the dot 
+is imaginary
    return length($v)-$dot-1;
}

sub lengths($) {
    my $self = shift;
    sprintf '<<%d,%d>>', $self->ilen(), $self->flen();
}

sub halve($) {      # in-place division
    my $self = shift;
    my $rem = 0;
    my $pos = 0;
    my $dot = $self->dotpos;
    if($dot<0) {
        $$self .= '.0';
        $dot = $self->dotpos;
    }
    if(0==$dot) {
        substr $$self, 0, 0, '0';   # precede ".xxx" with "0" => "0.xx
+x"
        ++$dot;
    }
    while ( $pos < length($$self) ) {
        next if $pos eq $dot;
        my $d = substr($$self, $pos, 1);
        $d += 10*$rem;
        my $h = int($d / 2);
        $rem = $d % 2;
        substr $$self, $pos, 1, $h;
        if(0){
            local $, = "\t";
            local $\ = $/;
            print $pos, $d, $h, $rem,,$$self;
        }
        $$self .= '0' if $rem && ($pos+1 == length($$self));
    } continue {
        ++$pos;
    }
    $$self =~ s/^0+(?!\.)|(?<!\.)0+$//g;      # strip leading zeroes (
+except 0.) and trailing zeroes (except .0);
}

sub add($) {    # in-place addition
    my($self, $addend) = @_;
    my $sdot = $self->dotpos;
    my $adot = $addend->dotpos;
    my $delt = 0;

    if($sdot >= length($$self))   { $$self   .= ".0"; $sdot = $self->i
+len; }
    if($adot >= length($$addend)) { $$addend .= ".0"; $adot = $addend-
+>ilen; }

    # line up fixed points
    if($sdot < $adot) {
        $delt = $adot - $sdot;
        substr $$self, 0, 0, '0'x$delt;
        $sdot = $self->dotpos;
    } elsif ($adot < $sdot ) {
        $delt = $sdot - $adot;
        substr $$addend, 0, 0, '0'x$delt;
        $adot = $addend->dotpos;
    }

    # line up end of fractions
    my $sf = $self->flen;
    my $af = $addend->flen;
    if($sf<$af) {    # self fract is shorter
        $$self .= '0' x ($af-$sf);
        $sf = $self->flen;
    } elsif ($af<$sf) {
        $$addend .= '0' x ($sf-$af);
        $af = $addend->flen;
    }

    #print join "\n", 'add',
    #    'self  :'.$$self.':'.$sdot.':'.$sf,
    #    'addend:'.$$addend.':'.$adot.':'.$af;

    die "add: could not get the lengths the same:\n\t>>$$self<<\n\t>>$
+$addend<<\n"  if length($$self)!=length($$addend);
    my $l = length($$self);
    my $c = 0;
    for my $p (1 .. $l) {
        next if $sdot == ($l-$p);
        my $s = substr $$self,   -$p, 1;
        my $a = substr $$addend, -$p, 1;
        my $total = $s + $a + $c;
        substr($$self, -$p, 1) = $total % 10;
        $c = int($total/10);
    }
    substr $$self, 0, 0, $c if $c;

    #print join "\n", '', 'answer:'.$$self;
    return $self;
}

1;

package main;
# ULP: 2**-52
my $ulp = FixedPoint->new(1);
$ulp->halve for 1..52;
# make 1+2**-52
my $x = FixedPoint->new(1)->add($ulp);
print "\n";
printf "%s\n", '-'x(52+2);
printf "%-8s = %s\n%-8s = %s\n", 'ulp(1)', $$ulp, '1+ulp(1)', $$x;
printf "%s\n", '-'x(52+2);
# bring it down to the smallest normal double plus one ULP
#   = (1+2**-52) * 2**-1022 = 2**-1022 + 2**-1074
for(1..1022) {
    $x->halve;
    printf "%s\n", $$x          if 0;
}

# details on the FixedPoint:
print "\n", '-'x40, "\n";
printf "(1+ulp(1))*2**-1022 = \n%s\n", $$x;
print "\n";
printf "FixedPoint: %-16d = Digits before the fixed decimal point\n", 
+$x->ilen();
printf "FixedPoint: %-16d = Total digits the fixed decimal point\n", $
+x->flen();
my $p = 0;
$$x =~ m/[1-9]/g ;
$p = pos($$x);
$p -= $x->ilen()+1;
printf "FixedPoint: %-16d = Location of first significant digit (nth d
+igit after decimal point)\n", $p;
printf "FixedPoint: %-16d = Number of significant digits\n", $x->flen(
+) - $p + 1;

use POSIX qw(floor ceil log10 log2);
use Data::IEEE754::Tools qw(:ulp :constants :floatingpoint);
my $tiny = nextUp( POS_NORM_SMALLEST );
print "\n";
printf "Double:    %30.16e\n", $tiny;
printf "HexFloat:  %30s\n", to_hex_floatingpoint($tiny);
printf "DecFloat:  %30s\n", to_dec_floatingpoint($tiny);
print "\n";
printf "Math:       %-16d = should start at ceil(-log10(x))-th digit a
+fter decimal point\n", my $d = ceil(-log10($tiny));
printf "Math:       %-16d = total number of digits after decimal shoul
+d be the lowest power of two = ceil(-log2(ulp(x)))\n", my $e = ceil(-
+log2(ulp($tiny)));
printf "Math:       %-16d = Number of significant digits", $e - $d + 1
+;
[download]

__RESULTS__
------------------------------------------------------
ulp(1)   = 0.0000000000000002220446049250313080847263336181640625
1+ulp(1) = 1.0000000000000002220446049250313080847263336181640625
------------------------------------------------------

----------------------------------------
(1+ulp(1))*2**-1022 =
0.00000000000000000000000000000000000000000000000000000000000000000000
+000000000000000000000000000000000000000000000000000000000000000000000
+000000000000000000000000000000000000000000000000000000000000000000000
+000000000000000000000000000000000000000000000000000000000000000000000
+000000000000000000000000000000002225073858507201877155878558578948240
+788008848683704195613130031211968860399600696529790429221262885863903
+701367028190801717129607271191035512722741317515219905574004313880456
+780323337753988163917738732895924607422927011307805381339708165336129
+644744952978952121897909078385258336590185178961879988515042751478263
+607602168043622031129270045483207396484571310391222596393560832244062
+389690727689018671705454927517398658932481040173822832825124579506565
+573819103800864691161582871998970864729322144979697154670672039979199
+080916034762598038599542473984767886118009507251154376238960371621517
+172981601154460435953128432540644193864532490538913779568091580479240
+509922741385427494262054264040883983691918741817298779334027924276754
+4565229087538682506419718265533447265625

FixedPoint: 1                = Digits before the fixed decimal point
FixedPoint: 1074             = Total digits the fixed decimal point
FixedPoint: 308              = Location of first significant digit (nt
+h digit after decimal point)
FixedPoint: 767              = Number of significant digits

Double:           2.2250738585072019e-308
HexFloat:        +0x1.0000000000001p-1022
DecFloat:     +0d1.0000000000000002p-1022

Math:       308              = should start at ceil(-log10(x))-th digi
+t after decimal point
Math:       1074             = total number of digits after decimal sh
+ould be the lowest power of two = ceil(-log2(ulp(x)))
Math:       767              = Number of significant digits
[download]

So what I said poorly as "it takes n fixed-point decimal digits to exactly represent an n-bit fixed-point fractional binary number" could have been better phrased: "if the smallest power of two exactly represented in a floating-point number is -n, it takes n decimal digits after the fixed decimal point to exactly represent that same number." In the example of nextUp(1), the smallest power of two is -52, so it takes 52 decimal digits after the fixed decimal point to exactly represent it. In the example of nextUp(POS_NORM_SMALLEST) = 2**-1022 + 2**-1074, it takes 1074 digits after the fixed decimal point to exactly represent it.

Thanks for this interesting diversion.

update: removed duplicate end-code tag

Comment on Re^12: subtraction issue Select or Download Code