File : fz_sqr.adb


   1 ------------------------------------------------------------------------------
   2 ------------------------------------------------------------------------------
   3 -- This file is part of 'Finite Field Arithmetic', aka 'FFA'.               --
   4 --                                                                          --
   5 -- (C) 2019 Stanislav Datskovskiy ( www.loper-os.org )                      --
   6 -- http://wot.deedbot.org/17215D118B7239507FAFED98B98228A001ABFFC7.html     --
   7 --                                                                          --
   8 -- You do not have, nor can you ever acquire the right to use, copy or      --
   9 -- distribute this software ; Should you use this software for any purpose, --
  10 -- or copy and distribute it to anyone or in any manner, you are breaking   --
  11 -- the laws of whatever soi-disant jurisdiction, and you promise to         --
  12 -- continue doing so for the indefinite future. In any case, please         --
  13 -- always : read and understand any software ; verify any PGP signatures    --
  14 -- that you use - for any purpose.                                          --
  15 --                                                                          --
  16 -- See also http://trilema.com/2015/a-new-software-licensing-paradigm .     --
  17 ------------------------------------------------------------------------------
  18 ------------------------------------------------------------------------------
  19 
  20 with Words;    use Words;
  21 with Word_Ops; use Word_Ops;
  22 with W_Mul;    use W_Mul;
  23 with FZ_Arith; use FZ_Arith;
  24 
  25 
  26 package body FZ_Sqr is
  27    
  28    -- Square case of Comba's multiplier. (CAUTION: UNBUFFERED)
  29    procedure FZ_Sqr_Comba(X  : in  FZ;
  30                           XX : out FZ) is
  31       
  32       -- Words in each multiplicand
  33       L : constant Word_Index := X'Length;
  34       
  35       -- Length of Product, i.e. double the length of X
  36       LP : constant Word_Index := 2 * L;
  37       
  38       -- 3-word Accumulator
  39       A2, A1, A0 : Word := 0;
  40       
  41       Lo, Hi  : Word; -- Output of WxW multiply/square
  42       
  43       -- Type for referring to a column of XX
  44       subtype ColXX is Word_Index range 0 .. LP - 1;
  45       
  46       procedure Accum is
  47          C      : WBool; -- Carry for the Accumulator addition
  48          Sum    : Word;  -- Sum for Accumulator addition
  49       begin
  50          -- Now add add double-Word Hi:Lo to accumulator A2:A1:A0:
  51          -- A0 += Lo; C := Carry
  52          Sum := A0 + Lo;
  53          C   := W_Carry(A0, Lo, Sum);
  54          A0  := Sum;
  55          -- A1 += Hi + C; C := Carry
  56          Sum := A1 + Hi + C;
  57          C   := W_Carry(A1, Hi, Sum);
  58          A1  := Sum;
  59          -- A2 += A2 + C
  60          A2  := A2 + C;
  61       end Accum;
  62       pragma Inline_Always(Accum);
  63       
  64       procedure SymmDigits(N : in ColXX; From : in ColXX; To : in ColXX) is
  65       begin
  66          for j in From .. To loop
  67             -- Hi:Lo := j-th * (N - j)-th Word, and then,
  68             Mul_Word(X(X'First + j),
  69                      X(X'First - j + N),
  70                      Lo, Hi);
  71             Accum;
  72             Accum; -- Accum += 2 * (Hi:Lo)
  73          end loop;
  74       end SymmDigits;
  75       pragma Inline_Always(SymmDigits);
  76       
  77       procedure SqrDigit(N : in ColXX) is
  78       begin
  79          Sqr_Word(X(X'First + N), Lo, Hi); -- Hi:Lo := Square(N-th digit)
  80          Accum;                            -- Accum += Hi:Lo
  81       end SqrDigit;
  82       pragma Inline_Always(SqrDigit);
  83       
  84       procedure HaveDigit(N : in ColXX) is
  85       begin
  86          -- Save the Nth (indexed from zero) word of XX:
  87          XX(XX'First + N) := A0;
  88          -- Right-Shift the Accumulator by one Word width:
  89          A0 := A1;
  90          A1 := A2;
  91          A2 := 0;
  92       end HaveDigit;
  93       pragma Inline_Always(HaveDigit);
  94       
  95       -- Compute the Nth (indexed from zero) column of the Product
  96       procedure Col(N : in ColXX; U : in ColXX; V : in ColXX) is
  97       begin
  98          -- The branch pattern depends only on FZ wordness
  99          if N mod 2 = 0 then         -- If we're doing an EVEN-numbered column:
 100             SymmDigits(N, U, V - 1); --   Stop before V: it is the square case
 101             SqrDigit(V);             --   Compute the square case at V
 102          else                        -- If we're doing an ODD-numbered column:
 103             SymmDigits(N, U, V);     --   All of them are the symmetric case
 104          end if;                     -- After either even or odd column:
 105          HaveDigit(N);               --   We have the N-th digit of the result.
 106       end Col;
 107       pragma Inline_Always(Col);
 108       
 109    begin
 110       -- First col always exists:
 111       SqrDigit(ColXX'First);
 112       HaveDigit(ColXX'First);
 113       
 114       -- Compute the lower half of the Product:
 115       for i in 1 .. L - 1 loop
 116          Col(i, 0, i / 2);
 117       end loop;
 118       
 119       -- Compute the upper half (sans last Word) of the Product:
 120       for i in L .. LP - 2 loop
 121          Col(i, i - L + 1, i / 2);
 122       end loop;
 123       
 124       -- The very last Word of the Product:
 125       XX(XX'Last) := A0; -- Equiv. of XX(XX'First + 2*L - 1) := A0;
 126    end FZ_Sqr_Comba;
 127    
 128    
 129    -- Karatsuba's Squaring. (CAUTION: UNBUFFERED)
 130    procedure Sqr_Karatsuba(X  : in  FZ;
 131                            XX : out FZ) is
 132       
 133       -- L is the wordness of X. Guaranteed to be a power of two.
 134       L : constant Word_Count := X'Length;
 135       
 136       -- An 'LSeg' is the same length as X.
 137       subtype LSeg is FZ(1 .. L);
 138       
 139       -- K is HALF of the length of X.
 140       K : constant Word_Index := L / 2;
 141       
 142       -- A 'KSeg' is the same length as HALF of X.
 143       subtype KSeg is FZ(1 .. K);
 144       
 145       -- The three L-sized variables of the product equation, i.e.:
 146       -- XX = LL + 2^(K*Bitness)(LL + HH - DD) + 2^(2*K*Bitness)HH
 147       LL, DD, HH : LSeg;
 148       
 149       -- K-sized term Dx, Dx := abs(XLo - XHi) to then get DD := Dx^2
 150       Dx         : KSeg;
 151       
 152       -- IGNORED Subtraction borrow, sign of (XL - XH)
 153       Cx         : WBool; -- given as DD := Dx^2, DD is always positive
 154       pragma Unreferenced(Cx);
 155       
 156       -- Bottom and Top K-sized halves of X.
 157       XLo        : KSeg  renames    X(    X'First       ..    X'Last - K );
 158       XHi        : KSeg  renames    X(    X'First + K   ..    X'Last     );
 159       
 160       -- L-sized middle segment of the product XX (+/- K from the midpoint).
 161       XXMid      : LSeg  renames   XX(   XX'First + K   ..   XX'Last - K );
 162       
 163       -- Bottom and Top L-sized halves of the product XX.
 164       XXLo       : LSeg  renames   XX(   XX'First       ..   XX'Last - L );
 165       XXHi       : LSeg  renames   XX(   XX'First + L   ..   XX'Last     );
 166       
 167       -- Topmost K-sized quarter segment of the product XX, or 'tail'
 168       XXHiHi     : KSeg  renames XXHi( XXHi'First + K   .. XXHi'Last     );
 169       
 170       -- Carry from addition of HH and LL terms; borrow from subtraction of DD
 171       C          : WBool;
 172       
 173       -- Barring a cosmic ray, 0 <= TC <= 2
 174       subtype TailCarry is Word range 0 .. 2;
 175       
 176       -- Tail-Carry accumulator, for the final ripple-out into XXHiHi
 177       TC         : TailCarry := 0;
 178       
 179       -- Barring a cosmic ray, the tail ripple will NOT overflow.
 180       FinalCarry : WZeroOrDie := 0;
 181       
 182    begin
 183       
 184       -- Recurse: LL := XLo^2
 185       FZ_Square_Unbuffered(XLo, LL);
 186       
 187       -- Recurse: HH := XHi^2
 188       FZ_Square_Unbuffered(XHi, HH);
 189       
 190       -- Dx := |XLo - XHi| , and we don't care about the borrow
 191       FZ_Sub_Abs(X => XLo, Y => XHi, Difference => Dx, Underflow => Cx);
 192       
 193       -- Recurse: DD := Dx^2 (always positive, which is why we don't need Cx)
 194       FZ_Square_Unbuffered(Dx, DD);
 195       
 196       -- XX := LL + 2^(2 * K * Bitness) * HH
 197       XXLo := LL;
 198       XXHi := HH;
 199       
 200       -- XX += 2^(K * Bitness) * HH, carry goes into Tail Carry accumulator.
 201       FZ_Add_D(X => XXMid, Y => HH, Overflow => TC);
 202       
 203       -- XX += 2^(K * Bitness) * LL, ...
 204       FZ_Add_D(X => XXMid, Y => LL, Overflow => C);
 205       
 206       -- ... add the carry from LL addition into the Tail Carry accumulator.
 207       TC := TC + C;
 208       
 209       -- XX -= 2^(K * Bitness) * DD
 210       FZ_Sub_D(X         => XXMid, -- Because DD is always positive,
 211                Y         => DD,    -- this term is always subtractive.
 212                Underflow => C); -- C is the borrow from DD term subtraction
 213       
 214       -- Get final Tail Carry for the ripple by subtracting DD term's borrow
 215       TC := TC - C;
 216       
 217       -- Ripple the Tail Carry into the tail.
 218       FZ_Add_D_W(X => XXHiHi, W => TC, Overflow => FinalCarry);
 219       
 220    end Sqr_Karatsuba;
 221    -- CAUTION: Inlining prohibited for Sqr_Karatsuba !
 222    
 223    
 224    -- Squaring. (CAUTION: UNBUFFERED)
 225    procedure FZ_Square_Unbuffered(X     : in  FZ;
 226                                   XX    : out FZ) is
 227    begin
 228       
 229       if X'Length <= Sqr_Karatsuba_Thresh then
 230          
 231          -- Base case:
 232          FZ_Sqr_Comba(X, XX);
 233          
 234       else
 235          
 236          -- Recursive case:
 237          Sqr_Karatsuba(X, XX);
 238          
 239       end if;
 240       
 241    end FZ_Square_Unbuffered;
 242    
 243    
 244    -- Squaring. Preserves the inputs.
 245    procedure FZ_Square_Buffered(X     : in  FZ;
 246                                 XX_Lo : out FZ;
 247                                 XX_Hi : out FZ) is
 248       
 249       -- Product buffer.
 250       P : FZ(1 .. 2 * X'Length);
 251       
 252    begin
 253       
 254       FZ_Square_Unbuffered(X, P);
 255       
 256       XX_Lo := P(P'First            .. P'First + X'Length - 1);
 257       XX_Hi := P(P'First + X'Length .. P'Last);
 258       
 259    end FZ_Square_Buffered;
 260    
 261 end FZ_Sqr;