Main Page   Data Structures   File List   Data Fields   Globals  

addition_scs.c

Go to the documentation of this file.
00001 /** Functions for SCS addition and subtraction 
00002 
00003 @file addition_scs.c
00004 
00005 @author Defour David David.Defour@ens-lyon.fr
00006 @author Florent de Dinechin Florent.de.Dinechin@ens-lyon.fr 
00007  
00008 This file is part of the SCS library.
00009 
00010 Many functions come in two versions, selected by a @#if.
00011 
00012 The reason is that we designed scslib library for internal use with
00013 SCS_NB_WORDS==8, so we provide a version with manual optimizations for
00014 this case.
00015 
00016 These optimisations include loop unrolling, and sometimes replacing
00017 temporary arrays of size 8 with 8 variables, which is more efficient
00018 on all modern processors with many (renaming) registers.
00019 
00020 Using gcc3.2 with the most aggressive optimization options for this
00021 purpose (-funroll-loops -foptimize-register-move -frerun-loop-opt
00022 -frerun-cse-after-loop) is still much slower. At some point in the
00023 future, gcc should catch up with unrolling since our loops are so
00024 simple, however the replacement of small arrays with variables is
00025 not something we are aware of in the literature about compiler
00026 optimization.
00027 */
00028 
00029 /*
00030 Copyright (C) 2002  David Defour and Florent de Dinechin
00031 
00032     This library is free software; you can redistribute it and/or
00033     modify it under the terms of the GNU Lesser General Public
00034     License as published by the Free Software Foundation; either
00035     version 2.1 of the License, or (at your option) any later version.
00036 
00037     This library is distributed in the hope that it will be useful,
00038     but WITHOUT ANY WARRANTY; without even the implied warranty of
00039     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00040     Lesser General Public License for more details.
00041 
00042     You should have received a copy of the GNU Lesser General Public
00043     License along with this library; if not, write to the Free Software
00044     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00045 
00046  */
00047 
00048 #include "scs.h"
00049 #include "scs_private.h"
00050 
00051 /**
00052 This function copies a result into another. There is an unrolled
00053 version for the case SCS_NB_WORDS==8.
00054 */
00055 void inline scs_set(scs_ptr result, scs_ptr x){
00056  /* unsigned int i;*/
00057   
00058 #if (SCS_NB_WORDS==8)
00059     R_HW[0] = X_HW[0]; R_HW[1] = X_HW[1]; 
00060     R_HW[2] = X_HW[2]; R_HW[3] = X_HW[3]; 
00061     R_HW[4] = X_HW[4]; R_HW[5] = X_HW[5]; 
00062     R_HW[6] = X_HW[6]; R_HW[7] = X_HW[7]; 
00063 #else
00064   for(i=0; i<SCS_NB_WORDS; i++)
00065     R_HW[i] = X_HW[i];
00066 #endif
00067   R_EXP = X_EXP;  
00068   R_IND = X_IND; 
00069   R_SGN = X_SGN;
00070 }
00071 
00072 
00073 /** renormalize a SCS number.  
00074 
00075 This function removes the carry from each digit, and also shifts the
00076 digits in case of a cancellation (so that if result != 0 then its
00077 first digit is non-zero)
00078  
00079  @warning THIS FUNCTION HAS NEVER BEEN PROPERLY TESTED and is
00080  currently unused in the library: instead, specific renormalisation
00081  steps are fused within the code of the operations which require it.
00082  */
00083 
00084 void inline scs_renorm(scs_ptr result){
00085   unsigned int c;
00086   int i, j, k;
00087 
00088   /*
00089    * Carry propagate
00090    */
00091   for(i=SCS_NB_WORDS-1; i>0; i--){
00092     c = R_HW[i] & ~SCS_MASK_RADIX;
00093     R_HW[i-1] += c >> SCS_NB_BITS;
00094     R_HW[i]    = R_HW[i] &  SCS_MASK_RADIX; 
00095   }
00096 
00097   if (R_HW[0] >= SCS_RADIX){
00098     /*  Carry out! Need to shift digits  */
00099     c = R_HW[0] & ~SCS_MASK_RADIX;
00100     c = c >> SCS_NB_BITS;
00101     for(i=SCS_NB_WORDS-1; i>1; i--)
00102       R_HW[i] = R_HW[i-1];
00103 
00104     R_HW[1] = R_HW[0] & SCS_MASK_RADIX;
00105     R_HW[0] = c;
00106     R_IND  += 1;
00107 
00108   }else{
00109     /* Was there a cancellation ? */
00110     if (R_HW[0] == 0){
00111 
00112       k = 1;
00113       while ((R_HW[k] == 0) && (k <= SCS_NB_WORDS))
00114         k++;
00115       
00116       R_IND -= k;
00117 
00118       for(j=k, i=0; j<SCS_NB_WORDS; j++, i++)
00119         R_HW[i] = R_HW[j];
00120 
00121       for(        ; i<SCS_NB_WORDS; i++)
00122         R_HW[i] = 0;     
00123 
00124     }
00125   }
00126 }
00127 
00128 
00129 
00130 /** Renormalization without cancellation check.
00131 
00132  This renormalization step is especially designed for the addition of
00133  several numbers with the same sign.  In this case, you know that there
00134  has been no cancellation, which allows simpler renormalisation.
00135 */
00136 
00137 void inline scs_renorm_no_cancel_check(scs_ptr result){ 
00138   unsigned int carry, c0;
00139  /* int i;*/
00140 
00141   /* Carry propagate  */      
00142 #if (SCS_NB_WORDS==8)
00143   carry = R_HW[7] >> SCS_NB_BITS;
00144   R_HW[6] += carry;  R_HW[7] = R_HW[7] & SCS_MASK_RADIX;
00145   carry = R_HW[6] >> SCS_NB_BITS;
00146   R_HW[5] += carry;  R_HW[6] = R_HW[6] & SCS_MASK_RADIX;
00147   carry = R_HW[5] >> SCS_NB_BITS;
00148   R_HW[4] += carry;  R_HW[5] = R_HW[5] & SCS_MASK_RADIX;
00149   carry = R_HW[4] >> SCS_NB_BITS;
00150   R_HW[3] += carry;  R_HW[4] = R_HW[4] & SCS_MASK_RADIX;
00151   carry = R_HW[3] >> SCS_NB_BITS;
00152   R_HW[2] += carry;  R_HW[3] = R_HW[3] & SCS_MASK_RADIX;
00153   carry = R_HW[2] >> SCS_NB_BITS;
00154   R_HW[1] += carry;  R_HW[2] = R_HW[2] & SCS_MASK_RADIX;
00155   carry = R_HW[1] >> SCS_NB_BITS;
00156   R_HW[0] += carry;  R_HW[1] = R_HW[1] & SCS_MASK_RADIX;
00157 #else
00158   for(i=(SCS_NB_WORDS-1);i>0;i--){
00159       carry      = R_HW[i] >> SCS_NB_BITS;
00160       R_HW[i-1] += carry;
00161       R_HW[i]    = R_HW[i] & SCS_MASK_RADIX;
00162   }
00163 #endif
00164     
00165   if (R_HW[0] >= SCS_RADIX){
00166     /* Carry out ! Need to shift digits */
00167     c0 = R_HW[0] >> SCS_NB_BITS;
00168     
00169 #if (SCS_NB_WORDS==8)
00170     R_HW[7] = R_HW[6]; R_HW[6] = R_HW[5]; 
00171     R_HW[5] = R_HW[4]; R_HW[4] = R_HW[3]; 
00172     R_HW[3] = R_HW[2]; R_HW[2] = R_HW[1]; 
00173 #else
00174     for(i=(SCS_NB_WORDS-1); i>1; i--)
00175       R_HW[i] =  R_HW[i-1];
00176 #endif    
00177     R_HW[1] =  R_HW[0] & SCS_MASK_RADIX;
00178     R_HW[0] =  c0;
00179     R_IND  += 1;
00180   }
00181   return;
00182 }
00183 
00184 
00185 
00186 
00187 /* addition without renormalisation.
00188 
00189 
00190   Add two scs number x and y, the result is put into "result".
00191   Assumes x.sign == y.sign    x.index > y.index.  
00192 
00193    The result is not normalized.
00194  */
00195 
00196 void do_add_no_renorm(scs_ptr result, scs_ptr x, scs_ptr y){
00197   unsigned int RES[SCS_NB_WORDS];
00198   unsigned int i, j, Diff;
00199   
00200   if (x->exception.i[HI_ENDIAN]==0){scs_set(result, y); return; }
00201   if (y->exception.i[HI_ENDIAN]==0){scs_set(result, x); return; }  
00202   
00203   for (i=0; i<SCS_NB_WORDS; i++)
00204     RES[i] = X_HW[i];
00205 
00206   Diff  = X_IND - Y_IND;
00207   R_EXP = X_EXP + Y_EXP - 1; 
00208   R_IND = X_IND;
00209   R_SGN = X_SGN;
00210 
00211   for (i=Diff, j=0; i<SCS_NB_WORDS; i++, j++)
00212     RES[i] += Y_HW[j];
00213 
00214   for (i=0; i<SCS_NB_WORDS; i++)
00215     R_HW[i] = RES[i];
00216 
00217   return;
00218 }
00219 
00220 
00221 /*
00222  * Addition without renormalization. Assumes that x.sign == y.sign.
00223  */
00224 void inline scs_add_no_renorm(scs_ptr result, scs_ptr x, scs_ptr y)
00225 {
00226   if (X_IND >= Y_IND)
00227     do_add_no_renorm(result,x,y);
00228   else
00229     do_add_no_renorm(result,y,x);
00230   return;
00231 }
00232 
00233 
00234 
00235 
00236 
00237 
00238 
00239 
00240 
00241 
00242 
00243 
00244 
00245 
00246 
00247 
00248 /* The function that does the work in case of an addition
00249 
00250  do_add is the function that does the addition of two SCS numbers,
00251    assuming that x.sign == y.sign, X_IND > Y_IND, x and y both
00252    non-zero. 
00253  */
00254 
00255 void inline do_add(scs_ptr result, scs_ptr x, scs_ptr y)
00256 {
00257 #if (SCS_NB_WORDS==8)  /* in this case we unroll all the loops */
00258   int carry, Diff;
00259   int r0,r1,r2,r3,r4,r5,r6,r7;
00260   
00261   Diff  = X_IND - Y_IND;
00262   R_EXP = X_EXP + Y_EXP - 1; 
00263   R_IND = X_IND;
00264   R_SGN = X_SGN;
00265 
00266   switch (Diff){
00267   case 0:
00268     r0 = X_HW[0] + Y_HW[0]; r1 = X_HW[1] + Y_HW[1]; 
00269     r2 = X_HW[2] + Y_HW[2]; r3 = X_HW[3] + Y_HW[3];
00270     r4 = X_HW[4] + Y_HW[4]; r5 = X_HW[5] + Y_HW[5];
00271     r6 = X_HW[6] + Y_HW[6]; r7 = X_HW[7] + Y_HW[7]; break;
00272   case 1:
00273     r0 = X_HW[0];           r1 = X_HW[1] + Y_HW[0];
00274     r2 = X_HW[2] + Y_HW[1]; r3 = X_HW[3] + Y_HW[2];
00275     r4 = X_HW[4] + Y_HW[3]; r5 = X_HW[5] + Y_HW[4];
00276     r6 = X_HW[6] + Y_HW[5]; r7 = X_HW[7] + Y_HW[6]; break;
00277   case 2:
00278     r0 = X_HW[0];           r1 = X_HW[1];      
00279     r2 = X_HW[2] + Y_HW[0]; r3 = X_HW[3] + Y_HW[1];
00280     r4 = X_HW[4] + Y_HW[2]; r5 = X_HW[5] + Y_HW[3];
00281     r6 = X_HW[6] + Y_HW[4]; r7 = X_HW[7] + Y_HW[5]; break;
00282   case 3:
00283     r0 = X_HW[0];           r1 = X_HW[1];     
00284     r2 = X_HW[2];           r3 = X_HW[3] + Y_HW[0];
00285     r4 = X_HW[4] + Y_HW[1]; r5 = X_HW[5] + Y_HW[2];
00286     r6 = X_HW[6] + Y_HW[3]; r7 = X_HW[7] + Y_HW[4]; break;
00287   case 4:
00288     r0 = X_HW[0];           r1 = X_HW[1];
00289     r2 = X_HW[2];           r3 = X_HW[3];
00290     r4 = X_HW[4] + Y_HW[0]; r5 = X_HW[5] + Y_HW[1]; 
00291     r6 = X_HW[6] + Y_HW[2]; r7 = X_HW[7] + Y_HW[3]; break;
00292   case 5:
00293     r0 = X_HW[0];           r1 = X_HW[1];
00294     r2 = X_HW[2];           r3 = X_HW[3];
00295     r4 = X_HW[4];           r5 = X_HW[5] + Y_HW[0];
00296     r6 = X_HW[6] + Y_HW[1]; r7 = X_HW[7] + Y_HW[2]; break;
00297   case 6:
00298     r0 = X_HW[0];           r1 = X_HW[1];
00299     r2 = X_HW[2];           r3 = X_HW[3];
00300     r4 = X_HW[4];           r5 = X_HW[5];
00301     r6 = X_HW[6] + Y_HW[0]; r7 = X_HW[7] + Y_HW[1]; break;
00302   case 7:
00303     r0 = X_HW[0];           r1 = X_HW[1];
00304     r2 = X_HW[2];           r3 = X_HW[3];
00305     r4 = X_HW[4];           r5 = X_HW[5];
00306     r6 = X_HW[6];           r7 = X_HW[7] + Y_HW[0]; break;
00307   default:
00308     /* Diff >= 8*/
00309     R_HW[0] = X_HW[0]; R_HW[1] = X_HW[1];
00310     R_HW[2] = X_HW[2]; R_HW[3] = X_HW[3];
00311     R_HW[4] = X_HW[4]; R_HW[5] = X_HW[5];
00312     R_HW[6] = X_HW[6]; R_HW[7] = X_HW[7];    return;
00313   }
00314 
00315 
00316   /* Carry propagation */
00317   
00318   carry = r7 >> SCS_NB_BITS; r6 += carry;  r7 = r7 & SCS_MASK_RADIX;
00319   carry = r6 >> SCS_NB_BITS; r5 += carry;  r6 = r6 & SCS_MASK_RADIX;
00320   carry = r5 >> SCS_NB_BITS; r4 += carry;  r5 = r5 & SCS_MASK_RADIX;
00321   carry = r4 >> SCS_NB_BITS; r3 += carry;  r4 = r4 & SCS_MASK_RADIX;
00322   carry = r3 >> SCS_NB_BITS; r2 += carry;  r3 = r3 & SCS_MASK_RADIX;
00323   carry = r2 >> SCS_NB_BITS; r1 += carry;  r2 = r2 & SCS_MASK_RADIX;
00324   carry = r1 >> SCS_NB_BITS; r0 += carry;  r1 = r1 & SCS_MASK_RADIX;
00325   carry = r0 >> SCS_NB_BITS;
00326    
00327   if (carry){
00328     R_HW[7] = r6; R_HW[6] = r5;  R_HW[5] = r4; R_HW[4] = r3; 
00329     R_HW[3] = r2; R_HW[2] = r1;  R_HW[1] = r0 & SCS_MASK_RADIX;
00330     R_HW[0] = 1 ;  
00331     R_IND  += 1;
00332   }
00333   else {
00334     R_HW[0] = r0; R_HW[1] = r1; R_HW[2] = r2; R_HW[3] = r3; 
00335     R_HW[4] = r4; R_HW[5] = r5; R_HW[6] = r6; R_HW[7] = r7; 
00336   }
00337   return;
00338 
00339 #else /* #if SCS_NB_WORDS==8*/
00340 
00341 /* This generic version is still written in such a way that
00342  it is unrollable at compile time
00343 */
00344   int i,j, s, carry, Diff;
00345   int res[SCS_NB_WORDS];
00346   
00347   Diff  = X_IND - Y_IND;
00348   R_EXP = X_EXP + Y_EXP - 1; 
00349   R_IND = X_IND;
00350   R_SGN = X_SGN;
00351 
00352   /* The easy case */
00353   if(Diff >= SCS_NB_WORDS){  
00354     scs_set(result, x); return;
00355   }
00356 
00357   /* 0 <= Diff <= (SCS_NB_WORDS-1) */
00358 
00359   carry=0;
00360   for(i=(SCS_NB_WORDS-1), j=((SCS_NB_WORDS-1)-Diff); i>=0 ; i--,j--){
00361     if (j>=0)
00362       s = X_HW[i] + Y_HW[j] + carry;
00363     else
00364       s = X_HW[i] + carry;
00365     carry = s >> SCS_NB_BITS;
00366     res[i] = s & SCS_MASK_RADIX;
00367   }
00368 
00369   if (carry){
00370     /* Carry out ! Need to shift digits */    
00371     for(i=(SCS_NB_WORDS-1); i>=1; i--)
00372       R_HW[i] =  res[i-1];
00373     
00374     R_HW[0] = 1 ;  
00375     R_IND  += 1;
00376   }
00377   else {
00378     for(i=0; i<SCS_NB_WORDS; i++)
00379       R_HW[i] =  res[i];
00380   }
00381 
00382   return;
00383 #endif  /* #if SCS_NB_WORDS==8*/
00384 
00385 } /* do_add*/
00386 
00387 
00388 
00389 
00390 
00391 
00392 
00393 
00394 /*
00395  * Compare only the mantissa of x and y without the index or sign field
00396  * Return : 
00397  * - 1    if x > y
00398  * - (-1) if x < y
00399  * - (0)  if x = y
00400  */
00401 
00402 int inline scs_cmp_mant(scs_ptr x, scs_ptr y){
00403   int i;
00404   /* Tried to unroll this loop, it doesn't work  */
00405   for(i=0; i< SCS_NB_WORDS; i++){
00406     if (X_HW[i] == Y_HW[i]) continue;
00407     else if (X_HW[i] > Y_HW[i]) return 1;
00408     else return -1;}
00409   return 0;
00410 }
00411 
00412 
00413 
00414 /*/////////////////////////////////////////////////////////////////
00415 /////////////////////// SUBTRACTION //////////////////////////////
00416 //////////////////////////////////////////////////////////////////
00417 // This procedure assumes :
00418 // -    X_IND >= Y_IND
00419 // -   X_SIGN != Y_SIGN
00420 //    neither x or y is zero
00421 //  and result = x - y  
00422 */
00423 
00424 
00425 void inline do_sub(scs_ptr result, scs_ptr x, scs_ptr y){
00426   int s, carry;
00427   int Diff, i, j, cp;
00428   int res[SCS_NB_WORDS];
00429 
00430   R_EXP = X_EXP + Y_EXP - 1;
00431   Diff  = X_IND - Y_IND;
00432   R_IND = X_IND;
00433   
00434     /* The easy case */
00435   if(Diff >= SCS_NB_WORDS){  
00436     scs_set(result, x); return;
00437   }
00438 
00439   else { 
00440     /* 0 <= Diff <= (SCS_NB_WORDS-1) */
00441     carry = 0;
00442     if(Diff==0) { 
00443       cp = scs_cmp_mant(x, y);
00444       
00445       if (cp == 0) {
00446         /* Yet another easy case: result = 0 */
00447         scs_zero(result);
00448         return;
00449       }
00450       else { /* cp <> 0 */
00451         if (cp > 0){
00452           /* x > y */
00453 
00454           R_SGN = X_SGN; 
00455           for(i=(SCS_NB_WORDS-1); i>=0 ;i--){
00456             s = X_HW[i] - Y_HW[i] - carry;
00457             carry = (s&SCS_RADIX)>>SCS_NB_BITS;
00458             res[i] = (s&SCS_RADIX) + s;
00459           }       
00460         }
00461         else { /* cp < 0  */
00462           /* x < y (change of sign) */
00463 
00464           R_SGN = - X_SGN;
00465           for(i=(SCS_NB_WORDS-1); i>=0 ;i--){
00466             s = - X_HW[i] + Y_HW[i] - carry;
00467             carry = (s&SCS_RADIX)>>SCS_NB_BITS;
00468             res[i] = (s&SCS_RADIX) + s;
00469           }
00470         }
00471       }
00472     }
00473     else {
00474       /* 1<=Diff<(SCS_NB_WORDS-1) Digits of x and y overlap but the
00475        * sign will be that of x */
00476       
00477       R_SGN = X_SGN; 
00478       for(i=(SCS_NB_WORDS-1), j=((SCS_NB_WORDS-1)-Diff); i>=0 ;i--,j--){
00479         if(j>=0)
00480           s = X_HW[i] - Y_HW[j] - carry;
00481         else
00482           s = X_HW[i] - carry;
00483         carry = (s&SCS_RADIX)>>SCS_NB_BITS;
00484         res[i] = (s&SCS_RADIX) + s;
00485       }
00486     }
00487     /* check for cancellations */
00488     i=0;
00489     while ((res[i]==0) && (i < SCS_NB_WORDS))  i++;
00490 
00491     if(i>0) { /* cancellation, shift result*/
00492       R_IND -= i;
00493       for(j=0; i<SCS_NB_WORDS; i++,j++)    R_HW[j] = res[i];
00494       for(   ; j<SCS_NB_WORDS; j++)        R_HW[j] = 0;
00495     }
00496     else {
00497       for(i=0; i<SCS_NB_WORDS; i++)
00498         R_HW[i] =  res[i];
00499     }
00500   }
00501   return;
00502 }
00503 
00504 
00505 
00506 
00507 
00508  
00509 
00510 
00511 /** SCS addition (result is a normalised SCS number).
00512 
00513  */
00514 void inline scs_add(scs_ptr result, scs_ptr x, scs_ptr y)
00515 {
00516     
00517   if (x->exception.i[HI_ENDIAN]==0){scs_set(result, y); return; }
00518   if (y->exception.i[HI_ENDIAN]==0){scs_set(result, x); return; }  
00519 
00520   if (X_SGN == Y_SGN){
00521     if(X_IND >= Y_IND)
00522       do_add(result,x,y);
00523     else
00524       do_add(result,y,x);
00525   }else {  
00526     if(X_IND>=Y_IND){
00527       do_sub(result,x,y);
00528     }else { 
00529       do_sub(result,y,x); 
00530     } 
00531   } return; 
00532 }
00533 
00534 /** SCS subtraction (result is a normalised SCS number).
00535 
00536  The arguments x, y and result may point to the same memory
00537  location. 
00538  */
00539 void inline scs_sub(scs_ptr result, scs_ptr x, scs_ptr y)
00540 {
00541   if (x->exception.i[HI_ENDIAN]==0)
00542     { scs_set(result, y); R_SGN = -R_SGN; return; }
00543   if (y->exception.i[HI_ENDIAN]==0)
00544     { scs_set(result, x); return; }
00545 
00546   if (X_SGN == Y_SGN) {
00547     /* Same sign, so it's a sub   */
00548     if(X_IND>=Y_IND)
00549       do_sub(result,x,y);
00550     else{
00551       do_sub(result,y,x);
00552       R_SGN = -R_SGN;
00553     }
00554   }else {
00555     if(X_IND>=Y_IND)
00556       do_add(result,x,y);
00557     else{
00558       do_add(result,y,x);
00559       R_SGN = -R_SGN; 
00560     }
00561   }
00562   return;
00563 }
00564 
00565 

Generated on Tue Jun 17 10:15:51 2003 for SCSLib by doxygen1.2.15