00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include "scs.h"
00030 #include "scs_private.h"
00031
00032 #if 0
00033 void pr(char* s,double d) {
00034 scs_db_number x;
00035 x.d=d;
00036 printf(s);printf(" ");
00037 printf("%8x%8x . 2^%d (%8f %8x %8x) \n",
00038 (x.i[HI_ENDIAN]&0x000FFFFF)+0x00100000,
00039 x.i[LO_ENDIAN],
00040 (x.i[HI_ENDIAN]>>20)-1023,
00041 x.d,
00042 x.i[HI_ENDIAN],
00043 x.i[LO_ENDIAN]);
00044 }
00045 #endif
00046
00047
00048
00049
00050
00051
00052
00053
00054 #ifdef SCS_USE_FLT_MULT
00055
00056
00057
00058
00059 #define SCS_CARRY_PROPAGATE(r1,r0,tmp) \
00060 {tmp=((r1-SCS_FLT_TRUNC_CST)+SCS_FLT_SHIFT_CST)-SCS_FLT_SHIFT_CST; r1-= tmp; r0+=tmp*SCS_RADIX_MONE_DOUBLE;}
00061
00062 typedef double SCS_CONVERSION_MUL;
00063
00064 #else
00065
00066
00067 #define SCS_CARRY_PROPAGATE(r1,r0,tmp) \
00068 {tmp = r1>>SCS_NB_BITS; r0 += tmp; r1 -= (tmp<<SCS_NB_BITS);}
00069 typedef unsigned long long int SCS_CONVERSION_MUL;
00070 #endif
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089 #if (SCS_NB_WORDS==4)
00090
00091 void scs_mul(scs_ptr result, scs_ptr x, scs_ptr y){
00092 SCS_CONVERSION_MUL val, tmp;
00093 SCS_CONVERSION_MUL r0,r1,r2,r3,r4;
00094 SCS_CONVERSION_MUL x0,x1,x2,x3;
00095 int y0,y1,y2,y3;
00096
00097 R_EXP = X_EXP * Y_EXP;
00098 R_SGN = X_SGN * Y_SGN;
00099 R_IND = X_IND + Y_IND;
00100
00101
00102 x3=X_HW[3]; y3=Y_HW[3]; x2=X_HW[2]; y2=Y_HW[2];
00103 x1=X_HW[1]; y1=Y_HW[1]; x0=X_HW[0]; y0=Y_HW[0];
00104
00105 r4 = x3*y1 + x2*y2 + x1*y3;
00106 r3 = x3*y0 + x2*y1 + x1*y2 + x0*y3;
00107 r2 = x2*y0 + x1*y1 + x0*y2;
00108 r1 = x1*y0 + x0*y1 ;
00109 r0 = x0*y0;
00110
00111 val= 0;
00112
00113 SCS_CARRY_PROPAGATE(r4,r3,tmp)
00114 SCS_CARRY_PROPAGATE(r3,r2,tmp)
00115 SCS_CARRY_PROPAGATE(r2,r1,tmp)
00116 SCS_CARRY_PROPAGATE(r1,r0,tmp)
00117 SCS_CARRY_PROPAGATE(r0,val,tmp)
00118
00119 if(val != 0){
00120
00121 R_HW[0] = val; R_HW[1] = r0; R_HW[2] = r1; R_HW[3] = r2;
00122 R_IND += 1;
00123 }
00124 else {
00125 R_HW[0] = r0; R_HW[1] = r1; R_HW[2] = r2; R_HW[3] = r3;
00126 }
00127
00128 }
00129
00130
00131 void scs_square(scs_ptr result, scs_ptr x){
00132 SCS_CONVERSION_MUL r0,r1,r2,r3,r4;
00133 SCS_CONVERSION_MUL x0,x1,x2,x3;
00134 SCS_CONVERSION_MUL val, tmp;
00135
00136
00137 R_EXP = X_EXP * X_EXP;
00138 R_IND = X_IND + X_IND;
00139 R_SGN = 1;
00140
00141
00142
00143
00144 x3=X_HW[3]; x2=X_HW[2]; x1=X_HW[1]; x0=X_HW[0];
00145
00146 r0 = x0*x0;
00147 r1 = (x0*x1)* 2 ;
00148 r2 = x1*x1 + (x0*x2*2);
00149 r3 = (x1*x2 + x0*x3)* 2;
00150 r4 = x2*x2 + (x1*x3)* 2;
00151
00152 val= 0;
00153
00154 SCS_CARRY_PROPAGATE(r4,r3,tmp)
00155 SCS_CARRY_PROPAGATE(r3,r2,tmp)
00156 SCS_CARRY_PROPAGATE(r2,r1,tmp)
00157 SCS_CARRY_PROPAGATE(r1,r0,tmp)
00158 SCS_CARRY_PROPAGATE(r0,val,tmp)
00159
00160 if(val != 0){
00161
00162 R_HW[0] = val; R_HW[1] = r0; R_HW[2] = r1; R_HW[3] = r2;
00163 R_IND += 1;
00164 }
00165 else {
00166 R_HW[0] = r0; R_HW[1] = r1; R_HW[2] = r2; R_HW[3] = r3;
00167 }
00168
00169 }
00170
00171
00172
00173
00174
00175
00176
00177
00178 #elif (SCS_NB_WORDS==8)
00179
00180 void scs_mul(scs_ptr result, scs_ptr x, scs_ptr y){
00181 SCS_CONVERSION_MUL val, tmp;
00182 SCS_CONVERSION_MUL r0,r1,r2,r3,r4,r5,r6,r7,r8;
00183 SCS_CONVERSION_MUL x0,x1,x2,x3,x4,x5,x6,x7;
00184 int y0,y1,y2,y3,y4,y5,y6,y7;
00185
00186 R_EXP = X_EXP * Y_EXP;
00187 R_SGN = X_SGN * Y_SGN;
00188 R_IND = X_IND + Y_IND;
00189
00190
00191 x7=X_HW[7]; y7=Y_HW[7]; x6=X_HW[6]; y6=Y_HW[6];
00192 x5=X_HW[5]; y5=Y_HW[5]; x4=X_HW[4]; y4=Y_HW[4];
00193 x3=X_HW[3]; y3=Y_HW[3]; x2=X_HW[2]; y2=Y_HW[2];
00194 x1=X_HW[1]; y1=Y_HW[1]; x0=X_HW[0]; y0=Y_HW[0];
00195
00196 r8 = x7*y1 + x6*y2 + x5*y3 + x4*y4 + x3*y5 + x2*y6 + x1*y7;
00197 r7 = x7*y0 + x6*y1 + x5*y2 + x4*y3 + x3*y4 + x2*y5 + x1*y6 + x0*y7;
00198 r6 = x6*y0 + x5*y1 + x4*y2 + x3*y3 + x2*y4 + x1*y5 + x0*y6;
00199 r5 = x5*y0 + x4*y1 + x3*y2 + x2*y3 + x1*y4 + x0*y5;
00200 r4 = x4*y0 + x3*y1 + x2*y2 + x1*y3 + x0*y4 ;
00201 r3 = x3*y0 + x2*y1 + x1*y2 + x0*y3;
00202 r2 = x2*y0 + x1*y1 + x0*y2;
00203 r1 = x1*y0 + x0*y1 ;
00204 r0 = x0*y0 ;
00205
00206 val= 0;
00207
00208 SCS_CARRY_PROPAGATE(r8,r7,tmp)
00209 SCS_CARRY_PROPAGATE(r7,r6,tmp)
00210 SCS_CARRY_PROPAGATE(r6,r5,tmp)
00211 SCS_CARRY_PROPAGATE(r5,r4,tmp)
00212 SCS_CARRY_PROPAGATE(r4,r3,tmp)
00213 SCS_CARRY_PROPAGATE(r3,r2,tmp)
00214 SCS_CARRY_PROPAGATE(r2,r1,tmp)
00215 SCS_CARRY_PROPAGATE(r1,r0,tmp)
00216 SCS_CARRY_PROPAGATE(r0,val,tmp)
00217
00218 if(val != 0){
00219
00220 R_HW[0] = val; R_HW[1] = r0; R_HW[2] = r1; R_HW[3] = r2;
00221 R_HW[4] = r3; R_HW[5] = r4; R_HW[6] = r5; R_HW[7] = r6;
00222 R_IND += 1;
00223 }
00224 else {
00225 R_HW[0] = r0; R_HW[1] = r1; R_HW[2] = r2; R_HW[3] = r3;
00226 R_HW[4] = r4; R_HW[5] = r5; R_HW[6] = r6; R_HW[7] = r7;
00227 }
00228
00229 }
00230
00231
00232 void scs_square(scs_ptr result, scs_ptr x){
00233 SCS_CONVERSION_MUL r0,r1,r2,r3,r4,r5,r6,r7,r8;
00234 SCS_CONVERSION_MUL x0,x1,x2,x3,x4,x5,x6,x7;
00235 SCS_CONVERSION_MUL val, tmp;
00236
00237
00238 R_EXP = X_EXP * X_EXP;
00239 R_IND = X_IND + X_IND;
00240 R_SGN = 1;
00241
00242
00243
00244
00245 x7=X_HW[7]; x6=X_HW[6]; x5=X_HW[5]; x4=X_HW[4];
00246 x3=X_HW[3]; x2=X_HW[2]; x1=X_HW[1]; x0=X_HW[0];
00247
00248 r0 = x0*x0;
00249 r1 = (x0*x1)* 2 ;
00250 r2 = x1*x1 + (x0*x2*2);
00251 r3 = (x1*x2 + x0*x3)* 2;
00252 r4 = x2*x2 + (x1*x3 + x0*x4)* 2;
00253 r5 = (x2*x3 + x1*x4 + x0*x5)* 2;
00254 r6 = x3*x3 + (x2*x4 + x1*x5 + x0*x6)* 2;
00255 r7 = (x3*x4 + x2*x5 + x1*x6 + x0*x7)* 2;
00256 r8 = x4*x4 + (x3*x5 + x2*x6 + x1*x7)* 2;
00257
00258 val= 0;
00259
00260 SCS_CARRY_PROPAGATE(r8,r7,tmp)
00261 SCS_CARRY_PROPAGATE(r7,r6,tmp)
00262 SCS_CARRY_PROPAGATE(r6,r5,tmp)
00263 SCS_CARRY_PROPAGATE(r5,r4,tmp)
00264 SCS_CARRY_PROPAGATE(r4,r3,tmp)
00265 SCS_CARRY_PROPAGATE(r3,r2,tmp)
00266 SCS_CARRY_PROPAGATE(r2,r1,tmp)
00267 SCS_CARRY_PROPAGATE(r1,r0,tmp)
00268 SCS_CARRY_PROPAGATE(r0,val,tmp)
00269
00270 if(val != 0){
00271
00272 R_HW[0] = val; R_HW[1] = r0; R_HW[2] = r1; R_HW[3] = r2;
00273 R_HW[4] = r3; R_HW[5] = r4; R_HW[6] = r5; R_HW[7] = r6;
00274 R_IND += 1;
00275 }
00276 else {
00277 R_HW[0] = r0; R_HW[1] = r1; R_HW[2] = r2; R_HW[3] = r3;
00278 R_HW[4] = r4; R_HW[5] = r5; R_HW[6] = r6; R_HW[7] = r7;
00279 }
00280
00281 }
00282
00283
00284
00285
00286 #else
00287
00288
00289
00290
00291 void scs_mul(scs_ptr result, scs_ptr x, scs_ptr y){
00292 SCS_CONVERSION_MUL RES[SCS_NB_WORDS+1];
00293 SCS_CONVERSION_MUL val, tmp;
00294 int i, j;
00295
00296 R_EXP = X_EXP * Y_EXP;
00297 R_SGN = X_SGN * Y_SGN;
00298 R_IND = X_IND + Y_IND;
00299
00300 for(i=0; i<=SCS_NB_WORDS; i++)
00301 RES[i]=0;
00302
00303
00304
00305
00306 #ifdef SCS_TYPECPU_X86
00307
00308
00309
00310 {
00311 scs_db_number t;
00312
00313 for(j=0; j<(SCS_NB_WORDS); j++) {
00314 __asm__ volatile("mull %3"
00315 : "=a" (t.i[LO_ENDIAN]), "=d" (t.i[HI_ENDIAN])
00316 : "a" (X_HW[0]) , "g" (Y_HW[j]));
00317 RES[j] += t.l;
00318 }
00319
00320 for(i=1 ; i<SCS_NB_WORDS; i++){
00321 for(j=0; j<(SCS_NB_WORDS-i); j++){
00322 __asm__ volatile("mull %3"
00323 : "=a" (t.i[LO_ENDIAN]), "=d" (t.i[HI_ENDIAN])
00324 : "a" (X_HW[i]) , "g" (Y_HW[j]));
00325 RES[i+j] += t.l;
00326 }
00327 __asm__ volatile("mull %3"
00328 : "=a" (t.i[LO_ENDIAN]), "=d" (t.i[HI_ENDIAN])
00329 : "a" (X_HW[i]) , "g" (Y_HW[j]));
00330
00331 RES[SCS_NB_WORDS] += t.l;
00332 }
00333 }
00334
00335 #else
00336
00337
00338 tmp = X_HW[0];
00339 for(j=0; j<(SCS_NB_WORDS); j++)
00340 RES[j] += tmp * Y_HW[j];
00341
00342 for(i=1 ; i<SCS_NB_WORDS; i++){
00343 tmp = X_HW[i];
00344 for(j=0; j<(SCS_NB_WORDS-i); j++)
00345 RES[i+j] += tmp * Y_HW[j];
00346 RES[SCS_NB_WORDS] += tmp * Y_HW[j];
00347 }
00348 #endif
00349
00350 val = 0;
00351
00352
00353 for(i=SCS_NB_WORDS; i>0; i--)
00354 SCS_CARRY_PROPAGATE(RES[i],RES[i-1],tmp)
00355 SCS_CARRY_PROPAGATE(RES[0],val,tmp)
00356
00357
00358
00359 if(val != 0){
00360
00361 R_HW[0] = val;
00362 for(i=1; i<SCS_NB_WORDS; i++)
00363 R_HW[i] = RES[i-1];
00364
00365 R_IND += 1;
00366 }else {
00367 for(i=0; i<SCS_NB_WORDS; i++)
00368 R_HW[i] = RES[i];
00369 }
00370 }
00371
00372
00373
00374
00375
00376
00377
00378 void scs_square(scs_ptr result, scs_ptr x){
00379 SCS_CONVERSION_MUL RES[SCS_NB_WORDS+1];
00380 SCS_CONVERSION_MUL val, tmp;
00381 int i, j;
00382
00383
00384 R_EXP = X_EXP * X_EXP;
00385 R_SGN = 1;
00386 R_IND = X_IND + X_IND;
00387
00388
00389 for(i=0; i<=SCS_NB_WORDS; i++)
00390 RES[i] = 0;
00391
00392
00393 tmp = (SCS_CONVERSION_MUL)X_HW[0];
00394 for(j=1; j<SCS_NB_WORDS; j++)
00395 RES[j] += tmp * X_HW[j];
00396 for(i=1 ; i<(SCS_NB_WORDS+1)/2; i++){
00397 tmp = (SCS_CONVERSION_MUL)X_HW[i];
00398 for(j=i+1; j<(SCS_NB_WORDS-i); j++)
00399 RES[i+j] += tmp * X_HW[j];
00400 RES[SCS_NB_WORDS] += tmp * X_HW[SCS_NB_WORDS-i];
00401 }
00402
00403
00404 for(i=0; i<=SCS_NB_WORDS; i++)
00405 RES[i] *=2;
00406
00407
00408 for(i=0, j=0; i<=SCS_NB_WORDS; i+=2, j++){
00409 RES[i] += (SCS_CONVERSION_MUL)X_HW[j] * X_HW[j];
00410 }
00411
00412 val = 0;
00413
00414 for(i=SCS_NB_WORDS; i>0; i--)
00415 SCS_CARRY_PROPAGATE(RES[i],RES[i-1],tmp)
00416
00417 SCS_CARRY_PROPAGATE(RES[0],val,tmp)
00418
00419
00420
00421 if(val != 0){
00422
00423 R_HW[0] = val;
00424 for(i=1; i<SCS_NB_WORDS; i++)
00425 R_HW[i] = RES[i-1];
00426
00427 R_IND += 1;
00428 }else {
00429 for(i=0; i<SCS_NB_WORDS; i++)
00430 R_HW[i] = RES[i];
00431 }
00432
00433 }
00434
00435
00436
00437
00438
00439 #endif
00440
00441
00442
00443
00444
00445
00446
00447
00448 void scs_mul_ui(scs_ptr x, unsigned int val_int){
00449 SCS_CONVERSION_MUL val, tmp, vald, rr;
00450 int i;
00451
00452 if (val_int == 0)
00453 X_EXP = 0;
00454
00455 vald = val_int;
00456
00457 val = 0;
00458 rr = 0;
00459 for(i=(SCS_NB_WORDS-1); i>=0; i--){
00460 val += vald * X_HW[i];
00461 SCS_CARRY_PROPAGATE(val, rr, tmp)
00462 X_HW[i] = val;
00463 val = rr;
00464 rr = 0;
00465 }
00466
00467 if(val != 0){
00468
00469 for(i=(SCS_NB_WORDS-1); i>0; i--)
00470 X_HW[i] = X_HW[i-1];
00471
00472 X_HW[0] = val;
00473 X_IND += 1;
00474 }
00475
00476 return;
00477 }