M4RI 1.0.1
packedmatrix.h
Go to the documentation of this file.
00001 
00009 #ifndef PACKEDMATRIX_H
00010 #define PACKEDMATRIX_H
00011 /*******************************************************************
00012 *
00013 *                M4RI: Linear Algebra over GF(2)
00014 *
00015 *    Copyright (C) 2007, 2008 Gregory Bard <bard@fordham.edu>
00016 *    Copyright (C) 2008-2010 Martin Albrecht <M.R.Albrecht@rhul.ac.uk>
00017 *
00018 *  Distributed under the terms of the GNU General Public License (GPL)
00019 *  version 2 or higher.
00020 *
00021 *    This code is distributed in the hope that it will be useful,
00022 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
00023 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00024 *    General Public License for more details.
00025 *
00026 *  The full text of the GPL is available at:
00027 *
00028 *                  http://www.gnu.org/licenses/
00029 *
00030 ********************************************************************/
00031 
00032 #include <math.h>
00033 #include <assert.h>
00034 #include <stdio.h>
00035 
00036 #ifdef HAVE_SSE2
00037 #include <emmintrin.h>
00038 #endif
00039 
00040 
00041 #ifdef HAVE_SSE2
00042 
00049 #define SSE2_CUTOFF 20
00050 #endif
00051 
00060 #define MZD_MUL_BLOCKSIZE MIN(((int)sqrt((double)(4*CPU_L2_CACHE)))/2,2048)
00061 
00062 
00069 typedef struct {
00075   mmb_t *blocks;
00076 
00081   size_t nrows;
00082 
00087   size_t ncols;
00088 
00092   size_t width; 
00093 
00098   size_t offset;
00099   
00105   word **rows;
00106 
00107 } mzd_t;
00108 
00119 mzd_t *mzd_init(const size_t r, const size_t c);
00120 
00127 void mzd_free(mzd_t *A);
00128 
00129 
00151 mzd_t *mzd_init_window(const mzd_t *M, const size_t lowr, const size_t lowc, const size_t highr, const size_t highc);
00152 
00159 #define mzd_free_window mzd_free
00160 
00169 static inline void mzd_row_swap(mzd_t *M, const size_t rowa, const size_t rowb) {
00170   if(rowa == rowb)
00171     return;
00172   size_t i;
00173   size_t width = M->width - 1;
00174   word *a = M->rows[rowa];
00175   word *b = M->rows[rowb];
00176   word tmp; 
00177   word mask_begin = RIGHT_BITMASK(RADIX - M->offset);
00178   word mask_end = LEFT_BITMASK( (M->offset + M->ncols)%RADIX );
00179 
00180   if (width != 0) {
00181     tmp = a[0];
00182     a[0] = (a[0] & ~mask_begin) | (b[0] & mask_begin);
00183     b[0] = (b[0] & ~mask_begin) | (tmp & mask_begin);
00184     
00185     for(i = 1; i<width; i++) {
00186       tmp = a[i];
00187       a[i] = b[i];
00188       b[i] = tmp;
00189     }
00190     tmp = a[width];
00191     a[width] = (a[width] & ~mask_end) | (b[width] & mask_end);
00192     b[width] = (b[width] & ~mask_end) | (tmp & mask_end);
00193     
00194   } else {
00195     tmp = a[0];
00196     a[0] = (a[0] & ~mask_begin) | (b[0] & mask_begin & mask_end) | (a[0] & ~mask_end);
00197     b[0] = (b[0] & ~mask_begin) | (tmp & mask_begin & mask_end) | (b[0] & ~mask_end);
00198   }
00199 }
00200 
00213 void mzd_copy_row(mzd_t* B, size_t i, const mzd_t* A, size_t j);
00214 
00223 void mzd_col_swap(mzd_t *M, const size_t cola, const size_t colb);
00224 
00235 static inline void mzd_col_swap_in_rows(mzd_t *M, const size_t cola, const size_t colb, const size_t start_row, const size_t stop_row) {
00236   if (cola == colb)
00237     return;
00238 
00239   const size_t _cola = cola + M->offset;
00240   const size_t _colb = colb + M->offset;
00241 
00242   const size_t a_word = _cola/RADIX;
00243   const size_t b_word = _colb/RADIX;
00244   const size_t a_bit = _cola%RADIX;
00245   const size_t b_bit = _colb%RADIX;
00246   
00247   word a, b, *base;
00248 
00249   size_t i;
00250   
00251   if(a_word == b_word) {
00252     const word ai = RADIX - a_bit - 1;
00253     const word bi = RADIX - b_bit - 1;
00254     for (i=start_row; i<stop_row; i++) {
00255       base = (M->rows[i] + a_word);
00256       register word b = *base;
00257       register word x = ((b >> ai) ^ (b >> bi)) & 1; // XOR temporary
00258       *base = b ^ ((x << ai) | (x << bi));
00259     }
00260     return;
00261   }
00262 
00263   const word a_bm = (ONE<<(RADIX - (a_bit) - 1));
00264   const word b_bm = (ONE<<(RADIX - (b_bit) - 1));
00265 
00266   if(a_bit > b_bit) {
00267     const size_t offset = a_bit - b_bit;
00268     for (i=start_row; i<stop_row; i++) {
00269       base = M->rows[i];
00270       a = *(base + a_word);
00271       b = *(base + b_word);
00272 
00273       a ^= (b & b_bm) >> offset;
00274       b ^= (a & a_bm) << offset;
00275       a ^= (b & b_bm) >> offset;
00276 
00277       *(base + a_word) = a;
00278       *(base + b_word) = b;
00279     }
00280   } else {
00281     const size_t offset = b_bit - a_bit;
00282     for (i=start_row; i<stop_row; i++) {
00283       base = M->rows[i];
00284       a = *(base + a_word);
00285       b = *(base + b_word);
00286 
00287       a ^= (b & b_bm) << offset;
00288       b ^= (a & a_bm) >> offset;
00289       a ^= (b & b_bm) << offset;
00290       *(base + a_word) = a;
00291       *(base + b_word) = b;
00292     }
00293   }
00294 
00295 }
00296 
00308 static inline BIT mzd_read_bit(const mzd_t *M, const size_t row, const size_t col ) {
00309   return GET_BIT(M->rows[row][(col+M->offset)/RADIX], (col+M->offset) % RADIX);
00310 }
00311 
00324 static inline void mzd_write_bit(mzd_t *M, const size_t row, const size_t col, const BIT value) {
00325   if (value==1)
00326     SET_BIT(M->rows[row][(col+M->offset)/RADIX], (col+M->offset) % RADIX);
00327   else
00328     CLR_BIT(M->rows[row][(col+M->offset)/RADIX], (col+M->offset) % RADIX);
00329 }
00330 
00339 void mzd_print(const mzd_t *M);
00340 
00347 void mzd_print_tight(const mzd_t *M);
00348 
00359 /*void mzd_row_add_offset(mzd_t *M, size_t dstrow, size_t srcrow, size_t coloffset); */
00360 static inline void mzd_row_add_offset(mzd_t *M, size_t dstrow, size_t srcrow, size_t coloffset) {
00361   coloffset += M->offset;
00362   const size_t startblock= coloffset/RADIX;
00363   size_t wide = M->width - startblock;
00364   word *src = M->rows[srcrow] + startblock;
00365   word *dst = M->rows[dstrow] + startblock;
00366 
00367   if(!wide)
00368     return;
00369 
00370   word temp = *src++;
00371   if (coloffset%RADIX)
00372     temp = RIGHTMOST_BITS(temp, (RADIX-(coloffset%RADIX)-1));
00373   *dst++ ^= temp;
00374   wide--;
00375 
00376 #ifdef HAVE_SSE2 
00377   if (ALIGNMENT(src,16)==8 && wide) {
00378     *dst++ ^= *src++;
00379     wide--;
00380   }
00381   __m128i *__src = (__m128i*)src;
00382   __m128i *__dst = (__m128i*)dst;
00383   const __m128i *eof = (__m128i*)((unsigned long)(src + wide) & ~0xF);
00384   __m128i xmm1;
00385   
00386   while(__src < eof) {
00387     xmm1 = _mm_xor_si128(*__dst, *__src++);
00388     *__dst++ = xmm1;
00389   }
00390   src  = (word*)__src;
00391   dst = (word*)__dst;
00392   wide = ((sizeof(word)*wide)%16)/sizeof(word);
00393 #endif
00394   size_t i;
00395   for(i=0; i<wide; i++) {
00396     dst[i] ^= src[i];
00397   }
00398 }
00399 
00411 void mzd_row_add(mzd_t *M, const size_t sourcerow, const size_t destrow);
00412 
00427 mzd_t *mzd_transpose(mzd_t *DST, const mzd_t *A );
00428 
00443 mzd_t *mzd_mul_naive(mzd_t *C, const mzd_t *A, const mzd_t *B);
00444 
00459 mzd_t *mzd_addmul_naive(mzd_t *C, const mzd_t *A, const mzd_t *B);
00460 
00472 mzd_t *_mzd_mul_naive(mzd_t *C, const mzd_t *A, const mzd_t *B, const int clear);
00473 
00483 mzd_t *_mzd_mul_va(mzd_t *C, const mzd_t *v, const mzd_t *A, const int clear);
00484 
00495 void mzd_randomize(mzd_t *M);
00496 
00511 void mzd_set_ui(mzd_t *M, const unsigned value);
00512 
00528 int mzd_gauss_delayed(mzd_t *M, const size_t startcol, const int full);
00529 
00545 int mzd_echelonize_naive(mzd_t *M, const int full);
00546 
00556 BIT mzd_equal(const mzd_t *A, const mzd_t *B );
00557 
00571 int mzd_cmp(const mzd_t *A, const mzd_t *B);
00572 
00582 mzd_t *mzd_copy(mzd_t *DST, const mzd_t *A);
00583 
00604 mzd_t *mzd_concat(mzd_t *C, const mzd_t *A, const mzd_t *B);
00605 
00625 mzd_t *mzd_stack(mzd_t *C, const mzd_t *A, const mzd_t *B);
00626 
00639 mzd_t *mzd_submatrix(mzd_t *S, const mzd_t *M, const size_t lowr, const size_t lowc, const size_t highr, const size_t highc);
00640 
00654 mzd_t *mzd_invert_naive(mzd_t *INV, mzd_t *A, const mzd_t *I);
00655 
00667 mzd_t *mzd_add(mzd_t *C, const mzd_t *A, const mzd_t *B);
00668 
00679 mzd_t *_mzd_add(mzd_t *C, const mzd_t *A, const mzd_t *B);
00680 
00691 #define mzd_sub mzd_add
00692 
00703 #define _mzd_sub _mzd_add
00704 
00725 void mzd_combine(mzd_t * DST, const size_t row3, const size_t startblock3,
00726                  const mzd_t * SC1, const size_t row1, const size_t startblock1, 
00727                  const mzd_t * SC2, const size_t row2, const size_t startblock2);
00728 
00738 static inline word mzd_read_bits(const mzd_t *M, const size_t x, const size_t y, const int n) {
00739   word temp;
00740 
00741   /* there are two possible situations. Either all bits are in one
00742    * word or they are spread across two words. */
00743 
00744   if ( ((y+M->offset)%RADIX + n - 1) < RADIX ) {
00745     /* everything happens in one word here */
00746     temp =  M->rows[x][(y+M->offset) / RADIX]; /* get the value */
00747     temp <<= (y+M->offset)%RADIX; /* clear upper bits */
00748     temp >>= RADIX - n; /* clear lower bits and move to correct position.*/
00749     return temp;
00750 
00751   } else {
00752     /* two words are affected */
00753     const size_t block = (y+M->offset) / RADIX; /* correct block */
00754     const size_t spot  = (y+M->offset+n) % RADIX; /* correct offset */
00755     /* make room by shifting spot times to the right, and add stuff from the second word */
00756     temp = (M->rows[x][block] << spot) | ( M->rows[x][block + 1] >> (RADIX - spot) ); 
00757     return (temp << (RADIX-n)) >> (RADIX-n); /* clear upper bits and return */
00758    }
00759 }
00760 
00761 
00772 static inline void mzd_xor_bits(const mzd_t *M, const size_t x, const size_t y, const int n, word values) {
00773   word *temp;
00774 
00775   /* there are two possible situations. Either all bits are in one
00776    * word or they are spread across two words. */
00777 
00778   if ( ((y+M->offset)%RADIX + n - 1) < RADIX ) {
00779     /* everything happens in one word here */
00780     temp =  M->rows[x] + (y+M->offset) / RADIX;
00781     *temp ^= values<<(RADIX-((y+M->offset)%RADIX)-n);
00782 
00783   } else {
00784     /* two words are affected */
00785     const size_t block = (y+M->offset) / RADIX; /* correct block */
00786     const size_t spot  = (y+M->offset+n) % RADIX; /* correct offset */
00787     M->rows[x][block] ^= values >> (spot);
00788     M->rows[x][block + 1] ^= values<<(RADIX-spot);
00789   }
00790 }
00791 
00802 static inline void mzd_and_bits(const mzd_t *M, const size_t x, const size_t y, const int n, word values) {
00803   word *temp;
00804 
00805   /* there are two possible situations. Either all bits are in one
00806    * word or they are spread across two words. */
00807 
00808   if ( ((y+M->offset)%RADIX + n - 1) < RADIX ) {
00809     /* everything happens in one word here */
00810     temp =  M->rows[x] + (y+M->offset) / RADIX;
00811     *temp &= values<<(RADIX-((y+M->offset)%RADIX)-n);
00812 
00813   } else {
00814     /* two words are affected */
00815     const size_t block = (y+M->offset) / RADIX; /* correct block */
00816     const size_t spot  = (y+M->offset+n) % RADIX; /* correct offset */
00817     M->rows[x][block] &= values >> (spot);
00818     M->rows[x][block + 1] &= values<<(RADIX-spot);
00819   }
00820 }
00821 
00831 static inline void mzd_clear_bits(const mzd_t *M, const size_t x, const size_t y, const int n) {
00832   word temp;
00833 
00834   /* there are two possible situations. Either all bits are in one
00835    * word or they are spread across two words. */
00836 
00837   if ( ((y+M->offset)%RADIX + n - 1) < RADIX ) {
00838     /* everything happens in one word here */
00839     temp =  M->rows[x][(y+M->offset) / RADIX];
00840     temp <<= (y+M->offset)%RADIX; /* clear upper bits */
00841     temp >>= RADIX-n; /* clear lower bits and move to correct position.*/
00842     temp <<= RADIX-n - (y+M->offset)%RADIX;
00843     M->rows[x][(y+M->offset) / RADIX] ^= temp;
00844   } else {
00845     /* two words are affected */
00846     const size_t block = (y+M->offset) / RADIX; /* correct block */
00847     const size_t spot  = (y+M->offset+n) % RADIX; /* correct offset */
00848     M->rows[x][block]   ^=  M->rows[x][block] & ((ONE<<(n-spot))-1);
00849     M->rows[x][block+1] ^= (M->rows[x][block+1]>>(RADIX-spot))<<(RADIX-spot);
00850   }
00851 }
00852 
00859 int mzd_is_zero(mzd_t *A);
00860 
00869 void mzd_row_clear_offset(mzd_t *M, const size_t row, const size_t coloffset);
00870 
00887 int mzd_find_pivot(mzd_t *M, size_t start_row, size_t start_col, size_t *r, size_t *c);
00888 
00889 
00902 double mzd_density(mzd_t *A, int res);
00903 
00918 double _mzd_density(mzd_t *A, int res, size_t r, size_t c);
00919 
00920 
00929 size_t mzd_first_zero_row(mzd_t *A);
00930 
00931 #endif //PACKEDMATRIX_H