M4RI 1.0.1
xor.h
/**
 * \file xor.h
 * \brief Routines for adding (XORing) several rows of words at once,
 * with SSE2-accelerated variants where available.
 */

#ifndef XOR_H
#define XOR_H

#ifdef HAVE_SSE2
#include <emmintrin.h>
#endif

/*******************************************************************
 *
 *                 M4RI:  Linear Algebra over GF(2)
 *
 *    Copyright (C) 2008-2010  Martin Albrecht <martinralbrecht@googlemail.com>
 *
 *  Distributed under the terms of the GNU General Public License (GPL)
 *  version 2 or higher.
 *
 *    This code is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *    General Public License for more details.
 *
 *  The full text of the GPL is available at:
 *
 *                  http://www.gnu.org/licenses/
 *
 ********************************************************************/

/**
 * Compute c[i] ^= t1[i] ^ t2[i] ^ ... ^ t8[i] for 0 <= i < wide,
 * i.e. add eight rows of words to the row c at once.
 */
#ifdef HAVE_SSE2
static inline void _mzd_combine8(word *c, word *t1, word *t2, word *t3, word *t4, word *t5, word *t6, word *t7, word *t8, size_t wide) {
  size_t i;
  /* assuming t1 ... t8 are aligned, but c might not be */
  if (ALIGNMENT(c,16)==0) {
    __m128i *__c = (__m128i*)c;
    __m128i *__t1 = (__m128i*)t1;
    __m128i *__t2 = (__m128i*)t2;
    __m128i *__t3 = (__m128i*)t3;
    __m128i *__t4 = (__m128i*)t4;
    __m128i *__t5 = (__m128i*)t5;
    __m128i *__t6 = (__m128i*)t6;
    __m128i *__t7 = (__m128i*)t7;
    __m128i *__t8 = (__m128i*)t8;
    const __m128i *eof = (__m128i*)((unsigned long)(c + wide) & ~0xF);
    __m128i xmm1;

    /* process 128 bits at a time */
    while(__c < eof) {
      xmm1 = _mm_xor_si128(*__c, *__t1++);
      xmm1 = _mm_xor_si128(xmm1, *__t2++);
      xmm1 = _mm_xor_si128(xmm1, *__t3++);
      xmm1 = _mm_xor_si128(xmm1, *__t4++);
      xmm1 = _mm_xor_si128(xmm1, *__t5++);
      xmm1 = _mm_xor_si128(xmm1, *__t6++);
      xmm1 = _mm_xor_si128(xmm1, *__t7++);
      xmm1 = _mm_xor_si128(xmm1, *__t8++);
      *__c++ = xmm1;
    }
    /* fall through to the word-wise loop for the remaining tail */
    c  = (word*)__c;
    t1 = (word*)__t1;
    t2 = (word*)__t2;
    t3 = (word*)__t3;
    t4 = (word*)__t4;
    t5 = (word*)__t5;
    t6 = (word*)__t6;
    t7 = (word*)__t7;
    t8 = (word*)__t8;
    wide = ((sizeof(word)*wide)%16)/sizeof(word);
  }
  for(i=0; i<wide; i++) {
    c[i] ^= t1[i] ^ t2[i] ^ t3[i] ^ t4[i] ^ t5[i] ^ t6[i] ^ t7[i] ^ t8[i];
  }
}
#else

/* Plain C fallback; the caller must provide the loop counter ii. */
#define _mzd_combine8(c,t1,t2,t3,t4,t5,t6,t7,t8,wide) for(ii=0; ii<wide ; ii++) c[ii] ^= t1[ii] ^ t2[ii] ^ t3[ii] ^ t4[ii] ^ t5[ii] ^ t6[ii] ^ t7[ii] ^ t8[ii]

#endif

/**
 * Compute c[i] ^= t1[i] ^ t2[i] ^ t3[i] ^ t4[i] for 0 <= i < wide,
 * i.e. add four rows of words to the row c at once.
 */
#ifdef HAVE_SSE2
static inline void _mzd_combine4(word *c, word *t1, word *t2, word *t3, word *t4, size_t wide) {
  size_t i;
  /* assuming t1 ... t4 are aligned, but c might not be */
  if (ALIGNMENT(c,16)==0) {
    __m128i *__c = (__m128i*)c;
    __m128i *__t1 = (__m128i*)t1;
    __m128i *__t2 = (__m128i*)t2;
    __m128i *__t3 = (__m128i*)t3;
    __m128i *__t4 = (__m128i*)t4;
    const __m128i *eof = (__m128i*)((unsigned long)(c + wide) & ~0xF);
    __m128i xmm1;

    while(__c < eof) {
      xmm1 = _mm_xor_si128(*__c, *__t1++);
      xmm1 = _mm_xor_si128(xmm1, *__t2++);
      xmm1 = _mm_xor_si128(xmm1, *__t3++);
      xmm1 = _mm_xor_si128(xmm1, *__t4++);
      *__c++ = xmm1;
    }
    /* fall through to the word-wise loop for the remaining tail */
    c  = (word*)__c;
    t1 = (word*)__t1;
    t2 = (word*)__t2;
    t3 = (word*)__t3;
    t4 = (word*)__t4;
    wide = ((sizeof(word)*wide)%16)/sizeof(word);
  }
  for(i=0; i<wide; i++) {
    c[i] ^= t1[i] ^ t2[i] ^ t3[i] ^ t4[i];
  }
}
#else

/* Plain C fallback; the caller must provide the loop counter ii. */
#define _mzd_combine4(c, t1, t2, t3, t4, wide) for(ii=0; ii<wide ; ii++) c[ii] ^= t1[ii] ^ t2[ii] ^ t3[ii] ^ t4[ii]

#endif //HAVE_SSE2

/**
 * Compute c[i] ^= t1[i] ^ t2[i] for 0 <= i < wide,
 * i.e. add two rows of words to the row c at once.
 */
#ifdef HAVE_SSE2
static inline void _mzd_combine2(word *c, word *t1, word *t2, size_t wide) {
  size_t i;
  /* assuming t1 and t2 are aligned, but c might not be */
  if (ALIGNMENT(c,16)==0) {
    __m128i *__c = (__m128i*)c;
    __m128i *__t1 = (__m128i*)t1;
    __m128i *__t2 = (__m128i*)t2;
    const __m128i *eof = (__m128i*)((unsigned long)(c + wide) & ~0xF);
    __m128i xmm1;

    while(__c < eof) {
      xmm1 = _mm_xor_si128(*__c, *__t1++);
      xmm1 = _mm_xor_si128(xmm1, *__t2++);
      *__c++ = xmm1;
    }
    /* fall through to the word-wise loop for the remaining tail */
    c  = (word*)__c;
    t1 = (word*)__t1;
    t2 = (word*)__t2;
    wide = ((sizeof(word)*wide)%16)/sizeof(word);
  }
  for(i=0; i<wide; i++) {
    c[i] ^= t1[i] ^ t2[i];
  }
}
#else

/* Plain C fallback; the caller must provide the loop counter ii. */
#define _mzd_combine2(c, t1, t2, wide) for(ii=0; ii<wide ; ii++) c[ii] ^= t1[ii] ^ t2[ii]

#endif //HAVE_SSE2


/* Select the combine routine matching the compile-time M4RM
   (Method of the Four Russians) Gray code table configuration. */
#ifdef M4RM_GRAY8
#define _MZD_COMBINE _mzd_combine8(c, t1, t2, t3, t4, t5, t6, t7, t8, wide)
#else //M4RM_GRAY8
#define _MZD_COMBINE _mzd_combine4(c, t1, t2, t3, t4, wide)
#endif //M4RM_GRAY8

#endif //XOR_H
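
For context (not part of xor.h): the standalone sketch below illustrates what the word-wise fallback path of _mzd_combine2 computes, using plain uint64_t in place of M4RI's word type and ALIGNMENT macro. The helper name combine2_plain, the row contents and the main() driver are made up for illustration; they are not part of the M4RI API.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Word-wise equivalent of _mzd_combine2: c[i] ^= t1[i] ^ t2[i]. */
static void combine2_plain(uint64_t *c, const uint64_t *t1, const uint64_t *t2, size_t wide) {
  for (size_t i = 0; i < wide; i++)
    c[i] ^= t1[i] ^ t2[i];
}

int main(void) {
  /* three rows of two 64-bit words each, i.e. 128 columns over GF(2) */
  uint64_t c[2]  = {0x00000000FFFFFFFFULL, 0x0ULL};
  uint64_t t1[2] = {0xFFFFFFFF00000000ULL, 0x1ULL};
  uint64_t t2[2] = {0x0F0F0F0F0F0F0F0FULL, 0x3ULL};

  combine2_plain(c, t1, t2, 2);   /* c += t1 + t2 over GF(2) */

  printf("%016llx %016llx\n", (unsigned long long)c[0], (unsigned long long)c[1]);
  return 0;
}

The SSE2 branches in xor.h compute the same result, but XOR 128 bits per step with _mm_xor_si128 and then finish any unaligned or leftover words with exactly this kind of scalar loop.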