# Tvheadend streaming server.
# Copyright (C) 2007-2009 Andreas Öman
# Copyright (C) 2012-2015 Adam Sutton
-# Copyright (C) 2012-2017 Jaroslav Kysela
+# Copyright (C) 2012-2018 Jaroslav Kysela
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
SRCS-${CONFIG_TSDEBUG} += $(SRCS-TSDEBUG)
I18N-C += $(SRCS-TSDEBUG)
-# FFdecsa
-ifneq ($(CONFIG_DVBCSA),yes)
-FFDECSA-$(CONFIG_CAPMT) = yes
-FFDECSA-$(CONFIG_CWC) = yes
-FFDECSA-$(CONFIG_CONSTCW) = yes
-endif
-
-ifeq ($(FFDECSA-yes),yes)
-SRCS-yes += src/descrambler/ffdecsa/ffdecsa_interface.c \
- src/descrambler/ffdecsa/ffdecsa_int.c
-SRCS-${CONFIG_MMX} += src/descrambler/ffdecsa/ffdecsa_mmx.c
-SRCS-${CONFIG_SSE2} += src/descrambler/ffdecsa/ffdecsa_sse2.c
-${BUILDDIR}/src/descrambler/ffdecsa/ffdecsa_mmx.o : CFLAGS += -mmmx
-${BUILDDIR}/src/descrambler/ffdecsa/ffdecsa_sse2.o : CFLAGS += -msse2
-endif
-
# crypto algorithms
SRCS-${CONFIG_SSL} += src/descrambler/algo/libaesdec.c
SRCS-${CONFIG_SSL} += src/descrambler/algo/libaes128dec.c
"tvhcsa:auto"
"bundle:no"
"pngquant:no"
- "dvbcsa:no"
"kqueue:no"
"dbus_1:auto"
"android:no"
#
if enabled cwc || enabled cccam || enabled capmt || enabled constcw; then
enable tvhcsa
+ enable dvbcsa
if enabled dvbcsa; then
(check_cc_header "dvbcsa/dvbcsa" dvbcsa_h &&\
check_cc_lib dvbcsa dvbcsa_l) ||\
- die "Failed to find dvbcsa support (use --disable-dvbcsa)"
+ die "Failed to find dvbcsa library"
LDFLAGS="$LDFLAGS -ldvbcsa"
fi
fi
+++ /dev/null
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2003-2004 fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-
-#include <sys/types.h>
-#include <string.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#ifndef NULL
-#define NULL 0
-#endif
-
-//#define DEBUG
-#ifdef DEBUG
-#define DBG(a) a
-#else
-#define DBG(a)
-#endif
-
-//// parallelization stuff, large speed differences are possible
-// possible choices
-#define PARALLEL_32_4CHAR 320
-#define PARALLEL_32_4CHARA 321
-#define PARALLEL_32_INT 322
-#define PARALLEL_64_8CHAR 640
-#define PARALLEL_64_8CHARA 641
-#define PARALLEL_64_2INT 642
-#define PARALLEL_64_LONG 643
-#define PARALLEL_64_MMX 644
-#define PARALLEL_128_16CHAR 1280
-#define PARALLEL_128_16CHARA 1281
-#define PARALLEL_128_4INT 1282
-#define PARALLEL_128_2LONG 1283
-#define PARALLEL_128_2MMX 1284
-#define PARALLEL_128_SSE 1285
-#define PARALLEL_128_SSE2 1286
-
-#include "parallel_generic.h"
-//// conditionals
-#if PARALLEL_MODE==PARALLEL_32_4CHAR
-#include "parallel_032_4char.h"
-#elif PARALLEL_MODE==PARALLEL_32_4CHARA
-#include "parallel_032_4charA.h"
-#elif PARALLEL_MODE==PARALLEL_32_INT
-#include "parallel_032_int.h"
-#define FUNC(x) (x ## _32int)
-#elif PARALLEL_MODE==PARALLEL_64_8CHAR
-#include "parallel_064_8char.h"
-#elif PARALLEL_MODE==PARALLEL_64_8CHARA
-#include "parallel_064_8charA.h"
-#elif PARALLEL_MODE==PARALLEL_64_2INT
-#include "parallel_064_2int.h"
-#elif PARALLEL_MODE==PARALLEL_64_LONG
-#include "parallel_064_long.h"
-#elif PARALLEL_MODE==PARALLEL_64_MMX
-#include "parallel_064_mmx.h"
-#define FUNC(x) (x ## _64mmx)
-#elif PARALLEL_MODE==PARALLEL_128_16CHAR
-#include "parallel_128_16char.h"
-#elif PARALLEL_MODE==PARALLEL_128_16CHARA
-#include "parallel_128_16charA.h"
-#elif PARALLEL_MODE==PARALLEL_128_4INT
-#include "parallel_128_4int.h"
-#elif PARALLEL_MODE==PARALLEL_128_2LONG
-#include "parallel_128_2long.h"
-#elif PARALLEL_MODE==PARALLEL_128_2MMX
-#include "parallel_128_2mmx.h"
-#elif PARALLEL_MODE==PARALLEL_128_SSE
-#include "parallel_128_sse.h"
-#elif PARALLEL_MODE==PARALLEL_128_SSE2
-#include "parallel_128_sse2.h"
-#define FUNC(x) (x ## _128sse2)
-#else
-#error "unknown/undefined parallel mode"
-#endif
-
-
-// stuff depending on conditionals
-
-#define BYTES_PER_GROUP (GROUP_PARALLELISM/8)
-#define BYPG BYTES_PER_GROUP
-#define BITS_PER_GROUP GROUP_PARALLELISM
-#define BIPG BITS_PER_GROUP
-
-#ifndef MALLOC
-#define MALLOC(X) malloc(X)
-#endif
-#ifndef FREE
-#define FREE(X) free(X)
-#endif
-#ifndef MEMALIGN
-#define MEMALIGN
-#endif
-
-//// debug tool
-
-#if 0
-static void dump_mem(const char *string, const unsigned char *p, int len, int linelen){
- int i;
- for(i=0;i<len;i++){
- if(i%linelen==0&&i) fprintf(stderr,"\n");
- if(i%linelen==0) fprintf(stderr,"%s %08x:",string,i);
- else{
- if(i%8==0) fprintf(stderr," ");
- if(i%4==0) fprintf(stderr," ");
- }
- fprintf(stderr," %02x",p[i]);
- }
- if(i%linelen==0) fprintf(stderr,"\n");
-}
-#endif
-
-//////////////////////////////////////////////////////////////////////////////////
-
-struct csa_key_t{
- unsigned char ck[8];
-// used by stream
- int iA[8]; // iA[0] is for A1, iA[7] is for A8
- int iB[8]; // iB[0] is for B1, iB[7] is for B8
-// used by stream (group)
- MEMALIGN group ck_g[8][8]; // [byte][bit:0=LSB,7=MSB]
- MEMALIGN group iA_g[8][4]; // [0 for A1][0 for LSB]
- MEMALIGN group iB_g[8][4]; // [0 for B1][0 for LSB]
-// used by block
- unsigned char kk[56];
-// used by block (group)
- MEMALIGN batch kkmulti[56]; // many times the same byte in every batch
-};
-
-struct csa_keys_t{
- struct csa_key_t even;
- struct csa_key_t odd;
-};
-
-//-----stream cypher
-
-//-----key schedule for stream decypher
-static void key_schedule_stream(
- unsigned char *ck, // [In] ck[0]-ck[7] 8 bytes | Key.
- int *iA, // [Out] iA[0]-iA[7] 8 nibbles | Key schedule.
- int *iB) // [Out] iB[0]-iB[7] 8 nibbles | Key schedule.
-{
- iA[0]=(ck[0]>>4)&0xf;
- iA[1]=(ck[0] )&0xf;
- iA[2]=(ck[1]>>4)&0xf;
- iA[3]=(ck[1] )&0xf;
- iA[4]=(ck[2]>>4)&0xf;
- iA[5]=(ck[2] )&0xf;
- iA[6]=(ck[3]>>4)&0xf;
- iA[7]=(ck[3] )&0xf;
- iB[0]=(ck[4]>>4)&0xf;
- iB[1]=(ck[4] )&0xf;
- iB[2]=(ck[5]>>4)&0xf;
- iB[3]=(ck[5] )&0xf;
- iB[4]=(ck[6]>>4)&0xf;
- iB[5]=(ck[6] )&0xf;
- iB[6]=(ck[7]>>4)&0xf;
- iB[7]=(ck[7] )&0xf;
-}
-
-//----- stream main function
-
-#define STREAM_INIT
-#include "stream.c"
-#undef STREAM_INIT
-
-#define STREAM_NORMAL
-#include "stream.c"
-#undef STREAM_NORMAL
-
-
-//-----block decypher
-
-//-----key schedule for block decypher
-
-static void key_schedule_block(
- unsigned char *ck, // [In] ck[0]-ck[7] 8 bytes | Key.
- unsigned char *kk) // [Out] kk[0]-kk[55] 56 bytes | Key schedule.
-{
- static const unsigned char key_perm[0x40] = {
- 0x12,0x24,0x09,0x07,0x2A,0x31,0x1D,0x15, 0x1C,0x36,0x3E,0x32,0x13,0x21,0x3B,0x40,
- 0x18,0x14,0x25,0x27,0x02,0x35,0x1B,0x01, 0x22,0x04,0x0D,0x0E,0x39,0x28,0x1A,0x29,
- 0x33,0x23,0x34,0x0C,0x16,0x30,0x1E,0x3A, 0x2D,0x1F,0x08,0x19,0x17,0x2F,0x3D,0x11,
- 0x3C,0x05,0x38,0x2B,0x0B,0x06,0x0A,0x2C, 0x20,0x3F,0x2E,0x0F,0x03,0x26,0x10,0x37,
- };
-
- int i,j,k;
- int bit[64];
- int newbit[64];
- int kb[7][8];
-
- // 56 steps
- // 56 key bytes kk(55)..kk(0) by key schedule from ck
-
- // kb(6,0) .. kb(6,7) = ck(0) .. ck(7)
- kb[6][0] = ck[0];
- kb[6][1] = ck[1];
- kb[6][2] = ck[2];
- kb[6][3] = ck[3];
- kb[6][4] = ck[4];
- kb[6][5] = ck[5];
- kb[6][6] = ck[6];
- kb[6][7] = ck[7];
-
- // calculate kb[5] .. kb[0]
- for(i=5; i>=0; i--){
- // 64 bit perm on kb
- for(j=0; j<8; j++){
- for(k=0; k<8; k++){
- bit[j*8+k] = (kb[i+1][j] >> (7-k)) & 1;
- newbit[key_perm[j*8+k]-1] = bit[j*8+k];
- }
- }
- for(j=0; j<8; j++){
- kb[i][j] = 0;
- for(k=0; k<8; k++){
- kb[i][j] |= newbit[j*8+k] << (7-k);
- }
- }
- }
-
- // xor to give kk
- for(i=0; i<7; i++){
- for(j=0; j<8; j++){
- kk[i*8+j] = kb[i][j] ^ i;
- }
- }
-
-}
-
-//-----block utils
-
-static inline __attribute__((always_inline)) void trasp_N_8 (unsigned char *in,unsigned char* out,int count){
- int *ri=(int *)in;
- int *ibi=(int *)out;
- int j,i,k,g;
- // copy and first step
- for(g=0;g<count;g++){
- ri[g]=ibi[2*g];
- ri[GROUP_PARALLELISM+g]=ibi[2*g+1];
- }
-//dump_mem("NE1 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
-// now 01230123
-#define INTS_PER_ROW (GROUP_PARALLELISM/8*2)
- for(j=0;j<8;j+=4){
- for(i=0;i<2;i++){
- for(k=0;k<INTS_PER_ROW;k++){
- unsigned int t,b;
- t=ri[INTS_PER_ROW*(j+i)+k];
- b=ri[INTS_PER_ROW*(j+i+2)+k];
- ri[INTS_PER_ROW*(j+i)+k]= (t&0x0000ffff) | ((b )<<16);
- ri[INTS_PER_ROW*(j+i+2)+k]= ((t )>>16) | (b&0xffff0000) ;
- }
- }
- }
-//dump_mem("NE2 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
-// now 01010101
- for(j=0;j<8;j+=2){
- for(i=0;i<1;i++){
- for(k=0;k<INTS_PER_ROW;k++){
- unsigned int t,b;
- t=ri[INTS_PER_ROW*(j+i)+k];
- b=ri[INTS_PER_ROW*(j+i+1)+k];
- ri[INTS_PER_ROW*(j+i)+k]= (t&0x00ff00ff) | ((b&0x00ff00ff)<<8);
- ri[INTS_PER_ROW*(j+i+1)+k]= ((t&0xff00ff00)>>8) | (b&0xff00ff00);
- }
- }
- }
-//dump_mem("NE3 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
-// now 00000000
-}
-
-static inline __attribute__((always_inline)) void trasp_8_N (unsigned char *in,unsigned char* out,int count){
- int *ri=(int *)in;
- int *bdi=(int *)out;
- int j,i,k,g;
-#define INTS_PER_ROW (GROUP_PARALLELISM/8*2)
-//dump_mem("NE1 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
-// now 00000000
- for(j=0;j<8;j+=2){
- for(i=0;i<1;i++){
- for(k=0;k<INTS_PER_ROW;k++){
- unsigned int t,b;
- t=ri[INTS_PER_ROW*(j+i)+k];
- b=ri[INTS_PER_ROW*(j+i+1)+k];
- ri[INTS_PER_ROW*(j+i)+k]= (t&0x00ff00ff) | ((b&0x00ff00ff)<<8);
- ri[INTS_PER_ROW*(j+i+1)+k]= ((t&0xff00ff00)>>8) | (b&0xff00ff00);
- }
- }
- }
-//dump_mem("NE2 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
-// now 01010101
- for(j=0;j<8;j+=4){
- for(i=0;i<2;i++){
- for(k=0;k<INTS_PER_ROW;k++){
- unsigned int t,b;
- t=ri[INTS_PER_ROW*(j+i)+k];
- b=ri[INTS_PER_ROW*(j+i+2)+k];
- ri[INTS_PER_ROW*(j+i)+k]= (t&0x0000ffff) | ((b )<<16);
- ri[INTS_PER_ROW*(j+i+2)+k]= ((t )>>16) | (b&0xffff0000) ;
- }
- }
- }
-//dump_mem("NE3 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
-// now 01230123
- for(g=0;g<count;g++){
- bdi[2*g]=ri[g];
- bdi[2*g+1]=ri[GROUP_PARALLELISM+g];
- }
-}
-
-//-----block main function
-
-// block group
-static void block_decypher_group (
- batch *kkmulti, // [In] kkmulti[0]-kkmulti[55] 56 batches | Key schedule (each batch has repeated equal bytes).
- unsigned char *ib, // [In] (ib0,ib1,...ib7)...x32 32*8 bytes | Initialization vector.
- unsigned char *bd, // [Out] (bd0,bd1,...bd7)...x32 32*8 bytes | Block decipher.
- int count)
-{
- // int is faster than unsigned char. apparently not
- static const unsigned char block_sbox[0x100] = {
- 0x3A,0xEA,0x68,0xFE,0x33,0xE9,0x88,0x1A, 0x83,0xCF,0xE1,0x7F,0xBA,0xE2,0x38,0x12,
- 0xE8,0x27,0x61,0x95,0x0C,0x36,0xE5,0x70, 0xA2,0x06,0x82,0x7C,0x17,0xA3,0x26,0x49,
- 0xBE,0x7A,0x6D,0x47,0xC1,0x51,0x8F,0xF3, 0xCC,0x5B,0x67,0xBD,0xCD,0x18,0x08,0xC9,
- 0xFF,0x69,0xEF,0x03,0x4E,0x48,0x4A,0x84, 0x3F,0xB4,0x10,0x04,0xDC,0xF5,0x5C,0xC6,
- 0x16,0xAB,0xAC,0x4C,0xF1,0x6A,0x2F,0x3C, 0x3B,0xD4,0xD5,0x94,0xD0,0xC4,0x63,0x62,
- 0x71,0xA1,0xF9,0x4F,0x2E,0xAA,0xC5,0x56, 0xE3,0x39,0x93,0xCE,0x65,0x64,0xE4,0x58,
- 0x6C,0x19,0x42,0x79,0xDD,0xEE,0x96,0xF6, 0x8A,0xEC,0x1E,0x85,0x53,0x45,0xDE,0xBB,
- 0x7E,0x0A,0x9A,0x13,0x2A,0x9D,0xC2,0x5E, 0x5A,0x1F,0x32,0x35,0x9C,0xA8,0x73,0x30,
-
- 0x29,0x3D,0xE7,0x92,0x87,0x1B,0x2B,0x4B, 0xA5,0x57,0x97,0x40,0x15,0xE6,0xBC,0x0E,
- 0xEB,0xC3,0x34,0x2D,0xB8,0x44,0x25,0xA4, 0x1C,0xC7,0x23,0xED,0x90,0x6E,0x50,0x00,
- 0x99,0x9E,0x4D,0xD9,0xDA,0x8D,0x6F,0x5F, 0x3E,0xD7,0x21,0x74,0x86,0xDF,0x6B,0x05,
- 0x8E,0x5D,0x37,0x11,0xD2,0x28,0x75,0xD6, 0xA7,0x77,0x24,0xBF,0xF0,0xB0,0x02,0xB7,
- 0xF8,0xFC,0x81,0x09,0xB1,0x01,0x76,0x91, 0x7D,0x0F,0xC8,0xA0,0xF2,0xCB,0x78,0x60,
- 0xD1,0xF7,0xE0,0xB5,0x98,0x22,0xB3,0x20, 0x1D,0xA6,0xDB,0x7B,0x59,0x9F,0xAE,0x31,
- 0xFB,0xD3,0xB6,0xCA,0x43,0x72,0x07,0xF4, 0xD8,0x41,0x14,0x55,0x0D,0x54,0x8B,0xB9,
- 0xAD,0x46,0x0B,0xAF,0x80,0x52,0x2C,0xFA, 0x8C,0x89,0x66,0xFD,0xB2,0xA9,0x9B,0xC0,
- };
- MEMALIGN unsigned char r[GROUP_PARALLELISM*(8+56)]; /* 56 because we will move back in memory while looping */
- MEMALIGN unsigned char sbox_in[GROUP_PARALLELISM],sbox_out[GROUP_PARALLELISM],perm_out[GROUP_PARALLELISM];
- int roff;
- int i,g,count_all=GROUP_PARALLELISM;
-
- roff=GROUP_PARALLELISM*56;
- memset(r + roff, 0, sizeof(r) - roff);
-
-#define FASTTRASP1
-#ifndef FASTTRASP1
- for(g=0;g<count;g++){
- // Init registers
- int j;
- for(j=0;j<8;j++){
- r[roff+GROUP_PARALLELISM*j+g]=ib[8*g+j];
- }
- }
-#else
- trasp_N_8((unsigned char *)&r[roff],(unsigned char *)ib,count);
-#endif
-//dump_mem("OLD r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
-
- // loop over kk[55]..kk[0]
- for(i=55;i>=0;i--){
- {
- MEMALIGN batch tkkmulti=kkmulti[i];
- batch *si=(batch *)sbox_in;
- batch *r6_N=(batch *)(r+roff+GROUP_PARALLELISM*6);
- for(g=0;g<count_all/BYTES_PER_BATCH;g++){
- si[g]=B_FFXOR(tkkmulti,r6_N[g]); //FIXME: introduce FASTBATCH?
- }
- }
-
- // table lookup, this works on only one byte at a time
- // most difficult part of all
- // - can't be parallelized
- // - can't be synthetized through boolean terms (8 input bits are too many)
- for(g=0;g<count_all;g++){
- sbox_out[g]=block_sbox[sbox_in[g]];
- }
-
- // bit permutation
- {
- unsigned char *po=(unsigned char *)perm_out;
- unsigned char *so=(unsigned char *)sbox_out;
-//dump_mem("pre perm ",(unsigned char *)so,GROUP_PARALLELISM,GROUP_PARALLELISM);
- for(g=0;g<count_all;g+=BYTES_PER_BATCH){
- MEMALIGN batch in,out;
- in=*(batch *)&so[g];
-
- out=B_FFOR(
- B_FFOR(
- B_FFOR(
- B_FFOR(
- B_FFOR(
- B_FFSH8L(B_FFAND(in,B_FFN_ALL_29()),1),
- B_FFSH8L(B_FFAND(in,B_FFN_ALL_02()),6)),
- B_FFSH8L(B_FFAND(in,B_FFN_ALL_04()),3)),
- B_FFSH8R(B_FFAND(in,B_FFN_ALL_10()),2)),
- B_FFSH8R(B_FFAND(in,B_FFN_ALL_40()),6)),
- B_FFSH8R(B_FFAND(in,B_FFN_ALL_80()),4));
-
- *(batch *)&po[g]=out;
- }
-//dump_mem("post perm",(unsigned char *)po,GROUP_PARALLELISM,GROUP_PARALLELISM);
- }
-
- roff-=GROUP_PARALLELISM; /* virtual shift of registers */
-
-#if 0
-/* one by one */
- for(g=0;g<count_all;g++){
- r[roff+GROUP_PARALLELISM*0+g]=r[roff+GROUP_PARALLELISM*8+g]^sbox_out[g];
- r[roff+GROUP_PARALLELISM*6+g]^=perm_out[g];
- r[roff+GROUP_PARALLELISM*4+g]^=r[roff+GROUP_PARALLELISM*0+g];
- r[roff+GROUP_PARALLELISM*3+g]^=r[roff+GROUP_PARALLELISM*0+g];
- r[roff+GROUP_PARALLELISM*2+g]^=r[roff+GROUP_PARALLELISM*0+g];
- }
-#else
- for(g=0;g<count_all;g+=BEST_SPAN){
- XOR_BEST_BY(&r[roff+GROUP_PARALLELISM*0+g],&r[roff+GROUP_PARALLELISM*8+g],&sbox_out[g]);
- XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*6+g],&perm_out[g]);
- XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*4+g],&r[roff+GROUP_PARALLELISM*0+g]);
- XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*3+g],&r[roff+GROUP_PARALLELISM*0+g]);
- XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*2+g],&r[roff+GROUP_PARALLELISM*0+g]);
- }
-#endif
- }
-
-#define FASTTRASP2
-#ifndef FASTTRASP2
- for(g=0;g<count;g++){
- // Copy results
- int j;
- for(j=0;j<8;j++){
- bd[8*g+j]=r[roff+GROUP_PARALLELISM*j+g];
- }
- }
-#else
- trasp_8_N((unsigned char *)&r[roff],(unsigned char *)bd,count);
-#endif
-}
-
-//-----------------------------------EXTERNAL INTERFACE
-
-
-//-----set control words
-
-static void schedule_key(struct csa_key_t *key, const unsigned char *pk){
- // could be made faster, but is not run often
- int bi,by;
- int i,j;
-// key
- memcpy(key->ck,pk,8);
-// precalculations for stream
- key_schedule_stream(key->ck,key->iA,key->iB);
- for(by=0;by<8;by++){
- for(bi=0;bi<8;bi++){
- key->ck_g[by][bi]=(key->ck[by]&(1<<bi))?FF1():FF0();
- }
- }
- for(by=0;by<8;by++){
- for(bi=0;bi<4;bi++){
- key->iA_g[by][bi]=(key->iA[by]&(1<<bi))?FF1():FF0();
- key->iB_g[by][bi]=(key->iB[by]&(1<<bi))?FF1():FF0();
- }
- }
-// precalculations for block
- key_schedule_block(key->ck,key->kk);
- for(i=0;i<56;i++){
- for(j=0;j<BYTES_PER_BATCH;j++){
- *(((unsigned char *)&key->kkmulti[i])+j)=key->kk[i];
- }
- }
-}
-
-extern void FUNC(set_control_words)(void *keys, const unsigned char *ev, const unsigned char *od);
-
-void FUNC(set_control_words)(void *keys, const unsigned char *ev, const unsigned char *od)
-{
- schedule_key(&((struct csa_keys_t *)keys)->even,ev);
- schedule_key(&((struct csa_keys_t *)keys)->odd,od);
-}
-
-extern void FUNC(set_even_control_word)(void *keys, const unsigned char *pk);
-
-void FUNC(set_even_control_word)(void *keys, const unsigned char *pk)
-{
- schedule_key(&((struct csa_keys_t *)keys)->even,pk);
-}
-
-extern void FUNC(set_odd_control_word)(void *keys, const unsigned char *pk);
-
-void FUNC(set_odd_control_word)(void *keys, const unsigned char *pk){
- schedule_key(&((struct csa_keys_t *)keys)->odd,pk);
-}
-
-//-----get internal parallelism
-
-extern int FUNC(get_internal_parallelism)(void);
-
-int FUNC(get_internal_parallelism)(void)
-{
- return GROUP_PARALLELISM;
-}
-
-//-----get suggested cluster size
-
-extern int FUNC(get_suggested_cluster_size)(void);
-
-int FUNC(get_suggested_cluster_size)(void)
-{
- int r;
- r=GROUP_PARALLELISM+GROUP_PARALLELISM/10;
- if(r<GROUP_PARALLELISM+5) r=GROUP_PARALLELISM+5;
- return r;
-}
-
-//-----key structure
-
-extern void *FUNC(get_key_struct)(void);
-void *FUNC(get_key_struct)(void)
-{
- struct csa_keys_t *keys=(struct csa_keys_t *)MALLOC(sizeof(struct csa_keys_t));
- if(keys) {
- static const unsigned char pk[8] = { 0,0,0,0,0,0,0,0 };
- FUNC(set_control_words)(keys,pk,pk);
- }
- return keys;
-}
-
-extern void FUNC(free_key_struct)(void *keys);
-void FUNC(free_key_struct)(void *keys)
-{
- return FREE(keys);
-}
-
-
-
-//-----get control words
-#if 0
-void get_control_words(void *keys, unsigned char *even, unsigned char *odd){
- memcpy(even,&((struct csa_keys_t *)keys)->even.ck,8);
- memcpy(odd,&((struct csa_keys_t *)keys)->odd.ck,8);
-}
-#endif
-
-//----- decrypt
-
-extern int FUNC(decrypt_packets)(void *keys, unsigned char **cluster);
-int FUNC(decrypt_packets)(void *keys, unsigned char **cluster)
-{
- // statistics, currently unused
- int stat_no_scramble=0;
- int stat_reserved=0;
- int stat_decrypted[2]={0,0};
- int stat_decrypted_mini=0;
- unsigned char **clst;
- unsigned char **clst2;
- int grouped;
- int group_ev_od;
- int advanced;
- int can_advance;
- unsigned char *g_pkt[GROUP_PARALLELISM];
- int g_len[GROUP_PARALLELISM];
- int g_offset[GROUP_PARALLELISM];
- int g_n[GROUP_PARALLELISM];
- int g_residue[GROUP_PARALLELISM];
- unsigned char *pkt;
- int xc0,ev_od,len,offset,n,residue;
- struct csa_key_t* k;
- int i,j,iter,g;
- int t23,tsmall;
- int alive[24];
-//icc craziness int pad1=0; //////////align! FIXME
- unsigned char *encp[GROUP_PARALLELISM];
- MEMALIGN unsigned char stream_in[GROUP_PARALLELISM*8];
- MEMALIGN unsigned char stream_out[GROUP_PARALLELISM*8];
- MEMALIGN unsigned char ib[GROUP_PARALLELISM*8];
- MEMALIGN unsigned char block_out[GROUP_PARALLELISM*8];
- struct stream_regs regs;
-
-//icc craziness i=(int)&pad1;//////////align!!! FIXME
-
- // build a list of packets to be processed
- clst=cluster;
- grouped=0;
- advanced=0;
- can_advance=1;
- group_ev_od=-1; // silence incorrect compiler warning
- pkt=*clst;
- do{ // find a new packet
- if(grouped==GROUP_PARALLELISM){
- // full
- break;
- }
- if(pkt==NULL){
- // no more ranges
- break;
- }
- if(pkt>=*(clst+1)){
- // out of this range, try next
- clst++;clst++;
- pkt=*clst;
- continue;
- }
-
- do{ // handle this packet
- xc0=pkt[3]&0xc0;
- DBG(fprintf(stderr," exam pkt=%p, xc0=%02x, can_adv=%i\n",pkt,xc0,can_advance));
- if(xc0==0x00){
- DBG(fprintf(stderr,"skip clear pkt %p (can_advance is %i)\n",pkt,can_advance));
- advanced+=can_advance;
- stat_no_scramble++;
- break;
- }
- if(xc0==0x40){
- DBG(fprintf(stderr,"skip reserved pkt %p (can_advance is %i)\n",pkt,can_advance));
- advanced+=can_advance;
- stat_reserved++;
- break;
- }
- if(xc0==0x80||xc0==0xc0){ // encrypted
- ev_od=(xc0&0x40)>>6; // 0 even, 1 odd
- if(grouped==0) group_ev_od=ev_od; // this group will be all even (or odd)
- if(group_ev_od==ev_od){ // could be added to group
- pkt[3]&=0x3f; // consider it decrypted now
- if(pkt[3]&0x20){ // incomplete packet
- offset=4+pkt[4]+1;
- len=188-offset;
- n=len>>3;
- residue=len-(n<<3);
- if(n==0){ // decrypted==encrypted!
- DBG(fprintf(stderr,"DECRYPTED MINI! (can_advance is %i)\n",can_advance));
- advanced+=can_advance;
- stat_decrypted_mini++;
- break; // this doesn't need more processing
- }
- }else{
- len=184;
- offset=4;
- n=23;
- residue=0;
- }
- g_pkt[grouped]=pkt;
- g_len[grouped]=len;
- g_offset[grouped]=offset;
- g_n[grouped]=n;
- g_residue[grouped]=residue;
- DBG(fprintf(stderr,"%2i: eo=%i pkt=%p len=%03i n=%2i residue=%i\n",grouped,ev_od,pkt,len,n,residue));
- grouped++;
- advanced+=can_advance;
- stat_decrypted[ev_od]++;
- }
- else{
- can_advance=0;
- DBG(fprintf(stderr,"skip pkt %p and can_advance set to 0\n",pkt));
- break; // skip and go on
- }
- }
- } while(0);
-
- if(can_advance){
- // move range start forward
- *clst+=188;
- }
- // next packet, if there is one
- pkt+=188;
- } while(1);
- DBG(fprintf(stderr,"-- result: grouped %i pkts, advanced %i pkts\n",grouped,advanced));
-
- // delete empty ranges and compact list
- clst2=cluster;
- for(clst=cluster;*clst!=NULL;clst+=2){
- // if not empty
- if(*clst<*(clst+1)){
- // it will remain
- *clst2=*clst;
- *(clst2+1)=*(clst+1);
- clst2+=2;
- }
- }
- *clst2=NULL;
-
- if(grouped==0){
- // no processing needed
- return advanced;
- }
-
- // sort them, longest payload first
- // we expect many n=23 packets and a few n<23
- DBG(fprintf(stderr,"PRESORTING\n"));
- for(i=0;i<grouped;i++){
- DBG(fprintf(stderr,"%2i of %2i: pkt=%p len=%03i n=%2i residue=%i\n",i,grouped,g_pkt[i],g_len[i],g_n[i],g_residue[i]));
- }
- // grouped is always <= GROUP_PARALLELISM
-
-#define g_swap(a,b) \
- pkt=g_pkt[a]; \
- g_pkt[a]=g_pkt[b]; \
- g_pkt[b]=pkt; \
-\
- len=g_len[a]; \
- g_len[a]=g_len[b]; \
- g_len[b]=len; \
-\
- offset=g_offset[a]; \
- g_offset[a]=g_offset[b]; \
- g_offset[b]=offset; \
-\
- n=g_n[a]; \
- g_n[a]=g_n[b]; \
- g_n[b]=n; \
-\
- residue=g_residue[a]; \
- g_residue[a]=g_residue[b]; \
- g_residue[b]=residue;
-
- // step 1: move n=23 packets before small packets
- t23=0;
- tsmall=grouped-1;
- for(;;){
- for(;t23<grouped;t23++){
- if(g_n[t23]!=23) break;
- }
-DBG(fprintf(stderr,"t23 after for =%i\n",t23));
-
- for(;tsmall>=0;tsmall--){
- if(g_n[tsmall]==23) break;
- }
-DBG(fprintf(stderr,"tsmall after for =%i\n",tsmall));
-
- if(tsmall-t23<1) break;
-
-DBG(fprintf(stderr,"swap t23=%i,tsmall=%i\n",t23,tsmall));
-
- g_swap(t23,tsmall);
-
- t23++;
- tsmall--;
-DBG(fprintf(stderr,"new t23=%i,tsmall=%i\n\n",t23,tsmall));
- }
- DBG(fprintf(stderr,"packets with n=23, t23=%i grouped=%i\n",t23,grouped));
- DBG(fprintf(stderr,"MIDSORTING\n"));
- for(i=0;i<grouped;i++){
- DBG(fprintf(stderr,"%2i of %2i: pkt=%p len=%03i n=%2i residue=%i\n",i,grouped,g_pkt[i],g_len[i],g_n[i],g_residue[i]));
- }
-
- // step 2: sort small packets in decreasing order of n (bubble sort is enough)
- for(i=t23;i<grouped;i++){
- for(j=i+1;j<grouped;j++){
- if(g_n[j]>g_n[i]){
- g_swap(i,j);
- }
- }
- }
- DBG(fprintf(stderr,"POSTSORTING\n"));
- for(i=0;i<grouped;i++){
- DBG(fprintf(stderr,"%2i of %2i: pkt=%p len=%03i n=%2i residue=%i\n",i,grouped,g_pkt[i],g_len[i],g_n[i],g_residue[i]));
- }
-
- // we need to know how many packets need 23 iterations, how many 22...
- for(i=0;i<=23;i++){
- alive[i]=0;
- }
- // count
- alive[23-1]=t23;
- for(i=t23;i<grouped;i++){
- alive[g_n[i]-1]++;
- }
- // integrate
- for(i=22;i>=0;i--){
- alive[i]+=alive[i+1];
- }
- DBG(fprintf(stderr,"ALIVE\n"));
- for(i=0;i<=23;i++){
- DBG(fprintf(stderr,"alive%2i=%i\n",i,alive[i]));
- }
-
- // choose key
- if(group_ev_od==0){
- k=&((struct csa_keys_t *)keys)->even;
- }
- else{
- k=&((struct csa_keys_t *)keys)->odd;
- }
-
- //INIT
-//#define INITIALIZE_UNUSED_INPUT
-#ifdef INITIALIZE_UNUSED_INPUT
-// unnecessary zeroing.
-// without this, we operate on uninitialized memory
-// when grouped<GROUP_PARALLELISM, but it's not a problem,
-// as final results will be discarded.
-// random data makes debugging sessions difficult.
- for(j=0;j<GROUP_PARALLELISM*8;j++) stream_in[j]=0;
-DBG(fprintf(stderr,"--- WARNING: you could gain speed by not initializing unused memory ---\n"));
-#else
-DBG(fprintf(stderr,"--- WARNING: DEBUGGING IS MORE DIFFICULT WHEN PROCESSING RANDOM DATA CHANGING AT EVERY RUN! ---\n"));
-#endif
-
- for(g=0;g<grouped;g++){
- encp[g]=g_pkt[g];
- DBG(fprintf(stderr,"header[%i]=%p (%02x)\n",g,encp[g],*(encp[g])));
- encp[g]+=g_offset[g]; // skip header
- FFTABLEIN(stream_in,g,encp[g]);
- }
-//dump_mem("stream_in",stream_in,GROUP_PARALLELISM*8,BYPG);
-
-
- // ITER 0
-DBG(fprintf(stderr,">>>>>ITER 0\n"));
- iter=0;
- stream_cypher_group_init(&regs,k->iA_g,k->iB_g,stream_in);
- // fill first ib
- for(g=0;g<alive[iter];g++){
- COPY_8_BY(ib+8*g,encp[g]);
- }
-DBG(dump_mem("IB ",ib,8*alive[iter],8));
- // ITER 1..N-1
- for (iter=1;iter<23&&alive[iter-1]>0;iter++){
-DBG(fprintf(stderr,">>>>>ITER %i\n",iter));
- // alive and just dead packets: calc block
- block_decypher_group(k->kkmulti,ib,block_out,alive[iter-1]);
-DBG(dump_mem("BLO_ib ",block_out,8*alive[iter-1],8));
- // all packets (dead too): calc stream
- stream_cypher_group_normal(&regs,stream_out);
-//dump_mem("stream_out",stream_out,GROUP_PARALLELISM*8,BYPG);
-
- // alive packets: calc ib
- for(g=0;g<alive[iter];g++){
- FFTABLEOUT(ib+8*g,stream_out,g);
-DBG(dump_mem("stream_out_ib ",ib+8*g,8,8));
-// XOREQ8BY gcc bug? 2x4 ok, 8 ko UPDATE: result ok but speed 1-2% slower (!!!???)
-#if 1
- XOREQ_4_BY(ib+8*g,encp[g]+8);
- XOREQ_4_BY(ib+8*g+4,encp[g]+8+4);
-#else
- XOREQ_8_BY(ib+8*g,encp[g]+8);
-#endif
-DBG(dump_mem("after_stream_xor_ib ",ib+8*g,8,8));
- }
- // alive packets: decrypt data
- for(g=0;g<alive[iter];g++){
-DBG(dump_mem("before_ib_decrypt_data ",encp[g],8,8));
- XOR_8_BY(encp[g],ib+8*g,block_out+8*g);
-DBG(dump_mem("after_ib_decrypt_data ",encp[g],8,8));
- }
- // just dead packets: write decrypted data
- for(g=alive[iter];g<alive[iter-1];g++){
-DBG(dump_mem("jd_before_ib_decrypt_data ",encp[g],8,8));
- COPY_8_BY(encp[g],block_out+8*g);
-DBG(dump_mem("jd_after_ib_decrypt_data ",encp[g],8,8));
- }
- // just dead packets: decrypt residue
- for(g=alive[iter];g<alive[iter-1];g++){
-DBG(dump_mem("jd_before_decrypt_residue ",encp[g]+8,g_residue[g],g_residue[g]));
- FFTABLEOUTXORNBY(g_residue[g],encp[g]+8,stream_out,g);
-DBG(dump_mem("jd_after_decrypt_residue ",encp[g]+8,g_residue[g],g_residue[g]));
- }
- // alive packets: pointers++
- for(g=0;g<alive[iter];g++) encp[g]+=8;
- };
- // ITER N
-DBG(fprintf(stderr,">>>>>ITER 23\n"));
- iter=23;
- // calc block
- block_decypher_group(k->kkmulti,ib,block_out,alive[iter-1]);
-DBG(dump_mem("23BLO_ib ",block_out,8*alive[iter-1],8));
- // just dead packets: write decrypted data
- for(g=alive[iter];g<alive[iter-1];g++){
-DBG(dump_mem("23jd_before_ib_decrypt_data ",encp[g],8,8));
- COPY_8_BY(encp[g],block_out+8*g);
-DBG(dump_mem("23jd_after_ib_decrypt_data ",encp[g],8,8));
- }
- // no residue possible
- // so do nothing
-
- DBG(fprintf(stderr,"returning advanced=%i\n",advanced));
-
- M_EMPTY(); // restore CPU multimedia state
-
- return advanced;
-}
+++ /dev/null
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2003-2004 fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-
-#ifndef FFDECSA_H
-#define FFDECSA_H
-
-//----- public interface
-
-// -- how many packets can be decrypted at the same time
-// This is an info about internal decryption parallelism.
-// You should try to call decrypt_packets with more packets than the number
-// returned here for performance reasons (use get_suggested_cluster_size to know
-// how many).
-int get_internal_parallelism(void);
-
-// -- how many packets you should have in a cluster when calling decrypt_packets
-// This is a suggestion to achieve optimal performance; typically a little
-// higher than what get_internal_parallelism returns.
-// Passing less packets could slow down the decryption.
-// Passing more packets is never bad (if you don't spend a lot of time building
-// the list).
-int get_suggested_cluster_size(void);
-
-// -- alloc & free the key structure
-void *get_key_struct(void);
-void free_key_struct(void *keys);
-
-// -- set control words, 8 bytes each
-void set_control_words(void *keys, const unsigned char *even, const unsigned char *odd);
-
-// -- set even control word, 8 bytes
-void set_even_control_word(void *keys, const unsigned char *even);
-
-// -- set odd control word, 8 bytes
-void set_odd_control_word(void *keys, const unsigned char *odd);
-
-// -- get control words, 8 bytes each
-//void get_control_words(void *keys, unsigned char *even, unsigned char *odd);
-
-// -- decrypt many TS packets
-// This interface is a bit complicated because it is designed for maximum speed.
-// Please read doc/how_to_use.txt.
-int decrypt_packets(void *keys, unsigned char **cluster);
-
-void ffdecsa_init(void);
-
-#endif
+++ /dev/null
-#define PARALLEL_MODE PARALLEL_32_INT
-#include "FFdecsa.c"
+++ /dev/null
-/*
- * CPU detection code, extracted from mmx.h
- * (c)1997-99 by H. Dietz and R. Fisher
- * Converted to C and improved by Fabrice Bellard.
- *
- * This file is part of Tvheadend.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#include "config.h"
-#include "tvheadend.h"
-#include "FFdecsa.h"
-
-
-
-typedef struct {
- int (*get_internal_parallelism)(void);
- int (*get_suggested_cluster_size)(void);
- void *(*get_key_struct)(void);
- void (*free_key_struct)(void *keys);
- void (*set_control_words)(void *keys, const unsigned char *even, const unsigned char *odd);
-
- void (*set_even_control_word)(void *keys, const unsigned char *even);
- void (*set_odd_control_word)(void *keys, const unsigned char *odd);
- int (*decrypt_packets)(void *keys, unsigned char **cluster);
-
-} csafuncs_t;
-
-
-#define MAKEFUNCS(x) \
-extern int get_internal_parallelism_##x(void);\
-extern int get_suggested_cluster_size_##x(void);\
-extern void *get_key_struct_##x(void);\
-extern void free_key_struct_##x(void *keys);\
-extern void set_control_words_##x(void *keys, const unsigned char *even, const unsigned char *odd);\
-extern void set_even_control_word_##x(void *keys, const unsigned char *even);\
-extern void set_odd_control_word_##x(void *keys, const unsigned char *odd);\
-extern int decrypt_packets_##x(void *keys, unsigned char **cluster);\
-static csafuncs_t funcs_##x = { \
- &get_internal_parallelism_##x,\
- &get_suggested_cluster_size_##x,\
- &get_key_struct_##x,\
- &free_key_struct_##x,\
- &set_control_words_##x,\
- &set_even_control_word_##x,\
- &set_odd_control_word_##x,\
- &decrypt_packets_##x\
-};
-
-MAKEFUNCS(32int);
-#ifdef CONFIG_MMX
-MAKEFUNCS(64mmx);
-#endif
-
-#ifdef CONFIG_SSE2
-MAKEFUNCS(128sse2);
-#endif
-
-static csafuncs_t current;
-
-
-
-
-#if defined(__x86_64__)
-# define REG_a "rax"
-# define REG_b "rbx"
-# define REG_c "rcx"
-# define REG_d "rdx"
-# define REG_D "rdi"
-# define REG_S "rsi"
-# define PTR_SIZE "8"
-typedef int64_t x86_reg;
-
-# define REG_SP "rsp"
-# define REG_BP "rbp"
-# define REGBP rbp
-# define REGa rax
-# define REGb rbx
-# define REGc rcx
-# define REGd rdx
-# define REGSP rsp
-
-#elif defined(__i386__)
-
-# define REG_a "eax"
-# define REG_b "ebx"
-# define REG_c "ecx"
-# define REG_d "edx"
-# define REG_D "edi"
-# define REG_S "esi"
-# define PTR_SIZE "4"
-typedef int32_t x86_reg;
-
-# define REG_SP "esp"
-# define REG_BP "ebp"
-# define REGBP ebp
-# define REGa eax
-# define REGb ebx
-# define REGc ecx
-# define REGd edx
-# define REGSP esp
-#else
-typedef int x86_reg;
-#endif
-
-#if defined(__i386__) || defined(__x86_64__)
-static inline void
-native_cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- /* saving ebx is necessary for PIC compatibility */
- asm volatile("mov %%"REG_b", %%"REG_S"\n\t"
- "cpuid\n\t"
- "xchg %%"REG_b", %%"REG_S
- : "=a" (*eax),
- "=S" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx));
-}
-#endif
-
-void
-ffdecsa_init(void)
-{
- current = funcs_32int;
-
-
-#if defined(__i386__) || defined(__x86_64__)
-
- unsigned int eax, ebx, ecx, edx;
- unsigned int max_std_level, std_caps;
-
-#if defined(__i386__)
-
- x86_reg a, c;
- __asm__ volatile (
- /* See if CPUID instruction is supported ... */
- /* ... Get copies of EFLAGS into eax and ecx */
- "pushfl\n\t"
- "pop %0\n\t"
- "mov %0, %1\n\t"
-
- /* ... Toggle the ID bit in one copy and store */
- /* to the EFLAGS reg */
- "xor $0x200000, %0\n\t"
- "push %0\n\t"
- "popfl\n\t"
-
- /* ... Get the (hopefully modified) EFLAGS */
- "pushfl\n\t"
- "pop %0\n\t"
- : "=a" (a), "=c" (c)
- :
- : "cc"
- );
-
- if (a != c) {
-#endif
- eax = ebx = ecx = edx = 0;
- native_cpuid(&eax, &ebx, &ecx, &edx);
- max_std_level = eax;
-
- if(max_std_level >= 1){
- eax = 1;
- native_cpuid(&eax, &ebx, &ecx, &edx);
- std_caps = edx;
-
-#ifdef CONFIG_SSE2
- if (std_caps & (1<<26)) {
- current = funcs_128sse2;
- tvhinfo(LS_CSA, "Using SSE2 128bit parallel descrambling");
- return;
- }
-#endif
-
-#ifdef CONFIG_MMX
- if (std_caps & (1<<23)) {
- current = funcs_64mmx;
- tvhinfo(LS_CSA, "Using MMX 64bit parallel descrambling");
- return;
- }
-#endif
- }
-#if defined(__i386__)
- }
-#endif
-#endif
-
- tvhinfo(LS_CSA, "Using 32bit parallel descrambling");
-}
-
-
-int
-get_internal_parallelism(void)
-{
- return current.get_internal_parallelism();
-}
-int
-get_suggested_cluster_size(void)
-{
- return current.get_suggested_cluster_size();
-}
-
-void *
-get_key_struct(void)
-{
- return current.get_key_struct();
-}
-void
-free_key_struct(void *keys)
-{
- current.free_key_struct(keys);
-}
-
-void
-set_even_control_word(void *keys, const unsigned char *even)
-{
- current.set_even_control_word(keys, even);
-}
-
-void
-set_odd_control_word(void *keys, const unsigned char *odd)
-{
- current.set_odd_control_word(keys, odd);
-}
-
-int
-decrypt_packets(void *keys, unsigned char **cluster)
-{
- return current.decrypt_packets(keys, cluster);
-}
+++ /dev/null
-#define PARALLEL_MODE PARALLEL_64_MMX
-#include "FFdecsa.c"
+++ /dev/null
-#define PARALLEL_MODE PARALLEL_128_SSE2
-#include "FFdecsa.c"
+++ /dev/null
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2007 Dark Avenger
- * 2003-2004 fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef FFTABLE_H
-#define FFTABLE_H
-
-void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data)
-{
-#if 0
- *(((int *)tab)+2*g)=*((int *)data);
- *(((int *)tab)+2*g+1)=*(((int *)data)+1);
-#else
- *(((long long *)tab)+g)=*((long long *)data);
-#endif
-}
-
-void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g)
-{
-#if 1
- *((int *)data)=*(((int *)tab)+2*g);
- *(((int *)data)+1)=*(((int *)tab)+2*g+1);
-#else
- *((long long *)data)=*(((long long *)tab)+g);
-#endif
-}
-
-void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g)
-{
- int j;
- for(j=0;j<n;j++) *(data+j)^=*(tab+8*g+j);
-}
-
-#undef XOREQ_BEST_BY
-static inline void XOREQ_BEST_BY(unsigned char *d, unsigned char *s)
-{
- XOR_BEST_BY(d, d, s);
-}
-
-#endif //FFTABLE_H
+++ /dev/null
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2003-2004 fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "parallel_std_def.h"
-
-typedef unsigned int group;
-#define GROUP_PARALLELISM 32
-#define FF0() 0x0
-#define FF1() 0xffffffff
-
-/* 64 rows of 32 bits */
-
-void static inline FFTABLEIN(unsigned char *tab, int g, unsigned char *data){
- *(((int *)tab)+g)=*((int *)data);
- *(((int *)tab)+32+g)=*(((int *)data)+1);
-}
-
-void static inline FFTABLEOUT(unsigned char *data, unsigned char *tab, int g){
- *((int *)data)=*(((int *)tab)+g);
- *(((int *)data)+1)=*(((int *)tab)+32+g);
-}
-
-void static inline FFTABLEOUTXORNBY(int n, unsigned char *data, unsigned char *tab, int g){
- int j;
- for(j=0;j<n;j++){
- *(data+j)^=*(tab+4*(g+(j>=4?32-1:0))+j);
- }
-}
-
-typedef unsigned int batch;
-#define BYTES_PER_BATCH 4
-#define B_FFN_ALL_29() 0x29292929
-#define B_FFN_ALL_02() 0x02020202
-#define B_FFN_ALL_04() 0x04040404
-#define B_FFN_ALL_10() 0x10101010
-#define B_FFN_ALL_40() 0x40404040
-#define B_FFN_ALL_80() 0x80808080
-
-#define M_EMPTY()
+++ /dev/null
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2007 Dark Avenger
- * 2003-2004 fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <mmintrin.h>
-
-#define MEMALIGN __attribute__((aligned(16)))
-
-union __u64 {
- unsigned int u[2];
- __m64 v;
-};
-
-static const union __u64 ff0 = {{0x00000000U, 0x00000000U}};
-static const union __u64 ff1 = {{0xffffffffU, 0xffffffffU}};
-
-typedef __m64 group;
-#define GROUP_PARALLELISM 64
-#define FF0() ff0.v
-#define FF1() ff1.v
-#define FFAND(a,b) _mm_and_si64((a),(b))
-#define FFOR(a,b) _mm_or_si64((a),(b))
-#define FFXOR(a,b) _mm_xor_si64((a),(b))
-#define FFNOT(a) _mm_xor_si64((a),FF1())
-
-/* 64 rows of 64 bits */
-
-static const union __u64 ff29 = {{0x29292929U, 0x29292929U}};
-static const union __u64 ff02 = {{0x02020202U, 0x02020202U}};
-static const union __u64 ff04 = {{0x04040404U, 0x04040404U}};
-static const union __u64 ff10 = {{0x10101010U, 0x10101010U}};
-static const union __u64 ff40 = {{0x40404040U, 0x40404040U}};
-static const union __u64 ff80 = {{0x80808080U, 0x80808080U}};
-
-typedef __m64 batch;
-#define BYTES_PER_BATCH 8
-#define B_FFAND(a,b) FFAND((a),(b))
-#define B_FFOR(a,b) FFOR((a),(b))
-#define B_FFXOR(a,b) FFXOR((a),(b))
-#define B_FFN_ALL_29() ff29.v
-#define B_FFN_ALL_02() ff02.v
-#define B_FFN_ALL_04() ff04.v
-#define B_FFN_ALL_10() ff10.v
-#define B_FFN_ALL_40() ff40.v
-#define B_FFN_ALL_80() ff80.v
-#define B_FFSH8L(a,n) _mm_slli_si64((a),(n))
-#define B_FFSH8R(a,n) _mm_srli_si64((a),(n))
-
-#define M_EMPTY() _mm_empty()
-
-
-#undef XOR_8_BY
-#define XOR_8_BY(d,s1,s2) do { *(__m64*)d = _mm_xor_si64(*(__m64*)(s1), *(__m64*)(s2)); } while(0)
-
-#undef XOREQ_8_BY
-#define XOREQ_8_BY(d,s) XOR_8_BY(d, d, s)
-
-#undef COPY_8_BY
-#define COPY_8_BY(d,s) do { *(__m64 *)(d) = *(__m64 *)(s); } while(0)
-
-#undef BEST_SPAN
-#define BEST_SPAN 8
-
-#undef XOR_BEST_BY
-#define XOR_BEST_BY(d,s1,s2) XOR_8_BY(d,s1,s2)
-
-#include "fftable.h"
+++ /dev/null
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2007 Dark Avenger
- * 2003-2004 fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <emmintrin.h>
-
-#define MEMALIGN __attribute__((aligned(16)))
-
-union __u128i {
- unsigned int u[4];
- __m128i v;
-};
-
-static const union __u128i ff0 = {{0x00000000U, 0x00000000U, 0x00000000U, 0x00000000U}};
-static const union __u128i ff1 = {{0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU}};
-
-typedef __m128i group;
-#define GROUP_PARALLELISM 128
-#define FF0() ff0.v
-#define FF1() ff1.v
-#define FFAND(a,b) _mm_and_si128((a),(b))
-#define FFOR(a,b) _mm_or_si128((a),(b))
-#define FFXOR(a,b) _mm_xor_si128((a),(b))
-#define FFNOT(a) _mm_xor_si128((a),FF1())
-#define MALLOC(X) _mm_malloc(X,16)
-#define FREE(X) _mm_free(X)
-
-/* BATCH */
-
-static const union __u128i ff29 = {{0x29292929U, 0x29292929U, 0x29292929U, 0x29292929U}};
-static const union __u128i ff02 = {{0x02020202U, 0x02020202U, 0x02020202U, 0x02020202U}};
-static const union __u128i ff04 = {{0x04040404U, 0x04040404U, 0x04040404U, 0x04040404U}};
-static const union __u128i ff10 = {{0x10101010U, 0x10101010U, 0x10101010U, 0x10101010U}};
-static const union __u128i ff40 = {{0x40404040U, 0x40404040U, 0x40404040U, 0x40404040U}};
-static const union __u128i ff80 = {{0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U}};
-
-typedef __m128i batch;
-#define BYTES_PER_BATCH 16
-#define B_FFN_ALL_29() ff29.v
-#define B_FFN_ALL_02() ff02.v
-#define B_FFN_ALL_04() ff04.v
-#define B_FFN_ALL_10() ff10.v
-#define B_FFN_ALL_40() ff40.v
-#define B_FFN_ALL_80() ff80.v
-
-#define B_FFAND(a,b) FFAND(a,b)
-#define B_FFOR(a,b) FFOR(a,b)
-#define B_FFXOR(a,b) FFXOR(a,b)
-#define B_FFSH8L(a,n) _mm_slli_epi64((a),(n))
-#define B_FFSH8R(a,n) _mm_srli_epi64((a),(n))
-
-#define M_EMPTY()
-
-#undef BEST_SPAN
-#define BEST_SPAN 16
-
-#undef XOR_BEST_BY
-static inline void XOR_BEST_BY(unsigned char *d, unsigned char *s1, unsigned char *s2)
-{
- __m128i vs1 = _mm_load_si128((__m128i*)s1);
- __m128i vs2 = _mm_load_si128((__m128i*)s2);
- vs1 = _mm_xor_si128(vs1, vs2);
- _mm_store_si128((__m128i*)d, vs1);
-}
-
-#include "fftable.h"
+++ /dev/null
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2003-2004 fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-
-
-#if 0
-//// generics
-#define COPY4BY(d,s) do{ int *pd=(int *)(d), *ps=(int *)(s); \
- *pd = *ps; }while(0)
-#define COPY8BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \
- *pd = *ps; }while(0)
-#define COPY16BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \
- *pd = *ps; \
- *(pd+1) = *(ps+1); }while(0)
-#define COPY32BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \
- *pd = *ps; \
- *(pd+1) = *(ps+1) \
- *(pd+2) = *(ps+2) \
- *(pd+3) = *(ps+3); }while(0)
-#define XOR4BY(d,s1,s2) do{ int *pd=(int *)(d), *ps1=(int *)(s1), *ps2=(int *)(s2); \
- *pd = *ps1 ^ *ps2; }while(0)
-#define XOR8BY(d,s1,s2) do{ long long int *pd=(long long int *)(d), *ps1=(long long int *)(s1), *ps2=(long long int *)(s2); \
- *pd = *ps1 ^ *ps2; }while(0)
-#define XOR16BY(d,s1,s2) do{ long long int *pd=(long long int *)(d), *ps1=(long long int *)(s1), *ps2=(long long int *)(s2); \
- *pd = *ps1 ^ *ps2; \
- *(pd+8) = *(ps1+8) ^ *(ps2+8); }while(0)
-#define XOR32BY(d,s1,s2) do{ long long int *pd=(long long int *)(d), *ps1=(long long int *)(s1), *ps2=(long long int *)(s2); \
- *pd = *ps1 ^ *ps2; \
- *(pd+1) = *(ps1+1) ^ *(ps2+1); \
- *(pd+2) = *(ps1+2) ^ *(ps2+2); \
- *(pd+3) = *(ps1+3) ^ *(ps2+3); }while(0)
-#define XOR32BV(d,s1,s2) do{ int *const pd=(int *const)(d), *ps1=(const int *const)(s1), *ps2=(const int *const)(s2); \
- int z; \
- for(z=0;z<8;z++){ \
- pd[z]=ps1[z]^ps2[z]; \
- } \
- }while(0)
-#define XOREQ4BY(d,s) do{ int *pd=(int *)(d), *ps=(int *)(s); \
- *pd ^= *ps; }while(0)
-#define XOREQ8BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \
- *pd ^= *ps; }while(0)
-#define XOREQ16BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \
- *pd ^= *ps; \
- *(pd+1) ^=*(ps+1); }while(0)
-#define XOREQ32BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \
- *pd ^= *ps; \
- *(pd+1) ^=*(ps+1); \
- *(pd+2) ^=*(ps+2); \
- *(pd+3) ^=*(ps+3); }while(0)
-#define XOREQ32BY4(d,s) do{ int *pd=(int *)(d), *ps=(int *)(s); \
- *pd ^= *ps; \
- *(pd+1) ^=*(ps+1); \
- *(pd+2) ^=*(ps+2); \
- *(pd+3) ^=*(ps+3); \
- *(pd+4) ^=*(ps+4); \
- *(pd+5) ^=*(ps+5); \
- *(pd+6) ^=*(ps+6); \
- *(pd+7) ^=*(ps+7); }while(0)
-#define XOREQ32BV(d,s) do{ unsigned char *pd=(unsigned char *)(d), *ps=(unsigned char *)(s); \
- int z; \
- for(z=0;z<32;z++){ \
- pd[z]^=ps[z]; \
- } \
- }while(0)
-
-#else
-#define XOR_4_BY(d,s1,s2) do{ int *pd=(int *)(d), *ps1=(int *)(s1), *ps2=(int *)(s2); \
- *pd = *ps1 ^ *ps2; }while(0)
-#define XOR_8_BY(d,s1,s2) do{ long long int *pd=(long long int *)(d), *ps1=(long long int *)(s1), *ps2=(long long int *)(s2); \
- *pd = *ps1 ^ *ps2; }while(0)
-#define XOREQ_4_BY(d,s) do{ int *pd=(int *)(d), *ps=(int *)(s); \
- *pd ^= *ps; }while(0)
-#define XOREQ_8_BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \
- *pd ^= *ps; }while(0)
-#define COPY_4_BY(d,s) do{ int *pd=(int *)(d), *ps=(int *)(s); \
- *pd = *ps; }while(0)
-#define COPY_8_BY(d,s) do{ long long int *pd=(long long int *)(d), *ps=(long long int *)(s); \
- *pd = *ps; }while(0)
-
-#define BEST_SPAN 8
-#define XOR_BEST_BY(d,s1,s2) do{ XOR_8_BY(d,s1,s2); }while(0);
-#define XOREQ_BEST_BY(d,s) do{ XOREQ_8_BY(d,s); }while(0);
-#define COPY_BEST_BY(d,s) do{ COPY_8_BY(d,s); }while(0);
-
-#define END_MM do{ }while(0);
-#endif
+++ /dev/null
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2003-2004 fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#define FFXOR(a,b) ((a)^(b))
-#define FFAND(a,b) ((a)&(b))
-#define FFOR(a,b) ((a)|(b))
-#define FFNOT(a) (~(a))
-
-#define B_FFAND(a,b) ((a)&(b))
-#define B_FFOR(a,b) ((a)|(b))
-#define B_FFXOR(a,b) ((a)^(b))
-#define B_FFSH8L(a,n) ((a)<<(n))
-#define B_FFSH8R(a,n) ((a)>>(n))
+++ /dev/null
-/* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2003-2004 fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-
-
-// define statics only once, when STREAM_INIT
-#ifdef STREAM_INIT
-struct stream_regs {
- group A[32+10][4]; // 32 because we will move back (virtual shift register)
- group B[32+10][4]; // 32 because we will move back (virtual shift register)
- group X[4];
- group Y[4];
- group Z[4];
- group D[4];
- group E[4];
- group F[4];
- group p;
- group q;
- group r;
- };
-
-static inline void trasp64_32_88ccw(unsigned char *data){
-/* 64 rows of 32 bits transposition (bytes transp. - 8x8 rotate counterclockwise)*/
-#define row ((unsigned int *)data)
- int i,j;
- for(j=0;j<64;j+=32){
- unsigned int t,b;
- for(i=0;i<16;i++){
- t=row[j+i];
- b=row[j+16+i];
- row[j+i] = (t&0x0000ffff) | ((b )<<16);
- row[j+16+i]=((t )>>16) | (b&0xffff0000) ;
- }
- }
- for(j=0;j<64;j+=16){
- unsigned int t,b;
- for(i=0;i<8;i++){
- t=row[j+i];
- b=row[j+8+i];
- row[j+i] = (t&0x00ff00ff) | ((b&0x00ff00ff)<<8);
- row[j+8+i] =((t&0xff00ff00)>>8) | (b&0xff00ff00);
- }
- }
- for(j=0;j<64;j+=8){
- unsigned int t,b;
- for(i=0;i<4;i++){
- t=row[j+i];
- b=row[j+4+i];
- row[j+i] =((t&0x0f0f0f0f)<<4) | (b&0x0f0f0f0f);
- row[j+4+i] = (t&0xf0f0f0f0) | ((b&0xf0f0f0f0)>>4);
- }
- }
- for(j=0;j<64;j+=4){
- unsigned int t,b;
- for(i=0;i<2;i++){
- t=row[j+i];
- b=row[j+2+i];
- row[j+i] =((t&0x33333333)<<2) | (b&0x33333333);
- row[j+2+i] = (t&0xcccccccc) | ((b&0xcccccccc)>>2);
- }
- }
- for(j=0;j<64;j+=2){
- unsigned int t,b;
- for(i=0;i<1;i++){
- t=row[j+i];
- b=row[j+1+i];
- row[j+i] =((t&0x55555555)<<1) | (b&0x55555555);
- row[j+1+i] = (t&0xaaaaaaaa) | ((b&0xaaaaaaaa)>>1);
- }
- }
-#undef row
-}
-
-static inline void trasp64_32_88cw(unsigned char *data){
-/* 64 rows of 32 bits transposition (bytes transp. - 8x8 rotate clockwise)*/
-#define row ((unsigned int *)data)
- int i,j;
- for(j=0;j<64;j+=32){
- unsigned int t,b;
- for(i=0;i<16;i++){
- t=row[j+i];
- b=row[j+16+i];
- row[j+i] = (t&0x0000ffff) | ((b )<<16);
- row[j+16+i]=((t )>>16) | (b&0xffff0000) ;
- }
- }
- for(j=0;j<64;j+=16){
- unsigned int t,b;
- for(i=0;i<8;i++){
- t=row[j+i];
- b=row[j+8+i];
- row[j+i] = (t&0x00ff00ff) | ((b&0x00ff00ff)<<8);
- row[j+8+i] =((t&0xff00ff00)>>8) | (b&0xff00ff00);
- }
- }
- for(j=0;j<64;j+=8){
- unsigned int t,b;
- for(i=0;i<4;i++){
- t=row[j+i];
- b=row[j+4+i];
- row[j+i] =((t&0xf0f0f0f0)>>4) | (b&0xf0f0f0f0);
- row[j+4+i]= (t&0x0f0f0f0f) | ((b&0x0f0f0f0f)<<4);
- }
- }
- for(j=0;j<64;j+=4){
- unsigned int t,b;
- for(i=0;i<2;i++){
- t=row[j+i];
- b=row[j+2+i];
- row[j+i] =((t&0xcccccccc)>>2) | (b&0xcccccccc);
- row[j+2+i]= (t&0x33333333) | ((b&0x33333333)<<2);
- }
- }
- for(j=0;j<64;j+=2){
- unsigned int t,b;
- for(i=0;i<1;i++){
- t=row[j+i];
- b=row[j+1+i];
- row[j+i] =((t&0xaaaaaaaa)>>1) | (b&0xaaaaaaaa);
- row[j+1+i]= (t&0x55555555) | ((b&0x55555555)<<1);
- }
- }
-#undef row
-}
-
-//64-64----------------------------------------------------------
-static inline void trasp64_64_88ccw(unsigned char *data){
-/* 64 rows of 64 bits transposition (bytes transp. - 8x8 rotate counterclockwise)*/
-#define row ((unsigned long long int *)data)
- int i,j;
- for(j=0;j<64;j+=64){
- unsigned long long int t,b;
- for(i=0;i<32;i++){
- t=row[j+i];
- b=row[j+32+i];
- row[j+i] = (t&0x00000000ffffffffULL) | ((b )<<32);
- row[j+32+i]=((t )>>32) | (b&0xffffffff00000000ULL) ;
- }
- }
- for(j=0;j<64;j+=32){
- unsigned long long int t,b;
- for(i=0;i<16;i++){
- t=row[j+i];
- b=row[j+16+i];
- row[j+i] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16);
- row[j+16+i]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ;
- }
- }
- for(j=0;j<64;j+=16){
- unsigned long long int t,b;
- for(i=0;i<8;i++){
- t=row[j+i];
- b=row[j+8+i];
- row[j+i] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8);
- row[j+8+i] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL);
- }
- }
- for(j=0;j<64;j+=8){
- unsigned long long int t,b;
- for(i=0;i<4;i++){
- t=row[j+i];
- b=row[j+4+i];
- row[j+i] =((t&0x0f0f0f0f0f0f0f0fULL)<<4) | (b&0x0f0f0f0f0f0f0f0fULL);
- row[j+4+i] = (t&0xf0f0f0f0f0f0f0f0ULL) | ((b&0xf0f0f0f0f0f0f0f0ULL)>>4);
- }
- }
- for(j=0;j<64;j+=4){
- unsigned long long int t,b;
- for(i=0;i<2;i++){
- t=row[j+i];
- b=row[j+2+i];
- row[j+i] =((t&0x3333333333333333ULL)<<2) | (b&0x3333333333333333ULL);
- row[j+2+i] = (t&0xccccccccccccccccULL) | ((b&0xccccccccccccccccULL)>>2);
- }
- }
- for(j=0;j<64;j+=2){
- unsigned long long int t,b;
- for(i=0;i<1;i++){
- t=row[j+i];
- b=row[j+1+i];
- row[j+i] =((t&0x5555555555555555ULL)<<1) | (b&0x5555555555555555ULL);
- row[j+1+i] = (t&0xaaaaaaaaaaaaaaaaULL) | ((b&0xaaaaaaaaaaaaaaaaULL)>>1);
- }
- }
-#undef row
-}
-
-static inline void trasp64_64_88cw(unsigned char *data){
-/* 64 rows of 64 bits transposition (bytes transp. - 8x8 rotate clockwise)*/
-#define row ((unsigned long long int *)data)
- int i,j;
- for(j=0;j<64;j+=64){
- unsigned long long int t,b;
- for(i=0;i<32;i++){
- t=row[j+i];
- b=row[j+32+i];
- row[j+i] = (t&0x00000000ffffffffULL) | ((b )<<32);
- row[j+32+i]=((t )>>32) | (b&0xffffffff00000000ULL) ;
- }
- }
- for(j=0;j<64;j+=32){
- unsigned long long int t,b;
- for(i=0;i<16;i++){
- t=row[j+i];
- b=row[j+16+i];
- row[j+i] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16);
- row[j+16+i]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ;
- }
- }
- for(j=0;j<64;j+=16){
- unsigned long long int t,b;
- for(i=0;i<8;i++){
- t=row[j+i];
- b=row[j+8+i];
- row[j+i] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8);
- row[j+8+i] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL);
- }
- }
- for(j=0;j<64;j+=8){
- unsigned long long int t,b;
- for(i=0;i<4;i++){
- t=row[j+i];
- b=row[j+4+i];
- row[j+i] =((t&0xf0f0f0f0f0f0f0f0ULL)>>4) | (b&0xf0f0f0f0f0f0f0f0ULL);
- row[j+4+i] = (t&0x0f0f0f0f0f0f0f0fULL) | ((b&0x0f0f0f0f0f0f0f0fULL)<<4);
- }
- }
- for(j=0;j<64;j+=4){
- unsigned long long int t,b;
- for(i=0;i<2;i++){
- t=row[j+i];
- b=row[j+2+i];
- row[j+i] =((t&0xccccccccccccccccULL)>>2) | (b&0xccccccccccccccccULL);
- row[j+2+i] = (t&0x3333333333333333ULL) | ((b&0x3333333333333333ULL)<<2);
- }
- }
- for(j=0;j<64;j+=2){
- unsigned long long int t,b;
- for(i=0;i<1;i++){
- t=row[j+i];
- b=row[j+1+i];
- row[j+i] =((t&0xaaaaaaaaaaaaaaaaULL)>>1) | (b&0xaaaaaaaaaaaaaaaaULL);
- row[j+1+i] = (t&0x5555555555555555ULL) | ((b&0x5555555555555555ULL)<<1);
- }
- }
-#undef row
-}
-
-//64-128----------------------------------------------------------
-static inline void trasp64_128_88ccw(unsigned char *data){
-/* 64 rows of 128 bits transposition (bytes transp. - 8x8 rotate counterclockwise)*/
-#define halfrow ((unsigned long long int *)data)
- int i,j;
- for(j=0;j<64;j+=64){
- unsigned long long int t,b;
- for(i=0;i<32;i++){
- t=halfrow[2*(j+i)];
- b=halfrow[2*(j+32+i)];
- halfrow[2*(j+i)] = (t&0x00000000ffffffffULL) | ((b )<<32);
- halfrow[2*(j+32+i)]=((t )>>32) | (b&0xffffffff00000000ULL) ;
- t=halfrow[2*(j+i)+1];
- b=halfrow[2*(j+32+i)+1];
- halfrow[2*(j+i)+1] = (t&0x00000000ffffffffULL) | ((b )<<32);
- halfrow[2*(j+32+i)+1]=((t )>>32) | (b&0xffffffff00000000ULL) ;
- }
- }
- for(j=0;j<64;j+=32){
- unsigned long long int t,b;
- for(i=0;i<16;i++){
- t=halfrow[2*(j+i)];
- b=halfrow[2*(j+16+i)];
- halfrow[2*(j+i)] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16);
- halfrow[2*(j+16+i)]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ;
- t=halfrow[2*(j+i)+1];
- b=halfrow[2*(j+16+i)+1];
- halfrow[2*(j+i)+1] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16);
- halfrow[2*(j+16+i)+1]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ;
- }
- }
- for(j=0;j<64;j+=16){
- unsigned long long int t,b;
- for(i=0;i<8;i++){
- t=halfrow[2*(j+i)];
- b=halfrow[2*(j+8+i)];
- halfrow[2*(j+i)] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8);
- halfrow[2*(j+8+i)] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL);
- t=halfrow[2*(j+i)+1];
- b=halfrow[2*(j+8+i)+1];
- halfrow[2*(j+i)+1] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8);
- halfrow[2*(j+8+i)+1] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL);
- }
- }
- for(j=0;j<64;j+=8){
- unsigned long long int t,b;
- for(i=0;i<4;i++){
- t=halfrow[2*(j+i)];
- b=halfrow[2*(j+4+i)];
- halfrow[2*(j+i)] =((t&0x0f0f0f0f0f0f0f0fULL)<<4) | (b&0x0f0f0f0f0f0f0f0fULL);
- halfrow[2*(j+4+i)] = (t&0xf0f0f0f0f0f0f0f0ULL) | ((b&0xf0f0f0f0f0f0f0f0ULL)>>4);
- t=halfrow[2*(j+i)+1];
- b=halfrow[2*(j+4+i)+1];
- halfrow[2*(j+i)+1] =((t&0x0f0f0f0f0f0f0f0fULL)<<4) | (b&0x0f0f0f0f0f0f0f0fULL);
- halfrow[2*(j+4+i)+1] = (t&0xf0f0f0f0f0f0f0f0ULL) | ((b&0xf0f0f0f0f0f0f0f0ULL)>>4);
- }
- }
- for(j=0;j<64;j+=4){
- unsigned long long int t,b;
- for(i=0;i<2;i++){
- t=halfrow[2*(j+i)];
- b=halfrow[2*(j+2+i)];
- halfrow[2*(j+i)] =((t&0x3333333333333333ULL)<<2) | (b&0x3333333333333333ULL);
- halfrow[2*(j+2+i)] = (t&0xccccccccccccccccULL) | ((b&0xccccccccccccccccULL)>>2);
- t=halfrow[2*(j+i)+1];
- b=halfrow[2*(j+2+i)+1];
- halfrow[2*(j+i)+1] =((t&0x3333333333333333ULL)<<2) | (b&0x3333333333333333ULL);
- halfrow[2*(j+2+i)+1] = (t&0xccccccccccccccccULL) | ((b&0xccccccccccccccccULL)>>2);
- }
- }
- for(j=0;j<64;j+=2){
- unsigned long long int t,b;
- for(i=0;i<1;i++){
- t=halfrow[2*(j+i)];
- b=halfrow[2*(j+1+i)];
- halfrow[2*(j+i)] =((t&0x5555555555555555ULL)<<1) | (b&0x5555555555555555ULL);
- halfrow[2*(j+1+i)] = (t&0xaaaaaaaaaaaaaaaaULL) | ((b&0xaaaaaaaaaaaaaaaaULL)>>1);
- t=halfrow[2*(j+i)+1];
- b=halfrow[2*(j+1+i)+1];
- halfrow[2*(j+i)+1] =((t&0x5555555555555555ULL)<<1) | (b&0x5555555555555555ULL);
- halfrow[2*(j+1+i)+1] = (t&0xaaaaaaaaaaaaaaaaULL) | ((b&0xaaaaaaaaaaaaaaaaULL)>>1);
- }
- }
-#undef halfrow
-}
-
-static inline void trasp64_128_88cw(unsigned char *data){
-/* 64 rows of 128 bits transposition (bytes transp. - 8x8 rotate clockwise)*/
-#define halfrow ((unsigned long long int *)data)
- int i,j;
- for(j=0;j<64;j+=64){
- unsigned long long int t,b;
- for(i=0;i<32;i++){
- t=halfrow[2*(j+i)];
- b=halfrow[2*(j+32+i)];
- halfrow[2*(j+i)] = (t&0x00000000ffffffffULL) | ((b )<<32);
- halfrow[2*(j+32+i)]=((t )>>32) | (b&0xffffffff00000000ULL) ;
- t=halfrow[2*(j+i)+1];
- b=halfrow[2*(j+32+i)+1];
- halfrow[2*(j+i)+1] = (t&0x00000000ffffffffULL) | ((b )<<32);
- halfrow[2*(j+32+i)+1]=((t )>>32) | (b&0xffffffff00000000ULL) ;
- }
- }
- for(j=0;j<64;j+=32){
- unsigned long long int t,b;
- for(i=0;i<16;i++){
- t=halfrow[2*(j+i)];
- b=halfrow[2*(j+16+i)];
- halfrow[2*(j+i)] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16);
- halfrow[2*(j+16+i)]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ;
- t=halfrow[2*(j+i)+1];
- b=halfrow[2*(j+16+i)+1];
- halfrow[2*(j+i)+1] = (t&0x0000ffff0000ffffULL) | ((b&0x0000ffff0000ffffULL)<<16);
- halfrow[2*(j+16+i)+1]=((t&0xffff0000ffff0000ULL)>>16) | (b&0xffff0000ffff0000ULL) ;
- }
- }
- for(j=0;j<64;j+=16){
- unsigned long long int t,b;
- for(i=0;i<8;i++){
- t=halfrow[2*(j+i)];
- b=halfrow[2*(j+8+i)];
- halfrow[2*(j+i)] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8);
- halfrow[2*(j+8+i)] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL);
- t=halfrow[2*(j+i)+1];
- b=halfrow[2*(j+8+i)+1];
- halfrow[2*(j+i)+1] = (t&0x00ff00ff00ff00ffULL) | ((b&0x00ff00ff00ff00ffULL)<<8);
- halfrow[2*(j+8+i)+1] =((t&0xff00ff00ff00ff00ULL)>>8) | (b&0xff00ff00ff00ff00ULL);
- }
- }
- for(j=0;j<64;j+=8){
- unsigned long long int t,b;
- for(i=0;i<4;i++){
- t=halfrow[2*(j+i)];
- b=halfrow[2*(j+4+i)];
- halfrow[2*(j+i)] =((t&0xf0f0f0f0f0f0f0f0ULL)>>4) | (b&0xf0f0f0f0f0f0f0f0ULL);
- halfrow[2*(j+4+i)] = (t&0x0f0f0f0f0f0f0f0fULL) | ((b&0x0f0f0f0f0f0f0f0fULL)<<4);
- t=halfrow[2*(j+i)+1];
- b=halfrow[2*(j+4+i)+1];
- halfrow[2*(j+i)+1] =((t&0xf0f0f0f0f0f0f0f0ULL)>>4) | (b&0xf0f0f0f0f0f0f0f0ULL);
- halfrow[2*(j+4+i)+1] = (t&0x0f0f0f0f0f0f0f0fULL) | ((b&0x0f0f0f0f0f0f0f0fULL)<<4);
- }
- }
- for(j=0;j<64;j+=4){
- unsigned long long int t,b;
- for(i=0;i<2;i++){
- t=halfrow[2*(j+i)];
- b=halfrow[2*(j+2+i)];
- halfrow[2*(j+i)] =((t&0xccccccccccccccccULL)>>2) | (b&0xccccccccccccccccULL);
- halfrow[2*(j+2+i)] = (t&0x3333333333333333ULL) | ((b&0x3333333333333333ULL)<<2);
- t=halfrow[2*(j+i)+1];
- b=halfrow[2*(j+2+i)+1];
- halfrow[2*(j+i)+1] =((t&0xccccccccccccccccULL)>>2) | (b&0xccccccccccccccccULL);
- halfrow[2*(j+2+i)+1] = (t&0x3333333333333333ULL) | ((b&0x3333333333333333ULL)<<2);
- }
- }
- for(j=0;j<64;j+=2){
- unsigned long long int t,b;
- for(i=0;i<1;i++){
- t=halfrow[2*(j+i)];
- b=halfrow[2*(j+1+i)];
- halfrow[2*(j+i)] =((t&0xaaaaaaaaaaaaaaaaULL)>>1) | (b&0xaaaaaaaaaaaaaaaaULL);
- halfrow[2*(j+1+i)] = (t&0x5555555555555555ULL) | ((b&0x5555555555555555ULL)<<1);
- t=halfrow[2*(j+i)+1];
- b=halfrow[2*(j+1+i)+1];
- halfrow[2*(j+i)+1] =((t&0xaaaaaaaaaaaaaaaaULL)>>1) | (b&0xaaaaaaaaaaaaaaaaULL);
- halfrow[2*(j+1+i)+1] = (t&0x5555555555555555ULL) | ((b&0x5555555555555555ULL)<<1);
- }
- }
-#undef halfrow
-}
-#endif
-
-
-#ifdef STREAM_INIT
-static void stream_cypher_group_init(
- struct stream_regs *regs,
- group iA[8][4], // [In] iA00,iA01,...iA73 32 groups | Derived from key.
- group iB[8][4], // [In] iB00,iB01,...iB73 32 groups | Derived from key.
- unsigned char *sb) // [In] (SB0,SB1,...SB7)...x32 32*8 bytes | Extra input.
-#endif
-#ifdef STREAM_NORMAL
-static void stream_cypher_group_normal(
- struct stream_regs *regs,
- unsigned char *cb) // [Out] (CB0,CB1,...CB7)...x32 32*8 bytes | Output.
-#endif
-{
-#ifdef STREAM_INIT
- group in1[4];
- group in2[4];
-#endif
- group extra_B[4];
- group fa,fb,fc,fd,fe;
- group s1a,s1b,s2a,s2b,s3a,s3b,s4a,s4b,s5a,s5b,s6a,s6b,s7a,s7b;
- group next_E[4];
- group tmp0,tmp1,tmp2,tmp3,tmp4;
-#ifdef STREAM_INIT
- group *sb_g=(group *)sb;
-#endif
-#ifdef STREAM_NORMAL
- group *cb_g=(group *)cb;
-#endif
- int aboff;
- int i,j,k,b;
- int dbg;
-
-#ifdef STREAM_INIT
- DBG(fprintf(stderr,":::::::::: BEGIN STREAM INIT\n"));
-#endif
-#ifdef STREAM_NORMAL
- DBG(fprintf(stderr,":::::::::: BEGIN STREAM NORMAL\n"));
-#endif
-#ifdef STREAM_INIT
-for(j=0;j<64;j++){
- DBG(fprintf(stderr,"precall prerot stream_in[%2i]=",j));
- DBG(dump_mem("",sb+BYPG*j,BYPG,BYPG));
-}
-
-DBG(dump_mem("stream_prerot ",sb,GROUP_PARALLELISM*8,BYPG));
-#if GROUP_PARALLELISM==32
-trasp64_32_88ccw(sb);
-#endif
-#if GROUP_PARALLELISM==64
-trasp64_64_88ccw(sb);
-#endif
-#if GROUP_PARALLELISM==128
-trasp64_128_88ccw(sb);
-#endif
-DBG(dump_mem("stream_postrot",sb,GROUP_PARALLELISM*8,BYPG));
-
-for(j=0;j<64;j++){
- DBG(fprintf(stderr,"precall stream_in[%2i]=",j));
- DBG(dump_mem("",sb+BYPG*j,BYPG,BYPG));
-}
-#endif
-
- aboff=32;
-
-#ifdef STREAM_INIT
- // load first 32 bits of ck into A[aboff+0]..A[aboff+7]
- // load last 32 bits of ck into B[aboff+0]..B[aboff+7]
- // all other regs = 0
- for(i=0;i<8;i++){
- for(b=0;b<4;b++){
-DBG(fprintf(stderr,"dbg from iA A[%i][%i]=",i,b));
-DBG(dump_mem("",(unsigned char *)&iA[i][b],BYPG,BYPG));
-DBG(fprintf(stderr," dbg from iB B[%i][%i]=",i,b));
-DBG(dump_mem("",(unsigned char *)&iB[i][b],BYPG,BYPG));
- regs->A[aboff+i][b]=iA[i][b];
- regs->B[aboff+i][b]=iB[i][b];
- }
- }
- for(b=0;b<4;b++){
- regs->A[aboff+8][b]=FF0();
- regs->A[aboff+9][b]=FF0();
- regs->B[aboff+8][b]=FF0();
- regs->B[aboff+9][b]=FF0();
- }
- for(b=0;b<4;b++){
- regs->X[b]=FF0();
- regs->Y[b]=FF0();
- regs->Z[b]=FF0();
- regs->D[b]=FF0();
- regs->E[b]=FF0();
- regs->F[b]=FF0();
- }
- regs->p=FF0();
- regs->q=FF0();
- regs->r=FF0();
-#endif
-
-for(dbg=0;dbg<4;dbg++){
- DBG(fprintf(stderr,"dbg A0[%i]=",dbg));
- DBG(dump_mem("",(unsigned char *)&regs->A[aboff+0][dbg],BYPG,BYPG));
- DBG(fprintf(stderr,"dbg B0[%i]=",dbg));
- DBG(dump_mem("",(unsigned char *)&regs->B[aboff+0][dbg],BYPG,BYPG));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
- // EXTERNAL LOOP - 8 bytes per operation
- for(i=0;i<8;i++){
-
- DBG(fprintf(stderr,"--BEGIN EXTERNAL LOOP %i\n",i));
-
-#ifdef STREAM_INIT
- for(b=0;b<4;b++){
- in1[b]=sb_g[8*i+4+b];
- in2[b]=sb_g[8*i+b];
- }
-#endif
-
- // INTERNAL LOOP - 2 bits per iteration
- for(j=0; j<4; j++){
-
- DBG(fprintf(stderr,"---BEGIN INTERNAL LOOP %i (EXT %i, INT %i)\n",j,i,j));
-
- // from A0..A9, 35 bits are selected as inputs to 7 s-boxes
- // 5 bits input per s-box, 2 bits output per s-box
-
- // we can select bits with zero masking and shifting operations
- // and synthetize s-boxes with optimized boolean functions.
- // this is the actual reason we do all the crazy transposition
- // stuff to switch between normal and bit slice representations.
- // this code really flies.
-
- fe=regs->A[aboff+3][0];fa=regs->A[aboff+0][2];fb=regs->A[aboff+5][1];fc=regs->A[aboff+6][3];fd=regs->A[aboff+8][0];
-/* 1000 1110 1110 0001 : lev 7: */ //tmp0=( fa^( fb^( ( ( ( fa|fb )^fc )|( fc^fd ) )^ALL_ONES ) ) );
-/* 1110 0010 0011 0011 : lev 6: */ //tmp1=( ( fa|fb )^( ( fc&( fa|( fb^fd ) ) )^ALL_ONES ) );
-/* 0011 0110 1000 1101 : lev 5: */ //tmp2=( fa^( ( fb&fd )^( ( fa&fd )|fc ) ) );
-/* 0101 0101 1001 0011 : lev 5: */ //tmp3=( ( fa&fc )^( fa^( ( fa&fb )|fd ) ) );
-/* 1000 1110 1110 0001 : lev 7: */ tmp0=FFXOR(fa,FFXOR(fb,FFXOR(FFOR(FFXOR(FFOR(fa,fb),fc),FFXOR(fc,fd)),FF1())));
-/* 1110 0010 0011 0011 : lev 6: */ tmp1=FFXOR(FFOR(fa,fb),FFXOR(FFAND(fc,FFOR(fa,FFXOR(fb,fd))),FF1()));
-/* 0011 0110 1000 1101 : lev 5: */ tmp2=FFXOR(fa,FFXOR(FFAND(fb,fd),FFOR(FFAND(fa,fd),fc)));
-/* 0101 0101 1001 0011 : lev 5: */ tmp3=FFXOR(FFAND(fa,fc),FFXOR(fa,FFOR(FFAND(fa,fb),fd)));
- s1a=FFXOR(tmp0,FFAND(fe,tmp1));
- s1b=FFXOR(tmp2,FFAND(fe,tmp3));
-//dump_mem("s1as1b-fe",&fe,BYPG,BYPG);
-//dump_mem("s1as1b-fa",&fa,BYPG,BYPG);
-//dump_mem("s1as1b-fb",&fb,BYPG,BYPG);
-//dump_mem("s1as1b-fc",&fc,BYPG,BYPG);
-//dump_mem("s1as1b-fd",&fd,BYPG,BYPG);
-
- fe=regs->A[aboff+1][1];fa=regs->A[aboff+2][2];fb=regs->A[aboff+5][3];fc=regs->A[aboff+6][0];fd=regs->A[aboff+8][1];
-/* 1001 1110 0110 0001 : lev 6: */ //tmp0=( fa^( ( fb&( fc|fd ) )^( fc^( fd^ALL_ONES ) ) ) );
-/* 0000 0011 0111 1011 : lev 5: */ //tmp1=( ( fa&( fb^fd ) )|( ( fa|fb )&fc ) );
-/* 1100 0110 1101 0010 : lev 6: */ //tmp2=( ( fb&fd )^( ( fa&fd )|( fb^( fc^ALL_ONES ) ) ) );
-/* 0001 1110 1111 0101 : lev 5: */ //tmp3=( ( fa&fd )|( fa^( fb^( fc&fd ) ) ) );
-/* 1001 1110 0110 0001 : lev 6: */ tmp0=FFXOR(fa,FFXOR(FFAND(fb,FFOR(fc,fd)),FFXOR(fc,FFXOR(fd,FF1()))));
-/* 0000 0011 0111 1011 : lev 5: */ tmp1=FFOR(FFAND(fa,FFXOR(fb,fd)),FFAND(FFOR(fa,fb),fc));
-/* 1100 0110 1101 0010 : lev 6: */ tmp2=FFXOR(FFAND(fb,fd),FFOR(FFAND(fa,fd),FFXOR(fb,FFXOR(fc,FF1()))));
-/* 0001 1110 1111 0101 : lev 5: */ tmp3=FFOR(FFAND(fa,fd),FFXOR(fa,FFXOR(fb,FFAND(fc,fd))));
- s2a=FFXOR(tmp0,FFAND(fe,tmp1));
- s2b=FFXOR(tmp2,FFAND(fe,tmp3));
-
- fe=regs->A[aboff+0][3];fa=regs->A[aboff+1][0];fb=regs->A[aboff+4][1];fc=regs->A[aboff+4][3];fd=regs->A[aboff+5][2];
-/* 0100 1011 1001 0110 : lev 5: */ //tmp0=( fa^( fb^( ( fc&( fa|fd ) )^fd ) ) );
-/* 1101 0101 1000 1100 : lev 7: */ //tmp1=( ( fa&fc )^( ( fa^fd )|( ( fb|fc )^( fd^ALL_ONES ) ) ) );
-/* 0010 0111 1101 1000 : lev 4: */ //tmp2=( fa^( ( ( fb^fc )&fd )^fc ) );
-/* 1111 1111 1111 1111 : lev 0: */ //tmp3=ALL_ONES;
-/* 0100 1011 1001 0110 : lev 5: */ tmp0=FFXOR(fa,FFXOR(fb,FFXOR(FFAND(fc,FFOR(fa,fd)),fd)));
-/* 1101 0101 1000 1100 : lev 7: */ tmp1=FFXOR(FFAND(fa,fc),FFOR(FFXOR(fa,fd),FFXOR(FFOR(fb,fc),FFXOR(fd,FF1()))));
-/* 0010 0111 1101 1000 : lev 4: */ tmp2=FFXOR(fa,FFXOR(FFAND(FFXOR(fb,fc),fd),fc));
-/* 1111 1111 1111 1111 : lev 0: */ tmp3=FF1();
- s3a=FFXOR(tmp0,FFAND(FFNOT(fe),tmp1));
- s3b=FFXOR(tmp2,FFAND(fe,tmp3));
-
- fe=regs->A[aboff+2][3];fa=regs->A[aboff+0][1];fb=regs->A[aboff+1][3];fc=regs->A[aboff+3][2];fd=regs->A[aboff+7][0];
-/* 1011 0101 0100 1001 : lev 7: */ //tmp0=( fa^( ( fc&( fa^fd ) )|( fb^( fc|( fd^ALL_ONES ) ) ) ) );
-/* 0010 1101 0110 0110 : lev 6: */ //tmp1=( ( fa&fb )^( fb^( ( ( fa|fc )&fd )^fc ) ) );
-/* 0110 0111 1101 0000 : lev 7: */ //tmp2=( fa^( ( fb&fc )|( ( ( fa&( fb^fd ) )|fc )^fd ) ) );
-/* 1111 1111 1111 1111 : lev 0: */ //tmp3=ALL_ONES;
-/* 1011 0101 0100 1001 : lev 7: */ tmp0=FFXOR(fa,FFOR(FFAND(fc,FFXOR(fa,fd)),FFXOR(fb,FFOR(fc,FFXOR(fd,FF1())))));
-/* 0010 1101 0110 0110 : lev 6: */ tmp1=FFXOR(FFAND(fa,fb),FFXOR(fb,FFXOR(FFAND(FFOR(fa,fc),fd),fc)));
-/* 0110 0111 1101 0000 : lev 7: */ tmp2=FFXOR(fa,FFOR(FFAND(fb,fc),FFXOR(FFOR(FFAND(fa,FFXOR(fb,fd)),fc),fd)));
-/* 1111 1111 1111 1111 : lev 0: */ tmp3=FF1();
- s4a=FFXOR(tmp0,FFAND(fe,FFXOR(tmp1,tmp0)));
- s4b=FFXOR(FFXOR(s4a,tmp2),FFAND(fe,tmp3));
-
- fe=regs->A[aboff+4][2];fa=regs->A[aboff+3][3];fb=regs->A[aboff+5][0];fc=regs->A[aboff+7][1];fd=regs->A[aboff+8][2];
-/* 1000 1111 0011 0010 : lev 7: */ //tmp0=( ( ( fa&( fb|fc ) )^fb )|( ( ( fa^fc )|fd )^ALL_ONES ) );
-/* 0110 1011 0000 1011 : lev 6: */ //tmp1=( fb^( ( fc^fd )&( fc^( fb|( fa^fd ) ) ) ) );
-/* 0001 1010 0111 1001 : lev 6: */ //tmp2=( ( fa&fc )^( fb^( ( fb|( fa^fc ) )&fd ) ) );
-/* 0101 1101 1101 0101 : lev 4: */ //tmp3=( ( ( fa^fb )&( fc^ALL_ONES ) )|fd );
-/* 1000 1111 0011 0010 : lev 7: */ tmp0=FFOR(FFXOR(FFAND(fa,FFOR(fb,fc)),fb),FFXOR(FFOR(FFXOR(fa,fc),fd),FF1()));
-/* 0110 1011 0000 1011 : lev 6: */ tmp1=FFXOR(fb,FFAND(FFXOR(fc,fd),FFXOR(fc,FFOR(fb,FFXOR(fa,fd)))));
-/* 0001 1010 0111 1001 : lev 6: */ tmp2=FFXOR(FFAND(fa,fc),FFXOR(fb,FFAND(FFOR(fb,FFXOR(fa,fc)),fd)));
-/* 0101 1101 1101 0101 : lev 4: */ tmp3=FFOR(FFAND(FFXOR(fa,fb),FFXOR(fc,FF1())),fd);
- s5a=FFXOR(tmp0,FFAND(fe,tmp1));
- s5b=FFXOR(tmp2,FFAND(fe,tmp3));
-
- fe=regs->A[aboff+2][1];fa=regs->A[aboff+3][1];fb=regs->A[aboff+4][0];fc=regs->A[aboff+6][2];fd=regs->A[aboff+8][3];
-/* 0011 0110 0010 1101 : lev 6: */ //tmp0=( ( ( fa&fc )&fd )^( ( fb&( fa|fd ) )^fc ) );
-/* 1110 1110 1011 1011 : lev 3: */ //tmp1=( ( ( fa^fc )&fd )^ALL_ONES );
-/* 0101 1000 0110 0111 : lev 6: */ //tmp2=( ( fa&( fb|fc ) )^( fb^( ( fb&fc )|fd ) ) );
-/* 0001 0011 0000 0001 : lev 5: */ //tmp3=( fc&( ( fa&( fb^fd ) )^( fb|fd ) ) );
-/* 0011 0110 0010 1101 : lev 6: */ tmp0=FFXOR(FFAND(FFAND(fa,fc),fd),FFXOR(FFAND(fb,FFOR(fa,fd)),fc));
-/* 1110 1110 1011 1011 : lev 3: */ tmp1=FFXOR(FFAND(FFXOR(fa,fc),fd),FF1());
-/* 0101 1000 0110 0111 : lev 6: */ tmp2=FFXOR(FFAND(fa,FFOR(fb,fc)),FFXOR(fb,FFOR(FFAND(fb,fc),fd)));
-/* 0001 0011 0000 0001 : lev 5: */ tmp3=FFAND(fc,FFXOR(FFAND(fa,FFXOR(fb,fd)),FFOR(fb,fd)));
- s6a=FFXOR(tmp0,FFAND(fe,tmp1));
- s6b=FFXOR(tmp2,FFAND(fe,tmp3));
-
- fe=regs->A[aboff+1][2];fa=regs->A[aboff+2][0];fb=regs->A[aboff+6][1];fc=regs->A[aboff+7][2];fd=regs->A[aboff+7][3];
-/* 0111 1000 1001 0110 : lev 5: */ //tmp0=( fb^( ( fc&fd )|( fa^( fc^fd ) ) ) );
-/* 0100 1001 0101 1011 : lev 6: */ //tmp1=( ( fb|fd )&( ( fa&fc )|( fb^( fc^fd ) ) ) );
-/* 0100 1001 1011 1001 : lev 5: */ //tmp2=( ( fa|fb )^( ( fc&( fb|fd ) )^fd ) );
-/* 1111 1111 1101 1101 : lev 3: */ //tmp3=( fd|( ( fa&fc )^ALL_ONES ) );
-/* 0111 1000 1001 0110 : lev 5: */ tmp0=FFXOR(fb,FFOR(FFAND(fc,fd),FFXOR(fa,FFXOR(fc,fd))));
-/* 0100 1001 0101 1011 : lev 6: */ tmp1=FFAND(FFOR(fb,fd),FFOR(FFAND(fa,fc),FFXOR(fb,FFXOR(fc,fd))));
-/* 0100 1001 1011 1001 : lev 5: */ tmp2=FFXOR(FFOR(fa,fb),FFXOR(FFAND(fc,FFOR(fb,fd)),fd));
-/* 1111 1111 1101 1101 : lev 3: */ tmp3=FFOR(fd,FFXOR(FFAND(fa,fc),FF1()));
- s7a=FFXOR(tmp0,FFAND(fe,tmp1));
- s7b=FFXOR(tmp2,FFAND(fe,tmp3));
-
-
-/*
- we have just done this:
-
- int sbox1[0x20] = {2,0,1,1,2,3,3,0, 3,2,2,0,1,1,0,3, 0,3,3,0,2,2,1,1, 2,2,0,3,1,1,3,0};
- int sbox2[0x20] = {3,1,0,2,2,3,3,0, 1,3,2,1,0,0,1,2, 3,1,0,3,3,2,0,2, 0,0,1,2,2,1,3,1};
- int sbox3[0x20] = {2,0,1,2,2,3,3,1, 1,1,0,3,3,0,2,0, 1,3,0,1,3,0,2,2, 2,0,1,2,0,3,3,1};
- int sbox4[0x20] = {3,1,2,3,0,2,1,2, 1,2,0,1,3,0,0,3, 1,0,3,1,2,3,0,3, 0,3,2,0,1,2,2,1};
- int sbox5[0x20] = {2,0,0,1,3,2,3,2, 0,1,3,3,1,0,2,1, 2,3,2,0,0,3,1,1, 1,0,3,2,3,1,0,2};
- int sbox6[0x20] = {0,1,2,3,1,2,2,0, 0,1,3,0,2,3,1,3, 2,3,0,2,3,0,1,1, 2,1,1,2,0,3,3,0};
- int sbox7[0x20] = {0,3,2,2,3,0,0,1, 3,0,1,3,1,2,2,1, 1,0,3,3,0,1,1,2, 2,3,1,0,2,3,0,2};
-
- s12 = sbox1[ (((A3>>0)&1)<<4) | (((A0>>2)&1)<<3) | (((A5>>1)&1)<<2) | (((A6>>3)&1)<<1) | (((A8>>0)&1)<<0) ]
- |sbox2[ (((A1>>1)&1)<<4) | (((A2>>2)&1)<<3) | (((A5>>3)&1)<<2) | (((A6>>0)&1)<<1) | (((A8>>1)&1)<<0) ];
- s34 = sbox3[ (((A0>>3)&1)<<4) | (((A1>>0)&1)<<3) | (((A4>>1)&1)<<2) | (((A4>>3)&1)<<1) | (((A5>>2)&1)<<0) ]
- |sbox4[ (((A2>>3)&1)<<4) | (((A0>>1)&1)<<3) | (((A1>>3)&1)<<2) | (((A3>>2)&1)<<1) | (((A7>>0)&1)<<0) ];
- s56 = sbox5[ (((A4>>2)&1)<<4) | (((A3>>3)&1)<<3) | (((A5>>0)&1)<<2) | (((A7>>1)&1)<<1) | (((A8>>2)&1)<<0) ]
- |sbox6[ (((A2>>1)&1)<<4) | (((A3>>1)&1)<<3) | (((A4>>0)&1)<<2) | (((A6>>2)&1)<<1) | (((A8>>3)&1)<<0) ];
- s7 = sbox7[ (((A1>>2)&1)<<4) | (((A2>>0)&1)<<3) | (((A6>>1)&1)<<2) | (((A7>>2)&1)<<1) | (((A7>>3)&1)<<0) ];
-*/
-
- // use 4x4 xor to produce extra nibble for T3
-
- extra_B[3]=FFXOR(FFXOR(FFXOR(regs->B[aboff+2][0],regs->B[aboff+5][1]),regs->B[aboff+6][2]),regs->B[aboff+8][3]);
- extra_B[2]=FFXOR(FFXOR(FFXOR(regs->B[aboff+5][0],regs->B[aboff+7][1]),regs->B[aboff+2][3]),regs->B[aboff+3][2]);
- extra_B[1]=FFXOR(FFXOR(FFXOR(regs->B[aboff+4][3],regs->B[aboff+7][2]),regs->B[aboff+3][0]),regs->B[aboff+4][1]);
- extra_B[0]=FFXOR(FFXOR(FFXOR(regs->B[aboff+8][2],regs->B[aboff+5][3]),regs->B[aboff+2][1]),regs->B[aboff+7][0]);
-for(dbg=0;dbg<4;dbg++){
- DBG(fprintf(stderr,"extra_B[%i]=",dbg));
- DBG(dump_mem("",(unsigned char *)&extra_B[dbg],BYPG,BYPG));
-}
-
- // T1 = xor all inputs
- // in1, in2, D are only used in T1 during initialisation, not generation
- for(b=0;b<4;b++){
- regs->A[aboff-1][b]=FFXOR(regs->A[aboff+9][b],regs->X[b]);
- }
-
-#ifdef STREAM_INIT
- for(b=0;b<4;b++){
- regs->A[aboff-1][b]=FFXOR(FFXOR(regs->A[aboff-1][b],regs->D[b]),((j % 2) ? in2[b] : in1[b]));
- }
-#endif
-
-for(dbg=0;dbg<4;dbg++){
- DBG(fprintf(stderr,"next_A0[%i]=",dbg));
- DBG(dump_mem("",(unsigned char *)&regs->A[aboff-1][dbg],BYPG,BYPG));
-}
-
- // T2 = xor all inputs
- // in1, in2 are only used in T1 during initialisation, not generation
- // if p=0, use this, if p=1, rotate the result left
- for(b=0;b<4;b++){
- regs->B[aboff-1][b]=FFXOR(FFXOR(regs->B[aboff+6][b],regs->B[aboff+9][b]),regs->Y[b]);
- }
-
-#ifdef STREAM_INIT
- for(b=0;b<4;b++){
- regs->B[aboff-1][b]=FFXOR(regs->B[aboff-1][b],((j % 2) ? in1[b] : in2[b]));
- }
-#endif
-
-for(dbg=0;dbg<4;dbg++){
- DBG(fprintf(stderr,"next_B0[%i]=",dbg));
- DBG(dump_mem("",(unsigned char *)&regs->B[aboff-1][dbg],BYPG,BYPG));
-}
-
- // if p=1, rotate left (yes, this is what we're doing)
- tmp3=regs->B[aboff-1][3];
- regs->B[aboff-1][3]=FFXOR(regs->B[aboff-1][3],FFAND(FFXOR(regs->B[aboff-1][3],regs->B[aboff-1][2]),regs->p));
- regs->B[aboff-1][2]=FFXOR(regs->B[aboff-1][2],FFAND(FFXOR(regs->B[aboff-1][2],regs->B[aboff-1][1]),regs->p));
- regs->B[aboff-1][1]=FFXOR(regs->B[aboff-1][1],FFAND(FFXOR(regs->B[aboff-1][1],regs->B[aboff-1][0]),regs->p));
- regs->B[aboff-1][0]=FFXOR(regs->B[aboff-1][0],FFAND(FFXOR(regs->B[aboff-1][0],tmp3),regs->p));
-
-for(dbg=0;dbg<4;dbg++){
- DBG(fprintf(stderr,"next_B0[%i]=",dbg));
- DBG(dump_mem("",(unsigned char *)&regs->B[aboff-1][dbg],BYPG,BYPG));
-}
-
- // T3 = xor all inputs
- for(b=0;b<4;b++){
- regs->D[b]=FFXOR(FFXOR(regs->E[b],regs->Z[b]),extra_B[b]);
- }
-
-for(dbg=0;dbg<4;dbg++){
- DBG(fprintf(stderr,"D[%i]=",dbg));
- DBG(dump_mem("",(unsigned char *)&regs->D[dbg],BYPG,BYPG));
-}
-
- // T4 = sum, carry of Z + E + r
- for(b=0;b<4;b++){
- next_E[b]=regs->F[b];
- }
-
- tmp0=FFXOR(regs->Z[0],regs->E[0]);
- tmp1=FFAND(regs->Z[0],regs->E[0]);
- regs->F[0]=FFXOR(regs->E[0],FFAND(regs->q,FFXOR(regs->Z[0],regs->r)));
- tmp3=FFAND(tmp0,regs->r);
- tmp4=FFOR(tmp1,tmp3);
-
- tmp0=FFXOR(regs->Z[1],regs->E[1]);
- tmp1=FFAND(regs->Z[1],regs->E[1]);
- regs->F[1]=FFXOR(regs->E[1],FFAND(regs->q,FFXOR(regs->Z[1],tmp4)));
- tmp3=FFAND(tmp0,tmp4);
- tmp4=FFOR(tmp1,tmp3);
-
- tmp0=FFXOR(regs->Z[2],regs->E[2]);
- tmp1=FFAND(regs->Z[2],regs->E[2]);
- regs->F[2]=FFXOR(regs->E[2],FFAND(regs->q,FFXOR(regs->Z[2],tmp4)));
- tmp3=FFAND(tmp0,tmp4);
- tmp4=FFOR(tmp1,tmp3);
-
- tmp0=FFXOR(regs->Z[3],regs->E[3]);
- tmp1=FFAND(regs->Z[3],regs->E[3]);
- regs->F[3]=FFXOR(regs->E[3],FFAND(regs->q,FFXOR(regs->Z[3],tmp4)));
- tmp3=FFAND(tmp0,tmp4);
- regs->r=FFXOR(regs->r,FFAND(regs->q,FFXOR(FFOR(tmp1,tmp3),regs->r))); // ultimate carry
-
-/*
- we have just done this: (believe it or not)
-
- if (q) {
- F = Z + E + r;
- r = (F >> 4) & 1;
- F = F & 0x0f;
- }
- else {
- F = E;
- }
-*/
- for(b=0;b<4;b++){
- regs->E[b]=next_E[b];
- }
-for(dbg=0;dbg<4;dbg++){
- DBG(fprintf(stderr,"F[%i]=",dbg));
- DBG(dump_mem("",(unsigned char *)&regs->F[dbg],BYPG,BYPG));
-}
-DBG(fprintf(stderr,"r="));
-DBG(dump_mem("",(unsigned char *)&regs->r,BYPG,BYPG));
-for(dbg=0;dbg<4;dbg++){
- DBG(fprintf(stderr,"E[%i]=",dbg));
- DBG(dump_mem("",(unsigned char *)&regs->E[dbg],BYPG,BYPG));
-}
-
- // this simple instruction is virtually shifting all the shift registers
- aboff--;
-
-/*
- we've just done this:
-
- A9=A8;A8=A7;A7=A6;A6=A5;A5=A4;A4=A3;A3=A2;A2=A1;A1=A0;A0=next_A0;
- B9=B8;B8=B7;B7=B6;B6=B5;B5=B4;B4=B3;B3=B2;B2=B1;B1=B0;B0=next_B0;
-*/
-
- regs->X[0]=s1a;
- regs->X[1]=s2a;
- regs->X[2]=s3b;
- regs->X[3]=s4b;
- regs->Y[0]=s3a;
- regs->Y[1]=s4a;
- regs->Y[2]=s5b;
- regs->Y[3]=s6b;
- regs->Z[0]=s5a;
- regs->Z[1]=s6a;
- regs->Z[2]=s1b;
- regs->Z[3]=s2b;
- regs->p=s7a;
- regs->q=s7b;
-for(dbg=0;dbg<4;dbg++){
- DBG(fprintf(stderr,"X[%i]=",dbg));
- DBG(dump_mem("",(unsigned char *)&regs->X[dbg],BYPG,BYPG));
-}
-for(dbg=0;dbg<4;dbg++){
- DBG(fprintf(stderr,"Y[%i]=",dbg));
- DBG(dump_mem("",(unsigned char *)&regs->Y[dbg],BYPG,BYPG));
-}
-for(dbg=0;dbg<4;dbg++){
- DBG(fprintf(stderr,"Z[%i]=",dbg));
- DBG(dump_mem("",(unsigned char *)&regs->Z[dbg],BYPG,BYPG));
-}
-DBG(fprintf(stderr,"p="));
-DBG(dump_mem("",(unsigned char *)&regs->p,BYPG,BYPG));
-DBG(fprintf(stderr,"q="));
-DBG(dump_mem("",(unsigned char *)&regs->q,BYPG,BYPG));
-
-#ifdef STREAM_NORMAL
- // require 4 loops per output byte
- // 2 output bits are a function of the 4 bits of D
- // xor 2 by 2
- cb_g[8*i+7-2*j]=FFXOR(regs->D[2],regs->D[3]);
- cb_g[8*i+6-2*j]=FFXOR(regs->D[0],regs->D[1]);
-for(dbg=0;dbg<8;dbg++){
- DBG(fprintf(stderr,"op[%i]=",dbg));
- DBG(dump_mem("",(unsigned char *)&cb_g[8*i+dbg],BYPG,BYPG));
-}
-#endif
-
-DBG(fprintf(stderr,"---END INTERNAL LOOP\n"));
-
- } // INTERNAL LOOP
-
-DBG(fprintf(stderr,"--END EXTERNAL LOOP\n"));
-
- } // EXTERNAL LOOP
-
- // move 32 steps forward, ready for next call
- for(k=0;k<10;k++){
- for(b=0;b<4;b++){
-DBG(fprintf(stderr,"moving forward AB k=%i b=%i\n",k,b));
- regs->A[32+k][b]=regs->A[k][b];
- regs->B[32+k][b]=regs->B[k][b];
- }
- }
-
-
-////////////////////////////////////////////////////////////////////////////////
-
-#ifdef STREAM_NORMAL
-for(j=0;j<64;j++){
- DBG(fprintf(stderr,"postcall prerot cb[%2i]=",j));
- DBG(dump_mem("",(unsigned char *)(cb+BYPG*j),BYPG,BYPG));
-}
-
-#if GROUP_PARALLELISM==32
-trasp64_32_88cw(cb);
-#endif
-#if GROUP_PARALLELISM==64
-trasp64_64_88cw(cb);
-#endif
-#if GROUP_PARALLELISM==128
-trasp64_128_88cw(cb);
-#endif
-
-for(j=0;j<64;j++){
- DBG(fprintf(stderr,"postcall postrot cb[%2i]=",j));
- DBG(dump_mem("",(unsigned char *)(cb+BYPG*j),BYPG,BYPG));
-}
-#endif
-
-#ifdef STREAM_INIT
- DBG(fprintf(stderr,":::::::::: END STREAM INIT\n"));
-#endif
-#ifdef STREAM_NORMAL
- DBG(fprintf(stderr,":::::::::: END STREAM NORMAL\n"));
-#endif
-
-}
-