// defs-env.h 2015-07-24,16:56:18 // defs-top.h // top-level portability definitions // // Copyright Dec. 1994, Jeff Fessler, University of Michigan #ifndef DefsTop #define DefsTop // set the following flag for compiling on PC/NT/x86 #if defined(Is_pc) #define Userand #define Need_uint #define Need_proto_lgamma #define Need_proto_erf #define Need_time_h #endif // Must include pthread.h *before* my other definitions, because pthread.h // sets a __need_timespec flag which affects declarations in time.h #ifdef Use_thread // include "def,thread.h" used to go here #include #endif #include #include #if defined(Need_assert) #include #endif #include #include #if defined(__osf__) | defined(Need_time_h) #include #endif #if defined(Need_ieeefp) #include // needed for solaris for finite() #endif #if defined(Need_dynlib) #include // needed for dynamic libraries #endif #ifdef Use_mpi #include #endif #if defined(hphp) #define _INCLUDE_XOPEN_SOURCE #define Userand48 extern double drand48(void); #if !defined(Mmex) # define Need_uint #endif #endif #if defined(Need_uint) typedef unsigned int uint; typedef unsigned short ushort; #endif #if defined(Need_ulong) typedef unsigned long ulong; #endif #if defined(titan) #define Provide_strstr // no strstr() on titan #define Need_proto_strstr #include #else #include #endif #ifndef M_PI #define M_PI 3.14159265358979323846 #endif #ifndef M_SQRT1_2 #define M_SQRT1_2 0.70710678118654752440 #endif #ifndef M_SQRT2 #define M_SQRT2 1.4142135623730950488 #endif #ifndef CLOCKS_PER_SEC // stupid SunOS doesn't have it #define CLOCKS_PER_SEC 1000000 #endif #ifndef Clock #ifdef Use_global_clock0 #define GlobalClock0declare clock_t GlobalClock0; #define GlobalClock0extern extern clock_t GlobalClock0; #define Clock0 GlobalClock0 = clock(); #define Clock ( (clock() - GlobalClock0) / (double) CLOCKS_PER_SEC ) #else #define Clock0 ; #define Clock -1. #define GlobalClock0declare #define GlobalClock0extern #endif #endif // No stdlib on titan! #if defined(titan) extern void exit(); extern char *calloc(), *malloc(); extern char *memcpy(), *memset(); extern int free(); #else # include #endif // Other externs #ifdef Need_protos_sol2 // needed for sol2 # define Need_proto_erf # define Need_proto_lgamma # define Need_proto_random #endif #if defined(titan) | defined(sun) extern int bzero(), bcopy(); #endif #if !defined(Const) # define Const const #endif #if 0 // todo #if !defined(noConst) # define Const const #else # define Const #endif #endif #if !defined(Size_t) # define Size_t size_t #endif typedef Const Size_t cSize_t; #if 0 // todo #if defined(_SIZE_T_) # define Size_t size_t #else # define Size_t unsigned #endif #endif #if !defined(MAXint) # define MAXchar 127 # define MAXbyte 255 # define MAXshort 32767 # define MAXint 2147483647 # define MAXint32 2147483647 # define MAXuint32 4294967295 # define MAXfloat 1e30 # define MAXdouble 1e300 #endif #ifndef SHRT_MAX #define SHRT_MAX 32767 #endif #ifndef SHRT_MIN #define SHRT_MIN (-32768) #endif // define all the globals that are needed by the macros #ifndef GlobalsDefinedHere # define GlobalsDefinedHere \ FILE *Global_fp_exist; \ GlobalClock0declare \ int Mpi_rank=0, Mpi_numprocs=1; #endif // my other definitions #if 0 #include "def,macro.h" #include "def,type.h" #include "def,inline.h" #include "def,alloc.h" #include "def,proto.h" #endif #endif // DefsTop // def,type.h // type definitions // Copyright 1995-2, Jeff Fessler, University of Michigan #if !defined(Defs_type) && !defined(No_defs_type) #define Defs_type // uint will come from sys/types.h // return type for functions that can fail or succeed: #if 0 typedef enum {sof_failure, sof_success} sof; // Failure | Success #else // the enum version does not work with cuda 2.2 compiler, so just use int: typedef int sof; // Failure=0 | Success=1 # define sof_failure 0 # define sof_success 1 #endif #define Success sof_success #define Failure sof_failure #define Ok return Success; #if 0 // someday maybe use this version typedef enum {truf_false, truf_true} truf; // False | True #else typedef int truf; // False=0 | True=1 #define truf_false 0 #define truf_true 1 #endif #ifndef True #define True 1 #endif #ifndef False #define False 0 #endif typedef Const truf ctruf; typedef short word; // two byte integers typedef unsigned short uword; // two byte integers, unsigned typedef unsigned char byte; typedef double TypeCalc; // fastest floating point type typedef Const char cchar; typedef Const byte cbyte; typedef Const short cshort; typedef Const int cint; typedef Const long clong; typedef Const unsigned int cuint; typedef Const float cfloat; typedef Const double cdouble; typedef Const void cvoid; typedef cchar *pcchar; typedef const pcchar *pcpcchar; // for argv typedef cfloat *pcfloat; #endif // Defs_type // def,macro.h // favorite macro definitions // Copyright 1994-12, Jeff Fessler, University of Michigan #ifndef Defs_macro #define Defs_macro #define Chat ((chat > 0) ? (chat-1) : 0) #define Calloc(n,s) calloc((unsigned) (n), (unsigned) (s)) #define Swab(s,d,n) { \ if ((s) == (d)) Exit("swab problem") \ swab((char *)(s), (char *)(d), n); } #define Nonull(str) ((str) ? (str) : "null") #define Argflt(n,def) (argc > n ? atof(argv[n]) : def) #define Argint(n,def) (argc > n ? atoi(argv[n]) : def) #define Argstr(n,def) (argc > n ? argv[n] : def) #define Argstn(n) ((argc > n && strcmp(argv[n], "-")) ? argv[n] : NULL) #define Isdigit(c) (('0' <= (c)) && ((c) <= '9')) #if !defined(Isfinite) #define Isfinite(x) isfinite(x) // from finite to isfinite 2008-10-30 #endif #if !defined(Isinf) #define Isinf(x) isinf(x) #endif // messages: using variadic macro feature of c99 #ifdef Use_thread // thread safe version (if posix threads) so thread messages do not mingle extern void jf_thread_print( // in jf,thread1.c FILE *stream, cchar *how, // usually "Note" or "WARN" or "FAIL" cchar *file, // usually __FILE_ cint line, // usually __LINE__ cchar *format, ...); #else // alternate general macro if no posix threads available #define jf_thread_print(stream, how, file, line, ...) \ { \ (void) fprintf(stream, "%s %s %d: ", how, file, line); \ (void) fprintf(stream, __VA_ARGS__); \ (void) fprintf(stream, "\n"); \ (void) fflush(stream); \ } #endif #ifndef Note # define Note(...) { \ jf_thread_print(stdout, "Note", __FILE__, __LINE__, __VA_ARGS__); } #define Note1(msg, a1) Note(msg, a1) #define Note2(msg, a1, a2) Note(msg, a1, a2) #define Note3(msg, a1, a2, a3) Note(msg, a1, a2, a3) #define Note4(msg, a1, a2, a3, a4) Note(msg, a1, a2, a3, a4) #define Note5(msg, a1, a2, a3, a4, a5) Note(msg, a1, a2, a3, a4, a5) #endif #ifndef Warn # define Warn(...) { \ jf_thread_print(stderr, "WARN", __FILE__, __LINE__, __VA_ARGS__); } #define Warn1(msg, a1) Warn(msg, a1) #define Warn2(msg, a1, a2) Warn(msg, a1, a2) #define Warn3(msg, a1, a2, a3) Warn(msg, a1, a2, a3) #define Warn4(msg, a1, a2, a3, a4) Warn(msg, a1, a2, a3, a4) #endif #ifndef Fail_nolock # define Fail_nolock(...) { \ (void) fprintf(stderr, "FAIL %s %d: ", __FILE__, __LINE__); \ (void) fprintf(stderr, __VA_ARGS__); \ (void) fprintf(stderr, "\n"); \ (void) fflush(stderr); \ } #endif #ifndef Fail # define Fail(...) { \ jf_thread_print(stderr, "FAIL", __FILE__, __LINE__, __VA_ARGS__); \ return Failure; } #define Fail1(msg, a1) Fail(msg, a1) #define Fail2(msg, a1, a2) Fail(msg, a1, a2) #define Fail3(msg, a1, a2, a3) Fail(msg, a1, a2, a3) #define Fail4(msg, a1, a2, a3, a4) Fail(msg, a1, a2, a3, a4) #define Fail5(msg, a1, a2, a3, a4, a5) Fail(msg, a1, a2, a3, a4, a5) #define Fail6(msg, a1, a2, a3, a4, a5, a6) Fail(msg, a1, a2, a3, a4, a5, a6) #endif #ifndef Exit # define Exit(msg) { \ (void)fprintf(stderr, "EXIT %s %d: %s\n", __FILE__, __LINE__, msg); \ exit(-1); } #endif #ifndef Call #define Call(fun, arg) { if (!(fun arg)) Fail(#fun) } #define Call1(fun, arg, str) { if (!(fun arg)) Fail1(#fun" %s", str) } #endif // And those pesky IO functions.... #if defined(titan) # define Fread(p,s,n,f) fread ((char *)(p), (Size_t)(s), (int)(n), f) # define Fwrite(p,s,n,f) fwrite((char *)(p), (Size_t)(s), (int)(n), f) #elif defined(sun) # define Fread(p,s,n,f) fread ((char *)(p), (int)(s), (int)(n), f) # define Fwrite(p,s,n,f) fwrite((char *)(p), (int)(s), (int)(n), f) #else # define Fread(p,s,n,f) fread ((void *)(p), (Size_t)(s), (Size_t)(n), f) # define Fwrite(p,s,n,f) fwrite((cvoid *)(p), (Size_t)(s), (Size_t)(n), f) # define Strncat(s1,s2,n) strncat(s1, s2, (int) (n)) #endif #if defined(titan) | defined(sun) #define Memset(p,c,n) memset((char *)(p), (int)(c), (int)(n)); #define Memcpy(t,f,n) memcpy((char *)(t), (char *)(f), (int)(n)); #define Strncat(s1,s2,n) strncat(s1, s2, (Size_t) (n)) #define Bzero(p,n) bzero((char *) (p), (int) ((n) * sizeof(*(p)))); #define Bcopy(s,d,n) bcopy((char *) (s), (char *) (d), (int) ((n) * sizeof(*(s)))); #else #define Memset(p,v,n) (void)memset((void *) (p), v, (Size_t) (n)); #define Memcpy(d,s,n) { if ((d) == (s)) Exit("Identical memcpy pointers") \ else (void)memcpy((void *) (d), (cvoid *) (s), (Size_t) (n)); } #define Bzero(p,n) Memset(p, 0, (n) * sizeof(*(p))) #define Bcopy(s,d,n) Memcpy(d, s, (n) * sizeof(*(s))) #define Bcopyvoid(src,dst,n,siz) Memcpy(dst, src, (n) * (siz)) #endif #if 0 // superceded by versions in io library #ifndef FileExist // (!fclose(fopen(name, "rb"))) did not work on linux extern FILE *Global_fp_exist; #define FileExist(name) ((Global_fp_exist = fopen(name, "rb"), \ Global_fp_exist && !fclose(Global_fp_exist))) #endif // macro to check to see if we can write a file to this name. // use this with caution since if the file exists, it will be truncated. #ifndef FileWriteable #define FileWriteable(s) ((Global_fp_exist = fopen(s, "wb"), \ Global_fp_exist && fclose(Global_fp_exist))) #endif #endif // The suffix "0" means return 0 on error. #define Fopen0(fp,n,t) {if ( !(fp = fopen(n, t)) ) \ Fail1("fopen '%s'", n)} #define Fopen0r(fp,n) Fopen0(fp, n, "rb") #define Fopen0w(fp,n) Fopen0(fp, n, "wb") #define Fopen0e(fp,n) Fopen0(fp, n, "rb+") // for editing #define Fflush0(fp) {if (fflush(fp)) Fail("fflush")} // seek from start of file #define Fseek0(fp,n) {if (fseek(fp, (long) (n), 0)) \ Fail1("fseek %d", (int) n)} #define Fskip0(fp,n) {if (fseek(fp, (long) (n), 1)) \ Fail1("fseek1 %d", (int) n)} #define Fclose0(fp) {if (fclose(fp)) Fail("fclose")} #define Fread0(p,s,n,fp) {Size_t ZF = Fread(p, s, n, fp); \ if (ZF != (Size_t) (n)) \ Fail3("fread found %zu bytes, not %zu at %ld", \ ZF, (Size_t) (n), ftell(fp))} #define Fwrite0(p,s,n,fp) {if (((Size_t) (n)) != Fwrite(p, s, n, fp)) Fail("fwrite")} #if defined(Userand48) # define Rand01 drand48() # define Seeder(s) srand48(s) #elif defined(Userand) // for PC # define Rand01 (rand() / RAND_MAX) # define Seeder(s) srand(s) #else # define Rand01 (random() / 2147483647.0) # define Seeder(s) srandom(s) #endif // The remaining macro definitions should be environment independent #define Sqr(x) ((x) * (x)) #define Odd(n) ((n)/2 != ((n)+1)/2) #define Even(n) ((n)/2 == ((n)+1)/2) #ifndef Abs # define Abs(x) (((x) >= 0) ? (x) : (-(x))) #endif #ifndef Max # define Max(x,y) (((x) > (y)) ? (x) : (y)) #endif #ifndef Min # define Min(x,y) (((x) < (y)) ? (x) : (y)) #endif // Kernighan & Ritchie, p45 2nd ed., say float to int just truncates. // So for nonnegative arguments we can just cast to an integer // to avoid a function call. But Floor_pos will be wrong if x < 0! #define Floor_pos(x) ((int) (x)) // the usual floor function works for both positive and negative values #define Floor(x) floor(x) #define Floorf(x) floorf(x) #define Ceil(x) ceil(x) #define Ceilf(x) ceilf(x) #ifndef Rint // round to nearest integer, ala rint() # define Rint(x) ( (int) ((x) >= 0 ? ((x) + 0.5) : ((x) - 0.5)) ) // formerly ((int) ((x) + 0.5)), which is wrong for negatives! #endif #define Streq(a,b) (((a) != NULL) && ((b) != NULL) && !strcmp(a,b)) #define Streqn(a,b,n) (((a) != NULL) && ((b) != NULL) && !strncmp(a,b,n)) #define Alloc0pure(ptr,type,n,s) { \ if ( !((ptr) = (type *) calloc((Size_t) (n), (Size_t) (s))) ) \ Fail2("alloc %zu %zu", (Size_t) (n), (Size_t) (s)) } #define Free0pure(p) free((void *) (p)); #ifdef CountAlloc #define Alloc0(ptr,type,n,s) { \ if ( !((ptr) = (type *) io_mem_alloc((Size_t) (n), (Size_t) (s), \ __FILE__, __LINE__, #ptr)) ) \ Fail2("io_mem_alloc(%zu,%zu)", (Size_t) (n), (Size_t) (s)) } #define Free(p) { \ if (!io_mem_free((void *) (p), __FILE__, __LINE__, #p)) { \ Warn2("bug free %s %d", __FILE__, __LINE__) \ Exit("bug") } } #define Free0(p) { \ Call(io_mem_free, ((void *) (p), __FILE__, __LINE__, #p)) \ p = NULL; } #define PrintAlloc io_mem_print(__FILE__, __LINE__, 1); #define MemUsed io_mem_usage(__FILE__, __LINE__, Chat); #define MemInfo(p) io_mem_info((cvoid *) p, __FILE__, __LINE__, #p); #define MemChat(chat) Call(io_mem_chat, (chat)) #else #define Alloc0 Alloc0pure #define Free(p) free((void *) (p)) #define Free0 Free0pure #define PrintAlloc {} #define MemUsed {} #define MemInfo(p) {} #define MemChat(chat) {} #endif #ifdef __CUDACC__ #ifndef jf_use_typeof #define jf_use_typeof #endif #endif // __CUDACC__ #ifdef jf_use_typeof // gcc provides typeof() extension (sometimes) #define jf_typeof(p) typeof(p) #else #define jf_typeof(p) void #endif #ifndef Mem0 #define Mem0(p,n) Alloc0(p, jf_typeof(*(p)), n, sizeof(*(p))) #define Mem0void(p,n,s) Alloc0(p, jf_typeof(*(p)), n, s) #define Mem0pure(p,n) Alloc0pure(p, jf_typeof(*(p)), n, sizeof(*(p))) #endif // macros for making code sections with static variable work for threads // Mutex_init declares a *static* mutex that can be used repeatedly // Mutex_destroy need not be called except possibly at program exit #if defined(Use_thread) # define Mutex_init \ static pthread_mutex_t jf_mutex_reent = PTHREAD_MUTEX_INITIALIZER; # define Mutex_lock \ if (pthread_mutex_lock( &jf_mutex_reent )) \ Fail_nolock("pthread_mutex_lock() failed") # define Mutex_unlock \ if (pthread_mutex_unlock( &jf_mutex_reent )) \ Fail_nolock("pthread_mutex_unlock() failed") # define Mutex_destroy \ if (pthread_mutex_destroy( &jf_mutex_reent )) \ Fail_nolock("pthread_mutex_destroy() failed") # define Ok_unlock Mutex_unlock Ok #else # define Mutex_init # define Mutex_lock # define Mutex_unlock # define Mutex_destroy # define Ok_unlock Ok #endif #endif // Defs_macro // def,inline.h // Simple vector operations done in-line. // Copyright 1994-12, Jeff Fessler, University of Michigan #ifndef DefInline #define DefInline // #define whileZn(start) register int Zn = start; for (; Zn; --Zn) #define whileZn(start) for (size_t Zn = start; Zn-- > 0; ) // #define Register register #define Register {} // v op c(onstant) #define Inline1vector(type1, v1, op, type2, c, n) \ { \ Register type1 *Z1 = v1; \ Register Const type2 Zc = (type2) c; \ whileZn(n) \ *Z1++ op Zc; \ } // v1 op v2 #define Inline2vector(type1, v1, op, type2, v2, n) \ { \ Register type1 *Z1 = v1; \ Register Const type2 *Z2 = v2; \ whileZn(n) { \ *Z1++ op (*Z2); \ ++Z2; } \ } // Vector with scalar // v *= c(onstant) #define VectScale(type, v, c, n) \ Inline1vector(type, v, *=, double, c, n) // v += c(onstant) #define VectInc(type, v, c, n) \ Inline1vector(type, v, +=, double, c, n) // v = c(onstant) #define VectSet(type, v, c, n) \ Inline1vector(type, v, =, type, c, n) // v = Min(v,c) #define VectMin(type, v, c, n) \ { \ Register type *Zv = v; \ whileZn(n) { \ if (*Zv > c) *Zv = c; \ ++Zv; } \ } // v = Max(v,c) #define VectMax(type, v, c, n) \ { \ Register type *Zv = v; \ whileZn(n) { \ if (*Zv < c) *Zv = c; \ ++Zv; } \ } // v = Max(v,0) #define VectNonneg(type, v, n) VectMax(type, v, 0, n) // clamp vector to [min max] #define VectClamp(type, v, vmin, vmax, n) \ { \ Register type *Zv = v; \ Register Const type Zmin = vmin; \ Register Const type Zmax = vmax; \ whileZn(n) { \ if (*Zv > Zmax) *Zv = Zmax; \ else \ if (*Zv < Zmin) *Zv = Zmin; \ ++Zv; \ } \ } // s = sum(v) #define VectAccum(type, s, v, n) \ { \ Register double Zsum = 0.; \ Register Const type *Zv = v; \ whileZn(n) \ Zsum += *(Zv++); \ s = Zsum; \ } // s = sum(|v|) #define VectNorm1(type, s, v, n) \ { \ Register Const type *Zv = v; \ Register double Zsum = 0; \ whileZn(n) { \ Zsum += Abs(*Zv); \ ++Zv; \ } s = Zsum; \ } // s = #define VectNorm2(type, s, v, n) \ { \ Register Const type *Zv = v; \ Register double Zsum = 0; \ whileZn(n) { \ Zsum += *Zv * *Zv; \ ++Zv; \ } s = Zsum; \ } // s = _w #define VectNorm2wtd(type, s, v, w, n) \ { \ Register Const type *Zv = v; \ Register Const type *Zw = w; \ Register double Zsum = 0; \ whileZn(n) { \ Zsum += *Zv * *Zv * *Zw; \ ++Zv; ++Zw; \ } s = Zsum; \ } // [v; v; ...; v] repeated nrep times #define VectRepeat(type, v, npoint, nrep) \ { \ Register type *Zv = v; \ Const size_t Zn = npoint; \ Const size_t Zr = nrep; \ for (size_t Zi=1; Zi < Zr; Zi++) \ Bcopy(Zv, Zv+Zi*Zn, Zn) \ } // Vector with Vector // v1 = v2 #define VectCopy(type1, v1, type2, v2, n) \ Inline2vector(type1, v1, =, type2, v2, n) // r *= v #define VectMul(type, r, v, n) \ Inline2vector(type, r, *=, type, v, n) // r /= v #define VectDiv(type, r, v, n) \ Inline2vector(type, r, /=, type, v, n) // r += v #define VectAdd(type, r, v, n) \ Inline2vector(type, r, +=, type, v, n) // r += v #define VectAdd2(type1, r, type2, v, n) \ Inline2vector(type1, r, +=, type2, v, n) // r += const * v #define VectAddScale(type, r, c, v, n) \ Inline2vector(type, r, += c *, type, v, n) // r = s * v #define VectCopyScale(type, r, s, v, n) \ Inline2vector(type, r, = s *, type, v, n) // r -= v #define VectSub(type, r, v, n) \ Inline2vector(type, r, -=, type, v, n) // r += v1 + v2 #define VectInc2(type, r, v1, v2, n) \ { \ Register type *Zr = r; \ Register Const type *Z1 = v1; \ Register Const type *Z2 = v2; \ whileZn(n) \ *Zr++ += *(Z1++) + *(Z2++); \ } // r = r / v or 0 if v=0 #define VectDiv0(type, r, v, n) \ { \ Register type *Zr = r; \ Register Const type *Zv = v; \ whileZn(n) { \ if (*Zv) \ *Zr++ /= *Zv; \ else { \ *Zr++ = 0; \ } \ ++Zv; } \ } // s = #define VectInprod3d(s, type1, v1, type2, v2, n1, n2, n3) \ { \ Register Const type1 *Z1 = v1; \ Register Const type2 *Z2 = v2; \ Const size_t Zn2 = n2; \ Register Const size_t Zn1 = n1; \ double Zsum = 0.; \ whileZn(n3) { \ double Zsum2 = 0.; \ for (size_t Zi2=0; Zi2 < Zn2; ++Zi2) { \ Register double Zsum1 = 0.; \ for (size_t Zi1=0; Zi1 < Zn1; ++Zi1) \ Zsum1 += *(Z1++) * *(Z2++); \ Zsum2 += Zsum1; \ } \ Zsum += Zsum2; \ } \ s = Zsum; \ } #define VectInprod2d(s, type1, v1, type2, v2, n1, n2) \ VectInprod3d(s, type1, v1, type2, v2, n1, n2, 1) // s = #define VectInprod(type, s, v1, v2, n) \ { \ Register Const type *Z1 = v1; \ Register Const type *Z2 = v2; \ Register double Zsum = 0.; \ whileZn(n) \ Zsum += *(Z1++) * *(Z2++); \ s = Zsum; \ } // s = #define VectInprod2(type, s, v1, v2, n) \ { \ Register Const type *Z1 = v1; \ Register Const type *Z2 = v2; \ Register double Zsum = 0.; \ whileZn(n) { \ Zsum += *(Z1++) * *Z2 * *Z2; \ ++Z2; \ } s = Zsum; \ } // s = // Supposes v2 is mostly zeros #define VectInprod2_0(type, s, v1, v2, n) \ { \ Register Const type *Z1 = v1; \ Register Const type *Z2 = v2; \ Register double Zsum = 0.; \ whileZn(n) { \ if (*Z2) \ Zsum += *Z1 * *Z2 * *Z2; \ ++Z1; ++Z2; \ } s = Zsum; \ } // min and max of vector #define VectMinMax(type1, vmin, vmax, type2, v, n) \ { \ Register Const type2 *Zv = v; \ Register type1 Zmin = (type1) *Zv; \ Register type1 Zmax = (type1) *Zv++; \ whileZn(n-1) { \ if (*Zv > Zmax) Zmax = *Zv; \ else \ if (*Zv < Zmin) Zmin = *Zv; \ ++Zv; \ } \ vmin = Zmin; \ vmax = Zmax; \ } // min and max of vector, and min > 0 #define VectMinMax0(type1, this_type_max, vmin, vmin0, vmax, type2, v, n) \ { \ Register Const type2 *Zv = v; \ Register type1 Zmin = (type1) *Zv; \ Register type1 Zmin0 = (type1) (Zmin > 0 ? Zmin : this_type_max); \ Register type1 Zmax = (type1) *Zv++; \ whileZn(n-1) { \ if (*Zv > Zmax) Zmax = *Zv; \ else \ if (*Zv < Zmin) Zmin = *Zv; \ if (*Zv > 0 && *Zv < Zmin0) Zmin0 = *Zv; \ ++Zv; \ } \ vmin = Zmin; \ vmin0 = Zmin0; \ vmax = Zmax; \ } // for debugging #define VectInfo(type, v, n, msg) { \ double Ztmin, Ztmax, Ztmin0, ZZsum; \ VectMinMax0(type, MAX##type, Ztmin, Ztmin0, Ztmax, type, v, n) \ VectAccum(type, ZZsum, v, n) \ Note("%s [%d] min=%g min0=%g max=%g sum=%g", \ msg, n, Ztmin, Ztmin0, Ztmax, ZZsum) } // #undef Register // #undef whileZn #endif // DefInline // def,alloc.h // Copyright 1997-4, Jeff Fessler, University of Michigan #ifndef DefAlloc #define DefAlloc extern void *io_mem_alloc(cSize_t n, cSize_t s, cchar *file, cint line, cchar *name); extern sof io_mem_free(void *p, cchar *file, cint line, cchar *name); extern sof io_mem_print(cchar *file, cint line, cint chat); extern sof io_mem_usage(cchar *file, cint line, cint chat); extern sof io_mem_info(cvoid *p, cchar *file, cint line, cchar *name); extern sof io_mem_chat(cint chat); #endif // DefAlloc // def,proto.h // Prototypes not provided by some old OS versions // // Copyright 2000-5, Jeff Fessler, University of Michigan #ifndef DefProto #define DefProto #ifdef Need_proto_strstr extern char *strstr(char *, char *); #endif #ifdef Need_proto_random extern void srandom(unsigned); extern long random(void); #endif #ifdef Need_proto_lgamma extern double lgamma(cdouble); #endif #ifdef Need_proto_erf extern double erf(cdouble); #endif #endif // DefProto // def,simd.h // macros for SIMD instructions (SSE) // based on src/contrib/ndimtro/SSE.h by Ned Dmitro // Copyright 2010-03-08, Jeff Fessler, University of Michigan #ifndef _def_simd_h #define _def_simd_h // Notes // When macro arguments are preceded by a #, the compiler (cpp) // "stringifies" them into " $Value ". // It also concatenates any adjacent string literals. // ATT syntax, unlike Intel syntax is COMMAND SOURCE DEST // assembler immediate values must be preceded by $ // Put the 32-bit data "val" (e.g., float) in all 4 spots in XMM register NUM // movss moves single precision floating point to lowest word of XMM register // shufps selects parts of source and dest to move to dest #define simd_fill4(val, NUM) { \ __asm__( \ "movss %0, %%xmm" #NUM \ "; shufps $0x00, %%xmm" #NUM ", %%xmm" #NUM ";" : : "m" (val) ); \ } // The following either move data Aligned (a) or Unaligned (u) // either "in" to XMM register NUM from memory, or "out" // val is the starting value of the 4-float array used, e.g., data[0] #define simd_mov_a4_in(val, NUM) { \ __asm__("movaps %0, %%xmm" #NUM ";" : : "m" (val) ); \ } #define simd_mov_a4_out(NUM, val) { \ __asm__("movaps %%xmm" #NUM ", %0;" : "=m" (val) : ); \ } #define simd_mov_u4_in(val, NUM) { \ __asm__("movups %0, %%xmm" #NUM ";" : : "m" (val) ); \ } #define simd_mov_u4_out(NUM, val) { \ __asm__("movups %%xmm" #NUM ", %0;" : "=m" (val) : ); \ } // Perform an operation on XMM NUM1 and XMM NUM2 and leave result in XMM NUM 2 // XMM2 = XMM2 op XMM1 #define simd_add4(NUM1, NUM2) { \ __asm__("addps %xmm" #NUM1 ", %xmm" #NUM2 ";" ); \ } #define simd_mul4(NUM1, NUM2) { \ __asm__("mulps %xmm" #NUM1 ", %xmm" #NUM2 ";" ); \ } #define simd_div4(NUM1, NUM2) { \ __asm__("divps %xmm" #NUM1 ", %xmm" #NUM2 ";" ); \ } #define simd_sub4(NUM1, NUM2) { \ __asm__("subps %xmm" #NUM1 ", %xmm" #NUM2 ";" ); \ } #if 0 // hide #define shufSSE(immediate, source, dest) {\ __asm__("shufps $0b" #immediate ", %xmm" #source ", %xmm" #dest ";" );\ } #define cvttSSE(source, dest) {\ __asm__("cvttps2pi %%xmm" #source ", %%mm" #dest ";" : : : "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)");\ } #define cvtSSE(source, dest) {\ __asm__("cvtps2pi %%xmm" #source ", %%mm" #dest ";" : : : "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)");\ } // The following preform the operation on XMM NUM1 and XMM NUM2 and // place the result in XMM NUM 2 ie: XMM2 = XMM2 op XMM1 #define andSSE(NUM1, NUM2) {\ __asm__("andps %xmm" #NUM1 ", %xmm" #NUM2 ";" ); \ } #define andnSSE(NUM1, NUM2) {\ __asm__("andnps %xmm" #NUM1 ", %xmm" #NUM2 ";" ); \ } #define orSSE(NUM1, NUM2) {\ __asm__("orps %xmm" #NUM1 ", %xmm" #NUM2 ";" ); \ } #define xorSSE(NUM1, NUM2) {\ __asm__("xorps %xmm" #NUM1 ", %xmm" #NUM2 ";" ); \ } /* The following prefetches the line of data that contains the byte val into non-temporal cache storage */ #define prefetchSSE(val) { \ __asm__("prefetchnta %0;" : : "m" (val) );\ } #define movqOutMMX(NUM, dest) {\ __asm__("movq %%mm" #NUM ", %0;" : "=m" (dest) : );\ } #define movqInMMX(source, NUM) {\ __asm__("movq %0, %%mm" #NUM ";" : : "m" (source) : "st(" #NUM ")" );\ } #define adddMMX(NUM1, NUM2) {\ __asm__("paddd %mm" #NUM1 ", %mm" #NUM2 ";");\ } #define emmsMMX() {\ __asm__("emms;");\ } #endif // hide #endif // _def_simd_h