00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifndef __TBB_machine_H
00022 #define __TBB_machine_H
00023
00024 #include "tbb_stddef.h"
00025
00026 #if _WIN32||_WIN64
00027
00028 #ifdef _MANAGED
00029 #pragma managed(push, off)
00030 #endif
00031
00032 #if __MINGW32__
00033 #include "machine/linux_ia32.h"
00034 extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
00035 #define __TBB_Yield() SwitchToThread()
00036 #elif defined(_M_IX86)
00037 #include "machine/windows_ia32.h"
00038 #elif defined(_M_AMD64)
00039 #include "machine/windows_intel64.h"
00040 #else
00041 #error Unsupported platform
00042 #endif
00043
00044 #ifdef _MANAGED
00045 #pragma managed(pop)
00046 #endif
00047
00048 #elif __linux__ || __FreeBSD__
00049
00050 #if __i386__
00051 #include "machine/linux_ia32.h"
00052 #elif __x86_64__
00053 #include "machine/linux_intel64.h"
00054 #elif __ia64__
00055 #include "machine/linux_ia64.h"
00056 #endif
00057
00058 #elif __APPLE__
00059
00060 #if __i386__
00061 #include "machine/linux_ia32.h"
00062 #elif __x86_64__
00063 #include "machine/linux_intel64.h"
00064 #elif __POWERPC__
00065 #include "machine/mac_ppc.h"
00066 #endif
00067
00068 #elif _AIX
00069
00070 #include "machine/ibm_aix51.h"
00071
00072 #elif __sun || __SUNPRO_CC
00073
00074 #define __asm__ asm
00075 #define __volatile__ volatile
00076 #if __i386 || __i386__
00077 #include "machine/linux_ia32.h"
00078 #elif __x86_64__
00079 #include "machine/linux_intel64.h"
00080 #elif __sparc
00081 #include "machine/sunos_sparc.h"
00082 #endif
00083
00084 #endif
00085
00086 #if !defined(__TBB_CompareAndSwap4) \
00087 || !defined(__TBB_CompareAndSwap8) \
00088 || !defined(__TBB_Yield) \
00089 || !defined(__TBB_release_consistency_helper)
00090 #error Minimal requirements for tbb_machine.h not satisfied
00091 #endif
00092
00093 #ifndef __TBB_load_with_acquire
00095 template<typename T>
00096 inline T __TBB_load_with_acquire(const volatile T& location) {
00097 T temp = location;
00098 __TBB_release_consistency_helper();
00099 return temp;
00100 }
00101 #endif
00102
#ifndef __TBB_store_with_release
//! Store a value to location with release semantics.
/** The compiler fence precedes the store so that earlier memory accesses
    cannot be reordered past it; value is converted to T as part of the
    store.  The fence/store order is the semantics — do not swap them. */
template<typename T, typename V>
inline void __TBB_store_with_release(volatile T& location, V value) {
    __TBB_release_consistency_helper();
    location = T(value);
}
#endif
00111
#ifndef __TBB_Pause
//! Fallback pause for ports without a dedicated pause instruction.
/** The requested spin count is ignored; the thread simply yields once. */
inline void __TBB_Pause(int32_t) {
    __TBB_Yield();
}
#endif
00117
00118 namespace tbb {
00119 namespace internal {
00120
00122
00123 class atomic_backoff {
00125
00127 static const int32_t LOOPS_BEFORE_YIELD = 16;
00128 int32_t count;
00129 public:
00130 atomic_backoff() : count(1) {}
00131
00133 void pause() {
00134 if( count<=LOOPS_BEFORE_YIELD ) {
00135 __TBB_Pause(count);
00136
00137 count*=2;
00138 } else {
00139
00140 __TBB_Yield();
00141 }
00142 }
00143
00144
00145 bool bounded_pause() {
00146 if( count<=LOOPS_BEFORE_YIELD ) {
00147 __TBB_Pause(count);
00148
00149 count*=2;
00150 return true;
00151 } else {
00152 return false;
00153 }
00154 }
00155
00156 void reset() {
00157 count = 1;
00158 }
00159 };
00160
00162
00163 template<typename T, typename U>
00164 void spin_wait_while_eq( const volatile T& location, U value ) {
00165 atomic_backoff backoff;
00166 while( location==value ) backoff.pause();
00167 }
00168
00170
00171 template<typename T, typename U>
00172 void spin_wait_until_eq( const volatile T& location, const U value ) {
00173 atomic_backoff backoff;
00174 while( location!=value ) backoff.pause();
00175 }
00176
00177
00178
00179
//! Emulated compare-and-swap for 1- or 2-byte operands (S = 1 or 2).
/** Performs a CAS on the S bytes at ptr by doing a 4-byte CAS on the
    enclosing aligned 32-bit word while preserving the neighboring bytes.
    Returns the previous value of the S-byte field as observed by the
    last CAS attempt, matching the contract of __TBB_CompareAndSwap4. */
template<size_t S, typename T>
inline T __TBB_MaskedCompareAndSwap (volatile T *ptr, T value, T comparand ) {
    // Address of the aligned 32-bit word that contains the operand.
    volatile uint32_t * base = (uint32_t*)( (uintptr_t)ptr & ~(uintptr_t)0x3 );
#if __TBB_BIG_ENDIAN
    // Big-endian: lower addresses hold more-significant bytes, so the bit
    // offset of the operand counts down from the top of the word.
    const uint8_t bitoffset = uint8_t( 8*( 4-S - (uintptr_t(ptr) & 0x3) ) );
#else
    const uint8_t bitoffset = uint8_t( 8*((uintptr_t)ptr & 0x3) );
#endif
    // Bits of the 32-bit word occupied by the S-byte operand.
    const uint32_t mask = ( (1<<(S*8)) - 1 )<<bitoffset;
    atomic_backoff b;
    uint32_t result;
    for(;;) {
        result = *base;
        // Build full-word images of the expected and desired contents by
        // splicing comparand/value into the currently observed other bytes.
        uint32_t old_value = ( result & ~mask ) | ( comparand << bitoffset );
        uint32_t new_value = ( result & ~mask ) | ( value << bitoffset );
        result = __TBB_CompareAndSwap4( base, new_value, old_value );
        if( result==old_value                   // CAS succeeded
            || ((result^old_value)&mask)!=0 )   // or operand bytes genuinely differ: real failure
            break;
        else
            // Only bytes OUTSIDE the mask changed (spurious failure):
            // back off and retry with a fresh view of the neighbors.
            b.pause();
    }
    return T((result & mask) >> bitoffset);
}
00205
//! Generic compare-and-swap: dispatch to the machine word-sized CAS.
/** The unspecialized template assumes S equals the machine word size;
    smaller operand sizes are handled by the specializations below. */
template<size_t S, typename T>
inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T comparand ) {
    return __TBB_CompareAndSwapW((T *)ptr,value,comparand);
}
00210
//! 1-byte CAS: use the port's native primitive when available,
//! otherwise emulate it on top of the 4-byte CAS.
template<>
inline uint8_t __TBB_CompareAndSwapGeneric <1,uint8_t> (volatile void *ptr, uint8_t value, uint8_t comparand ) {
#ifdef __TBB_CompareAndSwap1
    return __TBB_CompareAndSwap1(ptr,value,comparand);
#else
    return __TBB_MaskedCompareAndSwap<1,uint8_t>((volatile uint8_t *)ptr,value,comparand);
#endif
}
00219
//! 2-byte CAS: use the port's native primitive when available,
//! otherwise emulate it on top of the 4-byte CAS.
template<>
inline uint16_t __TBB_CompareAndSwapGeneric <2,uint16_t> (volatile void *ptr, uint16_t value, uint16_t comparand ) {
#ifdef __TBB_CompareAndSwap2
    return __TBB_CompareAndSwap2(ptr,value,comparand);
#else
    return __TBB_MaskedCompareAndSwap<2,uint16_t>((volatile uint16_t *)ptr,value,comparand);
#endif
}
00228
//! 4-byte CAS: every port is required to provide __TBB_CompareAndSwap4.
template<>
inline uint32_t __TBB_CompareAndSwapGeneric <4,uint32_t> (volatile void *ptr, uint32_t value, uint32_t comparand ) {
    return __TBB_CompareAndSwap4(ptr,value,comparand);
}
00233
//! 8-byte CAS: every port is required to provide __TBB_CompareAndSwap8.
template<>
inline uint64_t __TBB_CompareAndSwapGeneric <8,uint64_t> (volatile void *ptr, uint64_t value, uint64_t comparand ) {
    return __TBB_CompareAndSwap8(ptr,value,comparand);
}
00238
00239 template<size_t S, typename T>
00240 inline T __TBB_FetchAndAddGeneric (volatile void *ptr, T addend) {
00241 atomic_backoff b;
00242 T result;
00243 for(;;) {
00244 result = *reinterpret_cast<volatile T *>(ptr);
00245
00246 if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, result+addend, result )==result )
00247 break;
00248 b.pause();
00249 }
00250 return result;
00251 }
00252
00253 template<size_t S, typename T>
00254 inline T __TBB_FetchAndStoreGeneric (volatile void *ptr, T value) {
00255 atomic_backoff b;
00256 T result;
00257 for(;;) {
00258 result = *reinterpret_cast<volatile T *>(ptr);
00259
00260 if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, value, result )==result )
00261 break;
00262 b.pause();
00263 }
00264 return result;
00265 }
00266
00267
00268
00269
00270
00271
00272
00273
#ifndef __TBB_TypeWithAlignmentAtLeastAsStrict

// Fallback machinery for __TBB_TypeWithAlignmentAtLeastAsStrict(T), used
// when the machine header did not define the macro directly.
// First, a type carrying the strictest alignment we emulate (16 bytes),
// spelled per-compiler since alignas predates this code.
#if __GNUC__ || __SUNPRO_CC
struct __TBB_machine_type_with_strictest_alignment {
    int member[4];
} __attribute__((aligned(16)));
#elif _MSC_VER
__declspec(align(16)) struct __TBB_machine_type_with_strictest_alignment {
    int member[4];
};
#else
#error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T) or __TBB_machine_type_with_strictest_alignment
#endif

// Map an alignment requirement N to a type aligned at least that strictly.
// Unlisted N fall back to the strictest-alignment type above.
template<size_t N> struct type_with_alignment {__TBB_machine_type_with_strictest_alignment member;};
template<> struct type_with_alignment<1> { char member; };
template<> struct type_with_alignment<2> { uint16_t member; };
template<> struct type_with_alignment<4> { uint32_t member; };
template<> struct type_with_alignment<8> { uint64_t member; };

#if _MSC_VER||defined(__GNUC__)&&__GNUC__==3 && __GNUC_MINOR__<=2
// NOTE(review): guarded for MSVC and gcc <= 3.2 — presumably these
// compilers mishandle __alignof(T) used directly as a template argument,
// so the value is routed through a helper class; confirm against port notes.
template<size_t Size, typename T>
struct work_around_alignment_bug {
#if _MSC_VER
    static const size_t alignment = __alignof(T);
#else
    static const size_t alignment = __alignof__(T);
#endif
};
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<tbb::internal::work_around_alignment_bug<sizeof(T),T>::alignment>
#elif __GNUC__ || __SUNPRO_CC
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<__alignof__(T)>
#else
// Last resort: over-align everything to the strictest emulated alignment.
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) __TBB_machine_type_with_strictest_alignment
#endif
#endif
00313
00314 }
00315 }
00316
00317 #ifndef __TBB_CompareAndSwap1
00318 #define __TBB_CompareAndSwap1 tbb::internal::__TBB_CompareAndSwapGeneric<1,uint8_t>
00319 #endif
00320
00321 #ifndef __TBB_CompareAndSwap2
00322 #define __TBB_CompareAndSwap2 tbb::internal::__TBB_CompareAndSwapGeneric<2,uint16_t>
00323 #endif
00324
00325 #ifndef __TBB_CompareAndSwapW
00326 #define __TBB_CompareAndSwapW tbb::internal::__TBB_CompareAndSwapGeneric<sizeof(ptrdiff_t),ptrdiff_t>
00327 #endif
00328
00329 #ifndef __TBB_FetchAndAdd1
00330 #define __TBB_FetchAndAdd1 tbb::internal::__TBB_FetchAndAddGeneric<1,uint8_t>
00331 #endif
00332
00333 #ifndef __TBB_FetchAndAdd2
00334 #define __TBB_FetchAndAdd2 tbb::internal::__TBB_FetchAndAddGeneric<2,uint16_t>
00335 #endif
00336
00337 #ifndef __TBB_FetchAndAdd4
00338 #define __TBB_FetchAndAdd4 tbb::internal::__TBB_FetchAndAddGeneric<4,uint32_t>
00339 #endif
00340
00341 #ifndef __TBB_FetchAndAdd8
00342 #define __TBB_FetchAndAdd8 tbb::internal::__TBB_FetchAndAddGeneric<8,uint64_t>
00343 #endif
00344
00345 #ifndef __TBB_FetchAndAddW
00346 #define __TBB_FetchAndAddW tbb::internal::__TBB_FetchAndAddGeneric<sizeof(ptrdiff_t),ptrdiff_t>
00347 #endif
00348
00349 #ifndef __TBB_FetchAndStore1
00350 #define __TBB_FetchAndStore1 tbb::internal::__TBB_FetchAndStoreGeneric<1,uint8_t>
00351 #endif
00352
00353 #ifndef __TBB_FetchAndStore2
00354 #define __TBB_FetchAndStore2 tbb::internal::__TBB_FetchAndStoreGeneric<2,uint16_t>
00355 #endif
00356
00357 #ifndef __TBB_FetchAndStore4
00358 #define __TBB_FetchAndStore4 tbb::internal::__TBB_FetchAndStoreGeneric<4,uint32_t>
00359 #endif
00360
00361 #ifndef __TBB_FetchAndStore8
00362 #define __TBB_FetchAndStore8 tbb::internal::__TBB_FetchAndStoreGeneric<8,uint64_t>
00363 #endif
00364
00365 #ifndef __TBB_FetchAndStoreW
00366 #define __TBB_FetchAndStoreW tbb::internal::__TBB_FetchAndStoreGeneric<sizeof(ptrdiff_t),ptrdiff_t>
00367 #endif
00368
00369 #if __TBB_DECL_FENCED_ATOMICS
00370
00371 #ifndef __TBB_CompareAndSwap1__TBB_full_fence
00372 #define __TBB_CompareAndSwap1__TBB_full_fence __TBB_CompareAndSwap1
00373 #endif
00374 #ifndef __TBB_CompareAndSwap1acquire
00375 #define __TBB_CompareAndSwap1acquire __TBB_CompareAndSwap1__TBB_full_fence
00376 #endif
00377 #ifndef __TBB_CompareAndSwap1release
00378 #define __TBB_CompareAndSwap1release __TBB_CompareAndSwap1__TBB_full_fence
00379 #endif
00380
00381 #ifndef __TBB_CompareAndSwap2__TBB_full_fence
00382 #define __TBB_CompareAndSwap2__TBB_full_fence __TBB_CompareAndSwap2
00383 #endif
00384 #ifndef __TBB_CompareAndSwap2acquire
00385 #define __TBB_CompareAndSwap2acquire __TBB_CompareAndSwap2__TBB_full_fence
00386 #endif
00387 #ifndef __TBB_CompareAndSwap2release
00388 #define __TBB_CompareAndSwap2release __TBB_CompareAndSwap2__TBB_full_fence
00389 #endif
00390
00391 #ifndef __TBB_CompareAndSwap4__TBB_full_fence
00392 #define __TBB_CompareAndSwap4__TBB_full_fence __TBB_CompareAndSwap4
00393 #endif
00394 #ifndef __TBB_CompareAndSwap4acquire
00395 #define __TBB_CompareAndSwap4acquire __TBB_CompareAndSwap4__TBB_full_fence
00396 #endif
00397 #ifndef __TBB_CompareAndSwap4release
00398 #define __TBB_CompareAndSwap4release __TBB_CompareAndSwap4__TBB_full_fence
00399 #endif
00400
00401 #ifndef __TBB_CompareAndSwap8__TBB_full_fence
00402 #define __TBB_CompareAndSwap8__TBB_full_fence __TBB_CompareAndSwap8
00403 #endif
00404 #ifndef __TBB_CompareAndSwap8acquire
00405 #define __TBB_CompareAndSwap8acquire __TBB_CompareAndSwap8__TBB_full_fence
00406 #endif
00407 #ifndef __TBB_CompareAndSwap8release
00408 #define __TBB_CompareAndSwap8release __TBB_CompareAndSwap8__TBB_full_fence
00409 #endif
00410
00411 #ifndef __TBB_FetchAndAdd1__TBB_full_fence
00412 #define __TBB_FetchAndAdd1__TBB_full_fence __TBB_FetchAndAdd1
00413 #endif
00414 #ifndef __TBB_FetchAndAdd1acquire
00415 #define __TBB_FetchAndAdd1acquire __TBB_FetchAndAdd1__TBB_full_fence
00416 #endif
00417 #ifndef __TBB_FetchAndAdd1release
00418 #define __TBB_FetchAndAdd1release __TBB_FetchAndAdd1__TBB_full_fence
00419 #endif
00420
00421 #ifndef __TBB_FetchAndAdd2__TBB_full_fence
00422 #define __TBB_FetchAndAdd2__TBB_full_fence __TBB_FetchAndAdd2
00423 #endif
00424 #ifndef __TBB_FetchAndAdd2acquire
00425 #define __TBB_FetchAndAdd2acquire __TBB_FetchAndAdd2__TBB_full_fence
00426 #endif
00427 #ifndef __TBB_FetchAndAdd2release
00428 #define __TBB_FetchAndAdd2release __TBB_FetchAndAdd2__TBB_full_fence
00429 #endif
00430
00431 #ifndef __TBB_FetchAndAdd4__TBB_full_fence
00432 #define __TBB_FetchAndAdd4__TBB_full_fence __TBB_FetchAndAdd4
00433 #endif
00434 #ifndef __TBB_FetchAndAdd4acquire
00435 #define __TBB_FetchAndAdd4acquire __TBB_FetchAndAdd4__TBB_full_fence
00436 #endif
00437 #ifndef __TBB_FetchAndAdd4release
00438 #define __TBB_FetchAndAdd4release __TBB_FetchAndAdd4__TBB_full_fence
00439 #endif
00440
00441 #ifndef __TBB_FetchAndAdd8__TBB_full_fence
00442 #define __TBB_FetchAndAdd8__TBB_full_fence __TBB_FetchAndAdd8
00443 #endif
00444 #ifndef __TBB_FetchAndAdd8acquire
00445 #define __TBB_FetchAndAdd8acquire __TBB_FetchAndAdd8__TBB_full_fence
00446 #endif
00447 #ifndef __TBB_FetchAndAdd8release
00448 #define __TBB_FetchAndAdd8release __TBB_FetchAndAdd8__TBB_full_fence
00449 #endif
00450
00451 #ifndef __TBB_FetchAndStore1__TBB_full_fence
00452 #define __TBB_FetchAndStore1__TBB_full_fence __TBB_FetchAndStore1
00453 #endif
00454 #ifndef __TBB_FetchAndStore1acquire
00455 #define __TBB_FetchAndStore1acquire __TBB_FetchAndStore1__TBB_full_fence
00456 #endif
00457 #ifndef __TBB_FetchAndStore1release
00458 #define __TBB_FetchAndStore1release __TBB_FetchAndStore1__TBB_full_fence
00459 #endif
00460
00461 #ifndef __TBB_FetchAndStore2__TBB_full_fence
00462 #define __TBB_FetchAndStore2__TBB_full_fence __TBB_FetchAndStore2
00463 #endif
00464 #ifndef __TBB_FetchAndStore2acquire
00465 #define __TBB_FetchAndStore2acquire __TBB_FetchAndStore2__TBB_full_fence
00466 #endif
00467 #ifndef __TBB_FetchAndStore2release
00468 #define __TBB_FetchAndStore2release __TBB_FetchAndStore2__TBB_full_fence
00469 #endif
00470
00471 #ifndef __TBB_FetchAndStore4__TBB_full_fence
00472 #define __TBB_FetchAndStore4__TBB_full_fence __TBB_FetchAndStore4
00473 #endif
00474 #ifndef __TBB_FetchAndStore4acquire
00475 #define __TBB_FetchAndStore4acquire __TBB_FetchAndStore4__TBB_full_fence
00476 #endif
00477 #ifndef __TBB_FetchAndStore4release
00478 #define __TBB_FetchAndStore4release __TBB_FetchAndStore4__TBB_full_fence
00479 #endif
00480
00481 #ifndef __TBB_FetchAndStore8__TBB_full_fence
00482 #define __TBB_FetchAndStore8__TBB_full_fence __TBB_FetchAndStore8
00483 #endif
00484 #ifndef __TBB_FetchAndStore8acquire
00485 #define __TBB_FetchAndStore8acquire __TBB_FetchAndStore8__TBB_full_fence
00486 #endif
00487 #ifndef __TBB_FetchAndStore8release
00488 #define __TBB_FetchAndStore8release __TBB_FetchAndStore8__TBB_full_fence
00489 #endif
00490
00491 #endif // __TBB_DECL_FENCED_ATOMICS
00492
00493
00494 #ifndef __TBB_FetchAndAddWrelease
00495 #define __TBB_FetchAndAddWrelease __TBB_FetchAndAddW
00496 #endif
00497
00498 #ifndef __TBB_FetchAndIncrementWacquire
00499 #define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1)
00500 #endif
00501
00502 #ifndef __TBB_FetchAndDecrementWrelease
00503 #define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,(-1))
00504 #endif
00505
00506 #if __TBB_WORDSIZE==4
00507
00508 #ifndef __TBB_Store8
00509 inline void __TBB_Store8 (volatile void *ptr, int64_t value) {
00510 tbb::internal::atomic_backoff b;
00511 for(;;) {
00512 int64_t result = *(int64_t *)ptr;
00513 if( __TBB_CompareAndSwap8(ptr,value,result)==result ) break;
00514 b.pause();
00515 }
00516 }
00517 #endif
00518
00519 #ifndef __TBB_Load8
00520 inline int64_t __TBB_Load8 (const volatile void *ptr) {
00521 int64_t result = *(int64_t *)ptr;
00522 result = __TBB_CompareAndSwap8((volatile void *)ptr,result,result);
00523 return result;
00524 }
00525 #endif
00526 #endif
00527
#ifndef __TBB_Log2
//! Portable floor(log2(x)) fallback; returns -1 for x==0.
/** Binary reduction: successively halve the number of candidate bits by
    testing and shifting in steps of 32/16/8/4/2, then settle the last bit. */
inline intptr_t __TBB_Log2( uintptr_t x ) {
    if( x==0 ) return -1;
    intptr_t result = 0;
#if __TBB_WORDSIZE>=8
    if( x>>32 ) { x >>= 32; result += 32; }
#endif
    if( x>>16 ) { x >>= 16; result += 16; }
    if( x>>8 )  { x >>= 8;  result += 8; }
    if( x>>4 )  { x >>= 4;  result += 4; }
    if( x>>2 )  { x >>= 2;  result += 2; }
    // x is now 1, 2, or 3; the final bit of the answer is set iff x >= 2.
    if( x & 2 ) ++result;
    return result;
}
#endif
00543
00544 #ifndef __TBB_AtomicOR
00545 inline void __TBB_AtomicOR( volatile void *operand, uintptr_t addend ) {
00546 tbb::internal::atomic_backoff b;
00547 for(;;) {
00548 uintptr_t tmp = *(volatile uintptr_t *)operand;
00549 uintptr_t result = __TBB_CompareAndSwapW(operand, tmp|addend, tmp);
00550 if( result==tmp ) break;
00551 b.pause();
00552 }
00553 }
00554 #endif
00555
00556 #ifndef __TBB_AtomicAND
00557 inline void __TBB_AtomicAND( volatile void *operand, uintptr_t addend ) {
00558 tbb::internal::atomic_backoff b;
00559 for(;;) {
00560 uintptr_t tmp = *(volatile uintptr_t *)operand;
00561 uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&addend, tmp);
00562 if( result==tmp ) break;
00563 b.pause();
00564 }
00565 }
00566 #endif
00567
#ifndef __TBB_TryLockByte
//! Try to acquire a byte-sized spin lock; returns true on success.
/** The lock is free when flag==0; acquisition CASes it from 0 to 1. */
inline bool __TBB_TryLockByte( unsigned char &flag ) {
    return __TBB_CompareAndSwap1(&flag,1,0)==0;
}
#endif
00573
00574 #ifndef __TBB_LockByte
00575 inline uintptr_t __TBB_LockByte( unsigned char& flag ) {
00576 if ( !__TBB_TryLockByte(flag) ) {
00577 tbb::internal::atomic_backoff b;
00578 do {
00579 b.pause();
00580 } while ( !__TBB_TryLockByte(flag) );
00581 }
00582 return 0;
00583 }
00584 #endif
00585
00586 #endif