In C++ you can overload on argument type, so perhaps there is aDesperado wrote: 1. why does bsf call itself ?
Code: Select all
int bsf(U32);
Moderators: hgm, Rebel, chrisw
In C++ you can overload on argument type, so perhaps there is aDesperado wrote: 1. why does bsf call itself ?
Code: Select all
int bsf(U32);
Code: Select all
// Returns the low 32 bits of b; the narrowing is routed through small_cast.
inline U32 lo(U64 b) {
    const U32 low_word = small_cast<U32>(b);
    return low_word;
}
// Returns the high 32 bits of b.
inline U32 hi(U64 b) {
    const U64 shifted = b >> 32;
    return static_cast<U32>(shifted);
}
// 64-bit bit scan built on the separate bsf(U32) overload.
// When the low word is non-zero it is scanned directly; otherwise the
// high word is scanned and 32 is added to the result.
inline index_t bsf(U64 value) {
    const U32 low_word = ::lo(value);
    if (low_word) {
        return bsf(low_word);
    }
    return bsf(hi(value)) + 32;
}
A bsf(U64) function calls a different bsf(U32) function.1. why does bsf call itself ?
Code: Select all
// Produces the value of T with every bit set (the bitwise complement of zero).
template <typename T>
inline T universe() {
    const T zero = static_cast<T>(0);
    return static_cast<T>(~zero);
}
// Narrows n to T after masking it with the all-ones value of T, so the
// discarded high bits are dropped explicitly before the cast.
template <typename T, typename N>
inline T small_cast(N n) {
    const T mask = universe<T>();
    return static_cast<T>(n & mask);
}
Sorry, I did not expect anyone to take my code literally and did not want to open a discussion about C++ syntax tricks. 3. and what is the issue between the _trivial_ C version and the C++ version? In other words, what makes the C++ version more complex than the pure C version?
A simple cast to a smaller type should emit a warning in any compiler. And at least the Visual C++ debug build tests for this and breaks execution of the program when a silent conversion loses significant bits. 4. I thought explicitly putting the (reduced) type in front of the more complex type is enough for a downward type conversion.
Well I sometimes find myself to indulge on C++ obscurities but indeed you beat me hands down !Aleks Peshkov wrote: Well, it is my own operator.Code: Select all
// All-ones value of T, obtained by complementing zero.
template <typename T>
inline T universe() {
    const T none = static_cast<T>(0);
    return static_cast<T>(~none);
}

// Explicit narrowing "operator": masks n down to the bits that fit in T,
// then casts the masked value to T.
template <typename T, typename N>
inline T small_cast(N n) {
    const T keep = universe<T>();
    return static_cast<T>(n & keep);
}
A cast is not a silent conversion and does not make the compiler emit any warning. A simple cast to a smaller type should emit a warning in any compiler. And at least the Visual C++ debug build tests for this and breaks execution of the program when a silent conversion loses significant bits.
Code: Select all
// Contrasts an implicit narrowing conversion with an explicit C-style cast
// of the same value.
void test (U64 b) {
U32 a = b; // implicit U64 -> U32 narrowing: the compiler reports a warning here
U32 c = (U32)b; // explicit cast: no warning is reported for this line
}
Code: Select all
// Finds the lowest set bit of bb by scanning it as two 32-bit words and
// toggles that bit in place.
// NOTE(review): the word at the lower address is scanned first, which
// presumably assumes a little-endian layout of BTB_T — confirm.
// NOTE(review): BB is defined outside this view; presumably BB(id) is the
// single-bit mask for index id — confirm.
// NOTE(review): if bb is zero both scans fail; per the MSVC documentation the
// index written by _BitScanForward is then undefined — callers presumably
// guarantee bb != 0.
BTB_T only_pop1(BTB_T &bb)
{
UI_32 *ptr = (UI_32*)&bb; // view the bitboard as consecutive 32-bit words
ULONG id=64;
// Scan the first word; only when it has no set bit is ptr advanced and the
// second word scanned. The ternary is used purely for its short-circuit
// behaviour; the "id" in the true branch is a no-op.
_BitScanForward(&id,*(ptr)) ? id : _BitScanForward(&id,*(ptr+=1));
// Xor the mask into the word that was scanned. The value returned is that
// 32-bit word after the xor (converted to BTB_T), not the whole bitboard.
return(*ptr^=BB(id));
}
// Toggles the lowest set bit of bb in place and returns its bit index,
// working on the bitboard as two 32-bit words.
// NOTE(review): the word at the lower address is treated as bits 0..31,
// which presumably assumes a little-endian layout of BTB_T — confirm.
// NOTE(review): BB is defined outside this view; presumably BB(id) is the
// single-bit mask for index id — confirm.
// NOTE(review): if bb is zero both scans fail and id is not meaningful —
// callers presumably guarantee bb != 0.
ULONG pop_and_index1(BTB_T &bb)
{
UI_32 *ptr = (UI_32*)&bb; // view the bitboard as consecutive 32-bit words
ULONG id=64;
// Scan the first word; only when it has no set bit is ptr advanced and the
// second word scanned. The "id" in the true branch is a no-op.
_BitScanForward(&id,*(ptr)) ? id : _BitScanForward(&id,*(ptr+=1));
// Xor the mask into whichever word was scanned.
*ptr^=(BB(id));
// ptr still pointing at the first word means the bit was found there, so id
// is returned as-is; otherwise it was found in the second word and 32 is added.
return(ptr==(UI_32*)&bb ? id : 32+id);
}
Code: Select all
BB(id)
Code: Select all
_BitScanForward(&id,*(ptr+=1));
Code: Select all
_BitScanForward(&id,*(ptr+1));
Code: Select all
*ptr^=(BB(id));
Code: Select all
// Clears the least-significant set bit of *bb and returns its index as a Square.
//
// The board is handled as two 32-bit words so that the 32-bit
// _BitScanForward intrinsic can be used. The word at the lower address is
// treated as bits 0..31; the second word contributes indices 32..63.
// NOTE(review): this presumably relies on a little-endian target — confirm.
//
// Precondition: *bb must be non-zero. When the scanned word is zero,
// _BitScanForward does not produce a usable index and the result is
// meaningless.
Square pop_1st_bit(Bitboard* bb) {
    uint32_t* ptr = (uint32_t*)bb;
    unsigned long id;
    if (*ptr)
    {
        // Lowest set bit lives in the first word.
        _BitScanForward(&id, *ptr);
        // Use an unsigned one: with id == 31 a signed "1 << id" would shift
        // into the sign bit of int.
        *ptr ^= (uint32_t(1) << id);
    }
    else
    {
        // First word is empty: scan the second word and offset the index by 32.
        ptr += 1;
        _BitScanForward(&id, *ptr);
        *ptr ^= (uint32_t(1) << id);
        id += 32;
    }
    return Square(id);
}
Code: Select all
Square pop_1st_bit(Bitboard* bb) {
0040E760 push ecx
uint32_t* ptr = (uint32_t*)bb;
unsigned long id;
if (*ptr)
0040E761 mov edx,dword ptr [esi]
0040E763 push edi
{
_BitScanForward(&id, *ptr);
*ptr ^= (1 << id);
0040E764 mov edi,1
0040E769 test edx,edx
0040E76B je pop_1st_bit+1Bh (40E77Bh)
0040E76D bsf eax,edx
0040E770 mov ecx,eax
0040E772 shl edi,cl
0040E774 xor edi,edx
0040E776 mov dword ptr [esi],edi
0040E778 pop edi
}
return Square(id);
}
0040E779 pop ecx
0040E77A ret
}
else
{
ptr += 1;
_BitScanForward(&id, *ptr);
0040E77B mov edx,dword ptr [esi+4]
0040E77E bsf ecx,edx
*ptr ^= (1 << id);
0040E781 shl edi,cl
0040E783 mov eax,ecx
0040E785 xor edi,edx
0040E787 mov dword ptr [esi+4],edi
id += 32;
0040E78A add eax,20h
0040E78D pop edi
}
return Square(id);
}
0040E78E pop ecx
0040E78F ret
!oups, how that? You forgot const? Have you inspected assembly?Desperado wrote:Hi Marco,
first i have to correct my last post !!!
Putting the table to global scope, everything changes!
Code: Select all
; Bit scan of a 64-bit value without jumps: isolate the bits up to the
; lowest set bit, fold to 32 bits, multiply by a constant and use the top
; 6 bits of the product as a table index. Clobbers ebx, ecx and edx.
; input edx:ecx bb
; output eax bit
mov eax, ecx ; eax = original low dword
sub ecx, 1 ; low dword of bb - 1, borrow left in CF
mov ebx, edx ; ebx = original high dword
sbb edx, 0 ; high dword of bb - 1, consuming the borrow
xor eax, ecx ; eax = lo ^ (lo - 1)
xor eax, ebx ; fold in the original high dword ...
xor eax, edx ; ... and the decremented high dword: eax = 32-bit fold of bb ^ (bb - 1)
imul eax, 78291ACFH ; multiply the folded value by the constant
shr eax, 26 ; keep the top 6 bits: table index 0..63
mov eax, dword ptr [lookup + 4*eax] ; movzx eax, byte ptr [lookup + eax]
// ~ 12 cycles
The bsf approach may suffer from mispredicted branches. A 32-bit bsf on K7/K8 alone is an 8-cycle vector-path instruction. Desperado wrote: The result seems to be about equal... (for _bitscan alone_)
BUT on _anti_ bitscan machine.