Dear all,
the following code:
#include <iostream> #include <unistd.h> #include <pthread.h> class Test { public: Test() : fDone(false) { } ~Test() {} bool IsDone() const { return fDone; } void SetDone() { fDone = true; } private: bool fDone; }; void* run(void* data) { // input data Test* t = static_cast<Test*>(data); // sleep for 1 second usleep(1000*1000); // thread is done t->SetDone(); std::cout << t->IsDone() << std::endl; std::cout << "Thread finished"<< std::endl; return 0; } int main(int argc, char** argv) { Test t; pthread_t thread; pthread_create(&thread, NULL, &run, &t); while (!t.IsDone()) { usleep(100*1000); } std::cout << "never reached"<< std::endl; void* status; pthread_join(thread, &status); return 0; }
Does not run as expected with Intel Compiler 2019, Update 1 on Linux x86_64. The loop in the main function never finishes, even though the auxiliary thread sets 't.IsDone()' to true after 1 second.
The code is compiled using:
icpc -O2 -g -fPIC -pthread -o bug bug.cc
The bug does not occur if compiled with -00 or -O1. It also doesn't occur when the loop is modified as follows:
while (!t.IsDone()) { std::cout << "..."<< std::endl; usleep(100*1000); }
The loop in question is actually translated by the compiler into this piece of assembly:
Dump of assembler code for function main: 0x0000000000401280 <+0>: push %rbp 0x0000000000401281 <+1>: mov %rsp,%rbp 0x0000000000401284 <+4>: and $0xffffffffffffff80,%rsp 0x0000000000401288 <+8>: sub $0x80,%rsp 0x000000000040128f <+15>: xor %esi,%esi 0x0000000000401291 <+17>: mov $0x3,%edi 0x0000000000401296 <+22>: callq 0x4013d0 <__intel_new_feature_proc_init> 0x000000000040129b <+27>: stmxcsr 0x8(%rsp) 0x00000000004012a0 <+32>: xor %esi,%esi 0x00000000004012a2 <+34>: lea (%rsp),%rdi 0x00000000004012a6 <+38>: orl $0x8040,0x8(%rdi) 0x00000000004012ad <+45>: lea 0x10(%rsp),%rcx 0x00000000004012b2 <+50>: lea 0x57(%rip),%rdx # 0x401310 <run(void*)> 0x00000000004012b9 <+57>: ldmxcsr -0x8(%rcx) 0x00000000004012bd <+61>: movb $0x0,(%rcx) 0x00000000004012c0 <+64>: callq 0x4010a0 <pthread_create@plt> 0x00000000004012c5 <+69>: cmpb $0x0,0x10(%rsp) 0x00000000004012ca <+74>: jne 0x4012d8 <main+88> 0x00000000004012cc <+76>: mov $0x186a0,%edi 0x00000000004012d1 <+81>: callq 0x401130 <usleep@plt> => 0x00000000004012d6 <+86>: jmp 0x4012cc <main+76> 0x00000000004012d8 <+88>: lea 0x1d25(%rip),%rsi # 0x403004 0x00000000004012df <+95>: mov 0x4cd2(%rip),%rdi # 0x405fb8 0x00000000004012e6 <+102>: callq 0x4010c0 <_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc@plt> 0x00000000004012eb <+107>: mov %rax,%rdi 0x00000000004012ee <+110>: mov 0x4cbb(%rip),%rsi # 0x405fb0 0x00000000004012f5 <+117>: callq 0x4010d0 <_ZNSolsEPFRSoS_E@plt> 0x00000000004012fa <+122>: mov (%rsp),%rdi 0x00000000004012fe <+126>: lea 0x8(%rsp),%rsi 0x0000000000401303 <+131>: callq 0x401070 <pthread_join@plt> 0x0000000000401308 <+136>: xor %eax,%eax 0x000000000040130a <+138>: mov %rbp,%rsp 0x000000000040130d <+141>: pop %rbp 0x000000000040130e <+142>: retq 0x000000000040130f <+143>: nop
In particular, note the infinite loop marked in bold, which never checks the status of IsDone() at all and just runs usleep forever.
This bug was not present in older Intel Compiler releases, I think 2019 is the first release that introduced this issue, but I am not 100% sure.