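• Updating an atomic variable is more expensive than updating a regular primitive. For example, copying a `std::shared_ptr` updates its reference count atomically, whereas passing it by const reference does not touch the count at all: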

    #include <chrono>
    #include <cstdint>
    #include <cstdlib>
    #include <exception>
    #include <iostream>
    #include <memory>
    #include <string>
    
    using namespace std::chrono;
    using shared_ptr_t=std::shared_ptr<int>;
    
    // Receives the shared_ptr by value: passing an lvalue copy-constructs the
    // parameter (updating the reference count) and destroys that copy on return.
    // The pointee is read and the result is deliberately discarded.
    // Precondition: ptr must not be empty, because it is dereferenced.
    void shared_ptr_receiver_by_value(shared_ptr_t ptr) {
        static_cast<void>(*ptr);
    }
    
    // Receives the shared_ptr by const reference: no copy of the shared_ptr is
    // made, so the reference count is left untouched.
    // The pointee is read and the result is deliberately discarded.
    // Precondition: ptr must not be empty, because it is dereferenced.
    void shared_ptr_receiver_by_ref(const shared_ptr_t& ptr) {
        static_cast<void>(*ptr);
    }
    
    // Calls shared_ptr_receiver_by_value n times with the same
    // std::shared_ptr<int> (holding 100), so every iteration passes the
    // pointer by value.
    void test_copy_by_value(uint64_t n) {
        const auto shared_value = std::make_shared<int>(100);
        uint64_t calls_left = n;
        while (calls_left != 0u) {
            shared_ptr_receiver_by_value(shared_value);
            --calls_left;
        }
    }
    
    // Calls shared_ptr_receiver_by_ref n times with the same
    // std::shared_ptr<int> (holding 100), so every iteration passes the
    // pointer by const reference.
    void test_copy_by_ref(uint64_t n) {
        const auto shared_value = std::make_shared<int>(100);
        uint64_t calls_left = n;
        while (calls_left != 0u) {
            shared_ptr_receiver_by_ref(shared_value);
            --calls_left;
        }
    }
    
    // Benchmark driver: times one of the two shared_ptr-passing loops and
    // prints the elapsed wall time in microseconds.
    //
    // Usage: <program> <mode> [iterations]
    //   mode        1 runs test_copy_by_value; any other value runs
    //               test_copy_by_ref (non-numeric text parses as 0).
    //   iterations  number of calls to make; defaults to 100 when omitted.
    //
    // Returns 0 on success, or 1 when the mode argument is missing or the
    // iteration count cannot be parsed.
    int main(int argc, char *argv[]) {
        // argv[1] is read below, so refuse to run without it instead of
        // handing a null pointer to atoi.
        if (argc < 2) {
            std::cerr << "Usage: " << (argc > 0 ? argv[0] : "cpu_atomic_copy.bin")
                      << " <1|2> [iterations]\n";
            return 1;
        }
    
        std::uint64_t n = 100;
        if (argc >= 3) {
            // std::stoull throws on non-numeric or out-of-range text; report
            // that as a usage error rather than terminating on an uncaught
            // exception.
            try {
                n = std::stoull(argv[2]);
            } catch (const std::exception&) {
                std::cerr << "Invalid iteration count: " << argv[2] << '\n';
                return 1;
            }
        }
    
        const bool by_value = (std::atoi(argv[1]) == 1);
    
        // steady_clock is monotonic, which is what interval timing needs.
        const auto t1 = std::chrono::steady_clock::now();
        if (by_value) {
            test_copy_by_value(n);
        } else {
            test_copy_by_ref(n);
        }
        const auto t2 = std::chrono::steady_clock::now();
    
        const auto time_span =
            std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1);
        std::cout << "It took me " << time_span.count() << " microseconds.\n";
        return 0;
    }
    
    $ ./cpu_atomic_copy.bin 1 999999
    It took me 3616 microseconds.
    $ ./cpu_atomic_copy.bin 2 999999
    It took me 2 microseconds.