See the repro below, on x86-64 Linux (Ubuntu 17.10). I think the -O2 version is returning the wrong result: in the loop in memswp, it appears to be writing to *q before reading from it.
$ cat main.c
#include <stdio.h>

extern void memswp(int *p, int *q, int n);

int main()
{
    __attribute__((aligned(4))) char a[] = {'a','b','c','d','e','f','g','h',0};
    memswp((int *)a, (int *)a + 1, 1);
    puts(a);
    return 0;
}

$ cat move.c
void memswp(int *p, int *q, int n)
{
    for (int i = 0; i < n; i++) {
        int t = ((struct { int x; } *)p + i)->x;
        ((struct { int x; } *)p + i)->x = ((struct { int x; } *)q + i)->x;
        ((struct { int x; } *)q + i)->x = t;
    }
}

$ icc -O0 main.c move.c -o swp0 && ./swp0
efghabcd

$ icc -O2 main.c move.c -o swp2 && ./swp2
efghefgh

$ icc --version
icc (ICC) 18.0.1 20171018
Copyright (C) 1985-2017 Intel Corporation. All rights reserved.