/*
 * Round-trip copy check: fill a host buffer of `count` bytes with the
 * pattern i % 255, copy it to a device allocation, zero the host buffer,
 * copy the device data back and print the buffer before and after so the
 * two dumps can be compared on stderr.
 *
 * Any failure makes the enclosing function return -1. Buffers acquired so
 * far are released on those failure paths so an error does not leak host
 * or device memory. On success both buffers are left allocated; releasing
 * them is left to the code that follows this section.
 */
int i;
void* devPtr = NULL;
void* hostPtr = NULL;

if (cudaMalloc(&devPtr, count) != cudaSuccess)
    return -1;

hostPtr = malloc(count);
/* malloc(0) may legitimately return NULL, so only treat NULL as a failure
 * when bytes were actually requested. */
if (hostPtr == NULL && count != 0) {
    cudaFree(devPtr);
    return -1;
}

/* Fill the host buffer with a recognisable, position-dependent pattern. */
for (i = 0; i < count; i++) {
    ((uint8_t*)hostPtr)[i] = i % 255;
}

/* Dump the pattern before it is sent to the device. */
for (i = 0; i < count; i++)
    fprintf(stderr, "In pos %d: %d\n", i, ((uint8_t*)hostPtr)[i]);

if (cudaMemcpy(devPtr, hostPtr, count, cudaMemcpyHostToDevice) != cudaSuccess) {
    free(hostPtr);
    cudaFree(devPtr);
    return -1;
}

/* Clear the host copy so the second dump can only show the pattern if the
 * device-to-host copy really delivered it. */
for (i = 0; i < count; i++) {
    ((uint8_t*)hostPtr)[i] = 0;
}

if (cudaMemcpy(hostPtr, devPtr, count, cudaMemcpyDeviceToHost) != cudaSuccess) {
    free(hostPtr);
    cudaFree(devPtr);
    return -1;
}

/* Dump what came back from the device. */
for (i = 0; i < count; i++)
    fprintf(stderr, "In pos %d: %d\n", i, ((uint8_t*)hostPtr)[i]);