Error message: the provided PTX was compiled with an unsupported toolchain.
When I started playing with CUDA and nvcc, I got the error message "the provided PTX was compiled with an unsupported toolchain." when I compiled the following program with "nvcc err.cu -o err" and executed it:
#include <stdio.h>
// Minimal demonstration kernel: every thread of the launch prints the
// integer argument it was started with (device-side printf, debugging aid).
__global__ void testKernel(int nr)
{
    printf("kernel received parameter '%d'\n", nr);
}
// Launches testKernel with a single thread and reports any CUDA error.
// Returns 0 on success and 1 if the launch or the kernel execution failed.
int main(void) {
    // One block containing one thread; 42 is the value the kernel prints.
    testKernel<<<1, 1>>>(42);

    // A kernel launch has no return value: launch-time failures (bad
    // configuration, no usable device code / PTX, ...) are only visible
    // through cudaGetLastError(), so query it right after the launch.
    cudaError_t error = cudaGetLastError();

    if (error == cudaSuccess) {
        // Wait for the kernel to finish. The return value carries errors that
        // occurred while the kernel was running, so it must not be discarded.
        error = cudaDeviceSynchronize();
    }

    if (error != cudaSuccess) {
        // Diagnostics go to stderr so they are not mixed with kernel output.
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(error));
        return 1;
    }
    return 0;
}
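When I used the compiler flag -arch=native, the error went away, presumably because nvcc then generates machine code for the GPU installed in the machine instead of PTX that the (older) driver would have to JIT-compile at run time: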
$ nvcc -arch=native err.cu -o err
$ ./err
kernel received parameter '42'
show-device-properties.cu
#include <stdio.h>
#include <cuda.h>
// Prints a short summary of selected properties for every CUDA device that
// the runtime reports.
// Returns 0 on success and 1 if the device count or any device's properties
// could not be queried.
int main() {
    int deviceCount = 0;  // stays 0 if the query below fails

    cudaError_t status = cudaGetDeviceCount(&deviceCount);
    if (status != cudaSuccess) {
        fprintf(stderr, "cudaGetDeviceCount failed: %s\n", cudaGetErrorString(status));
        return 1;
    }

    int exitCode = 0;
    for (int dev = 0; dev < deviceCount; ++dev) {
        cudaDeviceProp prp;
        status = cudaGetDeviceProperties(&prp, dev);
        if (status != cudaSuccess) {
            // Do not print fields of an unfilled struct; report and move on.
            fprintf(stderr, "cudaGetDeviceProperties(%d) failed: %s\n", dev, cudaGetErrorString(status));
            exitCode = 1;
            continue;
        }

        printf("Device %d (%s)\n", dev, prp.name);
        printf(" Compute capability: %d.%d.\n", prp.major, prp.minor);
        printf(" Multiprocessors: %d\n", prp.multiProcessorCount);
        printf(" Concur. kernels: %d\n", prp.concurrentKernels);
        printf(" 32-bit regs/block: %d\n", prp.regsPerBlock);
        // sharedMemPerBlock and totalGlobalMem are size_t, hence %zu
        // (with %d the argument type mismatches and large values truncate).
        printf(" Shared mem/block: %zu\n", prp.sharedMemPerBlock);
        printf(" L2 cache size: %d\n", prp.l2CacheSize);
        // The global-memory line must report totalGlobalMem, not totalConstMem.
        printf(" Global memory: %zu\n", prp.totalGlobalMem);
        // printf(" ? : %zu\n" , prp.reservedSharedMemPerBlock );
    }
    return exitCode;
}