The Problem
I have written a small sample CUDA program that uses constant memory. It runs successfully with CUDA 4.2, but when I compile it with CUDA 5 and run it I get an "invalid device symbol" error. The sample code is shown below.
The Code
#include <iostream>
#include <stdio.h>
#include <cuda_runtime.h>
#include <cuda.h>
// Parameter bundle that main() fills on the host and copies to the device.
// In this sample only A is read by the kernel (as the scale factor); B, C and
// D are assigned on the host but never read on the device.
struct CParameter
{
    int   A;
    float B, C, D;
};
// Pointer held in constant memory. main() stores in it the device address of
// a CParameter allocated with cudaMalloc; the kernel dereferences it.
__constant__ CParameter * CONSTANT_PARAMETER;   
// Name of the symbol above spelled as a string; main() passes it to
// cudaMemcpyToSymbol.
// NOTE(review): referring to a device symbol by string name was removed in
// CUDA 5.0 -- the symbol CONSTANT_PARAMETER itself must be passed there.
#define PARAMETER "CONSTANT_PARAMETER"
// Reports the outcome of a CUDA runtime call.
//   Status: the cudaError_t returned by a CUDA runtime API call.
//   Returns true when Status is cudaSuccess; otherwise prints the runtime's
//   description of the error to stdout (one line) and returns false.
bool ERROR_CHECK(cudaError_t Status)
{
    if (Status == cudaSuccess)
        return true;

    // Pass the message as an argument rather than as the format string, so a
    // '%' inside the error text can never be interpreted as a conversion.
    printf("%s\n", cudaGetErrorString(Status));
    return false;
}
// Kernel that executes on the CUDA device.
// Multiplies each of the first N elements of `a`, in place, by the integer
// field A of the CParameter that CONSTANT_PARAMETER points to (despite the
// name, nothing is squared). The global index is built from the x components
// of the block and thread indices only; threads with an index >= N do nothing.
__global__ void square_array(float *a, int N)
{
    const int gid = threadIdx.x + blockDim.x * blockIdx.x;
    if (gid >= N)
        return;

    a[gid] *= CONSTANT_PARAMETER->A;
}
////Main Function/////
// Host driver for the sample:
//   1. allocates host and device buffers for N floats and one CParameter,
//   2. uploads both, then stores the device address of the CParameter in the
//      CONSTANT_PARAMETER symbol,
//   3. launches square_array and copies the result back,
//   4. prints the result and releases all memory.
// Every CUDA call is routed through ERROR_CHECK so failures are reported.
// Returns 0 when every CUDA call succeeded, 1 otherwise.
int main(void)
{
    /////Variable Definition
    const int N = 10;
    const size_t size = N * sizeof(float);
    bool Ok = true;
    CParameter * m_dParameter = NULL;
    CParameter * m_hParameter = NULL;
    float * m_D = NULL;
    float * m_H = NULL;

    //Memory Allocation Host
    m_hParameter = new CParameter;
    m_H = new float[N];

    //Memory Allocation Device
    Ok = ERROR_CHECK(cudaMalloc((void **) &m_D, size)) && Ok;
    Ok = ERROR_CHECK(cudaMalloc((void **) &m_dParameter, sizeof(CParameter))) && Ok;

    ////Data Initialization
    for (int i = 0; i < N; i++)
        m_H[i] = (float)i;
    m_hParameter->A = 5;
    m_hParameter->B = 3;
    m_hParameter->C = 98;
    m_hParameter->D = 100;

    //Memory Copy from Host To Device
    Ok = ERROR_CHECK(cudaMemcpy(m_D, m_H, size, cudaMemcpyHostToDevice)) && Ok;
    Ok = ERROR_CHECK(cudaMemcpy(m_dParameter, m_hParameter, sizeof(CParameter),
                                cudaMemcpyHostToDevice)) && Ok;

    // Store the device pointer itself (not the struct) in the constant symbol,
    // hence sizeof(m_dParameter).
    // NOTE: PARAMETER names the symbol with a string. This is the call that
    // reports "invalid device symbol" on CUDA 5.0 and later, where the symbol
    // CONSTANT_PARAMETER must be passed directly instead.
    Ok = ERROR_CHECK(cudaMemcpyToSymbol(PARAMETER, &m_dParameter, sizeof(m_dParameter))) && Ok;

    // Do calculation on device:
    const int block_size = 4;
    const int n_blocks = (N + block_size - 1) / block_size;   // ceiling division
    square_array <<<n_blocks, block_size>>>(m_D, N);
    // A kernel launch returns no status; query launch errors explicitly.
    Ok = ERROR_CHECK(cudaGetLastError()) && Ok;

    // Retrieve result from device and store it in host array. This blocking
    // copy also surfaces errors raised while the kernel was executing.
    Ok = ERROR_CHECK(cudaMemcpy(m_H, m_D, sizeof(float) * N, cudaMemcpyDeviceToHost)) && Ok;

    // Print results
    for (int i = 0; i < N; i++)
        printf("%d %f\n", i, m_H[i]);

    // Cleanup: memory obtained with new[] / new must be released with
    // delete[] / delete, not free().
    delete[] m_H;
    delete m_hParameter;
    Ok = ERROR_CHECK(cudaFree(m_dParameter)) && Ok;
    Ok = ERROR_CHECK(cudaFree(m_D)) && Ok;
    return Ok ? 0 : 1;
}
My environment is Windows with the CUDA 5.0 Production Release, and the graphics card is a GTX 590.
Any help will be appreciated.
In an effort to avoid being "Stringly Typed", the use of character strings to refer to device symbols was deprecated in CUDA runtime API functions in CUDA 4.1, and removed in CUDA 5.0.
The CUDA 5 release notes read:
** The use of a character string to indicate a device symbol, which was possible with certain API functions, is no longer supported. Instead, the symbol should be used directly.
If you change your code to the following, it should work.
Status = cudaMemcpyToSymbol(CONSTANT_PARAMETER, &m_dParameter, sizeof(m_dParameter));
ERROR_CHECK(Status);

From the CUDA 5.0 Release Notes:
** The use of a character string to indicate a device symbol, which was possible with certain API functions, is no longer supported. Instead, the symbol should be used directly.
These API functions still exist, but they now accept the target symbol only as a bare identifier; previously they accepted either a bare identifier or a string literal naming the identifier. For example:
__device__ __constant__ type ident;
main() { cudaMemcpyToSymbol("ident", ...); } // no longer valid, returns cudaErrorInvalidSymbol
main() { cudaMemcpyToSymbol(ident, ...); }   // valid
So get rid of this:
#define PARAMETER "CONSTANT_PARAMETER"
And change this:
Status = cudaMemcpyToSymbol(PARAMETER, &m_dParameter, sizeof(m_dParameter)); 
To this:
Status = cudaMemcpyToSymbol(CONSTANT_PARAMETER, &m_dParameter, sizeof(m_dParameter)); 
And I think it will work.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With