void KernelComputeStatusVisibleUnitsRBM(dim3 & gridSize, int blockSize, cudafloat * h, cudafloat * weights, cudafloat * a, cudafloat * v, float * randomValues, int J);
void KernelComputeStatusHiddenUnitsRBM(dim3 & gridSize, int blockSize, cudafloat * v, cudafloat * weights, cudafloat * b, cudafloat * h, float * randomValues, int I);
KERNEL ComputeStatusHiddenUnitsSmallRBM(cudafloat * v, cudafloat * weights, cudafloat * b, cudafloat * h, float * randomValues);
KERNEL ComputeStatusVisibleUnitsSmallRBM(cudafloat * h, cudafloat * weights, cudafloat * a, cudafloat * v, float * randomValues);
void RBM::RandomizeWeights() {
int nWeights = w.Elements();
cudafloat * weights = w.HostPointer();
for (int i = 0; i < nWeights; i++) weights[i] = CUDA_VALUE(2.0) * stdWeights * ((cudafloat) rand() / RAND_MAX) - stdWeights;
w.UpdateDevice();
int blockSize = NumberThreadsPerBlockThatBestFit(nWeights);
int blocks = NumberBlocks(nWeights, blockSize);
#ifdef USE_STEP_SIZE
InitBiasDeltasRBM<<<blocks, blockSize>>>(b.DevicePointer(), INITIAL_BIAS_HIDDEN_UNITS, lastDeltaW.Pointer(), lastDeltaB.Pointer(), lastDeltaWithoutLearningMomentumW.Pointer(), lastDeltaWithoutLearningMomentumB.Pointer(), learningRateW.Pointer(), learningRateB.Pointer(), initialLearningRate, nWeights, J);
#else
InitBiasDeltasRBM<<<blocks, blockSize>>>(b.DevicePointer(), INITIAL_BIAS_HIDDEN_UNITS, lastDeltaW.Pointer(), lastDeltaB.Pointer(), nWeights, J);
learningRate = initialLearningRate;
#endif
blocks = NumberBlocks(I, inputsBlockSize);
#ifdef USE_STEP_SIZE
InitInputBiasDeltasRBM<<<blocks, inputsBlockSize>>>(v.Pointer(), a.DevicePointer(), lastDeltaA.Pointer(), lastDeltaWithoutLearningMomentumA.Pointer(), learningRateA.Pointer(), initialLearningRate, I, samples);
#else
InitInputBiasDeltasRBM<<<blocks, inputsBlockSize>>>(v.Pointer(), a.DevicePointer(), lastDeltaA.Pointer(), I, samples);
#endif
epoch = 0;
}
void RBM::ComputeStatusUnits(cudafloat * v, cudafloat * h, cudafloat * v_reconstructed) {
if (proportionRandomValuesUsed == proportionRandomValuesGenerated) {
Random::Fill(randomValues);
proportionRandomValuesUsed = 0;
}
int connections = w.Elements();
float * rnd = (v_reconstructed == nullptr) ? nullptr : (randomValues.Pointer() + (proportionRandomValuesUsed * randomValuesNeededPerEpoch));
if(connections > MAX_THREADS_PER_BLOCK) {
KernelComputeStatusHiddenUnitsRBM(dimJsamples, inputsBlockSize, v, w.DevicePointer(), b.DevicePointer(), h, rnd, I);
} else {
ComputeStatusHiddenUnitsSmallRBM<<<samples, dimIJ, connections * sizeof(cudafloat)>>>(v, w.DevicePointer(), b.DevicePointer(), h, rnd);
}