Lines Matching refs:Ncv32u
92 __device__ Ncv32u warpScanInclusive(Ncv32u idata, volatile Ncv32u *s_Data)
101 const Ncv32u n = cv::cuda::device::shfl_up(idata, i);
108 Ncv32u pos = 2 * threadIdx.x - (threadIdx.x & (K_WARP_SIZE - 1));
123 __device__ __forceinline__ Ncv32u warpScanExclusive(Ncv32u idata, volatile Ncv32u *s_Data)
128 template <Ncv32u tiNumScanThreads>
129 __device__ Ncv32u scan1Inclusive(Ncv32u idata, volatile Ncv32u *s_Data)
134 Ncv32u warpResult = warpScanInclusive(idata, s_Data);
150 Ncv32u val = s_Data[threadIdx.x];
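
The scan primitives matched above (source lines 92–150) build a block-wide scan out of shuffle-based warp scans. A minimal sketch of that warp-level step, assuming a full 32-thread warp and the CUDA 9+ __shfl_up_sync intrinsic; the names here are illustrative, not the file's own:

__device__ unsigned int warpScanInclusiveSketch(unsigned int idata)
{
    const unsigned int laneId = threadIdx.x & 31;   // lane within the warp
    for (unsigned int offset = 1; offset < 32; offset <<= 1)
    {
        // pull the running sum from the lane `offset` positions below
        unsigned int n = __shfl_up_sync(0xFFFFFFFFu, idata, offset);
        if (laneId >= offset)
            idata += n;
    }
    return idata;                                   // lane i holds the sum of lanes 0..i
}

__device__ unsigned int warpScanExclusiveSketch(unsigned int idata)
{
    // exclusive = inclusive minus own input, as at source line 123
    return warpScanInclusiveSketch(idata) - idata;
}
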
173 const Ncv32u MAX_GRID_DIM = 65535;
176 const Ncv32u NUM_THREADS_ANCHORSPARALLEL = 64;
198 texture<Ncv32u, 1, cudaReadModeElementType> texIImage;
201 __device__ HaarStage64 getStage(Ncv32u iStage, HaarStage64 *d_Stages)
208 __device__ HaarClassifierNode128 getClassifierNode(Ncv32u iNode, HaarClassifierNode128 *d_ClassifierNodes)
224 __device__ void getFeature(Ncv32u iFeature, HaarFeature64 *d_Features,
226 Ncv32u *rectX, Ncv32u *rectY, Ncv32u *rectWidth, Ncv32u *rectHeight)
243 __device__ Ncv32u getElemIImg(Ncv32u x, Ncv32u *d_IImg)
256 __device__ Ncv32u d_outMaskPosition;
259 __device__ void compactBlockWriteOutAnchorParallel(Ncv32u threadPassFlag, Ncv32u threadElem, Ncv32u *vectorOut)
263 __shared__ Ncv32u shmem[NUM_THREADS_ANCHORSPARALLEL * 2];
264 __shared__ Ncv32u numPassed;
265 __shared__ Ncv32u outMaskOffset;
267 Ncv32u incScan = scan1Inclusive<NUM_THREADS_ANCHORSPARALLEL>(threadPassFlag, shmem);
278 Ncv32u excScan = incScan - threadPassFlag;
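
compactBlockWriteOutAnchorParallel (source lines 259–278) turns per-thread pass flags into a dense write: an inclusive scan gives each survivor its slot, and the last thread's scan value is the block total used to reserve global space. A sketch of that pattern, with blockScanInclusive and d_outCount as hypothetical stand-ins for scan1Inclusive and d_outMaskPosition:

__device__ unsigned int d_outCount;   // stand-in for d_outMaskPosition

__device__ void compactWriteSketch(unsigned int passFlag, unsigned int elem,
                                   unsigned int *out)
{
    __shared__ unsigned int s_base;
    unsigned int incScan = blockScanInclusive(passFlag);  // hypothetical block scan
    unsigned int excScan = incScan - passFlag;            // exclusive position

    __syncthreads();
    if (threadIdx.x == blockDim.x - 1)                    // last thread knows the total
        s_base = atomicAdd(&d_outCount, incScan);         // reserve an output range
    __syncthreads();

    if (passFlag)
        out[s_base + excScan] = elem;                     // dense scatter
}
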
297 __global__ void applyHaarClassifierAnchorParallel(Ncv32u *d_IImg, Ncv32u IImgStride,
298 Ncv32f *d_weights, Ncv32u weightsStride,
300 Ncv32u *d_inMask, Ncv32u *d_outMask,
301 Ncv32u mask1Dlen, Ncv32u mask2Dstride,
302 NcvSize32u anchorsRoi, Ncv32u startStageInc, Ncv32u endStageExc, Ncv32f scaleArea)
304 Ncv32u y_offs;
305 Ncv32u x_offs;
306 Ncv32u maskOffset;
307 Ncv32u outMaskVal;
370 for (Ncv32u iStage = startStageInc; iStage < endStageExc; iStage++)
375 Ncv32u numRootNodesInStage = curStage.getNumClassifierRootNodes();
376 Ncv32u curRootNodeOffset = curStage.getStartClassifierRootNodeOffset();
382 Ncv32u iNode = curRootNodeOffset;
390 Ncv32u curNodeFeaturesNum = featuresDesc.getNumFeatures();
391 Ncv32u iFeature = featuresDesc.getFeaturesOffset();
395 for (Ncv32u iRect=0; iRect<curNodeFeaturesNum; iRect++)
398 Ncv32u rectX, rectY, rectWidth, rectHeight;
403 Ncv32u iioffsTL = (y_offs + rectY) * IImgStride + (x_offs + rectX);
404 Ncv32u iioffsTR = iioffsTL + rectWidth;
405 Ncv32u iioffsBL = iioffsTL + rectHeight * IImgStride;
406 Ncv32u iioffsBR = iioffsBL + rectWidth;
408 Ncv32u rectSum = getElemIImg<tbCacheTextureIImg>(iioffsBR, d_IImg) -
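
Source lines 403–408 (mirrored on the host at 1995–2004) evaluate each Haar rectangle with four integral-image reads. A minimal sketch of that corner arithmetic, with hypothetical names:

// Sum over a w×h window anchored at (x, y) via four corner lookups.
// `iimg` is a row-major integral image of pitch `stride`, where
// iimg[r * stride + c] holds the sum of all pixels above-left of (c, r).
__device__ unsigned int rectSumSketch(const unsigned int *iimg, unsigned int stride,
                                      unsigned int x, unsigned int y,
                                      unsigned int w, unsigned int h)
{
    unsigned int tl = iimg[ y      * stride + x    ];
    unsigned int tr = iimg[ y      * stride + x + w];
    unsigned int bl = iimg[(y + h) * stride + x    ];
    unsigned int br = iimg[(y + h) * stride + x + w];
    return br - bl + tl - tr;   // same corner combination as source line 2004
}
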
485 __global__ void applyHaarClassifierClassifierParallel(Ncv32u *d_IImg, Ncv32u IImgStride,
486 Ncv32f *d_weights, Ncv32u weightsStride,
488 Ncv32u *d_inMask, Ncv32u *d_outMask,
489 Ncv32u mask1Dlen, Ncv32u mask2Dstride,
490 NcvSize32u anchorsRoi, Ncv32u startStageInc, Ncv32u endStageExc, Ncv32f scaleArea)
492 Ncv32u maskOffset = MAX_GRID_DIM * blockIdx.y + blockIdx.x;
499 Ncv32u outMaskVal = d_inMask[maskOffset];
500 Ncv32u y_offs = outMaskVal >> 16;
501 Ncv32u x_offs = outMaskVal & 0xFFFF;
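
Source lines 500–501 decode the packed anchor format used throughout the mask vectors: y in the high 16 bits, x in the low 16, which caps both coordinates at 65535. A sketch of the encode/decode pair implied by those lines:

__host__ __device__ inline unsigned int packAnchor(unsigned int x, unsigned int y)
{
    return (y << 16) | (x & 0xFFFFu);
}

__host__ __device__ inline void unpackAnchor(unsigned int m,
                                             unsigned int &x, unsigned int &y)
{
    y = m >> 16;       // matches source line 500
    x = m & 0xFFFFu;   // matches source line 501
}
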
506 for (Ncv32u iStage = startStageInc; iStage<endStageExc; iStage++)
513 Ncv32u curRootNodeOffset = curStage.getStartClassifierRootNodeOffset() + threadIdx.x;
516 Ncv32u numRootChunks = (numRootNodesInStage + NUM_THREADS_CLASSIFIERPARALLEL - 1) >> NUM_THREADS_CLASSIFIERPARALLEL_LOG2;
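
The chunk count at source line 516 is a power-of-two ceiling division done with an add and a shift. A sketch, with a worked example in the comment:

// (n + T - 1) >> log2(T) == ceil(n / T) when T = 1 << log2T.
// Example: n = 70 root nodes, T = 32 threads -> (70 + 31) >> 5 = 3 chunks.
__host__ __device__ inline unsigned int ceilDivPow2(unsigned int n, unsigned int log2T)
{
    return (n + (1u << log2T) - 1) >> log2T;
}
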
518 for (Ncv32u chunkId=0; chunkId<numRootChunks; chunkId++)
524 Ncv32u iNode = curRootNodeOffset;
530 Ncv32u curNodeFeaturesNum = featuresDesc.getNumFeatures();
531 Ncv32u iFeature = featuresDesc.getFeaturesOffset();
535 for (Ncv32u iRect=0; iRect<curNodeFeaturesNum; iRect++)
538 Ncv32u rectX, rectY, rectWidth, rectHeight;
543 Ncv32u iioffsTL = (y_offs + rectY) * IImgStride + (x_offs + rectX);
544 Ncv32u iioffsTR = iioffsTL + rectWidth;
545 Ncv32u iioffsBL = iioffsTL + rectHeight * IImgStride;
546 Ncv32u iioffsBR = iioffsBL + rectWidth;
548 Ncv32u rectSum = getElemIImg<tbCacheTextureIImg>(iioffsBR, d_IImg) -
620 Ncv32u outMaskOffset = atomicAdd(&d_outMaskPosition, 1);
630 __global__ void initializeMaskVector(Ncv32u *d_inMask, Ncv32u *d_outMask,
631 Ncv32u mask1Dlen, Ncv32u mask2Dstride,
632 NcvSize32u anchorsRoi, Ncv32u step)
634 Ncv32u y_offs = blockIdx.y;
635 Ncv32u x_offs = blockIdx.x * NUM_THREADS_ANCHORSPARALLEL + threadIdx.x;
636 Ncv32u outMaskOffset = y_offs * gridDim.x * blockDim.x + x_offs;
638 Ncv32u y_offs_upsc = step * y_offs;
639 Ncv32u x_offs_upsc = step * x_offs;
640 Ncv32u inMaskOffset = y_offs_upsc * mask2Dstride + x_offs_upsc;
642 Ncv32u outElem = OBJDET_MASK_ELEMENT_INVALID_32U;
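
initializeMaskVector (source lines 630–642) seeds the working mask at a coarse pixel step, mapping each output slot back to a step-spaced anchor in the full-resolution input mask. A sketch under assumed names; the invalid marker and ROI bounds are stand-ins for OBJDET_MASK_ELEMENT_INVALID_32U and anchorsRoi:

__global__ void initMaskSketch(const unsigned int *inMask, unsigned int *outMask,
                               unsigned int mask1Dlen, unsigned int mask2Dstride,
                               unsigned int roiWidth, unsigned int roiHeight,
                               unsigned int step)
{
    unsigned int y = blockIdx.y;
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int outOffs = y * gridDim.x * blockDim.x + x;
    if (outOffs >= mask1Dlen)
        return;

    unsigned int yUp = step * y;                  // upscale back to mask coordinates
    unsigned int xUp = step * x;

    unsigned int elem = 0xFFFFFFFFu;              // stand-in invalid marker
    if (yUp < roiHeight && xUp < roiWidth)
        elem = inMask[yUp * mask2Dstride + xUp];  // carry the packed anchor through

    outMask[outOffs] = elem;
}
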
669 Ncv32u *d_IImg;
670 Ncv32u IImgStride;
672 Ncv32u weightsStride;
676 Ncv32u *d_inMask;
677 Ncv32u *d_outMask;
678 Ncv32u mask1Dlen;
679 Ncv32u mask2Dstride;
681 Ncv32u startStageInc;
682 Ncv32u endStageExc;
687 Ncv32u *_d_IImg, Ncv32u _IImgStride,
688 Ncv32f *_d_weights, Ncv32u _weightsStride,
690 Ncv32u *_d_inMask, Ncv32u *_d_outMask,
691 Ncv32u _mask1Dlen, Ncv32u _mask2Dstride,
692 NcvSize32u _anchorsRoi, Ncv32u _startStageInc,
693 Ncv32u _endStageExc, Ncv32f _scaleArea) :
744 Ncv32u *d_IImg, Ncv32u IImgStride,
745 Ncv32f *d_weights, Ncv32u weightsStride,
747 Ncv32u *d_inMask, Ncv32u *d_outMask,
748 Ncv32u mask1Dlen, Ncv32u mask2Dstride,
749 NcvSize32u anchorsRoi, Ncv32u startStageInc,
750 Ncv32u endStageExc, Ncv32f scaleArea)
779 Ncv32u *d_IImg;
780 Ncv32u IImgStride;
782 Ncv32u weightsStride;
786 Ncv32u *d_inMask;
787 Ncv32u *d_outMask;
788 Ncv32u mask1Dlen;
789 Ncv32u mask2Dstride;
791 Ncv32u startStageInc;
792 Ncv32u endStageExc;
797 Ncv32u *_d_IImg, Ncv32u _IImgStride,
798 Ncv32f *_d_weights, Ncv32u _weightsStride,
800 Ncv32u *_d_inMask, Ncv32u *_d_outMask,
801 Ncv32u _mask1Dlen, Ncv32u _mask2Dstride,
802 NcvSize32u _anchorsRoi, Ncv32u _startStageInc,
803 Ncv32u _endStageExc, Ncv32f _scaleArea) :
850 Ncv32u *d_IImg, Ncv32u IImgStride,
851 Ncv32f *d_weights, Ncv32u weightsStride,
853 Ncv32u *d_inMask, Ncv32u *d_outMask,
854 Ncv32u mask1Dlen, Ncv32u mask2Dstride,
855 NcvSize32u anchorsRoi, Ncv32u startStageInc,
856 Ncv32u endStageExc, Ncv32f scaleArea)
882 Ncv32u *d_inMask;
883 Ncv32u *d_outMask;
884 Ncv32u mask1Dlen;
885 Ncv32u mask2Dstride;
887 Ncv32u step;
891 Ncv32u *_d_inMask, Ncv32u *_d_outMask,
892 Ncv32u _mask1Dlen, Ncv32u _mask2Dstride,
893 NcvSize32u _anchorsRoi, Ncv32u _step) :
925 Ncv32u *d_inMask, Ncv32u *d_outMask,
926 Ncv32u mask1Dlen, Ncv32u mask2Dstride,
927 NcvSize32u anchorsRoi, Ncv32u step)
942 Ncv32u getStageNumWithNotLessThanNclassifiers(Ncv32u N, HaarClassifierCascadeDescriptor &haar,
945 Ncv32u i = 0;
957 NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &integral,
959 NCVMatrixAlloc<Ncv32u> &d_pixelMask,
960 Ncv32u &numDetections,
968 Ncv32u pixelStep,
1018 NCVMatrixAlloc<Ncv32u> h_integralImage(cpuAllocator, integral.width, integral.height, integral.pitch);
1022 NCVMatrixAlloc<Ncv32u> h_pixelMask(cpuAllocator, d_pixelMask.width, d_pixelMask.height, d_pixelMask.pitch);
1029 NCVMatrixAlloc<Ncv32u> h_pixelMask_d(cpuAllocator, d_pixelMask.width, d_pixelMask.height, d_pixelMask.pitch);
1046 for (Ncv32u i=0; i<(Ncv32u)anchorsRoi.height; i++)
1048 for (Ncv32u j=0; j<d_pixelMask.stride(); j++)
1050 if ((i%pixelStep==0) && (j%pixelStep==0) && (j<(Ncv32u)anchorsRoi.width))
1068 NCVVectorReuse<Ncv32u> d_vecPixelMask(d_pixelMask.getSegment(), anchorsRoi.height * d_pixelMask.stride());
1071 NCVVectorAlloc<Ncv32u> d_vecPixelMaskTmp(gpuAllocator, static_cast<Ncv32u>(d_vecPixelMask.length()));
1074 NCVVectorAlloc<Ncv32u> hp_pool32u(cpuAllocator, 2);
1076 Ncv32u *hp_zero = &hp_pool32u.ptr()[0];
1077 Ncv32u *hp_numDet = &hp_pool32u.ptr()[1];
1091 NCVVector<Ncv32u> *d_ptrNowData = &d_vecPixelMask;
1092 NCVVector<Ncv32u> *d_ptrNowTmp = &d_vecPixelMaskTmp;
1094 Ncv32u szNppCompactTmpBuf;
1095 nppsStCompactGetSize_32u(static_cast<Ncv32u>(d_vecPixelMask.length()), &szNppCompactTmpBuf, devProp);
1107 cfdTexIImage = cudaCreateChannelDesc<Ncv32u>();
1131 Ncv32u stageStartAnchorParallel = 0;
1132 Ncv32u stageMiddleSwitch = getStageNumWithNotLessThanNclassifiers(NUM_THREADS_CLASSIFIERPARALLEL,
1134 Ncv32u stageEndClassifierParallel = haar.NumStages;
1141 const Ncv32u compactEveryNstage = bDoAtomicCompaction ? 7 : 1;
1142 Ncv32u curStop = stageStartAnchorParallel;
1143 std::vector<Ncv32u> pixParallelStageStops;
1155 Ncv32u pixParallelStageStopsIndex = 0;
1161 ncvAssertCUDAReturn(cudaMemcpyToSymbolAsync(d_outMaskPosition, hp_zero, sizeof(Ncv32u),
1181 static_cast<Ncv32u>(d_vecPixelMask.length()), d_pixelMask.stride(),
1188 ncvAssertCUDAReturn(cudaMemcpyFromSymbolAsync(hp_numDet, d_outMaskPosition, sizeof(Ncv32u),
1196 nppSt = nppsStCompact_32u(d_ptrNowTmp->ptr(), static_cast<Ncv32u>(d_vecPixelMask.length()),
1211 ncvAssertCUDAReturn(cudaMemcpyToSymbolAsync(d_outMaskPosition, hp_zero, sizeof(Ncv32u),
1244 ncvAssertCUDAReturn(cudaMemcpyFromSymbolAsync(hp_numDet, d_outMaskPosition, sizeof(Ncv32u),
1251 nppSt = nppsStCompact_32u(d_ptrNowData->ptr(), static_cast<Ncv32u>(d_vecPixelMask.length()),
1276 ncvAssertCUDAReturn(cudaMemcpyToSymbolAsync(d_outMaskPosition, hp_zero, sizeof(Ncv32u),
1314 ncvAssertCUDAReturn(cudaMemcpyFromSymbolAsync(hp_numDet, d_outMaskPosition, sizeof(Ncv32u),
1339 ncvAssertCUDAReturn(cudaMemcpyToSymbolAsync(d_outMaskPosition, hp_zero, sizeof(Ncv32u),
1375 ncvAssertCUDAReturn(cudaMemcpyFromSymbolAsync(hp_numDet, d_outMaskPosition, sizeof(Ncv32u),
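
The compaction passes above reset and read back the device counter d_outMaskPosition around each launch (source lines 1161, 1188, and the repeats that follow). A sketch of that symbol-copy bookkeeping, with d_counter as a hypothetical stand-in and the kernel launch elided:

#include <cuda_runtime.h>

__device__ unsigned int d_counter;   // stand-in for d_outMaskPosition

void runCompactedPassSketch(unsigned int *hp_zero, unsigned int *hp_numDet,
                            cudaStream_t stream)
{
    // reset the counter on the device before the launch
    cudaMemcpyToSymbolAsync(d_counter, hp_zero, sizeof(unsigned int),
                            0, cudaMemcpyHostToDevice, stream);

    // ... launch the classifier kernel; each surviving anchor does
    //     atomicAdd(&d_counter, 1) to claim an output slot (source line 620) ...

    // read back how many anchors passed
    cudaMemcpyFromSymbolAsync(hp_numDet, d_counter, sizeof(unsigned int),
                              0, cudaMemcpyDeviceToHost, stream);
    cudaStreamSynchronize(stream);   // *hp_numDet is valid after this
}
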
1409 Ncv32u fpu_oldcw, fpu_cw;
1413 Ncv32u numDetGold;
1429 for (Ncv32u i=0; i<std::max(numDetGold, numDetections) && bPass; i++)
1455 const Ncv32u NUM_GROW_THREADS = 128;
1458 __device__ __host__ NcvRect32u pixelToRect(Ncv32u pixel, Ncv32u width, Ncv32u height, Ncv32f scale)
1461 res.x = (Ncv32u)(scale * (pixel & 0xFFFF));
1462 res.y = (Ncv32u)(scale * (pixel >> 16));
1463 res.width = (Ncv32u)(scale * width);
1464 res.height = (Ncv32u)(scale * height);
1469 __global__ void growDetectionsKernel(Ncv32u *pixelMask, Ncv32u numElements,
1471 Ncv32u rectWidth, Ncv32u rectHeight, Ncv32f curScale)
1473 Ncv32u blockId = blockIdx.y * 65535 + blockIdx.x;
1474 Ncv32u elemAddr = blockId * NUM_GROW_THREADS + threadIdx.x;
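
growDetectionsKernel (source lines 1469–1474) flattens a 2D grid so a launch can exceed the 65535-block limit per grid dimension, then pixelToRect (1458–1464) scales each packed anchor into an output rectangle. A combined sketch; RectSketch stands in for NcvRect32u:

struct RectSketch { unsigned int x, y, width, height; };

__global__ void growSketch(const unsigned int *pixelMask, unsigned int numElements,
                           RectSketch *hypotheses,
                           unsigned int rectWidth, unsigned int rectHeight,
                           float curScale)
{
    unsigned int blockId  = blockIdx.y * 65535 + blockIdx.x;   // flattened block index
    unsigned int elemAddr = blockId * blockDim.x + threadIdx.x;
    if (elemAddr >= numElements)
        return;

    unsigned int pixel = pixelMask[elemAddr];
    RectSketch r;
    r.x      = (unsigned int)(curScale * (pixel & 0xFFFF));    // low half: x
    r.y      = (unsigned int)(curScale * (pixel >> 16));       // high half: y
    r.width  = (unsigned int)(curScale * rectWidth);
    r.height = (unsigned int)(curScale * rectHeight);
    hypotheses[elemAddr] = r;
}
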
1483 NCVStatus ncvGrowDetectionsVector_device(NCVVector<Ncv32u> &pixelMask,
1484 Ncv32u numPixelMaskDetections,
1486 Ncv32u &totalDetections,
1487 Ncv32u totalMaxDetections,
1488 Ncv32u rectWidth,
1489 Ncv32u rectHeight,
1507 Ncv32u numDetsToCopy = numPixelMaskDetections;
1547 Ncv32u &dstNumRects,
1556 Ncv32u minNeighbors, //default 4
1558 Ncv32u pixelStep, //default 1
1559 Ncv32u flags, //default NCVPipeObjDet_Default
1605 Ncv32u integralWidth = d_srcImg.width() + 1;
1606 Ncv32u integralHeight = d_srcImg.height() + 1;
1608 NCVMatrixAlloc<Ncv32u> integral(gpuAllocator, integralWidth, integralHeight);
1615 NCVMatrixAlloc<Ncv32u> d_pixelMask(gpuAllocator, d_srcImg.width(), d_srcImg.height());
1618 NCVMatrixAlloc<Ncv32u> d_scaledIntegralImage(gpuAllocator, integralWidth, integralHeight);
1629 Ncv32u szTmpBufIntegral, szTmpBufSqIntegral;
1655 Ncv32u lastCheckedScale = 0;
1657 std::vector<Ncv32u> scalesVector;
1663 Ncv32u scale = (Ncv32u)scaleIter;
1709 for (Ncv32u i=0; i<scalesVector.size(); i++)
1711 Ncv32u scale = scalesVector[i];
1753 Ncv32u detectionsOnThisScale;
1764 NCVVectorReuse<Ncv32u> d_vecPixelMask(d_pixelMask.getSegment());
1770 static_cast<Ncv32u>(d_hypothesesIntermediate.length()),
1793 Ncv32u numStrongHypothesesNow = dstNumRects;
1805 for (Ncv32u j=1; j<numStrongHypothesesNow; j++)
1867 dstNumRects = static_cast<Ncv32u>(d_dstRects.length());
1904 NCVStatus ncvApplyHaarClassifierCascade_host(NCVMatrix<Ncv32u> &h_integralImage,
1906 NCVMatrixAlloc<Ncv32u> &h_pixelMask,
1907 Ncv32u &numDetections,
1914 Ncv32u pixelStep,
1944 for (Ncv32u i=0; i<anchorsRoi.height; i++)
1946 for (Ncv32u j=0; j<h_pixelMask.stride(); j++)
1954 for (Ncv32u iStage = 0; iStage < haar.NumStages; iStage++)
1957 Ncv32u numRootNodesInStage = h_HaarStages.ptr()[iStage].getNumClassifierRootNodes();
1958 Ncv32u curRootNodeOffset = h_HaarStages.ptr()[iStage].getStartClassifierRootNodeOffset();
1979 Ncv32u curNodeOffset = curRootNodeOffset;
1985 Ncv32u curNodeFeaturesNum = curFeatDesc.getNumFeatures();
1986 Ncv32u curNodeFeaturesOffs = curFeatDesc.getFeaturesOffset();
1989 for (Ncv32u iRect=0; iRect<curNodeFeaturesNum; iRect++)
1992 Ncv32u rectX, rectY, rectWidth, rectHeight;
1995 Ncv32u iioffsTL = (i + rectY) * h_integralImage.stride() + (j + rectX);
1996 Ncv32u iioffsTR = iioffsTL + rectWidth;
1997 Ncv32u iioffsBL = iioffsTL + rectHeight * h_integralImage.stride();
1998 Ncv32u iioffsBR = iioffsBL + rectWidth;
2000 Ncv32u iivalTL = h_integralImage.ptr()[iioffsTL];
2001 Ncv32u iivalTR = h_integralImage.ptr()[iioffsTR];
2002 Ncv32u iivalBL = h_integralImage.ptr()[iioffsBL];
2003 Ncv32u iivalBR = h_integralImage.ptr()[iioffsBR];
2004 Ncv32u rectSum = iivalBR - iivalBL + iivalTL - iivalTR;
2054 Ncv32u i = 0;
2068 NCVStatus ncvGrowDetectionsVector_host(NCVVector<Ncv32u> &pixelMask,
2069 Ncv32u numPixelMaskDetections,
2071 Ncv32u &totalDetections,
2072 Ncv32u totalMaxDetections,
2073 Ncv32u rectWidth,
2074 Ncv32u rectHeight,
2087 Ncv32u numDetsToCopy = numPixelMaskDetections;
2100 for (Ncv32u i=0; i<numDetsToCopy; i++)
2134 Ncv32u curMaxTreeDepth = 0;
2154 curStage.setStartClassifierRootNodeOffset(static_cast<Ncv32u>(haarClassifierNodes.size()));
2161 Ncv32u nodeId = 0;
2185 Ncv32u leftNodeOffset = tree->left[n];
2186 nodeLeft.create((Ncv32u)(h_TmpClassifierNotRootNodes.size() + leftNodeOffset - 1));
2201 Ncv32u rightNodeOffset = tree->right[n];
2202 nodeRight.create((Ncv32u)(h_TmpClassifierNotRootNodes.size() + rightNodeOffset - 1));
2207 Ncv32u tiltedVal = feature->tilted;
2210 Ncv32u featureId = 0;
2213 Ncv32u rectX = feature->rect[l].r.x;
2214 Ncv32u rectY = feature->rect[l].r.y;
2215 Ncv32u rectWidth = feature->rect[l].r.width;
2216 Ncv32u rectHeight = feature->rect[l].r.height;
2234 featureId, static_cast<Ncv32u>(haarFeatures.size()) - featureId);
2260 haar.NumStages = static_cast<Ncv32u>(haarStages.size());
2261 haar.NumClassifierRootNodes = static_cast<Ncv32u>(haarClassifierNodes.size());
2262 haar.NumClassifierTotalNodes = static_cast<Ncv32u>(haar.NumClassifierRootNodes + h_TmpClassifierNotRootNodes.size());
2263 haar.NumFeatures = static_cast<Ncv32u>(haarFeatures.size());
2266 Ncv32u offsetRoot = static_cast<Ncv32u>(haarClassifierNodes.size());
2267 for (Ncv32u i=0; i<haarClassifierNodes.size(); i++)
2274 Ncv32u newOffset = nodeLeft.getNextNodeOffset() + offsetRoot;
2282 Ncv32u newOffset = nodeRight.getNextNodeOffset() + offsetRoot;
2288 for (Ncv32u i=0; i<h_TmpClassifierNotRootNodes.size(); i++)
2295 Ncv32u newOffset = nodeLeft.getNextNodeOffset() + offsetRoot;
2303 Ncv32u newOffset = nodeRight.getNextNodeOffset() + offsetRoot;
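
The loader fix-up at source lines 2266–2303 shifts child offsets after root and deferred non-root nodes are concatenated into one array: any link that pointed into the non-root pool must move by the root count. A sketch of that retargeting under an assumed node layout (negative index = leaf is this sketch's convention, not the file's):

#include <vector>

struct NodeSketch {            // hypothetical flattened tree node
    int left, right;           // child indices; negative means leaf here
};

void fixupOffsetsSketch(std::vector<NodeSketch> &rootNodes,
                        std::vector<NodeSketch> &nonRootNodes)
{
    int offsetRoot = (int)rootNodes.size();
    auto shift = [&](NodeSketch &n) {
        if (n.left  >= 0) n.left  += offsetRoot;   // retarget into the merged array
        if (n.right >= 0) n.right += offsetRoot;
    };
    for (auto &n : rootNodes)    shift(n);
    for (auto &n : nonRootNodes) shift(n);
    rootNodes.insert(rootNodes.end(), nonRootNodes.begin(), nonRootNodes.end());
}
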
2329 Ncv32u fileVersion;
2330 readCount = fread(&fileVersion, sizeof(Ncv32u), 1, fp);
2333 Ncv32u fsize;
2334 readCount = fread(&fsize, sizeof(Ncv32u), 1, fp);
2337 Ncv32u fsizeActual = ftell(fp);
2342 Ncv32u dataOffset = 0;
2350 haar.NumStages = *(Ncv32u *)(&fdata[0]+dataOffset);
2351 dataOffset += sizeof(Ncv32u);
2352 haar.NumClassifierRootNodes = *(Ncv32u *)(&fdata[0]+dataOffset);
2353 dataOffset += sizeof(Ncv32u);
2354 haar.NumClassifierTotalNodes = *(Ncv32u *)(&fdata[0]+dataOffset);
2355 dataOffset += sizeof(Ncv32u);
2356 haar.NumFeatures = *(Ncv32u *)(&fdata[0]+dataOffset);
2357 dataOffset += sizeof(Ncv32u);
2369 Ncv32u szStages = haar.NumStages * sizeof(HaarStage64);
2370 Ncv32u szClassifiers = haar.NumClassifierTotalNodes * sizeof(HaarClassifierNode128);
2371 Ncv32u szFeatures = haar.NumFeatures * sizeof(HaarFeature64);
2384 NCVStatus ncvHaarGetClassifierSize(const cv::String &filename, Ncv32u &numStages,
2385 Ncv32u &numNodes, Ncv32u &numFeatures)
2397 Ncv32u fileVersion;
2398 readCount = fread(&fileVersion, sizeof(Ncv32u), 1, fp);
2402 Ncv32u tmp;
2403 readCount = fread(&numStages, sizeof(Ncv32u), 1, fp);
2405 readCount = fread(&tmp, sizeof(Ncv32u), 1, fp);
2407 readCount = fread(&numNodes, sizeof(Ncv32u), 1, fp);
2409 readCount = fread(&numFeatures, sizeof(Ncv32u), 1, fp);
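
ncvHaarGetClassifierSize probes the .nvbin header with a sequence of fread calls (source lines 2398–2409). A sketch mirroring the read order visible there, with error handling condensed; a real reader would also validate fileVersion:

#include <cstdio>

bool readHaarHeaderSketch(const char *filename, unsigned int &numStages,
                          unsigned int &numNodes, unsigned int &numFeatures)
{
    FILE *fp = fopen(filename, "rb");
    if (!fp) return false;

    unsigned int fileVersion = 0, tmp = 0;
    bool ok = fread(&fileVersion, sizeof(unsigned int), 1, fp) == 1
           && fread(&numStages,   sizeof(unsigned int), 1, fp) == 1
           && fread(&tmp,         sizeof(unsigned int), 1, fp) == 1  // root-node count
           && fread(&numNodes,    sizeof(unsigned int), 1, fp) == 1
           && fread(&numFeatures, sizeof(unsigned int), 1, fp) == 1;

    fclose(fp);
    return ok;
}
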
2495 Ncv32u szStages = haar.NumStages * sizeof(HaarStage64);
2496 Ncv32u szClassifiers = haar.NumClassifierTotalNodes * sizeof(HaarClassifierNode128);
2497 Ncv32u szFeatures = haar.NumFeatures * sizeof(HaarFeature64);
2499 Ncv32u dataOffset = 0;
2504 *(Ncv32u *)(&fdata[0]+dataOffset) = NVBIN_HAAR_VERSION;
2508 *(Ncv32u *)(&fdata[0]+dataOffset) = haar.NumStages;
2509 dataOffset += sizeof(Ncv32u);
2510 *(Ncv32u *)(&fdata[0]+dataOffset) = haar.NumClassifierRootNodes;
2511 dataOffset += sizeof(Ncv32u);
2512 *(Ncv32u *)(&fdata[0]+dataOffset) = haar.NumClassifierTotalNodes;
2513 dataOffset += sizeof(Ncv32u);
2514 *(Ncv32u *)(&fdata[0]+dataOffset) = haar.NumFeatures;
2515 dataOffset += sizeof(Ncv32u);
2529 Ncv32u fsize = dataOffset;
2534 dataOffset = sizeof(Ncv32u);
2535 *(Ncv32u *)(&fdata[0]+dataOffset) = fsize;
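
Serialization (source lines 2499–2535) writes the version word first, leaves a size slot second, appends the payload, and backpatches the total size at offset sizeof(Ncv32u) once dataOffset is final. A condensed sketch of that write-then-backpatch layout, with hypothetical names:

#include <cstring>
#include <vector>

void serializeSketch(std::vector<unsigned char> &fdata, unsigned int version,
                     const unsigned char *payload, unsigned int payloadSize)
{
    unsigned int dataOffset = 0;
    auto put32 = [&](unsigned int v) {
        memcpy(&fdata[dataOffset], &v, sizeof(v));
        dataOffset += sizeof(v);
    };

    fdata.resize(2 * sizeof(unsigned int) + payloadSize);
    put32(version);                      // word 0: format version
    put32(0);                            // word 1: size placeholder
    memcpy(&fdata[dataOffset], payload, payloadSize);
    dataOffset += payloadSize;

    unsigned int fsize = dataOffset;     // total bytes written
    memcpy(&fdata[sizeof(unsigned int)], &fsize, sizeof(fsize));  // backpatch the slot
}
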