中文亚洲精品无码_熟女乱子伦免费_人人超碰人人爱国产_亚洲熟妇女综合网

當前位置: 首頁 > news > 正文

web個人網(wǎng)站設(shè)計方案互聯(lián)網(wǎng)

web個人網站設計方案,互聯網,常州做網站基本流程,海淀網站制作 1. 背景: 項目中使用到了紋理進行插值的加速,因此記錄一些自己在學習tex2D的一些過程 2. 代碼: #include "cuda_runtime.h" #include "device_launch_parameters.h" #include <assert.h> #include <stdio.h>…

1. 背景:

        項目中使用到了紋理進行插值的加速,因此記錄一些自己在學習tex2D的一些過程

2. 代碼:

????????

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <cuda_fp16.h>
#include <vector>

// Aborts with file/line and the CUDA error string when a runtime call fails.
// Without this, an early failure silently poisons every later call.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        const cudaError_t cudaCheckErr_ = (call);                          \
        if (cudaCheckErr_ != cudaSuccess) {                                \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(cudaCheckErr_));                    \
            abort();                                                       \
        }                                                                  \
    } while (0)

// Texture extent sampled by Tex2DTest; main() asserts that the array it
// allocates has exactly this size.
constexpr int kTexWidth  = 2;
constexpr int kTexHeight = 2;

void Data2Half(half* pDst, const int16_t* pSrc, const int Ndots);
static __global__ void Tex2DTest(cudaTextureObject_t p_rf_data, float* pfRes1, float* pfRes2);

// Converts Ndots int16 samples to half precision, one element per thread.
// Expects a 1D launch; threads past the end of the data exit immediately.
static __global__ void data2half(half* pDst, const int16_t* pSrc, const int Ndots)
{
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid >= Ndots)
        return;
    pDst[tid] = __short2half_rn(pSrc[tid]);
}

cudaTextureObject_t m_tex   = 0;
cudaArray* m_pRFData        = nullptr;
int16_t* m_i16RFDataBuffer  = nullptr; // RF data on the device
half* m_pHalfRFDataCache    = nullptr; // RF data converted to half precision (int16 -> floating point) before upload to the texture

// Demo: uploads a 2x2 single-channel half texture holding {1,2;3,4}, samples
// it with linear filtering at integer coordinates and prints each fetch.
int main()
{
    const int nRx     = 2;
    const int Nsample = 2;
    const int IQ      = 1;
    static_assert(nRx * IQ == kTexWidth && Nsample == kTexHeight,
                  "Tex2DTest samples a kTexWidth x kTexHeight texture");

    const size_t numValues = static_cast<size_t>(nRx) * IQ * Nsample;

    // One 16-bit float channel per texel.
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDescHalf();
    CUDA_CHECK(cudaMallocArray(&m_pRFData, &channelDesc, nRx * IQ, Nsample, cudaArrayTextureGather));
    assert(m_pRFData);

    cudaResourceDesc texRes;
    memset(&texRes, 0, sizeof(cudaResourceDesc));
    texRes.resType         = cudaResourceTypeArray;
    texRes.res.array.array = m_pRFData;

    cudaTextureDesc texDescr;
    memset(&texDescr, 0, sizeof(cudaTextureDesc));
    texDescr.normalizedCoords = false;
    texDescr.filterMode       = cudaFilterModeLinear;  // important: enables hardware interpolation
    texDescr.addressMode[0]   = cudaAddressModeBorder; // out-of-range texels read as 0
    texDescr.addressMode[1]   = cudaAddressModeBorder;
    CUDA_CHECK(cudaCreateTextureObject(&m_tex, &texRes, &texDescr, NULL));

    int16_t pi16Src[nRx * Nsample * IQ] = { 1, 2, 3, 4 };

    CUDA_CHECK(cudaMalloc(&m_i16RFDataBuffer, sizeof(int16_t) * numValues));
    CUDA_CHECK(cudaMemcpy(m_i16RFDataBuffer, pi16Src, sizeof(int16_t) * numValues, cudaMemcpyHostToDevice));

    CUDA_CHECK(cudaMalloc(&m_pHalfRFDataCache, sizeof(half) * numValues));
    Data2Half(m_pHalfRFDataCache, m_i16RFDataBuffer, nRx * IQ * Nsample);

    // The source rows are tightly packed, so pitch == row width in bytes.
    CUDA_CHECK(cudaMemcpy2DToArray(m_pRFData, 0, 0, m_pHalfRFDataCache,
                                   sizeof(half) * nRx * IQ, sizeof(half) * nRx * IQ, Nsample,
                                   cudaMemcpyDeviceToDevice));

    float* pf_res1 = nullptr;
    float* pf_res2 = nullptr;
    CUDA_CHECK(cudaMalloc(&pf_res1, nRx * Nsample * sizeof(float)));
    CUDA_CHECK(cudaMemset(pf_res1, 0, nRx * Nsample * sizeof(float)));
    CUDA_CHECK(cudaMalloc(&pf_res2, nRx * Nsample * sizeof(float)));
    CUDA_CHECK(cudaMemset(pf_res2, 0, nRx * Nsample * sizeof(float)));

    // A single thread walks the whole texture so the printed order is deterministic.
    const dim3 block_dim(1, 1);
    const dim3 grid_dim(1, 1);
    Tex2DTest<<<grid_dim, block_dim>>>(m_tex, pf_res1, pf_res2);
    CUDA_CHECK(cudaGetLastError());      // launch-configuration errors
    CUDA_CHECK(cudaDeviceSynchronize()); // execution errors; also flushes device printf

    std::vector<float> vf_res_1(nRx * Nsample, 0);
    std::vector<float> vf_res_2(nRx * Nsample, 0);
    CUDA_CHECK(cudaMemcpy(vf_res_1.data(), pf_res1, sizeof(float) * vf_res_1.size(), cudaMemcpyDeviceToHost));
    CUDA_CHECK(cudaMemcpy(vf_res_2.data(), pf_res2, sizeof(float) * vf_res_2.size(), cudaMemcpyDeviceToHost));

    // Release everything that was allocated above.
    CUDA_CHECK(cudaFree(pf_res1));
    CUDA_CHECK(cudaFree(pf_res2));
    CUDA_CHECK(cudaDestroyTextureObject(m_tex));
    m_tex = 0;
    CUDA_CHECK(cudaFreeArray(m_pRFData));
    m_pRFData = nullptr;
    CUDA_CHECK(cudaFree(m_pHalfRFDataCache));
    m_pHalfRFDataCache = nullptr;
    CUDA_CHECK(cudaFree(m_i16RFDataBuffer));
    m_i16RFDataBuffer = nullptr;

    return 0;
}

// Host wrapper: launches data2half over Ndots elements on the default stream.
// pDst and pSrc must be device pointers with room for Ndots elements.
void Data2Half(half* pDst, const int16_t* pSrc, const int Ndots)
{
    // (Ndots - 1) / block.x is evaluated in unsigned arithmetic, so a
    // non-positive count would otherwise request an enormous grid.
    if (Ndots <= 0)
        return;

    const dim3 block(512, 1);
    const dim3 grid((Ndots - 1) / block.x + 1, 1);
    data2half<<<grid, block>>>(pDst, pSrc, Ndots);
    CUDA_CHECK(cudaGetLastError());
}

// Samples every integer coordinate of the kTexWidth x kTexHeight texture and
// prints the fetched value. Intended for a <<<1, 1>>> launch.
//
// The coordinates are deliberately NOT offset by 0.5: with unnormalized
// coordinates and linear filtering, integer positions fall between texels, so
// each fetch is the average of the texel and its left/upper neighbours (border
// texels read as 0). Sample at (x + 0.5f, y + 0.5f) to get unfiltered values.
//
// pfRes1: receives the fetched values, row-major, kTexWidth * kTexHeight floats.
// pfRes2: unused in this single-channel variant.
static __global__ void Tex2DTest(cudaTextureObject_t p_rf_data, float* pfRes1, float* pfRes2)
{
    for (int y = 0; y < kTexHeight; ++y)
    {
        for (int x = 0; x < kTexWidth; ++x)
        {
            const float value = tex2D<float>(p_rf_data, static_cast<float>(x), static_cast<float>(y));
            if (pfRes1 != nullptr)
                pfRes1[y * kTexWidth + x] = value;
            printf("x: %f\n", value);
        }
    }
}

3. 輸出分析:

可以看到執行結果是(按輸出順序):0.25、0.75、1.0、2.5

為什么呢?

原因是因為tex2D插值導致的,上面測試數據是

1  2

3  4

那在進行插值的時候會變成

0  0  0  0

0  1  2  0

0  3  4  0

每個點的輸出都是當前點和左上角3個點進行平均計算出來的

比如第一個輸出計算為:(1 + 0 + 0 + 0)/4 = 0.25

最后一個輸出的計算為:(1 + 2 + 3 + 4) / 4 = 2.5

4. 問題

        上面只是單獨數據實數點的計算,如果我的數據集合是復數怎么辦?

        比如一組2 * 2大小的數據對

        (1, 2, 3, 4;

         5, 6, 7, 8)

        數據實際表示含義是

        (1 + j * 2,  3 + j * 4;

         5 + j * 6,  7 + j * 8)

        這種情況下怎么做到正確插值呢,比如第一個實數點的輸出結果應該是

        (1 + 0 + 0 + 0) / 4

        最后一個實數點的輸出應該是:

        (1 + 3 + 5 + 7) / 4

        同理,最后一個虛數點的輸出應該是:
        (2 + 4 + 6 + 8) / 4

5. 解決

? ? ? ? ?

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <cuda_fp16.h>
#include <vector>

// Aborts with file/line and the CUDA error string when a runtime call fails.
// Without this, an early failure silently poisons every later call.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        const cudaError_t cudaCheckErr_ = (call);                          \
        if (cudaCheckErr_ != cudaSuccess) {                                \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(cudaCheckErr_));                    \
            abort();                                                       \
        }                                                                  \
    } while (0)

// Texture extent (in complex texels) sampled by Tex2DTest; main() asserts
// that the array it allocates has exactly this size.
constexpr int kTexWidth  = 2;
constexpr int kTexHeight = 2;

void Data2Half(half* pDst, const int16_t* pSrc, const int Ndots);
static __global__ void Tex2DTest(cudaTextureObject_t p_rf_data, float* pfRes1, float* pfRes2);

// Converts Ndots int16 samples to half precision, one element per thread.
// Expects a 1D launch; threads past the end of the data exit immediately.
static __global__ void data2half(half* pDst, const int16_t* pSrc, const int Ndots)
{
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid >= Ndots)
        return;
    pDst[tid] = __short2half_rn(pSrc[tid]);
}

cudaTextureObject_t m_tex  = 0;
cudaArray* m_pRFData       = nullptr;
int16_t* m_i16RFDataBuffer = nullptr; // RF data on the device
half* m_pHalfRFDataCache   = nullptr; // RF data converted to half precision (int16 -> floating point) before upload to the texture

// Demo: uploads a 2x2 texture of complex samples stored as interleaved
// (real, imag) half pairs, samples it with linear filtering at integer
// coordinates and prints both interpolated components of every fetch.
int main()
{
    const int nRx     = 2;
    const int Nsample = 2;
    const int IQ      = 2; // two halves (real, imag) per texel
    static_assert(nRx == kTexWidth && Nsample == kTexHeight,
                  "Tex2DTest samples a kTexWidth x kTexHeight texture");
    static_assert(IQ == 2, "a half2 texel carries exactly two components");

    const size_t numValues = static_cast<size_t>(nRx) * IQ * Nsample;

    // Two 16-bit float channels per texel, so the texture is nRx texels wide
    // (not nRx * IQ) and each component is filtered independently.
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDescHalf2();
    CUDA_CHECK(cudaMallocArray(&m_pRFData, &channelDesc, nRx, Nsample, cudaArrayTextureGather));
    assert(m_pRFData);

    cudaResourceDesc texRes;
    memset(&texRes, 0, sizeof(cudaResourceDesc));
    texRes.resType         = cudaResourceTypeArray;
    texRes.res.array.array = m_pRFData;

    cudaTextureDesc texDescr;
    memset(&texDescr, 0, sizeof(cudaTextureDesc));
    texDescr.normalizedCoords = false;
    texDescr.filterMode       = cudaFilterModeLinear;  // important: enables hardware interpolation
    texDescr.addressMode[0]   = cudaAddressModeBorder; // out-of-range texels read as 0
    texDescr.addressMode[1]   = cudaAddressModeBorder;
    CUDA_CHECK(cudaCreateTextureObject(&m_tex, &texRes, &texDescr, NULL));

    // Interleaved layout: (1 + 2j, 3 + 4j; 5 + 6j, 7 + 8j).
    int16_t pi16Src[nRx * Nsample * IQ] = { 1, 2, 3, 4, 5, 6, 7, 8 };

    CUDA_CHECK(cudaMalloc(&m_i16RFDataBuffer, sizeof(int16_t) * numValues));
    CUDA_CHECK(cudaMemcpy(m_i16RFDataBuffer, pi16Src, sizeof(int16_t) * numValues, cudaMemcpyHostToDevice));

    CUDA_CHECK(cudaMalloc(&m_pHalfRFDataCache, sizeof(half) * numValues));
    Data2Half(m_pHalfRFDataCache, m_i16RFDataBuffer, nRx * IQ * Nsample);

    // Rows are tightly packed half2 texels, so pitch == row width in bytes.
    CUDA_CHECK(cudaMemcpy2DToArray(m_pRFData, 0, 0, m_pHalfRFDataCache,
                                   sizeof(half2) * nRx, sizeof(half2) * nRx, Nsample,
                                   cudaMemcpyDeviceToDevice));

    float* pf_res1 = nullptr;
    float* pf_res2 = nullptr;
    CUDA_CHECK(cudaMalloc(&pf_res1, nRx * Nsample * sizeof(float)));
    CUDA_CHECK(cudaMemset(pf_res1, 0, nRx * Nsample * sizeof(float)));
    CUDA_CHECK(cudaMalloc(&pf_res2, nRx * Nsample * sizeof(float)));
    CUDA_CHECK(cudaMemset(pf_res2, 0, nRx * Nsample * sizeof(float)));

    // A single thread walks the whole texture so the printed order is deterministic.
    const dim3 block_dim(1, 1);
    const dim3 grid_dim(1, 1);
    Tex2DTest<<<grid_dim, block_dim>>>(m_tex, pf_res1, pf_res2);
    CUDA_CHECK(cudaGetLastError());      // launch-configuration errors
    CUDA_CHECK(cudaDeviceSynchronize()); // execution errors; also flushes device printf

    std::vector<float> vf_res_1(nRx * Nsample, 0);
    std::vector<float> vf_res_2(nRx * Nsample, 0);
    CUDA_CHECK(cudaMemcpy(vf_res_1.data(), pf_res1, sizeof(float) * vf_res_1.size(), cudaMemcpyDeviceToHost));
    CUDA_CHECK(cudaMemcpy(vf_res_2.data(), pf_res2, sizeof(float) * vf_res_2.size(), cudaMemcpyDeviceToHost));

    // Release everything that was allocated above.
    CUDA_CHECK(cudaFree(pf_res1));
    CUDA_CHECK(cudaFree(pf_res2));
    CUDA_CHECK(cudaDestroyTextureObject(m_tex));
    m_tex = 0;
    CUDA_CHECK(cudaFreeArray(m_pRFData));
    m_pRFData = nullptr;
    CUDA_CHECK(cudaFree(m_pHalfRFDataCache));
    m_pHalfRFDataCache = nullptr;
    CUDA_CHECK(cudaFree(m_i16RFDataBuffer));
    m_i16RFDataBuffer = nullptr;

    return 0;
}

// Host wrapper: launches data2half over Ndots elements on the default stream.
// pDst and pSrc must be device pointers with room for Ndots elements.
void Data2Half(half* pDst, const int16_t* pSrc, const int Ndots)
{
    // (Ndots - 1) / block.x is evaluated in unsigned arithmetic, so a
    // non-positive count would otherwise request an enormous grid.
    if (Ndots <= 0)
        return;

    const dim3 block(512, 1);
    const dim3 grid((Ndots - 1) / block.x + 1, 1);
    data2half<<<grid, block>>>(pDst, pSrc, Ndots);
    CUDA_CHECK(cudaGetLastError());
}

// Samples every integer coordinate of the kTexWidth x kTexHeight half2 texture
// and prints both fetched components, one output line per texture row.
// Intended for a <<<1, 1>>> launch.
//
// The coordinates are deliberately NOT offset by 0.5: with unnormalized
// coordinates and linear filtering, integer positions fall between texels, so
// each fetch is the average of the texel and its left/upper neighbours (border
// texels read as 0). Sample at (x + 0.5f, y + 0.5f) to get unfiltered values.
//
// pfRes1: receives the .x (real) components, row-major, kTexWidth * kTexHeight floats.
// pfRes2: receives the .y (imaginary) components, same layout.
static __global__ void Tex2DTest(cudaTextureObject_t p_rf_data, float* pfRes1, float* pfRes2)
{
    for (int y = 0; y < kTexHeight; ++y)
    {
        for (int x = 0; x < kTexWidth; ++x)
        {
            const float2 value = tex2D<float2>(p_rf_data, static_cast<float>(x), static_cast<float>(y));
            const int idx = y * kTexWidth + x;
            if (pfRes1 != nullptr)
                pfRes1[idx] = value.x;
            if (pfRes2 != nullptr)
                pfRes2[idx] = value.y;
            printf("x: %f, y: %f", value.x, value.y);
        }
        printf("\n");
    }
}

其實關鍵是在tex2D的構造

然后按照half2的方式進行排布就好了

http://www.risenshineclean.com/news/2765.html

相關(guān)文章:

  • 做網(wǎng)站的系統(tǒng)設(shè)計網(wǎng)絡(luò)營銷團隊
  • 免費推廣網(wǎng)站怎么做小學(xué)生一分鐘新聞播報
  • 營銷型網(wǎng)站建設(shè)便宜數(shù)字營銷公司排行榜
  • 關(guān)于做網(wǎng)站的策劃書電商平臺排行榜
  • 大學(xué)網(wǎng)頁設(shè)計課程鄭州seo外包服務(wù)
  • wordpress 裝主題鍵詞優(yōu)化排名
  • 一個優(yōu)秀的網(wǎng)站友情鏈接怎么交換
  • 貴陽網(wǎng)站建設(shè)方舟網(wǎng)絡(luò)最新新聞熱點話題
  • 免費建電子商務(wù)網(wǎng)站怎樣優(yōu)化關(guān)鍵詞到首頁
  • wordpress gofair汕尾網(wǎng)站seo
  • tp做網(wǎng)站引流app推廣軟件
  • 網(wǎng)站做微信鏈接怎么做企業(yè)網(wǎng)絡(luò)推廣服務(wù)
  • 上海建設(shè)交通黨建網(wǎng)站網(wǎng)絡(luò)推廣都需要做什么
  • 網(wǎng)站設(shè)計培訓(xùn)課程廣州seo網(wǎng)站推廣平臺
  • 網(wǎng)站建設(shè)優(yōu)化是什么鬼蘇州seo優(yōu)化
  • 如何把網(wǎng)站免費做靠前google代理
  • 網(wǎng)站跳轉(zhuǎn)是什么意思廣州最近爆發(fā)什么病毒
  • 網(wǎng)站建設(shè)價格裙網(wǎng)上互聯(lián)網(wǎng)推廣
  • 網(wǎng)站建設(shè) 軟文微信營銷案例
  • seo免費網(wǎng)站建設(shè)用今日頭條導(dǎo)入自己網(wǎng)站外鏈
  • 成都專業(yè)網(wǎng)站推廣公司網(wǎng)絡(luò)營銷的特點有幾個
  • 網(wǎng)站建設(shè)有哪些需要注意的關(guān)鍵細(xì)節(jié)百度推廣客服中心
  • 特價做網(wǎng)站谷歌搜索引擎入口2022
  • 網(wǎng)站開發(fā)參考文獻(xiàn)期刊seo常用工具網(wǎng)站
  • wordpress修改主題文件做seo需要哪些知識
  • 重慶企業(yè)vi設(shè)計公司安卓優(yōu)化大師下載安裝
  • 牛網(wǎng)網(wǎng)站建設(shè)互聯(lián)網(wǎng)營銷推廣怎么做
  • 蘇州市住房和城鄉(xiāng)建設(shè)局網(wǎng)站首頁手機如何做網(wǎng)站
  • 查詢建筑企業(yè)網(wǎng)站太原seo快速排名
  • 網(wǎng)站url和網(wǎng)站域名重慶森林經(jīng)典臺詞 鳳梨罐頭