品牌推廣網站怎樣做關鍵詞優化排名查詢
一、rbd內核驅動寫入流程
1)初始化
首先是rbd驅動的初始化工作:包括驗證libceph的兼容性、分配內存、在sysfs中創建塊設備控制文件,以及創建工作隊列rbd_wq;每個請求對應的work則在rbd_init_request中通過INIT_WORK初始化。
module_init(rbd_init);

/*
 * Module entry point (excerpt; elided parts are marked with comments).
 *
 * Checks libceph compatibility, initializes the slab state, allocates the
 * rbd_wq workqueue and registers the sysfs entries.  Returns -EINVAL when
 * libceph is incompatible, -ENOMEM when the workqueue cannot be allocated,
 * or the error reported by rbd_slab_init()/rbd_sysfs_init().
 *
 * The slab state is initialized before the workqueue so that a workqueue
 * allocation failure can unwind through err_out_slab, and so that a slab
 * failure can return directly without leaking the workqueue.
 */
static int __init rbd_init(void)
{
	int rc;

	/* Compatibility check against libceph. */
	if (!libceph_compatible(NULL)) {
		rbd_warn(NULL, "libceph incompatibility (quitting)");
		return -EINVAL;
	}

	/* Initialize the memory allocator state used by rbd. */
	rc = rbd_slab_init();
	if (rc)
		return rc;

	/* Create the workqueue that block-layer requests are queued on. */
	rbd_wq = alloc_workqueue(RBD_DRV_NAME, WQ_MEM_RECLAIM, 0);
	if (!rbd_wq) {
		rc = -ENOMEM;
		goto err_out_slab;
	}

	/* ... (code elided in this excerpt) ... */

	/* Creates /sys/bus/rbd/. */
	rc = rbd_sysfs_init();
	if (rc)
		goto err_out_blkdev;

	/* ... (success return and err_out_* unwind labels elided) ... */
}

/*
 * Per-request initialization hook (installed as .init_request in
 * rbd_mq_ops).  Binds the work item stored in the request's pdu to
 * rbd_queue_workfn(), which later processes the request.  Always
 * returns 0.
 */
static int rbd_init_request(void *data, struct request *rq,
			    unsigned int hctx_idx, unsigned int request_idx,
			    unsigned int numa_node)
{
	struct work_struct *work = blk_mq_rq_to_pdu(rq);

	/* Initialize the work item; it is handled by rbd_queue_workfn(). */
	INIT_WORK(work, rbd_queue_workfn);
	return 0;
}
2)塊設備創建、工作隊列中啟動work
添加塊設備時,首先創建一個rbd client用來通信,然後選擇一個pool(存儲池)去創建rbd設備,創建完成後調用rbd_dev_device_setup初始化rbd設備。在初始化塊設備的時候會啟用工作隊列rbd_wq,並將通用塊設備層的請求轉化為一個work添加到rbd_wq工作隊列中,然後由cpu調度執行工作隊列rbd_wq中的work。work對應的處理函數為rbd_queue_workfn,該work用於處理通用塊設備層的IO請求。
啟動work的調用關係:rbd_dev_device_setup → rbd_init_disk → rbd_mq_ops → rbd_init_request → rbd_queue_workfn處理函數
static BUS_ATTR(add, S_IWUSR, NULL, rbd_add);
static BUS_ATTR(remove, S_IWUSR, NULL, rbd_remove);
static BUS_ATTR(add_single_major, S_IWUSR, NULL, rbd_add_single_major);
static BUS_ATTR(remove_single_major, S_IWUSR, NULL, rbd_remove_single_major);static int rbd_queue_rq(struct blk_mq_hw_ctx *hctx,const struct blk_mq_queue_data *bd)
{struct request *rq = bd->rq;struct work_struct *work = blk_mq_rq_to_pdu(rq); //通用塊設(shè)備層請求轉(zhuǎn)為workqueue_work(rbd_wq, work); //將work加入到工作隊列,工作隊列中的work由cpu調(diào)度處理return BLK_MQ_RQ_QUEUE_OK;
}static ssize_t rbd_add(struct bus_type *bus,const char *buf,size_t count)
{if (single_major)return -EINVAL;return do_rbd_add(bus, buf, count);
}static ssize_t do_rbd_add(struct bus_type *bus,const char *buf,size_t count)
{.....rbdc = rbd_get_client(ceph_opts); //獲取或創(chuàng)建rbd_clientif (IS_ERR(rbdc)) {rc = PTR_ERR(rbdc);goto err_out_args;}/* pick the pool */rc = rbd_add_get_pool_id(rbdc, spec->pool_name); //選擇存儲池if (rc < 0) {if (rc == -ENOENT)pr_info("pool %s does not exist\n", spec->pool_name);goto err_out_client;}spec->pool_id = (u64)rc;rbd_dev = rbd_dev_create(rbdc, spec, rbd_opts); //創(chuàng)建rbd設(shè)備down_write(&rbd_dev->header_rwsem);
......rc = rbd_dev_image_probe(rbd_dev, 0); //探針更多的是檢查rbd image是否被mapif (rc < 0) {up_write(&rbd_dev->header_rwsem);goto err_out_rbd_dev;}
......rc = rbd_dev_device_setup(rbd_dev); //包括obj->pg映射等static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
{int ret;
....../* Set up the blkdev mapping. */ret = rbd_init_disk(rbd_dev); ......
}static int rbd_init_disk(struct rbd_device *rbd_dev)
{struct gendisk *disk;struct request_queue *q;u64 segment_size;int err;
.....memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set));rbd_dev->tag_set.ops = &rbd_mq_ops; //rbd_dev初始化rbd_dev->tag_set.queue_depth = rbd_dev->opts->queue_depth;rbd_dev->tag_set.numa_node = NUMA_NO_NODE;
.....
}static struct blk_mq_ops rbd_mq_ops = {.queue_rq = rbd_queue_rq,.init_request = rbd_init_request, //調(diào)用rbd_init_request
};static int rbd_init_request(void *data, struct request *rq,unsigned int hctx_idx, unsigned int request_idx,unsigned int numa_node)
{struct work_struct *work = blk_mq_rq_to_pdu(rq);INIT_WORK(work, rbd_queue_workfn); //通過work_struct啟動線程return 0;
}
3)work處理函數rbd_queue_workfn內流程分析
從上層取出通用塊設備層請求後,轉換為image請求,再從image請求批量轉為object請求,再計算出object到pg、pg到osd的映射關係。
3.1 獲取通用塊設備層信息
在rbd_queue_workfn中,通過blk_mq_rq_from_pdu獲取到通用塊設備層IO請求rq;通過blk_rq_bytes(rq)獲取到請求中需要寫入的數據長度length(length表示的是客戶端需要寫到磁盤的總數據長度);通過blk_rq_pos(rq)獲取塊設備寫入的起始扇區,左移SECTOR_SHIFT位後得到以字節為單位的偏移量offset。
static void rbd_queue_workfn(struct work_struct *work)
{struct request *rq = blk_mq_rq_from_pdu(work); //通用塊設(shè)備層請求struct rbd_device *rbd_dev = rq->q->queuedata;struct rbd_img_request *img_request;struct ceph_snap_context *snapc = NULL;u64 offset = (u64)blk_rq_pos(rq) << SECTOR_SHIFT; //塊設(shè)備的偏移量u64 length = blk_rq_bytes(rq); //enum obj_operation_type op_type;
.....
}
3.2 通用塊設備層信息轉換為image請求,image請求批量轉換為object請求
在rbd_queue_workfn中,從通用塊設備層請求中獲取到塊設備偏移offset和長度length後,再使用這些參數來創建img_request並填充img_request→offset,然後調用rbd_img_request_fill函數。在該函數中,基於rados object的大小(4M)與rados對象在rbd中的segment排列對請求進行拆分,最終將rbd_img_request拆分成多個rbd_obj_request對象。通過這樣的過程,實現從linux內核的通用塊請求到ceph rados object的轉換。
static void rbd_queue_workfn(struct work_struct *work)
{struct request *rq = blk_mq_rq_from_pdu(work);struct rbd_device *rbd_dev = rq->q->queuedata;struct rbd_img_request *img_request;u64 offset = (u64)blk_rq_pos(rq) << SECTOR_SHIFT; //塊設(shè)備偏移u64 length = blk_rq_bytes(rq); //長度
......img_request = rbd_img_request_create(rbd_dev, offset, length, op_type, //創(chuàng)建img_requestsnapc); img_request->offset = offset; //填充img_request→offsetresult = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO, //將rbd_img_request劃分為一個個rbd_obj_requestrq->bio);
.....
}static int rbd_img_request_fill(struct rbd_img_request *img_request,enum obj_request_type type,void *data_desc)
{struct rbd_obj_request *obj_request = NULL;u64 img_offset;img_offset = img_request->offset; //塊設(shè)備當(dāng)前寫入的偏移位置resid = img_request->length; //待寫入的長度while (resid) {
......object_name = rbd_segment_name(rbd_dev, img_offset); //對象名length = rbd_segment_length(rbd_dev, img_offset, resid); //長度obj_request = rbd_obj_request_create(object_name, //創(chuàng)建obj_request對象offset, length, type);
......img_offset += length; //偏移增加lengthresid -= length;
......
}
3.3 rbd塊設備offset到rados object的映射
rbd塊設備到rados對象的映射是根據rados對象的大小以及當前塊設備的偏移量來決定的;rados對象名由前綴rbd_data.$image_id和16位十六進制序號構成,即rbd_data.$image_id.<16位十六進制序號>。
3.3.1 rados對象大小與命名方式
每個rbd塊設備都定義了一個以2為底的指數來表示每個rbd對象的大小,這個指數稱為rbd的obj order。obj order默認值為22,因此每個rbd對象大小為2^22 Bytes,即每個rados對象大小為4MB。