库和全局变量
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <errno.h>
#include <getopt.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <netdb.h>
#include <fcntl.h>
#include <unistd.h>
#include <netinet/tcp.h>
#include <rdma/rdma_cma.h>
#include "common.h"
// Address hints and the resolved address info; both are handed to
// get_rdma_addr(), and rai is released with rdma_freeaddrinfo() in main().
static struct rdma_addrinfo hints, *rai;
// Event channel passed to rdma_create_id() for the ids created in this file.
static struct rdma_event_channel *channel;
// Port passed to get_rdma_addr(); overridden by -p.
static const char *port = "7471";
// Server address from -s; when set, main() runs the client path.
static char *dst_addr;
// Bind address from -b.
static char *src_addr;
// Timeout argument for rdma_resolve_addr()/rdma_resolve_route(); set by -t
// (the usage text labels it timeout_ms).
static int timeout = 2000;
// Set by -r; copied into conn_param.retry_count and each node's retries field.
static int retries = 2;
// Timed phases of a connection. STEP_CNT is not a phase: it is the number
// of phases and is used to size the per-step tables.
enum step {
	STEP_CREATE_ID = 0,
	STEP_BIND,
	STEP_RESOLVE_ADDR,
	STEP_RESOLVE_ROUTE,
	STEP_CREATE_QP,
	STEP_CONNECT,
	STEP_DISCONNECT,
	STEP_DESTROY,
	STEP_CNT
};

// Printable label for each phase, indexed by enum step.
static const char *step_str[STEP_CNT] = {
	[STEP_CREATE_ID]     = "create id",
	[STEP_BIND]          = "bind addr",
	[STEP_RESOLVE_ADDR]  = "resolve addr",
	[STEP_RESOLVE_ROUTE] = "resolve route",
	[STEP_CREATE_QP]     = "create qp",
	[STEP_CONNECT]       = "connect",
	[STEP_DISCONNECT]    = "disconnect",
	[STEP_DESTROY]       = "destroy"
};
// Per-connection state on the client side; one entry of the global nodes
// array is allocated for each requested connection.
struct node {
// Connection identifier filled in by rdma_create_id().
struct rdma_cm_id *id;
// Per-step timestamps: [step][0] is the start, [step][1] the end
// (written by the start_perf/end_perf macros).
struct timeval times[STEP_CNT][2];
// Set to 1 when a step fails for this node; later steps skip it.
int error;
// Reloaded from the global retries before address and route resolution.
// NOTE(review): presumably consumed by the event handler, which is not
// visible in this section - confirm.
int retries;
};
// Doubly linked list element that also carries the rdma_cm_id it refers to.
struct list_head {
// Neighbouring elements; INIT_LIST() points both at the element itself.
struct list_head *prev;
struct list_head *next;
// Connection identifier attached to this element.
struct rdma_cm_id *id;
};
// A linked list bundled with a mutex and a condition variable.
// NOTE(review): the lock/cond presumably guard and signal changes to the
// list between the event loop and the handler threads - confirm against
// the handler code, which is not visible here.
struct work_list {
pthread_mutex_t lock;
pthread_cond_t cond;
// List head; initialised with INIT_LIST() in run_server().
struct list_head list;
};
// Initialise a list element so that prev and next both point back at the
// element itself.
#define INIT_LIST(x) ((x)->prev = (x)->next = (x))
static struct work_list req_work; // work queue for connection requests
static struct work_list disc_work; // work queue for disconnects
static struct node *nodes; // per-connection array allocated by alloc_nodes()
// Whole-batch timestamps per step: [step][0] start, [step][1] end
// (written by the start_time/end_time macros).
static struct timeval times[STEP_CNT][2];
static int connections = 100; // number of connections to set up; overridden by -c
// Per-step counters. run_client() increments started[] for every request it
// issues and spins until completed[] reaches the same value.
// NOTE(review): completed[] is presumably advanced by the event-processing
// thread, which is not visible in this section - confirm.
static volatile int started[STEP_CNT];
static volatile int completed[STEP_CNT];
static struct ibv_qp_init_attr init_qp_attr; // QP creation attributes, filled in by main()
static struct rdma_conn_param conn_param; // connection parameters passed to rdma_connect()
// Timing helpers built on gettimeofday().
// start_perf/end_perf stamp the start ([0]) and end ([1]) time of step s in
// a single node n; start_time/end_time do the same in the file-scope times
// table, which covers the whole batch.
#define start_perf(n, s) gettimeofday(&((n)->times[s][0]), NULL)
#define end_perf(n, s) gettimeofday(&((n)->times[s][1]), NULL)
#define start_time(s) gettimeofday(&times[s][0], NULL)
#define end_time(s) gettimeofday(&times[s][1], NULL)
main函数的分析
/*
 * Entry point. Parses the command line, then runs as a client when a server
 * address was given with -s, or as a passive (listening) server otherwise.
 *
 * Options (each takes an argument):
 *   -s server_address   -b bind_address   -c connections
 *   -p port_number      -r retries        -t timeout_ms
 *
 * Returns the status of run_client()/run_server(), or the alloc_nodes()
 * error if the per-connection nodes cannot be set up. Exits with status 1
 * on an unknown option or when the event channel cannot be created.
 */
int main(int argc, char **argv)
{
	int op, ret;

	hints.ai_port_space = RDMA_PS_TCP;
	hints.ai_qp_type = IBV_QPT_RC;

	while ((op = getopt(argc, argv, "s:b:c:p:r:t:")) != -1) {
		switch (op) {
		case 's':
			dst_addr = optarg;
			break;
		case 'b':
			src_addr = optarg;
			break;
		case 'c':
			connections = atoi(optarg);
			break;
		case 'p':
			port = optarg;
			break;
		case 'r':
			retries = atoi(optarg);
			break;
		case 't':
			timeout = atoi(optarg);
			break;
		default:
			printf("usage: %s\n", argv[0]);
			printf("\t[-s server_address]\n");
			printf("\t[-b bind_address]\n");
			printf("\t[-c connections]\n");
			printf("\t[-p port_number]\n");
			printf("\t[-r retries]\n");
			printf("\t[-t timeout_ms]\n");
			exit(1);
		}
	}

	// QP capabilities: one send and one receive work request, one SGE each.
	init_qp_attr.cap.max_send_wr = 1;
	init_qp_attr.cap.max_recv_wr = 1;
	init_qp_attr.cap.max_send_sge = 1;
	init_qp_attr.cap.max_recv_sge = 1;
	init_qp_attr.qp_type = IBV_QPT_RC;

	// The event channel is what every rdma_cm_id reports its events on.
	channel = create_first_event_channel();
	if (!channel) {
		exit(1);
	}

	if (dst_addr) {
		// Allocate one node (and one rdma_cm_id) per connection.
		ret = alloc_nodes();
		if (ret) {
			// alloc_nodes() has already destroyed any ids it created
			// and freed the array, so nodes must not be touched again
			// here: only the channel is left to release.
			fprintf(stderr, "failed to set up connection nodes\n");
			rdma_destroy_event_channel(channel);
			return ret;
		}
		ret = run_client();
	} else {
		hints.ai_flags |= RAI_PASSIVE;
		ret = run_server();
	}

	cleanup_nodes();
	rdma_destroy_event_channel(channel);
	if (rai)
		rdma_freeaddrinfo(rai);

	show_perf();
	free(nodes);
	return ret;
}
alloc_nodes函数
/*
 * Allocate the global nodes array (one zeroed entry per requested
 * connection) and, when a destination address is configured, create an
 * rdma_cm_id for each entry on the shared event channel. STEP_CREATE_ID
 * timings are recorded per node and for the whole batch.
 *
 * Returns 0 on success, -ENOMEM if the array cannot be allocated, or the
 * rdma_create_id() failure code. On failure every id created so far is
 * destroyed, the array is freed and nodes is reset to NULL so that later
 * cleanup code cannot use or free it a second time.
 */
static int alloc_nodes(void)
{
	int ret, i;

	// calloc takes (element count, element size) in that order.
	nodes = calloc(connections, sizeof *nodes);
	if (!nodes)
		return -ENOMEM;

	printf("creating id\n");
	start_time(STEP_CREATE_ID);
	for (i = 0; i < connections; i++) {
		start_perf(&nodes[i], STEP_CREATE_ID);
		if (dst_addr) {
			// The node itself is the id's user context.
			ret = rdma_create_id(channel, &nodes[i].id, &nodes[i],
					     hints.ai_port_space);
			if (ret)
				goto err;
		}
		end_perf(&nodes[i], STEP_CREATE_ID);
	}
	end_time(STEP_CREATE_ID);
	return 0;

err:
	// Entry i failed, so only entries [0, i) hold an id to destroy.
	while (--i >= 0)
		rdma_destroy_id(nodes[i].id);
	free(nodes);
	nodes = NULL;
	return ret;
}
run_client函数,配置信息,创建节点,统计每个过程的时间信息。
/*
 * Client side of the benchmark. Resolves the addresses, starts the event
 * thread, then drives every node through the phases bind (only when a
 * source address was given), resolve address, resolve route, create QP,
 * connect and disconnect, timing each phase per node and for the batch.
 *
 * A node whose call fails is flagged with error = 1 and skipped by all later
 * phases. For the phases that are waited on (resolve addr, resolve route,
 * connect, disconnect), started[] counts the requests issued and the
 * function spins until completed[] reaches the same value.
 * NOTE(review): completed[] is presumably advanced by process_events() in
 * the event thread, which is not visible in this section - confirm.
 *
 * Returns non-zero if address lookup or thread creation fails; otherwise
 * returns whatever the most recent call stored in ret, so a failure on an
 * earlier node can be hidden by a later success.
 */
static int run_client(void)
{
	pthread_t event_thread;
	int i, ret;

	ret = get_rdma_addr(src_addr, dst_addr, port, &hints, &rai);
	if (ret) {
		printf("getaddrinfo error: %s\n", gai_strerror(ret));
		return ret;
	}

	// Max outstanding RDMA read/atomic operations accepted from the peer.
	conn_param.responder_resources = 1;
	// Max outstanding RDMA read/atomic operations issued to the peer.
	conn_param.initiator_depth = 1;
	// How often a data transfer is retried on the connection after an error.
	conn_param.retry_count = retries;
	// Private data (and its length) sent to the peer with the connect request.
	conn_param.private_data = rai->ai_connect;
	conn_param.private_data_len = rai->ai_connect_len;

	// Connection-manager events are processed on a separate thread.
	ret = pthread_create(&event_thread, NULL, process_events, NULL);
	if (ret) {
		// pthread_create() returns the error number and leaves errno
		// alone, so perror() would print an unrelated message.
		fprintf(stderr, "failure creating event thread: %s\n",
			strerror(ret));
		return ret;
	}

	if (src_addr) {
		printf("binding source address\n");
		start_time(STEP_BIND);
		for (i = 0; i < connections; i++) {
			start_perf(&nodes[i], STEP_BIND);
			// Bind the id to the local source address.
			ret = rdma_bind_addr(nodes[i].id, rai->ai_src_addr);
			if (ret) {
				perror("failure bind addr");
				nodes[i].error = 1;
				continue;
			}
			end_perf(&nodes[i], STEP_BIND);
		}
		end_time(STEP_BIND);
	}

	printf("resolving address\n");
	start_time(STEP_RESOLVE_ADDR);
	for (i = 0; i < connections; i++) {
		if (nodes[i].error)
			continue;
		nodes[i].retries = retries;
		start_perf(&nodes[i], STEP_RESOLVE_ADDR);
		// Map the destination (and optional source) address to an RDMA
		// address.
		ret = rdma_resolve_addr(nodes[i].id, rai->ai_src_addr,
					rai->ai_dst_addr, timeout);
		if (ret) {
			perror("failure getting addr");
			nodes[i].error = 1;
			continue;
		}
		started[STEP_RESOLVE_ADDR]++;
	}
	while (started[STEP_RESOLVE_ADDR] != completed[STEP_RESOLVE_ADDR])
		sched_yield();
	end_time(STEP_RESOLVE_ADDR);

	printf("resolving route\n");
	start_time(STEP_RESOLVE_ROUTE);
	for (i = 0; i < connections; i++) {
		if (nodes[i].error)
			continue;
		nodes[i].retries = retries;
		start_perf(&nodes[i], STEP_RESOLVE_ROUTE);
		// Resolve an RDMA route to the destination.
		ret = rdma_resolve_route(nodes[i].id, timeout);
		if (ret) {
			perror("failure resolving route");
			nodes[i].error = 1;
			continue;
		}
		started[STEP_RESOLVE_ROUTE]++;
	}
	while (started[STEP_RESOLVE_ROUTE] != completed[STEP_RESOLVE_ROUTE])
		sched_yield();
	end_time(STEP_RESOLVE_ROUTE);

	printf("creating qp\n");
	start_time(STEP_CREATE_QP);
	for (i = 0; i < connections; i++) {
		if (nodes[i].error)
			continue;
		start_perf(&nodes[i], STEP_CREATE_QP);
		// Create the QP for this id; no protection domain is supplied.
		ret = rdma_create_qp(nodes[i].id, NULL, &init_qp_attr);
		if (ret) {
			perror("failure creating qp");
			nodes[i].error = 1;
			continue;
		}
		end_perf(&nodes[i], STEP_CREATE_QP);
	}
	end_time(STEP_CREATE_QP);

	printf("connecting\n");
	start_time(STEP_CONNECT);
	for (i = 0; i < connections; i++) {
		if (nodes[i].error)
			continue;
		start_perf(&nodes[i], STEP_CONNECT);
		// Send the connection request to the server.
		ret = rdma_connect(nodes[i].id, &conn_param);
		if (ret) {
			perror("failure rconnecting");
			nodes[i].error = 1;
			continue;
		}
		started[STEP_CONNECT]++;
	}
	while (started[STEP_CONNECT] != completed[STEP_CONNECT])
		sched_yield();
	end_time(STEP_CONNECT);

	printf("disconnecting\n");
	start_time(STEP_DISCONNECT);
	for (i = 0; i < connections; i++) {
		if (nodes[i].error)
			continue;
		start_perf(&nodes[i], STEP_DISCONNECT);
		// Tear the connection down first, then release its QP.
		rdma_disconnect(nodes[i].id);
		rdma_destroy_qp(nodes[i].id);
		started[STEP_DISCONNECT]++;
	}
	while (started[STEP_DISCONNECT] != completed[STEP_DISCONNECT])
		sched_yield();
	end_time(STEP_DISCONNECT);

	return ret;
}
run_server函数:初始化请求/断开两个工作队列及其互斥锁和条件变量,启动对应的处理线程,创建监听用的rdma_cm_id并绑定地址、开始监听,最后进入事件处理循环。
/*
 * Server side of the benchmark. Initialises the request and disconnect work
 * lists with their mutexes and condition variables, starts one handler
 * thread for each list, creates a listening rdma_cm_id bound to the resolved
 * local address and then runs process_events() on the calling thread.
 *
 * Returns non-zero as soon as any setup step fails. The listening id is
 * destroyed on every path after it has been created. Otherwise returns the
 * (zero) result of rdma_listen() once process_events() returns.
 */
static int run_server(void)
{
	pthread_t req_thread, disc_thread;
	struct rdma_cm_id *listen_id;
	int ret;

	INIT_LIST(&req_work.list);
	INIT_LIST(&disc_work.list);

	// The pthread_* calls below return the error number and do not set
	// errno, so failures are reported with strerror(ret), not perror().
	ret = pthread_mutex_init(&req_work.lock, NULL);
	if (ret) {
		fprintf(stderr, "initializing mutex for req work: %s\n",
			strerror(ret));
		return ret;
	}
	ret = pthread_mutex_init(&disc_work.lock, NULL);
	if (ret) {
		fprintf(stderr, "initializing mutex for disc work: %s\n",
			strerror(ret));
		return ret;
	}

	ret = pthread_cond_init(&req_work.cond, NULL);
	if (ret) {
		fprintf(stderr, "initializing cond for req work: %s\n",
			strerror(ret));
		return ret;
	}
	ret = pthread_cond_init(&disc_work.cond, NULL);
	if (ret) {
		fprintf(stderr, "initializing cond for disc work: %s\n",
			strerror(ret));
		return ret;
	}

	// One handler thread for the request list, one for the disconnect list.
	ret = pthread_create(&req_thread, NULL, req_handler_thread, NULL);
	if (ret) {
		fprintf(stderr, "failed to create req handler thread: %s\n",
			strerror(ret));
		return ret;
	}
	ret = pthread_create(&disc_thread, NULL, disc_handler_thread, NULL);
	if (ret) {
		fprintf(stderr,
			"failed to create disconnect handler thread: %s\n",
			strerror(ret));
		return ret;
	}

	ret = rdma_create_id(channel, &listen_id, NULL, hints.ai_port_space);
	if (ret) {
		perror("listen request failed");
		return ret;
	}

	ret = get_rdma_addr(src_addr, dst_addr, port, &hints, &rai);
	if (ret) {
		printf("getrdmaaddr error: %s\n", gai_strerror(ret));
		goto out;
	}

	ret = rdma_bind_addr(listen_id, rai->ai_src_addr);
	if (ret) {
		perror("bind address failed");
		goto out;
	}

	ret = rdma_listen(listen_id, 0);
	if (ret) {
		perror("failure trying to listen");
		goto out;
	}

	// Handle connection-manager events on this thread.
	process_events(NULL);
out:
	rdma_destroy_id(listen_id);
	return ret;
}
总结:主要还是统计每个阶段耗费的时间。
rdma_cm_id类似于socket(个人感觉)。