这几天在调试网卡驱动,有很多地方不清楚。网卡驱动部分主要有两个很重要的结构体:struct net_device 和 struct sk_buff。驱动的大部分操作都是围绕这两个结构体进行的,包括添加协议头尾、去除协议头尾等。为了搞清楚协议栈如何处理数据包,周末闲来无事就看了看内核代码来了解这部分内容,并做了简要记录:
/*
*sk_buff->h :传输层头 :udp头和tcp头
*sk_buff->nh :网络层头 :ip头
*sk_buff->mac :数据链路层头 :mac头
*
*
*sk_buff->head :指向数据缓冲区头部
*sk_buff->data :指向实际数据的头部
*sk_buff->tail :指向实际数据的尾部
*sk_buff->end :指向数据缓冲区尾部
*
*
*sk_buff控制区 :struct sk_buff所在的区域
*线性数据区 :数据缓冲区域 : sk_buff->head~sk_buff->end 之间的区域
*非线性数据区 :数据缓冲区域的补充区域 :skb_shared_info区域
*
*
*sk_buff->truesize:线性数据区+非线性数据区+sizeof(struct sk_buff)
*sk_buff->len :线性数据区+非线性数据区
*sk_buff->data_len:非线性数据区
*
*
*一个完整的网络帧(sk_buff)包括:
* 线性数据区 + 非线性数据区 + sk_buff控制区
*
* skb_clone() :只复制sk_buff控制区,新分配的sk_buff与原来的sk_buff共享线性数据区和非线性数据区
*
* pskb_copy() :复制sk_buff控制区 + 线性数据区,共享非线性数据区
*
* skb_copy() :复制sk_buff控制区 + 线性数据区 + 非线性数据区(非线性数据会被拷入新缓冲区,得到的sk_buff是线性的)
*
*
*
*
*/
源码附上:
/*
 * struct sk_buff - control block describing one received or transmitted packet.
 *
 * The structure holds bookkeeping only; the packet bytes live in a separate
 * buffer reached through head/data/tail/end. Its members are updated as the
 * packet is handed from one protocol layer to the next.
 */
struct sk_buff {
/* These two members must be first. */
struct sk_buff *next;
struct sk_buff *prev;
struct sk_buff_head *list;
struct sock *sk;
struct timeval stamp;
struct net_device *dev;
struct net_device *input_dev;
struct net_device *real_dev;
union {
struct tcphdr *th;
struct udphdr *uh;
struct icmphdr *icmph;
struct igmphdr *igmph;
struct iphdr *ipiph;
struct ipv6hdr *ipv6h;
unsigned char *raw;
} h; /* transport-layer header */
union {
struct iphdr *iph;
struct ipv6hdr *ipv6h;
struct arphdr *arph;
unsigned char *raw;
} nh; /* network-layer header */
union {
unsigned char *raw;
} mac; /* link-layer header */
struct dst_entry *dst; /* routing information for reaching the destination, plus other network characteristics */
struct sec_path *sp;
/*
* This is the control buffer. It is free to use for every
* layer. Please put your private variables there. If you
* want to keep them across layers you have to do a skb_clone()
* first. This is owned by whoever has the skb queued ATM.
*/
char cb[40];
/*
*
* struct sk_buff carries no packet payload itself, only control information;
* the payload sits in a separate memory block reached through the data
* pointers below. That block holds linear and non-linear data, so
* len = length(linear data) + length(non-linear data).
*
*/
unsigned int len, /* len: length of the whole data area (linear + non-linear) */
data_len, /* data_len: length of the non-linear data only */
mac_len,
csum;
unsigned char local_df,
cloned:1,
nohdr:1, /* only the data area is referenced */
pkt_type,
ip_summed;
__u32 priority;
unsigned short protocol,
security;
void (*destructor)(struct sk_buff *skb);
#ifdef CONFIG_NETFILTER
unsigned long nfmark; /* nfmark: used for communication between netfilter hooks */
__u32 nfcache;
__u32 nfctinfo;
struct nf_conntrack *nfct;
#ifdef CONFIG_NETFILTER_DEBUG
unsigned int nf_debug;
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
struct nf_bridge_info *nf_bridge;
#endif
#endif /* CONFIG_NETFILTER */
#if defined(CONFIG_HIPPI)
union {
__u32 ifield;
} private;
#endif
#ifdef CONFIG_NET_SCHED
__u32 tc_index; /* traffic control index */
#ifdef CONFIG_NET_CLS_ACT
__u32 tc_verd; /* traffic control verdict */
__u32 tc_classid; /* traffic control classid */
#endif
#endif
/*
* Important!
*
* These elements must be at the end, see alloc_skb() for details.
*
*/
unsigned int truesize;
atomic_t users; /* reference count on this sk_buff */
unsigned char *head, /* start of the allocated linear buffer */
*data, /* start of the actual packet data */
*tail, /* end of the actual packet data */
*end; /* end of the allocated buffer */
};
/**
 *	skb_copy	-	make a fully private duplicate of an sk_buff
 *	@skb: buffer to copy
 *	@gfp_mask: allocation priority
 *
 *	Duplicates both the &sk_buff and all of its data. Use this when the
 *	caller intends to modify the packet data and therefore needs its own
 *	copy. Returns %NULL on allocation failure, otherwise a pointer to the
 *	new buffer, which has a reference count of 1.
 *
 *	As a by-product a non-linear &sk_buff is converted to a linear one:
 *	the new buffer is sized for the source's linear area plus its
 *	non-linear data, and every byte is copied into it. Because of that
 *	cost this function is not recommended when only the header is going
 *	to be modified; use pskb_copy() instead.
 */
struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
{
	/* Bytes between the start of the buffer and the start of the data. */
	const int headroom = skb->data - skb->head;
	struct sk_buff *copy;

	/* Room for the whole linear area plus the non-linear data. */
	copy = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
	if (copy == NULL)
		return NULL;

	/* Leave the same headroom in the copy as in the source ... */
	skb_reserve(copy, headroom);
	/* ... and extend the data area to the full packet length. */
	skb_put(copy, skb->len);

	copy->csum = skb->csum;
	copy->ip_summed = skb->ip_summed;

	/*
	 * Copy from a negative offset so the headroom bytes are carried
	 * over together with the packet data, starting at the copy's head.
	 */
	if (skb_copy_bits(skb, -headroom, copy->head, headroom + skb->len) != 0)
		BUG();

	copy_skb_header(copy, skb);
	return copy;
}
/**
 *	pskb_copy	-	duplicate an sk_buff, giving it a private head
 *	@skb: buffer to copy
 *	@gfp_mask: allocation priority
 *
 *	Duplicates the &sk_buff and the part of its data held in the linear
 *	(header) area. Fragmented data is not copied; it stays shared between
 *	the original and the copy. Use this when only the header of the
 *	&sk_buff is going to be modified and a private copy of that header is
 *	needed. Returns %NULL on allocation failure, otherwise a pointer to
 *	the new buffer, which has a reference count of 1.
 */
struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
{
	/* The new linear buffer is the same size as the source's. */
	struct sk_buff *copy = alloc_skb(skb->end - skb->head, gfp_mask);

	if (copy == NULL)
		return NULL;

	/* Same headroom as the source, then room for the linear data. */
	skb_reserve(copy, skb->data - skb->head);
	skb_put(copy, skb_headlen(skb));

	/* At this point copy->len is the linear length just reserved. */
	memcpy(copy->data, skb->data, copy->len);

	copy->csum = skb->csum;
	copy->ip_summed = skb->ip_summed;

	/* From here on the lengths also account for the shared fragments. */
	copy->data_len = skb->data_len;
	copy->len = skb->len;

	if (skb_shinfo(skb)->nr_frags != 0) {
		int frag = 0;

		/* Share each page fragment, taking a page reference per entry. */
		while (frag < skb_shinfo(skb)->nr_frags) {
			skb_shinfo(copy)->frags[frag] = skb_shinfo(skb)->frags[frag];
			get_page(skb_shinfo(copy)->frags[frag].page);
			frag++;
		}
		skb_shinfo(copy)->nr_frags = frag;
	}

	if (skb_shinfo(skb)->frag_list != NULL) {
		/* Point at the same fragment list as the source. */
		skb_shinfo(copy)->frag_list = skb_shinfo(skb)->frag_list;
		skb_clone_fraglist(copy);
	}

	copy_skb_header(copy, skb);
	return copy;
}
/**
 *	skb_clone	-	duplicate an sk_buff
 *	@skb: buffer to clone
 *	@gfp_mask: allocation priority
 *
 *	Duplicate an &sk_buff. Only the control structure is duplicated: the
 *	clone's head/data/tail/end pointers have the same values as the
 *	original's, so both copies share the same packet data but not the
 *	structure. The new one is not owned by a socket and has a reference
 *	count of 1. If the allocation fails the function returns %NULL,
 *	otherwise the new buffer is returned.
 *
 *	If this function is called from an interrupt gfp_mask() must be
 *	%GFP_ATOMIC.
 */
struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
{
	struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);

	if (!n)
		return NULL;

/* Copy one control-structure member from the original into the clone. */
#define C(x) n->x = skb->x

	/* The clone starts out unqueued and not owned by any socket. */
	n->next = n->prev = NULL;
	n->list = NULL;
	n->sk = NULL;
	C(stamp);
	C(dev);
	C(real_dev);
	C(h);
	C(nh);
	C(mac);
	C(dst);
	dst_clone(skb->dst);
	C(sp);
#ifdef CONFIG_INET
	secpath_get(skb->sp);
#endif
	memcpy(n->cb, skb->cb, sizeof(skb->cb));
	C(len);
	C(data_len);
	/*
	 * NOTE(review): mac_len is declared in struct sk_buff but is not
	 * copied here - confirm whether the clone is expected to inherit it.
	 */
	C(csum);
	C(local_df);
	n->cloned = 1;
	n->nohdr = 0;
	C(pkt_type);
	C(ip_summed);
	C(priority);
	C(protocol);
	C(security);
	n->destructor = NULL;
#ifdef CONFIG_NETFILTER
	C(nfmark);
	C(nfcache);
	C(nfct);
	nf_conntrack_get(skb->nfct);
	C(nfctinfo);
#ifdef CONFIG_NETFILTER_DEBUG
	C(nf_debug);
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
	C(nf_bridge);
	nf_bridge_get(skb->nf_bridge);
#endif
#endif /*CONFIG_NETFILTER*/
#if defined(CONFIG_HIPPI)
	C(private);
#endif
#ifdef CONFIG_NET_SCHED
	C(tc_index);
#ifdef CONFIG_NET_CLS_ACT
	/*
	 * Build the clone's verdict word step by step. Each step must start
	 * from the value produced by the previous one (n->tc_verd); starting
	 * every step from skb->tc_verd would discard the earlier results and
	 * leave only the last macro applied.
	 */
	n->tc_verd = SET_TC_VERD(skb->tc_verd,0);
	n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
	n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
	C(input_dev);
	C(tc_classid);
#endif
#endif
	C(truesize);
	atomic_set(&n->users, 1);
	/* Same buffer pointers as the original: the data area is shared. */
	C(head);
	C(data);
	C(tail);
	C(end);

	/* One more user of the shared data area; mark the original as cloned too. */
	atomic_inc(&(skb_shinfo(skb)->dataref));
	skb->cloned = 1;

#undef C
	return n;
}