这几天在调试网卡驱动,有很多地方不清楚。网卡驱动中有两个非常重要的结构体:struct net_device 和 struct sk_buff。驱动的大部分操作都是围绕这两个结构体进行的,包括添加协议头尾、去除协议头尾等。为了搞清楚协议栈如何处理数据包,周末闲来无事看了看内核代码来了解这部分内容,并做了简要记录:

/*
 *sk_buff->h		:传输层头	 :udp头和tcp头
 *sk_buff->nh		:网络层头	 :ip头
 *sk_buff->mac	    :数据链路层头 :mac头
 *
 *
 *sk_buff->head	:指向数据缓冲区头部
 *sk_buff->data	:指向实际数据的头部
 *sk_buff->tail	:指向实际数据的尾部
 *sk_buff->end	:指向数据缓冲区尾部
 *
 *
 *sk_buff控制区	:struct sk_buff所在的区域
 *线性数据区		:数据缓冲区域 : sk_buff->head~sk_buff->end 之间的区域
 *非线性数据区	:数据缓冲区域的补充区域 :skb_shared_info区域
 *
 *
 *sk_buff->truesize:线性数据区+非线性数据区+sizeof(struct sk_buff)
 *sk_buff->len     :线性数据区+非线性数据区
 *sk_buff->data_len:非线性数据区
 *
 *
 *一个完整的网络帧(sk_buff)包括:
 *        线性数据区 + 非线性数据区 + sk_buff控制区
 *
 * skb_clone() :只复制sk_buff控制区,新分配的sk_buff和原来的sk_buff共享线性数据区和非线性数据区
 *
 * pskb_copy() :复制sk_buff控制区 + 线性数据区,共享非线性数据区
 *
 * skb_copy()  :复制sk_buff控制区 + 线性数据区 + 非线性数据区
 *
 *
 *
 *
 */

源码附上: 

struct sk_buff { /* Control information describing a packet being received or sent; its members are updated as the packet is handed from one layer to the next. */
	/* These two members must be first. */
	struct sk_buff		*next;
	struct sk_buff		*prev;

	struct sk_buff_head	*list;
	struct sock			*sk;
	struct timeval			stamp;
	struct net_device		*dev;
	struct net_device		*input_dev;
	struct net_device		*real_dev;

	union {
		struct tcphdr		*th;
		struct udphdr		*uh;
		struct icmphdr	*icmph;
		struct igmphdr	*igmph;
		struct iphdr		*ipiph;
		struct ipv6hdr	*ipv6h;
		unsigned char		*raw;
	} h;								/* transport-layer header (e.g. TCP/UDP) */

	union {
		struct iphdr		*iph;
		struct ipv6hdr	*ipv6h;
		struct arphdr		*arph;
		unsigned char		*raw;
	} nh;							/* network-layer header (e.g. IP) */

	union {
	  	unsigned char 	*raw;
	} mac;							/* link-layer (MAC) header */

	struct  dst_entry		*dst;	/* routing information for reaching the destination, plus some other network characteristics */
	struct	sec_path		*sp;

	/*
	 * This is the control buffer. It is free to use for every
	 * layer. Please put your private variables there. If you
	 * want to keep them across layers you have to do a skb_clone()
	 * first. This is owned by whoever has the skb queued ATM.
	 */
	char					 cb[40];

	
	/*
 	 *
	 * struct sk_buff itself holds no packet data, only control
	 * information; the data lives in a separate memory block reached
	 * through the data pointer below. That block carries the linear
	 * and the non-linear data, so
	 * len == length(linear data) + length(non-linear data).
	 *
	 */
	unsigned int		len,	/* len: length of the whole data area. An skb is made up of the sk_buff control structure + linear data + non-linear data (skb_shared_info). */
					data_len, /* data_len: length of the non-linear data only */
					mac_len,
					csum;
	unsigned char		local_df,
					cloned:1,		
					nohdr:1,		/* only the data area is referenced */
					pkt_type,
					ip_summed;
	__u32			priority;
	unsigned short	protocol,
					security;

	void				(*destructor)(struct sk_buff *skb);
	
#ifdef CONFIG_NETFILTER
        unsigned long		nfmark;			/* nfmark: used for communication between netfilter hooks */
	__u32				nfcache;
	__u32				nfctinfo;
	struct nf_conntrack	*nfct;
	
#ifdef CONFIG_NETFILTER_DEBUG
        unsigned int			nf_debug;
#endif

#ifdef CONFIG_BRIDGE_NETFILTER
	struct nf_bridge_info	*nf_bridge;
#endif
#endif /* CONFIG_NETFILTER */
#if defined(CONFIG_HIPPI)
	union {
		__u32			ifield;
	} private;
#endif
#ifdef CONFIG_NET_SCHED
       __u32			tc_index;        /* traffic control index */
#ifdef CONFIG_NET_CLS_ACT
	__u32          		 tc_verd;               /* traffic control verdict */
	__u32           		tc_classid;            /* traffic control classid */
#endif

#endif

	/*
	 *	Important!
	 *
	 * These elements must be at the end, see alloc_skb() for details. 
	 *
	 */
	unsigned int		truesize;
	atomic_t			users;
	unsigned char		*head,	/* start of the allocated linear data buffer */
					*data,	/* start of the actual packet data */
					*tail,	/* end of the actual packet data */
					*end;	/* end of the allocated buffer */
};
​
/**
 *	skb_copy	-	create private copy of an sk_buff
 *	@skb: buffer to copy
 *	@gfp_mask: allocation priority
 *
 *	Produce a complete, private duplicate of @skb: a new &sk_buff plus
 *	a copy of all of its data. Use this when the caller intends to
 *	modify the packet data. Returns %NULL if the allocation fails,
 *	otherwise the new buffer, which has a reference count of 1.
 *
 *	As a by-product a non-linear &sk_buff is converted to a linear
 *	one, so every byte of the returned buffer may be modified. That
 *	makes this function a poor choice when only the header is going
 *	to be changed; use pskb_copy() in that case.
 */

struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
{
	/* Bytes between the start of the buffer and the start of the data. */
	const int headroom = skb->data - skb->head;
	struct sk_buff *copy;

	/*
	 * The new linear buffer must hold the whole original linear
	 * buffer plus the non-linear data (data_len bytes).
	 */
	copy = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
	if (copy == NULL)
		return NULL;

	/* Give the copy the same headroom as the original ... */
	skb_reserve(copy, headroom);
	/* ... and set its tail pointer and length for skb->len data bytes. */
	skb_put(copy, skb->len);

	copy->csum = skb->csum;
	copy->ip_summed = skb->ip_summed;

	/*
	 * Copy headroom and data in one pass: the negative offset starts
	 * the copy at skb->head and the destination is copy->head.
	 */
	if (skb_copy_bits(skb, -headroom, copy->head, headroom + skb->len) != 0)
		BUG();

	copy_skb_header(copy, skb);
	return copy;
}

​
/**
 *	pskb_copy	-	create copy of an sk_buff with private head.
 *	@skb: buffer to copy
 *	@gfp_mask: allocation priority
 *
 *	Duplicate the &sk_buff and the part of its data that lives in the
 *	header (linear) area; fragmented data stays shared with @skb. Use
 *	this when only the header of the &sk_buff is going to be modified
 *	and a private copy of that header is needed. Returns %NULL on
 *	failure or the pointer to the new buffer on success.
 *	The returned buffer has a reference count of 1.
 */

struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
{
	struct sk_buff *copy;
	int idx;

	/* The new linear buffer is as large as the original one. */
	copy = alloc_skb(skb->end - skb->head, gfp_mask);
	if (copy == NULL)
		return NULL;

	/* Same headroom as the original. */
	skb_reserve(copy, skb->data - skb->head);
	/* Set tail and length for the linear part only ... */
	skb_put(copy, skb_headlen(skb));
	/* ... and copy exactly those bytes. */
	memcpy(copy->data, skb->data, copy->len);

	copy->csum = skb->csum;
	copy->ip_summed = skb->ip_summed;

	/* From here on the lengths also account for the shared data. */
	copy->data_len = skb->data_len;
	copy->len = skb->len;

	/* Share the page fragments: copy each descriptor and take a page reference. */
	if (skb_shinfo(skb)->nr_frags) {
		idx = 0;
		while (idx < skb_shinfo(skb)->nr_frags) {
			skb_shinfo(copy)->frags[idx] = skb_shinfo(skb)->frags[idx];
			get_page(skb_shinfo(copy)->frags[idx].page);
			idx++;
		}
		skb_shinfo(copy)->nr_frags = idx;
	}

	/* Share the fragment list as well. */
	if (skb_shinfo(skb)->frag_list) {
		skb_shinfo(copy)->frag_list = skb_shinfo(skb)->frag_list;
		skb_clone_fraglist(copy);
	}

	copy_skb_header(copy, skb);
	return copy;
}
/**
 *	skb_clone	-	duplicate an sk_buff :只复制一个和skb_buff,该skb_buff的指针的值与原来的skb值相同
 *	@skb: buffer to clone
 *	@gfp_mask: allocation priority
 *
 *	Duplicate an &sk_buff. The new one is not owned by a socket. Both
 *	copies share the same packet data but not structure. The new
 *	buffer has a reference count of 1. If the allocation fails the
 *	function returns %NULL otherwise the new buffer is returned.
 *
 *	If this function is called from an interrupt gfp_mask() must be
 *	%GFP_ATOMIC.
 */

struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
{
	struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);

	if (!n) 
		return NULL;

#define C(x) n->x = skb->x     /*只复制控制区,因此定义一个宏函数方便复制操作*/

	n->next = n->prev = NULL;
	n->list = NULL;
	n->sk = NULL;
	C(stamp);
	C(dev);
	C(real_dev);
	C(h);
	C(nh);
	C(mac);
	C(dst);
	dst_clone(skb->dst);
	C(sp);
#ifdef CONFIG_INET
	secpath_get(skb->sp);
#endif
	memcpy(n->cb, skb->cb, sizeof(skb->cb));
	C(len);
	C(data_len);
	C(csum);
	C(local_df);
	n->cloned = 1;
	n->nohdr = 0;
	C(pkt_type);
	C(ip_summed);
	C(priority);
	C(protocol);
	C(security);
	n->destructor = NULL;
#ifdef CONFIG_NETFILTER
	C(nfmark);
	C(nfcache);
	C(nfct);
	nf_conntrack_get(skb->nfct);
	C(nfctinfo);
#ifdef CONFIG_NETFILTER_DEBUG
	C(nf_debug);
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
	C(nf_bridge);
	nf_bridge_get(skb->nf_bridge);
#endif
#endif /*CONFIG_NETFILTER*/
#if defined(CONFIG_HIPPI)
	C(private);
#endif
#ifdef CONFIG_NET_SCHED
	C(tc_index);
#ifdef CONFIG_NET_CLS_ACT
	n->tc_verd = SET_TC_VERD(skb->tc_verd,0);
	n->tc_verd = CLR_TC_OK2MUNGE(skb->tc_verd);
	n->tc_verd = CLR_TC_MUNGED(skb->tc_verd);
	C(input_dev);
	C(tc_classid);
#endif

#endif
	C(truesize);
	atomic_set(&n->users, 1);
	C(head);
	C(data);
	C(tail);
	C(end);

	atomic_inc(&(skb_shinfo(skb)->dataref));
	skb->cloned = 1;

	return n;
}