android dns获取流程安卓dns查看

转载

我心依旧 2023-08-02 00:51:02

文章标签 android dns获取流程 DNS Cache DNS缓存缓存 文章分类 Android 移动开发

DNS响应数据包中都带有一个TTL字段，表示了本次查询结果的有效期，在没有到期之前，如果还需要获取同样一个查询结果，那么无需真的向DNS服务器查询，使用之前的即可。为了实现这种功能，libc有责任将查询结果进行缓存，并且在结果过期的时候将缓存信息删除。这篇笔记就介绍下libc中的这种DNS查询缓存机制。

1. 核心数据结构

在看代码逻辑之前，先来过一下相关的数据结构。

1.1 struct resolv_cache_info

系统中每使能一张网卡都会创建一个该结构，用于保存该网卡相关的DNS配置信息，以及在该网卡上进行的DNS查询结果缓存信息，系统中所有网卡的该结构被组织成一个单链表。

// Per-network resolver state: the DNS server configuration of one network
// plus the cache of DNS answers obtained on it. All instances are chained
// into a singly linked list through 'next'.
struct resolv_cache_info {
	// Network id (netid) this node belongs to.
	unsigned                    netid;
	// Cache of DNS query results for this network.
	Cache*                      cache;
	// Next node in the list of all resolv_cache_info structures.
	struct resolv_cache_info*   next;
	// Number of configured DNS servers, i.e. how many slots of
	// nameservers[] are in use.
	int                         nscount;
	// DNS server addresses; at most MAXNS can be stored
	// (the original author notes the limit is currently 4 -- confirm).
	char*                       nameservers[MAXNS];
	// The server addresses converted to addrinfo form, used while querying.
	struct addrinfo*            nsaddrinfo[MAXNS];
	// Incremented every time the nameserver set is replaced.
	int                         revision_id; // # times the nameservers have been replaced
	// Tuning parameters; per the original author's notes they drive the
	// server "penalty" mechanism (not visible in this excerpt).
	struct __res_params         params;
	struct __res_stats          nsstats[MAXNS];
	// Domain-search configuration (see hostname(7)); the original author
	// notes that Android hardly uses these two fields.
	char                        defdname[MAXDNSRCHPATH];
	int                         dnsrch_offset[MAXDNSRCH+1];  // offsets into defdname
};

1.1.1 链表初始化

/* Sentinel head of the per-network cache list; guarded by
 * _res_cache_list_lock. The head itself carries no cache data: the first
 * real node is _res_cache_list.next. */
static struct resolv_cache_info _res_cache_list;

/* One-time initialiser (run through pthread_once by the lookup path):
 * sets up the list mutex and clears the sentinel head so the list starts
 * out empty. */
static void _res_cache_init(void)
{
    pthread_mutex_init(&_res_cache_list_lock, NULL);
    memset(&_res_cache_list, 0, sizeof _res_cache_list);
}

初始化resolv_cache_info链表的表头结构以及其互斥锁。_res_cache_list结构本身只作为表头使用，并不保存任何网卡的cache信息，即链表中真正的第一个cache_info信息是从_res_cache_list.next开始的。

另外，resolv_cache_info结构的创建是在设置DNS地址的时候完成的，具体可以参考笔记Android DNS之DNS参数设置。

1.2 查询结果cache表头

缓存信息被组织成一个哈希表，但是还需要一个结构来从整体上描述该哈希表的信息，姑且称之为cache表头吧。

// Header describing one DNS answer cache: a hash table of entries, an MRU
// list threaded through the same entries, and the list of in-flight queries.
typedef struct resolv_cache {
	// Maximum number of entries the cache may hold.
	int              max_entries;
	// Number of entries currently stored.
	int              num_entries;
	// Sentinel head of the circular MRU list (points at itself when empty).
	Entry            mru_list;
	// NOTE(review): not used in the code shown here; presumably the source
	// of Entry.id values -- confirm.
	int              last_id;
	// Hash table storage, allocated by _resolv_cache_create() with
	// max_entries slots.
	Entry*           entries;
	// Head of the list of pending (in-flight) queries. When several threads
	// issue a query with the same hash, only the first one goes to the
	// network; the others block until it completes
	// (see _cache_check_pending_request_locked).
	PendingReqInfo   pending_requests;
} Cache;

1.2.1 cache表头的创建

cache是和resolv_cache_info结构一起创建的，所以其创建过程也是在设置DNS地址的时候执行的，创建cache的接口是_resolv_cache_create()，其代码如下：

/* Maximum number of entries in one DNS cache (64 * 2 * 5 = 640).
 * The replacement list is parenthesized so the macro expands as a single
 * value inside larger expressions such as `x % CONFIG_MAX_ENTRIES`. */
#define  CONFIG_MAX_ENTRIES    (64 * 2 * 5)

/* Returns the number of entries a new cache should be created with.
 *
 * Caching is enabled (CONFIG_MAX_ENTRIES) only when the environment variable
 * ANDROID_DNS_MODE is exactly "local"; if it is unset or has any other value
 * the size is 0. Per the original author's notes only the DNS proxy (netd)
 * runs in that mode -- confirm against the callers.
 */
static int _res_cache_get_max_entries( void )
{
    const char* dns_mode = getenv("ANDROID_DNS_MODE");
    int         entries  = 0;

    if (dns_mode != NULL && strcmp(dns_mode, "local") == 0) {
        entries = CONFIG_MAX_ENTRIES;
    }

    XLOG("cache size: %d", entries);
    return entries;
}

/* Allocates and initialises an empty cache.
 *
 * Returns a zero-initialised cache header whose hash table has
 * _res_cache_get_max_entries() slots and whose MRU list is empty, or NULL
 * if either allocation fails (the header is freed in that case).
 *
 * NOTE: when the configured size is 0, the result of calloc(0, ...) is
 * implementation-defined; if it is NULL, creation is reported as failed.
 */
static struct resolv_cache* _resolv_cache_create( void )
{
    /* calloc takes (element count, element size), in that order. */
    struct resolv_cache*  cache = calloc(1, sizeof(*cache));

    if (cache == NULL) {
        return NULL;
    }

    /* Allocate the hash table itself. */
    cache->max_entries = _res_cache_get_max_entries();
    cache->entries = calloc(cache->max_entries, sizeof(*cache->entries));
    if (cache->entries == NULL) {
        free(cache);
        return NULL;
    }

    /* An empty circular MRU list: the sentinel points at itself. */
    cache->mru_list.mru_prev = cache->mru_list.mru_next = &cache->mru_list;
    XLOG("%s: cache created\n", __FUNCTION__);
    return cache;
}

抛开_res_cache_list链表不说（很简单了），cache的组织结构如下图所示：

android dns获取流程安卓dns查看_android dns获取流程

1.2.2 MRU双向链表

从上面的cache结构图中可以看出，缓存项除了用哈希表管理外，还额外链接成一个双向链表，从指针名字看，我们姑且称之为MRU(Most Recently Used)链表，该链表是有序链表，实际维护时是按照最近访问的时间倒序排列，即最近访问的缓存项会被放在表头，这样设计是为了在缓存项已满，但是又需要加入新的缓存项时，可以快速地移除最旧的（移除MRU链表末尾结点即可）。

MRU链表的作用就这一点，相关代码就是基本的双向链表操作，这里不再赘述。

1.3 查询结果缓存项Entry

该结构才是实实在在的缓存项，代表了一个查询结果，如上图，它被组织成一个哈希表。

/* Cache entry: one cached DNS query/answer pair. For simplicity, 'hash' and
 * 'hlink' are inlined in this structure though they are conceptually part
 * of the hash table.
 *
 * Similarly, mru_next and mru_prev are part of the global MRU list.
 */
typedef struct Entry {
    // Hash value; per the original notes it is derived from the contents of
    // the query packet.
    unsigned int     hash;   /* hash value */
    // Next entry in the same hash bucket's collision chain.
    struct Entry*    hlink;  /* next in collision chain */
    // Links of the MRU list.
    struct Entry*    mru_prev;
    struct Entry*    mru_next;

    // The query packet and the answer packet, with their lengths.
    const uint8_t*   query;
    int              querylen;
    const uint8_t*   answer;
    int              answerlen;
    // Absolute expiry time (TTL + _time_now() at insertion); the entry is
    // treated as stale once _time_now() reaches this value.
    time_t           expires;   /* time_t when the entry isn't valid any more */
    int              id;        /* for debugging purpose */
} Entry;

2. 缓存项的添加

在res_nsend()中，如果完成一次成功的查询，那么会将查询结果进行缓存，这通过调用_resolv_cache_add()完成。

@netid:在哪个网卡上发起的查询
@query：查询报文
@querylen：查询报文缓存区长度
@answer：响应报文
@answerlen：响应报文缓存区长度
/* Stores the answer to a query in the cache of network 'netid' and wakes up
 * any thread waiting for that query.
 *
 * netid:     network the query was issued on
 * query:     query packet, 'querylen' bytes
 * answer:    answer packet, 'answerlen' bytes
 *
 * Nothing is stored when the query cannot be turned into a key, the network
 * has no cache, the entry already exists, or the answer's TTL is 0. Waiters
 * are notified whenever the network's cache was found, whether or not an
 * entry was actually added.
 */
void _resolv_cache_add( unsigned              netid,
                   const void*           query,
                   int                   querylen,
                   const void*           answer,
                   int                   answerlen )
{
    Entry    key[1];
    Entry*   e;
    Entry**  lookup;
    u_long   ttl;
    Cache*   cache = NULL;

    // Build the lookup key from the query packet: cache entries are indexed
    // by the query. The lock is not held yet, so a plain return is fine.
    /* don't assume that the query has already been cached */
    if (!entry_init_key( key, query, querylen )) {
        XLOG( "%s: passed invalid query ?", __FUNCTION__);
        return;
    }

    pthread_mutex_lock(&_res_cache_list_lock);
    // Find the Cache that belongs to this netid.
    cache = _find_named_cache_locked(netid);
    if (cache == NULL) {
        goto Exit;
    }
    // Look the key up first so that duplicates are never inserted.
    lookup = _cache_lookup_p(cache, key);
    e      = *lookup;
    // Already cached: unexpected, since callers are expected to add only
    // after a cache miss.
    if (e != NULL) { /* should not happen */
        XLOG("%s: ALREADY IN CACHE (%p) ? IGNORING ADD",
             __FUNCTION__, e);
        goto Exit;
    }
    // From here on the answer is known to be absent from the cache.
    
    // Cache full: make room before inserting.
    if (cache->num_entries >= cache->max_entries) {
        // First drop every entry that has expired.
        _cache_remove_expired(cache);
        // Still full: evict the oldest entry.
        if (cache->num_entries >= cache->max_entries) {
            _cache_remove_oldest(cache);
        }
        // The removals above may have altered the hash table, and a slot
        // returned by _cache_lookup_p is only valid until the table is
        // altered, so 'lookup' must be recomputed before it is used below.
        lookup = _cache_lookup_p(cache, key);
        e      = *lookup;
        if (e != NULL) {
            XLOG("%s: ALREADY IN CACHE (%p) ? IGNORING ADD",
                __FUNCTION__, e);
            goto Exit;
        }
    }
    // Extract the validity period (TTL) from the answer packet.
    ttl = answer_getTTL(answer, answerlen);
    if (ttl > 0) {
        // A positive TTL means the answer may be kept: allocate an entry,
        // set its absolute expiry time and link it into the cache.
        e = entry_alloc(key, answer, answerlen);
        if (e != NULL) {
            e->expires = ttl + _time_now();
            _cache_add_p(cache, lookup, e);
        }
    }

Exit:
    if (cache != NULL) {
        // Wake up the threads blocked on this query
        // (see _cache_check_pending_request_locked).
      	_cache_notify_waiting_tid_locked(cache, key);
    }
    pthread_mutex_unlock(&_res_cache_list_lock);
}

3. cache表查询

在res_nsend()真正向DNS服务器发起DNS查询请求之前，会首先向自己的cache查询，如果cache可以命中，那么直接返回，否则才继续向DNS服务器查询。该查询过程是通过_resolv_cache_lookup()完成的。

// Result codes returned by _resolv_cache_lookup().
typedef enum {
    // Error-like outcome: the lookup could not be served from the cache.
    RESOLV_CACHE_UNSUPPORTED,  /* the cache can't handle that kind of queries */
                               /* or the answer buffer is too small */
    // The lookup itself went fine, but the cache had no usable answer.
    RESOLV_CACHE_NOTFOUND,     /* the cache doesn't know about this query */
    // The lookup went fine and the answer was found.
    RESOLV_CACHE_FOUND         /* the cache found the answer */
} ResolvCacheStatus;

/*
 * Looks a query up in the cache of network 'netid'.
 *
 * @netid: the cache is kept per network
 * @query, @querylen: query packet and its length
 * @answer, @answersize: caller-provided buffer for the answer and its capacity
 * @answerlen: receives the length of the cached answer; it is also written
 *             when the answer does not fit in the buffer
 * @ret: RESOLV_CACHE_FOUND when the answer was copied into 'answer';
 *       RESOLV_CACHE_NOTFOUND on a miss or a stale entry;
 *       RESOLV_CACHE_UNSUPPORTED when the query cannot be keyed, the network
 *       has no cache, or 'answer' is too small.
 *
 * If the query is not cached but an identical one is already in flight, the
 * calling thread blocks until that request completes (or times out) and the
 * cache is consulted again.
 */
ResolvCacheStatus _resolv_cache_lookup( unsigned              netid,
                      const void*           query,
                      int                   querylen,
                      void*                 answer,
                      int                   answersize,
                      int                  *answerlen )
{
    Entry      key[1];
    Entry**    lookup;
    Entry*     e;
    time_t     now;
    Cache*     cache;

    ResolvCacheStatus  result = RESOLV_CACHE_NOTFOUND;

    XLOG("%s: lookup", __FUNCTION__);
    XLOG_QUERY(query, querylen);

    // Same preliminary steps as _resolv_cache_add(): build the key, take the
    // lock, find the cache of this network.
    if (!entry_init_key(key, query, querylen)) {
        XLOG("%s: unsupported query", __FUNCTION__);
        return RESOLV_CACHE_UNSUPPORTED;
    }

    pthread_once(&_res_cache_once, _res_cache_init);
    pthread_mutex_lock(&_res_cache_list_lock);

    cache = _find_named_cache_locked(netid);
    if (cache == NULL) {
        result = RESOLV_CACHE_UNSUPPORTED;
        goto Exit;
    }

    /* see the description of _cache_lookup_p to understand this.
     * the function always returns a non-NULL pointer.
     */
    lookup = _cache_lookup_p(cache, key);
    e      = *lookup;

    // Cache miss. What happens next decides whether the caller will really
    // send the query to a DNS server.
    if (e == NULL) {
        XLOG( "NOT IN CACHE");
        // calling thread will wait if an outstanding request is found
        // that matches this query
        // Return value 0: no matching request was in flight; report NOTFOUND
        // so that the caller queries the DNS server itself.
        // Return value 1: this thread blocked on a matching request; 'cache'
        // was refreshed and is NULL if the network's cache is gone.
        if (!_cache_check_pending_request_locked(&cache, key, netid) || cache == NULL) {
            goto Exit;
        } else {
            // Back from waiting: look again, because the other request may
            // have put its answer into the cache in the meantime.
            lookup = _cache_lookup_p(cache, key);
            e = *lookup;
            if (e == NULL) {
                goto Exit;
            }
        }
    }

    // An entry is available here, either from the first lookup or from the
    // lookup repeated after waiting. Check that it is still within its
    // validity period.
    now = _time_now();
    // Stale entry: drop it and report NOTFOUND, so the caller queries the
    // DNS server.
    if (now >= e->expires) {
        XLOG( " NOT IN CACHE (STALE ENTRY %p DISCARDED)", *lookup );
        XLOG_QUERY(e->query, e->querylen);
        _cache_remove_p(cache, lookup);
        goto Exit;
    }
    // The cached answer is usable; hand it to the caller.

    // The required length is reported even when the caller's buffer is too
    // small, which is treated as an error.
    *answerlen = e->answerlen;
    if (e->answerlen > answersize) {
        /* NOTE: we return UNSUPPORTED if the answer buffer is too short */
        result = RESOLV_CACHE_UNSUPPORTED;
        XLOG(" ANSWER TOO LONG");
        goto Exit;
    }
    // Copy the answer packet into the caller's buffer.
    memcpy( answer, e->answer, e->answerlen );

    // The entry was just used, so move it to the head of the MRU list; this
    // keeps it away from eviction by _cache_remove_oldest().
    /* bump up this entry to the top of the MRU list */
    if (e != cache->mru_list.mru_next) {
        entry_mru_remove( e );
        entry_mru_add( e, &cache->mru_list );
    }
    // Report the hit.
    XLOG( "FOUND IN CACHE entry=%p", e );
    result = RESOLV_CACHE_FOUND;

Exit:
    pthread_mutex_unlock(&_res_cache_list_lock);
    return result;
}

/*
 * Checks whether a request with the same hash as 'key' is already in flight
 * on '*cache'. Must be called with _res_cache_list_lock held.
 *
 * Returns 0 when no such request exists (or when *cache or key is NULL); in
 * that case a pending-request record for this key is appended to the list,
 * unless its allocation fails.
 *
 * Returns 1 when a matching request exists. The calling thread then waits
 * once on that request's condition variable, until it is signalled or the
 * PENDING_REQUEST_TIMEOUT deadline passes; the wait result is not inspected.
 * Afterwards *cache is looked up again, because the cache may have been
 * deleted while the lock was released during the wait, so *cache can come
 * back NULL.
 */
static int _cache_check_pending_request_locked( struct resolv_cache** cache, Entry* key, unsigned netid )
{
    struct pending_req_info** link;
    struct pending_req_info*  req;
    struct timespec           deadline = {0,0};

    if (*cache == NULL || key == NULL) {
        return 0;
    }

    /* Walk the pending list; 'link' ends up on the matching node's incoming
     * pointer, or on the tail's NULL 'next' when nothing matches. Requests
     * are matched on the query hash only. */
    link = &(*cache)->pending_requests.next;
    while (*link != NULL && (*link)->hash != key->hash) {
        link = &(*link)->next;
    }
    req = *link;

    if (req == NULL) {
        /* Nothing in flight: register this request at the tail of the list. */
        req = calloc(1, sizeof(struct pending_req_info));
        if (req != NULL) {
            req->hash = key->hash;
            pthread_cond_init(&req->cond, NULL);
            *link = req;
        }
        return 0;
    }

    /* An identical request is already out: block instead of sending a
     * duplicate, and let the earlier request deliver the result. */
    XLOG("Waiting for previous request");
    deadline.tv_sec = _time_now() + PENDING_REQUEST_TIMEOUT;
    pthread_cond_timedwait(&req->cond, &_res_cache_list_lock, &deadline);
    /* Must update *cache as it could have been deleted. */
    *cache = _find_named_cache_locked(netid);
    return 1;
}

4. 查询失败时缓存相关处理

从上面的cache查询中，可以看出有些请求是会加入到pending_request中并阻塞等待的，所以如果在res_nsend()中发起了一次DNS查询，但是查询失败了，那么必须将查询失败的结果也告诉缓存机制，缓存机制需要将这些继续等待的线程唤醒。这个过程是通过调用_resolv_cache_query_failed()实现的。

/* Tells the cache of network 'netid' that the given query failed, so that
 * threads blocked waiting for its result are woken up. Does nothing when the
 * query cannot be turned into a key or the network has no cache. */
void _resolv_cache_query_failed( unsigned netid, const void* query, int querylen)
{
    Entry    failed_key[1];
    Cache*   net_cache;

    if (entry_init_key(failed_key, query, querylen)) {
        pthread_mutex_lock(&_res_cache_list_lock);

        net_cache = _find_named_cache_locked(netid);
        if (net_cache != NULL) {
            /* Release every waiter of this query and drop its pending record. */
            _cache_notify_waiting_tid_locked(net_cache, failed_key);
        }

        pthread_mutex_unlock(&_res_cache_list_lock);
    }
}

/* Wakes up every thread waiting on the pending request whose hash matches
 * 'key', then unlinks that request from the cache's pending list and
 * destroys it. Only the first matching request is handled. No-op when
 * 'cache' or 'key' is NULL or when no request matches. */
static void _cache_notify_waiting_tid_locked( struct resolv_cache* cache, Entry* key )
{
    struct pending_req_info** link;
    struct pending_req_info*  match;

    if (cache == NULL || key == NULL) {
        return;
    }

    /* 'link' tracks the pointer that leads to the current node, which makes
     * unlinking a single assignment. */
    link = &cache->pending_requests.next;
    while (*link != NULL && (*link)->hash != key->hash) {
        link = &(*link)->next;
    }

    match = *link;
    if (match == NULL) {
        return;
    }

    /* Broadcast first, then remove the record from the list and free it. */
    pthread_cond_broadcast(&match->cond);
    *link = match->next;
    pthread_cond_destroy(&match->cond);
    free(match);
}

5. 其它

5.1 _cache_lookup_p()

前面多次用到该函数，该函数的作用是从Cache表(cache参数指定)中寻找是否有指定的缓存项(key参数指定)。

/* This function tries to find a key within the hash table.
 * In case of success, it will return a *pointer* to the slot that points
 * at the matching entry.
 * In case of failure, it will return a *pointer* to NULL (the empty slot at
 * the end of the bucket's collision chain).
 *
 * So, the caller must check '*result' to check for success/failure; the
 * returned pointer itself is never NULL.
 *
 * The main idea is that the result can later be passed directly to
 * _cache_add_p or _cache_remove_p as the 'lookup' parameter. This makes the
 * code simpler and avoids re-searching for the key position in the table.
 *
 * The result of a lookup is only valid until you alter the hash table.
 *
 * Precondition: cache->max_entries must be non-zero, since the bucket index
 * is computed with a modulo.
 *
 * NOTE(review): 'entries' is declared as Entry*, yet each table slot is
 * accessed here as an Entry* chain head through the cast below -- confirm
 * the intended element type of the table.
 */
static Entry** _cache_lookup_p( Cache* cache, Entry* key )
{
    /* Bucket selection is a plain modulo of the hash. */
    int      index = key->hash % cache->max_entries;
    Entry**  pnode = (Entry**) &cache->entries[ index ];

    /* Walk the collision chain. The loop condition already guarantees a
     * non-NULL node, so no further NULL check is needed inside the body. */
    while (*pnode != NULL) {
        Entry*  node = *pnode;

        /* A match needs the same hash and the same query packet. */
        if (node->hash == key->hash && entry_equals(node, key))
            break;

        pnode = &node->hlink;
    }
    return pnode;
}

本文章为转载内容，我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题，欢迎原作者联系我们进行内容更正或删除文章。

上一篇：java 遍历循环两个日期 java foreach循环遍历二维数组

下一篇：Android 读取 raw android 读取大文件内存不足

提问和评论都可以，用心的回复会被更多人看到评论

发布评论

相关文章

官方博客	全部文章	热门标签	班级博客
了解我们	网站地图	意见反馈

鸿蒙开发者社区	51CTO学堂
51CTO	软考资讯