0.导语
vector 的数据安排以及操作方式,与 array 非常相似。两者的唯一差别在于空间的运用的灵活性,array 是静态的,一旦配置了就不能改变,而 vector 是动态空间,随着元素的加入,它的内部机制会自行扩充空间以容纳新元素。下面一起来看一下 vector 的"内部机制",怎么来实现空间配置策略的。
1.vector
在_Vector_base中开头有两行比较难理解,下面一个一个分析:
1.1 _Tp_alloc_type
开头处定义:
typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template rebind<_Tp>::other _Tp_alloc_type;
在__gnu_cxx::__alloc_traits中:对应文件为:ext/alloc_traits.h
template<typename _Tp> struct rebind { typedef typename _Base_type::template rebind_alloc<_Tp> other; };
等价于
typename __gnu_cxx::__alloc_traits<_Alloc>::template rebind<_Tp>::other
等价于:
typename _Base_type::template rebind_alloc<_Tp>
而_Base_type是:
typedef std::allocator_traits<_Alloc> _Base_type;
所以上述等价于:
typename std::allocator_traits<_Alloc>::template rebind_alloc<_Tp>
继续到allocator_traits中寻找
找到了:
template<typename _Up> using rebind_alloc = allocator<_Up>;
于是:
std::allocator_traits<_Alloc>::template rebind_alloc<_Tp>
等价于:
allocator<_Tp>
★小结
”
typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template rebind<_Tp>::other _Tp_alloc_type;
等价于:
typedef allocator<_Tp> _Tp_alloc_type
1.2 pointer
而pointer:
typedef typename __gnu_cxx::__alloc_traits<_Tp_alloc_type>::pointer pointer;
等价于:
typedef typename __gnu_cxx::__alloc_traits<allocator<_Tp>>::pointer pointer;
根据下面两行:
typedef std::allocator_traits<_Alloc> _Base_type; typedef typename _Base_type::pointer pointer;
又等价于:
typedef std::allocator_traits<_Alloc>::pointer pointer;
在allocator_traits中找到下面:
/** * @brief The allocator's pointer type. * * @c Alloc::pointer if that type exists, otherwise @c value_type* */ typedef __pointer pointer;
注释中说了如果存在就是Alloc::pointer,否则为value_type *。
★小结
”
typedef typename __gnu_cxx::__alloc_traits<_Tp_alloc_type>::pointer pointer; // 或者 typedef typename __gnu_cxx::__alloc_traits<allocator<_Tp>>::pointer pointer;
等价于
/** * @brief The allocator's pointer type. * * @c Alloc::pointer if that type exists, otherwise @c value_type* */ typedef __pointer pointer;
如果存在_Tp_alloc_type::pointer也就是allocator<_Tp>存在就是allocator<_Tp>::pointer,
这个看allocator.h源码:
typedef _Tp* pointer;
否则为value_type*。而value_type为:
typedef typename _Alloc::value_type value_type;
所以value_type*推导出为:
_Tp::value_type*
1.3 vector 的内存管理
_Vector_base中有一个内存管理器_Vector_impl类,该结构体继承allocator(根据上述 1.1 等价条件得出)。
template<typename _Tp, typename _Alloc> struct _Vector_base { //__alloc_traits -> allocator_traits // typedef allocator<_Tp> _Tp_alloc_type typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template rebind<_Tp>::other _Tp_alloc_type; // _Tp::value_type* or _Tp* typedef typename __gnu_cxx::__alloc_traits<_Tp_alloc_type>::pointer pointer; struct _Vector_impl : public _Tp_alloc_type { pointer _M_start; // 使用空间起始位置 pointer _M_finish; // 使用空间结束位置 pointer _M_end_of_storage; // 可使用空间结束位置 _Vector_impl() : _Tp_alloc_type(), _M_start(0), _M_finish(0), _M_end_of_storage(0) {} _Vector_impl(_Tp_alloc_type const &__a) // vector数据交换,只交换内存地址,不交换数据 void _M_swap_data(_Vector_impl &__x) _GLIBCXX_NOEXCEPT { std::swap(_M_start, __x._M_start); std::swap(_M_finish, __x._M_finish); std::swap(_M_end_of_storage, __x._M_end_of_storage); } }; public: typedef _Alloc allocator_type; // 前面我们知道_Vector_impl继承自allocator分配器,而这个又是_Tp_alloc_type,所以这里返回的就是_M_impl的基类。 _Tp_alloc_type & _M_get_Tp_allocator() _GLIBCXX_NOEXCEPT { return *static_cast<_Tp_alloc_type *>(&this->_M_impl); } const _Tp_alloc_type & _M_get_Tp_allocator() const _GLIBCXX_NOEXCEPT { return *static_cast<const _Tp_alloc_type *>(&this->_M_impl); } allocator_type // 获取传递进来的分配器 get_allocator() const _GLIBCXX_NOEXCEPT { return allocator_type(_M_get_Tp_allocator()); } _Vector_base() : _M_impl() {} _Vector_base(const allocator_type &__a) _GLIBCXX_NOEXCEPT : _M_impl(__a) {} _Vector_base(size_t __n) : _M_impl() { _M_create_storage(__n); } _Vector_base(size_t __n, const allocator_type &__a) : _M_impl(__a) { _M_create_storage(__n); } #if __cplusplus >= 201103L _Vector_base(_Tp_alloc_type&& __a) noexcept : _M_impl(std::move(__a)) { } // 移动构造函数,只交换3个指针,不copy数据 _Vector_base(_Vector_base&& __x) noexcept : _M_impl(std::move(__x._M_get_Tp_allocator())) { this->_M_impl._M_swap_data(__x._M_impl); } _Vector_base(_Vector_base&& __x, const allocator_type& __a) : _M_impl(__a) { if (__x.get_allocator() == __a) this->_M_impl._M_swap_data(__x._M_impl); else { size_t __n = __x._M_impl._M_finish - __x._M_impl._M_start; _M_create_storage(__n); } } #endif ~_Vector_base() _GLIBCXX_NOEXCEPT { _M_deallocate(this->_M_impl._M_start, this->_M_impl._M_end_of_storage - this->_M_impl._M_start); } public: _Vector_impl _M_impl; pointer _M_allocate(size_t __n) { typedef __gnu_cxx::__alloc_traits <_Tp_alloc_type> _Tr; return __n != 0 ? _Tr::allocate(_M_impl, __n) : 0; // 同_M_deallocate,一直往后调用的就是malloc函数 } void _M_deallocate(pointer __p, size_t __n) { typedef __gnu_cxx::__alloc_traits <_Tp_alloc_type> _Tr; if (__p) _Tr::deallocate(_M_impl, __p, __n); // 最后调用allocator_traits的deallocate,而该函数又是根据传递进来的_M_impl进行deallocate,一直往后,最后调用的就是free函数 } private: void _M_create_storage(size_t __n) { this->_M_impl._M_start = this->_M_allocate(__n); this->_M_impl._M_finish = this->_M_impl._M_start; this->_M_impl._M_end_of_storage = this->_M_impl._M_start + __n; } };
小结:_Vector_base专门负责vector的内存管理,内部类_M_impl通过继承_Tp_alloc_type(也就是 allocator)得到内存分配释放的功能,_M_allocate 和_M_deallocate 分别分配和释放 vector 所用内存,vector 只需要负责元素构造和析构。
在 vector 中,默认内存分配器为std::allocator<_Tp>
template<typename _Tp, typename _Alloc = std::allocator<_Tp>> class vector : protected _Vector_base<_Tp, _Alloc> { }
vector 代码中使用基类的内存函数及 typedef 等:
template<typename _Tp, typename _Alloc = std::allocator<_Tp>> class vector : protected _Vector_base<_Tp, _Alloc> { typedef _Vector_base<_Tp, _Alloc> _Base; typedef typename _Base::_Tp_alloc_type _Tp_alloc_type; public: typedef typename _Base::pointer pointer; protected: using _Base::_M_allocate; using _Base::_M_deallocate; using _Base::_M_impl; using _Base::_M_get_Tp_allocator; }
2.vector 迭代器
在 vector 中使用了两种迭代器,分别是正向__normal_iterator与反向迭代器reverse_iterator:
正向:
typedef __gnu_cxx::__normal_iterator <pointer, vector> iterator; typedef __gnu_cxx::__normal_iterator <const_pointer, vector> const_iterator;
反向:
typedef std::reverse_iterator<const_iterator> const_reverse_iterator; typedef std::reverse_iterator<iterator> reverse_iterator;
__normal_iterator与reverse_iterator都定义于 stl_iterator.h,封装了 vector 元素的指针。
2.1 正向
template<typename _Iterator, typename _Container> class __normal_iterator { protected: _Iterator _M_current; typedef iterator_traits<_Iterator> __traits_type; public: typedef _Iterator iterator_type; // iterator必须包含的五种typedef typedef typename __traits_type::iterator_category iterator_category; typedef typename __traits_type::value_type value_type; typedef typename __traits_type::difference_type difference_type; typedef typename __traits_type::reference reference; typedef typename __traits_type::pointer pointer; _GLIBCXX_CONSTEXPR __normal_iterator() _GLIBCXX_NOEXCEPT : _M_current(_Iterator()) { } explicit __normal_iterator(const _Iterator& __i) _GLIBCXX_NOEXCEPT : _M_current(__i) { } // Allow iterator to const_iterator conversion template<typename _Iter> __normal_iterator(const __normal_iterator<_Iter, typename __enable_if< (std::__are_same<_Iter, typename _Container::pointer>::__value), _Container>::__type>& __i) _GLIBCXX_NOEXCEPT : _M_current(__i.base()) { } // Forward iterator requirements reference operator*() const _GLIBCXX_NOEXCEPT { return *_M_current; } pointer operator->() const _GLIBCXX_NOEXCEPT { return _M_current; } // 前置++ __normal_iterator& operator++() _GLIBCXX_NOEXCEPT { ++_M_current; return *this; } // 后置++ __normal_iterator operator++(int) _GLIBCXX_NOEXCEPT { return __normal_iterator(_M_current++); } // 前置-- // Bidirectional iterator requirements __normal_iterator& operator--() _GLIBCXX_NOEXCEPT { --_M_current; return *this; } // 后置-- __normal_iterator operator--(int) _GLIBCXX_NOEXCEPT { return __normal_iterator(_M_current--); } // 随机访问迭代器都要重载[]操作符 // Random access iterator requirements reference operator[](difference_type __n "" "" "") const _GLIBCXX_NOEXCEPT { return _M_current[__n]; } // +=操作符 跳跃n个difference_type __normal_iterator& operator+=(difference_type __n) _GLIBCXX_NOEXCEPT { _M_current += __n; return *this; } // +操作符 跳跃n个difference_type __normal_iterator operator+(difference_type __n) const _GLIBCXX_NOEXCEPT { return __normal_iterator(_M_current + __n); } // -=操作符 后退n个difference_type __normal_iterator& operator-=(difference_type __n) _GLIBCXX_NOEXCEPT { _M_current -= __n; return *this; } // -操作符 后退n个difference_type __normal_iterator operator-(difference_type __n) const _GLIBCXX_NOEXCEPT { return __normal_iterator(_M_current - __n); } const _Iterator& base() const _GLIBCXX_NOEXCEPT { return _M_current; } };
_M_current 是指向迭代器位置的指针,这是一个随机访问型指针,operator+和 operator-等移动操作可以直接移动到目的地,非随机访问型指针只能一步步移动。
2.2 反向
vector 还会使用 reverse_iterator,即逆序迭代器,顾名思义,其移动方向与普通迭代器相反
template<typename _Iterator> class reverse_iterator : public iterator<typename iterator_traits<_Iterator>::iterator_category, typename iterator_traits<_Iterator>::value_type, typename iterator_traits<_Iterator>::difference_type, typename iterator_traits<_Iterator>::pointer, typename iterator_traits<_Iterator>::reference> { protected: _Iterator current; typedef iterator_traits<_Iterator> __traits_type; public: typedef _Iterator iterator_type; typedef typename __traits_type::difference_type difference_type; typedef typename __traits_type::pointer pointer; typedef typename __traits_type::reference reference; // 省略不重要的代码 // 该迭代器是从后面end()开始,需要往前一步,才可以获取到有效的迭代器位置 reference operator*() const { _Iterator __tmp = current; return *--__tmp; } // 通过调用上述*操作符直接实现 pointer operator->() const { return &(operator*()); } // 前置++操作符完成后退任务 reverse_iterator& operator++() { --current; return *this; } // 后置++ reverse_iterator operator++(int) { reverse_iterator __tmp = *this; --current; return __tmp; } // 前置--操作符完成前进任务 reverse_iterator& operator--() { ++current; return *this; } // 后置-- reverse_iterator operator--(int) { reverse_iterator __tmp = *this; ++current; return __tmp; } // +操作符 reverse_iterator operator+(difference_type __n) const { return reverse_iterator(current - __n); } // +=操作符 reverse_iterator& operator+=(difference_type __n) { current -= __n; return *this; } // -操作符 reverse_iterator operator-(difference_type __n) const { return reverse_iterator(current + __n); } // -=操作符 reverse_iterator& operator-=(difference_type __n) { current += __n; return *this; } // []操作符 reference operator[](difference_type __n "" "" "") const { return *(*this + __n); } };
3.vector 的数据结构
vector 内存由_M_impl 中的 M_start,_M_finish,_M_end_of_storage 三个指针管理,所有关于地址,容量大小等操作都需要用到这三个指针:
iterator begin() _GLIBCXX_NOEXCEPT { return iterator(this->_M_impl._M_start); } iterator end() _GLIBCXX_NOEXCEPT { return iterator(this->_M_impl._M_finish); } reverse_iterator rbegin() noexcept { return reverse_iterator(end()); } reverse_iterator rend() noexcept { return reverse_iterator(begin()); } size_type size() const _GLIBCXX_NOEXCEPT { return size_type(this->_M_impl._M_finish - this->_M_impl._M_start); } size_type capacity() const _GLIBCXX_NOEXCEPT { return size_type(this->_M_impl._M_end_of_storage - this->_M_impl._M_start); } bool empty() const _GLIBCXX_NOEXCEPT { return begin() == end(); }
_M_finish 和_M_end_of_storage 之间的空间没有数据,有时候这是一种浪费,c++11 提供了一个很有用的函数 shrink_to_fit(),将这段未使用空间释放,主要调用了下面代码,
template<typename _Alloc> bool vector<bool, _Alloc>:: _M_shrink_to_fit() { if (capacity() - size() < int(_S_word_bit)) // 64位系统为64bytes return false; __try { _M_reallocate(size()); return true; } __catch(...) { return false; } }
template<typename _Alloc> void vector<bool, _Alloc>:: _M_reallocate(size_type __n) { _Bit_type* __q = this->_M_allocate(__n); this->_M_impl._M_finish = _M_copy_aligned(begin(), end(), iterator(__q, 0)); this->_M_deallocate(); this->_M_impl._M_start = iterator(__q, 0); this->_M_impl._M_end_of_storage = __q + _S_nword(__n); }
而_M_copy_aligned通过两个 std::copy 实现:
第一次 swap 把__first的指针与__last的指针之间的数据拷贝到__result指针所指向的起始位置。第二次 swap 获得__last的指针对应的迭代器。
iterator _M_copy_aligned(const_iterator __first, const_iterator __last, iterator __result) { // _Bit_type * _M_p; _Bit_type为unsigned long类型 _Bit_type* __q = std::copy(__first._M_p, __last._M_p, __result._M_p); return std::copy(const_iterator(__last._M_p, 0), __last, iterator(__q, 0)); }
先分配 size()大小的内存空间,用于存储begin()与end()之间的数据,释放原来的 vector 空间,新的 vector 只包含 size()数量的数据,并修改_M_start与_M_end_of_storage指向。
4.vector 构造与析构
//使用默认内存分配器 vector() : _Base() { } //指定内存分配器 explicit vector(const allocator_type& __a) : _Base(__a) { } //初始化为n个__value值,如果没指定就使用该类型默认值 explicit vector(size_type __n, const value_type& __value = value_type(), const allocator_type& __a = allocator_type()): _Base(__n, __a) { _M_fill_initialize(__n, __value); } //拷贝构造函数 vector(const vector& __x) : _Base(__x.size(), _Alloc_traits::_S_select_on_copy(__x._M_get_Tp_allocator())) { this->_M_impl._M_finish = std::__uninitialized_copy_a(__x.begin(), __x.end(), this->_M_impl._M_start, _M_get_Tp_allocator()); } //c++11的移动构造函数,获取__x的M_start,_M_finish,_M_end_of_storage,并不需要数据拷贝 vector(vector&& ) noexcept : _Base(std::move(__x)) { } //从list中拷贝数据,也是c++11才有的 vector(initializer_list<value_type> __l, const allocator_type& __a = allocator_type()) : _Base(__a) { _M_range_initialize(__l.begin(), __l.end(), random_access_iterator_tag()); } //支持vector使用两个迭代器范围内的值初始化,除了stl的迭代器,也可以是数组地址 template<typename _InputIterator, typename = std::_RequireInputIter<_InputIterator>> vector(_InputIterator __first, _InputIterator __last, const allocator_type& __a = allocator_type()) : _Base(__a) { _M_initialize_dispatch(__first, __last, __false_type()); } //只析构所有元素,释放内存由vector_base完成 ~vector() _GLIBCXX_NOEXCEPT { std::_Destroy(this->_M_impl._M_start, this->_M_impl._M_finish,_M_get_Tp_allocator()); }
5.vector
插入涉及到内存分配,动态调整,与一开始提到的 vector 与 array 区别,就在下面体现出:
typename vector<_Tp, _Alloc>::iterator vector<_Tp, _Alloc>::insert(iterator __position, const value_type& __x) { const size_type __n = __position – begin(); //插入到最后一个位置,相当于push_back if (this->_M_impl._M_finish != this->_M_impl._M_end_of_storage && __position == end()) { _Alloc_traits::construct(this->_M_impl, this->_M_impl._M_finish, __x); ++this->_M_impl._M_finish; } else { _M_insert_aux(__position, __x); } return iterator(this->_M_impl._M_start + __n); }
其中_M_insert_aux实现:
template<typename _Tp, typename _Alloc> void vector<_Tp, _Alloc>::_M_insert_aux(iterator __position, const _Tp& __x) { //内存空间足够 if (this->_M_impl._M_finish != this->_M_impl._M_end_of_storage) { _Alloc_traits::construct(this->_M_impl, this->_M_impl._M_finish, _GLIBCXX_MOVE(*(this->_M_impl._M_finish - 1))); ++this->_M_impl._M_finish; //__position后的元素依次向后移动一个位置 _GLIBCXX_MOVE_BACKWARD3(__position.base(), this->_M_impl._M_finish - 2, this->_M_impl._M_finish – 1); //目标地址赋值 *__position = _Tp(std::forward<_Args>(__args)...); } else { //内存加倍 const size_type __len = _M_check_len(size_type(1), "vector::_M_insert_aux"); const size_type __elems_before = __position - begin(); pointer __new_start(this->_M_allocate(__len)); pointer __new_finish(__new_start); __try { //给position位置赋值 _Alloc_traits::construct(this->_M_impl, __new_start + __elems_before, std::forward<_Args>(__args)...); __x); __new_finish = 0; //拷贝position位置前元素 __new_finish = std::__uninitialized_move_if_noexcept_a (this->_M_impl._M_start, __position.base(), __new_start, _M_get_Tp_allocator()); ++__new_finish; //拷贝position位置后元素 __new_finish = std::__uninitialized_move_if_noexcept_a (__position.base(), this->_M_impl._M_finish, __new_finish, _M_get_Tp_allocator()); } __catch(...) { if (!__new_finish) _Alloc_traits::destroy(this->_M_impl, __new_start + __elems_before); else std::_Destroy(__new_start, __new_finish, _M_get_Tp_allocator()); _M_deallocate(__new_start, __len); __throw_exception_again; } //析构原vector所有元素 std::_Destroy(this->_M_impl._M_start, this->_M_impl._M_finish, _M_get_Tp_allocator()); //释放原vector内存空间 _M_deallocate(this->_M_impl._M_start, this->_M_impl._M_end_of_storage,this->_M_impl._M_start); //vector内存地址指向新空间 this->_M_impl._M_start = __new_start; this->_M_impl._M_finish = __new_finish; this->_M_impl._M_end_of_storage = __new_start + __len; } }
其中_M_check_len:
size_type _M_check_len(size_type __n, const char* __s) const { if (max_size() - size() < __n) __throw_length_error(__N(__s)); const size_type __len = size() + std::max(size(), __n); //如果n小于当前size,内存加倍,否则内存增长n。 return (__len < size() || __len > max_size()) ? max_size() : __len; }
内存分配策略并不是简单的加倍,如果 n 小于当前 size,内存加倍,否则内存增长 n。
学习资料:
★侯捷《STL 源码剖析》
”
”