首先我们来看一下Python中的PyStringObject和PySting_Type,PyStringObject.h对象的定义如下:
typedef struct {
PyObject_VAR_HEAD
long ob_shash;
int ob_sstate;
char ob_sval[1];
/* Invariants:
* ob_sval contains space for 'ob_size+1' elements.
* ob_sval[ob_size] == 0.
* ob_shash is the hash of the string or -1 if not computed yet.
* ob_sstate != 0 iff the string object is in stringobject.c's
* 'interned' dictionary; in this case the two references
* from 'interned' to this object are *not counted* in ob_refcnt.
*/
} PyStringObject;PyObject_VAR_HEAD中有个ob_size变量,字符串的长度由ob_size决定,ob_size决定了这段内存的实际长度(字节),这个机制是Python中所有变长对象的实现机制。Python中字符串都是以结尾。ob_shash的初始值是-1,这个在Python的dict中有非常巨大的作用。hash值得算法如下:
static long
string_hash(PyStringObject *a)
{
register Py_ssize_t len;
register unsigned char *p;
register long x;
#ifdef Py_DEBUG
assert(_Py_HashSecret_Initialized);
#endif
if (a->ob_shash != -1)
return a->ob_shash;
len = Py_SIZE(a);
/*
We make the hash of the empty string be 0, rather than using
(prefix ^ suffix), since this slightly obfuscates the hash secret
*/
if (len == 0) {
a->ob_shash = 0;
return 0;
}
p = (unsigned char *) a->ob_sval;
x = _Py_HashSecret.prefix;
x ^= *p << 7;
while (--len >= 0)
x = (1000003*x) ^ *p++;
x ^= Py_SIZE(a);
x ^= _Py_HashSecret.suffix;
if (x == -1)
x = -2;
a->ob_shash = x;
return x;
}字符串的预存hash值和intern机制使得Python的执行效率提升了20%左右。再看一下PyString_Type,代码如下:
PyTypeObject PyString_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
"str",
PyStringObject_SIZE,
sizeof(char),
string_dealloc, /* tp_dealloc */
(printfunc)string_print, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
string_repr, /* tp_repr */
&string_as_number, /* tp_as_number */
&string_as_sequence, /* tp_as_sequence */
&string_as_mapping, /* tp_as_mapping */
(hashfunc)string_hash, /* tp_hash */
0, /* tp_call */
string_str, /* tp_str */
PyObject_GenericGetAttr, /* tp_getattro */
0, /* tp_setattro */
&string_as_buffer, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
string_doc, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
(richcmpfunc)string_richcompare, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
string_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
&PyBaseString_Type, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
string_new, /* tp_new */
PyObject_Del, /* tp_free */
};从上面的代码我们不难看出,String对象的tp_as_number、tp_as_sequence、tp_as_mapping三个域都被设置了,这说明了PyStringObject对数值操作
、序列操作和映射操作都支持。接下来我们看一下图片:

















