python cookbook中文 python cookbook3

转载

岁月静好呀 2024-02-26 17:00:25

文章标签 python cookbook中文 python 数据结构字段 User 文章分类 Python 后端开发

python cookbook3第一章

序列中出现次数最多的元素
通过某个关键字排序一个字典列表
排序不支持原生比较的对象
通过某个字段将记录分组
过滤序列元素
从字典中提取子集

序列中出现次数最多的元素

标准答案应该是 collections.Counter 类，它甚至有一个有用的 most_common() 方法直接给了你答案。

from collections import Counter

# Sample text, split on whitespace into individual words.
words = (
    "look into my eyes look into my eyes "
    "the eyes the eyes the eyes not around the "
    "eyes don't look around the eyes look into "
    "my eyes you're under"
).split()

# Tally every word, then ask for the three most frequent ones.
word_counts = Counter(words)
top_three = word_counts.most_common(3)
print(top_three)
# Outputs [('eyes', 8), ('the', 5), ('look', 4)]

作为输入， Counter对象可以接受任意的由可哈希（hashable）元素构成的序列对象。在底层实现上，一个 Counter对象就是一个字典，将元素映射到它出现的次数上。下面随便贴一点源码：

def _count_elements(mapping, iterable):
    'Tally elements from the iterable.'
    mapping_get = mapping.get
    for elem in iterable:
        mapping[elem] = mapping_get(elem, 0) + 1

    def update(self, iterable=None, /, **kwds):
        '''Add counts from *iterable* and from keyword arguments.

        Behaves like dict.update(), except that counts are summed instead of
        overwritten.  The positional source may be a plain iterable of
        elements, a mapping of element -> count, or another Counter.

        >>> c = Counter('which')
        >>> c.update('witch')           # add elements from another iterable
        >>> d = Counter('watch')
        >>> c.update(d)                 # add elements from another counter
        >>> c['h']                      # four 'h' in which, witch, and watch
        4
        '''
        # Replacing counts the way dict.update() does would mix stale and new
        # tallies with no sensible meaning for counting, so counts are added
        # instead.  Zero and negative counts are accepted on both sides.
        if iterable is not None:
            if not isinstance(iterable, _collections_abc.Mapping):
                # Plain iterable: each occurrence adds one.
                _count_elements(self, iterable)
            elif not self:
                # Fast path when the counter is empty: nothing to add to.
                super(Counter, self).update(iterable)
            else:
                lookup = self.get
                for key, amount in iterable.items():
                    self[key] = amount + lookup(key, 0)
        if kwds:
            # Keyword arguments are folded in as one more mapping.
            self.update(kwds)

update方法可以更新（增加个数）Counter对象，这里参数iterable可以是任意可迭代对象（如list），也可以是映射（如dict或另一个Counter），同时相应地定义了subtract方法做减少的操作。此外也定义了__add__和__sub__方法使得Counter可以直接加减（Counter('abbb') + Counter('bcc')），定义了__or__和__and__方法使得可以直接做集合操作（Counter('abbb') | Counter('bcc')），还有很多方法就不一一讲了，感兴趣的可以直接去读源码。

通过某个关键字排序一个字典列表

排序不支持原生比较的对象

分别使用了operator模块里的itemgetter和attrgetter函数，拿到排序需要的信息，传递给sorted函数的关键字参数key，比如：

from operator import itemgetter

# Sample records: one dict per person, all with the same keys.
rows = [
    {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
    {'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
    {'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
    {'fname': 'Big', 'lname': 'Jones', 'uid': 1004},
]

# Sort a copy by the 'fname' field; the original list keeps its order.
rows_by_fname = list(rows)
rows_by_fname.sort(key=itemgetter('fname'))
print(rows_by_fname)

class User:
    """Minimal record that carries nothing but a ``user_id``."""

    def __init__(self, user_id):
        # Stored exactly as given; no validation or conversion is applied.
        self.user_id = user_id

    def __repr__(self):
        """Return ``User(<user_id>)`` so printed lists are readable."""
        return f'User({self.user_id})'

from operator import attrgetter

# Build a few users in arbitrary order, then sort by their user_id attribute.
users = [User(uid) for uid in (23, 3, 99)]
rows_by_id = sorted(users, key=attrgetter('user_id'))
print(rows_by_id)

当然这两个函数还支持同时传入多个字段，此时会返回一个包含多个目标值的元组，排序时按元组中的顺序依次比较。如果你不是很在乎运行速度的话，也能自己写个匿名函数lambda来代替。

源码里的一些魔法方法的实现：

class attrgetter:
    """
    Callable that pulls one or more attributes out of the object it is given.

    With a single name the attribute value itself is returned; with several
    names a tuple of values is returned, in the order the names were passed.
    Dotted names are followed one attribute at a time:

    f = attrgetter('name')                     ->  f(r) == r.name
    g = attrgetter('name', 'date')             ->  g(r) == (r.name, r.date)
    h = attrgetter('name.first', 'name.last')  ->  h(r) == (r.name.first,
                                                            r.name.last)
    """
    __slots__ = ('_attrs', '_call')

    def __init__(self, attr, *attrs):
        if attrs:
            # Several names: delegate each one to its own single-name getter.
            self._attrs = (attr,) + attrs
            getters = tuple(attrgetter(name) for name in self._attrs)

            def fetch_all(obj):
                return tuple(getter(obj) for getter in getters)

            self._call = fetch_all
            return

        if not isinstance(attr, str):
            raise TypeError('attribute name must be a string')
        self._attrs = (attr,)
        path = attr.split('.')

        def fetch_one(obj):
            # Walk the dotted path, one attribute per step.
            for step in path:
                obj = getattr(obj, step)
            return obj

        self._call = fetch_one

    def __call__(self, obj):
        return self._call(obj)

    def __repr__(self):
        cls = self.__class__
        shown = ', '.join(repr(name) for name in self._attrs)
        return '{}.{}({})'.format(cls.__module__, cls.__qualname__, shown)

    def __reduce__(self):
        # Rebuild by calling the class again with the original names.
        return self.__class__, self._attrs

class itemgetter:
    """
    Callable that looks up one or more items in the object it is given.

    With a single key the item itself is returned; with several keys a tuple
    of items is returned, in the order the keys were passed:

    f = itemgetter(2)        ->  f(r) == r[2]
    g = itemgetter(2, 5, 3)  ->  g(r) == (r[2], r[5], r[3])
    """
    __slots__ = ('_items', '_call')

    def __init__(self, item, *items):
        if items:
            keys = (item,) + items
            self._items = keys

            def pick_many(obj):
                return tuple(obj[key] for key in keys)

            self._call = pick_many
        else:
            self._items = (item,)

            def pick_one(obj):
                return obj[item]

            self._call = pick_one

    def __call__(self, obj):
        return self._call(obj)

    def __repr__(self):
        cls = self.__class__
        shown = ', '.join(repr(key) for key in self._items)
        return '{}.{}({})'.format(cls.__module__, cls.__name__, shown)

    def __reduce__(self):
        # Rebuild by calling the class again with the original keys.
        return self.__class__, self._items

通过某个字段将记录分组

如果你需要将这样的数据进行分组处理：

[(1, 'ame'), (2, 'xm'), (1, 'paparize'), (2, 'ori'), (3 ,'yang'), (1, 'sccc'), (3, 'old eleven')]

处理之后我可能需要得到这样的结果：

[(1, ['ame', 'paparize', 'sccc']), (2, ['xm', 'ori']), (3, ['yang', 'old eleven'])]

最实用的方法是itertools.groupby()函数：

from itertools import groupby
from operator import itemgetter

# (group id, member name) pairs in arbitrary order.
x = [
    (1, 'ame'), (2, 'xm'), (1, 'paparize'), (2, 'ori'),
    (3, 'yang'), (1, 'sccc'), (3, 'old eleven'),
]

# Sort by the first field, then group by that same field.
first_field = itemgetter(0)
soooo = sorted(x, key=first_field)
p = groupby(soooo, key=first_field)
for group_id, members in p:
    # Each group is an iterator of the original pairs; keep only the names.
    print(group_id, [name for _, name in members])

在这里，如果你想按key字段分组，那么你首先得按该字段排序，否则达不到效果：groupby()只会把相邻且key相同的元素归为一组，因为源代码里是直接用next()顺序遍历iter(iterable)得到的迭代器的，不会回头去合并不相邻的元素。返回值要注意，每次迭代得到一个二元组：第一个是分组的key，第二个是一个包含所有分到该组的元素的迭代器。

具体源码在这：
https://docs.python.org/3/library/itertools.html#itertools.groupby

过滤序列元素

mylist = [1, 4, -5, 10, -7, 2, 3, -1]

# List comprehension: builds the whole filtered list at once.
positives = [n for n in mylist if n > 0]
print(positives)  # output [1, 4, 10, 2, 3]

# Generator expression: yields matches lazily; list() consumes it for printing.
lazy_positives = (n for n in mylist if n > 0)
print(list(lazy_positives))

不考虑内存占用的话就直接用列表推导式，否则就用生成器表达式。
过滤条件比较复杂（例如需要处理异常）、没法简单写进推导式的时候，就需要用上filter()：将过滤条件写成一个函数，注意filter()返回的也是一个迭代器，需要用list()等转换后才能得到列表：

values = ['1', '2', '-3', '-', '4', 'N/A', '5']


def is_int(val):
    """Return True if ``int(val)`` succeeds, False otherwise.

    Args:
        val: Candidate value, typically a string.

    Returns:
        bool: True when ``val`` can be converted with ``int()``.  False when
        the conversion raises ValueError (e.g. '-' or 'N/A') or TypeError
        (e.g. None), so one bad item never aborts a whole filter() pass.
    """
    try:
        # Only whether the conversion succeeds matters; the result is discarded.
        int(val)
    except (ValueError, TypeError):
        return False
    return True


# filter() is lazy; list() materializes the values that passed the check.
ivals = list(filter(is_int, values))
print(ivals)
# Outputs ['1', '2', '-3', '4', '5']

从字典中提取子集

和上一小节差不多，用字典推导式即可：

# Maps each name to a tag string such as 'Ti4'.
prices = {
    'yyf': 'Ti2',
    'Mu': 'Ti4',
    'Hao': 'Ti4',
    'blink': 'Ti6',
    'iceice': 'Ti6',
}

# Keep only the entries whose value is exactly 'Ti4'.
ti4_only = {name: tag for name, tag in prices.items() if tag == 'Ti4'}
print(ti4_only)

本文章为转载内容，我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题，欢迎原作者联系我们进行内容更正或删除文章。