前言:

正文:

本文档基于 Raymond Hettinger 2013 年在 PyCon US 上的演讲整理而成. 文档中的代码基于 Python 2, 凡是在 Python 3 中有改动的地方都做了注释.

YouTube传送门:video

PPT传送门:slides

使用xrange(py2)/ range(py3)

for i in [0, 1, 2, 3, 4, 5]:
print i**2
for i in range(6):
print i**2
Better
for i in xrange(6):
print i**2
xrange 返回一个惰性求值的序列对象(严格来说并不是生成器), 数值在遍历时才逐个产生, 不会像 range 那样先在内存中构造出完整的列表, 因此更加节省内存空间.
注: 在 Python 3 中, xrange 重命名为 range, 原来返回列表的 range 已被移除.
遍历collection
colors = ['red', 'green', 'blue', 'yellow']
for i in range(len(colors)):
print colors[i]
Better
for color in colors:
print color
反向遍历collection
colors = ['red', 'green', 'blue', 'yellow']
for i in range(len(colors)-1, -1, -1):
print colors[i]
Better
for color in reversed(colors):
print color
遍历collection中的元素与索引
colors = ['red', 'green', 'blue', 'yellow']
for i in range(len(colors)):
print i, '--->', colors[i]
Better
for i, color in enumerate(colors):
print i, '--->', color
同时遍历多个collection
names = ['raymond', 'rachel', 'matthew']
colors = ['red', 'green', 'blue', 'yellow']
n = min(len(names), len(colors))
for i in range(n):
print names[i], '--->', colors[i]
for name, color in zip(names, colors):
print name, '--->', color
Better
for name, color in izip(names, colors):
print name, '--->', color
zip 会在内存中生成一个新的列表, 当序列很长时会占用更多的内存.
izip (来自 itertools 模块) 返回一个迭代器, 按需逐对产生元素, 节省内存.
注: 在 Python 3 中, 内置的 zip 本身就返回迭代器, itertools.izip 已被移除.
遍历并排序collection
colors = ['red', 'green', 'blue', 'yellow']
# Forward sorted order
for color in sorted(colors):
print colors
# Backwards sorted order
for color in sorted(colors, reverse=True):
print colors
自定义排序键
colors = ['red', 'green', 'blue', 'yellow']
def compare_length(c1, c2):
if len(c1) < len(c2): return -1
if len(c1) > len(c2): return 1
return 0
print sorted(colors, cmp=compare_length)
Better
print sorted(colors, key=len)
注: 比较函数(sorted 的 cmp 参数)在 Python 3 中已经被取消了; 如果确实需要沿用旧的比较函数, 可以用 functools.cmp_to_key 把它转换成 key 函数.
使用iter()连续调用函数
blocks = []
while True:
block = f.read(32)
if block == '':
break
blocks.append(block)
Better
blocks = []
for block in iter(partial(f.read, 32), ''):
blocks.append(block)
当 iter 接受两个参数时, 第一个参数是一个可调用对象(函数), 第二个参数是边界值(哨兵值). 每次迭代都会调用一次该可调用对象, 当它返回这个边界值时, 迭代器就会抛出 StopIteration, 循环随之结束. 这里的 partial 来自 functools 模块.
使用for/else
def find(seq, target):
found = False
for i, value in enumerate(seq):
if value == target:
found = True
break
if not found:
return -1
return i
Better
def find(seq, target):
for i, value in enumerate(seq):
if value == target:
break
else:
return -1
return i
注: 在这里, Raymond 建议把else理解为 no break
遍历字典中的键
d = {'matthew': 'blue', 'rachel': 'green', 'raymond': 'red'}
for k in d:
print k
for k in d.keys():
if k.startswith('r'):
del d[k]
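注: 不可以在直接遍历字典(第一种写法)的同时修改它. 当你需要在循环中增删字典的键时, 应该使用第二种写法: 在 Python 2 中 d.keys() 会返回一份键的列表拷贝, 循环遍历的是这份列表, 而不是字典本身.
注: 在 Python 3 中, d.keys() 返回的是字典的视图而不是拷贝, 上述第二种写法会抛出:
RuntimeError: dictionary changed size during iteration
需要先把字典中的键拷贝一份才可以, 例如: for k in list(d.keys()):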
同时遍历字典中的键值对
# Not very fast, has to re-hash every key and do a lookup
for k in d:
print k, '--->', d[k]
# Makes a big huge list
for k, v in d.items():
print k, '--->', v
Better
for k, v in d.iteritems():
print k, '--->', v
iteritems() 返回一个迭代器, 不会一次性生成完整的列表.
注: 在 Python 3 中, iteritems() 已被移除, items() 返回的是一个视图对象, 直接使用 items() 即可达到同样的效果.
使用键值对生成字典
names = ['raymond', 'rachel', 'matthew']
colors = ['red', 'green', 'blue']
d = dict(izip(names, colors))
# {'matthew': 'blue', 'rachel': 'green', 'raymond': 'red'}
注: python 3: d = dict(zip(names, colors))
使用字典进行计数
colors = ['red', 'green', 'red', 'blue', 'green', 'red']
# 适合新手的计数方法
d = {}
for color in colors:
if color not in d:
d[color] = 0
d[color] += 1
# {'blue': 1, 'green': 2, 'red': 3}
Better
d = {}
for color in colors:
d[color] = d.get(color, 0) + 1
d = defaultdict(int)
for color in colors:
d[color] += 1
使用字典进行分组
names = ['raymond', 'rachel', 'matthew', 'roger',
'betty', 'melissa', 'judith', 'charlie']
# In this example, we're grouping by name length
d = {}
for name in names:
key = len(name)
if key not in d:
d[key] = []
d[key].append(name)
# {5: ['roger', 'betty'], 6: ['rachel', 'judith'], 7: ['raymond', 'matthew', 'melissa', 'charlie']}
d = {}
for name in names:
key = len(name)
d.setdefault(key, []).append(name)
Better
d = defaultdict(list)
for name in names:
key = len(name)
d[key].append(name)
popitem() 是原子操作
d = {'matthew': 'blue', 'rachel': 'green', 'raymond': 'red'}
while d:
key, value = d.popitem()
print key, '-->', value
popitem 是原子操作, 在多线程编程时无需加锁.
连接多个字典
defaults = {'color': 'red', 'user': 'guest'}
parser = argparse.ArgumentParser()
parser.add_argument('-u', '--user')
parser.add_argument('-c', '--color')
namespace = parser.parse_args([])
command_line_args = {k:v for k, v in vars(namespace).items() if v}
# The common approach below allows you to use defaults at first, then override them
# with environment variables and then finally override them with command line arguments.
# It copies data like crazy, unfortunately.
d = defaults.copy()
d.update(os.environ)
d.update(command_line_args)
Better
d = ChainMap(command_line_args, os.environ, defaults)
注: ChainMap 在 Python 3.3 中引入, 位于 collections 模块.
Improving Clarity
Positional arguments and indices are nice
Keywords and names are better
The first way is convenient for the computer
The second corresponds to how humans think
使用关键词参数提高程序可读性
twitter_search('@obama', False, 20, True)
Better
twitter_search('@obama', retweets=False, numtweets=20, popular=True)
使用命名元组返回更具可读性的结果
# Old testmod return value
doctest.testmod()
# (0, 4)
# Is this good or bad? You don't know because it's not clear.
Better
# New testmod return value, a namedtuple
doctest.testmod()
# TestResults(failed=0, attempted=4)
To make a namedtuple:
# The factory is spelled `namedtuple` (all lowercase) and lives in `collections`.
from collections import namedtuple

# Tuple subclass whose two fields can be read by name as well as by position.
TestResults = namedtuple('TestResults', ['failed', 'attempted'])
解包
p = 'Raymond', 'Hettinger', 0x30, 'python@example.com'
# A common approach / habit from other languages
fname = p[0]
lname = p[1]
age = p[2]
email = p[3]
Better
fname, lname, age, email = p
The second approach uses tuple unpacking and is faster and more readable.
解包多个变量
def fibonacci(n):
x = 0
y = 1
for i in range(n):
print x
t = y
y = x + y
x = t
Better
def fibonacci(n):
x, y = 0, 1
for i in range(n):
print x
x, y = y, x + y
使用原子操作更新变量
tmp_x = x + dx * t
tmp_y = y + dy * t
tmp_dx = influence(m, x, y, dx, dy, partial='x')
tmp_dy = influence(m, x, y, dx, dy, partial='y')
x = tmp_x
y = tmp_y
dx = tmp_dx
dy = tmp_dy
Better
x, y, dx, dy = (x + dx * t,
y + dy * t,
influence(m, x, y, dx, dy, partial='x'),
influence(m, x, y, dx, dy, partial='y'))
效率问题
总的来说, 不要产生不必要的数据.
拼接字符串
names = ['raymond', 'rachel', 'matthew', 'roger',
'betty', 'melissa', 'judith', 'charlie']
s = names[0]
for name in names[1:]:
s += ', ' + name
print s
Better
print ', '.join(names)
使用合适的数据结构
names = ['raymond', 'rachel', 'matthew', 'roger',
'betty', 'melissa', 'judith', 'charlie']
del names[0]
# The below are signs you're using the wrong data structure
names.pop(0)
names.insert(0, 'mark')
Better
names = deque(['raymond', 'rachel', 'matthew', 'roger',
'betty', 'melissa', 'judith', 'charlie'])
# More efficient with deque
del names[0]
names.popleft()
names.appendleft('mark')
装饰器和上下文管理器
使用装饰器代替管理操作
# Mixes business / administrative logic and is not reusable
def web_lookup(url, saved={}):
if url in saved:
return saved[url]
page = urllib.urlopen(url).read()
saved[url] = page
return page
Better
@cache
def web_lookup(url):
return urllib.urlopen(url).read()
注: Python 3.2 及以后, 可以直接使用标准库中的 functools.lru_cache (Python 3.9 起还提供了 functools.cache).
Factor-out temporary contexts
# Saving the old, restoring the new
old_context = getcontext().copy()
getcontext().prec = 50
print Decimal(355) / Decimal(113)
setcontext(old_context)
Better
with localcontext(Context(prec=50)):
print Decimal(355) / Decimal(113)
打开以及关闭文件
f = open('data.txt')
try:
data = f.read()
finally:
f.close()
Better
with open('data.txt') as f:
data = f.read()
如何使用锁
# Make a lock
lock = threading.Lock()
# Old-way to use a lock
lock.acquire()
try:
print 'Critical section 1'
print 'Critical section 2'
finally:
lock.release()
Better
# New-way to use a lock
with lock:
print 'Critical section 1'
print 'Critical section 2'
使用ignored() 代替 pass exception
try:
os.remove('somefile.tmp')
except OSError:
pass
Better
with ignored(OSError):
os.remove('somefile.tmp')
使用上下文管理器减少临时变量
# Temporarily redirect standard out to a file and then return it to normal
with open('help.txt', 'w') as f:
oldstdout = sys.stdout
sys.stdout = f
try:
help(pow)
finally:
sys.stdout = oldstdout
Better
with open('help.txt', 'w') as f:
with redirect_stdout(f):
help(pow)
编写自己的redirect_stdout上下文管理器
@contextmanager
def redirect_stdout(fileobj):
    """Temporarily send everything written to ``sys.stdout`` to *fileobj*.

    Yields *fileobj* so it can be bound with ``with ... as``. The previous
    ``sys.stdout`` is put back on exit, whether or not the body raised.
    """
    oldstdout = sys.stdout
    sys.stdout = fileobj
    try:
        # Yield the parameter itself; the original misspelled it as `fieldobj`.
        yield fileobj
    finally:
        # Restore in `finally` so an exception cannot leave stdout redirected.
        sys.stdout = oldstdout
列表解析器以及生成器表达式
result = []
for i in range(10):
s = i ** 2
result.append(s)
print sum(result)
Better
print sum([i**2 for i in xrange(10)])
print sum(i**2 for i in xrange(10))