译自: http://www.fantascienza.net/leonardo/ar/python_best_practices.html


++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


有时候,即使是优秀的程序员,在刚开始使用Python时,也往往未能采用最佳的方案和数据结构。多年来,Python也逐渐累积了一些冗余和瑕疵(其中一部分会在Python 3.0中移除;本文基于Python 2.5,不针对之后的版本,比如Python 2.6,也不针对Jython、PyPy或IronPython,它们之间存在一些细微的差异),但总的来说,Python依然是一门非常干净、简洁的语言。本文的目的,就是帮助你避开程序开发实践中最容易碰到的误区。本文很简短,更多的解释你也可以在网上找到。


当然,我也可能是错的,但本文来自一些实践经验。因此,当你不同意我的观点时,我建议你通过邮件组或者网页去查找答案,而不是想当然地认为自己是对的。如果你确实发现了我的错误,或者有任何建议、看法,请通过邮件联系我们,我们会非常高兴,并改正错误。


差的代码格式

好的代码格式

x=5

if ( (x==8) and (y>5) ) : ...

1<<5&2

return(5);

while (x<5) : ...

7.

x = 5

if x == 8 and y > 5: ...

(1 << 5) & 2 

return 5

while x < 5: ...

7.0

print x,x*x+1

v[i + 1 + a] + v[i + 2 + b]

# 有时候规则可以打破,比如为了体现运算的分组,可以写成下面这样

print x, x*x + 1 

v[i+a+1] + v[i+b+2]

def Function ( x ): ...

def function(x): ...

class fooclass: ...

class Fooclass(object): ...

d = dict()

freqs = {}

# 变量最好用描述性名称

# 在小范围内,短的变量名也是不错的选择。

list = [1, 2, 3] 

dict = {'alpha': 0x1234}

sum = x + y

# 不要用内置名称(如 list、dict、sum)来做变量名

values = [1, 2, 3]

symbol_address = {'alpha': 0x1234}

tot = x + y


"some string" and 'some string' and

"""some string""" and '''some string'''

# 上述几种字符串写法是等价的

mapping = { 5 :"5", 6:"6" }

mapping = {5: "5", 6: "6"}

mapping = {5 : "5", 6 : "6"}

if mapping.has_key(6): ...

mapping = {5: "5", 6: "6"}

if 6 in mapping: ...

def function( x, l = [] ): ...

# 通常不用可变对象,比如列表来做默认值

def function(x, items=None): ...

    if items is None:

        items = []

if x == None: ...

if x is None: ...

x = 1

if z > 5:

  var1 = 55

# 总是用4个空格作为缩进

# (或者始终用TAB作为缩进,但这样不如用空格好)

x = 1

if z > 5:

    var1 = 55

mapping = {5 : "5", 6 : "6"}

for key, val in mapping.items(): ...

for key in mapping.keys(): ...

# 尽可能使用iter*开头的迭代方法

mapping = {5: "5", 6: "6"}

for key, val in mapping.iteritems(): ...

for key in mapping: ...

for i in range(10, 20000): ...

for i in xrange(10, 20000): ...


# Use to denote the code that has to

# run when a module is executed and not

# imported:

if __name__ == '__main__':


# Python profiler: 

python -m profile -o stats myscript.py

>>> import pstats

>>> p = pstats.Stats('stats')

>>> p.sort_stats('time').print_stats(15)


如果源码中含有非ASCII字符,请在文件首行添加编码声明:

# -*- coding: UTF-8 -*-

# 或者,如果记不住上面的写法,也可以用更简短的形式(此处为latin编码):

# coding: latin

al = [1, 2, 3]

for i in xrange(len(al)-1, -1, -1):

    del al[i]

items = [1, 2, 3]

del items[:]

# 如果程序速度不是最重要的话,可以直接重新绑定一个新的空列表:

items = []

# 如果你只是想删除对该列表的引用(即变量名本身),可以使用:

del items

repeat

    xxx

until yyy

# 等价于:

while True:

    xxx

    if yyy: break


# 把一个包含所需模块的zip压缩文件加入到模块搜索路径中

sys.path.append("some.zip")

a = 5

b = 6

aux = a

a = b

b = aux

a = 5

b = 6

a, b = b, a # 两个变量交换值,无需中间变量

if x < 10 and x > 2: ...

if 2 < x < 10: ...

a = 5

b = 5

c = 5

a = b = c = 5

if x == 1: y = fun1(x)

else if x == 2: y = fun2(x)

else if x == 3: y = fun3(x)

else: y = None

if x == 1: y = fun1(x)

elif x == 2: y = fun2(x)

elif x == 3: y = fun3(x)

else: y = None 

# 但有时候,使用字典无疑更好些:

funs = {1: fun1, 2: fun2, 3: fun3}

y = funs.get(x, lambda x:None)(x)

mapping = {5 : "5", 6 : "6"}

for key in mapping.iterkeys(): ...

mapping = {5: "5", 6: "6"}

for key in mapping: ...

al = [1, 2, 3]

for i in xrange(len(al)):

    print al[i]

al = [1, 2, 3]

for el in al:

    print el

al = [1, 2, 3]

for i in xrange(len(al)-1, -1, -1):

    print al[i]

al = [1, 2, 3]

for el in reversed(al):

    print el

class Test(object):

    def __init__(I, x): ...

class Test(object):

    def __init__(self, x): ...

# Compute the sum of the ...

def sum_of(x, y, z): ...

def sum_of(x, y, z): ...

    """Compute the sum of the ..."""

from operator import add

sl = ["ab", "cd", "ef"] 

all = ""

for s in sl:

    all += s

# Or:

sl = ["ab", "cd", "ef"]

all = reduce(lambda x,y: x+y, sl, "")

sl = ["ab", "cd", "ef"]

all = "".join(sl)

a = "this isn't a word, right?"

a = a.replace("'", " ")

a = a.replace(".", " ")

a = a.replace("?", " ")

a = a.replace(",", "")

# 改用 translate 来完成同样的替换,速度更快些:

from string import maketrans

tab = maketrans("'.?", "   ")

a = "this isn't a word, right."

afilt = a.translate(tab, ",")

values = ["stop",0,0]

values = ["stop", 0, 0]

def mul(x, y): return x*y

l = [2, 3]

print apply(mul, l)

def mul(x, y):

    return x * y

l = [2, 3]

print mul(*l)

vals = [2, 3, -5, 0]

result = []

for el in vals:

    if el > 0:

        result.append(el * el)

vals = [2, 3, -5, 0]

result = [el * el for el in vals if el > 0]

l = [0] * 4

m = [l] * 4

m[1][1] = 5

print m

# 创建矩阵的一种正确做法(每一行都是独立的列表):

m = [[0] * 4 for _ in xrange(4)]

m[1][1] = 5

print m

a = 1

print a / 2, a / float(2)

# 一种可替代方案:

from __future__ import division

a = 1

print a // 2, a / 2

class Foo(object):

    def __init__(self, x, y, z):

        self.x_public = x

        self.y_private = y

        self.z_veryprivate = z

    def getx(self):

        return self.x_public

print Foo(1, 2, 3).getx()

# Generally getters and setters are not used.
# Instance names starting with _ are meant as
# 'to not mess with' by convention.
# Instance names starting with __ are private
# and receive name mangling.
class Foo(object):
    def __init__(self, x, y, z):
        self.x_public = x
        self._y_private = y
        self.__z_veryprivate = z
print Foo(1, 2, 3).x_public

finder = re.compile("^\s*([\[\]])\s*([-+]?\d+)

\s*,\s*([-+]?\d+)\s*([\[\]])\s*$")

finder = re.compile(r"""

    ^ \s*             # start at beginning+ opt spaces

    ( [\[\]] )        # Group 1: opening bracket

        \s*           # optional spaces

        ( [-+]? \d+ ) # Group 2: first number

        \s* , \s*     # opt spaces+ comma+ opt spaces

        ( [-+]? \d+ ) # Group 3: second number

        \s*           # opt spaces

    ( [\[\]] )        # Group 4: closing bracket

    \s* $             # opt spaces+ end at the end

    """, flags=re.VERBOSE)

# 上面的关于正则表达式代码就很容易读懂.

# 每行就像代码一样.


# 下面是另外一个不错的关于正则表达式排版方式:

spaces = r"\s*"            # optional spaces

number = r"( [-+]? \d+ )"  # Group

bracket = r"( [\[\]] )"    # Group. Closing bracket

parts = ["^", bracket, number, ",", number, bracket, "$"]

finder = re.compile(spaces.join(parts), flags=re.VERBOSE)

def function(data):

    """A comment"""

    ...implementation...

# 使用doctest模块(或者其他测试模块):

def function(data):

    """A comment

    >>> function()
    None
    >>> function(1)
    result1
    >>> function("a")
    Traceback (most recent call last):
      ...
    TypeError
    """
    ...implementation...

if __name__ == "__main__":
    import doctest
    doctest.testmod()
    print "Tests done."

x = (1, 2, 6, 55, 63, 96, 125, 256, \

     301, 456, 958, 1256, \

     1359, 2568, 3597)

x = (1, 2, 6, 55, 63, 96, 125, 256,

     301, 456, 958, 1256,

     1359, 2568, 3597)

# 过长的行必须用\来续行

# 但在() [] {}内部,\是不必要的

from Tkinter import *

from mymodule import *

import Tkinter as tk

from mymodule import fun1, Class1, baseconvert as bc

import psyco

psyco.bind(myfun1) 

a = [3.56, 2.12]

try:

    import psyco

    # psyco中的类非常有用

    from psyco.classes import __metaclass__

    psyco.bind(myfun1)

except ImportError: pass


# 配合psyco使用时,array.array中的

# 双精度值运算可以更快些

import array

a = array.array("d", [3.56, 2.12])

# 在一些情形下,使用字符串数组同样很快


# psyco同itertools,map,filter,生成器一起使用时可能会很慢

# 但会比列表解析快

# 使用Psyco时,最快的代码是底层(low level)风格的代码

 

# 连续输出多个字符串,且它们之间不带空格:

from sys import stdout

stdout.write(string1)

stdout.write(string2)

 

This is good enough:

words = ['me', 'do', 'bye', 'taz', 'foo', 'bar']

A shorter, more readable, but slower alternative:

words = 'me do bye taz foo bar'.split()

# sorting on the second item of the tuple

# try to remove the i index from the temporary tuples

lp = [(5J,"b"),(2J,"c"),(3+1J,"a"),(1+2J,"a")]

lp2 = [(c, i, n) for i,(n, c) in enumerate(lp)]

lp2.sort()

print [(n, c) for (c, i, n) in lp2]

from operator import itemgetter

lp = [(5J, "b"), (2J, "c"), (3+1J, "a"), (1+2J, "a")]

print sorted(lp, key=itemgetter(1))

vals = [5, 7 ,8]

tot = -2.0

for v in vals:

    tot += v

vals = [5, 7 ,8]

tot = sum(vals, -2.0)

ll = [[1, 2, 3], [4], [5, 6]]

print sum(ll, [])

data = [[1, 2, 3], [4], [5, 6]]

result = []

for sublist in data:

    result.extend(sublist)


# 最快速度实现

from itertools import imap

data = [[1, 2, 3], [4], [5, 6]]

result = [None] * sum(imap(len, data))

pos = 0

for sublist in data:

    lensl = len(sublist)

    result[pos : pos+lensl] = sublist

    pos += lensl

print "%s %s" % (string1, string2)

print '"' + chr(c) + '":', freq[c]

print string1, string2

print '"%c": %d' % (c, freq[c])

[' ', c][c.isalpha()]

# Python 2.5以上版本支持:

(c if c.isalpha() else ' ')


# 如何反转一个字符串,列表等.

alist[::-1]

astring[::-1]

# To negate (inplace) each second

#  element of alist:

result = []

for (i, v) in enumerate(alist):

    # faster than i % 2

    if i & 1 == 0: 

        result.append(v)

    else:

        result.append(-v)

alist[:] = result

from operator import neg

alist[1::2] = map(neg, alist[1::2])


# 下面方式虽慢点,但是易读:

alist[1::2] = [-el for el in alist[1::2]]


# 浅拷贝一个字典或列表:

# (元组无需拷贝)

newlist = list(alist)

newdict = dict(adict)

# 或者仅仅是:

newlist = alist[:]

import sys

sys.exit()

# 终止一个控制台程序:

raise SystemExit


#或者仅仅是:

exit()

if type(s) == type(""): ...

if type(seq) == list or \

   type(seq) == tuple: ...

if isinstance(s, basestring): ...

if isinstance(seq, (list, tuple)): ...

# Or even:

if hasattr(seq, "__getitem__"): ...

# But quite often in dynamic languages you

# don't test types, you just use them (look

# for duck typing), catching exception that

# may occur.

name1 = 5; name2 = 20; print name2

a = 1

b = 2

c = 3

name1 = 5

name2 = 20

print name2

a, b, c = 1, 2, 3

prima = 1

rossa = "Il colore rosso"

léger = 30

# 变量名请只使用英文单词:

first = 1

red = "Il colore rosso"

light = 30


__del__ method of classes is 

usually left undefined.

try:

    fin = file("absent_file.txt")

except:

    ...

try:

    something()

except:

    ...

# 通常应明确指定要捕获的异常类型:

try:

    fin = file("absent_file.txt")

except IOError:

    ...

try:

    something()

except someException:

    ...

except ImportError, IOError: ...

except (ImportError, IOError): ...

bytes = array.array('B', [0] * nbytes)

# 或者:

from itertools import repeat

bytes = array.array('B', repeat(0, nbytes))

# 下面的方式更快些

bytes = array.array('B', [0]) * nbytes

freqs = {}

for c in "abracadabra":

    try:

        freqs[c] += 1

    except:

        freqs[c] = 1

# 简单方式:

freqs = {}

for c in "abracadabra":

    freqs[c] = freqs.get(c, 0) + 1


# 常用方式:

freqs = {}

for c in "abracadabra":

    if c in freqs:

        freqs[c] += 1

    else:

        freqs[c] = 1


# 或者在Python 2.5以上版本使用:

from collections import defaultdict

freqs = defaultdict(int)

for c in "abracadabra":

    freqs[c] += 1

someitems = set([1, 2, 3])
somemap = {1:2, 3:4, 5:6}
print list(someitems)[0]
print list(somemap)[0]

someitems = set([1, 2, 3])

somemap = {1: 2, 3: 4, 5: 6}

print iter(someitems).next()

print iter(somemap).next()

from time import clock

# 下面的导入方式在Windows和LINUX下都工作正常:

from timeit import default_timer as clock

# 或者经常使用timeit模块来计算程序运行时间


请同时参阅Python代码规范: http://www.python.org/dev/peps/pep-0008/