## 函数式编程:函数即对象
直接赋给变量
My_sum = sum  # bind the built-in function object to a second name; My_sum(...) now calls sum
有属性和方法
`__name__`
`__call__`
高阶函数
def f1(f, a, b):
    """Call ``f`` with ``a`` and ``b`` and print the result.

    Example of a higher-order function: ``f`` is any callable that accepts
    two positional arguments. Nothing is returned.
    """
    # Parenthesised form prints the same value on Python 2 and Python 3.
    print(f(a, b))
统计单词词频 map reduce
很多文章,要统计它们的十大最热门词汇
1.文本处理,统计每个文章的词频
2.合并不同文章的词频
3.排序,输出
一般:
# Serial version: count the word frequencies of each article with map(), merge
# the per-article results with reduce(), then print the most frequent words.
# NOTE(review): the string and numeric literals of this listing were lost in
# extraction; every value marked NOTE(review) below is a reconstruction.
import string
from collections import Counter
from functools import reduce  # built-in on Python 2, lives in functools on Python 3

__author__ = ""  # NOTE(review): original author string lost in extraction

# NOTE(review): the original punctuation list was lost; ASCII punctuation is assumed.
_EDGE_PUNCTUATION = string.punctuation
TOP_N = 10  # the notes ask for the ten most frequent words


def readFile(file_name):
    """Return the word frequencies of one text file.

    The file is split on whitespace; each token has leading and trailing
    punctuation removed and is lower-cased. Tokens that become empty (for
    example a bare "...") are skipped instead of raising IndexError.

    Args:
        file_name: path of the text file to read.

    Returns:
        A list of ``(word, count)`` tuples sorted by descending count; words
        with equal counts keep their order of first appearance.
    """
    with open(file_name) as f:
        tokens = f.read().split()
    words = (token.strip(_EDGE_PUNCTUATION).lower() for token in tokens)
    # Counter replaces the quadratic list.count() scan of the original.
    return Counter(word for word in words if word).most_common()


def mergeStatic(list1, list2):
    """Merge two ``(word, count)`` lists, adding the counts of shared words.

    Args:
        list1: list of ``(word, count)`` tuples (may be empty).
        list2: list of ``(word, count)`` tuples (may be empty).

    Returns:
        A new list of ``(word, count)`` tuples sorted by descending count.
    """
    counts1 = {}
    for word, num in list1:
        # Keep the first count seen for a word, like list.index() did.
        counts1.setdefault(word, num)
    merged = [(word, num + counts1.get(word, 0)) for word, num in list2]
    words2 = {word for word, _ in list2}
    # Words that occur only in list1 keep their own count.
    merged.extend((word, num) for word, num in list1 if word not in words2)
    return sorted(merged, key=lambda item: item[1], reverse=True)


if __name__ == "__main__":
    # NOTE(review): the article paths were lost in extraction; list them here.
    file_list = []
    cc = map(readFile, file_list)
    # The initial [] keeps reduce() from failing when file_list is empty.
    word_list = reduce(mergeStatic, cc, [])
    for word in word_list[:TOP_N]:
        # NOTE(review): original format string lost; "word count" layout assumed.
        print("%-5s %d" % (word[0], word[1]))
并发执行
# Concurrent version: the per-article word counts are computed by a pool of
# worker processes, then merged with reduce() in the parent process.
# NOTE(review): the string and numeric literals of this listing were lost in
# extraction; every value marked NOTE(review) below is a reconstruction.
import multiprocessing as mult
import os
import string
import time
from collections import Counter
from functools import reduce  # built-in on Python 2, lives in functools on Python 3

__author__ = ""  # NOTE(review): original author string lost in extraction

# NOTE(review): the original punctuation list was lost; ASCII punctuation is assumed.
_EDGE_PUNCTUATION = string.punctuation
# NOTE(review): the original sleep duration was lost; 2 seconds is a placeholder.
SIMULATED_DELAY_SECONDS = 2
TOP_N = 10  # the notes ask for the ten most frequent words


def readFile(file_name, delay=SIMULATED_DELAY_SECONDS):
    """Return the word frequencies of one text file.

    Sleeps for ``delay`` seconds and prints the current process id before
    reading, as the original listing did. The file is split on whitespace;
    each token has leading and trailing punctuation removed and is
    lower-cased. Tokens that become empty are skipped instead of raising
    IndexError.

    Args:
        file_name: path of the text file to read.
        delay: seconds to sleep before reading; pass 0 to skip the pause.

    Returns:
        A list of ``(word, count)`` tuples sorted by descending count; words
        with equal counts keep their order of first appearance.
    """
    time.sleep(delay)
    print(os.getpid())
    with open(file_name) as f:
        tokens = f.read().split()
    words = (token.strip(_EDGE_PUNCTUATION).lower() for token in tokens)
    # Counter replaces the quadratic list.count() scan of the original.
    return Counter(word for word in words if word).most_common()


def wordnumStatic(list1, list2):
    """Merge two ``(word, count)`` lists, adding the counts of shared words.

    Args:
        list1: list of ``(word, count)`` tuples (may be empty).
        list2: list of ``(word, count)`` tuples (may be empty).

    Returns:
        A new list of ``(word, count)`` tuples sorted by descending count.
    """
    counts1 = {}
    for word, num in list1:
        # Keep the first count seen for a word, like list.index() did.
        counts1.setdefault(word, num)
    merged = [(word, num + counts1.get(word, 0)) for word, num in list2]
    words2 = {word for word, _ in list2}
    # Words that occur only in list1 keep their own count.
    merged.extend((word, num) for word, num in list1 if word not in words2)
    return sorted(merged, key=lambda item: item[1], reverse=True)


# The __main__ guard is kept: worker processes may re-import this module.
if __name__ == "__main__":
    # NOTE(review): the article paths were lost in extraction; list them here.
    file_list = []
    # NOTE(review): the original pool size was lost; one worker per file,
    # capped at the CPU count and never below 1, is used instead.
    pool_num = max(1, min(len(file_list), mult.cpu_count()))
    pool = mult.Pool(pool_num)
    try:
        start = time.time()
        # pool.map runs readFile in the worker processes.
        cc = pool.map(readFile, file_list)
        end = time.time()
        print(end - start)
    finally:
        pool.close()
        pool.join()
    # The initial [] keeps reduce() from failing when file_list is empty.
    word_list = reduce(wordnumStatic, cc, [])
    for word in word_list[:TOP_N]:
        # NOTE(review): original format string lost; "word count" layout assumed.
        print("%-5s %d" % (word[0], word[1]))