小学生学习Python工具小学 python

转载
lazihuman 2024-07-04 17:23:30
文章标签 小学生学习Python工具 python 字段 Python #if 文章分类 Python 后端开发
2019独角兽企业重金招聘Python工程师标准>>>
小学生学习Python工具小学 python_#if
将每个学生填写的信息在检查后自动汇总到一个新的 Excel 表格。
#-*- encoding: utf-8 -*-
'''
  小学学生学籍信息汇总 （带身份证号码有效性检查）
  功能： 
     1， 将一个目录中的所有表格按姓名排序后汇总入一个表格（write2file函数）
     2， 检查手动汇总的表格与原始数据的一致性 （check函数）
  版权：GPL
  owner：jhonglei@gmail.com
  
   待完成的功能：
    1， 按照拼音排序，而不是unicode编码
    2， 自动按照姓名或身份证去重（并显示 发生重复的文件名称）
    3， 做字段间的一致性检查
    4， 自动修正功能（这个虽然不好，但是方便大家工作，比如民族写成了汉而非汉族之类，遗漏的地址电话号码补齐等，班号填写错误等），
    5， 允许一个输入表中有多个条目（如 多个小孩来自同一个家庭 ）
'''

import os

import re
import traceback
from xlrd import open_workbook  

## 根据需要修改的字段。。
JOIN_YEAR = "201509"
CLASS_NUM ="2015102"
#SCHOOL_NUM = "2222" #

def check_id(idcard):
    '''
      Validate the check character of an 18-character ID card number.

      The first 17 digits are multiplied by fixed positional weights, the
      products are summed, and the sum modulo 11 indexes a lookup string that
      gives the expected 18th character (compared case-insensitively).

      :param idcard: the ID card number as a string.
      :raises ValueError: (a subclass of ``Exception``) when the value is not
          a string of 17 digits plus one check character, or when the check
          character does not match.
    '''
    weights = (7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2)
    check_chars = '10X98765432'

    # Reject malformed input up front: zip() would otherwise silently truncate
    # a short number and could accept it by accident (e.g. "15").
    if not isinstance(idcard, str) or len(idcard) != len(weights) + 1:
        raise ValueError("身份证号码错误")
    body = idcard[:-1]
    # isdecimal() accepts exactly the characters int() can convert below.
    if not body.isdecimal():
        raise ValueError("身份证号码错误")

    total = sum(int(digit) * weight for digit, weight in zip(body, weights))
    if idcard[-1].upper() != check_chars[total % 11]:
        raise ValueError("身份证号码错误")
import re
def read_one_file(targetFile, sheetName, is_check=False):
    '''
      Yield every row after the first (header) row of one worksheet.

      In check mode (``is_check=True``) rows are yielded exactly as read.
      Otherwise each row is cleaned and validated first:

      * blanks, CR, LF, form feed and tab are removed from every string cell;
      * columns 4, 14, 51 and 63 (address fields) are reduced to their first
        run of digits, unless the cell is empty;
      * column 12 is checked with ``check_id``;
      * columns 15, 16, 17 and 18 must equal ``CLASS_NUM``, ``JOIN_YEAR``,
        "就近入学" and "走读" respectively.

      A row that fails validation is reported on stdout (traceback, row, file
      name, error) and is still yielded, so one bad row does not stop the run.

      :param targetFile: path of the workbook to open.
      :param sheetName: name of the worksheet to read.
      :param is_check: when true, skip all cleaning and validation.
      :return: generator of rows, each a list of cell values.
    '''
    # Backslash escapes are required here; the previous "[ /r/n/f/t]" removed
    # the literal characters '/', 'r', 'n', 'f' and 't' from the data.
    blanks = re.compile(r"[ \r\n\f\t]")
    first_digits = re.compile(r"\d+")

    def addr_code(row, index):
        '''Replace an address cell by the numeric code it contains.'''
        value = row[index]
        # Empty cells are left alone.
        if not value:
            return
        found = first_digits.search(value) if isinstance(value, str) else None
        if found is None:
            raise ValueError(
                "no numeric code in column {0}: {1!r}".format(index, value))
        row[index] = found.group()

    def expect(row, index, expected):
        '''Raise when a cell differs from its required value.'''
        # Explicit raise instead of assert: asserts vanish under ``python -O``
        # and carry no message.
        if row[index] != expected:
            raise ValueError(
                "column {0}: expected {1!r}, got {2!r}".format(
                    index, expected, row[index]))

    # formatting_info=True is kept from the original call.
    book = open_workbook(filename=targetFile, formatting_info=True)
    table = book.sheet_by_name(sheetName)

    for row_index in range(1, table.nrows):
        row_value = table.row_values(row_index)

        # Check mode compares raw data, so nothing is auto-corrected.
        if is_check:
            yield row_value
            continue

        for col_index, col_val in enumerate(row_value):
            if isinstance(col_val, str):
                row_value[col_index] = blanks.sub("", col_val)

        try:
            for index in (4, 14, 51, 63):
                addr_code(row_value, index)
            check_id(row_value[12])
            expect(row_value, 15, CLASS_NUM)
            expect(row_value, 16, JOIN_YEAR)
            expect(row_value, 17, "就近入学")
            expect(row_value, 18, "走读")
            # TODO: other per-field and cross-field checks are not done yet.
        except Exception as e:
            # Best effort by design: report the bad row and keep going.
            print(traceback.format_exc())
            print(row_value)
            print("Error", targetFile)
            print(e)
        else:
            print(row_value)
        yield row_value
        #for index , value in enumerate(row_value):
            #if "130104200905133362" in value:
                #print (index)
            #if ("000000" in value):
                #print (index, value)
    
     
def read_files(dir_path, is_check=False):
    '''
       Yield the rows of every ``.xls`` workbook found directly in a directory.

       Files are visited in sorted name order so that repeated runs produce
       the same sequence.  Only entries whose extension is ``.xls`` (compared
       case-insensitively) are read; the sheet "学生基础信息" of each one is
       passed to ``read_one_file``.

       :param dir_path: directory containing the input workbooks.
       :param is_check: forwarded to ``read_one_file``.
       :return: generator of rows, each a list of cell values.
    '''
    for filename in sorted(os.listdir(dir_path)):
        # Compare the real extension: endswith("xls") also matched names such
        # as "fakexls" and missed upper-case ".XLS".
        if os.path.splitext(filename)[1].lower() != ".xls":
            continue
        file_path = os.path.join(dir_path, filename)
        yield from read_one_file(file_path, sheetName="学生基础信息", is_check=is_check)
 

def write2file(dir_path, targetFile=None):
    '''
       Merge the rows of all input workbooks into one sorted .xlsx file.

       Rows come from ``read_files(dir_path)``, are sorted column by column,
       and are appended to the active sheet of a new openpyxl workbook.
       Per the original author's notes, openpyxl is used because xlwt
       sometimes had problems, and CSV was avoided because long numeric
       strings were converted to numbers.

       :param dir_path: directory containing the input workbooks.
       :param targetFile: output path; "output.xlsx" when empty or None.
    '''
    from openpyxl import Workbook

    def sort_key(row):
        # A column can hold both '' (empty cell) and float (numeric cell);
        # comparing those directly raises TypeError.  Ranking non-strings
        # before strings keeps the original order for same-type cells.
        return [(isinstance(cell, str), cell) for cell in row]

    wb = Workbook()
    ws = wb.active
    for row in sorted(read_files(dir_path), key=sort_key):
        ws.append(row)
    wb.save(targetFile or "output.xlsx")

def get_col_names ( filename, sheetName):
    '''
      Return the cell values of row 0 of the named sheet in the workbook.
    '''
    workbook = open_workbook(filename=filename, formatting_info=True)
    sheet = workbook.sheet_by_name(sheetName)
    first_row = sheet.row_values(0)
    return first_row
    
def check(dir_path, sheetName, file_name="学籍模板最新正式统计表修改错误信息.xls"):
    '''
      Compare a manually merged workbook against the original input files.

      Rows of both sides are read in check mode (no auto-correction) and
      matched on the value in column 1, which the report prints as the name.
      If several rows share that value, the last one read wins.  Every merged
      row with no source row is reported as "source missed"; otherwise each
      differing cell is printed with its column title, the merged value and
      the source value.

      :param dir_path: directory containing the original input workbooks.
      :param sheetName: worksheet to read from the merged workbook.
      :param file_name: path of the merged workbook; defaults to the file
          name that used to be hard-coded here.
    '''
    col_names = get_col_names(file_name, sheetName)
    dest_dict = {
        item[1]: item
        for item in read_one_file(file_name, sheetName=sheetName, is_check=True)
    }
    source_items = {
        item[1]: item for item in read_files(dir_path, is_check=True)
    }

    for name, dest_val in dest_dict.items():
        if name not in source_items:
            print ("source missed:", name)
            continue
        src_val = source_items[name]
        for col_index, dest_col_val in enumerate(dest_val):
            # A source sheet may have fewer columns than the merged one;
            # report the absent cell as a mismatch instead of an IndexError.
            src_col_val = src_val[col_index] if col_index < len(src_val) else None
            if dest_col_val != src_col_val:
                print ("{0} , {1} 目的 '{2}' 源 '{3}'".format(  name, col_names[col_index], dest_col_val, src_col_val)  )

def main():
    '''Run the consistency check on the hard-coded input directory.'''
    input_dir = r'H:\Work\Python\xueji\app\input_files'
    sheet = "学生基础信息"
    check(input_dir, sheetName=sheet)


if __name__ == "__main__":
    main()

    
    #write2file(  dir_path)
#-*- encoding: utf-8 -*-
'''
  小学学生学籍信息汇总 （带身份证号码有效性检查）
  功能： 
     1， 将一个目录中的所有表格按姓名排序后汇总入一个表格（write2file函数）
     2， 检查手动汇总的表格与原始数据的一致性 （check函数）
  版权：GPL
  owner：jhonglei@gmail.com
  
   待完成的功能：
    1， 按照拼音排序，而不是unicode编码
    2， 自动按照姓名或身份证去重（并显示 发生重复的文件名称）
    3， 做字段间的一致性检查
    4， 自动修正功能（这个虽然不好，但是方便大家工作，比如民族写成了汉而非汉族之类，遗漏的地址电话号码补齐等，班号填写错误等），
    5， 允许一个输入表中有多个条目（如 多个小孩来自同一个家庭 ）
'''

import os

import re
import traceback
from xlrd import open_workbook  

## 根据需要修改的字段。。
JOIN_YEAR = "201509"
CLASS_NUM ="2015102"
#SCHOOL_NUM = "2222" #

def check_id(idcard):
    '''
      Validate the check character of an 18-character ID card number.

      The first 17 digits are multiplied by fixed positional weights, the
      products are summed, and the sum modulo 11 indexes a lookup string that
      gives the expected 18th character (compared case-insensitively).

      :param idcard: the ID card number as a string.
      :raises ValueError: (a subclass of ``Exception``) when the value is not
          a string of 17 digits plus one check character, or when the check
          character does not match.
    '''
    weights = (7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2)
    check_chars = '10X98765432'

    # Reject malformed input up front: zip() would otherwise silently truncate
    # a short number and could accept it by accident (e.g. "15").
    if not isinstance(idcard, str) or len(idcard) != len(weights) + 1:
        raise ValueError("身份证号码错误")
    body = idcard[:-1]
    # isdecimal() accepts exactly the characters int() can convert below.
    if not body.isdecimal():
        raise ValueError("身份证号码错误")

    total = sum(int(digit) * weight for digit, weight in zip(body, weights))
    if idcard[-1].upper() != check_chars[total % 11]:
        raise ValueError("身份证号码错误")
import re
def read_one_file(targetFile, sheetName, is_check=False):
    '''
      Yield every row after the first (header) row of one worksheet.

      In check mode (``is_check=True``) rows are yielded exactly as read.
      Otherwise each row is cleaned and validated first:

      * blanks, CR, LF, form feed and tab are removed from every string cell;
      * columns 4, 14, 51 and 63 (address fields) are reduced to their first
        run of digits, unless the cell is empty;
      * column 12 is checked with ``check_id``;
      * columns 15, 16, 17 and 18 must equal ``CLASS_NUM``, ``JOIN_YEAR``,
        "就近入学" and "走读" respectively.

      A row that fails validation is reported on stdout (traceback, row, file
      name, error) and is still yielded, so one bad row does not stop the run.

      :param targetFile: path of the workbook to open.
      :param sheetName: name of the worksheet to read.
      :param is_check: when true, skip all cleaning and validation.
      :return: generator of rows, each a list of cell values.
    '''
    # Backslash escapes are required here; the previous "[ /r/n/f/t]" removed
    # the literal characters '/', 'r', 'n', 'f' and 't' from the data.
    blanks = re.compile(r"[ \r\n\f\t]")
    first_digits = re.compile(r"\d+")

    def addr_code(row, index):
        '''Replace an address cell by the numeric code it contains.'''
        value = row[index]
        # Empty cells are left alone.
        if not value:
            return
        found = first_digits.search(value) if isinstance(value, str) else None
        if found is None:
            raise ValueError(
                "no numeric code in column {0}: {1!r}".format(index, value))
        row[index] = found.group()

    def expect(row, index, expected):
        '''Raise when a cell differs from its required value.'''
        # Explicit raise instead of assert: asserts vanish under ``python -O``
        # and carry no message.
        if row[index] != expected:
            raise ValueError(
                "column {0}: expected {1!r}, got {2!r}".format(
                    index, expected, row[index]))

    # formatting_info=True is kept from the original call.
    book = open_workbook(filename=targetFile, formatting_info=True)
    table = book.sheet_by_name(sheetName)

    for row_index in range(1, table.nrows):
        row_value = table.row_values(row_index)

        # Check mode compares raw data, so nothing is auto-corrected.
        if is_check:
            yield row_value
            continue

        for col_index, col_val in enumerate(row_value):
            if isinstance(col_val, str):
                row_value[col_index] = blanks.sub("", col_val)

        try:
            for index in (4, 14, 51, 63):
                addr_code(row_value, index)
            check_id(row_value[12])
            expect(row_value, 15, CLASS_NUM)
            expect(row_value, 16, JOIN_YEAR)
            expect(row_value, 17, "就近入学")
            expect(row_value, 18, "走读")
            # TODO: other per-field and cross-field checks are not done yet.
        except Exception as e:
            # Best effort by design: report the bad row and keep going.
            print(traceback.format_exc())
            print(row_value)
            print("Error", targetFile)
            print(e)
        else:
            print(row_value)
        yield row_value
        #for index , value in enumerate(row_value):
            #if "130104200905133362" in value:
                #print (index)
            #if ("000000" in value):
                #print (index, value)
    
     
def read_files(dir_path, is_check=False):
    '''
       Yield the rows of every ``.xls`` workbook found directly in a directory.

       Files are visited in sorted name order so that repeated runs produce
       the same sequence.  Only entries whose extension is ``.xls`` (compared
       case-insensitively) are read; the sheet "学生基础信息" of each one is
       passed to ``read_one_file``.

       :param dir_path: directory containing the input workbooks.
       :param is_check: forwarded to ``read_one_file``.
       :return: generator of rows, each a list of cell values.
    '''
    for filename in sorted(os.listdir(dir_path)):
        # Compare the real extension: endswith("xls") also matched names such
        # as "fakexls" and missed upper-case ".XLS".
        if os.path.splitext(filename)[1].lower() != ".xls":
            continue
        file_path = os.path.join(dir_path, filename)
        yield from read_one_file(file_path, sheetName="学生基础信息", is_check=is_check)
 

def write2file(dir_path, targetFile=None):
    '''
       Merge the rows of all input workbooks into one sorted .xlsx file.

       Rows come from ``read_files(dir_path)``, are sorted column by column,
       and are appended to the active sheet of a new openpyxl workbook.
       Per the original author's notes, openpyxl is used because xlwt
       sometimes had problems, and CSV was avoided because long numeric
       strings were converted to numbers.

       :param dir_path: directory containing the input workbooks.
       :param targetFile: output path; "output.xlsx" when empty or None.
    '''
    from openpyxl import Workbook

    def sort_key(row):
        # A column can hold both '' (empty cell) and float (numeric cell);
        # comparing those directly raises TypeError.  Ranking non-strings
        # before strings keeps the original order for same-type cells.
        return [(isinstance(cell, str), cell) for cell in row]

    wb = Workbook()
    ws = wb.active
    for row in sorted(read_files(dir_path), key=sort_key):
        ws.append(row)
    wb.save(targetFile or "output.xlsx")

def get_col_names ( filename, sheetName):
    '''
      Return the cell values of row 0 of the named sheet in the workbook.
    '''
    workbook = open_workbook(filename=filename, formatting_info=True)
    sheet = workbook.sheet_by_name(sheetName)
    first_row = sheet.row_values(0)
    return first_row
    
def check(dir_path, sheetName, file_name="学籍模板最新正式统计表修改错误信息.xls"):
    '''
      Compare a manually merged workbook against the original input files.

      Rows of both sides are read in check mode (no auto-correction) and
      matched on the value in column 1, which the report prints as the name.
      If several rows share that value, the last one read wins.  Every merged
      row with no source row is reported as "source missed"; otherwise each
      differing cell is printed with its column title, the merged value and
      the source value.

      :param dir_path: directory containing the original input workbooks.
      :param sheetName: worksheet to read from the merged workbook.
      :param file_name: path of the merged workbook; defaults to the file
          name that used to be hard-coded here.
    '''
    col_names = get_col_names(file_name, sheetName)
    dest_dict = {
        item[1]: item
        for item in read_one_file(file_name, sheetName=sheetName, is_check=True)
    }
    source_items = {
        item[1]: item for item in read_files(dir_path, is_check=True)
    }

    for name, dest_val in dest_dict.items():
        if name not in source_items:
            print ("source missed:", name)
            continue
        src_val = source_items[name]
        for col_index, dest_col_val in enumerate(dest_val):
            # A source sheet may have fewer columns than the merged one;
            # report the absent cell as a mismatch instead of an IndexError.
            src_col_val = src_val[col_index] if col_index < len(src_val) else None
            if dest_col_val != src_col_val:
                print ("{0} , {1} 目的 '{2}' 源 '{3}'".format(  name, col_names[col_index], dest_col_val, src_col_val)  )

def main():
    '''Run the consistency check on the hard-coded input directory.'''
    input_dir = r'H:\Work\Python\xueji\app\input_files'
    sheet = "学生基础信息"
    check(input_dir, sheetName=sheet)


if __name__ == "__main__":
    main()

    
    #write2file(  dir_path)
本文章为转载内容，我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题，欢迎原作者联系我们进行内容更正或删除文章。