背景:
在对 Superset 进行二次开发后,每次版本升级都需要手动迁移代码。
如果能在 Superset 项目众多的文件中记录下修改过的文件,迁移代码时只需重点关注这些文件的修改内容即可。
但是项目中有多次 commit,每个 commit 又涉及不同的文件,
如何快速梳理出二开工作中修改过的文件,是我们本次版本升级流程中需要优化的工作内容之一。
以下代码通过 GitLab REST API 获取被修改的文件列表。
配置访问令牌:
设置 - 访问令牌 - 输入名称、有效期,全选权限范围即可

获取项目ID:

获取项目的提交记录:
- 使用 GET /projects/:id/repository/commits端点来获取提交记录。
- 传入分支信息,可以指定具体分支
- 传入提交人信息,可以指定具体提交人
- 可以自定义时间(since - until),筛选时间范围内的commit 信息
查看提交记录的具体修改文件:
- 使用 GET /projects/:id/repository/commits/:sha/diff端点来获取某个特定提交记录的具体修改文件。
import requests
import logging
from datetime import datetime
from collections import defaultdict
 
# Configure logging: INFO level, each line formatted as "time - level - message".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
     
# Fetch the commits made by one author on a specific branch.
def get_commits(project_id, api_token, branch, author):
    """Return all commits on ``branch`` whose ``author_name`` equals ``author``.

    Calls ``GET /projects/:id/repository/commits`` and walks every result
    page. The endpoint is paginated, so a single request would only return
    the first page and silently miss older commits.

    Args:
        project_id: GitLab project ID, interpolated into the URL as-is.
        api_token: Access token sent in the ``Private-Token`` header.
        branch: Branch name, sent as the ``ref_name`` query parameter.
        author: Author name. It is sent as the ``author_name`` query
            parameter and the response is additionally filtered client-side
            on an exact ``author_name`` match.

    Returns:
        A list of commit dicts as returned by the API. When a request
        returns a non-200 status the error is logged and the commits
        collected so far are returned (an empty list if the first page
        fails).
    """
    url = f'https://gitlab.com/api/v4/projects/{project_id}/repository/commits'
    headers = {'Private-Token': api_token}
    params = {
        # Sent through ``params`` so that requests URL-encodes branch names
        # containing characters such as '#' or '&'.
        'ref_name': branch,
        # NOTE(review): the exact-match filter below is what guarantees the
        # author restriction; confirm whether the server honours this key.
        'author_name': author,
        'per_page': 100,
        'page': 1,
    }

    matching_commits = []
    while True:
        # A timeout keeps the script from hanging forever on a stalled connection.
        response = requests.get(url, headers=headers, params=params, timeout=30)
        if response.status_code != 200:
            logging.error(f"Failed to fetch commits: {response.status_code}")
            break

        matching_commits.extend(
            commit for commit in response.json()
            if commit['author_name'] == author
        )

        # GitLab reports the following page number in this header; it is
        # empty (or absent) on the last page.
        next_page = response.headers.get('X-Next-Page')
        if not next_page:
            break
        params['page'] = next_page

    return matching_commits
     
# Filter commits by their creation date.
def filter_commits_by_date(commits, since, until):
    """Return the commits whose ``created_at`` lies in ``[since, until)``.

    All timestamps are parsed as ISO 8601 and compared in UTC, so values
    with a ``Z`` suffix, with a numeric offset such as ``+08:00``, and with
    or without fractional seconds are all accepted.

    Args:
        commits: Iterable of commit dicts, each with a ``created_at`` string.
        since: Inclusive lower bound, e.g. ``'2024-05-21T00:00:00Z'``.
        until: Exclusive upper bound, e.g. ``'2024-06-13T23:59:59Z'``.

    Returns:
        A list with the matching commit dicts, in their original order.

    Raises:
        ValueError: If any timestamp is not a valid ISO 8601 string.
    """
    # Local import: the file's import section only brings in ``datetime``.
    from datetime import timezone

    def to_utc(timestamp):
        """Parse an ISO 8601 string into a timezone-aware UTC datetime."""
        text = timestamp.strip()
        # ``fromisoformat`` only understands a trailing 'Z' from Python 3.11
        # on, so normalise it to an explicit offset first.
        if text[-1:] in ('Z', 'z'):
            text = text[:-1] + '+00:00'
        parsed = datetime.fromisoformat(text)
        if parsed.tzinfo is None:
            # No offset given: treat the value as UTC.
            return parsed.replace(tzinfo=timezone.utc)
        return parsed.astimezone(timezone.utc)

    since_date = to_utc(since)
    until_date = to_utc(until)

    return [
        commit for commit in commits
        if since_date <= to_utc(commit['created_at']) < until_date
    ]
     
# Fetch the paths of the files changed by one commit.
def get_commit_diffs(project_id, commit_sha, api_token):
    """Return the ``new_path`` of every file changed in ``commit_sha``.

    Calls ``GET /projects/:id/repository/commits/:sha/diff`` and walks every
    result page, so commits touching more files than fit on one page are
    reported completely.

    Args:
        project_id: GitLab project ID, interpolated into the URL as-is.
        commit_sha: SHA of the commit to inspect.
        api_token: Access token sent in the ``Private-Token`` header.

    Returns:
        A list of file paths. When a request returns a non-200 status the
        error is logged and the paths collected so far are returned (an
        empty list if the first page fails).
    """
    url = f'https://gitlab.com/api/v4/projects/{project_id}/repository/commits/{commit_sha}/diff'
    headers = {'Private-Token': api_token}
    params = {'per_page': 100, 'page': 1}

    changed_paths = []
    while True:
        # A timeout keeps the script from hanging forever on a stalled connection.
        response = requests.get(url, headers=headers, params=params, timeout=30)
        if response.status_code != 200:
            logging.error(f"Failed to fetch commit diffs: {response.status_code}")
            break

        changed_paths.extend(diff['new_path'] for diff in response.json())

        # GitLab reports the following page number in this header; it is
        # empty (or absent) on the last page.
        next_page = response.headers.get('X-Next-Page')
        if not next_page:
            break
        params['page'] = next_page

    return changed_paths
 
# Collect the files each author modified within a date range.
def get_modified_files(project_id, api_token, branch, authors, since, until):
    """Log and return the unique files each author changed in the date range.

    For every author, fetches their commits on ``branch``, keeps those
    created in ``[since, until)``, and collects the changed file paths of
    each remaining commit without duplicates.

    Args:
        project_id: GitLab project ID.
        api_token: GitLab access token.
        branch: Branch whose commits are inspected.
        authors: Iterable of author names.
        since: Inclusive start timestamp, e.g. ``'2024-05-21T00:00:00Z'``.
        until: Exclusive end timestamp, e.g. ``'2024-06-13T23:59:59Z'``.

    Returns:
        A dict mapping each author that changed at least one file to the
        sorted list of unique file paths they changed.
    """
    # Per-author set of paths; the set takes care of de-duplication.
    unique_files = defaultdict(set)

    for author in authors:
        all_commits = get_commits(project_id, api_token, branch, author)
        filtered_commits = filter_commits_by_date(all_commits, since, until)

        for commit in filtered_commits:
            logging.info(f"Commit ID: {commit['id']}, Title: {commit['title']}, Author: {commit['author_name']}")
            modified_files = get_commit_diffs(project_id, commit['id'], api_token)
            # ``idx`` is the file's position within this commit's diff list.
            for idx, file_path in enumerate(modified_files, 1):
                if file_path not in unique_files[author]:
                    unique_files[author].add(file_path)
                    logging.info(f"   Modified File:{idx}. {file_path} (by {author})")

    # Sort the paths so the summary and the return value are deterministic;
    # iterating a set directly yields an arbitrary order.
    files_by_author = {
        author: sorted(files) for author, files in unique_files.items()
    }

    logging.info("\nUnique Modified Files:")
    for author, files in files_by_author.items():
        logging.info(f"\nAuthor: {author}")
        for idx, file_path in enumerate(files, 1):
            logging.info(f"  {idx}. {file_path} (unique)")

    return files_by_author
     
# GitLab API token. Read from the GITLAB_API_TOKEN environment variable when it
# is set, so a real credential does not have to be written into this file;
# otherwise the placeholder below is used.
import os

api_token = os.environ.get('GITLAB_API_TOKEN', 'xxxxxx')
# GitLab project ID
project_id = 'xxx'
# Name of the branch to inspect
branch = 'xxxx'
# Commit authors to report on
authors = ['xxxx', 'xxxxx']
# Start of the query window
since = '2024-05-21T00:00:00Z'
# End of the query window
until = '2024-06-13T23:59:59Z'

# Fetch and log the list of modified files
get_modified_files(project_id, api_token, branch, authors, since, until)
 
                     
            
        













 
                    

 
                 
                    