系统环境:CentOS 7.5

yum install openssl-devel pcre-devel gcc libffi-devel zlib-devel bzip2-devel ncurses-devel sqlite-devel readline-devel tk-devel gdbm-devel db4-devel libpcap-devel xz-devel -y

python 3.8.1

 

安装 Python 3.8.1 以及脚本依赖的 Python 模块

unzip py.zip 
cd py
tar -zxvf Python-3.8.1.tgz 

cd Python-3.8.1
./configure --prefix=/usr/local/python3 && make -j4 && make install
ln -s /usr/local/python3/bin/python3 /usr/bin/python3
ln -s /usr/local/python3/bin/pip3.8 /usr/bin/pip3
python3  --version
#Python 3.8.1

pip3 install -i https://pypi.doubanio.com/simple/   --upgrade pip 
pip3 install -i https://pypi.doubanio.com/simple/  virtualenv

pip3 install python-gitlab 
pip3 install pandas

在 GitLab 中配置访问令牌(Access Token),供下面的脚本调用 API 时使用

gitlab 如何统计总代码量 gitlab统计代码提交量_linux

gitlab 如何统计总代码量 gitlab统计代码提交量_git_02

 

gitlab 如何统计总代码量 gitlab统计代码提交量_服务器_03

 

脚本1:按时间区间统计每个项目的提交信息,输出的字段如下:

项目名

开发者

分支

添加代码行数

删除代码行数

提交总行数

提交次数

#!/usr/bin/env python
# coding=utf-8

import time
import gitlab
import collections
import pandas as pd

# python-gitlab client for the target server (API v4, 60-second timeout).
# NOTE(review): the private token is hard-coded here — presumably a
# placeholder; confirm it is not a live credential before sharing this file.
gl = gitlab.Gitlab('https://gitlab.cn/', private_token='tiGweLb-ujmNF', timeout=60, api_version='4')

# Reporting window, passed as the `since` / `until` filters when commits are listed.
# NOTE(review): the day is not zero-padded ('2022-10-1'); confirm the server
# accepts this form, otherwise use '2022-10-01T00:00:00Z'.
start_time = '2022-10-1T00:00:00Z'
end_time = '2022-11-1T23:00:00Z'


def get_gitlab():
    """
    Collect per-commit line statistics from every owned project.

    Walks every branch of every project returned by
    ``gl.projects.list(owned=True)`` and, for each commit between
    ``start_time`` and ``end_time``, fetches the commit itself to read its
    ``stats``.

    :return: a list with one dict per (branch, commit) pair, holding the keys
        ``projectName``, ``authorName``, ``branch``, ``additions``,
        ``deletions`` and ``commitNum`` (the commit's total changed lines).
        A commit that is listed for several branches appears once per branch.
    """
    records = []
    projects = gl.projects.list(owned=True, all=True)
    for num, project in enumerate(projects, start=1):
        print("查看了%d个项目" % num)
        # all=True: without it only the first page of branches is returned and
        # the remaining branches would silently be left out of the report.
        for branch in project.branches.list(all=True):
            commits = project.commits.list(
                all=True,
                query_parameters={
                    'since': start_time,
                    'until': end_time,
                    'ref_name': branch.name,
                },
            )

            for commit in commits:
                # Fetch the commit itself so that its `stats` can be read.
                com = project.commits.get(commit.id)

                try:
                    stats = com.stats
                    record = {
                        "projectName": project.path_with_namespace,
                        "authorName": com.author_name,
                        "branch": branch.name,
                        "additions": stats["additions"],
                        "deletions": stats["deletions"],
                        "commitNum": stats["total"],
                    }
                except (AttributeError, KeyError, TypeError) as exc:
                    # Only malformed commit data is tolerated; anything else
                    # (including Ctrl-C) must propagate instead of being hidden.
                    print("有错误, 请检查: commit=%s, %r" % (commit.id, exc))
                    continue
                records.append(record)

    return records


def data():
    """
    Aggregate the per-commit records into one row per project/author/branch.

    Records returned by ``get_gitlab()`` are grouped by the
    (project, author, branch) triple; line counts are summed and the number of
    commits is counted. The English working keys are then renamed to the
    Chinese column names used in the CSV report.

    :return: a list of dicts with the keys ``项目名``, ``开发者``, ``分支``,
        ``添加代码行数``, ``删除代码行数``, ``提交总行数`` and ``提交次数``
        (the working key ``commitTotal`` is left in place as well)
    """
    totals = {}

    for record in get_gitlab():
        # A tuple key keeps the three parts separate. Concatenating the strings
        # could merge different triples that happen to spell the same text.
        key = (record["projectName"], record["authorName"], record["branch"])
        summary = totals.get(key)
        if summary is None:
            # The first record of a group becomes the accumulator.
            record["commitTotal"] = 1
            totals[key] = record
        else:
            summary["additions"] += record["additions"]
            summary["deletions"] += record["deletions"]
            summary["commitNum"] += record["commitNum"]
            summary["commitTotal"] += 1

    rows = []
    for summary in totals.values():
        summary["项目名"] = summary.pop("projectName")
        summary["开发者"] = summary.pop("authorName")
        summary["分支"] = summary.pop("branch")
        summary["添加代码行数"] = summary.pop("additions")
        summary["删除代码行数"] = summary.pop("deletions")
        summary["提交总行数"] = summary.pop("commitNum")
        summary["提交次数"] = summary["commitTotal"]
        rows.append(summary)
    print(rows)
    return rows


def csv(csvName):
    """
    Export the aggregated statistics returned by ``data()`` to a CSV file.

    :param csvName: path of the CSV file to write
    """
    rows = data()
    header = [
        "项目名",
        "开发者",
        "分支",
        "添加代码行数",
        "删除代码行数",
        "提交总行数",
        "提交次数",
    ]
    frame = pd.DataFrame(rows, columns=header)
    # The utf_8_sig codec writes a UTF-8 byte-order mark at the start of the file.
    frame.to_csv(csvName, index=False, encoding="utf_8_sig")


if __name__ == "__main__":
    # Script entry point: write the report to gitlab.csv in the working directory.
    csv(csvName="./gitlab.csv")

 效果图

gitlab 如何统计总代码量 gitlab统计代码提交量_gitlab 如何统计总代码量_04

脚本2:逐个项目统计,按提交人汇总代码量(每个项目输出一个 CSV,另输出一个总表)

#!/usr/bin/env python
# coding=utf-8
import requests
import os
import json
import threading
import datetime

# Base URL of the GitLab server, without a trailing slash
# (empty placeholder; must be set before running).
git_root_url = ""
# Private access token appended to every API request (placeholder value).
git_token = "blog.csdn.net"
# Directory that receives the generated CSV reports.
export_path = "./dist"
# Reporting window: start date (YYYY-MM-DD).
t_from = "2021-06-01"
# Reporting window: end date (YYYY-MM-DD).
t_end = "2021-07-01"
# The same two bounds parsed into naive datetime objects.
date_from = datetime.datetime.strptime(t_from, '%Y-%m-%d')
date_end = datetime.datetime.strptime(t_end, '%Y-%m-%d')

# Re-entrant lock shared by the worker threads.
lock = threading.RLock()

# Author e-mails that get an extra log line when a commit of theirs is seen.
user_unknown = {}
# Maps an author e-mail found in a commit to the e-mail it should be counted under.
user_email_alias_mapping = {}
# Maps an author e-mail to the display name to report for it.
user_email_name_mapping = {}


class GitlabApiCountTrueLeTrue:
    """
    Worker that gathers per-author commit statistics from a GitLab server.

    ``get_projects`` lists the projects, starts one thread per recently active
    project (``get_branches``), and finally writes a CSV with the totals over
    all projects. Each thread walks the project's branches (``get_commits``),
    sums the line statistics per author e-mail and writes a per-project CSV.
    """

    # Commit ids already counted, used to count a commit only once even when
    # it is listed for several branches (for example after a merge).
    # Class-level default kept for compatibility; __init__ rebinds it so that
    # separate instances do not share state.
    total_commit_map = {}

    # Final per-project results: project id -> ProjectInfo.
    totalMap = {}

    # Largest page size the GitLab REST API serves; bigger values are capped.
    _PER_PAGE = 100
    # Seconds to wait for the server before a request is abandoned.
    _TIMEOUT = 60

    def __init__(self):
        self.total_commit_map = {}
        self.totalMap = {}

    def _fetch_all(self, url, params=None):
        """
        Return the items of every page of a paginated GitLab list endpoint.

        :param url: endpoint URL without query string
        :param params: extra query parameters (encoded by ``requests``)
        :return: list of decoded JSON items; empty when the server answers
            with something other than a JSON array (an error payload)
        """
        items = []
        page = 1
        while True:
            query = dict(params or {})
            query.update(private_token=git_token,
                         per_page=self._PER_PAGE, page=page)
            response = requests.get(url, params=query, timeout=self._TIMEOUT)
            batch = response.json()
            if not isinstance(batch, list):
                # Error responses are JSON objects, not arrays; report them
                # instead of iterating over their keys.
                print("GitLab API request failed: url=%s status=%s body=%s" % (
                    url, response.status_code, batch))
                break
            items.extend(batch)
            # A short (or empty) page is the last one.
            if len(batch) < self._PER_PAGE:
                break
            page += 1
        return items

    @staticmethod
    def _parse_last_activity(timestamp):
        """Parse a ``last_activity_at`` value into a naive datetime."""
        try:
            return datetime.datetime.strptime(
                timestamp, '%Y-%m-%dT%H:%M:%S.%fZ')
        except ValueError:
            # Other ISO 8601 spellings (UTC offset, no fractional seconds):
            # fall back to the calendar date, which is precise enough for the
            # day-granular activity filter.
            return datetime.datetime.strptime(timestamp[:10], '%Y-%m-%d')

    @staticmethod
    def _merge_detail(target, detail):
        """Add ``detail`` to the entry of ``target`` keyed by its author e-mail."""
        existing = target.get(detail.author_email)
        if existing is None:
            # NOTE: the object is stored as-is and later merges mutate it.
            target[detail.author_email] = detail
        else:
            existing.total += detail.total
            existing.additions += detail.additions
            existing.deletions += detail.deletions

    def get_projects(self):
        """
        Process every recently active project and write the overall report.

        Projects without a default branch, and projects whose last activity
        lies two or more whole days before ``date_from``, are skipped. One
        thread per remaining project runs ``get_branches``; when all threads
        have finished, the per-project results are summed per author e-mail
        and written to ``<export_path>/GitStatic_<t_from>/total_<t_from>.csv``.

        :return: None
        """
        threads = []
        projects = self._fetch_all(
            '%s/api/v4/projects' % git_root_url,
            {'order_by': 'last_activity_at'})
        for project in projects:
            if project.get('default_branch') is None:
                continue
            last_activity = self._parse_last_activity(
                project['last_activity_at'])
            if (date_from - last_activity).days > 1:
                continue
            project_info = ProjectInfo()
            project_info.project_id = project['id']
            project_info.name = project['name']
            project_info.project_desc = project['description']
            project_info.project_url = project['web_url']
            project_info.path = project['path']
            threads.append(threading.Thread(
                target=self.get_branches, args=(project['id'], project_info)))
        # Start every worker, then wait for all of them.
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        final_commit_map = {}
        for project in self.totalMap.values():
            for detail in project.commit_map.values():
                self._merge_detail(final_commit_map, detail)
        write_to_csv("%s/GitStatic_%s/%s_%s.csv" % (export_path, t_from, 'total', t_from), final_commit_map,
                     "extra")
        return

    def get_branches(self, project_id, project_info):
        """
        Sum the commits of all branches of one project and write its CSV.

        ``master`` is queried first; every other branch that is not flagged as
        merged follows. The result is stored in ``project_info.commit_map``,
        registered in ``self.totalMap`` and written to
        ``<export_path>/GitStatic_<t_from>/project/<path>_<id>.csv``.
        Nothing is stored or written when no commit was counted.

        :param project_id: numeric id of the project
        :param project_info: ProjectInfo describing the project
        :return: None
        """
        print("进入线程:%d,项目id%d,%s" %
              (threading.get_ident(), project_id, project_info.project_url))
        url = '%s/api/v4/projects/%s/repository/branches' % (
            git_root_url, project_id)

        print("start get branch list %d,url=%s" % (project_id, url))

        branches = self._fetch_all(url)
        if not branches:
            return
        # branch name -> {author e-mail -> CommitDetails}
        branch_map = {}
        # Query master explicitly before the other branches.
        detail_map = self.get_commits(
            project_id, project_info.project_url, 'master')
        print("get commits finish project_id=%d branch master" % project_id)

        if detail_map:
            branch_map['master'] = detail_map
        for branch in branches:
            branch_name = branch['name']
            if branch_name is None:
                continue
            # master was handled above; its commits are already recorded as
            # counted, so a second query could only return nothing new.
            if branch_name == 'master':
                continue
            # Branches flagged as merged are not processed.
            if branch['merged']:
                continue
            detail_map = self.get_commits(
                project_id, project_info.project_url, branch_name)
            if not detail_map:
                continue
            branch_map[branch_name] = detail_map
            print("get commits finish project_id=%d branch %s" %
                  (project_id, branch_name))

        print("all branch commits finish %d " % project_id)

        # Collapse the per-branch maps into one map keyed by author e-mail.
        final_commit_map = {}
        for value_map in branch_map.values():
            for detail in value_map.values():
                self._merge_detail(final_commit_map, detail)

        if not final_commit_map:
            return

        project_info.commit_map = final_commit_map
        # totalMap is written by every worker thread.
        with lock:
            self.totalMap[project_info.project_id] = project_info

        write_to_csv(
            "%s/GitStatic_%s/project/%s_%d.csv" % (
                export_path, t_from, project_info.path, project_info.project_id),
            final_commit_map, project_info.project_url)

    def get_commits(self, project_id, project_url, branch_name):
        """
        Sum, per author e-mail, the commits of one branch inside the window.

        Commits already counted elsewhere, commits for which
        ``get_commit_detail`` returns None, and commits touching more than
        5000 lines (likely generated code) are left out.

        :param project_id: numeric id of the project
        :param project_url: web URL of the project (used for logging only)
        :param branch_name: name of the branch to list
        :return: dict mapping author e-mail to CommitDetails, or None when the
            branch has no commit in the window
        """
        since_date = date_from.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
        until_date = date_end.strftime('%Y-%m-%dT%H:%M:%S.%fZ')

        url = '%s/api/v4/projects/%s/repository/commits' % (
            git_root_url, project_id)
        # Passing the branch name as a parameter lets requests escape it.
        commits = self._fetch_all(url, {
            'ref_name': branch_name,
            'since': since_date,
            'until': until_date,
        })
        if not commits:
            return None
        print('start get_commits,projectID=%d,branch=%s,url=%s' %
              (project_id, branch_name, url))

        detail_map = {}

        for commit in commits:
            commit_id = commit['id']
            if commit_id is None:
                continue
            # De-duplicate under the lock: the map is shared by all threads
            # and "check, then insert" must not interleave.
            with lock:
                if commit_id in self.total_commit_map:
                    continue
                self.total_commit_map[commit_id] = commit_id
            detail = get_commit_detail(project_id, commit_id)
            if detail is None:
                continue
            if detail.total > 5000:
                # Very large commits are probably scaffolding/generated code.
                continue
            # Side channel: log commits whose author is listed in user_unknown.
            if detail.author_email in user_unknown:
                print("email %s projectid= %d,branchname,%s,url=%s" % (
                    detail.author_email, project_id, branch_name, project_url))

            self._merge_detail(detail_map, detail)
        return detail_map


def get_commit_detail(project_id, commit_id):
    """
    Fetch a single commit and return its author and line statistics.

    The author e-mail is first replaced through ``user_email_alias_mapping``;
    the author name is then replaced through ``user_email_name_mapping``
    (looked up with the possibly replaced e-mail).

    :param project_id: id of the project
    :param commit_id: id (SHA) of the commit
    :return: a CommitDetails object, or None when the response carries no
        statistics (including error payloads) or the commit title contains
        "Merge branch"
    """
    url = '%s/api/v4/projects/%s/repository/commits/%s?private_token=%s' \
          % (git_root_url, project_id, commit_id, git_token)
    response = requests.get(url, timeout=60)
    payload = response.json()

    # An error response is a JSON object without the commit fields. Reading
    # them with .get() lets the "no statistics" guard below handle that case
    # instead of failing with KeyError.
    if not isinstance(payload, dict):
        return None
    stats = payload.get('stats')
    if stats is None:
        return None
    if 'Merge branch' in (payload.get('title') or ''):
        return None

    author_name = payload.get('author_name')
    author_email = payload.get('author_email')

    alias_email = user_email_alias_mapping.get(author_email)
    if alias_email is not None:
        author_email = alias_email
    alias_name = user_email_name_mapping.get(author_email)
    if alias_name is not None:
        author_name = alias_name

    details = CommitDetails()
    details.additions = stats['additions']
    details.deletions = stats['deletions']
    details.total = stats['total']
    details.author_email = author_email
    details.author_name = author_name
    return details


def make_dir_safe(file_path):
    """
    Make sure the directory belonging to ``file_path`` exists.

    When ``file_path`` ends with "/", it is itself the directory to create;
    otherwise its parent directory is created. A path without any directory
    component needs nothing and is left alone.

    :param file_path: path of a file, or of a directory ending with "/"
    :return: None
    """
    # dirname() covers both cases: "a/b/" -> "a/b" and "a/b/f.csv" -> "a/b".
    folder_path = os.path.dirname(file_path)
    if not folder_path:
        # Bare file name: os.makedirs('') would raise FileNotFoundError.
        return
    # exist_ok avoids the race between "exists?" and "create" when several
    # threads prepare the same directory at once.
    os.makedirs(folder_path, exist_ok=True)


def write_to_csv(file_path, final_commit_map, extra):
    """
    Write the per-author statistics to a CSV file.

    The header row has six columns: five fixed titles plus ``extra``. Each
    data row has five values (e-mail, name, total, additions, deletions).

    :param file_path: path of the CSV file; missing directories are created
    :param final_commit_map: dict whose values expose ``author_email``,
        ``author_name``, ``total``, ``additions`` and ``deletions``
    :param extra: text used as the title of the sixth header column
    :return: None
    """
    # Local import: only this function needs the csv module.
    import csv

    make_dir_safe(file_path)
    # Explicit UTF-8: the header is Chinese and must not depend on the
    # platform's default encoding. newline='' is what the csv module expects.
    with open(file_path, 'w', encoding='utf-8', newline='') as out:
        # csv.writer quotes fields that contain commas or quotes (for example
        # an author name), which plain string formatting did not.
        writer = csv.writer(out, lineterminator='\n')
        writer.writerow(
            ["提交人邮箱", "提交人姓名", "总行数", "增加行数", "删除行数", str(extra)])
        for value in final_commit_map.values():
            fields = (value.author_email, value.author_name,
                      value.total, value.additions, value.deletions)
            # str() keeps the former "%s" rendering (e.g. None -> "None").
            writer.writerow([str(field) for field in fields])


class CommitDetails(json.JSONEncoder):
    """
    Record holding the statistics of one commit, or the accumulated
    statistics of one author.

    NOTE(review): none of the visible code uses the json.JSONEncoder base
    for encoding — presumably a plain class would do; confirm before changing.
    """
    # Author display name.
    author_name = None
    # Author e-mail; the aggregation maps are keyed by it.
    author_email = None
    # Number of added lines.
    additions = 0
    # Number of deleted lines.
    deletions = 0
    # Total number of changed lines.
    total = 0


class ProjectInfo(json.JSONEncoder):
    """
    Record describing one GitLab project and its collected statistics.

    NOTE(review): none of the visible code uses the json.JSONEncoder base
    for encoding — presumably a plain class would do; confirm before changing.
    """
    # Project id (the API's `id` field).
    project_id = None
    # Project description (the API's `description` field).
    project_desc = None
    # Project web URL (the API's `web_url` field).
    project_url = None
    # Project path (the API's `path` field); used in the CSV file name.
    path = None
    # Project name (the API's `name` field).
    name = None
    # Author e-mail -> CommitDetails, assigned once the project was processed.
    commit_map = None


if __name__ == '__main__':
    # Script entry point: collect the statistics and write the CSV reports.
    GitlabApiCountTrueLeTrue().get_projects()