Python 脚本:获取 GitLab 所有项目所有分支所有人的代码统计数据
这是一个使用 Python 脚本获取 GitLab 所有项目所有分支所有人的代码总数、提交数、删除数,并输出到指定 HDFS 路径的 CSV 文件的示例。
该脚本使用 GitLab API 获取项目、分支、提交信息,并使用 Pandas 库处理数据并输出到 CSV 文件。
import os

import pandas as pd
import requests
# Connection settings. Each value can be overridden through an environment
# variable so that a real access token never has to be written into this
# file; the fallbacks are the original placeholder values.
gitlab_url = os.environ.get('GITLAB_URL', 'https://gitlab.com/api/v4')
private_token = os.environ.get('GITLAB_PRIVATE_TOKEN', 'your_private_token')
# Destination handed to DataFrame.to_csv by output_to_csv.
hdfs_path = os.environ.get('GITLAB_STATS_OUTPUT', '/path/to/output.csv')
def get_projects():
    """Return every project record the configured token can list.

    Pages through ``GET {gitlab_url}/projects`` 100 records at a time and
    concatenates the decoded JSON lists.

    Returns:
        list: The project records as decoded from the API responses.

    Raises:
        requests.RequestException: On a network failure, a timeout, or a
            page answered with an HTTP error status.
    """
    url = f'{gitlab_url}/projects'
    # Send the token as a header so it does not end up in the request URL.
    headers = {'PRIVATE-TOKEN': private_token}
    params = {'per_page': 100, 'page': 1}
    projects = []
    while True:
        response = requests.get(url, headers=headers, params=params, timeout=30)
        # Fail loudly: an error body is a JSON object, and extending the list
        # with it would silently add its keys as bogus records.
        response.raise_for_status()
        projects.extend(response.json())
        # X-Next-Page carries the next page *number* (empty on the last
        # page), not a URL, so it is fed back as the ``page`` parameter.
        next_page = response.headers.get('X-Next-Page')
        if not next_page:
            return projects
        params['page'] = next_page
def get_branches(project_id):
    """Return every branch record of one project.

    Pages through ``GET {gitlab_url}/projects/{project_id}/repository/branches``
    100 records at a time and concatenates the decoded JSON lists.

    Args:
        project_id: Project identifier interpolated into the request path.

    Returns:
        list: The branch records as decoded from the API responses.

    Raises:
        requests.RequestException: On a network failure, a timeout, or a
            page answered with an HTTP error status.
    """
    url = f'{gitlab_url}/projects/{project_id}/repository/branches'
    # Send the token as a header so it does not end up in the request URL.
    headers = {'PRIVATE-TOKEN': private_token}
    params = {'per_page': 100, 'page': 1}
    branches = []
    while True:
        response = requests.get(url, headers=headers, params=params, timeout=30)
        # Fail loudly: an error body is a JSON object, and extending the list
        # with it would silently add its keys as bogus records.
        response.raise_for_status()
        branches.extend(response.json())
        # X-Next-Page carries the next page *number* (empty on the last
        # page), not a URL, so it is fed back as the ``page`` parameter.
        next_page = response.headers.get('X-Next-Page')
        if not next_page:
            return branches
        params['page'] = next_page
def get_commits(project_id, branch_name):
    """Return every commit record reachable from one branch, with line stats.

    Pages through ``GET {gitlab_url}/projects/{project_id}/repository/commits``
    100 records at a time and concatenates the decoded JSON lists.

    Args:
        project_id: Project identifier interpolated into the request path.
        branch_name: Branch whose history is listed; sent as ``ref_name``.

    Returns:
        list: The commit records as decoded from the API responses.

    Raises:
        requests.RequestException: On a network failure, a timeout, or a
            page answered with an HTTP error status.
    """
    url = f'{gitlab_url}/projects/{project_id}/repository/commits'
    # Send the token as a header so it does not end up in the request URL.
    headers = {'PRIVATE-TOKEN': private_token}
    params = {
        # Passed through ``params`` so that names containing ``/``, ``&`` or
        # ``#`` are URL-encoded instead of corrupting the query string.
        'ref_name': branch_name,
        # Without this flag the records carry no ``stats`` entry, which
        # get_project_stats reads for additions and deletions.
        'with_stats': 'true',
        'per_page': 100,
        'page': 1,
    }
    commits = []
    while True:
        response = requests.get(url, headers=headers, params=params, timeout=30)
        # Fail loudly: an error body is a JSON object, and extending the list
        # with it would silently add its keys as bogus records.
        response.raise_for_status()
        commits.extend(response.json())
        # X-Next-Page carries the next page *number* (empty on the last
        # page), not a URL, so it is fed back as the ``page`` parameter.
        next_page = response.headers.get('X-Next-Page')
        if not next_page:
            return commits
        params['page'] = next_page
def get_project_stats(project):
    """Summarise the commit history of each branch of one project.

    Args:
        project: Mapping with at least the keys ``'id'`` and ``'name'``.

    Returns:
        list[dict]: One row per branch with the keys ``project_name``,
        ``branch_name``, ``total_additions``, ``total_deletions``,
        ``total_commits`` and ``total_users`` (distinct author e-mails).

    Raises:
        KeyError: If a commit record lacks ``'stats'`` or ``'author_email'``.
    """
    # NOTE(review): the GitLab commit list only includes ``stats`` when it is
    # requested with ``with_stats=true`` -- confirm get_commits asks for it.
    pid = project['id']
    name = project['name']
    rows = []
    for branch in get_branches(pid):
        ref = branch['name']
        history = get_commits(pid, ref)
        rows.append({
            'project_name': name,
            'branch_name': ref,
            'total_additions': sum(c['stats']['additions'] for c in history),
            'total_deletions': sum(c['stats']['deletions'] for c in history),
            'total_commits': len(history),
            'total_users': len({c['author_email'] for c in history}),
        })
    return rows
def get_all_stats():
    """Collect the per-branch statistics rows of every listed project.

    Returns:
        list: The rows returned by get_project_stats for each project from
        get_projects, concatenated in project order.
    """
    return [
        row
        for project in get_projects()
        for row in get_project_stats(project)
    ]
def output_to_csv(stats):
    """Write the statistics rows to ``hdfs_path`` as CSV without an index.

    Args:
        stats: List of row dicts as produced by get_project_stats.
    """
    # Fixing the columns keeps the header row present (and in a stable order)
    # even when ``stats`` is empty; without it an empty run yields a file
    # with no header at all.
    columns = [
        'project_name',
        'branch_name',
        'total_additions',
        'total_deletions',
        'total_commits',
        'total_users',
    ]
    df = pd.DataFrame(stats, columns=columns)
    # NOTE(review): a plain path like the default is written to the local
    # filesystem; landing the file in HDFS presumably needs an ``hdfs://``
    # URL plus a matching fsspec/pyarrow backend -- confirm for the target
    # environment.
    df.to_csv(hdfs_path, index=False)
# Script entry point: gather the statistics and write them out.
if __name__ == '__main__':
    output_to_csv(get_all_stats())
注意:
- 脚本中使用了 GitLab API,需要将 private_token 替换为您的个人访问令牌。
- 脚本中使用了 HDFS 路径,需要将 hdfs_path 替换为您指定的 HDFS 路径。
- 脚本仅供参考,您可能需要根据您的实际情况进行修改。
原文地址: https://www.cveoy.top/t/topic/loEi 著作权归作者所有。请勿转载和采集!