这是一个 Python 脚本示例:通过 GitLab API 获取所有项目的所有分支,按项目和分支汇总新增代码行数、删除代码行数、提交数以及提交者人数,并将结果输出到指定 HDFS 路径的 CSV 文件。

该脚本使用 GitLab API 获取项目、分支和提交信息,再使用 Pandas 库整理数据并输出到 CSV 文件。

import os

import pandas as pd
import requests

# Script configuration. Each value can be overridden through an environment
# variable so that the access token does not have to be committed to the
# source file; the original placeholder literals remain the defaults.
# Base URL of the GitLab REST API (trailing slash removed so that
# f'{gitlab_url}/projects' never produces a double slash).
gitlab_url = os.environ.get('GITLAB_API_URL', 'https://gitlab.com/api/v4').rstrip('/')
# Personal access token used to authenticate every API request.
private_token = os.environ.get('GITLAB_PRIVATE_TOKEN', 'your_private_token')
# Destination path of the generated CSV file.
hdfs_path = os.environ.get('GITLAB_STATS_CSV_PATH', '/path/to/output.csv')

# Fetch every project visible to the configured token
def get_projects():
    """Return the list of all projects returned by ``GET /projects``.

    Pages are requested one after another until GitLab reports no further
    page.

    Returns:
        list: the decoded JSON project objects of every page, concatenated.

    Raises:
        requests.HTTPError: if any page request returns an HTTP error status.
    """
    endpoint = f'{gitlab_url}/projects'
    # Send the token as a header so it does not end up in URLs and logs.
    headers = {'PRIVATE-TOKEN': private_token}
    params = {'per_page': 100, 'page': 1}
    projects = []
    while True:
        response = requests.get(endpoint, headers=headers, params=params, timeout=30)
        # Fail loudly instead of appending an error payload to the results.
        response.raise_for_status()
        projects.extend(response.json())
        # X-Next-Page carries the NUMBER of the next page (empty on the last
        # page), not a URL, so it must be fed back as the `page` parameter.
        next_page = response.headers.get('X-Next-Page')
        if not next_page:
            break
        params['page'] = next_page
    return projects

# Fetch every branch of the given project
def get_branches(project_id):
    """Return all branches of a project.

    Args:
        project_id: identifier of the project, as used in the
            ``/projects/{id}`` API path.

    Returns:
        list: the decoded JSON branch objects of every page, concatenated.

    Raises:
        requests.HTTPError: if any page request returns an HTTP error status.
    """
    endpoint = f'{gitlab_url}/projects/{project_id}/repository/branches'
    # Send the token as a header so it does not end up in URLs and logs.
    headers = {'PRIVATE-TOKEN': private_token}
    params = {'per_page': 100, 'page': 1}
    branches = []
    while True:
        response = requests.get(endpoint, headers=headers, params=params, timeout=30)
        # Fail loudly instead of appending an error payload to the results.
        response.raise_for_status()
        branches.extend(response.json())
        # X-Next-Page carries the NUMBER of the next page (empty on the last
        # page), not a URL, so it must be fed back as the `page` parameter.
        next_page = response.headers.get('X-Next-Page')
        if not next_page:
            break
        params['page'] = next_page
    return branches

# Fetch every commit reachable from the given branch
def get_commits(project_id, branch_name):
    """Return all commits of one branch, including per-commit line stats.

    Args:
        project_id: identifier of the project, as used in the
            ``/projects/{id}`` API path.
        branch_name: name of the branch, sent as the ``ref_name`` parameter.

    Returns:
        list: the decoded JSON commit objects of every page, concatenated.

    Raises:
        requests.HTTPError: if any page request returns an HTTP error status.
    """
    endpoint = f'{gitlab_url}/projects/{project_id}/repository/commits'
    # Send the token as a header so it does not end up in URLs and logs.
    headers = {'PRIVATE-TOKEN': private_token}
    params = {
        # Passed through `params` so branch names containing characters such
        # as '#', '&' or spaces are URL-encoded correctly.
        'ref_name': branch_name,
        # Ask GitLab to include the `stats` object (additions/deletions) that
        # the aggregation step reads from every commit.
        'with_stats': 'true',
        'per_page': 100,
        'page': 1,
    }
    commits = []
    while True:
        response = requests.get(endpoint, headers=headers, params=params, timeout=30)
        # Fail loudly instead of appending an error payload to the results.
        response.raise_for_status()
        commits.extend(response.json())
        # X-Next-Page carries the NUMBER of the next page (empty on the last
        # page), not a URL, so it must be fed back as the `page` parameter.
        next_page = response.headers.get('X-Next-Page')
        if not next_page:
            break
        params['page'] = next_page
    return commits

# Aggregate added lines, deleted lines, commit count and author count per branch
def get_project_stats(project):
    """Build one summary row for every branch of ``project``.

    Args:
        project: mapping with at least the keys ``'id'`` and ``'name'``.

    Returns:
        list: one dict per branch with the keys ``project_name``,
        ``branch_name``, ``total_additions``, ``total_deletions``,
        ``total_commits`` and ``total_users`` (number of distinct
        ``author_email`` values seen on the branch).
    """
    pid = project['id']
    pname = project['name']
    rows = []
    for branch in get_branches(pid):
        ref = branch['name']
        added = 0
        removed = 0
        commit_count = 0
        authors = set()
        # Single pass over the branch history: sum the line stats and
        # collect the distinct author e-mails.
        for commit_count, entry in enumerate(get_commits(pid, ref), start=1):
            diff = entry['stats']
            added += diff['additions']
            removed += diff['deletions']
            authors.add(entry['author_email'])
        row = {
            'project_name': pname,
            'branch_name': ref,
            'total_additions': added,
            'total_deletions': removed,
            'total_commits': commit_count,
            'total_users': len(authors),
        }
        rows.append(row)
    return rows

# Collect the per-branch summary rows of every project
def get_all_stats():
    """Return the concatenated per-branch statistics of all projects.

    Returns:
        list: the rows produced by ``get_project_stats`` for each project
        returned by ``get_projects``, in project order.
    """
    return [
        row
        for project in get_projects()
        for row in get_project_stats(project)
    ]

# Write the collected statistics to a CSV file
def output_to_csv(stats):
    """Dump ``stats`` as CSV to the module-level ``hdfs_path``.

    Args:
        stats: the rows to write; turned into a ``pandas.DataFrame`` and
            written without the index column.
    """
    # NOTE(review): a plain path such as '/path/to/output.csv' is presumably
    # written to the local filesystem by pandas; confirm how the file is
    # meant to reach HDFS.
    pd.DataFrame(stats).to_csv(hdfs_path, index=False)

# Script entry point
if __name__ == '__main__':
    # Gather the statistics and hand them straight to the CSV writer.
    output_to_csv(get_all_stats())

注意:

  • 脚本中使用了 GitLab API,需要替换 private_token 为您的个人访问令牌。
  • 脚本通过 hdfs_path 指定输出位置,需要替换为您指定的 HDFS 路径。注意:对于普通路径,pandas 的 to_csv 会写入本地文件系统;若要直接写入 HDFS,通常需要使用 hdfs:// 形式的 URL 并安装相应的文件系统支持(如 fsspec、pyarrow)。
  • 脚本仅供参考,您可能需要根据您的实际情况进行修改。
Python 脚本:获取 GitLab 所有项目所有分支所有人的代码统计数据

原文地址: https://www.cveoy.top/t/topic/loEi 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录