python

关注公众号 jb51net

关闭
首页 > 脚本专栏 > python > Python迁移Github仓库

Python实现自动化迁移Github仓库并保留所有历史记录

作者:weixin_30777913

这篇文章主要为大家详细介绍了如何使用Python实现自动化迁移Github仓库并保留所有历史记录,文中的示例代码简洁易懂,下面就跟随小编一起了解下吧

通过本文提供的详细步骤、Python自动化代码和最佳实践,您可以:

记得在实际使用前:

一、GitHub仓库迁移详细步骤

方法1:使用Git命令手动迁移

步骤1:克隆原始仓库

git clone --mirror https://github.com/username/original-repo.git
cd original-repo.git

步骤2:创建新仓库

步骤3:推送所有内容到新仓库

git push --mirror https://github.com/username/new-repo.git

步骤4:清理本地文件

cd ..
rm -rf original-repo.git

方法2:使用GitHub API迁移

步骤1:获取个人访问令牌

步骤2:使用API创建仓库

curl -H "Authorization: token YOUR_TOKEN" \
     -d '{"name":"new-repo", "description":"Migrated repository"}' \
     https://api.github.com/user/repos

二、Python自动化迁移代码

import os
import subprocess
import requests
import json
import shutil
from pathlib import Path

class GitHubRepoMigrator:
    """Migrate a Git repository into a new GitHub repository with full history.

    The migration mirror-clones the source, creates the destination repository
    through the GitHub REST API, mirror-pushes every ref, and finally does a
    throw-away clone of the destination to print a short summary.

    All git commands are run with an explicit ``cwd`` instead of ``os.chdir``
    so the process working directory is never changed (and never ends up
    pointing at a directory that has just been deleted).
    """

    # Seconds to wait for the GitHub REST API before giving up.
    API_TIMEOUT = 30

    def __init__(self, github_token, temp_dir="/tmp/git_migration"):
        """Store credentials and the scratch directory.

        Args:
            github_token: Personal access token sent in the ``Authorization``
                header of GitHub API requests.
            temp_dir: Directory under which temporary clones are created.
        """
        self.github_token = github_token
        self.temp_dir = temp_dir
        self.headers = {
            "Authorization": f"token {github_token}",
            "Accept": "application/vnd.github.v3+json"
        }

    @staticmethod
    def _run_git(args, cwd=None):
        """Run ``git <args>`` in ``cwd`` and return the CompletedProcess.

        Output is captured as text; the caller inspects ``returncode``.
        """
        return subprocess.run(
            ["git", *args], cwd=cwd, capture_output=True, text=True
        )

    def create_github_repo(self, repo_name, description="", private=False):
        """Create an empty repository for the authenticated user.

        Args:
            repo_name: Name of the repository to create.
            description: Repository description.
            private: Whether the repository is private.

        Returns:
            The ``clone_url`` reported by the GitHub API.

        Raises:
            RuntimeError: If the API does not answer with HTTP 201.
        """
        url = "https://api.github.com/user/repos"
        payload = {
            "name": repo_name,
            "description": description,
            "private": private,
            # The destination must stay empty so the mirror push is accepted.
            "auto_init": False
        }

        response = requests.post(
            url, headers=self.headers, json=payload, timeout=self.API_TIMEOUT
        )

        if response.status_code == 201:
            print(f"✅ 成功创建仓库: {repo_name}")
            return response.json()["clone_url"]

        # Error bodies are normally JSON, but proxies/gateways may return
        # plain text or HTML; fall back to the raw body in that case.
        try:
            detail = response.json()
        except ValueError:
            detail = response.text
        raise RuntimeError(f"创建仓库失败: {detail}")

    def clone_repository(self, source_url, local_path):
        """Mirror-clone ``source_url`` (all branches and tags) into ``local_path``.

        Any existing directory at ``local_path`` is removed first.

        Raises:
            RuntimeError: If ``git clone`` exits with a non-zero status.
        """
        if os.path.exists(local_path):
            shutil.rmtree(local_path)

        print(f"📥 正在克隆仓库: {source_url}")
        result = self._run_git(["clone", "--mirror", source_url, local_path])

        if result.returncode != 0:
            raise RuntimeError(f"克隆失败: {result.stderr}")

        print("✅ 仓库克隆完成")

    def push_to_new_repo(self, local_path, target_url):
        """Point ``origin`` of the mirror at ``target_url`` and push everything.

        Args:
            local_path: Path of the local mirror clone.
            target_url: URL of the destination repository.

        Raises:
            RuntimeError: If updating the remote or pushing fails.
        """
        print(f"📤 正在推送到新仓库: {target_url}")

        # Re-target the remote so that a plain ``git push --mirror`` goes to
        # the new repository.
        set_url = self._run_git(
            ["remote", "set-url", "origin", target_url], cwd=local_path
        )
        if set_url.returncode != 0:
            raise RuntimeError(f"推送失败: {set_url.stderr}")

        result = self._run_git(["push", "--mirror"], cwd=local_path)
        if result.returncode != 0:
            raise RuntimeError(f"推送失败: {result.stderr}")

        print("✅ 推送完成")

    def verify_migration(self, original_url, new_url):
        """Clone the new repository and print branch, tag and commit info.

        Args:
            original_url: URL of the source repository. Currently unused; it
                is kept so existing callers do not break.
            new_url: URL of the freshly created repository to inspect.

        Raises:
            RuntimeError: If the verification clone fails.
        """
        print("🔍 验证迁移结果...")

        verify_path = os.path.join(self.temp_dir, "verify")
        if os.path.exists(verify_path):
            shutil.rmtree(verify_path)

        clone = self._run_git(["clone", new_url, verify_path])
        if clone.returncode != 0:
            raise RuntimeError(f"验证失败: {clone.stderr}")

        try:
            branches_result = self._run_git(["branch", "-r"], cwd=verify_path)
            # Skip the symbolic "origin/HEAD -> origin/<default>" entry; it is
            # an alias, not an additional branch.
            branches = [
                line.strip()
                for line in branches_result.stdout.splitlines()
                if line.strip() and "->" not in line
            ]
            print(f"📋 分支数量: {len(branches)}")

            tags_result = self._run_git(["tag"], cwd=verify_path)
            tags = [
                line.strip()
                for line in tags_result.stdout.splitlines()
                if line.strip()
            ]
            print(f"🏷️  标签数量: {len(tags)}")

            log_result = self._run_git(
                ["log", "--oneline", "-5"], cwd=verify_path
            )
            print("📜 最近5次提交:")
            print(log_result.stdout)
        finally:
            # Always drop the verification clone, even if inspection failed.
            shutil.rmtree(verify_path, ignore_errors=True)

    def migrate_repository(self, source_url, new_repo_name, description="", private=False):
        """Run the complete migration and return the new repository URL.

        The source is cloned *before* the destination repository is created,
        so an unreachable source does not leave an empty repository behind on
        GitHub.

        Args:
            source_url: URL of the repository to migrate.
            new_repo_name: Name of the repository to create.
            description: Description for the new repository.
            private: Whether the new repository is private.

        Returns:
            The clone URL of the new repository.

        Raises:
            Exception: Any failure from the individual steps is reported and
                re-raised unchanged.
        """
        local_path = os.path.join(self.temp_dir, f"{new_repo_name}.git")
        try:
            print(f"🚀 开始迁移仓库: {source_url} -> {new_repo_name}")

            os.makedirs(self.temp_dir, exist_ok=True)

            # Step 1: mirror-clone the source repository.
            self.clone_repository(source_url, local_path)

            # Step 2: create the destination repository on GitHub.
            new_repo_url = self.create_github_repo(new_repo_name, description, private)

            # Step 3: push every ref to the destination.
            self.push_to_new_repo(local_path, new_repo_url)

            # Step 4: inspect the result.
            self.verify_migration(source_url, new_repo_url)

            print(f"🎉 迁移完成! 新仓库URL: {new_repo_url}")
            return new_repo_url

        except Exception as e:
            print(f"❌ 迁移失败: {e}")
            raise
        finally:
            # Remove only the clone created by this run; temp_dir itself may
            # be a user-supplied directory that holds unrelated data.
            shutil.rmtree(local_path, ignore_errors=True)

def main():
    """Run a single example migration and report the outcome.

    The token is read from the ``GITHUB_TOKEN`` environment variable so it
    does not have to be written into the source file; the placeholder is used
    only when the variable is not set.

    Returns:
        0 when the migration succeeded, 1 when it failed, so the script's
        exit status reflects the result.
    """
    # Configuration
    GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "your_github_token_here")
    SOURCE_REPO_URL = "https://github.com/username/original-repo.git"  # source repository URL
    NEW_REPO_NAME = "new-repository-name"  # name of the new repository
    DESCRIPTION = "Migrated repository with full history"  # repository description
    IS_PRIVATE = False  # whether the new repository is private

    migrator = GitHubRepoMigrator(GITHUB_TOKEN)

    try:
        new_repo_url = migrator.migrate_repository(
            SOURCE_REPO_URL,
            NEW_REPO_NAME,
            DESCRIPTION,
            IS_PRIVATE
        )
    except Exception as e:
        # Top-level boundary: report the failure and signal it to the shell.
        print(f"   状态: ❌ 失败 - {e}")
        return 1

    print("\n📋 迁移总结:")
    print(f"   原始仓库: {SOURCE_REPO_URL}")
    print(f"   新仓库: {new_repo_url}")
    print("   状态: ✅ 成功")
    return 0

if __name__ == "__main__":
    raise SystemExit(main())

三、高级迁移工具(支持批量操作)

import csv
import time
from datetime import datetime

class BatchGitHubMigrator:
    """Migrate several repositories listed in a CSV file and write a report.

    The CSV is expected to have the columns ``source_url``, ``new_name``,
    ``description`` and ``private``. A failure of one row is recorded in the
    report and does not stop the remaining rows.
    """

    # Spellings of the ``private`` column that mean "private repository".
    _TRUE_VALUES = frozenset({"true", "1", "yes", "y"})

    def __init__(self, github_token, config_file="migration_config.csv", delay_seconds=2):
        """Create the batch migrator.

        Args:
            github_token: Token handed to the underlying ``GitHubRepoMigrator``.
            config_file: Path of the CSV file describing the migrations.
            delay_seconds: Pause between two consecutive migrations, used to
                stay clear of API rate limits.
        """
        self.migrator = GitHubRepoMigrator(github_token)
        self.config_file = config_file
        self.delay_seconds = delay_seconds

    def read_migration_config(self):
        """Read the CSV configuration and return one dict per data row.

        ``utf-8-sig`` is used so that a byte-order mark (as written by some
        spreadsheet tools) does not end up inside the first column name; files
        without a BOM are read the same as with plain UTF-8.
        """
        with open(self.config_file, 'r', encoding='utf-8-sig', newline='') as file:
            return list(csv.DictReader(file))

    @classmethod
    def _parse_private(cls, value):
        """Interpret a ``private`` cell; missing or empty cells mean False."""
        return (value or "").strip().lower() in cls._TRUE_VALUES

    def batch_migrate(self):
        """Migrate every configured repository and generate the CSV report.

        Returns:
            A list with one result dict per row, holding the keys ``source``,
            ``new_repo``, ``status`` and ``duration``.
        """
        migrations = self.read_migration_config()
        results = []
        total = len(migrations)

        print(f"🔄 开始批量迁移 {total} 个仓库")

        for i, config in enumerate(migrations, 1):
            # Pause between repositories only; no pointless wait after the
            # last one.
            if i > 1 and self.delay_seconds > 0:
                time.sleep(self.delay_seconds)

            print(f"\n--- 处理第 {i}/{total} 个仓库 ---")

            # Read with .get() so a missing column cannot raise again inside
            # the error handler below and abort the whole batch.
            source_url = (config.get('source_url') or '').strip()

            try:
                new_name = (config.get('new_name') or '').strip()
                if not source_url or not new_name:
                    raise ValueError("配置行缺少 source_url 或 new_name")

                started = time.monotonic()

                new_url = self.migrator.migrate_repository(
                    source_url,
                    new_name,
                    config.get('description') or '',
                    self._parse_private(config.get('private'))
                )

                duration = time.monotonic() - started

                results.append({
                    'source': source_url,
                    'new_repo': new_url,
                    'status': 'success',
                    'duration': duration
                })

                print(f"✅ 完成 ({duration:.1f}秒)")

            except Exception as e:
                # Per-row boundary: record the failure and carry on.
                results.append({
                    'source': source_url,
                    'new_repo': '',
                    'status': f'failed: {str(e)}',
                    'duration': 0
                })
                print(f"❌ 失败: {e}")

        self.generate_report(results)
        return results

    def generate_report(self, results):
        """Write ``results`` to a timestamped CSV file and print a summary.

        Args:
            results: Result dicts as produced by ``batch_migrate``.
        """
        report_file = f"migration_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"

        with open(report_file, 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(['源仓库', '新仓库', '状态', '耗时(秒)'])
            writer.writerows(
                [r['source'], r['new_repo'], r['status'], r['duration']]
                for r in results
            )

        print(f"\n📊 迁移报告已生成: {report_file}")

        success_count = sum(1 for r in results if r['status'] == 'success')
        print(f"📈 统计: 成功 {success_count}/{len(results)}")

# 使用示例
def batch_migration_example():
    """Run a batch migration using the default configuration file name."""
    BatchGitHubMigrator("your_github_token_here").batch_migrate()

四、详细论述:GitHub仓库迁移的最佳实践

1. 迁移前的准备工作

风险评估

技术准备

2. 迁移过程中的关键技术要点

使用--mirror参数的重要性

# Correct: includes everything
git clone --mirror original-repo.git

# Wrong: may lose information
git clone original-repo.git

--mirror参数确保:所有分支、标签以及其他引用(refs)都会被完整复制到新仓库。

3. 迁移后的验证步骤

完整性检查清单

4. 常见问题及解决方案

问题1:认证失败

# Solution: use the correct authentication scheme
headers = {
    "Authorization": f"token {token}",
    # Alternatively, use Basic Auth
    # "Authorization": "Basic " + base64.b64encode(f"username:{token}".encode()).decode()
}

问题2:网络超时

# Solution: add a retry mechanism for transient failures
import requests
from requests.adapters import HTTPAdapter
# Import Retry from urllib3 directly; ``requests.packages.urllib3`` is only a
# backward-compatibility alias for the same package.
from urllib3.util.retry import Retry

session = requests.Session()
retry_strategy = Retry(
    total=3,            # at most three retries per request
    backoff_factor=1,   # wait progressively longer between attempts
    status_forcelist=[429, 500, 502, 503, 504],
)
# NOTE(review): urllib3's default allowed methods do not appear to include
# POST, so the repository-creation request would not be retried on these
# status codes unless ``allowed_methods`` is extended; confirm before relying
# on it.
session.mount("https://", HTTPAdapter(max_retries=retry_strategy))

5. 企业级迁移策略

分阶段迁移

监控和回滚

五、配置文件和用法示例

批量迁移配置文件 (migration_config.csv)

source_url,new_name,description,private
https://github.com/org/old-repo1.git,new-repo1,Migrated repository 1,true
https://github.com/org/old-repo2.git,new-repo2,Migrated repository 2,false
https://github.com/org/old-repo3.git,new-repo3,Migrated repository 3,true

环境准备脚本

#!/bin/bash
# setup_migration.sh

# Install the required dependencies
pip install requests

# Set the Git configuration
git config --global user.name "Migration Bot"
git config --global user.email "bot@company.com"

# Create the migration directory
mkdir -p /tmp/git_migration

echo "环境准备完成"

以上就是Python实现自动化迁移Github仓库并保留所有历史记录的详细内容,更多关于Python自动化迁移Github仓库的资料请关注脚本之家其它相关文章!

您可能感兴趣的文章:
阅读全文