非常感谢本论坛中 @鴻渊 原创或转载的《git rebase 修改commit时间》一文对我的启发。但 git rebase 会把提交历史线性化:像下图这样带有分支与合并的复杂提交历史,经 git rebase 处理后会变成一条直线。
[i]
这不是我期望的,我希望保持提交历史线路,于是我写了新程序来满足需求。
核心思想:使用 git commit-tree 命令创建提交节点。
相关知识:Git 有四类对象(blob、tree、commit、tag),每个对象都以其哈希值标识。blob 保存单个文件的内容;tree 记录若干 blob 及子 tree 的哈希,对应某个历史节点的完整目录结构;commit 是提交节点,含有一个 tree 的哈希、父提交的哈希以及用户提交的信息(作者、提交者、提交说明);tag 是标签,通过所指对象(通常是 commit)的哈希挂到该对象上。
[Python]
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @date 2025-04-20 ~ 2025-04-24
# 重建所有提交,期间可改信息
from datetime import datetime, timedelta
import os
import pathlib
from re import fullmatch
import shlex
import subprocess
from typing import cast, Dict, List, Mapping, Optional, TypedDict
# Information about one commit that the user is allowed to edit.
class EditInfo(TypedDict):
    """Editable view of one commit, handed to ``user_callback``.

    ``user_update`` fills it from the commit's "old" values, lets the
    callback mutate it in place, and copies the result into the commit's
    "new" values.
    """

    # Author identity and author timestamp.
    author_name: str
    author_email: str
    author_date: str
    # Committer identity and commit timestamp.
    committer_name: str
    committer_email: str
    committer_date: str
    # Commit message as raw bytes.
    commit_comment: bytes
    # Hash of the commit being edited; if the callback changes it,
    # ``user_update`` takes the tree of the commit named here instead.
    commit_hash: str
# Information about one tag.
class TagInfo(TypedDict):
    """Description of one tag, as read by ``read_tag_info_via_commit_hash``."""

    # Tag name as listed by ``git tag``.
    tag_name: str
    # Tagger identity and date; ``update_tag`` treats an empty
    # ``tagger_name`` as meaning the tag is lightweight.
    tagger_name: str
    tagger_email: str
    tagger_date: str
    # Raw bytes printed by ``git for-each-ref --format="%(contents)"``.
    tag_comment: bytes
# Required part of the per-commit record.
class CommitInfoBase(TypedDict):
    """Fields that are always present for a commit read from the repository."""

    # "Old" tree hash
    old_tree_hash: str
    # "Old" author / committer information
    old_author_name    : str
    old_author_email   : str
    old_author_date    : str
    old_committer_name : str
    old_committer_email: str
    old_committer_date : str
    # "Old" commit message
    old_commit_comment: bytes
    # Hashes of the "old" parent commits
    ls_old_parent: List[str]
    # Names of the branches pointing at this commit
    ls_branch: List[str]
    # Tags pointing at this commit
    ls_tag: List[TagInfo]
# Full per-commit record: inherits the required fields and adds optional ones.
# ``total=False`` (Python 3.8+) is used instead of ``NotRequired`` (3.11+).
class CommitInfo(CommitInfoBase, total=False):
    """Per-commit record: required "old" fields plus optional "new" fields."""

    # "New" commit hash: may be absent at first, filled in later
    new_commit_hash: Optional[str]
    # "New" tree hash
    new_tree_hash: str
    # "New" author / committer information
    new_author_name    : str
    new_author_email   : str
    new_author_date    : str
    new_committer_name : str
    new_committer_email: str
    new_committer_date : str
    # "New" commit message
    new_commit_comment: bytes
def git_bs(cmd: str) -> bytes:
    """Run a command and return its standard output as raw bytes.

    Args:
        cmd: The command line as a single string, quoted shell-style
            (for example ``'git log -1 --format="%B" HEAD'``). An
            already-split argument sequence is also accepted.

    Returns:
        Everything the command wrote to standard output.

    Raises:
        subprocess.CalledProcessError: The command exited with a non-zero
            status.
    """
    # Without a shell, a whole command line passed as one string only works
    # on Windows; on POSIX the string is taken as the program name. Splitting
    # it into an argument list works on both.
    args = shlex.split(cmd) if isinstance(cmd, str) else list(cmd)
    out = subprocess.check_output(args)
    assert isinstance(out, bytes)  # narrows the type for static checkers
    return out
def git_str(cmd: str) -> str:
    """Run a Git command via ``git_bs`` and return its output as text.

    The bytes are decoded with ``bytes.decode()`` defaults.
    """
    raw_output = git_bs(cmd)
    return raw_output.decode()
def git_tab_list(cmd: str) -> List[str]:
    """Run a Git command and return its output split into tab-separated fields.

    Trailing newlines are removed before splitting, so the last field does
    not carry the line terminator.
    """
    text = git_str(cmd)
    record = text.rstrip('\n')
    fields = record.split('\t')
    return fields
def git_name_list(cmd: str) -> List[str]:
    """Run a Git command and return the non-empty names it prints, one per line.

    The output is split on newlines only. Each line has leading ``*``
    characters removed, then surrounding whitespace; lines that end up
    empty are dropped.
    """
    names: List[str] = []
    for line in git_str(cmd).split('\n'):
        name = line.lstrip('*').strip()
        if name:
            names.append(name)
    return names
def git_hash_list(cmd: str) -> List[str]:
    """Run a Git command and return the object hashes it prints.

    The output is split on any whitespace, so runs of blanks count as one
    separator and leading/trailing blanks are ignored.

    Args:
        cmd: Git command line whose output consists solely of object hashes.

    Returns:
        The hashes in output order.

    Raises:
        ValueError: A token is not a full hexadecimal object name (40 digits
            for SHA-1 repositories, 64 for SHA-256 ones).
    """
    ls_hash = git_str(cmd).split()
    for token in ls_hash:
        # An explicit raise (not ``assert``) so validation survives ``-O``.
        if fullmatch(r'[0-9A-Fa-f]{40}|[0-9A-Fa-f]{64}', token) is None:
            raise ValueError(f'not a Git object hash: {token!r}')
    return ls_hash
def read_tag_info_via_commit_hash(commit_hash: str) -> List[TagInfo]:
    """Collect a ``TagInfo`` for every tag that points at ``commit_hash``.

    For each tag listed by ``git tag --points-at``, the tagger
    name/email/date and the ``%(contents)`` text are read with two
    ``git for-each-ref`` calls.
    """
    tagger_fmt = '%(taggername)\t%(taggeremail:trim)\t%(taggerdate:iso-strict)'
    tags: List[TagInfo] = []
    for tag_name in git_name_list(f'git tag --points-at {commit_hash}'):
        # One call for the three tab-separated tagger fields ...
        who, mail, when = git_tab_list(
            f'git for-each-ref --format="{tagger_fmt}" refs/tags/{tag_name}')
        # ... and a separate one for the contents, kept as raw bytes.
        message = git_bs(
            f'git for-each-ref --format="%(contents)" refs/tags/{tag_name}')
        tags.append(TagInfo(
            tag_name=tag_name,
            tagger_name=who,
            tagger_email=mail,
            tagger_date=when,
            tag_comment=message,
        ))
    return tags
def read_commit_via_list(ls_commit_hash: List[str]) -> Dict[str, CommitInfo]:
    """Read every listed commit and return a dictionary keyed by commit hash.

    Only the "old" fields, the parent list, and the branches and tags
    pointing at the commit are filled in here. The "new" fields are left
    absent; ``user_update`` and ``rebuild_all`` add them later.

    Args:
        ls_commit_hash: Hashes of the commits to read.

    Returns:
        Mapping from each commit hash to its ``CommitInfo`` record, in the
        order of ``ls_commit_hash``.
    """
    len_commits = len(ls_commit_hash)
    print(f'已获取到{len_commits}个提交。', flush=True)
    commit_dict: Dict[str, CommitInfo] = {}
    # Tree hash plus both identities are fetched in a single ``git log``
    # call; the original author measured this as roughly halving the total
    # time compared with one call per field.
    fmt = '%T\t%an\t%ae\t%aI\t%cn\t%ce\t%cI'
    for idx, old_commit_hash in enumerate(ls_commit_hash, start=1):
        print(f'正在读取{idx}/{len_commits}-{old_commit_hash}……', flush=True)
        (tree_hash, author_name, author_email, author_date, committer_name,
         committer_email, committer_date) = git_tab_list(
            f'git log -1 --format="{fmt}" {old_commit_hash}')
        # The message is read separately and kept as raw bytes.
        commit_comment = git_bs(f'git log -1 --format="%B" {old_commit_hash}')
        # Use plumbing rather than ``git branch`` output: the latter is meant
        # for humans and may contain a "(HEAD detached at ...)" entry or a
        # "+" worktree marker, neither of which is a branch name.
        # ``lstrip=2`` removes exactly the "refs/heads/" prefix.
        ls_branch = git_name_list(
            'git for-each-ref --format="%(refname:lstrip=2)"'
            f' --points-at={old_commit_hash} refs/heads/')
        commit_dict[old_commit_hash] = {
            # "Old" tree hash
            'old_tree_hash': tree_hash,
            # "Old" author / committer information
            'old_author_name': author_name,
            'old_author_email': author_email,
            'old_author_date': author_date,
            'old_committer_name': committer_name,
            'old_committer_email': committer_email,
            'old_committer_date': committer_date,
            # "Old" commit message
            'old_commit_comment': commit_comment,
            # Hashes of the "old" parents
            'ls_old_parent':
                git_hash_list(f'git log -1 --format="%P" {old_commit_hash}'),
            # Branches pointing at this commit
            'ls_branch': ls_branch,
            # Tags pointing at this commit
            'ls_tag': read_tag_info_via_commit_hash(old_commit_hash),
            # There is no stored "new parent" list: it is derived on demand
            # from ``ls_old_parent`` and each parent's ``new_commit_hash``.
        }
    return commit_dict
def is_commit_not_changed(val: CommitInfo, ls_new_parent: List[str]) -> bool:
    """Tell whether a commit can be kept as is instead of being re-created.

    A commit object is determined by its tree, its author and committer
    information, its message and its parents. The commit is unchanged only
    if every one of those is equal between the "new" and "old" values.

    Args:
        val: Commit record with both "old" and "new" fields filled in. A
            missing ``new_commit_comment`` is treated as "message not edited".
        ls_new_parent: Hashes of the (possibly re-created) parent commits.

    Returns:
        ``True`` if nothing the commit depends on has changed.
    """
    old_comment = val['old_commit_comment']
    return (
        val['new_tree_hash'] == val['old_tree_hash'] and
        val['new_author_name'] == val['old_author_name'] and
        val['new_author_email'] == val['old_author_email'] and
        val['new_author_date'] == val['old_author_date'] and
        val['new_committer_name'] == val['old_committer_name'] and
        val['new_committer_email'] == val['old_committer_email'] and
        val['new_committer_date'] == val['old_committer_date'] and
        # The message is user-editable too, so it must take part in the check.
        val.get('new_commit_comment', old_comment) == old_comment and
        ls_new_parent == val['ls_old_parent']
    )
def mk_commit_env_vars(val: CommitInfo) -> Mapping[str, str]:
    """Build the environment used when re-creating a commit.

    The result is a copy of the current process environment with Git's
    author/committer variables set from the commit's "new" values. Starting
    from ``os.environ`` matters because a mapping passed as ``env=`` to
    ``subprocess`` replaces the child's environment entirely; without it
    variables such as ``PATH`` and ``HOME`` would be lost.

    Args:
        val: Commit record whose "new" identity fields are filled in.

    Returns:
        A fresh mapping suitable for the ``env=`` argument of ``subprocess``.
    """
    env = dict(os.environ)
    env.update({
        'GIT_AUTHOR_NAME': val['new_author_name'],
        'GIT_AUTHOR_EMAIL': val['new_author_email'],
        'GIT_AUTHOR_DATE': val['new_author_date'],
        'GIT_COMMITTER_NAME': val['new_committer_name'],
        'GIT_COMMITTER_EMAIL': val['new_committer_email'],
        'GIT_COMMITTER_DATE': val['new_committer_date'],
    })
    return env
def update_branch(new_commit_hash: str, ls_branch: List[str]) -> None:
    """Move every listed branch to ``new_commit_hash``.

    Commands are passed as argument lists so they also run on POSIX, where
    a whole command-line string would be taken as the program name.

    Args:
        new_commit_hash: Commit the branches should point at.
        ls_branch: Names of the branches to move.

    Raises:
        subprocess.CalledProcessError: Both ``git branch -f`` and the
            ``git checkout -B`` fallback failed for a branch.
    """
    for old_branch_name in ls_branch:
        # Append a suffix (e.g. '_NEW_') here to create new branches
        # instead of moving the existing ones.
        new_branch_name = old_branch_name
        # With identical names this force-moves the branch; otherwise it
        # creates a new one. No exception on failure: the call can
        # legitimately fail when the branch is the one currently checked out.
        status = subprocess.call(
            ['git', 'branch', '-f', new_branch_name, new_commit_hash])
        if status != 0:
            subprocess.check_call(
                ['git', 'checkout', '-B', new_branch_name, new_commit_hash])
def update_tag(new_commit_hash: str, ls_tag: List[TagInfo]) -> None:
    """Re-create every listed tag so that it points at ``new_commit_hash``.

    Lightweight tags (empty ``tagger_name``) are re-created as lightweight
    tags. Annotated tags are re-created with the original tagger identity,
    date and message.

    Args:
        new_commit_hash: Commit the tags should point at.
        ls_tag: Tags to re-create.

    Raises:
        subprocess.CalledProcessError: A ``git tag`` invocation failed.
    """
    for old_tag in ls_tag:
        old_tag_name = old_tag['tag_name']
        # Append a suffix (e.g. '_NEW_') here to keep the old tags around.
        new_tag_name = old_tag_name
        # When the name is reused, the old tag has to go first.
        if new_tag_name == old_tag_name:
            subprocess.check_call(['git', 'tag', '-d', old_tag_name])
        if not old_tag['tagger_name']:
            # Lightweight tag: just a name for the commit.
            subprocess.check_call(['git', 'tag', new_tag_name, new_commit_hash])
            continue
        # Annotated tag: Git records the committer identity as the tagger.
        # Start from the inherited environment, because ``env=`` replaces
        # the child's environment instead of extending it.
        env_vars = {
            **os.environ,
            'GIT_COMMITTER_NAME': old_tag['tagger_name'],
            'GIT_COMMITTER_EMAIL': old_tag['tagger_email'],
            'GIT_COMMITTER_DATE': old_tag['tagger_date'],
        }
        # ``-F -`` makes Git read the message from standard input, so no
        # temporary file inside ``.git`` is needed (and none can be left
        # behind, or be missed when run outside the repository root).
        subprocess.run(
            ['git', 'tag', '-a', '-F', '-', new_tag_name, new_commit_hash],
            input=old_tag['tag_comment'], env=env_vars, check=True)
def user_update(commit_dict: Dict[str, CommitInfo]) -> None:
    """Let ``user_callback`` edit each commit and store the result as "new" values.

    For every commit an ``EditInfo`` is built from the "old" fields, passed
    to ``user_callback`` for in-place modification, and copied back into the
    commit's "new" fields. If the callback changed ``commit_hash``, the tree
    of the commit it names becomes the new tree; otherwise the old tree is
    reused.
    """
    for old_commit_hash, info in commit_dict.items():
        # Editable snapshot offered to the user.
        editable = EditInfo(
            author_name=info['old_author_name'],
            author_email=info['old_author_email'],
            author_date=info['old_author_date'],
            committer_name=info['old_committer_name'],
            committer_email=info['old_committer_email'],
            committer_date=info['old_committer_date'],
            commit_comment=info['old_commit_comment'],
            commit_hash=old_commit_hash,
        )
        user_callback(editable)
        # Whatever the callback left behind becomes the "new" data.
        info['new_author_name'] = editable['author_name']
        info['new_author_email'] = editable['author_email']
        info['new_author_date'] = editable['author_date']
        info['new_committer_name'] = editable['committer_name']
        info['new_committer_email'] = editable['committer_email']
        info['new_committer_date'] = editable['committer_date']
        info['new_commit_comment'] = editable['commit_comment']
        # A changed commit hash means "use that commit's tree instead".
        chosen_hash = editable['commit_hash']
        info['new_tree_hash'] = (
            info['old_tree_hash'] if chosen_hash == old_commit_hash
            else git_str('git rev-parse %s^{tree}' % chosen_hash).strip()
        )
def rebuild_all(commit_dict: Dict[str, CommitInfo]) -> None:
    """Re-create every commit whose content or ancestry changed.

    The dictionary is scanned repeatedly. In each pass, every commit that
    has no ``new_commit_hash`` yet but whose parents all have one is either
    kept (nothing it depends on changed) or re-created with
    ``git commit-tree``. Scanning stops after a pass that settles nothing.
    Branches and tags of re-created commits are moved to the new commit.

    Args:
        commit_dict: Commit records with their "new" fields filled in;
            ``new_commit_hash`` is written back into each record.

    Raises:
        subprocess.CalledProcessError: A Git command failed.
    """
    wanna = True  # something may still be pending
    while wanna:
        wanna = False  # flipped back on as soon as a commit is settled
        for old_commit_hash, val in commit_dict.items():
            if val.get('new_commit_hash'):
                continue  # already settled in an earlier pass
            # Map old parents to new ones; unsettled parents yield None.
            ls_map_parent = [commit_dict[i].get('new_commit_hash')
                             for i in val['ls_old_parent']]
            if not all(ls_map_parent):
                continue  # wait until every parent has been settled
            wanna = True
            ls_new_parent = cast(List[str], ls_map_parent)  # no None left
            old_comment = val['old_commit_comment']
            new_comment = val.get('new_commit_comment', old_comment)
            # The message is checked here as well, so that a message-only
            # edit is never mistaken for "unchanged".
            if (new_comment == old_comment
                    and is_commit_not_changed(val, ls_new_parent)):
                print(f'保留提交对应{old_commit_hash}……', flush=True)
                new_commit_hash = old_commit_hash
            else:
                print(f'创建提交对应{old_commit_hash}……', flush=True)
                # Argument list rather than one string: a string command
                # without a shell does not run on POSIX.
                cmd = ['git', 'commit-tree']
                for parent in ls_new_parent:
                    cmd += ['-p', parent]
                cmd.append(val['new_tree_hash'])
                out = subprocess.check_output(
                    cmd,
                    # ``env=`` replaces the environment, so overlay the Git
                    # variables on the inherited one.
                    env={**os.environ, **mk_commit_env_vars(val)},
                    # Feed the (possibly user-edited) message.
                    input=new_comment)
                new_commit_hash = out.decode().strip()
            val['new_commit_hash'] = new_commit_hash
            # Only a re-created commit needs its branches and tags moved.
            if new_commit_hash != old_commit_hash:
                update_branch(new_commit_hash, val['ls_branch'])
                update_tag(new_commit_hash, val['ls_tag'])
def _shift_iso_timestamp(stamp: str, delta: timedelta) -> str:
    """Return the ISO 8601 timestamp ``stamp`` moved back by ``delta``."""
    # ``datetime.fromisoformat`` only accepts a trailing "Z" from Python
    # 3.11 on; spell UTC as an explicit offset so older versions cope too.
    if stamp.endswith(('Z', 'z')):
        stamp = stamp[:-1] + '+00:00'
    return (datetime.fromisoformat(stamp) - delta).isoformat()


def user_callback(ref: EditInfo) -> None:
    """Edit one commit's information in place; customise this as needed.

    As shipped, it moves both the author date and the committer date
    360 days into the past and leaves every other field untouched.

    Args:
        ref: Editable commit information; modified in place.

    Raises:
        ValueError: A date is not a valid ISO 8601 timestamp.
    """
    shift = timedelta(days=360)
    # Parse both dates before writing either, so a malformed one leaves
    # ``ref`` untouched.
    author_date = _shift_iso_timestamp(ref['author_date'], shift)
    committer_date = _shift_iso_timestamp(ref['committer_date'], shift)
    ref['author_date'] = author_date
    ref['committer_date'] = committer_date
def main():
    """Entry point: read all commits, apply the user's edits, rebuild history."""
    # Every commit listed by ``git rev-list --all`` is loaded into one dict.
    commit_dict = read_commit_via_list(git_hash_list('git rev-list --all'))
    # The user callback decides the "new" values of each commit.
    user_update(commit_dict)
    # Re-create whatever changed and move branches and tags along.
    rebuild_all(commit_dict)


if __name__ == '__main__':
    main()