Refactor (in support of testing) (#2)

Reviewed-on: #2
Co-authored-by: Jack Jackson <scubbojj@gmail.com>
Co-committed-by: Jack Jackson <scubbojj@gmail.com>
This commit is contained in:
Jack Jackson 2025-03-04 01:21:21 +00:00 committed by scubbo
parent 892a62f4e5
commit a97fefc99d
10 changed files with 367 additions and 338 deletions

View File

@ -11,3 +11,9 @@
- [ ] Tests!
- [ ] ...
- [ ] Profit?
- [ ] Migrate into Gitea's own [Issue Tracker](https://gitea.scubbo.org/scubbo/commit-report-sync/issues)
- [ ] Use a more fully-featured logging system than `console.log` (at least with different logging levels!)
# Done
- [X] Remove `parentHashes`, never ended up being needed

330
dist/index.js vendored
View File

@ -25641,6 +25641,156 @@ module.exports = {
}
/***/ }),
/***/ 1243:
/***/ ((__unused_webpack_module, exports, __nccwpck_require__) => {
"use strict";
// Abstract-away Git interactions, so they can be mocked in tests
Object.defineProperty(exports, "__esModule", ({ value: true }));
exports.gitClone = gitClone;
exports.getNLatestCommits = getNLatestCommits;
exports.getCommitsSinceLatestBeforeGivenDate = getCommitsSinceLatestBeforeGivenDate;
exports.insertRepresentativeCommit = insertRepresentativeCommit;
exports.gitPush = gitPush;
const child_process_1 = __nccwpck_require__(5317);
const fs_1 = __nccwpck_require__(9896);
const date_fns_1 = __nccwpck_require__(4367);
/**
 * Clone the repository at `url` into the directory `dir`.
 *
 * The command is run from the process's own working directory, NOT from inside `dir`: with `cwd: dir`, a
 * relative `dir` (e.g. `./working/source`) was resolved a second time relative to itself, so the clone ended up
 * in `dir/dir` and `dir` itself never became a repository.
 *
 * @param dir path (absolute, or relative to the process's working directory) to clone into
 * @param url URL of the repository to clone
 */
function gitClone(dir, url) {
    (0, child_process_1.execSync)(`git clone ${url} ${dir}`);
}
/**
 * Read the `n` most recent commits of the repository checked out at `dir`, newest first.
 *
 * Each commit is printed by `git log` as one JSON object per line and parsed back here. Author names and subjects
 * are interpolated into that JSON verbatim, so a commit whose metadata contains e.g. a double quote will make
 * `JSON.parse` throw.
 *
 * @param dir directory of the repository to inspect
 * @param n maximum number of commits to return
 * @returns the parsed commits, with `date` as a `Date` object; empty if `git log` printed nothing
 */
function getNLatestCommits(dir, n) {
    const logOutput = (0, child_process_1.execSync)(
    // If you want to copy this formatting for debugging, it's:
    //
    // --pretty=format:'{"hash":"%h","author_name":"%an","author_email":"%ae","date":"%ai","message":"%s"}'
    //
    // TODO - return to this and figure out if these are _actually_ "useless escapes" or not - got a couple layers
    // of string-parsing to consider here, I wouldn't want to bet without testing!
    //eslint-disable-next-line no-useless-escape
    `git log --max-count=${n} --pretty=format:'{\"hash\":\"%h\",\"author_name\":\"%an\",\"author_email\":\"%ae\",\"date\":\"%ai\",\"message\":\"%s\"}'`, { cwd: dir });
    return logOutput.toString()
        .split('\n')
        // Empty output splits into a single empty string, which is not valid JSON - drop blank lines.
        .filter(commitLine => commitLine.trim() !== '')
        .map(commitLine => {
            // https://gist.github.com/textarcana/1306223
            const parsed = JSON.parse(commitLine);
            return {
                hash: parsed.hash,
                author_name: parsed.author_name,
                author_email: parsed.author_email,
                repo_path: dir,
                // Callers compare dates and call `.toISOString()` on them, so this must be a real Date.
                date: new Date(parsed.date),
                message: parsed.message
            };
        });
}
/**
 * Read the recent history of the repository at `dir`, going back far enough to cover every commit that
 * `git log --since=<date>` reports, plus a little more.
 *
 * @param dir directory of the repository to inspect
 * @param date lower bound handed to `git log --since`; a date string is tolerated as well as a `Date`
 * @returns the parsed commits (newest first, `date` as a `Date`), or an empty array when the repository has no
 *          commits yet
 * @throws Error (with the original error as `cause`) when git fails for any other reason
 */
function getCommitsSinceLatestBeforeGivenDate(dir, date) {
    // Normalise first, outside the `try`, so that a bad date surfaces as itself rather than as a "git" failure.
    const since = new Date(date).toISOString();
    let logLines;
    try {
        const countingLogOutput = (0, child_process_1.execSync)(`git log --since=${since} --pretty=oneline`, { cwd: dir });
        // `split` always yields at least one segment (even for empty output), so this over-counts; that only
        // widens the window of commits fetched below.
        const countedNumber = countingLogOutput.toString().split('\n').length;
        console.log(`DEBUG - countedNumber (how many commits in target repo since oldest source commit) is: ${countedNumber}`);
        // TODO - return to this and figure out if these are _actually_ "useless escapes" or not - got a couple layers
        // of string-parsing to consider here, I wouldn't want to bet without testing!
        const logOutput = (0, child_process_1.execSync)(
        //eslint-disable-next-line no-useless-escape
        `git log --max-count=${countedNumber + 1} --pretty=format:'{\"hash\":\"%h\",\"author_name\":\"%an\",\"author_email\":\"%ae\",\"date\":\"%ai\",\"message\":\"%s\"}'`, { cwd: dir });
        logLines = logOutput.toString().split('\n');
    }
    catch (e) {
        // No commits in the target repo - return an empty array, which will result in the first representative commit
        // being made as the first commit. And then we can iterate as normal (recalling that the target history is
        // refreshed _from local repo_ - incurring no network charges) from there on.
        // Not every thrown value carries `output`, so read stderr defensively.
        const stderr = e != null && e.output != null ? e.output[2] : undefined;
        const errorOutputAsString = '' + (stderr != null ? stderr : '');
        if (!errorOutputAsString.includes('does not have any commits yet')) {
            console.log(`Unexpected error: ${errorOutputAsString}`);
            throw Error(`Unexpected error while building target commit history`, {
                cause: e
            });
        }
        // Fresh target repo - just write into it (by returning an empty array of target commits as target history)
        // (i.e. doing nothing)
        return [];
    }
    return logLines
        .filter(commitLine => commitLine.trim() !== '')
        .map(commitLine => {
            const parsed = JSON.parse(commitLine);
            return {
                hash: parsed.hash,
                author_name: parsed.author_name,
                author_email: parsed.author_email,
                repo_path: dir,
                // Callers compare dates and call `.toISOString()` on them, so this must be a real Date.
                date: new Date(parsed.date),
                message: parsed.message
            };
        });
}
/**
 * Splice a representation of `sourceCommit` into the history of the repository at `dir`.
 *
 * Steps, in order:
 *   1. when `targetCommit` is given, hard-reset the repository to it;
 *   2. create the representative commit on top of whatever is now checked out;
 *   3. when `followOnTargetCommit` is given, cherry-pick it back on top.
 */
function insertRepresentativeCommit(dir, sourceRepo, sourceCommit, targetCommit, followOnTargetCommit) {
    const hasInsertionPoint = targetCommit != undefined;
    if (hasInsertionPoint) {
        const resetCommand = `git reset --hard ${targetCommit.hash}`;
        (0, child_process_1.execSync)(resetCommand, { cwd: dir });
    }
    createRepresentativeCommit(dir, sourceRepo, sourceCommit);
    if (followOnTargetCommit == undefined) {
        // Nothing to cherry-pick back on top
        return;
    }
    const cherryPickCommand = `git cherry-pick ${followOnTargetCommit.hash}`;
    (0, child_process_1.execSync)(cherryPickCommand, { cwd: dir });
}
/**
 * Force-push the repository at `dir` to the target repo, authenticating with `tokenForTargetRepo`.
 *
 * The token is embedded in the push URL, and the error thrown by a failed `execSync` quotes the whole command
 * in its message. To keep the token out of logs, a failure is re-thrown as a fresh Error whose message has the
 * token replaced by `***`; the original error is deliberately not attached.
 *
 * @param dir directory of the repository to push from
 * @param tokenForTargetRepo access token used as the password in the push URL
 * @param targetRepoId `domain` / `owner` / `name` of the repository to push to
 * @throws Error when the push fails
 */
function gitPush(dir, tokenForTargetRepo, targetRepoId) {
    const repoPath = `${targetRepoId.domain}/${targetRepoId.owner}/${targetRepoId.name}`;
    try {
        // Note that it must be a `-f`, because we are literally rewriting history.
        (0, child_process_1.execSync)(`git push -f https://unused-username:${tokenForTargetRepo}@${repoPath}`, {
            cwd: dir
        });
    }
    catch (e) {
        const rawMessage = e instanceof Error ? e.message : String(e);
        const safeMessage = tokenForTargetRepo === '' ? rawMessage : rawMessage.split(tokenForTargetRepo).join('***');
        throw new Error(`git push to ${repoPath} failed: ${safeMessage}`);
    }
    // TODO - it'd be nice - before this `git push` is probably best - to add a `README.md` comment acknowledging
    // the sync
}
/**
 * Create, in the repository at `dir`, an empty file named after `sourceCommit` and commit it with the source
 * commit's author, date and (prefixed) message.
 *
 * Every external command is run via `execFileSync` with an argument array instead of a shell string: commit
 * messages and author names come from the source repository and may contain quotes, `$` or backticks, which
 * would otherwise break - or inject into - the shell command.
 *
 * @param dir directory of the repository to commit into
 * @param sourceRepo `owner` / `name` of the repository the commit came from
 * @param sourceCommit the commit to represent
 * @throws whatever the failing command threw, after logging its captured output
 */
function createRepresentativeCommit(dir, sourceRepo, sourceCommit) {
    // Create a commit that represents the source commit, but with a filename that is generated from the source commit's
    // metadata.
    //
    // This is guaranteed to not cause conflicts with other commits, because the filename is generated from the source
    // commit's metadata, and no two source commits will have the same metadata.
    // (OK sure _technically_ these could have a collision, but...like...what are the odds?)
    // TODO - figure out what the odds actually are, that'd be fun :P
    const filename = `${sourceRepo.owner}/${sourceRepo.name}/${sourceCommit.hash}`;
    (0, fs_1.mkdirSync)(dir + '/' + sourceRepo.owner + '/' + sourceRepo.name, { recursive: true });
    (0, child_process_1.execFileSync)('touch', [filename], { cwd: dir });
    (0, child_process_1.execFileSync)('git', ['add', '--', filename], { cwd: dir });
    // Seems like setting `--author` on `git commit` is not sufficient - still need to set `user` as well (I guess those
    // are the difference between `committed by` and `written by`?)
    // Confirmed by following the instructions [here](https://docs.github.com/en/account-and-profile/setting-up-and-managing-your-github-profile/managing-contribution-settings-on-your-profile/why-are-my-contributions-not-showing-up-on-my-profile#your-local-git-commit-email-isnt-connected-to-your-account)
    // to check the "made by" address, and confirming that it did not match the email set in the `--author` flag.
    // Note that, contrary to advice given by the CLI, this does not use the global config, but the local one - because,
    // otherwise, if this was run locally, it would mess up the host system's config.
    (0, child_process_1.execFileSync)('git', ['config', 'user.email', sourceCommit.author_email], { cwd: dir });
    (0, child_process_1.execFileSync)('git', ['config', 'user.name', sourceCommit.author_name], { cwd: dir });
    try {
        // Do _not_ arbitrarily remove the `hash` - it's used for signalling identity in `main()`
        const message = `${sourceRepo.owner}/${sourceRepo.name}: ${sourceCommit.message} - ${sourceCommit.hash}`;
        // https://github.com/Shpota/github-activity-generator/blob/main/contribute.py#L63
        // "%Y-%m-%d %H:%M:%S"
        // `new Date(...)` tolerates a date that arrives as a string rather than a Date.
        const commitDate = (0, date_fns_1.format)(new Date(sourceCommit.date), 'yyyy-MM-dd HH:mm:ss');
        const author = `${sourceCommit.author_name} <${sourceCommit.author_email}>`;
        console.log(`About to commit with args "${message}" --date="${commitDate}" --author="${author}"`);
        (0, child_process_1.execFileSync)('git', ['commit', '-m', message, `--date=${commitDate}`, `--author=${author}`], {
            cwd: dir
        });
    }
    catch (e) {
        console.log(e);
        // Errors not raised by the child process (e.g. from date formatting) carry no `output`.
        const output = e != null && e.output != null ? e.output : [];
        console.log(`DEBUG - error while creating representative commit: ${'' + output[2]} ... ${'' + output[1]}`);
        throw e;
    }
}
/***/ }),
/***/ 9407:
@ -25729,38 +25879,14 @@ run();
Object.defineProperty(exports, "__esModule", ({ value: true }));
exports.main = main;
exports.buildSourceCommitHistory = buildSourceCommitHistory;
exports.buildTargetCommitHistory = buildTargetCommitHistory;
const types_1 = __nccwpck_require__(8522);
const child_process_1 = __nccwpck_require__(5317);
const fs_1 = __nccwpck_require__(9896);
const date_fns_1 = __nccwpck_require__(4367);
const git_1 = __nccwpck_require__(1243);
const WORKING_DIR = './working';
const SOURCE_DIR = WORKING_DIR + '/source';
const TARGET_DIR = WORKING_DIR + '/target';
async function main(sourceRepo, targetRepo, dryRun, tokenForTargetRepo) {
// It _shouldn't_ ever exist, but it if did, something weird is going on.
if ((0, fs_1.existsSync)(WORKING_DIR) || (0, fs_1.existsSync)(SOURCE_DIR) || (0, fs_1.existsSync)(TARGET_DIR)) {
throw new Error('Working directory already exists/populated');
}
if (tokenForTargetRepo == '') {
throw new Error('token_for_target_repo is required');
}
(0, fs_1.mkdirSync)(WORKING_DIR);
(0, fs_1.mkdirSync)(SOURCE_DIR);
(0, fs_1.mkdirSync)(TARGET_DIR);
console.log(`DEBUG - sourceRepoPath: ${(0, types_1.repoString)(sourceRepo)}`);
console.log(`DEBUG - targetRepoPath: ${(0, types_1.repoString)(targetRepo)}`);
// TODO - allow parameterizing how far back in history to checkout (because it might take a long time for older
// repos and, once synced initially, it won't have to go back further than a single one in most cases)
const sourceRepoCloneCommand = `git clone https://${(0, types_1.repoString)(sourceRepo)} ${SOURCE_DIR}`;
console.log(`DEBUG - sourceRepoCloneCommand: ${sourceRepoCloneCommand}`);
(0, child_process_1.execSync)(sourceRepoCloneCommand);
(0, child_process_1.execSync)(`git clone https://${(0, types_1.repoString)(targetRepo)} ${TARGET_DIR}`);
// Seems like setting `--author` on `git commit` is not sufficient - still need to set `user` as well (I guess those
// are the difference between `comitted by` and `written by`?)
(0, child_process_1.execSync)(`git config --global user.email "commit-report-sync-bot@scubbo.org"`, { cwd: TARGET_DIR });
(0, child_process_1.execSync)(`git config --global user.name "Commit Report Sync Bot"`, { cwd: TARGET_DIR });
async function main(sourceRepoId, targetRepoId, dryRun, tokenForTargetRepo) {
setPreconditions(tokenForTargetRepo, sourceRepoId, targetRepoId);
// Logic:
// * Go back as far in source commit history as the given number of commits
// * For each commit, check if it is recorded in the target repo
@ -25781,7 +25907,7 @@ async function main(sourceRepo, targetRepo, dryRun, tokenForTargetRepo) {
// only been alive so many years - there's a hard limit on the rate of code I could possibly have generated, which
// is small compared to, y'know, _companies_. And I don't see organizations of that size caring about GitHub
// contribution history at whole-org scale - and if they do, it'd be proportionally simple for them to implement it.
const sourceCommitHistory = buildSourceCommitHistory(SOURCE_DIR, 10);
const sourceCommitHistory = (0, git_1.getNLatestCommits)(SOURCE_DIR, 10);
// Calling `doSomethingTo(sourceCommitHistory.reverse()); doSomethingElseTo(sourceCommitHistory.reverse());` results
// in the second invocation receiving the double-reversed array.
const reversedSourceCommitHistory = sourceCommitHistory.reverse();
@ -25793,11 +25919,12 @@ async function main(sourceRepo, targetRepo, dryRun, tokenForTargetRepo) {
// than abandoning the target tree after the insertion point and trusting in later operation to rebuild it - because
// the target repo's tree will have representations of commits from _other_ (source)repos too, which we cannot
// recreate without their context)
let targetCommitHistory = buildTargetCommitHistory(TARGET_DIR, reversedSourceCommitHistory[0].date);
let targetCommitHistory = (0, git_1.getCommitsSinceLatestBeforeGivenDate)(TARGET_DIR, reversedSourceCommitHistory[0].date);
for (const sourceCommit of reversedSourceCommitHistory) {
// "(Index of) First Target Commit that is earlier than (or equal to) the source commit"
const targetCommitIndex = targetCommitHistory.findIndex(c => c.date <= sourceCommit.date);
console.log(`DEBUG - targetCommitIndex: ${targetCommitIndex}. targetCommitHistory: ${JSON.stringify(targetCommitHistory)}`);
// TODO - refactor this to use guard clauses more than nested-ifs
if (targetCommitIndex != -1) {
const targetCommit = targetCommitHistory[targetCommitIndex];
// If the target commit is a representation of the source commit, we can skip it
@ -25819,7 +25946,7 @@ async function main(sourceRepo, targetRepo, dryRun, tokenForTargetRepo) {
else {
followOnTargetCommit = targetCommitHistory[targetCommitIndex - 1];
}
insertRepresentativeCommit(sourceRepo, sourceCommit, targetCommit, followOnTargetCommit);
(0, git_1.insertRepresentativeCommit)(TARGET_DIR, sourceRepoId, sourceCommit, targetCommit, followOnTargetCommit);
// And then regenerate the target commit history
// Thankfully, we only need to do this back to immediately preceding the _just processed_ source
// commit (since we know that all the rest of the source commits to be processed will be after it),
@ -25828,147 +25955,42 @@ async function main(sourceRepo, targetRepo, dryRun, tokenForTargetRepo) {
// compared), albeit approximately-halved - but I'm gambling on the fact that that should still take
// negligible practical time at usual repo sizes - at least, the ones I'm
// this quadratic portion should be negligible, though - and, even if it isn't, it definitely will
targetCommitHistory = buildTargetCommitHistory(TARGET_DIR, sourceCommit.date);
targetCommitHistory = (0, git_1.getCommitsSinceLatestBeforeGivenDate)(TARGET_DIR, sourceCommit.date);
}
}
console.log(`DEBUG - targetCommit: ${targetCommit.hash}`);
}
else {
console.log(`DEBUG - could not find a targetCommit that is earlier than or equal to the sourceCommit ${sourceCommit.hash} - therefore, writing the source commit's representation onto the current HEAD of target repo`);
insertRepresentativeCommit(sourceRepo, sourceCommit, undefined, undefined);
(0, git_1.insertRepresentativeCommit)(TARGET_DIR, sourceRepoId, sourceCommit, undefined, undefined);
// As above, have to regenerate history after mutation
targetCommitHistory = buildTargetCommitHistory(TARGET_DIR, sourceCommit.date);
targetCommitHistory = (0, git_1.getCommitsSinceLatestBeforeGivenDate)(TARGET_DIR, sourceCommit.date);
}
}
// OK, that's it - we've processed all the source commits, and we've inserted all the necessary target commits.
// We can just `git push` to the target repo now.
//
// Note that it must be a `-f`, because we are literally rewriting history.
if (!dryRun) {
(0, child_process_1.execSync)(`git push -f https://unused-username:${tokenForTargetRepo}@${targetRepo.domain}/${targetRepo.owner}/${targetRepo.name}`, {
cwd: TARGET_DIR
});
// TODO - it'd be nice - before this `git push` is probably best - to add a `README.md` comment acknowledging
// the sync
(0, git_1.gitPush)(TARGET_DIR, tokenForTargetRepo, targetRepoId);
}
return;
}
function buildSourceCommitHistory(path, numCommits) {
console.log(`DEBUG - building source commit history for ${path} with max count ${numCommits}`);
const output = [];
const logOutput = (0, child_process_1.execSync)(
// If you want to copy this formatting for debugging, it's:
//
// --pretty=format:'{"hash":"%h","author_name":"%an","author_email":"%ae","date":"%ai","message":"%s"}'
//
// TODO - return to this and figure out if these are _actually_ "useless escapes" or not - got a couple layers
// of string-parsing to consider here, I wouldn't want to bet without testing!
//eslint-disable-next-line no-useless-escape
`git log --max-count=${numCommits} --pretty=format:'{\"hash\":\"%h\",\"author_name\":\"%an\",\"author_email\":\"%ae\",\"date\":\"%ai\",\"message\":\"%s\"}'`, { cwd: path });
const logLines = logOutput.toString().split('\n');
for (const line of logLines) {
const commit = parseCommit(path, line);
output.push(commit);
function setPreconditions(tokenForTargetRepo, sourceRepoId, targetRepoId) {
// It _shouldn't_ ever exist, but if it did, something weird is going on.
if ((0, fs_1.existsSync)(WORKING_DIR) || (0, fs_1.existsSync)(SOURCE_DIR) || (0, fs_1.existsSync)(TARGET_DIR)) {
throw new Error('Working directory already exists/populated');
}
return output;
}
function buildTargetCommitHistory(path, oldestDateInSourceCommitHistory) {
console.log(`DEBUG - building target commit history for ${path} with oldest date ${oldestDateInSourceCommitHistory.toISOString()}`);
const output = [];
try {
const countingLogOutput = (0, child_process_1.execSync)(`git log --since=${oldestDateInSourceCommitHistory.toISOString()} --pretty=oneline`, { cwd: path });
const countedNumber = countingLogOutput.toString().split('\n').length;
console.log(`DEBUG - countedNumber (how many commits in target repo since oldest source commit) is: ${countedNumber}`);
// TODO - return to this and figure out if these are _actually_ "useless escapes" or not - got a couple layers
// of string-parsing to consider here, I wouldn't want to bet without testing!
const logOutput = (0, child_process_1.execSync)(
//eslint-disable-next-line no-useless-escape
`git log --max-count=${countedNumber + 1} --pretty=format:'{\"hash\":\"%h\",\"author_name\":\"%an\",\"author_email\":\"%ae\",\"date\":\"%ai\",\"message\":\"%s\"}'`, { cwd: path });
const logLines = logOutput.toString().split('\n');
for (const line of logLines) {
const commit = parseCommit(path, line);
output.push(commit);
}
}
catch (e) {
const error = e;
// Now you can safely access properties
// No commits in the target repo - return an empty array, which will result in the first representative commit
// being made as the first commit. And then we can iterate as normal (recalling that the target history is
// refreshed _from local repo_ - incurring no network charges) from there on.
const errorOutputAsString = '' + error.output[2];
if (!errorOutputAsString.includes('does not have any commits yet')) {
console.log(`Unexpected error: ${errorOutputAsString}`);
throw Error(`Unexpected error while building target commit history`, {
cause: error
});
}
// Fresh target repo - just write into it (by returning an empty array of target commits as target history)
// (i.e. doing nothing)
}
console.log(`As final output of buildTargetCommitHistory, preceding ${oldestDateInSourceCommitHistory.toISOString()}, output is ${JSON.stringify(output)}`);
return output;
}
// https://gist.github.com/textarcana/1306223
function parseCommit(repo_path, line) {
console.log(`DEBUG - line: ${line}, for path ${repo_path}`);
const parsed = JSON.parse(line);
return {
hash: parsed['hash'],
author_name: parsed['author_name'],
author_email: parsed['author_email'],
repo_path: repo_path,
date: new Date(parsed['date']),
message: parsed['message'],
};
}
function insertRepresentativeCommit(sourceRepo, sourceCommit, targetCommit, followOnTargetCommit) {
// If there is a target commit,
if (targetCommit != undefined) {
(0, child_process_1.execSync)(`git reset --hard ${targetCommit.hash}`, {
cwd: TARGET_DIR
});
}
createRepresentativeCommit(sourceRepo, sourceCommit);
// Then, if there is a follow-on target commit, we need to cherry-pick it onto the source commit:
if (followOnTargetCommit != undefined) {
(0, child_process_1.execSync)(`git cherry-pick ${followOnTargetCommit.hash}`, {
cwd: TARGET_DIR
});
} // else - nothing to cherry-pick back on top
}
function createRepresentativeCommit(sourceRepo, sourceCommit) {
// Create a commit that represents the source commit, but with a filename that is generated from the source commit's
// metadata.
//
// This is guaranteed to not cause conflicts with other commits, because the filename is generated from the source
// commit's metadata, and no two source commits will have the same metadata.
// (OK sure _technically_ these could have a collision, but...like...what are the odds?)
// TODO - figure out what the odds actually are, that'd be fun :P
const filename = `${sourceRepo.owner}/${sourceRepo.name}/${sourceCommit.hash}`;
(0, fs_1.mkdirSync)(TARGET_DIR + '/' + sourceRepo.owner + '/' + sourceRepo.name, { recursive: true });
(0, child_process_1.execSync)(`touch ${filename}`, {
cwd: TARGET_DIR
});
(0, child_process_1.execSync)(`git add ${filename}`, {
cwd: TARGET_DIR
});
try {
// Do _not_ arbitrarily remove the `hash` - it's used for signalling identity in `main()`
const args = `"${sourceRepo.owner}/${sourceRepo.name}: ${sourceCommit.message} - ${sourceCommit.hash}" --date="${(0, date_fns_1.format)(sourceCommit.date, 'yyyy-MM-dd HH:mm:ss')}" --author="${sourceCommit.author_name} <${sourceCommit.author_email}>"`;
console.log(`About to commit with args ${args}`);
// https://github.com/Shpota/github-activity-generator/blob/main/contribute.py#L63
// "%Y-%m-%d %H:%M:%S"
(0, child_process_1.execSync)(`git commit -m ${args}`, {
cwd: TARGET_DIR
});
}
catch (e) {
console.log(e);
const error = e;
console.log(`DEBUG - error while creating representative commit: ${'' + error.output[2]} ... ${'' + error.output[1]}`);
throw e;
if (tokenForTargetRepo == '') {
throw new Error('token_for_target_repo is required');
}
(0, fs_1.mkdirSync)(WORKING_DIR);
(0, fs_1.mkdirSync)(SOURCE_DIR);
(0, fs_1.mkdirSync)(TARGET_DIR);
console.log(`DEBUG - sourceRepoPath: ${(0, types_1.repoString)(sourceRepoId)}`);
console.log(`DEBUG - targetRepoPath: ${(0, types_1.repoString)(targetRepoId)}`);
// TODO - allow parameterizing how far back in history to checkout (because it might take a long time for older
// repos and, once synced initially, it won't have to go back further than a single one in most cases)
(0, git_1.gitClone)(SOURCE_DIR, `https://${(0, types_1.repoString)(sourceRepoId)}`);
(0, git_1.gitClone)(TARGET_DIR, `https://${(0, types_1.repoString)(targetRepoId)}`);
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

151
src/git.ts Normal file
View File

@ -0,0 +1,151 @@
// Abstract-away Git interactions, so they can be mocked in tests
import { execFileSync, execSync } from "child_process";
import { mkdirSync } from "fs";
import { format } from 'date-fns';
import { Commit, ExecSyncError, RepoId } from "./types";
/**
 * Clone the repository at `url` into the directory `dir`.
 *
 * The command is run from the process's own working directory, NOT from inside `dir`: with `cwd: dir`, a
 * relative `dir` (e.g. `./working/source`) was resolved a second time relative to itself, so the clone ended up
 * in `dir/dir` and `dir` itself never became a repository.
 *
 * @param dir - path (absolute, or relative to the process's working directory) to clone into
 * @param url - URL of the repository to clone
 */
export function gitClone(dir: string, url: string) {
    execSync(`git clone ${url} ${dir}`);
}
/**
 * Read the `n` most recent commits of the repository checked out at `dir`, newest first.
 *
 * Each commit is printed by `git log` as one JSON object per line and parsed back here. Author names and subjects
 * are interpolated into that JSON verbatim, so a commit whose metadata contains e.g. a double quote will make
 * `JSON.parse` throw.
 *
 * @param dir - directory of the repository to inspect
 * @param n - maximum number of commits to return
 * @returns the parsed commits, with `date` as a `Date` object; empty if `git log` printed nothing
 */
export function getNLatestCommits(dir: string, n: number): Commit[] {
    const logOutput = execSync(
        // If you want to copy this formatting for debugging, it's:
        //
        // --pretty=format:'{"hash":"%h","author_name":"%an","author_email":"%ae","date":"%ai","message":"%s"}'
        //
        // TODO - return to this and figure out if these are _actually_ "useless escapes" or not - got a couple layers
        // of string-parsing to consider here, I wouldn't want to bet without testing!
        //eslint-disable-next-line no-useless-escape
        `git log --max-count=${n} --pretty=format:'{\"hash\":\"%h\",\"author_name\":\"%an\",\"author_email\":\"%ae\",\"date\":\"%ai\",\"message\":\"%s\"}'`,
        { cwd: dir }
    );
    return logOutput.toString()
        .split('\n')
        // Empty output splits into a single empty string, which is not valid JSON - drop blank lines.
        .filter(commitLine => commitLine.trim() !== '')
        .map(commitLine => {
            // https://gist.github.com/textarcana/1306223
            const parsed = JSON.parse(commitLine);
            return {
                hash: parsed.hash,
                author_name: parsed.author_name,
                author_email: parsed.author_email,
                repo_path: dir,
                // Callers compare dates and call `.toISOString()` on them, so this must be a real Date.
                date: new Date(parsed.date),
                message: parsed.message
            };
        });
}
/**
 * Read the recent history of the repository at `dir`, going back far enough to cover every commit that
 * `git log --since=<date>` reports, plus a little more.
 *
 * @param dir - directory of the repository to inspect
 * @param date - lower bound handed to `git log --since`
 * @returns the parsed commits (newest first, `date` as a `Date`), or an empty array when the repository has no
 *          commits yet
 * @throws Error (with the original error as `cause`) when git fails for any other reason
 */
export function getCommitsSinceLatestBeforeGivenDate(dir: string, date: Date): Commit[] {
    // Normalise first, outside the `try`, so that a bad date surfaces as itself rather than as a "git" failure.
    // Wrapping in `new Date` also tolerates a date that arrives as a string at runtime.
    const since = new Date(date).toISOString();
    let logLines: string[];
    try {
        const countingLogOutput = execSync(
            `git log --since=${since} --pretty=oneline`,
            { cwd: dir }
        );
        // `split` always yields at least one segment (even for empty output), so this over-counts; that only
        // widens the window of commits fetched below.
        const countedNumber = countingLogOutput.toString().split('\n').length;
        console.log(`DEBUG - countedNumber (how many commits in target repo since oldest source commit) is: ${countedNumber}`);
        // TODO - return to this and figure out if these are _actually_ "useless escapes" or not - got a couple layers
        // of string-parsing to consider here, I wouldn't want to bet without testing!
        const logOutput = execSync(
            //eslint-disable-next-line no-useless-escape
            `git log --max-count=${countedNumber+1} --pretty=format:'{\"hash\":\"%h\",\"author_name\":\"%an\",\"author_email\":\"%ae\",\"date\":\"%ai\",\"message\":\"%s\"}'`,
            { cwd: dir }
        );
        logLines = logOutput.toString().split('\n');
    } catch (e) {
        // Not every thrown value carries `output`, so read stderr defensively.
        const error = e as Partial<ExecSyncError>;
        // No commits in the target repo - return an empty array, which will result in the first representative commit
        // being made as the first commit. And then we can iterate as normal (recalling that the target history is
        // refreshed _from local repo_ - incurring no network charges) from there on.
        const errorOutputAsString = '' + (error.output?.[2] ?? '');
        if (!errorOutputAsString.includes('does not have any commits yet')) {
            console.log(`Unexpected error: ${errorOutputAsString}`);
            throw Error(`Unexpected error while building target commit history`, {
                cause: e
            });
        }
        // Fresh target repo - just write into it (by returning an empty array of target commits as target history)
        // (i.e. doing nothing)
        return [];
    }
    return logLines
        .filter(commitLine => commitLine.trim() !== '')
        .map(commitLine => {
            const parsed = JSON.parse(commitLine);
            return {
                hash: parsed.hash,
                author_name: parsed.author_name,
                author_email: parsed.author_email,
                repo_path: dir,
                // Callers compare dates and call `.toISOString()` on them, so this must be a real Date.
                date: new Date(parsed.date),
                message: parsed.message
            };
        });
}
/**
 * Splice a representation of `sourceCommit` into the history of the repository at `dir`.
 *
 * Steps, in order:
 *   1. when `targetCommit` is given, hard-reset the repository to it;
 *   2. create the representative commit on top of whatever is now checked out;
 *   3. when `followOnTargetCommit` is given, cherry-pick it back on top.
 */
export function insertRepresentativeCommit(dir: string,sourceRepo: RepoId, sourceCommit: Commit, targetCommit: Commit | undefined, followOnTargetCommit: Commit | undefined): void {
    const hasInsertionPoint = targetCommit != undefined;
    if (hasInsertionPoint) {
        const resetCommand = `git reset --hard ${targetCommit.hash}`;
        execSync(resetCommand, { cwd: dir });
    }

    createRepresentativeCommit(dir, sourceRepo, sourceCommit);

    if (followOnTargetCommit == undefined) {
        // Nothing to cherry-pick back on top
        return;
    }
    const cherryPickCommand = `git cherry-pick ${followOnTargetCommit.hash}`;
    execSync(cherryPickCommand, { cwd: dir });
}
/**
 * Force-push the repository at `dir` to the target repo, authenticating with `tokenForTargetRepo`.
 *
 * The token is embedded in the push URL, and the error thrown by a failed `execSync` quotes the whole command
 * in its message. To keep the token out of logs, a failure is re-thrown as a fresh Error whose message has the
 * token replaced by `***`; the original error is deliberately not attached.
 *
 * @param dir - directory of the repository to push from
 * @param tokenForTargetRepo - access token used as the password in the push URL
 * @param targetRepoId - `domain` / `owner` / `name` of the repository to push to
 * @throws Error when the push fails
 */
export function gitPush(dir: string, tokenForTargetRepo: string, targetRepoId: RepoId) {
    const repoPath = `${targetRepoId.domain}/${targetRepoId.owner}/${targetRepoId.name}`;
    try {
        // Note that it must be a `-f`, because we are literally rewriting history.
        execSync(`git push -f https://unused-username:${tokenForTargetRepo}@${repoPath}`, {
            cwd: dir
        });
    } catch (e) {
        const rawMessage = e instanceof Error ? e.message : String(e);
        const safeMessage = tokenForTargetRepo === ''
            ? rawMessage
            : rawMessage.split(tokenForTargetRepo).join('***');
        throw new Error(`git push to ${repoPath} failed: ${safeMessage}`);
    }
    // TODO - it'd be nice - before this `git push` is probably best - to add a `README.md` comment acknowledging
    // the sync
}
/**
 * Create, in the repository at `dir`, an empty file named after `sourceCommit` and commit it with the source
 * commit's author, date and (prefixed) message.
 *
 * Every external command is run via `execFileSync` with an argument array instead of a shell string: commit
 * messages and author names come from the source repository and may contain quotes, `$` or backticks, which
 * would otherwise break - or inject into - the shell command.
 *
 * @param dir - directory of the repository to commit into
 * @param sourceRepo - identifies the repository the commit came from (`owner` and `name` are used)
 * @param sourceCommit - the commit to represent
 * @throws whatever the failing command threw, after logging its captured output
 */
function createRepresentativeCommit(dir: string, sourceRepo: RepoId,sourceCommit: Commit) {
    // Create a commit that represents the source commit, but with a filename that is generated from the source commit's
    // metadata.
    //
    // This is guaranteed to not cause conflicts with other commits, because the filename is generated from the source
    // commit's metadata, and no two source commits will have the same metadata.
    // (OK sure _technically_ these could have a collision, but...like...what are the odds?)
    // TODO - figure out what the odds actually are, that'd be fun :P
    const filename = `${sourceRepo.owner}/${sourceRepo.name}/${sourceCommit.hash}`;
    mkdirSync(dir + '/' + sourceRepo.owner + '/' + sourceRepo.name, { recursive: true });
    execFileSync('touch', [filename], { cwd: dir });
    execFileSync('git', ['add', '--', filename], { cwd: dir });

    // Seems like setting `--author` on `git commit` is not sufficient - still need to set `user` as well (I guess those
    // are the difference between `committed by` and `written by`?)
    // Confirmed by following the instructions [here](https://docs.github.com/en/account-and-profile/setting-up-and-managing-your-github-profile/managing-contribution-settings-on-your-profile/why-are-my-contributions-not-showing-up-on-my-profile#your-local-git-commit-email-isnt-connected-to-your-account)
    // to check the "made by" address, and confirming that it did not match the email set in the `--author` flag.
    // Note that, contrary to advice given by the CLI, this does not use the global config, but the local one - because,
    // otherwise, if this was run locally, it would mess up the host system's config.
    execFileSync('git', ['config', 'user.email', sourceCommit.author_email], { cwd: dir });
    execFileSync('git', ['config', 'user.name', sourceCommit.author_name], { cwd: dir });

    try {
        // Do _not_ arbitrarily remove the `hash` - it's used for signalling identity in `main()`
        const message = `${sourceRepo.owner}/${sourceRepo.name}: ${sourceCommit.message} - ${sourceCommit.hash}`;
        // https://github.com/Shpota/github-activity-generator/blob/main/contribute.py#L63
        // "%Y-%m-%d %H:%M:%S"
        // `new Date(...)` tolerates a date that arrives as a string rather than a Date at runtime.
        const commitDate = format(new Date(sourceCommit.date), 'yyyy-MM-dd HH:mm:ss');
        const author = `${sourceCommit.author_name} <${sourceCommit.author_email}>`;
        console.log(`About to commit with args "${message}" --date="${commitDate}" --author="${author}"`);
        execFileSync('git', ['commit', '-m', message, `--date=${commitDate}`, `--author=${author}`], {
            cwd: dir
        });
    } catch (e) {
        console.log(e);
        // Errors not raised by the child process (e.g. from date formatting) carry no `output`.
        const error = e as Partial<ExecSyncError>;
        console.log(`DEBUG - error while creating representative commit: ${'' + error.output?.[2]} ... ${'' + error.output?.[1]}`);
        throw e;
    }
}

View File

@ -1,37 +1,13 @@
import { Repo, repoString, Commit, ExecSyncError } from './types';
import { execSync } from 'child_process';
import { RepoId, repoString, Commit } from './types';
import { existsSync, mkdirSync } from 'fs';
import { format } from 'date-fns';
import { getCommitsSinceLatestBeforeGivenDate, getNLatestCommits, gitClone, gitPush, insertRepresentativeCommit } from './git';
const WORKING_DIR = './working';
const SOURCE_DIR = WORKING_DIR + '/source';
const TARGET_DIR = WORKING_DIR + '/target';
export async function main(sourceRepo: Repo, targetRepo: Repo, dryRun: boolean, tokenForTargetRepo: string) {
// It _shouldn't_ ever exist, but it if did, something weird is going on.
if (existsSync(WORKING_DIR) || existsSync(SOURCE_DIR) || existsSync(TARGET_DIR)) {
throw new Error('Working directory already exists/populated');
}
if (tokenForTargetRepo == '') {
throw new Error('token_for_target_repo is required');
}
mkdirSync(WORKING_DIR);
mkdirSync(SOURCE_DIR);
mkdirSync(TARGET_DIR);
console.log(`DEBUG - sourceRepoPath: ${repoString(sourceRepo)}`)
console.log(`DEBUG - targetRepoPath: ${repoString(targetRepo)}`)
// TODO - allow parameterizing how far back in history to checkout (because it might take a long time for older
// repos and, once synced initially, it won't have to go back further than a single one in most cases)
const sourceRepoCloneCommand = `git clone https://${repoString(sourceRepo)} ${SOURCE_DIR}`
console.log(`DEBUG - sourceRepoCloneCommand: ${sourceRepoCloneCommand}`);
execSync(sourceRepoCloneCommand);
execSync(`git clone https://${repoString(targetRepo)} ${TARGET_DIR}`);
export async function main(sourceRepoId: RepoId, targetRepoId: RepoId, dryRun: boolean, tokenForTargetRepo: string) {
setPreconditions(tokenForTargetRepo, sourceRepoId, targetRepoId);
// Logic:
// * Go back as far in source commit history as the given number of commits
@ -53,7 +29,7 @@ export async function main(sourceRepo: Repo, targetRepo: Repo, dryRun: boolean,
// only been alive so many years - there's a hard limit on the rate of code I could possibly have generated, which
// is small compared to, y'know, _companies_. And I don't see organizations of that size caring about GitHub
// contribution history at whole-org scale - and if they do, it'd be proportionally simple for them to implement it.
const sourceCommitHistory = buildSourceCommitHistory(SOURCE_DIR, 10);
const sourceCommitHistory = getNLatestCommits(SOURCE_DIR, 10);
// Calling `doSomethingTo(sourceCommitHistory.reverse()); doSomethingElseTo(sourceCommitHistory.reverse());` results
// in the second invocation receiving the double-reversed array.
@ -66,12 +42,13 @@ export async function main(sourceRepo: Repo, targetRepo: Repo, dryRun: boolean,
// than abandoning the target tree after the insertion point and trusting in later operation to rebuild it - because
// the target repo's tree will have representations of commits from _other_ (source)repos too, which we cannot
// recreate without their context)
let targetCommitHistory = buildTargetCommitHistory(TARGET_DIR, reversedSourceCommitHistory[0].date);
let targetCommitHistory = getCommitsSinceLatestBeforeGivenDate(TARGET_DIR, reversedSourceCommitHistory[0].date);
for (const sourceCommit of reversedSourceCommitHistory) {
// "(Index of) First Target Commit that is earlier than (or equal to) the source commit"
const targetCommitIndex = targetCommitHistory.findIndex(c => c.date <= sourceCommit.date);
console.log(`DEBUG - targetCommitIndex: ${targetCommitIndex}. targetCommitHistory: ${JSON.stringify(targetCommitHistory)}`);
// TODO - refactor this to use guard clauses more than nested-ifs
if (targetCommitIndex != -1) {
const targetCommit = targetCommitHistory[targetCommitIndex];
// If the target commit is a representation of the source commit, we can skip it
@ -90,7 +67,7 @@ export async function main(sourceRepo: Repo, targetRepo: Repo, dryRun: boolean,
} else {
followOnTargetCommit = targetCommitHistory[targetCommitIndex - 1];
}
insertRepresentativeCommit(sourceRepo, sourceCommit, targetCommit, followOnTargetCommit);
insertRepresentativeCommit(TARGET_DIR,sourceRepoId, sourceCommit, targetCommit, followOnTargetCommit);
// And then regenerate the target commit history
// Thankfully, we only need to do this back to immediately preceding the _just processed_ source
// commit (since we know that all the rest of the source commits to be processed will be after it),
@ -99,171 +76,44 @@ export async function main(sourceRepo: Repo, targetRepo: Repo, dryRun: boolean,
// compared), albeit approximately-halved - but I'm gambling on the fact that that should still take
// negligible practical time at usual repo sizes - at least, the ones I'm
// this quadratic portion should be negligible, though - and, even if it isn't, it definitely will
targetCommitHistory = buildTargetCommitHistory(TARGET_DIR, sourceCommit.date);
targetCommitHistory = getCommitsSinceLatestBeforeGivenDate(TARGET_DIR, sourceCommit.date);
}
}
console.log(`DEBUG - targetCommit: ${targetCommit.hash}`);
} else {
console.log(`DEBUG - could not find a targetCommit that is earlier than or equal to the sourceCommit ${sourceCommit.hash} - therefore, writing the source commit's representation onto the current HEAD of target repo`);
insertRepresentativeCommit(sourceRepo, sourceCommit, undefined, undefined);
insertRepresentativeCommit(TARGET_DIR, sourceRepoId, sourceCommit, undefined, undefined);
// As above, have to regenerate history after mutation
targetCommitHistory = buildTargetCommitHistory(TARGET_DIR, sourceCommit.date);
targetCommitHistory = getCommitsSinceLatestBeforeGivenDate(TARGET_DIR, sourceCommit.date);
}
}
// OK, that's it - we've processed all the source commits, and we've inserted all the necessary target commits.
// We can just `git push` to the target repo now.
//
// Note that it must be a `-f`, because we are literally rewriting history.
if (!dryRun) {
execSync(`git push -f https://unused-username:${tokenForTargetRepo}@${targetRepo.domain}/${targetRepo.owner}/${targetRepo.name}`, {
cwd: TARGET_DIR
})
// TODO - it'd be nice - before this `git push` is probably best - to add a `README.md` comment acknowledging
// the sync
gitPush(TARGET_DIR, tokenForTargetRepo, targetRepoId);
}
return
}
export function buildSourceCommitHistory(path: string, numCommits: number): Commit[] {
console.log(`DEBUG - building source commit history for ${path} with max count ${numCommits}`);
const output: Commit[] = [];
const logOutput = execSync(
// If you want to copy this formatting for debugging, it's:
//
// --pretty=format:'{"hash":"%h","author_name":"%an","author_email":"%ae","date":"%ai","message":"%s"}'
//
// TODO - return to this and figure out if these are _actually_ "useless escapes" or not - got a couple layers
// of string-parsing to consider here, I wouldn't want to bet without testing!
//eslint-disable-next-line no-useless-escape
`git log --max-count=${numCommits} --pretty=format:'{\"hash\":\"%h\",\"author_name\":\"%an\",\"author_email\":\"%ae\",\"date\":\"%ai\",\"message\":\"%s\"}'`,
{ cwd: path }
);
const logLines = logOutput.toString().split('\n');
for (const line of logLines) {
const commit = parseCommit(path, line);
output.push(commit);
function setPreconditions(tokenForTargetRepo: string, sourceRepoId: RepoId, targetRepoId: RepoId) {
// It _shouldn't_ ever exist, but if it did, something weird is going on.
if (existsSync(WORKING_DIR) || existsSync(SOURCE_DIR) || existsSync(TARGET_DIR)) {
throw new Error('Working directory already exists/populated');
}
return output;
}
/**
 * Read the recent end of the commit history of the repo checked out at `path`.
 *
 * Works in two steps: first count the lines printed by `git log --since=<date>`, then fetch one more than that
 * many commits (newest first, as `git log` prints them) and parse each line with `parseCommit`.
 *
 * A repo with no commits at all is not an error: git's "does not have any commits yet" failure is translated into
 * an empty array, so that the caller can write the first representative commit straight into it.
 *
 * @param path directory of the local clone to inspect
 * @param oldestDateInSourceCommitHistory cut-off passed to `git log --since`
 * @returns the parsed commits, or `[]` for a repo without commits
 * @throws Error (with the original failure as `cause`) for any failure other than the empty-repo case
 */
export function buildTargetCommitHistory(path: string, oldestDateInSourceCommitHistory: Date): Commit[] {
  console.log(`DEBUG - building target commit history for ${path} with oldest date ${oldestDateInSourceCommitHistory.toISOString()}`);
  const output: Commit[] = [];
  try {
    const countingLogOutput = execSync(
      `git log --since=${oldestDateInSourceCommitHistory.toISOString()} --pretty=oneline`,
      { cwd: path }
    );
    // NOTE: `''.split('\n')` is `['']`, so this is never less than 1 - i.e. it is an upper bound on the number of
    // matching commits rather than an exact count. That only ever makes the second query fetch slightly more.
    const countedNumber = countingLogOutput.toString().split('\n').length;
    console.log(`DEBUG - countedNumber (how many commits in target repo since oldest source commit) is: ${countedNumber}`);
    // TODO - return to this and figure out if these are _actually_ "useless escapes" or not - got a couple layers
    // of string-parsing to consider here, I wouldn't want to bet without testing!
    const logOutput = execSync(
      //eslint-disable-next-line no-useless-escape
      `git log --max-count=${countedNumber+1} --pretty=format:'{\"hash\":\"%h\",\"author_name\":\"%an\",\"author_email\":\"%ae\",\"date\":\"%ai\",\"message\":\"%s\"}'`,
      { cwd: path }
    );
    const logLines = logOutput.toString().split('\n');
    for (const line of logLines) {
      const commit = parseCommit(path, line);
      output.push(commit);
    }
  } catch (e) {
    // Not everything thrown inside the `try` comes from `execSync` - `parseCommit` can throw too, and such errors
    // carry no `output`. Guard the access so that the real failure is reported instead of a TypeError from here.
    const error = e as Partial<ExecSyncError>;
    const stderr = error.output?.[2];
    const errorOutputAsString = stderr == null
      ? (e instanceof Error ? e.message : String(e))
      : '' + stderr;
    // No commits in the target repo - return an empty array, which will result in the first representative commit
    // being made as the first commit. And then we can iterate as normal (recalling that the target history is
    // refreshed _from local repo_ - incurring no network charges) from there on.
    if (!errorOutputAsString.includes('does not have any commits yet')) {
      console.log(`Unexpected error: ${errorOutputAsString}`);
      throw Error(`Unexpected error while building target commit history`, {
        cause: e
      })
    }
    // Fresh target repo - just write into it (by returning an empty array of target commits as target history)
    // (i.e. doing nothing)
  }
  console.log(`As final output of buildTargetCommitHistory, preceding ${oldestDateInSourceCommitHistory.toISOString()}, output is ${JSON.stringify(output)}`);
  return output;
}
// https://gist.github.com/textarcana/1306223
/**
 * Turn one line of `git log` output - in the JSON-shaped `--pretty=format:` layout used by the history builders in
 * this file - into a `Commit`.
 *
 * Git substitutes `%an`, `%s` etc. verbatim; it does not JSON-escape them. A subject containing a double-quote
 * (git's own default `Revert "..."` subject, for one) therefore produces a line that `JSON.parse` rejects. When
 * that happens, fall back to extracting the fields positionally, relying on the fixed key order of the format
 * string.
 *
 * @param repo_path path of the repository the line came from; copied onto the result unchanged
 * @param line a single line of formatted log output
 * @returns the parsed commit, with `date` converted to a `Date`
 * @throws SyntaxError if the line is neither valid JSON nor in the expected fixed layout
 */
function parseCommit(repo_path: string, line: string): Commit {
  console.log(`DEBUG - line: ${line}, for path ${repo_path}`);
  let parsed: Record<string, string>;
  try {
    parsed = JSON.parse(line);
  } catch (e) {
    // Keys are fixed by the format string, so anchor on them. Every field but the last is matched lazily, and
    // `message` greedily, so that quotes inside the subject end up in `message`.
    const fields = /^\{"hash":"(.*?)","author_name":"(.*?)","author_email":"(.*?)","date":"(.*?)","message":"(.*)"\}$/.exec(line);
    if (fields === null) {
      // Not a shape we recognise at all - surface the original parse failure.
      throw e;
    }
    const [, hash = '', author_name = '', author_email = '', date = '', message = ''] = fields;
    parsed = { hash, author_name, author_email, date, message };
  }
  return {
    hash: parsed['hash'],
    author_name: parsed['author_name'],
    author_email: parsed['author_email'],
    repo_path: repo_path,
    date: new Date(parsed['date']),
    message: parsed['message'],
  }
}
function insertRepresentativeCommit(sourceRepo: Repo,sourceCommit: Commit, targetCommit: Commit | undefined, followOnTargetCommit: Commit | undefined): void {
// If there is a target commit,
if (targetCommit != undefined) {
execSync(`git reset --hard ${targetCommit.hash}`, {
cwd: TARGET_DIR
})
if (tokenForTargetRepo == '') {
throw new Error('token_for_target_repo is required');
}
createRepresentativeCommit(sourceRepo, sourceCommit);
// Then, if there is a follow-on target commit, we need to cherry-pick it onto the source commit:
if (followOnTargetCommit != undefined) {
execSync(`git cherry-pick ${followOnTargetCommit.hash}`, {
cwd: TARGET_DIR
})
}// else - nothing to cherry-pick back on top
}
function createRepresentativeCommit(sourceRepo: Repo,sourceCommit: Commit) {
// Create a commit that represents the source commit, but with a filename that is generated from the source commit's
// metadata.
//
// This is guaranteed to not cause conflicts with other commits, because the filename is generated from the source
// commit's metadata, and no two source commits will have the same metadata.
// (OK sure _technically_ these could have a collision, but...like...what are the odds?)
// TODO - figure out what the odds actually are, that'd be fun :P
const filename = `${sourceRepo.owner}/${sourceRepo.name}/${sourceCommit.hash}`
mkdirSync(TARGET_DIR + '/' + sourceRepo.owner + '/' + sourceRepo.name, { recursive: true });
execSync(`touch ${filename}`, {
cwd: TARGET_DIR
})
execSync(`git add ${filename}`, {
cwd: TARGET_DIR
})
// Seems like setting `--author` on `git commit` is not sufficient - still need to set `user` as well (I guess those
// are the difference between `committed by` and `written by`?)
// Confirmed by following the instructions [here](https://docs.github.com/en/account-and-profile/setting-up-and-managing-your-github-profile/managing-contribution-settings-on-your-profile/why-are-my-contributions-not-showing-up-on-my-profile#your-local-git-commit-email-isnt-connected-to-your-account)
// to check the "made by" address, and confirming that it did not match the email set in the `--author` flag.
// Note that, contrary to advice given by the CLI, this does not use the global config, but the local one - because,
// otherwise, if this was run locally, it would mess up the host system's config.
execSync(`git config user.email "${sourceCommit.author_email}"`, { cwd: TARGET_DIR });
execSync(`git config user.name "${sourceCommit.author_name}"`, { cwd: TARGET_DIR });
try {
// Do _not_ arbitrarily remove the `hash` - it's used for signalling identity in `main()`
const args = `"${sourceRepo.owner}/${sourceRepo.name}: ${sourceCommit.message} - ${sourceCommit.hash}" --date="${format(sourceCommit.date, 'yyyy-MM-dd HH:mm:ss')}" --author="${sourceCommit.author_name} <${sourceCommit.author_email}>"`;
console.log(`About to commit with args ${args}`);
// https://github.com/Shpota/github-activity-generator/blob/main/contribute.py#L63
// "%Y-%m-%d %H:%M:%S"
execSync(`git commit -m ${args}`, {
cwd: TARGET_DIR
})
} catch (e) {
console.log(e);
const error = e as ExecSyncError;
console.log(`DEBUG - error while creating representative commit: ${'' + error.output[2]} ... ${'' + error.output[1]}`);
throw e;
}
mkdirSync(WORKING_DIR);
mkdirSync(SOURCE_DIR);
mkdirSync(TARGET_DIR);
console.log(`DEBUG - sourceRepoPath: ${repoString(sourceRepoId)}`)
console.log(`DEBUG - targetRepoPath: ${repoString(targetRepoId)}`)
// TODO - allow parameterizing how far back in history to checkout (because it might take a long time for older
// repos and, once synced initially, it won't have to go back further than a single one in most cases)
gitClone(SOURCE_DIR, `https://${repoString(sourceRepoId)}`);
gitClone(TARGET_DIR, `https://${repoString(targetRepoId)}`);
}

View File

@ -1,11 +1,11 @@
// TODO - parametrize the scheme
export type Repo = {
// Identifies a repository by three string parts. `repoString` renders them as `domain/owner/name`.
export type RepoId = {
// First path segment produced by `repoString`
domain: string;
// Second path segment produced by `repoString`
owner: string;
// Final path segment produced by `repoString`
name: string;
}
export function repoString(repo: Repo): string {
/**
 * Render a repo identifier as a scheme-less `domain/owner/name` string.
 *
 * @param repo the identifier to render
 * @returns the three parts joined by `/`
 */
export function repoString(repo: RepoId): string {
  const { domain, owner, name } = repo;
  return domain + '/' + owner + '/' + name;
}