diff --git a/TODO.txt b/TODO.txt index 0821a6f..bd22df8 100644 --- a/TODO.txt +++ b/TODO.txt @@ -10,4 +10,10 @@ - [ ] Blog about this ;) - [ ] Tests! - [ ] ... - - [ ] Profit? \ No newline at end of file + - [ ] Profit? +- [ ] Migrate into Gitea's own [Issue Tracker](https://gitea.scubbo.org/scubbo/commit-report-sync/issues) +- [ ] Use a more fully-featured logging system than `console.log` (at least with different logging levels!) + +# Done + +- [X] Remove `parentHashes`, never ended up being needed diff --git a/dist/index.js b/dist/index.js index 24a3b32..b3bb073 100644 --- a/dist/index.js +++ b/dist/index.js @@ -25641,6 +25641,156 @@ module.exports = { } +/***/ }), + +/***/ 1243: +/***/ ((__unused_webpack_module, exports, __nccwpck_require__) => { + +"use strict"; + +// Abstract-away Git interactions, so they can be mocked in tests +Object.defineProperty(exports, "__esModule", ({ value: true })); +exports.gitClone = gitClone; +exports.getNLatestCommits = getNLatestCommits; +exports.getCommitsSinceLatestBeforeGivenDate = getCommitsSinceLatestBeforeGivenDate; +exports.insertRepresentativeCommit = insertRepresentativeCommit; +exports.gitPush = gitPush; +const child_process_1 = __nccwpck_require__(5317); +const fs_1 = __nccwpck_require__(9896); +const date_fns_1 = __nccwpck_require__(4367); +function gitClone(dir, url) { + (0, child_process_1.execSync)(`git clone ${url} ${dir}`, { cwd: dir }); +} +function getNLatestCommits(dir, n) { + const logOutput = (0, child_process_1.execSync)( + // If you want to copy this formatting for debugging, it's: + // + // --pretty=format:'{"hash":"%h","author_name":"%an","author_email":"%ae","date":"%ai","message":"%s"}' + // + // TODO - return to this and figure out if these are _actually_ "useless escapes" or not - got a couple layers + // of string-parsing to consider here, I wouldn't want to bet without testing! + //eslint-disable-next-line no-useless-escape + `git log --max-count=${n} --pretty=format:'{\"hash\":\"%h\",\"author_name\":\"%an\",\"author_email\":\"%ae\",\"date\":\"%ai\",\"message\":\"%s\"}'`, { cwd: dir }); + const logLines = logOutput.toString().split('\n'); + return logLines.map(commitLine => { + // https://gist.github.com/textarcana/1306223 + const parsed = JSON.parse(commitLine); + return { + hash: parsed.hash, + author_name: parsed.author_name, + author_email: parsed.author_email, + repo_path: dir, + date: parsed.date, + message: parsed.message + }; + }); +} +function getCommitsSinceLatestBeforeGivenDate(dir, date) { + try { + const countingLogOutput = (0, child_process_1.execSync)(`git log --since=${date.toISOString()} --pretty=oneline`, { cwd: dir }); + const countedNumber = countingLogOutput.toString().split('\n').length; + console.log(`DEBUG - countedNumber (how many commits in target repo since oldest source commit) is: ${countedNumber}`); + // TODO - return to this and figure out if these are _actually_ "useless escapes" or not - got a couple layers + // of string-parsing to consider here, I wouldn't want to bet without testing! + const logOutput = (0, child_process_1.execSync)( + //eslint-disable-next-line no-useless-escape + `git log --max-count=${countedNumber + 1} --pretty=format:'{\"hash\":\"%h\",\"author_name\":\"%an\",\"author_email\":\"%ae\",\"date\":\"%ai\",\"message\":\"%s\"}'`, { cwd: dir }); + const logLines = logOutput.toString().split('\n'); + return logLines.map(commitLine => { + const parsed = JSON.parse(commitLine); + return { + hash: parsed.hash, + author_name: parsed.author_name, + author_email: parsed.author_email, + repo_path: dir, + date: parsed.date, + message: parsed.message + }; + }); + } + catch (e) { + const error = e; + // No commits in the target repo - return an empty array, which will result in the first representative commit + // being made as the first commit. And then we can iterate as normal (recalling that the target history is + // refreshed _from local repo_ - incurring no network charges) from there on. + const errorOutputAsString = '' + error.output[2]; + if (!errorOutputAsString.includes('does not have any commits yet')) { + console.log(`Unexpected error: ${errorOutputAsString}`); + throw Error(`Unexpected error while building target commit history`, { + cause: error + }); + } + // Fresh target repo - just write into it (by returning an empty array of target commits as target history) + // (i.e. doing nothing) + return []; + } +} +function insertRepresentativeCommit(dir, sourceRepo, sourceCommit, targetCommit, followOnTargetCommit) { + // If there is a target commit, + if (targetCommit != undefined) { + (0, child_process_1.execSync)(`git reset --hard ${targetCommit.hash}`, { + cwd: dir + }); + } + createRepresentativeCommit(dir, sourceRepo, sourceCommit); + // Then, if there is a follow-on target commit, we need to cherry-pick it onto the source commit: + if (followOnTargetCommit != undefined) { + (0, child_process_1.execSync)(`git cherry-pick ${followOnTargetCommit.hash}`, { + cwd: dir + }); + } // else - nothing to cherry-pick back on top +} +function gitPush(dir, tokenForTargetRepo, targetRepoId) { + // Note that it must be a `-f`, because we are literally rewriting history. + (0, child_process_1.execSync)(`git push -f https://unused-username:${tokenForTargetRepo}@${targetRepoId.domain}/${targetRepoId.owner}/${targetRepoId.name}`, { + cwd: dir + }); + // TODO - it'd be nice - before this `git push` is probably best - to add a `README.md` comment acknowledging + // the sync +} +function createRepresentativeCommit(dir, sourceRepo, sourceCommit) { + // Create a commit that represents the source commit, but with a filename that is generated from the source commit's + // metadata. + // + // This is guaranteed to not cause conflicts with other commits, because the filename is generated from the source + // commit's metadata, and no two source commits will have the same metadata. + // (OK sure _technically_ these could have a collision, but...like...what are the odds?) + // TODO - figure out what the odds actually are, that'd be fun :P + const filename = `${sourceRepo.owner}/${sourceRepo.name}/${sourceCommit.hash}`; + (0, fs_1.mkdirSync)(dir + '/' + sourceRepo.owner + '/' + sourceRepo.name, { recursive: true }); + (0, child_process_1.execSync)(`touch ${filename}`, { + cwd: dir + }); + (0, child_process_1.execSync)(`git add ${filename}`, { + cwd: dir + }); + // Seems like setting `--author` on `git commit` is not sufficient - still need to set `user` as well (I guess those + // are the difference between `comitted by` and `written by`?) + // Confirmed by following the instructions [here](https://docs.github.com/en/account-and-profile/setting-up-and-managing-your-github-profile/managing-contribution-settings-on-your-profile/why-are-my-contributions-not-showing-up-on-my-profile#your-local-git-commit-email-isnt-connected-to-your-account) + // to check the "made by" address, and confirming that it did not match the email set in the `--author` flag. + // Note that, contrary to advice given by the CLI, this does not use the global config, but the local one - because, + // otherwise, if this was run locally, it would mess up the host system's config. + (0, child_process_1.execSync)(`git config user.email "${sourceCommit.author_email}"`, { cwd: dir }); + (0, child_process_1.execSync)(`git config user.name "${sourceCommit.author_name}"`, { cwd: dir }); + try { + // Do _not_ arbitrarily remove the `hash` - it's used for signalling identity in `main()` + const args = `"${sourceRepo.owner}/${sourceRepo.name}: ${sourceCommit.message} - ${sourceCommit.hash}" --date="${(0, date_fns_1.format)(sourceCommit.date, 'yyyy-MM-dd HH:mm:ss')}" --author="${sourceCommit.author_name} <${sourceCommit.author_email}>"`; + console.log(`About to commit with args ${args}`); + // https://github.com/Shpota/github-activity-generator/blob/main/contribute.py#L63 + // "%Y-%m-%d %H:%M:%S" + (0, child_process_1.execSync)(`git commit -m ${args}`, { + cwd: dir + }); + } + catch (e) { + console.log(e); + const error = e; + console.log(`DEBUG - error while creating representative commit: ${'' + error.output[2]} ... ${'' + error.output[1]}`); + throw e; + } +} + + /***/ }), /***/ 9407: @@ -25729,38 +25879,14 @@ run(); Object.defineProperty(exports, "__esModule", ({ value: true })); exports.main = main; -exports.buildSourceCommitHistory = buildSourceCommitHistory; -exports.buildTargetCommitHistory = buildTargetCommitHistory; const types_1 = __nccwpck_require__(8522); -const child_process_1 = __nccwpck_require__(5317); const fs_1 = __nccwpck_require__(9896); -const date_fns_1 = __nccwpck_require__(4367); +const git_1 = __nccwpck_require__(1243); const WORKING_DIR = './working'; const SOURCE_DIR = WORKING_DIR + '/source'; const TARGET_DIR = WORKING_DIR + '/target'; -async function main(sourceRepo, targetRepo, dryRun, tokenForTargetRepo) { - // It _shouldn't_ ever exist, but it if did, something weird is going on. - if ((0, fs_1.existsSync)(WORKING_DIR) || (0, fs_1.existsSync)(SOURCE_DIR) || (0, fs_1.existsSync)(TARGET_DIR)) { - throw new Error('Working directory already exists/populated'); - } - if (tokenForTargetRepo == '') { - throw new Error('token_for_target_repo is required'); - } - (0, fs_1.mkdirSync)(WORKING_DIR); - (0, fs_1.mkdirSync)(SOURCE_DIR); - (0, fs_1.mkdirSync)(TARGET_DIR); - console.log(`DEBUG - sourceRepoPath: ${(0, types_1.repoString)(sourceRepo)}`); - console.log(`DEBUG - targetRepoPath: ${(0, types_1.repoString)(targetRepo)}`); - // TODO - allow parameterizing how far back in history to checkout (because it might take a long time for older - // repos and, once synced initially, it won't have to go back further than a single one in most cases) - const sourceRepoCloneCommand = `git clone https://${(0, types_1.repoString)(sourceRepo)} ${SOURCE_DIR}`; - console.log(`DEBUG - sourceRepoCloneCommand: ${sourceRepoCloneCommand}`); - (0, child_process_1.execSync)(sourceRepoCloneCommand); - (0, child_process_1.execSync)(`git clone https://${(0, types_1.repoString)(targetRepo)} ${TARGET_DIR}`); - // Seems like setting `--author` on `git commit` is not sufficient - still need to set `user` as well (I guess those - // are the difference between `comitted by` and `written by`?) - (0, child_process_1.execSync)(`git config --global user.email "commit-report-sync-bot@scubbo.org"`, { cwd: TARGET_DIR }); - (0, child_process_1.execSync)(`git config --global user.name "Commit Report Sync Bot"`, { cwd: TARGET_DIR }); +async function main(sourceRepoId, targetRepoId, dryRun, tokenForTargetRepo) { + setPreconditions(tokenForTargetRepo, sourceRepoId, targetRepoId); // Logic: // * Go back as far in source commit history as the given number of commits // * For each commit, check if it is recorded in the target repo @@ -25781,7 +25907,7 @@ async function main(sourceRepo, targetRepo, dryRun, tokenForTargetRepo) { // only been alive so many years - there's a hard limit on the rate of code I could possibly have generated, which // is small compared to, y'know, _companies_. And I don't see organizations of that size caring about GitHub // contribution history at whole-org scale - and if they do, it'd be proportionally simple for them to implement it. - const sourceCommitHistory = buildSourceCommitHistory(SOURCE_DIR, 10); + const sourceCommitHistory = (0, git_1.getNLatestCommits)(SOURCE_DIR, 10); // Calling `doSomethingTo(sourceCommitHistory.reverse()); doSomethingElseTo(sourceCommitHistory.reverse());` results // in the second invocation receiving the double-reversed array. const reversedSourceCommitHistory = sourceCommitHistory.reverse(); @@ -25793,11 +25919,12 @@ async function main(sourceRepo, targetRepo, dryRun, tokenForTargetRepo) { // than abandoning the target tree after the insertion point and trusting in later operation to rebuild it - because // the target repo's tree will have representations of commits from _other_ (source)repos too, which we cannot // recreate without their context) - let targetCommitHistory = buildTargetCommitHistory(TARGET_DIR, reversedSourceCommitHistory[0].date); + let targetCommitHistory = (0, git_1.getCommitsSinceLatestBeforeGivenDate)(TARGET_DIR, reversedSourceCommitHistory[0].date); for (const sourceCommit of reversedSourceCommitHistory) { // "(Index of) First Target Commit that is earlier than (or equal to) the source commit" const targetCommitIndex = targetCommitHistory.findIndex(c => c.date <= sourceCommit.date); console.log(`DEBUG - targetCommitIndex: ${targetCommitIndex}. targetCommitHistory: ${JSON.stringify(targetCommitHistory)}`); + // TODO - refactor this to use guard clauses more than nested-ifs if (targetCommitIndex != -1) { const targetCommit = targetCommitHistory[targetCommitIndex]; // If the target commit is a representation of the source commit, we can skip it @@ -25819,7 +25946,7 @@ async function main(sourceRepo, targetRepo, dryRun, tokenForTargetRepo) { else { followOnTargetCommit = targetCommitHistory[targetCommitIndex - 1]; } - insertRepresentativeCommit(sourceRepo, sourceCommit, targetCommit, followOnTargetCommit); + (0, git_1.insertRepresentativeCommit)(TARGET_DIR, sourceRepoId, sourceCommit, targetCommit, followOnTargetCommit); // And then regenerate the target commit history // Thankfully, we only need to do this back to immediately preceding the _just processed_ source // commit (since we know that all the rest of the source commits to be processed will be after it), @@ -25828,147 +25955,42 @@ async function main(sourceRepo, targetRepo, dryRun, tokenForTargetRepo) { // compared), albeit approximately-halved - but I'm gambling on the fact that that should still take // negligible practical time at usual repo sizes - at least, the ones I'm // this quadratic portion should be negligible, though - and, even if it isn't, it definitely will - targetCommitHistory = buildTargetCommitHistory(TARGET_DIR, sourceCommit.date); + targetCommitHistory = (0, git_1.getCommitsSinceLatestBeforeGivenDate)(TARGET_DIR, sourceCommit.date); } } console.log(`DEBUG - targetCommit: ${targetCommit.hash}`); } else { console.log(`DEBUG - could not find a targetCommit that is earlier than or equal to the sourceCommit ${sourceCommit.hash} - therefore, writing the source commit's representation onto the current HEAD of target repo`); - insertRepresentativeCommit(sourceRepo, sourceCommit, undefined, undefined); + (0, git_1.insertRepresentativeCommit)(TARGET_DIR, sourceRepoId, sourceCommit, undefined, undefined); // As above, have to regenerate history after mutation - targetCommitHistory = buildTargetCommitHistory(TARGET_DIR, sourceCommit.date); + targetCommitHistory = (0, git_1.getCommitsSinceLatestBeforeGivenDate)(TARGET_DIR, sourceCommit.date); } } // OK, that's it - we've processed all the source commits, and we've inserted all the necessary target commits. // We can just `git push` to the target repo now. - // - // Note that it must be a `-f`, because we are literally rewriting history. if (!dryRun) { - (0, child_process_1.execSync)(`git push -f https://unused-username:${tokenForTargetRepo}@${targetRepo.domain}/${targetRepo.owner}/${targetRepo.name}`, { - cwd: TARGET_DIR - }); - // TODO - it'd be nice - before this `git push` is probably best - to add a `README.md` comment acknowledging - // the sync + (0, git_1.gitPush)(TARGET_DIR, tokenForTargetRepo, targetRepoId); } return; } -function buildSourceCommitHistory(path, numCommits) { - console.log(`DEBUG - building source commit history for ${path} with max count ${numCommits}`); - const output = []; - const logOutput = (0, child_process_1.execSync)( - // If you want to copy this formatting for debugging, it's: - // - // --pretty=format:'{"hash":"%h","author_name":"%an","author_email":"%ae","date":"%ai","message":"%s"}' - // - // TODO - return to this and figure out if these are _actually_ "useless escapes" or not - got a couple layers - // of string-parsing to consider here, I wouldn't want to bet without testing! - //eslint-disable-next-line no-useless-escape - `git log --max-count=${numCommits} --pretty=format:'{\"hash\":\"%h\",\"author_name\":\"%an\",\"author_email\":\"%ae\",\"date\":\"%ai\",\"message\":\"%s\"}'`, { cwd: path }); - const logLines = logOutput.toString().split('\n'); - for (const line of logLines) { - const commit = parseCommit(path, line); - output.push(commit); +function setPreconditions(tokenForTargetRepo, sourceRepoId, targetRepoId) { + // It _shouldn't_ ever exist, but it if did, something weird is going on. + if ((0, fs_1.existsSync)(WORKING_DIR) || (0, fs_1.existsSync)(SOURCE_DIR) || (0, fs_1.existsSync)(TARGET_DIR)) { + throw new Error('Working directory already exists/populated'); } - return output; -} -function buildTargetCommitHistory(path, oldestDateInSourceCommitHistory) { - console.log(`DEBUG - building target commit history for ${path} with oldest date ${oldestDateInSourceCommitHistory.toISOString()}`); - const output = []; - try { - const countingLogOutput = (0, child_process_1.execSync)(`git log --since=${oldestDateInSourceCommitHistory.toISOString()} --pretty=oneline`, { cwd: path }); - const countedNumber = countingLogOutput.toString().split('\n').length; - console.log(`DEBUG - countedNumber (how many commits in target repo since oldest source commit) is: ${countedNumber}`); - // TODO - return to this and figure out if these are _actually_ "useless escapes" or not - got a couple layers - // of string-parsing to consider here, I wouldn't want to bet without testing! - const logOutput = (0, child_process_1.execSync)( - //eslint-disable-next-line no-useless-escape - `git log --max-count=${countedNumber + 1} --pretty=format:'{\"hash\":\"%h\",\"author_name\":\"%an\",\"author_email\":\"%ae\",\"date\":\"%ai\",\"message\":\"%s\"}'`, { cwd: path }); - const logLines = logOutput.toString().split('\n'); - for (const line of logLines) { - const commit = parseCommit(path, line); - output.push(commit); - } - } - catch (e) { - const error = e; - // Now you can safely access properties - // No commits in the target repo - return an empty array, which will result in the first representative commit - // being made as the first commit. And then we can iterate as normal (recalling that the target history is - // refreshed _from local repo_ - incurring no network charges) from there on. - const errorOutputAsString = '' + error.output[2]; - if (!errorOutputAsString.includes('does not have any commits yet')) { - console.log(`Unexpected error: ${errorOutputAsString}`); - throw Error(`Unexpected error while building target commit history`, { - cause: error - }); - } - // Fresh target repo - just write into it (by returning an empty array of target commits as target history) - // (i.e. doing nothing) - } - console.log(`As final output of buildTargetCommitHistory, preceding ${oldestDateInSourceCommitHistory.toISOString()}, output is ${JSON.stringify(output)}`); - return output; -} -// https://gist.github.com/textarcana/1306223 -function parseCommit(repo_path, line) { - console.log(`DEBUG - line: ${line}, for path ${repo_path}`); - const parsed = JSON.parse(line); - return { - hash: parsed['hash'], - author_name: parsed['author_name'], - author_email: parsed['author_email'], - repo_path: repo_path, - date: new Date(parsed['date']), - message: parsed['message'], - }; -} -function insertRepresentativeCommit(sourceRepo, sourceCommit, targetCommit, followOnTargetCommit) { - // If there is a target commit, - if (targetCommit != undefined) { - (0, child_process_1.execSync)(`git reset --hard ${targetCommit.hash}`, { - cwd: TARGET_DIR - }); - } - createRepresentativeCommit(sourceRepo, sourceCommit); - // Then, if there is a follow-on target commit, we need to cherry-pick it onto the source commit: - if (followOnTargetCommit != undefined) { - (0, child_process_1.execSync)(`git cherry-pick ${followOnTargetCommit.hash}`, { - cwd: TARGET_DIR - }); - } // else - nothing to cherry-pick back on top -} -function createRepresentativeCommit(sourceRepo, sourceCommit) { - // Create a commit that represents the source commit, but with a filename that is generated from the source commit's - // metadata. - // - // This is guaranteed to not cause conflicts with other commits, because the filename is generated from the source - // commit's metadata, and no two source commits will have the same metadata. - // (OK sure _technically_ these could have a collision, but...like...what are the odds?) - // TODO - figure out what the odds actually are, that'd be fun :P - const filename = `${sourceRepo.owner}/${sourceRepo.name}/${sourceCommit.hash}`; - (0, fs_1.mkdirSync)(TARGET_DIR + '/' + sourceRepo.owner + '/' + sourceRepo.name, { recursive: true }); - (0, child_process_1.execSync)(`touch ${filename}`, { - cwd: TARGET_DIR - }); - (0, child_process_1.execSync)(`git add ${filename}`, { - cwd: TARGET_DIR - }); - try { - // Do _not_ arbitrarily remove the `hash` - it's used for signalling identity in `main()` - const args = `"${sourceRepo.owner}/${sourceRepo.name}: ${sourceCommit.message} - ${sourceCommit.hash}" --date="${(0, date_fns_1.format)(sourceCommit.date, 'yyyy-MM-dd HH:mm:ss')}" --author="${sourceCommit.author_name} <${sourceCommit.author_email}>"`; - console.log(`About to commit with args ${args}`); - // https://github.com/Shpota/github-activity-generator/blob/main/contribute.py#L63 - // "%Y-%m-%d %H:%M:%S" - (0, child_process_1.execSync)(`git commit -m ${args}`, { - cwd: TARGET_DIR - }); - } - catch (e) { - console.log(e); - const error = e; - console.log(`DEBUG - error while creating representative commit: ${'' + error.output[2]} ... ${'' + error.output[1]}`); - throw e; + if (tokenForTargetRepo == '') { + throw new Error('token_for_target_repo is required'); } + (0, fs_1.mkdirSync)(WORKING_DIR); + (0, fs_1.mkdirSync)(SOURCE_DIR); + (0, fs_1.mkdirSync)(TARGET_DIR); + console.log(`DEBUG - sourceRepoPath: ${(0, types_1.repoString)(sourceRepoId)}`); + console.log(`DEBUG - targetRepoPath: ${(0, types_1.repoString)(targetRepoId)}`); + // TODO - allow parameterizing how far back in history to checkout (because it might take a long time for older + // repos and, once synced initially, it won't have to go back further than a single one in most cases) + (0, git_1.gitClone)(SOURCE_DIR, `https://${(0, types_1.repoString)(sourceRepoId)}`); + (0, git_1.gitClone)(TARGET_DIR, `https://${(0, types_1.repoString)(targetRepoId)}`); } diff --git a/node_modules/@vercel/ncc/dist/ncc/cli.js.cache b/node_modules/@vercel/ncc/dist/ncc/cli.js.cache index ad8a961..25f28f4 100644 Binary files a/node_modules/@vercel/ncc/dist/ncc/cli.js.cache and b/node_modules/@vercel/ncc/dist/ncc/cli.js.cache differ diff --git a/node_modules/@vercel/ncc/dist/ncc/index.js.cache b/node_modules/@vercel/ncc/dist/ncc/index.js.cache index 22d67b7..dc265f3 100644 Binary files a/node_modules/@vercel/ncc/dist/ncc/index.js.cache and b/node_modules/@vercel/ncc/dist/ncc/index.js.cache differ diff --git a/node_modules/@vercel/ncc/dist/ncc/loaders/relocate-loader.js.cache b/node_modules/@vercel/ncc/dist/ncc/loaders/relocate-loader.js.cache index 560cd7a..9a0d40d 100644 Binary files a/node_modules/@vercel/ncc/dist/ncc/loaders/relocate-loader.js.cache and b/node_modules/@vercel/ncc/dist/ncc/loaders/relocate-loader.js.cache differ diff --git a/node_modules/@vercel/ncc/dist/ncc/loaders/shebang-loader.js.cache b/node_modules/@vercel/ncc/dist/ncc/loaders/shebang-loader.js.cache index 9efed4a..48bec92 100644 Binary files a/node_modules/@vercel/ncc/dist/ncc/loaders/shebang-loader.js.cache and b/node_modules/@vercel/ncc/dist/ncc/loaders/shebang-loader.js.cache differ diff --git a/node_modules/@vercel/ncc/dist/ncc/loaders/ts-loader.js.cache b/node_modules/@vercel/ncc/dist/ncc/loaders/ts-loader.js.cache index 82a6dd1..fff17c1 100644 Binary files a/node_modules/@vercel/ncc/dist/ncc/loaders/ts-loader.js.cache and b/node_modules/@vercel/ncc/dist/ncc/loaders/ts-loader.js.cache differ diff --git a/src/git.ts b/src/git.ts new file mode 100644 index 0000000..3885301 --- /dev/null +++ b/src/git.ts @@ -0,0 +1,151 @@ +// Abstract-away Git interactions, so they can be mocked in tests + +import { execSync } from "child_process"; +import { Commit, ExecSyncError, RepoId } from "./types"; +import { mkdirSync } from "fs"; +import { format } from 'date-fns'; + +export function gitClone(dir: string, url: string) { + execSync(`git clone ${url} ${dir}`, { cwd: dir }); +} + +export function getNLatestCommits(dir: string, n: number): Commit[] { + const logOutput = execSync( + // If you want to copy this formatting for debugging, it's: + // + // --pretty=format:'{"hash":"%h","author_name":"%an","author_email":"%ae","date":"%ai","message":"%s"}' + // + // TODO - return to this and figure out if these are _actually_ "useless escapes" or not - got a couple layers + // of string-parsing to consider here, I wouldn't want to bet without testing! + //eslint-disable-next-line no-useless-escape + `git log --max-count=${n} --pretty=format:'{\"hash\":\"%h\",\"author_name\":\"%an\",\"author_email\":\"%ae\",\"date\":\"%ai\",\"message\":\"%s\"}'`, + { cwd: dir } + ); + const logLines = logOutput.toString().split('\n') + return logLines.map(commitLine => { + // https://gist.github.com/textarcana/1306223 + const parsed = JSON.parse(commitLine) + return { + hash: parsed.hash, + author_name: parsed.author_name, + author_email: parsed.author_email, + repo_path: dir, + date: parsed.date, + message: parsed.message + } + }); +} + +export function getCommitsSinceLatestBeforeGivenDate(dir: string, date: Date): Commit[] { + try { + const countingLogOutput = execSync( + `git log --since=${date.toISOString()} --pretty=oneline`, + { cwd: dir } + ); + const countedNumber = countingLogOutput.toString().split('\n').length; + console.log(`DEBUG - countedNumber (how many commits in target repo since oldest source commit) is: ${countedNumber}`); + // TODO - return to this and figure out if these are _actually_ "useless escapes" or not - got a couple layers + // of string-parsing to consider here, I wouldn't want to bet without testing! + const logOutput = execSync( + //eslint-disable-next-line no-useless-escape + `git log --max-count=${countedNumber+1} --pretty=format:'{\"hash\":\"%h\",\"author_name\":\"%an\",\"author_email\":\"%ae\",\"date\":\"%ai\",\"message\":\"%s\"}'`, + { cwd: dir } + ); + const logLines = logOutput.toString().split('\n'); + return logLines.map(commitLine => { + const parsed = JSON.parse(commitLine) + return { + hash: parsed.hash, + author_name: parsed.author_name, + author_email: parsed.author_email, + repo_path: dir, + date: parsed.date, + message: parsed.message + } + }); + } catch (e) { + const error = e as ExecSyncError + // No commits in the target repo - return an empty array, which will result in the first representative commit + // being made as the first commit. And then we can iterate as normal (recalling that the target history is + // refreshed _from local repo_ - incurring no network charges) from there on. + const errorOutputAsString = '' + error.output[2] + if (!errorOutputAsString.includes('does not have any commits yet')) { + console.log(`Unexpected error: ${errorOutputAsString}`); + throw Error(`Unexpected error while building target commit history`, { + cause: error + }) + } + // Fresh target repo - just write into it (by returning an empty array of target commits as target history) + // (i.e. doing nothing) + return []; + } +} + +export function insertRepresentativeCommit(dir: string,sourceRepo: RepoId, sourceCommit: Commit, targetCommit: Commit | undefined, followOnTargetCommit: Commit | undefined): void { + // If there is a target commit, + if (targetCommit != undefined) { + execSync(`git reset --hard ${targetCommit.hash}`, { + cwd: dir + }) + } + + createRepresentativeCommit(dir,sourceRepo, sourceCommit); + // Then, if there is a follow-on target commit, we need to cherry-pick it onto the source commit: + if (followOnTargetCommit != undefined) { + execSync(`git cherry-pick ${followOnTargetCommit.hash}`, { + cwd: dir + }) + }// else - nothing to cherry-pick back on top +} + +export function gitPush(dir: string, tokenForTargetRepo: string, targetRepoId: RepoId) { + // Note that it must be a `-f`, because we are literally rewriting history. + execSync(`git push -f https://unused-username:${tokenForTargetRepo}@${targetRepoId.domain}/${targetRepoId.owner}/${targetRepoId.name}`, { + cwd: dir + }) + // TODO - it'd be nice - before this `git push` is probably best - to add a `README.md` comment acknowledging + // the sync +} + +function createRepresentativeCommit(dir: string, sourceRepo: RepoId,sourceCommit: Commit) { + // Create a commit that represents the source commit, but with a filename that is generated from the source commit's + // metadata. + // + // This is guaranteed to not cause conflicts with other commits, because the filename is generated from the source + // commit's metadata, and no two source commits will have the same metadata. + // (OK sure _technically_ these could have a collision, but...like...what are the odds?) + // TODO - figure out what the odds actually are, that'd be fun :P + const filename = `${sourceRepo.owner}/${sourceRepo.name}/${sourceCommit.hash}` + mkdirSync(dir + '/' + sourceRepo.owner + '/' + sourceRepo.name, { recursive: true }); + execSync(`touch ${filename}`, { + cwd: dir + }) + execSync(`git add ${filename}`, { + cwd: dir + }) + + // Seems like setting `--author` on `git commit` is not sufficient - still need to set `user` as well (I guess those + // are the difference between `comitted by` and `written by`?) + // Confirmed by following the instructions [here](https://docs.github.com/en/account-and-profile/setting-up-and-managing-your-github-profile/managing-contribution-settings-on-your-profile/why-are-my-contributions-not-showing-up-on-my-profile#your-local-git-commit-email-isnt-connected-to-your-account) + // to check the "made by" address, and confirming that it did not match the email set in the `--author` flag. + // Note that, contrary to advice given by the CLI, this does not use the global config, but the local one - because, + // otherwise, if this was run locally, it would mess up the host system's config. + execSync(`git config user.email "${sourceCommit.author_email}"`, { cwd: dir }); + execSync(`git config user.name "${sourceCommit.author_name}"`, { cwd: dir }); + + try { + // Do _not_ arbitrarily remove the `hash` - it's used for signalling identity in `main()` + const args = `"${sourceRepo.owner}/${sourceRepo.name}: ${sourceCommit.message} - ${sourceCommit.hash}" --date="${format(sourceCommit.date, 'yyyy-MM-dd HH:mm:ss')}" --author="${sourceCommit.author_name} <${sourceCommit.author_email}>"`; + console.log(`About to commit with args ${args}`); + // https://github.com/Shpota/github-activity-generator/blob/main/contribute.py#L63 + // "%Y-%m-%d %H:%M:%S" + execSync(`git commit -m ${args}`, { + cwd: dir + }) + } catch (e) { + console.log(e); + const error = e as ExecSyncError; + console.log(`DEBUG - error while creating representative commit: ${'' + error.output[2]} ... ${'' + error.output[1]}`); + throw e; + } +} diff --git a/src/main.ts b/src/main.ts index 05eb204..36b0c20 100644 --- a/src/main.ts +++ b/src/main.ts @@ -1,37 +1,13 @@ -import { Repo, repoString, Commit, ExecSyncError } from './types'; -import { execSync } from 'child_process'; +import { RepoId, repoString, Commit } from './types'; import { existsSync, mkdirSync } from 'fs'; - -import { format } from 'date-fns'; +import { getCommitsSinceLatestBeforeGivenDate, getNLatestCommits, gitClone, gitPush, insertRepresentativeCommit } from './git'; const WORKING_DIR = './working'; const SOURCE_DIR = WORKING_DIR + '/source'; const TARGET_DIR = WORKING_DIR + '/target'; -export async function main(sourceRepo: Repo, targetRepo: Repo, dryRun: boolean, tokenForTargetRepo: string) { - // It _shouldn't_ ever exist, but it if did, something weird is going on. - if (existsSync(WORKING_DIR) || existsSync(SOURCE_DIR) || existsSync(TARGET_DIR)) { - throw new Error('Working directory already exists/populated'); - } - - if (tokenForTargetRepo == '') { - throw new Error('token_for_target_repo is required'); - } - - mkdirSync(WORKING_DIR); - mkdirSync(SOURCE_DIR); - mkdirSync(TARGET_DIR); - - console.log(`DEBUG - sourceRepoPath: ${repoString(sourceRepo)}`) - - console.log(`DEBUG - targetRepoPath: ${repoString(targetRepo)}`) - - // TODO - allow parameterizing how far back in history to checkout (because it might take a long time for older - // repos and, once synced initially, it won't have to go back further than a single one in most cases) - const sourceRepoCloneCommand = `git clone https://${repoString(sourceRepo)} ${SOURCE_DIR}` - console.log(`DEBUG - sourceRepoCloneCommand: ${sourceRepoCloneCommand}`); - execSync(sourceRepoCloneCommand); - execSync(`git clone https://${repoString(targetRepo)} ${TARGET_DIR}`); +export async function main(sourceRepoId: RepoId, targetRepoId: RepoId, dryRun: boolean, tokenForTargetRepo: string) { + setPreconditions(tokenForTargetRepo, sourceRepoId, targetRepoId); // Logic: // * Go back as far in source commit history as the given number of commits @@ -53,7 +29,7 @@ export async function main(sourceRepo: Repo, targetRepo: Repo, dryRun: boolean, // only been alive so many years - there's a hard limit on the rate of code I could possibly have generated, which // is small compared to, y'know, _companies_. And I don't see organizations of that size caring about GitHub // contribution history at whole-org scale - and if they do, it'd be proportionally simple for them to implement it. - const sourceCommitHistory = buildSourceCommitHistory(SOURCE_DIR, 10); + const sourceCommitHistory = getNLatestCommits(SOURCE_DIR, 10); // Calling `doSomethingTo(sourceCommitHistory.reverse()); doSomethingElseTo(sourceCommitHistory.reverse());` results // in the second invocation receiving the double-reversed array. @@ -66,12 +42,13 @@ export async function main(sourceRepo: Repo, targetRepo: Repo, dryRun: boolean, // than abandoning the target tree after the insertion point and trusting in later operation to rebuild it - because // the target repo's tree will have representations of commits from _other_ (source)repos too, which we cannot // recreate without their context) - let targetCommitHistory = buildTargetCommitHistory(TARGET_DIR, reversedSourceCommitHistory[0].date); + let targetCommitHistory = getCommitsSinceLatestBeforeGivenDate(TARGET_DIR, reversedSourceCommitHistory[0].date); for (const sourceCommit of reversedSourceCommitHistory) { // "(Index of) First Target Commit that is earlier than (or equal to) the source commit" const targetCommitIndex = targetCommitHistory.findIndex(c => c.date <= sourceCommit.date); console.log(`DEBUG - targetCommitIndex: ${targetCommitIndex}. targetCommitHistory: ${JSON.stringify(targetCommitHistory)}`); + // TODO - refactor this to use guard clauses more than nested-ifs if (targetCommitIndex != -1) { const targetCommit = targetCommitHistory[targetCommitIndex]; // If the target commit is a representation of the source commit, we can skip it @@ -90,7 +67,7 @@ export async function main(sourceRepo: Repo, targetRepo: Repo, dryRun: boolean, } else { followOnTargetCommit = targetCommitHistory[targetCommitIndex - 1]; } - insertRepresentativeCommit(sourceRepo, sourceCommit, targetCommit, followOnTargetCommit); + insertRepresentativeCommit(TARGET_DIR,sourceRepoId, sourceCommit, targetCommit, followOnTargetCommit); // And then regenerate the target commit history // Thankfully, we only need to do this back to immediately preceding the _just processed_ source // commit (since we know that all the rest of the source commits to be processed will be after it), @@ -99,171 +76,44 @@ export async function main(sourceRepo: Repo, targetRepo: Repo, dryRun: boolean, // compared), albeit approximately-halved - but I'm gambling on the fact that that should still take // negligible practical time at usual repo sizes - at least, the ones I'm // this quadratic portion should be negligible, though - and, even if it isn't, it definitely will - targetCommitHistory = buildTargetCommitHistory(TARGET_DIR, sourceCommit.date); + targetCommitHistory = getCommitsSinceLatestBeforeGivenDate(TARGET_DIR, sourceCommit.date); } } console.log(`DEBUG - targetCommit: ${targetCommit.hash}`); } else { console.log(`DEBUG - could not find a targetCommit that is earlier than or equal to the sourceCommit ${sourceCommit.hash} - therefore, writing the source commit's representation onto the current HEAD of target repo`); - insertRepresentativeCommit(sourceRepo, sourceCommit, undefined, undefined); + insertRepresentativeCommit(TARGET_DIR, sourceRepoId, sourceCommit, undefined, undefined); // As above, have to regenerate history after mutation - targetCommitHistory = buildTargetCommitHistory(TARGET_DIR, sourceCommit.date); + targetCommitHistory = getCommitsSinceLatestBeforeGivenDate(TARGET_DIR, sourceCommit.date); } } // OK, that's it - we've processed all the source commits, and we've inserted all the necessary target commits. // We can just `git push` to the target repo now. - // - // Note that it must be a `-f`, because we are literally rewriting history. if (!dryRun) { - execSync(`git push -f https://unused-username:${tokenForTargetRepo}@${targetRepo.domain}/${targetRepo.owner}/${targetRepo.name}`, { - cwd: TARGET_DIR - }) - // TODO - it'd be nice - before this `git push` is probably best - to add a `README.md` comment acknowledging - // the sync + gitPush(TARGET_DIR, tokenForTargetRepo, targetRepoId); } return - } -export function buildSourceCommitHistory(path: string, numCommits: number): Commit[] { - console.log(`DEBUG - building source commit history for ${path} with max count ${numCommits}`); - const output: Commit[] = []; - - const logOutput = execSync( - // If you want to copy this formatting for debugging, it's: - // - // --pretty=format:'{"hash":"%h","author_name":"%an","author_email":"%ae","date":"%ai","message":"%s"}' - // - // TODO - return to this and figure out if these are _actually_ "useless escapes" or not - got a couple layers - // of string-parsing to consider here, I wouldn't want to bet without testing! - //eslint-disable-next-line no-useless-escape - `git log --max-count=${numCommits} --pretty=format:'{\"hash\":\"%h\",\"author_name\":\"%an\",\"author_email\":\"%ae\",\"date\":\"%ai\",\"message\":\"%s\"}'`, - { cwd: path } - ); - const logLines = logOutput.toString().split('\n'); - for (const line of logLines) { - const commit = parseCommit(path, line); - output.push(commit); +function setPreconditions(tokenForTargetRepo: string, sourceRepoId: RepoId, targetRepoId: RepoId) { + // It _shouldn't_ ever exist, but it if did, something weird is going on. + if (existsSync(WORKING_DIR) || existsSync(SOURCE_DIR) || existsSync(TARGET_DIR)) { + throw new Error('Working directory already exists/populated'); } - return output; + if (tokenForTargetRepo == '') { + throw new Error('token_for_target_repo is required'); + } + + mkdirSync(WORKING_DIR); + mkdirSync(SOURCE_DIR); + mkdirSync(TARGET_DIR); + + console.log(`DEBUG - sourceRepoPath: ${repoString(sourceRepoId)}`) + console.log(`DEBUG - targetRepoPath: ${repoString(targetRepoId)}`) + + // TODO - allow parameterizing how far back in history to checkout (because it might take a long time for older + // repos and, once synced initially, it won't have to go back further than a single one in most cases) + gitClone(SOURCE_DIR, `https://${repoString(sourceRepoId)}`); + gitClone(TARGET_DIR, `https://${repoString(targetRepoId)}`); } - -export function buildTargetCommitHistory(path: string, oldestDateInSourceCommitHistory: Date): Commit[] { - console.log(`DEBUG - building target commit history for ${path} with oldest date ${oldestDateInSourceCommitHistory.toISOString()}`); - const output: Commit[] = []; - - try { - const countingLogOutput = execSync( - `git log --since=${oldestDateInSourceCommitHistory.toISOString()} --pretty=oneline`, - { cwd: path } - ); - const countedNumber = countingLogOutput.toString().split('\n').length; - console.log(`DEBUG - countedNumber (how many commits in target repo since oldest source commit) is: ${countedNumber}`); - // TODO - return to this and figure out if these are _actually_ "useless escapes" or not - got a couple layers - // of string-parsing to consider here, I wouldn't want to bet without testing! - const logOutput = execSync( - //eslint-disable-next-line no-useless-escape - `git log --max-count=${countedNumber+1} --pretty=format:'{\"hash\":\"%h\",\"author_name\":\"%an\",\"author_email\":\"%ae\",\"date\":\"%ai\",\"message\":\"%s\"}'`, - { cwd: path } - ); - const logLines = logOutput.toString().split('\n'); - for (const line of logLines) { - const commit = parseCommit(path, line); - output.push(commit); - } - } catch (e) { - const error = e as ExecSyncError - // Now you can safely access properties - // No commits in the target repo - return an empty array, which will result in the first representative commit - // being made as the first commit. And then we can iterate as normal (recalling that the target history is - // refreshed _from local repo_ - incurring no network charges) from there on. - const errorOutputAsString = '' + error.output[2] - if (!errorOutputAsString.includes('does not have any commits yet')) { - console.log(`Unexpected error: ${errorOutputAsString}`); - throw Error(`Unexpected error while building target commit history`, { - cause: error - }) - } - // Fresh target repo - just write into it (by returning an empty array of target commits as target history) - // (i.e. doing nothing) - } - console.log(`As final output of buildTargetCommitHistory, preceding ${oldestDateInSourceCommitHistory.toISOString()}, output is ${JSON.stringify(output)}`); - return output; -} - -// https://gist.github.com/textarcana/1306223 -function parseCommit(repo_path: string, line: string): Commit { - console.log(`DEBUG - line: ${line}, for path ${repo_path}`); - const parsed = JSON.parse(line) - return { - hash: parsed['hash'], - author_name: parsed['author_name'], - author_email: parsed['author_email'], - repo_path: repo_path, - date: new Date(parsed['date']), - message: parsed['message'], - } -} - -function insertRepresentativeCommit(sourceRepo: Repo,sourceCommit: Commit, targetCommit: Commit | undefined, followOnTargetCommit: Commit | undefined): void { - // If there is a target commit, - if (targetCommit != undefined) { - execSync(`git reset --hard ${targetCommit.hash}`, { - cwd: TARGET_DIR - }) - } - - createRepresentativeCommit(sourceRepo, sourceCommit); - // Then, if there is a follow-on target commit, we need to cherry-pick it onto the source commit: - if (followOnTargetCommit != undefined) { - execSync(`git cherry-pick ${followOnTargetCommit.hash}`, { - cwd: TARGET_DIR - }) - }// else - nothing to cherry-pick back on top - -} - -function createRepresentativeCommit(sourceRepo: Repo,sourceCommit: Commit) { - // Create a commit that represents the source commit, but with a filename that is generated from the source commit's - // metadata. - // - // This is guaranteed to not cause conflicts with other commits, because the filename is generated from the source - // commit's metadata, and no two source commits will have the same metadata. - // (OK sure _technically_ these could have a collision, but...like...what are the odds?) - // TODO - figure out what the odds actually are, that'd be fun :P - const filename = `${sourceRepo.owner}/${sourceRepo.name}/${sourceCommit.hash}` - mkdirSync(TARGET_DIR + '/' + sourceRepo.owner + '/' + sourceRepo.name, { recursive: true }); - execSync(`touch ${filename}`, { - cwd: TARGET_DIR - }) - execSync(`git add ${filename}`, { - cwd: TARGET_DIR - }) - - // Seems like setting `--author` on `git commit` is not sufficient - still need to set `user` as well (I guess those - // are the difference between `comitted by` and `written by`?) - // Confirmed by following the instructions [here](https://docs.github.com/en/account-and-profile/setting-up-and-managing-your-github-profile/managing-contribution-settings-on-your-profile/why-are-my-contributions-not-showing-up-on-my-profile#your-local-git-commit-email-isnt-connected-to-your-account) - // to check the "made by" address, and confirming that it did not match the email set in the `--author` flag. - // Note that, contrary to advice given by the CLI, this does not use the global config, but the local one - because, - // otherwise, if this was run locally, it would mess up the host system's config. - execSync(`git config user.email "${sourceCommit.author_email}"`, { cwd: TARGET_DIR }); - execSync(`git config user.name "${sourceCommit.author_name}"`, { cwd: TARGET_DIR }); - - try { - // Do _not_ arbitrarily remove the `hash` - it's used for signalling identity in `main()` - const args = `"${sourceRepo.owner}/${sourceRepo.name}: ${sourceCommit.message} - ${sourceCommit.hash}" --date="${format(sourceCommit.date, 'yyyy-MM-dd HH:mm:ss')}" --author="${sourceCommit.author_name} <${sourceCommit.author_email}>"`; - console.log(`About to commit with args ${args}`); - // https://github.com/Shpota/github-activity-generator/blob/main/contribute.py#L63 - // "%Y-%m-%d %H:%M:%S" - execSync(`git commit -m ${args}`, { - cwd: TARGET_DIR - }) - } catch (e) { - console.log(e); - const error = e as ExecSyncError; - console.log(`DEBUG - error while creating representative commit: ${'' + error.output[2]} ... ${'' + error.output[1]}`); - throw e; - } - -} \ No newline at end of file diff --git a/src/types.ts b/src/types.ts index 210222e..ba096d2 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,11 +1,11 @@ // TODO - parametrize the scheme -export type Repo = { +export type RepoId = { domain: string; owner: string; name: string; } -export function repoString(repo: Repo): string { +export function repoString(repo: RepoId): string { return `${repo.domain}/${repo.owner}/${repo.name}`; }