Compare commits

..

1 Commits

  1. 25
      .drone.yml
  2. 3
      blog/archetypes/default.md
  3. 1
      blog/config.toml
  4. 1
      blog/content/posts/2021-in-books.md
  5. 2
      blog/content/posts/2022-wrap-up.md
  6. 9
      blog/content/posts/2023-advent-of-code.md
  7. 137
      blog/content/posts/2023-wrap-up-articles.md
  8. 169
      blog/content/posts/2023-wrap-up-books.md
  9. 7
      blog/content/posts/adding-rss.md
  10. 76
      blog/content/posts/almost-all-numbers-are-normal.md
  11. 33
      blog/content/posts/automatic-merging.md
  12. 66
      blog/content/posts/backups-and-updates-and-dependencies-and-resiliency.md
  13. 195
      blog/content/posts/base-app-infrastructure.md
  14. 26
      blog/content/posts/books-as-vehicles.md
  15. 1
      blog/content/posts/cloudflare-tunnel-dns.md
  16. 11
      blog/content/posts/conditional-cleanups-in-pytest.md
  17. 5
      blog/content/posts/edit-command-line-in-zsh.md
  18. 1
      blog/content/posts/grafana-oncall.md
  19. 51
      blog/content/posts/jsonnet-in-argocd.md
  20. 19
      blog/content/posts/keycloak-backup.md
  21. 88
      blog/content/posts/oidc-on-k8s.md
  22. 60
      blog/content/posts/pre-pipeline-verification-and-the-push-and-pray-problem.md
  23. 97
      blog/content/posts/project-management-and-async-functions.md
  24. 47
      blog/content/posts/pvc-debug-pod.md
  25. 1
      blog/content/posts/rebuild-from-scratch.md
  26. 1
      blog/content/posts/secure-docker-registry.md
  27. 1
      blog/content/posts/self-hosted-analytics.md
  28. 2
      blog/content/posts/short-thoughts-2023-03-05.md
  29. 110
      blog/content/posts/vault-secrets-into-k8s.md
  30. 1
      blog/content/posts/vpn-on-kubernetes.md
  31. 35
      blog/content/posts/work-in-a-post-scarcity-utopia.md
  32. 7
      blog/layouts/partials/head-additions.html
  33. 8
      blog/static/css/table-styling-almost-all-numbers.css
  34. BIN
      blog/static/img/do-not-dream-of-labor.jpg
  35. BIN
      blog/static/img/dream-of-labor.jpg
  36. BIN
      blog/static/img/length-of-worm.jpg
  37. BIN
      blog/static/img/open-project-screenshot.png
  38. 36
      np.sh

@ -43,20 +43,19 @@ steps:
- git submodule init
- git submodule update --recursive
- hugo --source blog
- name: docker-build-and-push
image: thegeeklab/drone-docker-buildx # Absurd that this isn't offered as first-party!
privileged: true
- name: push-built-image
image: plugins/docker
settings:
registry: gitea.scubbo.org
repo: gitea.scubbo.org/scubbo/blog_helm
tags: ${DRONE_COMMIT_SHA:0:10}
debug: true
launch_debug: true
username: scubbo
password:
from_secret: gitea_password
repo: gitea.scubbo.org/scubbo/blog_helm
tags:
- ${DRONE_COMMIT_SHA:0:10}
platforms:
- linux/arm64
- linux/amd64
settings:
mtu: 1450
- name: auto-update-infra-repo
image: gitea.scubbo.org/scubbo/auto-repo-update-drone-plugin:latest
settings:
@ -75,14 +74,14 @@ steps:
ARGO_TOKEN:
from_secret: argo_token
commands:
- "curl -sS -X POST \"argo-cd-argocd-server.argo.svc.cluster.local/api/v1/applications/blog-infrastructure/sync\" -H \"Authorization: Bearer ${ARGO_TOKEN}\" -H \"Content-Type: application/json\""
- "curl -s -X POST \"argo-cd-argocd-server.argo.svc.cluster.local/api/v1/applications/blog-infrastructure/sync\" -H \"Authorization: Bearer ${ARGO_TOKEN}\" -H \"Content-Type: application/json\""
- name: purge-cache
image: curlimages/curl
environment:
CLOUDFLARE_TOKEN:
from_secret: cloudflare_token
commands:
- "curl -sS -X POST \"https://api.cloudflare.com/client/v4/zones/c86d55d225ed973d5da45239beac2f99/purge_cache\" -H \"Authorization: Bearer ${CLOUDFLARE_TOKEN}\" -H \"Content-Type:application/json\" -d '{\"files\":[\"https://blog.scubbo.com\"]}'"
- "curl -s -X POST \"https://api.cloudflare.com/client/v4/zones/c86d55d225ed973d5da45239beac2f99/purge_cache\" -H \"Authorization: Bearer ${CLOUDFLARE_TOKEN}\" -H \"Content-Type:application/json\" -d '{\"files\":[\"https://blog.scubbo.com\"]}'"
- name: telegram_notification
image: appleboy/drone-telegram
when:
@ -94,3 +93,7 @@ steps:
from_secret: telegram_token
to:
from_secret: telegram_convo_id
image_pull_secrets:
- dockerconfigjson

@ -17,6 +17,5 @@ Images:
![Alt-text](url "Caption")
Internal links:
[Link-text](\{\{< ref "/posts/name-of-post" >}})
(remove the slashes - this is so that the commented-out content will not prevent a build while editing)
[Link-text]({{< ref "/posts/name-of-post" >}})
-->

@ -17,7 +17,6 @@ SectionPagesMenu = "main"
github = "https://github.com/scubbo"
linkedin = "https://www.linkedin.com/in/jack-jackson-14a47441/"
mastodon = "https://fosstodon.org/@scubbo"
rss = "https://blog.scubbo.org/index.xml"
custom_css = ['inlinespoiler.css']
show_reading_time = true
page_source_base = "https://gitea.scubbo.org/scubbo/blogContent/src/branch/main/blog/"

@ -2,7 +2,6 @@
title: "2021 in Books"
date: 2021-12-30T08:29:09-08:00
tags:
- end-of-year-wrapups
- reading
---
My good friend [George](https://tinyletter.com/altthoughtprocess/) set himself a challenge a while back to read 52 books in a calendar year. He succeeded (as George is wont to do), and that achievement has always stuck in my mind as impressive[^1]. I don't think I'd _ever_ be able to equal it (especially not now, with Work-From-Home removing my most common reading time - the commute), but I did start tracking my reading as a matter of interest. To that end, I present my year-of-reading-in-review, with book-by-book recaps and the [full list]({{< ref "#full-list" >}}) at the end:

@ -2,8 +2,8 @@
title: "2022 Wrap Up"
date: 2022-12-31T12:47:00-08:00
tags:
- end-of-year-wrapups
- productivity
- real-life
- reading
---

@ -1,9 +0,0 @@
---
title: "2023 Advent of Code"
date: 2023-12-01T18:43:13-08:00
tags:
- programming-challenges
- rust
---
Just a quick note to record that, for the first time, I'm taking part in [Advent Of Code](https://adventofcode.com/) - a series of programming challenges that run every day from the 1st to the 25th of December. Inspired by my [experiences]({{< ref "/posts/short-thoughts-2023-03-05" >}}) during [Exercism's 12in23 challenge](https://exercism.org/challenges/12in23), I'll be trying to complete the challenges in Rust (see my solutions [here](https://github.com/scubbo/advent-of-code-2023)). That's on top of trying to complete Exercism's "_December Diversions_", as well as 5 challenges in _another_ language for the year-long badge to make up for the fact that January was unassigned (and keeping up my reading of [Ward](https://www.parahumans.net/) to remain on-track to finish by the end of the year). It's going to be a busy month!

@ -1,137 +0,0 @@
---
title: "2023 Wrap Up - Articles"
date: 2024-01-01T15:31:18-08:00
tags:
- end-of-year-wrapups
- productivity
- SDLC
---
Stand-outs among articles I read this year - abandoning the table layout from last year in favour of readability.
<!--more-->
If you just want the best-of-the-best, I recommend [The Categories Were Made For Man, Not Man For The Categories](https://slatestarcodex.com/2014/11/21/the-categories-were-made-for-man-not-man-for-the-categories/), [Many More Much Smaller Steps](https://www.geepawhill.org/2021/09/29/many-more-much-smaller-steps-first-sketch/), and [Five Geek Social Fallacies](https://plausiblydeniable.com/five-geek-social-fallacies/) (with an honourable mention for [Should You Split That File](https://www.pathsensitive.com/2023/12/should-you-split-that-file.html) if you are a software engineer - the others are of general interest, even though MMMSS _focuses_ on engineering)
## [Theory-building and why employee churn is lethal to software companies](https://www.baldurbjarnason.com/2022/theory-building/)
([HN](https://news.ycombinator.com/item?id=34328069))
_Software Development is not just the act of writing code (we knew that!), but the creation of an accurate and up-to-date mental model of the system - a model which is useful for understanding how to alter that system in response to desired changes. Corollaries are that deep understanding of the context and intention of a system is essential, that team churn is catastrophic, and rewrites-for-the-sake-of-rewrites are less self-indulgent than they may seem, as they (re-)generate this familiarity._
I'm reminded of the surprising (but correct!) claim that it's preferable to have a broken-but-easy-to-change system, than a working-but-hard-to-change one. Firstly, the system may _actually_ be broken without you knowing; secondly, even if it is working perfectly _now_, requirements may change _tomorrow_. This article is also particularly relevant in the context of my team's work to establish ownership (which includes "_understanding of the subject_") of systems at work.
## [Velocity defeats itself. Get acceleration instead](https://jessitron.com/2022/12/22/velocity-defeats-itself-get-acceleration-instead/)
([HN](https://news.ycombinator.com/item?id=34353907))
_Do not neglect software engineering work which reduces the friction of development or experimentation_
Hardly a novel perspective, but I liked this way of phrasing it, especially since it stresses the hard-upper-limit of "force" as an input to acceleration.
## [The Categories Were Made For Man, Not Man For The Categories](https://slatestarcodex.com/2014/11/21/the-categories-were-made-for-man-not-man-for-the-categories/)
_Starts off with a good abstract exploration of how categories should be created so as to be useful, but are neither authoritative/immutable nor inherently matters of fact; moves on to apply that, sensibly, to transgender identity._
I was _very_ cautious of reading anything from the EA/Rationalist space, especially anything related to gender identity, as they have a certain reputation - but surprisingly this piece prioritized harm-reduction over the masturbatory satisfaction of retroactively declaring one's prejudices as "supported by science"
## [Social Justice and Words, Words, Words](https://slatestarcodex.com/2014/07/07/social-justice-and-words-words-words/)
A discussion of how the same terms can be used by the same person to mean different things - often with the outcome (intentional or not) that an indefensible claim can be made defensible by "retreating" to an alternative definition when challenged.
Another Rationalist article - this one a little less sympathetic, but certainly not inconsistent or obviously-wrong. I will say that his experience of Social Justice appears to have been more hostile than mine, but I also 100% believe that such people/experiences exist. Certainly, it's not a stretch to believe that some SJ folks take good intentions to harmful extremes, or subscribe to absolutist philosophies which admit no nuance.
This ties into a theme I've noticed recently, of disagreements persisting not because of differing views, but differing definitions of the terms being used in the argument. If I say "_X isn't racist_", and you and I have differing definitions of what "racist" means, we're going to struggle to make progress in understanding until we discover that mismatch. Along the way, we'll be hampered in reaching that understanding if there are value judgements associated with many of these differingly-defined terms - if you think you're hearing me say "_I see the harm being done by X, but I don't care about it_", when what I really mean is "_I recognize the harm that X does, but that harm doesn't fall under the category of 'racism'_", then our discussion will be fraught!
## [Of Dogs And Lizards: A Parable Of Privilege](https://sindeloke.wordpress.com/2010/01/13/37/)
A straightforward description (linked from the preceding article) of how Privilege arises through differing experiences/backgrounds, and how to react to that. I am a _big_ fan of social justice proponents (hmm, we're overdue a new term for that, aren't we?) who prioritize spreading understanding and education rather than criticism and shame - famously poor recruitment tools. In my opinion very few people (not zero!) are deliberately or actively cruel, but many are either lazy or small-minded. In an ideal world, people would "_do the right thing_" simply because it's right, not just because you tell them how their patterns could harm people they care about - but, since we don't live in an ideal world, it's sensible to consider prioritizing harm-reduction over ideological purity[^victims-dont-owe-you-anything].
## [Communicating Like A Grown-Up](https://fs.blog/carl-braun-communicating/)
([HN Link](https://news.ycombinator.com/item?id=35355372)) For a while, I have been convinced that clear written communication is one of the greatest superpowers that a human can possess. It is the best way we have to disseminate and preserve information; and while it may take more effort to write a persistent long-form piece than to make a convincing verbal argument, that effort will be justified by the ability to share and refer-to the durable copy, and by the higher-quality thinking generated by an inability to hide behind easy speech. In fact, a determination to improve my own writing was one of the [primary motivators for this blog in the first place](https://blog.scubbo.org/posts/my-first-post/).
Plenty of good advice in this piece. A consistent theme I want to pull out is that, in any collaboration, we should always be of the mindset that we're all doing our best and pulling in the same direction. Assuming ill intent or stupidity is never a useful strategy - whether you're right or wrong, you will have made an enemy, and if you're wrong you'll make yourself look foolish. Much better to assume that information has not been fully distributed, or that incentives are misaligned - approaches which gracefully allow your collaborator to change their position without admitting defeat. In all cases, prioritize finding the best outcome for the team, not "winning".
## [The Copenhagen Interpretation Of Ethics](https://web.archive.org/web/20220705105128/https://blog.jaibot.com/the-copenhagen-interpretation-of-ethics/)
(Archive.org link because the original is no longer available) "_The Copenhagen Interpretation of Ethics says that when you observe or interact with a problem in any way, you can be blamed for it. At the very least, you are to blame for not doing **more**._"
This is a really interesting one. _Prima facie_, this interpretation seems to be nonsense - we should celebrate _any_ amount of improvement or amelioration of a problem, even if the response doesn't completely fix the problem (so long as the "improvement" doesn't push someone into a local maximum from which it would be hard to escape to genuine long-term betterment). And yet, all the examples that the article gives of the phenomenon - of organizations taking steps which provide limited-but-non-zero assistance, with zero downside - feel, for want of a better word, "icky".
You could argue that the first example (paying homeless people to carry Wifi equipment) _did_ have a downside in that it was dehumanizing to the participants, but, well...they're free to reject the offer if they (justifiably!) value their pride more than the associated payment. Sure, consent cannot be freely given when under duress, and you could argue that a homeless or otherwise-needy person isn't able to express their economic preferences accurately when offered compensation that would (however temporarily) stave off hardship[^capitalism-yo] - that is, that they're compelled to accept an offer which they would otherwise refuse - but, the Wifi-providing company isn't the one _responsible for_ that person's hardship or for the existence of poverty, so why should they be condemned for offering _a choice_ wherein the negative option is "_remain in the situation you would be in if this choice wasn't offered_"?
As I say, an interesting one - something I don't have a well-defined attitude to, yet, but that I do at least want to keep in mind in the future.
(Also, "[Philosophy Bro](http://www.philosophybro.com/archive/peter-singers-drowning-child-argument)" is just delightful)
## [The Seven Ur-Programming Languages](https://madhadron.com/programming/seven_ur_languages.html)
([HN Link](https://news.ycombinator.com/item?id=35813496)) I have been fascinated with the different properties and priorities of Programming Languages since reading "Seven Languages In Seven Weeks" a few years ago. This was a fascinating read on some alternative perspectives on PL design.
(I still don't grok `Lisp`s, though 😅 I can recognize that they _are_ elegant and concise and all that good stuff, but the "Summer of Sexps" was one of the more frustrating months of [Exercism's 12in23](https://exercism.org/challenges/12in23) challenge. I just missed named variables too much!)
## [Many More Much Smaller Steps](https://www.geepawhill.org/2021/09/29/many-more-much-smaller-steps-first-sketch/)
(First read a while ago, but I neglected to note it down at the time)
In my previous role, I half-jokingly compiled a Wiki page of short general-purpose statements which one could use to simulate me as a chatbot - randomly selecting one of the statements as a response in a conversation would do a good job of impersonating me. [Manual Work Is A Bug](https://queue.acm.org/detail.cfm?id=3197520) and [Code Only Says What It Does](https://brooker.co.za/blog/2020/06/23/code.html) were among the list, and this article belongs alongside them.
(Honourable mention to [Choose Boring Technology](https://boringtechnology.club/), which is referenced [a little later](#how-do-you-keep-us-innovative-i-dont) as well)
## [Make It Easy To Do The Right Thing](https://amcaplan.ninja/blog/2016/12/30/make-it-easy-to-do-the-right-thing/)
The title is a mantra of mine - particularly since I've moved to an SRE role - and the article provides some good examples of the concept.
## [Five Geek Social Fallacies](https://plausiblydeniable.com/five-geek-social-fallacies/)
A collection of social dysfunctions that geeks are susceptible to, most likely due to some combination of a. a history of being ostracized, and b. finding socializing unintuitive and thus trying to "learn" it as a skill.
One of those articles that, once you read it, you will notice the tendencies and patterns everywhere (probably including, uncomfortably, in your own past behaviour).
## [How Do You Keep Us Innovative? I Don't](https://ntietz.com/blog/forefront-of-innovation/)
An alternative view on [Choose Boring Technology](https://boringtechnology.club/). It is a fallacy common to junior engineers to think that engineers are in the business of writing code, and that writing cool/efficient/modern/clean/exciting (delete as appropriate) code is what they are paid for. This is not the case. Engineers are paid to solve problems. If you can solve the problem _well_ (and sustainably) with a few lines of hacky bash scripting rather than an elegant multifaceted framework-y masterpiece, that is perfectly acceptable - and often optimal. Newer tools are, by-definition, less-proven than the battle-tested dinosaurs - they represent risk. That risk must be outweighed by significant power or functionality elsewhere.
(A second-order effect is that using newer flashier technologies _might_ be a positive recruiting consideration, but I suspect that that effect is negligible except in areas which are themselves cutting-edge - AI, Self-driving, etc.)
## [Should You Split That File?](https://www.pathsensitive.com/2023/12/should-you-split-that-file.html)
([HN Link](https://news.ycombinator.com/item?id=38489485)) A neat approach for code organization, sensibly balancing the competing pressures when developing code and considering the cognitive-load aspects inherent in them. I am a sucker for solutions which identify a spectrum of solutions which lie along some line of trade-offs, and which then ask "_what if we could take some of the best of both worlds_"? (Yes, I'm still trying to learn Rust 😝)
I do think it's interesting to note how some comments argue in favour of larger files because they allow for (say) searching within a single file in a single operation, when "_searching across a whole project_" _is_ a single operation in well-set-up IDEs (fight me, Vimsters). This feels like a local maximum - to what extent should we keep using sub-optimal practices because the optimal practice is sub-optimal to those using sub-optimal tools?[^vim-is-an-ide]
Bonus points because the HN comments include this classic form-of-comment: [1](https://news.ycombinator.com/item?id=38490257), [2](https://news.ycombinator.com/item?id=38490757).
## Others
Articles that I thought were worth recording at the time, but which I didn't think warranted a paragraph-length write-up above. The snippets are not (usually) direct quotes but rather are my summations.
* [How To Do Hard Things](https://every.to/no-small-plans/how-to-do-hard-things) ([HN](https://news.ycombinator.com/item?id=35425897#35427605)) - A blend of mindfulness meditation (awareness of thoughts and aversion, recognition that thoughts are real but not necessarily true, center yourself in the present and the body, etc.) and therapy/productivity theory (let actions arise from values, form habits with routine).
* [Everything Is A Practice](https://luxagraf.net/essay/everything-is-a-practice) ([HN](https://news.ycombinator.com/item?id=35437062)) - "_You are what you repeatedly do. Do things that lead you down a path you want to be on. Don't neglect a diversity of skills._" (not a new realization!)
* [Systems Design: What We Hope We Know](https://apenwarr.ca/log/20230415) ([HN](https://news.ycombinator.com/item?id=35580776)) - "_Engineering as a discipline of compromise (Science as a discipline of mechanistic testing of inspired hypotheses), Insight as a new-to-you realization, latency vs. throughput_"
* [Resisting Deterministic Thinking](https://zephoria.medium.com/resisting-deterministic-thinking-52ef8d78248c) - "_Outcome are almost never certain - pick the choice that best shapes the probability-space to your desired outcomes._" (shared by [George](https://www.georgelockett.com/))
* [We Don't Trade With Ants](https://worldspiritsockpuppet.com/2023/01/10/we-dont-trade-with-ants.html) ([HN](https://news.ycombinator.com/item?id=36265774)) - an interesting take on how Human/AI relations might evolve
* [The JavaScript Gom Jabbar](https://frantic.im/javascript-gom-jabbar/) ([HN](https://news.ycombinator.com/item?id=36564010)) - _shudder_
* [Consider SQLite](https://blog.wesleyac.com/posts/consider-sqlite), via [How I Run My Servers](https://blog.wesleyac.com/posts/how-i-run-my-servers). Reminiscent of a [great talk by David Crenshaw](https://www.youtube.com/watch?v=RqubKSF3wig) on the topic of ruthlessly prioritizing simple functionality in your tools - though, to be clear, I still find GoLang to be an extraordinarily-frictiony language for no apparent good reason (unlike Rust, for instance, which is also incredibly-frictiony _but with the friction intentionally introduced with deliberate trade-offs_)
* [Don't Mess With A Genius](https://shreevatsa.wordpress.com/2010/06/04/dont-mess-with-a-genius/) - fun historical story about Isaac Newton's time as Master Of The Mint.
* [Shamir Secret Sharing](https://max.levch.in/post/724289457144070144/shamir-secret-sharing-its-3am-paul-the-head-of) - an engineering war story
* [That Time I Built Excel For Uber And They Ditched It Like A Week After Launch](https://basta.substack.com/p/no-sacred-masterpieces) ([HN](https://news.ycombinator.com/item?id=37527720)) - "_Every piece of code you write as an engineer is legacy code. Maybe not right now, but it will be. Someone will take joy in ripping it out someday._" - plus a cool tidbit of how Excel works (read and find out!)
* [Doing A Job](https://govleaders.org/rickover.htm) - an exploration of the idea of ownership of (loosely - pride and competency in) a role.
[^victims-dont-owe-you-anything]: though even this straightforward formulation can be fraught, as it leans close to an oft-maligned claim that it is the responsibility of the oppressed to educate their oppressors, and to do so in conciliatory and flattering ways - an unreasonable ask, when the oppressed are so often dealing with plenty of other exhausting hardships and trauma. I have a longer blog post planned on this general idea; but in short, this seems like an instance of a special case of "_miscommunication via differing definition_" - explicitly, the miscommunication which arises when making a normative statement (an expression of desirability - "_you should do X_" or "_it's a good idea to do X_") without expressing the criteria used for judgement. If your aim is to change the behaviours of your listeners, then a position which castigates them is less likely to be successful than one which explains to them the benefits of the change - _but_ a victim has, in fact, no responsibility to try to directly effect that change, and not all their speech should be judged through that lens. Their speech might be intended simply to vent, or to build community with other victims, or to effect change _indirectly_ by changing the opinion of large demographics or of policy-makers which will put pressure on their oppressors.
[^capitalism-yo]: In a broader sense, neither can _anyone_ in a capitalist society outside of the 1% - but that's a whole tangent...
[^vim-is-an-ide]: FWIW, I'm _sure_ that Vim could be set up to do "Project Search" just as well as IntelliJ/VS Code/whatever could do. Rest assured, if I'm talking about "Vim as a substandard IDE", then I'm not talking about _your_ setup 😝
<!--
Reminders of patterns you often forget:
Images:
![Alt-text](url "Caption")
Internal links:
[Link-text](\{\{< ref "/posts/name-of-post" >}})
(remove the slashes - this is so that the commented-out content will not prevent a build while editing)
-->

@ -1,169 +0,0 @@
---
title: "2023 Wrap Up - Books"
date: 2023-12-26T12:02:15-08:00
tags:
- end-of-year-wrapups
- reading
---
Another End Of Year Wrap-up, focusing (as the [previous]({{< ref "posts/2022-wrap-up" >}}) [installations]({{< ref "posts/2021-in-books">}}) did) initially on reading[^affiliate-links].
<!--more-->
# Recaps
Potential spoilers, of course, for all the books (check [here](#full-lists-and-stats) to see the full title list), though IMO I'm keeping them minimal - thematic rather than narrative.
I rarely write notes on book _as_ I'm reading them, so please take review comments with a grain of salt - I may be misremembering!
## The Year Of Sanderson
I knew that [Brandon Sanderson's Four Secret Projects](https://www.kickstarter.com/projects/dragonsteel/surprise-four-secret-novels-by-brandon-sanderson) were going to be the focus of the year, and they didn't disappoint. The least complimentary thing I can say about them is that Frugal Wizard felt too YA _for me_, but me not being the target audience doesn't make it a bad book. Yumi felt a little under-explained and -explored (surprising, for the king of well-established internally-consistent magic systems), but I still teared up a little at the appropriate emotional moments - and I _loved_ seeing more of an established character. The Sunlit Man was a compelling action romp with a cool hook even without the _tantalizing_ snippets of the broader Cosmere story; and TOTES (hehe), similarly, would be a delightful "_reverse Princess Bride_" even if you're not someone who geeks at the sight of the word "Hoid".
I'm lumping White Sands in here too even though it wasn't part of the Secret Projects. Probably my least favourite of the Cosmere books, which still puts it in rare company. It was perfectly serviceable both narratively and Cosmere-structurally, I just wasn't particularly grabbed at any point.
## The Laundry Files
9 of this year's books were from The Laundry Files, "_a series of novels \[that\] mix the genres of Lovecraftian horror, spy thriller, science fiction, and workplace humour_", recommended by [George](https://www.georgelockett.com). They had some high points - "_what if an Investment Bank Analyst Pod, but Vampires_", in particular, was executed much more competently than just a series of lazy jokes about how finance/capitalism is blood-sucking and evil (though, to be clear...) - but overall I have zero desire either to finish the series (after a big in-world event leads to a significant character-focus shift), or to go back and re-read. Perfectly serviceable leisure reading, though!
(As a point of comparison, despite the numerous flaws of the Dresden Files, I will be picking up those books at release date right up 'til the end of the planned Big Apocalyptic Trilogy finale)
## The First 90 Days
As I began my new position with LegalZoom - the first time I'd changed companies as a mature, deliberate, somewhat-thoughtful somewhat-adult, rather than as a fresh-faced college grad in no position to be intentional about his career trajectory - I figured it behooved me to study some Real Adult Grown-up Techniques for starting a job off on the right foot.
It was definitely useful and I'm glad I read it, though the target demographic was more the "_decision-making executives_" cohort than the "_primarily IC/executors_" group that I'm in. Don't get me wrong, Senior+ Engineers _should_ be decision-makers (and good ones need to be political too - sadly), and the strategies for understanding the org and for discovering the "hidden org-chart" were valuable, but the heavy focus on understanding phase and segment of the business were less relevant to someone without significant input on hiring, external product selection, or funding. Still - I'm all for cross-training, it was neat to understand how execs think about such things, and I'm sure I'll make use of the knowledge at some strange time in the future!
Very grateful to ex-coworker Kyle for lending this to me!
## Underdog
An alpha-read of a draft novel by my cousin Nick. A thoroughly enjoyable semi-apocalyptic YA fantasy story, with hints of Dune. Trope-y in the best ways, a lot of fun. Looking forward to buying the published version!
## The Rust Book
I've been working through Exercism's [12in23](https://exercism.org/blog/12in23-calendar) challenge this year, and Mechanical March's challenge of working with Rust got me fascinated with the language that so many have spoken so enthusiastically about[^golang-sucks]. I resolved to work through [The Rust Book](https://doc.rust-lang.org/stable/book/) this year to deepen my understanding - realistically, 5 quick coding challenges barely gives you an understanding of the idioms of the language or how it really operates on a daily basis.
The book was reasonably well-written, but knowledge didn't really sink in - I came away from each chapter feeling _reasonably_ confident that the concepts made sense and that I could explain them, and then would invariably score a zero on the "check understanding" quiz that followed. I had (and still do have, as I work through [Advent Of Code]({{< ref "/posts/2023-advent-of-code" >}})) real difficulty translating the theoretical ideas into practical applications. I suspect it's something that will get easier with practice, as I rewire my brain to naturally think about concepts like borrowing. I got a recommendation for a book which is apparently better for learning practical application ("_Programming Rust_" by Blandy, Orendorff, and Tindall), which I'll work through next year.
## The Infinite And The Divine
The release of the WH40k Magic The Gathering decks last year resparked my interest in the sprawling lore of the mega-setting. I'd heard good things both about this book, and the "Twice-Dead King" series by Nate Crowley (a friend-of-a-friend), so I figured I'd give it a go.
Perfectly serviceable low-brain-energy entertainment. Nothing to write home about, but enjoyable to speed through as a balm to the heavy thinking of The Rust Book!
## Project Hail Mary
Spiritual-sequel to The Martian, this is another Competence-Porn In Space book, though with a little more characterization at the cost of some of the clever MacGuyvering. I did enjoy the smatterings of linguistics and sociology at the start, but from about halfway through it loses focus and can't decide if it wants to be a buddy-story or a story about Cool Science, and ends up suffering as both. If you are desperate for more of The Martian, this will scratch that itch, though it's a little inferior in most ways.
## A Long Way To A Small, Angry Planet
A recommendation from ex-coworker Amanda. "Cosy Sci-Fi" - take the multi-species interstellar-alliance structure of The Culture or Mass Effect, downgrade the tech to the level of (and apply the "_found family on a boat-in-space_" flavour of) Firefly, and round off the sharp edges (no disrespect! Pleasant comfy fiction is valid and worthwhile!).
## Domain-Driven Design
...OK, I'll admit, this was a spite-read. I have a coworker with whom I have a...challenging relationship, primarily because of our differing methods of communication - and for reasons of professionalism, I will go no further than that! For several months, he insisted on crowbarring concepts and quotes from this book into _every_ discussion, until I eventually resolved to read it to understand what he was talking about.
For all that I read it for less-than-ideal reasons, it was a good read! Suffered somewhat from "[Seinfeld Is Unfunny](https://tvtropes.org/pmwiki/pmwiki.php/Main/SeinfeldIsUnfunny)" syndrome (warning - TVTropes link), in that many concepts in it seemed self-evidently correct and barely even worth stating - the curse of an influential and insightful book is that, ~10 years after it's published, everything in it will seem "obvious" because everyone will have adopted its ideas! Still, putting a structure on and vocabulary around the ideas is valuable.
On the positive side, I now have a better understanding of what my coworker means when he drops buzzwords without elaboration!
## A Fire Upon The Deep
A standard of "best sci-fi" books, I had high hopes for this one - hopes that, sadly, were not met. It had some cool ideas in it, for sure (the composite-consciousness species was a new one to me), but other than that - well, I'll just quote my [Mastodon post](https://fosstodon.org/@scubbo/110618207602838405)[^no-toots] of the time:
> "A Fire Upon The Deep" may be the most disappointing book I've ever read. The intro teased truly inhuman AI viewpoints, never delivered; popular reporting of the book makes much of "code archaeology", only mentioned once in passing; the two alien races are interesting ideas but nothing's done with them (I truly thought the radio-coats would lead to Flenser *becoming* a Power); the ending was a non-event that left countless plot threads open.
It's _possible_ that the sequel Children Of The Sky would pick up on some of those plot threads in a satisfying way, but I'm unlikely to chance it. Who knows, though - maybe going in with lowered expectations would make me enjoy it more!?
A comparison with Blindsight seems fitting, here. Both are space-based stories which use post-/in-human species as a way to examine assumptions about consciousness and personhood - but, by deliberately having almost _no_ story (or, rather - having a story which was purely a vehicle for "_cool discoveries about the puzzle/structure in question_"), Blindsight avoided any necessity to have a _satisfying_ story. AFUTD _tried_ to have a compelling story with characters we cared about, and (to me) failed.
## Naming Things
Classically, [one of the hardest problems in computer science](https://martinfowler.com/bliki/TwoHardThings.html). This was a short but worthwhile read: very little in here that was truly unknown (aside from the term "_polyseme_", for "_a symbol \[word\] with multiple related meanings_"), but, like "_Effective Java_", "_The Elements Of Style_", or [Oblique Strategies](https://en.wikipedia.org/wiki/Oblique_Strategies), it will be a good collection of tie-breaking advice for those moments when I have a niggling feeling that something _could_ be done better but I need a clearly articulated explanation of how (and why it's better).
(And, yes, I fully recognize that EJ and TEoS are both pretty out-dated by now. I'm not claiming that _every_ piece of advice they give is good and correct - but, at the very least, a clearly-articulated argument that you disagree with will help you formulate your own argument!)
## Ward
Hoo boy. OK, this is the biggie 😅
Several years before I started this blog, I read [Worm](https://parahumans.wordpress.com/), an infamous web serial which was, at the time, nearly as long as the entire Song Of Ice And Fire:
![Length of various sci-fi series](/img/length-of-worm.jpg "I think the most surprising thing here is that, since this diagram was made [in 2018](https://old.reddit.com/r/Parahumans/comments/8nyhqi/worm_length_compared_to_other_sff_series/), there's only been one Stormlight Archive book :P")
I've described it previously as "_A gritty grounded superhero story - like if Brandon Sanderson and George R. R. Martin collaborated to entirely reinvent the MCU as a sci-fi story rather than 'a soap opera with punching'_". It certainly wasn't perfect - not all of that length was free from filler, and the author has some irritating linguistic quirks that begin to grate very quickly[^used-their-power] - but when it was good, oh my, it was _fantastic_. The superpowers were thoughtfully created (and limited, and combined/conflicted) and widely varied; the characterisation was _masterful_ given what a broad cast of characters the author introduced (some for only a few paragraphs at a time, but with barely a placeholder character among them - they all felt like _people_); and, while not every arc was gold, on balance they were excellent, and the ones that were good were _incredible_. Seriously, this book - which, I remind you, was released for free, chapter-by-chapter, onto the Internet by an amateur - contains several of the most vivid, shocking, and compelling scenes I've ever read. It's not a whole-hearted universal recommendation - I've already acknowledged that the pacing and prose are patchy, and it would honestly be faster to list the Content Warnings that _don't_ apply to the book than those that do[^content-warnings] - but if this sounds up your street, it probably is.
Fast-forward to last year, when I saw some Mastodon posts from someone who'd just finished Ward, the sequel to Worm, and was waxing lyrical about it. I planned to read it in 2024 as I would be dedicating this year to Sanderson, but I ended up having enough time to complete Ward this year.
It was great, veering on excellent! That's certainly a step-down from my breathless praise of the original, which is intentional - it had its moments that stand alongside the original, and fleshed out the cosmology/power-system a little, but rarely measured up. Partly this was just the standard problem of sequels - after introducing such an awesome and epic (word choice intentional!) world, anything else would feel like a let-down. That said, the story itself felt disjointed - Worm certainly had distinct arcs, but there still felt like a coherent through-thread, or at least that there was a smooth transition from one to the other. Ward's arcs, by contrast, simply...ended, and then started anew. The protagonist, too, cannot hold a candle to Worm's magnificent anti-hero Taylor. A standout of the original was the way that the narrator's _intensely_ biased (but internally-consistent!) viewpoint is so compelling that you find yourself going along with their reasoning and justifications until you take a step back and realize how far down the slippery-slope she (and you) have slipped. By contrast, Victoria certainly has a hang-up that needs dealing with - but a) it's just the one issue, b) she's _way_ more justified than Taylor ever was (though goes about it in a sometimes-unhelpful way), and c) her viewpoint is uncritically supported by almost every character, so the conflict between perception and reality isn't foregrounded. Finally, the ending felt confused and rushed - I honestly still don't understand the intention or stakes of one of the primary dramatic scenes. Again, comparing with Worm which is my favourite book-ending, and my favourite any-media ending except The Good Place, that's a let-down. Implication from the tone of some of the comments was that the author had gotten bored of the story and just wanted it done with so that he could move on, which I believe.
(On that point, I made the decision to read this book on the author's website, rather than using the "download to ePub" scripts that fans have created (as I did for Worm), so that I could read the comments as I went. Definitely a good decision - there are some _very_ smart (and obsessed!) people out there who gave some insightful commentary and discussion that really enhanced my enjoyment of the book. Again, for free - the Internet can be a wonderful place sometimes!)
Still, for all those flaws - a capital-G Great work, and one I'm very glad to have read. Where Worm is one that I don't-actively-recommend, this I think I would actively-(mildly-)dissuade someone from reading - so that it would only be read by those who are hooked enough to persevere despite discouragement, who I think are those most-likely to enjoy it.
...now I want to re-read Worm with comments...maybe in a couple years...
## The Lathe Of Heaven
I picked this up in a second-hand bookstore in Chicago during a trip for a conference - I'd been meaning to read more Ursula Le Guin for a while (I read The Earthsea Quartet as a kid, and The Left Hand Of Darkness a few years back), and seeing this by the checkout prompted me to pick it up. It's often on Best Of Sci-Fi lists - surely I'd enjoy it, right?
Again, as with A Fire Upon The Deep - heightened expectations lead to disappointment. It was fine, but I just didn't _get_ it. Scenes were described, a mechanic was introduced, but I just didn't get the _point_ of the book. I worried that I'd missed some subtle metaphor, as I did with Camus' "_The Plague_"[^the-plague], but no - from looking up reviews and responses, it seems that the story _is_ the story. I'm really not sure what it's trying to say - "_be careful what you wish for_", or "_power corrupts_", or (surely not!?) "_don't try to improve anything_" (the protagonist himself seems to hold this position and is presented sympathetically, which is, as the youth would say, a big yikes from me)?
## The Internet Con
I closed out the year with Cory Doctorow's latest book, subtitled "_How to Seize The Means Of Computation_". Nothing in here is new to anyone who hangs out on the same kinds of social media as me - I get the impression that this is a book intended to be bought-and-gifted (or, less charitably, read for the sense of navel-gazing self-congratulation). Which, y'know, nothing wrong with that!
# Full lists and stats
(Uncounted but acknowledged - finished Rhythm of War re-read)
1. Tress Of The Emerald Sea
2. White Sands
3. The Jennifer Morgue
4. The Tyranny Of Faith
5. The Fuller Memorandum
6. The Apocalypse Codex
7. The Rhesus Chart
8. The Annihilation Score
9. The Nightmare Stacks
10. The Frugal Wizard's Handbook For Surviving Medieval England
11. The Delirium Brief
12. The Labyrinth Index
13. The First 90 Days
14. Underdog
15. The Rust Book
16. The Infinite And The Divine
17. Dead Lies Dreaming
18. Project Hail Mary
19. A Long Way To A Small, Angry Planet
20. Domain-Driven Design
21. A Fire Upon The Deep
22. Yumi And The Nightmare Painter
23. Naming Things
24. The Sunlit Man
25. Ward
26. The Lathe Of Heaven
27. The Internet Con
I'd pre-acknowledged that this was not going to be a good year on either of my primary tracking stats ("_books by non-white non-male people_" and "_number of books read overall_"), what with a) Brandon Sanderson [spraying books all over the place](https://www.kickstarter.com/projects/dragonsteel/surprise-four-secret-novels-by-brandon-sanderson) and b) this being the year I finally tackled Ward. However, I actually ended up reading _more_ than I read last year. Whadda you know!? I guess indulging in The Laundry Files (which I could easily tear through at a consistent rate of a-book-a-week) pumped my numbers up a bit.
* 22 Fiction, 5 Non-Fiction ("_The First 90 Days_", "_The Rust Book_", "_Domain-Driven Design_", "_Naming Things_", "_The Internet Con_").
* No Genre analysis this year as too many are borderline. Most Branderson straddles the line between Fantasy and Sci-Fi too neatly to categorize, and The Laundry Files is positively allergic to picking a genre.
* 24 by Men, 2 by Women ("_A Long Way To A Small, Angry Planet_" and "_The Lathe Of Heaven_"), and 1 by a collaboration between a man and a woman "_with contributions from the Rust Community_".
* 1 Book by Friends-or-Family (level with last year)
# Summing up, and looking forward
I'd definitely like to read more non-fiction, and more books by non-white/non-men authors next year. Some that are planned off the top of my head (not necessarily prioritizing those criteria):
* OverLondon (based on [the author's delightful Mastodon presence](https://fosstodon.org/@georgepenney@sunny.garden), and a review I saw somewhere calling it Pratchett-esque).
* Thinking Fast And Slow - also picked up in the Chicago second-hand bookstore.
* The Design Of Everyday Things.
* "Programming Rust" by Blandy, Orendorff, and Tindall.
In the [previous year]({{< ref "/posts/2022-wrap-up" >}}), I included a round-up of the Articles I read that year, but this post is already getting overlong - I'll follow up with that in a separate post instead.
[^affiliate-links]: My Amazon Affiliate account expired from lack of use last year, which is unsurprising - well, it's actually surprising it lasted as long as it did! It seems rather fitting to abandon them in the same year I started my post-Amazon position.
[^golang-sucks]: Rust is often spoken of in opposition to GoLang, which is a big vote in Rust's favour to me. Hoo boy, there's a blog post in the works _there_, too...
[^no-toots]: Never say never, but I can't see myself ever using the word "Toot" unironically.
[^used-their-power]: Particularly egregious in a superhero story is the reverse-saidbookism of consistently using the phrase "_\[person\] used their power \[to do X\]_" rather than any alternative like "_\[person\] X'd_". Trust me, if you're talking about a character with flight powers, and you tell me that they floated/drifted/hovered to a location, I can figure out that they used their power to do so!
[^content-warnings]: Though, even there - while some truly horrific things happen, they rarely feel gratuitous - in the sense that I never got the feeling that the author thought "_hmm, I want to spice this scene up and keep people on their seat - let's have something horrible happen to Our Heroes_". Rather, the atrocities feel like coherent outcomes of the situation and of the vile-but-consistent villains that have already been established as having particular goals and Modi Operandi. "[_It's What My Character Would Do_](https://rpg.stackexchange.com/questions/37103/what-is-my-guy-syndrome-and-how-do-i-handle-it)" may not be an excuse in RPGs, but it's absolutely relevant in fiction.
[^the-plague]: To my shame, I got through the entire book (rather bemused!) before realizing that the plague was a metaphor for {{< inlinespoiler >}}Nazism/collaboration{{< /inlinespoiler >}}

@ -1,7 +0,0 @@
---
title: "Adding RSS"
date: 2024-01-15T13:31:03-08:00
tags:
- meta
---
Inspired by [this article](https://rknight.me/blog/the-web-is-fantastic/), I've added (or attempted to?) an RSS feed to this blog. From [Hugo's docs](https://gohugo.io/templates/rss/) it _seems_ pretty simple, but please let me know if you run into any issues!

@ -1,76 +0,0 @@
---
title: "Almost All Numbers Are Normal"
date: 2023-12-17T17:23:09+00:00
math: true
tags:
- mathematics
extraHeadContent:
- <link rel="stylesheet" type="text/css" href="/css/table-styling-almost-all-numbers.css">
---
"Almost All Numbers Are Normal" is a delightful sentence. In just five words, it relates three mathematical concepts, in a way which is true but misleading - the meaning of the sentence is almost exactly the opposite of what a layman would expect.
<!--more-->
## Numbers
The intuitive conception of "_numbers_" if you ask someone to simply "_name a number_" is the natural numbers $\mathbb{N}$ (0[^is-zero-a-natural-number], 1, 2, 3, ...), or the integers $\mathbb{Z}$ (... -3, -2, -1, 0, 1, 2, 3, ...). [Of course](https://xkcd.com/2501/) most folks are familiar with the rationals $\mathbb{Q}$, though probably by the name of and through the lens of "fractions" rather than the more mathematically-precise objects - and even those only scratch the surface of the full set of [real numbers](https://en.wikipedia.org/wiki/Real_number) $\mathbb{R}$ [and beyond](https://en.wikipedia.org/wiki/Complex_number#Generalizations_and_related_notions).
There are plenty of ways to conceptualize some of these sets of numbers - typically as the unique (up to isomorphism) structure satisfying some particular set of axioms like [Peano's](https://en.wikipedia.org/wiki/Peano_axioms#Set-theoretic_models) or [Dedekind's](https://en.wikipedia.org/wiki/Dedekind_cut) - but for the purposes of this post, I want to consider the reals[^limitation-of-consideration] as an infinite sequence[^what-about-the-decimal-point] of digits 0-9, or equivalently as a function $f: \mathbb{N} \to {0, 1, 2, \cdots 9}$. That is, the number `7394.23` is equivalent to the function partially represented by the following table:
| Index | Value |
|--------------|-----------|
| 1 | 7 |
| 2 | 3 |
| 3 | 9 |
| 4 | 4 |
| 5 | 2 |
| 6 | 3 |
| 7 | 0 |
| 8 | 0 |
| 9 | 0 |
| ... | ... |
I say _partially_ represented, because of course this table could continue infinitely - for any index greater than 6, the function's value is 0: [$\forall n > 6, f(n) = 0$].
This way of describing numbers focuses less on their value, and more on their written representation - it stresses the ability to ask "_what is the fifth digit of this number?_" much more than the ability to ask "_which of these two numbers is bigger?_". This focus will be justified in the next section.
## Normality
The word "normal" has lots of domain-specific meanings in mathematics, many of them related to one of two concepts:
* **orthogonality** - that's fancy mathematician speak for "_being at 90-degrees to something_"[^orthogonal]. For instance, we could say that a skyscraper is orthogonal to, or normal to, the ground, because it points straight upwards and the ground is horizontal.
* of or related to the **norm**, which itself is a function that assigns a length-like value to mathematical objects.
In particular - I don't think I've _ever_ heard the term "_normal_" used in its layman's sense of "_standard, expected, regular, average_"[^term-of-art]. I guess mathematicians don't think it's very normal to be normal.
In number theoretic terms, a [normal number](https://en.wikipedia.org/wiki/Normal_number)[^absolutely-normal] is one in which all digits and sequences of digits occur with the same frequency - no digit or sequence is "favoured". The string of digits looks like it could have been the output of a random process like coin-flipping (for binary digits) or repeatedly rolling a d10.
It's pretty easy to immediately see that no number with terminating decimal expansion (which includes all the integers, and all fractions with a denominator of a power of 10) is normal - if the sequence of digits starts repeating 0, then 0 is "favoured", and the number is not normal. A little more thought shows that every rational number (every fraction) is abnormal - either the division terminates (and the decimal expansion continues `000...`), or the decimal expansion repeats (and so the repeated-string is "favoured", and any string which didn't appear before that point is absent).
### Corollary of normalcy
A fun property of normal numbers is that, because all subsequences are "_equally likely_", and because they are infinite non-repeating sequences, any given sequence of numbers _must_ exist somewhere in them. Since any content that is stored on a computer is stored as a sequence of numbers, this implies that any content you could imagine - your name and birthday, the Director's Cut of Lord Of The Rings, a sequence of statements which prove that almost all numbers are normal - exists somewhere within each of them.
The trick would be _finding_ it...
## Almost All
Along with "normal", this is a common term which has a specified mathematical meaning - although, in this case, the meaning _is_ intuitive[^normal-meaning], just formally-defined.
A property is said to hold for "_almost all_" elements of a set if the complementary subset of elements for which the property does _not_ hold is negligible. The definition of negligible depends on the context, but will typically mean:
* A finite set inside an infinite set ("_almost all natural numbers are bigger than 10_" - because the set of numbers smaller-than-or-equal-to 10 is finite, and the set of naturals is infinite)
* A [countable](https://en.wikipedia.org/wiki/Countable_set) set inside an [uncountable](https://en.wikipedia.org/wiki/Uncountable_set) one, or generally a "smaller" infinity inside a bigger one.
This is probably the least surprising of the three concepts, but it does take a while for Maths undergrads to get their head round the co-feasibility of the statements "_P(x) is true for almost all x in S_" and "_P(x) is false for infinite x in S_".
## Putting it all together
So, putting it all together - "_almost all numbers are normal_" could be roughly translated as "_when considering the set of functions which map from $\mathbb{N}$ to ${0, 1, 2, ... 9}$, all but a negligible set of those functions result in sequences which have subsequences roughly evenly distributed_". Which is about as far as you could get from the results you'd get if you asked a layman to name some normal numbers - small natural numbers!
(I'm not actually going to present a proof of that fact here - I vaguely recall the shape of it, but being over a decade out of study, it's a little beyond my capability to present understandably. There are some reasonably accessible proofs [here](https://arxiv.org/pdf/2102.00493.pdf) and [here](https://www.colorado.edu/amath/sites/default/files/attached-files/math21-8.pdf) if you're interested!)
[^is-zero-a-natural-number]: If you have strong opinions on whether 0 is a natural number, you probably already know the rest of what I'm going to cover in this post.
[^limitation-of-consideration]: I don't think it's a cheat to limit my consideration to real numbers here, since the concept of normality only applies to real numbers. For any non-real number, the answer to "_is this normal?_" is `null`, `undefined`, or "_[mu](https://en.wikipedia.org/wiki/Mu_(negative)#Non-dualistic_meaning)_".
[^what-about-the-decimal-point]: For reasons that will become clear as I go on to talk about normality, we're ignoring the decimal point. That is, $123 \equiv 1.23 \equiv 0.000123$ for this discussion. Just trust me.
[^orthogonal]: Again - if you know enough to know why this statement is incorrect, you also know enough to know why I'm glossing over the complications.
[^term-of-art]: yes, I did intentionally pick words here which all have their own mathematical definitions. Language is fun!
[^absolutely-normal]: I'm only discussing base-10 here. A number which is normal in all integer bases >= 2 bears the wonderful label "_absolutely normal_".
[^normal-meaning]: that is - it has the normal meaning 😉

@ -1,33 +0,0 @@
---
title: "Automatic Merging"
date: 2024-02-14T22:46:08-08:00
tags:
- CI/CD
- homelab
- productivity
- SDLC
---
When working on my personal projects, I typically just push straight to `main` - opening a PR just to approve it seems entirely pointless, as if I had been able to find any issues in my own work, I wouldn't wait to do it in a PR! However, this does mean that, if I forget to run any quality checkers (linters, tests, etc.), I won't find out about it until `on: push` GitHub Action runs, and even then I might not see the failure until several commits later.
<!--more-->
This problem _can_ be addressed with [pre-commit hooks](https://git-scm.com/book/en/v2/Customizing-Git-Git-Hooks)[^spoiler], but I've never been a fan of them:
* As the documentation states, "_client-side hooks are not copied when you clone a repository_", meaning that any new collaborators (or even "_me after [blowing away the repo](https://xkcd.com/1597/)_") will silently miss out on them.
* On-commit seems like the wrong cadence to run quality checks - local commits should be fast and frequent checkpoints that you can return to (or juggle around) as needed, and adding friction to them makes development more precarious. Ideally, quality checks would run immediately prior to _pushing_, but the `pre-push` hook runs "_**after** the remote refs have been updated_", meaning[^do-i-understand-git] that even if they fail, the remote will still have been changed.
The other day I hit on a cool idea that would seem to address both problems - since GitHub allows PRs to be set to be automatically merged when all checks pass, perhaps I could set up a workflow whereby:
* I push commits to a `dev` branch
* An auto-merging PR is automatically opened _from_ that branch to `main`
* If the checks pass, the PR is automatically merged
* If it fails...well, I'd have to set up some non-email channel to notify myself about that, but that shouldn't be too hard
I did make [some progress on it](https://github.com/scubbo/edh-elo/tree/autoMergePR/.github/workflows), but ran into some issues:
* PRs cannot be created _in_ an AutoMerge state - they have to be set _into_ that state after creation. Although [this SO answer](https://stackoverflow.com/a/72259998/1040915) did describe how to do so, some quirk of GHA meant that that failed when executed _in_ a GHA context (claiming the PAT did not have permissions)
* All is well and good if the PR immediately passes - but if it fails and I make correcting commits onto `dev` (which update the PR), then when the PR passes and is squashed into a single commit to then be merged into `main`[^squash-and-merge], then `dev` and `main` will have diverged, and the next PR that's submitted from `dev` to `main` will appear to be contributing the preceding commits as well. Not ideal!
After a couple of hours of fiddling around, I returned to investigating `pre-commit` hooks, and found the [pre-commit](https://pre-commit.com/) _tool_, which provides a management interface for hooks. It unfortunately still requires manual installation (so a new contributor might not benefit from it - though, in fairness, that can be double-checked with CI checks), but the experience is smoother than writing hooks myself. I'll keep experimenting with it and see how I like it.
[^spoiler]: And - spoiler alert - after running into frustrations with my first approach, this was exactly what I ended up doing, using the [pre-commit](https://pre-commit.com/) tool.
[^do-i-understand-git]: I do admit I haven't actually tested this understanding. It does seem surprising, as it would make the `pre-push` hook basically useless. This also seems to contradict the documentation [here](https://github.com/git/git/blob/master/Documentation/RelNotes/1.8.2.txt) which states that "_"git push" will stop without doing anything if the new "pre-push" hook exists and exits with a failure._". So, maybe `pre-push` hooks _aren't_ useless? I've asked for more information on this [here](https://stackoverflow.com/questions/77998932/when-exactly-in-the-push-process-does-a-pre-push-hook-actually-run). But, the first counter-argument - and the convenience of the `pre-commit` _tool_ - have me still using `pre-commit` hooks, even if `pre-push` would have worked.
[^squash-and-merge]: I will die on the hill that "Squash And Merge" is the only sensible PR merge strategy. A Merge Commit means that you have non-linear history, and Rebase means that one _conceptual_ change is represented as however-many different commits were generated during development. There is no value whatsoever in preserving the frantic, scrabbling, experimental commits that were generated _during_ development - they are scaffolding that should be tidied away before presenting the finished product as a single commit![^irony]
[^irony]: Ironic, then, that in fact this tangled automation approach is one of the only cases where a Merge Commit would actually be...I can't believe I'm actually going to say this..._better_ 🤮

@ -1,66 +0,0 @@
---
title: "Backups and Updates and Dependencies and Resiliency"
date: 2024-02-18T16:00:00-08:00
tags:
- homelab
- k8s
- SDLC
---
This post is going to be a bit of a meander. It starts with the description of a bug (and appropriate fix, in the hopes of [helping a fellow unfortunate](https://xkcd.com/979/)), continues on through a re-consideration of software engineering practice, and ends with a bit of pretentious terminological philosophy. Strap in, let's go!
<!--more-->
# The bug
I had a powercut at home recently, which wreaked a bit of havoc on my homelab - good reminder that I need to buy a UPS! Among other fun issues added to my Disaster Recovery backlog, I noticed that the [Sonarr](https://sonarr.tv/) container in my [Ombi](https://ombi.io/) pod was failing to start up, with logs that looked a little like[^not-actual-logs]:
```
[Fatal] ConsoleApp: EPIC FAIL!
[v4.0.0.615] NzbDrone.Common.Exceptions.SonarrStartupException: Sonarr failed to start: Error creating main database --->
System.Exception: constraint failed NOT NULL constraint failed: Commandstemp.QueuedAt While Processing: "INSERT INTO "Commands_temp" ("Id", "Name", "Body", "Priority", "Status", "QueuedAt", "StartedAt", "EndedAt", "Duration", "Exception", "Trigger", "Result") SELECT "Id", "Name", "Body", "Priority", "Status", "QueuedAt", "StartedAt", "EndedAt", "Duration", "Exception", "Trigger", "Result" FROM "Commands"" --->
code = Constraint (19), message = System.Data.SQLite.SQLiteException (0x800027AF): constraint failed NOT NULL
...
```
I could parse enough of this to know that something was wrong with the database, but not how to fix it.
After trying the standard approach of "_overwriting the database with a backup_[^backup]" - no dice - I went a-googling. It [seems](https://old.reddit.com/r/sonarr/comments/15p160j/v4_consoleapp_epic_fail_error/) that a buggy migration was introduced in `v4.0.0.614` of Sonarr, rendering startup impossible if there are any `Tasks` on the backlog in the database. Since my configuration [previously declared the image tag as simply `latest`](https://gitea.scubbo.org/scubbo/helm-charts/src/commit/3dfc818f5f58e3a733fd7acd22269bf1ac94d21a/charts/ombi/templates/deployment.yaml#L57)[^watchtower], the pod restart triggered by the power outage pulled in the latest version, which included that buggy migration. Once I knew that, it was the work of several-but-not-too-many-moments to:
* `k scale deploy/ombi --replicas 0` to bring down the existing deployment (since I didn't want Sonarr itself messing with the database while I was editing it)
* Spin up a basic ops pod with the PVC attached - frustratingly there's [still no option to do so directly from `k run`](https://github.com/kubernetes/kubernetes/issues/30645), so I had to hand-craft a small Kubernetes manifest and `apply` it.
* Install `sqlite3` and blow away the `Tasks` table.
* Teardown my ops pod, rescale the Ombi pod, and confirm everything working as expected.
# The first realization - automatic dependency updates
This experience prompted me to re-evaluate how I think about updating dependencies[^what-are-dependencies]. Having only had professional Software Engineering experience at Amazon, a lot of my perspectives are naturally biased towards the Amazonian ways of doing things, and it's been an eye-opening experience to get more experience, contrast Amazon's processes with others', and see which I prefer[^ci-cd].
I'd always been a bit surprised to hear the advice to pin the _exact_ versions of your dependencies, and to only ever update them deliberately, not automatically. This, to me, seemed wasteful - if you trust your dependencies to [follow SemVer](https://semver.org/), you can safely naïvely pull in any non-major update, and know that you are:
* depending on the latest-and-greatest version of your dependency (complete with any efficiency gains, security patches, added functionality, etc.)
* never going to pull in anything that will break your system (because that, by definition, would be a Major SemVer change)
The key part of the preceding paragraph is "_if you trust your dependencies_". At Amazon, I did - any library I depended on was either explicitly written by a named team (whose office hours I could attend, whose Slack I could post in, whose Oncall I could pester), or was an external library deliberately ingested and maintained by the Third-Party Software Team. In both cases, I knew the folks responsible for ensuring the quality of the software available to me, and I knew that _they_ knew that they were accountable for it. I knew them to be held to (roughly!) the same standards that I was. Moreover, the sheer scale of the company meant that any issue in a library would be likely to be found, reported, investigated, and mitigated _even before my system did a regular daily scan for updates_. That is - the possible downside to me of automatically pulling in non-major changes was practically zero, so the benefit-ratio is nearly infinite. I can count on one hand the number of times that automatically pulling in updates caused any problems for me or my teams, and only one of those wasn't resolved by immediately taking an explicit dependency on the appropriate patch-version. Consequently, my services were set up to depend only on a specific Major Version of a library, and to automatically build against the most-recent Minor Version thereof.
But that's not the daily experience of developers, most of whom are taking dependencies mostly on external libraries, without the benefits of a 3P team vetting them for correctness, nor of accountability of the developing team to fix any reported issues immediately. In these situations - where there is non-negligible risk that a breaking change might be incorrectly published with a minor version update, or indeed that bugs might remain unreported or unfixed for long periods of time - it is prudent to pin an explicit version of each of your dependencies, and to only make any changes when there is a functionality, security, or other reason to update.
# The second realization - resiliency as inefficiency
Two phenomena described here -
* Having to buy a UPS, because PG&E can't be trusted to deliver reliable energy.
* Having to pin your dependency versions to not-the-latest-and-greatest minor-version, because their developers can't be trusted to deliver bug-free and correctly-SemVer'd updates.
...are examples of a broader phenomenon I've been noticing and seeking to name for some time - "_having to take proactive remediative/protective action because another party can't be trusted to measure up to reasonable expectations_". This is something that bugs me every time I notice it[^examples], because it is inefficient, _especially_ if the service-provider is providing a(n unreliable) service to many customers. At what point does the cost of thousands of UPSes outweigh the cost of, y'know, just providing reliable electricity[^complexity]?
In a showerthought this morning, I realized - _this is just [resiliency engineering](https://sre.google/sre-book/introduction/) in real life_. In fact, I remembered reading a quote from, I think, the much-fêted book "[How Infrastructure Works](https://www.amazon.com/How-Infrastructure-Works-Inside-Systems/dp/0593086597)", to the effect that any resiliency measure "_looks like_" inefficiency when judged solely on how well the system carries out its function _in the happy case_ - because the objective of resiliency is not to improve the behaviour of the happy case, but to make it more common by steering away from failure cases. Hopefully this change of perspective will allow me to meet these incidents with a little more equanimity in the future.
...and if you have any recommendations for a good UPS (ideally, but not necessarily, rack-mountable), please let me know!
[^not-actual-logs]: I didn't think to grab actual logs at the time - it was only in the shower a day or two later that I realized this provided the jumping-off point for this blog post. These logs are taken from [this Reddit post](https://old.reddit.com/r/sonarr/comments/15p160j/v4_consoleapp_epic_fail_error/), which I found invaluable in fixing the issue.
[^backup]: Handily, Sonarr seems to automatically create a `sonarr.db.BACKUP` file - at least, it was present and I didn't remember making it! 😝 but, even if that hadn't been the case, I [took my own advice]({{< ref "posts/check-your-backups" >}}) and set up backups with [BackBlaze](https://www.backblaze.com/), which _should_ have provided another avenue. That reminds me...the backup mechanism is overdue for a test...
[^watchtower]: I know, I know...installing [Watchtower](https://containrrr.dev/watchtower/) is on my list, I swear!
[^what-are-dependencies]: in this section I'm using "dependencies" to refer to "_software libraries used by the services that I as a professional software engineer own-and-operate_", but most of the same thinking applies to "_image tags of services that I deploy alongside my application that are owned and developed by people other than me or my team_".
[^ci-cd]: I will die on the hill that Amazon's internal [CI/CD system](https://blog.scubbo.org/posts/ci-cd-cd-oh-my/) is dramatically superior to any Open Source offering I've found, in ways that don't seem _that_ hard to replicate (primarily, though not solely, image specifications based on build metadata rather than hard-coded infra repo updates), and I'm frankly baffled as to why no-one's implementing their functionality?[^cunningham]
[^cunningham]: Yes, this _is_ a deliberate invocation of [Cunningham's Law](https://en.wikipedia.org/wiki/Ward_Cunningham#Law). _Please do_ prove me wrong!
[^examples]: Though, having _finally_ gotten around to blogging about it, I now can't bring to mind any of the examples that I'd noted.
[^complexity]: I'm glossing over a lot of complexity, here, and deliberately hand-waving away the fact that "_every problem looks easy from the outside_". It's perfectly possible that the difficulty of going from [5 9's](https://en.wikipedia.org/wiki/High_availability) of electrical uptime to 100% is impractical - that "_[the optimal amount of powercuts is non-zero](https://www.bitsaboutmoney.com/archive/optimal-amount-of-fraud/)_" - or that occasional powercuts aren't as impactful to the average consumer as they are to homelab aficionados. Frankly, I doubt both points, given what I've heard about PG&E's business practices - but, nonetheless, the fact remains that every marginal improvement to a service-provider's service has a leveraged impact across all of its consumers. That break-even point might fall at different places, depending on the diminishing returns of improvement and on the number of customers - but the magnifying effect remains.

@ -1,195 +0,0 @@
---
title: "Base App Infrastructure"
date: 2024-05-10T03:00:23-07:00
tags:
- crossplane
- homelab
- k8s
- SDLC
- vault
---
In my [previous post]({{< ref "/posts/vault-secrets-into-k8s" >}}), I had figured out how to inject Vault secrets into Kubernetes Secrets using the [Vault Secrets Operator](https://developer.hashicorp.com/vault/tutorials/kubernetes/vault-secrets-operator). My runthrough of the walkthrough worked, but I [swiftly ran into namespacing issues]({{< ref "/posts/vault-secrets-into-k8s#added-2024-04-29-namespacing-secrets" >}}) when trying to use it "_in production_".
<!--more-->
# The Problem
The setup can be divided into two parts[^platform-vs-app-team]:
* Creation of a Vault Role (with `boundServiceAccountNamespaces` corresponding with the k8s namespaces that should be permitted to access it) and Policy, and a k8s `VaultAuth` object telling the Vault Secrets Operator how to access the Vault Role.
* Creation of a `VaultStaticSecret` (referencing the VaultAuth object) in the app's `-deployment` repo, which results in a k8s secret.
As I started trying to extend my initial installation to other apps, I realized that simply adding more k8s namespaces to the `boundServiceAccountNamespaces` of a single Vault Role would not be a secure solution - it would allow _any_ pods in any of the bound namespaces to access any secret of any of the (other) applications. Ideally, each application-stage (or, equivalently, each k8s namespace[^namespaces-per-application]) would have its own resources created, with the Vault Role only accessible from that namespace[^sub-namespace-permissions].
## Why do I care?
You may be wondering why I care about Least Privilege - after all, it's only my own homelab, surely I know and trust every application that's running on it? Well, to an extent. I trust them enough to install them, but it still doesn't hurt to limit their privileges so that any unforeseen misbehaviour - whether deliberate or accidental - has limited impact. More importantly, my primary motivation in running this homelab is to learn and practice technical skills - the tasks don't have to be entirely practical, so long as they are educational! In fact, as you'll see shortly, this problem is almost-exactly equivalent to one I'm going to be solving at work soon, so doing this "right" is a good head-start.
# The solution
Ideally, I'd be able to automate (via extracted-and-parameterized logic) the creation of these resources as part of the application definition, since many apps will have similar requirements and I want to minimize any manual or imperative setup.
Thankfully, this is pretty close to a problem that I've been looking into at work, so I have a solution ready to go - [Crossplane](https://www.crossplane.io/), a tool that allows:
* management of "_External Resources_" (i.e. resources in systems outside Kubernetes, like Vault, Argo, etc.) via Kubernetes objects - i.e. you can declaratively create and update a Kubernetes object (a "_Managed Resource_") which represents the External Resource, and the Kubernetes reconciliation loop will keep the External Resource up-to-date.
* "bundling" of resources into Compositions - parameterized and inter-related collections of resources, analogous to Constructs in CDK.
![Diagram of the interrelation of the various Crossplane concepts](https://docs.crossplane.io/media/composition-how-it-works.svg "Diagram of the interrelation of the various Crossplane concepts")
With Crossplane in hand, the solution becomes simple:
* (while wearing my "Platform Team" hat) install a Provider (the interface between Crossplane and an external service) for Vault, and create a Composition which bundles the Vault resources that are necessary for Vault Secrets Operator setup.
* (wearing my "App team" hat) whenever I install an app which requires secret injection, do so alongside a Composite Resource (an instance of a Composition). All from the convenience of a single deployment repo, and with only a few extra lines of configuration!
## Walkthrough
You can see the solution [here](https://gitea.scubbo.org/scubbo/helm-charts/src/commit/e798564692f71187e3ff3f9d77f3aa1c46ca9ee4/charts/vault-crossplane-integration/base-app-infra.yaml).
### XRD
(Lines 1-26) A [Composite Resource Definition](https://docs.crossplane.io/latest/concepts/composite-resource-definitions/) (or "XRD" - yeah, I know, but Kubernetes had already taken the term "CRD") is like (in Programming Language terms) the Interface to a [Composition](https://docs.crossplane.io/latest/concepts/compositions/)'s Implementation, or (in Web Service) the API Spec or schema. It defines how a consumer can invoke a Composition - the name they should use, and the parameters they should pass. Consumers can either invoke this by its name if creating a cluster-scoped [Composite Resource](https://docs.crossplane.io/latest/concepts/composite-resources/), or in a namespaced context via a [Claim](https://docs.crossplane.io/latest/concepts/claims/).
This definition is saying:
* (Lines 6-8) "_There's a Composition that can be addressed as `xbaseapplicationinfrastructures.scubbo.org`_..."
* (Lines 10-12) "_...(which can also be addressed by the Claim Name `BaseAppInfra`)..._"
* (Lines 13-25) "_...which has only a single version defined, which takes a single string parameter named `appName`_"
It is apparently possible to provide [multiple schema versions](https://docs.crossplane.io/v1.15/concepts/composite-resource-definitions/#multiple-schema-versions) - but since "_new required fields are a 'breaking change.'_" and "_Only one version can be `referenceable` \[...which...\] indicates which version of the schema Compositions use_", I'm not really sure how that is actually useful - and this is borne out by the fact that "_Crossplane recommends implementing breaking schema changes as brand new XRDs._".
### Top-level Composition
The only point to note in lines 29-36 is that `spec.compositeTypeRef.apiVersion` and `spec.compositeTypeRef.kind` must match the values set on 6, 8, and 14.
### Vault Resources
Lines 37-136 define Vault Resources, provided by the [Vault Provider](https://github.com/upbound/provider-vault). These create a Vault Role, Policy, and KV Secrets Mount roughly as described in the [walkthrough](https://developer.hashicorp.com/vault/tutorials/kubernetes/vault-secrets-operator). Note the use of [patches and transforms](https://docs.crossplane.io/latest/concepts/patch-and-transform/) to set values in the Managed Resources based on properties of the Claim (the Kubernetes namespace and the parameter `appName`)
### Kubernetes Resource
The [Vault Secrets Operator walkthrough](https://developer.hashicorp.com/vault/tutorials/kubernetes/vault-secrets-operator) also [requires](https://github.com/hashicorp-education/learn-vault-secrets-operator/blob/main/vault/vault-auth-static.yaml) the creation of a `VaultAuth` object (specifying how the Secrets Operator should authenticate to Vault - i.e. which Role to use), and that is [not an object provided by the Vault Provider](https://doc.crds.dev/github.com/upbound/provider-vault)[^limited-vault-provider], so I also needed to use the [Kubernetes Provider](https://github.com/crossplane-contrib/provider-kubernetes) to create an arbitrary Kubernetes object as part of the Composition.
### Actual usage
After deploying this Composition to my cluster, actual usage was a doddle:
```bash
$ cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Namespace
metadata:
name: example-namespace-for-crossplane-vault-secrets-demo
---
apiVersion: scubbo.org/v1alpha1
kind: BaseAppInfra
metadata:
name: example-app-base-infra
namespace: example-namespace-for-crossplane-vault-secrets-demo
spec:
appName: example-app
EOF
namespace/example-namespace-for-crossplane-vault-secrets-demo created
baseappinfra.scubbo.org/example-app-base-infra created
$ kubectl ns example-namespace-for-crossplane-vault-secrets-demo
Context "default" modified.
Active namespace is "example-namespace-for-crossplane-vault-secrets-demo".
$ kubectl get BaseAppInfra example-app-base-infra
NAME SYNCED READY CONNECTION-SECRET AGE
example-app-base-infra True True 29s
$ vault secrets list | grep 'example-app'
app-example-app-kv/ kv kv_d4b378a7 KV storage for app example-app
$ vault read auth/kubernetes/role/vault-secrets-operator-example-app-role
Key Value
--- -----
alias_name_source serviceaccount_uid
audience vault
bound_service_account_names [default]
bound_service_account_namespaces [example-namespace-for-crossplane-vault-secrets-demo]
token_bound_cidrs []
token_explicit_max_ttl 0s
token_max_ttl 0s
token_no_default_policy false
token_num_uses 0
token_period 0s
token_policies [vault-secrets-operator-example-app-policy]
token_ttl 24h
token_type default
$ vault kv put -mount app-example-app-kv example-secret key=value-but-make-it-secret
============= Secret Path =============
app-example-app-kv/data/example-secret
======= Metadata =======
Key Value
--- -----
created_time 2024-05-09T05:53:59.20680794Z
custom_metadata <nil>
deletion_time n/a
destroyed false
version 1
$ kubectl get secrets
No resources found in example-namespace-for-crossplane-vault-secrets-demo namespace.
$ cat <<EOF | kubectl apply -f -
apiVersion: secrets.hashicorp.com/v1beta1
kind: VaultStaticSecret
metadata:
name: vault-kv-app
namespace: example-namespace-for-crossplane-vault-secrets-demo
spec:
type: kv-v2
mount: app-example-app-kv
path: example-secret
destination:
name: secretkv
create: true
refreshAfter: 30s
vaultAuthRef: vault-auth-example-app
EOF
vaultstaticsecret.secrets.hashicorp.com/vault-kv-app created
$ kubectl get VaultStaticSecret
NAME AGE
vault-kv-app 6s
$ kubectl get secrets
NAME TYPE DATA AGE
secretkv Opaque 2 23s
$ kubectl get secret secretkv -o jsonpath='{.data.key}' | base64 -d
value-but-make-it-secret
```
Almost all of the steps above were executed "as if" I was a member of the App Team. The Platform Team (or, more accurately, automation owned by the Platform Team, but triggered during Application Creation via the Developer Platform like [Backstage](https://backstage.io/)) should take care of creating the Namespace, but everything else - creating the `BaseAppInfra`, populating the Vault Secret, and creating the `VaultStaticSecret` - are tasks that the App Team can handle.
# Next Steps and Further Thoughts
* Unwinding my yak-shaving-stack by another level, my motivation for injecting secrets from Vault was to be able to set up [Velero](https://velero.io/) with AWS Credentials so I can back up my PVs to S3. Most of my pods are using my TrueNAS cluster as a persistent storage provider (thanks to [this great walkthrough](https://jonathangazeley.com/2021/01/05/using-truenas-to-provide-persistent-storage-for-kubernetes/)), with RAID for redundancy[^raidz1], so they should be _reasonably_ durable - but, backups are still important!
* I should probably export the ZFS Snapshots off-site as well. The task stack never ends...
* My system's getting complex enough that an architecture diagram in the [README](https://gitea.scubbo.org/scubbo/helm-charts) would be useful - at least, as a reminder to myself of what tools I have running, even if no-one else would be interested!
* Because I'm using an [App Of Apps Pattern](https://argo-cd.readthedocs.io/en/stable/operator-manual/cluster-bootstrapping/), I don't need to create Argo Applications[^argo-application] and Policies for the applications - but, for setups that don't use that pattern (like, say, my work :P ), those resources should also be part of the Base Infrastructure. Assumable (Vault) Roles for the application _itself_ to use would also be good.
* This setup defines a Composition that any App Team can create (via Claim), but I haven't looked into how to prevent (non-admin) users from creating arbitrary Managed Resources (outside the scope of a Composition). That is, there's nothing to prevent a user from using Crossplane to create a Vault Policy that has global access, creating a Vault Role using that Policy that's available to their namespace, and wreaking havoc. I suspect this would be a use-case for [Kyverno](https://kyverno.io/), [OpenPolicyAgent](https://www.openpolicyagent.org/docs/latest/kubernetes-introduction/), or other policy tools.
* Several fields in the [Composition](https://gitea.scubbo.org/scubbo/helm-charts/src/commit/e798564692f71187e3ff3f9d77f3aa1c46ca9ee4/charts/vault-crossplane-integration/base-app-infra.yaml) are mutually-dependent. For instance, the name of the Vault Role ([line 71](https://gitea.scubbo.org/scubbo/helm-charts/src/commit/e798564692f71187e3ff3f9d77f3aa1c46ca9ee4/charts/vault-crossplane-integration/base-app-infra.yaml#L71)) must be referenced by the VaultAuth on [line 166](https://gitea.scubbo.org/scubbo/helm-charts/src/commit/e798564692f71187e3ff3f9d77f3aa1c46ca9ee4/charts/vault-crossplane-integration/base-app-infra.yaml#L166), and the name of the Vault Policy ([line 128](https://gitea.scubbo.org/scubbo/helm-charts/src/commit/e798564692f71187e3ff3f9d77f3aa1c46ca9ee4/charts/vault-crossplane-integration/base-app-infra.yaml#L128)) must be assigned to the Role on [line 79](https://gitea.scubbo.org/scubbo/helm-charts/src/commit/e798564692f71187e3ff3f9d77f3aa1c46ca9ee4/charts/vault-crossplane-integration/base-app-infra.yaml#L79). I'd _love_ to use [cdk8s](https://cdk8s.io/) to _define_ the resources instantiated by Crossplane, so that these dependencies can be made explicit, rather than incidental. As a coworker of mine is fond of proclaiming, "_YAML is the assembly language of Cloud-Native_" - although it's a universally-comprehended language that tools can use to communicate, we as human developers should be using higher-level tools and abstractions.
* I've still only used the Secrets Operator to inject static secrets. I'd be interested to see how [Dynamic Secrets](https://developer.hashicorp.com/vault/tutorials/kubernetes/vault-secrets-operator#setup-dynamic-secrets) - for secret values which have to change over time, such as TTL'd creds for other services - would work. According to the [docs](https://kubernetes.io/docs/concepts/configuration/secret/#editing-a-secret), "_updates to existing `Secret` objects are propagated automatically to Pods that use the data_", which is pretty cool.
* An alternative would be to use the [Vault Sidecar Injector service](https://developer.hashicorp.com/vault/tutorials/kubernetes/kubernetes-sidecar) to directly inject and update Vault secret values into the Pod. At first glance, I'd be averse to running both this _and_ Vault Secrets Operator - I'd prefer to have one-and-only-one way of getting Vault Secrets into Kubernetes, and VSO (plus native Secret mounting) seems to cover all use-cases whereas Vault Sidecar only covers injection (and not setting Secret values as env variables) - but, it's always good to know the alternative options!
[^platform-vs-app-team]: In a production setup in a fully-operationalized company, these tasks would be carried out by the Platform team and by the App team, respectively. Obviously in my own homelab setup, I fulfil both roles - but if it ever seems odd in this article that I'm jumping through hoops to keep permission segregated "from myself", keep in mind that I'm effectively "acting as" two different teams.
[^namespaces-per-application]: or, for grants that should be available to all stages of the application, "_each set of namespaces which correspond with a single application_". IDK if this is an industry-standard Best Practice, but the norm at work is to have k8s namespaces `foo-application-dev`, `foo-application-qa`, and `foo-application-prod` for each of the stages of the application, which seems like a sensible way to limit blast radius of changes. I wonder if there's a k8s-native concept of "namespace hierarchies", where you could define (say) a parent namespace `foo-application` (which "contains" the three leaf namespaces), and have any permission grants "trickle down" to its children.
[^sub-namespace-permissions]: not relevant right now, but I wonder if there's a use-case for even stricter restrictions than just the namespace granularity. I can imagine a case where there are several pods/jobs within a(n application-stage - that is, within a) namespace, but where a given secret should only be accessible to a subset of them. Something for Future-Jack to look into! That level of restriction would presumably be handled at the k8s-level, not the Vault level - the App/Platform boundary interface ensures only that the right secrets are available to the right App, and then the App itself (via k8s) is responsible for further scope-restrictions.
[^limited-vault-provider]: which is fair - it's a CRD inherent to the Vault Secrets Operator and is an object which exists "in" Kubernetes, not in the external service Vault itself
[^raidz1]: Only RAIDZ1, which is apparently [frowned upon](https://serverfault.com/questions/634197/zfs-is-raidz-1-really-that-bad) - but, given that I'm paying for my own hardware rather than designing it for a corporate budget, I'm making a tradeoff between redundancy and cost-of-drives.
[^argo-application]: Argo is pretty great as a tool, but I will _never_ forgive them for the heinous naming decision of giving the name "Application" to "_a single stage/deployment of an application_"
<!--
Reminders of patterns you often forget:
Images:
![Alt-text](url "Caption")
Internal links:
[Link-text](\{\{< ref "/posts/name-of-post" >}})
(remove the slashes - this is so that the commented-out content will not prevent a built while editing)
-->

@ -1,26 +0,0 @@
---
title: "Books as Vehicles"
date: 2023-12-11T20:19:13-08:00
tags:
- snippets
---
"The Liar", Stephen Fry's first novel follows a Wildean young man studying language at Cambridge University. I wonder where he got his inspiration.
<!--more-->
This passage covers our hero Adrian's meeting with his Senior Tutor, philology professor Trefusis:
> Trefusis's quarters could be described in one word.
>
> Books.
>
> Books and books and books. And then, just when an observer might be lured into thinking that that must be it, more books.
>
> Barely a square inch of wood or wall or floor was visible. Walking was only allowed by pathways cut between the piles of books. Treading these pathways with books waist-high either side was like negotiating a maze. Trefusis called the room his 'librarinth'. Areas where seating was possible were like lagoons in a coral strand of books.
>
> Adrian supposed that any man who could speak twenty-three languages and read forty was likely to collect a few improving volumes along the way. Trefusis himself was highly dismissive of them.
>
> 'Waste of trees,' he had once said. 'Stupid, ugly, clumsy, heavy things. The sooner technology comes up with a reliable alternative the better.'
>
> Early in the term he had flung a book at Adrian's head in irritation at some crass comment. Adrian had caught it and been shocked to see that it was a first edition of _Les Fleurs du Mal_.
>
> 'Books are not holy relics,' Trefusis had said. 'Words may be my religion, but when it comes to worship, I am very low church. The temples and the graven images are of no interest to me. The superstitious mammetry of a bourgeois obsession for books is severely annoying. Think how many children are put off reading by prissy little people ticking them off whenever they turn a page carelessly. The world is so fond of saying that books should be "treated with respect". But when are we told that _words_ should be treated with respect? From our earliest years we are taught to revere only the outward and visible. Ghastly literary types maundering on about books as "objects". Yes, that does happen to be a first edition. A present from Noel Annan, as a matter of fact. But I assure you that a foul yellow _livre de poche_ would have been just as useful to me. Not that I fail to appreciate Noel's generosity. A book is a piece of technology. If people wish to amass them and pay high prices for this one or that, well and good. But they can't pretend that it is any higher or more intelligent a calling than collecting snuff-boxes or bubble-gum cards. I may read a book, I may use it as an ashtray, a paperweight, a doorstop or even as a missile to throw at silly young men who make fatuous remarks. So. Think again.' And Adrian had thought again.

@ -3,7 +3,6 @@ title: "Cloudflare Tunnel DNS"
date: 2022-08-22T16:05:39-07:00
tags:
- homelab
- k8s
- meta
---

@ -1,11 +0,0 @@
---
title: "Conditional Cleanups in Pytest"
date: 2024-04-28T16:55:37-07:00
tags:
- python
- testing
---
A helpful pattern in testing is to take some cleanup action _only_ if the test passes/fails. For instance, for a test which interacts with an on-filesystem database, the database should be deleted if the test passes, but it should stick around if the test fails so that the developer can examine it and debug.
<!--more-->
In JUnit, this is possible [via a `@Rule`](http://www.thinkcode.se/blog/2012/07/08/performing-an-action-when-a-test-fails), but as far as I can tell there's no pre-built equivalent in Python's `pytest`. I did find [this StackOverflow answer](https://stackoverflow.com/a/69283090/1040915) describing an approach using the [`pytest_runtest_makereport`](https://docs.pytest.org/en/latest/reference/reference.html#pytest.hookspec.pytest_runtest_makereport) hook, though the syntax appears to have changed since that answer. I put together an example implementation [here](https://gitea.scubbo.org/scubbo/pytest-conditional-cleanup-demo), which also adds the ability for fixtures _and_ tests to add "cleanup" actions to a stack, which will be executed in reverse order.

@ -3,9 +3,6 @@ title: "Edit Command Line in Zsh"
date: 2022-07-10T00:25:21-07:00
---
**EDIT 2024-04-16**: turns out that there's a [built-in](https://www.computerhope.com/unix/uhistory.htm), `fc`, which does basically the same thing, though it edits the command that was _just entered_ (which is typically what you want when you encounter an error or want to do "the next thing", anyway).
While reading through my dotfiles, I found some [configuration](https://github.com/scubbo/dotfiles/blob/690f907f9ae36e36fed9851eac3a4ff2c20d7905/zshrc-local-mactop#L144-L147)[^1] that didn't seem to be working - it claimed that `<ESC>,v` would allow editing of the current line in vim, but that didn't seem to work. I guess I'd copied that from some other configuration and lost patience with trying to get it working, or that it relied on some other configuration option which had been broken[^2]. I dug in to find out more. ([This article](https://thevaluable.dev/zsh-line-editor-configuration-mouseless/) was invaluable!)
<!--more-->
## Intention
@ -56,4 +53,4 @@ bindkey -M viins '^U' kill-whole-line
[^1]: Still in Github until I fully migrate to my self-hosted [Gitea](https://gitea.scubbo.org/) instance. I'm cautious of a circular dependency here - Gitea would need to be up-and-available to source dotfiles, but dotfiles would be referenced as part of the setup process for hosts (including the one that runs the Gitea instance).
[^2]: An idea - regression testing for dotfiles? Don't tempt me...
[^3]: The [article](https://thevaluable.dev/zsh-line-editor-configuration-mouseless/) says that the ZLE _is_ the command prompt, which...seems unlikely to me? I would think that the ZLE is a part _of_ the command prompt, but not all of it? Although the article contains a lot of useful information and insight, it also has some rather loose and imprecise statements, so I'm not sure how much to trust this.
[^4]: `bindkey -v` is an alias for `bindkey -A viins main` - in ZLE, you don't set a keymap as active, instead you set a keymap as an alias for `main`, and I think that's beautiful.
[^4]: `bindkey -v` is an alias for `bindkey -A viins main` - in ZLE, you don't set a keymap as active, instead you set a keymap as an alias for `main`, and I think that's beautiful.

@ -3,7 +3,6 @@ title: "Grafana Oncall"
date: 2022-09-13T10:52:53-07:00
tags:
- homelab
- k8s
- observability
---

@ -1,51 +0,0 @@
---
title: "Jsonnet in Argocd"
date: 2024-03-12T18:55:26-07:00
draft: true
tags:
- CI/CD
- communication
- end-of-year-wrapups
- homelab
- information-management
- k8s
- leisure
- mathematics
- MentalHealth
- meta
- observability
- politics
- productivity
- programming-challenges
- programming-language-design
- reading
- real-life
- rust
- SDLC
- short-thoughts
- snippets
- transhumanism
- web3
- wordle
---
This is the introduction
<!--more-->
And this is the rest of the content
Notes for post:
* [Main page](https://jsonnet.org)
* [Helpful reference](https://dev.to/kubeden/jsonnet-adventures-deploying-our-application-to-argocd-4fk2)
* [ArgoCD diff](https://codefresh.io/blog/argo-cd-preview-diff/)
* Gotcha that the `argocd` CLI tool doesn't work unless you are in the right Kubernetes namespace - otherwise you get `FATA[0000] error retrieving argocd-cm: configmap "argocd-cm" not found`
<!--
Reminders of patterns you often forget:
Images:
![Alt-text](url "Caption")
Internal links:
[Link-text](\{\{< ref "/posts/name-of-post" >}})
(remove the slashes - this is so that the commented-out content will not prevent a build while editing)
-->

@ -1,19 +0,0 @@
---
title: "Keycloak Backup"
date: 2024-04-06T17:34:34-07:00
tags:
- homelab
- keycloak
- k8s
---
Setting up regular backup for my [Keycloak installation]({{< ref "/posts/oidc-on-k8s" >}}) was a lot trickier than I expected!
<!--more-->
Although there is a `kc.sh export` command on the image, there's a [long-standing bug](https://github.com/keycloak/keycloak/issues/14733) whereby the export process and the server clash for the same port. I went [down the rabbit-hole](https://github.com/keycloak/keycloak/issues/28384) with the Keycloak folks trying to workaround that - only to realize that, because the image doesn't come with `cron` installed, I wouldn't be able to schedule the `kc.sh export` on the main pod _anyway_, but would have to schedule it externally.
A [Kubernetes CronJob](https://kubernetes.io/docs/concepts/workloads/controllers/cron-jobs/) was the obvious solution - but there were several hoops to jump through first:
* I needed to replicate an `initContainer` which, so far as I can tell, just [copies a directory](https://github.com/bitnami/charts/blob/main/bitnami/keycloak/templates/statefulset.yaml#L100) into [a PV](https://github.com/bitnami/charts/blob/main/bitnami/keycloak/templates/statefulset.yaml#L113-L115), only for that same PV [to get mounted at the original path again in the main container](https://github.com/bitnami/charts/blob/main/bitnami/keycloak/templates/statefulset.yaml#L284-L286)
* I couldn't just run `kc.sh export` as I had on the primary pod, but had to explicitly pass `--db`, `--db-username`, and `--db-password`. This is _probably_ self-evident if you understand the architecture of Keycloak, but wasn't obvious to me - the initial attempts to export from the main pod were failing because of a port clash, so "obviously" (scare-quotes because this is apparently wrong!) it was pulling data from some external datasource rather than from a local source.
* Since I was having the CronJob write out to an NFS-mounted volume (on my NAS), I needed to specify `securityContext.runAsUser` and `securityContext.fsGroup` on the `container`, and ensure that the corresponding values were set on the directory in the NAS's local filesystem, otherwise the CronJob would be denied write permission (thanks to [this SO question](https://stackoverflow.com/questions/50156124/kubernetes-nfs-persistent-volumes-permission-denied) for helping me figure this out - NFS permissions are beginning to make sense to me, but I'm still getting my head around it!)
My solution is [here](https://gitea.scubbo.org/scubbo/helm-charts/src/branch/main/app-of-apps/keycloak-backup.yaml). It's not perfect (I'd love to find a way to run `$(date +%s)` in the `args` to name the files according to date, and this setup breaks the neat "app-of-apps" setup I have going because this didn't seem deserving of a full Chart setup), but it works! It'd be really cool to contribute this to the [Bitnami Chart](https://github.com/bitnami/charts/tree/main/bitnami/keycloak) - I'm imagining a `backup` namespace in the `values.yaml` specifying schedule, persistent volume specs, and realms. Shouldn't be _too_ hard...

@ -1,88 +0,0 @@
---
title: "OIDC on K8s"
date: 2024-04-01T20:36:32-07:00
tags:
- dns
- homelab
- keycloak
- k8s
---
I just configured OIDC login for the first service on my Homelab.
<!--more-->
The first step was picking a provider - but thanks to the [awesome self-hosting guide](https://github.com/awesome-foss/awesome-sysadmin?tab=readme-ov-file#identity-management---single-sign-on-sso), I'd already narrowed it down to a shortlist, and [this post](https://old.reddit.com/r/selfhosted/comments/ub7dvb/authentik_or_keycloak/) helped me pick Keycloak over Authentik.
Unusually, there's no Helm chart listed in the [getting started guide](https://www.keycloak.org/getting-started/getting-started-kube), but the old standby of [Bitnami](https://github.com/bitnami/charts/tree/main/bitnami/keycloak) had an offering (though they did weirdly change the admin username from `admin` to `user`, which threw me off at first). [Installation via GitOps](https://gitea.scubbo.org/scubbo/helm-charts/commit/1d56a131b71315fb3c1fb2a3b2b39d099b0f605d) was a breeze now that I'm using [jsonnet](https://jsonnet.org/) to extract common Application setup boilerplate - though I did have to upgrade my ArgoCD installation from `2.7` to `2.10` to make use of `valuesObject` configuration.
The first application I wanted to integrate was Argo itself[^jellyfin-plugin], and thankfully there's a step-by-step [guide](https://argo-cd.readthedocs.io/en/stable/operator-manual/user-management/keycloak/) available, which..._mostly_ worked[^realm].
## [It's not DNS...](https://www.cyberciti.biz/media/new/cms/2017/04/dns.jpg)
I did run into a problem, though - I'd entered an override on my OpnSense router (running Unbound DNS) for `keycloak.avril`[^avril] pointing to the k8s cluster, so that I could access it from my browser - but, apparently, the pods on the cluster don't delegate to that resolver, so I got an error `Failed to query provider "http://keycloak.avril/realms/avril": Get "http://keycloak.avril/realms/avril/.well-known/openid-configuration": dial tcp: lookup keycloak.avril on 10.43.0.10:53: no such host` when trying to login via SSO. At first I tried setting the `issuer` value in Argo's `oidc.config` to `http://keycloak.keycloak` rather than `http://keycloak.avril` (i.e. using the k8s internal DNS name for the service), which allowed Argo to talk to Keycloak, but then gave a DNS error when my _browser_ tried to connect to that host. I could have worked around this by also setting a `keycloak.keycloak` DNS override on the OpnSense Unbound resolver, but that felt hacky - and, besides, I wanted to understand Kubernetes' DNS setup a little better.
[This SO answer](https://stackoverflow.com/a/65338650/1040915) looked promising as a way to set overrides for k8s' CoreDNS - but, since my ConfigMap already had a `hosts` entry (presumably provided by [k3s](https://k3s.io/)?):
```
data:
Corefile: |
.:53 {
errors
health
ready
kubernetes cluster.local in-addr.arpa ip6.arpa {
pods insecure
fallthrough in-addr.arpa ip6.arpa
}
hosts /etc/coredns/NodeHosts {
ttl 60
reload 15s
fallthrough
}
prometheus :9153
forward . /etc/resolv.conf
cache 30
loop
reload
loadbalance
import /etc/coredns/custom/*.override
}
import /etc/coredns/custom/*.server
NodeHosts: |
192.168.1.70 host1
192.168.1.71 host2
...
```
I got an error `plugin/hosts: this plugin can only be used once per Server Block` when trying to add another (I'm not sure why that restriction exists tbh - the [docs](https://coredns.io/plugins/hosts/) make it clear that the plugin can be limited to a zone, so it seems reasonable to have multiple entries for multiple zones?). Handily, though, the plugin also allows listing overrides inline, so I was able to add an entry for `keycloak.avril` and everything worked as-desired!
```
...
hosts /etc/coredns/NodeHosts {
192.168.1.70 keycloak.avril
ttl 60
reload 15s
fallthrough
}
...
...
```
That worked, but still felt hacky. Now I was managing DNS overrides in two places rather than one. The docs do list a `forward` [plugin](https://coredns.io/manual/configuration/#forwarding) which looks like it should do what I want - but adding that (and removing the manual override in `hosts`):
```
...
forward avril 192.168.1.1 # My OpnSense router IP
forward . /etc/resolv.conf
...
```
...gave a slightly different error `failed to get token: Post "http://keycloak.avril/realms/avril/protocol/openid-connect/token": dial tcp: lookup keycloak.avril on 10.43.0.10:53: no such host` during callback in the OIDC process. Even opening up the `forward` to operate for all names (`.`) failed (though in this case it was back to the `Failed to query provider...` error).
🤷🏻 at some point you've just gotta take a working solution (the inlined entry in `hosts`) and move forwards with it! This duplication isn't _too_ bad - I doubt there'll be _another_ system (other than OIDC) where I'll need both pods and my browser to be able to use the same DNS name. If there is, I'll return to this problem and try to crack it.
I do also see an `import /etc/coredns/custom/*.override` line in that configuration, which would be another promising avenue of investigation - and, hey, if I realize my intention of managing Unbound DNS entries via [Crossplane](https://www.crossplane.io/), both the Router Overrides and the CoreDNS configuration could be generated from the same source.
[^jellyfin-plugin]: Jellyfin would probably be next, though it looks like that's not natively supported and requires a [plugin](https://github.com/9p4/jellyfin-plugin-sso), or maybe "Keycloak + OpenLDAP" as per [here](https://old.reddit.com/r/selfhosted/comments/ed1z9e/sso_with_authorization_for_jellyfin_ombi_sonarr/fbffkfp/) - though at this point I haven't researched the difference between LDAP and SSO.
[^realm]: _Don't_ follow their instructions to work in the default realm `master`, though! [Keycloak docs](https://www.keycloak.org/docs/latest/server_admin/#the-master-realm) make it clear that you should "_Use the `master` realm only to create and manage the realms in your system._"
[^avril]: As long-time readers will remember, the name that my partner and I use for our house - and so, the name I use for any domain/realm/namespace/zone on our network - is "Avril", because the house purchase process _went and made things so complicated..._

@ -1,6 +1,7 @@
---
title: "Pre-Pipeline Verification, and the Push-And-Pray Problem"
date: 2023-11-23T16:26:06-08:00
date: 2023-11-17T19:49:06-08:00
draft: true
tags:
- CI/CD
- SDLC
@ -13,41 +14,50 @@ It's fairly uncontroversial that, for a good service-deployment pipeline, there
The purpose of this testing is clear: it asserts ("_verifies_") certain correctness properties of the service version being deployed, such that any version which lacks those properties - which "is incorrect" - should not be deployed to customers. This allows promotion to be automated, reducing human toil and allowing developers to focus their efforts on development of new features rather than on confirmation of the correctness of new deployments.
<!--more-->
There's plenty of interesting nuance in the design of in-pipeline testing stages, but in this post I want to talk about the testing you do _before_ a pipeline - and, particularly, why it's important to be able to run Deployed Tests before submitting code.
There's plenty of interesting nuance in the design of in-pipeline testing stages - and although this article isn't _specifically_ about pipeline design, before moving on to the main premise I need to establish one related concept; that of Prod Fidelity.
## Definition of Deployed Testing
## Definition of Prod Fidelity
Categories of test are a fuzzy taxonomy - different developers will inevitably have different ideas of what differentiates a Component Test from an Integration Test, or an Acceptance Test from a Smoke Test, for instance - so, in the interests of clarity, I'm here using (coining?) the term "Deployed Test" to denote a test which can _only_ be meaningfully carried out when the service is deployed to hardware and environment that resembles those on/in which it runs in production. These typically fall into two categories:
* Tests whose logic exercises the interaction of the service with other services - testing AuthN/AuthZ, network connectivity, API contracts, and so on.
* Tests that rely on aspects of the deployed environment - service startup configuration, Dependency Injection, the provision of environment variables, nuances of the execution environment (e.g. Lambda's Cold Start behaviour), and so on.
We can think of the deployments[^twelve-factor-app] of an app as being characterized by selecting a set of values for some configuration variables - variables like "_fleet size_", "_stages used of dependency services_", and, most impactfully, "_what Load Balancer fronts this deployment (and is it one that serves traffic from paying, production customers)?_"[^deployed-image-should-not-be-gitops]. A deployment which perfectly mimics production _is_ production (and so is unsuitable for **pre-production** testing[^test-on-prod]); but, the more a deployment differs from production, the more likely that it will give misleading testing results. Some illustrative examples:
Note that these tests don't have to _solely, specifically, or intentionally_ test characteristics of a prod-like environment to be Deployed Tests! Any test which _relies_ on them is a Deployed Test, even if that reliance is indirect. For instance, all Customer Journey Tests - which interact with a service "as if" a customer would, and which make a sequence of "real" calls to confirm that the end result is as-expected - are Deployed Tests (assuming they interact with an external database), even though the test author is thinking on a higher logical level than confirming database connectivity. The category of Deployed Tests is probably best understood by its negation - any test which uses mocked downstreams, and/or which can be simply executed from an IDE on a developer's workstation without any deployment framework, is most likely not a Deployed Test.
* Consider an overall system change C which is implemented by change A in service Alpha and by change B in service Beta, where Alpha depends on Beta. Assume that B is deployed to Beta's `pre-prod` stage, but not to Beta's `prod` stage. Consider a test (for behaviour implemented by C) which executes against a deployment of Alpha which a) has A deployed, and which b) hits Beta's `pre-prod` stage. This test will pass (the Alpha deployment has A, and the dependency deployment has B), but it would be incorrect to conclude from that passing test that "_it is safe to promote this version of Alpha to production_" - because Alpha's `prod` depends on Beta's `prod`, and the test made no assertion about whether B was deployed to Beta's `prod`. Thus, in general, the testing stage which makes the final "_is this version safe to promote to production?_" verification should depend on the production deployments of its dependencies.
Note also that, by virtue of requiring a "full" deployment, Deployed Tests typically involve invoking the service via its externally-available API, rather than by directly invoking functions or methods as in Unit Tests.
![Diagram of Promotion Testing](/img/Promotion-Testing.drawio.png "In Situation 1, Alpha's Pre-Prod depends on Beta's Pre-Prod, so a test of functionality requiring System Change C will pass on Pre-Prod; but if Change A is promoted to Alpha's Prod (Situation 2), the behaviour will fail, because Change B is not on Beta's Prod. Conversely, if Alpha's Pre-Prod depends on Beta's Prod (as in Situation 3), then the same test on Alpha's Pre-Prod will **correctly** fail until B is promoted to Beta's Prod")
* Non-prod deployments which are solely intended for testing might disable or loosen authentication, load-shedding/throttling, or other "non-functional" aspects of the service. While this can be sensible and justified if it leads to simpler operations, it can lead to blind-spots in testing around those very same aspects.
* Load Testing results must be interpreted with caution where the configuration of the deployments _and that of its dependencies_ does not match the configuration of `prod`. Even assuming that a service can handle traffic that scales linearly in the compute-size of the service (a justifiable though often-incorrect assumption), scaling your `prod` by a factor of N compared with your load-testing deployment does not guarantee you can handle N-times the traffic if your dependencies are not similarly scaled!
You've probably already guessed, but, to be explicit - I define **Prod Fidelity** to mean "_the degree to which a deployment matches Prod's configuration_". This is not a universally objectively quantifiable value - I cannot tell you whether "_using the same AMIs as_ `prod`" is more or less impactful to Prod Fidelity for your service than "_having_ `DEBUG`_-level logging enabled_" - but, I suspect that _you_ have a decent idea of the relative importance of the particular variables for your service.
For the purposes of this article, it's not important to be able to give a number to Prod Fidelity - just to be able to compare it, to state that a given deployment has higher or lower Prod Fidelity than another. Generally speaking, as a software version progresses through the SDLC, it will be executed on deployments of increasing Prod Fidelity:
* Detecting logical errors (rather than errors in configuration, deployment, or infrastructure) _usually_ doesn't require high Prod Fidelity. High Prod Fidelity is generally more expensive - either in literal financial expense (running a deployment with an equal volume of equally-powerful compute hardware to Prod is more expensive than running a small set of "_good-enough to run tests on_"), or in operational complexity (a deployment which closely mimics Production in terms of functionality will require all the same functionality maintenance - authentication providers, certificate management, and so on.). _Ceteris Paribus_, it's preferable if an error can be detected _before_ the change
TK....hmmm. Maybe I a) need to reconsider this point (is there really value in a pipeline beyond Alpha/Beta/Gamma/Load-Test/One-Box/Prod), and b) should just cut this out entirely. But preserve it - it's an interesting idea (and good writing, and especially a good diagram!), but maybe not necessary to _this_ post.
## When do we do Deployed Testing? When _should_ we do it?
(consider load testing results, or tests which rely on incompletely-deployed behaviour in dependencies when the testing stages don't hit production dependencies). Given that tension, how closely should your testing stages mimic production? For stages which closely mimic production and which "_talk to_" production downstreams and datastores, how do you mark test traffic such that it doesn't distort those datasets or generate real financial transactions while still providing a high-fidelity test?
Deployed Testing most naturally occurs in the CD pipeline for the service. If you were to list the desired properties of a pipeline, right at the top would be "_It builds the application and deploys it to production_", but right below that would be "_...but before doing so, it deploys to a testing stage and runs tests to make sure the deployment to production will be safe_".
However! All too often I see this being the _only_ way that teams are able to run Deployed Tests - that they are literally unable to:
* create a deployment of the application whose artifact was built from the state of code currently on their local development machine
* run a Deployed Test suite against that deployment, where the logic of the tests again is determined by the state of code on their machine
The thinking seems to be that Deployed Tests will be executed in the pipeline _anyway_, so there's no point in running them beforehand - any "bad changes" will get caught and rolled back, so production will be protected. And this is true! But, by leaving the detection of issues until the last minute - when the change is _in_ the (single-threaded) pipeline and when any test failures will block other changes coming down the pipe; when other developers may have started developing against the changes already merged - the disruption of a failure is significantly higher. For low-confidence changes which relate to properties that are only testable in a Deployed Environment, developers have to "Push And Pray" - "_I **think** that this change is correct, but I have no way of verifying it, so I need to push it into the pipeline before I can get any feedback_". This cycle - push, observe failed test results, make local change, push again - might repeat multiple times before they get to working code, during which time the whole pipeline is unusable. They are effectively making the whole pipeline their personal development environment, blocking anyone else from deploying any changes or even making any code changes which depend on their (unstable) merged code.
TK Prod Fidelity increases
It's a small amount of extra effort, but it's _entirely_ worthwhile to set up the ability described in the preceding bullet points, whereby developers can run locally-defined tests against a locally-defined service[^running-locally] before even proposing the change for merging to `main`. Note that this testing is worthwhile in both directions - not only can the dev run existing tests against a new AppCode change to confirm that it's correct, but they can also run a new version of the **Test**Code against existing AppCode to ensure that it operates as-expected!
## Definition of Deployed Testing
Categories of test are a fuzzy taxonomy - different developers will inevitably have different ideas of what differentiates a Component Test from an Integration Test, or an Acceptance Test from a Smoke Test, for instance - so, in the interests of clarity, I'm here using (coining?) the term "Deployed Test" to denote a test which can _only_ be meaningfully carried out when the service is deployed to hardware and environment that resembles those on/in which it runs in production. These typically fall into two categories:
* Tests whose logic exercises the interaction of the service with other services - testing AuthN/AuthZ, network connectivity, API contracts, and so on.
* Tests that focus _on_ aspects of the deployed environment - service startup configuration, Dependency Injection, the provision of environment variables, nuances of the execution environment (e.g. Lambda's Cold Start behaviour), and so on.
Note that these tests don't have to _solely, specifically, or intentionally_ test characteristics of a prod-like environment to be Deployed Tests! Any test which _relies_ on them is a Deployed Test, even if that reliance is indirect. For instance, all Customer Journey Tests - which interact with a service "as if" a customer would, and which make a sequence of "real" calls to confirm that the end result is as-expected - are Deployed Tests (assuming they interact with an external database), even though the test author is thinking on a higher logical level than confirming database connectivity. The category of Deployed Tests is probably best understood by its negation - any test which uses mocked downstreams, and/or which can be simply executed from an IDE on a developer's workstation without any deployment framework, is most likely not a Deployed Test.
Note also that, by virtue of requiring a "full" deployment, Deployed Tests typically involve invoking the service via its externally-available API, rather than by directly invoking functions or methods as in Unit Tests.
## Ephemeral Environments are great, but are not enough
Typically, a change which proceeds through the SDLC will undergo testing which has higher Prod Fidelity
A closely-related topic is "_building and deploying the code associated with a Pull Request, running tests against it (and reporting on them in the Pull Request), and providing a URL where stakeholders can experimentally interact with the service (or, more commonly, website)_" (I don't know of a general term for this, but it's called "Ephemeral Environments" at my current workplace, hence the section title). This is a great practice! Anything you can do to give high-quality testing _early_ in the SDLC - critically, _before_ merging into `main` (after which the impact of a rollback or correction is much higher) - is valuable, particularly if it involves getting explicit signoff from a stakeholder that "_yep, that was what I expected from this change_".
On the spectrum of Prod Fidelity (see the footnote[^multiple-footnote-link] linked from the second paragraph), Deployed Testing falls more towards the high-fidelity end.
However, there should be no need to involve a remote repository system (GitHub etc.) in the process of creating and testing a personal deployment. It _works_, but it's an extra step of unnecessary indirection:
* For any non-Cloud-based system, running an instance of the application from code you have built locally should be trivial - if it's not just `docker build ... && docker run ...`, there should be a very small number of scriptable steps.
* Even for apps that deploy to AWS, GCP, or another Cloud Provider, it should be possible to locally-build AppCode updates, and push the Docker image (or other artifact) to your personal testing deployment without getting GitHub/CodeCommit/CodePipeline involved.
* Testing of infrastructure changes are a little trickier, but depending on your IaC configuration _could_ still be possible - though at that point the creation of a deployment pipeline _for_ a personal testing environment is probably worthwhile.
TK differntiate from Ephemeral Environments for acceptance
Don't get me wrong, PR-related Ephemeral Environments are excellent for what they are, and I heartily recommend them - but if you don't know how to build and deploy your application _from your laptop_ without getting GitHub involved, you probably don't know[^knowledge-is-distributed] it well enough to properly operate it at all. Or, you may be [over-applying GitOps](https://fosstodon.org/@scubbo/111112129591386185) under the mistaken assumption that _nothing_ about _any_ system, _anywhere_, should _ever_ be changed triggered by _anything_ except by a change to a Git repo. That's not even true for production systems[^not-everything-is-gitops], so it's _certainly_ not true for development systems which have made the trade-off of flexibility and agility at the cost of stability. By all means insist on a rigorous, centralized, standardized, high-confidence, reproducible, audit-logged process (i.e. a GitOps-y one) for everything _after_ "merge to `main`" (and _especially_ regarding "deploy to `prod`") - but, for everything before that point in the SDLC, prefer agility and fast-feedback with as few moving parts as possible.
[^running-locally]: ideally, but not necessarily, _running_ locally as well - though if there are aspects of the deployment environment that mean this is impractical (like depending on Cloud resources, large scale, or particular architecture), this isn't necessary
[^knowledge-is-distributed]: where the definition of "know" is a little fuzzier than just "_have the knowledge immediately to-hand in your mind_". If that "knowledge" consists of "_I know the script I need to run_", then that's good enough for me - it can live in your "_exobrain_", the collection of cognition- and memory-enhancing/supporting tools and structures that you use to augment your natural human brain.
[^not-everything-is-gitops]: when a customer changes their settings in the Web UI, is that change stored into a Git Repo before being reflected? No, it just gets written to a database? OK, so you acknowledge that _some_ properties of the system can have authoritative sources that are not Git repos - now we're just quibbling about where the appropriate dividing line is drawn. Personally I have long believed that "_which image/version is deployed to which stage of a pipeline?_" is properly viewed as an emergent runtime property of the-pipeline-viewed-as-a-software-system-itself, rather than a statically-(Git-)defined property of the application - it is State rather than Structure - but to fully explore that deserves its own post.
[^twelve-factor-app]: I'm here using the definition from the [Twelve-Factor App](https://12factor.net/), that a Deploy(ment) is "_a running instance of the app[...]typically a production site, and one or more staging sites._". Personally I don't _love_ this definition - the intuitive meaning of "Deployment" for me is "_the act of updating the executable binaries on a particular fleet/subset of execution hardware, to a newer version of those binaries_", and I'm generally loath to use a term whose term-of-art meaning significantly differs from (i.e. is not a sub/super-set of) the intuitive meaning unless there's clear value to doing so. In particular, I'm not aware of an alternative term for the process of "updating the binaries", leading to the confusing possible statement "_I'm making a deployment of version 3.2 to the_ `pre-prod` _deployment_". However, the Twelve-Factor definition appears to be widely-used, and my best alternative "_stage_" only really applies within a pipeline, so I'll attempt to use it in an unambiguous way[^not-environment].
[^not-environment]: "Environment" - perhaps the most overloaded term in all software engineering, even worse than "Map" - is not even in the running as an alternative.
[^deployed-image-should-not-be-gitops]: I remain convinced that "_what image is deployed to this deployment?_" is _not_ a configuration variable defining a deployment, but rather is an emergent runtime property of the deployment pipeline regarded as an operating software system; it should be considered as State, not Structure. See my [previous article]({{< ref "/posts/ci-cd-cd, oh my" >}}) for more exploration of this - though, since it's been over a year since I wrote that, and I've now had experience of using k8s/Argo professionally, I'm long-overdue for a follow-up (spoiler alert - I think I was right the first time ;) ).
[^test-on-prod]: Another interesting topic that this post doesn't touch on - should you test on Production? (TL;DR - yes, but carefully, and not solely :P )

@ -1,97 +0,0 @@
---
title: "Project Management and Async Functions"
date: 2024-02-20T21:32:49-08:00
tags:
- homelab
- programming-language-design
- SDLC
---
In my greatest display yet of over-engineering and procrastinating-with-tooling, I've started self-hosting [OpenProject](https://www.openproject.org/) to track the tasks I want to carry out on my homelab (and their dependencies).
<!--more-->
![Screenshot of the OpenProject UI](/img/open-project-screenshot.png "Pictured - a very normal and rational and sensible thing to do")
Annoyingly, I didn't find out until _after_ installation that this system [lacks the main feature](https://community.openproject.org/topics/8612) that made me want to use a Project Management Solution™ over a basic old Bunch Of Text Files - dependency visualization and easy identification of unblocked tasks.
Fortunately, the system has an API (of course), and some time later I'd whipped up this little "beauty" to print out all the unblocked tasks (i.e. all those I could start work on immediately):
```python
#!/usr/bin/env python
import json
import os
import requests
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
PROJECT_ID=<id>
BASE_URL='http://url.to.my.openproject.installation'
API_KEY=os.environ['API_KEY']
def main():
all_tasks = _req(f'api/v3/projects/{PROJECT_ID}/work_packages')['_embedded']['elements']
unblocked_tasks = [
{
'id': elem['id'],
'title': elem['subject'],
'href': f'{BASE_URL}/work_packages/{elem["id"]}'
} for elem in all_tasks
if _task_is_unblocked(elem['id'])
]
print(json.dumps(unblocked_tasks, indent=2))
def _task_is_unblocked(task_id: int) -> bool:
relations_of_task = _req(f'api/v3/work_packages/{task_id}/relations')['_embedded']['elements']
urls_to_blockers_of_task = [
relation['_links']['from']['href']
for relation in relations_of_task
if relation['type'] == 'blocks'
and relation['_links']['to']['href'].split('/')[4] == str(task_id)]
return all([
_req(url)['_embedded']['status']['isClosed']
for url in urls_to_blockers_of_task])
def _req(path: str):
return requests.get(f'{BASE_URL}/{path}', auth=('apikey', API_KEY), verify=False).json()
if __name__ == '__main__':
main()
```
(Yes, I haven't installed TLS on my cluster yet. The task's right there in the screenshot, see!?)
This is, of course, inefficient as can possibly be\[citation needed\], as it doesn't use any parallelization for the _many_ network calls, nor any caching of often-referenced data. That's fine for now, as N is going to be real small for quite some time.
## Async functions as first-class language design
That actually gets me onto a different topic. For some years now I've enjoyed, shared, and referenced the [What Color Is Your Function?](https://journal.stuffwithstuff.com/2015/02/01/what-color-is-your-function/) article, which (spoilers - seriously, if you are a software engineer, go read it, it's good!) points out the ways in which building-in `async` functions to a language make things really awkward when the rubber hits the road. For a long time I really resented this `async` annotation (which I first encountered in JavaScript, but I then found out it has spread to [Python](https://docs.python.org/3/library/asyncio-task.html), too), as to me it seemed like unnecessary extra overhead - why should I have to annotate _every_ function in my call-stack with `async` just because they called an asynchronous function at some point in the stack? Why, in the following snippet, does `top_level()` have to be `async`, when all it's doing is a synchronous operation on an blocking function?
```python
#!/usr/bin/env python
import asyncio


# Definitions ordered bottom-up: the "actually-async" leaf first, then the
# callers that are forced to become async purely by calling it.
async def bottom_level():
    # Imagine that this called out to the network
    # or did some other actually-async operation
    return 1


async def mid_level():
    return await bottom_level() + 1


async def top_level():
    # Prints 4: (1 + 1) * 2
    print(await mid_level() * 2)


if __name__ == '__main__':
    asyncio.run(top_level())
```
I recently read [this article](https://blainehansen.me/post/red-blue-functions-are-actually-good/) which made the interesting case that `async` should be thought of as a member of the Type System, surfacing information about the behaviour of the associated function:
> Colored functions reveal important realities of a program. Colored functions are essentially a type-system manifestation of program effects, all of which can have dramatic consequences on performance (unorganized io calls can be a latency disaster), security (io can touch the filesystem or the network and open security gaps), global state consistency (async functions often mutate global state, and the filesystem isn't the only example), and correctness/reliability (thrown exceptions are a program effect too, and a `Result` function is another kind of color). Colored functions don't "poison" your program, they inform you of the _reality that your program itself_ has been poisoned by these effects.
I...can see where they're coming from, I guess? According to this viewpoint, `mid_level` should still be declared as `async`, even though it `await`s the actually-asynchronous function, because...the "network-call-ingness" of `bottom_level` propagates up to `mid_level`? I hadn't thought of it that way, but I can see that that's true. My local definition of `mid_level` does nothing asynchronous, but asynchronicity is transitive.
Not gonna lie, though, I still find the experience of writing `async`-ified code really frustrating. I begin writing out my logic in terms of (normal) functions and their interactions, traversing down/through the logic tree from high-level concepts down to implementations of API calls - at which point I hit a network call, and then am forced to traverse back up the tree scattering `async`/`await`s everywhere where I previously had normal function declarations and invocations. Maybe - and I'm half-joking, half-serious here - I should just _start_ writing the program "as if" it was going to be asynchronous in the first place? I wonder what would change then.

@ -1,47 +0,0 @@
---
title: "PVC Debug Pod"
date: 2024-03-04T22:05:41-08:00
tags:
- k8s
---
I've been annoyed sufficiently-often by the fact that there is no single `kubectl` command to "_create a pod, and attach a PVC to it_" that I threw together the following script:
<!--more-->
```bash
#!/bin/bash
# Create a throwaway debug pod with a (fzf-selected) PVC mounted at /mnt/pvc,
# wait for it to become Ready, and drop into a shell inside it.
set -ex

# This script assumes the existence and correct configuration of `kubectl` and `fzf`.

# TODO - cool feature would be to grab namespaces with `kubectl get ns` and pipe through `fzf` to select - but, 99% of the time, this'll just be for the current namespace anyway
PVC_TO_MOUNT=$(kubectl get pvc --no-headers | awk '{print $1}' | fzf)

POD_CREATE_OUTPUT=$(cat <<EOF | kubectl create -f -
apiVersion: v1
kind: Pod
metadata:
  generateName: debug-pod-
spec:
  volumes:
    - name: pvc
      persistentVolumeClaim:
        claimName: $PVC_TO_MOUNT
  containers:
    - name: debug-container
      image: ubuntu
      command: [ "/bin/bash", "-c", "--" ]
      args: [ "while true; do sleep 30; done;" ]
      volumeMounts:
        - mountPath: "/mnt/pvc"
          name: pvc
EOF
)

# `kubectl create` prints e.g. "pod/debug-pod-abcde created"; the first field
# is the slash-qualified name, which both `wait` and `exec` accept.
POD_NAME=$(echo "$POD_CREATE_OUTPUT" | awk '{print $1}')
kubectl wait --for=condition=Ready "$POD_NAME"
# `--` separates kubectl's own flags from the command to run in the container;
# passing the command as a bare positional argument was removed in kubectl 1.22.
kubectl exec -it "$POD_NAME" -- /bin/bash
```
While researching it, I did find out that [Ephemeral Containers](https://kubernetes.io/docs/concepts/workloads/pods/ephemeral-containers/) are now a thing - but, given that they also don't appear to allow a PVC-mount in their `kubectl`-creation, I suspect you'd still have to create via `cat <<EOF | kubectl create`[^why-create] anyway.
[^why-create]: Why `create` and not `apply`? Because you can't use `generateName` with `apply`, and if I accidentally forget to tear down a pre-existing debug-pod I'd rather not be interrupted in what I'm doing. Arguably, though, that would be a good reminder to clean up after myself.

@ -4,7 +4,6 @@ date: 2023-02-07T19:52:44-08:00
tags:
- CI/CD
- homelab
- k8s
- observability
---

@ -3,7 +3,6 @@ title: "Secure Docker Registry"
date: 2022-07-01T21:26:32-07:00
tags:
- homelab
- k8s
---
Part of the self-hosted setup that supports this blog (along with all my other homelab projects) is a [Docker Registry](https://docs.docker.com/registry/) to hold the images built and used in the CI/CD pipeline. Recently I tried to install TLS certificates to secure interaction with the Registry, and it was a fair bit harder to figure out than I expected, so I wanted to write it up both for future-me and for anyone else struggling with the same problem.
<!--more-->

@ -3,7 +3,6 @@ title: "Self-Hosted Analytics"
date: 2022-08-02T20:23:48-07:00
tags:
- homelab
- k8s
- meta
---

@ -2,8 +2,6 @@
title: "Short Thoughts 2022-03-05"
date: 2023-03-05T19:49:27-08:00
tags:
- programming-challenges
- rust
- short-thoughts
---

@ -1,110 +0,0 @@
---
title: "Vault Secrets Into K8s"
date: 2024-04-21T19:51:06-07:00
tags:
- homelab
- k8s
- vault
---
Continuing my [recent efforts]({{< ref "/posts/oidc-on-k8s" >}}) to make authentication on my homelab cluster more "joined-up" and automated, this weekend I dug into linking Vault to Kubernetes so that pods could authenticate via shared secrets without me having to manually create the secrets in Kubernetes.
<!--more-->
As a concrete use-case - currently, in order for Drone (my CI system) to authenticate to Gitea (to be able to read repos), it needs OAuth credentials to connect. These are provided to Drone in [env variables, which are themselves sourced from a secret](https://gitea.scubbo.org/scubbo/helm-charts/src/commit/1926560274932d4cd052d2281cac82d4f33cacd3/charts/drone/values.yaml#L8-L9). In an ideal world, I'd be able to configure the applications so that:
* When Gitea starts up, if there is no OAuth app configured for Drone (i.e. if this is a cold-start situation), it creates one and writes-out the creds to a Vault location.
* The values from Vault are injected into the Drone namespace.
* The Drone application picks up the values and uses them to authenticate to Gitea.
I haven't taken a stab at the first part (automatically creating an OAuth app at Gitea startup and exporting to Vault), but injecting the secrets ended up being pretty easy!
# Secret Injection
There are actually three different ways of providing Vault secrets to Kubernetes containers:
* The [Vault Secrets Operator](https://developer.hashicorp.com/vault/tutorials/kubernetes/vault-secrets-operator), which syncs Vault Secrets to Kubernetes Secrets.
* The [Vault Agent Injector](https://developer.hashicorp.com/vault/docs/platform/k8s/injector), which syncs Vault Secrets to mounted paths on containers.
* The [Vault Proxy](https://developer.hashicorp.com/vault/docs/agent-and-proxy/proxy), which can act as a (runtime) proxy to Vault for k8s containers, simplifying the process of authentication[^provision].
I don't _think_ that Drone's able to load OAuth secrets from the filesystem or at runtime, so Secrets Operator it is!
![Vault Secrets operator](https://developer.hashicorp.com/_next/image?url=https%3A%2F%2Fcontent.hashicorp.com%2Fapi%2Fassets%3Fproduct%3Dtutorials%26version%3Dmain%26asset%3Dpublic%252Fimg%252Fvault%252Fkubernetes%252Fdiagram-secrets-operator.png%26width%3D321%26height%3D281&w=750&q=75 "Diagram of Vault Secrets Operator injection process")
The walkthrough [here](https://developer.hashicorp.com/vault/tutorials/kubernetes/vault-secrets-operator) was very straightforward - I got through to creating and referencing a Static Secret with no problems, and then tore it down and recreated via [IaC](https://gitea.scubbo.org/scubbo/helm-charts/commit/b856fd2bc5dd047ca93809bd102315cf867740d3). With that in place, it was pretty easy to (convert my [Drone specification to jsonnnet](https://gitea.scubbo.org/scubbo/helm-charts/commit/1926560274932d4cd052d2281cac82d4f33cacd3) and then to) [create a Kubernetes secret referencing the Vault secrets](https://gitea.scubbo.org/scubbo/helm-charts/commit/4c82c014f83020bad95cb81bc34767fef2c232c1). I deleted the original (manually-created) secret and deleted the Drone Pod immediately before doing so just to check that it worked - as I expected, the Pod failed to come up at first (because the Secret couldn't be found), and then successfully started once the Secret was created. Works like a charm!
## (Added 2024-04-29) Namespacing secrets
After attempting to use these Secrets for another use-case, I've run into a speed-bump: the `bound_service_account_namespaces` for the Vault role specifies which Kubernetes namespaces can use that Role to access secrets, but it's all-or-nothing - if a role is available to multiple namespaces, there's no way to restrict that a given namespace can only access certain secrets.
I haven't seen this explicitly stated, but it seems like the intended way to control access is to create a different Vault Role for each namespace (only accessible _from_ that namespace), and to grant that Vault Role only the appropriate Vault policies.
Gee, if [only](https://www.crossplane.io/) there was a way to manage Vault entities via Kubernetes...😉
(Update 2024-05-11 - see [here]({{< ref "/posts/base-app-infrastructure" >}}) for a solution!)
# Further thoughts
## Type-safety and tooling
I glossed over a few false starts and speedbumps I faced with typoing configuration values - `adddress` instead of `address`, for instance. I've been tinkering with [`cdk8s`](https://cdk8s.io/) at work, and really enjoy the fact that it provides Intellisense for "type-safe" configuration values, prompting for expected keys and warning when unrecognized keys are provided. Jsonnet has been a great tool for factoring out commonalities in application definitions, but I think I'm overdue for adopting `cdk8s` at home as well! (And, of course, using [Crossplane](http://crossplane.io/) to define the initial Vault bootstrapping required (e.g. the `/kubernetes` auth mount) would fully automate the disaster-recovery case)
Similarly, it's a little awkward that the Secret created is part of the `app-of-apps` application, rather than the `drone` application. I structured it this way (with the Vault CRDs at the top-level) so that I could extract the `VaultAuth` and `VaultStaticSecret` to a Jsonnet definition so that they could be reused in other applications. If I'd put the auth and secret definition _inside_ the `charts/drone` specification, I'd have had to figure out how to create and publish a [Helm Library](https://helm.sh/docs/topics/library_charts/) to extract them. Which, sure, would be a useful skill to learn - but, one thing at a time!
## Dynamic Secrets
I was partially prompted to investigate this because of a similar issue we'd faced at work - however, in that case, the authentication secrets are dynamically-generated and short-lived, and client apps will have to refetch auth tokens periodically. It looks like the Secrets Operator also supports [Dynamic Secrets](https://developer.hashicorp.com/vault/tutorials/kubernetes/vault-secrets-operator#dynamic-secrets), whose "_lifecycle is managed by Vault and \[which\] will be automatically rotated_". This isn't _quite_ the situation we have at work - where, instead, a fresh short-lived token is created via a Vault Plugin on _every_ secret-read - but it's close! I'd be curious to see how the Secrets Operator can handle this use-case - particularly, whether the environment variable _on the container itself_ will be updated when the secret is changed.
### Immutable Secrets - what's in a name?
There's a broader question, here, about whether the value of secrets should be immutable over the lifespan of a container. [Google's Container Best Practices](https://cloud.google.com/architecture/best-practices-for-operating-containers#immutability)[^best-practices] suggest that "_a container won't be modified during its life: no updates, no patches, no configuration changes.[...]If you need to update a configuration, deploy a new container (based on the same image), with the updated configuration._". Seems pretty clear cut, right?
Well, not really. What _is_ the configuration value in question, here? Is it the actual token which is used to authenticate, or is it the Secret-store path at which that token can be found?
* If the former, then when the token rotates, the configuration value has been changed, and so a new container should be started.
* If the latter, then a token rotation doesn't invalidate the configuration value (the path). The application on the container can keep running - but will have to carry out some logic to refresh its (in-memory) view of the token.
When you start to look at it like that, there's plenty of precedent for "higher-level" configuration values, which are interpreted at runtime to derive more-primitive configuration values:
* Is the config value "_how long you should wait between retries_", or "_the rate at which you should backoff retries_"?
* Is it "_the colour that a button should be_", or "_the name of the A/B test that provides the treatments for customer-to-colour mappings_"?
* Is it "_the number of instances that should exist_", or "_the maximal per-instance memory usage that an auto-scaling group should aim to preserve_"?
Configuration systems that allow the behaviour of a system to change at runtime (either automatically in response to detected signals, or as induced by deliberate human operator action) provide greater flexibility and functionality. This fuctionality - which is often implemented by designing an application to regularly poll an external config (or secret) store for the more-primitive values, rather than to load them once at application startup - comes at the cost of greater tooling requirement for some desirable operational properties:
* **Testing:** If configuration-primitives are directly stored-by-value in Git repos[^secrets-in-code] and a deployment pipeline sequentially deploys them, then automated tests can be executed in earlier stages to provide confidence in correct operation before promotion to later ones. If an environment's configuration can be changed at runtime, there's no guarantee (unless the runtime-configuration system provides it) that that configuration has been tested.
* **Reproducibility:** If you want to set up a system that almost-perfectly[^almost-perfect-reproduction] reproduces an existing one, you need to know the configuration values that were in place at the time. Since time is a factor (you're always trying to reproduce a system that _existed at some time in the past_, even if that's only a few minutes prior), if runtime-variable and/or pointer-based configurations are effect, you need to refer to an audit log to know the actual primitives in effect _at that time_.
These are certainly trade-offs! As with any interesting question, the answer is - "_it depends_". It's certainly the case that directly specifying primitive configuration is _simpler_ - it "just works" with a lot of existing tooling, and generally leads to safer and more deterministic deployments. But it also means that there's a longer reflection time (time between "_recording the desire for a change in behaviour in the controlling system_" and "_the changed behaviour taking effect_"), because the change has to proceed through the whole deployment process[^deployment-process]. This can be unacceptable for certain use-cases:
* operational controls intended to respond in an emergency to preserve some (possibly-degraded) functionality rather than total failure.
* updates to A/B testing or feature flags.
* (Our original use-case) when an authentication secret expires, it would be unacceptable for a service that depends on that secret to be nonfunctional until configuration is updated with a new secret value[^overlap]. Much better, in this case, for the application _itself_ to refresh its own in-memory view of the token with a refreshed one. So, in this case, I claim that it's preferable to treat "_the path at which an authentication secret can be found_" as the immutable configuration value, rather than "_the authentication secret_" - or, conversely, to invert responsibility from "_the application is told what secret to use_" to "_the application is responsible for fetching (and refreshing) secrets from a(n immutable-over-the-course-of-a-container's-lifecycle) location that it is told_"
To be clear, though, I'm only talking here about authentication secrets that have a specified (and short - less than a day or so) Time-To-Live; those which are intended to be created, used, and abandoned rather than persisted. Longer-lived secrets should of course make use of the simpler and more straightforward direct-injection techniques.
### What is a version?
An insightful coworker of mine recently made the point that configuration should be considered an integral part of the deployed version of an application. That is - it's not sufficient to say "_Image tag `v1.3.5` is running on Prod_", as a full specification should also include an identification of the config values in play. When investigating or reasoning about software systems, we care about the overall behaviour, which arises from the intersection of code _and_ configuration[^and-dependencies], not from code alone. The solution we've decided on is to represent an "application-snapshot" as a string of the form `"<tag>:<hash>"`, where `<tag>` is the Docker image tag and `<hash>` is a hash of the configuration variables that configure the application's behaviour[^configuration-index].
Note that this approach is not incompatible with the ability to update configuration values at runtime! We merely need to take an outcome-oriented view - thinking about what we want to achieve or make possible. In this case, we want an operator investigating an issue to be prompted to consider proximate configuration changes if they are a likely cause of the issue.
* Is the configuration primitive one which naturally varies (usually within a small number/range of values) during the normal course of operation? Is it a "tuning variable" rather than one which switches between meaningfully-different behaviours? Then, do not include it as a member of the hash. It is just noise which will distract rather than being likely to point to a cause - a dashboard which records multiple version updates every minute is barely more useful than one which does not report any.
* Though, by all means log the change to your observability platform! Just don't pollute the valuable low-cardinality "application version" concept with it.
* Is the configuration primitive one which changes rarely, and/or which switches between different behaviours? Then, when it is changed (either automatically as a response to signals or system state; or by direct human intervention), recalculate the `<hash>` value and update it _while the container continues running_[^does-datadog-support-this].
[^provision]: Arguably this isn't "_a way of providing secrets to containers_" but is rather "_a way to make it easier for containers to fetch secrets_" - a distinction which actually becomes relevant [later in this post](#immutable-secrets---whats-in-a-name)...
[^best-practices]: And by describing _why_ that's valuable - "_Immutability makes deployments safer and more repeatable. If you need to roll back, you simply redeploy the old image._" - they avoid the [cardinal sin](https://domk.website/blog/2021-01-31-cult-of-best-practise.html) of simply asserting a Best Practice without justification, which prevents listeners from either learning how to reason for themselves, or from judging whether those justifications apply in a novel and unforeseen situation.
[^secrets-in-code]: which is only practical for non-secret values _anyway_ - so we must _always_ use some "pointer" system to inject secrets into applications.
[^almost-perfect-reproduction]: You almost-never want to _perfectly_ reproduce another environment of a system when testing or debugging, because the I/O of the environment is part of its configuration. That is - if you perfectly reproduced the Prod Environment, your reproduction would be taking Production traffic, and would write to the Production database! This point isn't just pedantry - it's helpful to explicitly list (and minimize) the _meaningful_ ways in which you want your near-reproduction to differ (e.g. you probably want the ability to attach a debugger and turn on debug logging, which should be disabled in Prod!), so that you can check that list for possible explanations if _your_ env cannot reproduce behaviour observed in the original. Anyone who's worked on HTTP/S bugs will know what I mean...
[^deployment-process]: where the term "deployment process" could mean anything from "_starting up a new container with the new primitive values_" (the so-called "hotfix in Prod"), to "_promoting a configuration change through the deployment pipeline_", to "_building a new image with different configuration 'baked-in' and then promoting etc...._", depending on the config injection location and the degree of deployment safety enforcement. In any case - certainly seconds, probably minutes, potentially double-digit minutes.
[^overlap]: An alternative, if the infrastructure allowed it, would be an "overlapping rotation" solution, where the following sequence of events occurs: 1. A second version of the secret is created. Both `secret-version-1` and `secret-version-2` are valid. 2. All consumers of the secret are updated to `secret-version-2`. This update is reported back to the secret management system, which waits for confirmation (or times out) before proceeding to... 3. `secret-version-1` is invalidated, and only `secret-version-2` is valid. Under such a system, we could have our cake and eat it, too - secrets could be immutable over the lifetime of a container, _and_ there would be no downtime for users of the secret. I'm not aware of any built-in way of implementing this kind of overlapping rotation with Vault/k8s - and, indeed, at first thought the "callbacks" seem to be a higher degree of coupling than seems usual in k8s designs, where resources generally don't "know about" their consumers.
[^and-dependencies]: Every so often I get stuck in the definitional and philosophical rabbit-hole of wondering whether this is _entirely_ true, or if there's a missing third aspect - "_behaviour/data of dependencies_". If Service A depends on Service B (or an external database), then as Service B's behaviour changes (or the data in the database changes), then a given request to Service A may receive a different response. Is "the behaviour" of a system defined purely in terms of "_for a given request, the response should be (exactly and explicitly) as follows..._", or should the behaviour be a function of both request _and_ dependency-responses? The answer - again, as always - is "it depends': each perspective will be useful at different times and for different purposes. Now that you're aware of them both, though, be wary of misunderstandings when two people are making different assumptions!
[^configuration-index]: which requires an enumeration of said variables to exist in order to iterate over them. Which is a good thing to exist anyway, so that a developer or operator knows all the levers they have available to them, and (hopefully!) has some documentation of their [intended and expected effects](https://brooker.co.za/blog/2020/06/23/code.html).
[^does-datadog-support-this]: I should acknowledge that I haven't yet confirmed that work's observability platform actually supports this. It would be a shame if they didn't - a small-minded insistence that "_configuration values should remain constant over the lifetime of a container_" would neglect to acknowledge the practicality of real-world usecases.
<!--
Reminders of patterns you often forget:
Images:
![Alt-text](url "Caption")
Internal links:
[Link-text](\{\{< ref "/posts/name-of-post" >}})
(remove the slashes - this is so that the commented-out content will not prevent a build while editing)
-->

@ -3,7 +3,6 @@ title: "VPN on Kubernetes"
date: 2022-12-15T22:28:24-08:00
tags:
- homelab
- k8s
---
I was surprised to find that there's not much discussion of putting Kubernetes pods behind a VPN. Given how useful both tools are, you'd think more people would use them in concert.

@ -1,35 +0,0 @@
---
title: "Work in a Post Scarcity Utopia"
date: 2023-12-18T03:05:19-08:00
tags:
- snippets
---
Another snippet from Iain M. Banks' wonderful "_Use Of Weapons_", detailing the adventures of the mercenary called Zakalwe within and around the interstellar post-scarcity AI-led super-high-tech Culture. Here, we see a flashback to his cultural adjustment period after being recruited.
<!--more-->
> He walked for days, stopping at bars and restaurants whenever he felt thirsty, hungry, or tired; mostly they were automatic and he was served by little floating trays, though a few were staffed by real people. They seemed less like servants and more like customers who'd taken a notion to help out for a while.
>
> '_Of course I don't have to do this,_' one middle-aged man said, carefully cleaning the table with a damp cloth. He put the cloth in a little pouch, sat down beside him. '_But look; this table's clean._'
>
> He agreed that the table was clean.
>
> '_Usually,_' the man said, '_I work on alien - no offence - alien religions; Directional Emphasis In Religious Observance; that's my speciality...like when temples or graves or prayers always have to face in a certain direction; that sort of thing? Well, I catalogue, evaluate, compare; I come up with theories and argue with colleagues, here and elsewhere. But...the job's never finished; always new examples, and even the old ones get re-evaluated, and new people come along with new ideas about what you thought was settled...but,_' he slapped the table, '_when you clean a table, you clean a table. You feel you've done something. It's an achievement._'
>
> '_But in the end, it's still just cleaning a table._'
>
> '_And therefore does not really signify on the cosmic scale of events?_' the man suggested.
>
> He smiled in response to the man's grin, '_Well, yes._'
>
> '_But then, what_ does _signify? My other work? Is that really important, either? I could try composing wonderful musical works, or day-long entertainment epics, but what would that do? Give people pleasure? My wiping this table gives me pleasure. And people come to a clean table, which gives_ them _pleasure. And anyway,_' the man laughed, '_people die; stars die; universes die. **What is any achievement, however great it was, once time itself is dead?** Of course, if_ all _I did was wipe tables, then of course it would seem a mean and despicable waste of my huge intellectual potential. But because I choose to do it, it gives me pleasure. And,_' the man said with a smile, '_it's a good way of meeting people. So; where are you from, anyway?_'
\[Emphasis mine\]
---
God I love this. It reminds me of a classic Tumblr exchange:
![I Simply Do Not Dream Of Labor](/img/do-not-dream-of-labor.jpg "If you think that you do not dream of labor...")
![Yes You Do](/img/dream-of-labor.jpg "...it may be that you have just never experienced a way in which labor can be productive and rewarding")
I am a _sucker_ for frame challenges and for re-evaluating the actual sources of issues - "_The problem is not X - X can be useful, helpful, and desirable. The problem is the context in which X currently exists negates its beneficial effects._". Plus, y'know - Fully Automated Luxury Gay Space Communism, always and forever the goal ✊🏻

@ -11,10 +11,3 @@
{{ if .Params.math }}{{ partial "helpers/katex.html" . }}{{ end }}
<script defer data-domain="blog.scubbo.org" src="https://tracking.scubbo.org/js/plausible.js"></script>
<!--
https://gohugo.io/templates/rss/
-->
{{ with .OutputFormats.Get "rss" -}}
{{ printf `<link rel=%q type=%q href=%q title=%q>` .Rel .MediaType.Type .Permalink site.Title | safeHTML }}
{{ end }}

@ -1,8 +0,0 @@
/* Merge adjacent cell borders into single lines (avoids "double" grid lines). */
table {
border-collapse: collapse;
}
/* Visible 1px grid on every header and data cell, with a little breathing room. */
th, td {
border: 1px solid black;
padding: 3px;
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 130 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 112 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 417 KiB

36
np.sh

@ -0,0 +1,36 @@
#!/bin/bash
# Create a new Hugo blog post and open it in the preferred editor.
# Usage: np.sh <postName>

# Check the argument count *before* using $1, and quote the variable in the
# emptiness test (unquoted `[ -z $postName ]` misbehaves if the argument
# contains spaces, and is why the count check existed as a backstop).
if [ "$#" -ne 1 ]; then
  echo "Expected 1 arguments but found $# - exiting"
  exit 1
fi

postName=$1
if [ -z "$postName" ]; then
  echo "Usage: np.sh <postName>"
  exit 1
fi

pushd blog > /dev/null
hugo new "posts/$postName.md"
outputLocation="content/posts/$postName.md"

# Use our own env variable to encode which editor
# should be used to edit blogposts. Setting $VISUAL
# to `subl` leads to it also being used by (among
# others) zsh's `edit-command-line`, which is
# undesired
#
# The editor variables are deliberately left unquoted so that values
# containing flags (e.g. "subl -n") still word-split into command + args;
# the file path, however, is quoted.
if [ -n "$BLOG_EDITOR" ]; then
  $BLOG_EDITOR "$outputLocation"
elif [ -n "$VISUAL" ]; then
  $VISUAL "$outputLocation"
elif [ -n "$EDITOR" ]; then
  $EDITOR "$outputLocation"
else
  echo "No default editor set - falling back to Sublime"
  # I expect this is only ever gonna be used by me anyway, so
  # I might as well set my own preference as the default :P
  subl "$outputLocation"
fi
popd > /dev/null
Loading…
Cancel
Save