Self-host upload to Google Analytics

Some client configurations (browsers with tracking-blocking enabled, or
networks with [Pi-holes](https://pi-hole.net/)) will block any calls
to `www.google-analytics.com`. This makes me sad! I'd like to know
how many folks are reading my blog!

This commit represents a workaround. Rather than telling the client to
send data directly to Google (which would trigger tracking-blockers), I
tell the client to send it to the `/analytics` endpoint on my domain,
which then forwards it on to Google. Neat!

But! If a reader has blocked tracking, we should respect that! So I'm
not actually enabling this in my blog until I have also built an
"opt-in" mechanism, where readers can decide if they want to send me
tracking information or not.

"Why didn't you make tracking opt-in to begin with, Jack?" - well, I
kinda addressed that
[here](https://blog.scubbo.org/posts/commenting-enabled/), but you're
right that that would have been a little more ethical. Still - enabling
Google Analytics didn't feel _too_ bad (since anyone who cared about
detecting and blocking it would still be able to do so with standard
methods - and I assume that anyone reading my blog knows how), whereas
this new approach feels like a circumvention of their agency.
attemptAtTagsRenovation
Jack Jackson 2 years ago
parent a4e844297a
commit abab21e297
  1. 5
      blog/layouts/partials/head-additions.html
  2. 90
      default.conf
  3. 15
      deployBlog.sh

@ -4,12 +4,14 @@
<!-- https://mertbakir.gitlab.io/hugo/math-typesetting-in-hugo/ -->
{{ if .Params.math }}{{ partial "helpers/katex.html" . }}{{ end }}
<!-- https://minimalanalytics.com/ -->
<!-- commenting this out for now, because I want to make analytics opt-in -->
<!--
<script>
(function(a,b,c){var d=a.history,e=document,f=navigator||{},g=localStorage,
h=encodeURIComponent,i=d.pushState,k=function(){return Math.random().toString(36)},
l=function(){return g.cid||(g.cid=k()),g.cid},m=function(r){var s=[];for(var t in r)
r.hasOwnProperty(t)&&void 0!==r[t]&&s.push(h(t)+"="+h(r[t]));return s.join("&")},
n=function(r,s,t,u,v,w,x){var z="https://www.google-analytics.com/collect",
n=function(r,s,t,u,v,w,x){var z="/analytics",
A=m({v:"1",ds:"web",aip:c.anonymizeIp?1:void 0,tid:b,cid:l(),t:r||"pageview",
sd:c.colorDepth&&screen.colorDepth?screen.colorDepth+"-bits":void 0,dr:e.referrer||
void 0,dt:e.title,dl:e.location.origin+e.location.pathname+e.location.search,ul:c.language?
@ -24,3 +26,4 @@
trackException:function q(r,s){return n("exception",null,null,null,null,r,s)}}})
(window,"UA-216020156-1",{anonymizeIp:true,colorDepth:true,characterSet:true,screenSize:true,language:true});
</script>
-->

@ -0,0 +1,90 @@
# This is almost-entirely the default.conf that
# comes with the base image. The only change is the
# addition of a proxy_pass statement at the end to
# forward any requests to `/analytics` on to Google
# Analytics, and the supporting `upstream` directive -
# see the blog post in the commit that
# introduced this for more explanation.
# https://stackoverflow.com/a/66703673/1040915
upstream ga {
    # Note that this is hard-coded - I'm hosting this blog in
    # a network that is served by a Pi-hole, so if I just
    # put the domain name (`www.google-analytics.com`), it
    # wouldn't resolve.
    #
    # If Google Analytics ever changes their IP, this
    # hard-coded value would need to change.
    server 142.250.189.238:443;
    #server www.google-analytics.com:443;
    #
    # If you want to see the payload being sent, replace with the following:
    #server some.server.you.control:<port_number>;
    # and run this: https://gist.githubusercontent.com/mdonkers/63e115cc0c79b4f6b8b3a6b797e485c7/raw/a6a1d090ac8549dac8f2bd607bd64925de997d40/server.py
    # (you probably want to change the `proxy_pass` directive to write to
    # an `http://` scheme address, too)
}

server {
    listen       8080;
    server_name  localhost;

    #access_log  /var/log/nginx/host.access.log  main;

    # Serve the static site built by Hugo.
    location / {
        root   /usr/share/nginx/html;
        index  index.html index.htm;
    }

    #error_page  404              /404.html;

    # redirect server error pages to the static page /50x.html
    #
    error_page   500 502 503 504  /50x.html;
    location = /50x.html {
        root   /usr/share/nginx/html;
    }

    # proxy the PHP scripts to Apache listening on 127.0.0.1:80
    #
    #location ~ \.php$ {
    #    proxy_pass   http://127.0.0.1;
    #}

    # pass the PHP scripts to FastCGI server listening on 127.0.0.1:9000
    #
    #location ~ \.php$ {
    #    root           html;
    #    fastcgi_pass   127.0.0.1:9000;
    #    fastcgi_index  index.php;
    #    fastcgi_param  SCRIPT_FILENAME  /scripts$fastcgi_script_name;
    #    include        fastcgi_params;
    #}

    # deny access to .htaccess files, if Apache's document root
    # concurs with nginx's one
    #
    #location ~ /\.ht {
    #    deny  all;
    #}

    # Forward analytics beacons to Google Analytics via the `ga` upstream.
    # https://www.freecodecamp.org/news/save-your-analytics-from-content-blockers-7ee08c6ec7ee/
    # https://stackoverflow.com/a/32543398/1040915
    location /analytics {
        # The below is the example, but we don't need this because we know there will be no query-string params
        # In particular, our option also removes the trailing `/` from the call to analytics (that is, we
        # call `/collect?uip=...` rather than `collect/?uip=...`) - the latter results in a 404
        #rewrite ^/analytics/?(.*) /collect/$1?uip=$remote_addr break;
        #
        # `uip` passes the address nginx sees for the client on to Google.
        # NOTE(review): if this container sits behind another proxy or CDN,
        # $remote_addr may be that proxy's address rather than the reader's - confirm.
        rewrite ^/analytics /collect?uip=$remote_addr break;

        # This Host header is important! Without it, Google will 404
        proxy_set_header Host www.google-analytics.com;
        proxy_set_header Connection keep-alive;
        proxy_set_header Sec-Fetch-Dest empty;
        proxy_set_header Sec-Fetch-Mode no-cors;
        proxy_set_header Sec-Fetch-Site cross-site;

        # The upstream is reached by a hard-coded IP under the group name `ga`,
        # so nginx's defaults (no SNI; TLS name taken from the proxy_pass host,
        # i.e. `ga`) would not present the real hostname during the TLS
        # handshake. Send the proper server name explicitly.
        proxy_ssl_server_name on;
        proxy_ssl_name www.google-analytics.com;

        proxy_pass https://ga$uri$is_args$args;
    }

    # Log rewrite processing results to the error log (at `notice` level).
    rewrite_log on;
}

@ -57,20 +57,25 @@ fi
hugo --quiet --source $path
docker_image_tag="scubbo/blog_nginx"
docker_instance_name="blog_nginx"
cp -r $path/public ./builtContent
docker build -t scubbo/blog_nginx . -f-<<EOF
docker build -t $docker_image_tag . -f-<<EOF
FROM nginxinc/nginx-unprivileged
COPY builtContent /usr/share/nginx/html
EOF
if [[ $(docker ps --filter "name=blog_nginx" | wc -l) -lt 2 ]]; then
if [[ $(docker ps --filter "name=$docker_instance_name" | wc -l) -lt 2 ]]; then
echo "No currently running blog"
else
docker kill blog_nginx
docker rm blog_nginx
docker kill $docker_instance_name
docker rm $docker_instance_name
fi
docker run --name blog_nginx -p 8108:8080 -d scubbo/blog_nginx
docker run --name $docker_instance_name -p 8108:8080 \
--mount type=bind,source="$(pwd)"/default.conf,target=/etc/nginx/conf.d/default.conf \
-d $docker_image_tag
# TODO - call Cloudflare's CDN API to explicitly purge cache on the index page
# TODO - (more of a stretch) and parse the `git push` output to purge cache on updated pages, too
# TODO - do the "docker kill and restart" more idiomatically - there must be a "proper" way to do it!

Loading…
Cancel
Save