From abab21e2973d0cbe2294aca57a94457178bbd3f8 Mon Sep 17 00:00:00 2001 From: Jack Jackson Date: Tue, 3 May 2022 16:52:15 -0700 Subject: [PATCH] Self-host upload to Google Analytics Some client configurations (Browsers with tracking-blocking enabled, or networks with [Pi-holes](https://pi-hole.net/)) will block any calls to `www.google-analytics.com`. This makes me sad! I'd like to know how many folks are reading my blog! This commit represents a workaround. Instead of telling the client to send data directly to Google (which would trigger Tracking-blockers), I instead tell the client to send it to the `/analytics` endpoint on my domain, which will then get forwarded to Google. Neat! But! If a reader has blocked tracking, we should respect that! So I'm not actually enabling this in my blog until I have also built an "opt-in" mechanism, where readers can decide if they want to send me tracking information or not. "Why didn't you make tracking opt-in to begin with, Jack?" - well, I kinda addressed that [here](https://blog.scubbo.org/posts/commenting-enabled/), but you're right that that would have been a little more ethical. Still - enabling Google Analytics didn't feel _too_ bad (since anyone who cared about detecting and blocking it would still be able to with standard methods - and I assume that anyone reading my blog will know how to), whereas this new approach feels like a circumvention of their agency. --- blog/layouts/partials/head-additions.html | 5 +- default.conf | 90 +++++++++++++++++++++++ deployBlog.sh | 15 ++-- 3 files changed, 104 insertions(+), 6 deletions(-) create mode 100644 default.conf diff --git a/blog/layouts/partials/head-additions.html b/blog/layouts/partials/head-additions.html index dfa3893..86883bc 100644 --- a/blog/layouts/partials/head-additions.html +++ b/blog/layouts/partials/head-additions.html @@ -4,12 +4,14 @@ {{ if .Params.math }}{{ partial "helpers/katex.html" . 
}}{{ end }} + + diff --git a/default.conf b/default.conf new file mode 100644 index 0000000..31f6f99 --- /dev/null +++ b/default.conf @@ -0,0 +1,90 @@ +# This is almost-entirely the default.conf that +# comes with the base image. The only change is the +# addition of a proxy_pass statement at the end to +# forward any requests to `/analytics` on to Google +# Analytics, and the supporting `upstream` directive - +# see the blog post in the commit that +# introduced this for more explanation. + + +# https://stackoverflow.com/a/66703673/1040915 +upstream ga { + # Note that this is hard-coded - I'm hosting this blog in + # a network that is served by a Pi-hole, so if I just + # put the domain name (`www.google-analytics.com`), it + # wouldn't resolve. + # + # If Google Analytics ever changes their IP, this + # hard-coded value would need to change. + server 142.250.189.238:443; + #server www.google-analytics.com:443; + # + # If you want to see the payload being sent, replace with the following: + #server some.server.you.control:<port>; + # and run this: https://gist.githubusercontent.com/mdonkers/63e115cc0c79b4f6b8b3a6b797e485c7/raw/a6a1d090ac8549dac8f2bd607bd64925de997d40/server.py + # (you probably want to change the `proxy_pass` directive to write to + # an `http://` scheme address, too) +} + +server { + listen 8080; + server_name localhost; + + #access_log /var/log/nginx/host.access.log main; + + location / { + root /usr/share/nginx/html; + index index.html index.htm; + } + + #error_page 404 /404.html; + + # redirect server error pages to the static page /50x.html + # + error_page 500 502 503 504 /50x.html; + location = /50x.html { + root /usr/share/nginx/html; + } + + # proxy the PHP scripts to Apache listening on 127.0.0.1:80 + # + #location ~ \.php$ { + # proxy_pass http://127.0.0.1; + #} + + # pass the PHP scripts to FastCGI server listening on 127.0.0.1:9000 + # + #location ~ \.php$ { + # root html; + # fastcgi_pass 127.0.0.1:9000; + # fastcgi_index index.php; + # 
fastcgi_param SCRIPT_FILENAME /scripts$fastcgi_script_name; + # include fastcgi_params; + #} + + # deny access to .htaccess files, if Apache's document root + # concurs with nginx's one + # + #location ~ /\.ht { + # deny all; + #} + + # https://www.freecodecamp.org/news/save-your-analytics-from-content-blockers-7ee08c6ec7ee/ + # https://stackoverflow.com/a/32543398/1040915 + location /analytics { + # The below is the example, but we don't need this because we know there will be no query-string params + # In particular, our option also removes the trailing `/` from the call to analytics (that is, we + # call `/collect?uip=...` rather than `collect/?uip=...`) - the latter results in a 404 + #rewrite ^/analytics/?(.*) /collect/$1?uip=$remote_addr break; + rewrite ^/analytics /collect?uip=$remote_addr break; + # This Host header is important! Without it, Google will 404 + proxy_set_header Host www.google-analytics.com; + proxy_set_header Connection keep-alive; + proxy_set_header Sec-Fetch-Dest empty; + proxy_set_header Sec-Fetch-Mode no-cors; + proxy_set_header Sec-Fetch-Site cross-site; + proxy_pass https://ga$uri$is_args$args; + } + rewrite_log on; + +} diff --git a/deployBlog.sh b/deployBlog.sh index b6724cb..cb3d366 100755 --- a/deployBlog.sh +++ b/deployBlog.sh @@ -57,20 +57,25 @@ fi hugo --quiet --source $path +docker_image_tag="scubbo/blog_nginx" +docker_instance_name="blog_nginx" + cp -r $path/public ./builtContent -docker build -t scubbo/blog_nginx . -f-<