Preliminary monitoring

This commit is contained in:
R. Miles McCain
2020-04-10 03:25:32 -04:00
parent aab7fbb86a
commit 844c44ae6c
31 changed files with 831 additions and 76 deletions

View File

View File

@@ -0,0 +1,9 @@
from django.contrib import admin
from .models import Hit, Session
# Make the analytics models browsable in the Django admin site.
admin.site.register([Session, Hit])

5
shynet/analytics/apps.py Normal file
View File

@@ -0,0 +1,5 @@
from django.apps import AppConfig
class AnalyticsConfig(AppConfig):
    """Application configuration for the ``analytics`` Django app."""

    name = "analytics"

View File

@@ -0,0 +1,23 @@
from django.contrib import admin
from django.urls import include, path
from .views import ingress
# Ingress routes. Each tracker (pixel and script) is exposed twice: once keyed
# by the service UUID alone, and once with an additional ``identifier`` path
# segment that the views read from their URL keyword arguments.
urlpatterns = [
    path(
        "<service_uuid>/pixel.gif", ingress.PixelView.as_view(), name="endpoint_pixel"
    ),
    path(
        "<service_uuid>/script.js", ingress.ScriptView.as_view(), name="endpoint_script"
    ),
    path(
        "<service_uuid>/<identifier>/pixel.gif",
        ingress.PixelView.as_view(),
        name="endpoint_pixel_id",
    ),
    path(
        "<service_uuid>/<identifier>/script.js",
        ingress.ScriptView.as_view(),
        # Previously this reused "endpoint_pixel_id", so two different routes
        # shared one name and the script route could not be reversed
        # unambiguously.
        name="endpoint_script_id",
    ),
]

View File

@@ -0,0 +1,65 @@
# Generated by Django 3.0.5 on 2020-04-10 06:58
from django.db import migrations, models
import analytics.models
class Migration(migrations.Migration):
    # Creates the ``Hit`` and ``Session`` models for the analytics app.
    # No foreign keys are declared in this migration; only scalar columns.

    initial = True

    dependencies = []

    operations = [
        # Hit: one tracked page view, with timing and tracker information.
        migrations.CreateModel(
            name="Hit",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("start", models.DateTimeField(auto_now_add=True)),
                ("duration", models.FloatField(default=0.0)),
                ("heartbeats", models.IntegerField(default=0)),
                ("tracker", models.TextField()),
                ("location", models.TextField(blank=True)),
                ("referrer", models.TextField(blank=True)),
                ("loadTime", models.FloatField(null=True)),
                ("httpStatus", models.IntegerField(null=True)),
                ("metadata_raw", models.TextField()),
            ],
        ),
        # Session: keyed by a UUID primary key whose default comes from
        # ``analytics.models._default_uuid``.
        migrations.CreateModel(
            name="Session",
            fields=[
                (
                    "uuid",
                    models.UUIDField(
                        default=analytics.models._default_uuid,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                ("identifier", models.TextField(blank=True)),
                ("first_seen", models.DateTimeField(auto_now_add=True)),
                ("last_seen", models.DateTimeField(auto_now_add=True)),
                ("user_agent", models.TextField()),
                ("browser", models.TextField()),
                ("device", models.TextField()),
                ("os", models.TextField()),
                ("ip", models.GenericIPAddressField()),
                ("asn", models.TextField(blank=True)),
                ("country", models.TextField(blank=True)),
                ("longitude", models.FloatField(null=True)),
                ("latitude", models.FloatField(null=True)),
                ("time_zone", models.TextField(blank=True)),
                ("metadata_raw", models.TextField()),
            ],
        ),
    ]

View File

@@ -0,0 +1,31 @@
# Generated by Django 3.0.5 on 2020-04-10 06:58
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
    # Adds the two foreign keys of the analytics app:
    # ``Session.service`` -> ``core.Service`` and ``Hit.session`` ->
    # ``analytics.Session``. Both cascade on delete.
    #
    # NOTE(review): this migration is flagged ``initial`` although it depends on
    # analytics 0001_initial -- presumably makemigrations split the initial
    # schema in two to order it after the core app; confirm before editing.

    initial = True

    dependencies = [
        ("core", "0001_initial"),
        ("analytics", "0001_initial"),
    ]

    operations = [
        migrations.AddField(
            model_name="session",
            name="service",
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE, to="core.Service"
            ),
        ),
        migrations.AddField(
            model_name="hit",
            name="session",
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE, to="analytics.Session"
            ),
        ),
    ]

View File

View File

@@ -0,0 +1,72 @@
import json
import uuid
from django.db import models
from core.models import Service
def _default_uuid():
    """Return a freshly generated random (version 4) UUID as a string."""
    fresh = uuid.uuid4()
    return str(fresh)
class Session(models.Model):
    """A visitor session recorded against one ``Service``.

    Stores the request fingerprint (IP, user agent, optional identifier),
    first/last seen timestamps, GeoIP details and free-form metadata kept as
    a JSON string in ``metadata_raw``.
    """

    uuid = models.UUIDField(default=_default_uuid, primary_key=True)
    service = models.ForeignKey(Service, on_delete=models.CASCADE)

    # Cross-session identification; optional, and provided by the service
    identifier = models.TextField(blank=True)

    # Time
    first_seen = models.DateTimeField(auto_now_add=True)
    last_seen = models.DateTimeField(auto_now_add=True)

    # Core request information
    user_agent = models.TextField()
    browser = models.TextField()
    device = models.TextField()
    os = models.TextField()
    ip = models.GenericIPAddressField()

    # GeoIP data
    asn = models.TextField(blank=True)
    country = models.TextField(blank=True)
    longitude = models.FloatField(null=True)
    latitude = models.FloatField(null=True)
    time_zone = models.TextField(blank=True)

    # Additional metadata, stored as JSON string
    metadata_raw = models.TextField()

    @property
    def metadata(self):
        """Return ``metadata_raw`` decoded from JSON.

        Returns an empty dict when the stored value cannot be decoded.
        """
        try:
            return json.loads(self.metadata_raw)
        except (TypeError, ValueError):
            # Metadata is not crucial; in the case of a read error, just
            # ignore it. Only decoding failures are caught (JSONDecodeError is
            # a ValueError; TypeError covers a non-string value) so that
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            return {}
class Hit(models.Model):
    """A single tracked page view belonging to a ``Session``.

    Holds timing information (start, duration, heartbeat count), which tracker
    reported it, page details, and free-form metadata kept as a JSON string in
    ``metadata_raw``.
    """

    session = models.ForeignKey(Session, on_delete=models.CASCADE)

    # Base request information
    start = models.DateTimeField(auto_now_add=True)
    duration = models.FloatField(default=0.0)  # Seconds spent on page
    heartbeats = models.IntegerField(default=0)
    tracker = models.TextField()  # Tracking pixel or JS

    # Advanced page information
    location = models.TextField(blank=True)
    referrer = models.TextField(blank=True)
    loadTime = models.FloatField(null=True)
    httpStatus = models.IntegerField(null=True)

    # Additional metadata, stored as JSON string
    metadata_raw = models.TextField()

    @property
    def metadata(self):
        """Return ``metadata_raw`` decoded from JSON.

        Returns an empty dict when the stored value cannot be decoded.
        """
        try:
            return json.loads(self.metadata_raw)
        except (TypeError, ValueError):
            # Metadata is not crucial; in the case of a read error, just
            # ignore it. Only decoding failures are caught (JSONDecodeError is
            # a ValueError; TypeError covers a non-string value) so that
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            return {}

125
shynet/analytics/tasks.py Normal file
View File

@@ -0,0 +1,125 @@
import json
import logging
import geoip2.database
import user_agents
from celery import shared_task
from django.conf import settings
from django.core.cache import cache
from django.utils import timezone
from core.models import Service
from .models import Hit, Session
log = logging.getLogger(__name__)
# Lazily created MaxMind database readers, shared by all lookups in this process.
_geoip2_city_reader = None
_geoip2_asn_reader = None


def _geoip2_lookup(ip):
    """Look up GeoIP details for ``ip`` in the configured MaxMind databases.

    Args:
        ip: The IP address to look up.

    Returns:
        A dict with the keys ``asn``, ``country``, ``longitude``, ``latitude``
        and ``time_zone``. An empty dict is returned when either
        ``settings.MAXMIND_CITY_DB`` or ``settings.MAXMIND_ASN_DB`` is ``None``,
        or when the address is not present in the databases.
    """
    # TODO: is there a better way to do global Django vars? Is this thread safe?
    global _geoip2_city_reader, _geoip2_asn_reader

    if settings.MAXMIND_CITY_DB is None or settings.MAXMIND_ASN_DB is None:
        # GeoIP is not configured. Return an empty dict rather than None so
        # that callers can use ``.get()`` on the result unconditionally.
        return {}

    try:
        # Open each reader independently so that a failure to open one does not
        # cause the other to be re-opened on the next call.
        if _geoip2_city_reader is None:
            _geoip2_city_reader = geoip2.database.Reader(settings.MAXMIND_CITY_DB)
        if _geoip2_asn_reader is None:
            _geoip2_asn_reader = geoip2.database.Reader(settings.MAXMIND_ASN_DB)

        city_results = _geoip2_city_reader.city(ip)
        asn_results = _geoip2_asn_reader.asn(ip)
        return {
            "asn": asn_results.autonomous_system_organization,
            "country": city_results.country.iso_code,
            "longitude": city_results.location.longitude,
            "latitude": city_results.location.latitude,
            "time_zone": city_results.location.time_zone,
        }
    except geoip2.errors.AddressNotFoundError:
        return {}
@shared_task
def ingress_request(
    service_uuid, tracker, time, payload, ip, location, user_agent, identifier=""
):
    """Record one tracking request as a ``Session`` and a ``Hit``.

    A session is reused when one exists for the same service, IP, user agent
    and identifier that was last seen within the past 30 minutes; otherwise a
    new one is created. A hit is updated (as a heartbeat) when the payload's
    idempotency key maps to an existing hit of that session; otherwise a new
    hit is created.

    Args:
        service_uuid: UUID of the ``Service`` the request belongs to.
        tracker: Label of the tracker that produced the request.
        time: Time the request was received. Currently unused by this task.
        payload: Dict sent by the tracker. The keys read are
            ``sessionMetadata``, ``hitMetadata``, ``idempotency``,
            ``referrer`` and ``loadTime``; all are optional.
        ip: Client IP address.
        location: URL of the tracked page; may be ``None`` or empty.
        user_agent: Client user agent string; may be ``None`` or empty.
        identifier: Optional cross-session identifier supplied by the service.

    Any exception is logged (with traceback) and suppressed.
    """
    # Imported from the standard library instead of being reached through
    # ``django.utils.timezone``, which does not document ``timedelta`` as part
    # of its API.
    from datetime import timedelta

    try:
        # Absent request headers arrive as None, but the matching model
        # columns are non-nullable text fields.
        location = location or ""
        user_agent = user_agent or ""

        service = Service.objects.get(uuid=service_uuid)
        log.debug(f"Linked to service {service}")

        # Create or update session
        session_metadata = payload.get("sessionMetadata", {})
        session = Session.objects.filter(
            service=service,
            last_seen__gt=timezone.now() - timedelta(minutes=30),
            ip=ip,
            user_agent=user_agent,
            identifier=identifier,
        ).first()
        if session is None:
            log.debug("Cannot link to existing session; creating a new one...")
            # GeoIP data is only stored on new sessions, so only look it up
            # here. Tolerate a None result, and None values for individual
            # text attributes, since those columns are non-nullable.
            ip_data = _geoip2_lookup(ip) or {}
            ua = user_agents.parse(user_agent)
            session = Session.objects.create(
                service=service,
                ip=ip,
                user_agent=user_agent,
                identifier=identifier,
                browser=f"{ua.browser.family or ''} {ua.browser.version_string or ''}".strip(),
                device=f"{ua.device.model or ''}",
                os=f"{ua.os.family or ''} {ua.os.version_string or ''}".strip(),
                metadata_raw=json.dumps(session_metadata),
                asn=ip_data.get("asn") or "",
                country=ip_data.get("country") or "",
                longitude=ip_data.get("longitude"),
                latitude=ip_data.get("latitude"),
                time_zone=ip_data.get("time_zone") or "",
            )
        else:
            log.debug("Updating old session with new data...")
            # Update old metadata with new metadata
            new_metadata = session.metadata
            new_metadata.update(session_metadata)
            session.metadata_raw = json.dumps(new_metadata)
            # Update last seen time
            session.last_seen = timezone.now()
            session.save()

        # Create or update hit
        hit_metadata = payload.get("hitMetadata", {})
        idempotency = payload.get("idempotency")
        idempotency_path = f"hit_idempotency_{idempotency}"
        hit = None

        if idempotency is not None:
            # Read the cache once so the check and the lookup use the same value.
            cached_hit_pk = cache.get(idempotency_path)
            if cached_hit_pk is not None:
                hit = Hit.objects.filter(pk=cached_hit_pk, session=session).first()
                if hit is not None:
                    # There is an existing hit with an identical idempotency key. That means
                    # this is a heartbeat.
                    log.debug("Hit is a heartbeat; updating old hit with new data...")
                    hit.heartbeats += 1
                    hit.duration = (timezone.now() - hit.start).total_seconds()
                    new_metadata = hit.metadata
                    new_metadata.update(hit_metadata)
                    hit.metadata_raw = json.dumps(new_metadata)
                    hit.save()
                    # Keep the key alive while heartbeats continue; otherwise a
                    # page left open for more than 30 minutes would be
                    # recorded as a second hit.
                    cache.set(idempotency_path, hit.pk, timeout=30 * 60)

        if hit is None:
            log.debug("Hit is a page load; creating new hit...")
            # There is no existing hit; create a new one
            hit = Hit.objects.create(
                session=session,
                tracker=tracker,
                location=location,
                # A JSON null would otherwise reach a non-nullable column.
                referrer=payload.get("referrer") or "",
                loadTime=payload.get("loadTime"),
                metadata_raw=json.dumps(hit_metadata),
            )
            # Set idempotency (if applicable)
            if idempotency is not None:
                cache.set(idempotency_path, hit.pk, timeout=30 * 60)
    except Exception as e:
        # Top-level task boundary: log with traceback instead of only the message.
        log.exception(e)

View File

@@ -0,0 +1,27 @@
// Page tracking script. Reports the page view to the tracking endpoint once
// the page has loaded, then again every five seconds for as long as the page
// stays open.
//
// Wrapped in an immediately-invoked function so nothing leaks into the host
// page's global scope, and attached with addEventListener so the host page's
// own window.onload handler is neither overwritten nor able to overwrite this.
(function () {
  // Random key generated once per page load and sent with every report, so
  // the receiver can tie repeated reports from this page view together.
  var idempotency =
    Math.random().toString(36).substring(2, 15) +
    Math.random().toString(36).substring(2, 15);

  // Milliseconds between navigation start and the end of DOMContentLoaded,
  // or null when the Navigation Timing API is not available.
  function measureLoadTime() {
    var timing = window.performance && window.performance.timing;
    if (!timing) {
      return null;
    }
    return timing.domContentLoadedEventEnd - timing.navigationStart;
  }

  // POST the current report as JSON to the tracking endpoint.
  function sendUpdate() {
    var xhr = new XMLHttpRequest();
    xhr.open("POST", "{{endpoint}}", true);
    xhr.setRequestHeader("Content-Type", "application/json");
    xhr.send(
      JSON.stringify({
        idempotency: idempotency,
        referrer: document.referrer,
        loadTime: measureLoadTime(),
        hitMetadata:
          typeof shynetHitMetadata !== "undefined" ? shynetHitMetadata : {},
        sessionMetadata:
          typeof shynetSessionMetadata !== "undefined"
            ? shynetSessionMetadata
            : {},
      })
    );
  }

  // Schedule the periodic reports and send the first one immediately.
  function start() {
    setInterval(sendUpdate, 5000);
    sendUpdate();
  }

  // If the script is injected after the load event has already fired, a load
  // listener would never run, so start right away in that case.
  if (document.readyState === "complete") {
    start();
  } else {
    window.addEventListener("load", start);
  }
})();

View File

@@ -0,0 +1,3 @@
from django.test import TestCase
# Create your tests here.

View File

View File

@@ -0,0 +1,85 @@
import base64
import json
from django.http import HttpResponse
from django.shortcuts import render
from django.utils import timezone
from django.utils.decorators import method_decorator
from django.views.decorators.csrf import csrf_exempt
from django.views.generic import TemplateView, View
from ipware import get_client_ip
from ..tasks import ingress_request
def ingress(request, service_uuid, identifier, tracker, payload):
    """Collect request details and queue them for asynchronous processing.

    Args:
        request: The incoming HTTP request.
        service_uuid: UUID of the service taken from the URL.
        identifier: Optional identifier taken from the URL ("" when absent).
        tracker: Label of the tracker that received the request.
        payload: Dict of data sent by the tracker (empty for the pixel).

    The work itself is done by the ``ingress_request`` task; this function
    only enqueues it and returns ``None``.
    """
    time = timezone.now()
    client_ip, _is_routable = get_client_ip(request)
    # Default absent headers to "" rather than None: the task stores these
    # values in non-nullable text columns.
    location = request.META.get("HTTP_REFERER", "")
    user_agent = request.META.get("HTTP_USER_AGENT", "")
    ingress_request.delay(
        service_uuid,
        tracker,
        time,
        payload,
        client_ip,
        location,
        user_agent,
        identifier,
    )
class PixelView(View):
    """Fallback tracker for browsers with JavaScript disabled.

    Every request, whatever its HTTP method, is recorded and answered with an
    unobtrusive 1x1 transparent GIF.
    """

    def dispatch(self, request, *args, **kwargs):
        # Record the request; the pixel carries no payload of its own.
        url_kwargs = self.kwargs
        ingress(
            request,
            url_kwargs.get("service_uuid"),
            url_kwargs.get("identifier", ""),
            "PIXEL",
            {},
        )

        gif_bytes = base64.b64decode(
            "R0lGODlhAQABAIAAAP///wAAACH5BAEAAAAALAAAAAABAAEAAAICRAEAOw=="
        )
        response = HttpResponse(gif_bytes, content_type="image/gif")
        for header, value in (
            ("Cache-Control", "no-cache"),
            ("Access-Control-Allow-Origin", "*"),
        ):
            response[header] = value
        return response
@method_decorator(csrf_exempt, name="dispatch")
class ScriptView(View):
    """Serves the tracking script (GET) and receives its JSON reports (POST).

    CSRF protection is disabled for this view, and every response is given
    permissive CORS headers.
    """

    def dispatch(self, request, *args, **kwargs):
        """Dispatch as usual, then attach CORS headers to the response."""
        resp = super().dispatch(request, *args, **kwargs)
        resp["Access-Control-Allow-Origin"] = "*"
        resp["Access-Control-Allow-Methods"] = "GET,HEAD,OPTIONS,POST"
        resp[
            "Access-Control-Allow-Headers"
        ] = "Origin, X-Requested-With, Content-Type, Accept, Authorization, Referer"
        return resp

    def get(self, *args, **kwargs):
        """Render the tracking script, pointing it back at this same URL."""
        return render(
            self.request,
            "analytics/scripts/page.js",
            context={"endpoint": self.request.build_absolute_uri()},
            content_type="application/javascript",
        )

    def post(self, *args, **kwargs):
        """Queue the posted JSON report for processing.

        Returns HTTP 400 when the body is not valid JSON or is not a JSON
        object, instead of failing with an unhandled exception.
        """
        try:
            payload = json.loads(self.request.body)
        except ValueError:
            # JSONDecodeError and UnicodeDecodeError are both ValueErrors.
            payload = None
        if not isinstance(payload, dict):
            return HttpResponse(
                json.dumps({"status": "ERROR"}),
                content_type="application/json",
                status=400,
            )

        ingress(
            self.request,
            self.kwargs.get("service_uuid"),
            self.kwargs.get("identifier", ""),
            "JS",
            payload,
        )
        return HttpResponse(
            json.dumps({"status": "OK"}), content_type="application/json"
        )