Use hashing to associate sessions

This commit is contained in:
R. Miles McCain 2020-05-02 12:16:57 -04:00
parent 34e698e309
commit a210e23bb3
No known key found for this signature in database
GPG Key ID: 24F9B6A2588C5408
5 changed files with 44 additions and 21 deletions

1
.gitignore vendored
View File

@ -132,3 +132,4 @@ dmypy.json
secrets.yml
.vscode
.DS_Store
compiledstatic/

View File

@ -33,6 +33,10 @@ SCRIPT_USE_HTTPS=True
# How frequently should the monitoring script "phone home" (in ms)?
SCRIPT_HEARTBEAT_FREQUENCY=5000
# How much time (in seconds) can elapse between requests from the same user
# before a new session is created?
SESSION_MEMORY_TIMEOUT=1800
# Should only superusers (admins) be able to create services? This is helpful
# when you'd like to invite others to your Shynet instance but don't want
# them to be able to create services of their own.

View File

@ -3,6 +3,7 @@ import logging
import geoip2.database
import user_agents
from hashlib import sha1
from celery import shared_task
from django.conf import settings
from django.core.cache import cache
@ -59,29 +60,33 @@ def ingress_request(
if dnt and service.respect_dnt:
return
ip_data = _geoip2_lookup(ip)
log.debug(f"Found geoip2 data")
# Validate payload
if payload.get("loadTime", 1) <= 0:
payload["loadTime"] = None
# Create or update session
session = (
Session.objects.filter(
service=service,
last_seen__gt=timezone.now() - timezone.timedelta(minutes=10),
ip=ip,
user_agent=user_agent,
).first()
# We used to check for identifiers, but that can cause issues when people
# re-open the page in a new tab, for example. It's better to match sessions
# solely based on IP and user agent.
association_id_hash = sha1()
association_id_hash.update(str(ip).encode("utf-8"))
association_id_hash.update(str(user_agent).encode("utf-8"))
session_cache_path = (
f"session_association_{service.pk}_{association_id_hash.hexdigest()}"
)
# Create or update session
session = None
if cache.get(session_cache_path) is not None:
cache.touch(session_cache_path, settings.SESSION_MEMORY_TIMEOUT)
session = Session.objects.filter(
pk=cache.get(session_cache_path), service=service
).first()
if session is None:
log.debug("Cannot link to existing session; creating a new one...")
ua = user_agents.parse(user_agent)
initial = True
log.debug("Cannot link to existing session; creating a new one...")
ip_data = _geoip2_lookup(ip)
log.debug(f"Found geoip2 data...")
ua = user_agents.parse(user_agent)
device_type = "OTHER"
if (
ua.is_bot
@ -111,9 +116,14 @@ def ingress_request(
latitude=ip_data.get("latitude"),
time_zone=ip_data.get("time_zone", ""),
)
cache.set(
session_cache_path, session.pk, timeout=settings.SESSION_MEMORY_TIMEOUT
)
else:
log.debug("Updating old session with new data...")
initial = False
log.debug("Updating old session with new data...")
# Update last seen time
session.last_seen = timezone.now()
if session.identifier == "" and identifier.strip() != "":
@ -124,9 +134,10 @@ def ingress_request(
idempotency = payload.get("idempotency")
idempotency_path = f"hit_idempotency_{idempotency}"
hit = None
if idempotency is not None:
if cache.get(idempotency_path) is not None:
cache.touch(idempotency_path, 10 * 60)
cache.touch(idempotency_path, settings.SESSION_MEMORY_TIMEOUT)
hit = Hit.objects.filter(
pk=cache.get(idempotency_path), session=session
).first()
@ -137,6 +148,7 @@ def ingress_request(
hit.heartbeats += 1
hit.last_seen = timezone.now()
hit.save()
if hit is None:
log.debug("Hit is a page load; creating new hit...")
# There is no existing hit; create a new one
@ -153,7 +165,9 @@ def ingress_request(
)
# Set idempotency (if applicable)
if idempotency is not None:
cache.set(idempotency_path, hit.pk, timeout=10 * 60)
cache.set(
idempotency_path, hit.pk, timeout=settings.SESSION_MEMORY_TIMEOUT
)
except Exception as e:
log.exception(e)
raise e

View File

@ -261,3 +261,7 @@ SCRIPT_USE_HTTPS = os.getenv("SCRIPT_USE_HTTPS", "True") == "True"
# How frequently should the tracking script "phone home" with a heartbeat, in
# milliseconds?
SCRIPT_HEARTBEAT_FREQUENCY = int(os.getenv("SCRIPT_HEARTBEAT_FREQUENCY", "5000"))
# How much time (in seconds) can elapse between requests from the same user
# before a new session is created?
SESSION_MEMORY_TIMEOUT = int(os.getenv("SESSION_MEMORY_TIMEOUT", "1800"))

View File

@ -8,7 +8,7 @@
<body>
<noscript><img src="//localhost:8000/ingress/test_uuid/pixel.gif"></noscript>
<script src="//localhost:8000/ingress/test_uuid/script.js"></script>
<script src="//localhost:8000/ingress/66015ce4-c69d-40fb-be8f-5535538d795e/script.js"></script>
</body>
</html>
</html>