https://github.com/Doist/bitmapist
Tip revision: bf4df248d2cefa24d642c9096b367613d921983c authored by Janusz Gregorczyk on 07 March 2024, 19:00:54 UTC
Merge pull request #62 from Doist/proxi/add-types
Merge pull request #62 from Doist/proxi/add-types
Tip revision: bf4df24
__init__.py
# -*- coding: utf-8 -*-
"""
bitmapist.cohort
~~~~~~~~~~~~~~~~
Implements cohort analytics on top of the data stored in bitmapist.
This library makes it possible to implement real-time, highly scalable analytics that can answer the following questions:
* Generate a cohort table over real-time data stored in bitmapist
* How many % of users that were active last [days, weeks, months] are still active?
* How many % of users that performed action X also performed action Y (and this over time)
A screenshot of the library in action:
https://d2dq6e731uoz0t.cloudfront.net/d5b299fafecc15eb3ea9f7f12f70a061/as/cohort.png
If you want to read more about cohort analysis, please read the following:
* http://en.wikipedia.org/wiki/Cohort_(statistics)
* https://mixpanel.com/docs/learn-the-features/retention-overview [ I was inspired by this, but didn't want to pay the steep price ]
Examples
========
Mark user 123 as active and mark some other events::
from bitmapist import mark_event
mark_event('active', 123)
mark_event('song:add', 123)
mark_event('song:play', 123)
Generate the form that makes it easy to query the bitmapist database::
html_form = bitmapist_cohort.render_html_form(
action_url='/_Cohort',
selections1=[ ('Are Active', 'active'), ],
selections2=[ ('Played song', 'song:play'), ],
time_group='days',
select1='active',
select2='song:play'
)
# action_url is the action URL of the <form> element
# selections1, selections2 specifies the events that the user can select in the form
# time_group can be `days`, `weeks` or `months`
# select1, select2 specifies the current selected events in the <form>
Get the data and render it via HTML::
dates_data = bitmapist_cohort.get_dates_data(select1='active',
select2='song:play',
time_group='days',
system='default')
html_data = bitmapist_cohort.render_html_data(dates_data,
time_group='days')
# All the arguments should come from the FORM element (html_form)
# but to make things more clear I have filled them in directly
:copyright: 2012 by Doist Ltd.
:developer: Amir Salihefendic ( http://amix.dk )
:license: BSD
"""
from datetime import date, datetime, timedelta
from os import path
from typing import Any, Callable, Optional
from dateutil.relativedelta import relativedelta
from mako.lookup import TemplateLookup
from bitmapist import (
BitOpAnd,
DayEvents,
GenericPeriodEvents,
MonthEvents,
WeekEvents,
YearEvents,
delete_runtime_bitop_keys,
)
# --- HTML rendering
def render_html_form(
    action_url,
    selections1,
    selections1b=None,
    selections2=None,
    selections2b=None,
    time_group="days",
    select1=None,
    select1b=None,
    select2=None,
    select2b=None,
    as_precent=1,
    num_results=31,
    num_of_rows=12,
    start_date=None,
):
    """
    Build the HTML form used to query the data stored in bitmapist.

    The form is rendered from the `form_data.mako` template.

    :param :action_url The action URL of the <form> element. The form will always do a GET request.
    :param :selections1 A list of selections that the user can filter by, example `[ ('Are Active', 'active'), ]`
    :param :selections1b Extra selections, used with the first selection, example `[ ('in US', 'country:US'), ]`. Falls back to a copy of `selections1`
    :param :selections2 A list of selections that the user can filter by, example `[ ('Played song', 'song:play'), ]`. Falls back to a copy of `selections1`
    :param :selections2b Extra selections, used with the second selection, example `[ ('Playlist', 'playlist:new'), ]`. Falls back to a copy of `selections2`
    :param :time_group What data should be clustered by, can be `days`, `weeks`, `months`, `years`
    :param :select1 What is the current selected filter (first)
    :param :select1b What is the current selected filter (extra, optional)
    :param :select2 What is the current selected filter (second)
    :param :select2b What is the current selected filter (extra, optional)
    :param :as_precent Passed through to the template unchanged
    :param :num_results Converted with `int()` and passed to the template
    :param :num_of_rows Converted with `int()` and passed to the template
    :param :start_date Passed through to the template unchanged
    :return The rendered template output
    """
    blank_choice = ("------", "")

    # The second selection list is mandatory: reuse a copy of the first one
    # when the caller did not provide it.
    if not selections2:
        selections2 = selections1[:]

    # The "extra" lists are optional. They are always copies (the caller's
    # lists are never mutated) and always start with a blank choice.
    extras_for_first = (selections1b if selections1b else selections1)[:]
    extras_for_first.insert(0, blank_choice)

    extras_for_second = (selections2b if selections2b else selections2)[:]
    extras_for_second.insert(0, blank_choice)

    template = get_lookup().get_template("form_data.mako")
    return template.render(
        selections1=selections1,
        selections1b=extras_for_first,
        selections2=selections2,
        selections2b=extras_for_second,
        time_group=time_group,
        select1=select1,
        select1b=select1b,
        select2=select2,
        select2b=select2b,
        action_url=action_url,
        as_precent=as_precent,
        num_results=int(num_results),
        num_of_rows=int(num_of_rows),
        start_date=start_date,
    )
def render_html_data(
    dates_data,
    as_precent=True,
    time_group="days",
    num_results: int = 31,
    num_of_rows: int = 12,
    start_date=None,
):
    """
    Render the cohort data as HTML using the `table_data.mako` template.

    :param :dates_data The data that's returned by `get_dates_data`
    :param :as_precent Should the data be shown as percents or as counts. Defaults to `True`
    :param :time_group What is the data grouped by? Can be `days`, `weeks`, `months`, `years`
    :param :num_results Passed through to the template unchanged
    :param :num_of_rows Passed through to the template unchanged
    :param :start_date Passed through to the template unchanged
    :return The rendered template output
    """
    template = get_lookup().get_template("table_data.mako")
    return template.render(
        dates_data=dates_data,
        as_precent=as_precent,
        time_group=time_group,
        num_results=num_results,
        num_of_rows=num_of_rows,
        start_date=start_date,
    )
def render_csv_data(
    dates_data,
    as_precent=True,
    time_group="days",
    num_results: int = 31,
    num_of_rows: int = 12,
    start_date=None,
):
    """
    Render the cohort data as CSV using the `table_data_csv.mako` template.

    :param :dates_data The data that's returned by `get_dates_data`
    :param :as_precent Should the data be shown as percents or as counts. Defaults to `True`
    :param :time_group What is the data grouped by? Can be `days`, `weeks`, `months`, `years`
    :param :num_results Passed through to the template unchanged
    :param :num_of_rows Passed through to the template unchanged
    :param :start_date Passed through to the template unchanged
    :return The rendered template output
    """
    template = get_lookup().get_template("table_data_csv.mako")
    return template.render(
        dates_data=dates_data,
        as_precent=as_precent,
        time_group=time_group,
        num_results=num_results,
        num_of_rows=num_of_rows,
        start_date=start_date,
    )
# --- Data rendering
def get_dates_data(
    select1,
    select2,
    select1b=None,
    select2b=None,
    time_group="days",
    system="default",
    as_precent=1,
    num_results=31,
    num_of_rows=12,
    start_date=None,
):
    """
    Fetch the cohort data from bitmapist.

    :param :select1 First filter (could be `active`)
    :param :select2 Second filter (could be `song:played`)
    :param :select1b Extra filter AND-ed with the first filter (could be `country:US`, optional)
    :param :select2b Extra filter AND-ed with the second filter (could be `playlist:created`, optional)
    :param :time_group What is the data grouped by? Can be `days`, `weeks`, `months`, `years`
    :param :system What bitmapist should be used?
    :param :as_precent If truthy, non-zero cells are percentages of the row's `select1` count, otherwise raw counts. Defaults to `1`
    :param :num_results How many periods (result rows) to generate. Always forced to 3 when `time_group` is `years`
    :param :num_of_rows How many periods each row is followed for; every row gets `num_of_rows + 1` cells
    :param :start_date Optional `YYYY-MM-DD` string used as reference date instead of the current UTC time
    :return A list of rows formatted like `[datetime, select1_count, cell, cell, ...]`.
            A cell is `""` when the `select1` count is zero or nothing is marked for
            `select2` in that period, `0.0` when nobody matched both filters, and
            otherwise a percentage (float) or a count (int)
    :raises ValueError If `time_group` is not one of the supported values
    """
    num_results = int(num_results)
    num_of_rows = int(num_of_rows)

    if start_date:
        now = datetime.strptime(start_date, "%Y-%m-%d")
        # NOTE(review): the offset is always added in days, but it is removed
        # below in units of `time_group`. Only for `days` does the first row
        # end up on `start_date` itself -- confirm this is intended.
        now = now + timedelta(days=num_results - 1)
    else:
        now = datetime.utcnow()

    # Pick the event factory and the length of one period.
    if time_group == "days":
        fn_get_events = _day_events_fn
        step = timedelta(days=1)
    elif time_group == "weeks":
        fn_get_events = _weeks_events_fn
        step = relativedelta(weeks=1)
    elif time_group == "months":
        fn_get_events = _month_events_fn
        step = relativedelta(months=1)
    elif time_group == "years":
        fn_get_events = _year_events_fn
        # The yearly view always shows three rows.
        num_results = 3
        step = relativedelta(years=1)
    else:
        # Fail with a clear message instead of an UnboundLocalError later on.
        raise ValueError(
            "Unsupported time_group %r, expected one of "
            "'days', 'weeks', 'months', 'years'" % (time_group,)
        )

    # Rewind to the first period of the table.
    now = now - step * (num_results - 1)
    if time_group == "months":
        # Snap to the first day of the month.
        now -= timedelta(days=now.day - 1)

    dates = []
    try:
        for _ in range(num_results):
            row = [now]

            # events for select1 (+select1b)
            select1_events = fn_get_events(select1, now, system)
            if select1b:
                select1b_events = fn_get_events(select1b, now, system)
                select1_events = BitOpAnd(system, select1_events, select1b_events)

            select1_count = len(select1_events)
            row.append(select1_count)

            if select1_count == 0:
                # Empty cohort: there is nothing to follow over time.
                row.extend([""] * (num_of_rows + 1))
            else:
                # Move in time
                for offset in range(num_of_rows + 1):
                    delta_now = now + step * offset

                    # events for select2 (+select2b)
                    select2_events = fn_get_events(select2, delta_now, system)
                    if select2b:
                        select2b_events = fn_get_events(select2b, delta_now, system)
                        select2_events = BitOpAnd(
                            system, select2_events, select2b_events
                        )

                    if not select2_events.has_events_marked():
                        row.append("")
                        continue

                    both_events = BitOpAnd(system, select1_events, select2_events)
                    both_count = len(both_events)

                    if both_count == 0:
                        row.append(0.0)
                    elif as_precent:
                        row.append((float(both_count) / float(select1_count)) * 100)
                    else:
                        row.append(both_count)

            dates.append(row)
            now = now + step
    finally:
        # Clean up the results of BitOps, even when a query failed half-way,
        # so temporary keys are not left behind.
        delete_runtime_bitop_keys()

    return dates
# --- Custom handlers
# Registry of custom handlers, keyed by event name; consulted by `_dispatch`.
CUSTOM_HANDLERS: dict[str, Callable[..., Any]] = {}


def set_custom_handler(event_name: str, callback: Callable[..., Any]) -> None:
    """
    Register `callback` as the custom handler for `event_name`.

    Custom handlers make it possible to use event names that stand for a
    complex expression (for example active & (web | ios)). Registering a
    handler for a name that already has one replaces the previous handler.

    The callback is invoked as `callback(key, cls, cls_args)`, where `cls` is
    DayEvents, WeekEvents, MonthEvents or YearEvents and `cls_args` holds the
    arguments to pass to the `cls` constructor after the event name.

    For example, a custom handler for all active accounts using web or ios
    could look like::

        def active_web_ios(key, cls, cls_args):
            return cls('active', *cls_args) & (cls('web', *cls_args) | cls('ios', *cls_args))

        set_custom_handler('active_web_ios', active_web_ios)

    And then use something like::

        bitmapist_cohort.render_html_form(
            selections1=[
                ...,
                ('Active on web or iOS', 'active_web_ios')
            ],
            ...
        )
    """
    CUSTOM_HANDLERS[event_name] = callback
# --- Private
def _dispatch(key: str, cls: type[GenericPeriodEvents], cls_args):
    """
    Build the events object for `key`.

    A handler registered in `CUSTOM_HANDLERS` for `key` is called as
    `handler(key, cls, cls_args)`; without one, `cls(key, *cls_args)` is
    returned.
    """
    try:
        handler = CUSTOM_HANDLERS[key]
    except KeyError:
        # No custom handler: plain event lookup.
        return cls(key, *cls_args)
    return handler(key, cls, cls_args)
def _day_events_fn(key: str, date: date, system: str):
    """Dispatch `key` to DayEvents for the year, month and day of `date`."""
    day_args = (date.year, date.month, date.day, system)
    return _dispatch(key, DayEvents, day_args)
def _weeks_events_fn(key: str, date: date, system: str):
    """
    Dispatch `key` to WeekEvents for the ISO week that contains `date`.

    The ISO week number is paired with the ISO year, not the calendar year.
    The two differ around New Year: 2021-01-01 belongs to ISO week 53 of 2020
    and 2024-12-30 to ISO week 1 of 2025. Pairing the calendar year with the
    ISO week would produce a non-existing or colliding (year, week) pair for
    such dates.
    """
    iso_year, iso_week, _ = date.isocalendar()
    cls_args = (iso_year, iso_week, system)
    return _dispatch(key, WeekEvents, cls_args)
def _month_events_fn(key: str, date: date, system: str):
    """Dispatch `key` to MonthEvents for the year and month of `date`."""
    month_args = (date.year, date.month, system)
    return _dispatch(key, MonthEvents, month_args)
def _year_events_fn(key: str, date: date, system: str):
    """Dispatch `key` to YearEvents for the year of `date`."""
    year_args = (date.year, system)
    return _dispatch(key, YearEvents, year_args)
# Module-level cache for the template lookup; filled on first use.
_LOOKUP: Optional[TemplateLookup] = None


def get_lookup() -> TemplateLookup:
    """
    Return the shared TemplateLookup, creating it on the first call.

    Templates are looked up in the `tmpl` directory located next to this
    file.
    """
    global _LOOKUP

    if _LOOKUP:
        return _LOOKUP

    module_dir = path.dirname(path.abspath(__file__))
    template_dir = path.join(module_dir, "tmpl")
    _LOOKUP = TemplateLookup(directories=[template_dir], encoding_errors="replace")
    return _LOOKUP
# Names exported by `from ... import *`.
# NOTE(review): `render_csv_data` and `set_custom_handler` are defined in this
# module without a leading underscore but are not listed here -- confirm
# whether they should be exported as well.
__all__ = [
    "render_html_form",
    "render_html_data",
    "get_dates_data",
]