Revision 0ac73401e233eacb481b5b81878815fc98385145 authored by Stefano Zacchiroli on 16 November 2023, 12:15:40 UTC, committed by Stefano Zacchiroli on 16 November 2023, 12:15:40 UTC
1 parent a34da1c
export-rrd.py
#!/usr/bin/env python3
"""Script to execute the export of softwareheritage's rrds data.
"""
import click
import json
import os
import subprocess
# Default directory searched for the rrd files.
DIRPATH = '/var/lib/munin/softwareheritage.org/'

# Name of the rrd file of one entity; '###' stands for the entity name.
FILENAME_PATTERN = (
    'prado.softwareheritage.org-softwareheritage_objects_softwareheritage-###-g.rrd'
)

# The data source used at rrd creation time
DS = 42

# Entities whose rrd file is exported. The commented-out names are disabled;
# uncomment one to add it to the export.
ENTITIES = [
    'content',
    'origin',
    'revision',
    # 'directory_entry_dir',
    # 'directory_entry_file',
    # 'directory_entry_rev',
    # 'directory',
    # 'entity',
    # 'occurrence_history',
    # 'person',
    # 'project',
    # 'release',
    # 'revision_history',
    # 'skipped_content',
    # 'visit',
]
def compute_cmd(dirpath, start, step=86400):
    """Build the ``rrdtool xport`` command line exporting the entities.

    Args:
        dirpath: directory in which the per-entity rrd files are looked up.
        start: value passed to rrdtool's ``--start`` option.
        step: value passed to rrdtool's ``--step`` option (default 86400).

    Returns:
        The command as a list of arguments (program name first).

    """
    cmd = [
        'rrdtool', 'xport', '--json',
        '--start', str(start),
        '--end', 'now-1d',
        '--step', str(step),
    ]

    for entity in ENTITIES:
        rrd_path = os.path.join(dirpath, FILENAME_PATTERN.replace('###', entity))
        # Entities without an rrd file on disk are left out of the export.
        if not os.path.exists(rrd_path):
            continue
        cmd += [
            'DEF:out-%s1=%s:%s:AVERAGE' % (entity, rrd_path, DS),
            'XPORT:out-%s1:%s' % (entity, entity),
        ]

    return cmd
def retrieve_json(cmd):
    """Execute cmd and parse its (not quite json) standard output.

    Args:
        cmd: the command to execute, as a list of arguments.

    Returns:
        The python object decoded from the command's patched json output.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero
            status.
        ValueError: if the output is not valid utf-8 or, once patched, still
            is not valid json.

    """
    # run() reads the whole output, waits for the child and closes the pipe,
    # so neither a zombie process nor an open descriptor is left behind.
    # check=True turns a failed command into an explicit error instead of a
    # confusing json parsing failure on partial or empty output.
    completed = subprocess.run(cmd, stdout=subprocess.PIPE, check=True)

    # Hack: the json output is not well-formed (single-quoted strings and
    # unquoted 'about'/'meta' keys). None of the patterns below spans a
    # newline, so patching the whole output at once gives the same result as
    # patching it line by line.
    raw = completed.stdout
    raw = raw.replace(b'\'', b'"')
    raw = raw.replace(b'about: ', b'"about": ')
    raw = raw.replace(b'meta:', b'"meta": ')

    return json.loads(raw.decode('utf-8'))
def prepare_data(data):
    """Prepare the data as per-legend series of (x, y) coordinates.

    x is the time (in milliseconds), y is the actual value.

    Args:
        data: dict holding a 'meta' dict (with the 'step', 'start' and
            'legend' entries) and a 'data' list of rows, each row carrying
            one value per legend column.

    Returns:
        A dict mapping each legend to its list of (timestamp, value) tuples.
        None values are skipped, so a legend with no value at all is absent
        from the result.

    """
    meta = data['meta']
    # javascript has a ratio of 1000...
    step = meta['step'] * 1000  # nb of milliseconds
    start_ts = meta['start'] * 1000  # starting ts
    legends = meta['legend']  # one legend per column of a row

    series = {}
    row_ts = start_ts
    for values in data['data']:
        # The first row is stamped one step after the starting timestamp.
        row_ts += step
        for col, value in enumerate(values):
            if value is None:
                continue
            series.setdefault(legends[col], []).append((row_ts, value))

    return series
@click.command()
@click.option('--dirpath', default=DIRPATH, help="Default path to look for rrd files.")
@click.option('--start', default=1434499200, help="Default starting timestamp")  # default is 2015-06-17T00:00:00Z
@click.option('--step', default=86400, help="Compute the data step (default to 86400).")
def main(dirpath, start, step):
    # No docstring on purpose: click would show it in the --help output.
    # The export itself is delegated to the system's rrdtool.
    exported = retrieve_json(compute_cmd(dirpath, start, step))
    # Reshape into per-legend (x, y) series and dump them as json on stdout.
    print(json.dumps(prepare_data(exported)))
# Run the command line interface only when executed as a script.
if __name__ == "__main__":
    main()
Computing file changes ...