https://github.com/ab4377/dream-project
Tip revision: 8e64d9628355fde264b6481f9955478fed995fe5 authored by Avinash Bukkittu on 04 October 2017, 22:20:26 UTC
Merge branch 'master' of https://github.com/ab4377/dream-project
Tip revision: 8e64d96
ExtractConvertedData.py
import pandas as pd
import numpy as np
import Constants
import os
import sys
import argparse
def data_preparation(recordIds, start_index, end_index, filename):
    """Collect accelerometer samples for a slice of record ids and write them to CSV.

    Parameters
    ----------
    recordIds : pandas.DataFrame
        Frame with a "recordId" column; rows [start_index:end_index] are used.
    start_index, end_index : int
        Half-open slice bounds into recordIds; start_index must be < end_index.
    filename : str
        Destination CSV path; written only when at least one sample was found.

    For each record id, reads the converted "outbound.csv" and "return.csv"
    segments (when present) from Constants.converted_data_location and
    concatenates their x/y/z columns into a single CSV.
    """
    assert start_index < end_index
    recordIds = recordIds.iloc[start_index:end_index]
    # Accumulate plain dicts and build the DataFrame once at the end:
    # calling DataFrame.append per row is O(n^2) and the method is
    # removed in modern pandas.
    rows = []
    for _, record in recordIds.iterrows():
        file_location = Constants.converted_data_location + record["recordId"] + "/"
        # A recording may have an outbound walk segment, a return walk
        # segment, or both; handle them with one shared code path.
        for segment in ("outbound", "return"):
            path = file_location + segment + ".csv"
            if os.path.isfile(path):
                print("Fetching " + segment + " data for recordId " + record["recordId"])
                segment_df = pd.read_csv(path)
                for _, row in segment_df.iterrows():
                    rows.append({"x": row["x"], "y": row["y"], "z": row["z"]})
    if rows:
        accel_data = pd.DataFrame(rows, columns=["x", "y", "z"])
        accel_data.to_csv(filename, index=False)
        # BUGFIX: the original printed the module-global `search_key` here,
        # which raises NameError when this function is imported and called
        # outside the __main__ block; report the output file instead.
        print("Written to " + filename)
if __name__ == '__main__':
    # CLI: phone_type label start_index end_index
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("phone_type", help="phone-type")
    arg_parser.add_argument("label", help="label")
    arg_parser.add_argument("start_index", help="start-index")
    arg_parser.add_argument("end_index", help="end-index")
    args = arg_parser.parse_args()

    # Records are grouped by (phone model, medication time-point); the CLI
    # arguments select one such group.
    search_key = (args.phone_type, args.label)
    df = pd.read_csv(Constants.meta_data_location)
    grouped = df.groupby(by=["phoneInfo", "medTimepoint"])

    # Map each group key to its list of record ids. Use the `group` frame the
    # iterator already yields -- the original re-fetched it with
    # grouped.get_group(key) on every iteration for no benefit.
    groups = {key: list(group["recordId"]) for key, group in grouped}

    # index <-> name lookup tables for phones and labels (kept for parity with
    # the original script, although `filename` is built from the raw args).
    phones = pd.read_csv(Constants.data_location + "phone_index.csv")
    phone_index = {phone["index"]: phone["phone"] for _, phone in phones.iterrows()}
    inv_phone_index = {v: k for k, v in phone_index.items()}

    labels = pd.read_csv(Constants.data_location + "label_index.csv")
    label_index = {label["index"]: label["label"] for _, label in labels.iterrows()}
    inv_labels_index = {v: k for k, v in label_index.items()}

    # Spaces are not filesystem-friendly, so normalize them in the file name.
    phone = args.phone_type.replace(" ", "_")
    label = args.label.replace(" ", "_")
    filename = (Constants.data_location + phone + "_" + label + "-"
                + args.start_index + "-" + args.end_index + ".csv")

    print("FETCHING DATA for " + str(search_key) + " start_index = "
          + args.start_index + " end_index = " + args.end_index)
    # `in` replaces dict.has_key(), which was removed in Python 3; it behaves
    # identically in Python 2.
    if search_key in groups:
        print("filename: " + filename)
        data_preparation(grouped.get_group(search_key),
                         start_index=int(args.start_index),
                         end_index=int(args.end_index),
                         filename=filename)