https://github.com/ab4377/dream-project
Tip revision: 8e64d9628355fde264b6481f9955478fed995fe5 authored by Avinash Bukkittu on 04 October 2017, 22:20:26 UTC
Merge branch 'master' of https://github.com/ab4377/dream-project
Tip revision: 8e64d96
ExtractConvertedData.py
import pandas as pd
import numpy as np
import Constants
import os
import sys
import argparse
def data_preparation(recordIds, start_index, end_index, filename):
    """Collect accelerometer samples for a slice of record ids and write them to CSV.

    Parameters
    ----------
    recordIds : pandas.DataFrame
        Frame with a "recordId" column; rows [start_index:end_index] are used.
    start_index, end_index : int
        Half-open slice bounds into recordIds; start_index must be < end_index.
    filename : str
        Destination CSV path; written only when at least one sample was found.

    For each record id, reads the converted "outbound.csv" and "return.csv"
    segments (when present) from Constants.converted_data_location and
    concatenates their x/y/z columns into a single CSV.
    """
    assert start_index < end_index
    recordIds = recordIds.iloc[start_index:end_index]
    # Accumulate plain dicts and build the DataFrame once at the end:
    # calling DataFrame.append per row is O(n^2) and the method is
    # removed in modern pandas.
    rows = []
    for _, record in recordIds.iterrows():
        file_location = Constants.converted_data_location + record["recordId"] + "/"
        # A recording may have an outbound walk segment, a return walk
        # segment, or both; handle them with one shared code path.
        for segment in ("outbound", "return"):
            path = file_location + segment + ".csv"
            if os.path.isfile(path):
                print("Fetching " + segment + " data for recordId " + record["recordId"])
                segment_df = pd.read_csv(path)
                for _, row in segment_df.iterrows():
                    rows.append({"x": row["x"], "y": row["y"], "z": row["z"]})
    if rows:
        accel_data = pd.DataFrame(rows, columns=["x", "y", "z"])
        accel_data.to_csv(filename, index=False)
        # BUGFIX: the original printed the module-global `search_key` here,
        # which raises NameError when this function is imported and called
        # outside the __main__ block; report the output file instead.
        print("Written to " + filename)
if __name__ == '__main__':
    # CLI: phone_type label start_index end_index
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("phone_type", help="phone-type")
    arg_parser.add_argument("label", help="label")
    arg_parser.add_argument("start_index", help="start-index")
    arg_parser.add_argument("end_index", help="end-index")
    args = arg_parser.parse_args()

    # Records are grouped by (phone model, medication time-point); the CLI
    # arguments select one such group.
    search_key = (args.phone_type, args.label)
    df = pd.read_csv(Constants.meta_data_location)
    grouped = df.groupby(by=["phoneInfo", "medTimepoint"])

    # Map each group key to its list of record ids. Use the `group` frame the
    # iterator already yields -- the original re-fetched it with
    # grouped.get_group(key) on every iteration for no benefit.
    groups = {key: list(group["recordId"]) for key, group in grouped}

    # index <-> name lookup tables for phones and labels (kept for parity with
    # the original script, although `filename` is built from the raw args).
    phones = pd.read_csv(Constants.data_location + "phone_index.csv")
    phone_index = {phone["index"]: phone["phone"] for _, phone in phones.iterrows()}
    inv_phone_index = {v: k for k, v in phone_index.items()}

    labels = pd.read_csv(Constants.data_location + "label_index.csv")
    label_index = {label["index"]: label["label"] for _, label in labels.iterrows()}
    inv_labels_index = {v: k for k, v in label_index.items()}

    # Spaces are not filesystem-friendly, so normalize them in the file name.
    phone = args.phone_type.replace(" ", "_")
    label = args.label.replace(" ", "_")
    filename = (Constants.data_location + phone + "_" + label + "-"
                + args.start_index + "-" + args.end_index + ".csv")

    print("FETCHING DATA for " + str(search_key) + " start_index = "
          + args.start_index + " end_index = " + args.end_index)
    # `in` replaces dict.has_key(), which was removed in Python 3; it behaves
    # identically in Python 2.
    if search_key in groups:
        print("filename: " + filename)
        data_preparation(grouped.get_group(search_key),
                         start_index=int(args.start_index),
                         end_index=int(args.end_index),
                         filename=filename)