# -*- coding: utf-8 -*-
"""
Created on Thu Oct 4 16:39:50 2013

@author: Xiaoxuan Jia
"""

import csv
import itertools
import json
import os
import random
import re

try:
    import cPickle as pk  # Python 2
except ImportError:
    import pickle as pk  # Python 3 has no cPickle module

import numpy as np
import scipy
import scipy.io
import scipy.stats
from scipy.stats import norm

# The numeric helpers below need neither a database driver nor a plotting
# backend, so a missing optional package must not prevent importing them.
try:
    import pymongo
except ImportError:
    pymongo = None

try:
    import matplotlib.pyplot as plt
except ImportError:
    plt = None


def SBcorrection(corr, mult_factor):
    """Apply the Spearman-Brown prediction formula to a correlation.

    Parameters
    ----------
    corr : float or ndarray
        Observed correlation(s).
    mult_factor : float
        Multiplier applied to the correlation in the formula.

    Returns
    -------
    float or ndarray
        ``mult_factor * corr / (1 + (mult_factor - 1) * corr)``.
    """
    # The float literal forces true division even for integer inputs
    # under Python 2.
    return (mult_factor * corr) / (1.0 + (mult_factor - 1) * corr)


def normalize_CM(CF):
    """Return a copy of a 2-D matrix with every column scaled to sum to 1.

    Parameters
    ----------
    CF : array_like, 2-D
        Matrix whose columns are normalised independently.

    Returns
    -------
    ndarray of float
        Same shape as ``CF``. A column whose sum is 0 comes back as NaN.
    """
    cm = np.asarray(CF, dtype=float)
    return cm / cm.sum(axis=0)


def _correct_extreme_rate(rate, n_trials):
    """Pull a rate of exactly 0 or 1 inward by 1/(2*n_trials)."""
    half_step = 1.0 / (2.0 * n_trials)
    if rate == 1:
        return 1.0 - half_step
    if rate == 0:
        return half_step
    return rate


def d_prime2x2(CF):
    """Compute d' from a 2x2 matrix of counts.

    Column 0 supplies the hit rate ``CF[0,0] / (CF[0,0] + CF[1,0])`` and
    column 1 the false-alarm rate ``CF[0,1] / (CF[0,1] + CF[1,1])``.
    A rate of exactly 0 or 1 is moved inward by ``1 / (2 * column total)``
    so that the result stays finite.

    Parameters
    ----------
    CF : array_like, shape (2, 2)
        Count matrix.

    Returns
    -------
    float
        ``norm.ppf(hit rate) - norm.ppf(false-alarm rate)``.
    """
    # Work in floats: with integer counts, Python 2 would floor both the
    # rates and the 1/(2n) correction to 0.
    cm = np.asarray(CF, dtype=float)
    n_signal = cm[0, 0] + cm[1, 0]
    n_noise = cm[0, 1] + cm[1, 1]
    H = _correct_extreme_rate(cm[0, 0] / n_signal, n_signal)
    F = _correct_extreme_rate(cm[0, 1] / n_noise, n_noise)
    return norm.ppf(H) - norm.ppf(F)


def d_prime(CF):
    """Compute one d' value per column of a square confusion matrix.

    For column ``i`` the hit rate is the diagonal entry divided by the
    column total. The false-alarm rate is the sum of row ``i`` over all
    other columns divided by the total of those other columns.

    No correction is applied to rates of exactly 0 or 1, so the
    corresponding d' is infinite (unlike ``d_prime2x2``).

    Parameters
    ----------
    CF : sequence
        Only ``CF[0]`` is read and it is treated as the 2-D matrix.
        NOTE(review): presumably callers pass a (matrix, order) pair or a
        list of matrices -- confirm against the call sites.

    Returns
    -------
    list of float
        One d' per column, in column order.
    """
    cm = np.asarray(CF[0], dtype=float)
    d = []
    for i in range(cm.shape[1]):
        H = cm[i, i] / cm[:, i].sum()
        # Drop the target column; what remains are the non-target trials.
        non_target = np.delete(cm, i, axis=1)
        # ndarray.sum() gives the grand total. The builtin sum() would
        # return per-column sums here and turn F into a vector.
        F = non_target[i, :].sum() / non_target.sum()
        d.append(norm.ppf(H) - norm.ppf(F))
    return d


def offDmass(CF):
    """Return the fraction of a square matrix's total that lies off the diagonal.

    Parameters
    ----------
    CF : array_like, shape (n, n)
        Matrix of counts or weights.

    Returns
    -------
    float
        Sum of the off-diagonal entries divided by the sum of all entries
        (NaN if the matrix sums to 0).
    """
    cm = np.asarray(CF, dtype=float)
    off_diagonal = ~np.eye(cm.shape[0], dtype=bool)
    # Use the ndarray total; builtin sum() on a 2-D array yields a vector.
    return cm[off_diagonal].sum() / cm.sum()


# NOTE(review): the original physical line goes on to open class expDataDB,
# whose body continues on the following whitespace-collapsed lines. That
# fragment is kept verbatim below instead of being reconstructed.
# class expDataDB(object): def __init__(self, collection, selector, numObjs, obj, trialNum): conn = pymongo.Connection(port = 22334, host = 'localhost') db = conn.mturk col = db[collection] self.obj = obj self.trialNum = trialNum self.subj_data = list(col.find(selector)) self.numObjs = numObjs if obj != 'face': obj_inds = [] for idx, t in enumerate(self.subj_data[0]['ImgData']): if len(np.unique(obj_inds)) == self.numObjs: break else: if len(t)<10:
obj_inds.append(t[0]['obj']) else: obj_inds.append(t['obj']) self.models = np.unique(obj_inds) self.models_idxs = {} for idx, model in enumerate(self.models): self.models_idxs[model] = idx self.models_idxs = self.models_idxs self.trial_data = self.preprocess(self.subj_data, self.obj, self.trialNum) self.numResp = numObjs self.totalTrials = len(self.trial_data) self.corr_type = 'pearson' def init_from_pickle(self, pkFile): f = open(pkFile, 'rb') data = pk.load(f) f.close() self.subj_data = data self.trial_data = self.preprocess(self.subj_data) self.totalTrials = len(self.trial_data) def setPopCM(self): if self.numResp == 2: self.popCM, self.CM_order = self.getPopCM2x2fast(self.trial_data) else: self.popCM, self.CM_order = self.getPopCM(self.trial_data) def preprocess(self, subj_data, obj, trialNum): # before the fb experiment, the HvM metadata, uploaded urls dont have unique hash id in the url, after feedback exp, both meta and the pushed json files changed RV = [] #Response vector SV = [] #Stimulus vector DV = [] #Distractor vector if obj=='face': RV = [] #Response vector DV = [] #Distractor vector RT = [] for subj in self.subj_data: # subj is dict in list subj_data; to access string values in a dist within a list, use subj_data[0]['Response'] models_name = np.unique(subj['Response']) models_size = np.unique(subj['Size']) self.models = [] for idx1 in models_name: for idx2 in models_size: self.models.append([str(idx1)+'_'+str(idx2)]) models_idxs = {} for idx, model in enumerate(self.models): models_idxs[tuple(model)] = idx self.models_idxs = models_idxs for t_idx, t in enumerate(subj['RT']): if t_idx>=trialNum[0] and t_idx=trialNum[0] and r_idx=trialNum[0] and s_idx=trialNum[0] and r_idx=trialNum[0] and s_idx=trialNum[0] and r_idx=trialNum[0] and s_idx=trialNum[0] and t_idx=trialNum[0] and r_idx=trialNum[0] and s_idx=trialNum[0] and t_idx=trialNum[0] and r_idx=trialNum[0] and s_idx=trialNum[0] and r_idx=trialNum[0] and s_idx 0: comb[0] = order if pick == target: idx 
= comb[0].index(pick) CMs[comb[1]][idx, idx] += 1 elif pick != target: CMs[comb[1]][comb[0].index(pick), comb[0].index(target)] += 1 else: print('Matrix Error') return CMs, combs def getexposureCM(self, trial_data, trialNum, expoNum): # trial_data is for individual subj or for all subj (myresult.trial_data) if len(trial_data[0][2]) != len(self.trial_data[0][2]): numResp = len(trial_data[0][2]) # should not use self.trial_data else: numResp = len(self.trial_data[0][2]) #print numResp obj_inds = [] for t in trial_data: if len(np.unique(obj_inds)) == self.numObjs: break else: obj_inds.append(t[0]) condi = self.subj_data[0]['Combinations'] newcondi = [] s1 = set(['NONSWAP', 'SWAP']) for subj in self.subj_data: s2 = set(subj.keys()) for s in subj[list(s1.intersection(s2))[0]]: newcondi.append([x for idx, x in enumerate(condi[int(s)]) if idx>= expoNum[0] and idx=trialNum[0] and r_idx=trialNum[0] and s_idx