https://github.com/chiragjp/nhanes_scidata
Raw File
Tip revision: cbe9ed3cbccb207695b73772a8504f43a8274cdf authored by Chirag J Patel on 16 June 2016, 11:34:26 UTC
corrected weight bug
Tip revision: cbe9ed3
User_Guide_PIC.Rmd
---
title: "Executing NHANES analysis using the PIC-SURE API"
author: "Chirag J Patel and Michael T McDuffie"
date: "May 19, 2016"
output: html_document
---

In this guide, we will show how to estimate the distribution of lead in children using the NHANES data via the PIC-SURE API.

## Required Libraries for communication
```{r}
library(httr)
library(jsonlite)
library(XML)
```

## Build Base Query Strings for interacting with IRCT

```{r}
# Host that serves the PIC-SURE / IRCT REST endpoints.
IRCT_REST_BASE_URL <- "http://bd2k-picsure.hms.harvard.edu/"

# Root of the REST interface; every service URL below is built on top of it.
IRCT_CL_SERVICE_URL <- paste0(IRCT_REST_BASE_URL, "IRCT-CL/rest/")

# One base URL per service: resources, query building, result download.
IRCT_RESOURCE_BASE_URL <- paste0(IRCT_CL_SERVICE_URL, "resourceService/")
IRCT_QUERY_BASE_URL <- paste0(IRCT_CL_SERVICE_URL, "queryRESTService/")
IRCT_RESULTS_BASE_URL <- paste0(IRCT_CL_SERVICE_URL, "resultService/")

# Resource service: list the available resources.
IRCT_LIST_RESOURCE_URL <- paste0(IRCT_RESOURCE_BASE_URL, "resources")

# Query service: start a query, add a where clause, run the query.
IRCT_START_QUERY_URL <- paste0(IRCT_QUERY_BASE_URL, "startQuery")
IRCT_WHERE_QUERY_URL <- paste0(IRCT_QUERY_BASE_URL, "whereClause")
IRCT_RUN_QUERY_URL <- paste0(IRCT_QUERY_BASE_URL, "runQuery")

# Query service: add a select clause.
IRCT_SELECT_QUERY_URL <- paste0(IRCT_QUERY_BASE_URL, "selectClause")

# Result service: download a finished result as JSON.
IRCT_GET_JSON_RESULTS_URL <- paste0(IRCT_RESULTS_BASE_URL, "download/json")
```


## First, get the Lead data
```{r}
# Start a new query; the returned conversation id (cid) is passed along with
# every later request that belongs to this query.
response <- GET(IRCT_START_QUERY_URL, verbose())
# Fail here on an HTTP error instead of carrying a NULL cid into later calls.
stop_for_status(response)
conversationId <- content(response)$cid

# Where clause on the SEX field.
# NOTE(review): presumably this keeps every participant that has a SEX
# record -- confirm against the IRCT API documentation.
whereParameterList <- list(
  type = "where",
  field = "NHANES Public/Public Studies///NHANES/NHANES/demographics/SEX/",
  logicalOperator = "AND",
  predicate = "CONTAINS",
  "data-encounter" = "No",
  cid = conversationId
)
response <- GET(IRCT_WHERE_QUERY_URL, query = whereParameterList, verbose())
stop_for_status(response)
content(response)


## Add select clauses: get sex, age, race, and Lead biomarker data

# alias = field path; one select request is sent per entry, in this order.
selectFields <- c(
  SEX = "NHANES Public/Public Studies///NHANES/NHANES/demographics/SEX/",
  AGE = "NHANES Public/Public Studies///NHANES/NHANES/demographics/AGE/",
  Race = "NHANES Public/Public Studies///NHANES/NHANES/demographics/RACE/",
  Lead = "NHANES Public/Public Studies///NHANES/NHANES/laboratory/heavy metals/Lead (ug/dL)"
)

for (fieldAlias in names(selectFields)) {
  selectParameterList <- list(
    type = "select",
    field = selectFields[[fieldAlias]],
    alias = fieldAlias,
    cid = conversationId
  )
  response <- GET(IRCT_SELECT_QUERY_URL, query = selectParameterList, verbose())
  stop_for_status(response)
  # Values are not auto-printed inside a loop, so print explicitly.
  print(content(response))
}

## Run the full query and store the result id

runQueryList <- list(cid = conversationId)
response <- GET(IRCT_RUN_QUERY_URL, query = runQueryList, verbose())
stop_for_status(response)
resultId <- content(response)$resultId
content(response)
```

## Retrieve Results
```{r}
# Download the finished lead query as JSON, addressed by its result id.
leadResultUrl <- paste0(IRCT_GET_JSON_RESULTS_URL, "/", resultId)
response <- GET(leadResultUrl, verbose())
results <- content(response)
# Stack the parsed records row-wise and wrap them in a data frame.
leadData <- data.frame(do.call(rbind, results))

```

## Now, get the NHANES survey-related information (e.g., Survey Year, PSU, strata)
```{r}

# Start a second, independent query for the survey design variables; its
# conversation id (cid) replaces the one used for the lead query.
response <- GET(IRCT_START_QUERY_URL, verbose())
# Fail here on an HTTP error instead of carrying a NULL cid into later calls.
stop_for_status(response)
conversationId <- content(response)$cid

# Same where clause as the lead query, so both queries are filtered alike.
# NOTE(review): presumably this keeps every participant that has a SEX
# record -- confirm against the IRCT API documentation.
whereParameterList <- list(
  type = "where",
  field = "NHANES Public/Public Studies///NHANES/NHANES/demographics/SEX/",
  logicalOperator = "AND",
  predicate = "CONTAINS",
  "data-encounter" = "No",
  cid = conversationId
)
response <- GET(IRCT_WHERE_QUERY_URL, query = whereParameterList, verbose())
stop_for_status(response)
content(response)

# alias = field path; one select request is sent per entry, in this order.
selectFields <- c(
  "Survey Year" = "NHANES Public/Public Studies///NHANES/NHANES/demographics/SDDSRVYR/",
  "Survey PSU" = "NHANES Public/Public Studies///NHANES/NHANES/demographics/SDMVPSU/",
  "Survey Strata" = "NHANES Public/Public Studies///NHANES/NHANES/demographics/SDMVSTRA/",
  "Survey Weighting" = "NHANES Public/Public Studies///NHANES/NHANES/demographics/WTMEC2YR/"
)

for (fieldAlias in names(selectFields)) {
  selectParameterList <- list(
    type = "select",
    field = selectFields[[fieldAlias]],
    alias = fieldAlias,
    cid = conversationId
  )
  response <- GET(IRCT_SELECT_QUERY_URL, query = selectParameterList, verbose())
  stop_for_status(response)
  # Values are not auto-printed inside a loop, so print explicitly.
  print(content(response))
}


# Run the query and keep the result id for the download step.
runQueryList <- list(cid = conversationId)
response <- GET(IRCT_RUN_QUERY_URL, query = runQueryList, verbose())
stop_for_status(response)
resultId <- content(response)$resultId
content(response)
```

## Retrieve Survey Related Information
```{r}
# Download the finished survey query as JSON, addressed by its result id.
surveyResultUrl <- paste0(IRCT_GET_JSON_RESULTS_URL, "/", resultId)
response <- GET(surveyResultUrl, verbose())
results <- content(response)
# Stack the parsed records row-wise and wrap them in a data frame.
surveyData <- data.frame(do.call(rbind, results))
```

## Get survey data frame ready for computation
```{r}
library(survey)

# The two result sets are joined column-wise, i.e. row i of surveyData is
# paired with row i of leadData, so they must have the same number of rows.
dim(surveyData)
dim(leadData)
stopifnot(nrow(surveyData) == nrow(leadData))
bothData <- cbind(surveyData, leadData)

# Rename columns for usability. The names are assigned by position, so the
# column count must match exactly.
analysisColumnNames <- c(
  "SDMVSTRA", "SDDSRVYR", "PATIENT_NUM", "WTMEC2YR", "SDMVPSU",
  "LEAD", "race_white", "sex_male", "sex_female", "age", "race_black",
  "PATIENT_NUM2", "race_mexican", "race_other", "race_other_hispanic"
)
stopifnot(ncol(bothData) == length(analysisColumnNames))
colnames(bothData) <- analysisColumnNames

# Each result set carries its own participant id (PATIENT_NUM from the survey
# query, PATIENT_NUM2 from the lead query). If they disagree on any row, the
# positional cbind() above attached survey weights to the wrong participants.
patientIdsAligned <- identical(
  as.character(bothData$PATIENT_NUM),
  as.character(bothData$PATIENT_NUM2)
)
if (!patientIdsAligned) {
  stop("surveyData and leadData rows are not aligned on PATIENT_NUM",
       call. = FALSE)
}

# Convert the design variables and the analysis variables to numeric.
numericColumns <- c("SDMVSTRA", "WTMEC2YR", "SDMVPSU", "SDDSRVYR", "age", "LEAD")
for (columnName in numericColumns) {
  bothData[[columnName]] <- as.numeric(bothData[[columnName]])
}

## Create survey design object that accommodates the NHANES data structure.
## Only rows with a positive weight are kept (subset() also drops rows whose
## weight is NA).
dsn <- svydesign(
  ids = ~SDMVPSU,
  strata = ~SDMVSTRA,
  weights = ~WTMEC2YR,
  nest = TRUE,
  data = subset(bothData, WTMEC2YR > 0)
)
```

## Estimate of quartiles of lead in the overall population, females, males
```{r}
# Minimum, quartiles and maximum of lead.
leadQuantileProbs <- c(0, .25, .5, .75, 1)

# overall
svyquantile(~LEAD, design = dsn, quantiles = leadQuantileProbs, na.rm = TRUE)

# females
femaleDesign <- subset(dsn, sex_female == 'female')
svyquantile(~LEAD, design = femaleDesign, quantiles = leadQuantileProbs, na.rm = TRUE)

# males
maleDesign <- subset(dsn, sex_male == 'male')
svyquantile(~LEAD, design = maleDesign, quantiles = leadQuantileProbs, na.rm = TRUE)
```
## Estimates of quartiles of lead in participants < 18 years of age
```{r}
# Restrict the design to participants strictly younger than 18, as stated in
# the section heading; `age <= 18` would also include 18-year-olds.
age18 <- subset(dsn, age < 18)

# Minimum, quartiles and maximum of lead within the under-18 subpopulation.
svyquantile(~LEAD, quantiles = c(0, .25, .5, .75, 1), age18, na.rm = TRUE) # overall
svyquantile(~LEAD, quantiles = c(0, .25, .5, .75, 1), subset(age18, sex_female == 'female'), na.rm = TRUE) # females
svyquantile(~LEAD, quantiles = c(0, .25, .5, .75, 1), subset(age18, sex_male == 'male'), na.rm = TRUE) # males

```
back to top