https://github.com/cran/SmartEDA
Raw File
Tip revision: 5a18dd2cd336f1709f74335efd30dd6d52901401 authored by Dayanand Ubrangala on 30 January 2024, 17:50:02 UTC
version 0.3.10
Tip revision: 5a18dd2
ExpNumStat.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/fn_exp_numeric.R
\name{ExpNumStat}
\alias{ExpNumStat}
\title{Summary statistics for numerical variables}
\usage{
ExpNumStat(
  data,
  by = "A",
  gp = NULL,
  Qnt = NULL,
  Nlim = 10,
  MesofShape = 2,
  Outlier = FALSE,
  round = 3,
  weight = NULL,
  dcast = FALSE,
  val = NULL
)
}
\arguments{
\item{data}{dataframe or matrix}

\item{by}{group by A (summary statistics by All), G (summary statistics by group), GA (summary statistics by group and Overall)}

\item{gp}{target variable if any, default NULL}

\item{Qnt}{default NULL. Specifying quantiles, for example c(.25,0.75), will compute the 25th and 75th percentiles}

\item{Nlim}{numeric variable limit (default value is 10, which means it will only consider variables that have more than 10 unique values and whose type is numeric/integer)}

\item{MesofShape}{Measures of shapes (Skewness and kurtosis).}

\item{Outlier}{Calculate the lower hinge, upper hinge and number of outliers}

\item{round}{round off}

\item{weight}{a vector of weights, it must be equal to the length of data}

\item{dcast}{fast dcast from data.table}

\item{val}{Name of the column whose values will be filled to cast (see Details sections for list of column names)}
}
\value{
summary statistics for numeric independent variables

Summary by:

\itemize{
  \item \code{Only overall level}
  \item \code{Only group level}
  \item \code{Both overall and group level}
}
}
\description{
The function provides summary statistics for all numerical variables. It automatically scans through each variable and selects only numeric/integer variables. If the target variable is known, the function also generates the relationship between the target variable and each independent variable.
}
\details{
column descriptions
\itemize{
  \item \code{Vname} is Variable name
  \item \code{Group} is Target variable
  \item \code{TN} is Total sample (including NA observations)
  \item \code{nNeg} is Total negative observations
  \item \code{nPos} is Total positive observations
  \item \code{nZero} is Total zero observations
  \item \code{NegInf} is Negative infinite count
  \item \code{PosInf} is Positive infinite count
  \item \code{NA_value} is Not Available (NA) count
  \item \code{Per_of_Missing} is Percentage of missing
  \item \code{Min} is minimum value
  \item \code{Max} is maximum value
  \item \code{Mean} is average value
  \item \code{Median} is median value
  \item \code{SD} is Standard deviation
  \item \code{CV} is coefficient of variation (SD/mean)*100
  \item \code{IQR} is Inter quartile range
  \item \code{Qnt} is quantile values
  \item \code{MesofShape} is Skewness and Kurtosis
  \item \code{Outlier} is Number of outliers
  \item \code{Cor} is Correlation between target and independent variables
}
}
\examples{
# Descriptive summary of numeric variables: summary by target variable
ExpNumStat(mtcars,by="G",gp="gear",Qnt=c(0.1,0.2),MesofShape=2,
           Outlier=TRUE,round=3)
# Descriptive summary of numeric variables: overall summary
ExpNumStat(mtcars,by="A",gp="gear",Qnt=c(0.1,0.2),MesofShape=2,
           Outlier=TRUE,round=3)
# Descriptive summary of numeric variables: summary by overall and group
ExpNumStat(mtcars,by="GA",gp="gear",Qnt=seq(0,1,.1),MesofShape=1,
           Outlier=TRUE,round=2)
# Summary by specific statistics for all numeric variables
ExpNumStat(mtcars,by="GA",gp="gear",Qnt=c(0.1,0.2),MesofShape=2,
           Outlier=FALSE,round=2,dcast = TRUE,val = "IQR")
# Weighted summary statistics
ExpNumStat(mtcars,by="GA",gp="gear",Qnt=c(0.1,0.2),MesofShape=2,
           Outlier=FALSE,round=2,dcast = TRUE,val = "IQR", weight = "wt")

}
\seealso{
\code{\link[psych:describe.by]{describe.by}}
}
back to top