examples for module 2

kiat · Aug 10, 2018 · 16611b8 · 16611b8
1 parent a9d33f8
commit 16611b8
Show file tree

Hide file tree

Showing 7 changed files with 214 additions and 49 deletions.
diff --git a/Module-2-Example-1.R b/Module-2-Example-1.R
@@ -0,0 +1,78 @@
+
+# One sample test using asbio library 
+
+# install.packages("asbio")
+library(tcltk)
+library(asbio)
+
+?one.sample.z
+# Read the documentation and make sure you understand it. 
+# Provides a one-sample hypothesis test. 
+# The test assumes that the underlying population is normal and furthermore that σ is known.
+
+
+
+
+#################################
+#################################
+####      An Example     ########
+#################################
+#################################
+
+
+
+# A gym is interested in whether a 6-week weight loss training program they launched has
+# been successful in helping their clients lose weight. To assess this, they took a sample of
+# 30 participants. They are interested in testing the following hypotheses:
+
+
+# H_0 :μ=0 (there is no effect on weight change of program participants)
+# H_1 :μ<0 (program participants lose weight on average)
+
+
+one.sample.z(null.mu = 0, xbar = -2.98, sigma = 6, n = 30, alternative = "less")
+
+# One sample z-test 
+# z*            P-value
+# -2.720355  0.00326059
+
+# This is a small P-value. Thus we have strong evidence against the null hypothesis. 
+
+
+
+
+
+#################################
+#################################
+####   Another Example   ########
+#################################
+#################################
+
+# Samples from 50 water sources throughout the county are taken and the levels of a
+# chemical are measured. 
+# They are interested in testing the following hypotheses:
+
+
+# H0:μ=15 (the mean level of the chemical is normal)
+# H1:μ≠15 (the mean level of the chemical is abnormal)
+# Suppose we know that the population standard deviation is 6.2. The sample mean from
+# the 50 samples was 16.4 ppm.
+# 
+# Calculate the value of the test statistic and the associated p-value.
+
+
+
+
+one.sample.z(null.mu = 15, xbar = 16.4, sigma = 6.2, n = 50, alternative = "two.sided")
+
+# One sample z-test 
+# z*            P-value
+# 1.596693     0.1103342
+
+# It appears that the sample mean that we observed 
+# (xbar=16.4) is moderately likely to have occurred if the true
+# population mean was 15 ppm (if μ=15). 
+# 
+# This means we don’t have strong evidence against the null hypothesis.
+
+
diff --git a/Module-2-Example-2.R b/Module-2-Example-2.R
@@ -0,0 +1,134 @@
+# Let us remove all variables from the memory to have nothing in memory 
+rm(list=ls())
+
+# Pearson and Lee's data on the heights of parents and children classified by gender
+# 
+
+# Set the working directory to the folder in which you have stored the data files. 
+# The path is specific to each machine: on my laptop it is one folder, on your laptop it is the folder where you stored the files. 
+# On Windows machines, you need to replace the \ backslash with a / slash in the path. 
+
+# setwd("SET THE Working Director to THE PATH TO THIS DIRECTORY")
+
+
+# Load up your dataset 
+data <- read.csv("Datasets/PearsonLee.csv")
+
+# print(data)
+
+# print out the column names so that you can view and check what is inside the dataframe
+colnames(data)
+
+# If that was too much printing, you can print out the first 4 lines of your dataset and not all of that. 
+# print first 4 rows of mydata
+head(data, n=4)
+
+
+# The heights are taken from the column child and then selected based on the value of the column chl ("Son" or "Daughter"). 
+heightsOfBoys <- data$child[data$chl=="Son"]
+heightsOfGirls <- data$child[data$chl=="Daughter"]
+
+# Note that this is a filtering functionality: we first take the child column, which holds the heights of the kids, and then we divide that column into the two groups Son and Daughter based on the variable chl. 
+
+# We can also extract the heights of fathers and mothers from the data frame. 
+heightsOfFathers <- data$parent[data$par=="Father"]
+heightsOfMothers <- data$parent[data$par=="Mother"]
+
+
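+# check the length of each variable that you have created 
+# this should return 20 (NOTE: the sample t-test output shown later reports df = 18, which implies 19 values - verify with your data) 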
+length(heightsOfBoys)
+
+# This should return 20
+length(heightsOfGirls)
+
+
+# We can use the aggregate function to generate the summary for each different group, girls and boys 
+aggregate(data$child, by =list(data$chl), FUN=summary)
+
+aggregate(data$parent, by =list(data$par), FUN=summary)
+
+
+# read more about the aggregate function by using the ?aggregate command 
+# aggregate splits the data into subsets, computes summary statistics for each, and returns the result in a convenient form.
+
+# aggregate function can be used to generate the standard deviation for each subset 
+aggregate(data$child, by =list(data$chl), FUN=sd)
+
+
+# Now, let us make a set of plots to present some of the important summaries of our heights dataset. 
+# we want to make 4 plots side by side in 2 rows and 2 columns 
+par(mfrow=c(2,2))
+
+# Now we can make a boxplot to see the heights. 
+# we use the "~" to separate the heights of the children into subsets based on their gender
+boxplot(data$child~data$chl)
+
+# By using the attach command, we can attach the whole dataframe and access the columns as variable names, like child and parent 
+attach(data)
+
+# print out the heights of the children
+child
+
+# print out the heights of the parents
+parent
+
+# We can use the tapply function to calculate the means of each group 
+# Here we want to calculate the mean, but subset it based on their group  
+means <- tapply(child, chl, mean)  
+
+# Now, we find out the lower bound of the heights for each group
+# Please note that we pass tapply a function that we define ourselves; inside that function we use t.test to get the lower bound of the confidence interval. 
+lower <- tapply(child, chl, function(v) t.test(v)$conf.int[1])
+
+# To understand how I get the lower bound from the t.test function, you can take a look at the values that the t.test function returns. 
+
+# then we run t.test and print out the results
+# Read the results of this test and try to understand the details 
+results<-t.test(heightsOfBoys)
+
+# The result of the t-test is something like the following. 
+# Result:
+# One Sample t-test
+#
+# data:  heightsOfBoys
+# t = 40.129, df = 18, p-value < 2.2e-16
+# alternative hypothesis: true mean is not equal to 0
+# 95 percent confidence interval:
+#  63.21792 70.20313
+# sample estimates:
+#  mean of x 
+# 66.71053 
+
+# And the following command returns the confidence intervals 
+# It simply includes a lower bound and upper bound interval of my data 
+t.test(heightsOfBoys)$conf.int
+
+# The first index of the conf.int variable is the lower bound and the second is the upper bound; the following returns the lower bound
+t.test(heightsOfBoys)$conf.int[1]
+
+# this is the upper bound 
+t.test(heightsOfBoys)$conf.int[2]
+
+# We want to use this function to present the lowerbound and upperbound of the heights of our data to add that to our bar plot and have a nice bar plot.  
+# For this purpose we combine it with tapply command 
+lower <- tapply(child, chl, function(v) t.test(v)$conf.int[1])
+upper <- tapply(child, chl, function(v) t.test(v)$conf.int[2])
+
+# install the package gplots 
+# if you have done this one time before you do not need it again. 
+# install.packages("gplots")
+
+# then we need to load the library 
+library("gplots")
+
+# Bar plot of the mean child height per group, with the 95% confidence 
+# interval of each mean drawn on top of its bar. 
+# click on the zoom to see the graph better. 
+# tapply() orders the groups by the sorted values of chl ("Daughter" comes before "Son"), 
+# so the bar labels are looked up by group name instead of being hard-coded by position. 
+group_labels <- unname(c(Son = "Boys", Daughter = "Girls")[names(means)])
+# The y axis is scaled from the data so that the upper confidence bound is not clipped. 
+barplot2(means, plot.ci = TRUE, ci.l = lower, ci.u = upper, names.arg = group_labels,
+         xlab = "Heights of Kids",
+         main = "Means of heights of Childs by their group", ylab = "Heights of Childs",
+         col = "seagreen2", ylim = c(0, max(upper) * 1.1))
+
+hist(heightsOfBoys)
+hist(heightsOfGirls)
+
+
diff --git a/R-Example-Program-3.2.R → Module-2-Example-3.R b/R-Example-Program-3.2.R → Module-2-Example-3.R
diff --git a/R-Example-Program-3.3.R → Module-2-Example-4.R b/R-Example-Program-3.3.R → Module-2-Example-4.R
@@ -21,7 +21,7 @@ boxplot(strength~weeks)
 # Bar graph with confidence intervals
 means <- tapply(strength, weeks, mean)
 
-
+# Calculate the Confidence Interval. 
 lower <- tapply(strength, weeks, function(v) t.test(v)$conf.int[1])
 
 upper <- tapply(strength, weeks, function(v) t.test(v)$conf.int[2])
@@ -31,6 +31,7 @@ upper <- tapply(strength, weeks, function(v) t.test(v)$conf.int[2])
 # 
 max(upper)
 
+# A bar plot with intervals on top of it. 
 barplot2(means, plot.ci=TRUE, ci.l=lower, ci.u=upper, ylim = c(0, max(upper)*1.1), 
          names.arg=c("2 weeks", "16 weeks"))
 

diff --git a/R-Example-Program-3.1.R → Module-2-Example-5.R b/R-Example-Program-3.1.R → Module-2-Example-5.R
diff --git a/R-Example-Program-1.R b/R-Example-Program-1.R
diff --git a/R-Example-Program-3.R b/R-Example-Program-3.R