# Import libraries ----
library(readr)
library(psych)
library(effsize)
library(apaTables)
library(moments)

# Import data ----
data <- read_csv("C:/Users/Saniya and Family/Downloads/files/PSY 350 Class Survey (2).csv")

# The three variables studied are: coffee consumption, whether or not
# psychology was the first major, and consistency.
# Consistency is the average of 10 items (columns 26-35), so we add a
# derived column con_avg holding each respondent's item mean.
con_data <- data[, 26:35]
data$con_avg <- rowMeans(con_data, na.rm = TRUE)

# Q1. Level of measurement:
# 1. Coffee consumption: ratio
# 2. Whether or not psychology was first major: ordinal
#    NOTE(review): a binary yes/no category is usually classified as
#    nominal, not ordinal — confirm against the course definitions.
# 3. Consistency average: interval

# Q2.
# a) Counts and proportions of first-major responses.
#    Computed from the data rather than hard-coded, so the numbers stay
#    correct if the CSV changes. Observed: 71 no, 101 yes.
major_counts <- table(data$firstmajor)
major_props <- prop.table(major_counts)
noprop <- unname(major_props["no"])   # proportion of no: 0.4127907
yesprop <- unname(major_props["yes"]) # proportion of yes: 0.5872093

# b) Descriptive statistics.
# Coffee consumption:
coffeemean <- mean(data$coffee, na.rm = TRUE)     # 5.970588
coffeesd <- sd(data$coffee, na.rm = TRUE)         # 6.926005
coffeskew <- skewness(data$coffee, na.rm = TRUE)  # 1.680131

# Consistency average:
con_mean <- mean(data$con_avg, na.rm = TRUE)      # 3.68469
con_sd <- sd(data$con_avg, na.rm = TRUE)          # 0.6388831
con_skew <- skewness(data$con_avg, na.rm = TRUE)  # -0.1082667

# Q3.
# a) Normal Q-Q plot of the consistency averages.
qqnorm(data$con_avg)
qqline(data$con_avg, col = "blue")
# b) Almost all points lie on the line, so the data can be considered
#    approximately normally distributed. The skewness computed above is
#    also close to 0, which supports normality.

# Q4.
# a) Cohen's d for coffee consumption by first-major status.
cohen1 <- cohen.d(as.numeric(data$coffee), as.factor(data$firstmajor),
                  alpha = 0.05, na.rm = TRUE)
d_coffee <- cohen1$estimate  # -0.182328
# b) This is a small effect.
# c) Because we only test whether students drink LESS than the average
#    (a directional hypothesis), a one-tailed test should be used.
# d) One-sample t-test against mu = 11.2.
test1 <- t.test(data$coffee, mu = 11.2)
ts_coffee <- test1$statistic   # test statistic = -9.8445
df_coffee <- test1$parameter   # df = 169
pval_coffee <- test1$p.value   # p-value < 2.2e-16

# Q5.
# a) Mean consistency by first-major status.
aggregate(data$con_avg, list(data$firstmajor), FUN = mean)
# no: 3.542254, yes: 3.784818 — students who had psychology as their
# first major report higher consistency.

# b) Cohen's d for consistency by first-major status.
cohen2 <- cohen.d(as.numeric(data$con_avg), as.factor(data$firstmajor),
                  alpha = 0.05, na.rm = TRUE)
d_con <- cohen2$estimate  # -0.3853915

# c) This is a medium effect.

# d) Because we ask whether the mean is higher OR lower (a
#    non-directional hypothesis), a two-tailed test should be used.

# e) Welch two-sample t-test comparing the two major groups.
test2 <- t.test(data$con_avg[data$firstmajor == "yes"],
                data$con_avg[data$firstmajor == "no"])
ts_con <- test2$statistic   # t = 2.4836
df_con <- test2$parameter   # df = 149.78
pval_con <- test2$p.value   # p-value = 0.01411

# f) Yes, there is a statistically significant difference between the
#    group means.