# ---------------------------------------------------------------------------
# R-code notes compilation for Week 4 (February 8th, 10th)
# Compiled by: Devansh Kamra, B.Math (Hons.) 1st year
# ---------------------------------------------------------------------------

# Parsing dates with as.Date() ----
# as.Date() is built in and stores calendar dates only (no time of day).
# Related classes:
#   POSIX (Portable Operating System Interface) classes - dates and times
#     with time zones.
#   chron - dates and times.
# The `format` argument describes the layout of the input string:
#   %d = day of the month
#   %m = month (decimal number)
#   %y = year (2 digits)
#   %Y = year (4 digits)
#   %B = month (full name)
#   %b = month (abbreviated)
# NOTE: %B and %b are matched against month names of the current locale, so
# the English month names below parse to NA under a non-English locale.
as.Date("1/15/2001", format = "%m/%d/%Y")
as.Date("April 26, 2001", format = "%B %d, %Y")
as.Date("22JUN01", format = "%d%b%y")

# A named vector of birthdays ----
bdays <- c(
  CRRao = as.Date("1920-09-10"),
  PCMahalanobis = as.Date("1893-06-29"),
  Cramer = as.Date("1893-09-25"),
  KRParthasarathy = as.Date("1936-06-25")
)
weekdays(bdays)  # Day of the week on which each person was born
months(bdays)    # Month in which each person was born

# The same date written in two different formats ----
datef1 <- as.Date("02/08/2021", format = "%m/%d/%Y")
datef1
datef2 <- as.Date("February 8, 2021", format = "%B %d, %Y")
datef2

# Differences between two dates ----
# Both operands are re-assigned here so that the differences below compare
# 8 April 2021 with 8 October 2021 (assigning the first date to any other
# name would leave datef1 at its earlier February value).
datef1 <- as.Date("04/08/2021", format = "%m/%d/%Y")
datef2 <- as.Date("October 8, 2021", format = "%B %d, %Y")
datef1 - datef2                            # Difference in days
difftime(datef1, datef2, units = "weeks")  # Difference in weeks (built in)
difftime(datef1, datef2, units = "days")   # Difference in days
# difftime() defaults to units = "auto", which picks the largest suitable
# unit; for two dates that differ by a day or more that unit is days.
difftime(datef1, datef2)
datef2 + 10  # Adds 10 days to the given date
datef1 - 10  # Subtracts 10 days from the given date

# Vectors of dates ----
three.days <- as.Date(c("2020-07-22", "2019-04-20", "2022-10-06"))
three.days
diff(three.days)  # Differences (in days) between consecutive elements

# Sequences of dates ----
# `length.out` is spelled out in full rather than relying on partial
# matching of `length`.
Seven <- seq(datef1, length.out = 7, by = "week")
Seven  # Seven dates one week apart, starting from datef1
Seven <- seq(datef1, length.out = 7, by = 14)
Seven  # Seven dates 14 days apart, starting from datef1
Seven <- seq(datef1, length.out = 7, by = "2 weeks")
Seven  # Seven dates two weeks apart, starting from datef1

# Help page listing every conversion specification (%d, %m, %H, ...).
# strptime() is part of base R; no additional package is required.
?strptime

# Date-times with POSIXct ----
Time1 <- as.POSIXct("2023-07-24 23:55:26")
Time1
# A non-standard layout needs an explicit format
Time2 <- as.POSIXct("25072023 08:32:07", format = "%d%m%Y %H:%M:%S")
Time2

# tz = time zone. Use the Olson name: the abbreviation "IST" is ambiguous
# (India, Israel, Ireland) and is not a valid zone name, so it would not
# select Indian Standard Time.
Time3 <- as.POSIXct("2020-01-01 11:42:03", tz = "Asia/Kolkata")
Time3
Time2 > Time1  # Logical comparison, returns TRUE/FALSE
# The unit of the difference is chosen automatically (hours here); use
# difftime(Time2, Time1, units = "secs") to force seconds.
Time2 - Time1
Time1 + 30  # Adds 30 seconds
Time1 - 30  # Subtracts 30 seconds

# POSIXct adjusts for daylight saving time: US clocks moved forward one hour
# on 2013-03-10, so this difference is one hour shorter than the wall-clock
# gap. The zone is given explicitly because the session's zone may not
# observe DST.
as.POSIXct("2013-03-10 08:32:07", tz = "America/New_York") -
  as.POSIXct("2013-03-09 23:55:26", tz = "America/New_York")

Sys.time()  # Current date-time; ct = calendar time, lt = local time
unclass(Time1)  # Underlying number: seconds since 1970-01-01 00:00:00 UTC
difftime(Time1, as.POSIXct("1970-01-01 00:00:00", tz = "UTC"), units = "secs")

# Date-times with POSIXlt ----
Time1.lt <- as.POSIXlt("2022-07-24 23:55:26")
Time1.lt

unclass(Time1.lt)  # The components (sec, min, hour, ...) as a list
unlist(Time1.lt)   # The same components flattened into a named vector

Time1.lt$sec
Time1.lt$wday
trunc(Time1.lt, "days")  # Drops everything below the day
trunc(Time1.lt, "mins")  # Drops everything below the minute

# Date-times with chron ----
# library() fails loudly if chron is missing; require() would only return
# FALSE and let the script break later.
library(chron)
time1.c <- as.chron("2013-07-24 23:55:26")  # Date-time in chron format
time1.c
# Creating times in chron from a custom layout. The input has a 2-digit
# year, so the format must use %y; %Y would read "13" as the year 13 AD.
time2.c <- as.chron("07/25/13", "%m/%d/%y")
# Gives date in the specified format
time2.c
dates(time1.c)     # Extracts the date part
time2.c > time1.c  # Logical comparison of two chron values
time1.c + 10       # Adds 10 days
time2.c - time1.c
# `units` is spelled out in full rather than relying on partial matching
difftime(time2.c, time1.c, units = "hours")  # Difference in hours
as.chron("2013-03-10 08:32:07") - as.chron("2013-03-09 23:55:26")
# NOTE: chron does not adjust for time zones

# Data wrangling with dplyr ----
# Deceased data from the Government of Karnataka COVID-19 Bulletin, collated
# at http://www.isibang.ac.in/~athreya/incovid19/dataopen/Master.csv
decdf <- read.csv(file = "Master.csv", header = TRUE)
head(decdf)  # First 6 rows of the dataset
# Reassign the column names (assumes the file has exactly these 12 columns)
names(decdf) <- c("Sno", "District", "Pid", "Age", "Sex", "Description",
                  "Symptoms", "CMB", "DOA", "DOD", "MB.Date", "Notes")

library(dplyr)

# filter() retains only the rows that satisfy the given condition(s)
filter(decdf, Age > 100)
filter(decdf, Age > 100 & Sex == "Female")
head(decdf$DOA)
head(decdf$MB.Date)

# Keep only the rows whose DOD is not NA
decdf <- filter(decdf, !is.na(DOD))

# mutate() adds new variables without disturbing the existing ones. Columns
# are referred to by bare name; writing decdf$col inside mutate() bypasses
# data masking. NOTE(review): as.Date() is called without a format, so
# MB.Date and DOD are presumably stored as yyyy-mm-dd - confirm.
decdf <- mutate(decdf, reporting.time = as.Date(MB.Date) - as.Date(DOD))
decdf <- mutate(decdf, Month = months(as.Date(MB.Date)))

DT <- distinct(decdf, Age)                    # Distinct values of Age only
DT <- distinct(decdf, Age, .keep_all = TRUE)  # Keeps the other variables too

SL <- slice(decdf, 10:12)  # Rows 10 to 12 only
head(SL, 2)                # Inspect the sliced rows (not the slice() function)

# group_by() leaves the rows as they are; it only records the grouping
GS <- group_by(decdf, Sex)
head(GS, 3)
# summarise() collapses multiple values into a single value per group:
# here, the mean of Age for each value of Sex.
summarise(GS, mean(Age, na.rm = TRUE))
sample_n(decdf, size = 2)          # 2 rows of decdf chosen at random
sample_frac(decdf, size = 0.0001)  # A random 0.0001 fraction of the rows
count(decdf, Month)                # Frequency table of Month

orderdf <- arrange(decdf, Age)  # New data frame with rows ordered by Age
head(orderdf, 2)
orderdf2 <- arrange(decdf, Description)  # Alphabetical order of Description

# Mean age per month, excluding September, built step by step ...
# (Month holds month names produced by months(), which follow the locale.)
filteredData <- filter(decdf, Month != "September")
groupedData <- group_by(filteredData, Month)
summarise(groupedData, mean(Age, na.rm = TRUE))

# ... and the same computation chained with the pipe operator %>%.
# Month is a character column, so it is compared with the month name;
# comparing it with a number would never exclude any row.
decdf %>%
  filter(Month != "September") %>%
  group_by(Month) %>%
  summarise(mean(Age, na.rm = TRUE))

##
######################