1. Introduction to R Programming Elements:
R Programming
# ---- Expressions ----
# Three kinds of expression, each bound to a name: arithmetic, a function
# call, and a logical comparison of the first two.
expression1 <- 5 + 3 * 2 # `*` binds tighter than `+`, so this is 11
expression2 <- sqrt(16)
expression3 <- expression1 > expression2

# Show what each expression evaluated to.
print(paste("Result of expression1: ", expression1))
print(paste("Result of expression2: ", expression2))
print(paste("Is expression1 greater than expression2? ", expression3))

# ---- Assignments ----
x <- 10
y <- 20
z <- x + y

print(paste("Value of x: ", x))
print(paste("Value of y: ", y))
print(paste("Sum of x and y (z): ", z))

# ---- Decision making ----
# `if` is itself an expression, so its value can be handed straight to print().
print(
  if (z > 25) {
    "z is greater than 25"
  } else {
    "z is not greater than 25"
  }
)
R Programming
# ---- for loop ----
# Visit the integers 1..5 in order.
for (i in seq_len(5)) {
  print(paste("For loop iteration: ", i))
}

# ---- while loop ----
# The condition is checked before every pass; the counter is advanced by hand.
counter <- 1
while (counter <= 5) {
  print(paste("While loop iteration: ", counter))
  counter <- counter + 1
}

# ---- repeat loop ----
# `repeat` has no condition of its own, so the exit test is an explicit break
# placed after the body has run.
counter <- 1
repeat {
  print(paste("Repeat loop iteration: ", counter))
  counter <- counter + 1
  if (counter > 5) {
    break
  }
}
R Programming
# ---- Vector: one-dimensional, single type ----
vector1 <- c(1, 2, 3, 4, 5)
print("Vector:")
print(vector1)

# ---- Matrix: two-dimensional, filled column by column ----
matrix1 <- matrix(seq_len(9), nrow = 3, ncol = 3)
print("Matrix:")
print(matrix1)

# ---- Array: here 2 rows x 3 columns x 2 layers ----
array1 <- array(seq_len(12), dim = c(2, 3, 2))
print("Array:")
print(array1)

# ---- Data frame: equal-length columns that may differ in type ----
data_frame <- data.frame(
  Name = c("Alice", "Bob", "Charlie"),
  Age = c(25, 30, 35),
  Gender = c("F", "M", "M")
)
print("Data Frame:")
print(data_frame)

# ---- List: a container holding all of the objects built above ----
list1 <- list(vector1, matrix1, array1, data_frame)
print("List:")
print(list1)
2. Using List, DataFrames and Functions in R:
- i. User-defined Functions
- ii. Built-in Numeric Functions
R Programming
# ---- Build a named list whose elements have different types ----
my_list <- list(
  numbers = c(1, 2, 3, 4, 5),
  letters = c("A", "B", "C"),
  matrix = matrix(1:9, nrow = 3),
  data_frame = data.frame(Name = c("Alice", "Bob"), Age = c(25, 30))
)

# ---- Read elements back by name ----
print("List element - numbers:")
print(my_list[["numbers"]])
print("List element - letters:")
print(my_list[["letters"]])
print("List element - matrix:")
print(my_list[["matrix"]])
print("List element - data_frame:")
print(my_list[["data_frame"]])

# ---- Modify an element in place (double every number) ----
my_list[["numbers"]] <- 2 * my_list[["numbers"]]
print("Modified List element - numbers:")
print(my_list[["numbers"]])

# ---- Append a new element by assigning to an unused name ----
my_list[["new_element"]] <- "New Element"
print("List after adding new element:")
print(my_list)

# ---- Remove an element by assigning NULL to it ----
my_list[["new_element"]] <- NULL
print("List after removing new element:")
print(my_list)
R Programming
# ---- Create the data frame ----
my_data <- data.frame(
  Name = c("Alice", "Bob", "Charlie", "David"),
  Age = c(25, 30, 35, 40),
  Gender = c("F", "M", "M", "M")
)
print("Data Frame:")
print(my_data)

# ---- Pull out a single column as a vector ----
print("Names:")
print(my_data[["Name"]])

# ---- Add a column (one value per existing row) ----
my_data[["Salary"]] <- c(50000, 60000, 70000, 80000)
print("Data Frame after adding Salary column:")
print(my_data)

# ---- Keep only the rows where Age exceeds 30 ----
older_than_30 <- which(my_data[["Age"]] > 30)
subset_data <- my_data[older_than_30, ]
print("Subset of Data Frame (Age > 30):")
print(subset_data)

# ---- Update a column: everyone gets one year older ----
my_data[["Age"]] <- my_data[["Age"]] + 1
print("Data Frame after modifying Age column:")
print(my_data)

# ---- Drop a column by assigning NULL ----
my_data[["Salary"]] <- NULL
print("Data Frame after removing Salary column:")
print(my_data)
R Programming
# ---- User-defined function ----
# add_numbers(a, b): returns a + b. The value of the last evaluated
# expression is what the function returns.
add_numbers <- function(a, b) {
  a + b
}

# Call it and report the result.
result <- add_numbers(10, 20)
print(paste("Result of user-defined function (add_numbers): ", result))

# ---- Built-in numeric functions applied to one vector ----
x <- c(1, 2, 3, 4, 5)

mean_value <- mean(x) # arithmetic mean
print(paste("Mean of x: ", mean_value))

sum_value <- sum(x) # total
print(paste("Sum of x: ", sum_value))

sd_value <- sd(x) # sample standard deviation
print(paste("Standard Deviation of x: ", sd_value))
3. Implementing Strings in R:
R Programming
# ---- Character vector ----
string_vector <- c("Hello", "World", "R", "Programming")
print("String Vector:")
print(string_vector)

# ---- Character matrix: 2 rows, filled row by row ----
string_matrix <- matrix(
  c("Hello", "World", "R", "Programming", "Data", "Science"),
  nrow = 2,
  byrow = TRUE
)
print("String Matrix:")
print(string_matrix)

# ---- Character array: 2 x 2 x 2 ----
string_array <- array(
  c(
    "Hello", "World", "R", "Programming",
    "Data", "Science", "Machine", "Learning"
  ),
  dim = c(2, 2, 2)
)
print("String Array:")
print(string_array)

# ---- Data frame with text columns ----
string_data_frame <- data.frame(
  ID = c(1, 2, 3),
  Name = c("Alice", "Bob", "Charlie"),
  Occupation = c("Data Scientist", "Engineer", "Statistician")
)
print("String Data Frame:")
print(string_data_frame)

# ---- List mixing single strings with the vector and matrix from above ----
string_list <- list(
  title = "R Programming",
  content = string_vector,
  info_matrix = string_matrix,
  author = "John Doe"
)
print("String List:")
print(string_list)
R Programming
# ---- paste(): join strings (the default separator is a single space) ----
greeting <- paste("Hello", "World")
print("Using paste() function:")
print(greeting)

# ---- print(): shows the value with its index prefix and quotes ----
print("Using print() function:")
print(greeting)

# ---- noquote(): same text, printed without the surrounding quotes ----
noquote_greeting <- noquote(greeting)
print("Using noquote() function:")
print(noquote_greeting)

# ---- format(): number to text, with at least two decimal places ----
formatted_number <- format(12345.6789, nsmall = 2)
print("Using format() function:")
print(formatted_number)

# ---- cat(): writes raw text; newlines must be supplied explicitly ----
cat("Using cat() function:\n")
cat("Hello", "World", "\n")

# ---- toString(): collapse a vector into one comma-separated string ----
vector_to_string <- toString(c(1, 2, 3, 4, 5))
print("Using toString() function:")
print(vector_to_string)

# ---- sprintf(): C-style templating (%s text, %d integer, %.2f decimals) ----
formatted_string <- sprintf(
  "Name: %s, Age: %d, Salary: %.2f",
  "Alice", 25, 50000.75
)
print("Using sprintf() function:")
print(formatted_string)
4. Performing Statistics with R (I):
R Programming
# Sample to summarise: the values 1 through 10.
data <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)

# ---- Central tendency ----
mean_value <- mean(data)
print(paste("Mean:", mean_value))

median_value <- median(data)
print(paste("Median:", median_value))

# ---- Spread (sd() and var() are the sample versions) ----
sd_value <- sd(data)
print(paste("Standard Deviation:", sd_value))

variance_value <- var(data)
print(paste("Variance:", variance_value))

# ---- Extremes ----
min_value <- min(data)
print(paste("Minimum:", min_value))

max_value <- max(data)
print(paste("Maximum:", max_value))

# ---- Total ----
sum_value <- sum(data)
print(paste("Sum:", sum_value))

# ---- summary(): min, quartiles, median, mean and max in one call ----
summary_statistics <- summary(data)
print("Summary Statistics:")
print(summary_statistics)
R Programming
# 'datasets' ships with R (it is not installed from CRAN); it provides mtcars.
library(datasets)

# ---- Simple linear regression: mpg explained by weight ----
linear_model <- lm(mpg ~ wt, data = mtcars)
print("Linear Regression Summary:")
print(summary(linear_model))

# Scatter plot of the data with the fitted line drawn on top.
plot(
  mtcars$wt, mtcars$mpg,
  main = "Linear Regression",
  xlab = "Weight (1000 lbs)",
  ylab = "Miles/(US) gallon"
)
abline(linear_model, col = "red")

# ---- Multiple regression: mpg explained by weight, horsepower, displacement ----
multiple_model <- lm(mpg ~ wt + hp + disp, data = mtcars)
print("Multiple Regression Summary:")
print(summary(multiple_model))

# plot() on an lm object draws its diagnostic plots; show them on one 2x2 page.
# par() returns the previous settings, which are restored afterwards so that
# later plots in the same session are not forced into the 2x2 grid.
old_par <- par(mfrow = c(2, 2))
plot(multiple_model)
par(old_par)
5. Performing Statistics with R (II):
R Programming
# ==== Standard normal distribution (mean 0, sd 1 are the defaults) ====

# ---- dnorm(): density evaluated on a grid from -3 to 3 ----
x <- seq(-3, 3, by = 0.1)
density_values <- dnorm(x)
print("Density values for standard normal distribution:")
print(density_values)

plot(
  x, density_values,
  type = "l",
  main = "Density Function (dnorm)",
  xlab = "x",
  ylab = "Density"
)

# ---- pnorm(): cumulative probability P(X <= x) on the same grid ----
cumulative_values <- pnorm(x)
print("Cumulative distribution values for standard normal distribution:")
print(cumulative_values)

plot(
  x, cumulative_values,
  type = "l",
  main = "Cumulative Distribution Function (pnorm)",
  xlab = "x",
  ylab = "Probability"
)

# ---- qnorm(): quantiles for probabilities 0, 0.1, ..., 1 ----
quantiles <- qnorm(seq(0, 1, by = 0.1))
print("Quantiles for standard normal distribution:")
print(quantiles)

# ---- rnorm(): draw 1000 random values and show the first few ----
random_numbers <- rnorm(1000)
print("Random numbers from standard normal distribution:")
print(head(random_numbers))

hist(
  random_numbers,
  breaks = 30,
  main = "Histogram of Random Numbers (rnorm)",
  xlab = "Value",
  ylab = "Frequency"
)
R Programming
# ==== Binomial distribution ====
size <- 10 # number of trials
prob <- 0.5 # probability of success on each trial

# ---- dbinom(): probability of exactly k successes, k = 0..size ----
x <- 0:size
density_values <- dbinom(x, size = size, prob = prob)
print("Density values for binomial distribution:")
print(density_values)

plot(
  x, density_values,
  type = "h",
  main = "Density Function (dbinom)",
  xlab = "Number of successes",
  ylab = "Probability"
)

# ---- pbinom(): probability of at most k successes ----
cumulative_values <- pbinom(x, size = size, prob = prob)
print("Cumulative distribution values for binomial distribution:")
print(cumulative_values)

plot(
  x, cumulative_values,
  type = "s",
  main = "Cumulative Distribution Function (pbinom)",
  xlab = "Number of successes",
  ylab = "Cumulative Probability"
)

# ---- qbinom(): quantiles for probabilities 0, 0.1, ..., 1 ----
quantiles <- qbinom(seq(0, 1, by = 0.1), size = size, prob = prob)
print("Quantiles for binomial distribution:")
print(quantiles)

# ---- rbinom(): draw 1000 random counts and show the first few ----
random_numbers <- rbinom(1000, size = size, prob = prob)
print("Random numbers from binomial distribution:")
print(head(random_numbers))

# The draws are integers, so use one bin per possible count with edges on the
# half-integers. A single number for `breaks` is only a suggestion to hist(),
# and with edges on the integers the counts 0 and 1 fall into the same bin.
count_breaks <- seq(-0.5, size + 0.5, by = 1)
hist(
  random_numbers,
  breaks = count_breaks,
  main = "Histogram of Random Numbers (rbinom)",
  xlab = "Number of successes",
  ylab = "Frequency"
)
6. Data Visualization and Analysis:
[Hint: Contingency Tables, Selection of Parts, Conversion, Complex Tables, Cross Tabulation]
[Hint: Plots, Special Plots, Storing Graphics]
R Programming
# 'datasets' is bundled with R and supplies the mtcars data used below.
library(datasets)
data("mtcars")

# ---- Contingency table: counts of cars by cylinders (rows) and gears (cols) ----
contingency_table <- table(mtcars$cyl, mtcars$gear)
print("Contingency Table (Cylinders vs Gears):")
print(contingency_table)

# ---- Selecting part of a table: keep the first two rows, all columns ----
selected_parts <- contingency_table[seq_len(2), ]
print("Selected Parts from Contingency Table (First two rows):")
print(selected_parts)

# ---- Conversion: one data-frame row per table cell, with its frequency ----
table_as_dataframe <- as.data.frame(contingency_table)
print("Contingency Table as Data Frame:")
print(table_as_dataframe)

# ---- Complex (three-way) table: cylinders x gears x carburetors ----
complex_table <- table(mtcars$cyl, mtcars$gear, mtcars$carb)
print("Complex Table (Cylinders vs Gears vs Carburetors):")
print(complex_table)

# ---- Cross tabulation of the same three variables via the formula interface ----
cross_tabulation <- xtabs(~ cyl + gear + carb, data = mtcars)
print("Cross Tabulation (Cylinders vs Gears vs Carburetors):")
print(cross_tabulation)
R Programming
data("mtcars")

# ==== Basic plots ====

# Scatter plot: weight against fuel economy.
with(
  mtcars,
  plot(wt, mpg, main = "Scatter Plot", xlab = "Weight", ylab = "Miles per Gallon")
)

# Histogram of fuel economy.
with(
  mtcars,
  hist(mpg, main = "Histogram of MPG", xlab = "Miles per Gallon", breaks = 10)
)

# Box plot: fuel economy grouped by cylinder count.
boxplot(
  mpg ~ cyl,
  data = mtcars,
  main = "Box Plot",
  xlab = "Number of Cylinders",
  ylab = "Miles per Gallon"
)

# ==== Special plots ====

# Bar plot of how many cars have each cylinder count.
barplot(
  table(mtcars$cyl),
  main = "Bar Plot of Cylinder Counts",
  xlab = "Number of Cylinders",
  ylab = "Frequency"
)

# Pie chart of the same counts, labelled with the cylinder values.
pie(
  table(mtcars$cyl),
  main = "Pie Chart of Cylinder Counts",
  labels = names(table(mtcars$cyl))
)

# ==== Storing graphics ====
# Pattern for each file: open a PNG device, draw the plot, close the device.

png("scatter_plot.png")
with(
  mtcars,
  plot(wt, mpg, main = "Scatter Plot", xlab = "Weight", ylab = "Miles per Gallon")
)
dev.off()

png("histogram.png")
with(
  mtcars,
  hist(mpg, main = "Histogram of MPG", xlab = "Miles per Gallon", breaks = 10)
)
dev.off()

png("boxplot.png")
boxplot(
  mpg ~ cyl,
  data = mtcars,
  main = "Box Plot",
  xlab = "Number of Cylinders",
  ylab = "Miles per Gallon"
)
dev.off()
7. Object-Oriented Programming in R:
R Programming
S3 Classes
This code demonstrates the creation of an S3 class called person and a custom print method for the class. The create_person function creates an object of the class, and the print.person function defines how objects of this class should be printed.
# S3 Classes

# Constructor for the S3 class 'person'.
#   name, age: stored unchanged as the list elements `name` and `age`.
# Returns a list with class attribute "person".
create_person <- function(name, age) {
  person <- list(name = name, age = age)
  class(person) <- "person"
  person
}

# print() method for 'person' objects.
#   x:   the object to print.
#   ...: accepted for compatibility with the print(x, ...) generic, so extra
#        arguments passed to print() do not cause an "unused argument" error.
# Returns x invisibly, as print methods conventionally do.
print.person <- function(x, ...) {
  cat("Name:", x$name, "\n")
  cat("Age:", x$age, "\n")
  invisible(x)
}

# Create an object of class 'person'.
person1 <- create_person("Alice", 25)

# print() dispatches to print.person because of the class attribute.
print(person1)
S4 Classes
This code demonstrates the creation of an S4 class called Person with slots for name and age. The setMethod function defines how objects of this class should be displayed using the show method.
# S4 Classes

# Formal class 'Person' with two typed slots.
setClass(
  "Person",
  slots = c(name = "character", age = "numeric")
)

# 'show' is the generic used to display S4 objects; this method writes the
# two slots on separate lines.
setMethod(
  f = "show",
  signature = "Person",
  definition = function(object) {
    cat("Name:", object@name, "\n")
    cat("Age:", object@age, "\n")
  }
)

# Instantiate the class; new() checks each value against its slot type.
person2 <- new("Person", name = "Bob", age = 30)

# Display the object through the method defined above.
show(person2)
R Programming
# Reference class 'PersonRef': an object with mutable `name` and `age` fields
# plus getter, setter and printing methods.
PersonRef <- setRefClass(
  "PersonRef",
  fields = list(
    name = "character",
    age = "numeric"
  ),
  methods = list(
    # Every argument has a default so the generator can be called with no
    # arguments, as happens when an object is copied with $copy(). Remaining
    # arguments are forwarded to the default initializer.
    initialize = function(name = character(), age = numeric(), ...) {
      name <<- name
      age <<- age
      callSuper(...)
    },
    # Return the current value of the `name` field.
    get_name = function() {
      name
    },
    # Return the current value of the `age` field.
    get_age = function() {
      age
    },
    # Overwrite the `name` field (modifies the object in place).
    set_name = function(new_name) {
      name <<- new_name
    },
    # Overwrite the `age` field (modifies the object in place).
    set_age = function(new_age) {
      age <<- new_age
    },
    # Write both fields to the console, one per line.
    print_info = function() {
      cat("Name:", name, "\n")
      cat("Age:", age, "\n")
    }
  )
)

# Create an object of the reference class.
person3 <- PersonRef$new(name = "Charlie", age = 35)

# Show the object, change it through its setters, and show it again.
person3$print_info()
person3$set_name("David")
person3$set_age(40)
person3$print_info()
8. Data Interfaces in R:
[Hint: creating data for CSV, analyzing, writing CSV files]
[Hint: installing, loading, verifying, creating data for xlsx file]
[Develop data interface for maintaining Employee Information]
R Programming
# ---- Create the employee records ----
employee_data <- data.frame(
  ID = 1:5,
  Name = c("Alice", "Bob", "Charlie", "David", "Eve"),
  Age = c(25, 30, 35, 40, 45),
  Department = c("HR", "Finance", "IT", "Marketing", "Sales")
)

# ---- Write them to a CSV file in the working directory ----
# row.names = FALSE keeps the automatic row numbers out of the file.
write.csv(employee_data, "employee_data.csv", row.names = FALSE)

# ---- Read the file back ----
read_data <- read.csv("employee_data.csv")

# ---- Analyse the data ----
print("Summary of the data:")
print(summary(read_data))

# str() prints the structure itself and returns NULL, so it is called
# directly; wrapping it in print() would add a stray "NULL" line.
print("Structure of the data:")
str(read_data)

# Example analysis: mean age of employees.
mean_age <- mean(read_data$Age)
print(paste("Mean age of employees:", mean_age))
R Programming
# 'openxlsx' must be installed once before it can be loaded:
# install.packages("openxlsx")
library(openxlsx)

# ---- Create the employee records for the workbook ----
employee_data <- data.frame(
  ID = 1:5,
  Name = c("Alice", "Bob", "Charlie", "David", "Eve"),
  Age = c(25, 30, 35, 40, 45),
  Department = c("HR", "Finance", "IT", "Marketing", "Sales")
)

# ---- Write them to an .xlsx file in the working directory ----
write.xlsx(employee_data, "employee_data.xlsx")

# ---- Read the workbook back ----
read_data <- read.xlsx("employee_data.xlsx")

# ---- Verify by printing what was read ----
print("Read data from xlsx file:")
print(read_data)
R Programming
# 'XML' must be installed once before it can be loaded:
# install.packages("XML")
library(XML)

# ---- Build an XML tree with one <Employee> node per person ----
# Node attributes are supplied through the `attrs` argument; unnamed or extra
# arguments to addTag() are treated as the node's children, not attributes.
employee_data <- xmlTree("Employees")

employee_data$addTag(
  "Employee",
  attrs = c(ID = "1", Name = "Alice", Age = "25", Department = "HR"),
  close = FALSE
)
employee_data$closeTag()

employee_data$addTag(
  "Employee",
  attrs = c(ID = "2", Name = "Bob", Age = "30", Department = "Finance"),
  close = FALSE
)
employee_data$closeTag()

employee_data$addTag(
  "Employee",
  attrs = c(ID = "3", Name = "Charlie", Age = "35", Department = "IT"),
  close = FALSE
)
employee_data$closeTag()

# ---- Save the tree to a file in the working directory ----
saveXML(employee_data, file = "employee_data.xml")

# ---- Read the file back and locate the root node ----
parsed_data <- xmlParse("employee_data.xml")
root_node <- xmlRoot(parsed_data)

# ---- Print the attributes of every child of the root ----
# seq_len() yields an empty sequence when the root has no children, whereas
# 1:xmlSize(root_node) would count down from 1 to 0.
print("Employee Information from XML:")
for (i in seq_len(xmlSize(root_node))) {
  employee <- root_node[[i]]
  print(xmlAttrs(employee))
}
R Programming
# 'RMySQL' must be installed once before it can be loaded:
# install.packages("RMySQL")
library(RMySQL)

# ---- Connect to the database ----
# Connection settings are read from environment variables when they are set;
# otherwise the placeholder values below are used and must be edited.
con <- dbConnect(
  MySQL(),
  user = Sys.getenv("MYSQL_USER", "username"),
  password = Sys.getenv("MYSQL_PASSWORD", "password"),
  dbname = Sys.getenv("MYSQL_DBNAME", "database_name"),
  host = Sys.getenv("MYSQL_HOST", "localhost")
)

# Everything that uses the connection runs inside tryCatch() so that the
# `finally` clause closes the connection even if one of the steps fails.
tryCatch(
  {
    # ---- List the tables in the database ----
    tables <- dbListTables(con)
    print("Tables in the database:")
    print(tables)

    # ---- Write the employee records to the 'employees' table ----
    # overwrite = TRUE replaces the table if it already exists.
    employee_data <- data.frame(
      ID = 1:5,
      Name = c("Alice", "Bob", "Charlie", "David", "Eve"),
      Age = c(25, 30, 35, 40, 45),
      Department = c("HR", "Finance", "IT", "Marketing", "Sales")
    )
    dbWriteTable(
      con,
      name = "employees",
      value = employee_data,
      row.names = FALSE,
      overwrite = TRUE
    )

    # ---- Read the whole table back ----
    read_data <- dbReadTable(con, "employees")
    print("Read data from MySQL table:")
    print(read_data)

    # ---- Run a query: employees older than 30 ----
    query_result <- dbGetQuery(con, "SELECT * FROM employees WHERE Age > 30")
    print("Query result (Age > 30):")
    print(query_result)
  },
  finally = {
    # ---- Close the connection ----
    dbDisconnect(con)
  }
)
9. Handling Errors in R:
R Programming
# Demonstrating various error messages in R.

# Evaluate `expr`; if it raises an error, print "<label> <message>".
#   label: text placed in front of the error message.
#   expr:  expression to try (evaluated lazily, inside tryCatch).
# Returns the error message invisibly, or NULL invisibly when no error occurs.
report_error <- function(label, expr) {
  tryCatch(
    {
      expr
      invisible(NULL)
    },
    error = function(e) {
      msg <- conditionMessage(e)
      print(paste(label, msg))
      invisible(msg)
    }
  )
}

# ---- Syntax error ----
# The incomplete text "x <- 1+" is parsed at run time on purpose, so the
# parse failure can be caught instead of stopping the whole script.
report_error("Syntax Error:", eval(parse(text = "x <- 1+")))

# ---- Object not found ----
# Uses a name that is not defined anywhere, so the error does not depend on
# which variables earlier code happened to create.
report_error("Object Not Found Error:", print(undefined_object_for_demo))

# ---- Division by zero ----
# Not an error in R: numeric division by zero evaluates to Inf.
result <- 1 / 0
print(result)
print("Division by zero does not raise an error in R; 1 / 0 is Inf")

# ---- Invalid subscript type ----
# Indexing with a list is rejected. (Indexing an unnamed vector with a
# character string such as vec["a"] only yields NA, without an error.)
report_error("Invalid Subscript Type Error:", {
  vec <- c(1, 2, 3)
  print(vec[list(1)])
})

# ---- Non-numeric argument to binary operator ----
report_error("Non-numeric Argument Error:", {
  result <- "a" + 1
  print(result)
})
R Programming
# Signals a warning, an error, or neither, depending on the sign of x.
#   x < 0  : issues a warning and returns NaN (no real square root exists)
#   x == 0 : raises an error with stop()
#   x > 0  : returns sqrt(x)
demonstrate_warning_stop <- function(x) {
  if (x < 0) {
    warning("Warning: x is less than 0")
    return(NaN)
  }
  if (x == 0) {
    stop("Error: x is zero, stopping execution")
  }
  sqrt(x)
}

# try(): errors are turned into a "try-error" object instead of halting.
# Warnings are not intercepted here; R reports them afterwards as usual.
try_example <- function(x) {
  result <- try(demonstrate_warning_stop(x), silent = TRUE)
  if (inherits(result, "try-error")) {
    print("An error occurred.")
  } else {
    print(paste("Result:", result))
  }
}

# tryCatch(): the first warning or error abandons the expression and runs
# the matching handler.
tryCatch_example <- function(x) {
  tryCatch(
    {
      result <- demonstrate_warning_stop(x)
      print(paste("Result:", result))
    },
    warning = function(w) {
      print(paste("Warning handled:", conditionMessage(w)))
    },
    error = function(e) {
      print(paste("Error handled:", conditionMessage(e)))
    }
  )
}

# withCallingHandlers(): the warning handler runs at the point of the
# warning and then resumes the computation via the "muffleWarning" restart.
# Errors cannot be resumed this way, so an outer tryCatch() reports them;
# without it the error would escape and stop the script.
calling_handlers_example <- function(x) {
  tryCatch(
    withCallingHandlers(
      {
        result <- demonstrate_warning_stop(x)
        print(paste("Result:", result))
      },
      warning = function(w) {
        print(paste("Warning handled in calling handler:", conditionMessage(w)))
        invokeRestart("muffleWarning")
      }
    ),
    error = function(e) {
      print(paste("Error handled after calling handlers:", conditionMessage(e)))
    }
  )
}

# ---- Exercise all three approaches with each kind of input ----
# cat() is used for the headings so that "\n" produces a real blank line;
# print() would display the escape sequence literally.
cat("Testing with x = 4\n")
try_example(4)
tryCatch_example(4)
calling_handlers_example(4)

cat("\nTesting with x = 0\n")
try_example(0)
tryCatch_example(0)
calling_handlers_example(0)

cat("\nTesting with x = -2\n")
try_example(-2)
tryCatch_example(-2)
calling_handlers_example(-2)
10. Measuring Performance in R:
R Programming
# 'microbenchmark' (timing) and 'ggplot2' (plotting the timings) must be
# installed once before they can be loaded:
# install.packages(c("microbenchmark", "ggplot2"))
library(microbenchmark)
library(ggplot2)

# Sum the integers 1..n with an explicit loop.
#   n: a non-negative whole number.
# seq_len(n) is empty when n is 0, so the result is 0; 1:n would instead
# iterate over c(1, 0) and return 1.
sum_for_loop <- function(n) {
  total <- 0
  for (i in seq_len(n)) {
    total <- total + i
  }
  total
}

# Sum the integers 1..n with the vectorized sum().
#   n: a non-negative whole number.
sum_vectorized <- function(n) {
  sum(seq_len(n))
}

# Upper limit of the sum handed to both implementations.
n <- 100000

# ---- Time both implementations, 100 runs each ----
benchmark_result <- microbenchmark(
  sum_for_loop(n),
  sum_vectorized(n),
  times = 100
)

# Raw timing table.
print(benchmark_result)

# Per-expression summary of the timings.
summary_result <- summary(benchmark_result)
print(summary_result)

# ---- Plot the timing distributions ----
# Wrapped in print() so the plot is also drawn when the file is run with
# source(), where top-level values are not printed automatically.
print(autoplot(benchmark_result))