View Single Post
#4
 dlammerts Junior Member Join Date: Apr 2013 Location: San Mateo, CA Posts: 6 Re: *ANSWER* HW4 #4: graphical hint (ok, more than a hint)

Juan et al.,
Here is the R code. It plots the graph, prints slope and intercept, and also calculates bias and variance. You will see three code sections - one for each of the hypothesis classes discussed in class and in this homework:
h(x) = b (class)
h(x) = ax+b (class)
h(x) = ax (HW)

Dirk

#############################################

# Caltech Machine Learning April 2013
# HW4 - Bias and variance (problems #4-7)

# Clear workspace
rm(list = ls())

# Set seed for random number generation
set.seed(2013)

# Set sample size
N_sample = 1000

# Create sample and estimate expected value for the hypothesis using h(x) = b (same as in lecture example 1)
# Initialize variable to store regression parameter for regression h(x) = ax+b (slope a will be forced to zero)
a <- NA
b <- NA
variance <- NA
# Plot f(x)
plot.new()
# Plot f(x) = sin(pi*x)
curve(sin(pi*x), -1, 1, main='h(x) = b', col="black")
for (i in 1:N_sample) {
x_values <- runif(2, -1.0, 1.0)
data_set <- data.frame(x = x_values, y = sin(pi * x_values))
a[i] <- 0.0
b[i] <- 0.5 * (data_set[1,"y"] + data_set[2,"y"])
# Calculate variance for the specific hypothesis for this particular data set using g_bar(x) = 0.0
integrand <- function(x) {(0.0-a[i]*x+b[i])^2}
# Dividing definitive integral by 2 to calculate exepcted value relative to x since range of x is [-1,1]
variance[i] <- 0.5 * (integrate(integrand,-1,1)\$value)
# Plot current h(x)
abline(b[i], a[i], col="grey75")
}
# Plot expected value for hypothesis g
abline(mean(b), mean(a), col="black")
# Plot f(x) = sin(pi*x) again to overlay on top of chart
par(new=T)
curve(sin(pi*x), -1, 1, main='h(x) = b', col="black")
# Print average regression parameters as exepcted value for hypothesis g
mean(a)
mean(b)
# Calculate bias using g_bar(x) = 0.0
integrand <- function(x) {(0.0-sin(pi*x))^2}
# Dividing definitive integral by 2 to calculate exepcted value relative to x since range of x is [-1,1]
bias <- 0.5 * (integrate(integrand,-1,1)\$value)
bias
# Calculate variance using g_bar(x) = 0.77*x
# Average over data set specific variances
mean(variance)

# Create sample and estimate expected value for the hypothesis using h(x) = ax+b (same as in lecture example 2)
# Initialize variable to store regression parameter for regression h(x) = ax+b
a <- NA
b <- NA
variance <- NA
# Plot f(x)
plot.new()
# Plot f(x) = sin(pi*x)
curve(sin(pi*x), -1, 1, main='h(x) = ax+b', col="black")
for (i in 1:N_sample) {
x_values <- runif(2, -1.0, 1.0)
data_set <- data.frame(x = x_values, y = sin(pi * x_values))
regression_params <- lm(formula = data_set\$y ~ data_set\$x)
a[i] <- regression_params\$coefficients
b[i] <- regression_params\$coefficients
# Calculate variance for the specific hypothesis for this particular data set using g_bar(x) = 0.77*x
integrand <- function(x) {(0.77*x-a[i]*x+b[i])^2}
# Dividing definitive integral by 2 to calculate exepcted value relative to x since range of x is [-1,1]
variance[i] <- 0.5 * (integrate(integrand,-1,1)\$value)
# Plot current h(x)
abline(b[i], a[i], col="grey75")
}
# Plot expected value for hypothesis g
abline(mean(b), mean(a), col="black")
# Plot f(x) = sin(pi*x) again to overlay on top of chart
par(new=T)
curve(sin(pi*x), -1, 1, main='h(x) = ax+b', col="black")
# Print average regression parameters as exepcted value for hypothesis g
mean(a)
mean(b)
# Calculate bias using g_bar(x) = 0.77*x
integrand <- function(x) {(0.77*x-sin(pi*x))^2}
# Dividing definitive integral by 2 to calculate exepcted value relative to x since range of x is [-1,1]
bias <- 0.5 * (integrate(integrand,-1,1)\$value)
bias
# Calculate variance using g_bar(x) = 0.77*x
# Average over data set specific variances
mean(variance)

# Create sample and estimate expected value for the hypothesis using h(x) = ax (HW problem)
# Initialize variable to store regression parameter for regression h(x) = ax+b (intercept b will be forced to zero)
a <- NA
b <- NA
variance <- NA
# Plot f(x)
plot.new()
# Plot f(x) = sin(pi*x)
curve(sin(pi*x), -1, 1, main='h(x) = ax', col="black")
for (i in 1:N_sample) {
x_values <- runif(2, -1.0, 1.0)
data_set <- data.frame(x = x_values, y = sin(pi * x_values))
# Force intercept to zero through y ~ 0 + x model term
regression_params <- lm(formula = data_set\$y ~ 0 + data_set\$x)
a[i] <- regression_params\$coefficients
b[i] <- 0.0
# Calculate variance for the specific hypothesis for this particular data set using g_bar(x) = 1.40*x
integrand <- function(x) {(1.40*x-a[i]*x+b[i])^2}
# Dividing definitive integral by 2 to calculate exepcted value relative to x since range of x is [-1,1]
variance[i] <- 0.5 * (integrate(integrand,-1,1)\$value)
# Plot current h(x)
abline(b[i], a[i], col="grey75")
}
# Plot expected value for hypothesis g
abline(mean(b), mean(a), col="black")
# Plot f(x) = sin(pi*x) again to overlay on top of chart
par(new=T)
curve(sin(pi*x), -1, 1, main='h(x) = ax', col="black")
# Print average regression parameters as exepcted value for hypothesis g
mean(a)
mean(b)
# Calculate bias using g_bar(x) = 1.40*x
integrand <- function(x) {(1.40*x-sin(pi*x))^2}
# Dividing definitive integral by 2 to calculate exepcted value relative to x since range of x is [-1,1]
bias <- 0.5 * (integrate(integrand,-1,1)\$value)
bias
# Calculate variance using g_bar(x) = 1.40*x
# Average over data set specific variances
mean(variance)