# We will examine the interest rate for four year car loans, # and the data that we use comes from the U.S. Federal Reserve's mean rates. # # Each of the five pairs consists of a year and the mean interest rate: year <- c(2000 , 2001 , 2002 , 2003 , 2004) rate <- c(9.34 , 8.50 , 7.62 , 6.93 , 6.60) # plot(year,rate,main="Commercial Banks Interest Rate for 4 Year Car Loan",) cor(year,rate) # fit <- lm(rate ~ year) fit summary(fit) # attributes(fit) fit$coefficients[1]; fit$coefficients[[1]] fit$coefficients[2]; fit$coefficients[[2]] fit$fitted.values fit$df.residual # (n-2) for the Simple Linear Model # plot(fit) # fit$coefficients[[2]]*2015+fit$coefficients[[1]] # # # A better use for this formula would be to calculate the residuals and plot them: res <- rate - (fit$coefficients[[2]]*year+fit$coefficients[[1]]) res plot(year,res) residuals(fit) # # If you want to plot the regression line on the same plot # as your scatter plot you can use the abline function along # with your variable fit: # plot(year,rate, main="Commercial Banks Interest Rate for 4 Year Car Loan", sub="http://www.federalreserve.gov/releases/g19/20050805/") abline(fit) # summary(fit) anova(fit) # # MANUAL ESTIMATION # x=as.matrix( cbind(rep(1,5),year) ); x # DESIGN MATRIX y=rate; y # RESPONSES # Parameters Estimation solve( t(x) %*% x ) %*% t(x) %*% y # Estimate of sigma^2 sigma=sum(fit$res^2)/(5-2); sigma; sqrt(sigma) # Covariance matrix covar=solve( t(x) %*% x ) * sigma; covar sqrt( covar[1,1] ) sqrt( covar[2,2] ) # R-squared SST = sum( ( rate - mean(rate) )^2 ) SSR = sum( ( mean(rate) - predict(fit,as.data.frame(year) ) )^2 ) R2=SSR/SST; R2 # # ### BINARY COVARIATE ### y1=rnorm(100,160,5); x1=rep(0,100) y2=rnorm(100,180,10); x2=rep(1,100) y=c(y1,y2); x=c(x1,x2) plot(x,y) # # # DATA HILLS: The record times in 1984 for 35 Scottish hill races # dist: distance in miles (on the map). # climb: total height gained during the route, in feet. # time: record time in minutes # library(MASS) data(hills) hills hills.lm=lm(time~dist+climb,data=hills) summary(hills.lm) fitted(hills.lm) studres(hills.lm) # STANDARDISED RESIDUALS plot(fitted(hills.lm),studres(hills.lm)) qqnorm(studres(hills.lm)) qqline(studres(hills.lm)) # # DATA CLOUDS : Data from an experiment investigating the use of # massive amounts of silver iodide (100 to 1000grams per cloud) # in cloud seeding to increase rainfall. # A data frame with 24 observations on the following 7 variables. # seeding: a factor indicating whether seeding action occured (no or yes). # time: number of days after the first day of the experiment. # sne: suitability criterion. # cloudcover: the percentage cloud cover in the experimental area # prewetness: the total rainfall in the target area one hour before seeding (in cubic metres times 1e+8). # echomotion: a factor showing whether the radar echo was moving or stationary. # rainfall: the amount of rain in cubic metres times 1e+8. # install.packages("HSAUR") data("clouds",package="HSAUR") clouds head(clouds) # boxplot(rainfall~seeding,data=clouds,ylab="Rainfall",xlab="Seeding") boxplot(rainfall~echomotion,data=clouds,ylab="Rainfall",xlab="Echo Motion") # plot(rainfall~time,data=clouds) plot(rainfall~cloudcover,data=clouds) plot(rainfall~sne,data=clouds) plot(rainfall~prewetness,data=clouds) # timelm=lm(rainfall~time,data=clouds); summary(timelm) snelm=lm(rainfall~sne,data=clouds); summary(snelm) cclm=lm(rainfall~cloudcover,data=clouds); summary(cclm) plm=lm(rainfall~prewetness,data=clouds); summary(plm) slm=lm(rainfall~seeding,data=clouds); summary(slm) echolm=lm(rainfall~echomotion,data=clouds); summary(echolm) # cllm=lm(rainfall~time+sne+cloudcover+prewetness+seeding+echomotion,data=clouds); summary(cllm) vcov(cllm) # psymb=as.numeric(clouds$seeding) plot(rainfall~sne,data=clouds,pch=psymb,xlab="S-Ne Criterion") abline(lm(rainfall~sne,data=clouds,subset= seeding=="no")) abline(lm(rainfall~sne,data=clouds,subset= seeding=="yes"),lty=2) legend("topright",legend=c("No Seeding","Seeding"),pch=1:2,lty=1:2,bty="n") # Generate Data n = 100 x = rnorm(n,10,5) b0 = 15; b1=3 e = rnorm(n,0,5) y = b0 + b1 * x + e plot(x,y) # Linear Model model = lm(y~x) summary( model ) anova( model ) # Plots plot(x,y) abline( model,col=2 ) for(j in 1:10){ i=round(runif(1,1,n)); segments( x[i],y[i], x[i], (model$coef[1]+model$coef[2]*x[i]),col=4) } abline(b0,b1,col=3) # Residuals plot(model$res) abline(0,0,col=2) # QQ Plot (normal) qqnorm(model$res) qqline(model$res,col=2)