Modeling

OLS regression

## fit a regression of y on x using the `sim1` dataset
sim1_mod <- lm(y ~ x, data = sim1)

## extract the results
fitted(sim1_mod) # predicted (fitted) values
# use the accessor: `summary(sim1_mod)$residual` only worked because `$`
# partially matched the `residuals` element
residuals(sim1_mod) # residuals
# look coefficients up by name rather than by position in the summary matrix
coef(sim1_mod)[["(Intercept)"]] # estimated intercept
coef(sim1_mod)[["x"]] # estimated slope
sigma(sim1_mod) # residual standard error (not a coefficient's standard error)
# compute the interval once and reuse it instead of refitting it three times
x_ci <- confint(sim1_mod, "x", level = 0.95) # 95% confidence interval of slope
x_ci
x_ci[1] # lower bound
x_ci[2] # upper bound

## draw the data together with the fitted regression line
ggplot(sim1, aes(x, y)) +
  geom_point() +
  # `se = FALSE` hides the confidence band; spell out FALSE because `F` is an
  # ordinary variable that can be reassigned
  geom_smooth(method = "lm", se = FALSE)

## repetition
# Fit the model on 100 freshly simulated datasets and collect the slopes.
# The tibble must be created inside the mapped function: a dataset built once
# beforehand would be reused, so every fit would return the same slope.
# `map_dbl()` is used instead of `rerun()` (deprecated in purrr 1.0.0); it
# returns a numeric vector directly, so no list-to-vector conversion is needed.
toy_slope <- map_dbl(
  seq_len(100),
  ~ coef(lm(y ~ x, tibble(x = rnorm(10), y = rnorm(10))))[["x"]]
)
toy_slope # numeric vector of slopes; its mean or variance can be calculated

## visualization
# Simulate 1000 datasets of 100 points each, fit y ~ x on every one and keep
# the slope. `map_dbl()` is used instead of `rerun()` (deprecated in purrr
# 1.0.0) and already returns a numeric vector.
toy_slope <- map_dbl(
  seq_len(1000),
  ~ coef(lm(y ~ x, tibble(x = rnorm(100), y = rnorm(100))))[["x"]]
)
# histogram of the simulated slopes
ggplot(data.frame(toy_slope)) +
  geom_histogram(aes(toy_slope), binwidth = 0.01)

The commands below are a miscellaneous, unsorted collection.

## basics
# regress y on x using the `sim1` dataset (the fit is printed at top level)
lm(y ~ x, data = sim1)

# `add_predictions()` adds the model's predictions to a data frame
# `add_residuals()` adds the model's residuals to a data frame
# for example
sim1_mod <- lm(y ~ x, data = sim1)
# `data_grid()` extracts the unique values of x, which the model then predicts
add_predictions(data_grid(sim1, x), sim1_mod)
add_residuals(sim1, sim1_mod)

## interactions (one categorical and one continuous predictor)

# compare an additive model (mod1) with one that includes the interaction (mod2)
mod1 <- lm(y ~ x1 + x2, data = sim3)
mod2 <- lm(y ~ x1 * x2, data = sim3)
# predictions of both models over every combination of x1 and x2
grid <- gather_predictions(data_grid(sim3, x1, x2), mod1, mod2)

# observed points with each model's predicted lines, one panel per model
ggplot(sim3, aes(x = x1, y = y, colour = x2)) +
  geom_point() +
  geom_line(aes(y = pred), data = grid) +
  facet_wrap(~model)

## interactions (two continuous predictors)

# `seq_range()` generates `n` evenly spaced values from the smallest to the
# largest value of its input; expected output is shown in `#>` comments so the
# examples can be run as-is
seq_range(c(0.0123, 0.9871), n = 5)
#> [1] 0.0123 0.2560 0.4997 0.7434 0.9871
seq_range(c(0.0123, 1.9871), n = 5, pretty = TRUE) # make values "pretty"
#> [1] 0.0 0.5 1.0 1.5 2.0
seq_range(c(0.0123, 1.9871), n = 5, trim = 0.1) # trim the range by 10%
#> [1] 0.11104 0.55537 0.99970 1.44403 1.88836
seq_range(c(0.0123, 1.9871), n = 5, expand = 0.1) # expand the range by 10%
#> [1] -0.08644  0.45663  0.99970  1.54277  2.08584

# predict: fit both models on `sim4`, then evaluate them on a 5 x 5 grid of
# evenly spaced x1 and x2 values
mod1 <- lm(y ~ x1 + x2, data = sim4)
mod2 <- lm(y ~ x1 * x2, data = sim4)
grid <- gather_predictions(
  data_grid(sim4, x1 = seq_range(x1, n = 5), x2 = seq_range(x2, n = 5)),
  mod1,
  mod2
)

猜你喜欢

转载自blog.csdn.net/weixin_51674826/article/details/117111804