1 Exploratory data analysis

#-----------------------------------------------------------------------
# Load packages.

# library(lattice)
# library(latticeExtra)
library(gridExtra)
library(tidyverse)
library(ggpmisc)

theme_set(theme_light())
#-----------------------------------------------------------------------
# Load pre-processed data.

rm(list = ls())
load(file = "high_soybean.RData")
str(high_soybean)
## List of 4
##  $ atrib :'data.frame':  394 obs. of  39 variables:
##   ..$ faz     : chr [1:394] "FAZ. ARGEMIRA" "FAZ. ARGEMIRA" "FAZ. ARGEMIRA" "FAZ. ARGEMIRA" ...
##   ..$ tal     : chr [1:394] "AG2" "AG2" "AG2" "AG2" ...
##   ..$ cam     : chr [1:394] "0-0.1" "0-0.1" "0.1-0.2" "0.1-0.2" ...
##   ..$ rpt     : int [1:394] 1 2 1 2 1 2 1 2 1 2 ...
##   ..$ prod    : num [1:394] 3.89 NA NA NA NA ...
##   ..$ glic    : num [1:394] 275 NA NA NA NA ...
##   ..$ sulf    : num [1:394] 200 NA NA NA NA ...
##   ..$ phos    : num [1:394] 922 NA NA NA NA ...
##   ..$ FLF     : num [1:394] 14.4 NA NA NA NA ...
##   ..$ OLF     : num [1:394] 6.21 NA NA NA NA NA 3.1 NA NA NA ...
##   ..$ MO2     : num [1:394] 45.5 NA NA NA NA ...
##   ..$ HF      : num [1:394] 24.8 NA NA NA NA NA 30.1 NA NA NA ...
##   ..$ rp      : num [1:394] 2.09 1.75 2.39 2.9 2.35 1.14 0.54 1.26 0.94 2.65 ...
##   ..$ dens    : num [1:394] 1.23 1.19 1.28 1.3 1.23 1.2 1.01 1.08 1.15 1.24 ...
##   ..$ poro_e  : num [1:394] 0.53 0.54 0.53 0.52 0.5 0.51 0.61 0.59 0.57 0.53 ...
##   ..$ clay    : int [1:394] 653 NA 725 NA 775 NA 644 NA 724 NA ...
##   ..$ sand    : int [1:394] 162 NA 139 NA 120 NA 175 NA 138 NA ...
##   ..$ pH_H2O  : num [1:394] 6.2 NA 5.3 NA 5.1 NA 6.1 NA 5.2 NA ...
##   ..$ pH_CaCl2: num [1:394] 5.4 NA 4.5 NA 4.4 NA 5.2 NA 4.5 NA ...
##   ..$ P       : num [1:394] 16.4 NA 2.1 NA 0.9 NA 14.4 NA 1.7 NA ...
##   ..$ K       : num [1:394] 97.3 NA 76.9 NA 67.7 NA 78.6 NA 25.6 NA ...
##   ..$ Ca_Mg   : num [1:394] 5.5 NA 2.1 NA 1.1 NA 5.1 NA 1.8 NA ...
##   ..$ Ca      : num [1:394] 4 NA 1.5 NA 0.8 NA 3.7 NA 1.3 NA ...
##   ..$ Mg      : num [1:394] 1.5 NA 0.6 NA 0.3 NA 1.4 NA 0.5 NA ...
##   ..$ Al      : num [1:394] 0 NA 0.4 NA 0.5 NA 0 NA 0.4 NA ...
##   ..$ MO      : num [1:394] 43.3 NA 31.2 NA 21.3 NA 44.5 NA 29.5 NA ...
##   ..$ base    : num [1:394] 5.75 NA 2.3 NA 1.27 NA 5.3 NA 1.87 NA ...
##   ..$ CTC     : num [1:394] 10.15 NA 7.6 NA 5.77 ...
##   ..$ V       : num [1:394] 56.6 NA 30.2 NA 22.1 ...
##   ..$ CaMg    : num [1:394] 2.67 NA 2.5 NA 2.67 NA 2.64 NA 2.6 NA ...
##   ..$ CaK     : num [1:394] 16.03 NA 7.61 NA 4.61 ...
##   ..$ MgK     : num [1:394] 6.01 NA 3.04 NA 1.73 NA 6.95 NA 7.62 NA ...
##   ..$ Ca_     : num [1:394] 39.4 NA 19.7 NA 13.9 NA 36.6 NA 17.9 NA ...
##   ..$ Mg_     : num [1:394] 14.8 NA 7.9 NA 5.2 NA 13.9 NA 6.9 NA ...
##   ..$ K_      : num [1:394] 2.5 NA 2.6 NA 3 NA 2 NA 0.9 NA ...
##   ..$ H_      : num [1:394] 43.4 NA 64.5 NA 69.3 NA 47.5 NA 68.8 NA ...
##   ..$ m_      : num [1:394] 0 NA 14.8 NA 28.2 NA 0 NA 17.7 NA ...
##   ..$ S       : num [1:394] 21.1 NA 33.9 NA 56 NA 10.8 NA 32.7 NA ...
##   ..$ dp      : num [1:394] 2.59 2.59 2.73 2.73 2.45 2.45 2.63 2.63 2.65 2.65 ...
##  $ swrc  :'data.frame':  390 obs. of  15 variables:
##   ..$ faz  : chr [1:390] "FAZ. ARGEMIRA" "FAZ. ARGEMIRA" "FAZ. ARGEMIRA" "FAZ. ARGEMIRA" ...
##   ..$ tal  : chr [1:390] "AG2" "AG2" "AG2" "AG2" ...
##   ..$ cam  : chr [1:390] "0-0.1" "0-0.1" "0.1-0.2" "0.1-0.2" ...
##   ..$ rpt  : int [1:390] 1 2 1 2 1 2 1 2 1 2 ...
##   ..$ S0   : num [1:390] 0.441 0.445 0.39 0.381 0.421 0.437 0.579 0.518 0.477 0.402 ...
##   ..$ S1   : num [1:390] 0.425 0.418 0.35 0.367 0.396 0.41 0.527 0.501 0.431 0.398 ...
##   ..$ S2   : num [1:390] 0.402 0.404 0.347 0.358 0.387 0.408 0.457 0.459 0.422 0.389 ...
##   ..$ S4   : num [1:390] 0.376 0.384 0.34 0.349 0.379 0.403 0.407 0.422 0.395 0.378 ...
##   ..$ S6   : num [1:390] 0.366 0.368 0.335 0.34 0.369 0.394 0.389 0.406 0.384 0.366 ...
##   ..$ S8   : num [1:390] 0.357 0.361 0.328 0.331 0.359 0.381 0.376 0.4 0.376 0.359 ...
##   ..$ S10  : num [1:390] 0.354 0.356 0.321 0.325 0.352 0.37 0.355 0.395 0.371 0.352 ...
##   ..$ S33  : num [1:390] 0.314 0.319 0.286 0.283 0.294 0.29 0.314 0.359 0.331 0.302 ...
##   ..$ S66  : num [1:390] 0.296 0.314 0.283 0.276 0.287 0.282 0.291 0.304 0.319 0.284 ...
##   ..$ S300 : num [1:390] 0.27 0.287 0.253 0.253 0.261 0.259 0.272 0.297 0.284 0.269 ...
##   ..$ S1500: num [1:390] 0.223 0.23 0.225 0.222 0.237 0.242 0.261 0.245 0.284 0.264 ...
##  $ fract :'data.frame':  390 obs. of  10 variables:
##   ..$ faz         : chr [1:390] "FAZ. ARGEMIRA" "FAZ. ARGEMIRA" "FAZ. ARGEMIRA" "FAZ. ARGEMIRA" ...
##   ..$ tal         : chr [1:390] "AG2" "AG2" "AG2" "AG2" ...
##   ..$ cam         : chr [1:390] "0-0.1" "0-0.1" "0.1-0.2" "0.1-0.2" ...
##   ..$ rpt         : int [1:390] 1 2 1 2 1 2 1 2 1 2 ...
##   ..$ lightfree   : num [1:390] 14.4 NA NA NA NA ...
##   ..$ filter1     : num [1:390] 0.13 NA NA NA NA ...
##   ..$ filterC1    : num [1:390] 0.274 NA NA NA NA ...
##   ..$ lightocluded: num [1:390] 6.21 NA NA NA NA NA 3.1 NA NA NA ...
##   ..$ filter2     : num [1:390] 0.128 NA NA NA NA ...
##   ..$ filterC2    : num [1:390] 0.19 NA NA NA NA ...
##  $ legend:List of 4
##   ..$ abbrev      : chr [1:40] "prod" "glic" "phos" "sulf" ...
##   ..$ short_label : chr [1:40] "Soybean ~ productivity" "beta-glucosidase" "Acid ~ phosphatase" "Arylsulfatase" ...
##   ..$ measure_unit: chr [1:40] "(t~ha^{-1})" "(mu*g~p-nitrophenol~g^{-1}~soil~h^{-1})" "(mu*g~p-nitrophenol~g^{-1}~soil~h^{-1})" "(mu*g~p-nitrophenol~g^{-1}~soil~h^{-1})" ...
##   ..$ long_label  : chr [1:40] "Soybean ~ productivity" "beta-glucosidase" "Acid ~ phosphatase" "Arylsulfatase" ...
#-----------------------------------------------------------------------
# Tidy data to the exploratory data analysis.

# str(high_soybean)
da <- as_tibble(high_soybean$atrib)
# str(da)

# Creates id and recode soil layer.
da <- da %>%
    filter(cam != "") %>%
    mutate(id = gsub(x = paste(faz, tal, sep = "_"),
                     pattern = "[^[:alnum:]_]",
                     replacement = ""),
           cam = fct_recode(cam,
                            "L1" = "0-0.1",
                            "L2" = "0.1-0.2",
                            "L3" = "0.2-0.4"),
           idc = paste(id, cam))
# str(da)

# Rename variable.
da <- da %>% rename(poro = "poro_e")

# Select variables.
# cat(names(da), sep = ", ")
da <- da %>%
    select(
        #------------------------------------ Factor variables.
        id,
        idc,
        faz,
        tal,
        cam,
        rpt,
        #------------------------------------ Response variable.
        prod,
        #------------------------------------ On the 1st layer only.
        glic,
        phos,
        sulf,
        #------------------------------------ Fractions of SOC.
        MO,
        # MO2,
        FLF,
        OLF,
        HF,
        #------------------------------------ Physical variables.
        # dens,
        # dp,
        # rp,
        # poro,
        #------------------------------------ Particle size.
        clay,
        sand,
        #------------------------------------ Chemical variables.
        pH_H2O,
        # pH_CaCl2,
        P,
        K,
        Ca,
        Mg,
        # Ca_Mg,
        Al,
        S,
        # base,
        CTC,
        V,
        #------------------------------------ Relations between cations.
        CaMg,
        CaK,
        MgK,
        #------------------------------------ Relative values of cations.
        Ca_,
        Mg_,
        K_,
        # H_,
        # m_,
    )

# Only observed variables of the experiment.
v <- tail(names(da), n = -6)

# Filter, reshape and aggregate with means.
db <- da %>%
    # filter(poro < 1) %>%               # Outside the range.
    select(id, cam, v) %>%
    gather(key = "variable",
           value = "value",
           all_of(v)) %>%              # Wide to long.
    filter(!is.na(value)) %>%          # Remove missings.
    group_by(id, cam, variable) %>%    # Grouping.
    summarize(value = mean(value)) %>% # Summarise with mean.
    ungroup()

db %>%
    filter(variable == "Ca", id == .$id[1])
## # A tibble: 3 x 4
##   id               cam   variable value
##   <chr>            <fct> <chr>    <dbl>
## 1 FAZARGEMIRA_AG10 L1    Ca         4  
## 2 FAZARGEMIRA_AG10 L2    Ca         1.8
## 3 FAZARGEMIRA_AG10 L3    Ca         0.9
# Names of the observed variables.
v <- names(da)
v <- v[v %in% unique(db$variable)]

# The order to show in the plots.
# dput(v)
v <- c("prod", "glic", "phos", "sulf", "P", "Ca", "Mg", "K", "Ca_",
       "Mg_", "K_", "CaMg", "CaK", "MgK", "pH_H2O", "Al", "CTC", "V",
       "MO", "S", "FLF", "OLF", "HF", "clay", "sand")
stopifnot(all(v%in% unique(db$variable)))

# Convert to a factor.
db <- db %>%
    mutate(variable = factor(variable, levels = v))

# Put the prod in the same table.
db <- inner_join(db,
                 da %>% select(id, prod) %>% filter(!is.na(prod)),
                 by = "id")

# Mean productivity (it is the estimate of the null model and a
# reference value).
mp <- da %>%
    filter(cam == "L1") %>%
    pull(prod) %>%
    mean(na.rm = TRUE)
mp
## [1] 4.128046
# The short names of the variables to be used as strip labels.
labs <- as.data.frame(high_soybean$legend,
                      stringsAsFactors = FALSE) %>%
    as_tibble() #%>% select(abbrev, short_label)
labs$abbrev[labs$abbrev == "poro_e"] <- "poro"
# print(labs, n = Inf)

# Does the matching.
db$short_label <- labs$short_label[match(db$variable, labs$abbrev)]
i <- is.na(db$short_label)
if (any(i)) {
    db$short_label[i] <- as.character(db$variable[i])
}

# Level order.
lev <- db %>%
    select(variable, short_label) %>%
    unique() %>%
    arrange(variable) %>%
    pull(short_label)
db <- db %>%
    mutate(short_label = factor(short_label, levels = lev))
db
## # A tibble: 3,965 x 6
##    id               cam   variable  value  prod short_label     
##    <chr>            <fct> <fct>     <dbl> <dbl> <fct>           
##  1 FAZARGEMIRA_AG10 L1    Al         0     4.01 Al^{+3}         
##  2 FAZARGEMIRA_AG10 L1    Ca         4     4.01 Ca^{+2}         
##  3 FAZARGEMIRA_AG10 L1    Ca_       39.9   4.01 Ca^{+2} ~ ('%') 
##  4 FAZARGEMIRA_AG10 L1    CaK       12.8   4.01 Ca:K ~ ratio    
##  5 FAZARGEMIRA_AG10 L1    CaMg       2.67  4.01 Ca:Mg ~ ratio   
##  6 FAZARGEMIRA_AG10 L1    clay     661     4.01 Clay            
##  7 FAZARGEMIRA_AG10 L1    CTC       10.0   4.01 CTC~pH~7.0      
##  8 FAZARGEMIRA_AG10 L1    FLF       16.0   4.01 FLF             
##  9 FAZARGEMIRA_AG10 L1    glic     346.    4.01 beta-glucosidase
## 10 FAZARGEMIRA_AG10 L1    HF        23.4   4.01 HF              
## # … with 3,955 more rows
# Stratification index.
# dc <- db %>%
#     filter(cam != "L3") %>%
#     spread(key = "cam", value = "value")
# str(dc)

# dc %>%
#     filter(variable == "Al")

# Stratification ratio.
si <- db %>%
    filter(cam != "L3") %>%
    spread(key = "cam", value = "value") %>%
    filter(!is.na(L2)) %>%
    mutate(si = L1/L2) %>%
    group_by(short_label) %>%
    summarise(si_mean = mean(si), si_sd = sd(si), n = n()) %>%
    filter(is.finite(si_mean))
si
## # A tibble: 17 x 4
##    short_label       si_mean  si_sd     n
##    <fct>               <dbl>  <dbl> <int>
##  1 Available ~ P       7.23  5.85      65
##  2 Ca^{+2}             2.43  0.845     65
##  3 Mg^{+2}             2.25  0.776     65
##  4 K^{+1}              2.13  0.682     65
##  5 Ca^{+2} ~ ('%')     1.83  0.629     65
##  6 Mg^{+2} ~ ('%')     1.69  0.559     65
##  7 K^{+1} ~ ('%')      1.61  0.502     65
##  8 Ca:Mg ~ ratio       1.08  0.0847    65
##  9 Ca:K ~ ratio        1.23  0.494     65
## 10 Mg:K ~ ratio        1.14  0.473     65
## 11 pH~H[2]*O           1.12  0.0595    65
## 12 CTC~pH~7.0          1.33  0.161     65
## 13 Base ~ saturation   1.76  0.557     65
## 14 SOM                 1.44  0.245     65
## 15 Sulfur              0.641 0.154     65
## 16 Clay                0.917 0.0562    65
## 17 Sand                1.14  0.0996    65
ggplot(data = db,
       mapping = aes(x = cam,
                     y = value)) +
    geom_boxplot(fill = "gray90") +
    stat_summary(mapping = aes(group = 1),
                 geom = "point",
                 fun = mean,
                 shape = 4) +
    stat_summary(mapping = aes(group = 1),
                 geom = "line",
                 fun = mean) +
    xlab(label = "Soil layer") +
    ylab(label = "Values of each response variable") +
    facet_wrap(facets = ~short_label,
               scales = "free",
               labeller = label_parsed) +
    # theme(strip.text.x = element_text(margin = margin(1, 0, 1, 0, "cm"))) +
    theme_bw() +
    geom_label_npc(data = si,
                   mapping = aes(npcx = 0.35,
                                 npcy = 0.95,
                                 label = sprintf("%0.1f (%0.1f)",
                                                 si_mean, si_sd)),
                   alpha = 0.5,
                   size = 2.5,
                   hjust = 0.5,
                   vjust = 1)

db_stats <- db %>%
    group_by(variable, cam) %>%
    summarise_at(.vars = "value",
                 .funs = funs(n = length,
                              mean = mean,
                              sd = sd,
                              min = min,
                              q25 = quantile(., 0.25),
                              median = median,
                              q75 = quantile(., 0.75),
                              max = max))

kable(db_stats)
variable cam n mean sd min q25 median q75 max
prod L1 65 4.1280462 0.3446878 3.369 3.93 4.128 4.343 4.974
glic L1 65 219.8353846 60.1763773 101.700 175.90 219.500 256.400 366.000
phos L1 65 693.8938462 150.7240710 324.000 594.60 695.200 770.000 1021.700
sulf L1 65 89.9492308 34.8608131 34.600 65.30 79.000 113.200 199.500
P L1 65 26.4000000 13.2235940 4.600 14.40 27.300 35.900 62.300
P L2 65 5.8307692 7.2602282 1.200 2.40 3.600 6.000 48.400
P L3 65 1.5953846 1.6090011 0.300 0.60 1.200 1.700 8.500
Ca L1 65 3.3830769 0.5113002 2.200 3.00 3.300 3.800 4.500
Ca L2 65 1.5584615 0.5811874 0.600 1.10 1.500 1.800 3.600
Ca L3 65 0.8815385 0.3297216 0.400 0.70 0.800 1.000 2.600
Mg L1 65 1.2415385 0.1886567 0.800 1.10 1.200 1.400 1.600
Mg L2 65 0.6076923 0.2071440 0.200 0.50 0.600 0.700 1.300
Mg L3 65 0.3615385 0.1342715 0.200 0.30 0.300 0.400 1.000
K L1 65 99.3753846 31.4011048 41.500 77.30 99.200 118.800 182.100
K L2 65 50.8892308 23.0831928 22.500 32.70 44.000 60.400 119.100
K L3 65 36.3784615 16.1147593 14.200 22.20 33.600 47.600 76.600
Ca_ L1 65 36.6630769 4.0996025 28.200 33.80 36.700 39.600 45.700
Ca_ L2 65 22.1107692 7.0019668 8.900 17.00 22.200 27.900 42.600
Ca_ L3 65 15.7876923 4.9436357 7.000 12.60 14.800 18.400 35.400
Mg_ L1 65 13.4492308 1.5183382 10.400 12.50 13.400 14.700 17.300
Mg_ L2 65 8.6353846 2.4803875 3.000 6.90 8.800 10.100 15.800
Mg_ L3 65 6.4661538 2.0756232 3.200 5.20 6.100 7.500 14.200
K_ L1 65 2.7692308 0.8792616 1.300 2.10 2.800 3.400 5.100
K_ L2 65 1.8769231 0.8226803 0.800 1.20 1.600 2.400 4.100
K_ L3 65 1.6969231 0.7804184 0.600 1.10 1.500 2.400 3.300
CaMg L1 65 2.7272308 0.0805549 2.500 2.67 2.730 2.790 2.920
CaMg L2 65 2.5478462 0.1632338 2.000 2.50 2.570 2.600 3.000
CaMg L3 65 2.4656923 0.2634861 2.000 2.33 2.400 2.600 3.000
CaK L1 65 14.5236923 4.7833332 7.070 10.82 13.430 16.510 28.750
CaK L2 65 13.4689231 5.9374498 4.670 8.62 12.510 16.800 28.620
CaK L3 65 11.2455385 5.8003103 3.950 7.08 9.450 14.850 29.100
MgK L1 65 5.3381538 1.7980588 2.570 3.97 4.900 6.050 10.340
MgK L2 65 5.2869231 2.2837818 1.840 3.70 5.100 6.590 11.230
MgK L3 65 4.6506154 2.4934163 1.320 2.71 4.050 6.440 11.640
pH_H2O L1 65 6.0600000 0.2090155 5.600 5.90 6.100 6.200 6.500
pH_H2O L2 65 5.4215385 0.2803415 4.900 5.20 5.400 5.600 6.300
pH_H2O L3 65 5.2138462 0.1983344 4.800 5.10 5.200 5.300 6.100
Al L1 65 0.0000000 0.0000000 0.000 0.00 0.000 0.000 0.000
Al L2 65 0.2800000 0.2017114 0.000 0.00 0.300 0.400 0.700
Al L3 65 0.4569231 0.1402779 0.000 0.40 0.500 0.500 0.700
CTC L1 65 9.2241538 0.9021275 6.820 8.71 9.260 9.970 12.050
CTC L2 65 7.0140000 1.0122682 4.400 6.35 6.950 7.470 10.420
CTC L3 65 5.5980000 0.8768110 3.390 5.07 5.560 5.990 8.580
V L1 65 52.8904615 5.9666580 41.160 48.54 52.760 56.810 67.870
V L2 65 32.6233846 9.7145676 13.550 25.68 32.930 39.720 60.600
V L3 65 23.9612308 7.1443701 12.170 19.61 23.160 27.680 52.770
MO L1 65 40.2936923 4.2778001 28.700 37.80 39.900 43.300 48.400
MO L2 65 28.6769231 5.0886445 12.300 25.60 28.700 31.200 44.500
MO L3 65 20.8569231 4.8289482 8.700 18.10 20.600 22.700 36.800
S L1 65 13.0984615 6.1441662 7.000 9.00 10.800 15.100 32.900
S L2 65 21.8630769 10.9368010 8.800 12.90 18.400 29.200 50.000
S L3 65 43.6000000 18.6370012 13.000 29.00 46.600 55.400 81.400
FLF L1 65 15.1387692 3.8167790 2.240 12.74 15.220 17.520 24.520
OLF L1 65 6.0247692 3.0415622 1.650 4.20 5.230 6.860 14.890
HF L1 65 14.9160000 5.6901403 2.100 9.61 16.160 18.540 30.100
clay L1 65 593.0000000 121.2559277 151.000 563.00 614.000 668.000 752.000
clay L2 65 645.3846154 124.2240532 195.000 620.00 682.000 726.000 770.000
clay L3 65 677.7692308 122.3617548 215.000 648.00 709.000 762.000 798.000
sand L1 65 291.4769231 147.5959548 143.000 192.00 245.000 328.000 826.000
sand L2 65 261.0000000 143.9315158 121.000 171.00 223.000 280.000 786.000
sand L3 65 239.1230769 137.7964427 111.000 151.00 192.000 263.000 763.000
ggplot(data = db %>% filter(variable != "prod"),
       mapping = aes(x = value,
                     y = prod,
                     shape = cam)) +
    labs(shape = "Soil layer") +
    geom_point() +
    geom_smooth(method = "lm", formula = y ~ x, color = "black") +
    geom_hline(yintercept = mp, lty = 2) +
    xlab(label = "Soil variable values") +
    ylab(label = expression("Soybean productivity" ~ (t ~ ha^{-1}))) +
    facet_wrap(facets = ~short_label,
               scales = "free_x",
               labeller = label_parsed) +
    theme_bw() +
    theme(legend.position = c(0.9, 0.1))

2 Regression for the soybean yield on each physical variables

da <- as_tibble(high_soybean$atrib) %>%
    filter(cam != "") %>%
    mutate(id = gsub(x = paste(faz, tal, sep = "_"),
                     pattern = "[^[:alnum:]_]",
                     replacement = ""),
           cam = fct_recode(cam,
                            "L1" = "0-0.1",
                            "L2" = "0.1-0.2",
                            "L3" = "0.2-0.4"),
           idc = paste(id, cam)) %>%
    rename(poro = "poro_e")
# str(da)

v <- names(da)[6:39]

# Filter, reshape and aggregate with means.
db <- da %>%
    filter(poro < 1) %>%               # Outside the range.
    select(id, cam, v) %>%
    gather(key = "variable",
           value = "value", v) %>%     # Wide to long.
    filter(!is.na(value)) %>%          # Remove missings.
    group_by(id, cam, variable) %>%    # Grouping.
    summarize(value = mean(value)) %>% # Summarise with mean.
    ungroup() %>%
    mutate(variable = factor(variable, levels = v))

# Put the prod in the same table.
db <- inner_join(db,
                 da %>% select(id, prod) %>% filter(!is.na(prod)),
                 by = "id")

dc <- db %>%
    spread(key = "variable", value = "value")
dc
## # A tibble: 195 x 37
##    id        cam    prod  glic  sulf  phos   FLF   OLF   MO2    HF    rp  dens  poro  clay
##    <chr>     <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 FAZARGEM… L1     4.01  346.  136. 1018.  16.0  2.83  42.3  23.4 2.00   1.36 0.51    661
##  2 FAZARGEM… L2     4.01   NA    NA    NA   NA   NA     NA    NA   2.4    1.35 0.495   722
##  3 FAZARGEM… L3     4.01   NA    NA    NA   NA   NA     NA    NA   0.915  1.22 0.525   749
##  4 FAZARGEM… L1     3.89  275.  200.  922.  14.4  6.21  45.5  24.8 1.92   1.21 0.535   653
##  5 FAZARGEM… L2     3.89   NA    NA    NA   NA   NA     NA    NA   2.64   1.29 0.525   725
##  6 FAZARGEM… L3     3.89   NA    NA    NA   NA   NA     NA    NA   1.74   1.21 0.505   775
##  7 FAZARGEM… L1     4.12  257   179.  812.  12.1  3.1   45.3  30.1 0.9    1.04 0.6     644
##  8 FAZARGEM… L2     4.12   NA    NA    NA   NA   NA     NA    NA   1.80   1.19 0.55    724
##  9 FAZARGEM… L3     4.12   NA    NA    NA   NA   NA     NA    NA   1.21   1.14 0.56    773
## 10 FAZARGEM… L1     4.25  259.  116.  900.  16.0  6.86  38.7  15.8 1.66   1.17 0.565   713
## # … with 185 more rows, and 23 more variables: sand <dbl>, pH_H2O <dbl>, pH_CaCl2 <dbl>,
## #   P <dbl>, K <dbl>, Ca_Mg <dbl>, Ca <dbl>, Mg <dbl>, Al <dbl>, MO <dbl>, base <dbl>,
## #   CTC <dbl>, V <dbl>, CaMg <dbl>, CaK <dbl>, MgK <dbl>, Ca_ <dbl>, Mg_ <dbl>, K_ <dbl>,
## #   H_ <dbl>, m_ <dbl>, S <dbl>, dp <dbl>
ylab <- expression("Soybean productivity" ~ (t ~ ha^{-1}))

# List to save the fitted models.
fits <- list()

2.1 Soil resistence penetration (rp)

labs[labs$abbrev == "rp", ]
## # A tibble: 1 x 4
##   abbrev short_label       measure_unit long_label                     
##   <chr>  <chr>             <chr>        <chr>                          
## 1 rp     Soil ~ resistence (MPa)        Soil ~ resistence ~ penetration
xlab <- expression("Soil resistence penetration" ~ (MPa))
dc$x <- dc$rp

ggplot(data = dc,
             mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(span = 1) +
    geom_hline(yintercept = mp, lty = 2) +
    facet_wrap(facets = ~cam)

dd <- dc %>%
    select(id, prod, cam, x) %>%
    spread(key = "cam", value = "x")
dd
## # A tibble: 65 x 5
##    id                 prod    L1    L2    L3
##    <chr>             <dbl> <dbl> <dbl> <dbl>
##  1 FAZARGEMIRA_AG10   4.01  2.00  2.4  0.915
##  2 FAZARGEMIRA_AG2    3.89  1.92  2.64 1.74 
##  3 FAZARGEMIRA_AG3    4.12  0.9   1.80 1.21 
##  4 FAZARGEMIRA_AG5    4.25  1.66  1.98 1.02 
##  5 FAZARGEMIRA_AG6    4.12  1.6   1.62 1.01 
##  6 FAZARGEMIRA_AG9    4.38  1.16  2.26 1.54 
##  7 FAZARGEMIRA_STI5   3.88  0.35  2.60 1.33 
##  8 FAZARGEMIRA_STI6   4.14  0.59  2.26 0.96 
##  9 FAZCALIFORNIA_CA4  4.03  0.89  1.16 1.07 
## 10 FAZFLORIDA_F1      3.83  2.32  3.9  1.99 
## # … with 55 more rows
m0 <- lm(prod ~
             poly(L1, degree = 2) +
             poly(L2, degree = 2) +
             poly(L3, degree = 2),
         data = dd)

drop1(m0, test = "F")
## Single term deletions
## 
## Model:
## prod ~ poly(L1, degree = 2) + poly(L2, degree = 2) + poly(L3, 
##     degree = 2)
##                      Df Sum of Sq    RSS     AIC F value   Pr(>F)   
## <none>                            6.2911 -137.79                    
## poly(L1, degree = 2)  2   1.23358 7.5247 -130.15  5.6865 0.005558 **
## poly(L2, degree = 2)  2   0.07081 6.3619 -141.06  0.3264 0.722837   
## poly(L3, degree = 2)  2   0.06929 6.3604 -141.08  0.3194 0.727848   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m0)
## 
## Call:
## lm(formula = prod ~ poly(L1, degree = 2) + poly(L2, degree = 2) + 
##     poly(L3, degree = 2), data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.60106 -0.16805 -0.00195  0.16608  0.90663 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            4.12805    0.04085 101.054  < 2e-16 ***
## poly(L1, degree = 2)1 -1.13116    0.33555  -3.371  0.00134 ** 
## poly(L1, degree = 2)2 -0.03031    0.33892  -0.089  0.92905    
## poly(L2, degree = 2)1  0.02194    0.35885   0.061  0.95147    
## poly(L2, degree = 2)2 -0.26722    0.33347  -0.801  0.42620    
## poly(L3, degree = 2)1  0.07327    0.36725   0.200  0.84256    
## poly(L3, degree = 2)2  0.25808    0.33414   0.772  0.44304    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3293 on 58 degrees of freedom
## Multiple R-squared:  0.1726, Adjusted R-squared:  0.08705 
## F-statistic: 2.017 on 6 and 58 DF,  p-value: 0.07784
m1 <- update(m0, . ~ L1 + L2 + L3)
summary(m1)
## 
## Call:
## lm(formula = prod ~ L1 + L2 + L3, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.65040 -0.17683  0.00117  0.18044  0.87691 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.367889   0.195664  22.323  < 2e-16 ***
## L1          -0.179542   0.053825  -3.336  0.00145 ** 
## L2           0.008117   0.082990   0.098  0.92241    
## L3           0.011493   0.111020   0.104  0.91789    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3244 on 61 degrees of freedom
## Multiple R-squared:  0.1556, Adjusted R-squared:  0.1141 
## F-statistic: 3.747 on 3 and 61 DF,  p-value: 0.01546
m2 <- update(m0, . ~ L1)
summary(m2)
## 
## Call:
## lm(formula = prod ~ L1, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.65245 -0.18205 -0.00759  0.18392  0.88384 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.39888    0.08893  49.464  < 2e-16 ***
## L1          -0.17818    0.05238  -3.401  0.00117 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3193 on 63 degrees of freedom
## Multiple R-squared:  0.1551, Adjusted R-squared:  0.1417 
## F-statistic: 11.57 on 1 and 63 DF,  p-value: 0.001169
gg <- ggplot(data = dd,
       mapping = aes(x = L1, y = prod)) +
    geom_point() +
    geom_smooth(method = lm, formula = y ~ x, col = "black") +
    geom_hline(yintercept = mp, lty = 2) +
    xlab(label = xlab) +
    ylab(label = ylab)

fits$rp <- list(xlab = xlab, plot = gg, m0 = m0, m1 = m1, m2 = m2)
gg

2.2 Soil bulk density (dens)

labs[labs$abbrev == "dens", ]
## # A tibble: 1 x 4
##   abbrev short_label    measure_unit long_label           
##   <chr>  <chr>          <chr>        <chr>                
## 1 dens   Bulk ~ density (Mg~m^{-3})  Soil ~ bulk ~ density
xlab <- expression("Soil bulk density" ~ (Mg ~ m^{-3}))
dc$x <- dc$dens

ggplot(data = dc,
             mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(span = 1) +
    geom_hline(yintercept = mp, lty = 2) +
    facet_wrap(facets = ~cam)

dd <- dc %>%
    select(id, prod, cam, x) %>%
    spread(key = "cam", value = "x")
dd
## # A tibble: 65 x 5
##    id                 prod    L1    L2    L3
##    <chr>             <dbl> <dbl> <dbl> <dbl>
##  1 FAZARGEMIRA_AG10   4.01 1.36   1.35  1.22
##  2 FAZARGEMIRA_AG2    3.89 1.21   1.29  1.21
##  3 FAZARGEMIRA_AG3    4.12 1.04   1.19  1.14
##  4 FAZARGEMIRA_AG5    4.25 1.17   1.36  1.3 
##  5 FAZARGEMIRA_AG6    4.12 1.28   1.33  1.28
##  6 FAZARGEMIRA_AG9    4.38 1.22   1.36  1.20
##  7 FAZARGEMIRA_STI5   3.88 0.94   1.2   1.23
##  8 FAZARGEMIRA_STI6   4.14 0.965  1.31  1.15
##  9 FAZCALIFORNIA_CA4  4.03 1.36   1.44  1.44
## 10 FAZFLORIDA_F1      3.83 1.38   1.62  1.71
## # … with 55 more rows
m0 <- lm(prod ~
             poly(L1, degree = 2) +
             poly(L2, degree = 2) +
             poly(L3, degree = 2),
         data = dd)

drop1(m0, test = "F")
## Single term deletions
## 
## Model:
## prod ~ poly(L1, degree = 2) + poly(L2, degree = 2) + poly(L3, 
##     degree = 2)
##                      Df Sum of Sq    RSS     AIC F value  Pr(>F)  
## <none>                            6.5979 -134.70                  
## poly(L1, degree = 2)  2   0.67148 7.2694 -132.40  2.9514 0.06017 .
## poly(L2, degree = 2)  2   0.11035 6.7083 -137.62  0.4850 0.61817  
## poly(L3, degree = 2)  2   0.19218 6.7901 -136.83  0.8447 0.43491  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m0)
## 
## Call:
## lm(formula = prod ~ poly(L1, degree = 2) + poly(L2, degree = 2) + 
##     poly(L3, degree = 2), data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.72617 -0.24094  0.02521  0.19246  0.84467 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            4.12805    0.04183  98.676   <2e-16 ***
## poly(L1, degree = 2)1 -0.87193    0.41967  -2.078   0.0422 *  
## poly(L1, degree = 2)2 -0.45473    0.37538  -1.211   0.2307    
## poly(L2, degree = 2)1 -0.13982    0.71755  -0.195   0.8462    
## poly(L2, degree = 2)2  0.40923    0.46365   0.883   0.3811    
## poly(L3, degree = 2)1  0.70021    0.68142   1.028   0.3084    
## poly(L3, degree = 2)2 -0.23995    0.41358  -0.580   0.5640    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3373 on 58 degrees of freedom
## Multiple R-squared:  0.1323, Adjusted R-squared:  0.04252 
## F-statistic: 1.474 on 6 and 58 DF,  p-value: 0.2033
m1 <- update(m0, . ~ L1 + L2 + L3)
summary(m1)
## 
## Call:
## lm(formula = prod ~ L1 + L2 + L3, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.71389 -0.25491  0.02939  0.20244  0.77252 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4.2786     0.5019   8.524 5.54e-12 ***
## L1           -0.7784     0.3594  -2.166   0.0342 *  
## L2           -0.3047     0.6891  -0.442   0.6599    
## L3            0.9288     0.6566   1.415   0.1622    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3341 on 61 degrees of freedom
## Multiple R-squared:  0.1043, Adjusted R-squared:  0.06025 
## F-statistic: 2.368 on 3 and 61 DF,  p-value: 0.07948
m2 <- update(m0, . ~ L1)
summary(m2)
## 
## Call:
## lm(formula = prod ~ L1, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.74321 -0.21702  0.00661  0.21037  0.83465 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4.8341     0.3687  13.110   <2e-16 ***
## L1           -0.5654     0.2934  -1.927   0.0585 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3376 on 63 degrees of freedom
## Multiple R-squared:  0.05568,    Adjusted R-squared:  0.04069 
## F-statistic: 3.714 on 1 and 63 DF,  p-value: 0.05846
gg <- ggplot(data = dd,
       mapping = aes(x = L1, y = prod)) +
    geom_point() +
    geom_smooth(method = lm, formula = y ~ x, col = "black") +
    geom_hline(yintercept = mp, lty = 2) +
    xlab(label = xlab) +
    ylab(label = ylab)

fits$dens <- list(xlab = xlab, plot = gg, m0 = m0, m1 = m1, m2 = m2)
gg

2.3 Total porosity (poro)

labs[labs$abbrev == "poro", ]
## # A tibble: 1 x 4
##   abbrev short_label      measure_unit   long_label      
##   <chr>  <chr>            <chr>          <chr>           
## 1 poro   Total ~ porisity (m^{3}~m^{-3}) Total ~ porisity
xlab <- expression("Total porosity" ~ (m ~ m^{-3}))
dc$x <- dc$poro

ggplot(data = dc,
             mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(span = 1) +
    geom_hline(yintercept = mp, lty = 2) +
    facet_wrap(facets = ~cam)

dd <- dc %>%
    select(id, prod, cam, x) %>%
    spread(key = "cam", value = "x")
dd
## # A tibble: 65 x 5
##    id                 prod    L1    L2    L3
##    <chr>             <dbl> <dbl> <dbl> <dbl>
##  1 FAZARGEMIRA_AG10   4.01 0.51  0.495 0.525
##  2 FAZARGEMIRA_AG2    3.89 0.535 0.525 0.505
##  3 FAZARGEMIRA_AG3    4.12 0.6   0.55  0.56 
##  4 FAZARGEMIRA_AG5    4.25 0.565 0.49  0.52 
##  5 FAZARGEMIRA_AG6    4.12 0.54  0.49  0.515
##  6 FAZARGEMIRA_AG9    4.38 0.55  0.485 0.55 
##  7 FAZARGEMIRA_STI5   3.88 0.67  0.565 0.535
##  8 FAZARGEMIRA_STI6   4.14 0.625 0.505 0.565
##  9 FAZCALIFORNIA_CA4  4.03 0.475 0.46  0.45 
## 10 FAZFLORIDA_F1      3.83 0.46  0.39  0.39 
## # … with 55 more rows
m0 <- lm(prod ~
             poly(L1, degree = 2) +
             poly(L2, degree = 2) +
             poly(L3, degree = 2),
         data = dd)

drop1(m0, test = "F")
## Single term deletions
## 
## Model:
## prod ~ poly(L1, degree = 2) + poly(L2, degree = 2) + poly(L3, 
##     degree = 2)
##                      Df Sum of Sq    RSS     AIC F value Pr(>F)
## <none>                            6.7519 -133.20               
## poly(L1, degree = 2)  2   0.52684 7.2787 -132.31  2.2628 0.1132
## poly(L2, degree = 2)  2   0.09250 6.8444 -136.31  0.3973 0.6739
## poly(L3, degree = 2)  2   0.29864 7.0505 -134.38  1.2827 0.2850
summary(m0)
## 
## Call:
## lm(formula = prod ~ poly(L1, degree = 2) + poly(L2, degree = 2) + 
##     poly(L3, degree = 2), data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.75541 -0.27951  0.03118  0.19535  0.79810 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            4.12805    0.04232  97.544   <2e-16 ***
## poly(L1, degree = 2)1  0.73713    0.44388   1.661    0.102    
## poly(L1, degree = 2)2 -0.46739    0.37814  -1.236    0.221    
## poly(L2, degree = 2)1  0.53952    0.65430   0.825    0.413    
## poly(L2, degree = 2)2 -0.01334    0.49372  -0.027    0.979    
## poly(L3, degree = 2)1 -1.02592    0.68282  -1.502    0.138    
## poly(L3, degree = 2)2  0.14590    0.46810   0.312    0.756    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3412 on 58 degrees of freedom
## Multiple R-squared:  0.112,  Adjusted R-squared:  0.02018 
## F-statistic:  1.22 on 6 and 58 DF,  p-value: 0.3095
m1 <- update(m0, . ~ L1 + L2 + L3)
summary(m1)
## 
## Call:
## lm(formula = prod ~ L1 + L2 + L3, data = dd)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7260 -0.2750  0.0265  0.2008  0.7776 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.9647     0.5304   7.475 3.52e-10 ***
## L1            1.7301     0.9637   1.795   0.0776 .  
## L2            1.0908     1.4658   0.744   0.4596    
## L3           -2.5572     1.4772  -1.731   0.0885 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3372 on 61 degrees of freedom
## Multiple R-squared:  0.08762,    Adjusted R-squared:  0.04275 
## F-statistic: 1.953 on 3 and 61 DF,  p-value: 0.1306
m2 <- update(m0, . ~ L1)
summary(m2)
## 
## Call:
## lm(formula = prod ~ L1, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.74967 -0.20338 -0.00967  0.21932  0.84866 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.4550     0.4289   8.056 2.85e-11 ***
## L1            1.2660     0.8028   1.577     0.12    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3408 on 63 degrees of freedom
## Multiple R-squared:  0.03797,    Adjusted R-squared:  0.0227 
## F-statistic: 2.487 on 1 and 63 DF,  p-value: 0.1198
gg <- ggplot(data = dd,
       mapping = aes(x = L1, y = prod)) +
    geom_point() +
    geom_smooth(method = lm, formula = y ~ x, col = "black") +
    geom_hline(yintercept = mp, lty = 2) +
    xlab(label = xlab) +
    ylab(label = ylab)

fits$poro <- list(xlab = xlab, plot = gg, m0 = m0, m1 = m1, m2 = m2)
gg

2.4 Particle density (dp)

labs[labs$abbrev == "dp", ]
## # A tibble: 1 x 4
##   abbrev short_label        measure_unit long_label        
##   <chr>  <chr>              <chr>        <chr>             
## 1 dp     Particle ~ density (Mg~m^{-3})  Particle ~ density
xlab <- expression("Particle density" ~ (Mg ~ m^{-3}))
dc$x <- dc$dp

ggplot(data = dc,
             mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(span = 1) +
    geom_hline(yintercept = mp, lty = 2) +
    facet_wrap(facets = ~cam)

dd <- dc %>%
    select(id, prod, cam, x) %>%
    spread(key = "cam", value = "x")
dd
## # A tibble: 65 x 5
##    id                 prod    L1    L2    L3
##    <chr>             <dbl> <dbl> <dbl> <dbl>
##  1 FAZARGEMIRA_AG10   4.01  2.78  2.67  2.57
##  2 FAZARGEMIRA_AG2    3.89  2.59  2.73  2.45
##  3 FAZARGEMIRA_AG3    4.12  2.63  2.65  2.59
##  4 FAZARGEMIRA_AG5    4.25  2.68  2.66  2.73
##  5 FAZARGEMIRA_AG6    4.12  2.81  2.59  2.61
##  6 FAZARGEMIRA_AG9    4.38  2.73  2.64  2.69
##  7 FAZARGEMIRA_STI5   3.88  2.83  2.77  2.65
##  8 FAZARGEMIRA_STI6   4.14  2.57  2.64  2.64
##  9 FAZCALIFORNIA_CA4  4.03  2.59  2.67  2.62
## 10 FAZFLORIDA_F1      3.83  2.54  2.64  2.81
## # … with 55 more rows
m0 <- lm(prod ~
             poly(L1, degree = 2) +
             poly(L2, degree = 2) +
             poly(L3, degree = 2),
         data = dd)

drop1(m0, test = "F")
## Single term deletions
## 
## Model:
## prod ~ poly(L1, degree = 2) + poly(L2, degree = 2) + poly(L3, 
##     degree = 2)
##                      Df Sum of Sq    RSS     AIC F value Pr(>F)
## <none>                            6.8847 -131.93               
## poly(L1, degree = 2)  2   0.36893 7.2537 -132.54  1.5540 0.2201
## poly(L2, degree = 2)  2   0.47947 7.3642 -131.55  2.0196 0.1419
## poly(L3, degree = 2)  2   0.02428 6.9090 -135.70  0.1023 0.9030
summary(m0)
## 
## Call:
## lm(formula = prod ~ poly(L1, degree = 2) + poly(L2, degree = 2) + 
##     poly(L3, degree = 2), data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.69002 -0.23723  0.03723  0.21260  0.68953 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            4.12805    0.04273  96.599   <2e-16 ***
## poly(L1, degree = 2)1 -0.62499    0.36875  -1.695   0.0955 .  
## poly(L1, degree = 2)2  0.22320    0.36073   0.619   0.5385    
## poly(L2, degree = 2)1  0.66239    0.36925   1.794   0.0780 .  
## poly(L2, degree = 2)2  0.28879    0.35030   0.824   0.4131    
## poly(L3, degree = 2)1  0.12277    0.35510   0.346   0.7308    
## poly(L3, degree = 2)2 -0.10660    0.35178  -0.303   0.7629    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3445 on 58 degrees of freedom
## Multiple R-squared:  0.09457,    Adjusted R-squared:  0.0009042 
## F-statistic:  1.01 on 6 and 58 DF,  p-value: 0.428
m1 <- update(m0, . ~ L1 + L2 + L3)
summary(m1)
## 
## Call:
## lm(formula = prod ~ L1 + L2 + L3, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.74268 -0.18045  0.02907  0.25372  0.73167 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   3.5328     2.2825   1.548   0.1269  
## L1           -1.0552     0.6069  -1.739   0.0872 .
## L2            1.1243     0.6537   1.720   0.0905 .
## L3            0.1494     0.4376   0.342   0.7339  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3397 on 61 degrees of freedom
## Multiple R-squared:  0.07436,    Adjusted R-squared:  0.02883 
## F-statistic: 1.633 on 3 and 61 DF,  p-value: 0.1909
m2 <- update(m0, . ~ L1)
summary(m2)
## 
## Call:
## lm(formula = prod ~ L1, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.74779 -0.21954 -0.01903  0.24903  0.85621 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   6.2453     1.5495   4.031 0.000153 ***
## L1           -0.7939     0.5808  -1.367 0.176502    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3424 on 63 degrees of freedom
## Multiple R-squared:  0.0288, Adjusted R-squared:  0.01339 
## F-statistic: 1.869 on 1 and 63 DF,  p-value: 0.1765
gg <- ggplot(data = dd,
       mapping = aes(x = L1, y = prod)) +
    geom_point() +
    geom_smooth(method = lm, formula = y ~ x, col = "black") +
    geom_hline(yintercept = mp, lty = 2) +
    xlab(label = xlab) +
    ylab(label = ylab)

fits$dp <- list(xlab = xlab, plot = gg, m0 = m0, m1 = m1, m2 = m2)
gg

2.5 Clay content

labs[labs$abbrev == "clay", ]
## # A tibble: 1 x 4
##   abbrev short_label measure_unit long_label    
##   <chr>  <chr>       <chr>        <chr>         
## 1 clay   Clay        (g~kg^{-3})  Sand ~ content
xlab <- expression("Clay content" ~ (g^3 ~ kg^{-3}))
dc$x <- dc$clay

ggplot(data = dc,
             mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(span = 1) +
    geom_hline(yintercept = mp, lty = 2) +
    facet_wrap(facets = ~cam)

dd <- dc %>%
    select(id, prod, cam, x) %>%
    spread(key = "cam", value = "x") %>%
    na.omit()
dd
## # A tibble: 57 x 5
##    id                 prod    L1    L2    L3
##    <chr>             <dbl> <dbl> <dbl> <dbl>
##  1 FAZARGEMIRA_AG10   4.01   661   722   749
##  2 FAZARGEMIRA_AG2    3.89   653   725   775
##  3 FAZARGEMIRA_AG3    4.12   644   724   773
##  4 FAZARGEMIRA_AG5    4.25   713   759   788
##  5 FAZARGEMIRA_AG6    4.12   676   752   719
##  6 FAZARGEMIRA_AG9    4.38   669   734   755
##  7 FAZARGEMIRA_STI5   3.88   641   734   790
##  8 FAZARGEMIRA_STI6   4.14   636   742   773
##  9 FAZCALIFORNIA_CA4  4.03   489   533   563
## 10 FAZGMC_MC2         3.75   682   689   701
## # … with 47 more rows
m0 <- lm(prod ~
             poly(L1, degree = 2) +
             poly(L2, degree = 2) +
             poly(L3, degree = 2),
         data = dd)

drop1(m0, test = "F")
## Single term deletions
## 
## Model:
## prod ~ poly(L1, degree = 2) + poly(L2, degree = 2) + poly(L3, 
##     degree = 2)
##                      Df Sum of Sq    RSS     AIC F value Pr(>F)
## <none>                            5.9910 -114.41               
## poly(L1, degree = 2)  2   0.44146 6.4325 -114.36  1.8422 0.1691
## poly(L2, degree = 2)  2   0.22351 6.2145 -116.32  0.9327 0.4002
## poly(L3, degree = 2)  2   0.03777 6.0288 -118.05  0.1576 0.8546
summary(m0)
## 
## Call:
## lm(formula = prod ~ poly(L1, degree = 2) + poly(L2, degree = 2) + 
##     poly(L3, degree = 2), data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.79516 -0.19219 -0.01592  0.21550  0.85325 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            4.12689    0.04585  90.011   <2e-16 ***
## poly(L1, degree = 2)1 -0.16114    3.38539  -0.048    0.962    
## poly(L1, degree = 2)2 -1.22551    1.75131  -0.700    0.487    
## poly(L2, degree = 2)1  1.69149    2.60406   0.650    0.519    
## poly(L2, degree = 2)2  0.69186    1.40747   0.492    0.625    
## poly(L3, degree = 2)1 -1.98671    4.01244  -0.495    0.623    
## poly(L3, degree = 2)2  0.52238    1.89141   0.276    0.784    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3462 on 50 degrees of freedom
## Multiple R-squared:  0.09816,    Adjusted R-squared:  -0.01006 
## F-statistic: 0.907 on 6 and 50 DF,  p-value: 0.4975
m1 <- update(m0, . ~ L1 + L2 + L3)
summary(m1)
## 
## Call:
## lm(formula = prod ~ L1 + L2 + L3, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.77366 -0.21495 -0.00186  0.20485  0.85477 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.3222803  0.2616378  16.520   <2e-16 ***
## L1          -0.0025086  0.0013657  -1.837   0.0718 .  
## L2           0.0025862  0.0020645   1.253   0.2158    
## L3          -0.0005539  0.0020764  -0.267   0.7907    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3384 on 53 degrees of freedom
## Multiple R-squared:  0.08621,    Adjusted R-squared:  0.03449 
## F-statistic: 1.667 on 3 and 53 DF,  p-value: 0.1853
m2 <- update(m0, . ~ L1)
summary(m2)
## 
## Call:
## lm(formula = prod ~ L1, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.72508 -0.21282  0.00959  0.18115  0.83208 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.4437891  0.2258436  19.676   <2e-16 ***
## L1          -0.0005315  0.0003711  -1.432    0.158    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3412 on 55 degrees of freedom
## Multiple R-squared:  0.03595,    Adjusted R-squared:  0.01842 
## F-statistic: 2.051 on 1 and 55 DF,  p-value: 0.1578
gg <- ggplot(data = dd,
       mapping = aes(x = L1, y = prod)) +
    geom_point() +
    geom_smooth(method = lm, formula = y ~ x, col = "black") +
    geom_hline(yintercept = mp, lty = 2) +
    xlab(label = xlab) +
    ylab(label = ylab)

fits$clay <- list(xlab = xlab, plot = gg, m0 = m0, m1 = m1, m2 = m2)
gg

2.6 Sand content

labs[labs$abbrev == "sand", ]
## # A tibble: 1 x 4
##   abbrev short_label measure_unit long_label
##   <chr>  <chr>       <chr>        <chr>     
## 1 sand   Sand        (g~kg^{-3})  Clay
xlab <- expression("Sand content" ~ (g^3 ~ kg^{-3}))
dc$x <- dc$sand

ggplot(data = dc,
             mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(span = 1) +
    geom_hline(yintercept = mp, lty = 2) +
    facet_wrap(facets = ~cam)

dd <- dc %>%
    select(id, prod, cam, x) %>%
    spread(key = "cam", value = "x") %>%
    na.omit()
dd
## # A tibble: 57 x 5
##    id                 prod    L1    L2    L3
##    <chr>             <dbl> <dbl> <dbl> <dbl>
##  1 FAZARGEMIRA_AG10   4.01   211   174   156
##  2 FAZARGEMIRA_AG2    3.89   162   139   120
##  3 FAZARGEMIRA_AG3    4.12   175   138   127
##  4 FAZARGEMIRA_AG5    4.25   179   151   136
##  5 FAZARGEMIRA_AG6    4.12   189   148   117
##  6 FAZARGEMIRA_AG9    4.38   188   177   159
##  7 FAZARGEMIRA_STI5   3.88   178   137   120
##  8 FAZARGEMIRA_STI6   4.14   178   135   131
##  9 FAZCALIFORNIA_CA4  4.03   461   378   379
## 10 FAZGMC_MC2         3.75   166   162   155
## # … with 47 more rows
m0 <- lm(prod ~
             poly(L1, degree = 2) +
             poly(L2, degree = 2) +
             poly(L3, degree = 2),
         data = dd)

drop1(m0, test = "F")
## Single term deletions
## 
## Model:
## prod ~ poly(L1, degree = 2) + poly(L2, degree = 2) + poly(L3, 
##     degree = 2)
##                      Df Sum of Sq    RSS     AIC F value  Pr(>F)  
## <none>                            5.5616 -118.65                  
## poly(L1, degree = 2)  2   0.50680 6.0684 -117.68  2.2781 0.11302  
## poly(L2, degree = 2)  2   0.69761 6.2592 -115.91  3.1358 0.05212 .
## poly(L3, degree = 2)  2   0.22280 5.7844 -120.41  1.0015 0.37456  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m0)
## 
## Call:
## lm(formula = prod ~ poly(L1, degree = 2) + poly(L2, degree = 2) + 
##     poly(L3, degree = 2), data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.76920 -0.17151 -0.00491  0.18242  0.84854 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            4.12689    0.04417  93.422   <2e-16 ***
## poly(L1, degree = 2)1 -1.00183    3.92585  -0.255   0.7996    
## poly(L1, degree = 2)2 -3.24372    2.04509  -1.586   0.1190    
## poly(L2, degree = 2)1 -0.87432    4.58998  -0.190   0.8497    
## poly(L2, degree = 2)2  4.71098    2.58776   1.820   0.0747 .  
## poly(L3, degree = 2)1  2.45602    4.79071   0.513   0.6104    
## poly(L3, degree = 2)2 -1.60436    2.44302  -0.657   0.5144    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3335 on 50 degrees of freedom
## Multiple R-squared:  0.1628, Adjusted R-squared:  0.06234 
## F-statistic: 1.621 on 6 and 50 DF,  p-value: 0.161
m1 <- update(m0, . ~ L1 + L2 + L3)
summary(m1)
## 
## Call:
## lm(formula = prod ~ L1 + L2 + L3, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.72242 -0.22158  0.03343  0.21616  0.85070 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.984978   0.121419  32.820   <2e-16 ***
## L1           0.002931   0.002275   1.289   0.2032    
## L2          -0.005789   0.003307  -1.750   0.0858 .  
## L3           0.003347   0.003367   0.994   0.3247    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.337 on 53 degrees of freedom
## Multiple R-squared:  0.09385,    Adjusted R-squared:  0.04256 
## F-statistic:  1.83 on 3 and 53 DF,  p-value: 0.153
m2 <- update(m0, . ~ L2)
summary(m2)
## 
## Call:
## lm(formula = prod ~ L2, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.73657 -0.20223  0.01742  0.19678  0.82525 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 4.0257229  0.0935269  43.043   <2e-16 ***
## L2          0.0003906  0.0003157   1.237    0.221    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3428 on 55 degrees of freedom
## Multiple R-squared:  0.02708,    Adjusted R-squared:  0.009393 
## F-statistic: 1.531 on 1 and 55 DF,  p-value: 0.2212
gg <- ggplot(data = dd,
       mapping = aes(x = L2, y = prod)) +
    geom_point() +
    geom_smooth(method = lm, formula = y ~ x, col = "black") +
    geom_hline(yintercept = mp, lty = 2) +
    xlab(label = xlab) +
    ylab(label = ylab)

fits$sand <- list(xlab = xlab, plot = gg, m0 = m0, m1 = m1, m2 = m2)
gg

3 Regression for the soybean yield on each chemical variables

3.1 pH on H2O

labs[labs$abbrev == "pH_H2O", ]
## # A tibble: 1 x 4
##   abbrev short_label measure_unit long_label
##   <chr>  <chr>       <chr>        <chr>     
## 1 pH_H2O pH~H[2]*O   ""           Soil ~ pH
xlab <- expression(Soil ~ pH)
dc$x <- dc$pH_H2O

ggplot(data = dc,
             mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(span = 1) +
    geom_hline(yintercept = mp, lty = 2) +
    facet_wrap(facets = ~cam)

dd <- dc %>%
    select(id, prod, cam, x) %>%
    spread(key = "cam", value = "x") %>%
    na.omit()
dd
## # A tibble: 57 x 5
##    id                 prod    L1    L2    L3
##    <chr>             <dbl> <dbl> <dbl> <dbl>
##  1 FAZARGEMIRA_AG10   4.01   6.2   5.6   5.2
##  2 FAZARGEMIRA_AG2    3.89   6.2   5.3   5.1
##  3 FAZARGEMIRA_AG3    4.12   6.1   5.2   5.3
##  4 FAZARGEMIRA_AG5    4.25   5.9   5.6   5.2
##  5 FAZARGEMIRA_AG6    4.12   6.1   5.3   5.2
##  6 FAZARGEMIRA_AG9    4.38   5.7   5.5   5.4
##  7 FAZARGEMIRA_STI5   3.88   5.9   5     4.9
##  8 FAZARGEMIRA_STI6   4.14   6.3   5.1   5  
##  9 FAZCALIFORNIA_CA4  4.03   6     5.5   5.1
## 10 FAZGMC_MC2         3.75   6.1   5.7   5.4
## # … with 47 more rows
m0 <- lm(prod ~
             poly(L1, degree = 2) +
             poly(L2, degree = 2) +
             poly(L3, degree = 2),
         data = dd)

drop1(m0, test = "F")
## Single term deletions
## 
## Model:
## prod ~ poly(L1, degree = 2) + poly(L2, degree = 2) + poly(L3, 
##     degree = 2)
##                      Df Sum of Sq    RSS     AIC F value   Pr(>F)   
## <none>                            4.7061 -128.17                    
## poly(L1, degree = 2)  2   0.20469 4.9108 -129.74  1.0874 0.344936   
## poly(L2, degree = 2)  2   0.12731 4.8334 -130.65  0.6763 0.513098   
## poly(L3, degree = 2)  2   1.24121 5.9474 -118.83  6.5936 0.002874 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m0)
## 
## Call:
## lm(formula = prod ~ poly(L1, degree = 2) + poly(L2, degree = 2) + 
##     poly(L3, degree = 2), data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.75912 -0.13501  0.02235  0.15109  0.73132 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            4.12689    0.04064 101.558  < 2e-16 ***
## poly(L1, degree = 2)1  0.05477    0.33841   0.162  0.87208    
## poly(L1, degree = 2)2 -0.51095    0.35076  -1.457  0.15146    
## poly(L2, degree = 2)1 -0.13209    0.38195  -0.346  0.73091    
## poly(L2, degree = 2)2  0.59341    0.53806   1.103  0.27536    
## poly(L3, degree = 2)1  1.25972    0.38859   3.242  0.00212 ** 
## poly(L3, degree = 2)2 -0.78349    0.54911  -1.427  0.15984    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3068 on 50 degrees of freedom
## Multiple R-squared:  0.2916, Adjusted R-squared:  0.2066 
## F-statistic:  3.43 on 6 and 50 DF,  p-value: 0.006497
m1 <- update(m0, . ~ L1 + L2 + L3)
summary(m1)
## 
## Call:
## lm(formula = prod ~ L1 + L2 + L3, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.71796 -0.15620  0.05885  0.17442  0.78075 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  0.27325    1.43572   0.190  0.84978   
## L1          -0.02274    0.20770  -0.109  0.91325   
## L2          -0.13145    0.18509  -0.710  0.48070   
## L3           0.90404    0.27429   3.296  0.00175 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3155 on 53 degrees of freedom
## Multiple R-squared:  0.2058, Adjusted R-squared:  0.1609 
## F-statistic: 4.579 on 3 and 53 DF,  p-value: 0.006345
m2 <- update(m0, . ~ L3)
summary(m2)
## 
## Call:
## lm(formula = prod ~ L3, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.75651 -0.14751  0.02757  0.15752  0.75752 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.01887    1.11530   0.017 0.986564    
## L3           0.78974    0.21426   3.686 0.000523 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3112 on 55 degrees of freedom
## Multiple R-squared:  0.1981, Adjusted R-squared:  0.1835 
## F-statistic: 13.59 on 1 and 55 DF,  p-value: 0.0005226
gg <- ggplot(data = dd,
       mapping = aes(x = L3, y = prod)) +
    geom_point() +
    geom_smooth(method = lm, formula = y ~ x, col = "black") +
    geom_hline(yintercept = mp, lty = 2) +
    xlab(label = xlab) +
    ylab(label = ylab)

fits$pH_H2O <- list(xlab = xlab, plot = gg, m0 = m0, m1 = m1, m2 = m2)
gg

3.2 Phosphorus (P)

labs[labs$abbrev == "P", ]
## # A tibble: 1 x 4
##   abbrev short_label   measure_unit long_label            
##   <chr>  <chr>         <chr>        <chr>                 
## 1 P      Available ~ P (mg~dm^{-3}) Available ~ phosphorus
xlab <- expression("Available phosphorus" ~ (mg ~ dm^{-3}))
dc$x <- dc$P

ggplot(data = dc,
             mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(span = 1) +
    geom_hline(yintercept = mp, lty = 2) +
    facet_wrap(facets = ~cam)

dd <- dc %>%
    select(id, prod, cam, x) %>%
    spread(key = "cam", value = "x") %>%
    na.omit()
dd
## # A tibble: 57 x 5
##    id                 prod    L1    L2    L3
##    <chr>             <dbl> <dbl> <dbl> <dbl>
##  1 FAZARGEMIRA_AG10   4.01  44.1   1.2   0.6
##  2 FAZARGEMIRA_AG2    3.89  16.4   2.1   0.9
##  3 FAZARGEMIRA_AG3    4.12  14.4   1.7   0.6
##  4 FAZARGEMIRA_AG5    4.25  30.5   2.7   0.9
##  5 FAZARGEMIRA_AG6    4.12  21.3   3     1.2
##  6 FAZARGEMIRA_AG9    4.38  23.8   2.4   0.3
##  7 FAZARGEMIRA_STI5   3.88  16.4   2.1   1.4
##  8 FAZARGEMIRA_STI6   4.14  23.8   1.7   0.9
##  9 FAZCALIFORNIA_CA4  4.03  20.1   2.1   0.9
## 10 FAZGMC_MC2         3.75   7.8   6     1.2
## # … with 47 more rows
m0 <- lm(prod ~
             poly(L1, degree = 2) +
             poly(L2, degree = 2) +
             poly(L3, degree = 2),
         data = dd)

drop1(m0, test = "F")
## Single term deletions
## 
## Model:
## prod ~ poly(L1, degree = 2) + poly(L2, degree = 2) + poly(L3, 
##     degree = 2)
##                      Df Sum of Sq    RSS     AIC F value Pr(>F)
## <none>                            5.9015 -115.27               
## poly(L1, degree = 2)  2   0.27839 6.1799 -116.64  1.1793 0.3159
## poly(L2, degree = 2)  2   0.13144 6.0329 -118.01  0.5568 0.5765
## poly(L3, degree = 2)  2   0.40102 6.3025 -115.52  1.6988 0.1933
summary(m0)
## 
## Call:
## lm(formula = prod ~ poly(L1, degree = 2) + poly(L2, degree = 2) + 
##     poly(L3, degree = 2), data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.75401 -0.20278 -0.02604  0.20355  0.79204 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             4.1269     0.0455  90.691   <2e-16 ***
## poly(L1, degree = 2)1  -0.3609     0.4977  -0.725    0.472    
## poly(L1, degree = 2)2   0.4020     0.5249   0.766    0.447    
## poly(L2, degree = 2)1   0.3877     0.6208   0.625    0.535    
## poly(L2, degree = 2)2  -0.3552     0.3810  -0.932    0.356    
## poly(L3, degree = 2)1  -0.4525     0.6611  -0.685    0.497    
## poly(L3, degree = 2)2   0.5705     0.3558   1.604    0.115    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3436 on 50 degrees of freedom
## Multiple R-squared:  0.1116, Adjusted R-squared:  0.005033 
## F-statistic: 1.047 on 6 and 50 DF,  p-value: 0.4066
m1 <- update(m0, . ~ L1 + L2 + L3)
summary(m1)
## 
## Call:
## lm(formula = prod ~ L1 + L2 + L3, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.74487 -0.23755 -0.01489  0.23794  0.83312 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.243178   0.107395  39.510   <2e-16 ***
## L1          -0.005487   0.004248  -1.292    0.202    
## L2           0.011296   0.010376   1.089    0.281    
## L3          -0.019359   0.049379  -0.392    0.697    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3462 on 53 degrees of freedom
## Multiple R-squared:  0.04368,    Adjusted R-squared:  -0.01045 
## F-statistic: 0.8069 on 3 and 53 DF,  p-value: 0.4957
m2 <- update(m0, . ~ L1)
summary(m2)
## 
## Call:
## lm(formula = prod ~ L1, data = dd)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7450 -0.2170 -0.0240  0.2352  0.8393 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.222989   0.105454  40.046   <2e-16 ***
## L1          -0.003480   0.003444  -1.011    0.317    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3444 on 55 degrees of freedom
## Multiple R-squared:  0.01823,    Adjusted R-squared:  0.0003827 
## F-statistic: 1.021 on 1 and 55 DF,  p-value: 0.3166
gg <- ggplot(data = dd,
       mapping = aes(x = L1, y = prod)) +
    geom_point() +
    geom_smooth(method = lm, formula = y ~ x, col = "black") +
    geom_hline(yintercept = mp, lty = 2) +
    xlab(label = xlab) +
    ylab(label = ylab)

fits$P <- list(xlab = xlab, plot = gg, m0 = m0, m1 = m1, m2 = m2)
gg

3.3 Potassium (K)

labs[labs$abbrev == "K", ]
## # A tibble: 1 x 4
##   abbrev short_label measure_unit long_label              
##   <chr>  <chr>       <chr>        <chr>                   
## 1 K      K^{+1}      (mg~dm^{-3}) Exchangeable ~ potassium
xlab <- expression("Exchangeable potassium" ~ (mg ~ dm^{-3}))
dc$x <- dc$K

ggplot(data = dc,
             mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(span = 1) +
    geom_hline(yintercept = mp, lty = 2) +
    facet_wrap(facets = ~cam)

dd <- dc %>%
    select(id, prod, cam, x) %>%
    spread(key = "cam", value = "x") %>%
    na.omit()
dd
## # A tibble: 57 x 5
##    id                 prod    L1    L2    L3
##    <chr>             <dbl> <dbl> <dbl> <dbl>
##  1 FAZARGEMIRA_AG10   4.01 122.   28.3  14.4
##  2 FAZARGEMIRA_AG2    3.89  97.3  76.9  67.7
##  3 FAZARGEMIRA_AG3    4.12  78.6  25.6  17.7
##  4 FAZARGEMIRA_AG5    4.25 130.  104.   64.8
##  5 FAZARGEMIRA_AG6    4.12  61.9  22.5  16.4
##  6 FAZARGEMIRA_AG9    4.38 104    33.8  18.5
##  7 FAZARGEMIRA_STI5   3.88  86.5  50    42.5
##  8 FAZARGEMIRA_STI6   4.14  84.2  39.9  30.5
##  9 FAZCALIFORNIA_CA4  4.03 115.   65.8  45.5
## 10 FAZGMC_MC2         3.75  88.3  34.1  30.3
## # … with 47 more rows
m0 <- lm(prod ~
             poly(L1, degree = 2) +
             poly(L2, degree = 2) +
             poly(L3, degree = 2),
         data = dd)

drop1(m0, test = "F")
## Single term deletions
## 
## Model:
## prod ~ poly(L1, degree = 2) + poly(L2, degree = 2) + poly(L3, 
##     degree = 2)
##                      Df Sum of Sq    RSS     AIC F value Pr(>F)
## <none>                            5.5201 -119.08               
## poly(L1, degree = 2)  2   0.51624 6.0364 -117.98  2.3380 0.1070
## poly(L2, degree = 2)  2   0.36886 5.8890 -119.39  1.6705 0.1985
## poly(L3, degree = 2)  2   0.03326 5.5534 -122.73  0.1506 0.8606
summary(m0)
## 
## Call:
## lm(formula = prod ~ poly(L1, degree = 2) + poly(L2, degree = 2) + 
##     poly(L3, degree = 2), data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.90304 -0.20829  0.01608  0.17576  0.73455 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            4.126895   0.044010  93.772   <2e-16 ***
## poly(L1, degree = 2)1  0.994260   0.462344   2.150   0.0364 *  
## poly(L1, degree = 2)2  0.122424   0.346661   0.353   0.7255    
## poly(L2, degree = 2)1  0.008644   0.978314   0.009   0.9930    
## poly(L2, degree = 2)2 -0.761231   0.509674  -1.494   0.1416    
## poly(L3, degree = 2)1 -0.502622   0.971483  -0.517   0.6072    
## poly(L3, degree = 2)2  0.127135   0.398831   0.319   0.7512    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3323 on 50 degrees of freedom
## Multiple R-squared:  0.169,  Adjusted R-squared:  0.06933 
## F-statistic: 1.695 on 6 and 50 DF,  p-value: 0.1416
m1 <- update(m0, . ~ L1 + L2 + L3)
summary(m1)
## 
## Call:
## lm(formula = prod ~ L1 + L2 + L3, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.82141 -0.20748  0.04155  0.15219  0.76115 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.771893   0.149670  25.201   <2e-16 ***
## L1           0.004760   0.001888   2.521   0.0148 *  
## L2          -0.005088   0.004375  -1.163   0.2501    
## L3           0.003652   0.006270   0.582   0.5627    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3308 on 53 degrees of freedom
## Multiple R-squared:  0.1271, Adjusted R-squared:  0.07768 
## F-statistic: 2.572 on 3 and 53 DF,  p-value: 0.06376
m2 <- update(m0, . ~ L1)
summary(m2)
## 
## Call:
## lm(formula = prod ~ L1, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.84809 -0.22231  0.01787  0.20455  0.76498 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3.781321   0.148004  25.549   <2e-16 ***
## L1          0.003402   0.001392   2.444   0.0178 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3301 on 55 degrees of freedom
## Multiple R-squared:  0.09796,    Adjusted R-squared:  0.08156 
## F-statistic: 5.973 on 1 and 55 DF,  p-value: 0.01776
gg <- ggplot(data = dd,
       mapping = aes(x = L1, y = prod)) +
    geom_point() +
    geom_smooth(method = lm, formula = y ~ x, col = "black") +
    geom_hline(yintercept = mp, lty = 2) +
    xlab(label = xlab) +
    ylab(label = ylab)

fits$K <- list(xlab = xlab, plot = gg, m0 = m0, m1 = m1, m2 = m2)
gg

3.4 Calcium (Ca)

labs[labs$abbrev == "Ca", ]
## # A tibble: 1 x 4
##   abbrev short_label measure_unit      long_label            
##   <chr>  <chr>       <chr>             <chr>                 
## 1 Ca     Ca^{+2}     (cmol[c]~dm^{-3}) Exchangeable ~ calcium
xlab <- expression("Exchangeable calcium" ~ (cmol[c] ~ dm^{-3}))
dc$x <- dc$Ca

ggplot(data = dc,
             mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(span = 1) +
    geom_hline(yintercept = mp, lty = 2) +
    facet_wrap(facets = ~cam)

dd <- dc %>%
    select(id, prod, cam, x) %>%
    spread(key = "cam", value = "x") %>%
    na.omit()
dd
## # A tibble: 57 x 5
##    id                 prod    L1    L2    L3
##    <chr>             <dbl> <dbl> <dbl> <dbl>
##  1 FAZARGEMIRA_AG10   4.01   4     1.8   0.9
##  2 FAZARGEMIRA_AG2    3.89   4     1.5   0.8
##  3 FAZARGEMIRA_AG3    4.12   3.7   1.3   1  
##  4 FAZARGEMIRA_AG5    4.25   3.2   1.8   0.8
##  5 FAZARGEMIRA_AG6    4.12   3.4   1.5   0.9
##  6 FAZARGEMIRA_AG9    4.38   2.5   1.8   1  
##  7 FAZARGEMIRA_STI5   3.88   3.2   0.8   0.5
##  8 FAZARGEMIRA_STI6   4.14   2.9   0.7   0.6
##  9 FAZCALIFORNIA_CA4  4.03   3     1.3   0.6
## 10 FAZGMC_MC2         3.75   3.8   2.2   1.2
## # … with 47 more rows
m0 <- lm(prod ~
             poly(L1, degree = 2) +
             poly(L2, degree = 2) +
             poly(L3, degree = 2),
         data = dd)

drop1(m0, test = "F")
## Single term deletions
## 
## Model:
## prod ~ poly(L1, degree = 2) + poly(L2, degree = 2) + poly(L3, 
##     degree = 2)
##                      Df Sum of Sq    RSS     AIC F value Pr(>F)
## <none>                            5.5037 -119.25               
## poly(L1, degree = 2)  2   0.50580 6.0095 -118.23  2.2975 0.1110
## poly(L2, degree = 2)  2   0.26422 5.7680 -120.57  1.2002 0.3097
## poly(L3, degree = 2)  2   0.13642 5.6402 -121.85  0.6197 0.5422
summary(m0)
## 
## Call:
## lm(formula = prod ~ poly(L1, degree = 2) + poly(L2, degree = 2) + 
##     poly(L3, degree = 2), data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.71979 -0.19582  0.01811  0.22738  0.77483 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            4.12689    0.04394  93.911   <2e-16 ***
## poly(L1, degree = 2)1 -0.24059    0.37180  -0.647   0.5205    
## poly(L1, degree = 2)2 -0.69259    0.33741  -2.053   0.0454 *  
## poly(L2, degree = 2)1  0.54064    0.54876   0.985   0.3293    
## poly(L2, degree = 2)2 -0.66029    0.46327  -1.425   0.1603    
## poly(L3, degree = 2)1  0.32308    0.50857   0.635   0.5282    
## poly(L3, degree = 2)2  0.33723    0.47878   0.704   0.4845    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3318 on 50 degrees of freedom
## Multiple R-squared:  0.1715, Adjusted R-squared:  0.07209 
## F-statistic: 1.725 on 6 and 50 DF,  p-value: 0.1345
m1 <- update(m0, . ~ L1 + L2 + L3)
summary(m1)
## 
## Call:
## lm(formula = prod ~ L1 + L2 + L3, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.80329 -0.20001 -0.00536  0.19644  0.87218 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.94715    0.33163  11.902   <2e-16 ***
## L1          -0.04116    0.10320  -0.399    0.692    
## L2           0.08501    0.13492   0.630    0.531    
## L3           0.22498    0.27362   0.822    0.415    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3424 on 53 degrees of freedom
## Multiple R-squared:  0.06454,    Adjusted R-squared:  0.01159 
## F-statistic: 1.219 on 3 and 53 DF,  p-value: 0.3119
m2 <- update(m0, . ~ L3)
summary(m2)
## 
## Call:
## lm(formula = prod ~ L3, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.77664 -0.20597 -0.01347  0.18297  0.86175 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.8451     0.1612  23.847   <2e-16 ***
## L3            0.3339     0.1836   1.819   0.0744 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3375 on 55 degrees of freedom
## Multiple R-squared:  0.05673,    Adjusted R-squared:  0.03958 
## F-statistic: 3.308 on 1 and 55 DF,  p-value: 0.0744
gg <- ggplot(data = dd,
       mapping = aes(x = L3, y = prod)) +
    geom_point() +
    geom_smooth(method = lm, formula = y ~ x, col = "black") +
    geom_hline(yintercept = mp, lty = 2) +
    xlab(label = xlab) +
    ylab(label = ylab)

fits$Ca <- list(xlab = xlab, plot = gg, m0 = m0, m1 = m1, m2 = m2)
gg

3.5 Magnesium (Mg)

labs[labs$abbrev == "Mg", ]
## # A tibble: 1 x 4
##   abbrev short_label measure_unit      long_label              
##   <chr>  <chr>       <chr>             <chr>                   
## 1 Mg     Mg^{+2}     (cmol[c]~dm^{-3}) Exchangeable ~ magnesium
xlab <- expression("Exchangeable magnesium" ~ (cmol[c] ~ dm^{-3}))
dc$x <- dc$Mg

ggplot(data = dc,
             mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(span = 1) +
    geom_hline(yintercept = mp, lty = 2) +
    facet_wrap(facets = ~cam)

dd <- dc %>%
    select(id, prod, cam, x) %>%
    spread(key = "cam", value = "x") %>%
    na.omit()
dd
## # A tibble: 57 x 5
##    id                 prod    L1    L2    L3
##    <chr>             <dbl> <dbl> <dbl> <dbl>
##  1 FAZARGEMIRA_AG10   4.01   1.5   0.7   0.4
##  2 FAZARGEMIRA_AG2    3.89   1.5   0.6   0.3
##  3 FAZARGEMIRA_AG3    4.12   1.4   0.5   0.4
##  4 FAZARGEMIRA_AG5    4.25   1.2   0.7   0.3
##  5 FAZARGEMIRA_AG6    4.12   1.3   0.6   0.4
##  6 FAZARGEMIRA_AG9    4.38   1     0.7   0.4
##  7 FAZARGEMIRA_STI5   3.88   1.2   0.3   0.2
##  8 FAZARGEMIRA_STI6   4.14   1     0.3   0.2
##  9 FAZCALIFORNIA_CA4  4.03   1.1   0.5   0.2
## 10 FAZGMC_MC2         3.75   1.3   0.8   0.5
## # … with 47 more rows
m0 <- lm(prod ~
             poly(L1, degree = 2) +
             poly(L2, degree = 2) +
             poly(L3, degree = 2),
         data = dd)

drop1(m0, test = "F")
## Single term deletions
## 
## Model:
## prod ~ poly(L1, degree = 2) + poly(L2, degree = 2) + poly(L3, 
##     degree = 2)
##                      Df Sum of Sq    RSS     AIC F value Pr(>F)
## <none>                            5.6910 -117.34               
## poly(L1, degree = 2)  2   0.42233 6.1133 -117.26  1.8553 0.1670
## poly(L2, degree = 2)  2   0.06660 5.7576 -120.67  0.2926 0.7476
## poly(L3, degree = 2)  2   0.14975 5.8407 -119.86  0.6578 0.5224
summary(m0)
## 
## Call:
## lm(formula = prod ~ poly(L1, degree = 2) + poly(L2, degree = 2) + 
##     poly(L3, degree = 2), data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.73894 -0.22687  0.04408  0.20268  0.79434 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            4.12689    0.04469  92.353   <2e-16 ***
## poly(L1, degree = 2)1 -0.20530    0.38194  -0.538    0.593    
## poly(L1, degree = 2)2 -0.62612    0.34064  -1.838    0.072 .  
## poly(L2, degree = 2)1  0.20128    0.53414   0.377    0.708    
## poly(L2, degree = 2)2 -0.31958    0.43125  -0.741    0.462    
## poly(L3, degree = 2)1  0.54009    0.49684   1.087    0.282    
## poly(L3, degree = 2)2  0.04221    0.43741   0.096    0.924    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3374 on 50 degrees of freedom
## Multiple R-squared:  0.1433, Adjusted R-squared:  0.04052 
## F-statistic: 1.394 on 6 and 50 DF,  p-value: 0.2355
m1 <- update(m0, . ~ L1 + L2 + L3)
summary(m1)
## 
## Call:
## lm(formula = prod ~ L1 + L2 + L3, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.78688 -0.20293  0.01136  0.18152  0.88899 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.97325    0.32108  12.375   <2e-16 ***
## L1          -0.13765    0.27757  -0.496    0.622    
## L2           0.08511    0.36351   0.234    0.816    
## L3           0.78128    0.60231   1.297    0.200    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3415 on 53 degrees of freedom
## Multiple R-squared:  0.06967,    Adjusted R-squared:  0.01701 
## F-statistic: 1.323 on 3 and 53 DF,  p-value: 0.2766
m2 <- update(m0, . ~ L3)
summary(m2)
## 
## Call:
## lm(formula = prod ~ L3, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.79969 -0.19300  0.02346  0.16116  0.88746 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.8401     0.1530   25.10   <2e-16 ***
## L3            0.8215     0.4192    1.96   0.0551 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.336 on 55 degrees of freedom
## Multiple R-squared:  0.06527,    Adjusted R-squared:  0.04828 
## F-statistic: 3.841 on 1 and 55 DF,  p-value: 0.0551
gg <- ggplot(data = dd,
       mapping = aes(x = L3, y = prod)) +
    geom_point() +
    geom_smooth(method = lm, formula = y ~ x, col = "black") +
    geom_hline(yintercept = mp, lty = 2) +
    xlab(label = xlab) +
    ylab(label = ylab)

fits$Mg <- list(xlab = xlab, plot = gg, m0 = m0, m1 = m1, m2 = m2)
gg

3.6 Aluminium (Al)

labs[labs$abbrev == "Al", ]
## # A tibble: 1 x 4
##   abbrev short_label measure_unit      long_label              
##   <chr>  <chr>       <chr>             <chr>                   
## 1 Al     Al^{+3}     (cmol[c]~dm^{-3}) Exchangeable ~ aluminium
xlab <- expression("Exchangeable aluminium" ~ (cmol[c] ~ dm^{-3}))
dc$x <- dc$Al

ggplot(data = dc,
             mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(span = 1) +
    geom_hline(yintercept = mp, lty = 2) +
    facet_wrap(facets = ~cam)

dd <- dc %>%
    select(id, prod, cam, x) %>%
    spread(key = "cam", value = "x") %>%
    na.omit()
dd
## # A tibble: 57 x 5
##    id                 prod    L1    L2    L3
##    <chr>             <dbl> <dbl> <dbl> <dbl>
##  1 FAZARGEMIRA_AG10   4.01     0   0     0.4
##  2 FAZARGEMIRA_AG2    3.89     0   0.4   0.5
##  3 FAZARGEMIRA_AG3    4.12     0   0.4   0.4
##  4 FAZARGEMIRA_AG5    4.25     0   0     0.4
##  5 FAZARGEMIRA_AG6    4.12     0   0.4   0.4
##  6 FAZARGEMIRA_AG9    4.38     0   0.1   0.4
##  7 FAZARGEMIRA_STI5   3.88     0   0.6   0.7
##  8 FAZARGEMIRA_STI6   4.14     0   0.6   0.6
##  9 FAZCALIFORNIA_CA4  4.03     0   0.2   0.5
## 10 FAZGMC_MC2         3.75     0   0     0.3
## # … with 47 more rows
m0 <- lm(prod ~
             poly(L2, degree = 2) +
             poly(L3, degree = 2),
         data = dd)

drop1(m0, test = "F")
## Single term deletions
## 
## Model:
## prod ~ poly(L2, degree = 2) + poly(L3, degree = 2)
##                      Df Sum of Sq    RSS     AIC F value Pr(>F)
## <none>                            6.0194 -118.14               
## poly(L2, degree = 2)  2  0.002446 6.0219 -122.12  0.0106 0.9895
## poly(L3, degree = 2)  2  0.276604 6.2960 -119.58  1.1947 0.3110
summary(m0)
## 
## Call:
## lm(formula = prod ~ poly(L2, degree = 2) + poly(L3, degree = 2), 
##     data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.73897 -0.19831  0.00418  0.18996  0.79125 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            4.12689    0.04506  91.577   <2e-16 ***
## poly(L2, degree = 2)1 -0.05861    0.49173  -0.119    0.906    
## poly(L2, degree = 2)2 -0.04200    0.39034  -0.108    0.915    
## poly(L3, degree = 2)1 -0.74098    0.48783  -1.519    0.135    
## poly(L3, degree = 2)2 -0.03422    0.39520  -0.087    0.931    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3402 on 52 degrees of freedom
## Multiple R-squared:  0.09388,    Adjusted R-squared:  0.02418 
## F-statistic: 1.347 on 4 and 52 DF,  p-value: 0.2652
m1 <- update(m0, . ~ L2 + L3)
summary(m1)
## 
## Call:
## lm(formula = prod ~ L2 + L3, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.74194 -0.19224  0.01165  0.19418  0.79957 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.49135    0.17569  25.564   <2e-16 ***
## L2          -0.04196    0.30709  -0.137    0.892    
## L3          -0.76083    0.47105  -1.615    0.112    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.334 on 54 degrees of freedom
## Multiple R-squared:  0.09325,    Adjusted R-squared:  0.05967 
## F-statistic: 2.777 on 2 and 54 DF,  p-value: 0.07114
m2 <- update(m0, . ~ L3)
summary(m2)
## 
## Call:
## lm(formula = prod ~ L3, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.72823 -0.19974  0.01277  0.19926  0.79626 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4.4998     0.1631  27.594   <2e-16 ***
## L3           -0.8051     0.3391  -2.374   0.0211 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.331 on 55 degrees of freedom
## Multiple R-squared:  0.09294,    Adjusted R-squared:  0.07645 
## F-statistic: 5.636 on 1 and 55 DF,  p-value: 0.02112
gg <- ggplot(data = dd,
       mapping = aes(x = L3, y = prod)) +
    geom_point() +
    geom_smooth(method = lm, formula = y ~ x, col = "black") +
    geom_hline(yintercept = mp, lty = 2) +
    xlab(label = xlab) +
    ylab(label = ylab)

fits$Al <- list(xlab = xlab, plot = gg, m0 = m0, m1 = m1, m2 = m2)
gg

3.7 Sulfur (S)

labs[labs$abbrev == "S", ]
## # A tibble: 1 x 4
##   abbrev short_label measure_unit long_label
##   <chr>  <chr>       <chr>        <chr>     
## 1 S      Sulfur      (mg~dm^{-3}) Sulfur
xlab <- expression("Sulfur" ~ (mg ~ dm^{-3}))
dc$x <- dc$S

ggplot(data = dc,
             mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(span = 1) +
    geom_hline(yintercept = mp, lty = 2) +
    facet_wrap(facets = ~cam)

dd <- dc %>%
    select(id, prod, cam, x) %>%
    spread(key = "cam", value = "x") %>%
    # filter(prod > 3.5) %>% # Remove dois potenciais pontos influentes.
    na.omit()
dd
## # A tibble: 57 x 5
##    id                 prod    L1    L2    L3
##    <chr>             <dbl> <dbl> <dbl> <dbl>
##  1 FAZARGEMIRA_AG10   4.01  10.3  13.2  39.8
##  2 FAZARGEMIRA_AG2    3.89  21.1  33.9  56  
##  3 FAZARGEMIRA_AG3    4.12  10.8  32.7  63.8
##  4 FAZARGEMIRA_AG5    4.25   9.4  12.9  42.4
##  5 FAZARGEMIRA_AG6    4.12  10.3  22.1  51.9
##  6 FAZARGEMIRA_AG9    4.38  10.3  16.7  35.1
##  7 FAZARGEMIRA_STI5   3.88  13.7  39.7  59.8
##  8 FAZARGEMIRA_STI6   4.14  24    36.4  47.3
##  9 FAZCALIFORNIA_CA4  4.03   9    13.9  68.7
## 10 FAZGMC_MC2         3.75   9    11.9  25.7
## # … with 47 more rows
m0 <- lm(prod ~
             poly(L1, degree = 2) +
             poly(L2, degree = 2) +
             poly(L3, degree = 2),
         data = dd)

drop1(m0, test = "F")
## Single term deletions
## 
## Model:
## prod ~ poly(L1, degree = 2) + poly(L2, degree = 2) + poly(L3, 
##     degree = 2)
##                      Df Sum of Sq    RSS     AIC F value  Pr(>F)  
## <none>                            5.2714 -121.70                  
## poly(L1, degree = 2)  2   0.05365 5.3251 -125.12  0.2545 0.77634  
## poly(L2, degree = 2)  2   0.12260 5.3940 -124.39  0.5814 0.56283  
## poly(L3, degree = 2)  2   0.83367 6.1051 -117.33  3.9537 0.02546 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m0)
## 
## Call:
## lm(formula = prod ~ poly(L1, degree = 2) + poly(L2, degree = 2) + 
##     poly(L3, degree = 2), data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.69976 -0.21409 -0.01537  0.18809  0.75704 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            4.12689    0.04301  95.958   <2e-16 ***
## poly(L1, degree = 2)1 -0.21056    0.58410  -0.360   0.7200    
## poly(L1, degree = 2)2 -0.22716    0.43980  -0.516   0.6078    
## poly(L2, degree = 2)1 -0.55592    0.71003  -0.783   0.4373    
## poly(L2, degree = 2)2 -0.24463    0.46839  -0.522   0.6038    
## poly(L3, degree = 2)1  0.20303    0.52248   0.389   0.6992    
## poly(L3, degree = 2)2  0.96971    0.36493   2.657   0.0105 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3247 on 50 degrees of freedom
## Multiple R-squared:  0.2065, Adjusted R-squared:  0.1113 
## F-statistic: 2.168 on 6 and 50 DF,  p-value: 0.06171
m1 <- update(m0, . ~ L1 + L2 + L3)
summary(m1)
## 
## Call:
## lm(formula = prod ~ L1 + L2 + L3, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.89525 -0.18514 -0.01213  0.19043  0.78379 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.246864   0.123567  34.369   <2e-16 ***
## L1          -0.009272   0.012117  -0.765    0.448    
## L2          -0.006628   0.008412  -0.788    0.434    
## L3           0.003436   0.003470   0.990    0.327    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3393 on 53 degrees of freedom
## Multiple R-squared:  0.08156,    Adjusted R-squared:  0.02957 
## F-statistic: 1.569 on 3 and 53 DF,  p-value: 0.2078
m2 <- update(m0, . ~ L3 + I(L3^2))
summary(m2)
## 
## Call:
## lm(formula = prod ~ L3 + I(L3^2), data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.66767 -0.22767  0.00309  0.16295  0.77967 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.6528284  0.2065707  22.524  < 2e-16 ***
## L3          -0.0273184  0.0098782  -2.766  0.00776 ** 
## I(L3^2)      0.0002952  0.0001082   2.728  0.00859 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3282 on 54 degrees of freedom
## Multiple R-squared:  0.1245, Adjusted R-squared:  0.09203 
## F-statistic: 3.838 on 2 and 54 DF,  p-value: 0.02763
gg <- ggplot(data = dd,
       mapping = aes(x = L3, y = prod)) +
    geom_point() +
    geom_smooth(method = lm, formula = y ~ x + I(x^2), col = "black") +
    geom_hline(yintercept = mp, lty = 2) +
    xlab(label = xlab) +
    ylab(label = ylab)

fits$S <- list(xlab = xlab, plot = gg, m0 = m0, m1 = m1, m2 = m2)
gg

3.8 CTC

labs[labs$abbrev == "CTC", ]
## # A tibble: 1 x 4
##   abbrev short_label measure_unit      long_label                  
##   <chr>  <chr>       <chr>             <chr>                       
## 1 CTC    CTC~pH~7.0  (cmol[c]~dm^{-3}) Cation ~ exchange ~ capacity
xlab <- expression("Cation exchange capacity" ~ (cmol[c] ~ dm^{-3}))
dc$x <- dc$CTC

ggplot(data = dc,
             mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(span = 1) +
    geom_hline(yintercept = mp, lty = 2) +
    facet_wrap(facets = ~cam)

dd <- dc %>%
    select(id, prod, cam, x) %>%
    spread(key = "cam", value = "x") %>%
    na.omit()
dd
## # A tibble: 57 x 5
##    id                 prod    L1    L2    L3
##    <chr>             <dbl> <dbl> <dbl> <dbl>
##  1 FAZARGEMIRA_AG10   4.01 10.0   6.17  5.24
##  2 FAZARGEMIRA_AG2    3.89 10.2   7.6   5.77
##  3 FAZARGEMIRA_AG3    4.12 10.1   7.27  5.45
##  4 FAZARGEMIRA_AG5    4.25  9.63  6.97  5.07
##  5 FAZARGEMIRA_AG6    4.12  9.26  7.66  5.64
##  6 FAZARGEMIRA_AG9    4.38  8.87  6.59  5.15
##  7 FAZARGEMIRA_STI5   3.88  9.62  7.73  5.81
##  8 FAZARGEMIRA_STI6   4.14  6.82  6.4   5.68
##  9 FAZCALIFORNIA_CA4  4.03  8.2   5.47  4.62
## 10 FAZGMC_MC2         3.75  9.93  7.09  4.98
## # … with 47 more rows
m0 <- lm(prod ~
             poly(L1, degree = 2) +
             poly(L2, degree = 2) +
             poly(L3, degree = 2),
         data = dd)

drop1(m0, test = "F")
## Single term deletions
## 
## Model:
## prod ~ poly(L1, degree = 2) + poly(L2, degree = 2) + poly(L3, 
##     degree = 2)
##                      Df Sum of Sq    RSS     AIC F value Pr(>F)
## <none>                            6.0968 -113.41               
## poly(L1, degree = 2)  2   0.05934 6.1562 -116.86  0.2433 0.7850
## poly(L2, degree = 2)  2   0.40722 6.5041 -113.73  1.6698 0.1986
## poly(L3, degree = 2)  2   0.47058 6.5674 -113.17  1.9296 0.1559
summary(m0)
## 
## Call:
## lm(formula = prod ~ poly(L1, degree = 2) + poly(L2, degree = 2) + 
##     poly(L3, degree = 2), data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.75080 -0.21438 -0.02764  0.18848  0.76893 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            4.12689    0.04625  89.226   <2e-16 ***
## poly(L1, degree = 2)1 -0.15347    0.41919  -0.366    0.716    
## poly(L1, degree = 2)2  0.22601    0.37238   0.607    0.547    
## poly(L2, degree = 2)1  0.89463    0.58040   1.541    0.130    
## poly(L2, degree = 2)2 -0.66431    0.59610  -1.114    0.270    
## poly(L3, degree = 2)1 -0.92751    0.53483  -1.734    0.089 .  
## poly(L3, degree = 2)2  0.64707    0.59061   1.096    0.279    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3492 on 50 degrees of freedom
## Multiple R-squared:  0.08223,    Adjusted R-squared:  -0.02791 
## F-statistic: 0.7466 on 6 and 50 DF,  p-value: 0.6149
m1 <- update(m0, . ~ L1 + L2 + L3)
summary(m1)
## 
## Call:
## lm(formula = prod ~ L1 + L2 + L3, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.73457 -0.19643  0.00103  0.22727  0.80800 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.41662    0.51533   8.570 1.39e-11 ***
## L1          -0.03265    0.06441  -0.507    0.614    
## L2           0.11254    0.07905   1.424    0.160    
## L3          -0.13960    0.08509  -1.641    0.107    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3446 on 53 degrees of freedom
## Multiple R-squared:  0.05282,    Adjusted R-squared:  -0.0007979 
## F-statistic: 0.9851 on 3 and 53 DF,  p-value: 0.4069
m2 <- update(m0, . ~ L3)
summary(m2)
## 
## Call:
## lm(formula = prod ~ L3, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.73906 -0.20919 -0.00284  0.19246  0.79961 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.42362    0.31185  14.185   <2e-16 ***
## L3          -0.05386    0.05600  -0.962     0.34    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3447 on 55 degrees of freedom
## Multiple R-squared:  0.01654,    Adjusted R-squared:  -0.001339 
## F-statistic: 0.9251 on 1 and 55 DF,  p-value: 0.3403
gg <- ggplot(data = dd,
       mapping = aes(x = L3, y = prod)) +
    geom_point() +
    geom_smooth(method = lm, formula = y ~ x, col = "black") +
    geom_hline(yintercept = mp, lty = 2) +
    xlab(label = xlab) +
    ylab(label = ylab)

fits$CTC <- list(xlab = xlab, plot = gg, m0 = m0, m1 = m1, m2 = m2)
gg

3.9 V

labs[labs$abbrev == "V", ]
## # A tibble: 1 x 4
##   abbrev short_label       measure_unit long_label       
##   <chr>  <chr>             <chr>        <chr>            
## 1 V      Base ~ saturation ('%')        Base ~ saturation
xlab <- expression("Base saturation" ~ ("%"))
dc$x <- dc$V

ggplot(data = dc,
             mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(span = 1) +
    geom_hline(yintercept = mp, lty = 2) +
    facet_wrap(facets = ~cam)

dd <- dc %>%
    select(id, prod, cam, x) %>%
    spread(key = "cam", value = "x") %>%
    na.omit()
dd
## # A tibble: 57 x 5
##    id                 prod    L1    L2    L3
##    <chr>             <dbl> <dbl> <dbl> <dbl>
##  1 FAZARGEMIRA_AG10   4.01  58.1  41.7  25.5
##  2 FAZARGEMIRA_AG2    3.89  56.6  30.2  22.1
##  3 FAZARGEMIRA_AG3    4.12  52.5  25.7  26.5
##  4 FAZARGEMIRA_AG5    4.25  49.1  39.7  25.0
##  5 FAZARGEMIRA_AG6    4.12  52.5  28.2  23.8
##  6 FAZARGEMIRA_AG9    4.38  42.5  39.3  28.1
##  7 FAZARGEMIRA_STI5   3.88  48.0  15.9  13.9
##  8 FAZARGEMIRA_STI6   4.14  60.4  17.2  15.5
##  9 FAZCALIFORNIA_CA4  4.03  53.6  36    19.9
## 10 FAZGMC_MC2         3.75  53.7  43.6  35.7
## # … with 47 more rows
m0 <- lm(prod ~
             poly(L1, degree = 2) +
             poly(L2, degree = 2) +
             poly(L3, degree = 2),
         data = dd)

drop1(m0, test = "F")
## Single term deletions
## 
## Model:
## prod ~ poly(L1, degree = 2) + poly(L2, degree = 2) + poly(L3, 
##     degree = 2)
##                      Df Sum of Sq    RSS     AIC F value Pr(>F)
## <none>                            5.5476 -118.79               
## poly(L1, degree = 2)  2   0.38368 5.9313 -118.98  1.7291 0.1879
## poly(L2, degree = 2)  2   0.00156 5.5491 -122.78  0.0070 0.9930
## poly(L3, degree = 2)  2   0.38866 5.9362 -118.93  1.7515 0.1840
summary(m0)
## 
## Call:
## lm(formula = prod ~ poly(L1, degree = 2) + poly(L2, degree = 2) + 
##     poly(L3, degree = 2), data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.76698 -0.19018  0.01931  0.17530  0.71760 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            4.126895   0.044119  93.539   <2e-16 ***
## poly(L1, degree = 2)1  0.064551   0.369621   0.175   0.8621    
## poly(L1, degree = 2)2 -0.692296   0.382122  -1.812   0.0760 .  
## poly(L2, degree = 2)1 -0.055678   0.511296  -0.109   0.9137    
## poly(L2, degree = 2)2 -0.028041   0.584945  -0.048   0.9620    
## poly(L3, degree = 2)1  0.956244   0.514784   1.858   0.0691 .  
## poly(L3, degree = 2)2  0.002538   0.617804   0.004   0.9967    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3331 on 50 degrees of freedom
## Multiple R-squared:  0.1649, Adjusted R-squared:  0.0647 
## F-statistic: 1.646 on 6 and 50 DF,  p-value: 0.1543
m1 <- update(m0, . ~ L1 + L2 + L3)
summary(m1)
## 
## Call:
## lm(formula = prod ~ L1 + L2 + L3, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.77756 -0.16747  0.00217  0.17624  0.80833 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3.658619   0.391988   9.334 8.87e-13 ***
## L1          0.002103   0.007955   0.264    0.792    
## L2          0.001040   0.007168   0.145    0.885    
## L3          0.013685   0.009723   1.407    0.165    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3367 on 53 degrees of freedom
## Multiple R-squared:  0.09575,    Adjusted R-squared:  0.04457 
## F-statistic: 1.871 on 3 and 53 DF,  p-value: 0.1457
m2 <- update(m0, . ~ L3)
summary(m2)
## 
## Call:
## lm(formula = prod ~ L3, data = dd)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7524 -0.1742  0.0109  0.1774  0.8041 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3.763478   0.158443  23.753   <2e-16 ***
## L3          0.015367   0.006439   2.387   0.0205 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3308 on 55 degrees of freedom
## Multiple R-squared:  0.09385,    Adjusted R-squared:  0.07738 
## F-statistic: 5.697 on 1 and 55 DF,  p-value: 0.02046
gg <- ggplot(data = dd,
       mapping = aes(x = L3, y = prod)) +
    geom_point() +
    geom_smooth(method = lm, formula = y ~ x, col = "black") +
    geom_hline(yintercept = mp, lty = 2) +
    xlab(label = xlab) +
    ylab(label = ylab)

fits$V <- list(xlab = xlab, plot = gg, m0 = m0, m1 = m1, m2 = m2)
gg

3.10 Glycosidase

labs[labs$abbrev == "glic", 3:4]
## # A tibble: 1 x 2
##   measure_unit                            long_label      
##   <chr>                                   <chr>           
## 1 (mu*g~p-nitrophenol~g^{-1}~soil~h^{-1}) beta-glucosidase
xlab <- expression(beta - "glucosidase")
dc$x <- dc$glic

ggplot(data = dc,
             mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(span = 1) +
    geom_hline(yintercept = mp, lty = 2)

dd <- dc %>%
    select(id, prod, cam, x) %>%
    na.omit()
dd
## # A tibble: 62 x 4
##    id                 prod cam       x
##    <chr>             <dbl> <fct> <dbl>
##  1 FAZARGEMIRA_AG10   4.01 L1     346.
##  2 FAZARGEMIRA_AG2    3.89 L1     275.
##  3 FAZARGEMIRA_AG3    4.12 L1     257 
##  4 FAZARGEMIRA_AG5    4.25 L1     259.
##  5 FAZARGEMIRA_AG6    4.12 L1     247.
##  6 FAZARGEMIRA_AG9    4.38 L1     203.
##  7 FAZARGEMIRA_STI5   3.88 L1     170.
##  8 FAZARGEMIRA_STI6   4.14 L1     189 
##  9 FAZCALIFORNIA_CA4  4.03 L1     145.
## 10 FAZFLORIDA_F1      3.83 L1     224 
## # … with 52 more rows
m0 <- lm(prod ~
             poly(x, degree = 2),
         data = dd)

# drop1(m0, test = "F")
summary(m0)
## 
## Call:
## lm(formula = prod ~ poly(x, degree = 2), data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.73951 -0.19653  0.00912  0.19141  0.87829 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           4.11198    0.04380  93.880   <2e-16 ***
## poly(x, degree = 2)1 -0.16414    0.34488  -0.476    0.636    
## poly(x, degree = 2)2  0.04438    0.34488   0.129    0.898    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3449 on 59 degrees of freedom
## Multiple R-squared:  0.004103,   Adjusted R-squared:  -0.02966 
## F-statistic: 0.1215 on 2 and 59 DF,  p-value: 0.8858
m1 <- update(m0, . ~ x)
summary(m1)
## 
## Call:
## lm(formula = prod ~ x, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.74390 -0.19880  0.00881  0.19637  0.87474 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.1886700  0.1656015   25.29   <2e-16 ***
## x           -0.0003489  0.0007270   -0.48    0.633    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.342 on 60 degrees of freedom
## Multiple R-squared:  0.003823,   Adjusted R-squared:  -0.01278 
## F-statistic: 0.2303 on 1 and 60 DF,  p-value: 0.6331
gg <- ggplot(data = dd,
       mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(method = lm, formula = y ~ x, col = "black") +
    geom_hline(yintercept = mp, lty = 2) +
    xlab(label = xlab) +
    ylab(label = ylab)

fits$glic <- list(xlab = xlab, plot = gg, m0 = m0, m1 = m1)
gg

3.11 Phosphatase

labs[labs$abbrev == "phos", 3:4]
## # A tibble: 1 x 2
##   measure_unit                            long_label        
##   <chr>                                   <chr>             
## 1 (mu*g~p-nitrophenol~g^{-1}~soil~h^{-1}) Acid ~ phosphatase
xlab <- "Acid phosphatase"
dc$x <- dc$phos

ggplot(data = dc,
             mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(span = 1) +
    geom_hline(yintercept = mp, lty = 2)

dd <- dc %>%
    select(id, prod, cam, x) %>%
    na.omit()
dd
## # A tibble: 62 x 4
##    id                 prod cam       x
##    <chr>             <dbl> <fct> <dbl>
##  1 FAZARGEMIRA_AG10   4.01 L1    1018.
##  2 FAZARGEMIRA_AG2    3.89 L1     922.
##  3 FAZARGEMIRA_AG3    4.12 L1     812.
##  4 FAZARGEMIRA_AG5    4.25 L1     900.
##  5 FAZARGEMIRA_AG6    4.12 L1     694 
##  6 FAZARGEMIRA_AG9    4.38 L1     636.
##  7 FAZARGEMIRA_STI5   3.88 L1     756.
##  8 FAZARGEMIRA_STI6   4.14 L1     784.
##  9 FAZCALIFORNIA_CA4  4.03 L1     487.
## 10 FAZFLORIDA_F1      3.83 L1     695.
## # … with 52 more rows
m0 <- lm(prod ~
             poly(x, degree = 2),
         data = dd)

# drop1(m0, test = "F")
summary(m0)
## 
## Call:
## lm(formula = prod ~ poly(x, degree = 2), data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.76108 -0.18033  0.00341  0.22166  0.86298 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           4.11198    0.04373  94.039   <2e-16 ***
## poly(x, degree = 2)1 -0.20564    0.34430  -0.597    0.553    
## poly(x, degree = 2)2 -0.10144    0.34430  -0.295    0.769    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3443 on 59 degrees of freedom
## Multiple R-squared:  0.007461,   Adjusted R-squared:  -0.02618 
## F-statistic: 0.2218 on 2 and 59 DF,  p-value: 0.8018
m1 <- update(m0, . ~ x)
summary(m1)
## 
## Call:
## lm(formula = prod ~ x, data = dd)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7530 -0.1810  0.0107  0.2139  0.8710 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.2308910  0.2022730  20.917   <2e-16 ***
## x           -0.0001709  0.0002839  -0.602     0.55    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3417 on 60 degrees of freedom
## Multiple R-squared:  0.006001,   Adjusted R-squared:  -0.01057 
## F-statistic: 0.3622 on 1 and 60 DF,  p-value: 0.5495
gg <- ggplot(data = dd,
       mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(method = lm, formula = y ~ x, col = "black") +
    geom_hline(yintercept = mp, lty = 2) +
    xlab(label = xlab) +
    ylab(label = ylab)

fits$phos <- list(xlab = xlab, plot = gg, m0 = m0, m1 = m1)
gg

3.12 Sulfatase

labs[labs$abbrev == "sulf", ]
## # A tibble: 1 x 4
##   abbrev short_label   measure_unit                            long_label   
##   <chr>  <chr>         <chr>                                   <chr>        
## 1 sulf   Arylsulfatase (mu*g~p-nitrophenol~g^{-1}~soil~h^{-1}) Arylsulfatase
xlab <- "Arylsulfatase"
dc$x <- dc$sulf

ggplot(data = dc,
             mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(span = 1) +
    geom_hline(yintercept = mp, lty = 2)

dd <- dc %>%
    select(id, prod, cam, x) %>%
    na.omit()
dd
## # A tibble: 62 x 4
##    id                 prod cam       x
##    <chr>             <dbl> <fct> <dbl>
##  1 FAZARGEMIRA_AG10   4.01 L1    136. 
##  2 FAZARGEMIRA_AG2    3.89 L1    200. 
##  3 FAZARGEMIRA_AG3    4.12 L1    179. 
##  4 FAZARGEMIRA_AG5    4.25 L1    116. 
##  5 FAZARGEMIRA_AG6    4.12 L1     92  
##  6 FAZARGEMIRA_AG9    4.38 L1     62.7
##  7 FAZARGEMIRA_STI5   3.88 L1     87.9
##  8 FAZARGEMIRA_STI6   4.14 L1     62.1
##  9 FAZCALIFORNIA_CA4  4.03 L1     40.1
## 10 FAZFLORIDA_F1      3.83 L1     77.6
## # … with 52 more rows
m0 <- lm(prod ~
             poly(x, degree = 2),
         data = dd)

# drop1(m0, test = "F")
summary(m0)
## 
## Call:
## lm(formula = prod ~ poly(x, degree = 2), data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.73255 -0.23915  0.01229  0.21988  0.81526 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           4.11198    0.04333  94.908   <2e-16 ***
## poly(x, degree = 2)1  0.40340    0.34115   1.182    0.242    
## poly(x, degree = 2)2 -0.13175    0.34115  -0.386    0.701    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3411 on 59 degrees of freedom
## Multiple R-squared:  0.02556,    Adjusted R-squared:  -0.007476 
## F-statistic: 0.7737 on 2 and 59 DF,  p-value: 0.4659
m1 <- update(m0, . ~ x)
summary(m1)
## 
## Call:
## lm(formula = prod ~ x, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.72475 -0.22675  0.00975  0.20088  0.82891 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3.980607   0.118404  33.619   <2e-16 ***
## x           0.001463   0.001229   1.191    0.238    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3387 on 60 degrees of freedom
## Multiple R-squared:  0.02309,    Adjusted R-squared:  0.006811 
## F-statistic: 1.418 on 1 and 60 DF,  p-value: 0.2384
gg <- ggplot(data = dd,
       mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(method = lm, formula = y ~ x, col = "black") +
    geom_hline(yintercept = mp, lty = 2) +
    xlab(label = xlab) +
    ylab(label = ylab)

fits$sulf <- list(xlab = xlab, plot = gg, m0 = m0, m1 = m1)
gg

3.13 Organic matter

labs[labs$abbrev == "MO2", ]
## # A tibble: 1 x 4
##   abbrev short_label measure_unit long_label
##   <chr>  <chr>       <chr>        <chr>     
## 1 MO2    SOM         (g~kg^{-3})  SOM
xlab <- expression("Soil organic matter" ~ (g ~ kg^{-1}))
dc$x <- dc$MO2

ggplot(data = dc,
             mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(span = 1) +
    geom_hline(yintercept = mp, lty = 2)

dd <- dc %>%
    select(id, prod, cam, x) %>%
    na.omit()
dd
## # A tibble: 62 x 4
##    id                 prod cam       x
##    <chr>             <dbl> <fct> <dbl>
##  1 FAZARGEMIRA_AG10   4.01 L1     42.3
##  2 FAZARGEMIRA_AG2    3.89 L1     45.5
##  3 FAZARGEMIRA_AG3    4.12 L1     45.3
##  4 FAZARGEMIRA_AG5    4.25 L1     38.7
##  5 FAZARGEMIRA_AG6    4.12 L1     37.2
##  6 FAZARGEMIRA_AG9    4.38 L1     36.3
##  7 FAZARGEMIRA_STI5   3.88 L1     43.6
##  8 FAZARGEMIRA_STI6   4.14 L1     42.1
##  9 FAZCALIFORNIA_CA4  4.03 L1     29.6
## 10 FAZFLORIDA_F1      3.83 L1     29.7
## # … with 52 more rows
m0 <- lm(prod ~
             poly(x, degree = 2),
         data = dd)

# drop1(m0, test = "F")
summary(m0)
## 
## Call:
## lm(formula = prod ~ poly(x, degree = 2), data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.72514 -0.21530  0.01927  0.21047  0.85569 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            4.1120     0.0437  94.101   <2e-16 ***
## poly(x, degree = 2)1  -0.1953     0.3441  -0.568    0.572    
## poly(x, degree = 2)2   0.1537     0.3441   0.447    0.657    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3441 on 59 degrees of freedom
## Multiple R-squared:  0.008768,   Adjusted R-squared:  -0.02483 
## F-statistic: 0.2609 on 2 and 59 DF,  p-value: 0.7712
m1 <- update(m0, . ~ x)
summary(m1)
## 
## Call:
## lm(formula = prod ~ x, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.73681 -0.18871  0.00936  0.21229  0.84119 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.283351   0.303004  14.136   <2e-16 ***
## x           -0.004761   0.008332  -0.571     0.57    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3418 on 60 degrees of freedom
## Multiple R-squared:  0.005413,   Adjusted R-squared:  -0.01116 
## F-statistic: 0.3266 on 1 and 60 DF,  p-value: 0.5698
gg <- ggplot(data = dd,
       mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(method = lm, formula = y ~ x, col = "black") +
    geom_hline(yintercept = mp, lty = 2) +
    xlab(label = xlab) +
    ylab(label = ylab)

fits$MO2 <- list(xlab = xlab, plot = gg, m0 = m0, m1 = m1)
gg

3.14 Free light fractions of SOC

labs[labs$abbrev == "FLF", ]
## # A tibble: 1 x 4
##   abbrev short_label measure_unit long_label                         
##   <chr>  <chr>       <chr>        <chr>                              
## 1 FLF    FLF         (g~kg^{-3})  Free ~ light ~ fractions ~ of ~ SOM
xlab <- expression("Free light fractions of SOM" ~ (g ~ kg^{-1}))
dc$x <- dc$FLF

ggplot(data = dc,
             mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(span = 1) +
    geom_hline(yintercept = mp, lty = 2)

dd <- dc %>%
    select(id, prod, cam, x) %>%
    na.omit()
dd
## # A tibble: 62 x 4
##    id                 prod cam       x
##    <chr>             <dbl> <fct> <dbl>
##  1 FAZARGEMIRA_AG10   4.01 L1    16.0 
##  2 FAZARGEMIRA_AG2    3.89 L1    14.4 
##  3 FAZARGEMIRA_AG3    4.12 L1    12.1 
##  4 FAZARGEMIRA_AG5    4.25 L1    16.0 
##  5 FAZARGEMIRA_AG6    4.12 L1    12.3 
##  6 FAZARGEMIRA_AG9    4.38 L1    22.2 
##  7 FAZARGEMIRA_STI5   3.88 L1    17.9 
##  8 FAZARGEMIRA_STI6   4.14 L1    17.8 
##  9 FAZCALIFORNIA_CA4  4.03 L1    17.8 
## 10 FAZFLORIDA_F1      3.83 L1     8.92
## # … with 52 more rows
m0 <- lm(prod ~
             poly(x, degree = 2),
         data = dd)

# drop1(m0, test = "F")
summary(m0)
## 
## Call:
## lm(formula = prod ~ poly(x, degree = 2), data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.76119 -0.21022 -0.00299  0.18499  0.81979 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           4.11198    0.04303  95.557   <2e-16 ***
## poly(x, degree = 2)1 -0.14965    0.33883  -0.442    0.660    
## poly(x, degree = 2)2 -0.50069    0.33883  -1.478    0.145    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3388 on 59 degrees of freedom
## Multiple R-squared:  0.03875,    Adjusted R-squared:  0.006168 
## F-statistic: 1.189 on 2 and 59 DF,  p-value: 0.3116
m1 <- update(m0, . ~ x)
summary(m1)
## 
## Call:
## lm(formula = prod ~ x, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.73509 -0.21106  0.00688  0.20491  0.85761 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.18679    0.17648  23.724   <2e-16 ***
## x           -0.00492    0.01125  -0.437    0.663    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3422 on 60 degrees of freedom
## Multiple R-squared:  0.003178,   Adjusted R-squared:  -0.01344 
## F-statistic: 0.1913 on 1 and 60 DF,  p-value: 0.6634
gg <- ggplot(data = dd,
       mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(method = lm, formula = y ~ x, col = "black") +
    geom_hline(yintercept = mp, lty = 2) +
    xlab(label = xlab) +
    ylab(label = ylab)

fits$FLF <- list(xlab = xlab, plot = gg, m0 = m0, m1 = m1)
gg

3.15 Ocluded light fractions of SOC

labs[labs$abbrev == "OLF", ]
## # A tibble: 1 x 4
##   abbrev short_label measure_unit long_label                             
##   <chr>  <chr>       <chr>        <chr>                                  
## 1 OLF    OLF         (g~kg^{-3})  Occluded ~ light ~ fractions ~ of ~ SOM
xlab <- expression("Occluded light fractions of SOM" ~ (g ~ kg^{-1}))
dc$x <- dc$OLF

ggplot(data = dc,
             mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(span = 1) +
    geom_hline(yintercept = mp, lty = 2)

dd <- dc %>%
    select(id, prod, cam, x) %>%
    na.omit()
dd
## # A tibble: 62 x 4
##    id                 prod cam       x
##    <chr>             <dbl> <fct> <dbl>
##  1 FAZARGEMIRA_AG10   4.01 L1     2.83
##  2 FAZARGEMIRA_AG2    3.89 L1     6.21
##  3 FAZARGEMIRA_AG3    4.12 L1     3.1 
##  4 FAZARGEMIRA_AG5    4.25 L1     6.86
##  5 FAZARGEMIRA_AG6    4.12 L1     8.45
##  6 FAZARGEMIRA_AG9    4.38 L1     5.5 
##  7 FAZARGEMIRA_STI5   3.88 L1     8   
##  8 FAZARGEMIRA_STI6   4.14 L1     5.5 
##  9 FAZCALIFORNIA_CA4  4.03 L1     3.84
## 10 FAZFLORIDA_F1      3.83 L1     2.87
## # … with 52 more rows
m0 <- lm(prod ~
             poly(x, degree = 2),
         data = dd)

# drop1(m0, test = "F")
summary(m0)
## 
## Call:
## lm(formula = prod ~ poly(x, degree = 2), data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.78080 -0.20732 -0.01523  0.22148  0.86174 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           4.11198    0.04361  94.283   <2e-16 ***
## poly(x, degree = 2)1  0.23701    0.34341   0.690    0.493    
## poly(x, degree = 2)2 -0.18060    0.34341  -0.526    0.601    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3434 on 59 degrees of freedom
## Multiple R-squared:  0.0126, Adjusted R-squared:  -0.02087 
## F-statistic: 0.3765 on 2 and 59 DF,  p-value: 0.6879
m1 <- update(m0, . ~ x)
summary(m1)
## 
## Call:
## lm(formula = prod ~ x, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.82569 -0.21222  0.00839  0.22272  0.87059 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.05156    0.09722  41.676   <2e-16 ***
## x            0.01017    0.01464   0.694     0.49    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3413 on 60 degrees of freedom
## Multiple R-squared:  0.007972,   Adjusted R-squared:  -0.008562 
## F-statistic: 0.4821 on 1 and 60 DF,  p-value: 0.4901
gg <- ggplot(data = dd,
       mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(method = lm, formula = y ~ x, col = "black") +
    geom_hline(yintercept = mp, lty = 2) +
    xlab(label = xlab) +
    ylab(label = ylab)

fits$OLF <- list(xlab = xlab, plot = gg, m0 = m0, m1 = m1)
gg

3.16 Heavy fractions of SOC

labs[labs$abbrev == "HF", ]
## # A tibble: 1 x 4
##   abbrev short_label measure_unit long_label     
##   <chr>  <chr>       <chr>        <chr>          
## 1 HF     HF          (g~kg^{-3})  Heavy ~ faction
xlab <- expression("Heavy fractions of SOM" ~ (g ~ kg^{-1}))
dc$x <- dc$HF

ggplot(data = dc,
             mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(span = 1) +
    geom_hline(yintercept = mp, lty = 2)

dd <- dc %>%
    select(id, prod, cam, x) %>%
    na.omit()
dd
## # A tibble: 62 x 4
##    id                 prod cam       x
##    <chr>             <dbl> <fct> <dbl>
##  1 FAZARGEMIRA_AG10   4.01 L1    23.4 
##  2 FAZARGEMIRA_AG2    3.89 L1    24.8 
##  3 FAZARGEMIRA_AG3    4.12 L1    30.1 
##  4 FAZARGEMIRA_AG5    4.25 L1    15.8 
##  5 FAZARGEMIRA_AG6    4.12 L1    16.4 
##  6 FAZARGEMIRA_AG9    4.38 L1     8.66
##  7 FAZARGEMIRA_STI5   3.88 L1    17.6 
##  8 FAZARGEMIRA_STI6   4.14 L1    18.7 
##  9 FAZCALIFORNIA_CA4  4.03 L1     8.03
## 10 FAZFLORIDA_F1      3.83 L1    17.9 
## # … with 52 more rows
m0 <- lm(prod ~
             poly(x, degree = 2),
         data = dd)

# drop1(m0, test = "F")
summary(m0)
## 
## Call:
## lm(formula = prod ~ poly(x, degree = 2), data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.77788 -0.19746  0.00591  0.22461  0.84426 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           4.11198    0.04376  93.965   <2e-16 ***
## poly(x, degree = 2)1 -0.20024    0.34457  -0.581    0.563    
## poly(x, degree = 2)2  0.03857    0.34457   0.112    0.911    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3446 on 59 degrees of freedom
## Multiple R-squared:  0.005901,   Adjusted R-squared:  -0.0278 
## F-statistic: 0.1751 on 2 and 59 DF,  p-value: 0.8398
m1 <- update(m0, . ~ x)
summary(m1)
## 
## Call:
## lm(formula = prod ~ x, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.77508 -0.19536  0.00588  0.22347  0.84286 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.178217   0.121078  34.508   <2e-16 ***
## x           -0.004462   0.007615  -0.586     0.56    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3417 on 60 degrees of freedom
## Multiple R-squared:  0.00569,    Adjusted R-squared:  -0.01088 
## F-statistic: 0.3434 on 1 and 60 DF,  p-value: 0.5601
gg <- ggplot(data = dd,
       mapping = aes(x = x, y = prod)) +
    geom_point() +
    geom_smooth(method = lm, formula = y ~ x, col = "black") +
    geom_hline(yintercept = mp, lty = 2) +
    xlab(label = xlab) +
    ylab(label = ylab)

fits$HF <- list(xlab = xlab, plot = gg, m0 = m0, m1 = m1)
gg

3.17 HF vs SOM

ggplot(data = dc,
             mapping = aes(x = MO2, y = HF)) +
    geom_point() +
    geom_smooth(span = 1) +
    xlab(label = expression("Organic matter" ~ (g ~ kg^{-1}))) +
    ylab(label = expression("Heavy fractions of SOC" ~ (g ~ kg^{-1})))

dd <- dc %>%
    select(id, MO2, HF) %>%
    na.omit()
dd
## # A tibble: 62 x 3
##    id                  MO2    HF
##    <chr>             <dbl> <dbl>
##  1 FAZARGEMIRA_AG10   42.3 23.4 
##  2 FAZARGEMIRA_AG2    45.5 24.8 
##  3 FAZARGEMIRA_AG3    45.3 30.1 
##  4 FAZARGEMIRA_AG5    38.7 15.8 
##  5 FAZARGEMIRA_AG6    37.2 16.4 
##  6 FAZARGEMIRA_AG9    36.3  8.66
##  7 FAZARGEMIRA_STI5   43.6 17.6 
##  8 FAZARGEMIRA_STI6   42.1 18.7 
##  9 FAZCALIFORNIA_CA4  29.6  8.03
## 10 FAZFLORIDA_F1      29.7 17.9 
## # … with 52 more rows
m0 <- lm(HF ~
             poly(MO2, degree = 2),
         data = dd)

# drop1(m0, test = "F")
summary(m0)
## 
## Call:
## lm(formula = HF ~ poly(MO2, degree = 2), data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.6258  -2.9632   0.1453   2.9337  14.2634 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             14.8434     0.6372  23.295  < 2e-16 ***
## poly(MO2, degree = 2)1  22.4729     5.0173   4.479  3.5e-05 ***
## poly(MO2, degree = 2)2   4.8508     5.0173   0.967    0.338    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.017 on 59 degrees of freedom
## Multiple R-squared:  0.2625, Adjusted R-squared:  0.2375 
## F-statistic:  10.5 on 2 and 59 DF,  p-value: 0.0001257
m1 <- update(m0, . ~ MO2)
summary(m1)
## 
## Call:
## lm(formula = HF ~ MO2, data = dd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.7028  -3.0426   0.6447   2.8194  14.0009 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -4.8748     4.4457  -1.097    0.277    
## MO2           0.5478     0.1222   4.482 3.39e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.015 on 60 degrees of freedom
## Multiple R-squared:  0.2508, Adjusted R-squared:  0.2383 
## F-statistic: 20.08 on 1 and 60 DF,  p-value: 3.394e-05
gg <- ggplot(data = dd,
             mapping = aes(x = MO2, y = HF)) +
    geom_point() +
    geom_smooth(method = lm, formula = y ~ x, col = "black") +
    xlab(label = expression("Organic matter" ~ (g ~ kg^{-1}))) +
    ylab(label = expression("Heavy fractions of SOC" ~ (g ~ kg^{-1})))
gg

4 Table with all individual fits

# length(fits)
s <- lapply(fits,
            FUN = function(x) {
                bk <- c(0, 0.001, 0.01, 0.05, 0.1, 1)
                lb <- c("***", "**", "*", ".", "")
                mod <- tail(x, n = 1)[[1]]
                s <- summary(mod)
                s1 <- sprintf("%s = %0.4f (%0.4f)%s",
                              names(coef(mod)),
                              s$coefficient[, "Estimate"],
                              s$coefficient[, "Std. Error"],
                              cut(s$coefficient[, "Pr(>|t|)"],
                                  breaks = bk,
                                  labels = lb))
                s2 <- sprintf("MSE = %0.3f (R^2 = %0.3f)",
                              s$sigma^2,
                              s$r.squared)
                s3 <- sprintf("Fstat = %0.3f (%d/%d)%s",
                              s$fstatistic["value"],
                              s$fstatistic["numdf"],
                              s$fstatistic["dendf"],
                              cut(pf(s$fstatistic["value"],
                                     s$fstatistic["numdf"],
                                     s$fstatistic["dendf"],
                                     lower.tail = FALSE),
                                  breaks = bk,
                                  labels = lb))
                c(s1, s2, s3)
            })

c1 <- do.call(c, s)
l <- sapply(s, length)
c2 <- do.call(c,
              mapply(
                  FUN = function(x, times) {
                      c(x, rep("", times - 1))
                  },
                  x = names(l),
                  times = l))

# library(knitr)
knitr::kable(data.frame(variable = c2,
                        coefficients = c1,
                        stringsAsFactors = FALSE),
             row.names = FALSE,
             align = c("r", "l"))
variable coefficients
rp (Intercept) = 4.3989 (0.0889)***
L1 = -0.1782 (0.0524)**
MSE = 0.102 (R^2 = 0.155)
Fstat = 11.569 (1/63)**
dens (Intercept) = 4.8341 (0.3687)***
L1 = -0.5654 (0.2934).
MSE = 0.114 (R^2 = 0.056)
Fstat = 3.714 (1/63).
poro (Intercept) = 3.4550 (0.4289)***
L1 = 1.2660 (0.8028)
MSE = 0.116 (R^2 = 0.038)
Fstat = 2.487 (1/63)
dp (Intercept) = 6.2453 (1.5495)***
L1 = -0.7939 (0.5808)
MSE = 0.117 (R^2 = 0.029)
Fstat = 1.869 (1/63)
clay (Intercept) = 4.4438 (0.2258)***
L1 = -0.0005 (0.0004)
MSE = 0.116 (R^2 = 0.036)
Fstat = 2.051 (1/55)
sand (Intercept) = 4.0257 (0.0935)***
L2 = 0.0004 (0.0003)
MSE = 0.118 (R^2 = 0.027)
Fstat = 1.531 (1/55)
pH_H2O (Intercept) = 0.0189 (1.1153)
L3 = 0.7897 (0.2143)***
MSE = 0.097 (R^2 = 0.198)
Fstat = 13.585 (1/55)***
P (Intercept) = 4.2230 (0.1055)***
L1 = -0.0035 (0.0034)
MSE = 0.119 (R^2 = 0.018)
Fstat = 1.021 (1/55)
K (Intercept) = 3.7813 (0.1480)***
L1 = 0.0034 (0.0014)*
MSE = 0.109 (R^2 = 0.098)
Fstat = 5.973 (1/55)*
Ca (Intercept) = 3.8451 (0.1612)***
L3 = 0.3339 (0.1836).
MSE = 0.114 (R^2 = 0.057)
Fstat = 3.308 (1/55).
Mg (Intercept) = 3.8401 (0.1530)***
L3 = 0.8215 (0.4192).
MSE = 0.113 (R^2 = 0.065)
Fstat = 3.841 (1/55).
Al (Intercept) = 4.4998 (0.1631)***
L3 = -0.8051 (0.3391)*
MSE = 0.110 (R^2 = 0.093)
Fstat = 5.636 (1/55)*
S (Intercept) = 4.6528 (0.2066)***
L3 = -0.0273 (0.0099)**
I(L3^2) = 0.0003 (0.0001)**
MSE = 0.108 (R^2 = 0.124)
Fstat = 3.838 (2/54)*
CTC (Intercept) = 4.4236 (0.3119)***
L3 = -0.0539 (0.0560)
MSE = 0.119 (R^2 = 0.017)
Fstat = 0.925 (1/55)
V (Intercept) = 3.7635 (0.1584)***
L3 = 0.0154 (0.0064)*
MSE = 0.109 (R^2 = 0.094)
Fstat = 5.697 (1/55)*
glic (Intercept) = 4.1887 (0.1656)***
x = -0.0003 (0.0007)
MSE = 0.117 (R^2 = 0.004)
Fstat = 0.230 (1/60)
phos (Intercept) = 4.2309 (0.2023)***
x = -0.0002 (0.0003)
MSE = 0.117 (R^2 = 0.006)
Fstat = 0.362 (1/60)
sulf (Intercept) = 3.9806 (0.1184)***
x = 0.0015 (0.0012)
MSE = 0.115 (R^2 = 0.023)
Fstat = 1.418 (1/60)
MO2 (Intercept) = 4.2834 (0.3030)***
x = -0.0048 (0.0083)
MSE = 0.117 (R^2 = 0.005)
Fstat = 0.327 (1/60)
FLF (Intercept) = 4.1868 (0.1765)***
x = -0.0049 (0.0112)
MSE = 0.117 (R^2 = 0.003)
Fstat = 0.191 (1/60)
OLF (Intercept) = 4.0516 (0.0972)***
x = 0.0102 (0.0146)
MSE = 0.117 (R^2 = 0.008)
Fstat = 0.482 (1/60)
HF (Intercept) = 4.1782 (0.1211)***
x = -0.0045 (0.0076)
MSE = 0.117 (R^2 = 0.006)
Fstat = 0.343 (1/60)
length(fits)
## [1] 22
p <- lapply(fits, "[[", "plot")
p <- lapply(p, FUN = "+", theme_bw())

do.call(function(...) grid.arrange(..., ncol = 2),
        p[1:8])

do.call(function(...) grid.arrange(..., ncol = 2),
        p[9:16])

do.call(function(...) grid.arrange(..., ncol = 2),
        p[17:22])

5 Regression for the soybean yield on all variables

dd <- dc %>%
    select(id, cam, dens:V) %>%
    gather(key = "var", value = "val", dens:V) %>%
    mutate(varcam = paste(var, cam, sep = ".")) %>%
    select(id, varcam, val) %>%
    spread(key = "varcam", value = "val")

dd <- inner_join(dd,
                 da %>% select(id,
                               prod,
                               glic,
                               phos,
                               sulf,
                               MO2,
                               FLF,
                               OLF,
                               HF) %>%
                 filter(!is.na(prod)),
                 by = "id")

# Model with all variables.
m0 <- lm(prod ~ ., data = dd[, -1])
summary(m0)
## 
## Call:
## lm(formula = prod ~ ., data = dd[, -1])
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.107228 -0.019425  0.003637  0.022937  0.140030 
## 
## Coefficients: (5 not defined because of singularities)
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -9.647e+00  1.280e+01  -0.754  0.47956   
## Al.L1               NA         NA      NA       NA   
## Al.L2        2.231e+00  8.434e-01   2.645  0.03828 * 
## Al.L3       -1.917e+00  1.338e+00  -1.432  0.20205   
## base.L1     -1.483e+01  1.609e+01  -0.921  0.39236   
## base.L2     -4.821e+01  1.502e+01  -3.210  0.01836 * 
## base.L3      4.348e+01  1.406e+01   3.092  0.02132 * 
## Ca_Mg.L1     2.267e+01  1.679e+01   1.350  0.22565   
## Ca_Mg.L2     4.624e+01  1.492e+01   3.100  0.02112 * 
## Ca_Mg.L3    -3.535e+01  1.387e+01  -2.549  0.04357 * 
## Ca.L1       -1.069e+01  2.832e+00  -3.775  0.00923 **
## Ca.L2       -9.457e-01  2.710e+00  -0.349  0.73908   
## Ca.L3       -8.232e+00  2.928e+00  -2.812  0.03068 * 
## clay.L1      9.687e-04  1.899e-03   0.510  0.62827   
## clay.L2      6.737e-03  2.528e-03   2.665  0.03727 * 
## clay.L3     -1.651e-03  2.375e-03  -0.695  0.51303   
## CTC.L1      -1.003e-01  8.221e-01  -0.122  0.90690   
## CTC.L2       6.091e-01  4.847e-01   1.257  0.25555   
## CTC.L3      -2.555e-01  3.815e-01  -0.670  0.52798   
## dens.L1     -7.332e+00  1.430e+00  -5.126  0.00217 **
## dens.L2      4.634e+00  1.433e+00   3.234  0.01782 * 
## dens.L3     -1.704e+00  1.398e+00  -1.219  0.26859   
## K.L1         3.555e-02  4.013e-02   0.886  0.40970   
## K.L2         1.013e-01  3.832e-02   2.645  0.03831 * 
## K.L3        -7.581e-02  3.255e-02  -2.329  0.05874 . 
## Mg.L1               NA         NA      NA       NA   
## Mg.L2               NA         NA      NA       NA   
## Mg.L3               NA         NA      NA       NA   
## MO.L1        1.016e-01  3.569e-02   2.846  0.02933 * 
## MO.L2        7.358e-02  7.167e-02   1.027  0.34417   
## MO.L3       -1.066e-01  5.836e-02  -1.827  0.11749   
## P.L1        -1.065e-02  9.071e-03  -1.174  0.28496   
## P.L2         6.063e-02  1.077e-02   5.632  0.00134 **
## P.L3        -1.958e-01  6.396e-02  -3.061  0.02219 * 
## pH_CaCl2.L1  2.407e+00  9.684e-01   2.485  0.04748 * 
## pH_CaCl2.L2  2.372e+00  9.644e-01   2.460  0.04912 * 
## pH_CaCl2.L3  1.398e-01  9.966e-01   0.140  0.89300   
## pH_H2O.L1   -2.306e+00  9.893e-01  -2.331  0.05859 . 
## pH_H2O.L2   -1.661e-01  3.964e-01  -0.419  0.68987   
## pH_H2O.L3    1.777e+00  5.612e-01   3.167  0.01939 * 
## poro.L1     -1.402e+01  3.508e+00  -3.997  0.00714 **
## poro.L2      4.053e+00  4.316e+00   0.939  0.38395   
## poro.L3     -1.707e+00  2.641e+00  -0.646  0.54190   
## sand.L1      4.975e-03  2.560e-03   1.943  0.09998 . 
## sand.L2     -2.271e-03  3.954e-03  -0.574  0.58655   
## sand.L3      2.731e-03  5.105e-03   0.535  0.61194   
## V.L1        -1.981e-02  1.324e-01  -0.150  0.88593   
## V.L2         1.709e-01  4.551e-02   3.755  0.00945 **
## V.L3        -2.133e-01  7.391e-02  -2.886  0.02785 * 
## glic         5.413e-03  1.536e-03   3.524  0.01247 * 
## phos        -1.313e-03  6.139e-04  -2.138  0.07634 . 
## sulf        -3.778e-03  2.263e-03  -1.670  0.14605   
## MO2         -2.220e-02  1.831e-02  -1.212  0.27093   
## FLF         -1.509e-02  1.240e-02  -1.217  0.26920   
## OLF          3.396e-02  2.024e-02   1.678  0.14441   
## HF                  NA         NA      NA       NA   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1314 on 6 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.9844, Adjusted R-squared:  0.8544 
## F-statistic:  7.57 on 50 and 6 DF,  p-value: 0.008401
# Selection.
m1 <- step(m0,
           direction = "both",
           trace = FALSE,
           k = c(AIC = 2, BIC = log(nrow(dd)))[2])
summary(m1)
## 
## Call:
## lm(formula = prod ~ Al.L2 + Al.L3 + base.L1 + base.L2 + base.L3 + 
##     Ca_Mg.L1 + Ca_Mg.L2 + Ca_Mg.L3 + Ca.L1 + Ca.L3 + clay.L2 + 
##     clay.L3 + CTC.L2 + CTC.L3 + dens.L1 + dens.L2 + dens.L3 + 
##     K.L1 + K.L2 + K.L3 + MO.L1 + MO.L2 + MO.L3 + P.L1 + P.L2 + 
##     P.L3 + pH_CaCl2.L1 + pH_CaCl2.L2 + pH_H2O.L1 + pH_H2O.L3 + 
##     poro.L1 + poro.L2 + sand.L1 + V.L2 + V.L3 + glic + phos + 
##     sulf + MO2 + FLF + OLF, data = dd[, -1])
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.117682 -0.021120  0.002233  0.034184  0.131326 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -9.677e+00  3.496e+00  -2.768 0.014347 *  
## Al.L2        1.866e+00  4.490e-01   4.155 0.000846 ***
## Al.L3       -1.814e+00  5.736e-01  -3.162 0.006452 ** 
## base.L1     -1.082e+01  7.449e+00  -1.453 0.166870    
## base.L2     -4.260e+01  6.667e+00  -6.390 1.22e-05 ***
## base.L3      4.026e+01  8.182e+00   4.921 0.000185 ***
## Ca_Mg.L1     1.751e+01  7.512e+00   2.331 0.034115 *  
## Ca_Mg.L2     4.012e+01  6.617e+00   6.063 2.17e-05 ***
## Ca_Mg.L3    -3.194e+01  7.822e+00  -4.083 0.000979 ***
## Ca.L1       -9.357e+00  1.152e+00  -8.120 7.16e-07 ***
## Ca.L3       -8.772e+00  1.222e+00  -7.181 3.17e-06 ***
## clay.L2      6.261e-03  1.250e-03   5.010 0.000155 ***
## clay.L3     -1.968e-03  9.640e-04  -2.042 0.059160 .  
## CTC.L2       4.813e-01  1.502e-01   3.204 0.005916 ** 
## CTC.L3      -2.624e-01  1.966e-01  -1.335 0.201935    
## dens.L1     -6.961e+00  7.955e-01  -8.750 2.81e-07 ***
## dens.L2      4.539e+00  8.948e-01   5.072 0.000138 ***
## dens.L3     -7.218e-01  4.545e-01  -1.588 0.133102    
## K.L1         2.589e-02  1.887e-02   1.373 0.190079    
## K.L2         8.800e-02  1.741e-02   5.055 0.000142 ***
## K.L3        -6.892e-02  1.961e-02  -3.514 0.003132 ** 
## MO.L1        8.692e-02  1.242e-02   6.999 4.29e-06 ***
## MO.L2        8.278e-02  2.734e-02   3.027 0.008485 ** 
## MO.L3       -8.881e-02  3.463e-02  -2.565 0.021555 *  
## P.L1        -8.027e-03  2.391e-03  -3.357 0.004326 ** 
## P.L2         5.846e-02  6.469e-03   9.037 1.86e-07 ***
## P.L3        -2.069e-01  3.437e-02  -6.021 2.34e-05 ***
## pH_CaCl2.L1  2.026e+00  3.935e-01   5.148 0.000119 ***
## pH_CaCl2.L2  1.834e+00  4.007e-01   4.577 0.000363 ***
## pH_H2O.L1   -1.979e+00  4.408e-01  -4.490 0.000432 ***
## pH_H2O.L3    1.784e+00  2.529e-01   7.054 3.91e-06 ***
## poro.L1     -1.342e+01  2.036e+00  -6.589 8.59e-06 ***
## poro.L2      4.863e+00  2.284e+00   2.129 0.050225 .  
## sand.L1      3.668e-03  8.709e-04   4.211 0.000755 ***
## V.L2         1.610e-01  2.298e-02   7.006 4.24e-06 ***
## V.L3        -1.953e-01  2.490e-02  -7.844 1.10e-06 ***
## glic         4.626e-03  6.065e-04   7.628 1.54e-06 ***
## phos        -1.084e-03  2.580e-04  -4.201 0.000771 ***
## sulf        -3.086e-03  1.089e-03  -2.834 0.012568 *  
## MO2         -2.112e-02  7.765e-03  -2.719 0.015835 *  
## FLF         -1.250e-02  7.399e-03  -1.689 0.111866    
## OLF          2.071e-02  8.267e-03   2.505 0.024289 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0932 on 15 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.9804, Adjusted R-squared:  0.9268 
## F-statistic: 18.29 on 41 and 15 DF,  p-value: 1.597e-07
# Diagnostics.
par(mfrow = c(2, 2))
plot(m0)

layout(1)

6 Regression for the soybean yield on all chemical variables

# dput(names(dc))
v <- c("id", "cam", "pH_H2O", "P", "K", "Ca", "Mg", "Al", "S", "CTC",
       "V")

dd <- dc %>%
    select(v) %>%
    gather(key = "var", value = "val", pH_H2O:V) %>%
    mutate(varcam = paste(var, cam, sep = ".")) %>%
    select(id, varcam, val) %>%
    spread(key = "varcam", value = "val")

dd <- inner_join(dd,
                 da %>% select(id, prod) %>% filter(!is.na(prod)),
                 by = "id") %>%
    na.omit()

# Model with all variables.
m0 <- lm(prod ~ ., data = dd[, -1])
summary(m0)
## 
## Call:
## lm(formula = prod ~ ., data = dd[, -1])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.58454 -0.18312 -0.01526  0.12617  0.48273 
## 
## Coefficients: (1 not defined because of singularities)
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -1.267e+01  9.741e+00  -1.300  0.20339   
## Al.L1               NA         NA      NA       NA   
## Al.L2       -1.028e+00  1.201e+00  -0.856  0.39861   
## Al.L3       -7.779e-01  1.321e+00  -0.589  0.56045   
## Ca.L1       -2.262e+00  1.716e+00  -1.318  0.19741   
## Ca.L2       -6.124e-01  1.006e+00  -0.609  0.54725   
## Ca.L3        7.484e-02  1.273e+00   0.059  0.95349   
## CTC.L1       6.354e-01  8.469e-01   0.750  0.45897   
## CTC.L2       3.934e-01  2.463e-01   1.597  0.12070   
## CTC.L3      -3.041e-01  2.653e-01  -1.146  0.26074   
## K.L1         2.476e-03  4.134e-03   0.599  0.55374   
## K.L2        -1.546e-02  7.147e-03  -2.163  0.03860 * 
## K.L3         1.979e-02  9.844e-03   2.010  0.05347 . 
## Mg.L1        1.118e+00  2.038e+00   0.548  0.58745   
## Mg.L2       -1.892e+00  2.120e+00  -0.893  0.37921   
## Mg.L3        3.324e+00  2.260e+00   1.471  0.15169   
## P.L1        -6.934e-03  6.053e-03  -1.145  0.26108   
## P.L2         2.885e-02  1.356e-02   2.128  0.04167 * 
## P.L3        -1.583e-01  7.773e-02  -2.036  0.05063 . 
## pH_H2O.L1    4.424e-01  8.896e-01   0.497  0.62260   
## pH_H2O.L2    3.063e-01  5.114e-01   0.599  0.55376   
## pH_H2O.L3    1.699e+00  5.353e-01   3.173  0.00347 **
## S.L1         1.099e-02  1.716e-02   0.641  0.52666   
## S.L2         6.184e-04  1.037e-02   0.060  0.95284   
## S.L3        -3.381e-03  4.557e-03  -0.742  0.46397   
## V.L1         1.063e-01  1.408e-01   0.755  0.45625   
## V.L2         3.875e-02  5.455e-02   0.710  0.48297   
## V.L3        -9.969e-02  5.396e-02  -1.847  0.07457 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3069 on 30 degrees of freedom
## Multiple R-squared:  0.5746, Adjusted R-squared:  0.2059 
## F-statistic: 1.559 on 26 and 30 DF,  p-value: 0.1208
# Selection.
m1 <- step(m0,
           direction = "both",
           trace = FALSE,
           k = c(AIC = 2, BIC = log(nrow(dd)))[2])
summary(m1)
## 
## Call:
## lm(formula = prod ~ Ca.L1 + CTC.L1 + CTC.L3 + K.L2 + K.L3 + Mg.L3 + 
##     P.L2 + P.L3 + pH_H2O.L3 + V.L1 + V.L3, data = dd[, -1])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.70148 -0.13944 -0.01967  0.13353  0.57833 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -10.415941   3.155886  -3.300 0.001894 ** 
## Ca.L1        -2.621492   0.771577  -3.398 0.001432 ** 
## CTC.L1        0.970184   0.298679   3.248 0.002198 ** 
## CTC.L3       -0.242950   0.128478  -1.891 0.065075 .  
## K.L2         -0.011190   0.004548  -2.460 0.017794 *  
## K.L3          0.018874   0.006830   2.764 0.008254 ** 
## Mg.L3         3.882773   1.685714   2.303 0.025935 *  
## P.L2          0.018563   0.009532   1.947 0.057742 .  
## P.L3         -0.137177   0.049725  -2.759 0.008360 ** 
## pH_H2O.L3     1.514886   0.375847   4.031 0.000212 ***
## V.L1          0.160442   0.048969   3.276 0.002029 ** 
## V.L3         -0.082352   0.030184  -2.728 0.009048 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2786 on 45 degrees of freedom
## Multiple R-squared:  0.4744, Adjusted R-squared:  0.3459 
## F-statistic: 3.692 on 11 and 45 DF,  p-value: 0.0008781
# Diagnostics.
par(mfrow = c(2, 2))
plot(m0)

layout(1)

7 Regression for the soybean yield on all physical variables

# dput(names(dc))
v <- c("id", "cam", "dens", "dp", "rp", "poro", "clay", "sand")
# c("id", "cam", "prod", "glic", "phos", "sulf", "MO2", "FLF", "OLF", "HF")

dd <- dc %>%
    select(v) %>%
    gather(key = "var", value = "val", dens:sand) %>%
    mutate(varcam = paste(var, cam, sep = ".")) %>%
    select(id, varcam, val) %>%
    spread(key = "varcam", value = "val")

dd <- inner_join(dd,
                 da %>% select(id, prod) %>% filter(!is.na(prod)),
                 by = "id") %>%
    na.omit()

# Model with all variables.
m0 <- lm(prod ~ ., data = dd[, -1])
summary(m0)
## 
## Call:
## lm(formula = prod ~ ., data = dd[, -1])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.80438 -0.17360  0.00524  0.19889  0.52911 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.3957445 15.8547667   0.719    0.477
## clay.L1      0.0002902  0.0025915   0.112    0.911
## clay.L2      0.0032758  0.0030760   1.065    0.294
## clay.L3     -0.0020571  0.0035890  -0.573    0.570
## dens.L1     -4.4087379  8.0214671  -0.550    0.586
## dens.L2     -0.7965933  6.9287392  -0.115    0.909
## dens.L3     -2.8579124  7.6420097  -0.374    0.711
## dp.L1        0.8685133  3.6600880   0.237    0.814
## dp.L2        1.7708034  3.4945543   0.507    0.615
## dp.L3        1.5299098  3.3900364   0.451    0.654
## poro.L1     -8.6615961 21.3586023  -0.406    0.687
## poro.L2     -2.6895548 19.2850659  -0.139    0.890
## poro.L3     -7.6278237 19.3240702  -0.395    0.695
## rp.L1       -0.0128018  0.0998850  -0.128    0.899
## rp.L2        0.0367647  0.1491670   0.246    0.807
## rp.L3        0.1071561  0.1635979   0.655    0.516
## sand.L1      0.0036904  0.0043981   0.839    0.407
## sand.L2     -0.0012435  0.0056102  -0.222    0.826
## sand.L3     -0.0004684  0.0052820  -0.089    0.930
## 
## Residual standard error: 0.3352 on 38 degrees of freedom
## Multiple R-squared:  0.3574, Adjusted R-squared:  0.05303 
## F-statistic: 1.174 on 18 and 38 DF,  p-value: 0.3281
# Selection.
m1 <- step(m0,
           direction = "both",
           trace = FALSE,
           k = c(AIC = 2, BIC = log(nrow(dd)))[2])
summary(m1)
## 
## Call:
## lm(formula = prod ~ clay.L2 + dens.L1 + sand.L1, data = dd[, 
##     -1])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.81496 -0.19240 -0.00914  0.23122  0.67983 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.9277469  1.0454322   2.801 0.007105 ** 
## clay.L2      0.0026509  0.0011464   2.312 0.024669 *  
## dens.L1     -1.1224687  0.3172092  -3.539 0.000846 ***
## sand.L1      0.0030887  0.0009812   3.148 0.002700 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3031 on 53 degrees of freedom
## Multiple R-squared:  0.2671, Adjusted R-squared:  0.2256 
## F-statistic: 6.438 on 3 and 53 DF,  p-value: 0.0008462
# Diagnostics.
par(mfrow = c(2, 2))
plot(m0)

layout(1)

8 Regression for the soybean yield on all biological variables

# dput(names(dc))
v <- c("id", "prod", "glic", "phos", "sulf", "MO2", "FLF", "OLF", "HF")

dd <- dc %>%
    select(v) %>%
    na.omit()

# Model with all variables.
m0 <- lm(prod ~ ., data = dd[, -1])
summary(m0)
## 
## Call:
## lm(formula = prod ~ ., data = dd[, -1])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.78921 -0.19406  0.01698  0.22569  0.73391 
## 
## Coefficients: (1 not defined because of singularities)
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.445e+00  3.150e-01  14.111   <2e-16 ***
## glic        -8.857e-05  1.005e-03  -0.088   0.9301    
## phos        -4.011e-04  4.424e-04  -0.907   0.3685    
## sulf         3.668e-03  1.699e-03   2.158   0.0353 *  
## MO2         -1.442e-02  1.251e-02  -1.153   0.2539    
## FLF          3.464e-03  1.327e-02   0.261   0.7951    
## OLF          1.728e-02  1.688e-02   1.024   0.3103    
## HF                  NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3394 on 55 degrees of freedom
## Multiple R-squared:  0.1007, Adjusted R-squared:  0.002541 
## F-statistic: 1.026 on 6 and 55 DF,  p-value: 0.4185
# Selection.
m1 <- step(m0,
           direction = "both",
           trace = FALSE,
           k = c(AIC = 2, BIC = log(nrow(dd)))[1])
summary(m1)
## 
## Call:
## lm(formula = prod ~ sulf + MO2, data = dd[, -1])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.69483 -0.23464  0.02453  0.22839  0.71871 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.398492   0.303913  14.473   <2e-16 ***
## sulf         0.002658   0.001457   1.825   0.0731 .  
## MO2         -0.014590   0.009790  -1.490   0.1415    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3353 on 59 degrees of freedom
## Multiple R-squared:  0.05853,    Adjusted R-squared:  0.02662 
## F-statistic: 1.834 on 2 and 59 DF,  p-value: 0.1687
# Diagnostics.
par(mfrow = c(2, 2))
plot(m0)

layout(1)
# Save the tidy dataset.
dc$x <- NULL
save(dc, file = "phis-chem.RData")