Packages
library(tidyverse)
library(ggplot2)
library(dplyr)
library(gsheet)
library(lme4)
library(cowplot)
library(readxl)
library(writexl)
library(ggdist)
Importation
gls = gsheet2tbl("https://docs.google.com/spreadsheets/d/1TC_-I7OpUloaqh5sd3mVu8YW-V1Lxk8SZ_DRCIUmWOE/edit?usp=sharing")
Descriptive analysis
Severity
plot_sev = gls %>%
  ggplot(aes(sev))+
  geom_histogram(color = "white", fill = "black")+
  #stat_function(fun=function(x) dbeta(x, 0.94145, 6.45601), color= "darkred", size = 1.2)+
  ggthemes::theme_few()+
  labs(x = "Disease severity (%)",
y = "Frequency")+
theme(text = element_text(size = 10, face = "bold"),
axis.title = element_text(size = 10, face = "bold"))
Yield
yy = gls %>%
  filter(!yld <= 1500)
mean(gls$yld)
[1] 8965.05
max(gls$yld)
[1] 13073.42
min(yy$yld)
[1] 3382.11
quantile(yy$yld)
0% 25% 50% 75% 100%
3382.110 8018.915 8971.835 9844.248 13073.420
mean_intercept = mean(gls$yld)
sd_intercept = sd(gls$yld)
plot_yld = gls %>%
  filter(!yld <= 1000) %>%
  ggplot(aes(yld))+
  geom_histogram(fill = "black", color = "white", bins = 20) +
  scale_x_continuous(breaks = c(4000, 6000, 8000, 10000, 12000, 14000), limits = c(4000, 14000))+
  ggthemes::theme_few()+
  labs(x = "Yield (kg/ha)",
y = "")+
theme(text = element_text(size = 10, face = "bold"),
axis.title = element_text(size = 10, face = "bold"))
plot_yld
plot_grid(plot_sev, plot_yld, ncol = 2, label_x = -0.03, label_size = 14,
labels = c("(a)","(b)"))
ggsave("fig/sev_yld.png", bg = "white", dpi = 600,
width = 8, height = 4)
Prepare data
estria2 <- gls |>
  group_by(trial, hybrid) |>
summarise(mean_sev = mean(sev),
mean_yld = mean(yld))
max(estria2$mean_sev)
[1] 76.66667
min(estria2$mean_sev)
[1] 1.333333
mean(estria2$mean_sev)
[1] 13.83716
median(estria2$mean_sev)
[1] 9.5
sd(estria2$mean_sev)
[1] 13.12138
max(estria2$mean_yld)
[1] 12503.86
min(estria2$mean_yld)
[1] 3909.777
mean(estria2$mean_yld)
[1] 8964.191
median(estria2$mean_yld)
[1] 8989.737
sd(estria2$mean_yld)
[1] 1359.003
Visualize
estria2 = estria2 %>%
  mutate(
    period = case_when(trial == "2018_1" ~ "2018/1",
                       trial == "2018_2" ~ "2018/2",
                       trial == "2018_19" ~ "2018/2019",
                       trial == "2019_1" ~ "2019/1"))
library(ggthemes)
trials = estria2 |>
  ggplot(aes(mean_sev, mean_yld, group = as.factor(period), color = as.factor(period)))+
geom_point()+
#facet_wrap(~period)+
geom_smooth(method = "lm", se = FALSE, size = 2)+
scale_color_viridis_d()+
theme_minimal()+
scale_y_continuous(breaks = c(4000, 5000,6000,7000,8000,9000,10000,11000,12000),
limits = c(4000, 12000))+
theme_few()+
labs(x = "Disease severity (%)",
y = "Yield (kg/ha)",
color = "Seasons")+
theme(text = element_text(size = 12, face = "bold"),
legend.position = "none")
#geom_abline(slope = -49.3, intercept = 9714, linetype = 1, linewidth =2, color = "gray50")
ggsave("fig/trials.png", bg = "white", dpi = 600,
width = 6, height = 4)
library(broom)
lmer_stats = estria2 %>%
  #group_by(year) %>%
  dplyr::select(trial, mean_yld, mean_sev) %>%
  group_by(trial) %>%
  do({
    model <- lm(.$mean_yld ~ .$mean_sev)
    tidy_model <- tidy(model)
    confint_model <- confint(model) # compute the confidence intervals
    bind_cols(tidy_model, confint_model)
})
lmer_stats = lmer_stats |>
  filter(term %in% c("(Intercept)", ".$mean_sev"))
lmer_stats[lmer_stats$term == "(Intercept)", c("parameters")] <- "Intercept"
lmer_stats[lmer_stats$term == ".$mean_sev", c("parameters")] <- "Slope"
i <- 1
while (i <= nrow(lmer_stats)) {
  if (lmer_stats$parameters[i] == "Slope" && lmer_stats$estimate[i] > 0) {
    # remove the Slope row and the corresponding Intercept row
    lmer_stats <- lmer_stats[-c(i, i - 1), ]
    # update the index, since two rows were removed
    i <- i - 2
  }
  i <- i + 1
}
lmer_stats
slope_trial_m = lmer_stats |>
  filter(parameters == "Slope") %>%
  summarise(
    Slope = estimate
  )
slope_trial_m[, 1] = NULL

slope_trial_m |>
  filter(!Slope == "NA") |>
  summarise(
    mean = mean(Slope))

intercept_trial_m = lmer_stats |>
  filter(parameters == "Intercept") %>%
  summarise(
    Intercept = estimate
  )
intercept_trial_m[, 1] = NULL

mean(intercept_trial_m$Intercept)
[1] 9711.667

regression_trial_m = cbind(slope_trial_m, intercept_trial_m)
regression_trial_m[, 3] = NULL
summary_stats <- regression_trial_m %>%
  reframe(
mean_intercept = mean(Intercept),
mean_slope = mean(Slope),
ci_intercept_lower = quantile(Intercept, 0.025),
ci_intercept_upper = quantile(Intercept, 0.975),
ci_slope_lower = quantile(Slope, 0.025),
ci_slope_upper = quantile(Slope, 0.975)
)
trials = estria2 |>
  ggplot(aes(mean_sev, y = mean_yld)) +
geom_point(color = "NA")+
scale_y_continuous(breaks = c(4000,6000,8000,10000,12000),
limits = c(4000, 12000))+
geom_abline(data =regression_trial_m, aes(slope = Slope, intercept = Intercept), size = 1,
alpha = 0.5, color = "grey")+##9ccb86
#geom_abline(data =summary_stats, aes(slope = mean_slope, intercept = mean_intercept),
# size = 1.5, fill = "black", color = "black")+
geom_abline(data = summary_stats,aes(intercept = ci_intercept_lower,slope = ci_slope_lower) ,
size = 1.5, linetype = 2, fill = "grey", color = "grey")+
geom_abline(data = summary_stats, aes(intercept = ci_intercept_upper,slope = ci_slope_upper), size = 1.5, linetype = 2, fill = "grey", color = "grey")+
geom_abline(aes(slope = -49.37, intercept = 9714.0),
size = 1.5, fill = "black", color = "black")+
geom_abline(aes(intercept = 8699.1,slope = -60.59) ,
size = .51, linetype = 2)+
geom_abline(aes(intercept = 10733.8,slope = -38.00), size = .51, linetype = 2)+
ggthemes::theme_few()+
  theme(text = element_text(size = 10, face = "bold"),
axis.title = element_text(size = 10, face = "bold"),
legend.position = "none")+
labs(x = "Disease severity (%)", y = "Attainable yield (kg/ha)",
title = "")
first_plot = estria2 |>
  filter(trial == "2018_1") %>%
ggplot(aes(mean_sev, mean_yld))+ #, group = trial, color = trial
geom_point()+
facet_wrap(~trial)+
geom_smooth(method = "lm", se = FALSE, size = 2, color = "black")+
scale_x_continuous(breaks = c(0, 20, 40,60,80), limits = c(0, 80))+
scale_y_continuous(breaks = c(5000, 7500, 10000,12500), limits = c(5000, 12500))+
scale_color_viridis_d()+
theme_minimal()+
theme_few()+
labs(x = "Severity (%)",
y = "Yield (kg/ha)",
color = "Trial")+
theme(text = element_text(size = 12, face = "bold"))
second_plot = estria2 |>
  filter(trial == "2018_2") %>%
ggplot(aes(mean_sev, mean_yld))+ #, group = trial, color = trial
geom_point()+
facet_wrap(~trial)+
geom_smooth(method = "lm", se = FALSE, size = 2, color = "black")+
scale_x_continuous(breaks = c(0, 20, 40,60,80), limits = c(0, 80))+
scale_y_continuous(breaks = c(5000, 7500, 10000,12500), limits = c(5000, 12500))+
scale_color_viridis_d()+
theme_minimal()+
theme_few()+
labs(x = "Severity (%)",
y = "Yield (kg/ha)",
color = "Trial")+
theme(text = element_text(size = 12, face = "bold"))
third_plot = estria2 |>
  filter(trial == "2018_19") %>%
ggplot(aes(mean_sev, mean_yld))+ #, group = trial, color = trial
geom_point()+
facet_wrap(~trial)+
geom_smooth(method = "lm", se = FALSE, size = 2, color = "black")+
scale_x_continuous(breaks = c(0, 20, 40,60,80), limits = c(0, 80))+
scale_y_continuous(breaks = c(5000, 7500, 10000,12500), limits = c(5000, 12500))+
scale_color_viridis_d()+
theme_minimal()+
theme_few()+
labs(x = "Severity (%)",
y = "Yield (kg/ha)",
color = "Trial")+
theme(text = element_text(size = 12, face = "bold"))
fourth_plot = estria2 |>
  filter(trial == "2019_1") %>%
ggplot(aes(mean_sev, mean_yld))+ #, group = trial, color = trial
geom_point()+
facet_wrap(~trial)+
geom_smooth(method = "lm", se = FALSE, size = 2, color = "black")+
scale_x_continuous(breaks = c(0, 20, 40,60,80), limits = c(0, 80))+
scale_y_continuous(breaks = c(5000, 7500, 10000,12500), limits = c(5000, 12500))+
scale_color_viridis_d()+
theme_minimal()+
theme_few()+
labs(x = "Severity (%)",
y = "Yield (kg/ha)",
color = "Trial")+
theme(text = element_text(size = 12, face = "bold"))
library(patchwork)
combined_plot <- (first_plot | plot_yld | second_plot) /
  (third_plot | plot_sev | fourth_plot) +
  plot_layout(widths = c(2, 10, 2), heights = c(1, 1))
combined_plot
ggsave("fig/plot_all.png", bg = "white", width = 10, height = 8)
Modeling
Fitting
library(lme4)
# Fit a mixed-effects model
obs_model_lmer <- lmer(mean_yld ~ mean_sev + (1 | trial), data = estria2)
# Summary of the model
summary(obs_model_lmer)
Linear mixed model fit by REML ['lmerMod']
Formula: mean_yld ~ mean_sev + (1 | trial)
Data: estria2
REML criterion at convergence: 2880.3
Scaled residuals:
Min 1Q Median 3Q Max
-3.4362 -0.7056 0.0671 0.7105 2.0247
Random effects:
Groups Name Variance Std.Dev.
trial (Intercept) 820678 905.9
Residual 941736 970.4
Number of obs: 174, groups: trial, 4
Fixed effects:
Estimate Std. Error t value
(Intercept) 9714.049 465.870 20.851
mean_sev -49.378 5.745 -8.595
Correlation of Fixed Effects:
(Intr)
mean_sev -0.168
confint(obs_model_lmer)
2.5 % 97.5 %
.sig01 426.84166 1944.19893
.sigma 873.17446 1080.29427
(Intercept) 8699.18638 10733.83869
mean_sev -60.59337 -38.00271
set.seed(1234)
# Parameters from the fitted model
fixed_intercept <- 9714.049
fixed_slope <- -49.378
random_effect_sd <- 905.9 # sqrt(820678)
residual_sd <- 970.4 # sqrt(941736)

# Number of experiments and points per experiment
num_experiments <- 200 # New experiments
points_per_experiment <- 50
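These values are copied by hand from the model summary; as a sketch (assuming obs_model_lmer is still in the session), the same quantities can be read directly off the fitted object, which avoids copy-paste drift:
# Optional alternative: pull the simulation parameters from the fitted lmer object
fixed_intercept  <- unname(fixef(obs_model_lmer)["(Intercept)"])    # ~9714.05
fixed_slope      <- unname(fixef(obs_model_lmer)["mean_sev"])        # ~-49.38
random_effect_sd <- as.data.frame(VarCorr(obs_model_lmer))$sdcor[1]  # ~905.9
residual_sd      <- sigma(obs_model_lmer)                            # ~970.4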
# Simulate data
# Get the range of observed severity values from the original dataset
set.seed(123)
# Simulate data with variable max severity for each experiment
simulated_data_lmer <- data.frame()
for (exp in 1:num_experiments) {
  # Simulate random intercept for the experiment
  u_j <- rnorm(1, mean = 0, sd = random_effect_sd)

  # Randomly select a max severity for this simulation between 20% and 80%
  max_sev_sim <- runif(1, min = 20, max = 80)

  # Simulate severity values (independent variable) within the 0 to max_sev_sim range
  sev <- runif(points_per_experiment, min = 0, max = max_sev_sim)

  # Simulate residuals
  residuals <- rnorm(points_per_experiment, mean = 0, sd = residual_sd)

  # Generate yield (dependent variable)
  yld <- fixed_intercept + u_j + fixed_slope * sev + residuals

  # Combine into a data frame
  exp_data <- data.frame(experiment = paste0("Exp_", exp), sev = sev, yld = yld)
  simulated_data_lmer <- rbind(simulated_data_lmer, exp_data)
}
simulated_data_lmer$hybrid <- paste0("hybrid_", sprintf("%02d", (seq_len(nrow(simulated_data_lmer)) - 1) %/% 50 + 1))
# View simulated data
head(simulated_data_lmer)
summary(simulated_data_lmer)
experiment sev yld hybrid
Length:10000 Min. : 0.00051 Min. : 2528 Length:10000
Class :character 1st Qu.:10.61846 1st Qu.: 7391 Class :character
Mode :character Median :21.04083 Median : 8509 Mode :character
Mean :24.67849 Mean : 8472
3rd Qu.:36.17231 3rd Qu.: 9594
Max. :78.27768 Max. :14239
library(ggplot2)
# Compare simulated and real severity-yield relationships
ggplot(simulated_data_lmer, aes(x = sev, y = yld, color = experiment)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", se = FALSE) +
theme_minimal() +
theme(legend.position = "none")+
labs(title = "Simulated Severity-Yield Data", x = "Severity", y = "Yield")
simu_model_lmer <- lmer(yld ~ sev + (1 | experiment), data = simulated_data_lmer)
simu_model_lmer
Linear mixed model fit by REML ['lmerMod']
Formula: yld ~ sev + (1 | experiment)
Data: simulated_data_lmer
REML criterion at convergence: 166620.8
Random effects:
Groups Name Std.Dev.
experiment (Intercept) 937.3
Residual 966.9
Number of obs: 10000, groups: experiment, 200
Fixed Effects:
(Intercept) sev
9691.01 -49.39
summary(simu_model_lmer)
Linear mixed model fit by REML ['lmerMod']
Formula: yld ~ sev + (1 | experiment)
Data: simulated_data_lmer
REML criterion at convergence: 166620.8
Scaled residuals:
Min 1Q Median 3Q Max
-3.6504 -0.6689 0.0041 0.6771 3.3524
Random effects:
Groups Name Variance Std.Dev.
experiment (Intercept) 878546 937.3
Residual 934819 966.9
Number of obs: 10000, groups: experiment, 200
Fixed effects:
Estimate Std. Error t value
(Intercept) 9691.0145 68.8438 140.77
sev -49.3879 0.6449 -76.59
Correlation of Fixed Effects:
(Intr)
sev -0.231
confint(simu_model_lmer)
2.5 % 97.5 %
.sig01 848.3483 1036.5979
.sigma 953.4327 980.5062
(Intercept) 9555.8113 9826.1935
sev -50.6520 -48.1241
obs_fixed_effects_lmer <- fixef(obs_model_lmer) # From the original model
simu_fixed_effects_lmer <- fixef(simu_model_lmer) # From the simulated model
print(obs_fixed_effects_lmer)
(Intercept) mean_sev
9714.04857 -49.37773
print(simu_fixed_effects_lmer)
(Intercept) sev
9691.01451 -49.38785
obs_random_effects_lmer <- VarCorr(obs_model_lmer)
simu_random_effects_lmer <- VarCorr(simu_model_lmer)
print(obs_random_effects_lmer)
Groups Name Std.Dev.
trial (Intercept) 905.91
Residual 970.43
print(simu_random_effects_lmer)
Groups Name Std.Dev.
experiment (Intercept) 937.31
Residual 966.86
obs_residual_sd_lmer <- attr(VarCorr(obs_model_lmer), "sc")
simu_residual_sd_lmer <- attr(VarCorr(simu_model_lmer), "sc")
print(obs_residual_sd_lmer)
[1] 970.4308
print(simu_residual_sd_lmer)
[1] 966.8604
Plotting
library(broom)
lmer_stats = simulated_data_lmer %>%
  #group_by(year) %>%
  dplyr::select(experiment, yld, sev) %>%
  group_by(experiment) %>%
  do({
    model <- lm(.$yld ~ .$sev)
    tidy_model <- tidy(model)
    confint_model <- confint(model) # compute the confidence intervals
    bind_cols(tidy_model, confint_model)
})
lmer_stats = lmer_stats |>
  filter(term %in% c("(Intercept)", ".$sev"))
lmer_stats[lmer_stats$term == "(Intercept)", c("parameters")] <- "Intercept"
lmer_stats[lmer_stats$term == ".$sev", c("parameters")] <- "Slope"
i <- 1
while (i <= nrow(lmer_stats)) {
  if (lmer_stats$parameters[i] == "Slope" && lmer_stats$estimate[i] > 0) {
    # remove the Slope row and the corresponding Intercept row
    lmer_stats <- lmer_stats[-c(i, i - 1), ]
    # update the index, since two rows were removed
    i <- i - 2
  }
  i <- i + 1
}
lmer_stats
slope_trial_m = lmer_stats |>
  filter(parameters == "Slope") %>%
  summarise(
    Slope = estimate
  )
slope_trial_m[, 1] = NULL

slope_trial_m |>
  filter(!Slope == "NA") |>
  summarise(
    mean = mean(Slope))

intercept_trial_m = lmer_stats |>
  filter(parameters == "Intercept") %>%
  summarise(
    Intercept = estimate
  )
intercept_trial_m[, 1] = NULL

mean(intercept_trial_m$Intercept)
[1] 9692.539

regression_trial_m = cbind(slope_trial_m, intercept_trial_m)
regression_trial_m[, 3] = NULL

summary_stats <- regression_trial_m %>%
  reframe(
mean_intercept = mean(Intercept),
mean_slope = mean(Slope),
ci_intercept_lower = quantile(Intercept, 0.025),
ci_intercept_upper = quantile(Intercept, 0.975),
ci_slope_lower = quantile(Slope, 0.025),
ci_slope_upper = quantile(Slope, 0.975)
)
lmer_plot = simulated_data_lmer |>
  ggplot(aes(sev, y = yld)) +
geom_point(color = "NA")+
scale_y_continuous(breaks = c(4000,6000,8000,10000,12000),
limits = c(4000, 12000))+
geom_abline(data =regression_trial_m, aes(slope = Slope, intercept = Intercept), size = 1,
alpha = 0.5, color = "grey")+##9ccb86
# geom_abline(data =summary_stats, aes(slope = mean_slope, intercept = mean_intercept),
# size = 1.5, fill = "black", color = "black")+
geom_abline(data = summary_stats,aes(intercept = ci_intercept_lower,slope = ci_slope_lower) ,
size = 1.5, linetype = 2, fill = "grey", color = "grey")+
geom_abline(data = summary_stats, aes(intercept = ci_intercept_upper,slope = ci_slope_upper), size = 1.5, linetype = 2, fill = "grey", color = "grey")+
geom_abline( aes(slope = -49.38, intercept = 9691.0),
size = 1.5, fill = "black", color = "black")+
geom_abline(aes(intercept = 9555.81,slope = -50.65) ,
size = .51, linetype = 2)+
geom_abline(aes(intercept = 9826.19,slope = -48.12), size = .51, linetype = 2)+
ggthemes::theme_few()+
  theme(text = element_text(size = 10, face = "bold"),
axis.title = element_text(size = 10, face = "bold"),
legend.position = "none")+
labs(x = "Disease severity (%)", y = "",
title = "")
lmer_plot
Joining
plot_grid(trials,lmer_plot,plot_sev, plot_yld, labels = c("(a)","(b)","(c)","(d)"), ncol = 2,
label_size = 10, label_x = -0.01)
ggsave("fig/obs_simu_model.png", bg = "white", dpi = 600,width = 6, height = 4)
cor(estria2$mean_sev, estria2$mean_yld)
[1] -0.4051456
cor(simulated_data_lmer$sev, simulated_data_lmer$yld)
[1] -0.5508678
Performance
Empirical
obs_lmer_predicted <- predict(obs_model_lmer)

obs_lmer_observed <- estria2$mean_yld

obs_lmer_residuals <- obs_lmer_observed - obs_lmer_predicted

obs_lmer_RMSE <- sqrt(mean(obs_lmer_residuals^2))
obs_lmer_MAE <- mean(abs(obs_lmer_residuals))
obs_lmer_correlation <- cor(obs_lmer_observed, obs_lmer_predicted)

performance::check_normality(obs_model_lmer)
OK: residuals appear as normally distributed (p = 0.227).
performance::check_heteroscedasticity(obs_model_lmer)
Warning: Heteroscedasticity (non-constant error variance) detected (p < .001).
performance::check_autocorrelation(obs_model_lmer)
Warning: Autocorrelated residuals detected (p = 0.004).
Simulated
simu_lmer_predicted <- predict(simu_model_lmer)

simu_lmer_observed <- simulated_data_lmer$yld

simu_lmer_residuals <- simu_lmer_observed - simu_lmer_predicted

simu_lmer_RMSE <- sqrt(mean(simu_lmer_residuals^2))
simu_lmer_MAE <- mean(abs(simu_lmer_residuals))
simu_lmer_correlation <- cor(simu_lmer_observed, simu_lmer_predicted)

performance::check_normality(simu_model_lmer)
OK: residuals appear as normally distributed (p = 0.875).
performance::check_heteroscedasticity(simu_model_lmer)
Warning: Heteroscedasticity (non-constant error variance) detected (p < .001).
performance::check_autocorrelation(simu_model_lmer)
Warning: Autocorrelated residuals detected (p < .001).
Visualization
Simulated
# Extract the residuals and theoretical quantiles
lmer_residuals <- residuals(simu_model_lmer, type = "deviance")
lmer_qq_data <- data.frame(
  theoretical = qqnorm(lmer_residuals, plot.it = FALSE)$x*1000,
  residuals = qqnorm(lmer_residuals, plot.it = FALSE)$y
)
# Plot the QQ plot
lmer_qq = lmer_qq_data %>%
  ggplot(aes(x = theoretical, y = residuals)) +
geom_point() +
geom_abline(intercept = 0, slope = 1, color = "red", linetype = "dashed") +
scale_x_continuous(breaks = c(-3000,-2000,-1000,0,1000,2000,3000),
limits = c(-3000, 3000))+
scale_y_continuous(breaks = c(-3000,-2000,-1000,0,1000,2000,3000),
limits = c(-3000, 3000))+
labs(x = "Theoretical Quantiles", y = "") +
ggthemes::theme_few()+
  theme(text = element_text(size = 12, face = "bold"))
lmer_qq
# Extract the linear predictors and residuals
lmer_linear_predictors <- predict(simu_model_lmer, type = "link")
lmer_residuals_data <- data.frame(
  linear_predictors = lmer_linear_predictors,
  residuals = lmer_residuals
)
# Plot residuals vs linear predictors
lmer_predictors = lmer_residuals_data %>%
  ggplot(aes(x = linear_predictors, y = residuals)) +
geom_point(alpha = 0.2, color = "grey", size = 2) +
geom_hline(yintercept = 0, color = "black", linetype = "dashed", size = 1.4) +
scale_x_continuous(breaks = c(4000,6000,8000,10000,12000),
limits = c(4000, 12000))+
scale_y_continuous(breaks = c(-3000,-2000,-1000,0,1000,2000,3000),
limits = c(-3000, 3000))+
labs(x = "Linear Predictors", y = "") +
ggthemes::theme_few()+
  theme(text = element_text(size = 12, face = "bold"))
# Compute the mean and standard deviation of the residuals
mean_res <- mean(lmer_residuals_data$residuals)
sd_res <- sd(lmer_residuals_data$residuals)

# Plot the histogram with the fitted normal curve
lmer_res_hist <- ggplot(lmer_residuals_data, aes(x = residuals)) +
  geom_histogram(aes(y = ..density..), fill = "black", color = "white", bins = 20) + # scale to density
  stat_function(fun = dnorm, args = list(mean = mean_res, sd = sd_res),
                color = "darkred", size = 1.2, linetype = "solid") + # add the normal curve
scale_x_continuous(breaks = c(-2500, -1250, 0, 1250, 2500),
limits = c(-2500, 2500)) +
labs(x = "Deviance Residuals", y = "") +
ggthemes::theme_few() +
  theme(text = element_text(size = 12, face = "bold"))
# Display the plot
lmer_res_hist
# Extract observed and predicted values
lmer_observed <- simulated_data_lmer$yld
lmer_predicted <- predict(simu_model_lmer, type = "response")
lmer_prediction_data <- data.frame(
  observed = lmer_observed,
  predicted = lmer_predicted
)
# Plot predicted vs observed values
lmer_pd_ob = lmer_prediction_data %>%
  ggplot(aes(x = observed, y = predicted)) +
geom_point(alpha = 0.5, color = "grey", size = 2) +
geom_abline(intercept = 0, slope = 1, color = "black", linetype = "dashed", size = 1.4) +
scale_x_continuous(breaks = c(4000,6000,8000,10000,12000),
limits = c(4000, 12000))+
scale_y_continuous(breaks = c(4000,6000,8000,10000,12000),
limits = c(4000, 12000))+
labs(x = "Observed", y = "") +
ggthemes::theme_few()+
  theme(text = element_text(size = 12, face = "bold"))
lmer_pd_ob
max(lmer_prediction_data$predicted)
[1] 12122.53
Observed
# Extract the residuals and theoretical quantiles
obs_lmer_residuals <- residuals(obs_model_lmer, type = "deviance")
obs_lmer_qq_data <- data.frame(
  theoretical = qqnorm(obs_lmer_residuals, plot.it = FALSE)$x*1000,
  residuals = qqnorm(obs_lmer_residuals, plot.it = FALSE)$y
)
# Plot the QQ plot
obs_lmer_qq = obs_lmer_qq_data %>%
  filter(!residuals < -2000) %>%
ggplot(aes(x = theoretical, y = residuals)) +
geom_point() +
geom_abline(intercept = 0, slope = 1, color = "red", linetype = "dashed") +
scale_x_continuous(breaks = c(-3000,-2000,-1000,0,1000,2000,3000),
limits = c(-3000, 3000))+
scale_y_continuous(breaks = c(-3000,-2000,-1000,0,1000,2000,3000),
limits = c(-3000, 3000))+
labs(x = "Theoretical Quantiles", y = "Dev. Residuals") +
ggthemes::theme_few()+
  theme(text = element_text(size = 12, face = "bold"))
obs_lmer_qq
# Extract the linear predictors and residuals
obs_lmer_linear_predictors <- predict(obs_model_lmer, type = "link")
obs_lmer_residuals_data <- data.frame(
  linear_predictors = obs_lmer_linear_predictors,
  residuals = obs_lmer_residuals
)
# Plot residuals vs linear predictors
obs_lmer_predictors = obs_lmer_residuals_data %>%
  filter(!residuals < -2000) %>%
ggplot(aes(x = linear_predictors, y = residuals)) +
geom_point(alpha = 0.2, color = "grey", size = 2) +
geom_hline(yintercept = 0, color = "black", linetype = "dashed", size = 1.4) +
scale_x_continuous(breaks = c(4000,6000,8000,10000,12000),
limits = c(4000, 12000))+
scale_y_continuous(breaks = c(-3000,-2000,-1000,0,1000,2000,3000),
limits = c(-3000, 3000))+
labs(x = "Linear Predictors", y = "Dev. Residuals") +
ggthemes::theme_few()+
  theme(text = element_text(size = 12, face = "bold"))

# Compute the mean and standard deviation of the residuals
mean_res <- mean(obs_lmer_residuals_data$residuals)
sd_res <- sd(obs_lmer_residuals_data$residuals)

# Plot the histogram with the fitted normal curve
obs_lmer_res_hist <- ggplot(obs_lmer_residuals_data, aes(x = residuals)) +
  geom_histogram(aes(y = ..density..), fill = "black", color = "white", bins = 20) + # scale to density
  stat_function(fun = dnorm, args = list(mean = mean_res, sd = sd_res),
                color = "darkred", size = 1.2, linetype = "solid") + # add the normal curve
scale_x_continuous(breaks = c(-2500, -1250, 0, 1250, 2500),
limits = c(-2500, 2500)) +
labs(x = "Deviance Residuals", y = "Frequency") +
ggthemes::theme_few() +
  theme(text = element_text(size = 12, face = "bold"))
# Display the plot
obs_lmer_res_hist
# Extract observed and predicted values
obs_lmer_observed <- estria2$mean_yld
obs_lmer_predicted <- predict(obs_model_lmer, type = "response")
obs_lmer_prediction_data <- data.frame(
  observed = obs_lmer_observed,
  predicted = obs_lmer_predicted
)
# Plot predicted vs observed values
obs_lmer_pd_ob = obs_lmer_prediction_data %>%
  ggplot(aes(x = observed, y = predicted)) +
geom_point(alpha = 0.5, color = "grey", size = 2) +
geom_abline(intercept = 0, slope = 1, color = "black", linetype = "dashed", size = 1.4) +
scale_x_continuous(breaks = c(4000,6000,8000,10000,12000),
limits = c(4000, 12000))+
scale_y_continuous(breaks = c(4000,6000,8000,10000,12000),
limits = c(4000, 12000))+
labs(x = "Observed", y = "Predicted") +
ggthemes::theme_few()+
  theme(text = element_text(size = 12, face = "bold"))
obs_lmer_pd_ob
Plotting
#plot_grid(obs_lmer_pd_ob,lmer_pd_ob,
# obs_lmer_qq,lmer_qq,
# obs_lmer_predictors,lmer_predictors,
# obs_lmer_res_hist,lmer_res_hist, ncol = 2, labels = c("a"),
# label_x = -0.01,label_y = 0.01)
(obs_lmer_pd_ob | lmer_pd_ob) /
  (obs_lmer_qq | lmer_qq) /
  (obs_lmer_predictors | lmer_predictors) /
  (obs_lmer_res_hist | lmer_res_hist) +
  plot_annotation(tag_levels = "a", tag_prefix = "(", tag_suffix = ")") &
theme(plot.tag = element_text(face = "bold", size = 14))
ggsave("fig/lmm_residue.png", dpi = 600, bg = "white", height = 6, width = 8)
Power analysis of model by simulation
Severity
library(simr)
library(ggplot2)
library(dplyr)
# Fit the original model
sev_lmer <- lmer(mean_yld ~ mean_sev + (1 | trial), data = estria2)

# Create an extended model for simulation
ext_sev_lmer <- extend(sev_lmer, along = "mean_sev", n = 10000)

# Run several powerCurve simulations and store the results
num_simulations <- 50 # number of individual simulations
power_results <- list()

for (i in 1:num_simulations) {
  power_results[[i]] <- as.data.frame(summary(powerCurve(ext_sev_lmer,
                                                         along = "mean_sev",
                                                         nsim = 100,
                                                         breaks = 1:100)))
}

power_df <- bind_rows(power_results, .id = "simulation")
#write_xlsx(power_df, "data/power_df.xlsx")
power_df = read_xlsx("data/power_df.xlsx")
power_df2 <- power_df %>%
  group_by(nlevels) %>%
summarise(mean = mean(mean, na.rm = TRUE),
lower = mean(lower, na.rm = TRUE),
upper = mean(upper, na.rm = TRUE))
power_df %>%
  filter(mean >= 0.80 & mean <= 0.85)
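To read the 80% power threshold off the averaged curve, a small sketch (using the power_df2 columns built above) keeps the levels at or above 0.80 and takes the smallest one:
power_df2 %>%
  filter(mean >= 0.80) %>%    # averaged power at or above 80%
  slice_min(nlevels, n = 1)   # smallest number of severity levels reaching it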
Plotting
ggplot() +
# Gray lines for each individual simulation
geom_line(data = power_df, aes(x = nlevels, y = mean * 100, group = simulation),
color = "gray", alpha = 0.4) +
# Mean line
geom_line(data = power_df2, aes(x = nlevels, y = mean * 100),
color = "black", size = 1.2) +
# Upper and lower lines (black, dashed)
geom_line(data = power_df2, aes(x = nlevels, y = upper * 100),
color = "black", linetype = "dashed", size = 0.8) +
geom_line(data = power_df2, aes(x = nlevels, y = lower * 100),
color = "black", linetype = "dashed", size = 0.8)+
scale_x_continuous(breaks = c(0,5,10,15,20,25,30,35,40,45,50), limits = c(0, 50))+
scale_y_continuous(breaks = c(0,10,20,30,40,50,
60,70,80,90,100), limits = c(0, 100))+
geom_hline(yintercept = 80, linetype = "dashed", size = 1, color = "darkblue")+
geom_vline(xintercept = c(25), linetype = 2, size = 1, color = "darkblue")+
coord_cartesian(xlim = c(0,50))+
ggthemes::theme_few()+
  theme(text = element_text(size = 12, face = "bold"),
legend.position = "none")+
labs(x = "Disease severity (%)",
y = "Power (%)")
ggsave("fig/power_model.png", bg = "white", dpi = 600,width = 6, height = 4)
Damage
Economic losses
sev = data.frame(sev = seq(20, 100, by = 10))
yield = data.frame(yld = seq(4000, 12000, by = 500))
price = data.frame(price = seq(100, 300, by = 100))

combined_data <- expand.grid(sev = sev$sev, yld = yield$yld, price = price$price)

ylmer_simu_min = combined_data %>%
  mutate(loss = ((((((49.3/9691.0)*sev)*100)*yld)/100)/1000)*price)
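As a worked check of the loss expression, take sev = 50%, yld = 8000 kg/ha and price = 200 USD/ton (values rounded):
# 49.3/9691 ~ 0.0051 yield fraction lost per p.p.; *50*100 ~ 25.4% of attainable yield lost
# 25.4% of 8000 kg/ha ~ 2035 kg/ha; /1000 -> ~2.03 t/ha; *200 USD/ton ~ 407 USD/ha
((((((49.3/9691.0)*50)*100)*8000)/100)/1000)*200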
custom_labels <- c(
  "100" = "100 USD/ton",
  "200" = "200 USD/ton",
  "300" = "300 USD/ton"
)
heat_loss = ylmer_simu_min %>%
  filter(price == "200") %>%
ggplot(aes(sev,yld, fill = loss)) +
geom_tile(color = "white", size = 0.8) +
scale_x_continuous(breaks = seq(20, 100, by = 10)) +
scale_y_continuous(breaks = seq(4000, 12000, by = 500)) +
scale_fill_viridis_b(option = "E",
guide = guide_colorbar(barwidth = 0.3, barheight = 15),
breaks = seq(0, 2000, by = 300))+
geom_text(aes(label = as.integer(loss)),
size = 3, colour = "white")+
#facet_wrap(~price, labeller = as_labeller(custom_labels))+
theme_minimal_grid(font_size = 14) +
labs(
y = "Attainable yield (kg/ha)",
x = "Disease severity (%)",
fill = "L (USD/ha)"
) +
  theme(text = element_text(size = 14),
axis.title = element_text(size = 20, face = "bold"),
strip.text = element_text(size= 14, vjust = -1),
#axis.text.y = element_text(hjust = -3),
axis.text.x = element_text(vjust = 3),
legend.position = "right",
legend.justification = 0.5,
panel.grid = element_blank())
hist_loss = ylmer_simu_min |>
  filter(price == "200") %>%
ggplot(aes(loss))+
geom_histogram(color = "white", fill= "#1c1c3c", bins = 15)+
facet_wrap(~price)+
ggthemes::theme_few() +
  scale_x_continuous(breaks = seq(0, 2100, by = 300)) +
labs(
y = "Frequency",
x = "Economic losses (USD/ha)")+
theme(
text = element_text(face = "bold", size = 14),
axis.title = element_text(size = 20, face = "bold"),
axis.text.x = element_text(vjust = 1, size = 14, face = "bold"),
axis.text.y = element_text(vjust = 1, size = 14, face = "bold"),
legend.position = "none",
legend.justification = 0.5,
panel.grid = element_blank(),
strip.text = element_blank()
)
loss_200 = ylmer_simu_min |>
  filter(price == "200")
median(loss_200$loss)
[1] 447.6731
hist_loss = ylmer_simu_min %>%
  filter(price == "200") %>%
ggplot(aes(x = loss)) +
stat_halfeye(fill = "#ffc425", alpha = 0.7)+
geom_vline(aes(xintercept = 447.6731), color = "#1c1c3c", linetype = "dashed", size = 2) +
ggthemes::theme_few() +
  #scale_x_continuous(breaks = seq(0, 2100, by = 100)) +
scale_x_continuous(breaks=c(50,150,250,350,450,550,
650,750,850,950,1050,
1150,1250,1350,1450,1550,1650,1750,1850,1950,
2150))+
labs(
y = "Density",
x = "Economic losses (USD/ha)")+
theme(
text = element_text(face = "bold", size = 14),
axis.title = element_text(size = 20, face = "bold"),
axis.text.x = element_text(vjust = 1, size = 14, face = "bold"),
axis.text.y = element_text(vjust = 1, size = 14, face = "bold"),
legend.position = "none",
legend.justification = 0.5,
panel.grid = element_blank(),
strip.text = element_blank()
)
# Below 25%
sev_loss_b25 = data.frame(sev = seq(5, 25, by = 5))

combined_data_b25 <- expand.grid(sev = sev_loss_b25$sev, yld = yield$yld)

loss_25 = ((((((49.3/9691.0)*25)*combined_data_b25$sev)*combined_data_b25$yld)/100)/1000)*200
mean(loss_25)
[1] 30.52317
min(loss_25)
[1] 5.087194
max(loss_25)
[1] 76.30791
# Above 25%
sev_loss_a25 = data.frame(sev = seq(25, 100, by = 5))

combined_data_a25 <- expand.grid(sev = sev_loss_a25$sev, yld = yield$yld)

loss_25 = ((((((49.3/9691.0)*combined_data_a25$sev)*combined_data_a25$sev)*combined_data_a25$yld)/100)/1000)*200
mean(loss_25)
[1] 361.1908
min(loss_25)
[1] 25.43597
max(loss_25)
[1] 1220.927
(hist_loss + heat_loss) +
  plot_annotation(tag_levels = "a", tag_prefix = "(", tag_suffix = ")") &
theme(plot.tag = element_text(face = "bold", size = 24))
ggsave("fig/loss.png", bg = "white",
dpi = 600,height =8, width = 16)
Relative yield
lmer_slope = lmer_stats %>%
  filter(parameters == "Slope")
library(minpack.lm)
Fx = environment(ecdf(-lmer_slope$estimate))$y
x = environment(ecdf(-lmer_slope$estimate))$x
slope_reg = nlsLM(Fx ~ pgamma(x, shape, rate, log = FALSE),
                  start = c(shape = 2.5, rate = 0.13),
control = nls.lm.control(maxiter = 1024))
summary(slope_reg)
Formula: Fx ~ pgamma(x, shape, rate, log = FALSE)
Parameters:
Estimate Std. Error t value Pr(>|t|)
shape 29.25149 0.62746 46.62 <2e-16 ***
rate 0.58169 0.01255 46.37 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.02492 on 198 degrees of freedom
Number of iterations to convergence: 12
Achieved convergence tolerance: 1.49e-08
shape_res = summary(slope_reg)$coef[1]
rate_res = summary(slope_reg)$coef[2]
ks.test(Fx, pgamma(x, shape_res, rate_res))
Asymptotic two-sample Kolmogorov-Smirnov test
data: Fx and pgamma(x, shape_res, rate_res)
D = 0.08, p-value = 0.5441
alternative hypothesis: two-sided
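A quick sanity check on the fitted gamma (a sketch using shape_res and rate_res from the fit above): its mean, shape/rate, should sit near the typical slope magnitude from the mixed model, and new negative slopes could be drawn by negating gamma samples.
shape_res / rate_res                                 # ~50.3, close to the |mean_sev| estimate of ~49.4
# sim_slopes <- -rgamma(1000, shape_res, rate_res)   # hypothetical draw of new slopes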
slope_plot = lmer_slope %>%
  ggplot(aes(x = estimate)) +
  geom_histogram(aes(y = ..density..), fill = "black", color = "white", bins = 20) + # scale to density
stat_function(fun=function(x) dgamma(-x, shape_res, rate_res), size = 1.2, color = "darkblue")+
ggthemes::theme_few()+
  #facet_wrap(~approach)+
labs(x = "Slope (kg/p.p)",
y = "Frequency")+
theme(text = element_text(size = 10, face = "bold"))
lmer_intercept = lmer_stats %>%
  filter(parameters == "Intercept")
shapiro.test(lmer_slope$estimate)
Shapiro-Wilk normality test
data: lmer_slope$estimate
W = 0.98551, p-value = 0.03812
mean_intercept = mean(lmer_intercept$estimate)
sd_intercept = sd(lmer_intercept$estimate)
intercept_plot <- lmer_intercept %>%
  ggplot(aes(x = estimate)) +
  geom_histogram(aes(y = ..density..), fill = "black", color = "white", bins = 20) + # scale to density
stat_function(fun = dnorm, args = list(mean = mean_intercept, sd = sd_intercept),
color = "darkblue", size = 1.2, linetype = "solid")+
ggthemes::theme_few()+
  #facet_wrap(~approach)+
labs(x = "Intercept (kg/ha)",
y = "Frequency")+
theme(text = element_text(size = 10, face = "bold"))
set.seed(1)
u_j <- rnorm(100, mean = 0, sd = random_effect_sd)
#mean_uj = mean(u_j)
df <- expand.grid(sev = seq(0, 100, by = 1), rep = 1:100)
df$yield = 9691.0 - 49.38*df$sev - rep(u_j, each = 101)
df$relative <- df$yield * 100 / 9691.0
df2 = df %>%
  group_by(sev) %>%
summarise(mean = mean(relative),
up_95 = quantile(relative, 0.975),
low_95 = quantile(relative, 0.025))
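As a worked check of the reference lines drawn below, the fixed-effects line implies a relative yield of roughly 87% at 25% severity:
(9691.0 - 49.38 * 25) * 100 / 9691.0   # ~87.3% of attainable yield remaining at 25% severity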
relative_plot = ggplot() +
  geom_line(data = df, aes(x = sev, y = relative, group = rep),
            color = "grey", alpha = 0.4) + # grey lines for each simulation
  geom_line(data = df2, aes(x = sev, y = mean),
            color = "black", size = 1.4) + # mean line
  geom_line(data = df2, aes(x = sev, y = up_95),
            color = "black", linetype = "dashed", size = 1) + # upper 95% limit
  geom_line(data = df2, aes(x = sev, y = low_95),
            color = "black", linetype = "dashed", size = 1) + # lower 95% limit
scale_y_continuous(breaks = c(20,30, 40,50, 60,
70,80,90,100),
limits = c(20, 100),
expand = c(0, 2))+
scale_x_continuous(breaks = c(0,10,20,30,40,50,60,70,80,90, 100),
limits = c(0, 100),
expand = c(0, 2))+
coord_cartesian(xlim = c(0,100), ylim = c(20,100))+
labs(x = "Disease severity (%)", y = "Relative yield (%) ")+
geom_hline(yintercept = 87,
linetype = 2, color = "darkblue", size = 1)+
geom_vline(xintercept = c(25), linetype = 2, color = "darkblue", size = 1)+
ggthemes::theme_few()+
  theme(text = element_text(size = 10, face = "bold"),
axis.text.x = element_text(size = 10, face = "bold"),
axis.text.y = element_text(size = 10, face = "bold"))
library(patchwork)
#plot_grid(slope_plot / intercept_plot | relative_plot, ncol = 1,
# label_x = -0.02, label_size = 12)
(slope_plot / intercept_plot | relative_plot) +
  plot_annotation(tag_levels = "a", tag_prefix = "(", tag_suffix = ")") &
theme(plot.tag = element_text(face = "bold", size = 12))
ggsave("fig/relative_parameters.png", bg = "white", dpi = 600,
height = 4, width = 6)
Break-even probability
Framework simulation
NORTA function
gera.norm.bid.geral <- function(tamanho.amostra, correlacao, m1, m2, sigma1, sigma2)
{
  ro <- correlacao
  n <- tamanho.amostra
  x <- matrix(0, n, 2)
  for (i in 1:n)
  {
    x[i, 1] <- rnorm(1, m1, sigma1)
    x[i, 2] <- rnorm(1, m2 + ro*sigma1/sigma2*(x[i, 1] - m1), sigma2*(sqrt(1 - ro^2)))
  }
  return(x)
}
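A quick usage check of the function (a sketch): generate a large bivariate sample and confirm the induced correlation is near the target.
set.seed(42)
chk <- gera.norm.bid.geral(5000, 0.15, 0, 0, 1, 1)  # 5000 pairs, target correlation 0.15
cor(chk[, 1], chk[, 2])                              # should be close to 0.15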
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 2949079 157.5 4708747 251.5 4708747 251.5
Vcells 5853168 44.7 64697311 493.7 80871135 617.0
estria2 %>%
  ggplot(aes(mean_sev))+
  geom_histogram(color = "white", fill = "black")+
  ggthemes::theme_few()
Severity
library(minpack.lm)
sev = simulated_data_lmer$sev
#sev = estria2$mean_sev
Fx_sev = environment(ecdf(sev))$y
x_sev = environment(ecdf(sev))$x/100
summary(nlsLM(Fx_sev ~ pbeta(x_sev, shape1, shape2, log = FALSE) ,
start = c(shape1 = 1, shape2 = 1),
control = nls.lm.control(maxiter = 100000)))
Formula: Fx_sev ~ pbeta(x_sev, shape1, shape2, log = FALSE)
Parameters:
Estimate Std. Error t value Pr(>|t|)
shape1 1.2041125 0.0005694 2115 <2e-16 ***
shape2 3.6414300 0.0020387 1786 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.004923 on 9998 degrees of freedom
Number of iterations to convergence: 7
Achieved convergence tolerance: 1.49e-08
ks.test(Fx_sev,pbeta(x_sev,1.2041125,3.6414300))
Asymptotic two-sample Kolmogorov-Smirnov test
data: Fx_sev and pbeta(x_sev, 1.2041125, 3.64143)
D = 0.0111, p-value = 0.5689
alternative hypothesis: two-sided
plot(x_sev,Fx_sev)
curve(pbeta(x, 1.2041125,3.6414300),0,1, add = T)
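As a check on the fitted beta, its mean shape1/(shape1 + shape2) is about 0.25, matching the mean simulated severity of roughly 25% (0.25 on the 0-1 scale):
1.2041125 / (1.2041125 + 3.6414300)   # ~0.249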
Intercept
intercept_bls = regression_trial_m
intercept_bls[, 1] = NULL

mean_intercept = mean(intercept_bls$Intercept)
sd_intercept = sd(intercept_bls$Intercept)
plot(ecdf(intercept_bls$Intercept))
curve(pnorm(x, mean_intercept,sd_intercept), add = T, col = "red")
Fx_res_b0 = environment(ecdf(intercept_bls$Intercept))$y
x_res_b0 = environment(ecdf(intercept_bls$Intercept))$x
ks.test(Fx_res_b0, pnorm(x_res_b0, mean(intercept_bls$Intercept), sd(intercept_bls$Intercept)))
Asymptotic two-sample Kolmogorov-Smirnov test
data: Fx_res_b0 and pnorm(x_res_b0, mean(intercept_bls$Intercept), sd(intercept_bls$Intercept))
D = 0.045, p-value = 0.9874
alternative hypothesis: two-sided
Slope
slope_bls = regression_trial_m
slope_bls[, 2] = NULL

mean_slope = mean(slope_bls$Slope)
sd_slope = sd(slope_bls$Slope)
plot(ecdf(slope_bls$Slope))
curve(pnorm(x, mean_slope, sd_slope), add = T, col = "red")
library(minpack.lm)
Fx = environment(ecdf(-slope_bls$Slope))$y
x = environment(ecdf(-slope_bls$Slope))$x

slope_reg = nlsLM(Fx ~ pgamma(x, shape, rate, log = FALSE),
                  start = c(shape = 2.5, rate = 0.13),
control = nls.lm.control(maxiter = 1024))
summary(slope_reg)
Formula: Fx ~ pgamma(x, shape, rate, log = FALSE)
Parameters:
Estimate Std. Error t value Pr(>|t|)
shape 29.25149 0.62746 46.62 <2e-16 ***
rate 0.58169 0.01255 46.37 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.02492 on 198 degrees of freedom
Number of iterations to convergence: 12
Achieved convergence tolerance: 1.49e-08
shape = summary(slope_reg)$coef[1]
rate = summary(slope_reg)$coef[2]

Fx = environment(ecdf(-slope_bls$Slope))$y
x = environment(ecdf(-slope_bls$Slope))$x
ks.test(Fx, pgamma(x, shape, rate))
Asymptotic two-sample Kolmogorov-Smirnov test
data: Fx and pgamma(x, shape, rate)
D = 0.08, p-value = 0.5441
alternative hypothesis: two-sided
cor(regression_trial_m$Slope,regression_trial_m$Intercept)
[1] -0.1547164
j_res <- gera.norm.bid.geral(10000, 0.15, 0, 0, 1, 1)
plot(j_res[,1],j_res[,2])
Corn price
corn = gsheet2tbl("https://docs.google.com/spreadsheets/d/1LcLVKb6bW7tVaiu6reLsT8sjFR1hwUcembZ7uxAoBP8/edit?usp=sharing")

corn = corn %>%
  filter(state %in% c("PR")) %>%
  filter(year <= 2019)

bls_price = corn %>%
  #filter(year <= 2019) %>%
  mutate(price = (price/60)/4)
bls_price
min(bls_price$price)
[1] 0.07679167
mean(bls_price$price)
[1] 0.1153426
mean(bls_price$price)
[1] 0.1153426
median(bls_price$price)
[1] 0.12
sd(bls_price$price)
[1] 0.0198195
bls_price %>%
  ggplot(aes(price))+
  geom_histogram(fill = "steelblue", color = "white", bins = 10)+
  ggthemes::theme_few()+
  labs(x = "Corn price",
y = "Frequency")+
scale_x_continuous(breaks = seq(0,1,by=0.025))+
theme(text = element_text(size = 12, face = "bold"),
axis.title = element_text(size = 14, face = "bold"),
legend.position = "none")
hist((bls_price$price), prob = T)
curve(dnorm(x, mean(bls_price$price), sd(bls_price$price)),0.15,0.35, add = T)
plot(ecdf(bls_price$price))
curve(pnorm(x, mean(bls_price$price), sd(bls_price$price)),0.2,0.35, add = T)
mean(bls_price$price)
[1] 0.1153426
median(bls_price$price)
[1] 0.12
shapiro.test(bls_price$price)
Shapiro-Wilk normality test
data: bls_price$price
W = 0.94849, p-value = 0.0937
Kolmogorov-Smirnov Test
Fx = environment(ecdf(bls_price$price))$y
x = environment(ecdf(bls_price$price))$x
ks.test(Fx, pnorm(x, mean(bls_price$price), sd(bls_price$price)))
Exact two-sample Kolmogorov-Smirnov test
data: Fx and pnorm(x, mean(bls_price$price), sd(bls_price$price))
D = 0.13889, p-value = 0.8849
alternative hypothesis: two-sided
plot(Fx, pnorm(x, mean(bls_price$price), sd(bls_price$price)))
Visualization
price_plot = bls_price %>%
  ggplot(aes(price))+
  geom_histogram(aes(y = ..density..), bins = 10, color = "white", fill = "black")+ #"#1C8C20"
  #stat_function(fun=function(x) dnorm(x, mean(sbr_price$price), sd(sbr_price$price)), color= "black", size = 1.2)+
  ggthemes::theme_few()+
  labs(x = "Corn price (USD/kg)", y = "Frequency")+
scale_x_continuous(breaks = seq(0,1,by=0.025))+
theme(text = element_text(size = 12, face = "bold"),
axis.title = element_text(size = 14, face = "bold"),
legend.position = "none")
price_plot
ggsave("fig/soybean_price.png", bg = "white",
dpi = 600, height = 5, width = 6)
Simulation
set.seed(1)
n = 40000
lambda = seq(0, 1, by = 0.05)
fun_price = seq(-10, 260, by = 15)
n_aplication = 1
operational_cost = 10
comb_matrix = as.matrix(data.table::CJ(lambda, fun_price))
colnames(comb_matrix) = c("lambda", "fun_price")
comb_matrix = cbind(comb_matrix, operational_cost, n_aplication)
C = comb_matrix[,"n_aplication"]*(comb_matrix[,"operational_cost"] + comb_matrix[,"fun_price"])
comb_matrix = cbind(comb_matrix, C)
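For example (a worked illustration with an assumed product price of 50 USD/ha), one application at the 10 USD/ha operational cost gives a treatment cost of C = 1 * (10 + 50) = 60 USD/ha:
1 * (10 + 50)   # example C (USD/ha) when fun_price = 50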
N = length(comb_matrix[,1])*n
big_one = matrix(0, ncol = 12, nrow = N)
big_one[,1] = rep(comb_matrix[,1], n)
big_one[,2] = rep(comb_matrix[,2], n)
big_one[,3] = rep(comb_matrix[,3], n)
big_one[,4] = rep(comb_matrix[,4], n)
big_one[,5] = rep(comb_matrix[,5], n)
set.seed(1)
sn = rbeta(N, 1.2041125, 3.6414300)
sf = sn*(1-big_one[,1])
# simulating the coefficients
set.seed(1)
normal_correlated <- gera.norm.bid.geral(N, 0.15, 0, 0, 1, 1)
b0_n = pnorm(normal_correlated[,2])
b1_n = pnorm(normal_correlated[,1])
b0 = qnorm(b0_n, mean_intercept, sd_intercept)
b1 = -qgamma(b1_n, shape, rate)
rm(b0_n, b1_n, normal_correlated)
# Calculating the alpha coefficient (relative damage, % yield loss per p.p. of severity)
alfa = (b1/b0)*100

# Calculating yield gain
## Moderate Resistant (MR)
yn = b0 - (-alfa*b0*sn)
yf = b0 - (-alfa*b0*sf)
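As a quick check on the scale of alfa using the fixed-effect estimates: about 0.51% of the attainable yield is lost per percentage point of severity, and with sf = 0 (complete control) yf reduces to the intercept b0.
(-49.38 / 9691.0) * 100   # ~-0.51 %/p.p.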
# Simulating corn price
set.seed(1)
soy_price = rnorm(N, mean(bls_price$price), sd(bls_price$price))
#income = yield_gain*soy_price # calculating the income
big_one[,6] = yn
big_one[,7] = yf
big_one[,8] = b1
big_one[,9] = b0
big_one[,10] = sn
big_one[,11] = sf
big_one[,12] = soy_price
colnames(big_one) = c("lambda","fun_price","operational_cost","n_aplication","C","yn","yf",
"b1","b0","sn","sf","soy_price")
big_one_df = as.data.frame(big_one) %>%
  filter(b0 >= 0) %>%
filter(yn>0) %>%
filter(alfa > -3 & alfa < 0) %>%
mutate(yield_gain = yf-yn,
yield_gain_perc = ((yf/yn)-1)*100,
income = yield_gain*soy_price,#0.2
CP = C/soy_price,#0.2
profit = (income>=C)*1) %>%
filter(C <= 180) %>%
filter(lambda >= 0.4) %>%
filter(lambda <= 0.8)
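The profit column is a 0/1 indicator of the income covering the treatment cost, so its mean estimates the break-even probability Pr(I ≥ C); the heat maps below compute exactly this within each efficacy-by-cost cell. A minimal overall check:
mean(big_one_df$profit)   # overall probability that income covers the treatment cost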
gc()
#write_csv(big_one_df,"data/big_one_df.csv")
big_one_df = read_csv("data/big_one_df.csv")
sn = big_one_df %>%
  ggplot(aes(sn))+
geom_histogram(color = "white", fill = "steelblue", bins = 10, alpha= .5)+
theme(text = element_text(face = "bold", size = 14),
axis.title = element_text(face = "bold",size = 16))+
labs(y = "Frequency",
x = "BLS severity (%)")+
ggthemes::theme_few()
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 2957877 158.0 4708747 251.5 4708747 251.5
Vcells 85476209 652.2 125675078 958.9 85646906 653.5
b0_graphic = big_one_df %>%
  ggplot(aes(b0))+
geom_histogram(color = "white", fill = "steelblue", bins = 10, alpha= .5)+
#scale_x_continuous(breaks = c(0,1000,2000,3000,4000,5000,6000,7000), limits = c(0, 7000))+
theme(text = element_text(face = "bold", size = 14),
axis.title = element_text(size = 16, face = "bold"))+
labs(y = "Frequency",
x = "Intercept (kg/ha)")+
ggthemes::theme_few()
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 2958594 158.1 4708747 251.5 4708747 251.5
Vcells 85478145 652.2 125675078 958.9 85646906 653.5
b1_graphic = big_one_df %>%
  ggplot(aes(b1))+
geom_histogram(color = "white", fill = "steelblue", bins = 10, alpha= .5)+
#scale_x_continuous(breaks = c(-100,-90,-80,-70,-60,-50,-40,-30,-20,-10,0), limits = c(-100,0))+
theme(text = element_text(face = "bold", size = 14),
axis.title = element_text(size = 16, face = "bold"))+
labs(y = "Frequency",
x = "Slope (kg/p.p.)")+
ggthemes::theme_few()
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 2959325 158.1 4708747 251.5 4708747 251.5
Vcells 85480112 652.2 125675078 958.9 85646906 653.5
b1_graphic
alpha_graphic = big_one_df %>%
  filter(C <= 180) %>%
filter(lambda >= 0.4) %>%
filter(lambda <= 0.8) %>%
filter(C >= 60) %>%
ggplot(aes(alfa))+
geom_histogram(color = "white", fill = "black", bins = 10)+
#scale_x_continuous(breaks = c(-2.0,-1.75,-1.50,-1.25,-1.00,-0.75,-0.50,-0.25,0.0), limits = c(-2.0,0.0))+
theme(text = element_text(face = "bold", size = 14),
axis.title = element_text(size = 16, face = "bold"))+
labs(y = "Frequency",
x = "Yield loss (%/p.p.)")+
ggthemes::theme_few()
gc()
alpha_graphic
#alpha_res_graphic,alpha_m_sus_graphic,alpha_sus_graphic,
plot_grid(sn_res_graphic,b0_res_graphic,b1_res_graphic, ncol = 3, labels = c("(a)", "(b)", "(c)"),label_x = -0.03)
ggsave("fig/simulated_variables.png", dpi = 1000, bg = "white",
height = 8, width = 12)
heat <- big_one_df %>%
  filter(C <= 180) %>%
filter(lambda >= 0.4) %>%
filter(lambda <= 0.8) %>%
filter(C >= 15) %>%
group_by(lambda, C) %>%
summarise(n = n(), sumn = sum(profit), prob = sumn / n)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 2959523 158.1 4708747 251.5 4708747 251.5
Vcells 85481175 652.2 261029279 1991.5 261024076 1991.5
heat_graphic = heat %>%
  ggplot(aes(as.factor(lambda * 100), as.factor(C), fill = prob)) +
geom_tile(size = 0.5, color = "white") +
scale_fill_viridis_b(option = "D", direction = -1) +
scale_color_manual(values = c("#E60E00", "#55E344")) +
guides(color = guide_legend(override.aes = list(size = 2))) +
labs(x = "Fungicide efficacy (%)",
y = "Fungicide + Application cost ($)",
fill = "Pr(I \u2265 C)",
color = "") +
# facet_wrap(vars(class, class2)) +
ggthemes::theme_few()+
  theme(strip.text = element_text(face = "bold", size = 14),
text = element_text(face = "bold", size = 14))
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 2962435 158.3 4708747 251.5 4708747 251.5
Vcells 85486263 652.3 261029279 1991.5 261024076 1991.5
big = big_one_df %>%
  mutate(class = case_when(sn > 0.25 ~ "High severity",
                           sn <= 0.25 ~ "Low severity"))
class <- big %>%
  filter(C <= 105) %>%
filter(lambda >= 0.4) %>%
filter(lambda <= 0.8) %>%
filter(C >= 15) %>%
group_by(lambda, C, class) %>%
summarise(n = n(), sumn = sum(profit), prob = sumn / n, .groups = 'drop') %>%
ungroup()
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 2962483 158.3 4708747 251.5 4708747 251.5
Vcells 90167142 688.0 261029279 1991.5 261024076 1991.5
library(viridis)
heat_graphic = class %>%
  filter(!is.na(class)) %>%
ggplot( aes(as.factor(lambda * 100), as.factor(C), fill = prob)) +
geom_tile(size = 0.5, color = "white") +
#geom_text(aes(label = paste(round(prob, 2), sep = "")),
# size = 4, colour = "white")+
scale_fill_viridis_b(option = "E", direction = -1)+
#scale_fill_viridis(discrete = T, option = "E") +
guides(color = guide_legend(override.aes = list(size = 2))) +
labs(x = "Treatment efficacy (%)",
y = "Treatment cost (USD/ha)",
fill = "Pr(I \u2265 C)",
color = "") +
facet_wrap(vars(class), ncol =1) +
ggthemes::theme_few()+
  theme(strip.text = element_text(face = "bold", size = 14),
text = element_text(face = "bold", size = 14),
legend.position = "right")
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 2963942 158.3 4708747 251.5 4708747 251.5
Vcells 90170579 688.0 261029279 1991.5 261024076 1991.5
low_class = class %>%
  filter(class == "Low severity")
min(low_class$prob)
[1] 0.0002628466
max(low_class$prob)
[1] 0.8714784
mean(low_class$prob)
[1] 0.3308383
low_class %>%
  filter(C <= 15)
high_class = class %>%
  filter(class == "High severity")
min(high_class$prob)
[1] 0.3460665
max(high_class$prob)
[1] 1
mean(high_class$prob)
[1] 0.9033795
overall = big %>%
  filter(C <= 105) %>%
  filter(C >= 15) %>%
  #mutate(sev_class = " Overall") %>%
  dplyr::group_by(lambda, class) %>%
  summarise(yield_gain_median = median(yield_gain),
yield_gain_mean = mean(yield_gain),
up_95 = quantile(yield_gain, 0.975),
low_95 = quantile(yield_gain, 0.025),
up_75 = quantile(yield_gain, 0.75),
low_75 = quantile(yield_gain, 0.25))
gain_graphic = overall %>%
  ggplot(aes(lambda*100, yield_gain_mean))+
geom_line(aes(lambda*100, low_95),
linetype = 2,
size = 1,
fill = NA, color = "black")+
geom_line(aes(lambda*100, up_95),
linetype = 2,
size = 1,
fill = NA,color = "black")+
geom_line(size = 1.4, aes(lambda*100,yield_gain_median), color = "#ffc425")+
#scale_y_continuous(breaks = c(0, 500, 1000, 1500,2000,
# 2500, 3000),
# limits = c(0, 3000))+
#scale_x_continuous(breaks = c(40,50,60,70,80), limits = c(40, 80))+
#scale_color_viridis_d()+
#scale_color_manual(values = c('steelblue', '#9ccb86', 'darkred'))+
ggthemes::theme_few()+
  facet_wrap(~class, ncol = 1)+
#facet_wrap(~class+class2)+
labs(x = "Treatment efficacy (%)",
y = "Yield difference (kg/ha)",
color = "Tolerance level",
linetype = "", fill = "")+
theme(text = element_text(face = "bold", size = 14),
strip.text = element_text(size = 14),
legend.position = "top")
library(cowplot)
(gain_graphic + heat_graphic) +
  plot_annotation(tag_levels = "a", tag_prefix = "(", tag_suffix = ")") &
theme(plot.tag = element_text(face = "bold", size = 16))
ggsave("fig/break-even_gain.png", dpi = 600, bg = "white",
width = 8, height = 6)