This analysis addresses the question of whether preseason forecast cohorts \(cohort_{pre}\) (i.e., returning run sizes) are an effective predictor of estimated actual catches \(catch_{post}\). That is, does \(catch_{post} \sim cohort_{pre}\) hold? The catches are the quantity of interest, the forecast is the quantity that is available. If a relationship exists between post-season estimates of catches or encounters and post-season estimates of returning cohorts, then the utility of that relationship in the preseason depends on how well the preseason cohort forecasts capture the post-season mortality estimates. As illustrated below, basing the preseason catch estimate on the preseason forecast does not appear to be an effective approach to accurately projecting catches.
First get the forecasts, taken as the time 1 StartCohort.
# Preseason forecast cohorts: the time-1 StartCohort for stocks 1-126,
# read from the preseason FRAM database handle `mdb_pre`.
# NOTE(review): assumes framr::read_coho_cohort() returns columns
# RunYear, StockID, and cohort, as used by the joins below — confirm.
cohort_pre_ps <- framr::read_coho_cohort(mdb_pre, stocks = 1:126)
While a linear fit is possible, the closure in 2016 appears to exert considerable leverage, likely driving any fitted relationship. More importantly, the scatter shows a clear separation between older and more recent years (with both suggesting a negative correlation).
# Total post-season catch vs. total preseason cohort, matched by run year.
# Left side: post-season ("pst") estimates for Areas 5 and 6, summed over
# all timesteps/vars within type x area x year. Right side: the preseason
# cohort summed over all stocks per RunYear. inner_join keeps only years
# present in both tables.
cc_tot <- inner_join(
  fs_ps_spt |>
    filter(type == "pst", area_code %in% c("05", "06")) |>
    group_by(type, area_code, yr) |>
    summarise(catch_tot = sum(val), .groups = "drop")
  ,
  cohort_pre_ps |>
    group_by(yr = RunYear) |>
    summarise(cohort_tot = sum(cohort), .groups = "drop")
  ,
  by = "yr"
)
# Scatter of total catch vs. total preseason cohort, labeled by year,
# one panel per area with free axes; a simple lm fit overlaid.
# Fix: spell out FALSE rather than F (F is an ordinary, reassignable
# binding and should never be used for the logical constant).
cc_tot |>
  ggplot(aes(cohort_tot, catch_tot)) +
  geom_smooth(method = "lm", se = FALSE) +
  geom_text(aes(label = yr)) +
  facet_wrap(~area_code, scales = "free")
#> `geom_smooth()` using formula 'y ~ x'
However, while the A5 fit is not significant (p ≈ 0.10), the A6 linear relationship is weakly significant at the 5% level (p ≈ 0.028).
# Area 5, all years: slope p = 0.103 (output below) — not significant at 5%.
summary(lm(catch_tot ~ cohort_tot, data = filter(cc_tot, area_code == "05")))
#>
#> Call:
#> lm(formula = catch_tot ~ cohort_tot, data = filter(cc_tot, area_code ==
#> "05"))
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -16071 -7267 -2958 2375 28374
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -2.652e+03 1.431e+04 -0.185 0.857
#> cohort_tot 2.930e-02 1.633e-02 1.794 0.103
#>
#> Residual standard error: 13730 on 10 degrees of freedom
#> Multiple R-squared: 0.2434, Adjusted R-squared: 0.1678
#> F-statistic: 3.218 on 1 and 10 DF, p-value: 0.1031
# Area 6, all years: slope p = 0.028 (output below) — weakly significant at 5%.
summary(lm(catch_tot ~ cohort_tot, data = filter(cc_tot, area_code == "06")))
#>
#> Call:
#> lm(formula = catch_tot ~ cohort_tot, data = filter(cc_tot, area_code ==
#> "06"))
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -5845.9 -2287.5 -16.1 2110.0 7196.4
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -4.483e+03 4.083e+03 -1.098 0.2979
#> cohort_tot 1.194e-02 4.658e-03 2.563 0.0282 *
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 3917 on 10 degrees of freedom
#> Multiple R-squared: 0.3965, Adjusted R-squared: 0.3361
#> F-statistic: 6.569 on 1 and 10 DF, p-value: 0.02823
Removing 2016 underscores that a simple linear fit is possible but ill-advised.
# Same scatter with the high-leverage 2016 closure year removed.
# Fix: spell out FALSE rather than F (F is reassignable).
cc_tot |>
  filter(yr != "2016") |>
  ggplot(aes(cohort_tot, catch_tot)) +
  geom_smooth(method = "lm", se = FALSE) +
  geom_text(aes(label = yr)) +
  facet_wrap(~area_code, scales = "free")
#> `geom_smooth()` using formula 'y ~ x'
And removing 2016 also makes the A6 relationship non-significant (p ≈ 0.077).
# Area 5 with 2016 excluded: slope p = 0.390 (output below) — not significant.
summary(lm(catch_tot ~ cohort_tot, data = filter(cc_tot, area_code == "05", yr != "2016")))
#>
#> Call:
#> lm(formula = catch_tot ~ cohort_tot, data = filter(cc_tot, area_code ==
#> "05", yr != "2016"))
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -18138 -8114 -1920 4494 26322
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 6.243e+03 2.013e+04 0.310 0.763
#> cohort_tot 1.998e-02 2.211e-02 0.904 0.390
#>
#> Residual standard error: 14150 on 9 degrees of freedom
#> Multiple R-squared: 0.08322, Adjusted R-squared: -0.01864
#> F-statistic: 0.817 on 1 and 9 DF, p-value: 0.3896
# Area 6 with 2016 excluded: slope p = 0.077 (output below) — no longer
# significant at 5%, confirming 2016's leverage on the earlier fit.
summary(lm(catch_tot ~ cohort_tot, data = filter(cc_tot, area_code == "06", yr != "2016")))
#>
#> Call:
#> lm(formula = catch_tot ~ cohort_tot, data = filter(cc_tot, area_code ==
#> "06", yr != "2016"))
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -6077.2 -2722.6 -492.1 2285.9 7072.3
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -5.364e+03 5.858e+03 -0.916 0.3837
#> cohort_tot 1.286e-02 6.434e-03 1.999 0.0767 .
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 4118 on 9 degrees of freedom
#> Multiple R-squared: 0.3075, Adjusted R-squared: 0.2305
#> F-statistic: 3.996 on 1 and 9 DF, p-value: 0.07669
Finally, a focus on the most recent years indicates that the preseason cohort does not effectively inform the postseason catch.
# Restrict to recent years (2017-2021) only.
# Fix: spell out FALSE rather than F (F is reassignable).
cc_tot |>
  filter(yr %in% as.character(2017:2021)) |>
  ggplot(aes(cohort_tot, catch_tot)) +
  geom_smooth(method = "lm", se = FALSE) +
  geom_text(aes(label = yr)) +
  facet_wrap(~area_code, scales = "free")
#> `geom_smooth()` using formula 'y ~ x'
# Area 5, 2017-2021 only: negative slope, p = 0.578 (output below) —
# uninformative on 4 observations.
summary(lm(catch_tot ~ cohort_tot, data = filter(cc_tot, area_code == "05", yr %in% as.character(2017:2021))))
#>
#> Call:
#> lm(formula = catch_tot ~ cohort_tot, data = filter(cc_tot, area_code ==
#> "05", yr %in% as.character(2017:2021)))
#>
#> Residuals:
#> 1 2 3 4
#> -8250 2093 3918 2239
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 3.477e+04 3.553e+04 0.979 0.431
#> cohort_tot -3.242e-02 4.931e-02 -0.657 0.578
#>
#> Residual standard error: 6812 on 2 degrees of freedom
#> Multiple R-squared: 0.1777, Adjusted R-squared: -0.2334
#> F-statistic: 0.4322 on 1 and 2 DF, p-value: 0.5785
# Area 6, 2017-2021 only: negative slope, p = 0.557 (output below) —
# likewise uninformative.
summary(lm(catch_tot ~ cohort_tot, data = filter(cc_tot, area_code == "06", yr %in% as.character(2017:2021))))
#>
#> Call:
#> lm(formula = catch_tot ~ cohort_tot, data = filter(cc_tot, area_code ==
#> "06", yr %in% as.character(2017:2021)))
#>
#> Residuals:
#> 1 2 3 4
#> -3228.7 1285.5 1357.8 585.4
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 1.320e+04 1.393e+04 0.948 0.443
#> cohort_tot -1.350e-02 1.933e-02 -0.698 0.557
#>
#> Residual standard error: 2670 on 2 degrees of freedom
#> Multiple R-squared: 0.196, Adjusted R-squared: -0.2059
#> F-statistic: 0.4877 on 1 and 2 DF, p-value: 0.5572
Nonetheless, given the prevalence of MSF opportunity, it may be preferable to examine only the marked cohort.
# Total catch vs. the MARKED preseason cohort only.
# NOTE(review): `StockID %% 2 < 1` keeps even StockIDs — presumably the
# marked member of each FRAM marked/unmarked stock pair; confirm against
# the FRAM stock numbering convention.
cc_m <- inner_join(
  fs_ps_spt |>
    filter(type == "pst", area_code %in% c("05", "06")) |>
    group_by(type, area_code, yr) |>
    summarise(catch_tot = sum(val), .groups = "drop")
  ,
  cohort_pre_ps |>
    filter(StockID %% 2 < 1) |>
    group_by(yr = RunYear) |>
    summarise(cohort_m = sum(cohort), .groups = "drop")
  ,
  by = "yr"
)
Similar to the total cohort, the relationship is highly leveraged by 2016 and effectively absent since 2017.
# Three stacked panels (patchwork, one column): all years, 2016 removed,
# and 2017-2021 only — total catch vs. marked preseason cohort.
# Fix: spell out FALSE rather than F (F is reassignable).
(cc_m |>
    ggplot(aes(cohort_m, catch_tot)) +
    geom_smooth(method = "lm", se = FALSE) +
    geom_text(aes(label = yr)) +
    facet_wrap(~area_code, scales = "free")
) + (
  cc_m |>
    filter(yr != "2016") |>
    ggplot(aes(cohort_m, catch_tot)) +
    geom_smooth(method = "lm", se = FALSE) +
    geom_text(aes(label = yr)) +
    facet_wrap(~area_code, scales = "free")
) + (
  cc_m |>
    filter(yr %in% as.character(2017:2021)) |>
    ggplot(aes(cohort_m, catch_tot)) +
    geom_smooth(method = "lm", se = FALSE) +
    geom_text(aes(label = yr)) +
    facet_wrap(~area_code, scales = "free")
) + plot_layout(ncol = 1)
#> `geom_smooth()` using formula 'y ~ x'
#> `geom_smooth()` using formula 'y ~ x'
#> `geom_smooth()` using formula 'y ~ x'
This remains the case when comparing only marked catch to the marked cohort.
# Marked catch (MSFQuota records only) vs. the marked preseason cohort.
# NOTE(review): assumes var == "MSFQuota" identifies the mark-selective
# (marked) catch component in fs_ps_spt — confirm upstream.
cc_mm <- inner_join(
  fs_ps_spt |>
    filter(type == "pst", area_code %in% c("05", "06"), var == "MSFQuota") |>
    group_by(type, area_code, yr) |>
    summarise(catch_m = sum(val), .groups = "drop")
  ,
  cohort_pre_ps |>
    filter(StockID %% 2 < 1) |>
    group_by(yr = RunYear) |>
    summarise(cohort_m = sum(cohort), .groups = "drop")
  ,
  by = "yr"
)
# Same three stacked views for marked catch vs. marked cohort:
# all years, 2016 removed, and 2017-2021 only.
# Fix: spell out FALSE rather than F (F is reassignable).
(cc_mm |>
    ggplot(aes(cohort_m, catch_m)) +
    geom_smooth(method = "lm", se = FALSE) +
    geom_text(aes(label = yr)) +
    facet_wrap(~area_code, scales = "free")
) + (
  cc_mm |>
    filter(yr != "2016") |>
    ggplot(aes(cohort_m, catch_m)) +
    geom_smooth(method = "lm", se = FALSE) +
    geom_text(aes(label = yr)) +
    facet_wrap(~area_code, scales = "free")
) + (
  cc_mm |>
    filter(yr %in% as.character(2017:2021)) |>
    ggplot(aes(cohort_m, catch_m)) +
    geom_smooth(method = "lm", se = FALSE) +
    geom_text(aes(label = yr)) +
    facet_wrap(~area_code, scales = "free")
) + plot_layout(ncol = 1)
#> `geom_smooth()` using formula 'y ~ x'
#> `geom_smooth()` using formula 'y ~ x'
#> `geom_smooth()` using formula 'y ~ x'
Limiting to coho FRAM timestep 4, the month of September, yields an inverse relationship for A5 and a non-informative one for A6.
# As cc_mm, but catch restricted to FRAM timestep 4 (September) via ts == 4;
# the cohort side is unchanged (marked stocks, summed per RunYear).
cc_mm_4 <- inner_join(
  fs_ps_spt |>
    filter(type == "pst", area_code %in% c("05", "06"), var == "MSFQuota", ts == 4) |>
    group_by(type, area_code, yr) |>
    summarise(catch_m_4 = sum(val), .groups = "drop")
  ,
  cohort_pre_ps |>
    filter(StockID %% 2 < 1) |>
    group_by(yr = RunYear) |>
    summarise(cohort_m = sum(cohort), .groups = "drop")
  ,
  by = "yr"
)
# Two stacked panels: September (timestep 4) marked catch vs. marked
# cohort, for all years and for 2017-2021 only.
# Fix: spell out FALSE rather than F (F is reassignable).
(cc_mm_4 |>
    ggplot(aes(cohort_m, catch_m_4)) +
    geom_smooth(method = "lm", se = FALSE) +
    geom_text(aes(label = yr)) +
    facet_wrap(~area_code, scales = "free")
) + (
  cc_mm_4 |>
    filter(yr %in% as.character(2017:2021)) |>
    ggplot(aes(cohort_m, catch_m_4)) +
    geom_smooth(method = "lm", se = FALSE) +
    geom_text(aes(label = yr)) +
    facet_wrap(~area_code, scales = "free")
) + plot_layout(ncol = 1)
#> `geom_smooth()` using formula 'y ~ x'
#> `geom_smooth()` using formula 'y ~ x'