diff --git a/Makefile b/Makefile index 2a6aeaa..e4540da 100644 --- a/Makefile +++ b/Makefile @@ -117,7 +117,7 @@ data/lagosne_depth_predictors.csv manuscript/combined.pdf: manuscript/figures.pdf manuscript/appendix.pdf pdftk manuscript/figures.pdf manuscript/appendix.pdf output manuscript/combined.pdf -manuscript/appendix.pdf: manuscript/appendix.Rmd \ +manuscript/appendix.pdf: manuscript/si_template_2019.tex \ figures/00_map_bathy-1.pdf \ figures/lgnemanual-vs-bathy-depth-1.pdf \ figures/01_contrasts-1.pdf \ @@ -125,9 +125,7 @@ figures/01_hypsography-1.pdf \ figures/01_geometry_grid-1.pdf \ figures/02_depth_model_importance-1.pdf \ figures/gg_effort-1.pdf - Rscript -e "rmarkdown::render('$<', output_format = 'pdf_document')" - -pdftk manuscript/appendix.pdf cat 2-end output manuscript/appendix2.pdf - -mv manuscript/appendix2.pdf manuscript/appendix.pdf + cd manuscript && make appendix.pdf manuscript/figures.pdf: manuscript/figures.Rmd \ manuscript/tables.pdf \ @@ -153,6 +151,10 @@ manuscript/manuscript.pdf: manuscript/agujournaltemplate.tex \ manuscript/lagosdepth.bib cd manuscript && make manuscript.pdf +wordcountms: + @echo wordcount: + @~/.TinyTeX/bin/x86_64-linux/texcount manuscript/agujournaltemplate.tex -sum -sub=section + # data/gis.gpkg figures/00_map-1.pdf: figures/00_maps.Rmd Rscript -e "rmarkdown::render('$<', output_format = 'pdf_document')" diff --git a/manuscript/Makefile b/manuscript/Makefile index 653878d..cbd4ce3 100644 --- a/manuscript/Makefile +++ b/manuscript/Makefile @@ -1,7 +1,10 @@ -manuscript.pdf: agujournaltemplate.pdf +manuscript.pdf: agujournaltemplate.pdf wrr_submission.pdf cp agujournaltemplate.pdf manuscript.pdf @echo wordcount: - @~/.TinyTeX/bin/x86_64-linux/texcount agujournaltemplate.tex -sum -sub=section -1 + @~/.TinyTeX/bin/x86_64-linux/texcount agujournaltemplate.tex -sum -sub=section + +appendix.pdf: si_template_2019.pdf + cp si_template_2019.pdf appendix.pdf agujournaltemplate.pdf: agujournaltemplate.tex lagosdepth.bib 
pdflatex agujournaltemplate diff --git a/manuscript/agujournaltemplate.tex b/manuscript/agujournaltemplate.tex index 482043c..b7dee0d 100644 --- a/manuscript/agujournaltemplate.tex +++ b/manuscript/agujournaltemplate.tex @@ -65,7 +65,7 @@ \begin{document} - +%TC:ignore %% ------------------------------------------------------------------------ %% % Title % @@ -139,7 +139,7 @@ \begin{keypoints} \item Geometric models to predict lake depth, which require in-lake slope, assume that nearshore land slope is a good proxy for in-lake slope. \item Using data from thousands of lakes, we show that nearshore land slope is a poor proxy for in-lake slope and increases prediction error. -\item Increases in prediction error were systematic such that depth was overpredicted in concave and reservoir lakes. +\item Prediction errors were systematic such that depth was overpredicted in concave and reservoir lakes. \end{keypoints} %% ------------------------------------------------------------------------ %% @@ -160,32 +160,10 @@ % see http://sharingscience.agu.org/creating-plain-language-summary/) % %% ------------------------------------------------------------------------ %% - +%TC:endignore %% \begin{abstract} starts the second page - \begin{abstract} - Lake depth is a critical characteristic that influences many important - ecological processes in lakes. Unfortunately, lake depth measurements - are labor-intensive to gather and are only available for a small - fraction of lakes globally. Therefore, scientists have tried to predict - lake depth from characteristics that are easily obtained for all lakes - such as lake surface area or the slope of the land surrounding a lake. 
- One approach for predicting lake depth simulates lake basins using a - geometric model where nearshore land slope is assumed to be a - representative proxy for in-lake slope and the distance to the center of - the lake is assumed to be a representative proxy for the distance to the - deepest point of the lake. However, these assumptions have rarely been - tested in a broad range of lakes. We used bathymetry data from 5,000 - lakes and reservoirs to test these assumptions and to examine whether - differences in lake type or shape influences depth prediction error. We - found that nearshore land slope was not a representative proxy of - in-lake slope and using it for prediction increases prediction error - substantially relative to models using true in-lake slope. We also found - that models using nearshore land slope as a proxy systematically - overpredicts lake depth in concave lakes (i.e. bowl-shaped; up to 18\% - of lakes in the study population) and reservoir lakes (up to 30\% of - lakes in the study population), suggesting caution in using geometric - models for depth prediction in unsampled lakes. +Lake depth is a critical characteristic that influences many important ecological processes in lakes. Unfortunately, lake depth measurements are labor-intensive to gather and are only available for a small fraction of lakes globally. Therefore, scientists have tried to predict lake depth from characteristics that are easily obtained for all lakes such as lake surface area or the slope of the land surrounding a lake. One approach for predicting lake depth simulates lake basins using a geometric model where nearshore land slope is assumed to be a representative proxy for in-lake slope and the distance to the center of the lake is assumed to be a representative proxy for the distance to the deepest point of the lake. However, these assumptions have rarely been tested in a broad range of lakes. 
We used bathymetry data from approximately 5,000 lakes and reservoirs to test these assumptions and to examine whether differences in lake type or shape influence depth prediction error. We found that nearshore land slope was not representative of in-lake slope and using it for prediction increases error substantially relative to models using true in-lake slope. We also found that models using nearshore land slope as a proxy systematically overpredict lake depth in concave lakes (i.e. bowl-shaped; up to 18\% of lakes in the study population) and reservoir lakes (up to 30\% of lakes in the study population), suggesting caution in using geometric models for depth prediction in unsampled lakes. \end{abstract} %% ------------------------------------------------------------------------ %% @@ -221,9 +199,9 @@ \section{Introduction} \noindent \noindent -Lake depth is an important factor controlling lake physics, chemistry, and biota. Deeper lakes generally have higher water clarity and less complete mixing compared to shallow lakes \cite{feeEffectsLakeSize1996, readSimulating2368Temperate2014}. These differences are reflected in variation among lakes in terms of biological productivity \cite{qinWaterDepthUnderpins2020} and rates of greenhouse gas production \cite{liSignificantContributionLake2020}. However, because measured depth data is only available for a small fraction ($\sim$25\%) of all lakes, our ability to understand and predict depth-dependent processes is limited. The importance of lake depth, coupled with its limited availability, has led to numerous attempts to predict depth using measures available for all lakes such as lake surface area or the nearshore slope of the land surrounding a lake \cite{heathcotePredictingBathymetricFeatures2015, oliver2016prediction, sobekPredictingDepthVolume2011}. 
Given the limited prediction accuracy of prior attempts ($\pm$ 6-7 m), studies have explored strategies such as employing more diverse covariates \cite{oliver2016prediction}, varying lake buffer sizes \cite{heathcotePredictingBathymetricFeatures2015}, or estimating hidden groupings (e.g. fitting different models for distinct size classes) among lakes \cite{sobekPredictingDepthVolume2011}. +Lake depth is an important factor controlling lake physics, chemistry, and biota. Deeper lakes generally have higher water clarity and less complete mixing compared to shallow lakes \cite{feeEffectsLakeSize1996, readSimulating2368Temperate2014}. These differences are reflected in variation among lakes in terms of biological productivity \cite{qinWaterDepthUnderpins2020} and rates of greenhouse gas production \cite{liSignificantContributionLake2020}. However, because measured depth data is only available for a small fraction ($\sim$25\%) of all lakes, our ability to understand and predict depth-dependent processes is limited. The importance of lake depth, coupled with its limited availability, has led to numerous attempts to predict depth using measures available for all lakes such as lake surface area or the nearshore slope of the land surrounding a lake \cite{heathcotePredictingBathymetricFeatures2015, oliver2016prediction, sobekPredictingDepthVolume2011}. Given the limited prediction accuracy of prior depth prediction efforts ($\pm$ 6-7 m), a major focus has been on improving prediction accuracy using strategies such as employing more diverse covariates \cite{oliver2016prediction}, varying lake buffer sizes \cite{heathcotePredictingBathymetricFeatures2015}, or estimating hidden groupings (e.g. fitting different models for distinct size classes) among lakes \cite{sobekPredictingDepthVolume2011}. 
-One approach for predicting lake depth involves using a geometric model that assumes lake basins correspond to an idealized shape such as a cone, bowl, or an elliptic sinusoid \cite{hollisterPredictingMaximumLake2011, neumannMaximumDepthAverage1959, yigzawNewGlobalStorage2018}. All such geometric models for lake depth prediction involve implicit assumptions about the terms of geometric formulae. In the simplest case, where lakes basins are treated as cones (Eq \ref{eq1}, Figure \ref{fig1}), two assumptions are required to make depth predictions for all lakes: 1) that nearshore land slope is a representative proxy for in-lake slope and 2) that the distance to the center of the lake is a representative proxy for the distance to the deepest point of the lake (Figure \ref{fig1}). This cone model imposes the following fixed (i.e. geometric) relationship between slope and horizontal distance: +One approach for predicting lake depth involves using a geometric model that assumes lake basins correspond to an idealized shape such as a cone, bowl, or an elliptic sinusoid \cite{hollisterPredictingMaximumLake2011, neumannMaximumDepthAverage1959, yigzawNewGlobalStorage2018}. All such geometric models for lake depth prediction involve implicit assumptions about the terms of geometric formulae. In the simplest case, where lake basins are treated as cones (Equation \ref{eq1}, Figure \ref{fig1}), two assumptions are required to make depth predictions for all lakes: 1) that nearshore land slope is a representative proxy for in-lake slope and 2) that the distance to the center of the lake is a representative proxy for the distance to the deepest point of the lake (Figure \ref{fig1}). This cone model imposes the following fixed (i.e. geometric) relationship between slope and horizontal distance: \begin{linenomath*} \begin{equation} @@ -239,58 +217,26 @@ \section{Introduction} \caption{Diagram showing the relations between true (black) and proxy (orange) metrics of lake geometry. 
Geometric depth calculated via Equation \ref{eq1} requires a single distance and slope metric.}\label{fig1} \end{figure} -The assumptions of the cone model (as well as other geometric models) can be tested by comparing proxy measures of lake geometry against corresponding “true” (i.e. in-lake) values derived from bathymetric maps and by evaluating how lake cross-section shapes differ from that of an idealized cone \cite{johanssonNewApproachesModelling2007}. For instance, lake cross-section shapes have been shown to vary from narrow "convex" forms to outstretched "concave" forms \cite{hakansonLakeFormLake1977}. Because tests of geometric model assumptions require bathymetric map data, which is only available for about 15\% of all lakes, existing evidence may not be applicable to all lakes. The few studies that have tested these assumptions have mostly been limited to individual studies of very large ($>$ 500 ha) lakes or studies on small numbers ($<$ 100) of lakes \cite{johanssonNewApproachesModelling2007}. Studies focused specifically on reservoirs (as opposed to the more typical case where reservoirs and natural lakes are combined), have been even more restricted to that of extremely large lakes $>$ 1000 ha \cite{lehnerHighresolutionMappingWorld2011, messagerEstimatingVolumeAge2016}. - -As a result of this limited testing, we lack knowledge on both the predictive performance of geometric models, the effect of proxies on depth prediction, and whether depth predictions are more sensitive to measurement errors in the horizontal dimension (i.e. distance to the deepest point of the lake) or measurement errors in the vertical dimension (i.e. in-lake slope). Additionally it is unclear whether model prediction error is related to differences in lake type such those with different cross-section shapes or those classified as reservoirs versus natural lakes. 
+The assumptions of the cone model (as well as other geometric models) can be tested by comparing proxy measures of lake geometry against corresponding “true” (i.e. in-lake) values derived from bathymetric maps and by evaluating how lake cross-section shapes differ from that of an idealized cone \cite{johanssonNewApproachesModelling2007}. For instance, lake cross-section shapes have been shown to vary from narrow "convex" forms to outstretched "concave" forms \cite{hakansonLakeFormLake1977}. Because tests of geometric model assumptions require bathymetric map data, which is only available for about 15\% of all lakes, existing evidence may not be applicable to all lakes. The few studies that have tested these assumptions have been limited to individual studies of very large ($>$ 500 ha) lakes or studies on small numbers ($<$ 100) of lakes \cite{johanssonNewApproachesModelling2007}. Studies focused specifically on reservoirs (as opposed to the more typical case where reservoirs and natural lakes are combined), have been even more restricted to that of extremely large lakes $>$ 1000 ha \cite{lehnerHighresolutionMappingWorld2011, messagerEstimatingVolumeAge2016}. -Given the knowledge gaps identified above, we asked three research questions: 1) How representative is nearshore land slope of in-lake slope; and how representative is the distance to the center of a lake compared to the distance to the deepest point of a lake? 2) How does the use of proxies for lake geometry affect lake depth prediction error? 3) How does lake cross-section shape (i.e. concave versus convex) and lake type (i.e. natural lake vs reservoir) affect depth prediction error? 
To answer these questions, we extracted maximum depth (hereafter referred to as “observed maximum depth”), in-lake slope, cross-section shape (i.e., concave versus convex), and distance to the deepest point, of approximately 5,000 lakes in the Northeastern and Midwestern US from bathymetric map data and supplemented this data with classification estimates of whether lakes are reservoirs or natural lakes. First, we examined whether measures of lake geometry (in-lake slope and distance to the deepest point of lakes) were related to geometry proxies (nearshore land slope and distance to lake centers). Next, we computed geometric depth estimates (Equation \ref{eq1}) and prediction "offsets" to these estimates using the random forest algorithm (Equation \ref{eq2}). Covariates used in offset modeling included a variety of lake, watershed, and hydrologic subbasin measures that are available for all lakes (Table \ref{table1}). We examined differences in overall prediction error corresponding to different inputs to geometric depth as well as differences in the relative distribution of prediction error in lakes with different characteristics. +As a result of this limited testing, we lack knowledge on the predictive performance of geometric models, the effect of proxies on depth prediction, and whether depth predictions are more sensitive to measurement errors in the horizontal dimension (i.e. distance to the deepest point of the lake) or measurement errors in the vertical dimension (i.e. in-lake slope). Additionally, it is unclear whether model prediction error is related to differences in lake type such as those with different cross-section shapes or those classified as reservoirs versus natural lakes. Given these knowledge gaps, we asked three research questions: 1) How representative is nearshore land slope of in-lake slope; and how representative is the distance to the center of a lake compared to the distance to the deepest point of a lake? 
2) How does the use of proxies for lake geometry affect lake depth prediction error? 3) How does lake cross-section shape (i.e. concave versus convex) and lake type (i.e. natural lake vs reservoir) affect depth prediction error? To answer these questions, we extracted maximum depth (hereafter referred to as “observed maximum depth”), in-lake slope, cross-section shape (i.e., concave versus convex), and distance to the deepest point, of approximately 5,000 lakes from bathymetric map data and supplemented this data with classification estimates of whether lakes are reservoirs or natural lakes. We used this data to compute geometric depth estimates (Equation \ref{eq1}) and prediction "offsets" to these estimates using the random forest algorithm (Equation \ref{eq3}). Covariates used in offset modeling included a variety of lake, watershed, and hydrologic subbasin measures that are available for all lakes (Table S1). -Given that, by definition, the distance proxy (distance to the center of the lake) must always be greater or equal to the true distance value (distance to the deepest point of the lake), we expect that the use of this proxy will lead to overestimation of lake depth (Figure \ref{fig3}). Furthermore we expect to see greater overestimation error in reservoirs as compared to natural lakes because many reservoirs are known to be drowned river valleys where the deepest point is close to the edge at the end of the reservoir (i.e. next to the dam) rather than in the center of the reservoir \cite{lanza1985interactions}. In a similar fashion, we expect to see overestimation error associated with using a nearshore land slope proxy in lakes with differing cross-section shape (Figure \ref{fig2}) such that the depth of U-shaped (i.e. concave) lakes will be overpredicted whereas the depth of V-shaped (i.e. convex) lakes will be underpredicted. 
Finally, we expect that overall depth prediction will be strongly related to lake area and hydrologic subbasin variables as these measures have been influential in prior studies \cite{oliver2016prediction}. +By definition, the distance proxy (distance to the center of the lake) must always be greater than or equal to the true distance value (distance to the deepest point of the lake). Therefore, we expect that the use of this proxy will lead to overestimation of lake depth (Figure \ref{fig1}). Furthermore, we expect to see greater overestimation error in reservoirs as compared to natural lakes because many reservoirs are known to be drowned river valleys where the deepest point is close to the edge at the end of the reservoir (i.e. next to the dam) rather than in the center of the reservoir \cite{lanza1985interactions}. In a similar fashion, we expect to see overestimation error associated with using a nearshore land slope proxy in lakes with differing cross-section shape such that the depth of U-shaped (i.e. concave) lakes will be overpredicted whereas the depth of V-shaped (i.e. convex) lakes will be underpredicted (Figure S1). Finally, we expect that depth predictions themselves will be strongly related to lake area and hydrologic subbasin variables as these measures have been influential in prior studies \cite{oliver2016prediction}. -By testing these expectations, we can establish whether barriers to increased depth prediction accuracy lie in lack of correspondence between true and proxy measures of lake geometry or in hidden grouping (such as lake cross-section shape or reservoir status). This information could help direct future research efforts to focus on particular dimensions of lake geometry (i.e. horizontal versus vertical) or to stratify model predictions based on specific lake types and cross-section shapes. 
Ultimately, achieving increased depth prediction accuracy would allow for more precise estimates of depth-dependent biotic and chemical processes across broad spatial extents. - -\begin{figure} - \begin{center} - \includegraphics[width=0.75\textwidth,keepaspectratio]{../figures/lake_shape} - \end{center} - \caption{Diagram showing our expectation that slope-based models of lake depth will under predict true depth in convex lakes (left) and over predict true depth in concave lakes (right). Dashed lines represent extrapolated nearshore land slope while solid lines represent the lake bottom.}\label{fig2} - \end{figure} +By testing these expectations, we can establish whether barriers to increased depth prediction accuracy lie in lack of correspondence between true and proxy measures of lake geometry or in hidden groupings among lakes (such as lake cross-section shape or reservoir status). This information could help direct future research efforts to focus on particular dimensions of lake geometry (i.e. horizontal versus vertical) or to stratify model predictions based on specific lake types and cross-section shapes. Ultimately, achieving increased depth prediction accuracy would allow for more precise estimates of depth-dependent biotic and chemical processes across broad spatial extents. \section{Methods} \subsection{Data description} \noindent -We compiled bathymetry data on approximately 5,000 lakes in the Northeastern and Midwestern US from nine official state databases (Figure S1). The original data came in a variety of formats including pre-interpolated rasters (Minnesota), contour lines (Nebraska, Michigan, Massachusetts, Kansas, Iowa), contour polygons (New Hampshire, Connecticut), or point depth soundings (Maine). For the Minnesota data, we simply clipped the raster for each lake to its outline. 
For data from the remaining states, we processed each lake by converting its original representation to a point layer (if necessary), rasterizing these points, and creating an interpolated bathymetry “surface” using a simple moving window average in the \texttt{raster} R package \cite{hijmansRasterGeographicData2019}. The size of the moving window was adjusted iteratively to ensure that each bathymetry raster contained no missing data. +We compiled bathymetry data on approximately 5,000 lakes in the Northeastern and Midwestern US from nine official state databases (Figure S2). The original data came in a variety of formats including pre-interpolated rasters (Minnesota), contour lines (Nebraska, Michigan, Massachusetts, Kansas, Iowa), contour polygons (New Hampshire, Connecticut), or point depth soundings (Maine). For the Minnesota data, we simply clipped the raster for each lake to its outline. For data from the remaining states, we processed each lake by converting its original representation to a point layer (if necessary), rasterizing these points, and creating an interpolated bathymetry “surface” using a simple moving window average in the \texttt{raster} R package \cite{hijmansRasterGeographicData2019}. The size of the moving window was adjusted iteratively to ensure that each bathymetry raster contained no missing data. All lake bathymetry was specifically calculated relative to high-resolution (1:24,000 scale) NHD \cite{usgsNationalHydrographyDataset2019} waterbodies such that source data and bathymetry surface outputs were clipped to the area of each lake polygon. We restricted the lakes in our study to those with an area of at least 4 ha and a maximum depth of at least 0.3 m (1 ft). The purpose of these restrictions was to ensure that lakes had enough contours (or points, or polygons) to generate adequately smooth interpolations with which to calculate in-lake geometry metrics. 
We used our generated bathymetry surfaces to find the location of the deepest point in the lake and we resolved ties by choosing the deepest point that was closest to the center of the lake. We calculated the center of the lake not as its centroid but rather by finding the point farthest from the lake shoreline (i.e. its “visual distance to lake center”). For these calculations, we used the \texttt{polylabelr} R package \cite{larssonPolylabelrFindPole2019}, which interfaces with the Mapbox pole of inaccessibility algorithm \cite{agafonkinJSLibraryFinding2019}. We calculated in-lake slope as maximum lake depth divided by the distance to the deepest point and we calculated nearshore land slope for each lake by computing the slope within a 100-m buffer using data from a high resolution digital elevation model ($\sim$15x15m grain) accessed using the \texttt{elevatr} R package \cite{hollisterElevatrAccessElevation2017} and computed using the terrain function in the \texttt{raster} R package \cite{hijmansRasterGeographicData2019}. -We categorized lakes based on their cross-section shape and reservoir class. For cross-section shape, we categorized lakes as either convex or concave following the method of \citeA{hakansonLakeFormLake1977} by computing normalized lake depth-area relationships (i.e. hypsographic curves) and assigning class membership based on whether a lake’s curve falls above or below that of a simple straight-sided cone (Figure S4). We further classified lakes using data from \citeA{polus2020}, which uses the output of a machine learning algorithm to assign a probability to each lake as to whether it is a reservoir or a natural lake. For our purposes, we determined a lake to be a reservoir if the classification probability was 0.75 or greater. The \citeA{polus2020} data product defines reservoirs as any permanent waterbody that has a water control structure likely to significantly impact flow or pool water, beyond simply controlling water level. 
It makes no distinction between different dam types or dam heights. +We categorized lakes based on their cross-section shape and reservoir class. For cross-section shape, we categorized lakes as either convex or concave following the method of \citeA{hakansonLakeFormLake1977} by computing normalized lake depth-area relationships (i.e. hypsographic curves) and assigning class membership based on whether a lake’s curve falls above or below that of a simple straight-sided cone (Figure S3). We further classified lakes using data from \citeA{polus2020}, which uses the output of a machine learning algorithm to assign a probability to each lake as to whether it is a reservoir or a natural lake. For our purposes, we determined a lake to be a reservoir if the classification probability was 0.75 or greater. The \citeA{polus2020} data product defines reservoirs as any permanent waterbody that has a water control structure likely to significantly impact flow or pool water, beyond simply controlling water level. It makes no distinction between different dam types or dam heights. -\begin{table} -\caption{Summary of lake characteristics for the present study (and for lakes in the contiguous United States from \citeA{stachelek2020}). Predictor variables for computing random forest offsets (Eq \ref{eq2}) are printed in bold face. 
Dashes (-) indicate an identical sample size among this study and that of the contiguous United States.} \label{table1} -\centering -\setlength\tabcolsep{1.5pt} % default value: 6pt -\begin{tabular}{lllll} - \hline - Variable & Median & Q25 & Q75 & n\\ - \hline - Max depth (m) & 8.2 (7) & 4.6 (3.7) & 14 (12) & 4850 (17700)\\ - \textbf{Elevation (m)} & 300 (340) & 180 (210) & 400 (460) & 4850 (17700)\\ - \textbf{Area (ha)} & 55 (33) & 21 (11) & 140 (100) & 4850 (17700)\\ - \textbf{Island area (ha)} & 0 (0) & 0 (0) & 0.18 (0.076) & 4850 (17700)\\ - \textbf{Perimeter (m)} & 4400 (3500) & 2500 (1800) & 8100 (7300) & 4850 (17700)\\ - \textbf{Shoreline development} & 1.7 (1.7) & 1.4 (1.4) & 2.1 (2.2) & 4850 (17700)\\ - \textbf{Watershed-lake ratio} & 7.8 (10) & 3.9 (4.4) & 17 (28) & 4850 (17700)\\ - Distance to deepest point (m) & 180 (-) & 110 (-) & 290 (-) & 4850 (-)\\ - Distance to lake center (m) & 240 (-) & 160 (-) & 380 (-) & 4850 (-)\\ - In-lake slope (m/m) & 0.046 (-) & 0.024 (-) & 0.079 (-) & 4850 (-)\\ - Nearshore land slope (m/m) & 0.077 (-) & 0.051 (-) & 0.11 (-) & 4850 (-)\\ - \hline - \end{tabular} -\end{table} - -Covariates used in random forest modeling (Table \ref{table1}, Equation \ref{eq2}) for lake elevation, area, island area, perimeter, shoreline development, watershed to lake area ratio, and hydrologic subbasin (i.e. HUC4s), were obtained from the LAGOS-US LOCUS database \cite{smith2020}. One such measure, that of shoreline development, is a measure of lake perimeter shape defined as: +Covariates used in random forest modeling (Table S1, Equation \ref{eq3}) for lake elevation, area, island area, perimeter, shoreline development, watershed to lake area ratio, and hydrologic subbasin (i.e. HUC4s), were obtained from the LAGOS-US LOCUS database \cite{smith2020}. 
One such measure, that of shoreline development, is a measure of lake perimeter shape defined as: \begin{linenomath*} \begin{equation} @@ -299,11 +245,11 @@ \subsection{Data description} \end{linenomath*} \noindent -where sinuous lakes have larger values of shoreline development and circular lakes have smaller values of shoreline development. Watershed to lake area ratio is an approximation of water residence time and is defined as watershed area divided by lake area. All of our data and data processing code are available at [Zenodo DOI]. +where sinuous lakes have larger values of shoreline development and circular lakes have smaller values of shoreline development. Watershed to lake area ratio is an approximation of water residence time and is defined as watershed area divided by lake area. \subsubsection{Proxy evaluation} \noindent -We conducted a qualitative assessment of whether or not proxy measures of lake geometry are representative of their true values by visual inspection (i.e. plotting each proxy measure against its corresponding true value) and by computing coefficients of determination ($R^2$). We further tested proxy measures by examining their effect on lake depth prediction error. Our approach involved several steps. In the first step, we computed a geometric estimate of lake depth using only geometry information ($depth_{geometric}$, Equation \ref{eq1}). In the second step, we fit a random forest model to predict observed (i.e. true) depth as a function of geometric depth along with several covariates available for all lakes (Table \ref{table1}). The purpose of this random forest “offset” modeling was to more rigorously test our expectations regarding prediction error among different formulations of $depth_{geometric}$ and among different lake types. Each of these steps were executed iteratively for each combination of true and proxy values of slope and distance (Table \ref{table2}). 
+We conducted a qualitative assessment of whether or not proxy measures of lake geometry are representative of their true values by visual inspection (i.e. plotting each proxy measure against its corresponding true value) and by computing coefficients of determination ($R^2$). We further tested proxy measures by examining their effect on lake depth prediction error. Our approach involved several steps. In the first step, we computed a geometric estimate of lake depth using only geometry information ($depth_{geometric}$, Equation \ref{eq1}). In the second step, we fit a random forest model to predict observed (i.e. true) depth as a function of geometric depth along with several covariates available for all lakes (Table S1). The purpose of this random forest “offset” modeling was to more rigorously test our expectations regarding prediction error among different formulations of $depth_{geometric}$ and among different lake types. Each of these steps was executed iteratively for each combination of true and proxy values of slope and distance (Table \ref{table2}). \subsection{Model description} \subsubsection{Geometric model} @@ -312,7 +258,7 @@ \subsubsection{Geometric model} \subsubsection{Random forest models} \noindent -Nearly all prior studies predicting lake depth using geometric models include a statistical or machine learning model “layer” or “offset” to boost predictive accuracy. For our purposes, this offset modeling enabled us to test our expectations that prediction error would be different among different formulations of $depth_{geometric}$ and among different lake types. It also facilitated direct comparison against prior models of lake depth including those that are non-geometric. 
We generated an “offset” to geometric depth \cite{hollisterPredictingMaximumLake2011} using the random forest algorithm and the \texttt{ranger} R package \cite{wrightRangerFastImplementation2017} to predict observed maximum depth as a function of covariates including geometric maximum depth (from Equation \ref{eq1}) along with the lake elevation, area, perimeter, and ratio/index measures listed in Table 1: +Prior studies using geometric models to predict lake depth include a statistical or machine learning model “layer” or “offset” to boost predictive accuracy. For our purposes, this offset modeling enabled us to test our expectations that prediction error would be different among different formulations of $depth_{geometric}$ and among different lake types. It also facilitated direct comparison against prior models of lake depth including those that are non-geometric. We generated an “offset” to geometric depth \cite{hollisterPredictingMaximumLake2011} using the random forest algorithm and the \texttt{ranger} R package \cite{wrightRangerFastImplementation2017} to predict observed maximum depth as a function of covariates including geometric maximum depth (from Equation \ref{eq1}) along with the lake elevation, area, perimeter, and ratio/index measures listed in Table S1: \begin{linenomath*} \begin{equation} @@ -329,13 +275,13 @@ \subsubsection{Model comparisons} \subsubsection{Model evaluations} \noindent -We evaluated model fit and prediction error using root-mean-square error (RMSE) and coefficient of determination ($R^2$) metrics on a holdout set containing 25\% of all lakes. We evaluated the residuals (residual = observed - predicted) of each model relative to lake cross-section shape and reservoir classes to determine whether depth is consistently over or under predicted for some lake types relative to others. All of our code for model fitting and evaluation is available at [Zenodo DOI]. 
+We evaluated model fit and prediction error using root-mean-square error (RMSE) and coefficient of determination ($R^2$) metrics on a holdout set containing 25\% of all lakes. We evaluated the residuals of each model relative to lake cross-section shape and reservoir classes to determine whether depth is consistently over or under predicted for some lake types relative to others. \section{Results} \noindent -Lakes belonging to each cross-section shape and reservoir class were not evenly distributed across our study area (Figure S1). For example, concave lakes were nearly absent from Michigan whereas Maine lakes had an overabundance of lakes categorized as neither concave nor convex. Lakes in the southern portions of our study area tended to be classified as reservoirs whereas lakes in the northern portions of our study area were a more even mix between reservoirs and natural lakes (Figure S1). Approximately 18\%, 80\%, and 2\% of lakes were classified as having a concave, convex, or neither shape respectively whereas approximately 30\% and 70\% of lakes were classified as being a reservoir or a natural lake. +Lakes belonging to each cross-section shape and reservoir class were not evenly distributed across our study area (Figure S2, S3). For example, concave lakes were nearly absent from Michigan whereas Maine lakes had an overabundance of lakes categorized as neither concave nor convex. Lakes in the southern portions of our study area tended to be classified as reservoirs whereas lakes in the northern portions of our study area were a more even mix between reservoirs and natural lakes (Figure S2). Approximately 18\%, 80\%, and 2\% of lakes were classified as having a concave, convex, or neither shape respectively whereas approximately 30\% and 70\% of lakes were classified as being a reservoir or a natural lake. 
-Although proxy distance to lake center was often much larger in magnitude compared to the true distance to the deepest point of lakes’, they were strongly related ($R^2$ = 0.8). In contrast, proxy nearshore land slope and true in-lake slope were more weakly related ($R^2$ = 0.17). For slope measures, most lakes had lower magnitude (i.e. shallower) nearshore land slope compared to true in-lake slope (Figure \ref{fig3}). Taken together, these results suggest that proxy distance to the center of lakes is representative of true distance to the deepest point of lakes while proxy nearshore land slope is not representative of true in-lake slope. In addition to overall differences between slope and distance measures, we found differences in these relationships among lake shape classes. For example, in-lake slope and distance to the deepest point of the lake metrics were consistently larger in magnitude for convex lakes as compared to concave lakes (Figure S5). However, there were not similar differences among slope and distance metrics for natural lakes versus reservoirs (Figure S5). +Although proxy distance to lake center was often much larger in magnitude compared to the true distance to the deepest point of lakes’, they were strongly related ($R^2$ = 0.8). In contrast, proxy nearshore land slope and true in-lake slope were more weakly related ($R^2$ = 0.17). For slope measures, most lakes had lower magnitude (i.e. shallower) nearshore land slope compared to true in-lake slope (Figure \ref{fig3}). Taken together, these results suggest that proxy distance to the center of lakes is representative of true distance to the deepest point of lakes whereas proxy nearshore land slope is not representative of true in-lake slope. In addition to overall differences between slope and distance measures, we found differences in these relationships among lake shape classes. 
For example, in-lake slope and distance to the deepest point of the lake metrics were consistently larger in magnitude for convex lakes as compared to concave lakes (Figure S4). However, there were not similar differences among slope and distance metrics for natural lakes versus reservoirs (Figure S4). \begin{figure} \begin{center} @@ -346,7 +292,7 @@ \section{Results} The use of proxy nearshore land slope had a larger effect on model fit and prediction error than the use of proxy distance to lake center (Table \ref{table2}). More specifically, the true slope - proxy distance model had a better fit ($R^2$ = 0.73) and lower prediction error (RMSE = 4.23m) compared to the proxy slope - true distance model ($R^2$ = 0.26, RMSE = 6.87m). Furthermore, analysis of model residuals showed overestimation of lake depth for concave lakes when models included a proxy slope measure (Figure \ref{fig4}). We observed similar but smaller overestimation depending on if a lake was classified as a reservoir rather than a natural lake (Figure \ref{fig4}). -\begin{table} +\begin{table}[h] \caption{Model fit and predictive accuracy metrics (RMSE = root mean square error, $R^2$ = coefficient of determination) for all combinations of true (in-lake slope, distance to the deepest point of the lake) and proxy (nearshore land slope, distance to lake center) metrics.} \label{table2} \centering % \setlength\tabcolsep{1.5pt} % default value: 6pt @@ -369,31 +315,33 @@ \section{Results} \caption{Depth model residuals (residual = observed - predicted) in meters by cross-section shape and reservoir class indicating overprediction of concave and reservoir lakes.}\label{fig4} \end{figure} -The most important covariates in offset models were those relating to spatial location, lake area, and perimeter (Figure S6). Conversely, watershed metrics and lake elevation had little contribution to random forest model fit (Figure S6). The spatial location (i.e. 
HUC4) covariate was notably less importance in the true slope model compared to the two proxy slope models. Model importance calculations indicated that omitting a geometric max depth measure results in a 130\%, 60\%, or 50\% increase in mean square error depending on the formulation of geometric max depth in Eq \ref{eq1} (Figure S6). +The most important covariates in offset models were those relating to spatial location, lake area, and perimeter (Figure S5). Conversely, watershed metrics and lake elevation had little contribution to random forest model fit (Figure S5). The spatial location (i.e. HUC4) covariate was notably less important in the true slope model compared to the two proxy slope models. Model importance calculations indicated that omitting a geometric max depth measure results in a 130\%, 60\%, or 50\% increase in mean square error depending on the formulation of geometric max depth in Equation \ref{eq1} (Figure S5). \section{Discussion} \noindent -Our tests of geometric lake depth models show that specific proxy measures of lake geometry are not representative of true measures of lake geometry across a broad array of lakes. Using a cone model example, we show that nearshore land slope is not representative of in-lake slope. Furthermore, our results indicate that the use of nearshore land slope for prediction results in increased error and systematic overestimation of depth in concave and reservoir lakes. Although our analysis was limited to lakes with available bathymetry data, these lakes did not have characteristics that differed from that of the overall lake population (Figure S7, S2). This lack of difference suggests that our results are likely to be broadly applicable to all lakes although there is a possibility that there is some hidden bias not explored for in our analyses. 
+Our tests of geometric lake depth models show that specific proxy measures of lake geometry are not representative of true geometry measures across a broad array of lakes. Models using non-representative proxies showed increased error and systematic overestimation of depth in concave and reservoir lakes. Although our analysis was limited to lakes with available bathymetry data, these lakes did not have characteristics that differed from those of the overall lake population (Figure S6, S8). Although there is a possibility that there is some hidden bias not explored in our analyses, this lack of difference suggests that our results are likely to be broadly applicable to all lakes. \subsection{Representativeness of proxy measures of lake geometry} \noindent -In comparing among lake geometry measures, our analysis suggests that proxy distance to lake center is representative of true distance to the deepest point of the lakes but that proxy nearshore land slope is not representative of true in-lake slope. A simple indication of this non representativeness is that proxy nearshore land slope was often (in > 74\% of cases) steeper than true in-lake slope. This finding is consistent with \citeA{heathcotePredictingBathymetricFeatures2015} whos results suggest that in-lake slopes are shallower compared to the surrounding land. The shallow nature of in-lake slopes is likley a function of erosion and sediment transport processes \cite{hakansonLakeBottomDynamics1981, johanssonNewApproachesModelling2007}. +In comparing among lake geometry measures, our analysis suggests that proxy distance to lake center is representative of true distance to the deepest point of the lakes but that proxy nearshore land slope is not representative of true in-lake slope. A simple indication of this non representativeness is that proxy nearshore land slope was often (in $>$ 74\% of cases) steeper than true in-lake slope. 
This finding is consistent with \citeA{heathcotePredictingBathymetricFeatures2015} whose results suggest that in-lake slopes are shallower compared to the surrounding land. The shallow nature of in-lake slopes is likely a function of erosion and sediment transport processes \cite{hakansonLakeBottomDynamics1981, johanssonNewApproachesModelling2007}. -One surprising finding with respect to the relationship between true and proxy geometry measures when examined by lake class was the fact that there was no greater difference between proxy and true distances in reservoirs compared to natural lakes. This is contrary to the idea that most reservoirs are drowned river valleys where the deepest point is close to the edge at the end of the reservoir (i.e. next to the dam) rather than in the center of the reservoir \cite{lanza1985interactions}. One possible explanation is that the reservoir classification data from \citeA{polus2020} uses a more general definition of a reservoir (i.e. any permanent waterbody that has a water control structure likely to significantly impact flow or pool water) compared to that of conventional classifications that are tied to specific dam types or dam heights. Another possible explanation is that conventional reservoir classifications are conceptually biased towards more southern areas with few natural lakes (Figure S1). +One surprising finding with respect to the relationship between true and proxy geometry measures when examined by lake class was the fact that there was no greater difference between proxy and true distances in reservoirs compared to natural lakes. This is contrary to the idea that most reservoirs are drowned river valleys where the deepest point is close to the edge at the end of the reservoir (i.e. next to the dam) rather than in the center of the reservoir \cite{lanza1985interactions}. One possible explanation is that the reservoir classification data from \citeA{polus2020} uses a more general definition of a reservoir (i.e. 
any permanent waterbody that has a water control structure likely to significantly impact flow or pool water) compared to that of conventional classifications that are tied to specific dam types or dam heights. Another possible explanation is that conventional reservoir classifications are conceptually biased towards more southern areas with few natural lakes (Figure S2). -We found other differences among lake geometry measures according to lake cross-section shape. One finding was that convex lakes, when compared to concave lakes, had longer distances to lake centers relative to corresponding distances to the deepest point of lakes. In addition, convex lakes often had steeper in-lake slopes relative to nearshore land slopes as compared to concave lakes. These differences are reflected in the class-wise differences in shoreline development (e.g. shorelines were more sinuous in convex lakes compared to concave lakes, Figure S3). It was notable that convex lakes were deeper than concave lakes despite having similar distributions of lake surface area (Figure S3). Given the similarity in lake surface area, the underlying cause of these differences is unknown but one possibility is that geometry is tied to the circumstances of lake formation whereby concave lakes were formed as a result of more intense glacial scouring compared to convex lakes \cite{gorhamPhysicalLimnologyNorthern1958}. While there is some evidence in support of this idea, namely that there is a geographic hotspot of concave lakes associated with the glaciated “prairie pothole region” \cite{hayashiSimpleEquationsRepresent2000}, the overall geographic distribution of lake cross-section shapes does not support this idea. Instead of a concentrated area of concave lakes in formerly glaciated regions, there appears to be a fairly even mix of concave and convex lakes distributed amongst the northern (i.e. glaciated) and southern (non-glaciated) portions of our study area (Figure S1). 
+We found other differences among lake geometry measures according to lake cross-section shape. One finding was that convex lakes, when compared to concave lakes, had longer distances to lake centers relative to corresponding distances to the deepest point of lakes. In addition, convex lakes often had steeper in-lake slopes relative to nearshore land slopes as compared to concave lakes. Finally, it was notable that convex lakes were deeper than concave lakes despite having similar distributions of lake surface area (Figure S7). The underlying cause of these differences is unknown but one possibility is that geometry is tied to the circumstances of lake formation whereby the formation of concave lakes was a result of more intense glacial scouring compared to that of convex lakes \cite{gorhamPhysicalLimnologyNorthern1958}. While our findings provide some evidence in support of this idea, namely that there is a geographic hotspot of concave lakes associated with the glaciated “prairie pothole region” \cite{hayashiSimpleEquationsRepresent2000}, the overall geographic distribution of lake cross-section shapes does not support this idea. Instead of a concentrated area of concave lakes in formerly glaciated regions, there appears to be a fairly even mix of concave and convex lakes distributed amongst the northern (i.e. glaciated) and southern (non-glaciated) portions of our study area (Figure S2). \subsection{Effects of proxy measures of lake geometry depth prediction error} \noindent -Models using only proxy variables had prediction error rates (RMSE = 6.6m) of a similar magnitude as that of prior studies (RMSE = 6 - 7.3m) predicting lake depth at broad geographic extents \cite{hollisterPredictingMaximumLake2011, oliver2016prediction, messagerEstimatingVolumeAge2016}. When only a single proxy measure was used there was a difference in model sensitivity depending on if it was a horizontal distance measure or a vertical slope measure. 
In the case of a true slope and proxy distance combination, models were more accurate ($\pm$ 4.2m) than even the most accurate of prior studies \cite{hollisterPredictingMaximumLake2011, oliver2016prediction, messagerEstimatingVolumeAge2016}. Conversely, models using a proxy slope and true distance combination had prediction error rates ($\pm$ 6.9m) of a similar magnitude as that of the baseline proxy-proxy model ($\pm$ 6.6m). The greater sensitivity of depth predictions to proxy slope measures relative to proxy distance measures may be explained by the fact that proxy slope measures were a more imperfect representation of true in-lake slopes relative to proxy versus true distances. In addition, these results help explain the relatively poor predictive performance of prior non-geometric lake depth models given that they rely heavily on lake area as a predictor \cite{messagerEstimatingVolumeAge2016, oliver2016prediction, sobekPredictingDepthVolume2011} and both horizontal distance measures and vertical slope measures appear to be decoupled from lake area (Figure S3). +Models using only proxy variables had prediction error rates (RMSE = 6.6m) of a similar magnitude as that of prior studies (RMSE = 6 - 7.3m) predicting lake depth at broad geographic extents \cite{hollisterPredictingMaximumLake2011, oliver2016prediction, messagerEstimatingVolumeAge2016}. When only a single proxy measure was used, there was a difference in model sensitivity depending on if it was a horizontal distance measure or a vertical slope measure. In the case of a true slope and proxy distance combination, models were more accurate ($\pm$ 4.2m) than even the most accurate of prior studies \cite{hollisterPredictingMaximumLake2011, oliver2016prediction, messagerEstimatingVolumeAge2016}. Conversely, models using a proxy slope and true distance combination had prediction error rates ($\pm$ 6.9m) of a similar magnitude as that of the baseline proxy-proxy model ($\pm$ 6.6m). 
The greater sensitivity of depth predictions to proxy slope measures relative to proxy distance measures may be explained by the fact that proxy slope measures were a more imperfect representation of true in-lake slopes relative to proxy versus true distances. In addition, these results help explain the relatively poor predictive performance of prior non-geometric lake depth models given that they rely heavily on lake area as a predictor \cite{messagerEstimatingVolumeAge2016, oliver2016prediction, sobekPredictingDepthVolume2011} and both horizontal distance measures and vertical slope measures appear to be decoupled from lake area (Figure S7). \subsection{Effects of lake shape and lake type on depth prediction error} \noindent -As expected, we found that the maximum depth of concave lakes was systematically overpredicted by a simple geometric model using proxy nearshore land slope. However, contrary to our expectation, we did not observe underprediction of depth in convex lakes. The reason we did not observe underprediction of the depth of convex lakes is likely because geometric depth itself was always greater than observed maximum depth owing to the fact that proxy distance is constrained to be greater than true distance. Although the magnitude of overestimation is likely related to class imbalance in our dataset (i.e. there was a greater number of convex lakes), these results suggest that broad scale estimates of lake depth are overestimated particularly when those estimates encompass large numbers of lakes with diverse cross-section shapes. +As expected, we found that the maximum depth of concave lakes was systematically overpredicted by a simple geometric model using proxy nearshore land slope (Figure S1). However, contrary to our expectation, we did not observe underprediction of depth in convex lakes. 
The reason we did not observe underprediction of the depth of convex lakes is likely because geometric depth itself was always greater than observed maximum depth owing to the fact that proxy distance is constrained to be greater than true distance. This suggests that depth estimates in prior studies may be overestimated when they encompass large numbers of lakes with diverse cross-section shapes. \subsection{Future research} \noindent -One of our models (true slope, proxy distance) was more accurate than even the most accurate of prior studies. However, parameterization of this model requires data on bathymetry which is not available for all lakes. We propose that the error rate of this model ($\pm$ 4.2m) be used as an out-of-sample prediction benchmark for future studies such that they should attempt to match it but not expect to exceed it. Because this model requires bathymetry data, this suggests that it may not be possible with current data and models to produce depth predictions for all lakes with error rates below about 6m. To approach our benchmark using data available for all lakes, future studies could explore alternative modeling approaches such as ordinal modeling, which would capture whether or not a lake crosses some important depth threshold, but would not seek to predict a specific depth value. These studies could also explore emerging data types to boost prediction accuracy such as “topobathymetric” products that integrate both topographic and bathymetric data in a seamless fashion rather than treating them as separate entities. This would allow for more robust tests of the representativeness of geometric model inputs. Unfortunately, topobathymetric products are rare, have mostly been limited nearshore marine environments, and as such are not widely available for inland waters \cite{danielsonTopobathymetricElevationModel2016}. +One of our models (true slope, proxy distance) was more accurate than even the most accurate of prior studies. 
However, parameterization of this model requires data on bathymetry which is not available for all lakes. We propose that the error rate of this model ($\pm$ 4.2m) be used as an out-of-sample prediction benchmark for future studies such that they should attempt to match it but not expect to exceed it. + +Because this most accurate model requires bathymetry data, this suggests that it may not be possible with current data and models to produce depth predictions for all lakes with error rates below 6m. To achieve high prediction accuracy using data available for all lakes, future studies could explore alternative modeling approaches such as ordinal modeling, which would capture whether or not a lake crosses some important depth threshold but would not seek to predict a specific depth value, or emerging data types such as “topobathymetric” products that integrate both topographic and bathymetric data in a seamless fashion rather than treating them as separate entities. Topobathymetry would allow for more robust tests of the representativeness of geometric model inputs. Unfortunately, topobathymetric products are rare, have mostly been limited to nearshore marine environments, and as such are not yet widely available for inland waters \cite{danielsonTopobathymetricElevationModel2016}. Finally, our findings indicate that geometry measures differ according to lake cross-section shape. This makes it an attractive target for inclusion in depth prediction models. Unfortunately, identifying a lake’s cross-section shape requires bathymetry data which is unavailable for most lakes. However, given the conceptual links between cross-section shape, glaciation, and sedimentation \cite{johanssonNewApproachesModelling2007} it may be advantageous for future studies to compile data on sedimentation to determine if this data can be used to predict cross-section shape and boost depth prediction accuracy. 
@@ -545,7 +493,7 @@ \section{Conclusion} - +%TC:ignore %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % % ACKNOWLEDGMENTS @@ -569,10 +517,8 @@ \section{Conclusion} % It is also the appropriate place to thank colleagues and other contributors. % AGU does not normally allow dedications. - -\acknowledgments -Funding was provided by the US NSF Macrosystems Biology Program grants, DEB-1638679; DEB-1638550, DEB-1638539, DEB-1638554. PAS was also supported by USDA National Institute of Food and Agriculture Hatch Project, Grant Number: 176820. Author contributions: JS conceived of the study, built models, analyzed data, and wrote the paper. PJH and PAS provided interpretation of results and edited the paper. This work benefited from participation in the Global Lake Ecological Observatory Network (GLEON). We thank K.S. Cheruvelil for a friendly review of an earlier draft. - +\acknowledgments All data as well as code for data processing, model fitting, and model evaluation is available at [Zenodo DOI]. Funding was provided by the US NSF Macrosystems Biology Program grants, DEB-1638679; DEB-1638550, DEB-1638539, DEB-1638554. PAS was also supported by USDA National Institute of Food and Agriculture Hatch Project, Grant Number: 176820. Author contributions: JS conceived of the study, built models, analyzed data, and wrote the paper. PJH and PAS provided interpretation of results and edited the paper. This work benefited from participation in the Global Lake Ecological Observatory Network (GLEON). We thank K.S. Cheruvelil for a friendly review of an earlier draft. 
+%TC:endignore %% ------------------------------------------------------------------------ %% %% References and Citations diff --git a/manuscript/appendix.pdf b/manuscript/appendix.pdf index 289c173..2531026 100644 Binary files a/manuscript/appendix.pdf and b/manuscript/appendix.pdf differ diff --git a/manuscript/combined.pdf b/manuscript/combined.pdf index bd2ad98..bcdcc2f 100644 Binary files a/manuscript/combined.pdf and b/manuscript/combined.pdf differ diff --git a/manuscript/si_template_2019.tex b/manuscript/si_template_2019.tex index 439ef8b..ccc2ce6 100644 --- a/manuscript/si_template_2019.tex +++ b/manuscript/si_template_2019.tex @@ -189,23 +189,26 @@ \noindent\textbf{Contents of this file} %%%Remove or add items as needed%%% \begin{enumerate} -\item \textbf{Figure S1:} Map of study lakes -\item \textbf{Figure S2:} Hypsography classification by state +\item \textbf{Figure S1:} Diagram showing expectations regarding depth prediction bias -\item \textbf{Figure S3:} Comparison among lake shape and reservoir classes for true and proxy geometry measures +\item \textbf{Figure S2:} Map of study lakes -\item \textbf{Figure S4:} Importance plot for random forest variables showing increase in mean square error +\item \textbf{Figure S3:} Hypsography classification by state -\item \textbf{Figure S5:} Comparison between characteristics of lakes with bathymetry data against lakes with depth from other sources +\item \textbf{Figure S4:} Lake geometry comparison among shape and reservoir classes +\item \textbf{Figure S5:} Importance plot for random forest variables -\item \textbf{Figure S6:} Comparison of lake characteristics according to differences in lake cross-section shape or reservoir status +\item \textbf{Figure S6:} Characteristics of lakes with bathymetry versus non-bathymetry lakes -\item \textbf{Figure S7:} Comparison between reported depth and depth extracted from bathymetry surfaces +\item \textbf{Figure S7:} Lake characteristics comparison among shape and 
reservoir classes + +\item \textbf{Figure S8:} Comparison between reported depth and bathymetry depth %if Tables are larger than 1 page, upload as separate excel file \end{enumerate} +\vspace{1em} % \noindent\textbf{Introduction} %Type or paste your text here. The introduction gives a brief overview of the supporting information. You should include information %about as many of the following as possible (when appropriate): @@ -303,6 +306,15 @@ \end{article} \clearpage +\begin{figure} + \begin{center} + \includegraphics[width=0.75\textwidth,keepaspectratio]{../figures/lake_shape} + \end{center} + \caption{Diagram showing our expectation that slope-based models of lake depth will under predict true depth in convex lakes (left) and over predict true depth in concave lakes (right). Dashed lines represent extrapolated nearshore land slope while solid lines represent the lake bottom.}\label{fig2} + \end{figure} + +\clearpage + \begin{figure} \begin{center}\includegraphics[width=0.65\textwidth]{../figures/00_map_bathy-1}\end{center} \caption{Map of study lakes showing A) lake maximum depth measurements, B) cross-section shape class, and C) reservoir classification.}\label{figs1} @@ -341,6 +353,31 @@ \caption{Comparison between reported depth and depth extracted from bathymetry surfaces by US State where reported depths come from the LAGOSUS-Depth product \cite{stachelek2020}. For this figure, no reported depth values originated from the same source as its corresponding bathymetry-derived value.}\label{figs2} \end{figure} +\clearpage + +\begin{table} +\caption{Summary of lake characteristics for the present study (and for lakes in the contiguous United States from \citeA{stachelek2020}). Predictor variables for computing random forest offsets (Equation 2) are printed in bold face. 
Dashes (-) indicate an identical sample size among this study and that of the contiguous United States.} \label{table1} +\centering +\setlength\tabcolsep{1.5pt} % default value: 6pt +\begin{tabular}{lllll} + \hline + Variable & Median & Q25 & Q75 & n\\ + \hline + Max depth (m) & 8.2 (7) & 4.6 (3.7) & 14 (12) & 4850 (17700)\\ + \textbf{Elevation (m)} & 300 (340) & 180 (210) & 400 (460) & 4850 (17700)\\ + \textbf{Area (ha)} & 55 (33) & 21 (11) & 140 (100) & 4850 (17700)\\ + \textbf{Island area (ha)} & 0 (0) & 0 (0) & 0.18 (0.076) & 4850 (17700)\\ + \textbf{Perimeter (m)} & 4400 (3500) & 2500 (1800) & 8100 (7300) & 4850 (17700)\\ + \textbf{Shoreline development} & 1.7 (1.7) & 1.4 (1.4) & 2.1 (2.2) & 4850 (17700)\\ + \textbf{Watershed-lake ratio} & 7.8 (10) & 3.9 (4.4) & 17 (28) & 4850 (17700)\\ + Distance to deepest point (m) & 180 (-) & 110 (-) & 290 (-) & 4850 (-)\\ + Distance to lake center (m) & 240 (-) & 160 (-) & 380 (-) & 4850 (-)\\ + In-lake slope (m/m) & 0.046 (-) & 0.024 (-) & 0.079 (-) & 4850 (-)\\ + Nearshore land slope (m/m) & 0.077 (-) & 0.051 (-) & 0.11 (-) & 4850 (-)\\ + \hline + \end{tabular} +\end{table} + % Copy/paste for multiples of each file type as needed. % enter figures and tables below here: %%%%%%%