-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMcCullough_ClimateLakes.R
1847 lines (1582 loc) · 103 KB
/
McCullough_ClimateLakes.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
######################## Compare PRISM to limno data #####################################################
# Date: 9-11-17
# updated: 7-9-18
# Author: Ian McCullough, [email protected]
# Note: NE (Northeast) refers to forested region, UM (Upper Midwest) refers to mixed agricultural region
##########################################################################################################
#### R libraries ####
library(LAGOSNE)
library(raster)
library(dplyr)
library(pgirmess)
library(Hmisc)
library(randomForest)
library(rgdal)
#### define constants ####
first_year = 1981 # full record is 1970-2011
last_year = 2010
first_day = '0615' #e.g., '0615' for Jun 15
last_day = '0915'
min_years = 25 #minimum number of years of data allowed in analysis
pvalue_cutoff = 0.05 #significance level
limno_var = 'secchi'
# monthly and annual climate variables of interest
vars_of_interest = c('wyppt','fall_ppt','winter_ppt','spring_ppt','summer_ppt',
'wytmax','fall_tmax','winter_tmax','spring_tmax','summer_tmax',
'wytmin','fall_tmin','winter_tmin','spring_tmin','summer_tmin',
'wytmean','fall_tmean','winter_tmean','spring_tmean','summer_tmean')
##### input data from LAGOS NE #####
# About and links to public datasets: https://lagoslakes.org/
dt <- lagosne_load(version = '1.087.1') #returns list of data.frame objects in LAGOS NE
secchi <- dt$secchi #type ?secchi for information about this data frame (Secchi/Water Clarity)
epi_nutr <- dt$epi_nutr #type ?epi_nutr for information about this data frame (Epilimnion Water Quality)
# GIS data downloaded and stored locally from:
# Soranno P., K. Cheruvelil. (2017c). LAGOS-NE-GIS v1.0: A module for LAGOS-NE,
# a multi-scaled geospatial and temporal database of lake ecological context and water
# quality for thousands of U.S. Lakes: 2013-1925. Environmental Data Initiative.
# Package ID: edi.98.1
# http://dx.doi.org/10.6073/pasta/fb4f5687339bec467ce0ed1ea0b5f0ca. Dataset accessed 9/26/2017.
# LAGOS_NE_All_Lakes_4ha_POINTS.zip (5.8 MB)
lakes_4ha_points <- shapefile("C:/Ian_GIS/LAGOS-NE-GISv1.0/LAGOS_NE_All_Lakes_4ha_POINTS/LAGOS_NE_All_Lakes_4ha_POINTS.shp")
lakes_4ha_df <- lakes_4ha_points@data
# STATE.zip (1.8 MB)
LAGOS_NE_states <- shapefile("C:/Ian_GIS/LAGOS-NE-GISv1.0/STATE/STATE.shp")
########################### main program ###############################
# first part is wrangling data
# remove lakes without limno data by merge
sample_lakes <- merge(lakes_4ha_df, secchi, by.x="lagoslakei", by.y="lagoslakeid", all.x=F)
# convert date factor to date format
sample_lakes$sampledate <- as.Date(sample_lakes$sampledate, format="%m/%d/%Y")
sample_lakes$monthday <- format(sample_lakes$sampledate, format="%m%d")
# subset by sample date cutoffs specified above
sample_lakes <- sample_lakes[sample_lakes$monthday >= first_day & sample_lakes$monthday <= last_day,]
# subset by sample months and years
sample_lakes <- subset(sample_lakes, sampleyear >= first_year & sampleyear <= last_year)
# create data frame of just lagoslakeid and HUC 12 zoneIDs for merging later
HUC12_lagos_lakeIDs <- data.frame(HUC12_ZoneID = sample_lakes$HU12_ZoneI, lagoslakeid=sample_lakes$lagoslakei)
# remove duplicates of HUC12 IDs...this is the variable to join to climate data
HUC12_lagos_lakeIDs <- unique(HUC12_lagos_lakeIDs) #now have one row per unique lagos/HUC12 ID combo
# calculate annual means for each lake (by lagoslakeid and sampleyear)
sample_data <- aggregate(sample_lakes[,limno_var], by=list(sample_lakes$lagoslakei, sample_lakes$sampleyear),
FUN='mean')
colnames(sample_data) <- c('lagoslakeid','sampleyear',limno_var)
sample_data <- sample_data[!(is.na(sample_data[,limno_var]) | sample_data[,limno_var]==""), ] #remove rows with NA for limno var
# find number of sample years for each lake
number_years <- as.data.frame(table(sample_data$lagoslakeid))
colnames(number_years) <- c("lagoslakeid","nYears")
number_years$nYears <- as.integer(number_years$nYears) #subset won't work unless no columns are factors
number_years$lagoslakeid <- as.numeric(levels(number_years$lagoslakeid))[number_years$lagoslakeid]
# identify lakes by lagoslakeid with minimum number of years or more
number_years_cut <- subset(number_years, nYears >= min_years)
# remove lakes without enough data years
sample_data_nYears <- merge(sample_data, number_years_cut, by='lagoslakeid')
# merge back with df with lagoslakeid and HUC 12 zone ID to get HUC 12 zone id column back
sample_data_nYears <- merge(sample_data_nYears, HUC12_lagos_lakeIDs, by='lagoslakeid')
# sort this df by lagoslakeid and sampleyear
sample_data_nYears <- sample_data_nYears[order(sample_data_nYears$lagoslakeid, sample_data_nYears$sampleyear),]
# eliminate rows that didn't associate with a HUC 12
sample_data_nYears <- sample_data_nYears[!grepl("OUT_OF_HU12", sample_data_nYears$HUC12_ZoneID),]
# keep only rows from states of interest by creating state data frame and joining
# create state_df for joining ease, which will have lagsolakeid, STATE and region
state_df <- lakes_4ha_df[,c('lagoslakei','STATE')]
state_df <- subset(state_df, STATE %in% c('ME','VT','NY','MN','WI','MI'))
sample_data_nYears <- merge(sample_data_nYears, state_df, by.x='lagoslakeid',by.y='lagoslakei', all.x=F)
sample_data_nYears$STATE <- NULL
# split up data by lagoslakeid to get list of lake-specific time series
sample_data_nYears_split <- split(sample_data_nYears, f=sample_data_nYears$lagoslakeid)
#### loop wrangled data through correlation with climate data (read from disk)
# output is data frame that can be saved or joined to the lake shapefile attribute table
#### ppt variables ####
split_list <- names(sample_data_nYears_split)
clim_var = 'ppt'
out_list <- list()
for (i in 1:length(split_list)){
clim_path <- paste0("C:/Ian_GIS/lagoslakeid_climate/",clim_var,"/lagoslakeid_",split_list[i],"_",clim_var,".csv")
clim_data <- read.csv(clim_path)
clim_data <- clim_data[,-1] #delete useless auto-index column
limno_data <- as.data.frame(sample_data_nYears_split[i])
colnames(limno_data) <- c('lagoslakeid','WY',limno_var,'nYears','HUC12_ZoneID')
clim_limno_merged <- inner_join(clim_data, limno_data, by='WY')
cormat <- as.data.frame(suppressWarnings(cor(clim_limno_merged[sapply(clim_limno_merged, is.numeric)], use='pairwise.complete.obs')))
cor_one_lake <- cormat[limno_var]
cor_one_lake[nrow(cor_one_lake),] <- clim_limno_merged$nYears[1] #make nYears in cor matrix the number of years in analysis, borrowed from another data frame
colnames(cor_one_lake) <- split_list[i]
cor_one_lake <- t(cor_one_lake)
out_list[[i]] <- cor_one_lake
}
# store output in data frame
big_Daddy_ppt <- do.call(rbind.data.frame, out_list)
big_Daddy_ppt$lagoslakeid <- rownames(big_Daddy_ppt)
big_Daddy_ppt <- big_Daddy_ppt[,c(length(big_Daddy_ppt),1,3:length(big_Daddy_ppt)-1)] #reorder columns
big_Daddy_ppt$WY <- NULL
# which are significant?
out_list_p <- list() #empty list to store for loop output
for (i in 1:length(split_list)){
# read in and ready climate data
clim_path <- paste0("C:/Ian_GIS/lagoslakeid_climate/",clim_var,"/lagoslakeid_",split_list[i],"_",clim_var,".csv")
clim_data <- read.csv(clim_path)
clim_data <- clim_data[,-1] #delete useless auto-index column
# get limno data, create correlation matrix based on numeric columns in merged climate and limno data df
limno_data <- as.data.frame(sample_data_nYears_split[i])
colnames(limno_data) <- c('lagoslakeid','WY',limno_var,'nYears','HUC12_ZoneID')
clim_limno_merged <- inner_join(clim_data, limno_data, by='WY')#join by water year to match annual climate with annual limno data
cordat <- as.matrix(clim_limno_merged[sapply(clim_limno_merged, is.numeric)]) #create matrix of only numeric columns
cormat <- rcorr(cordat, type="pearson")
cormat <- rcorr(clim_limno_merged[,limno_var], as.matrix(clim_limno_merged[sapply(clim_limno_merged, is.numeric)], type='pearson'))
cormat_p <- as.data.frame(cormat$P) #pull out pvalues from rcorr output
cormat_p[,1] <- NULL # delete useless first column and row
cormat_p <- cormat_p[-1,]
cormat_df <- data.frame(p=cormat_p[,limno_var]) #create data frame, pulling out only data for limno_var
colnames(cormat_df) <- c(split_list[i]) #name column by lagoslakeid
out_list_p[[i]] <- t(cormat_df) #store in list created above, using transposition so each is row in combined data frame below
}
nameos <- names(clim_limno_merged[sapply(clim_limno_merged, is.numeric)])
pvalue_ppt_df <- do.call(rbind.data.frame, out_list_p)
colnames(pvalue_ppt_df) <- nameos
pvalue_ppt_df$lagoslakeid <- rownames(pvalue_ppt_df)
pvalue_ppt_df <- pvalue_ppt_df[,c(length(pvalue_ppt_df),1,3:length(pvalue_ppt_df)-1)] #reorder columns
pvalue_ppt_df$WY <- NULL
#### tmean variables ####
split_list <- names(sample_data_nYears_split)
clim_var = 'tmean'
out_list <- list()
for (i in 1:length(split_list)){
clim_path <- paste0("C:/Ian_GIS/lagoslakeid_climate/",clim_var,"/lagoslakeid_",split_list[i],"_",clim_var,".csv")
clim_data <- read.csv(clim_path)
clim_data <- clim_data[,-1] #delete useless auto-index column
limno_data <- as.data.frame(sample_data_nYears_split[i])
colnames(limno_data) <- c('lagoslakeid','WY',limno_var,'nYears','HUC12_ZoneID')
clim_limno_merged <- inner_join(clim_data, limno_data, by='WY')
cormat <- as.data.frame(suppressWarnings(cor(clim_limno_merged[sapply(clim_limno_merged, is.numeric)], use='pairwise.complete.obs')))
cor_one_lake <- cormat[limno_var]
cor_one_lake[nrow(cor_one_lake),] <- clim_limno_merged$nYears[1] #make nYears in cor matrix the number of years in analysis, borrowed from another data frame
colnames(cor_one_lake) <- split_list[i]
cor_one_lake <- t(cor_one_lake)
out_list[[i]] <- cor_one_lake
}
# store output in data frame
big_Daddy_tmean <- do.call(rbind.data.frame, out_list)
big_Daddy_tmean$nYears <- NULL
big_Daddy_tmean[,limno_var] <- NULL
big_Daddy_tmean$WY <- NULL
# which are significant?
out_list_p <- list() #empty list to store for loop output
for (i in 1:length(split_list)){
# read in and ready climate data
clim_path <- paste0("C:/Ian_GIS/lagoslakeid_climate/",clim_var,"/lagoslakeid_",split_list[i],"_",clim_var,".csv")
clim_data <- read.csv(clim_path)
clim_data <- clim_data[,-1] #delete useless auto-index column
# get limno data, create correlation matrix based on numeric columns in merged climate and limno data df
limno_data <- as.data.frame(sample_data_nYears_split[i])
colnames(limno_data) <- c('lagoslakeid','WY',limno_var,'nYears','HUC12_ZoneID')
clim_limno_merged <- inner_join(clim_data, limno_data, by='WY')#join by water year to match annual climate with annual limno data
cordat <- as.matrix(clim_limno_merged[sapply(clim_limno_merged, is.numeric)]) #create matrix of only numeric columns
cormat <- rcorr(cordat, type="pearson")
cormat <- rcorr(clim_limno_merged[,limno_var], as.matrix(clim_limno_merged[sapply(clim_limno_merged, is.numeric)], type='pearson'))
cormat_p <- as.data.frame(cormat$P) #pull out pvalues from rcorr output
cormat_p[,1] <- NULL # delete useless first column and row
cormat_p <- cormat_p[-1,]
cormat_df <- data.frame(p=cormat_p[,limno_var]) #create data frame, pulling out only data for limno_var
colnames(cormat_df) <- c(split_list[i]) #name column by lagoslakeid
out_list_p[[i]] <- t(cormat_df) #store in list created above, using transposition so each is row in combined data frame below
}
nameos <- names(clim_limno_merged[sapply(clim_limno_merged, is.numeric)])
pvalue_tmean_df <- do.call(rbind.data.frame, out_list_p)
colnames(pvalue_tmean_df) <- nameos
pvalue_tmean_df$nYears <- NULL
pvalue_tmean_df[,limno_var] <- NULL
pvalue_tmean_df$WY <- NULL
#### tmin variables ####
split_list <- names(sample_data_nYears_split)
clim_var = 'tmin'
out_list <- list()
for (i in 1:length(split_list)){
clim_path <- paste0("C:/Ian_GIS/lagoslakeid_climate/",clim_var,"/lagoslakeid_",split_list[i],"_",clim_var,".csv")
clim_data <- read.csv(clim_path)
clim_data <- clim_data[,-1] #delete useless auto-index column
limno_data <- as.data.frame(sample_data_nYears_split[i])
colnames(limno_data) <- c('lagoslakeid','WY',limno_var,'nYears','HUC12_ZoneID')
clim_limno_merged <- inner_join(clim_data, limno_data, by='WY')
cormat <- as.data.frame(suppressWarnings(cor(clim_limno_merged[sapply(clim_limno_merged, is.numeric)], use='pairwise.complete.obs')))
cor_one_lake <- cormat[limno_var]
cor_one_lake[nrow(cor_one_lake),] <- clim_limno_merged$nYears[1] #make nYears in cor matrix the number of years in analysis, borrowed from another data frame
colnames(cor_one_lake) <- split_list[i]
cor_one_lake <- t(cor_one_lake)
out_list[[i]] <- cor_one_lake
}
# store output in data frame
big_Daddy_tmin <- do.call(rbind.data.frame, out_list)
big_Daddy_tmin$nYears <- NULL
big_Daddy_tmin[,limno_var] <- NULL
big_Daddy_tmin$WY <- NULL
# which are significant?
out_list_p <- list() #empty list to store for loop output
for (i in 1:length(split_list)){
# read in and ready climate data
clim_path <- paste0("C:/Ian_GIS/lagoslakeid_climate/",clim_var,"/lagoslakeid_",split_list[i],"_",clim_var,".csv")
clim_data <- read.csv(clim_path)
clim_data <- clim_data[,-1] #delete useless auto-index column
# get limno data, create correlation matrix based on numeric columns in merged climate and limno data df
limno_data <- as.data.frame(sample_data_nYears_split[i])
colnames(limno_data) <- c('lagoslakeid','WY',limno_var,'nYears','HUC12_ZoneID')
clim_limno_merged <- inner_join(clim_data, limno_data, by='WY')#join by water year to match annual climate with annual limno data
cordat <- as.matrix(clim_limno_merged[sapply(clim_limno_merged, is.numeric)]) #create matrix of only numeric columns
cormat <- rcorr(cordat, type="pearson")
cormat <- rcorr(clim_limno_merged[,limno_var], as.matrix(clim_limno_merged[sapply(clim_limno_merged, is.numeric)], type='pearson'))
cormat_p <- as.data.frame(cormat$P) #pull out pvalues from rcorr output
cormat_p[,1] <- NULL # delete useless first column and row
cormat_p <- cormat_p[-1,]
cormat_df <- data.frame(p=cormat_p[,limno_var]) #create data frame, pulling out only data for limno_var
colnames(cormat_df) <- c(split_list[i]) #name column by lagoslakeid
out_list_p[[i]] <- t(cormat_df) #store in list created above, using transposition so each is row in combined data frame below
}
nameos <- names(clim_limno_merged[sapply(clim_limno_merged, is.numeric)])
pvalue_tmin_df <- do.call(rbind.data.frame, out_list_p)
colnames(pvalue_tmin_df) <- nameos
pvalue_tmin_df$nYears <- NULL
pvalue_tmin_df[,limno_var] <- NULL
pvalue_tmin_df$WY <- NULL
#### tmax variables ####
split_list <- names(sample_data_nYears_split)
clim_var = 'tmax'
out_list <- list()
for (i in 1:length(split_list)){
clim_path <- paste0("C:/Ian_GIS/lagoslakeid_climate/",clim_var,"/lagoslakeid_",split_list[i],"_",clim_var,".csv")
clim_data <- read.csv(clim_path)
clim_data <- clim_data[,-1] #delete useless auto-index column
limno_data <- as.data.frame(sample_data_nYears_split[i])
colnames(limno_data) <- c('lagoslakeid','WY',limno_var,'nYears','HUC12_ZoneID')
clim_limno_merged <- inner_join(clim_data, limno_data, by='WY')
cormat <- as.data.frame(suppressWarnings(cor(clim_limno_merged[sapply(clim_limno_merged, is.numeric)], use='pairwise.complete.obs')))
cor_one_lake <- cormat[limno_var]
cor_one_lake[nrow(cor_one_lake),] <- clim_limno_merged$nYears[1] #make nYears in cor matrix the number of years in analysis, borrowed from another data frame
colnames(cor_one_lake) <- split_list[i]
cor_one_lake <- t(cor_one_lake)
out_list[[i]] <- cor_one_lake
}
# store output in data frame
big_Daddy_tmax <- do.call(rbind.data.frame, out_list)
big_Daddy_tmax$nYears <- NULL
big_Daddy_tmax[,limno_var] <- NULL
big_Daddy_tmax$WY <- NULL
# which are significant?
out_list_p <- list() #empty list to store for loop output
for (i in 1:length(split_list)){
# read in and ready climate data
clim_path <- paste0("C:/Ian_GIS/lagoslakeid_climate/",clim_var,"/lagoslakeid_",split_list[i],"_",clim_var,".csv")
clim_data <- read.csv(clim_path)
clim_data <- clim_data[,-1] #delete useless auto-index column
# get limno data, create correlation matrix based on numeric columns in merged climate and limno data df
limno_data <- as.data.frame(sample_data_nYears_split[i])
colnames(limno_data) <- c('lagoslakeid','WY',limno_var,'nYears','HUC12_ZoneID')
clim_limno_merged <- inner_join(clim_data, limno_data, by='WY')#join by water year to match annual climate with annual limno data
cordat <- as.matrix(clim_limno_merged[sapply(clim_limno_merged, is.numeric)]) #create matrix of only numeric columns
cormat <- rcorr(cordat, type="pearson")
cormat <- rcorr(clim_limno_merged[,limno_var], as.matrix(clim_limno_merged[sapply(clim_limno_merged, is.numeric)], type='pearson'))
cormat_p <- as.data.frame(cormat$P) #pull out pvalues from rcorr output
cormat_p[,1] <- NULL # delete useless first column and row
cormat_p <- cormat_p[-1,]
cormat_df <- data.frame(p=cormat_p[,limno_var]) #create data frame, pulling out only data for limno_var
colnames(cormat_df) <- c(split_list[i]) #name column by lagoslakeid
out_list_p[[i]] <- t(cormat_df) #store in list created above, using transposition so each is row in combined data frame below
}
nameos <- names(clim_limno_merged[sapply(clim_limno_merged, is.numeric)])
pvalue_tmax_df <- do.call(rbind.data.frame, out_list_p)
colnames(pvalue_tmax_df) <- nameos
pvalue_tmax_df$nYears <- NULL
pvalue_tmax_df[,limno_var] <- NULL
pvalue_tmax_df$WY <- NULL
##### combine ppt, tmean, tmax, tmin into single DF to join to lake shapefile for mapping
big_Mama <- cbind.data.frame(big_Daddy_ppt, big_Daddy_tmean, big_Daddy_tmax, big_Daddy_tmin)
big_Mama_p <- cbind.data.frame(pvalue_ppt_df, pvalue_tmean_df, pvalue_tmax_df, pvalue_tmin_df)
# get data frame with only significant p values remaining (based on pvalue_cutoff)
big_Mama_pp <- as.data.frame(apply(big_Mama_p[,2:ncol(big_Mama_p)], 2, function(x) ifelse(x > pvalue_cutoff, NA, x)))
big_Mama_pp$lagoslakeid <- big_Mama_p$lagoslakeid
# counts of significant climate-WQ relationships by variable based on pvalue_cutoff
signif_counts <- apply(big_Mama_pp, 2, function(x) length(which(!is.na(x))))
signif_counts <- as.data.frame(sort(signif_counts, decreasing = T))
colnames(signif_counts) <- 'LakeCount'
signif_counts$pct_signif <- signif_counts$LakeCount/nrow(big_Mama)
signif_counts <- signif_counts[-1,]
order.pct_signif <- order(signif_counts$pct_signif, decreasing = T)
signif_counts$VarRank <- NA
signif_counts$VarRank[order.pct_signif] <- 1:nrow(signif_counts)
signif_counts$Var <- rownames(signif_counts)
# get stats on r values across climate variables
rmin <- sapply(big_Mama, min, na.rm=T) #for some reason, if do apply(big_Mama, 2, min, na.rm=T), get converted to factors and numbers messed up!! FML
rmax <- sapply(big_Mama, max, na.rm=T)
rmedian <- sapply(big_Mama, median, na.rm=T)
rstats_df <- cbind.data.frame(rmin[2:length(rmin)], rmax[2:length(rmax)], rmedian[2:length(rmedian)])
colnames(rstats_df) <- c('rmin','rmax','rmedian')
rstats_df$Var <- rownames(rstats_df)
# make big table to look at climate variable correlations with each other and WQ
ClimCor_stats_table <- merge(signif_counts, rstats_df, by='Var')
ClimCor_stats_table <- subset(ClimCor_stats_table, Var!=c("nYears","secchi"))
# convert factors to numeric
w <- which( sapply(ClimCor_stats_table, class ) == 'factor' )
ClimCor_stats_table[w] <- lapply(ClimCor_stats_table[w], function(x) as.numeric(as.character(x)) )
# round off numeric columns to 3 decimals (dplyr)
ClimCor_stats_table <- ClimCor_stats_table %>% mutate_if(is.numeric, funs(round(., 3)))
#### join output to lake shapefile for mapping ####
lake_output <- merge(lakes_4ha_points, big_Mama, by.x='lagoslakei',by.y='lagoslakeid', all.x=F)
# can export this one and query p values below cutoff and map underneath colored values
lake_output_p <- merge(lakes_4ha_points, big_Mama_p, by.x='lagoslakei',by.y='lagoslakeid', all.x=F)
#### export shp for mapping in ArcGIS ####
dsnname <- paste0("C:/Ian_GIS/GeographicPatterns/",limno_var,"_climate_cor_lakes")
layername <- paste0(limno_var, "_climate_cor_lakes_POINTS25")
#writeOGR(lake_output, dsn=dsnname, layer=layername, driver="ESRI Shapefile", overwrite_layer = T)
dsnname <- paste0("C:/Ian_GIS/GeographicPatterns/",limno_var,"_climate_cor_lakes")
layername <- paste0(limno_var, "_climate_cor_lakes_POINTS_pval25")
#writeOGR(lake_output_p, dsn=dsnname, layer=layername, driver="ESRI Shapefile", overwrite_layer = T)
############# comparing mapped climate sensitivites by region ########################
lake_output_coords <- data.frame(lagoslakeid = lake_output@data$lagoslakei, xCor = lake_output@coords[,1], yCor = lake_output@coords[,2])
# subset by state (for regions) and merge with coords to get xy coordinates in non-spatial data frame
# NE: "northeast" (ME, NY, VT), UM: "upper midwest" (MI, MN, WI)
lake_output_NE <- subset(lake_output@data, STATE %in% c('ME','NY','VT'))
lake_output_NE <- merge(lake_output_NE, lake_output_coords, by.x='lagoslakei', by.y='lagoslakeid', all.X=F)
lake_output_UM <- subset(lake_output@data, STATE %in% c('MI','WI','MN','MO'))
lake_output_UM <- merge(lake_output_UM, lake_output_coords, by.x='lagoslakei', by.y='lagoslakeid', all.X=F)
##### compare climate-WQ correlations between regions
climateWQ_cor_NE <- lake_output_NE[,vars_of_interest]
climateWQ_cor_UM <- lake_output_UM[,vars_of_interest]
##### Northeast
# 5th percentile
cor_NE_5list <- list()
for (i in 1:length(vars_of_interest)){
dd <- quantile(climateWQ_cor_NE[,vars_of_interest[i]], probs=c(0.05), na.rm=T)
cor_NE_5list[i] <- dd
}
cor_NE_5 <- as.data.frame(t(rbind.data.frame(cor_NE_5list)))
rownames(cor_NE_5) <- vars_of_interest
colnames(cor_NE_5) <- 'NE_5pct'
# median
cor_NE_50list <- list()
for (i in 1:length(vars_of_interest)){
dd <- quantile(climateWQ_cor_NE[,vars_of_interest[i]], probs=c(0.5), na.rm=T)
cor_NE_50list[i] <- dd
}
cor_NE_50 <- as.data.frame(t(rbind.data.frame(cor_NE_50list)))
rownames(cor_NE_50) <- vars_of_interest
colnames(cor_NE_50) <- 'NE_50pct'
# 95th percentile
cor_NE_95list <- list()
for (i in 1:length(vars_of_interest)){
dd <- quantile(climateWQ_cor_NE[,vars_of_interest[i]], probs=c(0.95), na.rm=T)
cor_NE_95list[i] <- dd
}
cor_NE_95 <- as.data.frame(t(rbind.data.frame(cor_NE_95list)))
rownames(cor_NE_95) <- vars_of_interest
colnames(cor_NE_95) <- 'NE_95pct'
climateWQ_cor_NE_summary <- cbind.data.frame(cor_NE_5, cor_NE_50, cor_NE_95)
#### Upper Midwest
# 5th percentile
cor_UM_5list <- list()
for (i in 1:length(vars_of_interest)){
dd <- quantile(climateWQ_cor_UM[,vars_of_interest[i]], probs=c(0.05), na.rm=T)
cor_UM_5list[i] <- dd
}
cor_UM_5 <- as.data.frame(t(rbind.data.frame(cor_UM_5list)))
rownames(cor_UM_5) <- vars_of_interest
colnames(cor_UM_5) <- 'UM_5pct'
# median
cor_UM_50list <- list()
for (i in 1:length(vars_of_interest)){
dd <- quantile(climateWQ_cor_UM[,vars_of_interest[i]], probs=c(0.5), na.rm=T)
cor_UM_50list[i] <- dd
}
cor_UM_50 <- as.data.frame(t(rbind.data.frame(cor_UM_50list)))
rownames(cor_UM_50) <- vars_of_interest
colnames(cor_UM_50) <- 'UM_50pct'
# 95th percentile
cor_UM_95list <- list()
for (i in 1:length(vars_of_interest)){
dd <- quantile(climateWQ_cor_UM[,vars_of_interest[i]], probs=c(0.95), na.rm=T)
cor_UM_95list[i] <- dd
}
cor_UM_95 <- as.data.frame(t(rbind.data.frame(cor_UM_95list)))
rownames(cor_UM_95) <- vars_of_interest
colnames(cor_UM_95) <- 'UM_95pct'
climateWQ_cor_UM_summary <- cbind.data.frame(cor_UM_5, cor_UM_50, cor_UM_95)
##### Combined UM and NE
combined_UM_NE_WQcor <- rbind.data.frame(climateWQ_cor_UM, climateWQ_cor_NE)
# 5th percentile
cor_All_5list <- list()
for (i in 1:length(vars_of_interest)){
dd <- quantile(combined_UM_NE_WQcor[,vars_of_interest[i]], probs=c(0.05), na.rm=T)
cor_All_5list[i] <- dd
}
cor_All_5 <- as.data.frame(t(rbind.data.frame(cor_All_5list)))
rownames(cor_All_5) <- vars_of_interest
colnames(cor_All_5) <- 'All_5pct'
# median
cor_All_50list <- list()
for (i in 1:length(vars_of_interest)){
dd <- quantile(combined_UM_NE_WQcor[,vars_of_interest[i]], probs=c(0.5), na.rm=T)
cor_All_50list[i] <- dd
}
cor_All_50 <- as.data.frame(t(rbind.data.frame(cor_All_50list)))
rownames(cor_All_50) <- vars_of_interest
colnames(cor_All_50) <- 'All_50pct'
# 95th percentile
cor_All_95list <- list()
for (i in 1:length(vars_of_interest)){
dd <- quantile(combined_UM_NE_WQcor[,vars_of_interest[i]], probs=c(0.95), na.rm=T)
cor_All_95list[i] <- dd
}
cor_All_95 <- as.data.frame(t(rbind.data.frame(cor_All_95list)))
rownames(cor_All_95) <- vars_of_interest
colnames(cor_All_95) <- 'All_95pct'
combined_UM_NE_WQcor_summary <- cbind.data.frame(cor_All_5, cor_All_50, cor_All_95)
####### get number of significant lakes by region
state_df$Region[state_df$STATE %in% c("VT","NY","ME")] <- "NE" #Northeast
state_df$Region[state_df$STATE %in% c("MI","WI","MN")] <- "UM" #Upper Midwest
signif_lakes <- left_join(big_Mama_pp, state_df, by=c('lagoslakeid'='lagoslakei')) #merge table with only signif p values with state/region df by lagoslakeid
signif_lakes_NE <- subset(signif_lakes, Region=='NE')
signif_lakes_UM <- subset(signif_lakes, Region=='UM')
signif_lakes_NE <- signif_lakes_NE[,vars_of_interest] #subset to only seasonal variables
signif_lakes_UM <- signif_lakes_UM[,vars_of_interest]
signif_lakes_all <- signif_lakes[,vars_of_interest]
# count number of non-NAs...this is the number of significant lakes, given that nonsignif p values were removed already
signif_lakes_NE_count <- apply(signif_lakes_NE, 2, function(x) length(which(!is.na(x))))
signif_lakes_UM_count <- apply(signif_lakes_UM, 2, function(x) length(which(!is.na(x))))
signif_lakes_all_count <- apply(signif_lakes_all, 2, function(x) length(which(!is.na(x))))
signif_lake_count_summary <- data.frame(UM_Signif_n= signif_lakes_UM_count, NE_Signif_n=signif_lakes_NE_count,
all_Signif_n = signif_lakes_all_count)
signif_lake_count_summary$UM_pctSignif <- signif_lake_count_summary$UM_Signif_n/nrow(signif_lakes_UM)
signif_lake_count_summary$NE_pctSignif <- signif_lake_count_summary$NE_Signif_n/nrow(signif_lakes_NE)
signif_lake_count_summary$all_pctSignif <- signif_lake_count_summary$all_Signif_n/nrow(signif_lakes_all)
signif_lake_count_summary$UM_n <- nrow(signif_lakes_UM)
signif_lake_count_summary$NE_n <- nrow(signif_lakes_NE)
signif_lake_count_summary$all_n <- nrow(signif_lakes_all)
# get proportion of lakes not significantly correlated with any climate variable
length(which(!rowSums(!is.na(signif_lakes_NE))))/nrow(signif_lakes_NE)
length(which(!rowSums(!is.na(signif_lakes_UM))))/nrow(signif_lakes_UM)
########### final summary of climate-WQ correlations by region and for both regions combined and save to disk, if desired
final_clim_WQ_cor_summary <- cbind.data.frame(climateWQ_cor_UM_summary, climateWQ_cor_NE_summary, combined_UM_NE_WQcor_summary, signif_lake_count_summary)
#write.csv(final_clim_WQ_cor_summary, "C:/Ian_GIS/GeographicPatterns/Tables/ClimateWQCorrelationSummary.csv")
################ paneled histogram of climate-water clarity correlations, comparing regions ############
#png('C:/Ian_GIS/GeographicPatterns/Figures/paneled_hist.png',width = 2.5,height = 3.87,units = 'in',res=600) #was 3.88 by 9 in
jpeg('C:/Ian_GIS/GeographicPatterns/Figures/FinalFigures/paneled_hist.jpeg',width = 2.5,height = 3.87,units = 'in',res=600)
par(mfrow=c(2,1))
par(mar=c(2,3,0.5,0.5)) #bot,left,top,right
# first plot
plot_var = 'summer_ppt'
hist(lake_output_NE[,plot_var], xlim=c(-0.9,0.9), ylim=c(0,55), col="orange", breaks=seq(-0.9,0.9,0.1),
xaxt='n', las=1, xlab='', ylab='', main='')
hist(lake_output_UM[,plot_var], add=T, xlim=c(-0.9,0.9), ylim=c(0,55), breaks=seq(-0.9,0.9,0.1), col=rgb(0.5,0.5,0.5, 0.5),
xlab='', xaxt='n', yaxt='n', main='')
abline(v=0, lty=2, lwd=2)
axis(side=1, at=seq(-0.9,0.9,0.2), labels=seq(-0.9,0.9,0.2), cex.axis=0.8)
legend('topright',legend=c('NE','MW'), col=c('orange','gray'), pch=c(15,15), bty='n')
# second plot
plot_var = 'summer_tmax'
hist(lake_output_NE[,plot_var], xlim=c(-0.9,0.9), ylim=c(0,55), col="orange", breaks=seq(-0.9,0.9,0.1),
xaxt='n', las=1, xlab='', ylab='', main='')
hist(lake_output_UM[,plot_var], add=T, xlim=c(-0.9,0.9), ylim=c(0,55), breaks=seq(-0.9,0.9,0.1), col=rgb(0.5,0.5,0.5, 0.5),
xlab='', xaxt='n', yaxt='n', main='')
abline(v=0, lty=2, lwd=2)
axis(side=1, at=seq(-0.9,0.9,0.2), labels=seq(-0.9,0.9,0.2), cex.axis=0.8)
#mtext("Correlation coefficient (r)", side=3, at=c(0,50), line=-2, cex=0.7)
# # third plot
# par(mar=c(2.5,3,0.5,0.5)) #bot,left,top,right
# plot_var = 'fall_tmin'
# hist(lake_output_NE[,plot_var], xlim=c(-0.9,0.9), ylim=c(0,55), col="orange", breaks=seq(-0.9,0.9,0.1),
# xaxt='n', las=1, xlab='Correlation coefficient (r)', ylab='', main='')
# hist(lake_output_UM[,plot_var], add=T, xlim=c(-0.9,0.9), ylim=c(0,55), breaks=seq(-0.9,0.9,0.1), col=rgb(0.5,0.5,0.5, 0.5),
# xlab='', xaxt='n', yaxt='n', main='')
# axis(side=1, at=seq(-0.9,0.9,0.2), labels=seq(-0.9,0.9,0.2), cex.axis=0.8)
dev.off()
################################ gradient analysis ####################################
# relationships between climate sensitivities and ecological context variables
# e.g., what is the correlation between clarity-summer precip correlation and lake depth?
# create data frames of coordinates and climate sensitivities and p values of those correlations
x_df = as.data.frame(lake_output@data)
x_df$xCor = lake_output@coords[,1]
x_df$yCor = lake_output@coords[,2]
pval_df = as.data.frame(lake_output_p@data)
pval_df$xCor = lake_output_p@coords[,1]
pval_df$yCor = lake_output_p@coords[,2]
############ depth gradients from LAGOS data (mean, max depth; no predicted values) ###########
# warning: not all lakes necessarily have associated depth data
focal_var_vector <- names(x_df)[33:102]
depth2_gradient <- merge(big_Mama, dt$lakes_limno, by='lagoslakeid')#lose lakes without observed or predicted depth2; oh well
cor_list <- list()
for (i in 1:length(focal_var_vector)){
depth2_gradient <- merge(big_Mama[,c(focal_var_vector[i],'lagoslakeid')], dt$lakes_limno, by='lagoslakeid')
whee <- as.data.frame(suppressWarnings(cor(depth2_gradient[sapply(depth2_gradient, is.numeric)], use='pairwise.complete.obs')))
whee <- as.data.frame(whee[1])
rowname_vec <- rownames(whee)[2:nrow(whee)]
whee <- as.data.frame(whee[-1,]) #delete first row; correlation with itself
names(whee) <- focal_var_vector[i]
rownames(whee) <- rowname_vec
cor_list[i] <- whee
}
depth2_cor_table <- do.call(cbind.data.frame, cor_list)
rownames(depth2_cor_table) <- rowname_vec
colnames(depth2_cor_table) <- focal_var_vector
depth2_cor_table <- as.data.frame(t(depth2_cor_table))
fall <- depth2_cor_table[grep("fall", rownames(depth2_cor_table)), ]
winter <- depth2_cor_table[grep("winter", rownames(depth2_cor_table)), ]
spring <- depth2_cor_table[grep("spring", rownames(depth2_cor_table)), ]
summer <- depth2_cor_table[grep("summer", rownames(depth2_cor_table)), ]
wy <- depth2_cor_table[grep("wy", rownames(depth2_cor_table)), ]
LAGOS_depth_cor_seasonal <- rbind.data.frame(fall,winter,spring,summer,wy)
# data frames for scatter plots
depth2_gradient_df <- merge(big_Mama, dt$lakes_limno, by='lagoslakeid')
depth2_gradient_df <- merge(depth2_gradient_df, state_df, by.x='lagoslakeid',by.y='lagoslakei')
depth2_gradient_df$StateFac <- as.factor(depth2_gradient_df$STATE)
state_abbr <- levels(depth2_gradient_df$StateFac)
### divide depth gradient analysis by region
depth_gradient2_df_UM <- subset(depth2_gradient_df, Region =='UM')
depth_gradient2_df_NE <- subset(depth2_gradient_df, Region =='NE')
# loop through gradient variable to plot correlations between them and select climate variable
depth_gradient2_variables <- names(depth2_gradient_df)[c(74,76)] #only plot mean and max depths
plot_var <- 'summer_ppt'
for (i in 1:length(depth_gradient2_variables)) {
gradient_var <- depth_gradient2_variables[i]
par(mfrow=c(1,2))
plot(depth_gradient2_df_UM[,plot_var]~depth_gradient2_df_UM[,depth_gradient2_variables[i]],
main=paste0(limno_var, ' Upper Midwest'), pch=20, col='red',
xlim=c(), ylim=c(-1,1), ylab=paste0('sensitivity of ', limno_var, ' to ', plot_var),
xlab=depth_gradient2_variables[i])
abline(0,0, lty=2)
# create linear model to calculate coef (slope)
lm <- lm(depth_gradient2_df_UM[,plot_var]~ depth_gradient2_df_UM[,depth_gradient2_variables[i]])
slope <- round(lm$coefficients[2], digits=3)
abline(lm)
cortest <- cor.test(depth_gradient2_df_UM[,plot_var], depth_gradient2_df_UM[,depth_gradient2_variables[i]], alternative = 'two.sided',
method='pearson',conf.level=(1-pvalue_cutoff)) #get pvalue from correlation so it can be plotted
plot_pval <- cortest$p.value
legend('topright', bty='n', legend=paste0('r = ',round(cor(depth_gradient2_df_UM[,plot_var], depth_gradient2_df_UM[,depth_gradient2_variables[i]],
use='pairwise.complete.obs'), digits=3)))
legend('topleft', bty='n', legend=paste0('p = ', round(plot_pval, digits=3)))
legend('bottomleft', bty='n', legend=paste0('coef = ', slope))
legend('bottomright', bty='n', legend=paste0('n = ', length(na.omit(depth_gradient2_df_UM[,gradient_var]))))
mtext(side=3, paste0('gradient variable = ',gradient_var, ', climate variable = ', plot_var), cex=0.75)
plot(depth_gradient2_df_NE[,plot_var]~depth_gradient2_df_NE[,depth_gradient2_variables[i]],
main=paste0(limno_var, ' Northeast'), pch=20, col='dodgerblue',
xlim=c(), ylim=c(-1,1), ylab=paste0('sensitivity of ', limno_var, ' to ', plot_var),
xlab=depth_gradient2_variables[i])
abline(0,0, lty=2)
lm <- lm(depth_gradient2_df_NE[,plot_var]~ depth_gradient2_df_NE[,depth_gradient2_variables[i]])
slope <- round(lm$coefficients[2], digits=3)
abline(lm)
cortest <- cor.test(depth_gradient2_df_NE[,plot_var], depth_gradient2_df_NE[,depth_gradient2_variables[i]], alternative = 'two.sided',
method='pearson',conf.level=(1-pvalue_cutoff)) #get pvalue from correlation so it can be plotted
plot_pval <- cortest$p.value
legend('topright', bty='n', legend=paste0('r = ',round(cor(depth_gradient2_df_NE[,plot_var], depth_gradient2_df_NE[,depth_gradient2_variables[i]],
use='pairwise.complete.obs'), digits=3)))
legend('topleft', bty='n', legend=paste0('p = ', round(plot_pval, digits=3)))
legend('bottomleft', bty='n', legend=paste0('coef = ', slope))
legend('bottomright', bty='n', legend=paste0('n = ', length(na.omit(depth_gradient2_df_NE[,gradient_var]))))
mtext(side=3, paste0('gradient variable = ',gradient_var, ', climate variable = ', plot_var), cex=0.75)
}
####### iws variables (watershed, lake area) ########
focal_var_vector <- names(x_df)[33:102]
iws_gradient <- merge(big_Mama, dt$iws, by='lagoslakeid')
dt$iws$iws_lake_ratio <- dt$iws$iws_ha/dt$iws$iws_lakeareaha #calculate iws area ratio/lake area ratio
cor_list <- list()
for (i in 1:length(focal_var_vector)){
iws_gradient <- merge(big_Mama[,c(focal_var_vector[i],'lagoslakeid')], dt$iws, by='lagoslakeid')
whee <- as.data.frame(suppressWarnings(cor(iws_gradient[sapply(iws_gradient, is.numeric)], use='pairwise.complete.obs')))
whee <- as.data.frame(whee[1])
rowname_vec <- rownames(whee)[2:nrow(whee)]
whee <- as.data.frame(whee[-1,]) #delete first row; correlation with itself
names(whee) <- focal_var_vector[i]
rownames(whee) <- rowname_vec
cor_list[i] <- whee
}
iws_cor_table <- do.call(cbind.data.frame, cor_list)
rownames(iws_cor_table) <- rowname_vec
colnames(iws_cor_table) <- focal_var_vector
iws_cor_table <- as.data.frame(t(iws_cor_table))
fall <- iws_cor_table[grep("fall", rownames(iws_cor_table)), ]
winter <- iws_cor_table[grep("winter", rownames(iws_cor_table)), ]
spring <- iws_cor_table[grep("spring", rownames(iws_cor_table)), ]
summer <- iws_cor_table[grep("summer", rownames(iws_cor_table)), ]
wy <- iws_cor_table[grep("wy", rownames(iws_cor_table)), ]
iws_cor_seasonal <- rbind.data.frame(fall,winter,spring,summer,wy)
# data frames for scatter plots
iws_gradient_df <- merge(big_Mama, dt$iws, by='lagoslakeid')
iws_gradient_df <- merge(iws_gradient_df, state_df, by.x='lagoslakeid',by.y='lagoslakei')
iws_gradient_df$StateFac <- as.factor(iws_gradient_df$STATE)
state_abbr <- levels(iws_gradient_df$StateFac)
### divide iws gradient analysis by region
iws_gradient_df_UM <- subset(iws_gradient_df, Region =='UM')
iws_gradient_df_NE <- subset(iws_gradient_df, Region =='NE')
## single variable plot of regions side by side
# deal with lake area outliers
iws_gradient_df_NE_areaclean <- subset(iws_gradient_df_NE, iws_lakeareaha < 1.2e4)
iws_gradient_df_UM_areaclean <- subset(iws_gradient_df_UM, iws_lakeareaha < 10000)
# plot these without the outliers
plot_var = 'summer_ppt'
gradient_var = 'iws_lake_ratio' #'iws_lake_ratio' ,'iws_lakeareaha'
par(mfrow=c(1,2))
plot(iws_gradient_df_UM_areaclean[,plot_var]~iws_gradient_df_UM_areaclean[,gradient_var],
main=paste0(limno_var, ' Upper Midwest'), pch=20, col='red',
xlim=c(0,20000), ylim=c(-1,1), ylab=paste0('sensitivity of ', limno_var, ' to ', plot_var),
xlab=gradient_var)
abline(0,0, lty=2)
# create linear model to calculate coef (slope)
lm <- lm(iws_gradient_df_UM_areaclean[,plot_var]~ iws_gradient_df_UM_areaclean[,gradient_var])
slope <- round(lm$coefficients[2], digits=3)
abline(lm, lty=1)
cortest <- cor.test(iws_gradient_df_UM_areaclean[,plot_var], iws_gradient_df_UM_areaclean[,gradient_var], alternative = 'two.sided',
method='pearson',conf.level=(1-pvalue_cutoff)) #get pvalue from correlation so it can be plotted
plot_pval <- cortest$p.value
legend('topright', bty='n', legend=paste0('r = ',round(cor(iws_gradient_df_UM_areaclean[,plot_var], iws_gradient_df_UM_areaclean[,gradient_var],
use='pairwise.complete.obs'), digits=3)))
legend('topleft', bty='n', legend=paste0('p = ', round(plot_pval, digits=3)))
legend('bottomleft', bty='n', legend=paste0('coef = ', slope))
legend('bottomright', bty='n', legend=paste0('n = ', length(na.omit(iws_gradient_df_UM_areaclean[,gradient_var]))))
mtext(side=3, paste0('gradient variable = ',gradient_var, ', climate variable = ', plot_var), cex=0.75)
plot(iws_gradient_df_NE_areaclean[,plot_var]~iws_gradient_df_NE_areaclean[,gradient_var],
main=paste0(limno_var, ' Northeast'), pch=20, col='dodgerblue',
xlim=c(0,20000), ylim=c(-1,1), ylab=paste0('sensitivity of ', limno_var, ' to ', plot_var),
xlab=gradient_var)
abline(0,0, lty=2)
# create linear model to calculate coef (slope)
lm <- lm(iws_gradient_df_NE_areaclean[,plot_var]~ iws_gradient_df_NE_areaclean[,gradient_var])
slope <- round(lm$coefficients[2], digits=3)
abline(lm, lty=1)
cortest <- cor.test(iws_gradient_df_NE_areaclean[,plot_var], iws_gradient_df_NE_areaclean[,gradient_var], alternative = 'two.sided',
method='pearson',conf.level=(1-pvalue_cutoff)) #get pvalue from correlation so it can be plotted
plot_pval <- cortest$p.value
legend('topright', bty='n', legend=paste0('r = ',round(cor(iws_gradient_df_NE_areaclean[,plot_var], iws_gradient_df_NE_areaclean[,gradient_var],
use='pairwise.complete.obs'), digits=3)))
legend('topleft', bty='n', legend=paste0('p = ', round(plot_pval, digits=3)))
legend('bottomleft', bty='n', legend=paste0('coef = ', slope))
legend('bottomright', bty='n', legend=paste0('n = ', length(na.omit(iws_gradient_df_NE_areaclean[,gradient_var]))))
mtext(side=3, paste0('gradient variable = ',gradient_var, ', climate variable = ', plot_var), cex=0.75)
# size distributions of study lakes and watersheds?
#hist(iws_gradient_df_NE_areaclean$iws_lakeareaha, main='NE', xlab='ha', xlim=c(0,7000))
################ Land use/land cover (LULC) gradients in watersheds (iws) #################
iws_LULC_gradient <- merge(big_Mama, dt$iws.lulc, by='lagoslakeid')
# calculate total pct forest in 1992 NLCD (decid + conifer + mixed)
dt$iws.lulc$total_forest_pct_1992 <- (dt$iws.lulc$iws_nlcd1992_pct_41 + dt$iws.lulc$iws_nlcd1992_pct_42 +dt$iws.lulc$iws_nlcd1992_pct_43)
# calculate total pct ag in 1992 NLCD (pasture/hay + row crops + small grains)
dt$iws.lulc$total_ag_pct_1992 <- (dt$iws.lulc$iws_nlcd1992_pct_81 + dt$iws.lulc$iws_nlcd1992_pct_82 +dt$iws.lulc$iws_nlcd1992_pct_83)
cor_list <- list()
for (i in 1:length(focal_var_vector)){
iws_LULC_gradient <- merge(big_Mama[,c(focal_var_vector[i],'lagoslakeid')], dt$iws.lulc, by='lagoslakeid')
whee <- as.data.frame(suppressWarnings(cor(iws_LULC_gradient[sapply(iws_LULC_gradient, is.numeric)], use='pairwise.complete.obs')))
whee <- as.data.frame(whee[1])
rowname_vec <- rownames(whee)[2:nrow(whee)]
whee <- as.data.frame(whee[-1,]) #delete first row; correlation with itself
names(whee) <- focal_var_vector[i]
rownames(whee) <- rowname_vec
cor_list[i] <- whee
}
iws_LULC_cor_table <- do.call(cbind.data.frame, cor_list)
rownames(iws_LULC_cor_table) <- rowname_vec
colnames(iws_LULC_cor_table) <- focal_var_vector
iws_LULC_cor_table <- as.data.frame(t(iws_LULC_cor_table))
# get rid of columns with "_ha_" (plain hectares, prefer percent of IWS)
iws_LULC_cor_table <- iws_LULC_cor_table[, -grep("_ha_", colnames(iws_LULC_cor_table))]
# delete columns with all NA
iws_LULC_cor_table <- iws_LULC_cor_table[,colSums(is.na(iws_LULC_cor_table))<nrow(iws_LULC_cor_table)]
fall <- iws_LULC_cor_table[grep("fall", rownames(iws_LULC_cor_table)), ]
winter <- iws_LULC_cor_table[grep("winter", rownames(iws_LULC_cor_table)), ]
spring <- iws_LULC_cor_table[grep("spring", rownames(iws_LULC_cor_table)), ]
summer <- iws_LULC_cor_table[grep("summer", rownames(iws_LULC_cor_table)), ]
wy <- iws_LULC_cor_table[grep("wy", rownames(iws_LULC_cor_table)), ]
iws_LULC_cor_seasonal <- rbind.data.frame(fall,winter,spring,summer,wy)
# data frames for scatter plots
iws_gradient_lulc_df <- merge(big_Mama, dt$iws.lulc, by='lagoslakeid')
iws_gradient_lulc_df <- merge(iws_gradient_lulc_df, state_df, by.x='lagoslakeid',by.y='lagoslakei')
iws_gradient_lulc_df$StateFac <- as.factor(iws_gradient_lulc_df$STATE)
state_abbr <- levels(iws_gradient_df$StateFac)
### divide gradient analysis by region
iws_gradient_lulc_df_UM <- subset(iws_gradient_lulc_df, Region =='UM')
iws_gradient_lulc_df_NE <- subset(iws_gradient_lulc_df, Region =='NE')
# single variable plot of regions side by side
plot_var = 'summer_tmax'
gradient_var = 'total_ag_pct_1992' #'total_ag_pct_1992', 'iws_slope_mean', 'total_forest_pct_1992'
par(mfrow=c(1,2))
plot(iws_gradient_lulc_df_UM[,plot_var]~iws_gradient_lulc_df_UM[,gradient_var],
main=paste0(limno_var, ' Upper Midwest'), pch=20, col='red',
xlim=c(), ylim=c(-1,1), ylab=paste0('sensitivity of ', limno_var, ' to ', plot_var),
xlab=gradient_var)
abline(0,0, lty=2)
# create linear model to calculate coef (slope)
lm <- lm(iws_gradient_lulc_df_UM[,plot_var]~ iws_gradient_lulc_df_UM[,gradient_var])
slope <- round(lm$coefficients[2], digits=3)
abline(lm, lty=1)
cortest <- cor.test(iws_gradient_lulc_df_UM[,plot_var], iws_gradient_lulc_df_UM[,gradient_var], alternative = 'two.sided',
method='pearson',conf.level=(1-pvalue_cutoff)) #get pvalue from correlation so it can be plotted
plot_pval <- cortest$p.value
legend('topright', bty='n', legend=paste0('r = ',round(cor(iws_gradient_lulc_df_UM[,plot_var], iws_gradient_lulc_df_UM[,gradient_var],
use='pairwise.complete.obs'), digits=3)))
legend('topleft', bty='n', legend=paste0('p = ', round(plot_pval, digits=3)))
legend('bottomleft', bty='n', legend=paste0('coef = ', slope))
legend('bottomright', bty='n', legend=paste0('n = ', length(na.omit(iws_gradient_lulc_df_UM[,gradient_var]))))
mtext(side=3, paste0('gradient variable = ',gradient_var, ', climate variable = ', plot_var), cex=0.75)
plot(iws_gradient_lulc_df_NE[,plot_var]~iws_gradient_lulc_df_NE[,gradient_var],
main=paste0(limno_var, ' Northeast'), pch=20, col='dodgerblue',
xlim=c(), ylim=c(-1,1), ylab=paste0('sensitivity of ', limno_var, ' to ', plot_var),
xlab=gradient_var)
abline(0,0, lty=2)
# create linear model to calculate coef (slope)
lm <- lm(iws_gradient_lulc_df_NE[,plot_var]~ iws_gradient_lulc_df_NE[,gradient_var])
slope <- round(lm$coefficients[2], digits=3)
abline(lm, lty=1)
cortest <- cor.test(iws_gradient_lulc_df_NE[,plot_var], iws_gradient_lulc_df_NE[,gradient_var], alternative = 'two.sided',
method='pearson',conf.level=(1-pvalue_cutoff)) #get pvalue from correlation so it can be plotted
plot_pval <- cortest$p.value
legend('topright', bty='n', legend=paste0('r = ',round(cor(iws_gradient_lulc_df_NE[,plot_var], iws_gradient_lulc_df_NE[,gradient_var],
use='pairwise.complete.obs'), digits=3)))
legend('topleft', bty='n', legend=paste0('p = ', round(plot_pval, digits=3)))
legend('bottomleft', bty='n', legend=paste0('coef = ', slope))
legend('bottomright', bty='n', legend=paste0('n = ', length(na.omit(iws_gradient_lulc_df_NE[,gradient_var]))))
mtext(side=3, paste0('gradient variable = ',gradient_var, ', climate variable = ', plot_var), cex=0.75)
### how do forested lakes in agriculture-dominated Upper Midwest respond?
# subset Upper Midwest to isolate lakes with high % forest/low % ag in watershed
# create subset with watersheds with > 85% forest and <8% agriculture (median values in NE region)
UM_forested_iws_subset <- subset(iws_gradient_lulc_df_UM, total_forest_pct_1992 >= 85 & total_ag_pct_1992 <= 8)
UM_remaining_iws_subset <- subset(iws_gradient_lulc_df_UM, !(lagoslakeid %in% UM_forested_iws_subset$lagoslakeid))
# put into single boxplot
UM_forested_iws_subset$GroupID <- 'Subset'
UM_remaining_iws_subset$GroupID <- 'MainMW'
#iws_gradient_lulc_df_UM$GroupID <- 'MixedAg'
iws_gradient_lulc_df_NE$GroupID <- 'Forested'
UM_forested_iws_melted <- rbind.data.frame(UM_forested_iws_subset, UM_remaining_iws_subset)
#dev.off()
UM_NE_forested_iws_melted <- rbind.data.frame(UM_forested_iws_melted, iws_gradient_lulc_df_NE)
#png('C:/Ian_GIS/GeographicPatterns/Figures/NE_UM_Boxplot.png',width = 6,height = 6, units = 'in',res=600)
jpeg('C:/Ian_GIS/GeographicPatterns/Figures/FinalFigures/NE_UM_Boxplot.jpeg',width = 6,height = 6, units = 'in',res=600)
boxplot(UM_NE_forested_iws_melted$summer_ppt ~ UM_NE_forested_iws_melted$GroupID, ylim=c(-1,1), las=1,
ylab='Correlation coefficient (r)', main='Sensitivity to summer precipitation', col=c('orange','gray','gray'),
names=c('NE','MW subset 1','MW subset 2'))
dev.off()
# pairwise comparisons among groups (2 methods) (Forested region=northeastern region)
pairwise.t.test(UM_NE_forested_iws_melted$summer_ppt, UM_NE_forested_iws_melted$GroupID, p.adj='holm')
TukeyHSD(aov(summer_ppt ~ GroupID, data=UM_NE_forested_iws_melted))
################### what's the effect of a forest buffer right around the lake? ###################
buffer100_lulc <- dt$buffer100m.lulc
buffer100_lulc$total_forest_pct_1992 <- (buffer100_lulc$buffer100m_nlcd1992_pct_41 +
buffer100_lulc$buffer100m_nlcd1992_pct_42 +buffer100_lulc$buffer100m_nlcd1992_pct_43)
buffer100_lulc_gradient <- merge(big_Mama, buffer100_lulc, by='lagoslakeid')
cor_list <- list()
for (i in 1:length(focal_var_vector)){
buffer100_lulc_gradient <- merge(big_Mama[,c(focal_var_vector[i],'lagoslakeid')], buffer100_lulc, by='lagoslakeid')
whee <- as.data.frame(suppressWarnings(cor(buffer100_lulc_gradient[sapply(buffer100_lulc_gradient, is.numeric)], use='pairwise.complete.obs')))
whee <- as.data.frame(whee[1])
rowname_vec <- rownames(whee)[2:nrow(whee)]
whee <- as.data.frame(whee[-1,]) #delete first row; correlation with itself
names(whee) <- focal_var_vector[i]
rownames(whee) <- rowname_vec
cor_list[i] <- whee
}
buffer100_lulc_cor_table <- do.call(cbind.data.frame, cor_list)
rownames(buffer100_lulc_cor_table) <- rowname_vec
colnames(buffer100_lulc_cor_table) <- focal_var_vector
buffer100_lulc_cor_table <- as.data.frame(t(buffer100_lulc_cor_table))
# select out columns with pct and mperha in column names
buffer100_lulc_cor_table <- buffer100_lulc_cor_table %>% dplyr:: select(grep("pct", names(buffer100_lulc_cor_table)), grep("mperha", names(buffer100_lulc_cor_table)))
# delete columns with all NA
buffer100_lulc_cor_table <- buffer100_lulc_cor_table[,colSums(is.na(buffer100_lulc_cor_table))<nrow(buffer100_lulc_cor_table)]
fall <- buffer100_lulc_cor_table[grep("fall", rownames(buffer100_lulc_cor_table)), ]
winter <- buffer100_lulc_cor_table[grep("winter", rownames(buffer100_lulc_cor_table)), ]
spring <- buffer100_lulc_cor_table[grep("spring", rownames(buffer100_lulc_cor_table)), ]
summer <- buffer100_lulc_cor_table[grep("summer", rownames(buffer100_lulc_cor_table)), ]
wy <- buffer100_lulc_cor_table[grep("wy", rownames(buffer100_lulc_cor_table)), ]
buffer100_lulc_cor_seasonal <- rbind.data.frame(fall,winter,spring,summer,wy)
# data frames for scatter plots
buffer100_gradient_df <- merge(big_Mama, buffer100_lulc, by='lagoslakeid')
buffer100_gradient_df <- merge(buffer100_gradient_df, state_df, by.x='lagoslakeid',by.y='lagoslakei')
buffer100_gradient_df$StateFac <- as.factor(buffer100_gradient_df$STATE)
state_abbr <- levels(buffer100_gradient_df$StateFac)
### divide buffer100 gradient analysis by region
buffer100_gradient_df_UM <- subset(buffer100_gradient_df, Region =='UM')
buffer100_gradient_df_NE <- subset(buffer100_gradient_df, Region =='NE')
############### freshwater connectivity gradients ##################
iws_conn_gradient <- merge(big_Mama, dt$iws.conn, by='lagoslakeid')
cor_list <- list()
for (i in 1:length(focal_var_vector)){
iws_conn_gradient <- merge(big_Mama[,c(focal_var_vector[i],'lagoslakeid')], dt$iws.conn, by='lagoslakeid')
whee <- as.data.frame(suppressWarnings(cor(iws_conn_gradient[sapply(iws_conn_gradient, is.numeric)], use='pairwise.complete.obs')))
whee <- as.data.frame(whee[1])
rowname_vec <- rownames(whee)[2:nrow(whee)]
whee <- as.data.frame(whee[-1,]) #delete first row; correlation with itself
names(whee) <- focal_var_vector[i]
rownames(whee) <- rowname_vec
cor_list[i] <- whee
}
iws_conn_cor_table <- do.call(cbind.data.frame, cor_list)
rownames(iws_conn_cor_table) <- rowname_vec
colnames(iws_conn_cor_table) <- focal_var_vector
iws_conn_cor_table <- as.data.frame(t(iws_conn_cor_table))
# select out columns with pct and mperha in column names
iws_conn_cor_table <- iws_conn_cor_table %>% dplyr:: select(grep("pct", names(iws_conn_cor_table)), grep("mperha", names(iws_conn_cor_table)))
# delete columns with all NA
iws_conn_cor_table <- iws_conn_cor_table[,colSums(is.na(iws_conn_cor_table))<nrow(iws_conn_cor_table)]
fall <- iws_conn_cor_table[grep("fall", rownames(iws_conn_cor_table)), ]
winter <- iws_conn_cor_table[grep("winter", rownames(iws_conn_cor_table)), ]
spring <- iws_conn_cor_table[grep("spring", rownames(iws_conn_cor_table)), ]
summer <- iws_conn_cor_table[grep("summer", rownames(iws_conn_cor_table)), ]
wy <- iws_conn_cor_table[grep("wy", rownames(iws_conn_cor_table)), ]
iws_conn_cor_seasonal <- rbind.data.frame(fall,winter,spring,summer,wy)
# data frames for scatter plots
iws_conn_gradient_df <- merge(big_Mama, dt$iws.conn, by='lagoslakeid')
iws_conn_gradient_df <- merge(iws_conn_gradient_df, state_df, by.x='lagoslakeid',by.y='lagoslakei')
iws_conn_gradient_df$StateFac <- as.factor(iws_conn_gradient_df$STATE)
state_abbr <- levels(iws_conn_gradient_df$StateFac)
### divide iws conn gradient analysis by region
iws_conn_gradient_df_UM <- subset(iws_conn_gradient_df, Region =='UM')
iws_conn_gradient_df_NE <- subset(iws_conn_gradient_df, Region =='NE')
# single variable plot of regions side by side
plot_var = 'summer_ppt'
gradient_var = 'iws_wl_allwetlandsdissolved_overlapping_area_pct' #iws_streamdensity_streams_density_mperha, iws_lakes_overlapping_area_pct, iws_wl_allwetlandsdissolved_overlapping_area_pct
par(mfrow=c(1,2))
plot(iws_conn_gradient_df_UM[,plot_var]~iws_conn_gradient_df_UM[,gradient_var],
main=paste0(limno_var, ' Upper Midwest'), pch=20, col='red',
xlim=c(0,40), ylim=c(-1,1), ylab=paste0('sensitivity of ', limno_var, ' to ', plot_var),
xlab=gradient_var)