diff --git a/DESCRIPTION b/DESCRIPTION
index a8da22e..ca90bd8 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -24,6 +24,7 @@ Imports:
     ggplot2,
     ggrepel,
     ggvenn,
+    glue,
     impute,
     limma,
     mice,
@@ -32,13 +33,13 @@ Imports:
     pheatmap,
     psych,
     readxl,
+    scales,
     seqinr,
     softImpute,
     tidyr (>= 1.3.0),
     tibble,
     tictoc,
-    utils,
-    visdat
+    utils
 Suggests:
     roxyglobals,
     testthat (>= 3.0.0)
diff --git a/NAMESPACE b/NAMESPACE
index ac43cde..a549c57 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -39,6 +39,7 @@ importFrom(Rdpack,reprompt)
 importFrom(UpSetR,fromList)
 importFrom(UpSetR,upset)
 importFrom(data.table,fread)
+importFrom(glue,glue)
 importFrom(impute,impute.knn)
 importFrom(limma,normalizeQuantiles)
 importFrom(mice,complete)
@@ -48,6 +49,7 @@ importFrom(pcaMethods,completeObs)
 importFrom(pcaMethods,pca)
 importFrom(psych,describeBy)
 importFrom(readxl,read_excel)
+importFrom(scales,percent)
 importFrom(seqinr,getName)
 importFrom(seqinr,read.fasta)
 importFrom(seqinr,write.fasta)
@@ -69,4 +71,3 @@ importFrom(tibble,column_to_rownames)
 importFrom(tibble,rownames_to_column)
 importFrom(utils,read.csv)
 importFrom(utils,write.csv)
-importFrom(visdat,vis_miss)
diff --git a/R/dataMissing.R b/R/dataMissing.R
index 95a1475..fd8d5f7 100644
--- a/R/dataMissing.R
+++ b/R/dataMissing.R
@@ -27,20 +27,33 @@
 #' 
 #' @param dataSet The 2d data set of experimental values.
 #' 
+#' @param sort_miss A boolean (default = FALSE) specifying whether to arrange the columns
+#' in descending order of missingness.
+#' 
 #' @param plot A boolean (default = FALSE) specifying whether to plot the missingness.
 #' 
+#' @param show_pct_legend A boolean (default = TRUE) specifying whether the percentages of
+#' missing and present values in the entire dataset are shown in the legend of the
+#' visualization when \code{plot = TRUE}.
+#' 
 #' @param show_labels A boolean (default = TRUE) specifying whether protein names are
 #' shown in the visualization when \code{plot = TRUE}.
 #' 
+#' @param show_pct_col A boolean (default = TRUE) specifying whether the percentage of
+#' missing values for each protein is appended to its label in the visualization when
+#' \code{plot = TRUE} and \code{show_labels = TRUE}.
+#' 
 #' @import dplyr
 #' @import ggplot2
-#' @importFrom visdat vis_miss
+#' @import tidyr
+#' @importFrom glue glue
+#' @importFrom scales percent
 #' 
 #' @returns A 2d dataframe including:
 #' \itemize{
 #' \item "count_miss": The count of missing values for each protein.
-#' \item "pct-miss": The percentage of missing values for each protein.
-#' \item "pct_total_miss": The percentage of missing values for each protein relative to
+#' \item "pct_miss_col": The percentage of missing values for each protein.
+#' \item "pct_miss_tot": The percentage of missing values for each protein relative to
 #' the total missing values in the entire dataset.
 #' }
 #' 
@@ -48,24 +61,81 @@
 #' 
 #' @export
 
-dataMissing <- function(dataSet, plot = FALSE, show_labels = TRUE) {
+dataMissing <- function(dataSet, sort_miss = FALSE,
+                        plot = FALSE, show_pct_legend = TRUE,
+                        show_labels = TRUE, show_pct_col = TRUE) {
+  # Exclude the R.Condition and R.Replicate columns from the missingness summary.
   dataMissing <- select(dataSet, -c(R.Condition, R.Replicate))
-  if (plot == TRUE) {
-    if (show_labels == TRUE) {
-      plot <- visdat::vis_miss(dataMissing)
+  # Optional reordering, most missing first; drop = FALSE keeps a one-column input a data frame.
+  if (sort_miss) {
+    dataMissing <- dataMissing[, order(colSums(is.na(dataMissing)), decreasing = TRUE), drop = FALSE]
+  }
+  # Draw a raster of missing/present cells: one row per observation, one column per variable.
+  if (plot) {
+    # Long format: one record per cell, with `value` TRUE where the cell is NA.
+    plotdf <- dataMissing %>%
+      mutate(row = row_number()) %>%
+      pivot_longer(cols = -row, names_to = "variable", values_to = "value",
+                   values_transform = list(value = is.na))
+    # Legend labels, optionally annotated with the dataset-wide percentages.
+    if (show_pct_legend) {
+      pct_missing <- mean(is.na(dataMissing))*100
+      if (pct_missing == 0) {
+        lab_missing <- "No Missing Values"
+        lab_present <- "Present (100%)"
+      } else if (pct_missing < 0.1) {
+        lab_missing <- "Missing (< 0.1%)"
+        lab_present <- "Present (> 99.9%)"
+      } else {
+        pct_missing <- round(pct_missing, 1)
+        pct_present <- 100 - pct_missing
+        lab_missing <- glue::glue("Missing\n({pct_missing}%)")
+        lab_present <- glue::glue("Present\n({pct_present}%)")
+      }
+    } else {
+      lab_missing <- "Missing"
+      lab_present <- "Present"
+    }
+    # Missing cells are drawn dark (grey20), present cells light (grey80); row 1 is at the top.
+    plot <- ggplot(plotdf, aes(x = variable, y = row)) +
+      geom_raster(aes(fill = value)) +
+      scale_fill_manual(name = "", breaks = c("TRUE", "FALSE"),
+                        values = c("grey20", "grey80"),
+                        labels = c(lab_missing, lab_present)) +
+      scale_y_reverse() +
+      theme_minimal() +
+      labs(x = "", y = "Observations") +
+      theme(legend.position = "bottom",
+            axis.text.x = element_text(angle = 45, hjust = 0))
+    # Every branch pins `limits` to the column order; otherwise the x-axis is sorted alphabetically.
+    if (show_labels) {
+      if (show_pct_col) {
+        lab_pct_miss_col <- colMeans(is.na(dataMissing)) %>%
+          vapply(function(x) {
+            case_when(x == 0 ~  "0%",
+                      x < 0.001 ~ "<0.1%",
+                      x < 0.01 ~ "<1%",
+                      x >= 0.01 ~ scales::percent(x, accuracy = 1))
+          }, character(1))
+        plot <- plot +
+          scale_x_discrete(position = "top", limits = names(dataMissing),
+                           labels = glue::glue("{names(lab_pct_miss_col)} ({lab_pct_miss_col})"))
+      } else {
+        plot <- plot +
+          scale_x_discrete(position = "top", limits = names(dataMissing))
+      }
     } else {
-      plotData <- dataMissing
-      colnames(plotData) <- sprintf("%0*d", nchar(ncol(dataMissing)), 1:ncol(dataMissing))
-      plot <- visdat::vis_miss(plotData) +
-        ggplot2::scale_x_discrete(labels = element_blank())
+      plot <- plot +
+        scale_x_discrete(position = "top", limits = names(dataMissing), labels = NULL)
     }
+    # The plot is a side effect only; the function's return value is the summary table below.
     print(plot)
   }
   
   count_miss <- colSums(is.na(dataMissing))
   result <- data.frame(count_miss,
-                       pct_miss = count_miss/nrow(dataMissing)*100,
-                       pct_total_miss = count_miss/sum(count_miss)*100)
+                       pct_miss_col = colMeans(is.na(dataMissing))*100,
+                       pct_miss_tot = count_miss/sum(count_miss)*100)
   return(as.data.frame(t(result)))
 }
 
diff --git a/R/globals.R b/R/globals.R
index 2789095..b615df7 100644
--- a/R/globals.R
+++ b/R/globals.R
@@ -5,6 +5,8 @@ utils::globalVariables(c(
   "R.Replicate", # <analyze>
   "R.Condition", # <dataMissing>
   "R.Replicate", # <dataMissing>
+  "variable", # <dataMissing>
+  "value", # <dataMissing>
   "PG.Quantity", # <preProcessFiltering>
   "PG.NrOfStrippedSequencesIdentified", # <preProcessFiltering>
   "PG.ProteinNames", # <preProcessFiltering>
diff --git a/docs/articles/scaffold.html b/docs/articles/scaffold.html
index b7a2a6c..83c8ca3 100644
--- a/docs/articles/scaffold.html
+++ b/docs/articles/scaffold.html
@@ -75,7 +75,7 @@ <h4 data-toc-skip class="author"></h4>
 and Metabolomics Facility, UConn<br><math display="inline" xmlns="http://www.w3.org/1998/Math/MathML"><semantics><msup><mi></mi><mn>3</mn></msup><annotation encoding="application/x-tex">^3</annotation></semantics></math>Statistical
 Consulting Services, UConn</p>
             
-            <h4 data-toc-skip class="date">2024-07-25</h4>
+            <h4 data-toc-skip class="date">2024-07-26</h4>
       
       <small class="dont-index">Source: <a href="https://github.com/uconn-scs/msDiaLogue/blob/HEAD/vignettes/scaffold.Rmd" class="external-link"><code>vignettes/scaffold.Rmd</code></a></small>
       <div class="d-none name"><code>scaffold.Rmd</code></div>
diff --git a/docs/articles/usage_template.html b/docs/articles/usage_template.html
index 679d27a..1dbf8fc 100644
--- a/docs/articles/usage_template.html
+++ b/docs/articles/usage_template.html
@@ -75,7 +75,7 @@ <h4 data-toc-skip class="author"></h4>
 and Metabolomics Facility, UConn<br><math display="inline" xmlns="http://www.w3.org/1998/Math/MathML"><semantics><msup><mi></mi><mn>3</mn></msup><annotation encoding="application/x-tex">^3</annotation></semantics></math>Statistical
 Consulting Services, UConn</p>
             
-            <h4 data-toc-skip class="date">2024-07-25</h4>
+            <h4 data-toc-skip class="date">2024-07-26</h4>
       
       <small class="dont-index">Source: <a href="https://github.com/uconn-scs/msDiaLogue/blob/HEAD/vignettes/usage_template.Rmd" class="external-link"><code>vignettes/usage_template.Rmd</code></a></small>
       <div class="d-none name"><code>usage_template.Rmd</code></div>
@@ -1646,16 +1646,16 @@ <h2 id="imputation">Imputation<a class="anchor" aria-label="anchor" href="#imput
 <ul>
 <li><p><code>count_miss</code>: The count of missing values for each
 protein.</p></li>
-<li><p><code>pct-miss</code>: The percentage of missing values for each
-protein.</p></li>
-<li><p><code>pct_total_miss</code>: The percentage of missing values for
+<li><p><code>pct_miss_col</code>: The percentage of missing values for
+each protein.</p></li>
+<li><p><code>pct_miss_tot</code>: The percentage of missing values for
 each protein relative to the total missing values in the entire
 dataset.</p></li>
 </ul>
 <div style="overflow-x: auto;">
 <table class="table">
 <colgroup>
-<col width="6%">
+<col width="5%">
 <col width="5%">
 <col width="4%">
 <col width="4%">
@@ -1722,7 +1722,7 @@ <h2 id="imputation">Imputation<a class="anchor" aria-label="anchor" href="#imput
 <td align="right">3.00</td>
 </tr>
 <tr>
-<td align="left">pct_miss</td>
+<td align="left">pct_miss_col</td>
 <td align="right">0</td>
 <td align="right">0</td>
 <td align="right">0</td>
@@ -1744,7 +1744,7 @@ <h2 id="imputation">Imputation<a class="anchor" aria-label="anchor" href="#imput
 <td align="right">30.00</td>
 </tr>
 <tr>
-<td align="left">pct_total_miss</td>
+<td align="left">pct_miss_tot</td>
 <td align="right">0</td>
 <td align="right">0</td>
 <td align="right">0</td>
diff --git a/docs/articles/usage_template_files/figure-html/unnamed-chunk-16-1.png b/docs/articles/usage_template_files/figure-html/unnamed-chunk-16-1.png
index bcbf135..cfaff76 100644
Binary files a/docs/articles/usage_template_files/figure-html/unnamed-chunk-16-1.png and b/docs/articles/usage_template_files/figure-html/unnamed-chunk-16-1.png differ
diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml
index d40e8ee..5f39acf 100644
--- a/docs/pkgdown.yml
+++ b/docs/pkgdown.yml
@@ -4,7 +4,7 @@ pkgdown_sha: ~
 articles:
   scaffold: scaffold.html
   usage_template: usage_template.html
-last_built: 2024-07-25T19:05Z
+last_built: 2024-07-26T06:13Z
 urls:
   reference: https://uconn-scs.github.io/msDiaLogue/reference
   article: https://uconn-scs.github.io/msDiaLogue/articles
diff --git a/docs/reference/dataMissing.html b/docs/reference/dataMissing.html
index 39ed972..27c3cba 100644
--- a/docs/reference/dataMissing.html
+++ b/docs/reference/dataMissing.html
@@ -43,7 +43,14 @@ <h1>Counting missing data</h1>
 
     <div class="section level2">
     <h2 id="ref-usage">Usage<a class="anchor" aria-label="anchor" href="#ref-usage"></a></h2>
-    <div class="sourceCode"><pre class="sourceCode r"><code><span><span class="fu">dataMissing</span><span class="op">(</span><span class="va">dataSet</span>, plot <span class="op">=</span> <span class="cn">FALSE</span>, show_labels <span class="op">=</span> <span class="cn">TRUE</span><span class="op">)</span></span></code></pre></div>
+    <div class="sourceCode"><pre class="sourceCode r"><code><span><span class="fu">dataMissing</span><span class="op">(</span></span>
+<span>  <span class="va">dataSet</span>,</span>
+<span>  sort_miss <span class="op">=</span> <span class="cn">FALSE</span>,</span>
+<span>  plot <span class="op">=</span> <span class="cn">FALSE</span>,</span>
+<span>  show_pct_legend <span class="op">=</span> <span class="cn">TRUE</span>,</span>
+<span>  show_labels <span class="op">=</span> <span class="cn">TRUE</span>,</span>
+<span>  show_pct_col <span class="op">=</span> <span class="cn">TRUE</span></span>
+<span><span class="op">)</span></span></code></pre></div>
     </div>
 
     <div class="section level2">
@@ -54,20 +61,37 @@ <h2 id="arguments">Arguments<a class="anchor" aria-label="anchor" href="#argumen
 <dd><p>The 2d data set of experimental values.</p></dd>
 
 
+<dt id="arg-sort-miss">sort_miss<a class="anchor" aria-label="anchor" href="#arg-sort-miss"></a></dt>
+<dd><p>A boolean (default = FALSE) specifying whether to arrange the columns
+in order of missingness.</p></dd>
+
+
 <dt id="arg-plot">plot<a class="anchor" aria-label="anchor" href="#arg-plot"></a></dt>
 <dd><p>A boolean (default = FALSE) specifying whether to plot the missingness.</p></dd>
 
 
+<dt id="arg-show-pct-legend">show_pct_legend<a class="anchor" aria-label="anchor" href="#arg-show-pct-legend"></a></dt>
+<dd><p>A boolean (default = TRUE) specifying whether the percentages of
+missing and present values in the entire dataset are shown in the legend of the
+visualization when <code>plot = TRUE</code>.</p></dd>
+
+
 <dt id="arg-show-labels">show_labels<a class="anchor" aria-label="anchor" href="#arg-show-labels"></a></dt>
 <dd><p>A boolean (default = TRUE) specifying whether protein names are
 shown in the visualization when <code>plot = TRUE</code>.</p></dd>
 
+
+<dt id="arg-show-pct-col">show_pct_col<a class="anchor" aria-label="anchor" href="#arg-show-pct-col"></a></dt>
+<dd><p>A boolean (default = TRUE) specifying whether the percentages of
+missing data in the samples for that protein are shown in the labels of the
+visualization when <code>show_labels = TRUE</code>.</p></dd>
+
 </dl></div>
     <div class="section level2">
     <h2 id="value">Value<a class="anchor" aria-label="anchor" href="#value"></a></h2>
     <p>A 2d dataframe including:</p><ul><li><p>"count_miss": The count of missing values for each protein.</p></li>
-<li><p>"pct-miss": The percentage of missing values for each protein.</p></li>
-<li><p>"pct_total_miss": The percentage of missing values for each protein relative to
+<li><p>"pct_miss_col": The percentage of missing values for each protein.</p></li>
+<li><p>"pct_miss_tot": The percentage of missing values for each protein relative to
 the total missing values in the entire dataset.</p></li>
 </ul></div>
 
diff --git a/docs/search.json b/docs/search.json
index 389ec51..b50d976 100644
--- a/docs/search.json
+++ b/docs/search.json
@@ -1 +1 @@
-[{"path":"https://uconn-scs.github.io/msDiaLogue/articles/scaffold.html","id":"setting-quantitative-value-in-scaffold","dir":"Articles","previous_headings":"","what":"1. Setting Quantitative Value in Scaffold","title":"Instructions for Exporting Data from Scaffold for Use in msDiaLogue Analysis","text":"quantitative value must set intensity measurement count measurement. Inside Scaffold 5, Samples tab, Display Options dropdown menu top left window must set “Quantitative Value” quantitative value must defined (Experiment –> Quantitative Analysis –> Settings, Quantitative Method dropdown). PMF recommends Average Precursor Intensity value.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/scaffold.html","id":"turning-off-normalization-in-scaffold","dir":"Articles","previous_headings":"","what":"2. Turning Off Normalization in Scaffold","title":"Instructions for Exporting Data from Scaffold for Use in msDiaLogue Analysis","text":"Normalization must turned . (Experiment –> Quantitative Analysis –> Settings), make sure “Use Normalization” box unchecked. option normalize various methods msDiaLogue, stack normalizations programs.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/scaffold.html","id":"minimum-requirements-for-experiment-conditions","dir":"Articles","previous_headings":"","what":"3. Minimum Requirements for Experiment Conditions","title":"Instructions for Exporting Data from Scaffold for Use in msDiaLogue Analysis","text":"experiment must contain minimum 2 conditions, condition must minimum 3 replicates. conditions fine, replicates fine, conditions need number replicates. fewer 3 replicates condition, 1 condition, throw error msDiaLogue able process data.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/scaffold.html","id":"sample-naming-format-requirements","dir":"Articles","previous_headings":"","what":"4. 
Sample Naming Format Requirements","title":"Instructions for Exporting Data from Scaffold for Use in msDiaLogue Analysis","text":"samples must named following format: YYYYMMDD_initials_condition-replicate# (e.g. 20240101_JL_ctrl-1). files may already named way; please check Scaffold “Samples” tab, heading quantitative value column. sample name appear vertically. name formatted , can change going “Load Data” tab, selecting tab sample individually, right-clicking tab, choosing “Edit BioSample”, typing correct name format “Sample Name” box, clicking “Apply”.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/scaffold.html","id":"filtering-dataset-for-protein-identification","dir":"Articles","previous_headings":"","what":"5. Filtering Dataset for Protein Identification","title":"Instructions for Exporting Data from Scaffold for Use in msDiaLogue Analysis","text":"strongly recommend filter dataset hide proteins 1 peptide identified. “Samples” tab, top-menu bar, “Min # Peptides” dropdown, set 2.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/scaffold.html","id":"collapsing-protein-clusters-in-samples-tab","dir":"Articles","previous_headings":"","what":"6. Collapsing Protein Clusters in Samples Tab","title":"Instructions for Exporting Data from Scaffold for Use in msDiaLogue Analysis","text":"protein clusters must collapsed. “Samples” tab, first column (header “#”), right click numbered entries , select “Clusters”, select “Collapse ”. converts gray-shaded entries, multiple proteins inferred peptide evidence, expanded view line possible protein match cluster collapsed view first entry appearing table. msDiaLogue, first entry accession number one represented data cluster. ’re now ready export data. “Samples” tab Scaffold, right-click anywhere main data table, choose “Export” (bottom menu), “Export Excel”. Save descriptive filename make sense someone else future choose location ’ll using working directory R. report can saved .xls .csv. 
can now use preprocessing_scaffold script available page pick transformation step msDiaLogue script provided main Usage Template page. raw data .xls file Toy_Scaffold_Data.xls, specify fileName read raw data file R. raw data stored .RData file Toy_Scaffold_Data.RData, first load data file directly, specify dataSet function.","code":"library(msDiaLogue) ## if the raw data is in a .xls file dataSet <- preprocessing_scaffold(fileName = \"../tests/testData/Toy_Scaffold_Data.xls\") ## if the raw data is in an .Rdata file load(\"../tests/testData/Toy_Scaffold_Data.RData\") dataSet <- preprocessing_scaffold(dataSet = Toy_Scaffold_Data) #> Warning: Removed 4194 rows containing non-finite outside the scale range #> (`stat_bin()`). #> Summary of Full Data Signals: #>      Min.   1st Qu.    Median      Mean   3rd Qu.      Max.      NA's  #> 7.300e+05 9.640e+06 2.250e+07 5.546e+07 5.630e+07 3.480e+09      4194  #>  #> Levels of Condition: 6 ctrl DM SIC  #> Levels of Replicate: 1 2 3"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/template.html","id":"load-r-package","dir":"Articles","previous_headings":"","what":"Load R package","title":"A Usage Template for the R Package msDiaLogue","text":"","code":"# install.packages(\"devtools\") devtools::install_github(\"uconn-scs/msDiaLogue\") ##  ## ── R CMD build ───────────────────────────────────────────────────────────────── ## * checking for file ‘/private/var/folders/yd/khpnz9fj6h524w6qys8n8vt80000gn/T/RtmpIGAfbx/remotes75ac3d1dae80/uconn-scs-msDiaLogue-8a27917/DESCRIPTION’ ... OK ## * preparing ‘msDiaLogue’: ## * checking DESCRIPTION meta-information ... 
OK ## * checking for LF line-endings in source and make files and shell scripts ## * checking for empty or unneeded directories ## Removed empty directory ‘msDiaLogue/tests’ ## * building ‘msDiaLogue_0.0.1.tar.gz’ library(msDiaLogue)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/template.html","id":"preprocessing","dir":"Articles","previous_headings":"","what":"Preprocessing","title":"A Usage Template for the R Package msDiaLogue","text":"Read file Toy_Spectronaut_Data.csv R. Omit observations NaN, save removed data file preprocess_Filtered_Out_NaN.csv current working directory. Filter proteins fewer \\(2\\) unique peptides, save removed data file preprocess_Filtered_Out_Unique.csv current working directory. proteins without names, replace blank protein name entries protein accessions.","code":"fileName <- \"../tests/testData/Toy_Spectronaut_Data.csv\" data <- preprocessing(fileName, filterNaN = TRUE, filterUnique = 2,                       replaceBlank = TRUE, saveRm = TRUE)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/template.html","id":"transformation","dir":"Articles","previous_headings":"","what":"Transformation","title":"A Usage Template for the R Package msDiaLogue","text":"Raw intensity measurements, often unsuitable direct statistical modeling, prompt transformation quantitative proteomics workflow. preferred solution log-transformation, effectively addressing issue. log\\(_2\\) transformation commonly employed due ability facilitate straightforward interpretation fold changes protein levels.","code":"dataTran <- transform(data, logFold = 2)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/template.html","id":"filtering","dir":"Articles","previous_headings":"","what":"Filtering","title":"A Usage Template for the R Package msDiaLogue","text":"various reasons, contamination, lack informativeness, identification samples, etc., data filtered conducting differential analysis exclude certain proteins. 
example, protein ‘XPO4_HUMAN’ chosen filtered . removeList = TRUE indicates removal proteins listed listName dataTran. Please note saveRm = TRUE, excluded data (“XPO4_HUMAN”) saved .csv file named filtered_out_data.csv current working directory. Alternatively, wish retain proteins like “PA1B2_HUMAN”, “TEBP_HUMAN”, “UAP1_HUMAN”, simply set removelist = FLASE.","code":"dataFilt <- filterOutIn(dataTran, listName = \"XPO4_HUMAN\",                         removeList = TRUE, saveRm = TRUE) filterOutIn(dataTran, listName = c(\"PA1B2_HUMAN\", \"TEBP_HUMAN\", \"UAP1_HUMAN\"), removeList = FALSE)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/template.html","id":"imputation","dir":"Articles","previous_headings":"","what":"Imputation","title":"A Usage Template for the R Package msDiaLogue","text":"Mass spectrometry-based quantitative proteomics experiments often yield data missing values, potentially impacting downstream analyses. Various imputation methods developed address issue. far, package provides two imputation methods use: “LocalMinVal”: replaces missing values lowest value protein condition combination; “GlobalMinVal”: replaces missing values lowest value found within entire dataset. Additional methods added later. example, impute NA value dataTran using “LocalMinVal,” set required percentage values must present given protein condition combination values imputed \\(51\\%\\). imputation, may still NA values consequently need filtered . saveRm = TRUE indicates filtered data saved .csv file named filtered_NA_data.csv current working directory. 
dataImput follows:","code":"dataImput <- impute(dataFilt, imputeType = \"LocalMinVal\",                     reqPercentPresent = 51, reportImputing = FALSE) dataImput <- filterNA(dataImput, saveRm = TRUE)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/template.html","id":"normalization","dir":"Articles","previous_headings":"","what":"Normalization","title":"A Usage Template for the R Package msDiaLogue","text":"Normalization designed address systematic biases, enhancing comparability samples preserving signal. Various normalization approaches proposed. far, package provides three normalization methods use: “quant”: Quantile “median”: Column-wise Median “mean”: Column-wise Mean Quantile normalization generally recommended.","code":"dataNorm <- normalize(dataImput, normalizeType = \"quant\")"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/template.html","id":"summarization","dir":"Articles","previous_headings":"","what":"Summarization","title":"A Usage Template for the R Package msDiaLogue","text":"Summarization proves valuable distilling meaningful insights vast intricate data, facilitating efficient identification, quantification, interpretation protein profiles within complex biological samples.","code":"dataSumm <- summarize(dataNorm, saveSumm = TRUE)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/template.html","id":"analysis-and-visualization","dir":"Articles","previous_headings":"","what":"Analysis And Visualization","title":"A Usage Template for the R Package msDiaLogue","text":"","code":"analysis1 <- analyze(dataNorm, testType = \"MA\") visualize(analysis1, graphType = \"MA\")"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"load-r-package","dir":"Articles","previous_headings":"","what":"Load R package","title":"A Usage Template for the R Package 
msDiaLogue","text":"","code":"library(msDiaLogue)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"preprocessing","dir":"Articles","previous_headings":"","what":"Preprocessing","title":"A Usage Template for the R Package msDiaLogue","text":"function preprocessing() takes .csv file summarized protein abundances, exported Spectronaut. important columns need included file : R.Condition, R.Replicate, PG.ProteinAccessions, PG.ProteinNames, PG.NrOfStrippedSequencesIdentified, PG.Quantity. function reformat data provide functionality initial filtering (based number unique peptides). steps describe functions happen Preprocessing code. 1. Loads raw data raw data .csv file Toy_Spectronaut_Data.csv, specify fileName read raw data file R. raw data stored .RData file Toy_Spectronaut_Data.RData, first load data file directly, specify dataSet function. 2. Filters identified proteins exhibit “NaN” quantitative values NaN, stands ‘Number,’ can found PG.Quantity column proteins identified MS MS/MS evidence raw data, peptides protein lack associated integrated peak area intensity. usually occurs low abundance peptides exhibit intensities close limit detection resulting poor signal--noise (S/N) /interference co-eluting peptide ions similar identical m/z values lead difficulty parsing individual intensity profiles. 3. Applies unique peptides per protein filter General practice proteomics field filter proteins 1 unique peptide identified. adds increased confidence results already filtered 1% false discovery rate (FDR), since proteins identified 2 peptides less likely false positives. recommend filtering protein entries order focus confident targets identified proteome. However, 1-peptide proteins can still observed original protein report Spectronaut. 4. Adds accession numbers identified proteins without informative names Spectronaut reports contain 4 different columns identifying information: PG.Genes, gene name (e.g. CDK1). 
PG.ProteinAccessions, UniProt identifier number unique entry online database (e.g. P06493). PG.ProteinDescriptions, protein name provided UniProt (e.g. cyclin-dependent kinase 1). PG.ProteinNames, concatenation identifier species (e.g. CDK1_HUMAN). Every entry UniProt accession number, may identifiers, due incomplete annotation. Uniprot includes entries fragments proteins proteins entries redundant, peptide can match multiple entries protein, generates multiple possible identifiers Spectronaut. , ProteinNames entry Spectronaut can switch formats: preference accession number species, can also gene name species instead. option tells msDiaLogue substitute accession number identifier tries pull identifier column information. 5. Saves document working directory filtered data, desired saveRm = TRUE, data removed step 2 (preprocess_Filtered_Out_NaN.csv) step 3 (preprocess_Filtered_Out_Unique.csv) saved current working directory. part preprocessing(), histogram log2log_2-transformed protein abundances provided. helpful way confirm data read correctly, issues numerical values protein abundances. Ideally, histogram appear fairly symmetrical (bell-shaped) without much skew towards smaller larger values.","code":"## if the raw data is in a .csv file fileName <- \"../tests/testData/Toy_Spectronaut_Data.csv\" dataSet <- preprocessing(fileName,                          filterNaN = TRUE, filterUnique = 2,                          replaceBlank = TRUE, saveRm = TRUE) ## if the raw data is in an .Rdata file load(\"../tests/testData/Toy_Spectronaut_Data.RData\") dataSet <- preprocessing(dataSet = Toy_Spectronaut_Data,                          filterNaN = TRUE, filterUnique = 2,                          replaceBlank = TRUE, saveRm = TRUE) #> Warning: Removed 25 rows containing non-finite outside the scale range #> (`stat_bin()`). #> Summary of Full Data Signals (Raw): #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  
#>    8577   88074  210461  653661  535701 5044124 #> Levels of Condition: 100fmol 50fmol  #> Levels of Replicate: 1 2 3 4 5"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"transformation","dir":"Articles","previous_headings":"","what":"Transformation","title":"A Usage Template for the R Package msDiaLogue","text":"Raw mass spectrometry intensity measurements often unsuitable direct statistical modeling shape data usually symmetrical variance consistent across range intensities. proteomic workflows convert raw values log2_2 transformation, reshapes data symmetrical distribution, making easier interpret mean-based fold changes, also stabilizes variance across intensity range (.e. reduces heteroscedasticity).","code":"dataTran <- transform(dataSet, logFold = 2)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"filtering","dir":"Articles","previous_headings":"","what":"Filtering","title":"A Usage Template for the R Package msDiaLogue","text":"cases, researcher may wish filter specific protein proteins dataset. common instance proteins identified common contaminants database, identification necessary avoid incorrect matching result irrelevant experimental question included data visualization. scenarios might include mixed-species experiment researcher wants evaluate data one species time. step allows set aside specific proteins downstream analysis, using gene_species identifier format. Case 1. Remove proteins specified user step keep everything else. example, proteins named “ADH1_YEAST” containing “HUMAN” chosen filtered . removeList = TRUE indicates removal proteins union listName regexName dataTran. Please note saveRm = TRUE, excluded data (“ADH1_YEAS” + “*HUMAN”) saved .csv file named filtered_out_data.csv current working directory. Case 2. Keep proteins specified user step remove everything else. Alternatively, keep proteins like “ADH1_YEAST” “*HUMAN”, simply set removelist = FALSE. 
Extension Besides protein names, function filterProtein() provides similar function filter proteins additional protein information. Spectronaut: “PG.Genes”, “PG.ProteinAccessions”, “PG.ProteinDescriptions”, “PG.ProteinNames”. Scaffold: “ProteinDescriptions”, “AccessionNumber”, “AlternateID”. proteinInformation file name protein information, automatically generated preprocessing(). case, proteins whose \"PG.ProteinDescriptions\" match “Ras-related protein Rab-3D” “Alcohol dehydrogenase 1” kept. Note search value text used exact equality search.","code":"filterOutIn(dataTran, listName = \"ADH1_YEAST\", regexName = \"HUMAN\",             removeList = TRUE, saveRm = TRUE) filterOutIn(dataTran, listName = \"ADH1_YEAST\", regexName = \"HUMAN\",             removeList = FALSE) filterProtein(dataTran, proteinInformation = \"preprocess_protein_information.csv\",               text = c(\"Ras-related protein Rab-3D\", \"Alcohol dehydrogenase 1\"),               by = \"PG.ProteinDescriptions\",               removeList = FALSE)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"normalization","dir":"Articles","previous_headings":"","what":"Normalization","title":"A Usage Template for the R Package msDiaLogue","text":"Normalization designed address systematic biases data. Biases can arise inadvertent sample grouping generation preparation, variations instrument performance acquisition, analysis different peptide amounts across experiments, reasons. factors can artificially mask enhance actual biological changes. Many normalization methods developed large datasets, strengths weaknesses. following factors considered choosing normalization method: Experiment-Specific Normalization: experiments run UConn PMF normalized injection amount time analysis facilitate comparison. “Amount” measured UV absorbance 280 nm, standard method generic protein quantification. 
Assumption Non-Changing Species: biological experiments implicitly assume majority measured species experiment change across conditions. assumption robust thousands species, compared hundreds, tens, experiments different complexities (e.g. purified protein vs. immunoprecipitation vs. full lysate), normalization applied global process, instead subsets experiments relatively similar . far, package provides three normalization methods use: “quant”: Quantile (Bolstad et al. 2003) (values run ranked, quantile bins applied entire dataset, values run adjusted closest bin value) “median”: Protein-wise Median (scalar factor applied protein entry make median sample equal every sample) “mean”: Protein-wise Mean (scalar factor applied protein entry make mean sample equal every sample) Quantile normalization generally recommended UConn SCS.  Oh! message “Warning: Removed 16 rows containing non-finite values” indicates presence 16 NA (Available) values data. NA values arise protein identified particular sample condition automatically excluded generating boxplot retained actual dataset.","code":"dataNorm <- normalize(dataTran, normalizeType = \"quant\") #> Warning: Removed 16 rows containing non-finite outside the scale range #> (`stat_boxplot()`)."},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"imputation","dir":"Articles","previous_headings":"","what":"Imputation","title":"A Usage Template for the R Package msDiaLogue","text":"two primary MS/MS acquisition types implemented large scale MS-based proteomics unique advantages disadvantages. Traditional Data-Dependent Acquisition (DDA) methods favor specificity MS/MS sampling comprehensive proteome coverage. Small peptide isolation windows (<3 m/z) result MS/MS spectra contain fragmentation data ideally one peptide. specificity promotes clear peptide identifications comes expense added scan time. 
DDA experiments, number peptides can selected MS/MS limited instrument scan speeds therefore prioritized highest peptide abundance. Low abundance peptides sampled less frequently MS/MS can result variable peptide coverage many missing protein data across large sample datasets. Data-Independent Acquisition (DIA) methods promote comprehensive peptide coverage specificity sampling many peptides MS/MS simultaneously. Sequential large mass isolation windows (4-50 m/z) used isolate large numbers peptides concurrent MS/MS. produces complicated fragmentation spectra, spectra contain data every observable peptide. major disadvantage type acquisition DIA MS/MS spectra incredibly complex difficult deconvolve. Powerful relatively new software programs like Spectronaut capable successfully parsing fragment ions came co-fragmented peptide using custom libraries, machine learning algorithms, precisely determined retention times measured ion mobility data. observable ions sampled MS/MS, DIA reduces missingness substantially compared DDA, though entirely. Function dataMissing() designed summarize missingness protein, plot = TRUE indicates plotting missingness, show_labels = TRUE means protein names displayed printed plot. Note visual representation generated default, plot generation time varies project size.  percentage protein labels represents proportion missing data samples protein. instance, label “XPO4_HUMAN (80%)” indicates , within observations protein “XPO4_HUMAN”, 80% data missing. Additionally, percentage legend represents proportion missing data whole dataset. case, 8.4% data dataNorm missing. Regardless plot generation, function dataMissing() always returns table providing following information: count_miss: count missing values protein. pct-miss: percentage missing values protein. pct_total_miss: percentage missing values protein relative total missing values entire dataset. 
example, case protein “XPO4_HUMAN,” 8 NA values samples, representing 80% missing data “XPO4_HUMAN” within sample 50% total missing data entire dataset. Various imputation methods developed address missing-value issue assign reasonable guess quantitative value proteins missing values. far, package provides 10 imputation methods use: impute.min_local(): Replaces missing values lowest measured value protein condition. impute.min_global(): Replaces missing values lowest measured value protein found within entire dataset. impute.knn(): Replaces missing values using k-nearest neighbors algorithm (Troyanskaya et al. 2001). impute.knn_seq(): Replaces missing values using sequential k-nearest neighbors algorithm (Kim, Kim, Yi 2004). impute.knn_trunc(): Replaces missing values using truncated k-nearest neighbors algorithm (Shah et al. 2017). impute.nuc_norm(): Replaces missing values using nuclear-norm regularization (Hastie et al. 2015). impute.mice_cart(): Replaces missing values using classification regression trees (Breiman et al. 1984; Doove, van Buuren, Dusseldorp 2014; van Buuren 2018). impute.mice_norm(): Replaces missing values using Bayesian linear regression (Rubin 1987; Schafer 1997; van Buuren Groothuis-Oudshoorn 2011). impute.pca_bayes(): Replaces missing values using Bayesian principal components analysis (Oba et al. 2003). impute.pca_prob(): Replaces missing values using probabilistic principal components analysis (Stacklies et al. 2007). Additional methods added later. example, impute NA value dataNorm using impute.min_local(), set required percentage values must present given protein condition combination values imputed 51%. reportImputing = TRUE, returned result structure altered list, adding shadow data frame imputed data labels, 1 indicates corresponding entries imputed, 0 indicates otherwise. imputation, entries pass percent present threshold still NA values need filtered . 
saveRm = TRUE indicates filtered data saved .csv file named filtered_NA_data.csv current working directory. dataImput follows:","code":"dataMissing <- dataMissing(dataNorm, plot = TRUE, show_labels = TRUE) dataImput <- impute.min_local(dataNorm, reportImputing = FALSE,                               reqPercentPresent = 0.51) dataImput <- filterNA(dataImput, saveRm = TRUE)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"summarization","dir":"Articles","previous_headings":"","what":"Summarization","title":"A Usage Template for the R Package msDiaLogue","text":"summarization provides table values protein final dataset include final processed abundances fold changes condition, protein’s statistical relation global dataset terms mean, median, standard deviation, parameters. column “Stat” generated result includes following statistics: n: Number. mean: Mean. sd: Standard deviation. median: Median. trimmed: Trimmed mean trim 0.1. mad: Median absolute deviation (median). min: Minimum. max: Maximum. range: difference maximum minimum value. skew: Skewness. kurtosis: Kurtosis. se: Standard error.","code":"dataSumm <- summarize(dataImput, saveSumm = TRUE)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"analysis","dir":"Articles","previous_headings":"","what":"Analysis","title":"A Usage Template for the R Package msDiaLogue","text":"function analyze() calculates results can used subsequent visualizations. Note: following listed analysis compare data two conditions. order conditions affect downstream analysis, second condition serves reference comparison. two conditions exist data conditions specified, conditions automatically generated sorting unique values alphabetically ascending order. 
two conditions exist data, precisely two conditions comparison must specified via argument conditions.","code":"cond <- c(\"100fmol\", \"50fmol\")"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"students-t-test","dir":"Articles","previous_headings":"Analysis","what":"Student’s t-test","title":"A Usage Template for the R Package msDiaLogue","text":"Student’s t-test used compare means two conditions protein, reporting difference means conditions P-value test.","code":"anlys_t <- analyze(dataImput, conditions = cond, testType = \"t-test\") #> Data are essentially constant. #> Data are essentially constant."},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"moderated-t-test","dir":"Articles","previous_headings":"Analysis","what":"Moderated t-test","title":"A Usage Template for the R Package msDiaLogue","text":"main distinction Student’s moderated t-tests (Smyth 2004) lies variance computed. Student’s t-test calculates variance based data available protein individually, moderated t-test utilizes information chosen proteins calculate variance.","code":"anlys_mod.t <- analyze(dataImput, conditions = cond, testType = \"mod.t-test\")"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"ma","dir":"Articles","previous_headings":"Analysis","what":"MA","title":"A Usage Template for the R Package msDiaLogue","text":"result testType = \"MA\" generate data plotting MA plot, represents protein-wise averages within condition.","code":"anlys_MA <- analyze(dataImput, conditions = cond, testType = \"MA\")"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"visualization","dir":"Articles","previous_headings":"","what":"Visualization","title":"A Usage Template for the R Package msDiaLogue","text":"section provides variety options getting global view data, making comparisons, highlighting trends. 
Keep mind data visualization effective illustrating point answering question data, means find point/question.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"heatmap","dir":"Articles","previous_headings":"Visualization","what":"heatmap","title":"A Usage Template for the R Package msDiaLogue","text":"package offers two options plotting heatmap. Option 1 utilizes source package pheatmap, capable plotting dendrogram simultaneously. default choice heatmaps package.  protein names excessively long, recommended set show_rownames = FALSE view full heatmap. Option 2 use source package ggplot2 generate ggplot object include dendrogram.  heatmap, similar colors within row indicate relatively consistent values, suggesting similar protein expression levels across different samples.","code":"visualize(dataImput, graphType = \"heatmap\",           pkg = \"pheatmap\",           cluster_cols = TRUE, cluster_rows = FALSE,           show_colnames = TRUE, show_rownames = TRUE) visualize(dataImput, graphType = \"heatmap\", pkg = \"ggplot2\")"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"ma-1","dir":"Articles","previous_headings":"Visualization","what":"MA","title":"A Usage Template for the R Package msDiaLogue","text":"MA plot, short “M vs. plot,” uses two axes: M axis (vertical): Represents logarithm (usually base 2) fold change, ratio expression levels, two conditions. calculated : M=log2(X/Y)=log2X−log2YM = log_2(X/Y) = log_2 X - log_2 Y axis (horizontal): Represents average intensity two conditions, calculated : =12log2(XY)=12[log2(X)+log2(Y)]= \\frac{1}{2}log_2(XY) = \\frac{1}{2}\\left[log_2(X)+log_2(Y)\\right] proteins expected exhibit little variation, leading majority points concentrating around line M = 0 (indicating difference group means).  M.thres = 1 means M thresholds set −1 1. scatters split three parts: significant (M > 1), significant (-1 ≤\\leq M ≤\\leq 1), significant (M < -1). 
transformLabel = \"Log2\" used prefix title, x-axis, y-axis labels. Additionally, warning message “Removed 16 rows containing missing values” indicates 16 proteins significance.","code":"visualize(anlys_MA, graphType = \"MA\", M.thres = 1, transformLabel = \"Log2\") #> Warning: Removed 16 rows containing missing values or values outside the scale range #> (`geom_text_repel()`)."},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"normalize","dir":"Articles","previous_headings":"Visualization","what":"Normalize","title":"A Usage Template for the R Package msDiaLogue","text":"","code":"visualize(dataNorm, graphType = \"normalize\") #> Warning: Removed 16 rows containing non-finite outside the scale range #> (`stat_boxplot()`)."},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"pca","dir":"Articles","previous_headings":"Visualization","what":"PCA","title":"A Usage Template for the R Package msDiaLogue","text":"Principal component analysis (PCA) powerful technique used data analysis simplify reduce dimensionality large datasets. transforms original variables uncorrelated components capture maximum variance. selecting subset components, PCA projects data points onto key directions, enabling visualization analysis lower-dimensional space. aids identifying patterns relationships within complex datasets. visualization graphType = \"PCA_*\", arguments center scale used center data zero mean scale unit variance, default setting TRUE. case dataImput, two proteins, namely “TEBP_HUMAN” “T126B_HUMAN,” constant values, leading error message. 
choose remove two proteins PCA.","code":"dataPCA <- dataImput[, !(colnames(dataImput) %in% c(\"TEBP_HUMAN\", \"T126B_HUMAN\"))]"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"pca_scree","dir":"Articles","previous_headings":"Visualization > PCA","what":"PCA_scree","title":"A Usage Template for the R Package msDiaLogue","text":"One way help identify many PCs retain, explore scree plot. scree plot shows eigenvalues PC, represent proportion variance explained component. choice specifies data plotted, either \"variance\" \"eigenvalue\", addlabels = TRUE adds information labels top bars/points, ncp = 10 sets number dimension displayed.","code":"visualize(dataPCA, graphType = \"PCA_scree\", center = TRUE, scale = TRUE,           addlabels = TRUE, choice = \"variance\", ncp = 10) visualize(dataPCA, graphType = \"PCA_scree\", center = TRUE, scale = TRUE,           addlabels = TRUE, choice = \"eigenvalue\", ncp = 10)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"pca_ind","dir":"Articles","previous_headings":"Visualization > PCA","what":"PCA_ind","title":"A Usage Template for the R Package msDiaLogue","text":"primary PCA plot individual data visually represents distribution individual observations reduced-dimensional space, typically defined PCs. x y axes PCA plot represent PCs. axis corresponds linear combination original variables. Individual data points PCA plot represent observations (e.g., samples) original dataset. Points close origin (0, 0), close “average” across protein abundances. 
sufficient samples present, plot also produce 95% confidence ellipse, well centroid (mean group provided), groups (condition) provided.","code":"visualize(dataPCA, graphType = \"PCA_ind\", center = TRUE, scale = TRUE,           addlabels = TRUE, addEllipses = TRUE, ellipse.level = 0.95)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"pca_var","dir":"Articles","previous_headings":"Visualization > PCA","what":"PCA_var","title":"A Usage Template for the R Package msDiaLogue","text":"plot useful analyses based relatively small number proteins. represents association, loading protein first two PCs. Longer arrows represents stronger associations.","code":"visualize(dataPCA, graphType = \"PCA_var\", center = TRUE, scale = TRUE,           addlabels = TRUE)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"pca_biplot","dir":"Articles","previous_headings":"Visualization > PCA","what":"PCA_biplot","title":"A Usage Template for the R Package msDiaLogue","text":"PCA biplot includes individual variable plots. , large number proteins, plot can unwieldy.","code":"visualize(dataPCA, graphType = \"PCA_biplot\", center = TRUE, scale = TRUE,           addEllipses = TRUE, ellipse.level = 0.95, label = \"all\")"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"t-test","dir":"Articles","previous_headings":"Visualization","what":"t-test","title":"A Usage Template for the R Package msDiaLogue","text":"function visualize() can applied t-test output. generates two useful plots: histogram fold changes across analyzed proteins histogram P-values. majority proteins expected show small change conditions, fold change histogram peak around zero. P-values, P-values expected non-significant (0.05). 
Depending strength treatment effect, may peak p-values near 0.","code":"visualize(anlys_mod.t, graphType = \"t-test\")"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"upset","dir":"Articles","previous_headings":"Visualization","what":"Upset","title":"A Usage Template for the R Package msDiaLogue","text":"upset plot visual representation helps display overlap intersection sets categories dataset. particularly useful illustrating presence absence elements combinations sets.  plot reveals 18 proteins common 100fmol 50fmol, 1 protein unique 100fmol.","code":"dataSort <- sortcondition(dataSet) visualize(dataSort, graphType = \"Upset\")"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"venn","dir":"Articles","previous_headings":"Visualization","what":"Venn","title":"A Usage Template for the R Package msDiaLogue","text":"Venn plot another graphical representation relationships sets. circle represents set, overlapping regions show elements shared sets.  example , 100fmol 50fmol groups share 18 proteins. Notably, one protein exclusively found 100fmol group, 50fmol group lacks unique proteins.","code":"visualize(dataSort, graphType = \"Venn\",           show_percentage = TRUE,           fill_color = c(\"blue\", \"yellow\", \"green\", \"red\"),           show_universal = FALSE)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"volcano","dir":"Articles","previous_headings":"Visualization","what":"Volcano","title":"A Usage Template for the R Package msDiaLogue","text":"volcano plot graphical representation commonly used proteomics genomics visualize differential expression analysis results. particularly useful identifying significant changes extensive data. displays two important pieces information differences conditions dataset: Statistical significance (vertical): Represents negative log10 P-value. 
Fold change (horizontal): Represents fold change.","code":"visualize(anlys_mod.t, graphType = \"volcano\",           P.thres = 0.05, logF.thres = 0.6) #> Warning: Removed 15 rows containing missing values or values outside the scale range #> (`geom_text_repel()`)."},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"other-useful-function","dir":"Articles","previous_headings":"","what":"Other useful function","title":"A Usage Template for the R Package msDiaLogue","text":"function pullProteinPath() allows see values associated specific protein step processing. can useful questions , “values favorite protein actually measured, imputed?” “didn’t favorite protein make final list? filtered ?”. can also used check whether given protein’s fold-change might processing artifact.","code":"ZC11B <- pullProteinPath(proteinName = \"ZC11B_HUMAN\",                          dataSetList = list(Initial = dataSet,                                             Transformed = dataTran,                                             Normalized = dataNorm,                                             Imputed = dataImput))"},{"path":[]},{"path":"https://uconn-scs.github.io/msDiaLogue/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Shiying Xiao. Author, maintainer. Timothy Moore. Author. Charles Watt. Author, contributor.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Xiao S, Moore T, Watt C (2024). msDiaLogue: Analysis + Visuals Data Indep. Aquisition Mass Spectrometry Data. R package version 0.0.5, https://github.com/uconn-scs/msDiaLogue.","code":"@Manual{,   title = {msDiaLogue: Analysis + Visuals for Data Indep. 
Aquisition Mass Spectrometry Data},   author = {Shiying Xiao and Timothy Moore and Charles Watt},   year = {2024},   note = {R package version 0.0.5},   url = {https://github.com/uconn-scs/msDiaLogue}, }"},{"path":[]},{"path":"https://uconn-scs.github.io/msDiaLogue/index.html","id":"code-building-for-workflow-package","dir":"","previous_headings":"","what":"Code building for workflow package","title":"msDiaLogue","text":"repository used build customized UConn PMF-SCS workflow Data-Independent Acquisition (DIA) proteomics data. code expanded include options users, detailed explanations steps analysis process.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"msDiaLogue","text":"can install development version msDiaLogue GitHub :","code":"# install.packages(\"devtools\") devtools::install_github(\"uconn-scs/msDiaLogue\")"},{"path":"https://uconn-scs.github.io/msDiaLogue/index.html","id":"main-areas-include","dir":"","previous_headings":"","what":"Main areas include","title":"msDiaLogue","text":"Data preprocessing: getting data Spectronaut Data transformation: options transforming abundance data Data filtering: providing options filter data based preset levels Data normalization: normalization procedures Data imputation: missing data procedures Data summary: calculating presenting numerical summaries tabular form Differential Abundance Analysis: statistical tools DIA data analysis Data visualization: providing clean visuals aid data analysis decisions","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/index.html","id":"general-problems-during-installation","dir":"","previous_headings":"","what":"General problems during installation","title":"msDiaLogue","text":"WARNING: Rtools required build R packages currently installed. Windows, RTools required build package. 
sh: /opt/gfortran/bin/gfortran: file directory GFortran compiler required build package.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/analyze.html","id":null,"dir":"Reference","previous_headings":"","what":"Analyzing summarized data — analyze","title":"Analyzing summarized data — analyze","text":"Apply statistical test data.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/analyze.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Analyzing summarized data — analyze","text":"","code":"analyze(dataSet, conditions, testType = \"t-test\")"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/analyze.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Analyzing summarized data — analyze","text":"dataSet 2d data set data. conditions string specifying two conditions compare. order important, second condition serves reference comparison. two conditions dataSet argument specified, conditions automatically selected sorting unique values alphabetically ascending order. testType string (default = \"t-test\") specifying statistical test use: \"t-test\": Unequal variance t-test. \"mod.t-test\": Moderated t-test (Smyth 2004) . \"MA\": Output plot MA plot.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/analyze.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Analyzing summarized data — analyze","text":"2d dataframe includes following information: \"t-test\" \"mod.t-test\": differences means P-values protein two conditions. \"MA\": Protein-wise averages within condition.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/analyze.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Analyzing summarized data — analyze","text":"second condition serves reference comparison. 
\"t-test\" \"mod.t-test\": differences calculated subtracting mean second condition mean first condition (Condition 1 - Condition 2). \"MA\": rows ordered conditions. Specifically, first row corresponds protein-wise average first condition, second row corresponds second condition.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/analyze.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Analyzing summarized data — analyze","text":"Smyth GK (2004). “Linear Models Empirical Bayes Methods Assessing Differential Expression Microarray Experiments.” Statistical Applications Genetics Molecular Biology, 3(1). doi:10.2202/1544-6115.1027 .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/dataMissing.html","id":null,"dir":"Reference","previous_headings":"","what":"Counting missing data — dataMissing","title":"Counting missing data — dataMissing","text":"Calculate plot missingness.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/dataMissing.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Counting missing data — dataMissing","text":"","code":"dataMissing(dataSet, plot = FALSE, show_labels = TRUE)"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/dataMissing.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Counting missing data — dataMissing","text":"dataSet 2d data set experimental values. plot boolean (default = FALSE) specifying whether plot missingness. show_labels boolean (default = TRUE) specifying whether protein names shown visualization plot = TRUE.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/dataMissing.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Counting missing data — dataMissing","text":"2d dataframe including: \"count_miss\": count missing values protein. \"pct-miss\": percentage missing values protein. 
\"pct_total_miss\": percentage missing values protein relative total missing values entire dataset.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterNA.html","id":null,"dir":"Reference","previous_headings":"","what":"Filtering NA's post-imputation — filterNA","title":"Filtering NA's post-imputation — filterNA","text":"Remove proteins NA values.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterNA.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Filtering NA's post-imputation — filterNA","text":"","code":"filterNA(dataSet, saveRm = TRUE)"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterNA.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Filtering NA's post-imputation — filterNA","text":"dataSet 2d data set experimental values. saveRm boolean (default = TRUE) specifying whether save removed data current working directory.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterNA.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Filtering NA's post-imputation — filterNA","text":"filtered 2d dataframe.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterNA.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Filtering NA's post-imputation — filterNA","text":"proteins meet imputation requirement removed, .csv file created removed data.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterOutIn.html","id":null,"dir":"Reference","previous_headings":"","what":"Filtering proteins or contaminants — filterOutIn","title":"Filtering proteins or contaminants — filterOutIn","text":"Apply series filtering steps data set.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterOutIn.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Filtering 
proteins or contaminants — filterOutIn","text":"","code":"filterOutIn(   dataSet,   listName = c(),   regexName = c(),   removeList = TRUE,   saveRm = TRUE )"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterOutIn.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Filtering proteins or contaminants — filterOutIn","text":"dataSet 2d data set experimental values. listName character vector proteins select remove. regexName character vector specifying proteins regular expression pattern matching select remove. removeList boolean (default = TRUE) specifying whether list proteins removed selected. TRUE: Remove list proteins data set. FALSE: Remove proteins list data set. saveRm boolean (default = TRUE) specifying whether save removed data current working directory. option works removeList = TRUE.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterOutIn.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Filtering proteins or contaminants — filterOutIn","text":"filtered 2d dataframe.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterOutIn.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Filtering proteins or contaminants — filterOutIn","text":"listName regexName provided, protein names selected removed union specified listName matching regex pattern regexName.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterProtein.html","id":null,"dir":"Reference","previous_headings":"","what":"Filter proteins by gene, accession or description — filterProtein","title":"Filter proteins by gene, accession or description — filterProtein","text":"Filter preprocessed dataset gene, accession, description.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterProtein.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Filter proteins 
by gene, accession or description — filterProtein","text":"","code":"filterProtein(   dataSet,   proteinInformation = \"preprocess_protein_information.csv\",   text = c(),   by = c(\"PG.Genes\", \"PG.ProteinAccessions\", \"PG.ProteinDescriptions\", \"PG.ProteinNames\",     \"ProteinDescriptions\", \"AccessionNumber\", \"AlternateID\"),   removeList = TRUE,   saveRm = TRUE )"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterProtein.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Filter proteins by gene, accession or description — filterProtein","text":"dataSet 2d data set experimental values. proteinInformation name .csv file containing protein information data (including path file, needed). file include following columns: Spectronaut: \"PG.Genes\", \"PG.ProteinAccessions\", \"PG.ProteinDescriptions\", \"PG.ProteinNames\". Scaffold: \"ProteinDescriptions\", \"AccessionNumber\", \"AlternateID\". file automatically generated function preprocessing preprocessing_scaffold. text character vector text used key selecting removing. character string specifying information text filter applied, allowable options: Spectronaut: \"PG.Genes\", \"PG.ProteinAccessions\", \"PG.ProteinDescriptions\", \"PG.ProteinNames\". Scaffold: \"ProteinDescriptions\", \"AccessionNumber\", \"AlternateID\". removeList boolean (default = TRUE) specifying whether list proteins removed selected. TRUE: Remove list proteins data set. FALSE: Remove proteins list data set. saveRm boolean (default = TRUE) specifying whether save removed data current working directory. 
option works removeList = TRUE.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterProtein.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Filter proteins by gene, accession or description — filterProtein","text":"filtered 2d dataframe.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterProtein.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Filter proteins by gene, accession or description — filterProtein","text":"function extension function preprocessing preprocessing_scaffold allows filtering proteins based additional information.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.html","id":null,"dir":"Reference","previous_headings":"","what":"Imputation of raw data signals — impute","title":"Imputation of raw data signals — impute","text":"Apply imputation method data set.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Imputation of raw data signals — impute","text":"","code":"impute(   dataSet,   imputeType = \"LocalMinVal\",   reqPercentPresent = 0.51,   k = 10,   rowmax = 0.5,   colmax = 0.8,   maxp = 1500,   rng.seed = 362436069,   rank.max = NULL,   lambda = NULL,   thresh = 1e-05,   maxit = 100,   final.svd = TRUE,   reportImputing = FALSE )"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Imputation of raw data signals — impute","text":"dataSet 2d data set experimental values. imputeType character string (default = \"LocalMinVal\") specifying imputation method use: \"LocalMinVal\": replace missing values lowest value protein condition combination. \"GlobalMinVal\": replace missing values lowest value found within entire data set. 
\"knn\": replace missing values using k-nearest neighbors algorithm (Troyanskaya et al. 2001) . \"seq-knn\": replace missing values using sequential k-nearest neighbors algorithm (Kim et al. 2004) . \"trunc-knn\": replace missing values using truncated k-nearest neighbors algorithm (Shah et al. 2017) . \"nuc-norm\": replace missing values using nuclear-norm regularization (Hastie et al. 2015) . reqPercentPresent scalar (default = 0.51) specifying required percent values must present given protein condition combination values imputed imputeType = \"LocalMinVal\". k integer (default = 10) indicating number neighbors used imputation imputeType \"knn\", \"seq-knn\", \"trunc-knn\". rowmax scalar (default = 0.5) specifying maximum percent missing data allowed row imputeType = \"knn\". rows rowmax*100% missing imputed using overall mean per sample. colmax scalar (default = 0.8) specifying maximum percent missing data allowed column imputeType = \"knn\". column colmax*100% missing data, program halts reports error. maxp integer (default = 1500) indicating largest block proteins imputed using k-nearest neighbors algorithm imputeType = \"knn\". Larger blocks divided two-means clustering (recursively) prior imputation. rng.seed integer (default = 362436069) specifying seed used random number generator reproducibility imputeType = \"knn\". rank.max integer specifying restriction rank solution imputeType = \"nuc-norm\". default set one less minimum dimension dataset. lambda scalar specifying nuclear-norm regularization parameter imputeType = \"nuc-norm\". lambda = 0, algorithm convergence typically slower. default set maximum singular value obtained singular value decomposition (SVD) dataset. thresh scalar (default = 1e-5) specifying convergence threshold imputeType = \"nuc-norm\", measured relative change Frobenius norm two successive estimates. maxit integer (default = 100) specifying maximum number iterations convergence reached imputeType = \"nuc-norm\". 
final.svd boolean (default = TRUE) specifying whether perform one-step unregularized iteration final iteration imputeType = \"nuc-norm\", followed soft-thresholding singular values, resulting hard zeros. reportImputing boolean (default = FALSE) specifying whether provide shadow data frame imputed data labels, 1 indicates corresponding entries imputed, 0 indicates otherwise. Alters return structure.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Imputation of raw data signals — impute","text":"reportImputing = FALSE, function returns imputed 2d dataframe. reportImputing = TRUE, function returns list imputed 2d dataframe shadow matrix showing proteins replicate imputed.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Imputation of raw data signals — impute","text":"Hastie T, Mazumder R, Lee JD, Zadeh R (2015). “Matrix Completion Low-Rank SVD via Fast Alternating Least Squares.” Journal Machine Learning Research, 16(104), 3367—3402. http://jmlr.org/papers/v16/hastie15a.html. Kim K, Kim B, Yi G (2004). “Reuse Imputed Data Microarray Analysis Increases Imputation Efficiency.” BMC bioinformatics, 5, 160. doi:10.1186/1471-2105-5-160 . Shah JS, Rai SN, DeFilippis AP, Hill BG, Bhatnagar , Brock GN (2017). “Distribution Based Nearest Neighbor Imputation Truncated High Dimensional Data Applications Pre-Clinical Clinical Metabolomics Studies.” BMC bioinformatics, 18, 114. doi:10.1186/s12859-017-1547-6 . Troyanskaya O, Cantor M, Sherlock G, Brown P, Hastie T, Tibshirani R, Botstein D, Altman RB (2001). “Missing Value Estimation Methods DNA Microarrays.” Bioinformatics, 17(6), 520–525. 
doi:10.1093/bioinformatics/17.6.520 .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn.html","id":null,"dir":"Reference","previous_headings":"","what":"Imputation by the k-nearest neighbors algorithm — impute.knn","title":"Imputation by the k-nearest neighbors algorithm — impute.knn","text":"Apply imputation dataset k-nearest neighbors algorithm (Troyanskaya et al. 2001) .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Imputation by the k-nearest neighbors algorithm — impute.knn","text":"","code":"impute.knn(   dataSet,   reportImputing = FALSE,   k = 10,   rowmax = 0.5,   colmax = 0.8,   maxp = 1500,   seed = 362436069 )"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Imputation by the k-nearest neighbors algorithm — impute.knn","text":"dataSet 2d dataset experimental values. reportImputing boolean (default = FALSE) specifying whether provide shadow data frame imputed data labels, 1 indicates corresponding entries imputed, 0 indicates otherwise. Alters return structure. k integer (default = 10) indicating number neighbors used imputation. rowmax scalar (default = 0.5) specifying maximum percent missing data allowed row. rows rowmax*100% missing imputed using overall mean per sample. colmax scalar (default = 0.8) specifying maximum percent missing data allowed column. column colmax*100% missing data, program halts reports error. maxp integer (default = 1500) indicating largest block proteins imputed using k-nearest neighbors algorithm. Larger blocks divided two-means clustering (recursively) prior imputation. 
seed integer (default = 362436069) specifying seed used random number generator reproducibility.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Imputation by the k-nearest neighbors algorithm — impute.knn","text":"reportImputing = FALSE, function returns imputed 2d dataframe. reportImputing = TRUE, function returns list imputed 2d dataframe shadow matrix showing proteins replicate imputed.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Imputation by the k-nearest neighbors algorithm — impute.knn","text":"Troyanskaya O, Cantor M, Sherlock G, Brown P, Hastie T, Tibshirani R, Botstein D, Altman RB (2001). “Missing Value Estimation Methods DNA Microarrays.” Bioinformatics, 17(6), 520–525. doi:10.1093/bioinformatics/17.6.520 .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn_seq.html","id":null,"dir":"Reference","previous_headings":"","what":"Imputation by the k-nearest neighbors algorithm — impute.knn_seq","title":"Imputation by the k-nearest neighbors algorithm — impute.knn_seq","text":"Apply imputation dataset sequential k-nearest neighbors algorithm (Kim et al. 2004) .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn_seq.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Imputation by the k-nearest neighbors algorithm — impute.knn_seq","text":"","code":"impute.knn_seq(dataSet, reportImputing = FALSE, k = 10)"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn_seq.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Imputation by the k-nearest neighbors algorithm — impute.knn_seq","text":"dataSet 2d dataset experimental values. 
reportImputing boolean (default = FALSE) specifying whether provide shadow data frame imputed data labels, 1 indicates corresponding entries imputed, 0 indicates otherwise. Alters return structure. k integer (default = 10) indicating number neighbors used imputation.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn_seq.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Imputation by the k-nearest neighbors algorithm — impute.knn_seq","text":"reportImputing = FALSE, function returns imputed 2d dataframe. reportImputing = TRUE, function returns list imputed 2d dataframe shadow matrix showing proteins replicate imputed.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn_seq.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Imputation by the k-nearest neighbors algorithm — impute.knn_seq","text":"Kim K, Kim B, Yi G (2004). “Reuse Imputed Data Microarray Analysis Increases Imputation Efficiency.” BMC bioinformatics, 5, 160. doi:10.1186/1471-2105-5-160 .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn_trunc.html","id":null,"dir":"Reference","previous_headings":"","what":"Imputation by the truncated k-nearest neighbors algorithm — impute.knn_trunc","title":"Imputation by the truncated k-nearest neighbors algorithm — impute.knn_trunc","text":"Apply imputation dataset truncated k-nearest neighbors algorithm (Shah et al. 
2017) .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn_trunc.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Imputation by the truncated k-nearest neighbors algorithm — impute.knn_trunc","text":"","code":"impute.knn_trunc(dataSet, reportImputing = FALSE, k = 10)"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn_trunc.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Imputation by the truncated k-nearest neighbors algorithm — impute.knn_trunc","text":"dataSet 2d dataset experimental values. reportImputing boolean (default = FALSE) specifying whether provide shadow data frame imputed data labels, 1 indicates corresponding entries imputed, 0 indicates otherwise. Alters return structure. k integer (default = 10) indicating number neighbors used imputation.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn_trunc.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Imputation by the truncated k-nearest neighbors algorithm — impute.knn_trunc","text":"reportImputing = FALSE, function returns imputed 2d dataframe. reportImputing = TRUE, function returns list imputed 2d dataframe shadow matrix showing proteins replicate imputed.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn_trunc.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Imputation by the truncated k-nearest neighbors algorithm — impute.knn_trunc","text":"Shah JS, Rai SN, DeFilippis AP, Hill BG, Bhatnagar , Brock GN (2017). “Distribution Based Nearest Neighbor Imputation Truncated High Dimensional Data Applications Pre-Clinical Clinical Metabolomics Studies.” BMC bioinformatics, 18, 114. 
doi:10.1186/s12859-017-1547-6 .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.mice_cart.html","id":null,"dir":"Reference","previous_headings":"","what":"Imputation by classification and regression trees — impute.mice_cart","title":"Imputation by classification and regression trees — impute.mice_cart","text":"Apply imputation dataset classification regression trees (Breiman et al. 1984; Doove et al. 2014; van Buuren 2018) .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.mice_cart.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Imputation by classification and regression trees — impute.mice_cart","text":"","code":"impute.mice_cart(dataSet, reportImputing = FALSE, m = 5, seed = 362436069)"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.mice_cart.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Imputation by classification and regression trees — impute.mice_cart","text":"dataSet 2d dataset experimental values. reportImputing boolean (default = FALSE) specifying whether provide shadow data frame imputed data labels, 1 indicates corresponding entries imputed, 0 indicates otherwise. Alters return structure. m integer (default = 5) specifying number multiple imputations. seed integer (default = 362436069) specifying seed used random number generator reproducibility.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.mice_cart.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Imputation by classification and regression trees — impute.mice_cart","text":"reportImputing = FALSE, function returns imputed 2d dataframe. 
reportImputing = TRUE, function returns list imputed 2d dataframe shadow matrix showing proteins replicate imputed.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.mice_cart.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Imputation by classification and regression trees — impute.mice_cart","text":"Breiman L, Friedman J, Olshen RA, Stone CJ (1984). Classification Regression Trees. Routledge, New York, NY, USA. ISBN 9780412048418. Doove LL, van Buuren S, Dusseldorp E (2014). “Recursive Partitioning Missing Data Imputation Presence Interaction Effects.” Computational Statistics & Data Analysis, 72, 92–104. doi:10.1016/j.csda.2013.10.025 . van Buuren S (2018). Flexible Imputation Missing Data. Chapman \\& Hall/CRC, New York, NY, USA. ISBN 9781032178639.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.mice_norm.html","id":null,"dir":"Reference","previous_headings":"","what":"Imputation by Bayesian linear regression — impute.mice_norm","title":"Imputation by Bayesian linear regression — impute.mice_norm","text":"Apply imputation dataset Bayesian linear regression (Rubin 1987; Schafer 1997; van Buuren Groothuis-Oudshoorn 2011) .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.mice_norm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Imputation by Bayesian linear regression — impute.mice_norm","text":"","code":"impute.mice_norm(dataSet, reportImputing = FALSE, m = 5, seed = 362436069)"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.mice_norm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Imputation by Bayesian linear regression — impute.mice_norm","text":"dataSet 2d dataset experimental values. 
reportImputing boolean (default = FALSE) specifying whether provide shadow data frame imputed data labels, 1 indicates corresponding entries imputed, 0 indicates otherwise. Alters return structure. m integer (default = 5) specifying number multiple imputations. seed integer (default = 362436069) specifying seed used random number generator reproducibility.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.mice_norm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Imputation by Bayesian linear regression — impute.mice_norm","text":"reportImputing = FALSE, function returns imputed 2d dataframe. reportImputing = TRUE, function returns list imputed 2d dataframe shadow matrix showing proteins replicate imputed.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.mice_norm.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Imputation by Bayesian linear regression — impute.mice_norm","text":"Rubin DB (1987). Multiple Imputation Nonresponse Surveys. John Wiley \\& Sons, New York, NY, USA. ISBN 9780471087052. Schafer JL (1997). Analysis Incomplete Multivariate Data. Chapman \\& Hall/CRC, New York, NY, USA. ISBN 9780412040610. van Buuren S, Groothuis-Oudshoorn K (2011). “mice: Multivariate Imputation Chained Equations R.” Journal Statistical Software, 45(3), 1–67. 
doi:10.18637/jss.v045.i03 .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.min_global.html","id":null,"dir":"Reference","previous_headings":"","what":"Imputation by the global minimum — impute.min_global","title":"Imputation by the global minimum — impute.min_global","text":"Apply imputation dataset minimum measured value protein found within entire dataset.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.min_global.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Imputation by the global minimum — impute.min_global","text":"","code":"impute.min_global(dataSet, reportImputing = FALSE)"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.min_global.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Imputation by the global minimum — impute.min_global","text":"dataSet 2d dataset experimental values. reportImputing boolean (default = FALSE) specifying whether provide shadow data frame imputed data labels, 1 indicates corresponding entries imputed, 0 indicates otherwise. Alters return structure.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.min_global.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Imputation by the global minimum — impute.min_global","text":"reportImputing = FALSE, function returns imputed 2d dataframe. 
reportImputing = TRUE, function returns list imputed 2d dataframe shadow matrix showing proteins replicate imputed.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.min_local.html","id":null,"dir":"Reference","previous_headings":"","what":"Imputation by the local minimum — impute.min_local","title":"Imputation by the local minimum — impute.min_local","text":"Apply imputation dataset minimum measured value protein condition.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.min_local.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Imputation by the local minimum — impute.min_local","text":"","code":"impute.min_local(dataSet, reportImputing = FALSE, reqPercentPresent = 0.51)"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.min_local.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Imputation by the local minimum — impute.min_local","text":"dataSet 2d dataset experimental values. reportImputing boolean (default = FALSE) specifying whether provide shadow data frame imputed data labels, 1 indicates corresponding entries imputed, 0 indicates otherwise. Alters return structure. reqPercentPresent scalar (default = 0.51) specifying required percent values must present given protein condition combination values imputed.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.min_local.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Imputation by the local minimum — impute.min_local","text":"reportImputing = FALSE, function returns imputed 2d dataframe. 
reportImputing = TRUE, function returns list imputed 2d dataframe shadow matrix showing proteins replicate imputed.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.nuc_norm.html","id":null,"dir":"Reference","previous_headings":"","what":"Imputation by the nuclear-norm regularization — impute.nuc_norm","title":"Imputation by the nuclear-norm regularization — impute.nuc_norm","text":"Apply imputation dataset nuclear-norm regularization (Hastie et al. 2015) .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.nuc_norm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Imputation by the nuclear-norm regularization — impute.nuc_norm","text":"","code":"impute.nuc_norm(   dataSet,   reportImputing = FALSE,   rank.max = NULL,   lambda = NULL,   thresh = 1e-05,   maxit = 100,   final.svd = TRUE,   seed = 362436069 )"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.nuc_norm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Imputation by the nuclear-norm regularization — impute.nuc_norm","text":"dataSet 2d dataset experimental values. reportImputing boolean (default = FALSE) specifying whether provide shadow data frame imputed data labels, 1 indicates corresponding entries imputed, 0 indicates otherwise. Alters return structure. rank.max integer specifying restriction rank solution. default set one less minimum dimension dataset. lambda scalar specifying nuclear-norm regularization parameter. lambda = 0, algorithm convergence typically slower. default set maximum singular value obtained singular value decomposition (SVD) dataset. thresh scalar (default = 1e-5) specifying convergence threshold, measured relative change Frobenius norm two successive estimates. maxit integer (default = 100) specifying maximum number iterations convergence reached. 
final.svd boolean (default = TRUE) specifying whether perform one-step unregularized iteration final iteration, followed soft-thresholding singular values, resulting hard zeros. seed integer (default = 362436069) specifying seed used random number generator reproducibility.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.nuc_norm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Imputation by the nuclear-norm regularization — impute.nuc_norm","text":"reportImputing = FALSE, function returns imputed 2d dataframe. reportImputing = TRUE, function returns list imputed 2d dataframe shadow matrix showing proteins replicate imputed.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.nuc_norm.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Imputation by the nuclear-norm regularization — impute.nuc_norm","text":"Hastie T, Mazumder R, Lee JD, Zadeh R (2015). “Matrix Completion Low-Rank SVD via Fast Alternating Least Squares.” Journal Machine Learning Research, 16(104), 3367—3402. http://jmlr.org/papers/v16/hastie15a.html.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.pca_bayes.html","id":null,"dir":"Reference","previous_headings":"","what":"Imputation by Bayesian principal components analysis — impute.pca_bayes","title":"Imputation by Bayesian principal components analysis — impute.pca_bayes","text":"Apply imputation dataset Bayesian principal components analysis (Oba et al. 
2003) .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.pca_bayes.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Imputation by Bayesian principal components analysis — impute.pca_bayes","text":"","code":"impute.pca_bayes(dataSet, reportImputing = FALSE, nPcs = NULL, maxSteps = 100)"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.pca_bayes.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Imputation by Bayesian principal components analysis — impute.pca_bayes","text":"dataSet 2d dataset experimental values. reportImputing boolean (default = FALSE) specifying whether provide shadow data frame imputed data labels, 1 indicates corresponding entries imputed, 0 indicates otherwise. Alters return structure. nPcs integer specifying number principal components calculate. default set minimum number samples number proteins. maxSteps integer (default = 100) specifying maximum number estimation steps.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.pca_bayes.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Imputation by Bayesian principal components analysis — impute.pca_bayes","text":"reportImputing = FALSE, function returns imputed 2d dataframe. reportImputing = TRUE, function returns list imputed 2d dataframe shadow matrix showing proteins replicate imputed.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.pca_bayes.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Imputation by Bayesian principal components analysis — impute.pca_bayes","text":"Oba S, Sato M, Takemasa , Monden M, Matsubara K, Ishii S (2003). “Bayesian Missing Value Estimation Method Gene Expression Profile Data.” Bioinformatics, 19(16), 2088–2096. 
doi:10.1093/bioinformatics/btg287 .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.pca_prob.html","id":null,"dir":"Reference","previous_headings":"","what":"Imputation by probabilistic principal components analysis — impute.pca_prob","title":"Imputation by probabilistic principal components analysis — impute.pca_prob","text":"Apply imputation dataset probabilistic principal components analysis (Stacklies et al. 2007) .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.pca_prob.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Imputation by probabilistic principal components analysis — impute.pca_prob","text":"","code":"impute.pca_prob(   dataSet,   reportImputing = FALSE,   nPcs = NULL,   maxIterations = 1000,   seed = 362436069 )"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.pca_prob.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Imputation by probabilistic principal components analysis — impute.pca_prob","text":"dataSet 2d dataset experimental values. reportImputing boolean (default = FALSE) specifying whether provide shadow data frame imputed data labels, 1 indicates corresponding entries imputed, 0 indicates otherwise. Alters return structure. nPcs integer specifying number principal components calculate. default set minimum number samples number proteins. maxIterations integer (default = 1000) specifying maximum number allowed iterations. seed integer (default = 362436069) specifying seed used random number generator reproducibility.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.pca_prob.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Imputation by probabilistic principal components analysis — impute.pca_prob","text":"reportImputing = FALSE, function returns imputed 2d dataframe. 
reportImputing = TRUE, function returns list imputed 2d dataframe shadow matrix showing proteins replicate imputed.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.pca_prob.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Imputation by probabilistic principal components analysis — impute.pca_prob","text":"Stacklies W, Redestig H, Scholz M, Walther D, Selbig J (2007). “pcaMethods–Bioconductor Package Providing PCA Methods Incomplete Data.” Bioinformatics, 23(9), 1164–1167. doi:10.1093/bioinformatics/btm069 .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/normalize.html","id":null,"dir":"Reference","previous_headings":"","what":"Normalization of preprocessed data — normalize","title":"Normalization of preprocessed data — normalize","text":"Apply specified type normalization data set.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/normalize.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Normalization of preprocessed data — normalize","text":"","code":"normalize(dataSet, normalizeType = \"quant\")"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/normalize.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Normalization of preprocessed data — normalize","text":"dataSet 2d data set experimental values. normalizeType string (default = \"quant\") specifying type normalization apply: \"quant\": Quantile (Bolstad et al. 
2003) \"median\": Protein-wise Median \"mean\": Protein-wise Mean \"none\": None","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/normalize.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Normalization of preprocessed data — normalize","text":"normalized 2d dataframe.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/normalize.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Normalization of preprocessed data — normalize","text":"Quantile normalization generally recommended. Mean median normalization going included popular previous methods. normalization recommended. Boxplots also generated normalization give visual indicator changes.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/normalize.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Normalization of preprocessed data — normalize","text":"Bolstad BM, Irizarry RA, Astrand M, Speed TP (2003). “Comparison Normalization Methods High Density Oligonucleotide Array Data Based Variance Bias.” Bioinformatics, 19(2), 185–193. 
doi:10.1093/bioinformatics/19.2.185 .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preProcessFiltering.html","id":null,"dir":"Reference","previous_headings":"","what":"Filtering of raw data signals — preProcessFiltering","title":"Filtering of raw data signals — preProcessFiltering","text":"Apply series filtering steps data set.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preProcessFiltering.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Filtering of raw data signals — preProcessFiltering","text":"","code":"preProcessFiltering(   dataSet,   filterNaN = TRUE,   filterUnique = 2,   replaceBlank = TRUE,   saveRm = TRUE )"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preProcessFiltering.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Filtering of raw data signals — preProcessFiltering","text":"dataSet 2d data set experimental values. filterNaN boolean (default = TRUE) specifying whether proteins NaN removed data set. filterUnique integer (default = 2) specifying whether proteins less default number unique peptides removed data set. replaceBlank boolean (default = TRUE) specifying whether proteins without names named accession numbers. 
saveRm boolean (default = TRUE) specifying whether save removed data current working directory.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preProcessFiltering.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Filtering of raw data signals — preProcessFiltering","text":"filtered 2d dataframe.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preProcessFiltering.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Filtering of raw data signals — preProcessFiltering","text":"forms filtering recommended use cases.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preprocessing.html","id":null,"dir":"Reference","previous_headings":"","what":"Loading, filtering and reformatting of MS DIA data from Spectronaut — preprocessing","title":"Loading, filtering and reformatting of MS DIA data from Spectronaut — preprocessing","text":"Read data file Spectronaut, apply filtering conditions, select columns necessary analysis, return reformatted data.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preprocessing.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Loading, filtering and reformatting of MS DIA data from Spectronaut — preprocessing","text":"","code":"preprocessing(   fileName,   dataSet = NULL,   filterNaN = TRUE,   filterUnique = 2,   replaceBlank = TRUE,   saveRm = TRUE )"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preprocessing.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Loading, filtering and reformatting of MS DIA data from Spectronaut — preprocessing","text":"fileName name .csv file containing MS data (including path file, needed). dataSet raw data set, already loaded R. filterNaN boolean (default = TRUE) specifying whether observations including NaN omitted. 
filterUnique integer (default = 2) specifying many number unique peptides required include protein. replaceBlank boolean (default = TRUE) specifying whether proteins without names named accession numbers. saveRm boolean (default = TRUE) specifying whether save removed data current working directory.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preprocessing.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Loading, filtering and reformatting of MS DIA data from Spectronaut — preprocessing","text":"2d dataframe.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preprocessing.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Loading, filtering and reformatting of MS DIA data from Spectronaut — preprocessing","text":"function executes following: Reads file. Applies applicable filters, necessary. Provides summary statistics histogram values reported data set. Selects columns contain necessary information analysis. Re-formats data present individual proteins columns group replicates protein. 
Stores data data.frame prints levels condition replicate user.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preprocessing_scaffold.html","id":null,"dir":"Reference","previous_headings":"","what":"Loading and reformatting of MS data from Scaffold — preprocessing_scaffold","title":"Loading and reformatting of MS data from Scaffold — preprocessing_scaffold","text":"Read data file Scaffold, select columns necessary analysis, return reformatted data.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preprocessing_scaffold.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Loading and reformatting of MS data from Scaffold — preprocessing_scaffold","text":"","code":"preprocessing_scaffold(fileName, dataSet = NULL)"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preprocessing_scaffold.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Loading and reformatting of MS data from Scaffold — preprocessing_scaffold","text":"fileName name .xls file containing MS data (including path file, needed). dataSet raw data set, already loaded R.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preprocessing_scaffold.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Loading and reformatting of MS data from Scaffold — preprocessing_scaffold","text":"2d dataframe.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preprocessing_scaffold.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Loading and reformatting of MS data from Scaffold — preprocessing_scaffold","text":"function executes following: Reads file. Provides summary statistics histogram values reported data set. Selects columns contain necessary information analysis. Re-formats data present individual proteins columns group replicates protein. 
Stores data data.frame prints levels condition replicate user.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/pullProteinPath.html","id":null,"dir":"Reference","previous_headings":"","what":"Compiling data on a single protein from each step in the process — pullProteinPath","title":"Compiling data on a single protein from each step in the process — pullProteinPath","text":"Summarize steps performed data one protein.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/pullProteinPath.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Compiling data on a single protein from each step in the process — pullProteinPath","text":"","code":"pullProteinPath(proteinName, dataSetList)"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/pullProteinPath.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Compiling data on a single protein from each step in the process — pullProteinPath","text":"proteinName string identifying protein interest. 
dataSetList list data frames, order dictates order presentation.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/pullProteinPath.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Compiling data on a single protein from each step in the process — pullProteinPath","text":"2d dataframe, protein data step present dataSetList.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/pullProteinPath.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Compiling data on a single protein from each step in the process — pullProteinPath","text":"proteinName must match labels data sets exactly.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/sortcondition.html","id":null,"dir":"Reference","previous_headings":"","what":"Creating a keyed list of conditions to the list of proteins that are present — sortcondition","title":"Creating a keyed list of conditions to the list of proteins that are present — sortcondition","text":"Create keyed dictionary, every unique experimental condition label list every protein value condition.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/sortcondition.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Creating a keyed list of conditions to the list of proteins that are present — sortcondition","text":"","code":"sortcondition(dataSet)"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/sortcondition.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Creating a keyed list of conditions to the list of proteins that are present — sortcondition","text":"dataSet 2d data set experimental values.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/sortcondition.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Creating a keyed list of conditions to the list of proteins 
that are present — sortcondition","text":"list lists.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/summarize.html","id":null,"dir":"Reference","previous_headings":"","what":"Summarize protein intensities across conditions — summarize","title":"Summarize protein intensities across conditions — summarize","text":"Calculate mean, standard deviation, replicate count protein across every condition.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/summarize.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Summarize protein intensities across conditions — summarize","text":"","code":"summarize(dataSet, saveSumm = TRUE)"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/summarize.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Summarize protein intensities across conditions — summarize","text":"dataSet data frame containing data signals labels. saveSumm boolean (default = TRUE) specifying whether save summary statistics current working directory.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/summarize.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Summarize protein intensities across conditions — summarize","text":"2d summarized data frame.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/summarize.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Summarize protein intensities across conditions — summarize","text":"column 'Stat' generated data.frame includes following statistics: n: Number. mean: Mean. sd: Standard deviation. median: Median. trimmed: Trimmed mean trim 0.1. mad: Median absolute deviation (median). min: Minimum. max: Maximum. range: difference maximum minimum value. skew: Skewness. kurtosis: Kurtosis. 
se: Standard error.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/transform.html","id":null,"dir":"Reference","previous_headings":"","what":"Log-based transformation — transform","title":"Log-based transformation — transform","text":"Apply logarithmic transformation data stabilize variance.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/transform.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Log-based transformation — transform","text":"","code":"transform(dataSet, logFold = 2)"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/transform.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Log-based transformation — transform","text":"dataSet data frame containing data signals. logFold integer specifying base log transformation.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/transform.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Log-based transformation — transform","text":"transformed data.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/transform.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Log-based transformation — transform","text":"function executes following: Plots mean-variance relationship using meanVariancePlot(). Log-transforms data, using specified base. 
Plots mean-variance relationship comparison.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/trimFASTA.html","id":null,"dir":"Reference","previous_headings":"","what":"Trimming down a protein FASTA file to certain proteins — trimFASTA","title":"Trimming down a protein FASTA file to certain proteins — trimFASTA","text":"Trim FASTA file contain proteins present associated Spectronaut report file.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/trimFASTA.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Trimming down a protein FASTA file to certain proteins — trimFASTA","text":"","code":"trimFASTA(   FASTAFileName,   reportFileName,   outputFileName = \"trimFASTA_output.txt\",   by = \"PG.ProteinNames\",   selectString = \"*BOVIN\" )"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/trimFASTA.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Trimming down a protein FASTA file to certain proteins — trimFASTA","text":"FASTAFileName character string specifying name input FASTA .txt file. reportFileName character string specifying name Spectronaut report .csv file. outputFileName character string (default = \"trimFASTA_output.txt\") specifying name output file. character string (default = \"PG.ProteinNames\") specifying identifier (column name) used selection report file. 
selectString character string specifying regular expression search .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/trimFASTA.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Trimming down a protein FASTA file to certain proteins — trimFASTA","text":"FASTA file specified proteins present.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/trimFASTA.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Trimming down a protein FASTA file to certain proteins — trimFASTA","text":"Depending size FASTA file, function may run slowly take several minutes. FASTA file must .txt format; formats work.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/trimFASTA2.0.html","id":null,"dir":"Reference","previous_headings":"","what":"Trimming down a protein FASTA file to certain proteins — trimFASTA2.0","title":"Trimming down a protein FASTA file to certain proteins — trimFASTA2.0","text":"Trim FASTA file contain proteins present associated Spectronaut report file.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/trimFASTA2.0.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Trimming down a protein FASTA file to certain proteins — trimFASTA2.0","text":"","code":"trimFASTA2.0(   FASTAFileName,   reportFileName,   outputFileName,   selectString = \"*BOVIN\" )"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/trimFASTA2.0.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Trimming down a protein FASTA file to certain proteins — trimFASTA2.0","text":"FASTAFileName string indicating FASTA .txt filename. reportFileName string indicating Spectronaut report .csv filename. outputFileName string indicating name new .txt FASTA file. 
selectString string containing regular expression search.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/trimFASTA2.0.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Trimming down a protein FASTA file to certain proteins — trimFASTA2.0","text":"FASTA file specified proteins present.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/trimFASTA2.0.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Trimming down a protein FASTA file to certain proteins — trimFASTA2.0","text":"Depending size FASTA file, function may run slowly take several minutes. FASTA file must .txt format; formats work.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/visualize.html","id":null,"dir":"Reference","previous_headings":"","what":"Generating visualizations for MS Data — visualize","title":"Generating visualizations for MS Data — visualize","text":"Create specific graphics illustrate results data analysis function.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/visualize.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Generating visualizations for MS Data — visualize","text":"","code":"visualize(   dataSet,   graphType = \"volcano\",   pkg = \"pheatmap\",   cluster_cols = TRUE,   cluster_rows = FALSE,   show_colnames = TRUE,   show_rownames = TRUE,   M.thres = 1,   transformLabel = \"Log2\",   center = TRUE,   scale = TRUE,   addlabels = TRUE,   choice = \"variance\",   ncp = 10,   addEllipses = TRUE,   ellipse.level = 0.95,   label = \"all\",   show_percentage = TRUE,   fill_color = c(\"blue\", \"yellow\", \"green\", \"red\"),   show_universal = FALSE,   P.thres = 0.05,   logF.thres = 0.6 )"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/visualize.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Generating visualizations for MS Data — 
visualize","text":"dataSet 2D data frame corresponds output function analyze() name. graphType string indicating graph type. Current options : \"heatmap\" \"MA\" \"normalize\" \"PCA_biplot\" \"PCA_ind\" \"PCA_scree\" \"PCA_var\" \"t-test\" \"Upset\" \"Venn\" \"volcano\" pkg string specifying source package used plot heatmap. Two options: \"pheatmap\" \"ggplot2\". argument works graphType = \"heatmap\". cluster_cols boolean (default = TRUE) determining rows clustered hclust object. argument works graphType = \"heatmap\" pkg = \"pheatmap\". cluster_rows boolean (default = FALSE) determining columns clustered hclust object. argument works graphType = \"heatmap\" pkg = \"pheatmap\". show_colnames boolean (default = TRUE) specifying column names shown. argument works graphType = \"heatmap\" pkg = \"pheatmap\". show_rownames boolean (default = TRUE) specifying row names shown. argument works graphType = \"heatmap\" pkg = \"pheatmap\". M.thres absolute threshold value M (log fold-change) (default = 1) used plot two vertical lines (-M.thres M.thres) MA plot graphType = \"MA\". transformLabel string (default = \"Log2\") used label title axes transformation type transformed MA plot graphType = \"MA\". center boolean (default = TRUE) indicating whether variables shifted zero centered graphType = \"PCA_scree\", graphType = \"PCA_ind\", graphType = \"PCA_var\", graphType = \"PCA_biplot\". scale boolean (default = TRUE) indicating whether variables scaled unit variance analysis takes place graphType = \"PCA_scree\", graphType = \"PCA_ind\", graphType = \"PCA_var\", graphType = \"PCA_biplot\". addlabels boolean (default = TRUE) specifying whether elements labeled. graphType = \"PCA_scree\", specifies whether labels added top bars points show information retained dimension. graphType = \"PCA_ind\", specifies whether active individuals labeled. graphType = \"PCA_var\", specifies whether active variables labeled. 
choice text (default = \"variance\") specifying PCA data plotted scree plot graphType = \"PCA_scree\". Allowed values \"variance\" \"eigenvalue\". ncp numeric value (default = 10) specifying number dimensions shown graphType = \"PCA_scree\". addEllipses boolean (default = TRUE) specifying whether draw ellipses around individuals graphType = \"PCA_ind\" graphType = \"PCA_biplot\". ellipse.level numeric value (default = 0.95) specifying size concentration ellipse normal probability graphType = \"PCA_ind\" graphType = \"PCA_biplot\". label text (default = \"\") specifying elements labelled graphType = \"PCA_biplot\". Allowed values: \"\": Label active individuals active variables. \"ind\": Label active individuals. \"var\": Label active variables. \"none\": labels. show_percentage boolean (default = TRUE) specifying whether show percentage set graphType = \"Venn\". fill_color text (default = c(\"blue\", \"yellow\", \"green\", \"red\")) specifying colors fill circles graphType = \"Venn\". show_universal boolean (default = FALSE) specifying whether return data.frame logical columns representing sets graphType = \"Venn\". P.thres threshold value P-value (default = 0.05) used plot horizontal line (-log10(P.thres)) volcano plot graphType = \"volcano\". logF.thres absolute threshold value log2(fold change) (default = 0.6) used plot two vertical lines (-logF.thres logF.thres) volcano plot graphType = \"volcano\".","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/visualize.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Generating visualizations for MS Data — visualize","text":"object class ggplot.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/visualize.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Generating visualizations for MS Data — visualize","text":"function visualize() designed work directly output function analyze(). 
Please sure arguments graphType testType match.","code":""}]
+[{"path":"https://uconn-scs.github.io/msDiaLogue/articles/scaffold.html","id":"setting-quantitative-value-in-scaffold","dir":"Articles","previous_headings":"","what":"1. Setting Quantitative Value in Scaffold","title":"Instructions for Exporting Data from Scaffold for Use in msDiaLogue Analysis","text":"quantitative value must set intensity measurement count measurement. Inside Scaffold 5, Samples tab, Display Options dropdown menu top left window must set “Quantitative Value” quantitative value must defined (Experiment –> Quantitative Analysis –> Settings, Quantitative Method dropdown). PMF recommends Average Precursor Intensity value.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/scaffold.html","id":"turning-off-normalization-in-scaffold","dir":"Articles","previous_headings":"","what":"2. Turning Off Normalization in Scaffold","title":"Instructions for Exporting Data from Scaffold for Use in msDiaLogue Analysis","text":"Normalization must turned . (Experiment –> Quantitative Analysis –> Settings), make sure “Use Normalization” box unchecked. option normalize various methods msDiaLogue, stack normalizations programs.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/scaffold.html","id":"minimum-requirements-for-experiment-conditions","dir":"Articles","previous_headings":"","what":"3. Minimum Requirements for Experiment Conditions","title":"Instructions for Exporting Data from Scaffold for Use in msDiaLogue Analysis","text":"experiment must contain minimum 2 conditions, condition must minimum 3 replicates. conditions fine, replicates fine, conditions need number replicates. fewer 3 replicates condition, 1 condition, throw error msDiaLogue able process data.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/scaffold.html","id":"sample-naming-format-requirements","dir":"Articles","previous_headings":"","what":"4. 
Sample Naming Format Requirements","title":"Instructions for Exporting Data from Scaffold for Use in msDiaLogue Analysis","text":"samples must named following format: YYYYMMDD_initials_condition-replicate# (e.g. 20240101_JL_ctrl-1). files may already named way; please check Scaffold “Samples” tab, heading quantitative value column. sample name appear vertically. name formatted , can change going “Load Data” tab, selecting tab sample individually, right-clicking tab, choosing “Edit BioSample”, typing correct name format “Sample Name” box, clicking “Apply”.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/scaffold.html","id":"filtering-dataset-for-protein-identification","dir":"Articles","previous_headings":"","what":"5. Filtering Dataset for Protein Identification","title":"Instructions for Exporting Data from Scaffold for Use in msDiaLogue Analysis","text":"strongly recommend filter dataset hide proteins 1 peptide identified. “Samples” tab, top-menu bar, “Min # Peptides” dropdown, set 2.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/scaffold.html","id":"collapsing-protein-clusters-in-samples-tab","dir":"Articles","previous_headings":"","what":"6. Collapsing Protein Clusters in Samples Tab","title":"Instructions for Exporting Data from Scaffold for Use in msDiaLogue Analysis","text":"protein clusters must collapsed. “Samples” tab, first column (header “#”), right click numbered entries , select “Clusters”, select “Collapse ”. converts gray-shaded entries, multiple proteins inferred peptide evidence, expanded view line possible protein match cluster collapsed view first entry appearing table. msDiaLogue, first entry accession number one represented data cluster. ’re now ready export data. “Samples” tab Scaffold, right-click anywhere main data table, choose “Export” (bottom menu), “Export Excel”. Save descriptive filename make sense someone else future choose location ’ll using working directory R. report can saved .xls .csv. 
can now use preprocessing_scaffold script available page pick transformation step msDiaLogue script provided main Usage Template page. raw data .xls file Toy_Scaffold_Data.xls, specify fileName read raw data file R. raw data stored .RData file Toy_Scaffold_Data.RData, first load data file directly, specify dataSet function.","code":"library(msDiaLogue) ## if the raw data is in a .xls file dataSet <- preprocessing_scaffold(fileName = \"../tests/testData/Toy_Scaffold_Data.xls\") ## if the raw data is in an .Rdata file load(\"../tests/testData/Toy_Scaffold_Data.RData\") dataSet <- preprocessing_scaffold(dataSet = Toy_Scaffold_Data) #> Warning: Removed 4194 rows containing non-finite outside the scale range #> (`stat_bin()`). #> Summary of Full Data Signals: #>      Min.   1st Qu.    Median      Mean   3rd Qu.      Max.      NA's  #> 7.300e+05 9.640e+06 2.250e+07 5.546e+07 5.630e+07 3.480e+09      4194  #>  #> Levels of Condition: 6 ctrl DM SIC  #> Levels of Replicate: 1 2 3"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/template.html","id":"load-r-package","dir":"Articles","previous_headings":"","what":"Load R package","title":"A Usage Template for the R Package msDiaLogue","text":"","code":"# install.packages(\"devtools\") devtools::install_github(\"uconn-scs/msDiaLogue\") ##  ## ── R CMD build ───────────────────────────────────────────────────────────────── ## * checking for file ‘/private/var/folders/yd/khpnz9fj6h524w6qys8n8vt80000gn/T/RtmpIGAfbx/remotes75ac3d1dae80/uconn-scs-msDiaLogue-8a27917/DESCRIPTION’ ... OK ## * preparing ‘msDiaLogue’: ## * checking DESCRIPTION meta-information ... 
OK ## * checking for LF line-endings in source and make files and shell scripts ## * checking for empty or unneeded directories ## Removed empty directory ‘msDiaLogue/tests’ ## * building ‘msDiaLogue_0.0.1.tar.gz’ library(msDiaLogue)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/template.html","id":"preprocessing","dir":"Articles","previous_headings":"","what":"Preprocessing","title":"A Usage Template for the R Package msDiaLogue","text":"Read file Toy_Spectronaut_Data.csv R. Omit observations NaN, save removed data file preprocess_Filtered_Out_NaN.csv current working directory. Filter proteins fewer \\(2\\) unique peptides, save removed data file preprocess_Filtered_Out_Unique.csv current working directory. proteins without names, replace blank protein name entries protein accessions.","code":"fileName <- \"../tests/testData/Toy_Spectronaut_Data.csv\" data <- preprocessing(fileName, filterNaN = TRUE, filterUnique = 2,                       replaceBlank = TRUE, saveRm = TRUE)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/template.html","id":"transformation","dir":"Articles","previous_headings":"","what":"Transformation","title":"A Usage Template for the R Package msDiaLogue","text":"Raw intensity measurements, often unsuitable direct statistical modeling, prompt transformation quantitative proteomics workflow. preferred solution log-transformation, effectively addressing issue. log\\(_2\\) transformation commonly employed due ability facilitate straightforward interpretation fold changes protein levels.","code":"dataTran <- transform(data, logFold = 2)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/template.html","id":"filtering","dir":"Articles","previous_headings":"","what":"Filtering","title":"A Usage Template for the R Package msDiaLogue","text":"various reasons, contamination, lack informativeness, identification samples, etc., data filtered conducting differential analysis exclude certain proteins. 
example, protein ‘XPO4_HUMAN’ chosen filtered . removeList = TRUE indicates removal proteins listed listName dataTran. Please note saveRm = TRUE, excluded data (“XPO4_HUMAN”) saved .csv file named filtered_out_data.csv current working directory. Alternatively, wish retain proteins like “PA1B2_HUMAN”, “TEBP_HUMAN”, “UAP1_HUMAN”, simply set removelist = FLASE.","code":"dataFilt <- filterOutIn(dataTran, listName = \"XPO4_HUMAN\",                         removeList = TRUE, saveRm = TRUE) filterOutIn(dataTran, listName = c(\"PA1B2_HUMAN\", \"TEBP_HUMAN\", \"UAP1_HUMAN\"), removeList = FALSE)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/template.html","id":"imputation","dir":"Articles","previous_headings":"","what":"Imputation","title":"A Usage Template for the R Package msDiaLogue","text":"Mass spectrometry-based quantitative proteomics experiments often yield data missing values, potentially impacting downstream analyses. Various imputation methods developed address issue. far, package provides two imputation methods use: “LocalMinVal”: replaces missing values lowest value protein condition combination; “GlobalMinVal”: replaces missing values lowest value found within entire dataset. Additional methods added later. example, impute NA value dataTran using “LocalMinVal,” set required percentage values must present given protein condition combination values imputed \\(51\\%\\). imputation, may still NA values consequently need filtered . saveRm = TRUE indicates filtered data saved .csv file named filtered_NA_data.csv current working directory. 
dataImput follows:","code":"dataImput <- impute(dataFilt, imputeType = \"LocalMinVal\",                     reqPercentPresent = 51, reportImputing = FALSE) dataImput <- filterNA(dataImput, saveRm = TRUE)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/template.html","id":"normalization","dir":"Articles","previous_headings":"","what":"Normalization","title":"A Usage Template for the R Package msDiaLogue","text":"Normalization designed address systematic biases, enhancing comparability samples preserving signal. Various normalization approaches proposed. far, package provides three normalization methods use: “quant”: Quantile “median”: Column-wise Median “mean”: Column-wise Mean Quantile normalization generally recommended.","code":"dataNorm <- normalize(dataImput, normalizeType = \"quant\")"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/template.html","id":"summarization","dir":"Articles","previous_headings":"","what":"Summarization","title":"A Usage Template for the R Package msDiaLogue","text":"Summarization proves valuable distilling meaningful insights vast intricate data, facilitating efficient identification, quantification, interpretation protein profiles within complex biological samples.","code":"dataSumm <- summarize(dataNorm, saveSumm = TRUE)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/template.html","id":"analysis-and-visualization","dir":"Articles","previous_headings":"","what":"Analysis And Visualization","title":"A Usage Template for the R Package msDiaLogue","text":"","code":"analysis1 <- analyze(dataNorm, testType = \"MA\") visualize(analysis1, graphType = \"MA\")"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"load-r-package","dir":"Articles","previous_headings":"","what":"Load R package","title":"A Usage Template for the R Package 
msDiaLogue","text":"","code":"library(msDiaLogue)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"preprocessing","dir":"Articles","previous_headings":"","what":"Preprocessing","title":"A Usage Template for the R Package msDiaLogue","text":"function preprocessing() takes .csv file summarized protein abundances, exported Spectronaut. important columns need included file : R.Condition, R.Replicate, PG.ProteinAccessions, PG.ProteinNames, PG.NrOfStrippedSequencesIdentified, PG.Quantity. function reformat data provide functionality initial filtering (based number unique peptides). steps describe functions happen Preprocessing code. 1. Loads raw data raw data .csv file Toy_Spectronaut_Data.csv, specify fileName read raw data file R. raw data stored .RData file Toy_Spectronaut_Data.RData, first load data file directly, specify dataSet function. 2. Filters identified proteins exhibit “NaN” quantitative values NaN, stands ‘Number,’ can found PG.Quantity column proteins identified MS MS/MS evidence raw data, peptides protein lack associated integrated peak area intensity. usually occurs low abundance peptides exhibit intensities close limit detection resulting poor signal--noise (S/N) /interference co-eluting peptide ions similar identical m/z values lead difficulty parsing individual intensity profiles. 3. Applies unique peptides per protein filter General practice proteomics field filter proteins 1 unique peptide identified. adds increased confidence results already filtered 1% false discovery rate (FDR), since proteins identified 2 peptides less likely false positives. recommend filtering protein entries order focus confident targets identified proteome. However, 1-peptide proteins can still observed original protein report Spectronaut. 4. Adds accession numbers identified proteins without informative names Spectronaut reports contain 4 different columns identifying information: PG.Genes, gene name (e.g. CDK1). 
PG.ProteinAccessions, UniProt identifier number unique entry online database (e.g. P06493). PG.ProteinDescriptions, protein name provided UniProt (e.g. cyclin-dependent kinase 1). PG.ProteinNames, concatenation identifier species (e.g. CDK1_HUMAN). Every entry UniProt accession number, may identifiers, due incomplete annotation. Uniprot includes entries fragments proteins proteins entries redundant, peptide can match multiple entries protein, generates multiple possible identifiers Spectronaut. , ProteinNames entry Spectronaut can switch formats: preference accession number species, can also gene name species instead. option tells msDiaLogue substitute accession number identifier tries pull identifier column information. 5. Saves document working directory filtered data, desired saveRm = TRUE, data removed step 2 (preprocess_Filtered_Out_NaN.csv) step 3 (preprocess_Filtered_Out_Unique.csv) saved current working directory. part preprocessing(), histogram log2log_2-transformed protein abundances provided. helpful way confirm data read correctly, issues numerical values protein abundances. Ideally, histogram appear fairly symmetrical (bell-shaped) without much skew towards smaller larger values.","code":"## if the raw data is in a .csv file fileName <- \"../tests/testData/Toy_Spectronaut_Data.csv\" dataSet <- preprocessing(fileName,                          filterNaN = TRUE, filterUnique = 2,                          replaceBlank = TRUE, saveRm = TRUE) ## if the raw data is in an .Rdata file load(\"../tests/testData/Toy_Spectronaut_Data.RData\") dataSet <- preprocessing(dataSet = Toy_Spectronaut_Data,                          filterNaN = TRUE, filterUnique = 2,                          replaceBlank = TRUE, saveRm = TRUE) #> Warning: Removed 25 rows containing non-finite outside the scale range #> (`stat_bin()`). #> Summary of Full Data Signals (Raw): #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  
#>    8577   88074  210461  653661  535701 5044124 #> Levels of Condition: 100fmol 50fmol  #> Levels of Replicate: 1 2 3 4 5"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"transformation","dir":"Articles","previous_headings":"","what":"Transformation","title":"A Usage Template for the R Package msDiaLogue","text":"Raw mass spectrometry intensity measurements often unsuitable direct statistical modeling shape data usually symmetrical variance consistent across range intensities. proteomic workflows convert raw values log2_2 transformation, reshapes data symmetrical distribution, making easier interpret mean-based fold changes, also stabilizes variance across intensity range (.e. reduces heteroscedasticity).","code":"dataTran <- transform(dataSet, logFold = 2)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"filtering","dir":"Articles","previous_headings":"","what":"Filtering","title":"A Usage Template for the R Package msDiaLogue","text":"cases, researcher may wish filter specific protein proteins dataset. common instance proteins identified common contaminants database, identification necessary avoid incorrect matching result irrelevant experimental question included data visualization. scenarios might include mixed-species experiment researcher wants evaluate data one species time. step allows set aside specific proteins downstream analysis, using gene_species identifier format. Case 1. Remove proteins specified user step keep everything else. example, proteins named “ADH1_YEAST” containing “HUMAN” chosen filtered . removeList = TRUE indicates removal proteins union listName regexName dataTran. Please note saveRm = TRUE, excluded data (“ADH1_YEAS” + “*HUMAN”) saved .csv file named filtered_out_data.csv current working directory. Case 2. Keep proteins specified user step remove everything else. Alternatively, keep proteins like “ADH1_YEAST” “*HUMAN”, simply set removelist = FALSE. 
Extension Besides protein names, function filterProtein() provides similar function filter proteins additional protein information. Spectronaut: “PG.Genes”, “PG.ProteinAccessions”, “PG.ProteinDescriptions”, “PG.ProteinNames”. Scaffold: “ProteinDescriptions”, “AccessionNumber”, “AlternateID”. proteinInformation file name protein information, automatically generated preprocessing(). case, proteins whose \"PG.ProteinDescriptions\" match “Ras-related protein Rab-3D” “Alcohol dehydrogenase 1” kept. Note search value text used exact equality search.","code":"filterOutIn(dataTran, listName = \"ADH1_YEAST\", regexName = \"HUMAN\",             removeList = TRUE, saveRm = TRUE) filterOutIn(dataTran, listName = \"ADH1_YEAST\", regexName = \"HUMAN\",             removeList = FALSE) filterProtein(dataTran, proteinInformation = \"preprocess_protein_information.csv\",               text = c(\"Ras-related protein Rab-3D\", \"Alcohol dehydrogenase 1\"),               by = \"PG.ProteinDescriptions\",               removeList = FALSE)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"normalization","dir":"Articles","previous_headings":"","what":"Normalization","title":"A Usage Template for the R Package msDiaLogue","text":"Normalization designed address systematic biases data. Biases can arise inadvertent sample grouping generation preparation, variations instrument performance acquisition, analysis different peptide amounts across experiments, reasons. factors can artificially mask enhance actual biological changes. Many normalization methods developed large datasets, strengths weaknesses. following factors considered choosing normalization method: Experiment-Specific Normalization: experiments run UConn PMF normalized injection amount time analysis facilitate comparison. “Amount” measured UV absorbance 280 nm, standard method generic protein quantification. 
Assumption Non-Changing Species: biological experiments implicitly assume majority measured species experiment change across conditions. assumption robust thousands species, compared hundreds, tens, experiments different complexities (e.g. purified protein vs. immunoprecipitation vs. full lysate), normalization applied global process, instead subsets experiments relatively similar . far, package provides three normalization methods use: “quant”: Quantile (Bolstad et al. 2003) (values run ranked, quantile bins applied entire dataset, values run adjusted closest bin value) “median”: Protein-wise Median (scalar factor applied protein entry make median sample equal every sample) “mean”: Protein-wise Mean (scalar factor applied protein entry make mean sample equal every sample) Quantile normalization generally recommended UConn SCS.  Oh! message “Warning: Removed 16 rows containing non-finite values” indicates presence 16 NA (Available) values data. NA values arise protein identified particular sample condition automatically excluded generating boxplot retained actual dataset.","code":"dataNorm <- normalize(dataTran, normalizeType = \"quant\") #> Warning: Removed 16 rows containing non-finite outside the scale range #> (`stat_boxplot()`)."},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"imputation","dir":"Articles","previous_headings":"","what":"Imputation","title":"A Usage Template for the R Package msDiaLogue","text":"two primary MS/MS acquisition types implemented large scale MS-based proteomics unique advantages disadvantages. Traditional Data-Dependent Acquisition (DDA) methods favor specificity MS/MS sampling comprehensive proteome coverage. Small peptide isolation windows (<3 m/z) result MS/MS spectra contain fragmentation data ideally one peptide. specificity promotes clear peptide identifications comes expense added scan time. 
DDA experiments, number peptides can selected MS/MS limited instrument scan speeds therefore prioritized highest peptide abundance. Low abundance peptides sampled less frequently MS/MS can result variable peptide coverage many missing protein data across large sample datasets. Data-Independent Acquisition (DIA) methods promote comprehensive peptide coverage specificity sampling many peptides MS/MS simultaneously. Sequential large mass isolation windows (4-50 m/z) used isolate large numbers peptides concurrent MS/MS. produces complicated fragmentation spectra, spectra contain data every observable peptide. major disadvantage type acquisition DIA MS/MS spectra incredibly complex difficult deconvolve. Powerful relatively new software programs like Spectronaut capable successfully parsing fragment ions came co-fragmented peptide using custom libraries, machine learning algorithms, precisely determined retention times measured ion mobility data. observable ions sampled MS/MS, DIA reduces missingness substantially compared DDA, though entirely. Function dataMissing() designed summarize missingness protein, plot = TRUE indicates plotting missingness, show_labels = TRUE means protein names displayed printed plot. Note visual representation generated default, plot generation time varies project size.  percentage protein labels represents proportion missing data samples protein. instance, label “XPO4_HUMAN (80%)” indicates , within observations protein “XPO4_HUMAN”, 80% data missing. Additionally, percentage legend represents proportion missing data whole dataset. case, 8.4% data dataNorm missing. Regardless plot generation, function dataMissing() always returns table providing following information: count_miss: count missing values protein. pct_miss_col: percentage missing values protein. pct_miss_tot: percentage missing values protein relative total missing values entire dataset. 
example, case protein “XPO4_HUMAN,” 8 NA values samples, representing 80% missing data “XPO4_HUMAN” within sample 50% total missing data entire dataset. Various imputation methods developed address missing-value issue assign reasonable guess quantitative value proteins missing values. far, package provides 10 imputation methods use: impute.min_local(): Replaces missing values lowest measured value protein condition. impute.min_global(): Replaces missing values lowest measured value protein found within entire dataset. impute.knn(): Replaces missing values using k-nearest neighbors algorithm (Troyanskaya et al. 2001). impute.knn_seq(): Replaces missing values using sequential k-nearest neighbors algorithm (Kim, Kim, Yi 2004). impute.knn_trunc(): Replaces missing values using truncated k-nearest neighbors algorithm (Shah et al. 2017). impute.nuc_norm(): Replaces missing values using nuclear-norm regularization (Hastie et al. 2015). impute.mice_cart(): Replaces missing values using classification regression trees (Breiman et al. 1984; Doove, van Buuren, Dusseldorp 2014; van Buuren 2018). impute.mice_norm(): Replaces missing values using Bayesian linear regression (Rubin 1987; Schafer 1997; van Buuren Groothuis-Oudshoorn 2011). impute.pca_bayes(): Replaces missing values using Bayesian principal components analysis (Oba et al. 2003). impute.pca_prob(): Replaces missing values using probabilistic principal components analysis (Stacklies et al. 2007). Additional methods added later. example, impute NA value dataNorm using impute.min_local(), set required percentage values must present given protein condition combination values imputed 51%. reportImputing = TRUE, returned result structure altered list, adding shadow data frame imputed data labels, 1 indicates corresponding entries imputed, 0 indicates otherwise. imputation, entries pass percent present threshold still NA values need filtered . 
saveRm = TRUE indicates filtered data saved .csv file named filtered_NA_data.csv current working directory. dataImput follows:","code":"dataMissing <- dataMissing(dataNorm, plot = TRUE, show_labels = TRUE) dataImput <- impute.min_local(dataNorm, reportImputing = FALSE,                               reqPercentPresent = 0.51) dataImput <- filterNA(dataImput, saveRm = TRUE)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"summarization","dir":"Articles","previous_headings":"","what":"Summarization","title":"A Usage Template for the R Package msDiaLogue","text":"summarization provides table values protein final dataset include final processed abundances fold changes condition, protein’s statistical relation global dataset terms mean, median, standard deviation, parameters. column “Stat” generated result includes following statistics: n: Number. mean: Mean. sd: Standard deviation. median: Median. trimmed: Trimmed mean trim 0.1. mad: Median absolute deviation (median). min: Minimum. max: Maximum. range: difference maximum minimum value. skew: Skewness. kurtosis: Kurtosis. se: Standard error.","code":"dataSumm <- summarize(dataImput, saveSumm = TRUE)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"analysis","dir":"Articles","previous_headings":"","what":"Analysis","title":"A Usage Template for the R Package msDiaLogue","text":"function analyze() calculates results can used subsequent visualizations. Note: following listed analysis compare data two conditions. order conditions affect downstream analysis, second condition serves reference comparison. two conditions exist data conditions specified, conditions automatically generated sorting unique values alphabetically ascending order. 
two conditions exist data, precisely two conditions comparison must specified via argument conditions.","code":"cond <- c(\"100fmol\", \"50fmol\")"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"students-t-test","dir":"Articles","previous_headings":"Analysis","what":"Student’s t-test","title":"A Usage Template for the R Package msDiaLogue","text":"Student’s t-test used compare means two conditions protein, reporting difference means conditions P-value test.","code":"anlys_t <- analyze(dataImput, conditions = cond, testType = \"t-test\") #> Data are essentially constant. #> Data are essentially constant."},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"moderated-t-test","dir":"Articles","previous_headings":"Analysis","what":"Moderated t-test","title":"A Usage Template for the R Package msDiaLogue","text":"main distinction Student’s moderated t-tests (Smyth 2004) lies variance computed. Student’s t-test calculates variance based data available protein individually, moderated t-test utilizes information chosen proteins calculate variance.","code":"anlys_mod.t <- analyze(dataImput, conditions = cond, testType = \"mod.t-test\")"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"ma","dir":"Articles","previous_headings":"Analysis","what":"MA","title":"A Usage Template for the R Package msDiaLogue","text":"result testType = \"MA\" generate data plotting MA plot, represents protein-wise averages within condition.","code":"anlys_MA <- analyze(dataImput, conditions = cond, testType = \"MA\")"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"visualization","dir":"Articles","previous_headings":"","what":"Visualization","title":"A Usage Template for the R Package msDiaLogue","text":"section provides variety options getting global view data, making comparisons, highlighting trends. 
Keep mind data visualization effective illustrating point answering question data, means find point/question.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"heatmap","dir":"Articles","previous_headings":"Visualization","what":"heatmap","title":"A Usage Template for the R Package msDiaLogue","text":"package offers two options plotting heatmap. Option 1 utilizes source package pheatmap, capable plotting dendrogram simultaneously. default choice heatmaps package.  protein names excessively long, recommended set show_rownames = FALSE view full heatmap. Option 2 use source package ggplot2 generate ggplot object include dendrogram.  heatmap, similar colors within row indicate relatively consistent values, suggesting similar protein expression levels across different samples.","code":"visualize(dataImput, graphType = \"heatmap\",           pkg = \"pheatmap\",           cluster_cols = TRUE, cluster_rows = FALSE,           show_colnames = TRUE, show_rownames = TRUE) visualize(dataImput, graphType = \"heatmap\", pkg = \"ggplot2\")"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"ma-1","dir":"Articles","previous_headings":"Visualization","what":"MA","title":"A Usage Template for the R Package msDiaLogue","text":"MA plot, short “M vs. plot,” uses two axes: M axis (vertical): Represents logarithm (usually base 2) fold change, ratio expression levels, two conditions. calculated : M=log2(X/Y)=log2X−log2YM = log_2(X/Y) = log_2 X - log_2 Y axis (horizontal): Represents average intensity two conditions, calculated : =12log2(XY)=12[log2(X)+log2(Y)]= \\frac{1}{2}log_2(XY) = \\frac{1}{2}\\left[log_2(X)+log_2(Y)\\right] proteins expected exhibit little variation, leading majority points concentrating around line M = 0 (indicating difference group means).  M.thres = 1 means M thresholds set −1 1. scatters split three parts: significant (M > 1), significant (-1 ≤\\leq M ≤\\leq 1), significant (M < -1). 
transformLabel = \"Log2\" used prefix title, x-axis, y-axis labels. Additionally, warning message “Removed 16 rows containing missing values” indicates 16 proteins significance.","code":"visualize(anlys_MA, graphType = \"MA\", M.thres = 1, transformLabel = \"Log2\") #> Warning: Removed 16 rows containing missing values or values outside the scale range #> (`geom_text_repel()`)."},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"normalize","dir":"Articles","previous_headings":"Visualization","what":"Normalize","title":"A Usage Template for the R Package msDiaLogue","text":"","code":"visualize(dataNorm, graphType = \"normalize\") #> Warning: Removed 16 rows containing non-finite outside the scale range #> (`stat_boxplot()`)."},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"pca","dir":"Articles","previous_headings":"Visualization","what":"PCA","title":"A Usage Template for the R Package msDiaLogue","text":"Principal component analysis (PCA) powerful technique used data analysis simplify reduce dimensionality large datasets. transforms original variables uncorrelated components capture maximum variance. selecting subset components, PCA projects data points onto key directions, enabling visualization analysis lower-dimensional space. aids identifying patterns relationships within complex datasets. visualization graphType = \"PCA_*\", arguments center scale used center data zero mean scale unit variance, default setting TRUE. case dataImput, two proteins, namely “TEBP_HUMAN” “T126B_HUMAN,” constant values, leading error message. 
choose remove two proteins PCA.","code":"dataPCA <- dataImput[, !(colnames(dataImput) %in% c(\"TEBP_HUMAN\", \"T126B_HUMAN\"))]"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"pca_scree","dir":"Articles","previous_headings":"Visualization > PCA","what":"PCA_scree","title":"A Usage Template for the R Package msDiaLogue","text":"One way help identify many PCs retain, explore scree plot. scree plot shows eigenvalues PC, represent proportion variance explained component. choice specifies data plotted, either \"variance\" \"eigenvalue\", addlabels = TRUE adds information labels top bars/points, ncp = 10 sets number dimension displayed.","code":"visualize(dataPCA, graphType = \"PCA_scree\", center = TRUE, scale = TRUE,           addlabels = TRUE, choice = \"variance\", ncp = 10) visualize(dataPCA, graphType = \"PCA_scree\", center = TRUE, scale = TRUE,           addlabels = TRUE, choice = \"eigenvalue\", ncp = 10)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"pca_ind","dir":"Articles","previous_headings":"Visualization > PCA","what":"PCA_ind","title":"A Usage Template for the R Package msDiaLogue","text":"primary PCA plot individual data visually represents distribution individual observations reduced-dimensional space, typically defined PCs. x y axes PCA plot represent PCs. axis corresponds linear combination original variables. Individual data points PCA plot represent observations (e.g., samples) original dataset. Points close origin (0, 0), close “average” across protein abundances. 
sufficient samples present, plot also produce 95% confidence ellipse, well centroid (mean group provided), groups (condition) provided.","code":"visualize(dataPCA, graphType = \"PCA_ind\", center = TRUE, scale = TRUE,           addlabels = TRUE, addEllipses = TRUE, ellipse.level = 0.95)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"pca_var","dir":"Articles","previous_headings":"Visualization > PCA","what":"PCA_var","title":"A Usage Template for the R Package msDiaLogue","text":"plot useful analyses based relatively small number proteins. represents association, loading protein first two PCs. Longer arrows represents stronger associations.","code":"visualize(dataPCA, graphType = \"PCA_var\", center = TRUE, scale = TRUE,           addlabels = TRUE)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"pca_biplot","dir":"Articles","previous_headings":"Visualization > PCA","what":"PCA_biplot","title":"A Usage Template for the R Package msDiaLogue","text":"PCA biplot includes individual variable plots. , large number proteins, plot can unwieldy.","code":"visualize(dataPCA, graphType = \"PCA_biplot\", center = TRUE, scale = TRUE,           addEllipses = TRUE, ellipse.level = 0.95, label = \"all\")"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"t-test","dir":"Articles","previous_headings":"Visualization","what":"t-test","title":"A Usage Template for the R Package msDiaLogue","text":"function visualize() can applied t-test output. generates two useful plots: histogram fold changes across analyzed proteins histogram P-values. majority proteins expected show small change conditions, fold change histogram peak around zero. P-values, P-values expected non-significant (0.05). 
Depending strength treatment effect, may peak p-values near 0.","code":"visualize(anlys_mod.t, graphType = \"t-test\")"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"upset","dir":"Articles","previous_headings":"Visualization","what":"Upset","title":"A Usage Template for the R Package msDiaLogue","text":"upset plot visual representation helps display overlap intersection sets categories dataset. particularly useful illustrating presence absence elements combinations sets.  plot reveals 18 proteins common 100fmol 50fmol, 1 protein unique 100fmol.","code":"dataSort <- sortcondition(dataSet) visualize(dataSort, graphType = \"Upset\")"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"venn","dir":"Articles","previous_headings":"Visualization","what":"Venn","title":"A Usage Template for the R Package msDiaLogue","text":"Venn plot another graphical representation relationships sets. circle represents set, overlapping regions show elements shared sets.  example , 100fmol 50fmol groups share 18 proteins. Notably, one protein exclusively found 100fmol group, 50fmol group lacks unique proteins.","code":"visualize(dataSort, graphType = \"Venn\",           show_percentage = TRUE,           fill_color = c(\"blue\", \"yellow\", \"green\", \"red\"),           show_universal = FALSE)"},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"volcano","dir":"Articles","previous_headings":"Visualization","what":"Volcano","title":"A Usage Template for the R Package msDiaLogue","text":"volcano plot graphical representation commonly used proteomics genomics visualize differential expression analysis results. particularly useful identifying significant changes extensive data. displays two important pieces information differences conditions dataset: Statistical significance (vertical): Represents negative log10 P-value. 
Fold change (horizontal): Represents fold change.","code":"visualize(anlys_mod.t, graphType = \"volcano\",           P.thres = 0.05, logF.thres = 0.6) #> Warning: Removed 15 rows containing missing values or values outside the scale range #> (`geom_text_repel()`)."},{"path":"https://uconn-scs.github.io/msDiaLogue/articles/usage_template.html","id":"other-useful-function","dir":"Articles","previous_headings":"","what":"Other useful function","title":"A Usage Template for the R Package msDiaLogue","text":"function pullProteinPath() allows see values associated specific protein step processing. can useful questions , “values favorite protein actually measured, imputed?” “didn’t favorite protein make final list? filtered ?”. can also used check whether given protein’s fold-change might processing artifact.","code":"ZC11B <- pullProteinPath(proteinName = \"ZC11B_HUMAN\",                          dataSetList = list(Initial = dataSet,                                             Transformed = dataTran,                                             Normalized = dataNorm,                                             Imputed = dataImput))"},{"path":[]},{"path":"https://uconn-scs.github.io/msDiaLogue/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Shiying Xiao. Author, maintainer. Timothy Moore. Author. Charles Watt. Author, contributor.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Xiao S, Moore T, Watt C (2024). msDiaLogue: Analysis + Visuals Data Indep. Aquisition Mass Spectrometry Data. R package version 0.0.5, https://github.com/uconn-scs/msDiaLogue.","code":"@Manual{,   title = {msDiaLogue: Analysis + Visuals for Data Indep. 
Aquisition Mass Spectrometry Data},   author = {Shiying Xiao and Timothy Moore and Charles Watt},   year = {2024},   note = {R package version 0.0.5},   url = {https://github.com/uconn-scs/msDiaLogue}, }"},{"path":[]},{"path":"https://uconn-scs.github.io/msDiaLogue/index.html","id":"code-building-for-workflow-package","dir":"","previous_headings":"","what":"Code building for workflow package","title":"msDiaLogue","text":"repository used build customized UConn PMF-SCS workflow Data-Independent Acquisition (DIA) proteomics data. code expanded include options users, detailed explanations steps analysis process.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"msDiaLogue","text":"can install development version msDiaLogue GitHub :","code":"# install.packages(\"devtools\") devtools::install_github(\"uconn-scs/msDiaLogue\")"},{"path":"https://uconn-scs.github.io/msDiaLogue/index.html","id":"main-areas-include","dir":"","previous_headings":"","what":"Main areas include","title":"msDiaLogue","text":"Data preprocessing: getting data Spectronaut Data transformation: options transforming abundance data Data filtering: providing options filter data based preset levels Data normalization: normalization procedures Data imputation: missing data procedures Data summary: calculating presenting numerical summaries tabular form Differential Abundance Analysis: statistical tools DIA data analysis Data visualization: providing clean visuals aid data analysis decisions","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/index.html","id":"general-problems-during-installation","dir":"","previous_headings":"","what":"General problems during installation","title":"msDiaLogue","text":"WARNING: Rtools required build R packages currently installed. Windows, RTools required build package. 
sh: /opt/gfortran/bin/gfortran: file directory GFortran compiler required build package.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/analyze.html","id":null,"dir":"Reference","previous_headings":"","what":"Analyzing summarized data — analyze","title":"Analyzing summarized data — analyze","text":"Apply statistical test data.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/analyze.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Analyzing summarized data — analyze","text":"","code":"analyze(dataSet, conditions, testType = \"t-test\")"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/analyze.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Analyzing summarized data — analyze","text":"dataSet 2d data set data. conditions string specifying two conditions compare. order important, second condition serves reference comparison. two conditions dataSet argument specified, conditions automatically selected sorting unique values alphabetically ascending order. testType string (default = \"t-test\") specifying statistical test use: \"t-test\": Unequal variance t-test. \"mod.t-test\": Moderated t-test (Smyth 2004) . \"MA\": Output plot MA plot.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/analyze.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Analyzing summarized data — analyze","text":"2d dataframe includes following information: \"t-test\" \"mod.t-test\": differences means P-values protein two conditions. \"MA\": Protein-wise averages within condition.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/analyze.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Analyzing summarized data — analyze","text":"second condition serves reference comparison. 
\"t-test\" \"mod.t-test\": differences calculated subtracting mean second condition mean first condition (Condition 1 - Condition 2). \"MA\": rows ordered conditions. Specifically, first row corresponds protein-wise average first condition, second row corresponds second condition.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/analyze.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Analyzing summarized data — analyze","text":"Smyth GK (2004). “Linear Models Empirical Bayes Methods Assessing Differential Expression Microarray Experiments.” Statistical Applications Genetics Molecular Biology, 3(1). doi:10.2202/1544-6115.1027 .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/dataMissing.html","id":null,"dir":"Reference","previous_headings":"","what":"Counting missing data — dataMissing","title":"Counting missing data — dataMissing","text":"Calculate plot missingness.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/dataMissing.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Counting missing data — dataMissing","text":"","code":"dataMissing(   dataSet,   sort_miss = FALSE,   plot = FALSE,   show_pct_legend = TRUE,   show_labels = TRUE,   show_pct_col = TRUE )"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/dataMissing.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Counting missing data — dataMissing","text":"dataSet 2d data set experimental values. sort_miss boolean (default = FALSE) specifying whether arrange columns order missingness. plot boolean (default = FALSE) specifying whether plot missingness. show_pct_legend boolean (default = TRUE) specifying whether percentages missing present values entire dataset shown legend visualization plot = TRUE. show_labels boolean (default = TRUE) specifying whether protein names shown visualization plot = TRUE. 
show_pct_col boolean (default = TRUE) specifying whether percentages missing data samples protein shown labels visualization show_labels = TRUE.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/dataMissing.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Counting missing data — dataMissing","text":"2d dataframe including: \"count_miss\": count missing values protein. \"pct_miss_col\": percentage missing values protein. \"pct_miss_tot\": percentage missing values protein relative total missing values entire dataset.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterNA.html","id":null,"dir":"Reference","previous_headings":"","what":"Filtering NA's post-imputation — filterNA","title":"Filtering NA's post-imputation — filterNA","text":"Remove proteins NA values.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterNA.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Filtering NA's post-imputation — filterNA","text":"","code":"filterNA(dataSet, saveRm = TRUE)"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterNA.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Filtering NA's post-imputation — filterNA","text":"dataSet 2d data set experimental values. 
saveRm boolean (default = TRUE) specifying whether save removed data current working directory.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterNA.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Filtering NA's post-imputation — filterNA","text":"filtered 2d dataframe.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterNA.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Filtering NA's post-imputation — filterNA","text":"proteins meet imputation requirement removed, .csv file created removed data.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterOutIn.html","id":null,"dir":"Reference","previous_headings":"","what":"Filtering proteins or contaminants — filterOutIn","title":"Filtering proteins or contaminants — filterOutIn","text":"Apply series filtering steps data set.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterOutIn.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Filtering proteins or contaminants — filterOutIn","text":"","code":"filterOutIn(   dataSet,   listName = c(),   regexName = c(),   removeList = TRUE,   saveRm = TRUE )"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterOutIn.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Filtering proteins or contaminants — filterOutIn","text":"dataSet 2d data set experimental values. listName character vector proteins select remove. regexName character vector specifying proteins regular expression pattern matching select remove. removeList boolean (default = TRUE) specifying whether list proteins removed selected. TRUE: Remove list proteins data set. FALSE: Remove proteins list data set. saveRm boolean (default = TRUE) specifying whether save removed data current working directory. 
option works removeList = TRUE.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterOutIn.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Filtering proteins or contaminants — filterOutIn","text":"filtered 2d dataframe.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterOutIn.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Filtering proteins or contaminants — filterOutIn","text":"listName regexName provided, protein names selected removed union specified listName matching regex pattern regexName.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterProtein.html","id":null,"dir":"Reference","previous_headings":"","what":"Filter proteins by gene, accession or description — filterProtein","title":"Filter proteins by gene, accession or description — filterProtein","text":"Filter preprocessed dataset gene, accession, description.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterProtein.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Filter proteins by gene, accession or description — filterProtein","text":"","code":"filterProtein(   dataSet,   proteinInformation = \"preprocess_protein_information.csv\",   text = c(),   by = c(\"PG.Genes\", \"PG.ProteinAccessions\", \"PG.ProteinDescriptions\", \"PG.ProteinNames\",     \"ProteinDescriptions\", \"AccessionNumber\", \"AlternateID\"),   removeList = TRUE,   saveRm = TRUE )"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterProtein.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Filter proteins by gene, accession or description — filterProtein","text":"dataSet 2d data set experimental values. proteinInformation name .csv file containing protein information data (including path file, needed). 
file include following columns: Spectronaut: \"PG.Genes\", \"PG.ProteinAccessions\", \"PG.ProteinDescriptions\", \"PG.ProteinNames\". Scaffold: \"ProteinDescriptions\", \"AccessionNumber\", \"AlternateID\". file automatically generated function preprocessing preprocessing_scaffold. text character vector text used key selecting removing. character string specifying information text filter applied, allowable options: Spectronaut: \"PG.Genes\", \"PG.ProteinAccessions\", \"PG.ProteinDescriptions\", \"PG.ProteinNames\". Scaffold: \"ProteinDescriptions\", \"AccessionNumber\", \"AlternateID\". removeList boolean (default = TRUE) specifying whether list proteins removed selected. TRUE: Remove list proteins data set. FALSE: Remove proteins list data set. saveRm boolean (default = TRUE) specifying whether save removed data current working directory. option works removeList = TRUE.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterProtein.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Filter proteins by gene, accession or description — filterProtein","text":"filtered 2d dataframe.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/filterProtein.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Filter proteins by gene, accession or description — filterProtein","text":"function extension function preprocessing preprocessing_scaffold allows filtering proteins based additional information.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.html","id":null,"dir":"Reference","previous_headings":"","what":"Imputation of raw data signals — impute","title":"Imputation of raw data signals — impute","text":"Apply imputation method data set.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Imputation of raw data signals — 
impute","text":"","code":"impute(   dataSet,   imputeType = \"LocalMinVal\",   reqPercentPresent = 0.51,   k = 10,   rowmax = 0.5,   colmax = 0.8,   maxp = 1500,   rng.seed = 362436069,   rank.max = NULL,   lambda = NULL,   thresh = 1e-05,   maxit = 100,   final.svd = TRUE,   reportImputing = FALSE )"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Imputation of raw data signals — impute","text":"dataSet 2d data set experimental values. imputeType character string (default = \"LocalMinVal\") specifying imputation method use: \"LocalMinVal\": replace missing values lowest value protein condition combination. \"GlobalMinVal\": replace missing values lowest value found within entire data set. \"knn\": replace missing values using k-nearest neighbors algorithm (Troyanskaya et al. 2001) . \"seq-knn\": replace missing values using sequential k-nearest neighbors algorithm (Kim et al. 2004) . \"trunc-knn\": replace missing values using truncated k-nearest neighbors algorithm (Shah et al. 2017) . \"nuc-norm\": replace missing values using nuclear-norm regularization (Hastie et al. 2015) . reqPercentPresent scalar (default = 0.51) specifying required percent values must present given protein condition combination values imputed imputeType = \"LocalMinVal\". k integer (default = 10) indicating number neighbors used imputation imputeType \"knn\", \"seq-knn\", \"trunc-knn\". rowmax scalar (default = 0.5) specifying maximum percent missing data allowed row imputeType = \"knn\". rows rowmax*100% missing imputed using overall mean per sample. colmax scalar (default = 0.8) specifying maximum percent missing data allowed column imputeType = \"knn\". column colmax*100% missing data, program halts reports error. maxp integer (default = 1500) indicating largest block proteins imputed using k-nearest neighbors algorithm imputeType = \"knn\". 
Larger blocks divided two-means clustering (recursively) prior imputation. rng.seed integer (default = 362436069) specifying seed used random number generator reproducibility imputeType = \"knn\". rank.max integer specifying restriction rank solution imputeType = \"nuc-norm\". default set one less minimum dimension dataset. lambda scalar specifying nuclear-norm regularization parameter imputeType = \"nuc-norm\". lambda = 0, algorithm convergence typically slower. default set maximum singular value obtained singular value decomposition (SVD) dataset. thresh scalar (default = 1e-5) specifying convergence threshold imputeType = \"nuc-norm\", measured relative change Frobenius norm two successive estimates. maxit integer (default = 100) specifying maximum number iterations convergence reached imputeType = \"nuc-norm\". final.svd boolean (default = TRUE) specifying whether perform one-step unregularized iteration final iteration imputeType = \"nuc-norm\", followed soft-thresholding singular values, resulting hard zeros. reportImputing boolean (default = FALSE) specifying whether provide shadow data frame imputed data labels, 1 indicates corresponding entries imputed, 0 indicates otherwise. Alters return structure.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Imputation of raw data signals — impute","text":"reportImputing = FALSE, function returns imputed 2d dataframe. reportImputing = TRUE, function returns list imputed 2d dataframe shadow matrix showing proteins replicate imputed.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Imputation of raw data signals — impute","text":"Hastie T, Mazumder R, Lee JD, Zadeh R (2015). 
“Matrix Completion Low-Rank SVD via Fast Alternating Least Squares.” Journal Machine Learning Research, 16(104), 3367—3402. http://jmlr.org/papers/v16/hastie15a.html. Kim K, Kim B, Yi G (2004). “Reuse Imputed Data Microarray Analysis Increases Imputation Efficiency.” BMC bioinformatics, 5, 160. doi:10.1186/1471-2105-5-160 . Shah JS, Rai SN, DeFilippis AP, Hill BG, Bhatnagar , Brock GN (2017). “Distribution Based Nearest Neighbor Imputation Truncated High Dimensional Data Applications Pre-Clinical Clinical Metabolomics Studies.” BMC bioinformatics, 18, 114. doi:10.1186/s12859-017-1547-6 . Troyanskaya O, Cantor M, Sherlock G, Brown P, Hastie T, Tibshirani R, Botstein D, Altman RB (2001). “Missing Value Estimation Methods DNA Microarrays.” Bioinformatics, 17(6), 520–525. doi:10.1093/bioinformatics/17.6.520 .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn.html","id":null,"dir":"Reference","previous_headings":"","what":"Imputation by the k-nearest neighbors algorithm — impute.knn","title":"Imputation by the k-nearest neighbors algorithm — impute.knn","text":"Apply imputation dataset k-nearest neighbors algorithm (Troyanskaya et al. 2001) .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Imputation by the k-nearest neighbors algorithm — impute.knn","text":"","code":"impute.knn(   dataSet,   reportImputing = FALSE,   k = 10,   rowmax = 0.5,   colmax = 0.8,   maxp = 1500,   seed = 362436069 )"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Imputation by the k-nearest neighbors algorithm — impute.knn","text":"dataSet 2d dataset experimental values. 
reportImputing boolean (default = FALSE) specifying whether provide shadow data frame imputed data labels, 1 indicates corresponding entries imputed, 0 indicates otherwise. Alters return structure. k integer (default = 10) indicating number neighbors used imputation. rowmax scalar (default = 0.5) specifying maximum percent missing data allowed row. rows rowmax*100% missing imputed using overall mean per sample. colmax scalar (default = 0.8) specifying maximum percent missing data allowed column. column colmax*100% missing data, program halts reports error. maxp integer (default = 1500) indicating largest block proteins imputed using k-nearest neighbors algorithm. Larger blocks divided two-means clustering (recursively) prior imputation. seed integer (default = 362436069) specifying seed used random number generator reproducibility.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Imputation by the k-nearest neighbors algorithm — impute.knn","text":"reportImputing = FALSE, function returns imputed 2d dataframe. reportImputing = TRUE, function returns list imputed 2d dataframe shadow matrix showing proteins replicate imputed.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Imputation by the k-nearest neighbors algorithm — impute.knn","text":"Troyanskaya O, Cantor M, Sherlock G, Brown P, Hastie T, Tibshirani R, Botstein D, Altman RB (2001). “Missing Value Estimation Methods DNA Microarrays.” Bioinformatics, 17(6), 520–525. 
doi:10.1093/bioinformatics/17.6.520 .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn_seq.html","id":null,"dir":"Reference","previous_headings":"","what":"Imputation by the k-nearest neighbors algorithm — impute.knn_seq","title":"Imputation by the k-nearest neighbors algorithm — impute.knn_seq","text":"Apply imputation dataset sequential k-nearest neighbors algorithm (Kim et al. 2004) .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn_seq.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Imputation by the k-nearest neighbors algorithm — impute.knn_seq","text":"","code":"impute.knn_seq(dataSet, reportImputing = FALSE, k = 10)"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn_seq.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Imputation by the k-nearest neighbors algorithm — impute.knn_seq","text":"dataSet 2d dataset experimental values. reportImputing boolean (default = FALSE) specifying whether provide shadow data frame imputed data labels, 1 indicates corresponding entries imputed, 0 indicates otherwise. Alters return structure. k integer (default = 10) indicating number neighbors used imputation.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn_seq.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Imputation by the k-nearest neighbors algorithm — impute.knn_seq","text":"reportImputing = FALSE, function returns imputed 2d dataframe. reportImputing = TRUE, function returns list imputed 2d dataframe shadow matrix showing proteins replicate imputed.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn_seq.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Imputation by the k-nearest neighbors algorithm — impute.knn_seq","text":"Kim K, Kim B, Yi G (2004). 
“Reuse Imputed Data Microarray Analysis Increases Imputation Efficiency.” BMC bioinformatics, 5, 160. doi:10.1186/1471-2105-5-160 .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn_trunc.html","id":null,"dir":"Reference","previous_headings":"","what":"Imputation by the truncated k-nearest neighbors algorithm — impute.knn_trunc","title":"Imputation by the truncated k-nearest neighbors algorithm — impute.knn_trunc","text":"Apply imputation dataset truncated k-nearest neighbors algorithm (Shah et al. 2017) .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn_trunc.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Imputation by the truncated k-nearest neighbors algorithm — impute.knn_trunc","text":"","code":"impute.knn_trunc(dataSet, reportImputing = FALSE, k = 10)"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn_trunc.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Imputation by the truncated k-nearest neighbors algorithm — impute.knn_trunc","text":"dataSet 2d dataset experimental values. reportImputing boolean (default = FALSE) specifying whether provide shadow data frame imputed data labels, 1 indicates corresponding entries imputed, 0 indicates otherwise. Alters return structure. k integer (default = 10) indicating number neighbors used imputation.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn_trunc.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Imputation by the truncated k-nearest neighbors algorithm — impute.knn_trunc","text":"reportImputing = FALSE, function returns imputed 2d dataframe. 
reportImputing = TRUE, function returns list imputed 2d dataframe shadow matrix showing proteins replicate imputed.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.knn_trunc.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Imputation by the truncated k-nearest neighbors algorithm — impute.knn_trunc","text":"Shah JS, Rai SN, DeFilippis AP, Hill BG, Bhatnagar , Brock GN (2017). “Distribution Based Nearest Neighbor Imputation Truncated High Dimensional Data Applications Pre-Clinical Clinical Metabolomics Studies.” BMC bioinformatics, 18, 114. doi:10.1186/s12859-017-1547-6 .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.mice_cart.html","id":null,"dir":"Reference","previous_headings":"","what":"Imputation by classification and regression trees — impute.mice_cart","title":"Imputation by classification and regression trees — impute.mice_cart","text":"Apply imputation dataset classification regression trees (Breiman et al. 1984; Doove et al. 2014; van Buuren 2018) .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.mice_cart.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Imputation by classification and regression trees — impute.mice_cart","text":"","code":"impute.mice_cart(dataSet, reportImputing = FALSE, m = 5, seed = 362436069)"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.mice_cart.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Imputation by classification and regression trees — impute.mice_cart","text":"dataSet 2d dataset experimental values. reportImputing boolean (default = FALSE) specifying whether provide shadow data frame imputed data labels, 1 indicates corresponding entries imputed, 0 indicates otherwise. Alters return structure. m integer (default = 5) specifying number multiple imputations. 
seed integer (default = 362436069) specifying seed used random number generator reproducibility.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.mice_cart.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Imputation by classification and regression trees — impute.mice_cart","text":"reportImputing = FALSE, function returns imputed 2d dataframe. reportImputing = TRUE, function returns list imputed 2d dataframe shadow matrix showing proteins replicate imputed.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.mice_cart.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Imputation by classification and regression trees — impute.mice_cart","text":"Breiman L, Friedman J, Olshen RA, Stone CJ (1984). Classification Regression Trees. Routledge, New York, NY, USA. ISBN 9780412048418. Doove LL, van Buuren S, Dusseldorp E (2014). “Recursive Partitioning Missing Data Imputation Presence Interaction Effects.” Computational Statistics & Data Analysis, 72, 92–104. doi:10.1016/j.csda.2013.10.025 . van Buuren S (2018). Flexible Imputation Missing Data. Chapman \\& Hall/CRC, New York, NY, USA. 
ISBN 9781032178639.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.mice_norm.html","id":null,"dir":"Reference","previous_headings":"","what":"Imputation by Bayesian linear regression — impute.mice_norm","title":"Imputation by Bayesian linear regression — impute.mice_norm","text":"Apply imputation dataset Bayesian linear regression (Rubin 1987; Schafer 1997; van Buuren Groothuis-Oudshoorn 2011) .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.mice_norm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Imputation by Bayesian linear regression — impute.mice_norm","text":"","code":"impute.mice_norm(dataSet, reportImputing = FALSE, m = 5, seed = 362436069)"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.mice_norm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Imputation by Bayesian linear regression — impute.mice_norm","text":"dataSet 2d dataset experimental values. reportImputing boolean (default = FALSE) specifying whether provide shadow data frame imputed data labels, 1 indicates corresponding entries imputed, 0 indicates otherwise. Alters return structure. m integer (default = 5) specifying number multiple imputations. seed integer (default = 362436069) specifying seed used random number generator reproducibility.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.mice_norm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Imputation by Bayesian linear regression — impute.mice_norm","text":"reportImputing = FALSE, function returns imputed 2d dataframe. 
reportImputing = TRUE, function returns list imputed 2d dataframe shadow matrix showing proteins replicate imputed.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.mice_norm.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Imputation by Bayesian linear regression — impute.mice_norm","text":"Rubin DB (1987). Multiple Imputation Nonresponse Surveys. John Wiley \\& Sons, New York, NY, USA. ISBN 9780471087052. Schafer JL (1997). Analysis Incomplete Multivariate Data. Chapman \\& Hall/CRC, New York, NY, USA. ISBN 9780412040610. van Buuren S, Groothuis-Oudshoorn K (2011). “mice: Multivariate Imputation Chained Equations R.” Journal Statistical Software, 45(3), 1–67. doi:10.18637/jss.v045.i03 .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.min_global.html","id":null,"dir":"Reference","previous_headings":"","what":"Imputation by the global minimum — impute.min_global","title":"Imputation by the global minimum — impute.min_global","text":"Apply imputation dataset minimum measured value protein found within entire dataset.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.min_global.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Imputation by the global minimum — impute.min_global","text":"","code":"impute.min_global(dataSet, reportImputing = FALSE)"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.min_global.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Imputation by the global minimum — impute.min_global","text":"dataSet 2d dataset experimental values. reportImputing boolean (default = FALSE) specifying whether provide shadow data frame imputed data labels, 1 indicates corresponding entries imputed, 0 indicates otherwise. 
Alters return structure.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.min_global.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Imputation by the global minimum — impute.min_global","text":"reportImputing = FALSE, function returns imputed 2d dataframe. reportImputing = TRUE, function returns list imputed 2d dataframe shadow matrix showing proteins replicate imputed.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.min_local.html","id":null,"dir":"Reference","previous_headings":"","what":"Imputation by the local minimum — impute.min_local","title":"Imputation by the local minimum — impute.min_local","text":"Apply imputation dataset minimum measured value protein condition.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.min_local.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Imputation by the local minimum — impute.min_local","text":"","code":"impute.min_local(dataSet, reportImputing = FALSE, reqPercentPresent = 0.51)"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.min_local.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Imputation by the local minimum — impute.min_local","text":"dataSet 2d dataset experimental values. reportImputing boolean (default = FALSE) specifying whether provide shadow data frame imputed data labels, 1 indicates corresponding entries imputed, 0 indicates otherwise. Alters return structure. 
reqPercentPresent scalar (default = 0.51) specifying required percent values must present given protein condition combination values imputed.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.min_local.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Imputation by the local minimum — impute.min_local","text":"reportImputing = FALSE, function returns imputed 2d dataframe. reportImputing = TRUE, function returns list imputed 2d dataframe shadow matrix showing proteins replicate imputed.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.nuc_norm.html","id":null,"dir":"Reference","previous_headings":"","what":"Imputation by the nuclear-norm regularization — impute.nuc_norm","title":"Imputation by the nuclear-norm regularization — impute.nuc_norm","text":"Apply imputation dataset nuclear-norm regularization (Hastie et al. 2015) .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.nuc_norm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Imputation by the nuclear-norm regularization — impute.nuc_norm","text":"","code":"impute.nuc_norm(   dataSet,   reportImputing = FALSE,   rank.max = NULL,   lambda = NULL,   thresh = 1e-05,   maxit = 100,   final.svd = TRUE,   seed = 362436069 )"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.nuc_norm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Imputation by the nuclear-norm regularization — impute.nuc_norm","text":"dataSet 2d dataset experimental values. reportImputing boolean (default = FALSE) specifying whether provide shadow data frame imputed data labels, 1 indicates corresponding entries imputed, 0 indicates otherwise. Alters return structure. rank.max integer specifying restriction rank solution. default set one less minimum dimension dataset. lambda scalar specifying nuclear-norm regularization parameter. 
lambda = 0, algorithm convergence typically slower. default set maximum singular value obtained singular value decomposition (SVD) dataset. thresh scalar (default = 1e-5) specifying convergence threshold, measured relative change Frobenius norm two successive estimates. maxit integer (default = 100) specifying maximum number iterations convergence reached. final.svd boolean (default = TRUE) specifying whether perform one-step unregularized iteration final iteration, followed soft-thresholding singular values, resulting hard zeros. seed integer (default = 362436069) specifying seed used random number generator reproducibility.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.nuc_norm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Imputation by the nuclear-norm regularization — impute.nuc_norm","text":"reportImputing = FALSE, function returns imputed 2d dataframe. reportImputing = TRUE, function returns list imputed 2d dataframe shadow matrix showing proteins replicate imputed.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.nuc_norm.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Imputation by the nuclear-norm regularization — impute.nuc_norm","text":"Hastie T, Mazumder R, Lee JD, Zadeh R (2015). “Matrix Completion Low-Rank SVD via Fast Alternating Least Squares.” Journal Machine Learning Research, 16(104), 3367—3402. http://jmlr.org/papers/v16/hastie15a.html.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.pca_bayes.html","id":null,"dir":"Reference","previous_headings":"","what":"Imputation by Bayesian principal components analysis — impute.pca_bayes","title":"Imputation by Bayesian principal components analysis — impute.pca_bayes","text":"Apply imputation dataset Bayesian principal components analysis (Oba et al. 
2003) .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.pca_bayes.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Imputation by Bayesian principal components analysis — impute.pca_bayes","text":"","code":"impute.pca_bayes(dataSet, reportImputing = FALSE, nPcs = NULL, maxSteps = 100)"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.pca_bayes.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Imputation by Bayesian principal components analysis — impute.pca_bayes","text":"dataSet 2d dataset experimental values. reportImputing boolean (default = FALSE) specifying whether provide shadow data frame imputed data labels, 1 indicates corresponding entries imputed, 0 indicates otherwise. Alters return structure. nPcs integer specifying number principal components calculate. default set minimum number samples number proteins. maxSteps integer (default = 100) specifying maximum number estimation steps.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.pca_bayes.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Imputation by Bayesian principal components analysis — impute.pca_bayes","text":"reportImputing = FALSE, function returns imputed 2d dataframe. reportImputing = TRUE, function returns list imputed 2d dataframe shadow matrix showing proteins replicate imputed.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.pca_bayes.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Imputation by Bayesian principal components analysis — impute.pca_bayes","text":"Oba S, Sato M, Takemasa , Monden M, Matsubara K, Ishii S (2003). “Bayesian Missing Value Estimation Method Gene Expression Profile Data.” Bioinformatics, 19(16), 2088–2096. 
doi:10.1093/bioinformatics/btg287 .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.pca_prob.html","id":null,"dir":"Reference","previous_headings":"","what":"Imputation by probabilistic principal components analysis — impute.pca_prob","title":"Imputation by probabilistic principal components analysis — impute.pca_prob","text":"Apply imputation dataset probabilistic principal components analysis (Stacklies et al. 2007) .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.pca_prob.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Imputation by probabilistic principal components analysis — impute.pca_prob","text":"","code":"impute.pca_prob(   dataSet,   reportImputing = FALSE,   nPcs = NULL,   maxIterations = 1000,   seed = 362436069 )"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.pca_prob.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Imputation by probabilistic principal components analysis — impute.pca_prob","text":"dataSet 2d dataset experimental values. reportImputing boolean (default = FALSE) specifying whether provide shadow data frame imputed data labels, 1 indicates corresponding entries imputed, 0 indicates otherwise. Alters return structure. nPcs integer specifying number principal components calculate. default set minimum number samples number proteins. maxIterations integer (default = 1000) specifying maximum number allowed iterations. seed integer (default = 362436069) specifying seed used random number generator reproducibility.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.pca_prob.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Imputation by probabilistic principal components analysis — impute.pca_prob","text":"reportImputing = FALSE, function returns imputed 2d dataframe. 
reportImputing = TRUE, function returns list imputed 2d dataframe shadow matrix showing proteins replicate imputed.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/impute.pca_prob.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Imputation by probabilistic principal components analysis — impute.pca_prob","text":"Stacklies W, Redestig H, Scholz M, Walther D, Selbig J (2007). “pcaMethods–Bioconductor Package Providing PCA Methods Incomplete Data.” Bioinformatics, 23(9), 1164–1167. doi:10.1093/bioinformatics/btm069 .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/normalize.html","id":null,"dir":"Reference","previous_headings":"","what":"Normalization of preprocessed data — normalize","title":"Normalization of preprocessed data — normalize","text":"Apply specified type normalization data set.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/normalize.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Normalization of preprocessed data — normalize","text":"","code":"normalize(dataSet, normalizeType = \"quant\")"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/normalize.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Normalization of preprocessed data — normalize","text":"dataSet 2d data set experimental values. normalizeType string (default = \"quant\") specifying type normalization apply: \"quant\": Quantile (Bolstad et al. 
2003) \"median\": Protein-wise Median \"mean\": Protein-wise Mean \"none\": None","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/normalize.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Normalization of preprocessed data — normalize","text":"normalized 2d dataframe.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/normalize.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Normalization of preprocessed data — normalize","text":"Quantile normalization generally recommended. Mean median normalization going included popular previous methods. normalization recommended. Boxplots also generated normalization give visual indicator changes.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/normalize.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Normalization of preprocessed data — normalize","text":"Bolstad BM, Irizarry RA, Astrand M, Speed TP (2003). “Comparison Normalization Methods High Density Oligonucleotide Array Data Based Variance Bias.” Bioinformatics, 19(2), 185–193. 
doi:10.1093/bioinformatics/19.2.185 .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preProcessFiltering.html","id":null,"dir":"Reference","previous_headings":"","what":"Filtering of raw data signals — preProcessFiltering","title":"Filtering of raw data signals — preProcessFiltering","text":"Apply series filtering steps data set.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preProcessFiltering.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Filtering of raw data signals — preProcessFiltering","text":"","code":"preProcessFiltering(   dataSet,   filterNaN = TRUE,   filterUnique = 2,   replaceBlank = TRUE,   saveRm = TRUE )"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preProcessFiltering.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Filtering of raw data signals — preProcessFiltering","text":"dataSet 2d data set experimental values. filterNaN boolean (default = TRUE) specifying whether proteins NaN removed data set. filterUnique integer (default = 2) specifying whether proteins less default number unique peptides removed data set. replaceBlank boolean (default = TRUE) specifying whether proteins without names named accession numbers. 
saveRm boolean (default = TRUE) specifying whether save removed data current working directory.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preProcessFiltering.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Filtering of raw data signals — preProcessFiltering","text":"filtered 2d dataframe.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preProcessFiltering.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Filtering of raw data signals — preProcessFiltering","text":"forms filtering recommended use cases.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preprocessing.html","id":null,"dir":"Reference","previous_headings":"","what":"Loading, filtering and reformatting of MS DIA data from Spectronaut — preprocessing","title":"Loading, filtering and reformatting of MS DIA data from Spectronaut — preprocessing","text":"Read data file Spectronaut, apply filtering conditions, select columns necessary analysis, return reformatted data.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preprocessing.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Loading, filtering and reformatting of MS DIA data from Spectronaut — preprocessing","text":"","code":"preprocessing(   fileName,   dataSet = NULL,   filterNaN = TRUE,   filterUnique = 2,   replaceBlank = TRUE,   saveRm = TRUE )"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preprocessing.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Loading, filtering and reformatting of MS DIA data from Spectronaut — preprocessing","text":"fileName name .csv file containing MS data (including path file, needed). dataSet raw data set, already loaded R. filterNaN boolean (default = TRUE) specifying whether observations including NaN omitted. 
filterUnique integer (default = 2) specifying many number unique peptides required include protein. replaceBlank boolean (default = TRUE) specifying whether proteins without names named accession numbers. saveRm boolean (default = TRUE) specifying whether save removed data current working directory.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preprocessing.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Loading, filtering and reformatting of MS DIA data from Spectronaut — preprocessing","text":"2d dataframe.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preprocessing.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Loading, filtering and reformatting of MS DIA data from Spectronaut — preprocessing","text":"function executes following: Reads file. Applies applicable filters, necessary. Provides summary statistics histogram values reported data set. Selects columns contain necessary information analysis. Re-formats data present individual proteins columns group replicates protein. 
Stores data data.frame prints levels condition replicate user.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preprocessing_scaffold.html","id":null,"dir":"Reference","previous_headings":"","what":"Loading and reformatting of MS data from Scaffold — preprocessing_scaffold","title":"Loading and reformatting of MS data from Scaffold — preprocessing_scaffold","text":"Read data file Scaffold, select columns necessary analysis, return reformatted data.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preprocessing_scaffold.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Loading and reformatting of MS data from Scaffold — preprocessing_scaffold","text":"","code":"preprocessing_scaffold(fileName, dataSet = NULL)"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preprocessing_scaffold.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Loading and reformatting of MS data from Scaffold — preprocessing_scaffold","text":"fileName name .xls file containing MS data (including path file, needed). dataSet raw data set, already loaded R.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preprocessing_scaffold.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Loading and reformatting of MS data from Scaffold — preprocessing_scaffold","text":"2d dataframe.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/preprocessing_scaffold.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Loading and reformatting of MS data from Scaffold — preprocessing_scaffold","text":"function executes following: Reads file. Provides summary statistics histogram values reported data set. Selects columns contain necessary information analysis. Re-formats data present individual proteins columns group replicates protein. 
Stores data data.frame prints levels condition replicate user.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/pullProteinPath.html","id":null,"dir":"Reference","previous_headings":"","what":"Compiling data on a single protein from each step in the process — pullProteinPath","title":"Compiling data on a single protein from each step in the process — pullProteinPath","text":"Summarize steps performed data one protein.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/pullProteinPath.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Compiling data on a single protein from each step in the process — pullProteinPath","text":"","code":"pullProteinPath(proteinName, dataSetList)"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/pullProteinPath.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Compiling data on a single protein from each step in the process — pullProteinPath","text":"proteinName string identifying protein interest. 
dataSetList list data frames, order dictates order presentation.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/pullProteinPath.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Compiling data on a single protein from each step in the process — pullProteinPath","text":"2d dataframe, protein data step present dataSetList.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/pullProteinPath.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Compiling data on a single protein from each step in the process — pullProteinPath","text":"proteinName must match labels data sets exactly.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/sortcondition.html","id":null,"dir":"Reference","previous_headings":"","what":"Creating a keyed list of conditions to the list of proteins that are present — sortcondition","title":"Creating a keyed list of conditions to the list of proteins that are present — sortcondition","text":"Create keyed dictionary, every unique experimental condition label list every protein value condition.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/sortcondition.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Creating a keyed list of conditions to the list of proteins that are present — sortcondition","text":"","code":"sortcondition(dataSet)"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/sortcondition.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Creating a keyed list of conditions to the list of proteins that are present — sortcondition","text":"dataSet 2d data set experimental values.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/sortcondition.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Creating a keyed list of conditions to the list of proteins 
that are present — sortcondition","text":"list lists.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/summarize.html","id":null,"dir":"Reference","previous_headings":"","what":"Summarize protein intensities across conditions — summarize","title":"Summarize protein intensities across conditions — summarize","text":"Calculate mean, standard deviation, replicate count protein across every condition.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/summarize.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Summarize protein intensities across conditions — summarize","text":"","code":"summarize(dataSet, saveSumm = TRUE)"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/summarize.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Summarize protein intensities across conditions — summarize","text":"dataSet data frame containing data signals labels. saveSumm boolean (default = TRUE) specifying whether save summary statistics current working directory.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/summarize.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Summarize protein intensities across conditions — summarize","text":"2d summarized data frame.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/summarize.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Summarize protein intensities across conditions — summarize","text":"column 'Stat' generated data.frame includes following statistics: n: Number. mean: Mean. sd: Standard deviation. median: Median. trimmed: Trimmed mean trim 0.1. mad: Median absolute deviation (median). min: Minimum. max: Maximum. range: difference maximum minimum value. skew: Skewness. kurtosis: Kurtosis. 
se: Standard error.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/transform.html","id":null,"dir":"Reference","previous_headings":"","what":"Log-based transformation — transform","title":"Log-based transformation — transform","text":"Apply logarithmic transformation data stabilize variance.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/transform.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Log-based transformation — transform","text":"","code":"transform(dataSet, logFold = 2)"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/transform.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Log-based transformation — transform","text":"dataSet data frame containing data signals. logFold integer specifying base log transformation.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/transform.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Log-based transformation — transform","text":"transformed data.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/transform.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Log-based transformation — transform","text":"function executes following: Plots mean-variance relationship using meanVariancePlot(). Log-transforms data, using specified base. 
Plots mean-variance relationship comparison.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/trimFASTA.html","id":null,"dir":"Reference","previous_headings":"","what":"Trimming down a protein FASTA file to certain proteins — trimFASTA","title":"Trimming down a protein FASTA file to certain proteins — trimFASTA","text":"Trim FASTA file contain proteins present associated Spectronaut report file.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/trimFASTA.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Trimming down a protein FASTA file to certain proteins — trimFASTA","text":"","code":"trimFASTA(   FASTAFileName,   reportFileName,   outputFileName = \"trimFASTA_output.txt\",   by = \"PG.ProteinNames\",   selectString = \"*BOVIN\" )"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/trimFASTA.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Trimming down a protein FASTA file to certain proteins — trimFASTA","text":"FASTAFileName character string specifying name input FASTA .txt file. reportFileName character string specifying name Spectronaut report .csv file. outputFileName character string (default = \"trimFASTA_output.txt\") specifying name output file. character string (default = \"PG.ProteinNames\") specifying identifier (column name) used selection report file. 
selectString character string specifying regular expression search .","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/trimFASTA.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Trimming down a protein FASTA file to certain proteins — trimFASTA","text":"FASTA file specified proteins present.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/trimFASTA.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Trimming down a protein FASTA file to certain proteins — trimFASTA","text":"Depending size FASTA file, function may run slowly take several minutes. FASTA file must .txt format; formats work.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/trimFASTA2.0.html","id":null,"dir":"Reference","previous_headings":"","what":"Trimming down a protein FASTA file to certain proteins — trimFASTA2.0","title":"Trimming down a protein FASTA file to certain proteins — trimFASTA2.0","text":"Trim FASTA file contain proteins present associated Spectronaut report file.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/trimFASTA2.0.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Trimming down a protein FASTA file to certain proteins — trimFASTA2.0","text":"","code":"trimFASTA2.0(   FASTAFileName,   reportFileName,   outputFileName,   selectString = \"*BOVIN\" )"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/trimFASTA2.0.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Trimming down a protein FASTA file to certain proteins — trimFASTA2.0","text":"FASTAFileName string indicating FASTA .txt filename. reportFileName string indicating Spectronaut report .csv filename. outputFileName string indicating name new .txt FASTA file. 
selectString string containing regular expression search.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/trimFASTA2.0.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Trimming down a protein FASTA file to certain proteins — trimFASTA2.0","text":"FASTA file specified proteins present.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/trimFASTA2.0.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Trimming down a protein FASTA file to certain proteins — trimFASTA2.0","text":"Depending size FASTA file, function may run slowly take several minutes. FASTA file must .txt format; formats work.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/visualize.html","id":null,"dir":"Reference","previous_headings":"","what":"Generating visualizations for MS Data — visualize","title":"Generating visualizations for MS Data — visualize","text":"Create specific graphics illustrate results data analysis function.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/visualize.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Generating visualizations for MS Data — visualize","text":"","code":"visualize(   dataSet,   graphType = \"volcano\",   pkg = \"pheatmap\",   cluster_cols = TRUE,   cluster_rows = FALSE,   show_colnames = TRUE,   show_rownames = TRUE,   M.thres = 1,   transformLabel = \"Log2\",   center = TRUE,   scale = TRUE,   addlabels = TRUE,   choice = \"variance\",   ncp = 10,   addEllipses = TRUE,   ellipse.level = 0.95,   label = \"all\",   show_percentage = TRUE,   fill_color = c(\"blue\", \"yellow\", \"green\", \"red\"),   show_universal = FALSE,   P.thres = 0.05,   logF.thres = 0.6 )"},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/visualize.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Generating visualizations for MS Data — 
visualize","text":"dataSet 2D data frame corresponds output function analyze() name. graphType string indicating graph type. Current options : \"heatmap\" \"MA\" \"normalize\" \"PCA_biplot\" \"PCA_ind\" \"PCA_scree\" \"PCA_var\" \"t-test\" \"Upset\" \"Venn\" \"volcano\" pkg string specifying source package used plot heatmap. Two options: \"pheatmap\" \"ggplot2\". argument works graphType = \"heatmap\". cluster_cols boolean (default = TRUE) determining rows clustered hclust object. argument works graphType = \"heatmap\" pkg = \"pheatmap\". cluster_rows boolean (default = FALSE) determining columns clustered hclust object. argument works graphType = \"heatmap\" pkg = \"pheatmap\". show_colnames boolean (default = TRUE) specifying column names shown. argument works graphType = \"heatmap\" pkg = \"pheatmap\". show_rownames boolean (default = TRUE) specifying row names shown. argument works graphType = \"heatmap\" pkg = \"pheatmap\". M.thres absolute threshold value M (log fold-change) (default = 1) used plot two vertical lines (-M.thres M.thres) MA plot graphType = \"MA\". transformLabel string (default = \"Log2\") used label title axes transformation type transformed MA plot graphType = \"MA\". center boolean (default = TRUE) indicating whether variables shifted zero centered graphType = \"PCA_scree\", graphType = \"PCA_ind\", graphType = \"PCA_var\", graphType = \"PCA_biplot\". scale boolean (default = TRUE) indicating whether variables scaled unit variance analysis takes place graphType = \"PCA_scree\", graphType = \"PCA_ind\", graphType = \"PCA_var\", graphType = \"PCA_biplot\". addlabels boolean (default = TRUE) specifying whether elements labeled. graphType = \"PCA_scree\", specifies whether labels added top bars points show information retained dimension. graphType = \"PCA_ind\", specifies whether active individuals labeled. graphType = \"PCA_var\", specifies whether active variables labeled. 
choice text (default = \"variance\") specifying PCA data plotted scree plot graphType = \"PCA_scree\". Allowed values \"variance\" \"eigenvalue\". ncp numeric value (default = 10) specifying number dimensions shown graphType = \"PCA_scree\". addEllipses boolean (default = TRUE) specifying whether draw ellipses around individuals graphType = \"PCA_ind\" graphType = \"PCA_biplot\". ellipse.level numeric value (default = 0.95) specifying size concentration ellipse normal probability graphType = \"PCA_ind\" graphType = \"PCA_biplot\". label text (default = \"\") specifying elements labelled graphType = \"PCA_biplot\". Allowed values: \"\": Label active individuals active variables. \"ind\": Label active individuals. \"var\": Label active variables. \"none\": labels. show_percentage boolean (default = TRUE) specifying whether show percentage set graphType = \"Venn\". fill_color text (default = c(\"blue\", \"yellow\", \"green\", \"red\")) specifying colors fill circles graphType = \"Venn\". show_universal boolean (default = FALSE) specifying whether return data.frame logical columns representing sets graphType = \"Venn\". P.thres threshold value P-value (default = 0.05) used plot horizontal line (-log10(P.thres)) volcano plot graphType = \"volcano\". logF.thres absolute threshold value log2(fold change) (default = 0.6) used plot two vertical lines (-logF.thres logF.thres) volcano plot graphType = \"volcano\".","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/visualize.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Generating visualizations for MS Data — visualize","text":"object class ggplot.","code":""},{"path":"https://uconn-scs.github.io/msDiaLogue/reference/visualize.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Generating visualizations for MS Data — visualize","text":"function visualize() designed work directly output function analyze(). 
Please sure arguments graphType testType match.","code":""}]
diff --git a/man/dataMissing.Rd b/man/dataMissing.Rd
index bee35e8..fd48941 100644
--- a/man/dataMissing.Rd
+++ b/man/dataMissing.Rd
@@ -4,22 +4,40 @@
 \alias{dataMissing}
 \title{Counting missing data}
 \usage{
-dataMissing(dataSet, plot = FALSE, show_labels = TRUE)
+dataMissing(
+  dataSet,
+  sort_miss = FALSE,
+  plot = FALSE,
+  show_pct_legend = TRUE,
+  show_labels = TRUE,
+  show_pct_col = TRUE
+)
 }
 \arguments{
 \item{dataSet}{The 2d data set of experimental values.}
 
+\item{sort_miss}{A boolean (default = FALSE) specifying whether to arrange the columns
+in order of missingness.}
+
 \item{plot}{A boolean (default = FALSE) specifying whether to plot the missingness.}
 
+\item{show_pct_legend}{A boolean (default = TRUE) specifying whether the percentages of
+missing and present values in the entire dataset are shown in the legend of the
+visualization when \code{plot = TRUE}.}
+
 \item{show_labels}{A boolean (default = TRUE) specifying whether protein names are
 shown in the visualization when \code{plot = TRUE}.}
+
+\item{show_pct_col}{A boolean (default = TRUE) specifying whether the percentage of
+missing data across samples for each protein is shown in that protein's label in the
+visualization when \code{show_labels = TRUE}.}
 }
 \value{
 A 2d dataframe including:
 \itemize{
 \item "count_miss": The count of missing values for each protein.
-\item "pct-miss": The percentage of missing values for each protein.
-\item "pct_total_miss": The percentage of missing values for each protein relative to
+\item "pct_miss_col": The percentage of missing values for each protein.
+\item "pct_miss_tot": The percentage of missing values for each protein relative to
 the total missing values in the entire dataset.
 }
 }
diff --git a/tests/storedData/dataMissing_Toy.csv b/tests/storedData/dataMissing_Toy.csv
index ceb0c87..0037a70 100644
--- a/tests/storedData/dataMissing_Toy.csv
+++ b/tests/storedData/dataMissing_Toy.csv
@@ -1,4 +1,4 @@
 "","RAB3D_HUMAN","ADH1_YEAST","LYSC_CHICK","BGAL_HUMAN","SYTC_HUMAN","CYC_BOVIN","PA1B2_HUMAN","TEBP_HUMAN","UAP1_HUMAN","B3GLT_HUMAN","NFXL1_HUMAN","VPS36_HUMAN","T126B_HUMAN","ORC3_HUMAN","BAG5_HUMAN","ANGL3_HUMAN","ZC11B_HUMAN","MAP11_HUMAN"
 "count_miss",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,4,3
-"pct_miss",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,40,30
-"pct_total_miss",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12.5,50,37.5
+"pct_miss_col",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,40,30
+"pct_miss_tot",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12.5,50,37.5
diff --git a/vignettes/usage_template.Rmd b/vignettes/usage_template.Rmd
index 5a08942..6e3bc30 100644
--- a/vignettes/usage_template.Rmd
+++ b/vignettes/usage_template.Rmd
@@ -349,9 +349,9 @@ providing the following information:
 
 + `count_miss`: The count of missing values for each protein.
 
-+ `pct-miss`: The percentage of missing values for each protein.
++ `pct_miss_col`: The percentage of missing values for each protein.
 
-+ `pct_total_miss`: The percentage of missing values for each protein relative to the
++ `pct_miss_tot`: The percentage of missing values for each protein relative to the
 total missing values in the entire dataset.
 
 <div style="overflow-x: auto;">