Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

V1.8 pre #25

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions bin/gather_minimal_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,7 @@ def gather_pool(expid, args, df_raw, df_cellbender, adqc, oufh = sys.stdout,lane
#Cell-type assignments
#############

azt = pd.read_csv(f'{args.results_dir}/celltype/All_Celltype_Assignments.tsv',sep='\t',index_col=0)
azt = pd.read_csv(f'{args.results_dir}/celltype_assignemt/All_Celltype_Assignments.tsv',sep='\t',index_col=0)
azt_cols_to_add = azt.columns[azt.columns.str.contains('Azimuth')]
ct_cols_to_add = azt.columns[azt.columns.str.contains('Celltypist')]
sc_cols_to_add = azt.columns[azt.columns.str.contains('scpred_prediction')]
Expand All @@ -532,7 +532,7 @@ def gather_pool(expid, args, df_raw, df_cellbender, adqc, oufh = sys.stdout,lane
##########################
# Scrublet
#########################
doublet_data = glob.glob(f'{args.results_dir}/doublets/*.tsv')
doublet_data = glob.glob(f'{args.results_dir}/doublet_detection/doublet_results_combined/*.tsv')
doublet_data_combined = pd.DataFrame()
for f1 in doublet_data:
print(f1)
Expand Down Expand Up @@ -1009,7 +1009,7 @@ def set_argument_parser():

# Load the final QCd dataset
try:
adqc = anndata.read_h5ad(f'{args.results_dir}/merged_h5ad/outlier_filtered_adata.h5ad')
adqc = anndata.read_h5ad(f'{args.results_dir}/handover/merged_h5ad/4.outlier_filtered_adata.h5ad')
except:
try:
adqc = anndata.read_h5ad(f'{args.results_dir}/adata.h5ad')
Expand Down
2 changes: 1 addition & 1 deletion bin/merge_outliers.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def main():
adata.obs = pd.concat([adata.obs,combo_files],axis=1)

adata.write(
'outlier_filtered_adata.h5ad',
'4.outlier_filtered_adata.h5ad',
compression='gzip',
compression_opts=options.anndata_compression_opts
)
Expand Down
118 changes: 81 additions & 37 deletions bin/transfer_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
import pandas as pd


def choose_folder(folder1, folder2):
    """Return the preferred folder if it exists, otherwise the fallback.

    Used to support two alternative output layouts: the caller passes the
    preferred location first and the alternative location second.

    Args:
        folder1: Preferred path; returned when it is an existing directory.
        folder2: Fallback path; returned unchanged when ``folder1`` is not
            an existing directory.

    Returns:
        ``folder1`` if ``os.path.isdir(folder1)`` is true, else ``folder2``.

    Note:
        ``folder2`` is NOT checked for existence here, so the returned path
        may still be missing; callers should test it with
        ``os.path.isdir`` before use.
    """
    return folder1 if os.path.isdir(folder1) else folder2

def main_data_colection(pipeline='',name='',directory='',input_table=None,cb_res=None,web_transfer=False,project_name='all'):

Expand All @@ -26,7 +30,7 @@ def main_data_colection(pipeline='',name='',directory='',input_table=None,cb_res
try:
os.mkdir(f'{name_dir}/Cellbender')
except:
print('dire exists')
print('dir exists')

if (cellbender)=='cellranger':
# here we do not use cellbender and go with default cellranger
Expand Down Expand Up @@ -58,7 +62,7 @@ def main_data_colection(pipeline='',name='',directory='',input_table=None,cb_res
if os.path.isdir(dir1):
print("yes22!!")
try:
copyfile(f'{dir1_b}/plots/cellbender_results-cellbender_FPR_{cb_res}_filtered-ambient_signature-scatter_genenames.png', f'{name_dir}/Cellbender/{folder}_ambient_signature-scatter_genenames.png')
copyfile(f'{dir1}/plots/cellbender_results-cellbender_FPR_{cb_res}_filtered-ambient_signature-scatter_genenames.png', f'{name_dir}/Cellbender/{folder}_ambient_signature-scatter_genenames.png')
except:
print('missing1')
try:
Expand Down Expand Up @@ -191,20 +195,24 @@ def main_data_colection(pipeline='',name='',directory='',input_table=None,cb_res

try:
#NOW COPY THE GT OUTPUTS
folder1 = f'{directory}/deconvolution/deconvolution_results/vireo_gt_fix'
folder1 = f'{directory}/deconvolution/vireo_gt_fix'
folder2 = f'{directory}/deconvolution/deconvolution_results/vireo_gt_fix'
folder1=choose_folder (folder1, folder2)
if os.path.isdir(folder1):
copyfile(f'{folder1}/assignments_all_pools.tsv', f'{name_dir}/GT Match___1000/assignments_all_pools.tsv')
except:
print('exists')

#if (pipeline=='Deconvolution'):
folder1 = f'{directory}/deconvolution/deconvolution_results/split_donor_h5ad'
folder1 = f'{directory}/deconvolution/split_donor_h5ad'
folder2 = f'{directory}/deconvolution/deconvolution_results/split_donor_h5ad'
folder1=choose_folder (folder1, folder2)
if os.path.isdir(folder1):
print('prepearing Deconvolution folder')
try:
os.mkdir(f'{name_dir}/Deconvolution')
except:
print('dire exists')
print('dir exists')
Folders = listdir(folder1)
for folder in Folders:
copyfile(f'{folder1}/{folder}/Vireo_plots.pdf', f'{name_dir}/Deconvolution/Vireo_plots_{folder}.pdf')
Expand All @@ -214,24 +222,38 @@ def main_data_colection(pipeline='',name='',directory='',input_table=None,cb_res
try:
os.mkdir(f'{name_dir}/Deconvolution/csv')
except:
print('dire exists')
try:
copyfile(f'{directory}/deconvolution/vireo/correlations.png', f'{name_dir}/Deconvolution/correlations.png')
copyfile(f'{directory}/deconvolution/vireo/matched_donors.txt', f'{name_dir}/Deconvolution/csv/matched_donors.tsv')
copyfile(f'{directory}/deconvolution/vireo/donor_corelations_matrix.tsv', f'{name_dir}/Deconvolution/csv/donor_corelations_matrix.tsv')
except:
_='corelations not performed'
print('dir exists')
if 'deconvolution_results' in folder1:
try:
copyfile(f'{directory}/deconvolution/deconvolution_results/vireo/correlations.png', f'{name_dir}/Deconvolution/correlations.png')
copyfile(f'{directory}/deconvolution/deconvolution_results/vireo/matched_donors.txt', f'{name_dir}/Deconvolution/csv/matched_donors.tsv')
copyfile(f'{directory}/deconvolution/deconvolution_results/vireo/donor_corelations_matrix.tsv', f'{name_dir}/Deconvolution/csv/donor_corelations_matrix.tsv')
except:
_='corelations not performed'
else:
try:
copyfile(f'{directory}/deconvolution/vireo/correlations.png', f'{name_dir}/Deconvolution/correlations.png')
copyfile(f'{directory}/deconvolution/vireo/matched_donors.txt', f'{name_dir}/Deconvolution/csv/matched_donors.tsv')
copyfile(f'{directory}/deconvolution/vireo/donor_corelations_matrix.tsv', f'{name_dir}/Deconvolution/csv/donor_corelations_matrix.tsv')
except:
_='corelations not performed'

folder1 = f'{directory}/doublets'
folder2 = f'{directory}/doublet_detection'
folder1=choose_folder (folder1, folder2)
if os.path.isdir(folder1):
print('prepearing Doublet folder')
try:
os.mkdir(f'{name_dir}/Doublets___301')
os.mkdir(f'{name_dir}/Doublets___301/tsv')
except:
print('dire exists')
files = glob.glob(f'{folder1}/*.tsv')
files2 = glob.glob(f'{folder1}/*.png')
print('dir exists')
if 'doublet_detection' in folder1:
files = glob.glob(f'{folder1}/doublet_results_combined/*.tsv')
files2 = glob.glob(f'{folder1}/droplet_type_distribution/*.png')
else:
files = glob.glob(f'{folder1}/*.tsv')
files2 = glob.glob(f'{folder1}/*.png')
for file1 in files:
print(file1)
try:
Expand All @@ -250,16 +272,18 @@ def main_data_colection(pipeline='',name='',directory='',input_table=None,cb_res


folder1 = f'{directory}/celltype/celltypist'
folder2 = f'{directory}/celltype_assignemt/celltypist'
folder1=choose_folder (folder1, folder2)
if os.path.isdir(folder1):
print('prepearing celltype folder')
try:
os.mkdir(f'{name_dir}/Cell-type assignment')
except:
print('dire exists')
print('dir exists')
try:
os.mkdir(f'{name_dir}/Cell-type assignment/celltypist')
except:
print('dire exists')
print('dir exists')
Folders = listdir(folder1)
for model_type in Folders:
print(model_type)
Expand All @@ -269,22 +293,22 @@ def main_data_colection(pipeline='',name='',directory='',input_table=None,cb_res
copyfile(f'{folder1}/{model_type}/{donor}/{donor}_majority_voting.pdf', f'{name_dir}/Cell-type assignment/celltypist/{model_type}_{donor}_majority_voting.pdf')

folder1 = f'{directory}/celltype/azimuth'


folder2 = f'{directory}/celltype_assignemt/azimuth'
folder1=choose_folder (folder1, folder2)
# folder1 = f'{directory}/plots'
if os.path.isdir(folder1):
try:
os.mkdir(f'{name_dir}/Cell-type assignment')
except:
print('dire exists')
print('dir exists')
try:
os.mkdir(f'{name_dir}/Cell-type assignment/azimuth')
except:
print('dire exists')
print('dir exists')
# try:
# os.mkdir(f'{name_dir}/Cell-type assignment/azimuth')
# except:
# print('dire exists')
# print('dir exists')
# copyfile(fil1, f'{name_dir}/QC metrics/plot_ecdf-x_log10.var=total_counts.color=experiment_id-adata.png')
files = glob.glob(f'{folder1}/*[!.gz]')
files2 = glob.glob(f'{folder1}/*/*[!.gz]')
Expand All @@ -297,20 +321,22 @@ def main_data_colection(pipeline='',name='',directory='',input_table=None,cb_res
print('picked up directory')
continue

folder1 = f'{directory}/celltype/scpred'
folder1 = f'{directory}/celltype/scpred'
folder2 = f'{directory}/celltype_assignemt/scpred'
folder1=choose_folder (folder1, folder2)
if os.path.isdir(folder1):
try:
os.mkdir(f'{name_dir}/Cell-type assignment')
except:
print('dire exists')
print('dir exists')
try:
os.mkdir(f'{name_dir}/Cell-type assignment/scpred')
except:
print('dire exists')
print('dir exists')
# try:
# os.mkdir(f'{name_dir}/Cell-type assignment/azimuth')
# except:
# print('dire exists')
# print('dir exists')
# copyfile(fil1, f'{name_dir}/QC metrics/plot_ecdf-x_log10.var=total_counts.color=experiment_id-adata.png')
files = glob.glob(f'{folder1}/*[!.RDS]')
files2 = glob.glob(f'{folder1}/*/*[!.RDS]')
Expand All @@ -330,33 +356,43 @@ def main_data_colection(pipeline='',name='',directory='',input_table=None,cb_res
copy(f'{directory}/celltype/All_Celltype_Assignments.tsv', f'{name_dir}/Cell-type assignment/All_Celltype_Assignments.tsv')
except:
print('doesnt exist')
try:
copy(f'{directory}/celltype_assignemt/All_Celltype_Assignments.tsv', f'{name_dir}/Cell-type assignment/All_Celltype_Assignments.tsv')
except:
print('doesnt exist')


folder1 = f'{directory}/plots/per_celltype_outliers'
folder2 = f'{directory}/clustering_and_integration/plots/per_celltype_outliers'
folder1=choose_folder (folder1, folder2)
if os.path.isdir(folder1):
print('yes')
try:
os.mkdir(f'{name_dir}/QC metrics')
except:
print('dire exists')
print('dir exists')
os.system(f'ln -s ./{folder1} {name_dir}/QC metrics')


folder1 = f'{directory}/plots'
folder2 = f'{directory}/clustering_and_integration/plots'
folder1=choose_folder (folder1, folder2)
if os.path.isdir(folder1):
try:
os.mkdir(f'{name_dir}/QC metrics')
except:
print('dire exists')
print('dir exists')

density_files = glob.glob(f'{folder1}/*cell_desity*')
density_files2 = glob.glob(f'{folder1}/merged_h5ad/plots/*cell_desity*')
density_files3 = glob.glob(f'{folder1}/handover/merged_h5ad/plots/*cell_desity*')
density_files.extend(density_files2)
density_files.extend(density_files3)
for dens_file in density_files:
try:
os.mkdir(f'{name_dir}/QC metrics/density')
except:
print('dire exists')
print('dir exists')
nam1 = dens_file.split('/')[-1]
try:
copyfile(f'{dens_file}', f'{name_dir}/QC metrics/density/{nam1}')
Expand All @@ -365,12 +401,14 @@ def main_data_colection(pipeline='',name='',directory='',input_table=None,cb_res

density_files = glob.glob(f'{folder1}/*adata-outlier_cells*')
density_files2 = glob.glob(f'{folder1}/merged_h5ad/plots/*adata-outlier_cells*')
density_files3 = glob.glob(f'{folder1}/handover/merged_h5ad/plots/*adata-outlier_cells*')
density_files.extend(density_files2)
density_files.extend(density_files3)
for dens_file in density_files:
try:
os.mkdir(f'{name_dir}/QC metrics/outlier_cells')
except:
print('dire exists')
print('dir exists')
nam1 = dens_file.split('/')[-1]
try:
copyfile(f'{dens_file}', f'{name_dir}/QC metrics/outlier_cells/{nam1}')
Expand All @@ -395,16 +433,18 @@ def main_data_colection(pipeline='',name='',directory='',input_table=None,cb_res


folder1 = f'{directory}/clustering'
folder2 = f'{directory}/clustering_and_integration'
folder1=choose_folder (folder1, folder2)
if os.path.isdir(folder1):
try:
os.mkdir(f'{name_dir}/Clustering')
os.mkdir(f'{name_dir}/Clustering/Harmony')
except:
print('dire exists')
print('dir exists')
try:
os.mkdir(f'{name_dir}/Clustering/Harmony')
except:
print('dire exists')
print('dir exists')
Harmony_UMAPS = glob.glob(f'{folder1}/*/*harmony*/*/plots/umap*')
for umap1 in Harmony_UMAPS:

Expand All @@ -415,16 +455,18 @@ def main_data_colection(pipeline='',name='',directory='',input_table=None,cb_res


folder1 = f'{directory}/clustering'
folder2 = f'{directory}/clustering_and_integration'
folder1=choose_folder (folder1, folder2)
if os.path.isdir(folder1):
try:
os.mkdir(f'{name_dir}/Clustering')
os.mkdir(f'{name_dir}/Clustering/BBKNN')
except:
print('dire exists')
print('dir exists')
try:
os.mkdir(f'{name_dir}/Clustering/BBKNN')
except:
print('dire exists')
print('dir exists')
Harmony_UMAPS = glob.glob(f'{folder1}/*/*bbknn*/*/plots/umap*')
for umap1 in Harmony_UMAPS:

Expand All @@ -443,8 +485,8 @@ def main_data_colection(pipeline='',name='',directory='',input_table=None,cb_res
try:
os.mkdir(f'{name_dir}/Clustering/Coloured')
except:
print('dire exists')
print('dire exists')
print('dir exists')
print('dir exists')
Coloured_UMAPS = glob.glob(f'{folder1}/*')
for umap1 in Coloured_UMAPS:
name = umap1.split('/')[-1]
Expand All @@ -456,8 +498,10 @@ def main_data_colection(pipeline='',name='',directory='',input_table=None,cb_res
try:
copytree(folder1, f'{name_dir}/Summary')
except:
print('dire exists')
print('dir exists')
folder1 = f'{directory}/celltype'
folder2 = f'{directory}/celltype_assignemt'
folder1=choose_folder (folder1, folder2)
if os.path.isdir(folder1):
try:
copyfile(f"{folder1}/donor_celltype_report.tsv", f'{name_dir}/Summary/donor_celltype_report.tsv')
Expand Down
4 changes: 2 additions & 2 deletions conf/extra_confs/cardinal/freeze1/inputs.nf
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ params {
value=true
gt_match_file="$output_dir/deconvolution/deconvolution_results/vireo_gt_fix/assignments_all_pools.tsv" //We provide this if we want to exclude particular samples matched to a certain GT cohort from the adaptive QC
gt_match_based_adaptive_qc_exclusion_pattern = 'U937;THP1' //We run the adaptive QC on these patterns independently, regardless of the assigned cell type.
file__anndata_merged = "$output_dir/merged_h5ad/1.pre_QC_adata.h5ad"
file__cells_filtered = "$output_dir/merged_h5ad/pre_QC_adata-cell_filtered_per_experiment.tsv.gz"
file__anndata_merged = "$output_dir/handover/merged_h5ad/1.pre_QC_adata.h5ad"
file__cells_filtered = "$output_dir/handover/merged_h5ad/pre_QC_adata-cell_filtered_per_experiment.tsv.gz"

}
harmony{
Expand Down
4 changes: 2 additions & 2 deletions modules/nf-core/modules/azimuth/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ process AZIMUTH{
container "wtsihgi/nf_scrna_qc_azimuth:d54db9b"
}

publishDir path: "${params.outdir}/celltype/azimuth/${refset.name}",
publishDir path: "${params.outdir}/celltype_assignemt/azimuth/${refset.name}",
saveAs: {filename -> "${outfil_prfx}_" + filename},
mode: "${params.copy_mode}",
overwrite: "true"
Expand Down Expand Up @@ -68,7 +68,7 @@ process REMAP_AZIMUTH{
container "wtsihgi/nf_scrna_qc_azimuth:d54db9b"
}

publishDir path: "${params.outdir}/celltype/",
publishDir path: "${params.outdir}/celltype_assignemt/",
mode: "${params.copy_mode}",
overwrite: "true"
stageInMode 'copy'
Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/modules/cell_hard_filters/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ process CELL_HARD_FILTERS{
}


publishDir path: "${params.outdir}/merged_h5ad/",
publishDir path: "${params.outdir}/handover/merged_h5ad/",
saveAs: {filename ->
if (filename.contains("hard_filters_")) {
filename = '3.hard_filters_annotated_h5ad.h5ad'
Expand Down
Loading
Loading