-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #291 from Steinbeck-Lab/feat-import-2D-3D-SDFs
feat: export and import 2D / 3D SDFs
- Loading branch information
Showing
11 changed files
with
255 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
<?php | ||
|
||
namespace App\Console\Commands; | ||
|
||
use App\Models\Molecule; | ||
use App\Models\Structure; | ||
use DB; | ||
use Illuminate\Console\Command; | ||
|
||
class Import2DSDFs extends Command
{
    /**
     * Number of molecules written per database transaction.
     */
    private const BATCH_SIZE = 10000;

    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'coconut:import-2d-sdfs {file}';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Imports 2D SDFs from a JSON file into Structures table';

    /**
     * Execute the console command.
     *
     * Reads the JSON file named by the {file} argument (resolved relative to
     * the storage directory), decodes it as a map of molecule identifier to
     * 2D SDF payload, and inserts one Structure row per entry in batches.
     *
     * @return int 0 on success, 1 when the file cannot be read or decoded
     */
    public function handle()
    {
        $file = storage_path($this->argument('file'));
        if (! file_exists($file) || ! is_readable($file)) {
            $this->error('File not found or not readable.');

            return 1;
        }

        // Check the read result before decoding, and report failures through
        // the command's exit code instead of terminating the process.
        $json = file_get_contents($file);
        if ($json === false) {
            $this->error('Error reading the JSON file');

            return 1;
        }

        $json_data = json_decode($json, true);
        unset($json); // the raw text is no longer needed once decoded

        if (! is_array($json_data)) {
            $this->error('Could not decode a JSON object from the file: '.json_last_error_msg());

            return 1;
        }

        $totalElements = count($json_data);
        $totalBatches = (int) ceil($totalElements / self::BATCH_SIZE);

        for ($offset = 0; $offset < $totalElements; $offset += self::BATCH_SIZE) {
            $this->info('Processing batch '.(intdiv($offset, self::BATCH_SIZE) + 1).' of '.$totalBatches);

            // preserve_keys keeps the identifier => SDF mapping intact even
            // when an identifier is numeric (array_slice would renumber it).
            $this->insertBatch(array_slice($json_data, $offset, self::BATCH_SIZE, true));
        }

        $this->info('2D SDFs imported successfully.');

        return 0;
    }

    /**
     * Insert one Structure row per entry of the batch inside a single transaction.
     *
     * Entries whose identifier matches no molecule are reported and skipped.
     *
     * @param  array  $data  Map of molecule identifier => 2D SDF payload
     * @return void
     */
    private function insertBatch(array $data)
    {
        if ($data === []) {
            return;
        }

        // Resolve every identifier of the batch with one query instead of
        // issuing one lookup per molecule.
        $identifiers = array_map('strval', array_keys($data));
        $moleculeIds = Molecule::whereIn('identifier', $identifiers)
            ->pluck('id', 'identifier')
            ->all();

        DB::transaction(function () use ($data, $moleculeIds) {
            foreach ($data as $identifier => $sdf_2d) {
                $moleculeId = $moleculeIds[$identifier] ?? null;
                if ($moleculeId === null) {
                    $this->warn('No molecule found for identifier '.$identifier.'; skipping.');

                    continue;
                }

                $structure = new Structure;
                $structure['molecule_id'] = $moleculeId;
                $structure['2d'] = json_encode($sdf_2d);
                $structure->save();
            }
        });
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
<?php | ||
|
||
namespace App\Console\Commands; | ||
|
||
use App\Models\Molecule; | ||
use DB; | ||
use Illuminate\Console\Command; | ||
|
||
class Import3DSDFs extends Command
{
    /**
     * Number of molecules written per database transaction.
     */
    private const BATCH_SIZE = 10000;

    /**
     * Numeric suffix of the last file in the series to import.
     */
    private const LAST_FILE_SUFFIX = 18;

    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'coconut:import-3d-sdfs {file}';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Imports 3D SDFs from multiple JSON file into Structures table';

    /**
     * Execute the console command.
     *
     * The {file} argument (resolved relative to the storage directory) names
     * the first file of a numbered series "<prefix><n>.json". Every file from
     * that number up to LAST_FILE_SUFFIX is decoded as a map of molecule
     * identifier to 3D SDF payload and written to the matching structure.
     *
     * @return int 0 on success, 1 when a file is missing, unreadable or invalid
     */
    public function handle()
    {
        $file = storage_path($this->argument('file'));

        // Split "<prefix><n>.json" into prefix and number. The number may have
        // several digits; leading zeros stay in the prefix so that names such
        // as "part_01.json" continue as "part_02.json".
        if (preg_match('/^(.*?)([1-9]\d*|0)\.json$/', $file, $parts) !== 1) {
            $this->error('File name must end with a number followed by .json: '.$file);

            return 1;
        }

        $file_name_without_id = $parts[1];
        $file_suffix = (int) $parts[2];

        // Walk through every file of the series.
        for (; $file_suffix <= self::LAST_FILE_SUFFIX; $file_suffix++) {
            $file_name = $file_name_without_id.$file_suffix.'.json';
            $this->info('Starting loop for file '.$file_name);

            if (! file_exists($file_name) || ! is_readable($file_name)) {
                $this->error('File not found or not readable: '.$file_name);

                return 1;
            }

            $json = file_get_contents($file_name);
            if ($json === false) {
                $this->error('Error reading the JSON file');

                return 1;
            }
            $this->info('File read');

            $json_data = json_decode($json, true);
            unset($json); // release the raw text before the batch inserts

            if (! is_array($json_data)) {
                $this->error('Could not decode a JSON object from '.$file_name.': '.json_last_error_msg());

                return 1;
            }

            // Print the last identifier of the file as a progress marker.
            if ($json_data !== []) {
                $this->info((string) array_key_last($json_data));
            }

            $totalElements = count($json_data);
            $this->info('Total elements: '.$totalElements);
            $totalBatches = (int) ceil($totalElements / self::BATCH_SIZE);

            for ($offset = 0; $offset < $totalElements; $offset += self::BATCH_SIZE) {
                $this->info('Processing batch '.(intdiv($offset, self::BATCH_SIZE) + 1).' of '.$totalBatches);

                // preserve_keys keeps numeric identifiers from being renumbered.
                $this->insertBatch(array_slice($json_data, $offset, self::BATCH_SIZE, true));
            }
        }

        $this->info('3D SDFs imported successfully.');

        return 0;
    }

    /**
     * Store the 3D SDF of every entry of the batch inside a single transaction.
     *
     * Entries without a matching molecule, or whose molecule has no structure
     * row yet, are reported and skipped.
     *
     * @param  array  $data  Map of molecule identifier => 3D SDF payload
     * @return void
     */
    private function insertBatch(array $data)
    {
        DB::transaction(function () use ($data) {
            foreach ($data as $identifier => $sdf_3d) {
                $molecule = Molecule::where('identifier', (string) $identifier)->first();
                if ($molecule === null) {
                    $this->warn('No molecule found for identifier '.$identifier.'; skipping.');

                    continue;
                }

                // NOTE(review): assumes `structures` resolves to a single
                // Structure model (one-to-one relation) - confirm on Molecule.
                $structure = $molecule->structures;
                if ($structure === null) {
                    $this->warn('No structure row for identifier '.$identifier.'; skipping.');

                    continue;
                }

                $structure['3d'] = json_encode($sdf_3d);
                $structure->save();
            }
        });
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
28 changes: 28 additions & 0 deletions
28
database/migrations/2024_11_12_150749_add_columns_on_structures_table.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
<?php | ||
|
||
use Illuminate\Database\Migrations\Migration; | ||
use Illuminate\Database\Schema\Blueprint; | ||
use Illuminate\Support\Facades\Schema; | ||
|
||
return new class extends Migration
{
    /**
     * Run the migrations.
     *
     * Adds a unique molecule_id foreign key to the structures table; deleting
     * a molecule cascades to its structure row.
     */
    public function up(): void
    {
        Schema::table('structures', function (Blueprint $table) {
            // Column modifiers must come before constrained(): that call
            // returns the foreign key definition, so anything chained after it
            // (previously `after('id')`) never reaches the column.
            $table->foreignId('molecule_id')
                ->after('id')
                ->unique()
                ->constrained()
                ->onDelete('cascade');
        });
    }

    /**
     * Reverse the migrations.
     */
    public function down(): void
    {
        Schema::table('structures', function (Blueprint $table) {
            // Drop the foreign key constraint first; some database engines
            // refuse to drop a column that is still referenced by one.
            $table->dropForeign(['molecule_id']);
            $table->dropColumn(['molecule_id']);
        });
    }
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters