Examples
This page provides comprehensive examples of using OpenBabel.jl for common molecular data processing tasks.
Basic File Conversion
SMILES to SDF Conversion
using OpenBabel
using Chain
# Convert SMILES to SDF with 3D coordinates and properties
@chain begin
@read_file("input.smi", "smi")
@gen_3D_coords("fast")
@add_properties(["MW", "logP", "TPSA"])
@output_as("output.sdf", "sdf")
@execute
end
Batch File Processing
# Process multiple molecules into separate files
@chain begin
@read_file("database.smi", "smi")
@gen_2D_coords()
@add_index()
@write_multiple_files()
@output_as("molecule", "mol") # Creates molecule1.mol, molecule2.mol, etc.
@execute
end
Drug Discovery Workflows
Lead Compound Filtering
# Prepare compounds for Lipinski Rule of Five filtering.
# NOTE(review): no step in this chain compares a property against a threshold;
# the only filter visible here is the SMARTS match, so the Rule of Five cut-offs
# still have to be applied to the written properties afterwards.
@chain begin
@read_file("compound_library.sdf", "sdf")
# Request the four descriptors the Rule of Five is defined on.
@add_properties(["MW", "logP", "HBD", "HBA"])
@match_smarts_string("[!#1]") # Must contain non-hydrogen atoms
@sort_by("MW")
@output_as("filtered_compounds.sdf", "sdf")
@execute
end
# Additional filtering can be done in Julia: the chain above does not apply the
# Rule of Five thresholds itself, so filter the output on
# MW <= 500, logP <= 5, HBD <= 5 and HBA <= 10.
Virtual Screening Pipeline
# Prepare compounds for virtual screening
@chain begin
@read_file("screening_library.smi", "smi")
@ignore_bad_molecules()
@remove_duplicate_mols()
@gen_3D_coords("med")
@minimize_energy("MMFF94")
@add_partial_charges("gasteiger")
@canonicalize()
@add_properties(["MW", "logP", "TPSA", "rotors"])
@sort_by_reverse("MW")
@output_as("screening_ready.sdf", "sdf")
@execute
end
Chemical Database Processing
Large Dataset Cleaning
# Clean and standardize a large chemical database
@chain begin
@read_file("raw_database.sdf", "sdf")
@ignore_bad_molecules() # Skip invalid structures
@remove_duplicate_mols() # Remove duplicates
@separate_fragments() # Split salt complexes
@remove_hydrogens() # Use implicit hydrogens
@set_atom_order_canonical() # Standardize atom ordering
@dont_match_smarts_string("[Na,K,Cl,Br]") # Remove simple salts
@add_properties(["MW", "atoms", "bonds"])
@sort_by("MW")
@output_as("clean_database.sdf", "sdf")
@execute
end
Property-Based Filtering
# Extract drug-like molecules based on molecular properties
@chain begin
@read_file("compound_collection.sdf", "sdf")
@add_properties(["MW", "logP", "TPSA", "HBD", "HBA", "rotors"])
# Following steps would need additional Julia filtering
@match_smarts_string("[#6,#7,#8,#16,#15,#9,#17,#35,#53]") # Common drug atoms
@dont_match_smarts_string("[Pb,Hg,As,Cd]") # Exclude toxic metals
@output_as("druglike_molecules.sdf", "sdf")
@execute
end
Structure-Activity Relationships
Scaffold Analysis
# Generate molecular scaffolds for SAR analysis
@chain begin
@read_file("active_compounds.sdf", "sdf")
@remove_hydrogens()
@set_atom_order_canonical()
@add_properties(["MW", "rotors", "rings"])
@sort_by("rings")
@output_as("scaffolds.sdf", "sdf")
@execute
end
Conformer Generation for Flexibility Analysis
# Generate conformers to study molecular flexibility
@chain begin
@read_file("flexible_molecules.smi", "smi")
@gen_3D_coords("slow")
@generate_conformers()
@minimize_energy("MMFF94")
@calculate_energy("MMFF94")
@add_properties(["rotors", "MW"])
@sort_by("Energy")
@output_as("conformer_ensemble.sdf", "sdf")
@execute
end
Specialized Chemical Applications
Natural Product Processing
# Process natural product structures
@chain begin
@read_file("natural_products.sdf", "sdf")
@ignore_bad_molecules()
@gen_3D_coords("slow") # High quality for complex structures
@center_coords_at_zero()
@add_properties(["MW", "logP", "TPSA", "rings", "stereocenters"])
@dont_match_smarts_string("[Si,B,Al]") # Remove unusual atoms
@sort_by_reverse("rings") # Complex ring systems first
@output_as("processed_np.sdf", "sdf")
@execute
end
Fragment Library Generation
# Create fragment library from larger molecules
@chain begin
@read_file("parent_compounds.smi", "smi")
@separate_fragments() # Split into fragments
@remove_duplicate_mols() # Remove duplicate fragments
@match_smarts_string("[#6,#7,#8]") # Keep C, N, O containing fragments
@add_properties(["MW", "atoms", "HBA", "HBD"])
@sort_by("MW")
@output_as("fragment_library.sdf", "sdf")
@execute
end
Quality Control and Validation
Structure Validation Pipeline
# Validate and clean chemical structures
@chain begin
@read_file("unchecked_structures.sdf", "sdf")
@ignore_bad_molecules() # Skip unparseable structures
@canonicalize() # Standardize representation
@convert_dative_bonds() # Handle coordination bonds
@add_properties(["formula", "MW", "atoms", "bonds"])
@add_filename() # Track original source
@add_index() # Add unique identifiers
@output_as("validated_structures.sdf", "sdf")
@execute
end
Duplicate Detection and Removal
# Advanced duplicate detection workflow
@chain begin
@read_file("merged_database.sdf", "sdf")
@canonicalize() # Ensure consistent representation
@remove_hydrogens() # Compare without explicit H
@set_atom_order_canonical() # Standard atom ordering
@remove_duplicate_mols() # Remove exact duplicates
@add_properties(["InChI"]) # Add InChI for further comparison
@sort_by("MW")
@output_as("deduplicated_database.sdf", "sdf")
@execute
end
Performance Optimization
Processing Large Datasets Efficiently
# Efficient processing of large chemical databases
"""
    process_large_database(input_file, output_file, start_idx=1, batch_size=10000)

Run the OpenBabel chain below over `input_file` in repeated passes. Pass `k` starts
at molecule index `start_idx + (k - 1) * batch_size` and writes to
`<output_file>_batch_<k>.sdf`.

# Arguments
- `input_file`: path handed to `@read_file` (read as `"sdf"`).
- `output_file`: prefix of the per-pass output file names.
- `start_idx`: index handed to `@start_with_index` on the first pass.
- `batch_size`: amount the start index advances after each pass; must be positive.

# Throws
- `ArgumentError` if `batch_size` is not positive.
- `InterruptException` is propagated so that Ctrl-C aborts the run.

Any other exception raised by a pass ends the loop and is reported with `@info`
instead of being discarded.
"""
function process_large_database(input_file, output_file, start_idx=1, batch_size=10000)
    # A non-positive step would re-run the same pass forever.
    batch_size > 0 ||
        throw(ArgumentError("batch_size must be positive, got $batch_size"))

    current_idx = start_idx
    batch_num = 1
    while true
        try
            # NOTE(review): only a start index is set here, so each pass presumably
            # covers everything from `current_idx` to the end of the input rather
            # than `batch_size` molecules -- confirm whether an upper bound is needed.
            @chain begin
                @read_file(input_file, "sdf")
                @start_with_index(current_idx)
                @ignore_bad_molecules()
                @add_properties(["MW", "logP"])
                @output_as("$(output_file)_batch_$(batch_num).sdf", "sdf")
                @execute
            end
        catch err
            # Never treat a user interrupt as "end of input".
            err isa InterruptException && rethrow()
            # NOTE(review): the loop relies on the chain throwing once the start
            # index is past the last molecule -- confirm that it does. The error is
            # logged so that a real failure (e.g. a missing input file) is visible.
            @info "Batch $batch_num (start index $current_idx) raised an error; stopping." exception = err
            break
        end
        current_idx += batch_size
        batch_num += 1
    end
    return nothing
end
# Usage
process_large_database("huge_database.sdf", "processed", 1, 5000)
Memory-Efficient Processing
# Process a large SDF file while keeping memory use down by writing the results
# to separate numbered files instead of one large output.
# NOTE(review): no chunk size is configured anywhere in this chain; the "chunks"
# are whatever @write_multiple_files emits (presumably one file per molecule) --
# confirm.
@chain begin
@read_file("large_dataset.sdf", "sdf")
@start_with_index(1) # Start from beginning
@ignore_bad_molecules() # Skip problematic structures
@gen_3D_coords("fast") # Use fast method for efficiency
@add_properties(["MW"]) # Minimal properties for speed
@write_multiple_files() # Separate files reduce memory
@output_as("chunk", "sdf") # Creates chunk1.sdf, chunk2.sdf, etc.
@execute
end