diff --git a/solventmapcreator/io/solvationenergyextraction.py b/solventmapcreator/io/solvationenergyextraction.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ee7f8ab5551efb63032668dc11a78af9644c6b5
--- /dev/null
+++ b/solventmapcreator/io/solvationenergyextraction.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Script for extracting and splitting up solvation energies based on solvent.
+This is to produce more manageable files from the full information dump from
+the phasetransfer code.
+
+@author: mark
+"""
+
+import logging
+import copy
+from lxml import etree
+
+logging.basicConfig()
+LOGGER = logging.getLogger(__name__)
+LOGGER.setLevel(logging.INFO)
+
+PHASE_NAMESPACE_DICT = {"phase":"http://www-hunter.ch.cam.ac.uk/PhaseSchema"}
+
+PHASE = "{{{}}}".format(PHASE_NAMESPACE_DICT["phase"])
+
+def read_in_solvent_id_list(solvent_filename):
+    """This reads in the solvent XML file, and extracts the solvent IDs.
+
+    The file is parsed with read_xml_file and the resulting tree is passed to
+    get_solvent_id_list, whose result is returned.
+    """
+    solvent_list_element_tree = read_xml_file(solvent_filename)
+    return get_solvent_id_list(solvent_list_element_tree)
+
+def get_solvent_id_list(solvent_list_element_tree):
+    """This gets the solventIDs from the solvent list.
+
+    Returns the phase:solventID attribute value of every
+    SolventList/Solvents/Solvent element matched in the tree, and logs how
+    many were found.
+    """
+    xpath_expression = "phase:SolventList/phase:Solvents/phase:Solvent/@phase:solventID"
+    solvent_ids = solvent_list_element_tree.xpath(xpath_expression,
+                                                  namespaces=PHASE_NAMESPACE_DICT)
+    LOGGER.info("Solvent IDs found %i", len(solvent_ids))
+    return solvent_ids
+
+def create_output_filename(to_solvent_id, energy_type):
+    """This creates an output filename based on the solvent id and energy type.
+
+    Commas in the solvent id are replaced with underscores, the energy type is
+    appended directly, and the ".xml" extension is added.
+    """
+    filename_stem = to_solvent_id.replace(",", "_")
+    filename_stem += energy_type
+    return filename_stem + ".xml"
+
+def write_element_tree_to_file(element_tree, filename):
+    """Writes element tree to file as pretty printed UTF-8 XML, including the
+    XML declaration.
+    """
+    element_tree.write(filename, encoding="UTF-8", xml_declaration=True,
+                       pretty_print=True)
+
+def create_element_tree(element):
+    """This creates an element tree with the given element as its root, ready
+    for writing to file.
+    """
+    return etree.ElementTree(element)
+
+def generate_energy_values_element(energy_elements, energy_type):
+    """This generates an EnergyValues element, containing the information given
+    in the energy elements within a Collection element of the appropriate type.
+
+    The energy type must be "binding" or "free". Any other value raises a
+    ValueError, rather than silently returning None.
+    """
+    if energy_type == "binding":
+        energy_collection = generate_binding_energy_collection(energy_elements)
+    elif energy_type == "free":
+        energy_collection = generate_free_energy_collection(energy_elements)
+    else:
+        raise ValueError("Unknown energy type: {!r}".format(energy_type))
+    energy_values = create_energy_values_element()
+    energy_values.append(energy_collection)
+    return energy_values
+
+def generate_free_energy_collection(free_energy_elements):
+    """This generates a FreeEnergyCollection element, with the given FreeEnergy
+    elements.
+    """
+    free_energy_collection = create_free_energy_collection_element()
+    append_energies_to_collection(free_energy_collection, free_energy_elements)
+    return free_energy_collection
+
+def generate_binding_energy_collection(binding_energy_elements):
+    """This generates a BindingEnergyCollection element, with the given BindingEnergy
+    elements.
+    """
+    binding_energy_collection = create_binding_energy_collection_element()
+    append_energies_to_collection(binding_energy_collection, binding_energy_elements)
+    return binding_energy_collection
+
+def append_energies_to_collection(energy_collection_element, energy_elements):
+    """This appends the energies to the collection element.
+
+    A deep copy of each energy element is appended, so the elements passed in
+    are not moved out of the tree they belong to.
+    """
+    for energy_element in energy_elements:
+        energy_collection_element.append(copy.deepcopy(energy_element))
+
+def create_energy_values_element():
+    """This creates a new EnergyValues Element.
+    """
+    return etree.Element(PHASE + "EnergyValues", nsmap=PHASE_NAMESPACE_DICT)
+
+def create_free_energy_collection_element():
+    """This creates a FreeEnergyCollection element.
+    """
+    return etree.Element(PHASE + "FreeEnergyCollection", nsmap=PHASE_NAMESPACE_DICT)
+
+def create_binding_energy_collection_element():
+    """This creates a BindingEnergyCollection element.
+    """
+    return etree.Element(PHASE + "BindingEnergyCollection", nsmap=PHASE_NAMESPACE_DICT)
+
+def get_energy_elements(element_tree, xpath_expression):
+    """This gets Energy elements that match the given XPath expression.
+
+    The expression is evaluated with the phase namespace prefix defined.
+    """
+    return element_tree.xpath(xpath_expression, namespaces=PHASE_NAMESPACE_DICT)
+
+def generate_free_energy_xpath_expression(to_solvent_id):
+    """This generates the XPath expression needed to get values for free energies.
+
+    It selects FreeEnergy elements with an empty fromSolventID and the given
+    toSolventID. The id is inserted between single quotes as is, so it must
+    not contain a single quote itself.
+    """
+    return "/phase:EnergyValues/phase:FreeEnergyCollection/phase:FreeEnergy[@phase:fromSolventID=''][@phase:toSolventID='{:s}']".format(to_solvent_id)
+
+def generate_binding_energy_xpath_expression(to_solvent_id):
+    """This generates the XPath expression needed to get values for binding energies.
+
+    It selects BindingEnergy elements with an empty fromSolventID and the given
+    toSolventID. The id is inserted between single quotes as is, so it must
+    not contain a single quote itself.
+    """
+    return "/phase:EnergyValues/phase:BindingEnergyCollection/phase:BindingEnergy[@phase:fromSolventID=''][@phase:toSolventID='{:s}']".format(to_solvent_id)
+
+def read_xml_file(filename):
+    """This reads in an xml file and returns ElementTree. Note that a parser
+    with huge_tree=True is used to handle the large file size.
+    """
+    parser = etree.XMLParser(huge_tree=True)
+    return etree.parse(filename, parser=parser)
+
diff --git a/solventmapcreator/test/iotest/solvationenergyextractiontest.py b/solventmapcreator/test/iotest/solvationenergyextractiontest.py
new file mode 100644
index 0000000000000000000000000000000000000000..80e7c6adae18f5ac1097bccc6828b360a4f51159
--- /dev/null
+++ b/solventmapcreator/test/iotest/solvationenergyextractiontest.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Script for testing the solvation energy extraction methods.
+
+@author: mark
+"""
+
+import logging
+import unittest
+import os
+from lxml import etree
+import solventmapcreator.io.solvationenergyextraction as solvationenergyextraction
+
+logging.basicConfig()
+LOGGER = logging.getLogger(__name__)
+LOGGER.setLevel(logging.DEBUG)
+
+class SolvationEnergyExtractionTestCase(unittest.TestCase):
+    """Test case for solvationenergyextraction.
+    """
+    def setUp(self):
+        """Set up for tests
+        """
+    def tearDown(self):
+        """clean up after tests.
+        """
+    def test_(self):
+        """
+        """
+        self.fail("not implemented")
+    def test_read_in_solvent_id_list(self):
+        """
+        """
+        self.fail("not implemented")
+    def test_get_solvent_id_list(self):
+        """
+        """
+        self.fail("not implemented")
+    def test_create_output_filename(self):
+        """
+        """
+        self.fail("not implemented")
+    def test_write_element_tree_to_file(self):
+        """
+        """
+        self.fail("not implemented")
+    def test_create_element_tree(self):
+        """
+        """
+        self.fail("not implemented")
+    def test_generate_energy_values_element(self):
+        """
+        """
+        self.fail("not implemented")
+    def test_generate_free_energy_collection(self):
+        """
+        """
+        self.fail("not implemented")
+    def test_generate_binding_energy_collection(self):
+        """
+        """
+        self.fail("not implemented")
+    def test_append_energies_to_collection(self):
+        """
+        """
+        self.fail("not implemented")
+    def test_create_energy_values_element(self):
+        """
+        """
+        self.fail("not implemented")
+    def test_create_free_energy_collection_element(self):
+        """
+        """
+        self.fail("not implemented")
+    def test_create_binding_energy_collection_element(self):
+        """
+        """
+        self.fail("not implemented")
+    def test_get_energy_elements(self):
+        """
+        """
+        self.fail("not implemented")
+    def test_generate_free_energy_xpath_expression(self):
+        """
+        """
+        self.fail("not implemented")
+    def test_generate_binding_energy_xpath_expression(self):
+        """
+        """
+        self.fail("not implemented")
+    def test_read_xml_file(self):
+        """
+        """
+        self.fail("not implemented")
\ No newline at end of file