diff --git a/solventmapcreator/polynomialanalysis/polynomialplotting.py b/solventmapcreator/polynomialanalysis/polynomialplotting.py index 964efbf7e1aa58658859c68f5e331fa00f65c25c..e4eb7aaeb9fd6debdc9523787f4816aa426e1b8e 100644 --- a/solventmapcreator/polynomialanalysis/polynomialplotting.py +++ b/solventmapcreator/polynomialanalysis/polynomialplotting.py @@ -18,10 +18,24 @@ logging.basicConfig() LOGGER = logging.getLogger(__name__) LOGGER.setLevel(logging.WARN) -def plot_binding_energies_from_file(free_energy_filename, polynomial_order, **kwargs): +def plot_free_energies_from_file(free_energy_filename, polynomial_order, **kwargs): """This creates in the plotting information, and outputs a plot to file. """ - plot_data = parse_binding_energies_create_plot_input_data(free_energy_filename, + plot_data = parse_free_energies_create_plot_input_data(free_energy_filename, + polynomial_order, + kwargs.get("pos_subset_list", + None), + kwargs.get("neg_subset_list", + None)) + if type(plot_data['polynomial coefficients']) is dict: + return create_scatter_with_split_poly(plot_data, **kwargs) + else: + return plottinginput.create_scatter_graph_with_polynomial(plot_data) + +def plot_binding_energies_from_file(binding_energy_filename, polynomial_order, **kwargs): + """This creates in the plotting information, and outputs a plot to file. + """ + plot_data = parse_binding_energies_create_plot_input_data(binding_energy_filename, polynomial_order, kwargs.get("pos_subset_list", None), @@ -51,18 +65,32 @@ def create_scatter_with_split_poly(input_data, **kwargs): plottinginput.plt.close(scatter_plot) return 0 -def parse_binding_energies_create_plot_input_data(free_energy_filename, polynomial_order, +def parse_free_energies_create_plot_input_data(free_energy_filename, polynomial_order, pos_subset_list=None, neg_subset_list=None): """This creates the input data for a plot. This overrides the default figure label, so that the figures can be distinguished based on the solvent. """ - datapoints = parse_binding_energy_from_file_with_data_arrays(free_energy_filename) + datapoints = parse_free_energy_from_file_with_data_arrays(free_energy_filename) + plot_data = datapoints.generateScatterPlotParameters(label_type='', polynomial_order=polynomial_order) + if pos_subset_list != None and neg_subset_list != None: + poly_coeff_dict = generate_poly_pos_neg_separate_fit(datapoints, polynomial_order, + pos_subset_list, neg_subset_list) + plot_data['polynomial coefficients'] = poly_coeff_dict + plot_data['figure_label'] = free_energy_filename.replace('.xml', '_fe') + return plot_data + +def parse_binding_energies_create_plot_input_data(binding_energy_filename, polynomial_order, + pos_subset_list=None, neg_subset_list=None): + """This creates the input data for a plot. This overrides the default figure + label, so that the figures can be distinguished based on the solvent. + """ + datapoints = parse_binding_energy_from_file_with_data_arrays(binding_energy_filename) plot_data = datapoints.generateScatterPlotParameters(label_type='', polynomial_order=polynomial_order) if pos_subset_list != None and neg_subset_list != None: poly_coeff_dict = generate_poly_pos_neg_separate_fit(datapoints, polynomial_order, pos_subset_list, neg_subset_list) plot_data['polynomial coefficients'] = poly_coeff_dict - plot_data['figure_label'] = free_energy_filename.replace('.xml', '') + plot_data['figure_label'] = binding_energy_filename.replace('.xml', '_be') return plot_data def generate_poly_pos_neg_separate_fit(datapoints, order, pos_subset_list, neg_subset_list): @@ -76,9 +104,18 @@ def generate_poly_pos_neg_separate_fit(datapoints, order, pos_subset_list, neg_s return {"positive":poly_dict["positive"]["coefficients"], "negative":poly_dict["negative"]["coefficients"]} -def parse_poly_data_to_file_split_fit(binding_energy_filename, order_list, poly_filename, +def parse_free_energy_poly_data_to_file_split_fit(free_energy_filename, order_list, poly_filename, pos_subset_list, neg_subset_list): - """This reads in the energy information, and then performs the fits on the + """This reads in the binding energy information, and then performs the fits on the + two separate subsets, and outputs to file. + """ + datapoints = parse_free_energy_from_file_with_data_arrays(free_energy_filename) + return write_poly_data_to_file_split_fit(datapoints, order_list, poly_filename, + pos_subset_list, neg_subset_list) + +def parse_binding_energy_poly_data_to_file_split_fit(binding_energy_filename, order_list, poly_filename, + pos_subset_list, neg_subset_list): + """This reads in the binding energy information, and then performs the fits on the two separate subsets, and outputs to file. """ datapoints = parse_binding_energy_from_file_with_data_arrays(binding_energy_filename) @@ -96,21 +133,37 @@ def write_poly_data_to_file_split_fit(datapoints, order_list, filename, pos_subset_list, neg_subset_list) -def parse_binding_energies_and_output_poly_data_subset(free_energy_filename, order_list, poly_filename, subset_list): + +def parse_free_energies_and_output_poly_data_subset(free_energy_filename, order_list, poly_filename, subset_list): + """This reads in the energy information, and then selects teh subset of + points to carry out the analysis on, outputs the polynomial information to file. + """ + datapoints = parse_free_energy_from_file_with_data_arrays(free_energy_filename) + datapoints_subset = datapoints.createDatapointsSubGroup(subset_list) + datapoints_subset.createDataArrays() + return write_poly_data_to_file(datapoints_subset, order_list, poly_filename) + +def parse_binding_energies_and_output_poly_data_subset(binding_energy_filename, order_list, poly_filename, subset_list): """This reads in the energy information, and then selects teh subset of points to carry out the analysis on, outputs the polynomial information to file. """ - datapoints = parse_binding_energy_from_file_with_data_arrays(free_energy_filename) + datapoints = parse_binding_energy_from_file_with_data_arrays(binding_energy_filename) datapoints_subset = datapoints.createDatapointsSubGroup(subset_list) datapoints_subset.createDataArrays() return write_poly_data_to_file(datapoints_subset, order_list, poly_filename) +def parse_free_energies_and_output_poly_data(free_energy_filename, order_list, poly_filename): + """This reads in the energy information, and then outputs the polynomial + information to file. + """ + datapoints = parse_free_energy_from_file_with_data_arrays(free_energy_filename) + return write_poly_data_to_file(datapoints, order_list, poly_filename) -def parse_binding_energies_and_output_poly_data(free_energy_filename, order_list, poly_filename): +def parse_binding_energies_and_output_poly_data(binding_energy_filename, order_list, poly_filename): """This reads in the energy information, and then outputs the polynomial information to file. """ - datapoints = parse_binding_energy_from_file_with_data_arrays(free_energy_filename) + datapoints = parse_binding_energy_from_file_with_data_arrays(binding_energy_filename) return write_poly_data_to_file(datapoints, order_list, poly_filename) def write_poly_data_to_file(datapoints, order_list, filename): @@ -119,6 +172,12 @@ def write_poly_data_to_file(datapoints, order_list, filename): """ return polynomialdatawriter.write_poly_data_to_file(datapoints, order_list, filename) +def parse_free_energy_from_file_with_data_arrays(filename): + """This reads in the information form file to a Datapoints object, and + creates the data arrays. + """ + return solvationenergyreader.parse_free_energy_from_file_with_data_arrays(filename) + def parse_binding_energy_from_file_with_data_arrays(filename): """This reads in the information form file to a Datapoints object, and creates the data arrays. diff --git a/solventmapcreator/test/polynomialanalysistest/polynomialplottingtest.py b/solventmapcreator/test/polynomialanalysistest/polynomialplottingtest.py index 0dea4c9b4683bccdff4af2534177b3995b133f02..56f27b32692c5d2b330eec431d084e433b2630d8 100644 --- a/solventmapcreator/test/polynomialanalysistest/polynomialplottingtest.py +++ b/solventmapcreator/test/polynomialanalysistest/polynomialplottingtest.py @@ -24,6 +24,7 @@ class PolynomialPlottingTestCase(unittest.TestCase): """Set up for tests """ self.expected_datapoints = solvationenergyreader.parse_binding_energy_from_file_with_data_arrays("resources/water.xml") + self.expected_free_energydatapoints = solvationenergyreader.parse_free_energy_from_file_with_data_arrays("resources/water_freeenergy.xml") def tearDown(self): """Clean up after tests. """ @@ -34,10 +35,78 @@ class PolynomialPlottingTestCase(unittest.TestCase): os.remove("actual_water_poly_fit_subset.csv") if os.path.isfile("actual_water_poly_fit_split.csv"): os.remove("actual_water_poly_fit_split.csv") + def test_parse_free_energies_create_plot_input_data(self): + """Test to see expected plot data is returned. + """ + expected_dict = {'figure_label': "resources/water_freeenergy_fe", + 'plot_axis_range':[-15.4 , 7.2 , -38.635444, 0.918583], + 'polynomial coefficients': np.array([5.18106356e-01, + 2.54615444e-01, + -3.76651666e-01, + -2.06376692e-02, + -3.76010096e-04]), + 'polynomial order': 4, + 'x_axis_range':[-15.4, 7.2000000000000002], + 'x_data': np.array([[-0.1, -11.1, -14.1, -15.4, -2.4, + -4.3, -5.4, -9.1, 0.5, 1.2, 7.2]]), + 'x_label': "SSIP Value", + 'y_axis_range': [-38.635444, 0.918583], + 'y_data': np.array([[0.9185830776018105, -26.013780093398356, + -34.8195308084233, -38.635444108544405, + -1.4082598726557432, -6.170784422419204, + -9.315464914185585, -20.143787975013183, + 0.6588682955012527, -0.4429837300591526, + -25.844114335864262]]), + 'y_label': "Solvation Free Energy/$kJmol^{-1}$"} + actual_dict = polynomialplotting.parse_free_energies_create_plot_input_data("resources/water_freeenergy.xml", 4) + LOGGER.debug(actual_dict["x_data"].tolist()) + LOGGER.debug(actual_dict["y_data"].tolist()) + self.assertListEqual(sorted(actual_dict.keys()), sorted(expected_dict.keys())) + for key in actual_dict.keys(): + LOGGER.debug("Key: %s", key) + if key == 'y_data' or key == 'x_data' or key == 'polynomial coefficients': + LOGGER.debug("assert equal array x and y data") + np.testing.assert_array_almost_equal(actual_dict[key], + expected_dict[key]) + elif key == 'plot_axis_range' or key == 'x_axis_range' or key == 'y_axis_range': + np.testing.assert_array_almost_equal(np.array(actual_dict[key]), + np.array(expected_dict[key])) + else: + LOGGER.debug("assert equal string") + self.assertEqual(actual_dict[key], expected_dict[key]) + #test to see if new functionality is supported + neg_set_list = ["-5.400solute", "-4.300solute", "-9.100solute", "-11.100solute", "-15.400solute"] + pos_set_list = ["0.500solute", "1.200solute", "7.200solute"] + actual_dict2 = polynomialplotting.parse_free_energies_create_plot_input_data("resources/water_freeenergy.xml", 4, + pos_set_list, neg_set_list) + self.assertListEqual(sorted(actual_dict2.keys()), sorted(expected_dict.keys())) + for key in actual_dict.keys(): + LOGGER.debug("Key: %s", key) + if key == 'y_data' or key == 'x_data': + LOGGER.debug("assert equal array x and y data") + np.testing.assert_array_almost_equal(actual_dict2[key], + expected_dict[key]) + elif key == 'polynomial coefficients': + self.assertListEqual(["negative", "positive"], + sorted(actual_dict2['polynomial coefficients'].keys())) + negative_coeffs = np.array([5.038843e+00, 2.290628e+00, -9.855830e-02, + -6.480660e-03, -1.545673e-04]) + positive_coeffs = np.array([1.351575, -1.317976, -0.126786, + -0.015156, -0.002038]) + np.testing.assert_array_almost_equal(negative_coeffs, + actual_dict2['polynomial coefficients']["negative"]) + np.testing.assert_array_almost_equal(positive_coeffs, + actual_dict2['polynomial coefficients']["positive"]) + elif key == 'plot_axis_range' or key == 'x_axis_range' or key == 'y_axis_range': + np.testing.assert_array_almost_equal(np.array(actual_dict2[key]), + np.array(expected_dict[key])) + else: + LOGGER.debug("assert equal string") + self.assertEqual(expected_dict[key], actual_dict2[key]) def test_parse_binding_energies_create_plot_input_data(self): """Test to see expected plot data is returned. """ - expected_dict = {'figure_label': "resources/water", + expected_dict = {'figure_label': "resources/water_be", 'plot_axis_range':[-15.4 , 7.2 , -38.635444, 0.918583], 'polynomial coefficients': np.array([5.18106356e-01, 2.54615444e-01, @@ -122,7 +191,7 @@ class PolynomialPlottingTestCase(unittest.TestCase): actual_dict["negative"]) np.testing.assert_array_almost_equal(positive_coeffs, actual_dict["positive"]) - def test_parse_poly_data_to_file_split_fit(self): + def test_free_energy_parse_poly_data_to_file_split_fit(self): """Test to see if expected fit is done on the positive and negative subsets after the file was read in. """ @@ -130,9 +199,26 @@ class PolynomialPlottingTestCase(unittest.TestCase): actual_file_name = "actual_water_poly_fit_split.csv" neg_set_list = ["-5.400solute", "-4.300solute", "-9.100solute", "-11.100solute", "-15.400solute"] pos_set_list = ["0.500solute", "1.200solute", "7.200solute"] - poly_file_out = polynomialplotting.parse_poly_data_to_file_split_fit("resources/water.xml", [2, 4], - actual_file_name, pos_set_list, - neg_set_list) + poly_file_out = polynomialplotting.parse_free_energy_poly_data_to_file_split_fit("resources/water_freeenergy.xml", [2, 4], + actual_file_name, pos_set_list, + neg_set_list) + self.assertEqual(0, poly_file_out) + with open(expected_file_name, 'r') as exp_file: + exp_file_lines = exp_file.readlines() + with open(actual_file_name, 'r') as act_file: + act_file_lines = act_file.readlines() + self.assertListEqual(act_file_lines, exp_file_lines) + def test_binding_energy_parse_poly_data_to_file_split_fit(self): + """Test to see if expected fit is done on the positive and negative subsets + after the file was read in. + """ + expected_file_name = "resources/water_poly_fit_split.csv" + actual_file_name = "actual_water_poly_fit_split.csv" + neg_set_list = ["-5.400solute", "-4.300solute", "-9.100solute", "-11.100solute", "-15.400solute"] + pos_set_list = ["0.500solute", "1.200solute", "7.200solute"] + poly_file_out = polynomialplotting.parse_binding_energy_poly_data_to_file_split_fit("resources/water.xml", [2, 4], + actual_file_name, pos_set_list, + neg_set_list) self.assertEqual(0, poly_file_out) with open(expected_file_name, 'r') as exp_file: exp_file_lines = exp_file.readlines() @@ -155,6 +241,20 @@ class PolynomialPlottingTestCase(unittest.TestCase): with open(actual_file_name, 'r') as act_file: act_file_lines = act_file.readlines() self.assertListEqual(act_file_lines, exp_file_lines) + def test_parse_free_energies_and_output_poly_data_subset(self): + """Test to see if expected_fit is done on a smaller subset of points. + """ + expected_file_name = "resources/water_poly_fit_subset.csv" + actual_file_name = "actual_water_poly_fit_subset.csv" + subset_list = ["-5.400solute", "-4.300solute", "-9.100solute", "-11.100solute", "-15.400solute"] + poly_file_out = polynomialplotting.parse_free_energies_and_output_poly_data_subset("resources/water_freeenergy.xml", + [2, 4], actual_file_name, subset_list) + self.assertEqual(0, poly_file_out) + with open(expected_file_name, 'r') as exp_file: + exp_file_lines = exp_file.readlines() + with open(actual_file_name, 'r') as act_file: + act_file_lines = act_file.readlines() + self.assertListEqual(act_file_lines, exp_file_lines) def test_parse_binding_energies_and_output_poly_data_subset(self): """Test to see if expected_fit is done on a smaller subset of points. """ @@ -168,6 +268,19 @@ class PolynomialPlottingTestCase(unittest.TestCase): with open(actual_file_name, 'r') as act_file: act_file_lines = act_file.readlines() self.assertListEqual(act_file_lines, exp_file_lines) + def test_parse_free_energies_and_output_poly_data(self): + """Test to see if expected polynomial fit information is outputted to file. + """ + expected_file_name = "resources/water_poly_fit.csv" + actual_file_name = "actual_water_poly_fit.csv" + poly_file_out = polynomialplotting.parse_free_energies_and_output_poly_data("resources/water_freeenergy.xml", + [2, 4], actual_file_name) + self.assertEqual(0, poly_file_out) + with open(expected_file_name, 'r') as exp_file: + exp_file_lines = exp_file.readlines() + with open(actual_file_name, 'r') as act_file: + act_file_lines = act_file.readlines() + self.assertListEqual(act_file_lines, exp_file_lines) def test_parse_binding_energies_and_output_poly_data(self): """Test to see if expected polynomial fit information is outputted to file. """ @@ -193,6 +306,11 @@ class PolynomialPlottingTestCase(unittest.TestCase): with open(actual_file_name, 'r') as act_file: act_file_lines = act_file.readlines() self.assertListEqual(act_file_lines, exp_file_lines) + def test_parse_free_energy_from_file_with_data_arrays(self): + """Test to see if expected datapoints are read in. + """ + actual_datapoints = polynomialplotting.parse_free_energy_from_file_with_data_arrays("resources/water_freeenergy.xml") + self.assertEqual(self.expected_free_energydatapoints, actual_datapoints) def test_parse_binding_energy_from_file_with_data_arrays(self): """Test to see if expected datapoints are read in. """ diff --git a/solventmapcreator/test/resources/water_freeenergy.xml b/solventmapcreator/test/resources/water_freeenergy.xml new file mode 100644 index 0000000000000000000000000000000000000000..bb8c15910d6d7452d2e01e8ced7ce16808991082 --- /dev/null +++ b/solventmapcreator/test/resources/water_freeenergy.xml @@ -0,0 +1,48 @@ +<?xml version='1.0' encoding='UTF-8'?> +<phase:EnergyValues xmlns:phase="http://www-hunter.ch.cam.ac.uk/PhaseSchema"> + <phase:FreeEnergyCollection><phase:FreeEnergy phase:moleculeID="-14.100solute" phase:fromSolventID="" phase:toSolventID="water" phase:valueType="MOLEFRACTION"> + <phase:TotalEnergy>-34.8195308084233</phase:TotalEnergy> + <phase:EnergyContributions><phase:EnergyContribution phase:ssipID="1">-34.8195308084233</phase:EnergyContribution></phase:EnergyContributions> + </phase:FreeEnergy> + <phase:FreeEnergy phase:moleculeID="-5.400solute" phase:fromSolventID="" phase:toSolventID="water" phase:valueType="MOLEFRACTION"> + <phase:TotalEnergy>-9.315464914185585</phase:TotalEnergy> + <phase:EnergyContributions><phase:EnergyContribution phase:ssipID="1">-9.315464914185585</phase:EnergyContribution></phase:EnergyContributions> + </phase:FreeEnergy> + <phase:FreeEnergy phase:moleculeID="-4.300solute" phase:fromSolventID="" phase:toSolventID="water" phase:valueType="MOLEFRACTION"> + <phase:TotalEnergy>-6.170784422419204</phase:TotalEnergy> + <phase:EnergyContributions><phase:EnergyContribution phase:ssipID="1">-6.170784422419204</phase:EnergyContribution></phase:EnergyContributions> + </phase:FreeEnergy> + <phase:FreeEnergy phase:moleculeID="7.200solute" phase:fromSolventID="" phase:toSolventID="water" phase:valueType="MOLEFRACTION"> + <phase:TotalEnergy>-25.844114335864262</phase:TotalEnergy> + <phase:EnergyContributions><phase:EnergyContribution phase:ssipID="1">-25.844114335864262</phase:EnergyContribution></phase:EnergyContributions> + </phase:FreeEnergy> + <phase:FreeEnergy phase:moleculeID="0.500solute" phase:fromSolventID="" phase:toSolventID="water" phase:valueType="MOLEFRACTION"> + <phase:TotalEnergy>0.6588682955012527</phase:TotalEnergy> + <phase:EnergyContributions><phase:EnergyContribution phase:ssipID="1">0.6588682955012527</phase:EnergyContribution></phase:EnergyContributions> + </phase:FreeEnergy> + <phase:FreeEnergy phase:moleculeID="-0.100solute" phase:fromSolventID="" phase:toSolventID="water" phase:valueType="MOLEFRACTION"> + <phase:TotalEnergy>0.9185830776018105</phase:TotalEnergy> + <phase:EnergyContributions><phase:EnergyContribution phase:ssipID="1">0.9185830776018105</phase:EnergyContribution></phase:EnergyContributions> + </phase:FreeEnergy> + <phase:FreeEnergy phase:moleculeID="-2.400solute" phase:fromSolventID="" phase:toSolventID="water" phase:valueType="MOLEFRACTION"> + <phase:TotalEnergy>-1.4082598726557432</phase:TotalEnergy> + <phase:EnergyContributions><phase:EnergyContribution phase:ssipID="1">-1.4082598726557432</phase:EnergyContribution></phase:EnergyContributions> + </phase:FreeEnergy> + <phase:FreeEnergy phase:moleculeID="1.200solute" phase:fromSolventID="" phase:toSolventID="water" phase:valueType="MOLEFRACTION"> + <phase:TotalEnergy>-0.4429837300591526</phase:TotalEnergy> + <phase:EnergyContributions><phase:EnergyContribution phase:ssipID="1">-0.4429837300591526</phase:EnergyContribution></phase:EnergyContributions> + </phase:FreeEnergy> + <phase:FreeEnergy phase:moleculeID="-9.100solute" phase:fromSolventID="" phase:toSolventID="water" phase:valueType="MOLEFRACTION"> + <phase:TotalEnergy>-20.143787975013183</phase:TotalEnergy> + <phase:EnergyContributions><phase:EnergyContribution phase:ssipID="1">-20.143787975013183</phase:EnergyContribution></phase:EnergyContributions> + </phase:FreeEnergy> + <phase:FreeEnergy phase:moleculeID="-11.100solute" phase:fromSolventID="" phase:toSolventID="water" phase:valueType="MOLEFRACTION"> + <phase:TotalEnergy>-26.013780093398356</phase:TotalEnergy> + <phase:EnergyContributions><phase:EnergyContribution phase:ssipID="1">-26.013780093398356</phase:EnergyContribution></phase:EnergyContributions> + </phase:FreeEnergy> + <phase:FreeEnergy phase:moleculeID="-15.400solute" phase:fromSolventID="" phase:toSolventID="water" phase:valueType="MOLEFRACTION"> + <phase:TotalEnergy>-38.635444108544405</phase:TotalEnergy> + <phase:EnergyContributions><phase:EnergyContribution phase:ssipID="1">-38.635444108544405</phase:EnergyContribution></phase:EnergyContributions> + </phase:FreeEnergy> + </phase:FreeEnergyCollection> +</phase:EnergyValues>