diff --git a/solventmapcreator/clusteringanalysis/clustercalculation.py b/solventmapcreator/clusteringanalysis/clustercalculation.py
new file mode 100644
index 0000000000000000000000000000000000000000..09a47dbeeeb70100c54d21a93ae8bb393294e2bb
--- /dev/null
+++ b/solventmapcreator/clusteringanalysis/clustercalculation.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Script for running the clustering analysis on a matrix.
+
+Each function accepts either a condensed 1-D distance vector or a square
+2-D distance matrix whose upper triangle holds the pairwise distances.
+
+@author: mark
+"""
+
+import logging
+import numpy as np
+import scipy.cluster.hierarchy as cluster
+
+
+logging.basicConfig()
+LOGGER = logging.getLogger(__name__)
+LOGGER.setLevel(logging.WARN)
+
+def ward_clustering(triangular_distance_matrix):
+    """This calculates the ward linkage for clustering.
+    """
+    return calculate_clustering(triangular_distance_matrix, "ward")
+
+def wpgmc_clustering(triangular_distance_matrix):
+    """This calculates the median/WPGMC linkage for clustering.
+    """
+    return calculate_clustering(triangular_distance_matrix, "median")
+
+def upgmc_clustering(triangular_distance_matrix):
+    """This calculates the centroid/UPGMC linkage for clustering.
+    """
+    return calculate_clustering(triangular_distance_matrix, "centroid")
+
+def wpgma_clustering(triangular_distance_matrix):
+    """This calculates the weighted/WPGMA linkage for clustering.
+    """
+    return calculate_clustering(triangular_distance_matrix, "weighted")
+
+def upgma_clustering(triangular_distance_matrix):
+    """This calculates the average/UPGMA linkage for clustering.
+    """
+    return calculate_clustering(triangular_distance_matrix, "average")
+
+def max_clustering(triangular_distance_matrix):
+    """This calculates the complete/max/farthest linkage for clustering.
+    """
+    return calculate_clustering(triangular_distance_matrix, "complete")
+
+def min_clustering(triangular_distance_matrix):
+    """This calculates the single/min/nearest linkage for clustering.
+    """
+    return calculate_clustering(triangular_distance_matrix, "single")
+
+def calculate_clustering(triangular_distance_matrix, method):
+    """This calculates the clustering for the given input method.
+
+    A square 2-D input is read as a distance matrix: the entries above its
+    diagonal are flattened row by row into the condensed form expected by
+    scipy.cluster.hierarchy.linkage. Passing the square matrix straight to
+    scipy would instead treat every row as an observation vector and
+    cluster on the Euclidean distances between rows. Any other input, such
+    as an already condensed 1-D vector, is handed to scipy unchanged.
+
+    Returns the scipy linkage matrix: one row per merge holding the two
+    cluster indices, the merge distance and the size of the new cluster.
+    """
+    distances = np.asarray(triangular_distance_matrix)
+    if distances.ndim == 2 and distances.shape[0] == distances.shape[1]:
+        # Only the strict upper triangle is read, so upper triangular and
+        # full symmetric matrices give the same result.
+        distances = distances[np.triu_indices(distances.shape[0], k=1)]
+    return cluster.linkage(distances, method=method)
diff --git a/solventmapcreator/test/clusteringanalysistest/clustercalculationtest.py b/solventmapcreator/test/clusteringanalysistest/clustercalculationtest.py
new file mode 100644
index 0000000000000000000000000000000000000000..0dc3ed8694faa2bca89b98925d9fcb02716be53e
--- /dev/null
+++ b/solventmapcreator/test/clusteringanalysistest/clustercalculationtest.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Script for testing clustering analysis.
+
+@author: mark
+"""
+
+import logging
+import numpy as np
+import unittest
+import solventmapcreator.clusteringanalysis.clustercalculation as clustercalculation
+
+logging.basicConfig()
+LOGGER = logging.getLogger(__name__)
+LOGGER.setLevel(logging.WARN)
+
+class ClusterCalculationTestCase(unittest.TestCase):
+    """Test case for cluster calculation script.
+    """
+    def setUp(self):
+        """Set up before tests. Using example matrix from the wikipedia page for the UPGMA.
+        https://en.wikipedia.org/wiki/UPGMA.
+        """
+        self.example_distance_matrix = np.array([[0, 17, 21, 31, 23],
+                                                 [0, 0, 30, 34, 21],
+                                                 [0, 0, 0, 28, 39],
+                                                 [0, 0, 0, 0, 43],
+                                                 [0, 0, 0, 0, 0]])
+    def tearDown(self):
+        """Clean up after test.
+        """
+        del self.example_distance_matrix
+    def test_ward_clustering(self):
+        """Test to see if expected clustering matrix is produced for
+        Ward linkages.
+        """
+        expected_linkage_matrix = np.array([[0., 1., 17., 2.],
+                                            [4., 5., 23.459184, 3.],
+                                            [2., 3., 28., 2.],
+                                            [6., 7., 43.875582, 5.]])
+        actual_matrix = clustercalculation.ward_clustering(self.example_distance_matrix)
+        np.testing.assert_array_almost_equal(expected_linkage_matrix, actual_matrix)
+    def test_wpgmc_clustering(self):
+        """Test to see if expected clustering matrix is produced for
+        WPGMC linkages.
+        """
+        expected_linkage_matrix = np.array([[0., 1., 17., 2.],
+                                            [4., 5., 20.31625, 3.],
+                                            [2., 3., 28., 2.],
+                                            [6., 7., 30.650245, 5.]])
+        actual_matrix = clustercalculation.wpgmc_clustering(self.example_distance_matrix)
+        np.testing.assert_array_almost_equal(expected_linkage_matrix, actual_matrix)
+    def test_upgmc_clustering(self):
+        """Test to see if expected clustering matrix is produced for
+        UPGMC linkages.
+        """
+        expected_linkage_matrix = np.array([[0., 1., 17., 2.],
+                                            [4., 5., 20.31625, 3.],
+                                            [2., 3., 28., 2.],
+                                            [6., 7., 28.321566, 5.]])
+        actual_matrix = clustercalculation.upgmc_clustering(self.example_distance_matrix)
+        np.testing.assert_array_almost_equal(expected_linkage_matrix, actual_matrix)
+    def test_wpgma_clustering(self):
+        """Test to see if expected clustering matrix is produced for
+        weighted/WPGMA linkages.
+        """
+        expected_linkage_matrix = np.array([[0., 1., 17., 2.],
+                                            [4., 5., 22., 3.],
+                                            [2., 3., 28., 2.],
+                                            [6., 7., 35., 5.]])
+        actual_matrix = clustercalculation.wpgma_clustering(self.example_distance_matrix)
+        np.testing.assert_array_almost_equal(expected_linkage_matrix, actual_matrix)
+    def test_upgma_clustering(self):
+        """Test to see if expected clustering matrix is produced for
+        average/UPGMA linkages.
+        """
+        expected_linkage_matrix = np.array([[0., 1., 17., 2.],
+                                            [4., 5., 22., 3.],
+                                            [2., 3., 28., 2.],
+                                            [6., 7., 33., 5.]])
+        actual_matrix = clustercalculation.upgma_clustering(self.example_distance_matrix)
+        np.testing.assert_array_almost_equal(expected_linkage_matrix, actual_matrix)
+    def test_max_clustering(self):
+        """Test to see if expected clustering matrix is produced for
+        maximum linkages.
+        """
+        expected_linkage_matrix = np.array([[0., 1., 17., 2.],
+                                            [4., 5., 23., 3.],
+                                            [2., 3., 28., 2.],
+                                            [6., 7., 43., 5.]])
+        actual_matrix = clustercalculation.max_clustering(self.example_distance_matrix)
+        np.testing.assert_array_almost_equal(expected_linkage_matrix, actual_matrix)
+    def test_min_clustering(self):
+        """Test to see if expected merge distances and cluster sizes are
+        produced for minimum linkages. Two merges tie at a distance of 21,
+        so the order in which they are reported is not checked.
+        """
+        expected_distances_and_sizes = np.array([[17., 2.],
+                                                 [21., 3.],
+                                                 [21., 4.],
+                                                 [28., 5.]])
+        actual_matrix = clustercalculation.min_clustering(self.example_distance_matrix)
+        np.testing.assert_array_almost_equal(expected_distances_and_sizes, actual_matrix[:, 2:])
+    def test_calculate_clustering(self):
+        """Test to see if expected clustering matrix is produced.
+        """
+        expected_linkage_matrix = np.array([[0., 1., 17., 2.],
+                                            [4., 5., 22., 3.],
+                                            [2., 3., 28., 2.],
+                                            [6., 7., 33., 5.]])
+        actual_matrix = clustercalculation.calculate_clustering(self.example_distance_matrix, "average")
+        np.testing.assert_array_almost_equal(expected_linkage_matrix, actual_matrix)
+    def test_calculate_clustering_condensed_input(self):
+        """Test to see if a condensed distance vector gives the same result
+        as the equivalent triangular matrix.
+        """
+        condensed_distances = np.array([17, 21, 31, 23, 30, 34, 21, 28, 39, 43])
+        expected_linkage_matrix = clustercalculation.calculate_clustering(self.example_distance_matrix, "average")
+        actual_matrix = clustercalculation.calculate_clustering(condensed_distances, "average")
+        np.testing.assert_array_almost_equal(expected_linkage_matrix, actual_matrix)
diff --git a/solventmapcreator/test/solvationmapcreatortests.py b/solventmapcreator/test/solvationmapcreatortests.py
index 4587d2ce6b7994c038988deb0e1197c7e677bcf0..3b8058ee33cb36e45088b70b030bd2e5b18d5640 100644
--- a/solventmapcreator/test/solvationmapcreatortests.py
+++ b/solventmapcreator/test/solvationmapcreatortests.py
@@ -22,6 +22,7 @@ from solventmapcreator.test.polynomialanalysistest.polynomialdataanalysistest im
 from solventmapcreator.test.polynomialanalysistest.polynomialcomparisontest import PolynomialComparisonTestCase
 from solventmapcreator.test.polynomialanalysistest.polynomialplottingtest import PolynomialPlottingTestCase
 from solventmapcreator.test.clusteringanalysistest.matrixinputtest import MatrixInputTestCase
+from solventmapcreator.test.clusteringanalysistest.clustercalculationtest import ClusterCalculationTestCase
 
 logging.basicConfig()
 LOGGER = logging.getLogger(__name__)
@@ -40,7 +41,7 @@ POLYNOMIAL_ANALYSIS_TEST_CASES = [PolynomialDataAnalysisTestCase,
                                   PolynomialComparisonTestCase,
                                   PolynomialPlottingTestCase]
 
-CLUSTERING_ANALYSIS_TEST_CASES = [MatrixInputTestCase]
+CLUSTERING_ANALYSIS_TEST_CASES = [MatrixInputTestCase, ClusterCalculationTestCase]
 
 def test_suite():
     """Function creates a test suite and then loads all the tests from the