FAQ | This is a LIVE service | Changelog

Skip to content
Snippets Groups Projects
Commit 6d4fad13 authored by Mark Driver's avatar Mark Driver
Browse files

add similarityanalysis and unimplemented tests.

parent 228c9eab
No related branches found
No related tags found
No related merge requests found
......@@ -15,14 +15,193 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
Created on Tue Jan 7 16:34:17 2020
Script to carry out similarity analysis.
@author: Mark
"""
import logging
import pandas
import pathlib
import puresolventinformation.information as pureinf
import solventmapcreator.polynomialanalysis.multidimensionclustering as multicluster
import phasecalculator.io.polynomialio as polyio
# Apply the default logging configuration, then create this module's logger
# and limit it to messages of WARN level and above.
logging.basicConfig()
LOGGER = logging.getLogger(__name__)
LOGGER.setLevel(logging.WARN)
def read_polynomial_files(poly_filename_list, **kwargs):
    """Read polynomial files into a dictionary keyed by solvent ID.

    This is a thin wrapper: the file list and every keyword argument are
    forwarded unchanged to ``polyio.read_poly_data_to_dict``.

    Parameters
    ----------
    poly_filename_list : list
        list of polynomial file names.
    suffix : str, optional
        polynomial file suffix.
    temperature_dir : bool, optional
        if files are in temperature based directories, indicates to include
        the preceding directory name in the solvent ID, to be able to
        distinguish the same solvent composition at different temperatures.

    Returns
    -------
    dict
        dict of polynomial data by solvent ID.

    """
    poly_data_by_id = polyio.read_poly_data_to_dict(poly_filename_list, **kwargs)
    return poly_data_by_id
# Polynomial data for the files listed by the pure solvent information
# package, keyed by solvent ID. NOTE: this is read once, at import time.
PURE_SOLVENT_INFO = read_polynomial_files(pureinf.get_polynomial_filenames(), suffix="free_poly_split_fit.csv")
def compare_all_solvents(poly_filename_list, output_dir, output_filename):
    """Write the similarity matrix of the given solvents and the pure solvents.

    The polynomial files are read, merged with ``PURE_SOLVENT_INFO``, and the
    similarity matrix of the combined set is written as a single
    tab-separated file.

    Parameters
    ----------
    poly_filename_list : list
        list of polynomial file names.
    output_dir : str or path-like
        directory the output file is written to.
    output_filename : str
        name of the output file within ``output_dir``.

    Returns
    -------
    None.

    """
    solvent_polys = read_polynomial_files(
        poly_filename_list,
        suffix="free_poly_split_fit.csv",
        temperature_dir=True,
    )
    # Pure solvent entries are merged in last, so they take precedence if an
    # ID appears in both dictionaries.
    combined_polys = dict(solvent_polys)
    combined_polys.update(PURE_SOLVENT_INFO)
    similarity_frame = calculate_similarity_matrix(combined_polys)
    destination = pathlib.Path(output_dir).joinpath(output_filename).as_posix()
    write_frame_to_file(similarity_frame, destination)
def compare_with_pure_solvents(poly_filename_list, output_dir):
    """Write, for each given solvent, its similarity to the pure solvents.

    The similarity matrix is calculated once for the given solvents merged
    with ``PURE_SOLVENT_INFO``. One file per given solvent is then written to
    ``output_dir``, containing that solvent's comparison to the pure solvents.

    Parameters
    ----------
    poly_filename_list : list
        list of polynomial file names.
    output_dir : str or path-like
        directory the per-solvent files are written to.

    Returns
    -------
    None.

    """
    solvent_polys = read_polynomial_files(
        poly_filename_list,
        suffix="free_poly_split_fit.csv",
        temperature_dir=True,
    )
    # Pure solvent entries are merged in last, so they take precedence if an
    # ID appears in both dictionaries.
    combined_polys = dict(solvent_polys)
    combined_polys.update(PURE_SOLVENT_INFO)
    similarity_frame = calculate_similarity_matrix(combined_polys)
    # Only the solvents read from the supplied files get an output file.
    for solvent_id in solvent_polys:
        destination = create_slice_output_filename(solvent_id, output_dir)
        pure_comparison = extract_comparison_to_pure_solvents(
            similarity_frame, solvent_id
        )
        write_frame_to_file(pure_comparison, destination)
def compare_solvent_list(poly_filename_list, output_dir, output_filename):
    """Write the similarity matrix of the given solvents only.

    Unlike ``compare_all_solvents``, the pure solvent data are not merged in:
    the matrix covers just the solvents read from ``poly_filename_list``.

    Parameters
    ----------
    poly_filename_list : list
        list of polynomial file names.
    output_dir : str or path-like
        directory the output file is written to.
    output_filename : str
        name of the output file within ``output_dir``.

    Returns
    -------
    None.

    """
    solvent_polys = read_polynomial_files(
        poly_filename_list,
        suffix="free_poly_split_fit.csv",
        temperature_dir=True,
    )
    similarity_frame = calculate_similarity_matrix(solvent_polys)
    destination = pathlib.Path(output_dir).joinpath(output_filename).as_posix()
    write_frame_to_file(similarity_frame, destination)
def create_slice_output_filename(solv_id, output_dir):
    """Build the output file path for one solvent's similarity slice.

    Commas in the solvent ID are replaced with underscores and a ``.csv``
    extension is appended before joining onto the output directory.

    Parameters
    ----------
    solv_id : str
        solvent ID.
    output_dir : str or path-like
        directory the file will be placed in.

    Returns
    -------
    str
        POSIX-style (forward slash) path of the output file.

    """
    slice_name = "{}.csv".format(solv_id.replace(",", "_"))
    return pathlib.Path(output_dir).joinpath(slice_name).as_posix()
def write_frame_to_file(dframe, output_filename):
    """Write a data frame to file as tab-separated text.

    Parameters
    ----------
    dframe : pandas.DataFrame or pandas.Series
        object to write; anything providing a compatible ``to_csv`` method.
    output_filename : str
        destination file name.

    Returns
    -------
    None.

    """
    csv_options = {"sep": "\t"}
    dframe.to_csv(output_filename, **csv_options)
def extract_comparison_to_pure_solvents(similarity_dframe, solv_id):
    """Return one solvent's similarity to each pure solvent, sorted by value.

    Parameters
    ----------
    similarity_dframe : pandas.DataFrame
        similarity matrix, with solvent IDs as both row and column labels.
        The columns must include every key of ``PURE_SOLVENT_INFO``.
    solv_id : str
        row label of the solvent to extract.

    Returns
    -------
    pandas.Series
        values from the ``solv_id`` row for the pure solvent columns, sorted
        in ascending order of value.

    """
    # Use a plain list of labels as the column indexer rather than a
    # dictionary keys view.
    pure_solvent_ids = list(PURE_SOLVENT_INFO)
    pure_slice = similarity_dframe.loc[solv_id, pure_solvent_ids]
    # Series.sort() is not available in current pandas; sort_values()
    # returns a new Series ordered by value.
    return pure_slice.sort_values()
def calculate_similarity_matrix(poly_data_by_id):
    """Calculate the similarity matrix for a set of solvents.

    A stack of normalised RMSD matrices is generated over the similarity
    domain supplied by the pure solvent information package, condensed to a
    single mean matrix, and labelled with the sorted solvent IDs.

    Parameters
    ----------
    poly_data_by_id : dict
        dict of polynomial data by solvent ID.

    Returns
    -------
    pandas.DataFrame
        mean matrix, with the sorted solvent IDs as both index and columns.

    """
    domain_x_values, domain_norm_coeffs = pureinf.get_similarity_domain_information()
    # Only the matrix stack is used; the second returned item is discarded.
    # NOTE(review): the literal 8 is presumably the polynomial order - confirm
    # against multicluster.generate_normalised_rmsd_matrix_stack.
    rmsd_stack, _ = multicluster.generate_normalised_rmsd_matrix_stack(
        domain_x_values, 8, poly_data_by_id, norm_coefficients=domain_norm_coeffs
    )
    averaged_matrix = multicluster.condense_matrix_to_mean(rmsd_stack)
    # Assumes the matrix rows/columns follow sorted solvent ID order - TODO
    # confirm against the multicluster implementation.
    solvent_ids = sorted(poly_data_by_id)
    return pandas.DataFrame(averaged_matrix, index=solvent_ids, columns=solvent_ids)
# -*- coding: utf-8 -*-
# phasecalculator calculates FGIPs, solvent similarity and VLE with SSIMPLE.
# Copyright (C) 2019 Mark D. Driver
#
# phasecalculator is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
Script for similarity analysis tests.
@author: Mark
"""
import logging
import unittest
import pandas
import pathlib
import numpy as np
import phasecalculator.analysis.similarityanalysis as simanalysis
# Apply the default logging configuration, then create the test module's
# logger and limit it to messages of WARN level and above.
logging.basicConfig()
LOGGER = logging.getLogger(__name__)
LOGGER.setLevel(logging.WARN)
class SimilarityAnalysisTestCase(unittest.TestCase):
    """Test case for similarity analysis.

    Every test method is currently a placeholder that fails unconditionally
    with the message "not implemented".
    """

    def setUp(self):
        """Set up before tests.

        Currently does nothing.

        Returns
        -------
        None.

        """

    def tearDown(self):
        """Clean up after tests.

        Currently does nothing.

        Returns
        -------
        None.

        """

    def test_read_polynomial_files(self):
        """Placeholder test for read_polynomial_files; always fails.

        Returns
        -------
        None.

        """
        self.fail("not implemented")

    def test_compare_all_solvents(self):
        """Placeholder test for compare_all_solvents; always fails.

        Returns
        -------
        None.

        """
        self.fail("not implemented")

    def test_compare_with_pure_solvents(self):
        """Placeholder test for compare_with_pure_solvents; always fails.

        Returns
        -------
        None.

        """
        self.fail("not implemented")

    def test_compare_solvent_list(self):
        """Placeholder test for compare_solvent_list; always fails.

        Returns
        -------
        None.

        """
        self.fail("not implemented")

    def test_write_frame_to_file(self):
        """Placeholder test for write_frame_to_file; always fails.

        Returns
        -------
        None.

        """
        self.fail("not implemented")

    def test_extract_comparison_to_pure_solvents(self):
        """Placeholder test for extract_comparison_to_pure_solvents; always fails.

        Returns
        -------
        None.

        """
        self.fail("not implemented")

    def test_calculate_similarity_matrix(self):
        """Placeholder test for calculate_similarity_matrix; always fails.

        Returns
        -------
        None.

        """
        self.fail("not implemented")
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment