import re
import os
import pandas as pd
import numpy as np
import warnings
%matplotlib inline
import plotly
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
warnings.filterwarnings(action='ignore')
# Load the same data as in Table X10 in the Supporting Information;
# the first column of the file is used as the row index.
inp = pd.read_csv('crystal-solvent_descriptors_delta_upd.txt', sep='\t', index_col=0)
inp.head()

# Keep only non-conformational polymorphs (rows whose RMSD is below the cutoff).
RMSD_CUTOFF = 0.375
inp = inp[inp['RMSD'] < RMSD_CUTOFF]
inp.shape

# Extra data on the crystal structures, used by the interactive scatterplots.
# The relative path is assembled from os.curdir / os.pardir instead of a
# hard-coded backslash string: a backslash is not a path separator on POSIX,
# while on Windows this produces exactly the same '.\..\<dir>' path as before.
CRYSTAL_SUBSETS = os.path.join(os.curdir, os.pardir, '1_polymorphs_subset_retrieval')
different_solvent_poly = pd.read_excel(os.path.join(CRYSTAL_SUBSETS, 'different_solvent_polymorphs.xlsx'))
dsp_data = pd.read_csv('DSC_full.txt', sep='\t')

# Turn the row index into a regular column so it can be processed with .apply().
delta_crystal_solvent_data = inp.reset_index()
def parse_data(string):
    """Build the three hover-label strings for one row identifier.

    ``string`` is split on '_' into exactly four fields: a first
    'REFCODE-solvent' key, a first form label, a second key and a second
    form label. The solvent of each key is looked up in the module-level
    ``different_solvent_poly`` table.

    Returns a list of three strings, each formatted as ' a / b':
    the two keys truncated at their first '-', the two form labels with
    surrounding parentheses stripped, and the two solvents.

    Raises ValueError if the identifier does not have four fields and
    IndexError if a key has no row in ``different_solvent_poly``.
    """
    first_key, first_form, second_key, second_form = string.split('_')
    pair_keys = (first_key, second_key)

    key_column = different_solvent_poly['REFCODE-solvent']
    solvents = [
        different_solvent_poly[key_column == key]['solvent'].values[0]
        for key in pair_keys
    ]
    refcodes = [key.split('-')[0] for key in pair_keys]
    forms = [label.strip(')(') for label in (first_form, second_form)]

    template = ' {} / {}'
    return [
        template.format(*refcodes),
        template.format(*forms),
        template.format(*solvents),
    ]
def binarize_value(series):
    """Label every value of ``series`` relative to the series median.

    Parameters
    ----------
    series : pandas.Series
        Numeric values to classify.

    Returns
    -------
    list of str
        'more_polar' for values strictly greater than the median and
        'less_polar' otherwise, in the order of ``series``. Values for which
        the comparison is false (ties with the median, NaN) get 'less_polar'.
    """
    # Compute the median once: evaluating it inside the comprehension
    # recomputed it for every element, which is quadratic in the series length.
    median = series.median()
    return ['more_polar' if value > median else 'less_polar' for value in series]
def hb_possible(string):
    """Report whether either entry of a row identifier has 'HB_present' set.

    ``string`` is split on '_' into four fields; the first and third are
    used as 'REFCODE-solvent' keys into the module-level ``dsp_data`` table.

    Returns ' True' if the larger of the two 'HB_present' values is truthy,
    otherwise ' False' (both strings carry a leading space).

    Raises ValueError if the identifier does not have four fields and
    IndexError if a key has no row in ``dsp_data``.
    """
    first_key, _, second_key, _ = string.split('_')
    flags = [
        dsp_data.loc[dsp_data['REFCODE-solvent'] == key, 'HB_present'].values[0]
        for key in (first_key, second_key)
    ]
    if max(flags):
        return ' True'
    return ' False'
def get_name(string):
    """Return the shorter of the two systematic names of a row identifier.

    ``string`` is split on '_' into four fields; the first and third are
    used as 'REFCODE-solvent' keys into the module-level ``dsp_data`` table,
    from which the '[_chemical_name_systematic]' value is read.

    The first name is returned only if it is strictly shorter; on equal
    lengths the second name wins.

    Raises ValueError if the identifier does not have four fields and
    IndexError if a key has no row in ``dsp_data``.
    """
    first_key, _, second_key, _ = string.split('_')
    name_column = '[_chemical_name_systematic]'
    key_column = dsp_data['REFCODE-solvent']

    first_name = dsp_data[key_column == first_key][name_column].values[0]
    second_name = dsp_data[key_column == second_key][name_column].values[0]

    if len(first_name) < len(second_name):
        return first_name
    return second_name
# Derive the hover-label and colouring columns used by the scatterplots.

# parse_data returns [refcodes, polymorphs, solvents] for every row identifier;
# keep the raw list in 'data' and unpack each position into its own column.
delta_crystal_solvent_data['data'] = delta_crystal_solvent_data['index'].apply(parse_data)
for _position, _column in enumerate(('refcodes', 'polymorphs', 'solvents')):
    # Bind the position as a default argument so each lambda keeps its own index.
    delta_crystal_solvent_data[_column] = delta_crystal_solvent_data['data'].apply(
        lambda parts, k=_position: parts[k]
    )

# Columns computed directly from the row identifier.
for _column, _builder in (('HB_present', hb_possible), ('name', get_name)):
    delta_crystal_solvent_data[_column] = delta_crystal_solvent_data['index'].apply(_builder)

# Two-class labels (above / not above the median) used to colour the markers.
for _column, _source in (('RPSA_large', 'cm_mean_RPSA'), ('dipole_large', 'cm_mean_dipole')):
    delta_crystal_solvent_data[_column] = binarize_value(delta_crystal_solvent_data[_source])
def plotly_scatter(X, Y, cm_prop, colouring):
    """Make an interactive scatterplot of two columns of the shared data table.

    The data come from the module-level ``delta_crystal_solvent_data`` frame.
    For display, columns ``X`` and ``Y`` are shown with a leading delta sign
    and the 'cm_' prefix is dropped from the marker-size column.

    Parameters
    ----------
    X, Y : str
        Names of the columns plotted on the x and y axes.
    cm_prop : str
        Name of the column that sets the marker size (missing values are
        replaced by 0).
    colouring : str
        Name of the column that sets the marker colour; the values
        'more_polar' and 'less_polar' are mapped to dark red and navy.

    Returns
    -------
    plotly.graph_objects.Figure
        The figure produced by ``plotly.express.scatter`` with styled markers,
        zero lines and axis titles.
    """
    cm_prop_new = cm_prop.replace('cm_', '')
    X_new = '\u0394%s' % X
    Y_new = '\u0394%s' % Y

    # Rename on a copy rather than in place: mutating the shared frame made the
    # original column names disappear after the first call, so any later code
    # (or a re-run of earlier cells) that still used them would fail.
    display_names = {'cm_mean_RPSA': 'mean_RPSA',
                     'cm_mean_dipole': 'mean_dipole',
                     cm_prop: cm_prop_new,
                     X: X_new,
                     Y: Y_new}
    plot_data = delta_crystal_solvent_data.rename(columns=display_names)

    fig = px.scatter(plot_data,
                     x=X_new, y=Y_new,
                     color=colouring,
                     color_discrete_map={'more_polar': 'darkred', 'less_polar': 'navy'},
                     size=plot_data[cm_prop_new].fillna(0),
                     hover_name='name',
                     hover_data={'refcodes': True,
                                 'polymorphs': True,
                                 'solvents': True,
                                 cm_prop_new: ':.3f',
                                 colouring: False,  # already conveyed by the marker colour
                                 X_new: ':.3f',
                                 Y_new: ':.3f'},
                     width=550, height=500
                     )
    # Semi-transparent markers with a black outline.
    fig.update_traces(marker=dict(opacity=0.6,
                                  line=dict(width=1.5, color='black')),
                      selector=dict(mode='markers')
                      )
    # Emphasise the zero lines on both axes.
    fig.update_xaxes(zeroline=True, zerolinewidth=1.5, zerolinecolor='black')
    fig.update_yaxes(zeroline=True, zerolinewidth=1.5, zerolinecolor='black')
    fig.update_layout(
        xaxis=dict(title_text=X_new, title_font=dict(size=20), tickfont=dict(size=17)),
        yaxis=dict(title_text=Y_new, title_font=dict(size=20), tickfont=dict(size=17))
    )
    return fig
Polarity as assessed by the RPSA descriptor
# 4x2 grid of scatterplots; marker size and colour come from the RPSA columns.
cm_prop = 'cm_mean_RPSA'
colouring = 'RPSA_large'
descriptor_pairs = (
    ('CN_mean', 'TopoPSA'), ('CN_mean', 'dipole moment, D'),
    ('packing_coefficient', 'nHBAcc'), ('TD10_mean', 'TopoPSA'),
    ('packing_coefficient', 'dipole moment, D'), ('CN_mean', 'nHBAcc'),
    ('packing_coefficient', 'TopoPSA'), ('QC_px', 'TopoPSA')
)

fig = make_subplots(rows=4, cols=2)
for i, (X, Y) in enumerate(descriptor_pairs):
    px_fig = plotly_scatter(X, Y, cm_prop, colouring)
    # The grid is filled column by column: panels 0-3 go to column 1, 4-7 to column 2.
    r, c = i % 4 + 1, i // 4 + 1
    # Add every trace the scatterplot produced instead of indexing data[0] and
    # data[1], which raises IndexError when one colour class has no points.
    for trace in px_fig.data:
        # Show legend entries for the first panel only; otherwise every panel
        # contributes its own copy of the same entries.
        trace.showlegend = (i == 0)
        fig.add_trace(trace, row=r, col=c)
    fig.update_xaxes(title='\u0394%s' % X.replace('_px', ''), title_font=dict(size=18), tickfont=dict(size=16),
                     zeroline=True, zerolinewidth=1.5, zerolinecolor='black',
                     row=r, col=c)
    fig.update_yaxes(title='\u0394%s' % Y, title_font=dict(size=18), tickfont=dict(size=16),
                     zeroline=True, zerolinewidth=1.5, zerolinecolor='black',
                     row=r, col=c)
fig.update_layout(width=950, height=1900)
# Uncomment to export the figure as a standalone HTML file.
# fig.write_html('./pics/Fig.i4_RPSA.html')
Polarity as assessed by the molecular dipole moment
# 4x2 grid of scatterplots; marker size and colour come from the dipole columns.
cm_prop = 'cm_mean_dipole'
colouring = 'dipole_large'
descriptor_pairs = (
    ('CN_mean', 'TopoPSA'), ('CN_mean', 'dipole moment, D'),
    ('packing_coefficient', 'nHBAcc'), ('TD10_mean', 'TopoPSA'),
    ('packing_coefficient', 'dipole moment, D'), ('CN_mean', 'nHBAcc'),
    ('packing_coefficient', 'TopoPSA'), ('QC_px', 'TopoPSA')
)

fig = make_subplots(rows=4, cols=2)
for i, (X, Y) in enumerate(descriptor_pairs):
    px_fig = plotly_scatter(X, Y, cm_prop, colouring)
    # The grid is filled column by column: panels 0-3 go to column 1, 4-7 to column 2.
    r, c = i % 4 + 1, i // 4 + 1
    # Add every trace the scatterplot produced instead of indexing data[0] and
    # data[1], which raises IndexError when one colour class has no points.
    for trace in px_fig.data:
        # Show legend entries for the first panel only; otherwise every panel
        # contributes its own copy of the same entries.
        trace.showlegend = (i == 0)
        fig.add_trace(trace, row=r, col=c)
    fig.update_xaxes(title='\u0394%s' % X.replace('_px', ''), title_font=dict(size=18), tickfont=dict(size=16),
                     zeroline=True, zerolinewidth=1.5, zerolinecolor='black',
                     row=r, col=c)
    fig.update_yaxes(title='\u0394%s' % Y, title_font=dict(size=18), tickfont=dict(size=16),
                     zeroline=True, zerolinewidth=1.5, zerolinecolor='black',
                     row=r, col=c)
fig.update_layout(width=950, height=1900)
# Uncomment to export the figure as a standalone HTML file.
# fig.write_html('./pics/Fig.i4_dipole.html')
Clearly, a more pronounced response to a change in solvent polarity is seen for more polar compounds. In contrast, less polar compounds show almost no response to a change in solvent polarity.