In [1]:
import re
import os
import pandas as pd
import numpy as np
import warnings

%matplotlib inline
In [2]:
import plotly
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
In [3]:
warnings.filterwarnings(action='ignore')
In [4]:
# loading same data as in Table X10 in the Supporting Information
inp = pd.read_csv('crystal-solvent_descriptors_delta_upd.txt', sep='\t', index_col=0)
In [5]:
inp.head()
Out[5]:
HB_possible maxZprime RMSD match_15 cm_mean_RPSA cm_mean_dipole Zprime packing_coefficient TD10_mean CN_mean ... Hildebrand solubility parameter Boiling point, oC Freezing point, oC logP_experimental surface tension, 25oC, mN/m thermal conductivity, W*m-1*K-1 dipole moment, D viscosity, mPa*s dielectric constant Polarizability, angstrom3
ABUDAD01_(monoclinic I)_ABUDAD_(monoclinic II) True 1.0 0.0792 1 0.153195 4.129650 0.0 0.006080 0.0 0.0 ... -9.4 -8.0 3.0 0.58 1.388704 -0.0394 1.25 -0.24 -12.24 3.1
ACIGID_(I)_ACIGID01_(II) True 1.0 0.0310 10 0.144616 4.421800 0.0 0.005960 0.0 0.0 ... 7.8 -32.6 -19.0 -3.01 -5.957542 0.0358 1.34 0.53 22.57 -7.2
ACSALA21-ethanol_(I)_ACSALA13_(II) True 1.0 0.0324 11 0.373401 2.448667 0.0 -0.001891 62.0 0.0 ... -1.7 3.6 70.0 -0.02 6.686611 0.0196 1.80 -0.74 11.00 -0.7
ACSALA21-acetone_(I)_ACSALA13_(II) True 1.0 0.0324 11 0.373401 2.448667 0.0 -0.001891 62.0 0.0 ... 4.1 25.6 51.0 -0.10 5.193754 0.0272 0.65 0.04 14.64 -2.0
ADIPAC13-acetic acid_(I)_ADIPAC05_(II) True 1.5 0.0622 15 0.622739 0.012033 1.0 -0.007670 -305.0 -0.5 ... -2.1 -41.0 -101.0 0.97 -3.724239 -0.0154 0.10 -0.69 -0.38 3.6

5 rows × 80 columns

In [6]:
# leaving only non-conformational polymorphs
inp = inp[inp['RMSD'] < 0.375]
In [7]:
inp.shape
Out[7]:
(190, 80)

plotly

In [8]:
# adding some data on crystal structures for interactive scatterplots
CRYSTAL_SUBSETS = r'.\..\1_polymorphs_subset_retrieval'

different_solvent_poly = pd.read_excel(os.path.join(CRYSTAL_SUBSETS, 'different_solvent_polymorphs.xlsx'))
dsp_data = pd.read_csv('DSC_full.txt', sep='\t')
In [9]:
delta_crystal_solvent_data = inp.reset_index()
In [10]:
def parse_data(string):

    p1, f1, p2, f2 = string.split('_')

    s1 = different_solvent_poly[different_solvent_poly['REFCODE-solvent'] == p1]['solvent'].values[0]
    s2 = different_solvent_poly[different_solvent_poly['REFCODE-solvent'] == p2]['solvent'].values[0]

    p1 = p1.split('-')[0]
    p2 = p2.split('-')[0]

    return [' {} / {}'.format(p1, p2),
            ' {} / {}'.format(f1.strip(')('), f2.strip(')(')),
            ' {} / {}'.format(s1, s2)]
In [11]:
def binarize_value(series):
    return ['more_polar' if v > series.median() else 'less_polar' for v in series]
In [12]:
def hb_possible(string):
    p1, f1, p2, f2 = string.split('_')

    hbp1 = dsp_data[dsp_data['REFCODE-solvent'] == p1]['HB_present'].values[0]
    hbp2 = dsp_data[dsp_data['REFCODE-solvent'] == p2]['HB_present'].values[0]
    
    return ' True' if max(hbp1, hbp2) else ' False'
In [13]:
def get_name(string):
    p1, f1, p2, f2 = string.split('_')

    name1 = dsp_data[dsp_data['REFCODE-solvent'] == p1]['[_chemical_name_systematic]'].values[0]
    name2 = dsp_data[dsp_data['REFCODE-solvent'] == p2]['[_chemical_name_systematic]'].values[0]

    return name1 if len(name1) < len(name2) else name2
In [14]:
delta_crystal_solvent_data['data'] = delta_crystal_solvent_data['index'].apply(parse_data)

delta_crystal_solvent_data['refcodes'] = delta_crystal_solvent_data['data'].apply(lambda x: x[0])
delta_crystal_solvent_data['polymorphs'] = delta_crystal_solvent_data['data'].apply(lambda x: x[1])
delta_crystal_solvent_data['solvents'] = delta_crystal_solvent_data['data'].apply(lambda x: x[2])

delta_crystal_solvent_data['HB_present'] = delta_crystal_solvent_data['index'].apply(hb_possible)
delta_crystal_solvent_data['name'] = delta_crystal_solvent_data['index'].apply(get_name)

delta_crystal_solvent_data['RPSA_large'] = binarize_value(delta_crystal_solvent_data['cm_mean_RPSA'])
delta_crystal_solvent_data['dipole_large'] = binarize_value(delta_crystal_solvent_data['cm_mean_dipole'])
In [15]:
# function that makes scatterplots
def plotly_scatter(X, Y, cm_prop, colouring):
    
    delta_crystal_solvent_data.rename(columns={'cm_mean_RPSA': 'mean_RPSA',
                                              'cm_mean_dipole': 'mean_dipole',
                                              X: '\u0394%s' % X,
                                              Y: '\u0394%s' % Y},
                                      inplace=True)

    cm_prop_new = cm_prop.replace('cm_', '')
    X_new = '\u0394%s' % X
    Y_new = '\u0394%s' % Y

    fig = px.scatter(delta_crystal_solvent_data,
                     x=X_new, y=Y_new,
                     color=colouring,
                     color_discrete_map={'more_polar': 'darkred', 'less_polar': 'navy'},
                     size=delta_crystal_solvent_data[cm_prop_new].fillna(0),
                     hover_name='name',
                     hover_data={'refcodes': True,
                                 'polymorphs': True,
                                 'solvents': True,
                                 cm_prop_new: ':.3f',
                                 colouring: False,
                                 X_new: ':.3f',
                                 Y_new: ':.3f'},
                     width=550, height=500
                    )

    fig.update_traces(marker=dict(opacity=0.6,
                                  line=dict(width=1.5, color='black')),
                      selector=dict(mode='markers')
                     )

    fig.update_xaxes(zeroline=True, zerolinewidth=1.5, zerolinecolor='black')
    fig.update_yaxes(zeroline=True, zerolinewidth=1.5, zerolinecolor='black')

    fig.update_layout(
        xaxis=dict(title_text=X_new, title_font=dict(size=20), tickfont=dict(size=17)),
        yaxis=dict(title_text=Y_new, title_font=dict(size=20), tickfont=dict(size=17))
                    )

    return fig

Dependence of the correlation strength on the polarity of compounds

Polarity as assesed by RPSA decsriptor

In [16]:
fig = make_subplots(rows=4, cols=2)

for i, (X, Y) in enumerate(
                            (
                                 ('CN_mean', 'TopoPSA'), ('CN_mean', 'dipole moment, D'),
                                 ('packing_coefficient', 'nHBAcc'), ('TD10_mean', 'TopoPSA'),
                                 ('packing_coefficient', 'dipole moment, D'), ('CN_mean', 'nHBAcc'),
                                 ('packing_coefficient', 'TopoPSA'), ('QC_px', 'TopoPSA')
                            )
                          ):

    cm_prop = 'cm_mean_RPSA'
    colouring = 'RPSA_large'

    px_fig = plotly_scatter(X, Y, cm_prop, colouring)
    trace1 = px_fig['data'][0]
    trace2 = px_fig['data'][1]
    
    r, c = i%4 + 1, i//4 + 1
    fig.add_trace(trace1, row=r, col=c)
    fig.add_trace(trace2, row=r, col=c)

    fig.update_xaxes(title='\u0394%s' % X.replace('_px', ''), title_font=dict(size=18), tickfont=dict(size=16),
                     zeroline=True, zerolinewidth=1.5, zerolinecolor='black',
                     row=r, col=c)
    fig.update_yaxes(title='\u0394%s' % Y, title_font=dict(size=18), tickfont=dict(size=16),
                     zeroline=True, zerolinewidth=1.5, zerolinecolor='black',
                     row=r, col=c)

fig.update_layout(width=950, height=1900)
# fig.write_html('./pics/Fig.i4_RPSA.html')

Polarity as assesed by molecualr dipole moment

In [17]:
fig = make_subplots(rows=4, cols=2)

for i, (X, Y) in enumerate(
                            (
                                 ('CN_mean', 'TopoPSA'), ('CN_mean', 'dipole moment, D'),
                                 ('packing_coefficient', 'nHBAcc'), ('TD10_mean', 'TopoPSA'),
                                 ('packing_coefficient', 'dipole moment, D'), ('CN_mean', 'nHBAcc'),
                                 ('packing_coefficient', 'TopoPSA'), ('QC_px', 'TopoPSA')
                            )
                          ):

    cm_prop = 'cm_mean_dipole'
    colouring = 'dipole_large'

    px_fig = plotly_scatter(X, Y, cm_prop, colouring)
    trace1 = px_fig['data'][0]
    trace2 = px_fig['data'][1]
    
    r, c = i%4 + 1, i//4 + 1
    fig.add_trace(trace1, row=r, col=c)
    fig.add_trace(trace2, row=r, col=c)

    fig.update_xaxes(title='\u0394%s' % X.replace('_px', ''), title_font=dict(size=18), tickfont=dict(size=16),
                     zeroline=True, zerolinewidth=1.5, zerolinecolor='black',
                     row=r, col=c)
    fig.update_yaxes(title='\u0394%s' % Y, title_font=dict(size=18), tickfont=dict(size=16),
                     zeroline=True, zerolinewidth=1.5, zerolinecolor='black',
                     row=r, col=c)

fig.update_layout(width=950, height=1900)
# fig.write_html('./pics/Fig.i4_dipole.html')

Clearly, more pronounced response on solvent polarity change is seen for more polar compounds. On the contrary, for less polar compounds there is almost no feedback on solvent polarity change