The Scientific Computing for Chemists course taught at Wabash College teaches chemistry students to use the Python programming language, Jupyter notebooks, and a number of common Python scientific libraries to process, analyze, and visualize data. Assuming no prior programming experience, the course introduces students to basic programming and applies these skills to solve a variety of chemical problems. The course is structured around Jupyter notebooks as easily shareable documents for lectures, homework sets, and projects; the software used in this course is free and open source, making it easily accessible to any school or research lab.
"The Jupyter Notebook is an open-source web application that allows you to create and share documents that contain live code, equations, visualizations and narrative text. Uses include: data cleaning and transformation, numerical simulation, statistical modeling, data visualization, machine learning, and much more."
Notebooks can be shared on GitHub. Check out my GitHub repository.
%lsmagic
%ls
Application of Hookes Law to vibrational frequency: $$\displaystyle \widetilde{\nu}={\frac{1}{2\pi c}}{\sqrt {\frac {k_{force}}{\mu}}} \tag{1}$$
Lennard-Jones potential: $$ LJ(r) = 4\epsilon[ {(\frac{\sigma}{r})}^{12} - {(\frac{\sigma}{r})}^{6} ] \tag{2} $$
%%bash
# Very basic bash script:
job="count" # the object that you want to toggle
num=10 # the max value of our loop
# Creating a list of elements
enu=(zero one two three four five six \
seven eight nine ten)
if [ $job == "count" ]; # if the job = "count", then...
then
for i in $(seq 0 $num);
do
if [ $i -eq 3 ] || [ $i -eq 7 ] ; # if i = 3 OR i = 7
then echo ${enu[i]}; # then print the ith element
else
echo $i;
fi
done
else
echo ${enu[$num]}; # If job ≠ count, then...
fi
import sympy #symbolic mathematics library in python
x,y,z = sympy.symbols('x,y,z') # creating our variables
f = (2 + x) * (3 + x) # creating our function
F = '(2 + x)(3 + x)' # string of the function
print(sympy.expand(f),sympy.solve(f))
print(sympy.solve(f))
print(sympy.diff(f,x)) # differentiate
print(sympy.integrate(f,x)) # integrate
sympy.init_printing() # pretty print
sympy.integrate(f,x)
%matplotlib inline
#matplotlib inline = have the plot render within the cell
import numpy as np # linear algebra/scientfic computing library
# from -50 to 50 with 22 evenly spaced points
x = np.linspace(-50,50,22)
print(len(x)) # print the length of the vector
print(x) # print the vector
y_ = [] # appending elements to this list
for i in range(0,len(x)):
y_.append(x[i]**3. /3. + 5.*x[i]**2. /2. + 6.*x[i])
y = np.array(y_) # convert the list into an array
# Lets plot the function:
from matplotlib import pyplot as plt
fig = plt.figure(figsize=(9,9)) # creating a figure object
ax = fig.add_subplot(211) # setting the size of the
# plot of x and y and label it:
ax.plot(x,y,label='1st integration of %s'%F)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.legend(loc='best')
fig.show()
# Import "plot" script of my own creation
from plots import simple_plot as sp
# call on the function simple_plot:
sp(Type='scatter',size=211,fig_size=(9,9),x=x,y=y,
xlabel='x',ylabel='y',color='k',invert_x_axis=False,
fit=True,order=3,name='integration_plot.png')
%%bash
# Lets print the first 5 lines of this script:
head -n 5 ./plots.py
import openpyxl # from excel to python
wb = openpyxl.load_workbook('HPLC.xlsx')
Sheet1 = wb.get_sheet_by_name('Sheet1')
peak = [Sheet1.cell(row=i, column=k).value for i in range(2, 12) for k in range(1,2)]
area = [Sheet1.cell(row=i, column=k).value for i in range(2, 12) for k in range(4,5)]
height = [Sheet1.cell(row=i, column=k).value for i in range(2, 12) for k in range(5,6)]
tR = [Sheet1.cell(row=i, column=k).value for i in range(2, 12) for k in range(2,3)]
vol = [Sheet1.cell(row=i, column=k).value for i in range(2, 12) for k in range(3,4)]
print peak
print area
print vol
print tR
print height
%%bash
cd /volumes/rdrive/Fall2017/Instrumental_Design/Project/
ffmpeg -i dance_of_the_goblins.mp3 dance_of_the_goblins.wav
import os
wd = '/Volumes/RMR_4TB/Undergrad_courses/Instrumental_Design/Project/'
os.chdir(wd+'ThinkDSP/code')
from __future__ import print_function, division
import thinkdsp
import thinkplot
import scipy.fftpack
from scipy.fftpack import fft as fft
from scipy.fftpack import ifft as ifft
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
os.chdir(wd)
wave = wd+'dance_of_the_goblins.wav'
response = thinkdsp.read_wave(wave)
start = 2.0
duration = response.duration/65.
response = response.segment(start=start, duration=duration)
response.shift(-start)
response.normalize()
print(duration,'seconds')
response.make_audio()
segment = response.segment(start=start,duration=duration)
segment.normalize()
data = segment.quantize(bound=True,dtype=float)
print('Number of data points = ',len(data))
# Waveform:
total_time = segment.duration+start
x = np.array([total_time/len(data)*i for i in range(0,len(data),1)])
y = [data[i] for i in range(0,len(data),1)]
# Fourier Transform
# Number of sample points
n = len(y) # len = length of a vector
d = 1/response.framerate
print('timestep = ',d)
FT_y = fft(y)
freq = np.fft.fftfreq(n,d)
xf,yf = [],[]
for i in range(0,len(FT_y)):
if freq[i] > 0 and FT_y[i] >0:
yf.append(FT_y[i])
xf.append(freq[i])
%pylab inline
from plots import onebytwo
x1,x2 = x,xf
y1,y2 = y,yf
x1label,x2label = 'Time,t/(seconds)','Frequency, $\\nu$ /(Hz)'
y1label,y2label = 'Signal Intensity','Signal Intensity'
Type1,Type2 = 'line','line'
fit1,fit2 = False,False
color = 'k'
order1,order2 = 1,1
name = 'FT.png'
onebytwo(Type1=Type1,Type2=Type2,x1=x1,y1=y1,x2=x2,y2=y2,
x1label=x1label,y1label=y1label,x2label=x2label,
y2label=y2label,name=name,color=color,fit1=fit1,fit2=fit2,
order1=order1,order2=order2,
invert_x1_axis=False,invert_x2_axis=False)
The above method treats each point in the average the same and only takes the average with the immediately adjacent data points. The triangular smooth approach weights the different data points. For example, if we take the average with five data points as described below.
def tri_smooth(array):
'''
(ndarray) -> ndarray
For an ndarray x, returns a new array xs as the weighted average of each point
The new array xs is shorter than x, len(xs) = len(x)-4.
'''
sum = array[:-4] + 2*array[1:-3] + 3*array[2:-2] + 2*array[3:-1] + array[4:]
array_smooth = sum/9
return(array_smooth)
from scipy.fftpack import *
from scipy.signal import *
# Weighted Moving average:
xs = tri_smooth(array=np.array(xf))
ys = tri_smooth(array=np.array(yf))
#
sfx = scipy.signal.savgol_filter(xf, window_length=55, polyorder=3)
sfy = scipy.signal.savgol_filter(yf, window_length=55, polyorder=3)
#x,y = scipy.ifft(X),scipy.ifft(Y)
%pylab inline
x1,x2 = xs,sfx
y1,y2 = ys,sfy
x1label,x2label = '','Frequency, $\\nu$ /(Hz)'
y1label,y2label = 'Signal Intensity','Signal Intensity'
Type1,Type2 = 'line','line'
fit1,fit2 = False,False
color = 'k'
order1,order2 = 1,1
name = 'FT.png'
onebytwo(Type1=Type1,Type2=Type2,x1=x1,y1=y1,x2=x2,y2=y2,
x1label=x1label,y1label=y1label,x2label=x2label,
y2label=y2label,name=name,color=color,fit1=fit1,fit2=fit2,
order1=order1,order2=order2,
invert_x1_axis=False,invert_x2_axis=False)
from IPython.core.display import display,HTML
# uncomment to render webpage in notebook
#display(HTML("https://en.wikipedia.org/wiki/Piano_key_frequencies"))
# Print the first 5000 frequencies, but only find the meaningful ones:
for i in range(0,len(yf)):
if xf[i] <= 5000:
#print(xf[i],yf[i])
if yf[i] >= 1500 and yf[i] <= np.max(yf):
print('High Intensity:',xf[i],yf[i])
elif yf[i] <= 750 and yf[i] >= 500:
print('Mid Intensity:',xf[i],yf[i])
Anaconda - "Anaconda is the world’s most popular Python data science platform". "Package Management: Manage packages, dependencies and environments with conda"
Numpy, Jupyter/IPython, Matplotlib, Scipy, SymPy, Scikit-image, Pandas
nbextensions - Jupyter Notebook extensions. This consists of line numbering,cell folding, various layouts, spell checking, etc.