Interactions and ANOVA¶
Note: This script is based heavily on Jonathan Taylor’s class notes http://www.stanford.edu/class/stats191/interactions.html
Download and format data:
[1]:
%matplotlib inline
[2]:
from urllib.request import urlopen
import numpy as np
np.set_printoptions(precision=4, suppress=True)
import pandas as pd
pd.set_option("display.width", 100)
import matplotlib.pyplot as plt
from statsmodels.formula.api import ols
from statsmodels.graphics.api import interaction_plot, abline_plot
from statsmodels.stats.anova import anova_lm
# Load the salary data, preferring a locally cached copy and falling back to
# the course website when no cache exists yet.
try:
    salary_table = pd.read_csv('salary.table')
except OSError:  # no local copy (FileNotFoundError is a subclass of OSError)
    url = 'http://stats191.stanford.edu/data/salary.table'
    # Close the HTTP response as soon as the table has been parsed.
    with urlopen(url) as fh:
        salary_table = pd.read_table(fh)
    # index=False keeps the cache free of a spurious "Unnamed: 0" column
    # when it is read back with read_csv on the next run.
    salary_table.to_csv('salary.table', index=False)

# Column shortcuts reused by later cells:
# E = education, M = management, X = experience, S = salary.
E = salary_table.E
M = salary_table.M
X = salary_table.X
S = salary_table.S
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
<ipython-input-2-132ce9110058> in <module>
12 try:
---> 13 salary_table = pd.read_csv('salary.table')
14 except: # recent pandas can read URL without urlopen
/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
684
--> 685 return _read(filepath_or_buffer, kwds)
686
/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
456 # Create the parser.
--> 457 parser = TextFileReader(fp_or_buf, **kwds)
458
/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
894
--> 895 self._make_engine(self.engine)
896
/usr/lib/python3/dist-packages/pandas/io/parsers.py in _make_engine(self, engine)
1134 if engine == "c":
-> 1135 self._engine = CParserWrapper(self.f, **self.options)
1136 else:
/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
1916
-> 1917 self._reader = parsers.TextReader(src, **kwds)
1918 self.unnamed_cols = self._reader.unnamed_cols
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source()
FileNotFoundError: [Errno 2] File b'salary.table' does not exist: b'salary.table'
During handling of the above exception, another exception occurred:
ConnectionRefusedError Traceback (most recent call last)
/usr/lib/python3.7/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
1318 h.request(req.get_method(), req.selector, req.data, headers,
-> 1319 encode_chunked=req.has_header('Transfer-encoding'))
1320 except OSError as err: # timeout error
/usr/lib/python3.7/http/client.py in request(self, method, url, body, headers, encode_chunked)
1251 """Send a complete request to the server."""
-> 1252 self._send_request(method, url, body, headers, encode_chunked)
1253
/usr/lib/python3.7/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
1297 body = _encode(body, 'body')
-> 1298 self.endheaders(body, encode_chunked=encode_chunked)
1299
/usr/lib/python3.7/http/client.py in endheaders(self, message_body, encode_chunked)
1246 raise CannotSendHeader()
-> 1247 self._send_output(message_body, encode_chunked=encode_chunked)
1248
/usr/lib/python3.7/http/client.py in _send_output(self, message_body, encode_chunked)
1025 del self._buffer[:]
-> 1026 self.send(msg)
1027
/usr/lib/python3.7/http/client.py in send(self, data)
965 if self.auto_open:
--> 966 self.connect()
967 else:
/usr/lib/python3.7/http/client.py in connect(self)
937 self.sock = self._create_connection(
--> 938 (self.host,self.port), self.timeout, self.source_address)
939 self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
/usr/lib/python3.7/socket.py in create_connection(address, timeout, source_address)
727 try:
--> 728 raise err
729 finally:
/usr/lib/python3.7/socket.py in create_connection(address, timeout, source_address)
715 sock.bind(source_address)
--> 716 sock.connect(sa)
717 # Break explicitly a reference cycle
ConnectionRefusedError: [Errno 111] Connection refused
During handling of the above exception, another exception occurred:
URLError Traceback (most recent call last)
<ipython-input-2-132ce9110058> in <module>
14 except: # recent pandas can read URL without urlopen
15 url = 'http://stats191.stanford.edu/data/salary.table'
---> 16 fh = urlopen(url)
17 salary_table = pd.read_table(fh)
18 salary_table.to_csv('salary.table')
/usr/lib/python3.7/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
220 else:
221 opener = _opener
--> 222 return opener.open(url, data, timeout)
223
224 def install_opener(opener):
/usr/lib/python3.7/urllib/request.py in open(self, fullurl, data, timeout)
523 req = meth(req)
524
--> 525 response = self._open(req, data)
526
527 # post-process response
/usr/lib/python3.7/urllib/request.py in _open(self, req, data)
541 protocol = req.type
542 result = self._call_chain(self.handle_open, protocol, protocol +
--> 543 '_open', req)
544 if result:
545 return result
/usr/lib/python3.7/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
501 for handler in handlers:
502 func = getattr(handler, meth_name)
--> 503 result = func(*args)
504 if result is not None:
505 return result
/usr/lib/python3.7/urllib/request.py in http_open(self, req)
1345
1346 def http_open(self, req):
-> 1347 return self.do_open(http.client.HTTPConnection, req)
1348
1349 http_request = AbstractHTTPHandler.do_request_
/usr/lib/python3.7/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
1319 encode_chunked=req.has_header('Transfer-encoding'))
1320 except OSError as err: # timeout error
-> 1321 raise URLError(err)
1322 r = h.getresponse()
1323 except:
URLError: <urlopen error [Errno 111] Connection refused>
Take a look at the data:
[3]:
# Scatter salary (S) against experience (X), one marker/colour per (E, M) group.
plt.figure(figsize=(6,6))
symbols = ['D', '^']           # indexed by the M group key
colors = ['r', 'g', 'blue']    # indexed by the E group key minus one
factor_groups = salary_table.groupby(['E','M'])
for (educ, mgmt), rows in factor_groups:
    plt.scatter(
        rows['X'], rows['S'],
        marker=symbols[mgmt], color=colors[educ-1], s=144,
    )
plt.xlabel('Experience');
plt.ylabel('Salary');
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-3-06b5e3c35d99> in <module>
2 symbols = ['D', '^']
3 colors = ['r', 'g', 'blue']
----> 4 factor_groups = salary_table.groupby(['E','M'])
5 for values, group in factor_groups:
6 i,j = values
NameError: name 'salary_table' is not defined
<Figure size 432x432 with 0 Axes>
Fit a linear model:
[4]:
# Additive model: salary on categorical education and management plus experience.
formula = 'S ~ C(E) + C(M) + X'
lm = ols(formula, data=salary_table).fit()
print(lm.summary())
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-4-bd1483e9558c> in <module>
1 formula = 'S ~ C(E) + C(M) + X'
----> 2 lm = ols(formula, salary_table).fit()
3 print(lm.summary())
NameError: name 'salary_table' is not defined
Have a look at the created design matrix:
[5]:
# First five rows of the design matrix built for the fitted model `lm`.
lm.model.exog[:5]
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-5-15c89faef8a8> in <module>
----> 1 lm.model.exog[:5]
NameError: name 'lm' is not defined
Or, since we initially passed in a DataFrame, the design matrix is also available as a DataFrame in
[6]:
# First five rows of the design matrix as kept on model.data.orig_exog.
lm.model.data.orig_exog[:5]
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-6-136b4afd409d> in <module>
----> 1 lm.model.data.orig_exog[:5]
NameError: name 'lm' is not defined
We keep a reference to the original untouched data in
[7]:
# First five rows of the original data the model was fitted from.
lm.model.data.frame[:5]
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-7-45902db7bdbe> in <module>
----> 1 lm.model.data.frame[:5]
NameError: name 'lm' is not defined
Influence statistics
[8]:
# Influence statistics for the additive fit, printed as a text table.
# `infl` is reused by the next cell.
infl = lm.get_influence()
print(infl.summary_table())
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-8-cf91966f823d> in <module>
----> 1 infl = lm.get_influence()
2 print(infl.summary_table())
NameError: name 'lm' is not defined
or get a dataframe
[9]:
# Same influence statistics, stored in `df_infl` (described above as a dataframe).
df_infl = infl.summary_frame()
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-9-4a89b3b617d2> in <module>
----> 1 df_infl = infl.summary_frame()
NameError: name 'infl' is not defined
[10]:
# Show the first five rows of the influence frame.
df_infl[:5]
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-10-c7ae236e6fec> in <module>
----> 1 df_infl[:5]
NameError: name 'df_infl' is not defined
Now plot the residuals within the groups separately:
[11]:
# Strip plot of the additive model's residuals, one x position per (E, M) group.
resid = lm.resid
plt.figure(figsize=(6,6));
for (educ, mgmt), rows in factor_groups:
    column = educ*2 + mgmt - 1  # for plotting purposes
    xs = [column] * len(rows)
    plt.scatter(
        xs, resid[rows.index],
        marker=symbols[mgmt], color=colors[educ-1],
        s=144, edgecolors='black',
    )
plt.xlabel('Group');
plt.ylabel('Residuals');
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-11-b1fbd2073437> in <module>
----> 1 resid = lm.resid
2 plt.figure(figsize=(6,6));
3 for values, group in factor_groups:
4 i,j = values
5 group_num = i*2 + j - 1 # for plotting purposes
NameError: name 'lm' is not defined
Now we will test some interactions using anova or f_test
[12]:
# Model with an education-by-experience interaction (C(E) * X) plus management.
interX_lm = ols("S ~ C(E) * X + C(M)", data=salary_table).fit()
print(interX_lm.summary())
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-12-63ad3de263bb> in <module>
----> 1 interX_lm = ols("S ~ C(E) * X + C(M)", salary_table).fit()
2 print(interX_lm.summary())
NameError: name 'salary_table' is not defined
Do an ANOVA check
[13]:
from statsmodels.stats.api import anova_lm

# Compare the additive model with the C(E) * X interaction model.
table1 = anova_lm(lm, interX_lm)
print(table1)

# Fit the C(E) * C(M) interaction model and compare it with the additive model.
interM_lm = ols("S ~ X + C(E)*C(M)", data=salary_table).fit()
print(interM_lm.summary())
table2 = anova_lm(lm, interM_lm)
print(table2)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-13-99371545e58d> in <module>
1 from statsmodels.stats.api import anova_lm
2
----> 3 table1 = anova_lm(lm, interX_lm)
4 print(table1)
5
NameError: name 'lm' is not defined
The design matrix as a DataFrame
[14]:
# First five rows of the interaction model's design matrix (orig_exog).
interM_lm.model.data.orig_exog[:5]
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-14-1dd5303dacd2> in <module>
----> 1 interM_lm.model.data.orig_exog[:5]
NameError: name 'interM_lm' is not defined
The design matrix as an ndarray
[15]:
# NOTE(review): a notebook cell echoes only its last expression, so the array
# on the next line is evaluated but not displayed; only the column names are
# shown. Confirm this is intended.
interM_lm.model.exog
interM_lm.model.exog_names
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-15-127a7082e299> in <module>
----> 1 interM_lm.model.exog
2 interM_lm.model.exog_names
NameError: name 'interM_lm' is not defined
[16]:
# Internally studentized residuals of the C(E) * C(M) model, plotted against X.
infl = interM_lm.get_influence()
resid = infl.resid_studentized_internal
plt.figure(figsize=(6,6))
for (educ, mgmt), rows in factor_groups:
    idx = rows.index
    plt.scatter(
        X[idx], resid[idx],
        marker=symbols[mgmt], color=colors[educ-1],
        s=144, edgecolors='black',
    )
plt.xlabel('X');
plt.ylabel('standardized resids');
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-16-4a4b0ea23d42> in <module>
----> 1 infl = interM_lm.get_influence()
2 resid = infl.resid_studentized_internal
3 plt.figure(figsize=(6,6))
4 for values, group in factor_groups:
5 i,j = values
NameError: name 'interM_lm' is not defined
Looks like one observation is an outlier.
[17]:
# NOTE: relies on `resid` left in the namespace by an earlier cell.
# Find the observation with the largest absolute residual and exclude it.
drop_idx = abs(resid).argmax()
print(drop_idx) # zero-based index
idx = salary_table.index.drop(drop_idx)

# Every refit below uses the same data and the same row subset.
subset_fit = dict(data=salary_table, subset=idx)

lm32 = ols('S ~ C(E) + X + C(M)', **subset_fit).fit()
print(lm32.summary())
print('\n')

interX_lm32 = ols('S ~ C(E) * X + C(M)', **subset_fit).fit()
print(interX_lm32.summary())
print('\n')

table3 = anova_lm(lm32, interX_lm32)
print(table3)
print('\n')

interM_lm32 = ols('S ~ X + C(E) * C(M)', **subset_fit).fit()
table4 = anova_lm(lm32, interM_lm32)
print(table4)
print('\n')
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-17-b012081228e7> in <module>
----> 1 drop_idx = abs(resid).argmax()
2 print(drop_idx) # zero-based index
3 idx = salary_table.index.drop(drop_idx)
4
5 lm32 = ols('S ~ C(E) + X + C(M)', data=salary_table, subset=idx).fit()
NameError: name 'resid' is not defined
Replot the residuals
[18]:
# Standardized residuals of the refitted interaction model, re-aligned to the
# index of X before plotting.
influence_frame = interM_lm32.get_influence().summary_frame()
resid = influence_frame['standard_resid'].reindex(X.index)

plt.figure(figsize=(6,6))
for (educ, mgmt), rows in factor_groups:
    idx = rows.index
    plt.scatter(
        X.loc[idx], resid.loc[idx],
        marker=symbols[mgmt], color=colors[educ-1],
        s=144, edgecolors='black',
    )
plt.xlabel('X[~[32]]');
plt.ylabel('standardized resids');
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-18-9611737d9172> in <module>
----> 1 resid = interM_lm32.get_influence().summary_frame()['standard_resid']
2
3 plt.figure(figsize=(6,6))
4 resid = resid.reindex(X.index)
5 for values, group in factor_groups:
NameError: name 'interM_lm32' is not defined
Plot the fitted values
[19]:
# Refit the C(E) * C(M) model without the dropped observation and overlay its
# fitted lines on the salary/experience scatter.
lm_final = ols('S ~ X + C(E)*C(M)', data=salary_table.drop([drop_idx])).fit()
mf = lm_final.model.data.orig_exog
lstyle = ['-','--']

plt.figure(figsize=(6,6))
for (educ, mgmt), rows in factor_groups:
    idx = rows.index
    shade = colors[educ-1]
    plt.scatter(X[idx], S[idx], marker=symbols[mgmt], color=shade,
                s=144, edgecolors='black')
    # drop NA because there is no idx 32 in the final model
    fv = lm_final.fittedvalues.reindex(idx).dropna()
    x = mf.X.reindex(idx).dropna()
    plt.plot(x, fv, ls=lstyle[mgmt], color=shade)
plt.xlabel('Experience');
plt.ylabel('Salary');
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-19-959e75ec2355> in <module>
----> 1 lm_final = ols('S ~ X + C(E)*C(M)', data = salary_table.drop([drop_idx])).fit()
2 mf = lm_final.model.data.orig_exog
3 lstyle = ['-','--']
4
5 plt.figure(figsize=(6,6))
NameError: name 'salary_table' is not defined
From our first look at the data, the difference between Master’s and PhD in the management group is different than in the non-management group. This is an interaction between the two qualitative variables management, M, and education, E. We can visualize this by first removing the effect of experience, then plotting the means within each of the 6 groups using interaction_plot.
[20]:
# Remove the fitted experience effect from salary, then plot group means of
# the adjusted salary U by education (E) and management (M).
x_slope = interX_lm32.params['X']
U = S - X * x_slope

plt.figure(figsize=(6,6))
interaction_plot(
    E, M, U,
    colors=['red','blue'], markers=['^','D'],
    markersize=10, ax=plt.gca(),
)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-20-078f1a51de12> in <module>
----> 1 U = S - X * interX_lm32.params['X']
2
3 plt.figure(figsize=(6,6))
4 interaction_plot(E, M, U, colors=['red','blue'], markers=['^','D'],
5 markersize=10, ax=plt.gca())
NameError: name 'S' is not defined
Minority Employment Data¶
[21]:
# Load the job-test data from a local file if present, otherwise from the
# course website.
try:
    jobtest_table = pd.read_table('jobtest.table')
except OSError:  # do not have data already (FileNotFoundError is an OSError)
    url = 'http://stats191.stanford.edu/data/jobtest.table'
    jobtest_table = pd.read_table(url)

# Group by a scalar key, not a one-element list: pandas >= 2.0 yields 1-tuple
# keys for list groupers, which cannot be used as list indices below or in the
# later cells that iterate factor_group.
factor_group = jobtest_table.groupby('MINORITY')

fig, ax = plt.subplots(figsize=(6,6))
colors = ['purple', 'green']   # indexed by the MINORITY group key
markers = ['o', 'v']           # indexed by the MINORITY group key
for factor, group in factor_group:
    ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
               marker=markers[factor], s=12**2)
ax.set_xlabel('TEST');
ax.set_ylabel('JPERF');
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
<ipython-input-21-917f19c1181a> in <module>
1 try:
----> 2 jobtest_table = pd.read_table('jobtest.table')
3 except: # do not have data already
/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
684
--> 685 return _read(filepath_or_buffer, kwds)
686
/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
456 # Create the parser.
--> 457 parser = TextFileReader(fp_or_buf, **kwds)
458
/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
894
--> 895 self._make_engine(self.engine)
896
/usr/lib/python3/dist-packages/pandas/io/parsers.py in _make_engine(self, engine)
1134 if engine == "c":
-> 1135 self._engine = CParserWrapper(self.f, **self.options)
1136 else:
/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
1916
-> 1917 self._reader = parsers.TextReader(src, **kwds)
1918 self.unnamed_cols = self._reader.unnamed_cols
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source()
FileNotFoundError: [Errno 2] File b'jobtest.table' does not exist: b'jobtest.table'
During handling of the above exception, another exception occurred:
ConnectionRefusedError Traceback (most recent call last)
/usr/lib/python3.7/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
1318 h.request(req.get_method(), req.selector, req.data, headers,
-> 1319 encode_chunked=req.has_header('Transfer-encoding'))
1320 except OSError as err: # timeout error
/usr/lib/python3.7/http/client.py in request(self, method, url, body, headers, encode_chunked)
1251 """Send a complete request to the server."""
-> 1252 self._send_request(method, url, body, headers, encode_chunked)
1253
/usr/lib/python3.7/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
1297 body = _encode(body, 'body')
-> 1298 self.endheaders(body, encode_chunked=encode_chunked)
1299
/usr/lib/python3.7/http/client.py in endheaders(self, message_body, encode_chunked)
1246 raise CannotSendHeader()
-> 1247 self._send_output(message_body, encode_chunked=encode_chunked)
1248
/usr/lib/python3.7/http/client.py in _send_output(self, message_body, encode_chunked)
1025 del self._buffer[:]
-> 1026 self.send(msg)
1027
/usr/lib/python3.7/http/client.py in send(self, data)
965 if self.auto_open:
--> 966 self.connect()
967 else:
/usr/lib/python3.7/http/client.py in connect(self)
937 self.sock = self._create_connection(
--> 938 (self.host,self.port), self.timeout, self.source_address)
939 self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
/usr/lib/python3.7/socket.py in create_connection(address, timeout, source_address)
727 try:
--> 728 raise err
729 finally:
/usr/lib/python3.7/socket.py in create_connection(address, timeout, source_address)
715 sock.bind(source_address)
--> 716 sock.connect(sa)
717 # Break explicitly a reference cycle
ConnectionRefusedError: [Errno 111] Connection refused
During handling of the above exception, another exception occurred:
URLError Traceback (most recent call last)
<ipython-input-21-917f19c1181a> in <module>
3 except: # do not have data already
4 url = 'http://stats191.stanford.edu/data/jobtest.table'
----> 5 jobtest_table = pd.read_table(url)
6
7 factor_group = jobtest_table.groupby(['MINORITY'])
/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
683 )
684
--> 685 return _read(filepath_or_buffer, kwds)
686
687 parser_f.__name__ = name
/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
438 # See https://github.com/python/mypy/issues/1297
439 fp_or_buf, _, compression, should_close = get_filepath_or_buffer(
--> 440 filepath_or_buffer, encoding, compression
441 )
442 kwds["compression"] = compression
/usr/lib/python3/dist-packages/pandas/io/common.py in get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode)
194
195 if _is_url(filepath_or_buffer):
--> 196 req = urlopen(filepath_or_buffer)
197 content_encoding = req.headers.get("Content-Encoding", None)
198 if content_encoding == "gzip":
/usr/lib/python3.7/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
220 else:
221 opener = _opener
--> 222 return opener.open(url, data, timeout)
223
224 def install_opener(opener):
/usr/lib/python3.7/urllib/request.py in open(self, fullurl, data, timeout)
523 req = meth(req)
524
--> 525 response = self._open(req, data)
526
527 # post-process response
/usr/lib/python3.7/urllib/request.py in _open(self, req, data)
541 protocol = req.type
542 result = self._call_chain(self.handle_open, protocol, protocol +
--> 543 '_open', req)
544 if result:
545 return result
/usr/lib/python3.7/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
501 for handler in handlers:
502 func = getattr(handler, meth_name)
--> 503 result = func(*args)
504 if result is not None:
505 return result
/usr/lib/python3.7/urllib/request.py in http_open(self, req)
1345
1346 def http_open(self, req):
-> 1347 return self.do_open(http.client.HTTPConnection, req)
1348
1349 http_request = AbstractHTTPHandler.do_request_
/usr/lib/python3.7/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
1319 encode_chunked=req.has_header('Transfer-encoding'))
1320 except OSError as err: # timeout error
-> 1321 raise URLError(err)
1322 r = h.getresponse()
1323 except:
URLError: <urlopen error [Errno 111] Connection refused>
[22]:
# Baseline model: JPERF on TEST only, ignoring MINORITY.
min_lm = ols(formula='JPERF ~ TEST', data=jobtest_table).fit()
print(min_lm.summary())
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-22-2dda11467a64> in <module>
----> 1 min_lm = ols('JPERF ~ TEST', data=jobtest_table).fit()
2 print(min_lm.summary())
NameError: name 'jobtest_table' is not defined
[23]:
# Scatter of JPERF against TEST by MINORITY group, with the min_lm fit overlaid.
fig, ax = plt.subplots(figsize=(6,6))
for minority_key, rows in factor_group:
    ax.scatter(
        rows['TEST'], rows['JPERF'],
        color=colors[minority_key], marker=markers[minority_key], s=12**2,
    )
ax.set_xlabel('TEST')
ax.set_ylabel('JPERF')
fig = abline_plot(model_results=min_lm, ax=ax)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-23-3bca97a45134> in <module>
1 fig, ax = plt.subplots(figsize=(6,6));
----> 2 for factor, group in factor_group:
3 ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
4 marker=markers[factor], s=12**2)
5
NameError: name 'factor_group' is not defined
[24]:
# Model with a TEST:MINORITY term in addition to TEST (no MINORITY main effect).
min_lm2 = ols('JPERF ~ TEST + TEST:MINORITY', data=jobtest_table).fit()
print(min_lm2.summary())
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-24-6f5c2b95207a> in <module>
1 min_lm2 = ols('JPERF ~ TEST + TEST:MINORITY',
----> 2 data=jobtest_table).fit()
3
4 print(min_lm2.summary())
NameError: name 'jobtest_table' is not defined
[25]:
# Scatter by MINORITY group with the two lines implied by min_lm2: the same
# intercept, and a slope that differs by the TEST:MINORITY coefficient.
fig, ax = plt.subplots(figsize=(6,6))
for minority_key, rows in factor_group:
    ax.scatter(
        rows['TEST'], rows['JPERF'],
        color=colors[minority_key], marker=markers[minority_key], s=12**2,
    )
coef = min_lm2.params
fig = abline_plot(intercept=coef['Intercept'], slope=coef['TEST'],
                  ax=ax, color='purple');
fig = abline_plot(intercept=coef['Intercept'],
                  slope=coef['TEST'] + coef['TEST:MINORITY'],
                  ax=ax, color='green');
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-25-08231acf3acd> in <module>
1 fig, ax = plt.subplots(figsize=(6,6));
----> 2 for factor, group in factor_group:
3 ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
4 marker=markers[factor], s=12**2)
5
NameError: name 'factor_group' is not defined
[26]:
# Model with TEST and a MINORITY main effect (no TEST:MINORITY term).
min_lm3 = ols(formula='JPERF ~ TEST + MINORITY', data=jobtest_table).fit()
print(min_lm3.summary())
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-26-db2300cbbd0e> in <module>
----> 1 min_lm3 = ols('JPERF ~ TEST + MINORITY', data = jobtest_table).fit()
2 print(min_lm3.summary())
NameError: name 'jobtest_table' is not defined
[27]:
# Scatter by MINORITY group with the two lines implied by min_lm3: the same
# slope, and an intercept that differs by the MINORITY coefficient.
fig, ax = plt.subplots(figsize=(6,6))
for minority_key, rows in factor_group:
    ax.scatter(
        rows['TEST'], rows['JPERF'],
        color=colors[minority_key], marker=markers[minority_key], s=12**2,
    )
coef = min_lm3.params
fig = abline_plot(intercept=coef['Intercept'], slope=coef['TEST'],
                  ax=ax, color='purple');
fig = abline_plot(intercept=coef['Intercept'] + coef['MINORITY'],
                  slope=coef['TEST'], ax=ax, color='green');
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-27-772c42f45c76> in <module>
1 fig, ax = plt.subplots(figsize=(6,6));
----> 2 for factor, group in factor_group:
3 ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
4 marker=markers[factor], s=12**2)
5
NameError: name 'factor_group' is not defined
[28]:
# Model from the formula TEST * MINORITY.
min_lm4 = ols(formula='JPERF ~ TEST * MINORITY', data=jobtest_table).fit()
print(min_lm4.summary())
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-28-ec41efc607af> in <module>
----> 1 min_lm4 = ols('JPERF ~ TEST * MINORITY', data = jobtest_table).fit()
2 print(min_lm4.summary())
NameError: name 'jobtest_table' is not defined
[29]:
# Scatter by MINORITY group with the two lines implied by min_lm4: both the
# intercept and the slope differ between the groups.
fig, ax = plt.subplots(figsize=(8,6))
for minority_key, rows in factor_group:
    ax.scatter(
        rows['TEST'], rows['JPERF'],
        color=colors[minority_key], marker=markers[minority_key], s=12**2,
    )
coef = min_lm4.params
fig = abline_plot(intercept=coef['Intercept'], slope=coef['TEST'],
                  ax=ax, color='purple');
fig = abline_plot(intercept=coef['Intercept'] + coef['MINORITY'],
                  slope=coef['TEST'] + coef['TEST:MINORITY'],
                  ax=ax, color='green');
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-29-636fa8a37910> in <module>
1 fig, ax = plt.subplots(figsize=(8,6));
----> 2 for factor, group in factor_group:
3 ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],
4 marker=markers[factor], s=12**2)
5
NameError: name 'factor_group' is not defined
[30]:
# Is there any effect of MINORITY on slope or intercept?
# Compares the TEST-only model (min_lm) with the TEST * MINORITY model (min_lm4).
table5 = anova_lm(min_lm, min_lm4)
print(table5)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-30-fbfd21e1cee1> in <module>
1 # is there any effect of MINORITY on slope or intercept?
----> 2 table5 = anova_lm(min_lm, min_lm4)
3 print(table5)
NameError: name 'min_lm' is not defined
[31]:
# Is there any effect of MINORITY on the intercept?
# Compares the TEST-only model (min_lm) with the TEST + MINORITY model (min_lm3).
table6 = anova_lm(min_lm, min_lm3)
print(table6)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-31-8194ba41b1ee> in <module>
1 # is there any effect of MINORITY on intercept
----> 2 table6 = anova_lm(min_lm, min_lm3)
3 print(table6)
NameError: name 'min_lm' is not defined
[32]:
# Is there any effect of MINORITY on the slope?
# Compares the TEST-only model (min_lm) with the TEST + TEST:MINORITY model (min_lm2).
table7 = anova_lm(min_lm, min_lm2)
print(table7)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-32-b4c0ae44d20e> in <module>
1 # is there any effect of MINORITY on slope
----> 2 table7 = anova_lm(min_lm, min_lm2)
3 print(table7)
NameError: name 'min_lm' is not defined
[33]:
# Is it just the slope, or both slope and intercept?
# Compares the TEST + TEST:MINORITY model (min_lm2) with the TEST * MINORITY model (min_lm4).
table8 = anova_lm(min_lm2, min_lm4)
print(table8)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-33-a9a6172af9d0> in <module>
1 # is it just the slope or both?
----> 2 table8 = anova_lm(min_lm2, min_lm4)
3 print(table8)
NameError: name 'min_lm2' is not defined
One-way ANOVA¶
[34]:
# Load the rehab data, preferring a locally cached copy and falling back to
# the course website when no cache exists yet.
try:
    rehab_table = pd.read_csv('rehab.table')
except OSError:  # no local copy (FileNotFoundError is a subclass of OSError)
    url = 'http://stats191.stanford.edu/data/rehab.csv'
    # The remote file is comma-separated, so read_csv is the direct reader.
    rehab_table = pd.read_csv(url)
    # index=False keeps the cache free of a spurious "Unnamed: 0" column
    # when it is read back on the next run.
    rehab_table.to_csv('rehab.table', index=False)

# Box plot of Time grouped by Fitness, drawn on the axes created here.
fig, ax = plt.subplots(figsize=(8,6))
fig = rehab_table.boxplot('Time', 'Fitness', ax=ax, grid=False)
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
<ipython-input-34-d3ea2b9d3e10> in <module>
1 try:
----> 2 rehab_table = pd.read_csv('rehab.table')
3 except:
/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
684
--> 685 return _read(filepath_or_buffer, kwds)
686
/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
456 # Create the parser.
--> 457 parser = TextFileReader(fp_or_buf, **kwds)
458
/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
894
--> 895 self._make_engine(self.engine)
896
/usr/lib/python3/dist-packages/pandas/io/parsers.py in _make_engine(self, engine)
1134 if engine == "c":
-> 1135 self._engine = CParserWrapper(self.f, **self.options)
1136 else:
/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
1916
-> 1917 self._reader = parsers.TextReader(src, **kwds)
1918 self.unnamed_cols = self._reader.unnamed_cols
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source()
FileNotFoundError: [Errno 2] File b'rehab.table' does not exist: b'rehab.table'
During handling of the above exception, another exception occurred:
ConnectionRefusedError Traceback (most recent call last)
/usr/lib/python3.7/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
1318 h.request(req.get_method(), req.selector, req.data, headers,
-> 1319 encode_chunked=req.has_header('Transfer-encoding'))
1320 except OSError as err: # timeout error
/usr/lib/python3.7/http/client.py in request(self, method, url, body, headers, encode_chunked)
1251 """Send a complete request to the server."""
-> 1252 self._send_request(method, url, body, headers, encode_chunked)
1253
/usr/lib/python3.7/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
1297 body = _encode(body, 'body')
-> 1298 self.endheaders(body, encode_chunked=encode_chunked)
1299
/usr/lib/python3.7/http/client.py in endheaders(self, message_body, encode_chunked)
1246 raise CannotSendHeader()
-> 1247 self._send_output(message_body, encode_chunked=encode_chunked)
1248
/usr/lib/python3.7/http/client.py in _send_output(self, message_body, encode_chunked)
1025 del self._buffer[:]
-> 1026 self.send(msg)
1027
/usr/lib/python3.7/http/client.py in send(self, data)
965 if self.auto_open:
--> 966 self.connect()
967 else:
/usr/lib/python3.7/http/client.py in connect(self)
937 self.sock = self._create_connection(
--> 938 (self.host,self.port), self.timeout, self.source_address)
939 self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
/usr/lib/python3.7/socket.py in create_connection(address, timeout, source_address)
727 try:
--> 728 raise err
729 finally:
/usr/lib/python3.7/socket.py in create_connection(address, timeout, source_address)
715 sock.bind(source_address)
--> 716 sock.connect(sa)
717 # Break explicitly a reference cycle
ConnectionRefusedError: [Errno 111] Connection refused
During handling of the above exception, another exception occurred:
URLError Traceback (most recent call last)
<ipython-input-34-d3ea2b9d3e10> in <module>
3 except:
4 url = 'http://stats191.stanford.edu/data/rehab.csv'
----> 5 rehab_table = pd.read_table(url, delimiter=",")
6 rehab_table.to_csv('rehab.table')
7
/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
683 )
684
--> 685 return _read(filepath_or_buffer, kwds)
686
687 parser_f.__name__ = name
/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
438 # See https://github.com/python/mypy/issues/1297
439 fp_or_buf, _, compression, should_close = get_filepath_or_buffer(
--> 440 filepath_or_buffer, encoding, compression
441 )
442 kwds["compression"] = compression
/usr/lib/python3/dist-packages/pandas/io/common.py in get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode)
194
195 if _is_url(filepath_or_buffer):
--> 196 req = urlopen(filepath_or_buffer)
197 content_encoding = req.headers.get("Content-Encoding", None)
198 if content_encoding == "gzip":
/usr/lib/python3.7/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
220 else:
221 opener = _opener
--> 222 return opener.open(url, data, timeout)
223
224 def install_opener(opener):
/usr/lib/python3.7/urllib/request.py in open(self, fullurl, data, timeout)
523 req = meth(req)
524
--> 525 response = self._open(req, data)
526
527 # post-process response
/usr/lib/python3.7/urllib/request.py in _open(self, req, data)
541 protocol = req.type
542 result = self._call_chain(self.handle_open, protocol, protocol +
--> 543 '_open', req)
544 if result:
545 return result
/usr/lib/python3.7/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
501 for handler in handlers:
502 func = getattr(handler, meth_name)
--> 503 result = func(*args)
504 if result is not None:
505 return result
/usr/lib/python3.7/urllib/request.py in http_open(self, req)
1345
1346 def http_open(self, req):
-> 1347 return self.do_open(http.client.HTTPConnection, req)
1348
1349 http_request = AbstractHTTPHandler.do_request_
/usr/lib/python3.7/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
1319 encode_chunked=req.has_header('Transfer-encoding'))
1320 except OSError as err: # timeout error
-> 1321 raise URLError(err)
1322 r = h.getresponse()
1323 except:
URLError: <urlopen error [Errno 111] Connection refused>
[35]:
# Regress Time on Fitness, with Fitness treated as a categorical factor.
rehab_model = ols('Time ~ C(Fitness)', data=rehab_table)
rehab_lm = rehab_model.fit()

# ANOVA table for the fitted model.
table9 = anova_lm(rehab_lm)
print(table9)

# Exogenous data stored on the fitted model (orig_exog).
print(rehab_lm.model.data.orig_exog)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-35-d3bb1b06817c> in <module>
----> 1 rehab_lm = ols('Time ~ C(Fitness)', data=rehab_table).fit()
2 table9 = anova_lm(rehab_lm)
3 print(table9)
4
5 print(rehab_lm.model.data.orig_exog)
NameError: name 'rehab_table' is not defined
[36]:
# Print the summary of the fitted model rehab_lm.
print(rehab_lm.summary())
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-36-99d31a5bc5c4> in <module>
----> 1 print(rehab_lm.summary())
NameError: name 'rehab_lm' is not defined
Two-way ANOVA¶
[37]:
# Load the kidney data from a local copy when one exists, otherwise download
# it. The remote file is read as whitespace-delimited, so the local read uses
# the same separator: read_table's default tab separator would not split a
# whitespace-delimited copy into columns. r'\s+' also matches tabs.
try:
    kidney_table = pd.read_csv('./kidney.table', sep=r'\s+')
except OSError:
    # Only a missing/unreadable local file triggers the download; parser
    # errors and KeyboardInterrupt are no longer silently swallowed.
    url = 'http://stats191.stanford.edu/data/kidney.table'
    kidney_table = pd.read_csv(url, sep=r'\s+')
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
<ipython-input-37-cbc31ddb699c> in <module>
1 try:
----> 2 kidney_table = pd.read_table('./kidney.table')
3 except:
/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
684
--> 685 return _read(filepath_or_buffer, kwds)
686
/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
456 # Create the parser.
--> 457 parser = TextFileReader(fp_or_buf, **kwds)
458
/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
894
--> 895 self._make_engine(self.engine)
896
/usr/lib/python3/dist-packages/pandas/io/parsers.py in _make_engine(self, engine)
1134 if engine == "c":
-> 1135 self._engine = CParserWrapper(self.f, **self.options)
1136 else:
/usr/lib/python3/dist-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
1916
-> 1917 self._reader = parsers.TextReader(src, **kwds)
1918 self.unnamed_cols = self._reader.unnamed_cols
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source()
FileNotFoundError: [Errno 2] File b'./kidney.table' does not exist: b'./kidney.table'
During handling of the above exception, another exception occurred:
ConnectionRefusedError Traceback (most recent call last)
/usr/lib/python3.7/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
1318 h.request(req.get_method(), req.selector, req.data, headers,
-> 1319 encode_chunked=req.has_header('Transfer-encoding'))
1320 except OSError as err: # timeout error
/usr/lib/python3.7/http/client.py in request(self, method, url, body, headers, encode_chunked)
1251 """Send a complete request to the server."""
-> 1252 self._send_request(method, url, body, headers, encode_chunked)
1253
/usr/lib/python3.7/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
1297 body = _encode(body, 'body')
-> 1298 self.endheaders(body, encode_chunked=encode_chunked)
1299
/usr/lib/python3.7/http/client.py in endheaders(self, message_body, encode_chunked)
1246 raise CannotSendHeader()
-> 1247 self._send_output(message_body, encode_chunked=encode_chunked)
1248
/usr/lib/python3.7/http/client.py in _send_output(self, message_body, encode_chunked)
1025 del self._buffer[:]
-> 1026 self.send(msg)
1027
/usr/lib/python3.7/http/client.py in send(self, data)
965 if self.auto_open:
--> 966 self.connect()
967 else:
/usr/lib/python3.7/http/client.py in connect(self)
937 self.sock = self._create_connection(
--> 938 (self.host,self.port), self.timeout, self.source_address)
939 self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
/usr/lib/python3.7/socket.py in create_connection(address, timeout, source_address)
727 try:
--> 728 raise err
729 finally:
/usr/lib/python3.7/socket.py in create_connection(address, timeout, source_address)
715 sock.bind(source_address)
--> 716 sock.connect(sa)
717 # Break explicitly a reference cycle
ConnectionRefusedError: [Errno 111] Connection refused
During handling of the above exception, another exception occurred:
URLError Traceback (most recent call last)
<ipython-input-37-cbc31ddb699c> in <module>
3 except:
4 url = 'http://stats191.stanford.edu/data/kidney.table'
----> 5 kidney_table = pd.read_csv(url, delim_whitespace=True)
/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
683 )
684
--> 685 return _read(filepath_or_buffer, kwds)
686
687 parser_f.__name__ = name
/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
438 # See https://github.com/python/mypy/issues/1297
439 fp_or_buf, _, compression, should_close = get_filepath_or_buffer(
--> 440 filepath_or_buffer, encoding, compression
441 )
442 kwds["compression"] = compression
/usr/lib/python3/dist-packages/pandas/io/common.py in get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode)
194
195 if _is_url(filepath_or_buffer):
--> 196 req = urlopen(filepath_or_buffer)
197 content_encoding = req.headers.get("Content-Encoding", None)
198 if content_encoding == "gzip":
/usr/lib/python3.7/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
220 else:
221 opener = _opener
--> 222 return opener.open(url, data, timeout)
223
224 def install_opener(opener):
/usr/lib/python3.7/urllib/request.py in open(self, fullurl, data, timeout)
523 req = meth(req)
524
--> 525 response = self._open(req, data)
526
527 # post-process response
/usr/lib/python3.7/urllib/request.py in _open(self, req, data)
541 protocol = req.type
542 result = self._call_chain(self.handle_open, protocol, protocol +
--> 543 '_open', req)
544 if result:
545 return result
/usr/lib/python3.7/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
501 for handler in handlers:
502 func = getattr(handler, meth_name)
--> 503 result = func(*args)
504 if result is not None:
505 return result
/usr/lib/python3.7/urllib/request.py in http_open(self, req)
1345
1346 def http_open(self, req):
-> 1347 return self.do_open(http.client.HTTPConnection, req)
1348
1349 http_request = AbstractHTTPHandler.do_request_
/usr/lib/python3.7/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
1319 encode_chunked=req.has_header('Transfer-encoding'))
1320 except OSError as err: # timeout error
-> 1321 raise URLError(err)
1322 r = h.getresponse()
1323 except:
URLError: <urlopen error [Errno 111] Connection refused>
Explore the dataset
[38]:
# Display the first 10 rows of kidney_table.
kidney_table.head(10)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-38-fff8acd40403> in <module>
----> 1 kidney_table.head(10)
NameError: name 'kidney_table' is not defined
Balanced panel
[39]:
# Short alias used by the following cells.
kt = kidney_table

# Response plotted on the log scale, shifted by one before taking the log.
log_days = np.log(kt['Days'] + 1)

plt.figure(figsize=(8, 6))
fig = interaction_plot(
    kt['Weight'],
    kt['Duration'],
    log_days,
    colors=['red', 'blue'],
    markers=['D', '^'],
    ms=10,
    ax=plt.gca(),
)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-39-9312bae60782> in <module>
----> 1 kt = kidney_table
2 plt.figure(figsize=(8,6))
3 fig = interaction_plot(kt['Weight'], kt['Duration'], np.log(kt['Days']+1),
4 colors=['red', 'blue'], markers=['D','^'], ms=10, ax=plt.gca())
NameError: name 'kidney_table' is not defined
Names available in the calling namespace (such as np) are also available in the formula evaluation namespace
[40]:
# Full model: both main effects plus the Duration-by-Weight interaction.
kidney_lm = ols('np.log(Days+1) ~ C(Duration) * C(Weight)', data=kt).fit()
table10 = anova_lm(kidney_lm)

# Additive model compared against the interaction model.
additive_lm = ols('np.log(Days+1) ~ C(Duration) + C(Weight)', data=kt).fit()
print(anova_lm(additive_lm, kidney_lm))

# Additive model with the Sum contrast on Weight; it is the larger model in
# both single-factor comparisons below.
additive_sum_formula = 'np.log(Days+1) ~ C(Duration) + C(Weight, Sum)'

# Duration only versus Duration + Weight.
duration_lm = ols('np.log(Days+1) ~ C(Duration)', data=kt).fit()
print(anova_lm(duration_lm, ols(additive_sum_formula, data=kt).fit()))

# Weight only versus Duration + Weight.
weight_lm = ols('np.log(Days+1) ~ C(Weight)', data=kt).fit()
print(anova_lm(weight_lm, ols(additive_sum_formula, data=kt).fit()))
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-40-c7e1132390fe> in <module>
----> 1 kidney_lm = ols('np.log(Days+1) ~ C(Duration) * C(Weight)', data=kt).fit()
2
3 table10 = anova_lm(kidney_lm)
4
5 print(anova_lm(ols('np.log(Days+1) ~ C(Duration) + C(Weight)',
NameError: name 'kt' is not defined
Sum of squares¶
Illustrates the use of different types of sums of squares (I, II, III) and how the Sum contrast can be used to produce the same output across all three.
Types I and II are equivalent under a balanced design.
Do not use Type III sums of squares with non-orthogonal contrasts, i.e., Treatment
[41]:
# Interaction model with the Sum contrast applied to both factors.
sum_model = ols('np.log(Days+1) ~ C(Duration, Sum) * C(Weight, Sum)', data=kt)
sum_lm = sum_model.fit()

# ANOVA table with the default sums of squares, then with typ=2 and typ=3.
print(anova_lm(sum_lm))
for ss_type in (2, 3):
    print(anova_lm(sum_lm, typ=ss_type))
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-41-e0c1ed608c29> in <module>
1 sum_lm = ols('np.log(Days+1) ~ C(Duration, Sum) * C(Weight, Sum)',
----> 2 data=kt).fit()
3
4 print(anova_lm(sum_lm))
5 print(anova_lm(sum_lm, typ=2))
NameError: name 'kt' is not defined
[42]:
# Same interaction model, but with the Treatment contrast on both factors.
nosum_formula = 'np.log(Days+1) ~ C(Duration, Treatment) * C(Weight, Treatment)'
nosum_lm = ols(nosum_formula, data=kt).fit()

# ANOVA table with the default sums of squares, then with typ=2 and typ=3.
print(anova_lm(nosum_lm))
for ss_type in (2, 3):
    print(anova_lm(nosum_lm, typ=ss_type))
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-42-95381847ac17> in <module>
1 nosum_lm = ols('np.log(Days+1) ~ C(Duration, Treatment) * C(Weight, Treatment)',
----> 2 data=kt).fit()
3 print(anova_lm(nosum_lm))
4 print(anova_lm(nosum_lm, typ=2))
5 print(anova_lm(nosum_lm, typ=3))
NameError: name 'kt' is not defined