I thought this might be interesting enough to share with you. Every time I’m working with DataFrames I miss a search feature: I’d like to search for certain patterns inside the columns and rows. I used to use jQuery DataTables for netgrafio, but I couldn’t find a simple way to integrate it with IPython. Well, it turned out to be easier than I thought.
Extensions
# <!-- collapse=True -->
from IPython import display
from IPython.core.magic import register_cell_magic, Magics, magics_class, cell_magic
import jinja2
# Create jinja cell magic (http://nbviewer.ipython.org/urls/gist.github.com/bj0/5343292/raw/23a0845ee874827e3635edb0bf5701710a537bfc/jinja2.ipynb)
@magics_class
class JinjaMagics(Magics):
    """Holds the ``%%jinja`` cell magic together with its jinja2 environment."""

    def __init__(self, shell):
        super(JinjaMagics, self).__init__(shell)
        # Rendering environment; its loader looks templates up on the
        # filesystem starting at '.'. Swap this out to customise rendering
        # (e.g. a different variable syntax).
        self.env = jinja2.Environment(loader=jinja2.FileSystemLoader('.'))
        # Output type name (as typed after "%%jinja") -> display callable.
        self.display_functions = {
            'html': display.HTML,
            'latex': display.Latex,
            'json': display.JSON,
            'pretty': display.Pretty,
            'display': display.display,
        }

    @cell_magic
    def jinja(self, line, cell):
        """Render the cell body with jinja2 and hand it to a display function.

        ``line`` selects the output type, e.g. ``%%jinja html`` wraps the
        rendered text in an HTML object. An unrecognised or empty ``line``
        falls back to ``display.display``. The template context is the user
        namespace minus names starting with ``_`` and names listed in the
        shell's hidden namespace.
        """
        output_kind = line.lower().strip()
        wrap = self.display_functions.get(output_kind, display.display)
        template = self.env.from_string(cell)
        shell = self.shell
        context = {
            name: value
            for name, value in shell.user_ns.items()
            if not (name.startswith('_') or name in shell.user_ns_hidden)
        }
        rendered = template.render(context)
        return wrap(rendered)
# Handle to the running shell. get_ipython is not imported in this file --
# presumably it is the name IPython injects into an interactive session.
ip = get_ipython()
# Register the class so the %%jinja cell magic defined on it becomes usable.
ip.register_magics(JinjaMagics)
DataTable function
# <!-- collapse=True -->
import uuid
def DataTable(df):
    """Render a pandas.DataFrame as a jQuery DataTables widget.

    The frame is converted with ``df.to_html(index=False, ...)`` and wrapped
    in a uniquely named ``<div>`` together with the DataTables stylesheets
    and scripts, so that several tables can live in one notebook without
    their selectors colliding.

    Parameters:
        df: the pandas.DataFrame to display (the index is not rendered).

    Returns:
        An ``IPython.display.HTML`` object holding the generated markup.
    """
    from IPython.display import HTML

    # The container name must be random. uuid4 is; uuid1 (used before) is
    # built from the host's MAC address and the current time, which would be
    # embedded in every saved or published notebook.
    id_container = uuid.uuid4()

    # NOTE(review): the else-branch below fetches jQuery via `$.getScript`,
    # which itself needs `$` to be defined already -- left unchanged here.
    template = """
<div id="datatable-container-%(id)s">
<link rel="stylesheet" type="text/css" href="http://ajax.aspnetcdn.com/ajax/jquery.dataTables/1.9.0/css/jquery.dataTables.css">
<link rel="stylesheet" type="text/css" href="http://ajax.aspnetcdn.com/ajax/jquery.dataTables/1.9.0/css/jquery.dataTables_themeroller.css">
<script type="text/javascript" charset="utf8" src="http://ajax.aspnetcdn.com/ajax/jquery.dataTables/1.9.0/jquery.dataTables.min.js"></script>
<script type="text/javascript">
var url = window.location.href;
if(url.indexOf("localhost:9999") != -1){
$('#datatable-container-%(id)s table.datatable').dataTable();
} else {
$.getScript("http://code.jquery.com/jquery-1.11.1.min.js");
$(document).ready(function() {
$('#datatable-container-%(id)s table.datatable').dataTable();
});
}
</script>
<!-- Insert table below -->
%(table)s
</div>
"""
    output = template % {
        'id': id_container,
        # The "datatable" class is what the jQuery selectors above look for.
        'table': df.to_html(index=False, classes="datatable dataframe"),
    }
    return HTML(output)
I know the code is not perfect, but at least it works for me. Now let’s create a DataFrame from some random URLs:
import pandas as pd
import urllib2
from yurl import URL
# Fetch list of random URLs (found using Google)
response = urllib2.urlopen('http://files.ianonavy.com/urls.txt')
try:
    targets_row = response.read()
finally:
    # Always release the connection, even when the read fails.
    response.close()

# Create DataFrame: one row per line of the downloaded list
targets = pd.DataFrame(targets_row.splitlines(), columns=["Target"])


def extract_root_domain(x):
    """Join root domain + suffix of ``x`` with a dot."""
    # NOTE(review): tldextract is not imported in the visible part of this
    # file and this helper is not called below -- import it before use.
    return '.'.join(tldextract.extract(x)[1:3])


# Column names for the items obtained by iterating a yurl.URL object
target_columns = ['scheme', 'userinfo', 'host', 'port', 'path', 'query', 'fragment', 'decoded']
target_component = [list(URL(t)) for t in targets['Target']]

# Create data frame with one column per URL component
df_targets = pd.DataFrame(target_component, columns=target_columns)
Classic HTML output
# Show the first 20 rows using the notebook's default DataFrame rendering
df_targets.head(20)
| | scheme | userinfo | host | port | path | query | fragment | decoded |
|---|---|---|---|---|---|---|---|---|
| 0 | http | | www.altpress.org | | / | | | False |
| 1 | http | | www.nzfortress.co.nz | | | | | False |
| 2 | http | | www.evillasforsale.com | | | | | False |
| 3 | http | | www.playingenemy.com | | / | | | False |
| 4 | http | | www.richardsonscharts.com | | | | | False |
| 5 | http | | www.xenith.net | | | | | False |
| 6 | http | | www.tdbrecords.com | | | | | False |
| 7 | http | | www.electrichumanproject.com | | / | | | False |
| 8 | http | | tweekerchick.blogspot.com | | / | | | False |
| 9 | http | | www.besound.com | | /pushead/home.html | | | False |
| 10 | http | | www.porkchopscreenprinting.com | | / | | | False |
| 11 | http | | www.kinseyvisual.com | | | | | False |
| 12 | http | | www.rathergood.com | | | | | False |
| 13 | http | | www.lepoint.fr | | / | | | False |
| 14 | http | | www.revhq.com | | | | | False |
| 15 | http | | www.poprocksandcoke.com | | | | | False |
| 16 | http | | www.samuraiblue.com | | / | | | False |
| 17 | http | | www.openbsd.org | | /cgi-bin/man.cgi | | | False |
| 18 | http | | www.sysblog.com | | | | | False |
| 19 | http | | www.voicesofsafety.com | | | | | False |
JQuery DataTables output
# Render the first 20 rows through the DataTable helper
DataTable(df_targets.head(20))
<div id="datatable-container-aab341ae-2f8d-11e4-95d5-52540086692e">
<link rel="stylesheet" type="text/css" href="http://ajax.aspnetcdn.com/ajax/jquery.dataTables/1.9.0/css/jquery.dataTables.css">
<link rel="stylesheet" type="text/css" href="http://ajax.aspnetcdn.com/ajax/jquery.dataTables/1.9.0/css/jquery.dataTables_themeroller.css">
<script type="text/javascript" charset="utf8" src="http://ajax.aspnetcdn.com/ajax/jquery.dataTables/1.9.0/jquery.dataTables.min.js"></script>
<script type="text/javascript">
var url = window.location.href;
if(url.indexOf("localhost:9999") != -1){
$('#datatable-container-aab341ae-2f8d-11e4-95d5-52540086692e table.datatable').dataTable();
} else {
$.getScript("http://code.jquery.com/jquery-1.11.1.min.js");
$(document).ready(function() {
$('#datatable-container-aab341ae-2f8d-11e4-95d5-52540086692e table.datatable').dataTable();
});
}
</script>
<!-- Insert table below -->
<table border="1" class="dataframe datatable dataframe">