I thought this might be interesting enough to share with you. Every time I’m working with DataFrames I miss having a search feature: I’d like to search for certain patterns inside the columns and rows. I used to use jQuery DataTables for netgrafio, but I couldn’t find any simple way to integrate it with IPython. Well, it turned out to be easier than I thought.

Extensions

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# <!-- collapse=True -->
from IPython import display
from IPython.core.magic import register_cell_magic, Magics, magics_class, cell_magic
import jinja2

# Create jinja cell magic (http://nbviewer.ipython.org/urls/gist.github.com/bj0/5343292/raw/23a0845ee874827e3635edb0bf5701710a537bfc/jinja2.ipynb)
@magics_class
class JinjaMagics(Magics):
    """Cell magics that render the contents of a notebook cell with jinja2."""

    def __init__(self, shell):
        super(JinjaMagics, self).__init__(shell)

        # Rendering environment; its FileSystemLoader is rooted at '.'.
        # Swap or reconfigure it to change template behaviour (for example
        # to use a different variable syntax).
        template_loader = jinja2.FileSystemLoader('.')
        self.env = jinja2.Environment(loader=template_loader)

        # Output type named on the magic line -> IPython display callable
        # that receives the rendered text.
        self.display_functions = {
            'html': display.HTML,
            'latex': display.Latex,
            'json': display.JSON,
            'pretty': display.Pretty,
            'display': display.display,
        }

    @cell_magic
    def jinja(self, line, cell):
        """
        Render the cell body as a jinja2 template.

        The text after ``%%jinja`` selects the output type (case-insensitive,
        surrounding whitespace ignored); unknown or missing types fall back to
        ``display.display``.  For example ``%%jinja html`` returns the
        rendered cell wrapped in an HTML object.

        The template context is the user namespace minus names that start
        with an underscore and names listed in ``shell.user_ns_hidden``.
        """
        output_kind = line.lower().strip()
        wrap = self.display_functions.get(output_kind, display.display)

        template = self.env.from_string(cell)

        hidden_names = self.shell.user_ns_hidden
        context = {}
        for name, value in self.shell.user_ns.items():
            if name.startswith('_') or name in hidden_names:
                continue
            context[name] = value

        return wrap(template.render(context))
        
    
# Register the magics class with the running IPython shell so that the
# %%jinja cell magic becomes available in the notebook.
ip = get_ipython()
ip.register_magics(JinjaMagics)

DataTable function

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# <!-- collapse=True -->
import uuid

def DataTable(df):
    """Render a pandas.DataFrame as a jQuery DataTables widget.

    The frame is converted to an HTML table (without its index) and wrapped
    in a uniquely named ``<div>`` together with the DataTables stylesheet,
    script and the JavaScript that activates the plugin on that table.

    Args:
        df: the pandas.DataFrame to display.

    Returns:
        An ``IPython.display.HTML`` object holding the generated markup.
    """
    from IPython.display import HTML

    # Generate a random container name. uuid4 is used rather than uuid1
    # because uuid1 is derived from the host's network address and the
    # current time, which would then be embedded in every published page.
    id_container = uuid.uuid4()

    # pandas adds the "dataframe" class on its own, so only the extra
    # "datatable" class (the one the jQuery selector below targets) is passed;
    # passing "dataframe" again just duplicated it in the class attribute.
    table_html = df.to_html(index=False, classes="datatable")

    # NOTE(review): the non-localhost branch calls $.getScript, so it appears
    # to assume a jQuery `$` already exists on the page - TODO confirm before
    # changing the script below.
    output = """
        <div id="datatable-container-%(container)s">
            <link rel="stylesheet" type="text/css" href="http://ajax.aspnetcdn.com/ajax/jquery.dataTables/1.9.0/css/jquery.dataTables.css">
            <link rel="stylesheet" type="text/css" href="http://ajax.aspnetcdn.com/ajax/jquery.dataTables/1.9.0/css/jquery.dataTables_themeroller.css">
            <script type="text/javascript" charset="utf8" src="http://ajax.aspnetcdn.com/ajax/jquery.dataTables/1.9.0/jquery.dataTables.min.js"></script>

            <script type="text/javascript">
                var url = window.location.href;
                
                if(url.indexOf("localhost:9999") != -1){
                    $('#datatable-container-%(container)s table.datatable').dataTable();
                } else {
                    $.getScript("http://code.jquery.com/jquery-1.11.1.min.js");
                    $(document).ready(function() {
                        $('#datatable-container-%(container)s table.datatable').dataTable();
                    });
                }
                
            </script>
            <!-- Insert table below -->
            %(table)s
        </div>
    """ % {"container": id_container, "table": table_html}
    return HTML(output)

I know the code is not perfect, but at least it works for me. Now let’s create a sample DataFrame from a list of random URLs:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
import pandas as pd
import urllib2
from yurl import URL


# Fetch list of random URLs (found using Google)
response = urllib2.urlopen('http://files.ianonavy.com/urls.txt')
try:
    targets_row = response.read()
finally:
    # Always release the HTTP connection, even if reading the body fails
    response.close()

# Create DataFrame with one fetched line per row
targets = pd.DataFrame(targets_row.splitlines(), columns=["Target"])


def extract_root_domain(x):
    """Join the root domain and suffix of *x* with a dot."""
    # NOTE(review): tldextract is not imported in the visible snippet, so
    # calling this helper raises NameError until it is - confirm the import.
    return '.'.join(tldextract.extract(x)[1:3])


# Column names for the components obtained by iterating a yurl.URL
target_columns = ['scheme', 'userinfo', 'host', 'port', 'path', 'query', 'fragment', 'decoded']
target_component = [list(URL(t)) for t in targets['Target']]

# Create data frame with one row of URL components per target
df_targets = pd.DataFrame(target_component, columns=target_columns)

Classic HTML output

1
# Show the first 20 rows with the notebook's default DataFrame rendering
df_targets[:20]
scheme userinfo host port path query fragment decoded
0 http www.altpress.org / False
1 http www.nzfortress.co.nz False
2 http www.evillasforsale.com False
3 http www.playingenemy.com / False
4 http www.richardsonscharts.com False
5 http www.xenith.net False
6 http www.tdbrecords.com False
7 http www.electrichumanproject.com / False
8 http tweekerchick.blogspot.com / False
9 http www.besound.com /pushead/home.html False
10 http www.porkchopscreenprinting.com / False
11 http www.kinseyvisual.com False
12 http www.rathergood.com False
13 http www.lepoint.fr / False
14 http www.revhq.com False
15 http www.poprocksandcoke.com False
16 http www.samuraiblue.com / False
17 http www.openbsd.org /cgi-bin/man.cgi False
18 http www.sysblog.com False
19 http www.voicesofsafety.com False

jQuery DataTables output

1
# Show the same 20 rows through the DataTable helper
DataTable(df_targets[:20])
    <div id="datatable-container-aab341ae-2f8d-11e4-95d5-52540086692e">
        <link rel="stylesheet" type="text/css" href="http://ajax.aspnetcdn.com/ajax/jquery.dataTables/1.9.0/css/jquery.dataTables.css">
        <link rel="stylesheet" type="text/css" href="http://ajax.aspnetcdn.com/ajax/jquery.dataTables/1.9.0/css/jquery.dataTables_themeroller.css">
        <script type="text/javascript" charset="utf8" src="http://ajax.aspnetcdn.com/ajax/jquery.dataTables/1.9.0/jquery.dataTables.min.js"></script>

        <script type="text/javascript">
            var url = window.location.href;

            if(url.indexOf("localhost:9999") != -1){
                $('#datatable-container-aab341ae-2f8d-11e4-95d5-52540086692e table.datatable').dataTable();
            } else {
                $.getScript("http://code.jquery.com/jquery-1.11.1.min.js");
                $(document).ready(function() {
                    $('#datatable-container-aab341ae-2f8d-11e4-95d5-52540086692e table.datatable').dataTable();
                });
            }

        </script>
        <!-- Insert table below -->
        <table border="1" class="dataframe datatable dataframe">
scheme userinfo host port path query fragment decoded http www.altpress.org / False http www.nzfortress.co.nz False http www.evillasforsale.com False http www.playingenemy.com / False http www.richardsonscharts.com False http www.xenith.net False http www.tdbrecords.com False http www.electrichumanproject.com / False http tweekerchick.blogspot.com / False http www.besound.com /pushead/home.html False http www.porkchopscreenprinting.com / False http www.kinseyvisual.com False http www.rathergood.com False http www.lepoint.fr / False http www.revhq.com False http www.poprocksandcoke.com False http www.samuraiblue.com / False http www.openbsd.org /cgi-bin/man.cgi False http www.sysblog.com False http www.voicesofsafety.com False