#!/usr/bin/env python
# coding: utf-8
# In[1]:
# Load UCI census and convert to json for sending to the visualization
import pandas as pd
# Column names for the UCI census data, in file order. They are handed to
# read_csv below so the resulting dataframe gets a header row.
features = [
    "Age", "Workclass", "fnlwgt", "Education", "Education-Num",
    "Marital Status", "Occupation", "Relationship", "Race", "Sex",
    "Capital Gain", "Capital Loss", "Hours per week", "Country", "Target",
]

# Remote CSV to load.
DATA_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test"

# Parsing options for the CSV above.
read_options = dict(
    names=features,      # use the column names above as the header
    sep=r'\s*,\s*',      # comma separator, swallowing any surrounding whitespace
    engine='python',     # regex separators need the python parsing engine
    skiprows=[0],        # the first row of the file holds no data
    na_values="?",       # "?" marks a missing value; parse it as NaN
)
df = pd.read_csv(DATA_URL, **read_options)

# Choose the sprite size from the number of records: datasets with more than
# 50000 rows get the smaller sprite, since large sprites on large datasets can
# crash the browser.
record_count = len(df)
if record_count > 50000:
    sprite_size = 32
else:
    sprite_size = 64

# Serialize the dataframe as a JSON array with one object per record.
jsonstr = df.to_json(orient='records')
# In[2]:
# Display the Dive visualization for this data
# `display` and `HTML` are imported from the public IPython.display module;
# importing `display` from the internal IPython.core.display module is
# deprecated.
from IPython.display import display, HTML
# Create Facets template
# NOTE(review): the template below contains only a newline, so nothing is
# rendered and the `jsonstr` / `sprite_size` arguments passed to format() are
# never substituted. The Facets Dive markup that should reference the
# {jsonstr} and {sprite_size} placeholders appears to be missing -- confirm
# against the Facets documentation and restore it.
HTML_TEMPLATE = """
"""
# Load the json dataset and the sprite_size into the template
html = HTML_TEMPLATE.format(jsonstr=jsonstr, sprite_size=sprite_size)
# Display the template
display(HTML(html))
# In[ ]: