In [1]:
!pip install pandasql
In [2]:
#Checking out meat and birth data
from pandasql import sqldf
from pandasql import load_meat, load_births
In [4]:
meat = load_meat()
births = load_births()
#You can inspect the dataframes directly if you're using Rodeo
#These print statements are here just in case you want to check out your data in the editor, too
print (meat.head())
print (births.head())
In [5]:
meat.to_csv("data-meat.csv", index=False)
births.to_csv("data-births.csv", index=False)
In [7]:
import pandas as pd
import numpy as np
import pylab as pl
In [8]:
df_meat = pd.read_csv("data-meat.csv")
print(df_meat.head())
In [9]:
df_births = pd.read_csv("data-births.csv")
print(df_births.head())
In [10]:
import matplotlib.pyplot as plt
%matplotlib inline
In [12]:
q = """
SELECT
*
FROM
df_meat
LIMIT 10;"""
print (sqldf(q, locals()))
In [16]:
# births per year
q = """
SELECT
strftime("%Y", date) year
, SUM(births) total_Births
FROM df_births
GROUP BY 1
ORDER BY 1;
"""
print (sqldf(q, locals()))
In [18]:
def pysqldf(q):
return sqldf(q, globals())
In [19]:
# joining meats + births on date
q = """
SELECT
m.date
, b.births
, m.beef
FROM
df_meat m
INNER JOIN
df_births b
on m.date = b.date
ORDER BY
m.date
LIMIT 50;
"""
joined = pysqldf(q)
print (joined.head())
In [21]:
q = """
SELECT
date
, beef
, veal
, pork
, lamb_and_mutton
FROM
df_meat
WHERE
lamb_and_mutton >= veal
ORDER BY date DESC
LIMIT 10;
"""
print (pysqldf(q))
In [22]:
#################################################
# SQL FUNCTIONS
# e.g. `RANDOM()`
#################################################
q = """SELECT
*
FROM
meat
ORDER BY RANDOM()
LIMIT 10;"""
print (pysqldf(q))
In [23]:
#################################################
# UNION ALL
#################################################
q = """
SELECT
date
, 'beef' AS meat_type
, beef AS value
FROM df_meat
UNION ALL
SELECT
date
, 'veal' AS meat_type
, veal AS value
FROM df_meat
UNION ALL
SELECT
date
, 'pork' AS meat_type
, pork AS value
FROM df_meat
UNION ALL
SELECT
date
, 'lamb_and_mutton' AS meat_type
, lamb_and_mutton AS value
FROM df_meat
ORDER BY 1
"""
print (pysqldf(q).head(20))
In [25]:
#################################################
# subqueries
# fancy!
#################################################
q = """
SELECT
m1.date
, m1.beef
FROM
df_meat m1
WHERE m1.date IN
(SELECT
date
FROM df_meat
WHERE
beef >= broilers
ORDER BY date)
"""
more_beef_than_broilers = pysqldf(q)
print (more_beef_than_broilers.head(10))
In [26]:
import matplotlib.pyplot as plt
%matplotlib inline
In [28]:
pysqldf = lambda q: sqldf(q, globals())
q = """
SELECT
m.date
, m.beef
, b.births
FROM
df_meat m
LEFT JOIN
df_births b
ON m.date = b.date
WHERE
m.date > '1974-12-31';
"""
df = pysqldf(q)
df.births = df.births.fillna(method='backfill')
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax1.plot(pd.rolling_mean(df['beef'], 12), color='b')
ax1.set_xlabel('months since 1975')
ax1.set_ylabel('cattle slaughtered', color='b')
ax2 = ax1.twinx()
ax2.plot(pd.rolling_mean(df['births'], 12), color='r')
ax2.set_ylabel('babies born', color='r')
plt.title("Beef Consumption and the Birth Rate")
plt.show()
In [33]:
df.head()
Out[33]:
In [36]:
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax1.plot(df['beef'].rolling(window=12,center=False).mean(), color='b')
ax1.set_xlabel('months since 1975')
ax1.set_ylabel('cattle slaughtered', color='b')
ax2 = ax1.twinx()
ax2.plot(df['births'].rolling(window=12,center=False).mean(), color='r')
ax2.set_ylabel('babies born', color='r')
plt.title("Beef Consumption and the Birth Rate")
plt.show()
In [ ]:
No comments:
Post a Comment