Saturday, October 14, 2017

Stock Market Application

StockMarket
In [11]:
!pip install pandas_datareader
Collecting pandas_datareader
  Downloading pandas_datareader-0.5.0-py2.py3-none-any.whl (74kB)
Collecting requests-file (from pandas_datareader)
  Downloading requests-file-1.4.2.tar.gz
Collecting requests-ftp (from pandas_datareader)
  Downloading requests-ftp-0.3.1.tar.gz
Requirement already satisfied: pandas>=0.17.0 in c:\programdata\anaconda3\lib\site-packages (from pandas_datareader)
Requirement already satisfied: requests>=2.3.0 in c:\programdata\anaconda3\lib\site-packages (from pandas_datareader)
Requirement already satisfied: six in c:\programdata\anaconda3\lib\site-packages (from requests-file->pandas_datareader)
Requirement already satisfied: python-dateutil>=2 in c:\programdata\anaconda3\lib\site-packages (from pandas>=0.17.0->pandas_datareader)
Requirement already satisfied: pytz>=2011k in c:\programdata\anaconda3\lib\site-packages (from pandas>=0.17.0->pandas_datareader)
Requirement already satisfied: numpy>=1.7.0 in c:\programdata\anaconda3\lib\site-packages (from pandas>=0.17.0->pandas_datareader)
Building wheels for collected packages: requests-file, requests-ftp
  Running setup.py bdist_wheel for requests-file: started
  Running setup.py bdist_wheel for requests-file: finished with status 'done'
  Stored in directory: C:\Users\Yantrajaal\AppData\Local\pip\Cache\wheels\3e\34\3a\c2e634ca7b545510c1b3b7d94dea084e5fdb5f33558f3c3a81
  Running setup.py bdist_wheel for requests-ftp: started
  Running setup.py bdist_wheel for requests-ftp: finished with status 'done'
  Stored in directory: C:\Users\Yantrajaal\AppData\Local\pip\Cache\wheels\76\fb\0d\1026eb562c34a4982dc9d39c9c582a734eefe7f0455f711deb
Successfully built requests-file requests-ftp
Installing collected packages: requests-file, requests-ftp, pandas-datareader
Successfully installed pandas-datareader-0.5.0 requests-file-1.4.2 requests-ftp-0.3.1
In [34]:
# http://pandas-datareader.readthedocs.io/en/latest/remote_data.html#remote-data-yahoo
import pandas as pd
import pandas_datareader.data as web
import datetime
# Download daily prices for ticker "F" from Yahoo! Finance for 2010-01-01 through 2016-01-01
start = datetime.datetime(2010, 1, 1)
end = datetime.datetime(2016, 1, 1)
f = web.DataReader("F", 'yahoo', start, end)
# Look up one trading day by its date label; .loc replaces the deprecated .ix indexer
f.loc['2013-01-04']
Out[34]:
Open         1.351000e+01
High         1.361000e+01
Low          1.335000e+01
Close        1.357000e+01
Adj Close    1.102840e+01
Volume       5.466990e+07
Name: 2013-01-04 00:00:00, dtype: float64
In [12]:
import pandas as pd
#import pandas.io.data as web
#from pandas_datareader import data, wb
#import pandas.io.data as web  # Package and modules for importing data; this code may change depending on pandas version
import datetime
In [36]:
# We will look at stock prices from January 1, 2017 up to today
# NOTE: `end` is the date the cell is run, so the downloaded range (and every output below) changes from run to run
start = datetime.datetime(2017,1,1)
end = datetime.date.today()

# Let's get Apple stock data; Apple's ticker symbol is AAPL
# First argument is the series we want, second is the source ("yahoo" for Yahoo! Finance), third is the start date, fourth is the end date
apple = web.DataReader("AAPL", "yahoo", start, end)
In [35]:
type(apple)
Out[35]:
pandas.core.frame.DataFrame
In [37]:
apple.head()
Out[37]:
Open High Low Close Adj Close Volume
Date
2017-01-03 115.800003 116.330002 114.760002 116.150002 114.722694 28781900
2017-01-04 115.849998 116.510002 115.750000 116.019997 114.594292 21118100
2017-01-05 115.919998 116.860001 115.809998 116.610001 115.177040 22193600
2017-01-06 116.779999 118.160004 116.470001 117.910004 116.461075 31751900
2017-01-09 117.949997 119.430000 117.940002 118.989998 117.527794 33387600
In [38]:
# Import matplotlib
import matplotlib.pyplot as plt
from matplotlib.dates import date2num
import pylab
# This line is necessary for the plot to appear in a Jupyter notebook (you don't need it if you're working in Rodeo)
%matplotlib inline
# Control the default size of figures in this Jupyter notebook (n/a if working in Rodeo)
%pylab inline
pylab.rcParams['figure.figsize'] = (15, 9)   # Change the size of plots
Populating the interactive namespace from numpy and matplotlib
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\magics\pylab.py:161: UserWarning: pylab import has clobbered these variables: ['f', 'pylab']
`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"
In [39]:
# Plot the adjusted closing price of AAPL
apple["Adj Close"].plot(grid = True)
Out[39]:
<matplotlib.axes._subplots.AxesSubplot at 0x21109808fd0>
In [40]:
from matplotlib.dates import DateFormatter, WeekdayLocator, DayLocator, MONDAY
from matplotlib.finance import candlestick_ohlc
In [41]:
def _ohlc_by_period(dat, stick):
    """
    Collapse the rows of dat into one Open/High/Low/Close row per candlestick period.

    :param dat: pandas DataFrame with a datetime index and columns "Open", "High", "Low" and "Close"
    :param stick: "day", "week", "month", "year", or a positive integer number of rows (trading days) per candlestick
    :return: tuple (plotdat, width); plotdat has one row per candlestick, indexed by the first date of its period, and width is the nominal period length in days used to size the candle bodies
    :raises ValueError: if stick is not one of the accepted values
    """
    stick_error = 'Valid inputs to argument "stick" include the strings "day", "week", "month", "year", or a positive integer'
    ohlc = dat.loc[:, ["Open", "High", "Low", "Close"]]

    if isinstance(stick, str):
        if stick == "day":
            return ohlc, 1
        dates = pd.to_datetime(ohlc.index)
        if stick == "week":
            # ISO week numbers must be paired with the ISO year, otherwise the days around
            # New Year would be attached to the wrong year's week
            iso = [d.isocalendar() for d in dates]
            keys = [np.array([c[0] for c in iso]), np.array([c[1] for c in iso])]
            width = 5
        elif stick == "month":
            # Calendar month pairs with the calendar year (the ISO year would put 2018-12-31
            # into December 2019)
            keys = [np.asarray(dates.year), np.asarray(dates.month)]
            width = 30
        elif stick == "year":
            keys = np.asarray(dates.year)
            width = 365
        else:
            raise ValueError(stick_error)
    elif isinstance(stick, (int, np.integer)) and not isinstance(stick, bool) and stick >= 1:
        # Consecutive runs of `stick` rows form one candlestick
        keys = np.arange(len(ohlc)) // stick
        width = stick
    else:
        raise ValueError(stick_error)

    # Keys are always ordered (year, sub-period), so the default sorted groupby yields the
    # periods chronologically. Rows are collected in lists and the frame is built once.
    starts, rows = [], []
    for _, group in ohlc.groupby(keys):
        starts.append(group.index[0])
        rows.append({"Open": group["Open"].iloc[0],
                     "High": group["High"].max(),
                     "Low": group["Low"].min(),
                     "Close": group["Close"].iloc[-1]})
    plotdat = pd.DataFrame(rows, index = starts, columns = ["Open", "High", "Low", "Close"])
    return plotdat, width


def pandas_candlestick_ohlc(dat, stick = "day", otherseries = None):
    """
    Show a Japanese candlestick plot for the stock data stored in dat, optionally overlaying other columns as lines.

    :param dat: pandas DataFrame object with datetime64 index, and float columns "Open", "High", "Low", and "Close", likely created via DataReader from "yahoo"
    :param stick: A string or integer indicating the period of time covered by a single candlestick. Valid string inputs are "day" (default), "week", "month", and "year"; a positive integer gives the number of trading days included in a period
    :param otherseries: A single column label, or an iterable of column labels, of dat holding other series (such as moving averages) to be plotted as lines
    :raises ValueError: if stick is not one of the accepted values, or dat has no rows
    """
    if len(dat) == 0:
        raise ValueError('dat contains no rows to plot')

    # Validate `stick` and aggregate before any figure is created, so bad input leaves no empty plot behind
    plotdat, width = _ohlc_by_period(dat, stick)

    # Set plot parameters, including the axis object ax used for plotting
    fig, ax = plt.subplots()
    fig.subplots_adjust(bottom=0.2)
    if plotdat.index[-1] - plotdat.index[0] < pd.Timedelta('730 days'):
        # Under two years of data: major ticks on Mondays, minor ticks on every day
        ax.xaxis.set_major_locator(WeekdayLocator(MONDAY))
        ax.xaxis.set_minor_locator(DayLocator())
        ax.xaxis.set_major_formatter(DateFormatter('%b %d'))  # e.g., Jan 12
    else:
        # Longer spans keep the default locator and add the year to each label
        ax.xaxis.set_major_formatter(DateFormatter('%b %d, %Y'))

    ax.grid(True)

    # Create the candlestick chart; each quote is (date number, open, high, low, close)
    quotes = list(zip(list(date2num(plotdat.index.tolist())),
                      plotdat["Open"].tolist(), plotdat["High"].tolist(),
                      plotdat["Low"].tolist(), plotdat["Close"].tolist()))
    candlestick_ohlc(ax, quotes, colorup = "black", colordown = "red", width = width * .4)

    # Plot other series (such as moving averages) as lines
    if otherseries is not None:
        if isinstance(otherseries, str):
            otherseries = [otherseries]
        else:
            try:
                otherseries = list(otherseries)
            except TypeError:
                # A single non-iterable column label
                otherseries = [otherseries]
        dat.loc[:,otherseries].plot(ax = ax, lw = 1.3, grid = True)

    ax.xaxis_date()
    ax.autoscale_view()
    plt.setp(plt.gca().get_xticklabels(), rotation=45, horizontalalignment='right')

    plt.show()
In [42]:
pandas_candlestick_ohlc(apple)
In [45]:
# Fetch Microsoft and Google prices from the same source and over the same date range as AAPL
microsoft = web.DataReader(name = "MSFT", data_source = "yahoo", start = start, end = end)
google = web.DataReader(name = "GOOG", data_source = "yahoo", start = start, end = end)
In [47]:
microsoft.head()
Out[47]:
Open High Low Close Adj Close Volume
Date
2017-01-03 62.790001 62.840000 62.130001 62.580002 61.520611 20694100
2017-01-04 62.480000 62.750000 62.119999 62.299999 61.245350 21340000
2017-01-05 62.189999 62.660000 62.029999 62.299999 61.245350 24876000
2017-01-06 62.299999 63.150002 62.040001 62.840000 61.776207 19922900
2017-01-09 62.759998 63.080002 62.540001 62.639999 61.579594 20256600
In [48]:
google.head()
Out[48]:
Open High Low Close Adj Close Volume
Date
2017-01-03 778.809998 789.630005 775.799988 786.140015 786.140015 1657300
2017-01-04 788.359985 791.340027 783.159973 786.900024 786.900024 1073000
2017-01-05 786.080017 794.479980 785.020020 794.020020 794.020020 1335200
2017-01-06 795.260010 807.900024 792.203979 806.150024 806.150024 1640200
2017-01-09 806.400024 809.966003 802.830017 806.650024 806.650024 1272400
In [46]:
# Below I create a DataFrame of the adjusted closing prices of these stocks, one column per ticker,
# by passing the three "Adj Close" Series to the DataFrame constructor as a dict keyed by ticker symbol
stocks = pd.DataFrame({"AAPL": apple["Adj Close"],
                      "MSFT": microsoft["Adj Close"],
                      "GOOG": google["Adj Close"]})

stocks.head()
Out[46]:
AAPL GOOG MSFT
Date
2017-01-03 114.722694 786.140015 61.520611
2017-01-04 114.594292 786.900024 61.245350
2017-01-05 115.177040 794.020020 61.245350
2017-01-06 116.461075 806.150024 61.776207
2017-01-09 117.527794 806.650024 61.579594
In [49]:
stocks.plot(grid = True)
Out[49]:
<matplotlib.axes._subplots.AxesSubplot at 0x21109defa58>
In [50]:
stocks.plot(secondary_y = ["AAPL", "MSFT"], grid = True)
Out[50]:
<matplotlib.axes._subplots.AxesSubplot at 0x2110b13abe0>
In [51]:
# Express every price as a multiple of that stock's first observed price, so all series start at 1.0
# and can be compared on a single axis.
# stocks.iloc[0] is the first row (one value per ticker); dividing the frame by it matches the row's
# labels to the columns, so each column is divided by its own first value.
# .iloc is used because x[0] on a date-indexed Series relies on positional fallback, which newer pandas deprecates.
stock_return = stocks / stocks.iloc[0]
stock_return.head()
Out[51]:
AAPL GOOG MSFT
Date
2017-01-03 1.000000 1.000000 1.000000
2017-01-04 0.998881 1.000967 0.995526
2017-01-05 1.003960 1.010024 0.995526
2017-01-06 1.015153 1.025453 1.004155
2017-01-09 1.024451 1.026090 1.000959
In [52]:
stock_return.plot(grid = True).axhline(y = 1, color = "black", lw = 2)
Out[52]:
<matplotlib.lines.Line2D at 0x21109cf06a0>
In [53]:
# NumPy's log is applied element-wise to the whole DataFrame
import numpy as np

# Daily log change: log(price on this row) - log(price on the previous row).
# diff() subtracts the previous row from each row, so the first row has no predecessor and is NaN.
stock_change = np.log(stocks).diff()
stock_change.head()
Out[53]:
AAPL GOOG MSFT
Date
2017-01-03 NaN NaN NaN
2017-01-04 -0.001120 0.000966 -0.004484
2017-01-05 0.005072 0.009007 0.000000
2017-01-06 0.011087 0.015161 0.008630
2017-01-09 0.009118 0.000620 -0.003188
In [54]:
stock_change.plot(grid = True).axhline(y = 0, color = "black", lw = 2)
Out[54]:
<matplotlib.lines.Line2D at 0x2110acf1908>
In [56]:
# 20-day moving average of the closing price, rounded to 2 decimals; the first 19 rows are NaN
apple["20d"] = apple["Close"].rolling(window = 20, center = False).mean().round(2)
# Candlesticks for 2017-01-04 through 2017-08-07 with the 20-day average drawn as a line
pandas_candlestick_ohlc(apple.loc['2017-01-04':'2017-08-07'], otherseries = "20d")
In [57]:
# Re-download AAPL with a full extra year of history. A rolling mean over N days is NaN until N
# trading days have accumulated, so data that begins in January 2017 leaves the start of the
# plotted window without a 20-day average and cannot yield a 200-day average anywhere in the
# January-August 2017 window. A separate name is used so `start` keeps its 2017-01-01 value.
history_start = datetime.datetime(2016,1,1)
apple = web.DataReader("AAPL", "yahoo", history_start, end)
apple["20d"] = np.round(apple["Close"].rolling(window = 20, center = False).mean(), 2)

pandas_candlestick_ohlc(apple.loc['2017-01-04':'2017-08-07',:], otherseries = "20d")
In [58]:
# Add the 50-day and 200-day moving averages of the closing price, rounded to 2 decimals.
# Each average is NaN until its full window of trading days is available, and NaN rows are not drawn.
for days in (50, 200):
    apple["%dd" % days] = apple["Close"].rolling(window = days, center = False).mean().round(2)

# Candlesticks for 2017-01-04 through 2017-08-07 with all three moving averages overlaid
pandas_candlestick_ohlc(apple.loc['2017-01-04':'2017-08-07'], otherseries = ["20d", "50d", "200d"])
In [ ]:
 

No comments:

Post a Comment