### Case Study : Stock market data from yahoo-finance for a few stocks

In [1]:
import yfinance as yf
import pandas as pd

In [2]:
# Function to fetch stock data from Yahoo Finance for multiple tickers
def fetch_multiple_stock_data(tickers, start_date, end_date):
    stock_data = pd.DataFrame()
    for ticker in tickers:
        data = yf.download(ticker, start=start_date, end=end_date)
        data['Ticker'] = ticker  # Add ticker as a column
        stock_data = pd.concat([stock_data, data])
    return stock_data

# Function to calculate returns for multiple stocks
def calculate_returns(stock_data):
    stock_data['Daily_Returns'] = stock_data.groupby('Ticker')['Adj Close'].pct_change()
    return stock_data

# Function to calculate risk measures for multiple stocks
def calculate_risk(stock_data):
    risk_measures = stock_data.groupby('Ticker')['Daily_Returns'].agg(['std', 'var'])
    return risk_measures


In [3]:
# Main function
def stocks(ticker_list,start_date,end_date):
    ''' Input : Pass a list of stocks, start date and end date
    Output: Stock data including the daily returns data frame
    and  the risk measures'''

    # Input parameters
    tickers = ticker_list  # Example list of stock tickers
    start_date = start_date
    end_date = end_date

    # Fetch stock data
    stock_data = fetch_multiple_stock_data(tickers, start_date, end_date)

   # Calculate returns
    stock_data = calculate_returns(stock_data)

   # Calculate risk measures
    risk_measures = calculate_risk(stock_data)

    return stock_data

    # Print results
    print("Stock data summary:")
    #print(stock_data.head())
    #print("\nRisk measures:")
    #print(risk_measures)


In [4]:
stocks_data = stocks(['HDFCBANK.NS','TCS','INFY'],start_date='2020-01-01', end_date='2024-08-20')
stocks_data.head()

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Ticker,Daily_Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-01-01,1276.099976,1280.0,1270.599976,1278.599976,1227.428589,1836849,HDFCBANK.NS,
2020-01-02,1279.0,1288.0,1279.0,1286.75,1235.252441,3068583,HDFCBANK.NS,0.006374
2020-01-03,1282.199951,1285.0,1263.599976,1268.400024,1217.636841,5427775,HDFCBANK.NS,-0.014261
2020-01-06,1260.0,1261.800049,1236.0,1240.949951,1191.2854,5445093,HDFCBANK.NS,-0.021641
2020-01-07,1258.900024,1271.449951,1252.25,1260.599976,1210.148926,7362247,HDFCBANK.NS,0.015835


In [5]:
#### Split the data into individual stocks

hdfc = stocks_data[stocks_data['Ticker']=='HDFCBANK.NS'].reset_index()
tcs = stocks_data[stocks_data['Ticker']=='TCS'].reset_index()
infosys = stocks_data[stocks_data['Ticker']=='INFY'].reset_index()

In [6]:
hdfc.describe()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Daily_Returns
count,1146,1146.0,1146.0,1146.0,1146.0,1146.0,1146.0,1145.0
mean,2022-04-21 17:30:28.272251392,1441.664441,1455.479754,1426.97426,1441.33783,1402.470897,12919930.0,0.000403
min,2020-01-01 00:00:00,770.450012,810.0,738.75,767.700012,736.975586,548404.0,-0.126069
25%,2021-02-22 06:00:00,1380.0,1395.0,1363.062531,1377.324951,1331.357758,6437438.0,-0.007736
50%,2022-04-21 12:00:00,1491.125,1506.274963,1479.375,1491.525024,1447.983643,10288240.0,0.000621
75%,2023-06-15 18:00:00,1593.225037,1605.949951,1580.225037,1593.924957,1555.526337,16874260.0,0.00827
max,2024-08-19 00:00:00,1791.0,1794.0,1764.650024,1768.650024,1768.650024,86705600.0,0.115996
std,,201.932373,200.744924,204.178335,202.460527,206.0144,9424437.0,0.017563


In [7]:
# group be ticker and compute the mean of the returns

stocks_data.groupby('Ticker').agg({'Daily_Returns':['mean','std','var']})


Unnamed: 0_level_0,Daily_Returns,Daily_Returns,Daily_Returns
Unnamed: 0_level_1,mean,std,var
Ticker,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
HDFCBANK.NS,0.000403,0.017563,0.000308
INFY,0.00095,0.0195,0.00038
TCS,0.000326,0.05852,0.003425


# Probability basics, distributions-Binomial, Normal distributions

### Binomial Distribution

* Compute the monthly returns from HDFC stock data
* Flag as profit if  the returns is more than 5% (say) else loss
* Compute the number of months the stock has made profilt and loss

In [8]:

def compute_monthly_returns(prices_df):
  """
  This function takes a DataFrame containing adjusted daily closing prices
  with columns 'Date' and 'Adj Close' and computes monthly returns.

  Args:
      prices_df: DataFrame containing adjusted daily closing prices.

  Returns:
      A DataFrame with monthly returns.
  """

  # Ensure the 'Date' column is a datetime index
  prices_df['Date'] = pd.to_datetime(prices_df['Date'])
  prices_df.set_index('Date', inplace=True)
  prices_df = prices_df.drop(['Daily_Returns'],axis=1)
  # Resample data to monthly frequency, taking the end-of-month price
#   monthly_prices = prices_df['Adj Close'].resample('M').last()
  # monthly_prices =prices_df['Adj Close'].resample('M').agg(lambda x: (x + 1).prod() - 1)\
  # monthly_prices =prices_df['Adj Close'].asfreq('M').ffill()
  month_end_prices = prices_df.resample('BM').apply(lambda x: x[-1])
  month_end_prices['monthly_returns'] = month_end_prices['Adj Close'].pct_change()
  # Calculate monthly returns (percentage change)
  # monthly_returns = monthly_prices/monthly_prices.shift(1) -1

  return month_end_prices


In [9]:
hdfc_returns = compute_monthly_returns(hdfc).reset_index()
hdfc_returns.head()

  month_end_prices = prices_df.resample('BM').apply(lambda x: x[-1])


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,monthly_returns
0,2020-01-31,1231.449951,1237.800049,1220.25,1226.300049,1177.221802,5589134,HDFCBANK.NS,
1,2020-02-28,1175.5,1185.0,1170.099976,1177.650024,1130.518677,12156528,HDFCBANK.NS,-0.039672
2,2020-03-31,853.799988,873.599976,838.0,861.900024,827.405579,17605546,HDFCBANK.NS,-0.268119
3,2020-04-30,1001.400024,1019.0,992.099976,1001.799988,961.706543,21896567,HDFCBANK.NS,0.162316
4,2020-05-29,944.0,955.0,923.450012,951.650024,913.56366,26512583,HDFCBANK.NS,-0.05006


In [10]:
# from the stock data calculate the monthly returns
profit_threshold = 0.05
hdfc_returns['profit_flag'] = hdfc_returns['monthly_returns'].apply(lambda x: 'Profit' if x > profit_threshold else 'Loss')
print('Counts: \n', hdfc_returns['profit_flag'].value_counts())
print('\n%\n',hdfc_returns['profit_flag'].value_counts(normalize=True))



Counts: 
 profit_flag
Loss      41
Profit    15
Name: count, dtype: int64

%
 profit_flag
Loss      0.732143
Profit    0.267857
Name: proportion, dtype: float64



* what is the probability of making a profit in a given month
* what is the probability of making profit of >5% twice a year
* what is the probability of making profit of >5% "atleast" twice a year


**Activity1:**
* What is the probability of making profit of >5% [0,1,2,... 12] times a year
* Cumulative probability


**Activity 2:**


Consider TCS stock and answer the following questions
* what is the probability of making a profit in a given month
* what is the probability of making profit of >5% twice a year
* what is the probability of making profit of >5% "atleast" twice a year


# Normal Distribution

#### Application of Normal distribution using PDF and CDF functions
* what is the probability of making a profit of 6% in a given year?
* what is the probability of making profit of 5% twice a year?
* what is the probability of making profit of >5%  in a given month?


**Activity 3:**
* What is the probability of hdfc stock closing price less than 1360
* What is the probability of hdfc stock closing price between 1400 to 1500
* What is the Probability of hdfc stock monthly returns being greater than 5% ?