Time Series Analysis of Stock Market
TimeSeries_Analysis
June 26, 2025
1 DATA ANALYTICS PROJECT
2 TIME SERIES ANALYSIS AND FORECASTING FOR STOCK MARKET
Explore various time series models to understand historical patterns, identify trends and seasonality, and make short-term or long-term predictions.
3 Collect and preprocess historical stock market data.
[ ]: # General-purpose libraries
import pandas as pd  # Data manipulation
import numpy as np   # Numerical operations

# Data visualization
import matplotlib.pyplot as plt  # Plotting
import seaborn as sns            # Advanced visualizations

# Machine learning and deep learning
from sklearn.preprocessing import MinMaxScaler, StandardScaler  # Data normalization/scaling
import tensorflow as tf  # Deep learning framework

# System utilities and warnings
import warnings
warnings.filterwarnings('ignore')  # Ignore warnings for a clean output
[ ]: # Importing Data
data = pd.read_csv("/content/stock_data.csv")
data
[ ]: (DataFrame preview: columns Date, Open, High, Low, Close, Volume, Name; every row has Name "AABA"; last rows dated 12/22/2017 through 12/29/2017; numeric values truncated in this export)

[3019 rows x 7 columns]
[ ]: data.info()  # dataframe info
RangeIndex: 3019 entries, 0 to 3018
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Date    3019 non-null   object
 1   Open    3019 non-null   float64
 2   High    3019 non-null   float64
 3   Low     3019 non-null   float64
 4   Close   3019 non-null   float64
 5   Volume  3019 non-null   int64
 6   Name    3019 non-null   object
dtypes: float64(4), int64(1), object(2)
memory usage: 165.2+ KB
[ ]: data.describe()  # statistics of data
[ ]: (summary statistics -- count, mean, std, min, 25%, 50%, 75%, max -- for Open, High, Low, Close, and Volume; values truncated in this export)
[ ]: data.head()
[ ]: (first five rows; all with Name "AABA"; numeric values truncated in this export)
[ ]: data.tail()
[ ]: (last five rows, dated 12/22/2017 through 12/29/2017; all with Name "AABA")
[ ]: data.duplicated().sum()  # count of duplicated rows
[ ]: np.int64(0)
[ ]: data.isna().sum()  # count of null values per column
[ ]: Date      0
     Open      0
     High      0
     Low       0
     Close     0
     Volume    0
     Name      0
     dtype: int64
4 Outlier Detection
[ ]: import seaborn as sns
data['Date']=pd.to_datetime(data['Date'],format='mixed')
data.set_index('Date',inplace=True)
[ ]: # boxplot
df=data
sns.boxplot(df['Close'])
[ ]: (boxplot of df['Close'] shown)
[ ]: # scatterplot
plt.figure(figsize=(12, 6))
plt.scatter(df.index, df['Close'],alpha=0.6, s=10,color='blue')
plt.title("Stock Close Price Over Time")
plt.xlabel("Date")
plt.ylabel("Close Price")
plt.grid(True)
plt.tight_layout()
plt.show()
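The plots above flag outliers visually; a numeric check with the interquartile-range (IQR) rule can complement them. Below is a minimal sketch on a small synthetic series (hypothetical data standing in for `df['Close']`); the same `lower`/`upper` bounds would be applied to the real column.

```python
import numpy as np
import pandas as pd

# Hypothetical prices: a stable level with two injected spikes
close = pd.Series([30.0] * 20 + [90.0, 5.0])

# IQR rule: flag values outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR]
q1, q3 = close.quantile(0.25), close.quantile(0.75)
iqr = q3 - q1
lower, upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr
outliers = close[(close < lower) | (close > upper)]
print(len(outliers))  # 2 -- the two injected spikes
```

On trending stock prices the global IQR rule can be too aggressive; applying it to returns (`close.pct_change()`) rather than raw prices is a common variant.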
5 Analyse time series concepts
Trend: the long-term movement or direction in the data over time.
Seasonality: recurring patterns or cycles within each year (here, monthly).
Residual (noise): what remains after removing trend and seasonality.
[ ]: from statsmodels.tsa.seasonal import seasonal_decompose
df=data
df
# Resample to monthly average closing price
monthly_close = df['Close'].resample('M').mean()
# Apply seasonal decomposition (additive model)
decomposition = seasonal_decompose(monthly_close, model='additive')
# Plot the decomposition results
fig = decomposition.plot()
fig.set_size_inches(14, 10)
plt.tight_layout()
plt.show()
6 Forecasting
Prophet Forecasting
[ ]: #Facebook prophet forecasting
from prophet import Prophet
import pandas as pd # Data manipulation
import matplotlib.pyplot as plt # Plotting
#Fetch the data
data=pd.read_csv('/content/stock_data.csv')
data['Date']=pd.to_datetime(data['Date'],format='mixed')
# data.sort_values(by='Date',inplace=True)
# set Date as index column
data.set_index('Date',inplace=True)
# reset index and rename columns for Prophet (expects 'ds' and 'y')
df = data.reset_index()[['Date', 'Close']].rename(columns={'Date': 'ds', 'Close': 'y'})
# model creation
model=Prophet()
model.fit(df)
# future prediction
future=model.make_future_dataframe(periods=365,include_history=False)
# Prediction
prediction=model.predict(future)
# plot prediction
model.plot(prediction)
plt.title('Stock Price Prediction')
plt.xlabel('Date')
plt.ylabel('Stock Price')
plt.legend()
plt.show()
INFO:prophet:Disabling daily seasonality. Run prophet with
daily_seasonality=True to override this.
INFO:cmdstanpy:Chain [1] start processing
INFO:cmdstanpy:Chain [1] done processing
[ ]: prediction
[ ]: (forecast DataFrame with columns ds, trend, trend_lower, trend_upper, yhat_lower, yhat_upper, additive_terms, additive_terms_lower, additive_terms_upper, weekly, weekly_lower, weekly_upper, yearly, yearly_lower, yearly_upper, multiplicative_terms, multiplicative_terms_lower, multiplicative_terms_upper, yhat; values truncated in this export)

[365 rows x 19 columns]
ARIMA Forecasting
[ ]: import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
# Load and prepare data
data = pd.read_csv('/content/stock_data.csv')
data['Date'] = pd.to_datetime(data['Date'],format='mixed')
data.set_index('Date', inplace=True)
series = data['Close']
# Fit ARIMA model
model = ARIMA(series, order=(5, 1, 0))
model_fit = model.fit()
# Forecast next 365 days
forecast = model_fit.forecast(steps=365)
# Plot original and forecast
plt.figure(figsize=(10, 5))
plt.plot(series, label='Historical')
plt.plot(pd.date_range(series.index[-1], periods=365, freq='D'), forecast, label='Forecast', color='orange')
plt.title('ARIMA Forecast')
plt.xlabel('Date')
plt.ylabel('Stock Price')
plt.legend()
plt.show()
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473:
ValueWarning: A date index has been provided, but it has no associated frequency
information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837:
ValueWarning: No supported index is available. Prediction results will be given
with an integer index beginning at `start`.
return get_prediction_index(
[ ]: forecast
[ ]: (365 forecast values; truncated in this export)
     Name: predicted_mean, Length: 365, dtype: float64
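The `d=1` in `order=(5, 1, 0)` asks ARIMA to difference the series once before modelling, which removes a linear trend and pushes a price-like series toward stationarity. A quick illustration on a synthetic trending series (hypothetical data, not the stock CSV):

```python
import numpy as np

# Hypothetical series: a linear upward trend plus a small cycle,
# standing in for a trending stock price
t = np.arange(100)
price = 50 + 0.3 * t + np.sin(t / 5)

# First differencing -- the d=1 in ARIMA order=(5, 1, 0) -- removes the
# trend, leaving a series that hovers around a roughly constant mean
diff = np.diff(price)

print(price.std() > diff.std())  # True: variance collapses once the trend is gone
```

In practice the choice of `d` is usually checked with a stationarity test (e.g. the augmented Dickey-Fuller test in `statsmodels`) rather than assumed.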
SARIMA Forecasting
[ ]: import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
# Load and prepare time series
data = pd.read_csv('/content/stock_data.csv')
data['Date'] = pd.to_datetime(data['Date'],format='mixed')
data.set_index('Date', inplace=True)
# Select the target column
series = data['Close']
# Fit SARIMA model
model = SARIMAX(series, order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
model_fit = model.fit(disp=False)
# Forecast next 365 steps
forecast = model_fit.forecast(steps=365)
# Plot
plt.figure(figsize=(10, 5))
plt.plot(series, label='Historical')
plt.plot(pd.date_range(series.index[-1], periods=365, freq='D'), forecast, label='Forecast', color='orange')
plt.title('SARIMA Forecast')
plt.xlabel('Date')
plt.ylabel('Stock Price')
plt.legend()
plt.show()
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473:
ValueWarning: A date index has been provided, but it has no associated frequency
information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837:
ValueWarning: No supported index is available. Prediction results will be given
with an integer index beginning at `start`.
return get_prediction_index(
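The repeated ValueWarning appears because the index built from the CSV carries no frequency, so statsmodels falls back to an integer index when forecasting. Attaching an explicit frequency before fitting silences it; a sketch, using business-day frequency (`'B'`) as a plausible choice for trading data:

```python
import numpy as np
import pandas as pd

# Rebuild a small index the way read_csv + to_datetime leaves it:
# valid dates, but no frequency attached
dates = pd.DatetimeIndex(pd.date_range('2017-01-02', periods=10, freq='B').values)
s = pd.Series(np.arange(10.0), index=dates)
print(s.index.freq)  # None -- this is what triggers the ValueWarning

# Attach an explicit business-day frequency before model fitting
s = s.asfreq('B')
print(s.index.freq is not None)  # True
```

On real trading data, `asfreq('B')` inserts NaN rows for market holidays, which would then need filling (e.g. forward fill) before fitting ARIMA or SARIMAX.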
Predict future stock prices using LSTM
[ ]: import numpy as np # Numerical operations
import pandas as pd # Data manipulation
import matplotlib.pyplot as plt
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler # Data normalization/scaling
from tensorflow.keras.models import Sequential # Deep learning framework
from tensorflow.keras.layers import LSTM, Dense
Load Stock Data
[ ]: # Load and prepare data
data = pd.read_csv('/content/stock_data.csv')
data['Date'] = pd.to_datetime(data['Date'],format='mixed')
data.set_index('Date', inplace=True)
df = data[['Close']]
Normalize and Prepare Data
[ ]: # Convert to numpy array
data_values = df['Close'].values.reshape(-1, 1)
# Normalize data
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data_values)
# Create sequences of 60 time steps
X, y = [], []
seq_len = 60
for i in range(seq_len, len(scaled_data)):
X.append(scaled_data[i - seq_len:i])
y.append(scaled_data[i])
X, y = np.array(X), np.array(y)
# Reshape for LSTM [samples, time steps, features]
X = X.reshape((X.shape[0], X.shape[1], 1))
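A quick shape check clarifies what the sequence loop produces: each sample is a window of `seq_len` past values, and the target is the value right after that window. Sketch on a small stand-in array (100 points instead of the 3019 in the CSV):

```python
import numpy as np

# Stand-in for scaled_data: 100 evenly spaced values in [0, 1]
scaled = np.linspace(0.0, 1.0, 100).reshape(-1, 1)

seq_len = 60
X, y = [], []
for i in range(seq_len, len(scaled)):
    X.append(scaled[i - seq_len:i])  # window of the 60 previous values
    y.append(scaled[i])              # the value right after the window
X, y = np.array(X), np.array(y)
X = X.reshape((X.shape[0], X.shape[1], 1))

print(X.shape, y.shape)  # (40, 60, 1) (40, 1)
```

With 100 points and a window of 60 there are only 40 samples; on the full 3019-row dataset the same loop yields 2959, which matches the 93 batches of 32 seen in the training log below.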
[ ]: scaled_data
[ ]: (3019x1 array of Close prices scaled into [0, 1]; values truncated in this export)
Build LSTM Model
[ ]: model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(seq_len, 1)))
model.add(LSTM(50))
model.add(Dense(1))
model.summary()
/usr/local/lib/python3.11/dist-packages/keras/src/layers/rnn/rnn.py:200:
UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When
using Sequential models, prefer using an `Input(shape)` object as the first
layer in the model instead.
super().__init__(**kwargs)
Model: "sequential"
 Layer (type)     Output Shape       Param #
 lstm (LSTM)      (None, 60, 50)      10,400
 lstm_1 (LSTM)    (None, 50)          20,200
 dense (Dense)    (None, 1)               51
 Total params: 30,651 (119.73 KB)
 Trainable params: 30,651 (119.73 KB)
 Non-trainable params: 0 (0.00 B)
Compile the model
[ ]: model.compile(optimizer='adam', loss='mean_squared_error')
Train the Model
[ ]: history=model.fit(X, y, epochs=10, batch_size=32)
Epoch 1/10
93/93 - 4s 9ms/step - loss: 0.0220
Epoch 2/10
93/93 - 1s 7ms/step - loss: 4.8720e-04
Epoch 3/10
93/93 - 1s 7ms/step - loss: 3.9353e-04
Epoch 4/10
93/93 - 1s 7ms/step - loss: 4.1979e-04
Epoch 5/10
93/93 - 1s 7ms/step - loss: 3.6736e-04
Epoch 6/10
93/93 - 1s 7ms/step - loss: 3.4576e-04
Epoch 7/10
93/93 - 1s 9ms/step - loss: 3.4476e-04
Epoch 8/10
93/93 - 1s 10ms/step - loss: 3.1366e-04
Epoch 9/10
93/93 - 1s 9ms/step - loss: 3.6111e-04
Epoch 10/10
93/93 - 1s 7ms/step - loss: 2.7535e-04
[ ]: history.history
[ ]: (dict with key 'loss' holding the ten per-epoch training losses shown above)
Forecast the Next 365 Days
[ ]: # Start from last available sequence
forecast_input = scaled_data[-seq_len:]
forecast = []
for _ in range(365):
    input_reshaped = forecast_input.reshape(1, seq_len, 1)
    pred = model.predict(input_reshaped, verbose=0)
    forecast.append(pred[0][0])
    # Update sequence for next prediction
    forecast_input = np.append(forecast_input, pred)[-seq_len:]
Predict and Inverse Scale
[ ]: # Inverse transform forecast
forecast = scaler.inverse_transform(np.array(forecast).reshape(-1, 1))
# Create future dates
last_date = df.index[-1]
future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=365)
# Plot original + forecast
plt.figure(figsize=(14, 6))
plt.plot(df.index, df['Close'], label='Historical')
plt.plot(future_dates, forecast, label='Forecast (Next 365 Days)', color='red')
plt.title('AABA Stock Price Forecast using LSTM')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.legend()
plt.grid()
plt.show()
7 Evaluate and compare model accuracy
[ ]: data=pd.read_csv('/content/stock_data.csv')
data['Date']=pd.to_datetime(data['Date'],format='mixed')
data.set_index('Date',inplace=True)
df=data[['Close']]
[ ]: split_size=int(len(df)*0.8)
train_set=df[:split_size]
test_set=df[split_size:]
test_set
[ ]: (test_set: the last 20% of Close prices, indexed by Date; values truncated in this export)

[604 rows x 1 columns]
[ ]: from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
import numpy as np # Import numpy for np.sqrt
# Prophet
from prophet import Prophet
# Reload and prepare data specifically for Prophet evaluation
data = pd.read_csv('/content/stock_data.csv')
data['Date'] = pd.to_datetime(data['Date'], format='mixed')
df = data[['Date', 'Close']].rename(columns={'Date':'ds','Close':'y'})
# Split data
split_size=int(len(df)*0.8)
train_set_prophet=df[:split_size]
test_set_prophet=df[split_size:]
# model creation and fitting
model=Prophet()
model.fit(train_set_prophet)
# future prediction dates generation
forecast_dates = test_set_prophet['ds']
forecast = pd.DataFrame({'ds': forecast_dates})
# Prediction
forecast=model.predict(forecast)
# predicted_range
predicted_prophet = forecast.set_index('ds')['yhat']
# Actual_range
actual_prophet = test_set_prophet.set_index('ds')['y']
# Reindex predicted_prophet to match the actual_prophet index
predicted_prophet = predicted_prophet.reindex(actual_prophet.index)
actual_prophet,predicted_prophet
# Evaluation of prophet
mse = mean_squared_error(actual_prophet, predicted_prophet)
mae = mean_absolute_error(actual_prophet, predicted_prophet)
rmse = np.sqrt(mse)
r2 = r2_score(actual_prophet, predicted_prophet)
print(f'MAE: {mae:.2f}')
print(f'MSE: {mse:.2f}')
print(f'RMSE: {rmse:.2f}')
print(f'R² Score: {r2:.2f}')
INFO:prophet:Disabling daily seasonality. Run prophet with
daily_seasonality=True to override this.
INFO:cmdstanpy:Chain [1] start processing
INFO:cmdstanpy:Chain [1] done processing
MAE: 12.15
MSE: 174.10
RMSE: 13.19
R² Score: -0.14
[ ]: # Prophet Tuning
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
import numpy as np # Import numpy for np.sqrt
# Prophet
from prophet import Prophet
# Reload and prepare data specifically for Prophet evaluation
data = pd.read_csv('/content/stock_data.csv')
data['Date'] = pd.to_datetime(data['Date'], format='mixed')
df = data[['Date', 'Close']].rename(columns={'Date':'ds','Close':'y'})
# Split data
split_size=int(len(df)*0.8)
train_set_prophet=df[:split_size]
test_set_prophet=df[split_size:]
model = Prophet(
yearly_seasonality=True,
weekly_seasonality=True,
daily_seasonality=False,
changepoint_prior_scale=0.05
# try tuning this
)
model.fit(train_set_prophet)
# future prediction dates generation
forecast_dates = test_set_prophet['ds']
forecast = pd.DataFrame({'ds': forecast_dates})
# Prediction
forecast=model.predict(forecast)
# predicted_range
predicted_prophet = forecast.set_index('ds')['yhat']
# Actual_range
actual_prophet = test_set_prophet.set_index('ds')['y']
# Reindex predicted_prophet to match the actual_prophet index
predicted_prophet = predicted_prophet.reindex(actual_prophet.index)
actual_prophet,predicted_prophet
# Evaluation of prophet
mse = mean_squared_error(actual_prophet, predicted_prophet)
mae = mean_absolute_error(actual_prophet, predicted_prophet)
rmse = np.sqrt(mse)
r2 = r2_score(actual_prophet, predicted_prophet)
print(f'MAE: {mae:.2f}')
print(f'MSE: {mse:.2f}')
print(f'RMSE: {rmse:.2f}')
print(f'R² Score: {r2:.2f}')
INFO:cmdstanpy:Chain [1] start processing
INFO:cmdstanpy:Chain [1] done processing
MAE: 12.15
MSE: 174.10
RMSE: 13.19
R² Score: -0.14
ARIMA
[ ]: from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np
# Use closing price and split
data = pd.read_csv('/content/stock_data.csv')
data['Date'] = pd.to_datetime(data['Date'],format='mixed')
data.set_index('Date', inplace=True)
series = data['Close']
train_size = int(len(series) * 0.8)
train, test = series[:train_size], series[train_size:]
# Build and fit ARIMA model
model_arima = ARIMA(train, order=(5, 1, 0))
model_arima_fit = model_arima.fit()
# Forecast
forecast_arima = model_arima_fit.forecast(steps=len(test))
# Evaluate
mae_arima = mean_absolute_error(test, forecast_arima)
rmse_arima = np.sqrt(mean_squared_error(test, forecast_arima))
r2_arima = r2_score(test, forecast_arima)
print(f'ARIMA - MAE: {mae_arima:.2f}, RMSE: {rmse_arima:.2f}, R²: {r2_arima:.2f}')
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473:
ValueWarning: A date index has been provided, but it has no associated frequency
information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
ARIMA - MAE: 10.36, RMSE: 14.71, R²: -0.42
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837:
ValueWarning: No supported index is available. Prediction results will be given
with an integer index beginning at `start`.
return get_prediction_index(
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837:
FutureWarning: No supported index is available. In the next version, calling
this method in a model without a supported index will result in an exception.
return get_prediction_index(
SARIMA
[ ]: import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt
# Load and prepare data
data = pd.read_csv('/content/stock_data.csv')
data['Date'] = pd.to_datetime(data['Date'],format='mixed')
data.set_index('Date', inplace=True)
series = data['Close']
train_size = int(len(series) * 0.8)
train, test = series[:train_size], series[train_size:]
# Model creation
model=SARIMAX(train,order=(1,1,1),seasonal_order=(1,1,1,12))
sarima_result=model.fit()
# Forecast for length of test data
forecast=sarima_result.forecast(steps=len(test))
# Set forecast index = test index
forecast.index = test.index
# Evaluation metrics
mae = mean_absolute_error(test, forecast)
rmse = np.sqrt(mean_squared_error(test, forecast))
r2 = r2_score(test, forecast)
print(f"SARIMA - MAE: {mae:.2f}, RMSE: {rmse:.2f}, R² Score: {r2:.2f}")
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473:
ValueWarning: A date index has been provided, but it has no associated frequency
information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
SARIMA - MAE: 9.21, RMSE: 13.25, R² Score: -0.15
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837:
ValueWarning: No supported index is available. Prediction results will be given
with an integer index beginning at `start`.
return get_prediction_index(
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837:
FutureWarning: No supported index is available. In the next version, calling
this method in a model without a supported index will result in an exception.
return get_prediction_index(
LSTM
[ ]: import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
[ ]: data=pd.read_csv("/content/stock_data.csv")
data=data.set_index('Date')
df=data[['Close']]
df
[ ]: (Close prices indexed by Date; last rows dated 12/22/2017 through 12/29/2017; values truncated in this export)

[3019 rows x 1 columns]
[ ]: # Normalize the data
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df)
# Create sequences
def create_sequences(data, sequence_length=60):
X, y = [], []
for i in range(sequence_length, len(data)):
X.append(data[i-sequence_length:i, 0])
y.append(data[i, 0])
return np.array(X), np.array(y)
X, y = create_sequences(scaled_data)
X = X.reshape((X.shape[0], X.shape[1], 1))
[ ]: split = int(len(X) * 0.8)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]
[ ]: # Build and Train LSTM Model
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(LSTM(50))
model.add(Dense(1))
[ ]: # compile the model
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train, epochs=10, batch_size=64, validation_data=(X_test, y_test), verbose=1)
Epoch 1/10
37/37 - 5s 39ms/step - loss: 0.0199 - val_loss: 0.0027
Epoch 2/10
37/37 - 0s 10ms/step - loss: 6.1472e-04 - val_loss: 0.0011
Epoch 3/10
37/37 - 1s 10ms/step - loss: 4.3429e-04 - val_loss: 8.1909e-04
Epoch 4/10
37/37 - 0s 10ms/step - loss: 4.6716e-04 - val_loss: 5.1708e-04
Epoch 5/10
37/37 - 1s 10ms/step - loss: 4.1046e-04 - val_loss: 7.7883e-04
Epoch 6/10
37/37 - 0s 10ms/step - loss: 3.9663e-04 - val_loss: 0.0013
Epoch 7/10
37/37 - 1s 10ms/step - loss: 4.5193e-04 - val_loss: 6.8477e-04
Epoch 8/10
37/37 - 1s 10ms/step - loss: 3.9807e-04 - val_loss: 8.4000e-04
Epoch 9/10
37/37 - 0s 10ms/step - loss: 3.3438e-04 - val_loss: 6.6127e-04
Epoch 10/10
37/37 - 1s 10ms/step - loss: 3.2513e-04 - val_loss: 5.8121e-04
[ ]: # Evaluation
# Predict
y_pred = model.predict(X_test)
y_pred_inv = scaler.inverse_transform(y_pred.reshape(-1, 1))
y_test_inv = scaler.inverse_transform(y_test.reshape(-1, 1))
# Metrics
mae = mean_absolute_error(y_test_inv, y_pred_inv)
rmse = np.sqrt(mean_squared_error(y_test_inv, y_pred_inv))
r2 = r2_score(y_test_inv, y_pred_inv)
print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"R² Score: {r2:.2f}")
19/19 - 0s 14ms/step
MAE: 1.15
RMSE: 1.54
R² Score: 0.98
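The test-set scores reported above can be gathered into one table for a side-by-side view. One caveat: the LSTM numbers are not directly comparable to the others, because it is evaluated one step ahead on each test window, while Prophet, ARIMA, and SARIMA forecast the entire 604-day test span in one shot, a much harder task.

```python
import pandas as pd

# Metrics copied from the evaluation cells above
results = pd.DataFrame(
    {
        'MAE':  [12.15, 10.36, 9.21, 1.15],
        'RMSE': [13.19, 14.71, 13.25, 1.54],
        'R2':   [-0.14, -0.42, -0.15, 0.98],
    },
    index=['Prophet', 'ARIMA', 'SARIMA', 'LSTM'],
)
print(results.sort_values('RMSE'))  # LSTM first, ARIMA last
```

Among the three long-horizon models, SARIMA edges out Prophet and ARIMA on MAE, but all three have negative R², i.e. none beats a constant-mean baseline over the full test span.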