Demand forecasting model
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import plotly.express as px
import statsmodels.api as sm
import plotly.graph_objects as go
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.metrics import mean_absolute_error,
mean_absolute_percentage_error,\
mean_squared_error
import math
from sklearn.metrics import r2_score
from statsmodels.tsa.stattools import acf, pacf
from scipy.fftpack import fft
C:\Users\Lenovo\anaconda3\lib\site-packages\pandas\core\computation\
expressions.py:20: UserWarning: Pandas requires version '2.7.3' or
newer of 'numexpr' (version '2.7.1' currently installed).
from pandas.core.computation.check import NUMEXPR_INSTALLED
# Load the weekly forecast workbook. A raw string keeps the Windows path
# backslashes literal (sequences such as "\P" and "\D" are not valid escapes
# in a normal string literal), and the literal stays on one line because a
# quoted string cannot contain a raw line break.
# NOTE(review): assumes the wrapped file name is "Data for Forecast.xlsx"
# (single space at the wrap point) - confirm against the file on disk.
df = pd.read_excel(r"E:\Practice_datasets\Lenovo\Data for Forecast.xlsx")
# Report the number of missing values per column before any cleaning.
print(df.isna().sum())
df.head()
Unnamed: 0
Date (week beginning)
Selected Period
dtype: int-
0
0
0
Unnamed: 0 Date (week beginning)
1
Dec 26, 2011
2
Jan 2, 2012
3
Jan 9, 2012
4
Jan 16, 2012
5
Jan 23, 2012
Selected Period-
# Shorten the verbose week-start column label to 'Date'; later cells refer to
# the column by this name.
df = df.rename(columns={'Date (week beginning)':'Date'})
df.head()
0
1
2
3
4
Unnamed: 0
1
2
3
4
5
Dec 26,
Jan 2,
Jan 9,
Jan 16,
Jan 23,
Date-
Selected Period-
# 'Date' still holds text such as "Jan 2, 2012" at this point, so calling
# min()/max() on the raw column compares strings alphabetically instead of
# chronologically. Parse into a temporary Series (df itself is left
# untouched) so the printed range is the real first and last week.
# Values that cannot be parsed become NaT and are ignored by min()/max().
parsed_dates = pd.to_datetime(df['Date'], errors='coerce')
print(parsed_dates.min())
print(parsed_dates.max())
Apr 1, 2013
Sep 9, 2013
df.info()
RangeIndex: 119 entries, 0 to 118
Data columns (total 3 columns):
#
Column
Non-Null Count
--- ------------------0
Unnamed: 0
119 non-null
1
Date
119 non-null
2
Selected Period 119 non-null
dtypes: int64(2), object(1)
memory usage: 2.9+ KB
Dtype
----int64
object
int64
# Histogram of the 'Selected Period' values, normalised to a probability
# density and split into at most 30 bins.
fig = px.histogram(
    df,
    x='Selected Period',
    nbins=30,
    histnorm='probability density',
)
# Customize the layout. The title previously carried the template
# placeholder "column_name"; it now names the column that is plotted.
fig.update_layout(
    title='Distribution Plot of "Selected Period"',
    xaxis_title='Value',
    yaxis_title='Probability Density',
)
# Show the plot
fig.show()
{"config":{"plotlyServerURL":"https://plot.ly"},"data":
[{"alignmentgroup":"True","bingroup":"x","histnorm":"probability
density","hovertemplate":"Selected
Period=%{x}count=%{y}","legendgroup":"","marker":
{"color":"#636efa"},"name":"","nbinsx":30,"offsetgroup":"","orientatio
n":"v","showlegend":false,"type":"histogram","x":
[-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,
-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
,-,-],"xaxis":"x","yaxis":"y"}],"layout":
{"barmode":"relative","legend":{"tracegroupgap":0},"margin":
{"t":60},"template":{"data":{"bar":[{"error_x":
{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":
{"color":"#E5ECF6","width":0.5}},"type":"bar"}],"barpolar":[{"marker":
{"line":{"color":"#E5ECF6","width":0.5}},"type":"barpolar"}],"carpet":
[{"aaxis":
{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","min
orgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":
{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","min
orgridcolor":"white","startlinecolor":"#2a3f5f"},"type":"carpet"}],"ch
oropleth":[{"colorbar":
{"outlinewidth":0,"ticks":""},"type":"choropleth"}],"contour":
[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],
[1,"#f0f921"]],"type":"contour"}],"contourcarpet":[{"colorbar":
{"outlinewidth":0,"ticks":""},"type":"contourcarpet"}],"heatmap":
[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],
[1,"#f0f921"]],"type":"heatmap"}],"heatmapgl":[{"colorbar":
{"outlinewidth":0,"ticks":""},"colorscale":[[0,"#0d0887"],
[-,"#46039f"],[-,"#7201a8"],
[-,"#9c179e"],[-,"#bd3786"],
[-,"#d8576b"],[-,"#ed7953"],
[-,"#fb9f3a"],[-,"#fdca26"],
[1,"#f0f921"]],"type":"heatmapgl"}],"histogram":[{"marker":
{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"histogram"}],"histogram2d":
[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],
[1,"#f0f921"]],"type":"histogram2d"}],"histogram2dcontour":
[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],
[1,"#f0f921"]],"type":"histogram2dcontour"}],"mesh3d":[{"colorbar":
{"outlinewidth":0,"ticks":""},"type":"mesh3d"}],"parcoords":[{"line":
{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"parcoords"}],"pie":
[{"automargin":true,"type":"pie"}],"scatter":[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scatter"}],"scatter3d":
[{"line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":
{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scatter3d"}],"scattercarpet":
[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scattercarpet"}],"scattergeo":
[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scattergeo"}],"scattergl":
[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scattergl"}],"scattermapbox":
[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scattermapbox"}],"scatterpolar"
:[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scatterpolar"}],"scatterpolargl
":[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scatterpolargl"}],"scatterterna
ry":[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scatterternary"}],"surface":
[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],
[1,"#f0f921"]],"type":"surface"}],"table":[{"cells":{"fill":
{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":
{"color":"#C8D4E3"},"line":
{"color":"white"}},"type":"table"}]},"layout":{"annotationdefaults":
{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"autotypenumbers
":"strict","coloraxis":{"colorbar":
{"outlinewidth":0,"ticks":""}},"colorscale":{"diverging":
[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],
[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],
[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]],"sequential":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],[1,"#f0f921"]],"sequentialminus":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],[1,"#f0f921"]]},"colorway":
["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692"
,"#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"geo":
{"bgcolor":"white","lakecolor":"white","landcolor":"#E5ECF6","showlake
s":true,"showland":true,"subunitcolor":"white"},"hoverlabel":
{"align":"left"},"hovermode":"closest","mapbox":
{"style":"light"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","po
lar":{"angularaxis":
{"gridcolor":"white","linecolor":"white","ticks":""},"bgcolor":"#E5ECF
6","radialaxis":
{"gridcolor":"white","linecolor":"white","ticks":""}},"scene":
{"xaxis":
{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"lineco
lor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"}
,"yaxis":
{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"lineco
lor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"}
,"zaxis":
{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"lineco
lor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"}
},"shapedefaults":{"line":{"color":"#2a3f5f"}},"ternary":{"aaxis":
{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":
{"gridcolor":"white","linecolor":"white","ticks":""},"bgcolor":"#E5ECF
6","caxis":
{"gridcolor":"white","linecolor":"white","ticks":""}},"title":
{"x":5.0e-2},"xaxis":
{"automargin":true,"gridcolor":"white","linecolor":"white","ticks":"",
"title":
{"standoff":15},"zerolinecolor":"white","zerolinewidth":2},"yaxis":
{"automargin":true,"gridcolor":"white","linecolor":"white","ticks":"",
"title":
{"standoff":15},"zerolinecolor":"white","zerolinewidth":2}}},"title":
{"text":"Distribution Plot of \"column_name\""},"xaxis":
{"anchor":"y","domain":[0,1],"title":{"text":"Value"}},"yaxis":
{"anchor":"x","domain":[0,1],"title":{"text":"Probability Density"}}}}
# Box plot of 'Selected Period' so that extreme values stand out.
fig = go.Figure()
fig.add_trace(go.Box(y=df['Selected Period']))
# Title the figure, then render it.
fig.update_layout(title='Box Plot shwoing distribution of visits')
fig.show()
{"config":{"plotlyServerURL":"https://plot.ly"},"data":
[{"type":"box","y":
[-,-,-,-,-,-,-,-
0,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
,-,-]}],"layout":{"template":{"data":{"bar":[{"error_x":
{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":
{"color":"#E5ECF6","width":0.5}},"type":"bar"}],"barpolar":[{"marker":
{"line":{"color":"#E5ECF6","width":0.5}},"type":"barpolar"}],"carpet":
[{"aaxis":
{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","min
orgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":
{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","min
orgridcolor":"white","startlinecolor":"#2a3f5f"},"type":"carpet"}],"ch
oropleth":[{"colorbar":
{"outlinewidth":0,"ticks":""},"type":"choropleth"}],"contour":
[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],
[1,"#f0f921"]],"type":"contour"}],"contourcarpet":[{"colorbar":
{"outlinewidth":0,"ticks":""},"type":"contourcarpet"}],"heatmap":
[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],
[1,"#f0f921"]],"type":"heatmap"}],"heatmapgl":[{"colorbar":
{"outlinewidth":0,"ticks":""},"colorscale":[[0,"#0d0887"],
[-,"#46039f"],[-,"#7201a8"],
[-,"#9c179e"],[-,"#bd3786"],
[-,"#d8576b"],[-,"#ed7953"],
[-,"#fb9f3a"],[-,"#fdca26"],
[1,"#f0f921"]],"type":"heatmapgl"}],"histogram":[{"marker":
{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"histogram"}],"histogram2d":
[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],
[1,"#f0f921"]],"type":"histogram2d"}],"histogram2dcontour":
[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],
[1,"#f0f921"]],"type":"histogram2dcontour"}],"mesh3d":[{"colorbar":
{"outlinewidth":0,"ticks":""},"type":"mesh3d"}],"parcoords":[{"line":
{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"parcoords"}],"pie":
[{"automargin":true,"type":"pie"}],"scatter":[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scatter"}],"scatter3d":
[{"line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":
{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scatter3d"}],"scattercarpet":
[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scattercarpet"}],"scattergeo":
[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scattergeo"}],"scattergl":
[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scattergl"}],"scattermapbox":
[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scattermapbox"}],"scatterpolar"
:[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scatterpolar"}],"scatterpolargl
":[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scatterpolargl"}],"scatterterna
ry":[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scatterternary"}],"surface":
[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],
[1,"#f0f921"]],"type":"surface"}],"table":[{"cells":{"fill":
{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":
{"color":"#C8D4E3"},"line":
{"color":"white"}},"type":"table"}]},"layout":{"annotationdefaults":
{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"autotypenumbers
":"strict","coloraxis":{"colorbar":
{"outlinewidth":0,"ticks":""}},"colorscale":{"diverging":
[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],
[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],
[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]],"sequential":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],[1,"#f0f921"]],"sequentialminus":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],[1,"#f0f921"]]},"colorway":
["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692"
,"#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"geo":
{"bgcolor":"white","lakecolor":"white","landcolor":"#E5ECF6","showlake
s":true,"showland":true,"subunitcolor":"white"},"hoverlabel":
{"align":"left"},"hovermode":"closest","mapbox":
{"style":"light"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","po
lar":{"angularaxis":
{"gridcolor":"white","linecolor":"white","ticks":""},"bgcolor":"#E5ECF
6","radialaxis":
{"gridcolor":"white","linecolor":"white","ticks":""}},"scene":
{"xaxis":
{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"lineco
lor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"}
,"yaxis":
{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"lineco
lor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"}
,"zaxis":
{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"lineco
lor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"}
},"shapedefaults":{"line":{"color":"#2a3f5f"}},"ternary":{"aaxis":
{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":
{"gridcolor":"white","linecolor":"white","ticks":""},"bgcolor":"#E5ECF
6","caxis":
{"gridcolor":"white","linecolor":"white","ticks":""}},"title":
{"x":5.0e-2},"xaxis":
{"automargin":true,"gridcolor":"white","linecolor":"white","ticks":"",
"title":
{"standoff":15},"zerolinecolor":"white","zerolinewidth":2},"yaxis":
{"automargin":true,"gridcolor":"white","linecolor":"white","ticks":"",
"title":
{"standoff":15},"zerolinecolor":"white","zerolinewidth":2}}},"title":
{"text":"Box Plot shwoing distribution of visits"}}}
# Summary statistics with extra upper percentiles to expose the right tail.
# The column label is kept on one line: a quoted string cannot contain a raw
# line break, and the column is named 'Selected Period' (single space).
df['Selected Period'].describe(percentiles=[0.25, 0.5, 0.75, 0.9, .96, .99])
count
mean
std
-e-e-e+06
min-e+06
25%-e+07
50%-e+07
75%-e+07
90%-e+07
96%-e+07
99%-e+07
max-e+07
Name: Selected Period, dtype: float64
Clearly, there are outliers that need to be removed.
# Interquartile range of 'Selected Period': the distance between the 25th and
# 75th percentiles.
Q1 = df['Selected Period'].quantile(0.25)
Q3 = df['Selected Period'].quantile(0.75)
IQR = Q3 - Q1
# The call was left unclosed (its closing parenthesis was lost); restore it.
print(IQR)
df.shape
(119, 3)
def remove_outliers_iqr(df, column_name, multiplier=1.5):
    """Return the rows of *df* whose *column_name* value lies within the IQR fences.

    The fences are ``Q1 - multiplier * IQR`` and ``Q3 + multiplier * IQR``,
    where Q1 and Q3 are the 25th and 75th percentiles of the column. Both
    fences are inclusive. Rows whose value is NaN are dropped because NaN
    fails both comparisons.

    Args:
        df: DataFrame to filter; it is not modified.
        column_name: Label of the numeric column to test.
        multiplier: Width of the fences in IQR units (1.5 by default).

    Returns:
        A new DataFrame holding the kept rows under their original index
        labels.
    """
    values = df[column_name]
    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - multiplier * iqr
    upper_bound = q3 + multiplier * iqr
    within_fences = (values >= lower_bound) & (values <= upper_bound)
    # Copy so callers can add or overwrite columns on the result without
    # pandas treating it as a slice of the input (SettingWithCopyWarning).
    return df[within_fences].copy()
# Drop the rows whose 'Selected Period' value falls outside the 1.5 * IQR
# fences, then preview what is left.
df = remove_outliers_iqr(df, column_name='Selected Period', multiplier=1.5)
df.head()
1
2
3
4
5
Unnamed: 0
2
3
4
5
6
Jan 2,
Jan 9,
Jan 16,
Jan 23,
Jan 30,
Date-
Selected Period-
# Line chart of 'Selected Period' against 'Date'. The title literal is kept on
# a single line because a quoted string cannot contain a raw line break.
fig = px.line(
    df,
    x='Date',
    y='Selected Period',
    title='Genral Trend as per the Timeline',
    width=1000,
    height=600,
)
fig.show()
{"config":{"plotlyServerURL":"https://plot.ly"},"data":
[{"hovertemplate":"Date=%{x}Selected
Period=%{y}","legendgroup":"","line":
{"color":"#636efa","dash":"solid"},"mode":"lines","name":"","orientati
on":"v","showlegend":false,"type":"scatter","x":["Jan 2, 2012","Jan 9,
2012","Jan 16, 2012","Jan 23, 2012","Jan 30, 2012","Feb 6, 2012","Feb
13, 2012","Feb 20, 2012","Feb 27, 2012","Mar 5, 2012","Mar 12,
2012","Mar 19, 2012","Mar 26, 2012","Apr 2, 2012","Apr 9, 2012","Apr
23, 2012","Apr 30, 2012","May 7, 2012","May 14, 2012","May 21,
2012","May 28, 2012","Jun 4, 2012","Jun 11, 2012","Jun 18, 2012","Jun
25, 2012","Jul 2, 2012","Jul 9, 2012","Jul 16, 2012","Jul 23,
2012","Jul 30, 2012","Aug 6, 2012","Aug 13, 2012","Aug 20, 2012","Sep
3, 2012","Sep 10, 2012","Sep 17, 2012","Sep 24, 2012","Oct 1,
2012","Oct 8, 2012","Oct 15, 2012","Oct 22, 2012","Oct 29, 2012","Nov
12, 2012","Nov 19, 2012","Nov 26, 2012","Dec 3, 2012","Dec 10,
2012","Dec 17, 2012","Dec 24, 2012","Dec 31, 2012","Jan 7, 2013","Jan
14, 2013","Jan 21, 2013","Jan 28, 2013","Feb 4, 2013","Feb 11,
2013","Feb 18, 2013","Feb 25, 2013","Mar 4, 2013","Mar 11, 2013","Mar
18, 2013","Mar 25, 2013","Apr 1, 2013","Apr 8, 2013","Apr 15,
2013","Apr 22, 2013","Apr 29, 2013","May 6, 2013","May 13, 2013","May
20, 2013","May 27, 2013","Jun 3, 2013","Jun 10, 2013","Jun 17,
2013","Jun 24, 2013","Jul 1, 2013","Jul 8, 2013","Jul 15, 2013","Jul
22, 2013","Jul 29, 2013","Aug 5, 2013","Aug 12, 2013","Aug 19,
2013","Aug 26, 2013","Sep 2, 2013","Sep 9, 2013","Sep 16, 2013","Sep
23, 2013","Sep 30, 2013","Oct 7, 2013","Oct 14, 2013","Oct 21,
2013","Oct 28, 2013","Nov 4, 2013","Nov 18, 2013","Nov 25, 2013","Dec
2, 2013","Dec 9, 2013","Dec 16, 2013","Dec 23, 2013","Dec 30,
2013","Jan 6, 2014","Jan 13, 2014","Jan 20, 2014","Jan 27, 2014","Feb
3, 2014","Feb 10, 2014","Feb 17, 2014","Feb 24, 2014","Mar 3,
2014","Mar 10, 2014","Mar 17, 2014","Mar 24, 2014","Mar 31,
2014"],"xaxis":"x","y":
[-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-],"yaxis":"y"}],"layout"
:{"height":600,"legend":{"tracegroupgap":0},"template":{"data":{"bar":
[{"error_x":{"color":"#2a3f5f"},"error_y":
{"color":"#2a3f5f"},"marker":{"line":
{"color":"#E5ECF6","width":0.5}},"type":"bar"}],"barpolar":[{"marker":
{"line":{"color":"#E5ECF6","width":0.5}},"type":"barpolar"}],"carpet":
[{"aaxis":
{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","min
orgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":
{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","min
orgridcolor":"white","startlinecolor":"#2a3f5f"},"type":"carpet"}],"ch
oropleth":[{"colorbar":
{"outlinewidth":0,"ticks":""},"type":"choropleth"}],"contour":
[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],
[1,"#f0f921"]],"type":"contour"}],"contourcarpet":[{"colorbar":
{"outlinewidth":0,"ticks":""},"type":"contourcarpet"}],"heatmap":
[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],
[1,"#f0f921"]],"type":"heatmap"}],"heatmapgl":[{"colorbar":
{"outlinewidth":0,"ticks":""},"colorscale":[[0,"#0d0887"],
[-,"#46039f"],[-,"#7201a8"],
[-,"#9c179e"],[-,"#bd3786"],
[-,"#d8576b"],[-,"#ed7953"],
[-,"#fb9f3a"],[-,"#fdca26"],
[1,"#f0f921"]],"type":"heatmapgl"}],"histogram":[{"marker":
{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"histogram"}],"histogram2d":
[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],
[1,"#f0f921"]],"type":"histogram2d"}],"histogram2dcontour":
[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],
[1,"#f0f921"]],"type":"histogram2dcontour"}],"mesh3d":[{"colorbar":
{"outlinewidth":0,"ticks":""},"type":"mesh3d"}],"parcoords":[{"line":
{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"parcoords"}],"pie":
[{"automargin":true,"type":"pie"}],"scatter":[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scatter"}],"scatter3d":
[{"line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":
{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scatter3d"}],"scattercarpet":
[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scattercarpet"}],"scattergeo":
[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scattergeo"}],"scattergl":
[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scattergl"}],"scattermapbox":
[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scattermapbox"}],"scatterpolar"
:[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scatterpolar"}],"scatterpolargl
":[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scatterpolargl"}],"scatterterna
ry":[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scatterternary"}],"surface":
[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],
[1,"#f0f921"]],"type":"surface"}],"table":[{"cells":{"fill":
{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":
{"color":"#C8D4E3"},"line":
{"color":"white"}},"type":"table"}]},"layout":{"annotationdefaults":
{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"autotypenumbers
":"strict","coloraxis":{"colorbar":
{"outlinewidth":0,"ticks":""}},"colorscale":{"diverging":
[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],
[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],
[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]],"sequential":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],[1,"#f0f921"]],"sequentialminus":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],[1,"#f0f921"]]},"colorway":
["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692"
,"#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"geo":
{"bgcolor":"white","lakecolor":"white","landcolor":"#E5ECF6","showlake
s":true,"showland":true,"subunitcolor":"white"},"hoverlabel":
{"align":"left"},"hovermode":"closest","mapbox":
{"style":"light"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","po
lar":{"angularaxis":
{"gridcolor":"white","linecolor":"white","ticks":""},"bgcolor":"#E5ECF
6","radialaxis":
{"gridcolor":"white","linecolor":"white","ticks":""}},"scene":
{"xaxis":
{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"lineco
lor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"}
,"yaxis":
{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"lineco
lor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"}
,"zaxis":
{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"lineco
lor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"}
},"shapedefaults":{"line":{"color":"#2a3f5f"}},"ternary":{"aaxis":
{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":
{"gridcolor":"white","linecolor":"white","ticks":""},"bgcolor":"#E5ECF
6","caxis":
{"gridcolor":"white","linecolor":"white","ticks":""}},"title":
{"x":5.0e-2},"xaxis":
{"automargin":true,"gridcolor":"white","linecolor":"white","ticks":"",
"title":
{"standoff":15},"zerolinecolor":"white","zerolinewidth":2},"yaxis":
{"automargin":true,"gridcolor":"white","linecolor":"white","ticks":"",
"title":
{"standoff":15},"zerolinecolor":"white","zerolinewidth":2}}},"title":
{"text":"Genral Trend as per the Timeline"},"width":1000,"xaxis":
{"anchor":"y","domain":[0,1],"title":{"text":"Date"}},"yaxis":
{"anchor":"x","domain":[0,1],"title":{"text":"Selected Period"}}}}
#df['Date'] = pd.to_datetime(df['Date'],errors='coerce')
Feature Engineering
# Parse the 'Date' strings into datetime values; the `.dt` accessors used
# further down require a datetime column.
df['Date'] = pd.to_datetime(df['Date'])
df.dtypes
Unnamed: 0
Date
Selected Period
dtype: object
# NOTE: the three dtype values below appear to belong to the `df.dtypes`
# output printed just above; text extraction had wedged them between
# `df_1 =` and `df.copy()`, splitting the assignment in two. They are kept
# here as comments so nothing is lost.
#   int64
#   datetime64[ns]
#   int64
# Work on a copy so indexing by date does not alter `df`.
df_1 = df.copy()
# Set date index
df_1.set_index('Date', inplace=True)
# Order the rows chronologically by the new index.
df_1.sort_index(inplace=True)
df_1.head()
Date-
Unnamed: 0
Selected Period
2
3
4
5
6
-
# Additive decomposition of 'Selected Period' into trend, seasonal and
# residual components, followed by the standard four-panel plot.
# NOTE(review): period=2 models a cycle of two observations; the dates in
# this data look weekly - confirm this is the intended seasonal period.
result_add = seasonal_decompose(x=df_1['Selected Period'],
    model='additive', extrapolate_trend='freq', period=2)
# Enlarge the default figure size before the decomposition plot is drawn.
plt.rcParams.update({'figure.figsize': (12,8)})
result_add.plot().suptitle('Additive Decompose', fontsize=22)
plt.show()
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Plot the ACF (adjust the `lags` parameter as needed). The wrapped tail of
# the original trailing comment had lost its `#` and was bare text.
plot_acf(df_1['Selected Period'], lags=40)
plt.title('Autocorrelation Function (ACF) Plot')
plt.show()

# Plot the PACF (adjust the `lags` parameter as needed). The comment and the
# title previously labelled this partial-autocorrelation plot as "ACF".
plot_pacf(df_1['Selected Period'], lags=40)
plt.title('Partial Autocorrelation Function (PACF) Plot')
plt.show()
# Compute the ACF and PACF up to lag 40 and draw them side by side.
lag_acf = acf(df_1['Selected Period'], nlags=40)
lag_pacf = pacf(df_1['Selected Period'], nlags=40)
plt.figure(figsize=(12, 6))
# One entry per panel: subplot position, values to draw, panel title.
panels = (
    (121, lag_acf, 'Autocorrelation Function (ACF)'),
    (122, lag_pacf, 'Partial Autocorrelation Function (PACF)'),
)
for subplot_code, correlations, heading in panels:
    plt.subplot(subplot_code)
    plt.plot(correlations)
    # Dashed guides at zero and at -/+ 1.96 / sqrt(len(df_1)).
    for reference in (0, -1.96/np.sqrt(len(df_1)), 1.96/np.sqrt(len(df_1))):
        plt.axhline(y=reference, linestyle='--', color='gray')
    plt.title(heading)
plt.tight_layout()
plt.show()
# from statsmodels.tsa.statespace.sarimax import SARIMAX
# model = SARIMAX(df_1['Selected Period'], order=(p, d, q),
#                 seasonal_order=(P, D, Q, s))
# results = model.fit()
# results
# Perform FFT on the time series data
fft_result = fft(df_1['Selected Period'])
# Calculate the magnitude
magnitude = np.abs(fft_result)
# Create a frequency vector. It must have exactly one entry per FFT bin, so
# its length is taken from the transform itself instead of from `df`, which
# is a different frame from the one that was transformed.
n = len(fft_result)
freq = np.fft.fftfreq(n, 1)
# Stem plot of magnitude against frequency (sample spacing of 1).
plt.figure(figsize=(12, 6))
plt.stem(freq, magnitude)
plt.xlabel('Frequency')
plt.ylabel('Magnitude')
plt.title('FFT Analysis')
plt.grid(True)
plt.show()
from statsmodels.tsa.stattools import adfuller, kpss
# Run two stationarity tests on 'Selected Period' and print the p-value of
# each (element 1 of the returned result).
# ADF Test for stationarity
adf_test = adfuller(df_1['Selected Period'])
print(f'ADF Test p-value: {adf_test[1]}')
# KPSS Test for stationarity
kpss_test = kpss(df_1['Selected Period'])
print(f'KPSS Test p-value: {kpss_test[1]}')
ADF Test p-value:-
KPSS Test p-value: 0.1
:8: InterpolationWarning:
The test statistic is outside of the range of p-values available in
the
look-up table. The actual p-value is greater than the p-value
returned.
# Calendar features derived from the datetime 'Date' column.
df["day_of_week"] = df["Date"].dt.dayofweek
df["day_of_year"] = df["Date"].dt.dayofyear
df['month_year'] = df['Date'].dt.to_period('M')
df["quarter"] = df["Date"].dt.quarter
df["year"] = df["Date"].dt.year
# Get the day of the week as Monday, Tuesday, etc.
# 'DayOfWeek' and 'week_day' are built from the same expression; both are
# kept because code outside this view may read either name.
df['DayOfWeek'] = df['Date'].dt.strftime('%A')
df['week_day'] =df['Date'].dt.strftime('%A')
#df['month_year'] = df['Date'].dt.to_period('M')
# The preview call had lost its closing parenthesis; restore it.
df.head()
Unnamed: 0
2
3
4
5
6
month_year-
Date-
quarter
1
1
1
1
1
Selected Period-
day_of_week
0
0
0
0
0
day_of_year-
year DayOfWeek week_day
2012
Monday
Monday
2012
Monday
Monday
2012
Monday
Monday
2012
Monday
Monday
2012
Monday
Monday
df['Date'].dtype
dtype('Rolling_Average=%{y}
extra>","legendgroup":"","line":
{"color":"#636efa","dash":"solid"},"mode":"lines","name":"","orientati
on":"v","showlegend":false,"type":"scatter","x":["-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00"],"xaxis":"x","y":
[null,null,null,-e7,-e7,-e7,-,-e7,-e7,-,-e7,-e7,-,1
-e7,-e7,-e7,-e7,-,-
15e7,-e7,-e7,-e7,-e7,-e
7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-,-,-e7,-e7,-,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-,-e
7,-e7,-e7,-e7,-e7,-e7,-,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-,-e7,-e7,-e7,1
-e7,-,-,-e7,-e7,-e
7,-e7,-e7,-e7,-e7,-,-e7,-e7,-e7,-,-e7,-e7,-,-e7,-,-e7,-e7,-e7,-e7,-e7,-,-,-,-e7,-,-e7,-e7,-e7,-e7,-e7,-e7,-],"yaxis":"y"}],"layout":
{"height":600,"legend":{"tracegroupgap":0},"template":{"data":{"bar":
[{"error_x":{"color":"#2a3f5f"},"error_y":
{"color":"#2a3f5f"},"marker":{"line":
{"color":"#E5ECF6","width":0.5}},"type":"bar"}],"barpolar":[{"marker":
{"line":{"color":"#E5ECF6","width":0.5}},"type":"barpolar"}],"carpet":
[{"aaxis":
{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","min
orgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":
{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","min
orgridcolor":"white","startlinecolor":"#2a3f5f"},"type":"carpet"}],"ch
oropleth":[{"colorbar":
{"outlinewidth":0,"ticks":""},"type":"choropleth"}],"contour":
[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],
[1,"#f0f921"]],"type":"contour"}],"contourcarpet":[{"colorbar":
{"outlinewidth":0,"ticks":""},"type":"contourcarpet"}],"heatmap":
[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],
[1,"#f0f921"]],"type":"heatmap"}],"heatmapgl":[{"colorbar":
{"outlinewidth":0,"ticks":""},"colorscale":[[0,"#0d0887"],
[-,"#46039f"],[-,"#7201a8"],
[-,"#9c179e"],[-,"#bd3786"],
[-,"#d8576b"],[-,"#ed7953"],
[-,"#fb9f3a"],[-,"#fdca26"],
[1,"#f0f921"]],"type":"heatmapgl"}],"histogram":[{"marker":
{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"histogram"}],"histogram2d":
[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],
[1,"#f0f921"]],"type":"histogram2d"}],"histogram2dcontour":
[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],
[1,"#f0f921"]],"type":"histogram2dcontour"}],"mesh3d":[{"colorbar":
{"outlinewidth":0,"ticks":""},"type":"mesh3d"}],"parcoords":[{"line":
{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"parcoords"}],"pie":
[{"automargin":true,"type":"pie"}],"scatter":[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scatter"}],"scatter3d":
[{"line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":
{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scatter3d"}],"scattercarpet":
[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scattercarpet"}],"scattergeo":
[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scattergeo"}],"scattergl":
[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scattergl"}],"scattermapbox":
[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scattermapbox"}],"scatterpolar"
:[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scatterpolar"}],"scatterpolargl
":[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scatterpolargl"}],"scatterterna
ry":[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scatterternary"}],"surface":
[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],
[1,"#f0f921"]],"type":"surface"}],"table":[{"cells":{"fill":
{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":
{"color":"#C8D4E3"},"line":
{"color":"white"}},"type":"table"}]},"layout":{"annotationdefaults":
{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"autotypenumbers
":"strict","coloraxis":{"colorbar":
{"outlinewidth":0,"ticks":""}},"colorscale":{"diverging":
[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],
[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],
[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]],"sequential":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],[1,"#f0f921"]],"sequentialminus":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],[1,"#f0f921"]]},"colorway":
["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692"
,"#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"geo":
{"bgcolor":"white","lakecolor":"white","landcolor":"#E5ECF6","showlake
s":true,"showland":true,"subunitcolor":"white"},"hoverlabel":
{"align":"left"},"hovermode":"closest","mapbox":
{"style":"light"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","po
lar":{"angularaxis":
{"gridcolor":"white","linecolor":"white","ticks":""},"bgcolor":"#E5ECF
6","radialaxis":
{"gridcolor":"white","linecolor":"white","ticks":""}},"scene":
{"xaxis":
{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"lineco
lor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"}
,"yaxis":
{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"lineco
lor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"}
,"zaxis":
{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"lineco
lor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"}
},"shapedefaults":{"line":{"color":"#2a3f5f"}},"ternary":{"aaxis":
{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":
{"gridcolor":"white","linecolor":"white","ticks":""},"bgcolor":"#E5ECF
6","caxis":
{"gridcolor":"white","linecolor":"white","ticks":""}},"title":
{"x":5.0e-2},"xaxis":
{"automargin":true,"gridcolor":"white","linecolor":"white","ticks":"",
"title":
{"standoff":15},"zerolinecolor":"white","zerolinewidth":2},"yaxis":
{"automargin":true,"gridcolor":"white","linecolor":"white","ticks":"",
"title":
{"standoff":15},"zerolinecolor":"white","zerolinewidth":2}}},"title":
{"text":"Genral Trend of Rolling Averagee"},"width":1500,"xaxis":
{"anchor":"y","domain":[0,1],"title":{"text":"Date"}},"yaxis":
{"anchor":"x","domain":[0,1],"title":{"text":"Rolling_Average"}}}}
# RandomForestRegressor is the estimator fitted as `rf_regressor` further down.
from sklearn.ensemble import RandomForestRegressor
# Display the first rows of df_1 to inspect its columns before modelling.
df_1.head()
month_year
Date
Unnamed: 0
\
Selected Period
day_of_week
day_of_year
2
-
0
2
3
-
0
9
4
-
0
16
5
-
0
23
6
-
0
30
-
quarter
Date-
1
1
1
1
1
year DayOfWeek week_day-
Monday
Monday
Monday
Monday
Monday
Monday
Monday
Monday
Monday
Monday
# Cast month_year to int64; it is used as a numeric model feature below.
df_1['month_year'] = df_1['month_year'].astype('int64')
# Split by index value: rows whose index compares below the cutoff become the
# training set, all remaining rows become the test set.
# NOTE(review): the cutoff literal "-" looks like a redacted/placeholder
# date string - confirm the intended split date before re-running.
# NOTE(review): presumably df_1 is indexed by Date so this is a chronological
# split - verify against the cell that builds df_1.
training_mask = df_1.index < "-"
training_data = df_1.loc[training_mask]
print(training_data.shape)
testing_mask = df_1.index >= "-"
testing_data = df_1.loc[testing_mask]
print(testing_data.shape)
# Keep the test-set index values; they are passed to plot_predictions as the x axis.
testing_dates = testing_data.index
(90, 9)
(24, 9)
# Index values of the training rows (counterpart of testing_dates).
training_dates = training_data.index
# Features: four calendar-derived columns; target: the "Selected Period" column.
# The same column list is used for train and test so both matrices line up.
X_train = training_data[["day_of_year","month_year","quarter","year"]]
y_train = training_data["Selected Period"]
X_test = testing_data[["day_of_year","month_year","quarter","year"]]
y_test = testing_data["Selected Period"]
# Display the first training targets as a sanity check.
y_train.head()
Date-
-
-
Name: Selected Period, dtype: int64
# NOTE(review): RandomForestRegressor was already imported in an earlier cell;
# this repeated import is harmless but redundant.
from sklearn.ensemble import RandomForestRegressor
# Display the dtype of each feature column to confirm they are all numeric.
X_train.dtypes
day_of_year
month_year
quarter
year
dtype: object
int32
int64
int32
int32
# Create the random-forest regressor: 100 trees, with random_state fixed at 0
# so repeated runs build the same forest.
rf_regressor = RandomForestRegressor(n_estimators = 100, random_state
= 0)
# Fit the regressor on the training features (X_train) and target (y_train).
rf_regressor.fit(X_train, y_train)
RandomForestRegressor(random_state=0)
from xgboost import XGBRegressor
import lightgbm as lgb
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
# Time-ordered cross-validation: 4 splits, each validated on 10 samples.
cv_split = TimeSeriesSplit(n_splits=4, test_size=10)
model = XGBRegressor()
# Hyper-parameter grid: 5 x 5 x 6 x 3 = 450 combinations, each fitted once per
# CV split (1800 fits in total, plus the final refit on the full training set).
parameters = {
"max_depth": [3, 4, 6, 5, 10],
"learning_rate": [0.01, 0.05, 0.1, 0.2, 0.3],
"n_estimators": [100, 300, 500, 700, 900, 1000],
"colsample_bytree": [0.3, 0.5, 0.7]
}
# No `scoring` argument is passed, so candidates are ranked with the
# estimator's own `score` method.
grid_search = GridSearchCV(estimator=model, cv=cv_split,
param_grid=parameters)
# Run the exhaustive search on the training data only; the test set stays untouched.
grid_search.fit(X_train, y_train)
GridSearchCV(cv=TimeSeriesSplit(gap=0, max_train_size=None,
n_splits=4, test_size=10),
estimator=XGBRegressor(base_score=None, booster=None,
callbacks=None,
colsample_bylevel=None,
colsample_bynode=None,
colsample_bytree=None,
early_stopping_rounds=None,
enable_categorical=False,
eval_metric=None,
feature_types=None, gamma=None,
gpu_id=None,
grow_policy=None, impor...
max_cat_to_onehot=None,
max_delta_step=None,
max_depth=None, max_leaves=None,
min_child_weight=None,
missing=nan,
monotone_constraints=None,
n_estimators=100,
n_jobs=None,
num_parallel_tree=None,
predictor=None, random_state=None,
...),
param_grid={'colsample_bytree': [0.3, 0.5, 0.7],
'learning_rate': [0.01, 0.05, 0.1, 0.2, 0.3],
'max_depth': [3, 4, 6, 5, 10],
'n_estimators': [100, 300, 500, 700, 900,
1000]})
def evaluate_model(y_test, prediction):
    """Print MAE, MSE and MAPE of ``prediction`` measured against ``y_test``.

    Args:
        y_test: Ground-truth target values.
        prediction: Model outputs aligned element-wise with ``y_test``.

    Returns:
        None. The three scores are written to stdout, one per line, as
        ``"<NAME>: <value>"``.
    """
    # Label/metric pairs, listed in the order they are reported.
    scorers = (
        ("MAE", mean_absolute_error),
        ("MSE", mean_squared_error),
        ("MAPE", mean_absolute_percentage_error),
    )
    for label, scorer in scorers:
        print(f"{label}: {scorer(y_test, prediction)}")
def plot_predictions(testing_dates, y_test, prediction):
    """Draw actual and predicted values against date on one matplotlib axis.

    Args:
        testing_dates: Values placed in the ``"date"`` column (x axis).
        y_test: Ground-truth values placed in the ``"actual"`` column.
        prediction: Model outputs placed in the ``"prediction"`` column.

    Returns:
        None. A 10x5 inch figure is created and shown.
    """
    comparison = pd.DataFrame(
        {"date": testing_dates, "actual": y_test, "prediction": prediction}
    )
    _, axes = plt.subplots(figsize=(10, 5))
    # Both series share the same axes so they overlay each other.
    for column, series_label in (("actual", "Actual"), ("prediction", "Prediction")):
        comparison.plot(ax=axes, label=series_label, x="date", y=column)
    plt.legend(["Actual", "Prediction"])
    plt.show()
# Evaluating GridSearch results
# GridSearchCV.predict delegates to the best estimator found by the search.
prediction = grid_search.predict(X_test)
# Plot actual vs. predicted values on the held-out test set, then print MAE/MSE/MAPE.
plot_predictions(testing_dates, y_test, prediction)
evaluate_model(y_test, prediction)
MAE:-
MSE:-
MAPE:-
# Evaluating RandomForest results
# `prediction` is overwritten here with the random-forest output, so later
# cells that read `prediction` see the random-forest values.
prediction = rf_regressor.predict(X_test)
# Plot actual vs. predicted values on the held-out test set, then print MAE/MSE/MAPE.
plot_predictions(testing_dates, y_test, prediction)
evaluate_model(y_test, prediction)
MAE:-
MSE:-
MAPE:-
# Convert the training target to a plain NumPy array (the pandas index is dropped).
y_train = np.array(y_train)
# Calculate R2
# NOTE(review): `prediction` was last assigned from rf_regressor in the cell
# above, so this appears to be the random-forest R2 - confirm the cell order.
r2 = r2_score(y_test, prediction)
print("R2 Score:", r2)
R2 Score: -
# Smooth the series with a trailing moving average over `window_size` rows.
window_size = 4
# The column label is kept on a single line: a quoted string literal cannot
# span a line break, and the label must match the column name exactly.
rolling_average = df['Selected Period'].rolling(window=window_size).mean()
# The first window_size - 1 rows are NaN because their window is incomplete.
df['Rolling_Average'] = rolling_average
# Display the frame with the new Rolling_Average column.
df
Date
Selected Period
day_of_week
day_of_year
\
1
Unnamed: 0
-
-
0
2
2
-
-
0
9
3
-
-
0
16
4
-
-
0
23
5
-
-
0
30
..
...
...
...
...
...
114
-
-
0
62
115
-
-
0
69
116
-
-
0
76
117
-
-
0
83
118
-
-
0
90
1
2
3
4
5
..
month_year-
...
quarter
1
1
1
1
1
...
year DayOfWeek week_day
2012
Monday
Monday
2012
Monday
Monday
2012
Monday
Monday
2012
Monday
Monday
2012
Monday
Monday
...
...
...
Rolling_Average
NaN
NaN
NaN-
...
-
-
1
1
1
1
1
-
Monday
Monday
Monday
Monday
Monday
Monday
Monday
Monday
Monday
Monday
-
[114 rows x 11 columns]
# Line chart of the rolling average over time to show the general trend.
# The title is kept on one line (a quoted string cannot span a line break)
# and its spelling is corrected.
fig = px.line(
    df,
    x='Date',
    y='Rolling_Average',
    title='General Trend of Rolling Average',
    width=1000,
    height=500,
)
fig.show()
{"config":{"plotlyServerURL":"https://plot.ly"},"data":
[{"hovertemplate":"Date=%{x}Rolling_Average=%{y}
extra>","legendgroup":"","line":
{"color":"#636efa","dash":"solid"},"mode":"lines","name":"","orientati
on":"v","showlegend":false,"type":"scatter","x":["-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","2013-12-
02T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00","-T00:00:00"],"xaxis":"x","y":
[null,null,null,-e7,-e7,-e7,-,-e7,-e7,-,-e7,-e7,-,1
-e7,-e7,-e7,-e7,-,-e7,-e7,-e7,-e7,-e7,-e
7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-,-,-e7,-e7,-,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-,-e
7,-e7,-e7,-e7,-e7,-e7,-,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-e7,-,-e7,-e7,-e7,1
-e7,-,-,-e7,-e7,-e
7,-e7,-e7,-e7,-e7,-,-e7,-e7,-e7,-,-e7,-e7,-,-e7,-,-e7,-e7,-e7,-e7,-e7,-,-,-,-e7,-,-e7,-e7,-e7,-e7,-e7,-e7,-],"yaxis":"y"}],"layout":
{"height":500,"legend":{"tracegroupgap":0},"template":{"data":{"bar":
[{"error_x":{"color":"#2a3f5f"},"error_y":
{"color":"#2a3f5f"},"marker":{"line":
{"color":"#E5ECF6","width":0.5}},"type":"bar"}],"barpolar":[{"marker":
{"line":{"color":"#E5ECF6","width":0.5}},"type":"barpolar"}],"carpet":
[{"aaxis":
{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","min
orgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":
{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","min
orgridcolor":"white","startlinecolor":"#2a3f5f"},"type":"carpet"}],"ch
oropleth":[{"colorbar":
{"outlinewidth":0,"ticks":""},"type":"choropleth"}],"contour":
[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],
[1,"#f0f921"]],"type":"contour"}],"contourcarpet":[{"colorbar":
{"outlinewidth":0,"ticks":""},"type":"contourcarpet"}],"heatmap":
[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],
[1,"#f0f921"]],"type":"heatmap"}],"heatmapgl":[{"colorbar":
{"outlinewidth":0,"ticks":""},"colorscale":[[0,"#0d0887"],
[-,"#46039f"],[-,"#7201a8"],
[-,"#9c179e"],[-,"#bd3786"],
[-,"#d8576b"],[-,"#ed7953"],
[-,"#fb9f3a"],[-,"#fdca26"],
[1,"#f0f921"]],"type":"heatmapgl"}],"histogram":[{"marker":
{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"histogram"}],"histogram2d":
[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],
[1,"#f0f921"]],"type":"histogram2d"}],"histogram2dcontour":
[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],
[1,"#f0f921"]],"type":"histogram2dcontour"}],"mesh3d":[{"colorbar":
{"outlinewidth":0,"ticks":""},"type":"mesh3d"}],"parcoords":[{"line":
{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"parcoords"}],"pie":
[{"automargin":true,"type":"pie"}],"scatter":[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scatter"}],"scatter3d":
[{"line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":
{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scatter3d"}],"scattercarpet":
[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scattercarpet"}],"scattergeo":
[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scattergeo"}],"scattergl":
[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scattergl"}],"scattermapbox":
[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scattermapbox"}],"scatterpolar"
:[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scatterpolar"}],"scatterpolargl
":[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scatterpolargl"}],"scatterterna
ry":[{"marker":{"colorbar":
{"outlinewidth":0,"ticks":""}},"type":"scatterternary"}],"surface":
[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],
[1,"#f0f921"]],"type":"surface"}],"table":[{"cells":{"fill":
{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":
{"color":"#C8D4E3"},"line":
{"color":"white"}},"type":"table"}]},"layout":{"annotationdefaults":
{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"autotypenumbers
":"strict","coloraxis":{"colorbar":
{"outlinewidth":0,"ticks":""}},"colorscale":{"diverging":
[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],
[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],
[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]],"sequential":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],[1,"#f0f921"]],"sequentialminus":
[[0,"#0d0887"],[-,"#46039f"],
[-,"#7201a8"],[-,"#9c179e"],
[-,"#bd3786"],[-,"#d8576b"],
[-,"#ed7953"],[-,"#fb9f3a"],
[-,"#fdca26"],[1,"#f0f921"]]},"colorway":
["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692"
,"#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"geo":
{"bgcolor":"white","lakecolor":"white","landcolor":"#E5ECF6","showlake
s":true,"showland":true,"subunitcolor":"white"},"hoverlabel":
{"align":"left"},"hovermode":"closest","mapbox":
{"style":"light"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","po
lar":{"angularaxis":
{"gridcolor":"white","linecolor":"white","ticks":""},"bgcolor":"#E5ECF
6","radialaxis":
{"gridcolor":"white","linecolor":"white","ticks":""}},"scene":
{"xaxis":
{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"lineco
lor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"}
,"yaxis":
{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"lineco
lor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"}
,"zaxis":
{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"lineco
lor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"}
},"shapedefaults":{"line":{"color":"#2a3f5f"}},"ternary":{"aaxis":
{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":
{"gridcolor":"white","linecolor":"white","ticks":""},"bgcolor":"#E5ECF
6","caxis":
{"gridcolor":"white","linecolor":"white","ticks":""}},"title":
{"x":5.0e-2},"xaxis":
{"automargin":true,"gridcolor":"white","linecolor":"white","ticks":"",
"title":
{"standoff":15},"zerolinecolor":"white","zerolinewidth":2},"yaxis":
{"automargin":true,"gridcolor":"white","linecolor":"white","ticks":"",
"title":
{"standoff":15},"zerolinecolor":"white","zerolinewidth":2}}},"title":
{"text":"Genral Trend of Rolling Averagee"},"width":1000,"xaxis":
{"anchor":"y","domain":[0,1],"title":{"text":"Date"}},"yaxis":
{"anchor":"x","domain":[0,1],"title":{"text":"Rolling_Average"}}}}
# Training the XGBoost model
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit

# Candidate hyper-parameters: 3 x 3 x 3 = 27 combinations.
xg_param_grid = {
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7],
    'subsample': [0.8, 0.9, 1.0]
}
# Use forward-chaining splits so every validation fold lies after the rows it
# was trained on. A bare integer `cv` gives plain unshuffled K-fold, which
# trains on later rows and validates on earlier ones for time-ordered data.
# NOTE(review): assumes X_train rows are in chronological order - confirm.
grid_search = GridSearchCV(
    XGBRegressor(),
    xg_param_grid,
    cv=TimeSeriesSplit(n_splits=3),
)
grid_search.fit(X_train, y_train)
best_params = grid_search.best_params_

# Train a fresh model with the selected hyper-parameters on the full training set.
xgb_model = XGBRegressor(**best_params)
xgb_model.fit(X_train, y_train)
XGBRegressor(base_score=None, booster=None, callbacks=None,
colsample_bylevel=None, colsample_bynode=None,
colsample_bytree=None, early_stopping_rounds=None,
enable_categorical=False, eval_metric=None,
feature_types=None,
gamma=None, gpu_id=None, grow_policy=None,
importance_type=None,
interaction_constraints=None, learning_rate=0.2,
max_bin=None,
max_cat_threshold=None, max_cat_to_onehot=None,
max_delta_step=None, max_depth=5, max_leaves=None,
min_child_weight=None, missing=nan,
monotone_constraints=None,
n_estimators=100, n_jobs=None, num_parallel_tree=None,
predictor=None, random_state=None, ...)
# Predict on the held-out test features with the tuned XGBoost model.
predictions_xg = xgb_model.predict(X_test)
# MAE, plus RMSE (the square root of MSE, so it is in the target's own units).
mae_xg = mean_absolute_error(y_test, predictions_xg)
rmse_xg = np.sqrt(mean_squared_error(y_test, predictions_xg))
print('Mean Absolute error:',mae_xg,'\n')
print('Root Mean Squared error:', rmse_xg)
Mean Absolute error:-
Root Mean Squared error:-
# Plot actual vs. XGBoost-predicted values on the test set, then print MAE/MSE/MAPE
# using the same helpers as for the earlier models so the results are comparable.
plot_predictions(testing_dates, y_test, predictions_xg)
evaluate_model(y_test, predictions_xg)
MAE:-
MSE:-
MAPE:-
# Calculate R2 of the tuned XGBoost predictions on the test set
r2 = r2_score(y_test, predictions_xg)
print("R2 Score:", r2)
R2 Score: -
The MAPE came down greatly relative to the earlier model runs.