IMPORT IMPORTANT LIBRARIES AND DATASET:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import time
from IPython.display import clear_output
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
# Load the raw incident table. The file is read with latin-1 encoding.
terrorism = pd.read_csv(
    '/kaggle/input/global-terrorism-data-base/globalterrorismdb.csv',
    encoding='latin-1',
)
ANALYZE DATASET:
terrorism.info()
RangeIndex: 181691 entries, 0 to 181690
Columns: 135 entries, eventid to related
dtypes: float64(55), int64(22), object(58)
memory usage: 187.1+ MB
terrorism.head()
eventid
country \-
iyear
imonth
1970
7
2
NaN
0
NaN
1970
0
0
NaN
0
NaN
1970
1
0
NaN
0
NaN
1970
1
0
NaN
0
NaN
1970
1
0
NaN
0
NaN
country_txt
dbsource \
0 Dominican Republic
PGIS
1
Mexico
PGIS
2
Philippines
PGIS
region
iday approxdate
extended resolution
... addnotes scite1 scite2
scite3
2
...
NaN
NaN
NaN
NaN
1
...
NaN
NaN
NaN
NaN
5
...
NaN
NaN
NaN
NaN
3
PGIS
4
PGIS
0
1
2
3
4
INT_LOG
0
0
-9
-9
-9
Greece
8
...
NaN
NaN
NaN
NaN
Japan
4
...
NaN
NaN
NaN
NaN
INT_IDEO INT_MISC INT_ANY
0
0
0
1
1
1
-9
1
1
-9
1
1
-9
1
1
related
NaN
NaN
NaN
NaN
NaN
[5 rows x 135 columns]
RENAME IMPORTANT COLUMNS:
# Map the raw dataset column names onto the capitalised names used by the
# rest of the analysis. Columns not listed here keep their original names.
column_renames = {
    'eventid': 'Eventid',
    'iyear': 'Year',
    'imonth': 'Month',
    'iday': 'Day',
    'extended': 'Extended',
    'resolution': 'Resolution',
    'attacktype1_txt': 'Attacktype',
    'country_txt': 'Country',
    'region_txt': 'Region',
    'provstate': 'Provstate',
    'city': 'City',
    'crit1': 'Crit1',
    'crit2': 'Crit2',
    'crit3': 'Crit3',
    'multiple': 'Multiple',
    'success': 'Success',
    'suicide': 'Suicide',
    'targtype1_txt': 'Targtype',
    'natlty1_txt': 'Natlty1',
    'natlty2_txt': 'Natlty2',
    'natlty3_txt': 'Natlty3',
    'gname': 'Gname',
    'gname2': 'Gname2',
    'gname3': 'Gname3',
    'guncertain1': 'Guncertain1',
    'guncertain2': 'Guncertain2',
    'guncertain3': 'Guncertain3',
    'claimed': 'Claimed',
    'weaptype1_txt': 'Weaptype',
    'weapsubtype1_txt': 'Weapsubtype',
    'nkill': 'Nkill',
    'nkillus': 'Nkillus',
    'nkillter': 'Nkillter',
    'nwound': 'Nwound',
    'nwoundus': 'Nwoundus',
    'nwoundte': 'Nwoundter',
    'property': 'Property',
    'propextent_txt': 'Propextent',
    'propvalue': 'Propvalue',
    'ishostkid': 'Ishostkid',
    'nhostkid': 'Nhostkid',
    'nhostkidus': 'Nhostkidus',
    'ransom': 'Ransom',
    'hostkidoutcome': 'Hostkidoutcome',
    'nreleased': 'Nreleased',
}
# Rename in place so `terrorism` itself carries the new column names.
terrorism.rename(columns=column_renames, inplace=True)
CREATING A NEW DATAFRAME BY SEPARATING TOP 20 IMPORTANT COLUMNS FOR
ANALYSIS:
# Working frame: the 20 columns used by the analysis below.
# .copy() makes `data` independent of `terrorism`, so the column assignments
# made later (e.g. data['Damage'] = ...) write to a real frame rather than to
# a slice of the original.
data = terrorism[[
    'Eventid', 'Year', 'Country', 'Region', 'Provstate',
    'City', 'Crit1', 'Crit2', 'Crit3',
    'Success', 'Suicide', 'Attacktype', 'Targtype',
    'Natlty1', 'Gname', 'Guncertain1',
    'Claimed', 'Weaptype', 'Nkill', 'Nwound',
]].copy()
data.head()
Eventid
Year
Country
Region
\
0
-
1970
Dominican Republic
Central America & Caribbean
1
-
1970
Mexico
North America
2
-
1970
Philippines
Southeast Asia
3
-
1970
Greece
Western Europe
4
-
1970
Japan
East Asia
-
Provstate
NaN
Federal
Tarlac
Attica
Fukouka
City
Santo Domingo
Mexico city
Unknown
Athens
Fukouka
Crit1
1
1
1
1
1
Attacktype
Assassination
Hostage Taking (Kidnapping)
Assassination
Bombing/Explosion
Facility/Infrastructure Attack
Crit2
1
1
1
1
1
Crit3
1
1
1
1
1
Success
1
1
1
1
1
Suicide
0
0
0
0
0
Targtype
Private Citizens & Property
Government (Diplomatic)
Journalists & Media
Government (Diplomatic)
Government (Diplomatic)
\
\
Natlty1
Gname
Guncertain1
\
0
Dominican Republic
MANO-D
0.0
1
Belgium
23rd of September Communist League
0.0
2
United States
Unknown
0.0
3
United States
Unknown
0.0
4
United States
Unknown
0.0
0
1
Claimed
NaN
NaN
Weaptype
Unknown
Unknown
Nkill
1.0
0.0
Nwound
0.0
0.0
2
3
4
NaN
NaN
NaN
Unknown
Explosives
Incendiary
1.0
NaN
NaN
0.0
NaN
NaN
data.tail(-
Eventid-
Year-
Provstate
Suicide \
181686 Middle Shebelle-
Lattakia-
Maguindanao-
Manipur-
Maguindanao-
Claimed
1.0
0.0
0.0
Region
Sub-Saharan Africa
Middle East & North Africa
Southeast Asia
South Asia
Southeast Asia
\
City
Crit1
Crit2
Crit3
Success
Ceelka Geelow
1
1
0
1
Jableh
1
1
0
1
Kubentog
1
1
1
1
Imphal
1
1
1
0
Cotabato City
1
1
1
0
Attacktype
Armed Assault
Bombing/Explosion
Facility/Infrastructure Attack
Bombing/Explosion
Bombing/Explosion
Natlty1
Guncertain1 \
181686
Somalia-
Russia- Philippines-
India- Philippines-
Country
Somalia
Syria
Philippines
India
Philippines
Targtype
Military
Military
Private Citizens & Property
Government (General)
Unknown
Gname
Al-Shabaab
Muslim extremists
Bangsamoro Islamic Freedom Movement (BIFM)
Weaptype
Firearms
Explosives
Incendiary
Unknown
Unknown
Nkill
1.0
2.0
0.0
Nwound
2.0
7.0
0.0
\
-
0.0
0.0
Explosives
Explosives
0.0
0.0
0.0
0.0
FIND THE COUNTRY WITH THE HIGHEST NUMBER OF TERRORIST ATTACKS:
print("Country with the Highest No. of Terrorist
Attacks:",data['Country'].value_counts())
Country with the Highest No. of Terrorist Attacks: Country
Iraq
24636
Pakistan
14368
Afghanistan
12731
India
11960
Colombia
8306
...
International
1
Wallis and Futuna
1
South Vietnam
1
Andorra
1
Antigua and Barbuda
1
Name: count, Length: 205, dtype: int64
print("Country with the Highest No. of Terrorist
Attacks:",data['Country'].value_counts().index[0])
Country with the Highest No. of Terrorist Attacks: Iraq
FIND REGION WITH THE HIGHEST NUMBER OF TERRORIST ATTACKS:
print("Region with the Highest No. of Terrorist
Attacks:",data['Region'].value_counts().index[0])
Region with the Highest No. of Terrorist Attacks: Middle East & North
Africa
FIND THE MAXIMUM NUMBER OF PEOPLE KILLED IN A SINGLE TERRORIST ATTACK:
print("Maximum No. of people killed by a single terrorist Attack
are:",data['Nkill'].max())
Maximum No. of people killed by a single terrorist Attack are: 1570.0
# Casualties per incident: number killed plus number wounded.
# NOTE: with plain `+`, a NaN in either column makes the result NaN, so an
# incident with a known death toll but unknown wounded count gets no Damage
# value. Later groupby(...).sum() totals then leave those rows out
# (pandas skips NaN when summing by default).
data['Damage']=data['Nkill']+data['Nwound']
# Display the new column.
data['Damage']
0
1
2
3
1.0
0.0
1.0
NaN
4
NaN
..-
Name: Damage, Length: 181691, dtype: float64
# Normalise a few category labels before aggregating.
# The dataset spells the country "South Vietnam"; the earlier pattern
# 'South Veitnam' never matched any row, so nothing was merged.
data['Country'] = data['Country'].replace('South Vietnam', 'Vietnam', regex=True)
# Collapse the long "Vehicle ..." weapon label to just "Vehicle".
data['Weaptype'] = data['Weaptype'].replace('Vehicle.*', 'Vehicle', regex=True)
# Collapse every "Hostage Taking (...)" variant into one "Hostage Taking" label.
data['Attacktype'] = data['Attacktype'].replace(
    'Hostage Taking.*', 'Hostage Taking', regex=True)
FIND DOMINANT ATTACK TYPE:
# Attack types that get their own pie wedge; every other value is pooled.
main_attack_types = [
    'Bombing/Explosion',
    'Armed Assault',
    'Assassination',
    'Hostage Taking',
    'Facility/Infrastructure Attack',
]


def _pool_minor_attack_type(kind):
    """Return ``kind`` if it is one of the main attack types, else 'Others'."""
    if kind in main_attack_types:
        return kind
    return 'Others'


atk_filtered = data['Attacktype'].apply(_pool_minor_attack_type)
# Incident counts per pooled category, largest first.
attack_type = atk_filtered.value_counts().tolist()
attack_type
[88255, 42669, 19312, 12149, 10356, 8950]
# Pie chart of the share of incidents per (pooled) attack type.
# Labels and sizes come from the same value_counts() result, so a wedge can
# never be paired with the wrong name if the ranking changes.
attack_counts = atk_filtered.value_counts()
labels = attack_counts.index.tolist()
sizes = [(count * 100) / len(data['Attacktype']) for count in attack_counts.tolist()]

# 'Others' is always drawn grey; the remaining wedges take Set2 colours in order.
set2_colors = iter(sns.color_palette('Set2', 8))
pie_colors = [(0.58, 0.64, 0.65) if label == 'Others' else next(set2_colors)
              for label in labels]
# Pull the largest wedge (first entry) slightly out of the pie.
explode = [0.05] + [0] * (len(labels) - 1)

fig, ax = plt.subplots(figsize=(10, 10))
patches, texts, autotexts = ax.pie(
    sizes, labels=labels, autopct='%1.1f%%', startangle=-20,
    shadow=True, explode=explode,
    colors=pie_colors,
    textprops={'fontsize': 15, 'weight': 'light', 'color': 'k'})
ax.axis('equal')
plt.title('Terrorist attack types', fontsize=25, pad=-70,
          color=sns.cubehelix_palette(8, start=.5, rot=-.75)[-3])
plt.tight_layout()
ax.legend(loc='lower right', framealpha=0.5, bbox_to_anchor=(1.8, 0.5, 0.1, 1),
          prop={'size': 14})
# plt.show() matches the other cells and works with non-GUI backends.
plt.show()
CHECK DAMAGE RATE w.r.t ATTACK TYPES:
# Fixed colour for each of the five main attack types (first five Set2 colours).
cat = [
    "Bombing/Explosion",
    "Armed Assault",
    "Assassination",
    "Hostage Taking",
    "Facility/Infrastructure Attack",
]
color_cat = sns.color_palette("Set2", 8)[:5]
color_cat_dict = {name: color for name, color in zip(cat, color_cat)}
color_cat_dict
{'Bombing/Explosion': (0.4,-,-),
'Armed Assault': -,-,-),
'Assassination': -,-,-),
'Hostage Taking': -,-,-),
'Facility/Infrastructure Attack': -,
-,-)}
# Total casualties per attack type, largest first.
damage_by_attack = data[["Attacktype", "Damage"]].groupby("Attacktype", as_index=False).sum()
table1 = damage_by_attack.sort_values(by="Damage", ascending=False)
# reset_index() without drop=True keeps the pre-sort row positions as an 'index' column.
table1 = table1.reset_index()
table-
index-
Attacktype
Bombing/Explosion
Armed Assault
Unknown
Assassination
Hostage Taking
Hijacking
Unarmed Assault
Facility/Infrastructure Attack
Damage-
# Incident count per attack type, looked up by name in table1's row order.
# The earlier hand-typed list had 8825 for Bombing/Explosion where the data
# has 88255, which inflated that row's damage rate tenfold.
inci = data["Attacktype"].value_counts().reindex(table1["Attacktype"].values).tolist()
table1["Incidents"] = np.array(inci)
# Mean casualties per incident for each attack type.
table1["Damage rate"] = table1["Damage"] / table1["Incidents"]
table1
index
Attacktype
rate
0
2
Bombing/Explosion-
Armed Assault-
Unknown-
Assassination-
Hostage Taking-
Hijacking-
Unarmed Assault- Facility/Infrastructure Attack-
labels=table1["Attacktype"].tolist()
labels
['Bombing/Explosion',
'Armed Assault',
Damage
Incidents
-
8825
-
42669
39606.0
7276
37209.0
19312
27732.0
12149
20642.0
659
14791.0
1015
6293.0
10356
Damage
'Unknown',
'Assassination',
'Hostage Taking',
'Hijacking',
'Unarmed Assault',
'Facility/Infrastructure Attack']
# Inputs for the attack-type damage chart.
x = np.arange(len(labels))
gray = (0.5, 0.64, 0.65)
dmg = table1["Damage"].tolist()
d_rate = table1["Damage rate"].tolist()
# Main attack types use their fixed colour; everything else is grey.
color_list = [color_cat_dict.get(name, gray) for name in labels]
# Bar chart of total casualties per attack type with the mean casualties per
# incident overlaid as a line on a second y-axis.
fig, ax1 = plt.subplots(figsize=(15, 8))

# Bars: total dead/injured per attack type.
ax1.bar(labels, dmg, color=color_list, align='center')

# Value label above each bar. The bar whose total is exactly 37209 gets its
# label drawn inside the bar in white instead (presumably to keep it clear of
# the overlaid line -- confirm against the rendered figure).
for i, v in enumerate(dmg):
    inside_bar = v == 37209
    ax1.text(i - 0.3, v - 13000 if inside_bar else v + 3000,
             str(round(v)), color='w' if inside_bar else 'k', fontweight='bold')

# Second axis: mean casualties per incident.
ax2 = ax1.twinx()
ax2.plot(labels, d_rate, linestyle='--', linewidth=4, marker='o',
         markerfacecolor='black', markersize=10,
         label='Mean of Dead/Injured People', color='#C44D51')
plt.title('Terrorist Attack Types and Damage', fontsize=25, pad=20,
          color=sns.cubehelix_palette(8, start=.5, rot=-.75)[-3])
ax1.set(xlabel='Types of Terrorist Attacks', ylabel='Number of Dead/Injured People')
ax1.set_xticklabels(labels, rotation=45)
plt.yticks(fontsize=10)
ax2.legend(loc='upper center')
# plt.show() matches the other cells and works with non-GUI backends.
plt.show()
table2=data[["Weaptype","Damage"]].groupby("Weaptype",as_index=False).
sum().sort_values(by="Damage",ascending=False)
table-
Weaptype
Explosives
Firearms
Unknown
Vehicle
Melee
Chemical
Incendiary
Biological
Sabotage Equipment
Other
Radiological
Fake Weapons
Damage-
df_count=data["Weaptype"].value_counts()
df_count
Weaptype
Explosives
92426
Firearms
58524
Unknown
15157
Incendiary
11135
Melee
3655
Chemical
321
Sabotage Equipment
141
Vehicle
136
Other
114
Biological
35
Fake Weapons
33
Radiological
14
Name: count, dtype: int64
df_count=df_count.reindex(table2["Weaptype"].values)
df_count
Weaptype
Explosives
92426
Firearms
58524
Unknown
15157
Vehicle
136
Melee
3655
Chemical
321
Incendiary
11135
Biological
35
Sabotage Equipment
141
Other
114
Radiological
14
Fake Weapons
33
Name: count, dtype: int64
table2["Weapcount"]=df_count.values
table-
Weaptype
Explosives
Firearms
Unknown
Vehicle
Melee
Chemical
Incendiary
Biological
Sabotage Equipment
Other
Radiological
Fake Weapons
Damage-
Weapcount-
table2["Weap_Damage_rate"]=table2["Damage"]/table2["Weapcount"]
table-
Weaptype
Explosives
Firearms
Unknown
Vehicle
Melee
Chemical
Incendiary
Biological
Sabotage Equipment
Other
Radiological
Fake Weapons
Damage-
Weapcount-
Weap_Damage_rate-
CHECK DAMAGE RATE w.r.t WEAPON TYPES:
weaplabels=table2["Weaptype"].tolist()
weaplabels
['Explosives',
'Firearms',
'Unknown',
'Vehicle',
'Melee',
'Chemical',
'Incendiary',
'Biological',
'Sabotage Equipment',
'Other',
'Radiological',
'Fake Weapons']
y=np.arange(len(weaplabels))
y
array([ 0,
1,
2,
3,
4,
5,
6,
weapdmg=table2["Damage"].tolist()
weapdmg
[-,-,
50167.0,
20297.0,
14489.0,
14377.0,
8898.0,
7,
8,
9, 10, 11])
814.0,
357.0,
225.0,
6.0,
1.0]
# Colour set-up for the weapon-type chart.
wgray = (0.5, 0.64, 0.65)
# Four highlighted weapon types. "Unknown" and "Incendiary" were fused into
# the single string "Unknown,Incendiary", which matched no weapon label and
# left both of those bars grey.
w_cat = ["Explosives", "Firearms", "Unknown", "Incendiary"]
# First four Set2 colours, one per highlighted type.
wcolorcat = sns.color_palette("Set2", 8)[:4:1]
wcolorcat
[(0.4,-,-),
-,-,-),
-,-,-),
-,-,-)]
wcolor_cat_dict=dict(zip(w_cat,wcolorcat))
wcolor_cat_dict
{'Explosives': (0.4,-,-),
'Firearms': -,-,-),
'Unknown,Incendiary': -,-,-)}
# One bar colour per weapon label: the mapped colour if present, grey otherwise.
wcolor_list = [wcolor_cat_dict.get(weapon, wgray) for weapon in weaplabels]
wcolor_list
[(0.4,-,-),
-,-,-),
(0.5, 0.64, 0.65),
(0.5, 0.64, 0.65),
(0.5, 0.64, 0.65),
(0.5, 0.64, 0.65),
(0.5, 0.64, 0.65),
(0.5, 0.64, 0.65),
(0.5, 0.64, 0.65),
(0.5, 0.64, 0.65),
(0.5, 0.64, 0.65),
(0.5, 0.64, 0.65)]
w_rate=table2["Weap_Damage_rate"].tolist()
w_rate
[-,-,-,-,-,-,-,-,-,-,-,-]
PLOTTING:
# Bar chart of total casualties per weapon type with the mean casualties per
# incident overlaid as a line on a second y-axis.
fig, ax1 = plt.subplots(figsize=(15, 8))

# Bars: total dead/injured per weapon type.
ax1.bar(weaplabels, weapdmg, color=wcolor_list, align='center')

# Value label above each bar. The `v == 37209` special case copied from the
# attack-type chart is dropped: no value in weapdmg as displayed equals 37209,
# so it never fired here.
for i, v in enumerate(weapdmg):
    ax1.text(i - 0.3, v + 3000, str(round(v)), color='k', fontweight='bold')

# Second axis: mean casualties per incident.
ax2 = ax1.twinx()
ax2.plot(weaplabels, w_rate, linestyle='--', linewidth=4, marker='o',
         markerfacecolor='black', markersize=10,
         label='Mean of Dead/Injured People', color='#C44D51')
plt.title('Weapon Types and Damage', fontsize=25, pad=20,
          color=sns.cubehelix_palette(8, start=.5, rot=-.75)[-3])
ax1.set(xlabel='Type of Weapons', ylabel='Number of Dead/Injured People')
ax1.set_xticklabels(weaplabels, rotation=45)
plt.yticks(fontsize=10)
ax2.legend(loc='upper center')
# plt.show() matches the other cells and works with non-GUI backends.
plt.show()
TOP 10 TARGETED COUNTRIES FROM 2000 TO 2011:
# Total casualties per country for incidents from the year 2000 onwards.
# NOTE(review): the filter has no upper bound, so it covers every year after
# 1999 present in the data -- confirm this matches the period in the headings.
# Written as one parenthesised expression: split over two lines, the column
# selection and groupby became a separate statement and `country_damage` was
# just the filtered, unaggregated frame.
country_damage = (
    data[data["Year"] > 1999][["Country", "Damage"]]
    .groupby("Country", as_index=False)
    .sum()
)
country_damage
0
1
2
3
4
.-
Country
Afghanistan
Albania
Algeria
Angola
Argentina
...
Western Sahara
Yemen
Yugoslavia
Zambia
Zimbabwe
Damage-
..-
[167 rows x 2 columns]
data_paint=country_damage.sort_values(by="Damage",ascending=False).hea
d(10)
data_paint
-
Country
Iraq
Afghanistan
Pakistan
India
United States
Nigeria
Syria
Yemen
Somalia
Philippines
Damage-
PLOTTING:
# Horizontal bar chart of the ten countries with the highest casualty totals.
fig, ax = plt.subplots(figsize=(12, 6))
# Both series are reversed together so the largest bar ends up on top and each
# country keeps its own value. Reversing only the names (`[::-1]` against
# `[::1]`) paired every country with another country's total.
ax.barh(data_paint['Country'][::-1], data_paint['Damage'][::-1], color='red')
plt.xticks(rotation=-45)
ax.set_ylabel('Countries', size=16)
ax.set_xlabel('Number of Dead/Injured People', size=16)
plt.title('Top 10 Attacked Countries From 2000 To 2011', fontsize=20, pad=10,
          color=sns.cubehelix_palette(8, start=.5, rot=-.75)[-3])
plt.show()
TOP 10 TARGETED NATIONALITIES FROM 2000 TO 2011:
# Total casualties per victim nationality for incidents from 2000 onwards.
# Written as one parenthesised expression: split over two lines, the column
# selection and groupby became a separate statement and `nalty_dmg` was just
# the filtered, unaggregated frame.
nalty_dmg = (
    data[data["Year"] > 1999][["Natlty1", "Damage"]]
    .groupby("Natlty1", as_index=False)
    .sum()
)
nalty_dmg
0
1
2
3
4
.-
Natlty1
Afghanistan
Albania
Algeria
Angola
Argentina
...
West Bank and Gaza Strip
Yemen
Yugoslavia
Zambia
Zimbabwe
Damage-
..-
[186 rows x 2 columns]
data["Natlty1"]
0
1
2
3
4
Dominican Republic
Belgium
United States
United States
United States
...
181686
Somalia
181687
Russia
181688
Philippines
181689
India
181690
Philippines
Name: Natlty1, Length: 181691, dtype: object
# Ten nationalities with the highest casualty totals, largest first.
data_paint_natly = nalty_dmg.sort_values(by="Damage", ascending=False).head(10)
data_paint_natly
Natlty1
Iraq
Afghanistan
Pakistan
India
United States
Nigeria
Syria
Damage-
-
Yemen
Somalia
Philippines
-
PLOTTING:
# Horizontal bar chart of the ten nationalities with the highest casualty totals.
fig, ax = plt.subplots(figsize=(12, 6))
# Bar lengths must come from the nationality table itself, reversed together
# with the labels. Using data_paint['Damage'] drew the per-country totals
# under nationality names, and in the opposite order.
ax.barh(data_paint_natly['Natlty1'][::-1], data_paint_natly['Damage'][::-1],
        color='blue')
plt.xticks(rotation=-45)
ax.set_ylabel('Nationality', size=16)
ax.set_xlabel('Number of Dead/Injured People', size=16)
plt.title('Top 10 Targeted Nationalities From 2000 To 2011', fontsize=20, pad=10,
          color=sns.cubehelix_palette(8, start=.5, rot=-.75)[-3])
plt.show()
THE SAFEST COUNTRIES FROM 2000 TO 2017 WHICH HAVE "0" DEAD/INJURED PEOPLE
CAUSED BY TERRORIST ATTACKS:
country_damage[country_damage["Damage"]==0]-
Country
Bahamas
Belize
Cyprus
Iceland
New Zealand
Damage-
-
Portugal
Slovenia
Swaziland
Togo
Vietnam
-
country_damage
0
1
2
3
4
.-
Country
Afghanistan
Albania
Algeria
Angola
Argentina
...
Western Sahara
Yemen
Yugoslavia
Zambia
Zimbabwe
Damage-
..-
[167 rows x 2 columns]
len(country_damage[country_damage["Damage"]==0])
10
TIME-SERIES TOP 10 ATTACKED COUNTRIES AND TOP 10 TARGETED NATIONALITIES:
def _top10_damage(rows, column):
    """Return the ten values of ``column`` with the largest summed Damage in ``rows``."""
    totals = rows[[column, 'Damage']].groupby(column, as_index=False).sum()
    return totals.sort_values(by='Damage', ascending=False).head(10)


def country_nationality(n):
    """Draw the top-10 attacked countries and top-10 targeted nationalities for year ``n``.

    Two horizontal bar charts are drawn side by side from the module-level
    ``data`` frame, restricted to rows whose ``Year`` equals ``n``:
    the left panel ranks ``Country`` by summed ``Damage``, the right panel
    ranks ``Natlty1`` by summed ``Damage``.

    Parameters
    ----------
    n : int
        Year to plot.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    title_color = sns.cubehelix_palette(8, start=.5, rot=-.75)[-3]

    # Both panels describe the same year. The nationality panel used to filter
    # on Year > 1999 regardless of n, so it showed the same data every call.
    year_rows = data[data['Year'] == n]
    data_paint = _top10_damage(year_rows, 'Country')
    data_paint_natly = _top10_damage(year_rows, 'Natlty1')

    fig, ax = plt.subplots(1, 2, figsize=(16, 4))

    # Left panel: countries. Labels and values are reversed together so the
    # largest bar is drawn at the top.
    ax[0].barh(data_paint['Country'][::-1], data_paint['Damage'][::-1],
               color=['red', 'green', '#344650', '#df2029', '#FFFC00',
                      '#E4405F', '#3B5999', '#00c300', '#ff0084'])
    ax[0].set_ylabel('Countries', size=16)
    ax[0].set_xlabel('Number of Dead/Injured People', size=16)
    ax[0].set_title('Top 10 Attacked Countries %d' % n,
                    fontsize=15, pad=10, color=title_color)
    plt.subplots_adjust(wspace=0.3)

    # Right panel: nationalities. Bar lengths come from the nationality table
    # itself; they used to be taken from the country table.
    ax[1].barh(data_paint_natly['Natlty1'][::-1], data_paint_natly['Damage'][::-1],
               color='blue')
    ax[1].set_ylabel('Nationality', size=16)
    ax[1].set_xlabel('Number of Dead/Injured People', size=16)
    ax[1].set_title('Top 10 Targeted Nationalities %d' % n,
                    fontsize=20, pad=10, color=title_color)

    # One show() renders both panels; the extra per-axes show loop was redundant.
    plt.show()
# Years offered for plotting: 1970 through 2011, with 1993 left out.
list_year = [*range(1970, 1993), *range(1994, 2012)]
# Slideshow: draw each year's charts, leave them on screen for two seconds,
# then clear the cell output before drawing the next year.
for year in list_year:
    country_nationality(year)
    time.sleep(2)
    clear_output()
THE TOP 10 ATTACKED COUNTRIES AND TOP 10 TARGETED NATIONALITIES WITH THE
SPECIFIC YEAR INPUT:
def country_nationality_year():
    """Ask for a year on standard input and draw that year's top-10 charts.

    The prompt is repeated until the input is an integer contained in the
    module-level ``list_year``. Non-integer input, the year 1993 and any other
    year outside ``list_year`` each print their own message before the prompt
    is shown again. Once a valid year is entered, ``country_nationality`` is
    called with it.

    Returns
    -------
    None
    """
    prompt = 'Input the year you want to see chart! -): '
    # A single loop replaces the recursive retry: every attempt gets the same
    # checks (the 1993 message used to appear only on the first attempt) and
    # repeated bad input no longer deepens the call stack.
    while True:
        try:
            year = int(input(prompt))
        except ValueError:
            # int() raises ValueError for non-numeric text; nothing here can
            # raise ZeroDivisionError, so it is no longer caught.
            print('Wrong Type, Input Again: ')
            continue
        if year == 1993:
            print('Sorry, No Terrorism Data in 1993')
        elif year not in list_year:
            print('Please input the year from 1970 to 2011')
        else:
            break
    country_nationality(year)
country_nationality_year()