A notebook containing an analysis of the COVID-19 pandemic. The data currently used was last updated on 23 April 2020.
Data source: Novel Coronavirus (COVID-19) Cases, provided by JHU CSSE (GitHub repository).
#imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
import math
from datetime import timedelta
import random
import plotly.express as px
import folium as flm
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
import plotly.graph_objs as go
# Shared colour palette for the charts below:
# confirmed, deaths, recovered and active cases respectively.
cnf = '#393e46'
dth = '#ff2e63'
rec = '#21bf73'
act = '#fe9801'

# Enable offline plotly rendering inside the notebook.
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=True)

# Load the prepared CSV exports used throughout the notebook.
total_data = pd.read_csv('Data/Cleaned_Final_Global_Data.csv')
day_wise = pd.read_csv('Data/Day_Wise_Data.csv')
country_wise = pd.read_csv('Data/Country_Wise_Data.csv')
full_grouped = pd.read_csv('Data/Full_Grouped_Data.csv')
# Worldwide totals on the most recent date, drawn as a one-level treemap.
# The value columns are selected with a *list*: indexing a GroupBy with a bare
# tuple of names (['a', 'b'] written as ['a','b'] without the inner brackets)
# was deprecated and is rejected by pandas >= 2.0.
temp = (
    total_data
    .groupby('Date')[['Confirmed', 'Recovered', 'Deaths', 'Active']]
    .sum()
    .reset_index()
)
# Keep only the row for the latest date.
# NOTE(review): 'Date' is still whatever read_csv produced here (presumably an
# ISO-formatted string, so the maximum is the latest day) -- confirm.
temp = temp[temp['Date'] == temp['Date'].max()].reset_index(drop=True)
# Long format: one row per case type ('variable') with its count ('value').
tm = temp.melt(id_vars='Date', value_vars=['Active', 'Deaths', 'Recovered'])
figure = px.treemap(
    tm,
    path=["variable"],
    values="value",
    height=300,
    width=1000,
    color_discrete_sequence=[act, dth, rec],
)
# Show the label together with its numeric value inside each tile.
figure.data[0].textinfo = 'label+text+value'
figure.show()
# Stacked area chart of recovered / deaths / active counts per date.
# Columns are selected with a list; tuple-style selection on a GroupBy
# (['Recovered','Deaths','Active'] without inner brackets) fails on pandas >= 2.0.
temp = (
    total_data
    .groupby('Date')[['Recovered', 'Deaths', 'Active']]
    .sum()
    .reset_index()
)
# Long format: one row per (Date, Case) pair, with the total in 'Count'.
temp = temp.melt(
    id_vars="Date",
    value_vars=['Recovered', 'Deaths', 'Active'],
    var_name='Case',
    value_name='Count',
)
figure = px.area(temp, x='Date', y='Count', color='Case')
# Range slider under the x axis for zooming into a date window.
figure.update_layout(xaxis_rangeslider_visible=True)
figure.show()
# World map with one circle per row of the latest day's data; the circle
# radius is the confirmed count and the tooltip lists the key figures.
temp = total_data[total_data['Date'] == total_data['Date'].max()]
global_map = flm.Map(location=[0, 0], tiles='OpenStreetMap', min_zoom=2, max_zoom=10, zoom_start=0)

# Rows without coordinates cannot be placed on the map, so they are skipped
# instead of letting a NaN location abort the whole loop.
for _, row in temp.dropna(subset=['Lat', 'Long']).iterrows():
    flm.Circle(
        location=[row['Lat'], row['Long']],
        color='red',
        # `fill` is a boolean switch; the fill colour has its own option.
        fill=True,
        fill_color='red',
        # <b> replaces the non-standard <bold> tag, and every <li> is closed.
        tooltip=(
            f"<li><b>Country : </b>{row['Country/Region']}</li>"
            f"<li><b>Province : </b>{row['Province/State']}</li>"
            f"<li><b>Confirmed : </b>{row['Confirmed']}</li>"
            f"<li><b>Deaths : </b>{row['Deaths']}</li>"
        ),
        radius=int(row['Confirmed']),
    ).add_to(global_map)

# Bare expression: renders the map when it is the last statement of a notebook cell.
global_map
## TODO: try a choropleth map if deeper info is found
# Animated world choropleth of confirmed cases, one frame per date,
# coloured by the natural log of the confirmed count.
temp = full_grouped
# `temp` is the same object as `full_grouped`, so this converts its 'Date' column too.
temp['Date'] = pd.to_datetime(temp['Date'])

log_confirmed = np.log(full_grouped["Confirmed"])
frame_labels = full_grouped["Date"].dt.strftime('%Y-%m-%d')

figure = px.choropleth(
    temp,
    locations="Country/Region",
    locationmode='country names',
    color=log_confirmed,
    hover_name="Country/Region",
    animation_frame=frame_labels,
    title="Cases globally over time",
    color_continuous_scale=px.colors.sequential.Hot_r,
    projection='natural earth',
)
figure.update_layout(height=600, width=900)
figure.show()
# Three side-by-side bar charts of the day-wise confirmed, deaths and
# recovered columns.
fig_c = px.bar(day_wise, x="Date", y="Confirmed", color_discrete_sequence=[act])
fig_d = px.bar(day_wise, x="Date", y="Deaths", color_discrete_sequence=[dth])
fig_r = px.bar(day_wise, x="Date", y="Recovered", color_discrete_sequence=[rec])

fig = make_subplots(
    rows=1,
    cols=3,
    shared_xaxes=False,
    horizontal_spacing=0.1,
    subplot_titles=('Confirmed cases', 'Deaths reported', 'Recovered'),
)
# Copy the single trace of each express figure into its own subplot column.
for col, panel in enumerate((fig_c, fig_d, fig_r), start=1):
    fig.add_trace(panel.data[0], row=1, col=col)
fig.update_layout(height=480, width=1000)
fig.show()
# Two line charts of the day-wise ratio columns:
# "Deaths / 100 Cases" and "Deaths / 100 Recovered".
fig_1 = px.line(day_wise, x="Date", y="Deaths / 100 Cases", color_discrete_sequence=[dth])
fig_2 = px.line(day_wise, x="Date", y="Deaths / 100 Recovered", color_discrete_sequence=['#333333'])

fig = make_subplots(
    rows=1,
    cols=2,
    shared_xaxes=False,
    subplot_titles=('Deaths / 100 Cases', 'Deaths / 100 Recovered'),
)
# Place each line trace in its own subplot column.
for col, panel in enumerate((fig_1, fig_2), start=1):
    fig.add_trace(panel.data[0], row=1, col=col)
fig.update_layout(height=480)
fig.show()
# Bar charts of the day-wise "New Confirmed" and "No. of countries" columns.
fig_c = px.bar(day_wise, x="Date", y="New Confirmed", color_discrete_sequence=[act])
fig_d = px.bar(day_wise, x="Date", y="No. of countries", color_discrete_sequence=[dth])

fig = make_subplots(
    rows=1,
    cols=2,
    shared_xaxes=False,
    horizontal_spacing=0.1,
    subplot_titles=('No. of new cases everyday', 'No. of countries'),
)
# One trace per subplot column, left to right.
for col, panel in enumerate((fig_c, fig_d), start=1):
    fig.add_trace(panel.data[0], row=1, col=col)
fig.update_layout(height=480)
fig.show()
# Horizontal bar chart of the 25 countries with the largest 'Active' values.
# Ascending sort + tail(25) keeps the largest values.
top_active = country_wise.sort_values('Active').tail(25)
fig_a = px.bar(
    top_active,
    x="Active",
    y="Country/Region",
    text='Active',
    orientation='h',
    color_discrete_sequence=[act],
)
fig_a.update_layout(title_text="Active Cases")
fig_a.show()
# Top-25 countries by confirmed, deaths and recovered counts, shown as three
# horizontal bar charts side by side.
bar_opts = dict(y="Country/Region", orientation='h')

fig_c = px.bar(country_wise.sort_values('Confirmed').tail(25), x="Confirmed",
               text='Confirmed', color_discrete_sequence=[cnf], **bar_opts)
fig_d = px.bar(country_wise.sort_values('Deaths').tail(25), x="Deaths",
               text='Deaths', color_discrete_sequence=[dth], **bar_opts)
fig_r = px.bar(country_wise.sort_values('Recovered').tail(25), x="Recovered",
               text='Recovered', color_discrete_sequence=[rec], **bar_opts)

figure = make_subplots(
    rows=1,
    cols=3,
    shared_xaxes=False,
    horizontal_spacing=0.2,
    subplot_titles=('Cases Confirmed', 'Death Reports', 'Recovered'),
)
# Copy each chart's single trace into its subplot column.
for col, panel in enumerate((fig_c, fig_d, fig_r), start=1):
    figure.add_trace(panel.data[0], row=1, col=col)
figure.update_layout(height=600)
figure.show()
# Top-25 countries by "Deaths / 100 Cases" and by "Recovered / 100 Cases".
deaths_ratio = 'Deaths / 100 Cases'
recovered_ratio = 'Recovered / 100 Cases'

fig_dc = px.bar(country_wise.sort_values(deaths_ratio).tail(25),
                x=deaths_ratio, y="Country/Region", text=deaths_ratio,
                orientation='h', color_discrete_sequence=['#f38181'])
fig_rc = px.bar(country_wise.sort_values(recovered_ratio).tail(25),
                x=recovered_ratio, y="Country/Region", text=recovered_ratio,
                orientation='h', color_discrete_sequence=['#a3de83'])

figure = make_subplots(
    rows=1,
    cols=2,
    shared_xaxes=False,
    horizontal_spacing=0.2,
    subplot_titles=('Deaths per 100 Cases', 'Recovered per 100 Cases'),
)
# One trace per subplot column, left to right.
for col, panel in enumerate((fig_dc, fig_rc), start=1):
    figure.add_trace(panel.data[0], row=1, col=col)
figure.update_layout(height=600)
figure.show()
# Leftover inspection expression: only has a visible effect as the last line of a cell.
country_wise.columns

# Left panel: top-25 countries by 'New Confirmed'.
fig_nc = px.bar(country_wise.sort_values('New Confirmed').tail(25),
                x="New Confirmed", y="Country/Region", text='New Confirmed',
                orientation='h', color_discrete_sequence=['#c61951'])

# Right panel: cases per million people, restricted to countries whose
# 'Population' exceeds one million.
temp = country_wise[country_wise['Population'] > 1_000_000]
fig_p = px.bar(temp.sort_values('Cases / Million People').tail(25),
               x="Cases / Million People", y="Country/Region",
               text='Cases / Million People', orientation='h',
               color_discrete_sequence=['#741938'])

figure = make_subplots(
    rows=1,
    cols=2,
    shared_xaxes=False,
    horizontal_spacing=0.2,
    subplot_titles=('New Cases Today', 'Cases per Million (Pop> 1 mil.)'),
)
for col, panel in enumerate((fig_nc, fig_p), start=1):
    figure.add_trace(panel.data[0], row=1, col=col)
figure.update_layout(height=600)
figure.show()
# Left panel: top-25 countries by '1 week change'.
fig_wc = px.bar(country_wise.sort_values('1 week change').tail(25),
                x="1 week change", y="Country/Region", text='1 week change',
                orientation='h', color_discrete_sequence=['#004a7c'])

# Right panel: '1 week % increase', restricted to countries with more than
# 100 confirmed cases.
temp = country_wise[country_wise['Confirmed'] > 100]
fig_pi = px.bar(temp.sort_values('1 week % increase').tail(25),
                x="1 week % increase", y="Country/Region",
                text='1 week % increase', orientation='h',
                color_discrete_sequence=['#005691'],
                hover_data=['Confirmed last week', 'Confirmed'])

figure = make_subplots(
    rows=1,
    cols=2,
    shared_xaxes=False,
    horizontal_spacing=0.2,
    subplot_titles=('1 Week Change', '1 Week % Increase'),
)
for col, panel in enumerate((fig_wc, fig_pi), start=1):
    figure.add_trace(panel.data[0], row=1, col=col)
figure.update_layout(height=600)
figure.show()
# Log-log scatter of deaths against confirmed cases for the 25 countries
# with the most deaths; marker size follows the confirmed count.
most_deaths = country_wise.sort_values('Deaths', ascending=False).head(25)
fig = px.scatter(
    most_deaths,
    x='Confirmed',
    y='Deaths',
    color='Country/Region',
    size='Confirmed',
    height=700,
    text='Country/Region',
    log_x=True,
    log_y=True,
    title='Deaths vs Confirmed (Scale is in log10)',
)
# Country names sit above the markers, so the legend is hidden.
fig.update_traces(textposition='top center')
fig.update_layout(showlegend=False)
fig.show()
# Per-country line charts over time for four metrics.
# Options shared by all four charts.
line_opts = dict(
    x="Date",
    color='Country/Region',
    height=600,
    color_discrete_sequence=px.colors.cyclical.mygbm,
)

# Confirmed and Deaths are drawn from frames sorted by that metric, descending.
fig = px.line(full_grouped.sort_values('Confirmed', ascending=False),
              y="Confirmed", title='Confirmed', **line_opts)
fig.show()

fig = px.line(full_grouped.sort_values('Deaths', ascending=False),
              y="Deaths", title='Deaths', **line_opts)
fig.show()

# New cases and active cases use the frame in its existing row order.
fig = px.line(full_grouped, y="New Confirmed", title='New Cases', **line_opts)
fig.show()

fig = px.line(full_grouped, y="Active", title='Active', **line_opts)
fig.show()
# Treemaps of the latest day's data by country, one per metric.
full_latest = total_data[total_data['Date'] == total_data['Date'].max()]

# (value column, chart title) for each treemap, in display order.
treemap_specs = (
    ('Confirmed', 'Number of Confirmed Cases'),
    ('Deaths', 'Number of Deaths reported'),
    ('Active', 'Active Cases'),
    ('Recovered', 'Recovered Cases'),
)
for metric, heading in treemap_specs:
    ranked = full_latest.sort_values(by=metric, ascending=False).reset_index(drop=True)
    fig = px.treemap(
        ranked,
        path=["Country/Region"],
        values=metric,
        height=700,
        title=heading,
        color_discrete_sequence=px.colors.qualitative.Dark2,
    )
    # Show the label together with its numeric value inside each tile.
    fig.data[0].textinfo = 'label+text+value'
    fig.show()
# Gantt chart of each country's outbreak window: from the first date with a
# confirmed case to the day after the last date on which the confirmed total rose.
total_data['Date'] = pd.to_datetime(total_data['Date'])
total_data['Country/Region'] = total_data['Country/Region'].astype(str)

# First date with a non-zero confirmed count, per country.
first_date = (
    total_data[total_data['Confirmed'] > 0]
    .groupby('Country/Region')['Date']
    .min()
    .rename('Start')
    .reset_index()
)

# Day-over-day change of the country-level confirmed total. Diffing inside
# each country leaves NaN on its first row, so no value leaks across the
# boundary between two countries. The column is selected by name because
# tuple-style selection on a GroupBy is rejected by pandas >= 2.0.
daily_confirmed = (
    total_data
    .groupby(['Country/Region', 'Date'])['Confirmed']
    .sum()
    .groupby(level='Country/Region')
    .diff()
)

# Last date on which the confirmed total increased, per country.
last_date = (
    daily_confirmed[daily_confirmed > 0]
    .reset_index()
    .groupby('Country/Region')['Date']
    .max()
    .rename('Finish')
    .reset_index()
)

# Join on the country key, not by row position: a country with cases but no
# recorded increase is absent from `last_date`, and a positional concat would
# shift every following country's end date onto the wrong row.
first_last = first_date.merge(last_date, on='Country/Region', how='left')
# Countries with no recorded increase fall back to their start date.
first_last['Finish'] = first_last['Finish'].fillna(first_last['Start'])
# Add one day so the bar ends on the day after the last new case appeared.
first_last['Finish'] = first_last['Finish'] + timedelta(days=1)
# Duration of the window.
first_last['Days'] = first_last['Finish'] - first_last['Start']
# create_gantt reads the row label from a 'Task' column.
first_last['Task'] = first_last['Country/Region']
# Shortest windows first.
first_last = first_last.sort_values('Days')

# One random hex colour per row.
clr = ["#" + ''.join(random.choice('0123456789ABC') for _ in range(6))
       for _ in range(len(first_last))]

fig = ff.create_gantt(first_last, index_col='Country/Region', colors=clr, show_colorbar=False,
                      bar_width=0.2, showgrid_x=True, showgrid_y=True, height=2500)
fig.show()
The following cell takes a long time to run. Include it in the notebook only if needed; otherwise keep it commented out.
# temp = total_data.groupby(['Country/Region', 'Date', ])['Confirmed', 'Deaths']
# temp = temp.sum().diff().reset_index()
# mask = temp['Country/Region'] != temp['Country/Region'].shift(1)
# temp.loc[mask, 'Confirmed'] = np.nan
# temp.loc[mask, 'Deaths'] = np.nan
# # temp = temp[temp['Country/Region'].isin(gt_10000)]
# # countries = ['China', 'Iran', 'South Korea', 'Italy', 'France', 'Germany', 'Italy', 'Spain', 'US']
# countries = temp['Country/Region'].unique()
# n_cols = 4
# n_rows = math.ceil(len(countries)/n_cols)
# fig = make_subplots(rows=n_rows, cols=n_cols, shared_xaxes=False, subplot_titles=countries)
# for ind, country in enumerate(countries):
# row = int((ind/n_cols)+1)
# col = int((ind%n_cols)+1)
# fig.add_trace(go.Bar(x=temp['Date'], y=temp.loc[temp['Country/Region']==country, 'Confirmed'], name=country), row=row, col=col)
# fig.update_layout(height=2000, title_text="No. of new cases in each Country")
# fig.show()
# Compare COVID-19 with earlier epidemics. The COVID-19 figures are summed
# from `full_latest`; the figures for the other epidemics are hard-coded.
covid_confirmed = full_latest['Confirmed'].sum()
covid_deaths = full_latest['Deaths'].sum()

epidemics = pd.DataFrame({
    'epidemic': ['COVID-19', 'SARS', 'EBOLA', 'MERS', 'H1N1'],
    'start_year': [2019, 2003, 2014, 2012, 2009],
    'end_year': [2020, 2004, 2016, 2017, 2010],
    'confirmed': [covid_confirmed, 8096, 28646, 2494, 6724149],
    'deaths': [covid_deaths, 774, 11323, 858, 19654],
})
# Deaths as a percentage of confirmed cases, rounded to two decimals.
epidemics['mortality'] = (epidemics['deaths'] / epidemics['confirmed'] * 100).round(2)

# Long format: one row per (epidemic, Case) pair so each Case gets its own facet.
temp = epidemics.melt(
    id_vars='epidemic',
    value_vars=['confirmed', 'deaths', 'mortality'],
    var_name='Case',
    value_name='Value',
)
fig = px.bar(temp, x="epidemic", y="Value", color='epidemic', text='Value',
             facet_col="Case", color_discrete_sequence=px.colors.qualitative.Bold)
fig.update_traces(textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', width=1000)
fig.update_yaxes(showticklabels=False)
# Unlink the y axes of the second and third facets so each uses its own range.
for axis in (fig.layout.yaxis2, fig.layout.yaxis3):
    axis.update(matches=None)
fig.show()