Simple Analysis of Covid-19

A notebook analysing the COVID-19 pandemic. The data currently used was last updated on 23 April 2020.

Acknowledgment:

Novel Coronavirus (COVID-19) Cases, provided by JHU CSSE. Github Repo

Source Code:

My Github Repo

In [36]:
#imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
import math
from datetime import timedelta
import random
import plotly.express as px
import folium as flm
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
import plotly.graph_objs as go
# Shared colour palette (hex codes) reused by the charts below.
cnf = '#393e46'  # confirmed
dth = '#ff2e63'  # deaths
rec = '#21bf73'  # recovered
act = '#fe9801'  # active
In [37]:
# Offline Plotly: initialise notebook rendering so figures display inline.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of
# embedding it in the notebook, which would require network access — confirm.
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=True)
In [38]:
# Load the four prepared CSV exports that every later cell reads from.
# Paths are relative to the notebook's working directory.
total_data, day_wise, country_wise, full_grouped = [
    pd.read_csv(csv_path)
    for csv_path in (
        'Data/Cleaned_Final_Global_Data.csv',
        'Data/Day_Wise_Data.csv',
        'Data/Country_Wise_Data.csv',
        'Data/Full_Grouped_Data.csv',
    )
]

Displaying current statistics of cases

In [39]:
# Treemap of the most recent day's worldwide totals, split into
# Active / Deaths / Recovered.

# Select the columns with a list (double brackets): picking several groupby
# columns with a bare tuple is deprecated and raises on pandas >= 2.0.
daily_totals = (
    total_data
    .groupby('Date')[['Confirmed', 'Recovered', 'Deaths', 'Active']]
    .sum()
    .reset_index()
)

# 'Date' is still raw CSV text at this point, so find the latest day by
# comparing parsed dates instead of relying on string ordering.
parsed_dates = pd.to_datetime(daily_totals['Date'])
temp = daily_totals[parsed_dates == parsed_dates.max()].reset_index(drop=True)

# Long format: one row per case type, as expected by px.treemap.
tm = temp.melt(id_vars='Date', value_vars=['Active', 'Deaths', 'Recovered'])
figure = px.treemap(tm, path=["variable"], values="value", height=300, width=1000,
                    color_discrete_sequence=[act, dth, rec])
figure.data[0].textinfo = 'label+text+value'
figure.show()

Let us display the graph for cases over time

In [40]:
# Stacked area chart of worldwide Recovered / Deaths / Active counts per day.

# Select the columns with a list (double brackets): picking several groupby
# columns with a bare tuple is deprecated and raises on pandas >= 2.0.
temp = (
    total_data
    .groupby('Date')[['Recovered', 'Deaths', 'Active']]
    .sum()
    .reset_index()
)
# Long format: one row per (Date, Case) pair so px.area can colour by case type.
temp = temp.melt(id_vars="Date", value_vars=['Recovered', 'Deaths', 'Active'],
                 var_name='Case', value_name='Count')

figure = px.area(temp, x='Date', y='Count', color='Case')
# Range slider under the x axis for zooming into a date window.
figure.update_layout(xaxis_rangeslider_visible=True)
figure.show()

Now let us analyse the current scenario of cases across the world

In [41]:
# World map with one red circle per location for the most recent day;
# the circle radius is the location's confirmed count.

# 'Date' is still raw CSV text at this point, so find the latest day by
# comparing parsed dates instead of relying on string ordering.
parsed_dates = pd.to_datetime(total_data['Date'])
temp = total_data[parsed_dates == parsed_dates.max()]

global_map = flm.Map(location=[0, 0], tiles='OpenStreetMap', min_zoom=2, max_zoom=10, zoom_start=0)

# iterrows() materialises each row once, instead of rebuilding it with
# temp.iloc[i] for every single field lookup.
for _, row in temp.iterrows():
    # Valid HTML for the tooltip: <b> (there is no <bold> element) and closed <li> items.
    tooltip_html = (
        '<li><b>Country : </b>' + str(row['Country/Region']) + '</li>'
        + '<li><b>Province : </b>' + str(row['Province/State']) + '</li>'
        + '<li><b>Confirmed : </b>' + str(row['Confirmed']) + '</li>'
        + '<li><b>Deaths : </b>' + str(row['Deaths']) + '</li>'
    )
    flm.Circle(
        location=[row['Lat'], row['Long']],
        color='red',
        fill=True,          # explicit boolean; the colour goes in fill_color
        fill_color='red',
        tooltip=tooltip_html,
        radius=int(row['Confirmed']),
    ).add_to(global_map)

# Last expression of the cell: renders the map.
global_map

# TODO: try a choropleth map if more detailed data is found.
Out[41]:

Cases over time in the world

In [42]:
# Animated world map of confirmed cases per country, one frame per day.

# Convert 'Date' on full_grouped itself (in place) so the .dt accessor is
# available for the frame labels below.
full_grouped['Date'] = pd.to_datetime(full_grouped['Date'])

# log1p (= log(1 + x)) instead of log: a country/day with 0 confirmed cases maps
# to 0 rather than -inf, which would otherwise distort the colour scale.
log_confirmed = np.log1p(full_grouped["Confirmed"])
# ISO-formatted strings used as the animation frame labels.
frame_labels = full_grouped["Date"].dt.strftime('%Y-%m-%d')

figure = px.choropleth(full_grouped, locations="Country/Region", locationmode='country names',
                       color=log_confirmed, hover_name="Country/Region",
                       animation_frame=frame_labels,
                       title="Cases globally over time",
                       color_continuous_scale=px.colors.sequential.Hot_r,
                       projection='natural earth')
figure.update_layout(height=600, width=900)
figure.show()
In [43]:
# full_grouped.info()

Cases over time

In [44]:
# Day-wise worldwide totals: one bar chart per metric, shown side by side.
fig_c, fig_d, fig_r = [
    px.bar(day_wise, x="Date", y=metric, color_discrete_sequence=[shade])
    for metric, shade in (("Confirmed", act), ("Deaths", dth), ("Recovered", rec))
]

fig = make_subplots(rows=1, cols=3, shared_xaxes=False, horizontal_spacing=0.1,
                    subplot_titles=('Confirmed cases', 'Deaths reported', 'Recovered'))

# Copy the first trace of each standalone figure into its own subplot column.
for col_no, panel in enumerate((fig_c, fig_d, fig_r), start=1):
    fig.add_trace(panel.data[0], row=1, col=col_no)

fig.update_layout(height=480, width=1000)
fig.show()
In [45]:
# day_wise.columns
In [46]:
# Day-wise death ratios: deaths per 100 cases and per 100 recovered, side by side.
fig_1, fig_2 = [
    px.line(day_wise, x="Date", y=ratio, color_discrete_sequence=[shade])
    for ratio, shade in (("Deaths / 100 Cases", dth), ("Deaths / 100 Recovered", '#333333'))
]

fig = make_subplots(rows=1, cols=2, shared_xaxes=False,
                    subplot_titles=('Deaths / 100 Cases', 'Deaths / 100 Recovered'))

# Copy the first trace of each standalone figure into its own subplot column.
for col_no, panel in enumerate((fig_1, fig_2), start=1):
    fig.add_trace(panel.data[0], row=1, col=col_no)

fig.update_layout(height=480)
fig.show()
In [47]:
# day_wise.columns
In [48]:
# Day-wise new confirmed cases next to the day-wise "No. of countries" column.
fig_c, fig_d = [
    px.bar(day_wise, x="Date", y=metric, color_discrete_sequence=[shade])
    for metric, shade in (("New Confirmed", act), ("No. of countries", dth))
]

fig = make_subplots(rows=1, cols=2, shared_xaxes=False, horizontal_spacing=0.1,
                    subplot_titles=('No. of new cases everyday', 'No. of countries'))

# Copy the first trace of each standalone figure into its own subplot column.
for col_no, panel in enumerate((fig_c, fig_d), start=1):
    fig.add_trace(panel.data[0], row=1, col=col_no)

fig.update_layout(height=480)
fig.show()
In [49]:
# country_wise.head()

Analysis of statistics of top 25 countries

In [50]:
# Horizontal bar chart of the 25 countries with the highest 'Active' values
# (ascending sort, then the last 25 rows).
most_active = country_wise.sort_values('Active').tail(25)
fig_a = px.bar(most_active, x="Active", y="Country/Region", text='Active',
               orientation='h', color_discrete_sequence=[act])
fig_a.update_layout(title="Active Cases")
fig_a.show()
In [51]:
# Top-25 countries by Confirmed, Deaths and Recovered, as three horizontal bar
# charts side by side. Each panel is ranked independently on its own metric.
fig_c, fig_d, fig_r = [
    px.bar(country_wise.sort_values(metric).tail(25), x=metric, y="Country/Region",
           text=metric, orientation='h', color_discrete_sequence=[shade])
    for metric, shade in (('Confirmed', cnf), ('Deaths', dth), ('Recovered', rec))
]

figure = make_subplots(rows=1, cols=3, shared_xaxes=False, horizontal_spacing=0.2,
                       subplot_titles=('Cases Confirmed', 'Death Reports', 'Recovered'))

# Copy the first trace of each standalone figure into its own subplot column.
for col_no, panel in enumerate((fig_c, fig_d, fig_r), start=1):
    figure.add_trace(panel.data[0], row=1, col=col_no)

figure.update_layout(height=600)
figure.show()
In [52]:
# Top-25 countries by deaths per 100 cases and by recovered per 100 cases,
# as two horizontal bar charts side by side.
fig_dc, fig_rc = [
    px.bar(country_wise.sort_values(ratio).tail(25), x=ratio, y="Country/Region",
           text=ratio, orientation='h', color_discrete_sequence=[shade])
    for ratio, shade in (('Deaths / 100 Cases', '#f38181'), ('Recovered / 100 Cases', '#a3de83'))
]

figure = make_subplots(rows=1, cols=2, shared_xaxes=False, horizontal_spacing=0.2,
                       subplot_titles=('Deaths per 100 Cases', 'Recovered per 100 Cases'))
figure.update_layout(height=600)

# Copy the first trace of each standalone figure into its own subplot column.
for col_no, panel in enumerate((fig_dc, fig_rc), start=1):
    figure.add_trace(panel.data[0], row=1, col=col_no)

figure.show()
In [53]:
# Show the columns available in country_wise (used by the ranking charts that follow).
country_wise.columns
Out[53]:
Index(['Country/Region', 'Confirmed', 'Deaths', 'Recovered', 'Active',
       'New Confirmed', 'New Recovered', 'New Deaths', 'Deaths / 100 Cases',
       'Recovered / 100 Cases', 'Deaths / 100 Recovered', 'Population',
       'Cases / Million People', 'Confirmed today', 'Confirmed last week',
       '1 week change', '1 week % increase'],
      dtype='object')
In [54]:
# Left panel: 25 countries with the most new confirmed cases.
newest_cases = country_wise.sort_values('New Confirmed').tail(25)
fig_nc = px.bar(newest_cases, x="New Confirmed", y="Country/Region", text='New Confirmed',
                orientation='h', color_discrete_sequence=['#c61951'])

# Right panel: cases per million people, restricted to populations above one million.
temp = country_wise.loc[country_wise['Population'] > 1000000]
densest_cases = temp.sort_values('Cases / Million People').tail(25)
fig_p = px.bar(densest_cases, x="Cases / Million People", y="Country/Region",
               text='Cases / Million People', orientation='h',
               color_discrete_sequence=['#741938'])

figure = make_subplots(rows=1, cols=2, shared_xaxes=False, horizontal_spacing=0.2,
                       subplot_titles=('New Cases Today', 'Cases per Million (Pop> 1 mil.)'))

# Copy the first trace of each standalone figure into its own subplot column.
for col_no, panel in enumerate((fig_nc, fig_p), start=1):
    figure.add_trace(panel.data[0], row=1, col=col_no)

figure.update_layout(height=600)
figure.show()
In [55]:
# Left panel: 25 countries with the largest '1 week change'.
biggest_change = country_wise.sort_values('1 week change').tail(25)
fig_wc = px.bar(biggest_change, x="1 week change", y="Country/Region", text='1 week change',
                orientation='h', color_discrete_sequence=['#004a7c'])

# Right panel: largest '1 week % increase', restricted to countries with more
# than 100 confirmed cases.
temp = country_wise.loc[country_wise['Confirmed'] > 100]
fastest_growth = temp.sort_values('1 week % increase').tail(25)
fig_pi = px.bar(fastest_growth, x="1 week % increase", y="Country/Region",
                text='1 week % increase', orientation='h',
                color_discrete_sequence=['#005691'],
                hover_data=['Confirmed last week', 'Confirmed'])

figure = make_subplots(rows=1, cols=2, shared_xaxes=False, horizontal_spacing=0.2,
                       subplot_titles=('1 Week Change', '1 Week % Increase'))

# Copy the first trace of each standalone figure into its own subplot column.
for col_no, panel in enumerate((fig_wc, fig_pi), start=1):
    figure.add_trace(panel.data[0], row=1, col=col_no)

figure.update_layout(height=600)
figure.show()
In [56]:
# Deaths against confirmed cases (both axes logarithmic) for the 25 countries
# with the most deaths; marker size follows the confirmed count.
deadliest = country_wise.sort_values('Deaths', ascending=False).head(25)
fig = px.scatter(deadliest, x='Confirmed', y='Deaths', color='Country/Region',
                 size='Confirmed', height=700, text='Country/Region',
                 log_x=True, log_y=True,
                 title='Deaths vs Confirmed (Scale is in log10)')
fig.update_traces(textposition='top center')
# Each point is labelled with its country, so the legend is hidden.
fig.update_layout(showlegend=False)
fig.show()
In [57]:
# Confirmed cases over time, one line per country.
# Rows are sorted by 'Confirmed' (descending) before plotting — presumably so the
# countries with the largest counts come first in the legend.
confirmed_desc = full_grouped.sort_values('Confirmed', ascending=False)
fig = px.line(confirmed_desc, x="Date", y="Confirmed", color='Country/Region',
              height=600, title='Confirmed',
              color_discrete_sequence=px.colors.cyclical.mygbm)
fig.show()
In [58]:
# Deaths over time, one line per country.
# Rows are sorted by 'Deaths' (descending) before plotting — presumably so the
# countries with the largest counts come first in the legend.
deaths_desc = full_grouped.sort_values('Deaths', ascending=False)
fig = px.line(deaths_desc, x="Date", y="Deaths", color='Country/Region',
              height=600, title='Deaths',
              color_discrete_sequence=px.colors.cyclical.mygbm)
fig.show()
In [59]:
# New confirmed cases over time, one line per country.
new_cases_options = dict(
    x="Date",
    y="New Confirmed",
    color='Country/Region',
    height=600,
    title='New Cases',
    color_discrete_sequence=px.colors.cyclical.mygbm,
)
fig = px.line(full_grouped, **new_cases_options)
fig.show()
In [60]:
# Active cases over time, one line per country.
active_options = dict(
    x="Date",
    y="Active",
    color='Country/Region',
    height=600,
    title='Active',
    color_discrete_sequence=px.colors.cyclical.mygbm,
)
fig = px.line(full_grouped, **active_options)
fig.show()
In [61]:
# country_wise.columns

Current Composition of Cases

In [62]:
# Rows of total_data for the most recent day; full_latest is reused by later cells.
# 'Date' is still raw CSV text at this point (it is only converted to datetime
# further down), so find the latest day by comparing parsed dates instead of
# relying on string ordering. The 'Date' column of full_latest is left unchanged.
parsed_dates = pd.to_datetime(total_data['Date'])
full_latest = total_data[parsed_dates == parsed_dates.max()]

# Treemap of confirmed cases, one tile per country.
confirmed_ranked = full_latest.sort_values(by='Confirmed', ascending=False).reset_index(drop=True)
fig = px.treemap(confirmed_ranked, path=["Country/Region"], values="Confirmed", height=700,
                 title='Number of Confirmed Cases',
                 color_discrete_sequence=px.colors.qualitative.Dark2)
fig.data[0].textinfo = 'label+text+value'
fig.show()
In [63]:
# Treemap of reported deaths on the latest day, one tile per country.
deaths_ranked = full_latest.sort_values(by='Deaths', ascending=False).reset_index(drop=True)
fig = px.treemap(deaths_ranked, path=["Country/Region"], values="Deaths", height=700,
                 title='Number of Deaths reported',
                 color_discrete_sequence=px.colors.qualitative.Dark2)
# Show the label and the value on each tile.
fig.data[0].update(textinfo='label+text+value')
fig.show()
In [64]:
# Treemap of active cases on the latest day, one tile per country.
active_ranked = full_latest.sort_values(by='Active', ascending=False).reset_index(drop=True)
fig = px.treemap(active_ranked, path=["Country/Region"], values="Active", height=700,
                 title='Active Cases',
                 color_discrete_sequence=px.colors.qualitative.Dark2)
# Show the label and the value on each tile.
fig.data[0].update(textinfo='label+text+value')
fig.show()
In [65]:
# Treemap of recovered cases on the latest day, one tile per country.
recovered_ranked = full_latest.sort_values(by='Recovered', ascending=False).reset_index(drop=True)
fig = px.treemap(recovered_ranked, path=["Country/Region"], values="Recovered", height=700,
                 title='Recovered Cases',
                 color_discrete_sequence=px.colors.qualitative.Dark2)
# Show the label and the value on each tile.
fig.data[0].update(textinfo='label+text+value')
fig.show()

We have done quite a bit of analysis based on deaths, confirmed and active cases. Now let us analyse some more aspects.

Epidemic Span

In [66]:
# Normalise dtypes on total_data in place before the date arithmetic in the next cell:
# 'Date' becomes datetime64 and 'Country/Region' is forced to str.
total_data['Date'] = pd.to_datetime(total_data['Date'])
total_data['Country/Region'] = total_data['Country/Region'].astype(str)
In [67]:
# Epidemic span per country, drawn as a Gantt chart:
#   Start  = first day with at least one confirmed case
#   Finish = the day after the last day on which the confirmed total increased
# Expects total_data['Date'] to already be datetime (converted in the cell above).

# first date
# ==========
first_date = (
    total_data[total_data['Confirmed'] > 0]
    .groupby('Country/Region')['Date']
    .agg(['min'])
    .reset_index()
)

# last date
# =========
# Select the columns with a list (double brackets): picking several groupby
# columns with a bare tuple is deprecated and raises on pandas >= 2.0.
country_daily = (
    total_data
    .groupby(['Country/Region', 'Date'])[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
)
# Day-on-day change computed within each country (index level 0), so the first
# row of a country is NaN instead of a difference against the previous country.
last_date = country_daily.groupby(level=0).diff().reset_index()
last_date = last_date[last_date['Confirmed'] > 0]
last_date = last_date.groupby('Country/Region')['Date'].agg(['max']).reset_index()

# first_last
# ==========
# Join on the country key. Pasting the two frames side by side by row position
# silently pairs dates from different countries as soon as one country has a
# first date but no day with an increase.
first_last = first_date.merge(last_date, on='Country/Region', how='left')

# A country whose count never rose after it first appeared has no 'max';
# fall back to its first date so it still gets a (one-day) bar.
first_last['max'] = first_last['max'].fillna(first_last['min'])

# added 1 more day, which will show the next day as the day on which last case appeared
first_last['max'] = first_last['max'] + timedelta(days=1)

# no. of days
first_last['Days'] = first_last['max'] - first_last['min']

# task column as country
first_last['Task'] = first_last['Country/Region']

# rename by name (not by position) to the columns create_gantt reads
first_last = first_last.rename(columns={'min': 'Start', 'max': 'Finish'})

# sort by no. of days
first_last = first_last.sort_values('Days')

# one random colour per row; a dedicated seeded generator keeps the chart
# identical between runs without touching the global random state
color_rng = random.Random(42)
clr = ["#" + ''.join(color_rng.choice('0123456789ABC') for _ in range(6))
       for _ in range(len(first_last))]

# plot
fig = ff.create_gantt(first_last, index_col='Country/Region', colors=clr, show_colorbar=False,
                      bar_width=0.2, showgrid_x=True, showgrid_y=True, height=2500)
fig.show()

Country wise deep analysis

This cell takes a long time to run. Enable it only when needed; otherwise keep it commented out.

In [68]:
# temp = total_data.groupby(['Country/Region', 'Date', ])['Confirmed', 'Deaths']
# temp = temp.sum().diff().reset_index()

# mask = temp['Country/Region'] != temp['Country/Region'].shift(1)

# temp.loc[mask, 'Confirmed'] = np.nan
# temp.loc[mask, 'Deaths'] = np.nan

# # temp = temp[temp['Country/Region'].isin(gt_10000)]

# # countries = ['China', 'Iran', 'South Korea', 'Italy', 'France', 'Germany', 'Italy', 'Spain', 'US']
# countries = temp['Country/Region'].unique()

# n_cols = 4
# n_rows = math.ceil(len(countries)/n_cols)

# fig = make_subplots(rows=n_rows, cols=n_cols, shared_xaxes=False, subplot_titles=countries)

# for ind, country in enumerate(countries):
#     row = int((ind/n_cols)+1)
#     col = int((ind%n_cols)+1)
#     fig.add_trace(go.Bar(x=temp['Date'], y=temp.loc[temp['Country/Region']==country, 'Confirmed'], name=country), row=row, col=col)
    
# fig.update_layout(height=2000, title_text="No. of new cases in each Country")    
# fig.show()

Let us compare COVID-19 with some epidemics of the past.

In [69]:
# Comparison table of epidemics. The COVID-19 row is computed from the
# latest-day snapshot (full_latest); the other rows are hard-coded figures.
covid_confirmed = full_latest['Confirmed'].sum()
covid_deaths = full_latest['Deaths'].sum()

epidemics = pd.DataFrame({
    'epidemic': ['COVID-19', 'SARS', 'EBOLA', 'MERS', 'H1N1'],
    'start_year': [2019, 2003, 2014, 2012, 2009],
    'end_year': [2020, 2004, 2016, 2017, 2010],
    'confirmed': [covid_confirmed, 8096, 28646, 2494, 6724149],
    'deaths': [covid_deaths, 774, 11323, 858, 19654],
})

# Mortality = deaths as a percentage of confirmed cases, rounded to 2 decimals.
death_share = epidemics['deaths'] / epidemics['confirmed']
epidemics['mortality'] = (death_share * 100).round(2)
In [70]:
# Faceted bar chart comparing the epidemics on confirmed cases, deaths and mortality.
# Long format: one row per (epidemic, Case) pair, one facet column per Case.
temp = epidemics.melt(
    id_vars='epidemic',
    value_vars=['confirmed', 'deaths', 'mortality'],
    var_name='Case',
    value_name='Value',
)

fig = px.bar(temp, x="epidemic", y="Value", color='epidemic', text='Value',
             facet_col="Case", color_discrete_sequence=px.colors.qualitative.Bold)
fig.update_traces(textposition='outside')
fig.update_yaxes(showticklabels=False)

# Unlink the y axes of the 2nd and 3rd facets (matches=None) so each facet
# gets its own scale.
for axis_key in ('yaxis2', 'yaxis3'):
    fig.layout[axis_key].update(matches=None)

fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', width=1000)
fig.show()
In [ ]: