Initial EDA (ARCHIVED)
First exploration to refine business and ML question
This was an initial EDA performed at the start of the project to explore the datasets and refine my business and ML question. I am looking at the change in water access and capacity over time in various regions, as well as the relationship between conflict and water access and installation.
%run /Users/thomasadler/Desktop/futuristic-platipus/notebooks/0-ta-packages.py
# Folder holding the cleaned CSV exports read by this notebook.
filepath = '/Users/thomasadler/Desktop/capstone_docs/'

# Load the cleaned Uganda conflict events.
conflict_df = pd.read_csv(filepath + 'uganda_conflict_df_clean.csv')

# Parse the event date and truncate it to midnight so events group per day
# (replaces the datetime -> date -> datetime round trip).
conflict_df['event_date'] = pd.to_datetime(conflict_df['event_date']).dt.normalize()

# Keep only the columns used in this exploration. An explicit copy is taken
# because later cells add columns to this frame.
working_conflict_df = conflict_df[
    ['event_date', 'clean_adm1', 'clean_adm2', 'clean_adm3', 'clean_adm4',
     'latitude', 'longitude', 'fatalities']
].copy()
working_conflict_df.head()
working_conflict_df.info()

# Daily totals; numeric_only keeps the text admin columns out of the sum.
fatalities_date = working_conflict_df.groupby('event_date').sum(numeric_only=True)
# Daily non-null counts per column; the 'fatalities' count is used as the
# number of events per day.
events_date = working_conflict_df.groupby('event_date').count()
# Dual-axis chart for the whole country: daily fatalities on the left axis,
# daily number of events on the right axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])

daily_traces = [
    (fatalities_date, "Number of fatalities", False),
    (events_date, "Number of events", True),
]
for daily, label, on_secondary in daily_traces:
    fig.add_trace(
        go.Scatter(x=daily.index, y=daily['fatalities'], name=label),
        secondary_y=on_secondary,
    )
for _, label, on_secondary in daily_traces:
    fig.update_yaxes(title_text=label, secondary_y=on_secondary)

fig.update_layout(title="Uganda Events and Fatalities")
fig.update_xaxes(rangeslider_visible=True)
fig.update_traces(opacity=0.65)
fig.show()
def _top10_by_region(level, aggregate):
    """Return the 10 regions of admin column `level` with the largest `aggregate` ('sum' or 'count') of fatalities."""
    grouped = working_conflict_df[[level, 'fatalities']].groupby(level)
    totals = getattr(grouped, aggregate)()
    return totals.sort_values('fatalities', ascending=False).head(10)


# Total fatalities per region, top 10 at each admin level.
fatalities_date_adm1 = _top10_by_region('clean_adm1', 'sum')
fatalities_date_adm2 = _top10_by_region('clean_adm2', 'sum')
fatalities_date_adm3 = _top10_by_region('clean_adm3', 'sum')

# Number of events (non-null fatalities rows) per region, top 10 at each level.
events_date_adm1 = _top10_by_region('clean_adm1', 'count')
events_date_adm2 = _top10_by_region('clean_adm2', 'count')
events_date_adm3 = _top10_by_region('clean_adm3', 'count')

# One bar chart per table, in the same order as the tables above.
region_charts = [
    (fatalities_date_adm1, "Adm1-Number of fatalities"),
    (fatalities_date_adm2, "Adm2-Number of fatalities"),
    (fatalities_date_adm3, "Adm3-Number of fatalities"),
    (events_date_adm1, "Adm1-Number of events"),
    (events_date_adm2, "Adm2-Number of events"),
    (events_date_adm3, "Adm3-Number of events"),
]
for top_regions, chart_title in region_charts:
    fig = px.bar(top_regions, x=top_regions.index, y="fatalities", title=chart_title)
    fig.show()
# Add the calendar year of each event. `assign` builds a new frame, so this
# does not write into a slice of conflict_df (no SettingWithCopyWarning).
working_conflict_df = working_conflict_df.assign(
    event_year=pd.to_datetime(working_conflict_df['event_date']).dt.year
)
working_conflict_df.info()

# Animated map of events sized by fatalities, one frame per year.
# Plotly Express orders animation frames by first appearance in the data,
# so the rows are sorted by year for the plot only.
fig = px.scatter_geo(
    working_conflict_df.sort_values('event_year'),
    lon='longitude',
    lat='latitude',
    size='fatalities',
    height=600,
    width=800,
    animation_frame='event_year',
)
fig.show()
# Load the cleaned Uganda water point reports.
water_df = pd.read_csv(filepath + 'uganda_water_df_clean.csv')

# Parse both date-like columns.
for date_column in ('install_year', 'report_date'):
    water_df[date_column] = pd.to_datetime(water_df[date_column])

# Keep only the calendar year of the installation date.
water_df['install_year'] = water_df['install_year'].dt.year

# Quick look at the structure and at two categorical columns.
water_df.info()
water_df['staleness_score'].value_counts()
water_df['management_clean'].value_counts()
water_df.columns
# One row per water point: average of its numeric attributes over all reports.
unique_water_df = water_df[
    ['wpdx_id', 'lat_deg', 'lon_deg', 'install_year',
     'usage_cap', 'crucialness', 'pressure', 'served_population']
].groupby('wpdx_id').mean()
unique_water_df.head()

# Report-level columns used for the rest of the exploration.
working_water_df = water_df[
    ['wpdx_id', 'lat_deg', 'lon_deg', 'report_date', 'clean_adm1', 'clean_adm2',
     'clean_adm3', 'install_year', 'usage_cap', 'crucialness', 'pressure',
     'served_population', 'status_id']
]

# Attach the admin region names to each water point. The merge key is
# exposed by pandas as the 'key_0' column.
water_location = unique_water_df.merge(
    working_water_df,
    left_on=unique_water_df.index,
    right_on=working_water_df['wpdx_id'],
    how='left',
)
# The left merge returns one row per *report*; keep a single row per water
# point so the counts below are numbers of water points, as the chart titles
# state, rather than numbers of reports.
water_location = water_location.drop_duplicates(subset='key_0')

# Top 10 regions by number of water points at each admin level.
wp_adm1 = water_location[['clean_adm1', 'key_0']].groupby('clean_adm1').count().sort_values('key_0', ascending=False).head(10)
wp_adm2 = water_location[['clean_adm2', 'key_0']].groupby('clean_adm2').count().sort_values('key_0', ascending=False).head(10)
wp_adm3 = water_location[['clean_adm3', 'key_0']].groupby('clean_adm3').count().sort_values('key_0', ascending=False).head(10)

fig = px.bar(wp_adm1, x=wp_adm1.index, y="key_0", title="Adm1-Number of water points")
fig.show()
fig = px.bar(wp_adm2, x=wp_adm2.index, y="key_0", title="Adm2-Number of water points")
fig.show()
fig = px.bar(wp_adm3, x=wp_adm3.index, y="key_0", title="Adm3-Number of water points")
fig.show()

# Distribution of installation years across water points.
fig = px.histogram(unique_water_df, x="install_year", title='Water point installations')
fig.show()
# Reports for water points whose clean_adm3 is Agago. Copied because later
# cells add a column to this frame.
agago_water_df = working_water_df[working_water_df['clean_adm3'] == 'Agago'].copy()
agago_water_df['wpdx_id'].value_counts()

# One row per Agago water point. numeric_only keeps the date and text
# columns (report_date, admin names, status_id) out of the average.
unique_water_agago_df = agago_water_df.groupby('wpdx_id').mean(numeric_only=True)

fig = px.histogram(unique_water_agago_df, x="install_year", title='Agago water point installations')
fig.show()
# Prototype of the daily time series on a single Agago water point.
# Copied so the helper column added below does not write into a slice.
onewaterpoint = agago_water_df.loc[
    agago_water_df['wpdx_id'] == '6GJMWFRF+329',
    ['wpdx_id', 'report_date', 'install_year', 'usage_cap'],
].copy()
onewaterpoint.head()

first_day = onewaterpoint['report_date'].min()

# Installation year as a 1 January timestamp. Years are rendered as integer
# strings first so a float year such as 2005.0 still matches '%Y'; rows with
# a missing year become NaT.
install_years = onewaterpoint['install_year'].dropna().astype(int).astype(str)
onewaterpoint['install_year_dt'] = pd.to_datetime(install_years, format='%Y')
installed_year = onewaterpoint['install_year_dt'].mean()
last_day = datetime(2022, 6, 30)

# Start the calendar at the installation date when it precedes the first report.
if first_day > installed_year:
    first_day = installed_year
print(first_day, installed_year, last_day)

# Average reports made on the same day. numeric_only keeps the datetime
# helper column out of the average and out of the zero-fill below.
onewaterpoint = onewaterpoint.groupby(['report_date', 'wpdx_id']).mean(numeric_only=True)
onewaterpoint.reset_index(level=1, inplace=True)
onewaterpoint

# Lay the reports out on a daily calendar.
point_df = onewaterpoint.reindex(pd.date_range(start=first_day, end=last_day, freq="D"))
point_df.info()
# Carry the last report forward until the next one.
point_df = point_df.ffill()
point_df.info()
# Days before the first report carry no information: set them to 0.
point_df = point_df.fillna(0)
point_df.info()

# NOTE(review): the title says "installations" but the line plotted is this
# point's usage_cap — confirm the intended title.
fig = px.line(point_df, x=point_df.index, y=point_df['usage_cap'], title='Water point installations')
fig.show()
# Stack the single-point series onto an (initially empty) master frame, the
# same way further points would be appended.
master = pd.concat([pd.DataFrame(), point_df])
master.tail()

# Average usage capacity per calendar day across everything in the master frame.
final_timeseries = master['usage_cap'].groupby(level=0).mean()
final_timeseries

fig = px.line(
    final_timeseries,
    x=final_timeseries.index,
    y=final_timeseries.values,
    title='Water point quality',
)
fig.show()
def ts_quality(df, last_day=None):
    """Build a daily time series of water point metrics summed over all points.

    For every water point (``wpdx_id``) the reports are averaged per report
    day, laid out on a daily calendar, carried forward until the next report
    and set to 0 before the first report. The per-point series are then
    summed per day.

    The calendar of a point starts at its first report day, or at 1 January
    of its installation year when that is earlier. A point with no report
    date contributes zeros for the last two days of the calendar.

    Args:
        df: Report-level frame with the columns ``wpdx_id``, ``report_date``,
            ``install_year`` (a year number, may be missing), ``usage_cap``,
            ``crucialness``, ``pressure``, ``served_population`` and
            ``functioning``.
        last_day: Last day of the calendar; defaults to 2022-06-30.

    Returns:
        DataFrame indexed by day with the five metric columns, each summed
        over all water points. Empty (with those columns) when ``df`` has no
        water points.
    """
    value_columns = ['usage_cap', 'crucialness', 'pressure',
                     'served_population', 'functioning']
    if last_day is None:
        last_day = pd.Timestamp(2022, 6, 30)
    else:
        last_day = pd.Timestamp(last_day)

    point_frames = []
    # One pass over the groups instead of re-filtering the frame per point.
    for _, reports in df.groupby('wpdx_id', sort=False):
        # Truncate to midnight so every report lands on the daily calendar.
        report_days = pd.to_datetime(reports['report_date']).dt.normalize()
        first_day = report_days.min()

        if pd.isnull(first_day):
            # No usable report date: keep a minimal two-day calendar.
            first_day = last_day - pd.Timedelta(days=1)
        else:
            # Integer strings so a float year such as 2005.0 matches '%Y'.
            install_years = reports['install_year'].dropna()
            if not install_years.empty:
                installed = pd.to_datetime(
                    install_years.astype(int).astype(str), format='%Y'
                ).mean().floor('D')
                if first_day > installed:
                    first_day = installed

        calendar = pd.date_range(start=first_day, end=last_day, freq='D')
        daily = (
            reports[value_columns]
            .groupby(report_days)
            .mean()              # several reports on one day -> their average
            .reindex(calendar)
            .ffill()             # last known value until the next report
            .fillna(0)           # nothing known before the first report
        )
        point_frames.append(daily)

    if not point_frames:
        return pd.DataFrame(columns=value_columns)

    # Single concat at the end avoids re-copying the accumulated frame
    # on every iteration.
    return pd.concat(point_frames).groupby(level=0).sum()
# Encode the reported status as 1 (functioning) / 0 (not functioning or
# unknown). `assign` returns a new frame, so nothing is written into a slice
# of working_water_df.
agago_water_df = agago_water_df.assign(
    functioning=agago_water_df['status_id'].map({'Yes': 1, 'No': 0, 'Unknown': 0})
)

# Daily metrics summed over all Agago water points.
agago_water_ts = ts_quality(agago_water_df)
agago_water_ts.tail()

# NOTE(review): the title mentions usage capacity but the series plotted is
# served_population — confirm which one was intended.
fig = px.line(agago_water_ts, x=agago_water_ts.index, y=agago_water_ts['served_population'], title='Water point usage capacity Agago')
fig.show()
# Agago: summed usage capacity (left axis) against summed served
# population (right axis), per day.
fig = make_subplots(specs=[[{"secondary_y": True}]])

agago_traces = [
    ('usage_cap', "Usage capacity", False),
    ('served_population', "Served population", True),
]
for metric, label, on_secondary in agago_traces:
    fig.add_trace(
        go.Scatter(x=agago_water_ts.index, y=agago_water_ts[metric], name=label),
        secondary_y=on_secondary,
    )
for _, label, on_secondary in agago_traces:
    fig.update_yaxes(title_text=label, secondary_y=on_secondary)

fig.update_layout(title="Agago water quality")
fig.update_xaxes(rangeslider_visible=True)
fig.show()
# Reports for water points whose clean_adm2 is Lira. Copied so the column
# added below is not written into a slice of working_water_df.
lira_water_df = working_water_df[working_water_df['clean_adm2'] == 'Lira'].copy()
# 1 = functioning, 0 = not functioning or unknown.
lira_water_df['functioning'] = lira_water_df['status_id'].map({'Yes': 1, 'No': 0, 'Unknown': 0})

# Daily metrics summed over all Lira water points.
lira_water_ts = ts_quality(lira_water_df)

# Usage capacity (left axis) against served population (right axis).
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Scatter(x=lira_water_ts.index, y=lira_water_ts['usage_cap'], name="Usage capacity"),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=lira_water_ts.index, y=lira_water_ts['served_population'], name="Served population"),
    secondary_y=True,
)
fig.update_yaxes(title_text="Usage capacity", secondary_y=False)
fig.update_yaxes(title_text="Served population", secondary_y=True)
fig.update_layout(title="Lira water quality")
fig.update_xaxes(rangeslider_visible=True)
fig.show()
# Reports for water points whose clean_adm2 is Kamwenge. Copied so the
# column added below is not written into a slice of working_water_df.
kamwenge_water_df = working_water_df[working_water_df['clean_adm2'] == 'Kamwenge'].copy()
# 1 = functioning, 0 = not functioning or unknown.
kamwenge_water_df['functioning'] = kamwenge_water_df['status_id'].map({'Yes': 1, 'No': 0, 'Unknown': 0})

# Daily metrics summed over all Kamwenge water points.
kamwenge_water_ts = ts_quality(kamwenge_water_df)

# Usage capacity (left axis) against served population (right axis).
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Scatter(x=kamwenge_water_ts.index, y=kamwenge_water_ts['usage_cap'], name="Usage capacity"),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=kamwenge_water_ts.index, y=kamwenge_water_ts['served_population'], name="Served population"),
    secondary_y=True,
)
fig.update_yaxes(title_text="Usage capacity", secondary_y=False)
fig.update_yaxes(title_text="Served population", secondary_y=True)
# Titled by region like the other regional plots (was "Central water quality").
fig.update_layout(title="Kamwenge water quality")
fig.update_xaxes(rangeslider_visible=True)
fig.show()
# Reports for water points whose clean_adm2 is Kitgum. Copied so the column
# added below is not written into a slice of working_water_df.
kitgum_water_df = working_water_df[working_water_df['clean_adm2'] == 'Kitgum'].copy()
# 1 = functioning, 0 = not functioning or unknown.
kitgum_water_df['functioning'] = kitgum_water_df['status_id'].map({'Yes': 1, 'No': 0, 'Unknown': 0})

# Daily metrics summed over all Kitgum water points.
kitgum_water_ts = ts_quality(kitgum_water_df)

# Usage capacity (left axis) against served population (right axis).
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Scatter(x=kitgum_water_ts.index, y=kitgum_water_ts['usage_cap'], name="Usage capacity"),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=kitgum_water_ts.index, y=kitgum_water_ts['served_population'], name="Served population"),
    secondary_y=True,
)
fig.update_yaxes(title_text="Usage capacity", secondary_y=False)
fig.update_yaxes(title_text="Served population", secondary_y=True)
fig.update_layout(title="Kitgum water quality")
fig.update_xaxes(rangeslider_visible=True)
fig.show()
# Rebind each regional water series to its own copy.
agago_water_ts, lira_water_ts, kamwenge_water_ts, kitgum_water_ts = (
    series.copy()
    for series in (agago_water_ts, lira_water_ts, kamwenge_water_ts, kitgum_water_ts)
)

# Conflict events per region.
# NOTE(review): Agago, Lira and Kamwenge are matched on clean_adm3 here and
# Kitgum on clean_adm2, while the water subsets for Lira and Kamwenge are
# filtered on clean_adm2 — confirm both sides cover the same area.
adm3_name = working_conflict_df['clean_adm3']
adm2_name = working_conflict_df['clean_adm2']
agago_conflict_ts = working_conflict_df[adm3_name == 'Agago']
lira_conflict_ts = working_conflict_df[adm3_name == 'Lira']
kamwenge_conflict_ts = working_conflict_df[adm3_name == 'Kamwenge']
kitgum_conflict_ts = working_conflict_df[adm2_name == 'Kitgum']
def time_series(df1, df2, variable1, variable2, region):
    """Plot a water metric against a daily conflict measure on two y-axes.

    Args:
        df1: Water time series; its index is used as the x values and
            ``df1[variable1]`` as the left-axis values.
        df2: Conflict events with ``event_date`` and ``fatalities`` columns.
        variable1: Column of ``df1`` to plot; also the left-axis title.
        variable2: ``'fatalities'`` plots the daily sum of fatalities; any
            other value plots the daily number of events (rows with a
            non-null ``fatalities``). Also the right-axis title.
        region: Figure title.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # Aggregate only the column that is plotted, so the text admin columns
    # of df2 are never summed.
    daily_fatalities = df2.groupby('event_date')['fatalities']
    if variable2 == 'fatalities':
        conflict_series = daily_fatalities.sum()
    else:
        conflict_series = daily_fatalities.count()

    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(
        go.Scatter(x=df1.index, y=df1[variable1], name=variable1),
        secondary_y=False,
    )
    fig.add_trace(
        go.Scatter(x=conflict_series.index, y=conflict_series, name=variable2),
        secondary_y=True,
    )

    # Set y-axes titles
    fig.update_yaxes(title_text=variable1, secondary_y=False)
    fig.update_yaxes(title_text=variable2, secondary_y=True)
    fig.update_layout(title=region)
    fig.update_xaxes(rangeslider_visible=True)
    fig.update_traces(opacity=0.65)
    # Fixed window so every regional plot shares the same time span.
    fig.update_layout(xaxis_range=[datetime(1997, 1, 1), datetime(2022, 12, 31)])
    fig.show()
# Water series and conflict events for each region.
region_data = {
    'Agago': (agago_water_ts, agago_conflict_ts),
    'Lira': (lira_water_ts, lira_conflict_ts),
    'Kamwenge': (kamwenge_water_ts, kamwenge_conflict_ts),
    'Kitgum': (kitgum_water_ts, kitgum_conflict_ts),
}

# (water metric, conflict measure, regions plotted), in display order.
comparisons = [
    ('usage_cap', 'fatalities', ['Agago', 'Lira', 'Kamwenge', 'Kitgum']),
    ('usage_cap', 'events', ['Agago', 'Lira', 'Kamwenge']),
    ('served_population', 'fatalities', ['Agago', 'Lira', 'Kamwenge']),
    ('served_population', 'events', ['Agago', 'Lira', 'Kamwenge', 'Kitgum']),
]
for water_metric, conflict_measure, region_names in comparisons:
    for region_name in region_names:
        region_water, region_conflict = region_data[region_name]
        time_series(region_water, region_conflict, water_metric, conflict_measure, region_name)
# Add "100 minus metric" columns to three of the regional water series.
# NOTE(review): crucialness and pressure here are sums over water points, so
# 100 - value is not bounded to 0..100 — confirm this is the intended scale.
water_datsets = [agago_water_ts, lira_water_ts, kamwenge_water_ts]
for df in water_datsets:
    df['opposite_crucialness'] = df['crucialness'].rsub(100)
    df['opposite_pressure'] = df['pressure'].rsub(100)

# Plot each inverse metric against fatalities, then against events, for the
# three regions in turn.
inverse_plot_regions = [
    ('Agago', agago_water_ts, agago_conflict_ts),
    ('Lira', lira_water_ts, lira_conflict_ts),
    ('Kamwenge', kamwenge_water_ts, kamwenge_conflict_ts),
]
for inverse_metric in ('opposite_crucialness', 'opposite_pressure'):
    for conflict_measure in ('fatalities', 'events'):
        for region_name, region_water, region_conflict in inverse_plot_regions:
            time_series(region_water, region_conflict, inverse_metric, conflict_measure, region_name)
# Daily conflict aggregates for the whole country; numeric_only keeps the
# text admin columns out of the sum.
fatalities_date = working_conflict_df.groupby('event_date').sum(numeric_only=True)
events_date = working_conflict_df.groupby('event_date').count()

# Installations per year. A water point can be reported several times, so
# keep one row per point before counting.
install_date = working_water_df.drop_duplicates('wpdx_id').groupby('install_year').count()
# install_year is a year number; convert it to 1 January timestamps so the
# trace lands on the shared date x-axis instead of being read as a raw number.
install_date_x = pd.to_datetime(install_date.index.astype(int).astype(str), format='%Y')

fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Scatter(x=events_date.index, y=events_date['fatalities'], name='events'),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=install_date_x, y=install_date['wpdx_id'], name='installations'),
    secondary_y=True,
)

# Set y-axes titles
fig.update_yaxes(title_text='events', secondary_y=False)
fig.update_yaxes(title_text='installations', secondary_y=True)
fig.update_layout(title='Uganda')
fig.update_xaxes(rangeslider_visible=True)
fig.update_traces(opacity=0.65)
fig.update_layout(xaxis_range=[datetime(1997, 1, 1), datetime(2022, 12, 31)])
fig.show()
# Daily conflict aggregates for Agago; numeric_only keeps the text admin
# columns out of the sum.
agago_fatalities_date = agago_conflict_ts.groupby('event_date').sum(numeric_only=True)
agago_events_date = agago_conflict_ts.groupby('event_date').count()

# Installations per year, one row per water point (points can be reported
# several times).
agago_install_date = agago_water_df.drop_duplicates('wpdx_id').groupby('install_year').count()
# Year numbers -> 1 January timestamps so the trace shares the date x-axis.
agago_install_x = pd.to_datetime(agago_install_date.index.astype(int).astype(str), format='%Y')

fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    # x and y now come from the same frame (x was taken from the fatalities frame).
    go.Scatter(x=agago_events_date.index, y=agago_events_date['fatalities'], name='events'),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=agago_install_x, y=agago_install_date['wpdx_id'], name='installations'),
    secondary_y=True,
)

# Set y-axes titles
fig.update_yaxes(title_text='events', secondary_y=False)
fig.update_yaxes(title_text='installations', secondary_y=True)
fig.update_layout(title='Agago')
fig.update_xaxes(rangeslider_visible=True)
fig.update_traces(opacity=0.65)
fig.update_layout(xaxis_range=[datetime(1997, 1, 1), datetime(2022, 12, 31)])
fig.show()
# Daily conflict aggregates for Lira; numeric_only keeps the text admin
# columns out of the sum.
lira_fatalities_date = lira_conflict_ts.groupby('event_date').sum(numeric_only=True)
lira_events_date = lira_conflict_ts.groupby('event_date').count()

# Installations per year, one row per water point (points can be reported
# several times).
lira_install_date = lira_water_df.drop_duplicates('wpdx_id').groupby('install_year').count()
# Year numbers -> 1 January timestamps so the trace shares the date x-axis.
lira_install_x = pd.to_datetime(lira_install_date.index.astype(int).astype(str), format='%Y')

fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Scatter(x=lira_events_date.index, y=lira_events_date['fatalities'], name='events'),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=lira_install_x, y=lira_install_date['wpdx_id'], name='installations'),
    secondary_y=True,
)

# Set y-axes titles
fig.update_yaxes(title_text='events', secondary_y=False)
fig.update_yaxes(title_text='installations', secondary_y=True)
fig.update_layout(title='Lira')
fig.update_xaxes(rangeslider_visible=True)
fig.update_traces(opacity=0.65)
fig.update_layout(xaxis_range=[datetime(1997, 1, 1), datetime(2022, 12, 31)])
fig.show()
# Daily conflict aggregates for Kamwenge; numeric_only keeps the text admin
# columns out of the sum.
kamwenge_fatalities_date = kamwenge_conflict_ts.groupby('event_date').sum(numeric_only=True)
kamwenge_events_date = kamwenge_conflict_ts.groupby('event_date').count()

# Installations per year, one row per water point (points can be reported
# several times).
kamwenge_install_date = kamwenge_water_df.drop_duplicates('wpdx_id').groupby('install_year').count()
# Year numbers -> 1 January timestamps so the trace shares the date x-axis.
kamwenge_install_x = pd.to_datetime(kamwenge_install_date.index.astype(int).astype(str), format='%Y')

fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Scatter(x=kamwenge_events_date.index, y=kamwenge_events_date['fatalities'], name='events'),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=kamwenge_install_x, y=kamwenge_install_date['wpdx_id'], name='installations'),
    secondary_y=True,
)

# Set y-axes titles
fig.update_yaxes(title_text='events', secondary_y=False)
fig.update_yaxes(title_text='installations', secondary_y=True)
fig.update_layout(title='Kamwenge')
fig.update_xaxes(rangeslider_visible=True)
fig.update_traces(opacity=0.65)
fig.update_layout(xaxis_range=[datetime(1997, 1, 1), datetime(2022, 12, 31)])
fig.show()
water_df['status_id'].value_counts()  # status and status clean?

# 1 = functioning, 0 = not functioning or unknown.
water_df['functioning'] = water_df['status_id'].map({'Yes': 1, 'No': 0, 'Unknown': 0})
# Functioning reports per installation year; numeric_only keeps the text and
# date columns out of the sum.
# NOTE(review): a point reported several times is counted once per report —
# confirm whether only the latest report per point should be used.
water_funct_year = water_df.groupby('install_year').sum(numeric_only=True)
# Year numbers -> 1 January timestamps so the trace shares the date x-axis.
water_funct_x = pd.to_datetime(water_funct_year.index.astype(int).astype(str), format='%Y')

# Functioning points and events. `events_date` is the daily event count
# built from working_conflict_df earlier in the notebook.
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Scatter(x=events_date.index, y=events_date['fatalities'], name='events'),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=water_funct_x, y=water_funct_year['functioning'], name='functioning points'),
    secondary_y=True,
)

# Set y-axes titles
fig.update_yaxes(title_text='events', secondary_y=False)
fig.update_yaxes(title_text='functioning points', secondary_y=True)
fig.update_layout(title='Uganda')
fig.update_xaxes(rangeslider_visible=True)
fig.update_traces(opacity=0.65)
fig.update_layout(xaxis_range=[datetime(1997, 1, 1), datetime(2022, 12, 31)])
fig.show()
# Daily conflict aggregates for Agago; numeric_only keeps the text admin
# columns out of the sum.
agago_fatalities_date = agago_conflict_ts.groupby('event_date').sum(numeric_only=True)
agago_events_date = agago_conflict_ts.groupby('event_date').count()

# 1 = functioning, 0 = not functioning or unknown. `assign` returns a new
# frame, so nothing is written into a slice of working_water_df.
agago_water_df = agago_water_df.assign(
    functioning=agago_water_df['status_id'].map({'Yes': 1, 'No': 0, 'Unknown': 0})
)
# Functioning reports per installation year: sum of the 0/1 flag, matching
# the country-level plot (this previously plotted the count of wpdx_id).
# NOTE(review): a point reported several times is counted once per report.
agagowater_funct_year = agago_water_df.groupby('install_year').sum(numeric_only=True)
# Year numbers -> 1 January timestamps so the trace shares the date x-axis.
agagowater_funct_x = pd.to_datetime(agagowater_funct_year.index.astype(int).astype(str), format='%Y')

fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Scatter(x=agago_events_date.index, y=agago_events_date['fatalities'], name='events'),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=agagowater_funct_x, y=agagowater_funct_year['functioning'], name='functioning points'),
    secondary_y=True,
)

# Set y-axes titles
fig.update_yaxes(title_text='events', secondary_y=False)
fig.update_yaxes(title_text='functioning points', secondary_y=True)
fig.update_layout(title='Agago')
fig.update_xaxes(rangeslider_visible=True)
fig.update_traces(opacity=0.65)
fig.update_layout(xaxis_range=[datetime(1997, 1, 1), datetime(2022, 12, 31)])
fig.show()
# Daily conflict aggregates for Lira; numeric_only keeps the text admin
# columns out of the sum.
lira_fatalities_date = lira_conflict_ts.groupby('event_date').sum(numeric_only=True)
lira_events_date = lira_conflict_ts.groupby('event_date').count()

# 1 = functioning, 0 = not functioning or unknown. `assign` returns a new
# frame, so nothing is written into a slice of working_water_df.
lira_water_df = lira_water_df.assign(
    functioning=lira_water_df['status_id'].map({'Yes': 1, 'No': 0, 'Unknown': 0})
)
# Functioning reports per installation year: sum of the 0/1 flag, matching
# the country-level plot (this previously plotted the count of wpdx_id).
# NOTE(review): a point reported several times is counted once per report.
lirawater_funct_year = lira_water_df.groupby('install_year').sum(numeric_only=True)
# Year numbers -> 1 January timestamps so the trace shares the date x-axis.
lirawater_funct_x = pd.to_datetime(lirawater_funct_year.index.astype(int).astype(str), format='%Y')

fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Scatter(x=lira_events_date.index, y=lira_events_date['fatalities'], name='events'),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=lirawater_funct_x, y=lirawater_funct_year['functioning'], name='functioning points'),
    secondary_y=True,
)

# Set y-axes titles
fig.update_yaxes(title_text='events', secondary_y=False)
fig.update_yaxes(title_text='functioning points', secondary_y=True)
fig.update_layout(title='Lira')
fig.update_xaxes(rangeslider_visible=True)
fig.update_traces(opacity=0.65)
fig.update_layout(xaxis_range=[datetime(1997, 1, 1), datetime(2022, 12, 31)])
fig.show()
# Daily conflict aggregates for Kamwenge; numeric_only keeps the text admin
# columns out of the sum.
kamwenge_fatalities_date = kamwenge_conflict_ts.groupby('event_date').sum(numeric_only=True)
kamwenge_events_date = kamwenge_conflict_ts.groupby('event_date').count()

# 1 = functioning, 0 = not functioning or unknown. `assign` returns a new
# frame, so nothing is written into a slice of working_water_df.
kamwenge_water_df = kamwenge_water_df.assign(
    functioning=kamwenge_water_df['status_id'].map({'Yes': 1, 'No': 0, 'Unknown': 0})
)
# Functioning reports per installation year: sum of the 0/1 flag, matching
# the country-level plot (this previously plotted the count of wpdx_id).
# NOTE(review): a point reported several times is counted once per report.
kamwengewater_funct_year = kamwenge_water_df.groupby('install_year').sum(numeric_only=True)
# Year numbers -> 1 January timestamps so the trace shares the date x-axis.
kamwengewater_funct_x = pd.to_datetime(kamwengewater_funct_year.index.astype(int).astype(str), format='%Y')

fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Scatter(x=kamwenge_events_date.index, y=kamwenge_events_date['fatalities'], name='events'),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=kamwengewater_funct_x, y=kamwengewater_funct_year['functioning'], name='functioning points'),
    secondary_y=True,
)

# Set y-axes titles
fig.update_yaxes(title_text='events', secondary_y=False)
fig.update_yaxes(title_text='functioning points', secondary_y=True)
fig.update_layout(title='Kamwenge')
fig.update_xaxes(rangeslider_visible=True)
fig.update_traces(opacity=0.65)
fig.update_layout(xaxis_range=[datetime(1997, 1, 1), datetime(2022, 12, 31)])
fig.show()