3. Conflict data
Accessing conflict data from the Armed Conflict Location & Event Data Project
We access conflict data for Uganda covering roughly the past 20 years. After cleaning, we end up with a list of events describing the actors involved, the type of conflict, and its location.
Here, we are cleaning data on individual conflict events retrieved from the ACLED API.
%run /Users/thomasadler/Desktop/futuristic-platipus/capstone/notebooks/ta_01_packages_functions.py
%run /Users/thomasadler/Desktop/futuristic-platipus/keys.py
# Base URL of the ACLED "read" endpoint; the query string (credentials,
# row limit, country filter) is added when the request is made below.
conflict_api_endpoint = "https://api.acleddata.com/acled/read"
We want to get every single conflict event that happened in Uganda since 1997 (the start of the dataset).
# Numeric country code passed to the API's `iso` filter to select Uganda.
uganda_iso = 800

# Fetch the Uganda events from the ACLED read endpoint.
# The query is passed through `params` so that requests URL-encodes each value
# (e.g. the '@' in the e-mail address). The previous hand-built URL ended in
# '.csv', which became part of the `iso` value ('800.csv') even though the
# response is parsed as JSON below.
# conflict_api_key / conflict_api_email are expected to be defined by keys.py.
conflict_r = requests.get(
    conflict_api_endpoint,
    params={
        'key': conflict_api_key,
        'email': conflict_api_email,
        # limit=0 is intended to lift the default cap on returned rows
        # -- TODO confirm against the ACLED API guide.
        'limit': 0,
        'iso': uganda_iso,
    },
    # Do not hang forever if the server stops responding.
    timeout=60,
)
# Fail loudly on an HTTP error instead of trying to parse an error page.
conflict_r.raise_for_status()

#saving as json data
data = conflict_r.json()

#extract events information (the payload keeps the event records under 'data')
events = data['data']

#save to a dataframe, one row per event record
uganda_conflict = pd.DataFrame(events)
# Keep the raw API frame untouched: all cleaning below happens on an
# independent (deep) copy.
uganda_conflict_df = uganda_conflict.copy(deep=True)

# Quick look at the last rows, then at column dtypes and non-null counts.
uganda_conflict_df.tail(n=5)
uganda_conflict_df.info()
# Columns that should hold numbers rather than text.
num_columns = ['latitude', 'longitude', 'fatalities']

# float_converter is not defined in this section (presumably it comes from the
# helper script run at the top). Its return value is ignored, so it is assumed
# to convert the column in place -- TODO confirm.
for numeric_col in num_columns:
    float_converter(uganda_conflict_df, numeric_col)

# Re-inspect the dtypes to confirm the conversion took effect.
uganda_conflict_df.info()
# Convert the event date column. date_converter is not defined in this section
# (presumably it comes from the helper script run at the top); its return value
# is discarded, so it is assumed to modify the frame in place -- TODO confirm.
date_converter(uganda_conflict_df, 'event_date')
# Display the column to check the result of the conversion.
uganda_conflict_df['event_date']
# Drop the columns that are not kept for the analysis: precision and
# bookkeeping fields, ACLED event identifiers, and country-level identifiers
# (the request above was already filtered to a single country).
# drop() already returns a new DataFrame, so no extra pd.DataFrame(...) wrapper
# is needed, and lines inside brackets need no backslash continuation.
uganda_conflict_df = uganda_conflict_df.drop(
    columns=[
        'time_precision',
        'event_id_cnty',
        'event_id_no_cnty',
        'geo_precision',
        'timestamp',
        'year',
        'iso',
        'iso3',
        'region',
        'country',
    ]
)

#check current columns
uganda_conflict_df.info()
# Print the first value of each conflict-dataset location column next to the
# water-dataset column it is meant to correspond to, so the mapping can be
# checked by eye before the columns are renamed.
for conflict_col, water_col in [
    ('admin1', 'clean_adm1'),
    ('admin2', 'clean_adm2'),
    ('admin3', 'clean_adm3'),
    ('location', 'clean_adm4'),
]:
    print(
        f'{conflict_col} in the conflict dataset should be {water_col} '
        'in the water dataset, check with:',
        # .iloc[0] picks the first row by position; the previous
        # .head(1)[0] was a label lookup that only works while the index
        # still contains the label 0.
        uganda_conflict_df[conflict_col].iloc[0])
# Give the location columns the same names as in the water dataset
# (clean_adm1 .. clean_adm4).
uganda_conflict_df = uganda_conflict_df.rename(
    columns=dict(
        admin1='clean_adm1',
        admin2='clean_adm2',
        admin3='clean_adm3',
        location='clean_adm4',
    )
)
# Three quick data-quality flags; each prints True when a problem is present.
print(
    uganda_conflict_df.isna().any().any(),    # at least one missing value
    uganda_conflict_df.duplicated().any(),    # at least one duplicated row
    uganda_conflict_df.T.duplicated().any(),  # at least one duplicated column
)
# Write the cleaned events to CSV. data_filepath is not defined in this section
# (presumably set by the helper script run at the top). to_csv is called with
# its defaults, so the row index is written as the first column.
uganda_conflict_df.to_csv(data_filepath + 'ta_3_conflict_df_clean.csv')
# Show the data-dictionary image for this dataset. Image and
# dictionary_filepath are likewise not defined in this section.
Image(dictionary_filepath+"3A-Conflict-Dictionary.png")
# Build a second frame whose four location columns are upper-cased.
# assign() returns a new DataFrame, so uganda_conflict_df itself is unchanged,
# and overwritten columns keep their original position.
uganda_conflict_df_upper = uganda_conflict_df.assign(**{
    adm_col: uganda_conflict_df[adm_col].str.upper()
    for adm_col in ('clean_adm1', 'clean_adm2', 'clean_adm3', 'clean_adm4')
})

# Write the upper-cased variant next to the regular cleaned CSV.
uganda_conflict_df_upper.to_csv(data_filepath + 'ta_3_conflict_df_clean_upper.csv')