Source code for get_data

import os
from typing import List, Optional

import pandas as pd
import pycountry

import scrape_cost_of_living
import scrape_temperatures

# Names of the datasets to load. Each name is combined with a file suffix
# (by default ' by Country.csv') to form the CSV file name, and the order
# here is the order of the dataframes returned by import_data().
data = [
    'Climate',
    'Cost of Living',
    'Population Density',
    'Safety',
    'Health',
    'Pollution',
    'Corruption Perception',
    'Freedom',
]


def main():
    """
    Builds the combined 'All Data by Country.csv' file.

    Calls the ``scrape_temperatures`` and ``scrape_cost_of_living`` entry
    points, loads every dataset listed in ``data``, cleans the population
    density dataframe, standardises the country names, promotes 'Country'
    to the index of every dataframe and joins them all onto the first one.
    The result is written to the ``data`` directory located next to this
    file.
    """
    scrape_temperatures.main()
    scrape_cost_of_living.main()

    dfs = import_data()

    # Locate the population density dataframe by name rather than by a
    # hard-coded position, so reordering ``data`` cannot silently clean
    # the wrong dataframe.
    pop_density_pos = data.index('Population Density')
    dfs[pop_density_pos] = clean_pop_density(dfs[pop_density_pos])

    dfs = standardise_country_names(dfs)
    dfs = promote_to_index(dfs, 'Country')
    joined_data = join_data(dfs[0], dfs[1:])

    data_dir = os.path.abspath(
        os.path.join(os.path.dirname(__file__), 'data'))
    # Ensure the output directory exists before writing into it.
    os.makedirs(data_dir, exist_ok=True)
    joined_data.to_csv(os.path.join(data_dir, 'All Data by Country.csv'))
def standardise_country_names(dfs: List[pd.DataFrame]) -> list:
    """
    Standardises the country names across all the dataframes.

    Every value in a dataframe's 'Country' column is looked up with
    ``pycountry.countries.search_fuzzy``. When the name of the first match
    is 'United States', 'United Kingdom' or 'South Korea', that name is
    used; in every other case (including when the lookup raises
    ``LookupError``) the original value is title-cased instead.

    The dataframes are modified in place and the same list is returned.

    :param dfs: list of dataframes, each with a 'Country' column.
    :type dfs: List[pd.DataFrame]
    :return: list of dataframes with standardised country names.
    :rtype: List[pd.DataFrame]
    """
    # Only these matched names replace the original spelling.
    # NOTE(review): confirm pycountry really reports 'South Korea' as a
    # ``name``; if it uses a different form this entry never matches.
    preferred_names = ('United States', 'United Kingdom', 'South Korea')

    # The same spellings recur across the dataframes, so each distinct
    # spelling is resolved once to avoid repeated fuzzy searches.
    resolved = {}

    def _standardise(country):
        """Return the standardised form of a single country name."""
        if country not in resolved:
            try:
                match = pycountry.countries.search_fuzzy(country)[0].name
            except LookupError:
                # Nothing matched: fall back to the title-cased original.
                match = None
            if match in preferred_names:
                resolved[country] = match
            else:
                resolved[country] = country.title()
        return resolved[country]

    for df in dfs:
        df['Country'] = [_standardise(country) for country in df['Country']]
    return dfs
def import_data(suffix: str = ' by Country.csv',
                data_dir: str = 'data',
                names: Optional[List[str]] = None) -> list:
    """
    Imports all the data into a list of dataframes.

    One CSV file is read per dataset name, from
    ``<data_dir>/<name><suffix>``.

    :param suffix: suffix of the file names.
    :type suffix: str
    :param data_dir: directory containing the CSV files. The default,
        'data', is resolved relative to the current working directory.
    :type data_dir: str
    :param names: dataset names to load. When omitted, the module-level
        ``data`` list is used.
    :type names: Optional[List[str]]
    :return: list of dataframes, in the same order as the names.
    :rtype: List[pd.DataFrame]
    """
    if names is None:
        names = data
    return [
        pd.read_csv(os.path.join(data_dir, name + suffix))
        for name in names
    ]
def join_data(df1: pd.DataFrame, dfs: list) -> pd.DataFrame:
    """
    Joins a sequence of dataframes onto a starting dataframe.

    Each dataframe in ``dfs`` is joined in turn, via ``DataFrame.join``,
    onto the result accumulated so far.

    :param df1: dataframe that the others are joined onto.
    :type df1: pd.DataFrame
    :param dfs: dataframes to join onto df1, in order.
    :type dfs: List[pd.DataFrame]
    :return: the joined dataframe (df1 itself when dfs is empty).
    :rtype: pd.DataFrame
    """
    joined = df1
    for other in dfs:
        joined = joined.join(other)
    return joined
def clean_pop_density(df: pd.DataFrame) -> pd.DataFrame:
    """
    Tidies the population density dataframe.

    The 'name' column is renamed to 'Country' and the 'Rank' column is
    removed. The dataframe is modified in place and also returned.

    :param df: dataframe to be cleaned.
    :type df: pd.DataFrame
    :return: the same dataframe, cleaned.
    :rtype: pd.DataFrame
    """
    column_renames = {'name': 'Country'}
    df.rename(columns=column_renames, inplace=True)
    df.drop(columns=['Rank'], inplace=True)
    return df
def promote_to_index(dfs: list, col_name: str) -> list:
    """
    Makes the given column the index of every dataframe.

    ``DataFrame.set_index`` is applied to each dataframe, so new
    dataframes are returned and the inputs are left unchanged.

    :param dfs: list of dataframes.
    :type dfs: List[pd.DataFrame]
    :param col_name: name of the column to use as the index.
    :type col_name: str
    :return: new list of dataframes indexed by the specified column.
    :rtype: List[pd.DataFrame]
    """
    indexed = []
    for frame in dfs:
        indexed.append(frame.set_index(col_name))
    return indexed
# Run the pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()