# Source code for scrape_temperatures

from typing import List

import pandas as pd
from bs4 import BeautifulSoup

import scrape_urls

# Site root; the relative hrefs scraped from the index page are appended to it.
base_url = 'https://www.weatherbase.com'
# Index page from which the per-country links are collected.
url = 'https://www.weatherbase.com/weather/countryall.php3'


def f_to_c(value: float) -> float:
    """
    Converts a temperature from Fahrenheit to Celsius.

    :param value: temperature in degrees Fahrenheit.
    :type value: float
    :return: the same temperature in degrees Celsius.
    :rtype: float
    """
    # Shift to the Celsius zero point first, then rescale the degree size.
    return (value - 32) * (5 / 9)


def in_to_mm(value: float) -> float:
    """
    Converts a length from inches to millimetres.

    :param value: length in inches.
    :type value: float
    :return: the same length in millimetres.
    :rtype: float
    """
    # One inch is 25.4 mm.
    return value * 25.4


def check_float(potential_float: str) -> bool:
    """
    Checks whether a value can be parsed by the built-in ``float``.

    Anything ``float()`` accepts counts as a float, which includes
    surrounding whitespace and the special spellings ``'nan'`` and
    ``'inf'``.

    :param potential_float: string to check.
    :type potential_float: str
    :return: True if ``float(potential_float)`` succeeds, False otherwise.
    :rtype: bool
    """
    try:
        float(potential_float)
    except (TypeError, ValueError):
        # ValueError: text that is not a number.
        # TypeError: None or another object float() cannot convert at all;
        # report "not a float" instead of propagating the error.
        return False
    return True


def get_stats(table: pd.DataFrame) -> dict:
    """
    Aggregates a climate table into a few summary statistics.

    Only rows whose label is present in ``table.index`` contribute a key,
    so the result holds between zero and four entries:

    * ``'max avg max temp'`` - maximum of the 'Average High Temperature (F)'
      row, converted to Celsius.
    * ``'min avg min temp'`` - minimum of the 'Average Low Temperature (F)'
      row, converted to Celsius.
    * ``'avg temp'`` - 'ANNUAL' value of the 'Average Temperature (F)' row,
      converted to Celsius.
    * ``'avg rainfall (mm)'`` - 'ANNUAL' value of the
      'Average Precipitation (in)' row, converted to millimetres.

    :param table: pandas dataframe of the table to aggregate, indexed by
        statistic name.
    :type table: pd.DataFrame
    :return: dictionary of the maxes and mins and avgs.
    :rtype: dict
    """
    stats = {}
    row_labels = table.index

    if 'Average High Temperature (F)' in row_labels:
        highs = table.loc['Average High Temperature (F)']
        # The slice drops the first and the last two positions of the row;
        # presumably those are the non-monthly columns - TODO confirm
        # against the layout produced by scrape_urls.get_table.
        stats['max avg max temp'] = f_to_c(highs.iloc[1:-2].max())

    if 'Average Low Temperature (F)' in row_labels:
        lows = table.loc['Average Low Temperature (F)']
        # Same slice as for the highs (see the assumption noted there).
        stats['min avg min temp'] = f_to_c(lows.iloc[1:-2].min())

    if 'Average Temperature (F)' in row_labels:
        means = table.loc['Average Temperature (F)']
        stats['avg temp'] = f_to_c(means['ANNUAL'])

    if 'Average Precipitation (in)' in row_labels:
        rainfall = table.loc['Average Precipitation (in)']
        stats['avg rainfall (mm)'] = in_to_mm(rainfall['ANNUAL'])

    return stats


def get_country_stats(soups: List[BeautifulSoup]) -> dict:
    """
    For every country page, get the stats on its climate.

    Each page's table is extracted with ``scrape_urls.get_table`` and
    summarised with ``get_stats``. The result is keyed by the title-cased
    text of the first element that ``scrape_urls.find_id_in_html`` returns
    for ``'headerfont'``; if two pages yield the same name, the later page
    overwrites the earlier one.

    :param soups: list of the soups of the pages.
    :type soups: List[BeautifulSoup]
    :return: dictionary mapping country name to its stats dictionary.
    :rtype: dict
    :raises IndexError: if a page has no 'headerfont' match (assuming
        ``find_id_in_html`` returns a sequence - TODO confirm).
    """
    stats_by_country = {}
    for soup in soups:
        table = scrape_urls.get_table(soup)
        header = scrape_urls.find_id_in_html(soup, 'headerfont')[0]
        country_name = header.text.title()
        stats_by_country[country_name] = get_stats(table)
    return stats_by_country


def main() -> None:
    """
    Scrapes the climate statistics of every country and saves them to a CSV.

    Fetches the country index page at ``url``, builds an absolute URL from
    the ``href`` of every element that ``scrape_urls.find_html_class``
    returns for ``'redglow'``, scrapes those pages through
    ``scrape_urls.multi_thread_func``, aggregates them with
    ``get_country_stats`` and writes the resulting dataframe to
    'Climate by Country.csv' (a relative path).
    """
    # Get list of countries
    countries_soup = scrape_urls.scrape_page(url)

    # Get links to each of the country's stats.
    countries = scrape_urls.find_html_class(countries_soup, 'redglow')

    # The hrefs are site-relative, so prefix them with the site root.
    urls = [base_url + country['href'] for country in countries]

    # Scrape all the pages
    soups = scrape_urls.multi_thread_func(scrape_urls.scrape_page, urls)

    # Get stats for each page; the dict of dicts becomes one column per
    # country and one row per statistic.
    country_stats = pd.DataFrame(get_country_stats(soups))

    # Save the data to a .csv
    country_stats.to_csv('Climate by Country.csv')


# Run the scrape only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()