# Source code for scrape_temperatures

from typing import List

import pandas as pd
from bs4 import BeautifulSoup

import scrape_urls

# Root of the Weatherbase site; the relative links scraped from its pages
# are appended to this to form absolute URLs.
base_url = 'https://www.weatherbase.com'
# Page that lists every country.
url = base_url + '/weather/countryall.php3'


def f_to_c(value: float) -> float:
    """
    Convert a temperature from degrees Fahrenheit to degrees Celsius.

    :param value: temperature in degrees Fahrenheit.
    :type value: float
    :return: the equivalent temperature in degrees Celsius.
    :rtype: float
    """
    degrees_above_freezing = value - 32
    celsius_per_fahrenheit = 5 / 9
    return degrees_above_freezing * celsius_per_fahrenheit
def in_to_mm(value: float) -> float:
    """
    Convert a length from inches to millimetres.

    :param value: length in inches.
    :type value: float
    :return: the equivalent length in millimetres.
    :rtype: float
    """
    mm_per_inch = 25.4
    return value * mm_per_inch
def check_float(potential_float: str) -> bool:
    """
    Check whether a value can be parsed by ``float()``.

    Anything ``float()`` accepts counts as a float, which includes
    strings such as ``'nan'`` and ``'inf'``.

    :param potential_float: string to check.
    :type potential_float: str
    :return: True if ``float(potential_float)`` succeeds, False otherwise.
    :rtype: bool
    """
    try:
        float(potential_float)
    except (TypeError, ValueError):
        # ValueError: the text is not numeric (e.g. '' or 'abc').
        # TypeError: the value is not a string or number at all (e.g. None
        # for a missing cell); report it as "not a float" instead of
        # letting the exception escape.
        return False
    return True
def get_stats(table: pd.DataFrame) -> dict:
    """
    Summarise a climate table into a few headline figures.

    A figure is only produced when its source row label is present in
    ``table.index``; absent rows are skipped, so the result holds
    anywhere from zero to four entries.

    :param table: climate table indexed by measurement name. The annual
        figures are read from its ``'ANNUAL'`` entry.
    :type table: pd.DataFrame
    :return: dictionary that may contain ``'max avg max temp'``,
        ``'min avg min temp'`` and ``'avg temp'`` (each passed through
        ``f_to_c``) and ``'avg rainfall (mm)'`` (passed through
        ``in_to_mm``).
    :rtype: dict
    """
    # (row label, output key, how to reduce that row), in output order.
    # The positional slice 1:-2 drops the first entry and the last two
    # entries of a row -- presumably leaving just the monthly values;
    # verify against the layout produced by scrape_urls.get_table.
    recipes = (
        ('Average High Temperature (F)', 'max avg max temp',
         lambda row: f_to_c(row.iloc[1:-2].max())),
        ('Average Low Temperature (F)', 'min avg min temp',
         lambda row: f_to_c(row.iloc[1:-2].min())),
        ('Average Temperature (F)', 'avg temp',
         lambda row: f_to_c(row['ANNUAL'])),
        ('Average Precipitation (in)', 'avg rainfall (mm)',
         lambda row: in_to_mm(row['ANNUAL'])),
    )

    rows_present = table.index
    stats = {}
    for row_label, stat_name, summarise in recipes:
        if row_label in rows_present:
            stats[stat_name] = summarise(table.loc[row_label])
    return stats
def get_country_stats(soups: List[BeautifulSoup]) -> dict:
    """
    Build a mapping from country name to that country's climate stats.

    :param soups: parsed pages to process, one soup per page.
    :type soups: List[BeautifulSoup]
    :return: dictionary keyed by title-cased country name whose values are
        the dictionaries returned by ``get_stats``. If two pages yield the
        same name, the later page replaces the earlier one.
    :rtype: dict
    """
    stats_by_country = {}
    for page in soups:
        climate_table = scrape_urls.get_table(page)
        # The first 'headerfont' match supplies the country name.
        header_matches = scrape_urls.find_id_in_html(page, 'headerfont')
        country = header_matches[0].text.title()
        stats_by_country[country] = get_stats(climate_table)
    return stats_by_country
def main() -> None:
    """
    Scrape climate statistics for every country and save them as a CSV.

    Fetches the country listing at ``url``, follows each ``'redglow'``
    link, aggregates every country page with ``get_country_stats`` and
    writes the result to ``'Climate by Country.csv'`` (a relative path, so
    it is resolved against the current working directory).

    :return: None
    :rtype: None
    """
    # Get list of countries
    countries_soup = scrape_urls.scrape_page(url)
    # Get links to each of the country's stats.
    countries = scrape_urls.find_html_class(countries_soup, 'redglow')
    # Create list of urls; iterate over the links directly instead of
    # indexing through range(len(...)).
    urls = [base_url + country['href'] for country in countries]
    # Scrape all the pages
    soups = scrape_urls.multi_thread_func(scrape_urls.scrape_page, urls)
    # Get stats for each page. A dict of dicts becomes one column per
    # country and one row per statistic.
    country_stats = pd.DataFrame(get_country_stats(soups))
    # Save the data to a .csv
    country_stats.to_csv('Climate by Country.csv')
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()