diff --git a/python/data science/fakejobs.py b/python/data science/fakejobs.py index 57fee5b..d492c3a 100644 --- a/python/data science/fakejobs.py +++ b/python/data science/fakejobs.py @@ -13,7 +13,7 @@ jobList = soup.find(id='ResultsContainer') jobs = jobList.find_all('div', class_='card-content') # Frame the data -frameList = [] +data = [] def formatElement(el): return el.text.strip() @@ -23,8 +23,8 @@ for job in jobs: frame['title'] = formatElement(job.find('h2', class_='title')) frame['company'] = formatElement(job.find('h3', class_='company')) frame['location'] = formatElement(job.find('p', class_='location')) - frameList.append(frame) + data.append(frame) # Save the data -df = pandas.DataFrame(frameList) +df = pandas.DataFrame(data) df.to_csv(os.path.dirname(os.path.realpath(__file__)) + '/fakejobs_res.csv') diff --git a/python/data science/fbPercentActive.py b/python/data science/fbPercentActive.py new file mode 100644 index 0000000..1306819 --- /dev/null +++ b/python/data science/fbPercentActive.py @@ -0,0 +1,34 @@ +import requests +import pandas as pd +from bs4 import BeautifulSoup +import pycountry +import json +import os + +# Fetch and parse the website +response = requests.get('https://www.statista.com/statistics/268136/top-15-countries-based-on-number-of-facebook-users/') +content = response.content +soup = BeautifulSoup(content, 'html.parser') + +# Find all of the data points +tds = soup.select('#statTableHTML td') + +# Frame the data +data = [] + +def population(country): + countryCode = pycountry.countries.search_fuzzy(country)[0].alpha_3 + res = requests.get('https://restcountries.eu/rest/v2/alpha/' + countryCode) + return json.loads(res.content)['population'] + +for td1, td2 in zip(tds[::2], tds[1::2]): + frame = {} + frame['country'] = td1.text.strip() + frame['active'] = td2.text.strip() + frame['population'] = population(frame['country']) + frame['percentActive'] = (int(frame['active']) * 1000000 / int(frame['population'])) * 100 + data.append(frame) + +# Save the data +df = pd.DataFrame(data) +df.to_csv(os.path.dirname(os.path.realpath(__file__)) + '/fbPercentActive_res.csv') diff --git a/python/data science/fbPercentActive_res.csv b/python/data science/fbPercentActive_res.csv new file mode 100644 index 0000000..2045ed0 --- /dev/null +++ b/python/data science/fbPercentActive_res.csv @@ -0,0 +1,21 @@ +,country,active,population,percentActive +0,India,340,1295210000,26.250569405733433 +1,United States,200,323947000,61.738494259863494 +2,Indonesia,140,258705000,54.11569161786591 +3,Brazil,130,206135893,63.065193600223715 +4,Mexico,98,122273473,80.14821007006155 +5,Philippines,88,103279800,85.20543223360231 +6,Vietnam,71,92700000,76.59115426105717 +7,Thailand,54,65327652,82.6602492922905 +8,Egypt,47,91290000,51.484280863183265 +9,Bangladesh,46,161006790,28.570223653300587 +10,Pakistan,45,194125062,23.18093271233572 +11,Colombia,38,48759958,77.93279887566761 +12,United Kingdom,38,65110000,58.36277069574566 +13,Turkey,37,78741053,46.98946558410896 +14,France,33,66710000,49.46784590016489 +15,Argentina,31,43590400,71.11657612685362 +16,Italy,31,60665551,51.099840830589336 +17,Nigeria,31,186988000,16.578603974586603 +18,Germany,28,81770900,34.24201029950753 +19,Peru,27,31488700,85.74504504790607 diff --git a/readme.md b/readme.md index ee42833..1f5ed31 100644 --- a/readme.md +++ b/readme.md @@ -11,6 +11,7 @@ - [Data Science](python/data%20science) - [Fake Jobs Scraper](python/data%20science/fakejobs.py) + - [Country Population vs Active Facebook Users in the Country](python/data%20science/fbPercentActive.py) - [Calculators](python/calculators) - [Binomial Distribution](python/calculators/Binomial%20Distribution.py) - [Pearson's Product-Moment Correlation Coefficient](python/calculators/PMCC.py)