Country Population vs Active Facebook Users in the Country

This commit is contained in:
newt! 2021-08-23 01:49:58 +01:00
parent b2eb65575a
commit 01c554948f
4 changed files with 59 additions and 3 deletions

View file

@ -13,7 +13,7 @@ jobList = soup.find(id='ResultsContainer')
jobs = jobList.find_all('div', class_='card-content') jobs = jobList.find_all('div', class_='card-content')
# Frame the data # Frame the data
frameList = [] data = []
def formatElement(el): def formatElement(el):
return el.text.strip() return el.text.strip()
@ -23,8 +23,8 @@ for job in jobs:
frame['title'] = formatElement(job.find('h2', class_='title')) frame['title'] = formatElement(job.find('h2', class_='title'))
frame['company'] = formatElement(job.find('h3', class_='company')) frame['company'] = formatElement(job.find('h3', class_='company'))
frame['location'] = formatElement(job.find('p', class_='location')) frame['location'] = formatElement(job.find('p', class_='location'))
frameList.append(frame) data.append(frame)
# Save the data # Save the data
df = pandas.DataFrame(frameList) df = pandas.DataFrame(data)
df.to_csv(os.path.dirname(os.path.realpath(__file__)) + '/fakejobs_res.csv') df.to_csv(os.path.dirname(os.path.realpath(__file__)) + '/fakejobs_res.csv')

View file

@ -0,0 +1,34 @@
import requests
import pandas as pd
from bs4 import BeautifulSoup
import pycountry
import json
import os
# Fetch and parse the website
# A timeout keeps the script from hanging forever on a stalled connection, and
# raise_for_status() stops on an HTTP error instead of parsing an error page
# into an empty result.
response = requests.get(
    'https://www.statista.com/statistics/268136/top-15-countries-based-on-number-of-facebook-users/',
    timeout=30,
)
response.raise_for_status()
content = response.content
soup = BeautifulSoup(content, 'html.parser')
# Find all of the data points
# Every <td> under the element with id "statTableHTML"; the loop further down
# consumes them as alternating (country, active users) cells.
tds = soup.select('#statTableHTML td')
# Frame the data
# One dict per country is appended here and later turned into a DataFrame.
data = []
def population(country):
    """Return the population reported by the REST Countries API for *country*.

    The name is resolved with pycountry's fuzzy search (the first match is
    used) and that match's alpha-3 code is used to query the API.

    Args:
        country: Country name as free text, e.g. the text of a table cell.

    Returns:
        The value stored under the 'population' key of the API's JSON reply.

    Raises:
        LookupError: If pycountry cannot match the name to any country.
        requests.HTTPError: If the API answers with an error status.
    """
    country_code = pycountry.countries.search_fuzzy(country)[0].alpha_3
    # NOTE(review): restcountries.eu has reportedly been retired in favour of
    # restcountries.com - confirm this endpoint still resolves.
    # The timeout prevents an unresponsive API from stalling the whole run.
    res = requests.get(
        'https://restcountries.eu/rest/v2/alpha/' + country_code,
        timeout=30,
    )
    # Fail with the HTTP error itself rather than a KeyError/JSON error later.
    res.raise_for_status()
    return json.loads(res.content)['population']
# The cells alternate country / active-user count, so pair the even-indexed
# cells with the odd-indexed ones.
for country_cell, active_cell in zip(tds[::2], tds[1::2]):
    country = country_cell.text.strip()
    active = active_cell.text.strip()
    residents = population(country)
    # The 1,000,000 factor implies the cell is a count in millions (presumably
    # the unit used by the source table - verify). Parse it as a float with
    # thousands separators removed so cells such as "329.65" or "1,234" do not
    # raise ValueError the way a plain int() would; whole-number cells give
    # exactly the same result as before.
    active_users = float(active.replace(',', '')) * 1000000
    data.append({
        'country': country,
        # Kept as the original cell text so the saved column is unchanged.
        'active': active,
        'population': residents,
        'percentActive': (active_users / int(residents)) * 100,
    })
# Save the data
# The CSV is written into the directory that contains this script (symlinks
# resolved by realpath), regardless of the current working directory.
script_dir = os.path.dirname(os.path.realpath(__file__))
df = pd.DataFrame(data)
df.to_csv(script_dir + '/fbPercentActive_res.csv')

View file

@ -0,0 +1,21 @@
,country,active,population,percentActive
0,India,340,1295210000,26.250569405733433
1,United States,200,323947000,61.738494259863494
2,Indonesia,140,258705000,54.11569161786591
3,Brazil,130,206135893,63.065193600223715
4,Mexico,98,122273473,80.14821007006155
5,Philippines,88,103279800,85.20543223360231
6,Vietnam,71,92700000,76.59115426105717
7,Thailand,54,65327652,82.6602492922905
8,Egypt,47,91290000,51.484280863183265
9,Bangladesh,46,161006790,28.570223653300587
10,Pakistan,45,194125062,23.18093271233572
11,Colombia,38,48759958,77.93279887566761
12,United Kingdom,38,65110000,58.36277069574566
13,Turkey,37,78741053,46.98946558410896
14,France,33,66710000,49.46784590016489
15,Argentina,31,43590400,71.11657612685362
16,Italy,31,60665551,51.099840830589336
17,Nigeria,31,186988000,16.578603974586603
18,Germany,28,81770900,34.24201029950753
19,Peru,27,31488700,85.74504504790607
1 country active population percentActive
2 0 India 340 1295210000 26.250569405733433
3 1 United States 200 323947000 61.738494259863494
4 2 Indonesia 140 258705000 54.11569161786591
5 3 Brazil 130 206135893 63.065193600223715
6 4 Mexico 98 122273473 80.14821007006155
7 5 Philippines 88 103279800 85.20543223360231
8 6 Vietnam 71 92700000 76.59115426105717
9 7 Thailand 54 65327652 82.6602492922905
10 8 Egypt 47 91290000 51.484280863183265
11 9 Bangladesh 46 161006790 28.570223653300587
12 10 Pakistan 45 194125062 23.18093271233572
13 11 Colombia 38 48759958 77.93279887566761
14 12 United Kingdom 38 65110000 58.36277069574566
15 13 Turkey 37 78741053 46.98946558410896
16 14 France 33 66710000 49.46784590016489
17 15 Argentina 31 43590400 71.11657612685362
18 16 Italy 31 60665551 51.099840830589336
19 17 Nigeria 31 186988000 16.578603974586603
20 18 Germany 28 81770900 34.24201029950753
21 19 Peru 27 31488700 85.74504504790607

View file

@ -11,6 +11,7 @@
- [Data Science](python/data%20science) - [Data Science](python/data%20science)
- [Fake Jobs Scraper](python/data%20science/fakejobs.py) - [Fake Jobs Scraper](python/data%20science/fakejobs.py)
- [Country Population vs Active Facebook Users in the Country](python/data%20science/fbPercentActive.py)
- [Calculators](python/calculators) - [Calculators](python/calculators)
- [Binomial Distribution](python/calculators/Binomial%20Distribution.py) - [Binomial Distribution](python/calculators/Binomial%20Distribution.py)
- [Pearson's Product-Moment Correlation Coefficient](python/calculators/PMCC.py) - [Pearson's Product-Moment Correlation Coefficient](python/calculators/PMCC.py)