Country Population vs Active Facebook Users in the Country
This commit is contained in:
parent
8684a266bf
commit
2839aa9237
4 changed files with 59 additions and 3 deletions
|
@ -13,7 +13,7 @@ jobList = soup.find(id='ResultsContainer')
|
||||||
jobs = jobList.find_all('div', class_='card-content')
|
jobs = jobList.find_all('div', class_='card-content')
|
||||||
|
|
||||||
# Frame the data
|
# Frame the data
|
||||||
frameList = []
|
data = []
|
||||||
|
|
||||||
def formatElement(el):
|
def formatElement(el):
|
||||||
return el.text.strip()
|
return el.text.strip()
|
||||||
|
@ -23,8 +23,8 @@ for job in jobs:
|
||||||
frame['title'] = formatElement(job.find('h2', class_='title'))
|
frame['title'] = formatElement(job.find('h2', class_='title'))
|
||||||
frame['company'] = formatElement(job.find('h3', class_='company'))
|
frame['company'] = formatElement(job.find('h3', class_='company'))
|
||||||
frame['location'] = formatElement(job.find('p', class_='location'))
|
frame['location'] = formatElement(job.find('p', class_='location'))
|
||||||
frameList.append(frame)
|
data.append(frame)
|
||||||
|
|
||||||
# Save the data
|
# Save the data
|
||||||
df = pandas.DataFrame(frameList)
|
df = pandas.DataFrame(data)
|
||||||
df.to_csv(os.path.dirname(os.path.realpath(__file__)) + '/fakejobs_res.csv')
|
df.to_csv(os.path.dirname(os.path.realpath(__file__)) + '/fakejobs_res.csv')
|
||||||
|
|
34
python/data science/fbPercentActive.py
Normal file
34
python/data science/fbPercentActive.py
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
import requests
|
||||||
|
import pandas as pd
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import pycountry
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Fetch and parse the website
# A timeout keeps the script from hanging forever on a stalled connection, and
# raise_for_status() stops it from scraping an HTTP error page (which would
# silently yield an empty table and therefore an empty CSV).
response = requests.get(
    'https://www.statista.com/statistics/268136/top-15-countries-based-on-number-of-facebook-users/',
    timeout=30,
)
response.raise_for_status()
content = response.content
soup = BeautifulSoup(content, 'html.parser')

# Find all of the data points
# Every <td> inside the element with id "statTableHTML", in document order.
tds = soup.select('#statTableHTML td')

# Frame the data
# One dict per table row is collected here.
data = []
|
||||||
|
|
||||||
|
def population(country):
    """Return the population of *country* from the REST Countries API.

    The name is resolved to an ISO 3166-1 alpha-3 code with
    ``pycountry.countries.search_fuzzy`` (the first match is used), and that
    code is looked up at ``restcountries.eu``.

    Args:
        country: Country name as scraped from the table, e.g. ``'Brazil'``.

    Returns:
        The ``population`` field of the API's JSON response.

    Raises:
        LookupError: If pycountry finds no match for ``country``.
        requests.RequestException: On timeout, connection failure or a
            non-2xx response.
        KeyError: If the response JSON has no ``population`` field.
    """
    countryCode = pycountry.countries.search_fuzzy(country)[0].alpha_3
    # NOTE(review): restcountries.eu appears to have been retired in favour of
    # restcountries.com -- confirm this endpoint still resolves.
    res = requests.get(
        'https://restcountries.eu/rest/v2/alpha/' + countryCode,
        timeout=30,
    )
    # Fail loudly on an error response instead of letting an error payload
    # surface later as a confusing KeyError on 'population'.
    res.raise_for_status()
    return res.json()['population']
|
||||||
|
|
||||||
|
# Cells are consumed pairwise: even-indexed cells hold the country name and the
# following odd-indexed cell holds its user count. zip() drops a trailing
# unpaired cell.
for td1, td2 in zip(tds[::2], tds[1::2]):
    frame = {}
    frame['country'] = td1.text.strip()
    frame['active'] = td2.text.strip()
    frame['population'] = population(frame['country'])
    # Parse as float after removing thousands separators so figures such as
    # "314.6" or "1,234" do not raise ValueError the way int() would; plain
    # integer strings produce exactly the same result as before.
    activeMillions = float(frame['active'].replace(',', ''))
    # The * 1000000 treats the scraped figure as millions of users; the result
    # is the share of the population, expressed as a percentage.
    frame['percentActive'] = (activeMillions * 1000000 / int(frame['population'])) * 100
    data.append(frame)
|
||||||
|
|
||||||
|
# Save the data
# The CSV is written into the directory that contains this script (symlinks
# resolved), independent of the current working directory.
df = pd.DataFrame(data)
scriptDir = os.path.dirname(os.path.realpath(__file__))
df.to_csv(scriptDir + '/fbPercentActive_res.csv')
|
21
python/data science/fbPercentActive_res.csv
Normal file
21
python/data science/fbPercentActive_res.csv
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
,country,active,population,percentActive
|
||||||
|
0,India,340,1295210000,26.250569405733433
|
||||||
|
1,United States,200,323947000,61.738494259863494
|
||||||
|
2,Indonesia,140,258705000,54.11569161786591
|
||||||
|
3,Brazil,130,206135893,63.065193600223715
|
||||||
|
4,Mexico,98,122273473,80.14821007006155
|
||||||
|
5,Philippines,88,103279800,85.20543223360231
|
||||||
|
6,Vietnam,71,92700000,76.59115426105717
|
||||||
|
7,Thailand,54,65327652,82.6602492922905
|
||||||
|
8,Egypt,47,91290000,51.484280863183265
|
||||||
|
9,Bangladesh,46,161006790,28.570223653300587
|
||||||
|
10,Pakistan,45,194125062,23.18093271233572
|
||||||
|
11,Colombia,38,48759958,77.93279887566761
|
||||||
|
12,United Kingdom,38,65110000,58.36277069574566
|
||||||
|
13,Turkey,37,78741053,46.98946558410896
|
||||||
|
14,France,33,66710000,49.46784590016489
|
||||||
|
15,Argentina,31,43590400,71.11657612685362
|
||||||
|
16,Italy,31,60665551,51.099840830589336
|
||||||
|
17,Nigeria,31,186988000,16.578603974586603
|
||||||
|
18,Germany,28,81770900,34.24201029950753
|
||||||
|
19,Peru,27,31488700,85.74504504790607
|
|
|
@ -11,6 +11,7 @@
|
||||||
|
|
||||||
- [Data Science](python/data%20science)
|
- [Data Science](python/data%20science)
|
||||||
- [Fake Jobs Scraper](python/data%20science/fakejobs.py)
|
- [Fake Jobs Scraper](python/data%20science/fakejobs.py)
|
||||||
|
- [Country Population vs Active Facebook Users in the Country](python/data%20science/fbPercentActive.py)
|
||||||
- [Calculators](python/calculators)
|
- [Calculators](python/calculators)
|
||||||
- [Binomial Distribution](python/calculators/Binomial%20Distribution.py)
|
- [Binomial Distribution](python/calculators/Binomial%20Distribution.py)
|
||||||
- [Pearson's Product-Moment Correlation Coefficient](python/calculators/PMCC.py)
|
- [Pearson's Product-Moment Correlation Coefficient](python/calculators/PMCC.py)
|
||||||
|
|
Loading…
Reference in a new issue