Country Population vs Active Facebook Users in the Country
This commit is contained in:
parent
8684a266bf
commit
2839aa9237
4 changed files with 59 additions and 3 deletions
|
@ -13,7 +13,7 @@ jobList = soup.find(id='ResultsContainer')
|
|||
jobs = jobList.find_all('div', class_='card-content')
|
||||
|
||||
# Frame the data
|
||||
frameList = []
|
||||
data = []
|
||||
|
||||
def formatElement(el):
    """Return the text of ``el`` with leading/trailing whitespace removed.

    ``el`` is any object exposing a string ``text`` attribute (in this
    script, the elements returned by ``job.find(...)``).
    """
    raw_text = el.text
    return raw_text.strip()
|
||||
|
@ -23,8 +23,8 @@ for job in jobs:
|
|||
frame['title'] = formatElement(job.find('h2', class_='title'))
|
||||
frame['company'] = formatElement(job.find('h3', class_='company'))
|
||||
frame['location'] = formatElement(job.find('p', class_='location'))
|
||||
frameList.append(frame)
|
||||
data.append(frame)
|
||||
|
||||
# Save the data
|
||||
df = pandas.DataFrame(frameList)
|
||||
df = pandas.DataFrame(data)
|
||||
df.to_csv(os.path.dirname(os.path.realpath(__file__)) + '/fakejobs_res.csv')
|
||||
|
|
34
python/data science/fbPercentActive.py
Normal file
34
python/data science/fbPercentActive.py
Normal file
|
@ -0,0 +1,34 @@
|
|||
import requests
|
||||
import pandas as pd
|
||||
from bs4 import BeautifulSoup
|
||||
import pycountry
|
||||
import json
|
||||
import os
|
||||
|
||||
# Download the Statista ranking page and build a parse tree from its HTML
page = requests.get('https://www.statista.com/statistics/268136/top-15-countries-based-on-number-of-facebook-users/')
soup = BeautifulSoup(page.content, 'html.parser')

# Collect every cell of the statistics table, in document order
tds = soup.select('#statTableHTML td')

# Rows of the resulting table; one dict is appended per country further down
data = list()
|
||||
|
||||
def population(country):
    """Return the population of ``country`` as reported by the REST Countries API.

    The country name is resolved to an ISO alpha-3 code with pycountry's
    fuzzy search (the first match is used), and that code is then looked up
    through the REST Countries ``alpha`` endpoint.

    Args:
        country: Country name as scraped from the statistics table.

    Returns:
        The value of the ``population`` field in the API's JSON response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within the timeout.
        KeyError: If the response has no ``population`` field.
        (pycountry is expected to raise ``LookupError`` when nothing matches
        the name -- TODO confirm against the installed pycountry version.)
    """
    countryCode = pycountry.countries.search_fuzzy(country)[0].alpha_3
    # NOTE(review): confirm this host is still served; the URL is kept as-is.
    # A timeout keeps the script from hanging forever on a stalled connection.
    res = requests.get('https://restcountries.eu/rest/v2/alpha/' + countryCode, timeout=30)
    # Fail loudly on 4xx/5xx instead of surfacing a confusing KeyError or
    # JSON decoding error from an error page.
    res.raise_for_status()
    return res.json()['population']
|
||||
|
||||
# Cells alternate: even positions hold the country name, odd positions hold
# the active-user figure that belongs to it.
for td1, td2 in zip(tds[0::2], tds[1::2]):
    country = td1.text.strip()
    active = td2.text.strip()
    inhabitants = population(country)
    # The scraped figure is scaled by 1,000,000 before being compared with
    # the population, then expressed as a percentage.
    share = (int(active) * 1000000 / int(inhabitants)) * 100
    data.append({
        'country': country,
        'active': active,
        'population': inhabitants,
        'percentActive': share,
    })

# Write the table next to this script
script_dir = os.path.dirname(os.path.realpath(__file__))
df = pd.DataFrame(data)
df.to_csv(script_dir + '/fbPercentActive_res.csv')
|
21
python/data science/fbPercentActive_res.csv
Normal file
21
python/data science/fbPercentActive_res.csv
Normal file
|
@ -0,0 +1,21 @@
|
|||
,country,active,population,percentActive
|
||||
0,India,340,1295210000,26.250569405733433
|
||||
1,United States,200,323947000,61.738494259863494
|
||||
2,Indonesia,140,258705000,54.11569161786591
|
||||
3,Brazil,130,206135893,63.065193600223715
|
||||
4,Mexico,98,122273473,80.14821007006155
|
||||
5,Philippines,88,103279800,85.20543223360231
|
||||
6,Vietnam,71,92700000,76.59115426105717
|
||||
7,Thailand,54,65327652,82.6602492922905
|
||||
8,Egypt,47,91290000,51.484280863183265
|
||||
9,Bangladesh,46,161006790,28.570223653300587
|
||||
10,Pakistan,45,194125062,23.18093271233572
|
||||
11,Colombia,38,48759958,77.93279887566761
|
||||
12,United Kingdom,38,65110000,58.36277069574566
|
||||
13,Turkey,37,78741053,46.98946558410896
|
||||
14,France,33,66710000,49.46784590016489
|
||||
15,Argentina,31,43590400,71.11657612685362
|
||||
16,Italy,31,60665551,51.099840830589336
|
||||
17,Nigeria,31,186988000,16.578603974586603
|
||||
18,Germany,28,81770900,34.24201029950753
|
||||
19,Peru,27,31488700,85.74504504790607
|
|
|
@ -11,6 +11,7 @@
|
|||
|
||||
- [Data Science](python/data%20science)
|
||||
- [Fake Jobs Scraper](python/data%20science/fakejobs.py)
|
||||
- [Country Population vs Active Facebook Users in the Country](python/data%20science/fbPercentActive.py)
|
||||
- [Calculators](python/calculators)
|
||||
- [Binomial Distribution](python/calculators/Binomial%20Distribution.py)
|
||||
- [Pearson's Product-Moment Correlation Coefficient](python/calculators/PMCC.py)
|
||||
|
|
Loading…
Reference in a new issue