Country Population vs Active Facebook Users in the Country

2024-10-09 18:02:31 +01:00 · 2024-10-09 18:02:31 +01:00 · 2839aa9237
commit 2839aa9237
parent 8684a266bf
4 changed files with 59 additions and 3 deletions
--- a/science/fakejobs.py
+++ b/science/fakejobs.py
@@ -13,7 +13,7 @@ jobList = soup.find(id='ResultsContainer')
 jobs = jobList.find_all('div', class_='card-content')

 # Frame the data
-frameList = []
+data = []

 def formatElement(el):
    return el.text.strip()
@@ -23,8 +23,8 @@ for job in jobs:
    frame['title'] = formatElement(job.find('h2', class_='title'))
    frame['company'] = formatElement(job.find('h3', class_='company'))
    frame['location'] = formatElement(job.find('p', class_='location'))
-    frameList.append(frame)
+    data.append(frame)

 # Save the data
-df = pandas.DataFrame(frameList)
+df = pandas.DataFrame(data)
 df.to_csv(os.path.dirname(os.path.realpath(__file__)) + '/fakejobs_res.csv')
--- a/science/fbPercentActive.py
+++ b/science/fbPercentActive.py
@@ -0,0 +1,34 @@
+import requests
+import pandas as pd
+from bs4 import BeautifulSoup
+import pycountry
+import json
+import os
+
+# Fetch and parse the website
+response = requests.get('https://www.statista.com/statistics/268136/top-15-countries-based-on-number-of-facebook-users/')
+content = response.content
+soup = BeautifulSoup(content, 'html.parser')
+
+# Find all of the data points
+tds = soup.select('#statTableHTML td')
+
+# Frame the data
+data = []
+
+def population(country):
+	countryCode = pycountry.countries.search_fuzzy(country)[0].alpha_3
+	res = requests.get('https://restcountries.eu/rest/v2/alpha/' + countryCode)
+	return json.loads(res.content)['population']
+
+for td1, td2 in zip(tds[::2], tds[1::2]):
+	frame = {}
+	frame['country'] = td1.text.strip()
+	frame['active'] = td2.text.strip()
+	frame['population'] = population(frame['country'])
+	frame['percentActive'] = (int(frame['active']) * 1000000 / int(frame['population'])) * 100
+	data.append(frame)
+
+# Save the data
+df = pd.DataFrame(data)
+df.to_csv(os.path.dirname(os.path.realpath(__file__)) + '/fbPercentActive_res.csv')
--- a/science/fbPercentActive_res.csv
+++ b/science/fbPercentActive_res.csv
@@ -0,0 +1,21 @@
+,country,active,population,percentActive
+0,India,340,1295210000,26.250569405733433
+1,United States,200,323947000,61.738494259863494
+2,Indonesia,140,258705000,54.11569161786591
+3,Brazil,130,206135893,63.065193600223715
+4,Mexico,98,122273473,80.14821007006155
+5,Philippines,88,103279800,85.20543223360231
+6,Vietnam,71,92700000,76.59115426105717
+7,Thailand,54,65327652,82.6602492922905
+8,Egypt,47,91290000,51.484280863183265
+9,Bangladesh,46,161006790,28.570223653300587
+10,Pakistan,45,194125062,23.18093271233572
+11,Colombia,38,48759958,77.93279887566761
+12,United Kingdom,38,65110000,58.36277069574566
+13,Turkey,37,78741053,46.98946558410896
+14,France,33,66710000,49.46784590016489
+15,Argentina,31,43590400,71.11657612685362
+16,Italy,31,60665551,51.099840830589336
+17,Nigeria,31,186988000,16.578603974586603
+18,Germany,28,81770900,34.24201029950753
+19,Peru,27,31488700,85.74504504790607
--- a/readme.md
+++ b/readme.md
@@ -11,6 +11,7 @@

 - [Data Science](python/data%20science)
  - [Fake Jobs Scraper](python/data%20science/fakejobs.py)
+  - [Country Population vs Active Facebook Users in the Country](python/data%20science/fbPercentActive.py)
 - [Calculators](python/calculators)
  - [Binomial Distribution](python/calculators/Binomial%20Distribution.py)
  - [Pearson's Product-Moment Correlation Coefficient](python/calculators/PMCC.py)