From 2839aa9237af8a38078ae3f97ceeca7d920b1a9a Mon Sep 17 00:00:00 2001
From: newt <hi@newty.dev>
Date: Wed, 9 Oct 2024 18:02:31 +0100
Subject: [PATCH] Country Population vs Active Facebook Users in the Country

---
 python/data science/fakejobs.py             |  6 ++--
 python/data science/fbPercentActive.py      | 34 +++++++++++++++++++++
 python/data science/fbPercentActive_res.csv | 21 +++++++++++++
 readme.md                                   |  1 +
 4 files changed, 59 insertions(+), 3 deletions(-)
 create mode 100644 python/data science/fbPercentActive.py
 create mode 100644 python/data science/fbPercentActive_res.csv

diff --git a/python/data science/fakejobs.py b/python/data science/fakejobs.py
index 57fee5b..d492c3a 100644
--- a/python/data science/fakejobs.py	
+++ b/python/data science/fakejobs.py	
@@ -13,7 +13,7 @@ jobList = soup.find(id='ResultsContainer')
 jobs = jobList.find_all('div', class_='card-content')
 
 # Frame the data
-frameList = []
+data = []
 
 def formatElement(el):
     return el.text.strip()
@@ -23,8 +23,8 @@ for job in jobs:
     frame['title'] = formatElement(job.find('h2', class_='title'))
     frame['company'] = formatElement(job.find('h3', class_='company'))
     frame['location'] = formatElement(job.find('p', class_='location'))
-    frameList.append(frame)
+    data.append(frame)
 
 # Save the data
-df = pandas.DataFrame(frameList)
+df = pandas.DataFrame(data)
 df.to_csv(os.path.dirname(os.path.realpath(__file__)) + '/fakejobs_res.csv')
diff --git a/python/data science/fbPercentActive.py b/python/data science/fbPercentActive.py
new file mode 100644
index 0000000..1306819
--- /dev/null
+++ b/python/data science/fbPercentActive.py	
@@ -0,0 +1,34 @@
+import requests
+import pandas as pd
+from bs4 import BeautifulSoup
+import pycountry
+import json
+import os
+
+# Download the Statista page on Facebook users per country and parse its HTML
+response = requests.get('https://www.statista.com/statistics/268136/top-15-countries-based-on-number-of-facebook-users/')
+content = response.content  # raw response body; NOTE(review): HTTP status is not checked
+soup = BeautifulSoup(content, 'html.parser')
+
+# Collect every <td> cell inside the element with id "statTableHTML"
+tds = soup.select('#statTableHTML td')
+
+# Rows for the output DataFrame: one dict per table row, filled by the loop below
+data = []
+
+def population(country):  # Return the 'population' value the REST Countries API reports for `country` (a country name)
+	countryCode = pycountry.countries.search_fuzzy(country)[0].alpha_3  # alpha-3 code of the first fuzzy-search result
+	res = requests.get('https://restcountries.eu/rest/v2/alpha/' + countryCode)  # NOTE(review): no timeout or HTTP status check
+	return json.loads(res.content)['population']  # assumes the body is a JSON object with a 'population' key
+
+for td1, td2 in zip(tds[::2], tds[1::2]):  # pair cells two at a time: even index -> td1, odd index -> td2
+	frame = {}
+	frame['country'] = td1.text.strip()
+	frame['active'] = td2.text.strip()  # kept as text; presumably users in millions (see the 1000000 factor) - confirm
+	frame['population'] = population(frame['country'])  # one HTTP lookup per row
+	frame['percentActive'] = (int(frame['active']) * 1000000 / int(frame['population'])) * 100  # active users as a percentage of population; int() raises ValueError on non-integer text
+	data.append(frame)
+
+# Write the rows to fbPercentActive_res.csv in the same directory as this script
+df = pd.DataFrame(data)
+df.to_csv(os.path.dirname(os.path.realpath(__file__)) + '/fbPercentActive_res.csv')
diff --git a/python/data science/fbPercentActive_res.csv b/python/data science/fbPercentActive_res.csv
new file mode 100644
index 0000000..2045ed0
--- /dev/null
+++ b/python/data science/fbPercentActive_res.csv	
@@ -0,0 +1,21 @@
+,country,active,population,percentActive
+0,India,340,1295210000,26.250569405733433
+1,United States,200,323947000,61.738494259863494
+2,Indonesia,140,258705000,54.11569161786591
+3,Brazil,130,206135893,63.065193600223715
+4,Mexico,98,122273473,80.14821007006155
+5,Philippines,88,103279800,85.20543223360231
+6,Vietnam,71,92700000,76.59115426105717
+7,Thailand,54,65327652,82.6602492922905
+8,Egypt,47,91290000,51.484280863183265
+9,Bangladesh,46,161006790,28.570223653300587
+10,Pakistan,45,194125062,23.18093271233572
+11,Colombia,38,48759958,77.93279887566761
+12,United Kingdom,38,65110000,58.36277069574566
+13,Turkey,37,78741053,46.98946558410896
+14,France,33,66710000,49.46784590016489
+15,Argentina,31,43590400,71.11657612685362
+16,Italy,31,60665551,51.099840830589336
+17,Nigeria,31,186988000,16.578603974586603
+18,Germany,28,81770900,34.24201029950753
+19,Peru,27,31488700,85.74504504790607
diff --git a/readme.md b/readme.md
index ee42833..1f5ed31 100644
--- a/readme.md
+++ b/readme.md
@@ -11,6 +11,7 @@
 
 - [Data Science](python/data%20science)
   - [Fake Jobs Scraper](python/data%20science/fakejobs.py)
+  - [Country Population vs Active Facebook Users in the Country](python/data%20science/fbPercentActive.py)
 - [Calculators](python/calculators)
   - [Binomial Distribution](python/calculators/Binomial%20Distribution.py)
   - [Pearson's Product-Moment Correlation Coefficient](python/calculators/PMCC.py)