diff --git a/python/data science/ngram comparison/ngram comparison.py b/python/data science/ngrams/comparison/comparison.py similarity index 100% rename from python/data science/ngram comparison/ngram comparison.py rename to python/data science/ngrams/comparison/comparison.py diff --git a/python/data science/ngram comparison/dubious, orangutan, round, mockingjay, aloof.csv b/python/data science/ngrams/comparison/dubious, orangutan, round, mockingjay, aloof.csv similarity index 100% rename from python/data science/ngram comparison/dubious, orangutan, round, mockingjay, aloof.csv rename to python/data science/ngrams/comparison/dubious, orangutan, round, mockingjay, aloof.csv diff --git a/python/data science/ngram comparison/dubious, orangutan, round, mockingjay, aloof.png b/python/data science/ngrams/comparison/dubious, orangutan, round, mockingjay, aloof.png similarity index 100% rename from python/data science/ngram comparison/dubious, orangutan, round, mockingjay, aloof.png rename to python/data science/ngrams/comparison/dubious, orangutan, round, mockingjay, aloof.png diff --git a/python/data science/ngram comparison/metropolis, birthday, acidic, bell, weed.csv b/python/data science/ngrams/comparison/metropolis, birthday, acidic, bell, weed.csv similarity index 100% rename from python/data science/ngram comparison/metropolis, birthday, acidic, bell, weed.csv rename to python/data science/ngrams/comparison/metropolis, birthday, acidic, bell, weed.csv diff --git a/python/data science/ngram comparison/metropolis, birthday, acidic, bell, weed.png b/python/data science/ngrams/comparison/metropolis, birthday, acidic, bell, weed.png similarity index 100% rename from python/data science/ngram comparison/metropolis, birthday, acidic, bell, weed.png rename to python/data science/ngrams/comparison/metropolis, birthday, acidic, bell, weed.png diff --git a/python/data science/ngram comparison/poop, klutzy.csv b/python/data science/ngrams/comparison/poop, klutzy.csv similarity 
index 100% rename from python/data science/ngram comparison/poop, klutzy.csv rename to python/data science/ngrams/comparison/poop, klutzy.csv diff --git a/python/data science/ngram comparison/poop, klutzy.png b/python/data science/ngrams/comparison/poop, klutzy.png similarity index 100% rename from python/data science/ngram comparison/poop, klutzy.png rename to python/data science/ngrams/comparison/poop, klutzy.png diff --git a/python/data science/ngram comparison/shuffle, metal, cancer, tumour, milk, gelatinous.csv b/python/data science/ngrams/comparison/shuffle, metal, cancer, tumour, milk, gelatinous.csv similarity index 100% rename from python/data science/ngram comparison/shuffle, metal, cancer, tumour, milk, gelatinous.csv rename to python/data science/ngrams/comparison/shuffle, metal, cancer, tumour, milk, gelatinous.csv diff --git a/python/data science/ngram comparison/shuffle, metal, cancer, tumour, milk, gelatinous.png b/python/data science/ngrams/comparison/shuffle, metal, cancer, tumour, milk, gelatinous.png similarity index 100% rename from python/data science/ngram comparison/shuffle, metal, cancer, tumour, milk, gelatinous.png rename to python/data science/ngrams/comparison/shuffle, metal, cancer, tumour, milk, gelatinous.png diff --git a/python/data science/ngram comparison/spectacle, barrel, crosshair, glue, gluestick.csv b/python/data science/ngrams/comparison/spectacle, barrel, crosshair, glue, gluestick.csv similarity index 100% rename from python/data science/ngram comparison/spectacle, barrel, crosshair, glue, gluestick.csv rename to python/data science/ngrams/comparison/spectacle, barrel, crosshair, glue, gluestick.csv diff --git a/python/data science/ngram comparison/spectacle, barrel, crosshair, glue, gluestick.png b/python/data science/ngrams/comparison/spectacle, barrel, crosshair, glue, gluestick.png similarity index 100% rename from python/data science/ngram comparison/spectacle, barrel, crosshair, glue, gluestick.png rename to 
python/data science/ngrams/comparison/spectacle, barrel, crosshair, glue, gluestick.png
diff --git a/python/data science/ngrams/popularity/popularity.py b/python/data science/ngrams/popularity/popularity.py
new file mode 100644
index 0000000..554746c
--- /dev/null
+++ b/python/data science/ngrams/popularity/popularity.py
@@ -0,0 +1,70 @@
+import requests
+import json
+import matplotlib.pyplot as graph
+import numpy
+import statistics
+import os
+import pandas
+
+# Fetch the Google Books Ngram time series for one word, print summary
+# statistics about it, and save a plot with a linear trend line next to
+# this script.
+
+pandas.options.display.float_format = '{:.10f}'.format
+
+word = input('Please enter a word to research!\n')
+print()
+startYear = 1800
+endYear = 2019
+# An array rather than a range, so the trend line is plain array arithmetic.
+years = numpy.arange(startYear, endYear + 1)
+
+# Let requests build the query string so the word is URL-encoded; input
+# containing spaces, '&' or '#' would otherwise corrupt the request.
+response = requests.get(
+    'https://books.google.com/ngrams/json',
+    params={
+        'content': word,
+        'year_start': startYear,
+        'year_end': endYear,
+        'corpus': 26,
+        'smoothing': 3,
+    },
+    timeout=30,
+)
+response.raise_for_status()
+results = json.loads(response.content)
+if not results:
+    # An empty list has no first entry; exit with a message, not an IndexError.
+    raise SystemExit('No ngram data returned for %r' % word)
+data = results[0]
+
+frame = {}
+points = data['timeseries']
+
+frame['word'] = data['ngram']
+frame['stdev'] = numpy.std(points)
+frame['mean'] = numpy.mean(points)
+frame['median'] = numpy.median(points)
+try:
+    frame['mode'] = statistics.mode(points)
+except statistics.StatisticsError:
+    # Python < 3.8 raises when no single value is most common.
+    frame['mode'] = float('nan')
+frame['range'] = max(points) - min(points)
+frame['q1'] = numpy.percentile(points, 25)
+frame['q3'] = numpy.percentile(points, 75)
+frame['iqr'] = frame['q3'] - frame['q1']
+
+df = pandas.DataFrame([frame])
+print(df)
+
+# Least-squares straight line through the series: slope m, intercept b.
+m, b = numpy.polyfit(years, points, 1)
+
+graph.plot(years, points)
+graph.plot(years, m * years + b)
+graph.title(frame['word'])
+graph.ticklabel_format(style='plain')
+outputDir = os.path.dirname(os.path.realpath(__file__))
+graph.savefig(os.path.join(outputDir, '%s.png' % word), dpi=100)
diff --git a/python/data science/ngrams/popularity/terrified.png b/python/data science/ngrams/popularity/terrified.png new file mode 100644 index 0000000..093f48f Binary files /dev/null and b/python/data science/ngrams/popularity/terrified.png differ diff --git a/readme.md b/readme.md index d1c8aa5..2526075 100644 --- a/readme.md +++ b/readme.md @@ -12,7 +12,9 @@ - [Data Science](python/data%20science) - [Fake Jobs 
Scraper](python/data%20science/fakejobs.py) - [Country Population vs Active Facebook Users in the Country](python/data%20science/facebook.py) - - [ngram comparison](python/data%20science/ngrams%20comparison/ngram%20comparison.py) + - [ngrams](python/data%20science/ngrams) + - [Comparison](python/data%20science/ngrams/comparison/comparison.py) + - [Popularity](python/data%20science/ngrams/popularity/popularity.py) - [Calculators](python/calculators) - [Binomial Distribution](python/calculators/binomial%20distribution.py) - [Pearson's Product-Moment Correlation Coefficient](python/calculators/pmcc.py)