diff --git a/python/data science/ngrams/comparison/comparison.py b/python/data science/ngrams/comparison/comparison.py index daef1fc..a91e4e5 100644 --- a/python/data science/ngrams/comparison/comparison.py +++ b/python/data science/ngrams/comparison/comparison.py @@ -6,13 +6,27 @@ import numpy import os import pandas +def yearInput(text): + while True: + try: + x = int(input(text + '\n')) + except ValueError: + print('You must input an integer!\n') + else: + if x > 2019 or x < 1500: + print('The year inputted is out of range. It must be between 1500 and 2019!') + else: + return x + pandas.options.display.float_format = '{:.10f}'.format +startYear = yearInput('Please enter the start year!') +endYear = yearInput('Please enter the end year!') +yearSpan = '%i-%i' % (startYear, endYear) +years = range(startYear, endYear + 1) + words = input('Please enter a list of words. Separate each word with a comma (:\n') print() -startYear = 1800 -endYear = 2019 -years = range(startYear, endYear + 1) response = requests.get('https://books.google.com/ngrams/json?content=%s&year_start=%s&year_end=%s&corpus=26&smoothing=3' % (words, startYear, endYear)) data = json.loads(response.content) @@ -52,7 +66,7 @@ wordList = ', '.join([f['word'] for f in frames]) while True: toSave = input('Would you like to save this data frame in a CSV? (y/n)') if toSave == 'y': - df.to_csv('%s/%s.csv' % (dirName, wordList)) + df.to_csv('%s/%s - %s.csv' % (dirName, wordList, yearSpan)) break if toSave == 'n': break @@ -63,7 +77,7 @@ while True: if toSave == 'y': plt.ticklabel_format(style='plain') plt.legend() - plt.savefig('%s/%s.png' % (dirName, wordList), dpi=100) + plt.savefig('%s/%s - %s.png' % (dirName, wordList, yearSpan), dpi=100) exit() elif toSave == 'n': exit() diff --git a/python/data science/ngrams/comparison/dubious, orangutan, round, mockingjay, aloof.csv b/python/data science/ngrams/comparison/dubious, orangutan, round, mockingjay, aloof - 1800-2019.csv similarity index 100% rename from python/data science/ngrams/comparison/dubious, orangutan, round, mockingjay, aloof.csv rename to python/data science/ngrams/comparison/dubious, orangutan, round, mockingjay, aloof - 1800-2019.csv diff --git a/python/data science/ngrams/comparison/dubious, orangutan, round, mockingjay, aloof.png b/python/data science/ngrams/comparison/dubious, orangutan, round, mockingjay, aloof - 1800-2019.png similarity index 100% rename from python/data science/ngrams/comparison/dubious, orangutan, round, mockingjay, aloof.png rename to python/data science/ngrams/comparison/dubious, orangutan, round, mockingjay, aloof - 1800-2019.png diff --git a/python/data science/ngrams/comparison/hallo, hello - 1500-1800.csv b/python/data science/ngrams/comparison/hallo, hello - 1500-1800.csv new file mode 100644 index 0000000..4a00492 --- /dev/null +++ b/python/data science/ngrams/comparison/hallo, hello - 1500-1800.csv @@ -0,0 +1,3 @@ +,word,stdev,mean,median,mode,range,q1,q3,iqr +0,hallo,9.067572402803685e-07,3.7112208955684736e-07,4.9074362859106e-08,0.0,5.638338344786982e-06,0.0,1.517606246384925e-07,1.517606246384925e-07 +1,hello,1.2565752685195384e-07,1.061839190423912e-07,9.166146648047808e-08,0.0,6.695058151048475e-07,0.0,1.4409192620031979e-07,1.4409192620031979e-07 diff --git a/python/data science/ngrams/comparison/hallo, hello - 1500-1800.png b/python/data science/ngrams/comparison/hallo, hello - 1500-1800.png new file mode 100644 index 0000000..c4f86fa Binary files /dev/null and b/python/data science/ngrams/comparison/hallo, hello - 1500-1800.png differ diff --git a/python/data science/ngrams/comparison/metropolis, birthday, acidic, bell, weed.csv b/python/data science/ngrams/comparison/metropolis, birthday, acidic, bell, weed - 1800-2019.csv similarity index 100% rename from python/data science/ngrams/comparison/metropolis, birthday, acidic, bell, weed.csv rename to python/data science/ngrams/comparison/metropolis, birthday, acidic, bell, weed - 1800-2019.csv diff --git a/python/data science/ngrams/comparison/metropolis, birthday, acidic, bell, weed.png b/python/data science/ngrams/comparison/metropolis, birthday, acidic, bell, weed - 1800-2019.png similarity index 100% rename from python/data science/ngrams/comparison/metropolis, birthday, acidic, bell, weed.png rename to python/data science/ngrams/comparison/metropolis, birthday, acidic, bell, weed - 1800-2019.png diff --git a/python/data science/ngrams/comparison/poop, klutzy.csv b/python/data science/ngrams/comparison/poop, klutzy - 1800-2019.csv similarity index 100% rename from python/data science/ngrams/comparison/poop, klutzy.csv rename to python/data science/ngrams/comparison/poop, klutzy - 1800-2019.csv diff --git a/python/data science/ngrams/comparison/poop, klutzy.png b/python/data science/ngrams/comparison/poop, klutzy - 1800-2019.png similarity index 100% rename from python/data science/ngrams/comparison/poop, klutzy.png rename to python/data science/ngrams/comparison/poop, klutzy - 1800-2019.png diff --git a/python/data science/ngrams/comparison/shuffle, metal, cancer, tumour, milk, gelatinous.csv b/python/data science/ngrams/comparison/shuffle, metal, cancer, tumour, milk, gelatinous - 1800-2019.csv similarity index 100% rename from python/data science/ngrams/comparison/shuffle, metal, cancer, tumour, milk, gelatinous.csv rename to python/data science/ngrams/comparison/shuffle, metal, cancer, tumour, milk, gelatinous - 1800-2019.csv diff --git a/python/data science/ngrams/comparison/shuffle, metal, cancer, tumour, milk, gelatinous.png b/python/data science/ngrams/comparison/shuffle, metal, cancer, tumour, milk, gelatinous - 1800-2019.png similarity index 100% rename from python/data science/ngrams/comparison/shuffle, metal, cancer, tumour, milk, gelatinous.png rename to python/data science/ngrams/comparison/shuffle, metal, cancer, tumour, milk, gelatinous - 1800-2019.png diff --git a/python/data science/ngrams/comparison/spectacle, barrel, crosshair, glue, gluestick.csv b/python/data science/ngrams/comparison/spectacle, barrel, crosshair, glue, gluestick - 1800-2019.csv similarity index 100% rename from python/data science/ngrams/comparison/spectacle, barrel, crosshair, glue, gluestick.csv rename to python/data science/ngrams/comparison/spectacle, barrel, crosshair, glue, gluestick - 1800-2019.csv diff --git a/python/data science/ngrams/comparison/spectacle, barrel, crosshair, glue, gluestick.png b/python/data science/ngrams/comparison/spectacle, barrel, crosshair, glue, gluestick - 1800-2019.png similarity index 100% rename from python/data science/ngrams/comparison/spectacle, barrel, crosshair, glue, gluestick.png rename to python/data science/ngrams/comparison/spectacle, barrel, crosshair, glue, gluestick - 1800-2019.png diff --git a/python/data science/ngrams/popularity/hallo - 1500-1800.png b/python/data science/ngrams/popularity/hallo - 1500-1800.png new file mode 100644 index 0000000..1e88363 Binary files /dev/null and b/python/data science/ngrams/popularity/hallo - 1500-1800.png differ diff --git a/python/data science/ngrams/popularity/popularity.py b/python/data science/ngrams/popularity/popularity.py index 554746c..a604858 100644 --- a/python/data science/ngrams/popularity/popularity.py +++ b/python/data science/ngrams/popularity/popularity.py @@ -6,13 +6,27 @@ import statistics import os import pandas +def yearInput(text): + while True: + try: + x = int(input(text + '\n')) + except ValueError: + print('You must input an integer!\n') + else: + if x > 2019 or x < 1500: + print('The year inputted is out of range. It must be between 1500 and 2019!') + else: + return x + pandas.options.display.float_format = '{:.10f}'.format +startYear = yearInput('Please enter the start year!') +endYear = yearInput('Please enter the end year!') +yearSpan = '%i-%i' % (startYear, endYear) +years = range(startYear, endYear + 1) + word = input('Please enter a word to research!\n') print() -startYear = 1800 -endYear = 2019 -years = range(startYear, endYear + 1) response = requests.get('https://books.google.com/ngrams/json?content=%s&year_start=%s&year_end=%s&corpus=26&smoothing=3' % (word, startYear, endYear)) data = json.loads(response.content)[0] @@ -39,4 +53,4 @@ graph.plot(years, points) graph.plot(years, m * years + b) graph.title(frame['word']) graph.ticklabel_format(style='plain') -graph.savefig('%s/%s.png' % (os.path.dirname(os.path.realpath(__file__)), word), dpi=100) +graph.savefig('%s/%s - %s.png' % (os.path.dirname(os.path.realpath(__file__)), word, yearSpan), dpi=100) diff --git a/python/data science/ngrams/popularity/terrified.png b/python/data science/ngrams/popularity/terrified - 1800-2019.png similarity index 100% rename from python/data science/ngrams/popularity/terrified.png rename to python/data science/ngrams/popularity/terrified - 1800-2019.png