Allow the start and end years to be entered by the user in the ngram scripts

2024-10-09 18:02:33 +01:00 · 2024-10-09 18:02:33 +01:00 · 5e091f26ab
commit 5e091f26ab
parent fc3f6a9cfe
16 changed files with 40 additions and 9 deletions
--- a/science/ngrams/comparison/comparison.py
+++ b/science/ngrams/comparison/comparison.py
@ -6,13 +6,27 @@ import numpy
 import os
 import pandas

+def yearInput(text):
+	while True:
+		try:
+			x = int(input(text + '\n'))
+		except ValueError:
+			print('You must input an integer!\n')
+		else:
+			if x > 2019 or x < 1500:
+				print('The year inputted is out of range. It must be between 1500 and 2019!')
+			else:
+				return x
+
 pandas.options.display.float_format = '{:.10f}'.format

+startYear = yearInput('Please enter the start year!')
+endYear = yearInput('Please enter the end year!')
+yearSpan = '%i-%i' % (startYear, endYear)
+years = range(startYear, endYear + 1)
+
 words = input('Please enter a list of words. Separate each word with a comma (:\n')
 print()
-startYear = 1800
-endYear = 2019
-years = range(startYear, endYear + 1)

 response = requests.get('https://books.google.com/ngrams/json?content=%s&year_start=%s&year_end=%s&corpus=26&smoothing=3' % (words, startYear, endYear))
 data = json.loads(response.content)
@ -52,7 +66,7 @@ wordList = ', '.join([f['word'] for f in frames])
 while True:
 	toSave = input('Would you like to save this data frame in a CSV? (y/n)')
 	if toSave == 'y':
-		df.to_csv('%s/%s.csv' % (dirName, wordList))
+		df.to_csv('%s/%s - %s.csv' % (dirName, wordList, yearSpan))
 		break
 	if toSave == 'n':
 		break
@ -63,7 +77,7 @@ while True:
 	if toSave == 'y':
 		plt.ticklabel_format(style='plain')
 		plt.legend()
-		plt.savefig('%s/%s.png' % (dirName, wordList), dpi=100)
+		plt.savefig('%s/%s - %s.png' % (dirName, wordList, yearSpan), dpi=100)
 		exit()
 	elif toSave == 'n':
 		exit()
--- a/science/ngrams/comparison/dubious,
+++ b/science/ngrams/comparison/dubious,
--- a/science/ngrams/comparison/dubious,
+++ b/science/ngrams/comparison/dubious,
--- a/science/ngrams/comparison/hallo,
+++ b/science/ngrams/comparison/hallo,
@ -0,0 +1,3 @@
+,word,stdev,mean,median,mode,range,q1,q3,iqr
+0,hallo,9.067572402803685e-07,3.7112208955684736e-07,4.9074362859106e-08,0.0,5.638338344786982e-06,0.0,1.517606246384925e-07,1.517606246384925e-07
+1,hello,1.2565752685195384e-07,1.061839190423912e-07,9.166146648047808e-08,0.0,6.695058151048475e-07,0.0,1.4409192620031979e-07,1.4409192620031979e-07
--- a/science/ngrams/comparison/hallo,
+++ b/science/ngrams/comparison/hallo,
--- a/science/ngrams/comparison/metropolis,
+++ b/science/ngrams/comparison/metropolis,
--- a/science/ngrams/comparison/metropolis,
+++ b/science/ngrams/comparison/metropolis,
--- a/science/ngrams/comparison/poop,
+++ b/science/ngrams/comparison/poop,
--- a/science/ngrams/comparison/poop,
+++ b/science/ngrams/comparison/poop,
--- a/science/ngrams/comparison/shuffle,
+++ b/science/ngrams/comparison/shuffle,
--- a/science/ngrams/comparison/shuffle,
+++ b/science/ngrams/comparison/shuffle,
--- a/science/ngrams/comparison/spectacle,
+++ b/science/ngrams/comparison/spectacle,
--- a/science/ngrams/comparison/spectacle,
+++ b/science/ngrams/comparison/spectacle,
--- a/science/ngrams/popularity/hallo
+++ b/science/ngrams/popularity/hallo
--- a/science/ngrams/popularity/popularity.py
+++ b/science/ngrams/popularity/popularity.py
@ -6,13 +6,27 @@ import statistics
 import os
 import pandas

+def yearInput(text):
+	while True:
+		try:
+			x = int(input(text + '\n'))
+		except ValueError:
+			print('You must input an integer!\n')
+		else:
+			if x > 2019 or x < 1500:
+				print('The year inputted is out of range. It must be between 1500 and 2019!')
+			else:
+				return x
+
 pandas.options.display.float_format = '{:.10f}'.format

+startYear = yearInput('Please enter the start year!')
+endYear = yearInput('Please enter the end year!')
+yearSpan = '%i-%i' % (startYear, endYear)
+years = range(startYear, endYear + 1)
+
 word = input('Please enter a word to research!\n')
 print()
-startYear = 1800
-endYear = 2019
-years = range(startYear, endYear + 1)

 response = requests.get('https://books.google.com/ngrams/json?content=%s&year_start=%s&year_end=%s&corpus=26&smoothing=3' % (word, startYear, endYear))
 data = json.loads(response.content)[0]
@ -39,4 +53,4 @@ graph.plot(years, points)
 graph.plot(years, m * years + b)
 graph.title(frame['word'])
 graph.ticklabel_format(style='plain')
-graph.savefig('%s/%s.png' % (os.path.dirname(os.path.realpath(__file__)), word), dpi=100)
+graph.savefig('%s/%s - %s.png' % (os.path.dirname(os.path.realpath(__file__)), word, yearSpan), dpi=100)
--- a/science/ngrams/popularity/terrified.png
+++ b/science/ngrams/popularity/terrified.png