Allow years to be input on ngrams
|
@ -6,13 +6,27 @@ import numpy
|
||||||
import os
|
import os
|
||||||
import pandas
|
import pandas
|
||||||
|
|
||||||
|
def yearInput(text, minYear=1500, maxYear=2019):
    """Prompt on stdin until the user enters a year inside the allowed range.

    The prompt is re-issued after every rejected answer, so this function
    only returns once a valid year has been typed.

    Args:
        text: Prompt shown to the user (a newline is appended).
        minYear: Smallest year accepted, inclusive. Defaults to 1500.
        maxYear: Largest year accepted, inclusive. Defaults to 2019.

    Returns:
        The entered year as an int, with minYear <= year <= maxYear.

    Raises:
        ValueError: If minYear is greater than maxYear; no answer could ever
            be accepted, so the prompt loop would never terminate.
    """
    if minYear > maxYear:
        raise ValueError('minYear (%i) must not exceed maxYear (%i)' % (minYear, maxYear))

    while True:
        try:
            year = int(input(text + '\n'))
        except ValueError:
            # Non-numeric answer: explain and ask again.
            print('You must input an integer!\n')
            continue

        if minYear <= year <= maxYear:
            return year

        # The message is built from the bounds so it can never drift from the check.
        print('The year inputted is out of range. It must be between %i and %i!' % (minYear, maxYear))
|
||||||
|
|
||||||
pandas.options.display.float_format = '{:.10f}'.format
|
pandas.options.display.float_format = '{:.10f}'.format
|
||||||
|
|
||||||
|
startYear = yearInput('Please enter the start year!')
|
||||||
|
endYear = yearInput('Please enter the end year!')
|
||||||
|
yearSpan = '%i-%i' % (startYear, endYear)
|
||||||
|
years = range(startYear, endYear + 1)
|
||||||
|
|
||||||
words = input('Please enter a list of words. Separate each word with a comma (:\n')
|
words = input('Please enter a list of words. Separate each word with a comma (:\n')
|
||||||
print()
|
print()
|
||||||
startYear = 1800
|
|
||||||
endYear = 2019
|
|
||||||
years = range(startYear, endYear + 1)
|
|
||||||
|
|
||||||
response = requests.get('https://books.google.com/ngrams/json?content=%s&year_start=%s&year_end=%s&corpus=26&smoothing=3' % (words, startYear, endYear))
|
response = requests.get('https://books.google.com/ngrams/json?content=%s&year_start=%s&year_end=%s&corpus=26&smoothing=3' % (words, startYear, endYear))
|
||||||
data = json.loads(response.content)
|
data = json.loads(response.content)
|
||||||
|
@ -52,7 +66,7 @@ wordList = ', '.join([f['word'] for f in frames])
|
||||||
while True:
|
while True:
|
||||||
toSave = input('Would you like to save this data frame in a CSV? (y/n)')
|
toSave = input('Would you like to save this data frame in a CSV? (y/n)')
|
||||||
if toSave == 'y':
|
if toSave == 'y':
|
||||||
df.to_csv('%s/%s.csv' % (dirName, wordList))
|
df.to_csv('%s/%s - %s.csv' % (dirName, wordList, yearSpan))
|
||||||
break
|
break
|
||||||
if toSave == 'n':
|
if toSave == 'n':
|
||||||
break
|
break
|
||||||
|
@ -63,7 +77,7 @@ while True:
|
||||||
if toSave == 'y':
|
if toSave == 'y':
|
||||||
plt.ticklabel_format(style='plain')
|
plt.ticklabel_format(style='plain')
|
||||||
plt.legend()
|
plt.legend()
|
||||||
plt.savefig('%s/%s.png' % (dirName, wordList), dpi=100)
|
plt.savefig('%s/%s - %s.png' % (dirName, wordList, yearSpan), dpi=100)
|
||||||
exit()
|
exit()
|
||||||
elif toSave == 'n':
|
elif toSave == 'n':
|
||||||
exit()
|
exit()
|
||||||
|
|
Before Width: | Height: | Size: 36 KiB After Width: | Height: | Size: 36 KiB |
|
@ -0,0 +1,3 @@
|
||||||
|
,word,stdev,mean,median,mode,range,q1,q3,iqr
|
||||||
|
0,hallo,9.067572402803685e-07,3.7112208955684736e-07,4.9074362859106e-08,0.0,5.638338344786982e-06,0.0,1.517606246384925e-07,1.517606246384925e-07
|
||||||
|
1,hello,1.2565752685195384e-07,1.061839190423912e-07,9.166146648047808e-08,0.0,6.695058151048475e-07,0.0,1.4409192620031979e-07,1.4409192620031979e-07
|
|
After Width: | Height: | Size: 32 KiB |
Before Width: | Height: | Size: 56 KiB After Width: | Height: | Size: 56 KiB |
Before Width: | Height: | Size: 32 KiB After Width: | Height: | Size: 32 KiB |
Before Width: | Height: | Size: 51 KiB After Width: | Height: | Size: 51 KiB |
Before Width: | Height: | Size: 52 KiB After Width: | Height: | Size: 52 KiB |
BIN
python/data science/ngrams/popularity/hallo - 1500-1800.png
Normal file
After Width: | Height: | Size: 29 KiB |
|
@ -6,13 +6,27 @@ import statistics
|
||||||
import os
|
import os
|
||||||
import pandas
|
import pandas
|
||||||
|
|
||||||
|
def yearInput(text, minYear=1500, maxYear=2019):
    """Prompt on stdin until the user enters a year inside the allowed range.

    The prompt is re-issued after every rejected answer, so this function
    only returns once a valid year has been typed.

    Args:
        text: Prompt shown to the user (a newline is appended).
        minYear: Smallest year accepted, inclusive. Defaults to 1500.
        maxYear: Largest year accepted, inclusive. Defaults to 2019.

    Returns:
        The entered year as an int, with minYear <= year <= maxYear.

    Raises:
        ValueError: If minYear is greater than maxYear; no answer could ever
            be accepted, so the prompt loop would never terminate.
    """
    if minYear > maxYear:
        raise ValueError('minYear (%i) must not exceed maxYear (%i)' % (minYear, maxYear))

    while True:
        try:
            year = int(input(text + '\n'))
        except ValueError:
            # Non-numeric answer: explain and ask again.
            print('You must input an integer!\n')
            continue

        if minYear <= year <= maxYear:
            return year

        # The message is built from the bounds so it can never drift from the check.
        print('The year inputted is out of range. It must be between %i and %i!' % (minYear, maxYear))
|
||||||
|
|
||||||
pandas.options.display.float_format = '{:.10f}'.format
|
pandas.options.display.float_format = '{:.10f}'.format
|
||||||
|
|
||||||
|
startYear = yearInput('Please enter the start year!')
|
||||||
|
endYear = yearInput('Please enter the end year!')
|
||||||
|
yearSpan = '%i-%i' % (startYear, endYear)
|
||||||
|
years = range(startYear, endYear + 1)
|
||||||
|
|
||||||
word = input('Please enter a word to research!\n')
|
word = input('Please enter a word to research!\n')
|
||||||
print()
|
print()
|
||||||
startYear = 1800
|
|
||||||
endYear = 2019
|
|
||||||
years = range(startYear, endYear + 1)
|
|
||||||
|
|
||||||
response = requests.get('https://books.google.com/ngrams/json?content=%s&year_start=%s&year_end=%s&corpus=26&smoothing=3' % (word, startYear, endYear))
|
response = requests.get('https://books.google.com/ngrams/json?content=%s&year_start=%s&year_end=%s&corpus=26&smoothing=3' % (word, startYear, endYear))
|
||||||
data = json.loads(response.content)[0]
|
data = json.loads(response.content)[0]
|
||||||
|
@ -39,4 +53,4 @@ graph.plot(years, points)
|
||||||
graph.plot(years, m * years + b)
|
graph.plot(years, m * years + b)
|
||||||
graph.title(frame['word'])
|
graph.title(frame['word'])
|
||||||
graph.ticklabel_format(style='plain')
|
graph.ticklabel_format(style='plain')
|
||||||
graph.savefig('%s/%s.png' % (os.path.dirname(os.path.realpath(__file__)), word), dpi=100)
|
graph.savefig('%s/%s - %s.png' % (os.path.dirname(os.path.realpath(__file__)), word, yearSpan), dpi=100)
|
||||||
|
|
Before Width: | Height: | Size: 34 KiB After Width: | Height: | Size: 34 KiB |