.CSV
import csv

filename = 'sitka_weather_07-2018_simple.csv'

# Print the CSV header row, then each column name with its index, so the
# column positions can be looked up when extracting data later.
# newline='' is what the csv module documentation recommends for files
# handed to csv.reader, so the reader handles line endings itself.
with open(filename, newline='') as f:
    reader = csv.reader(f)  # iterator over the parsed rows
    header_row = next(reader)  # the first row holds the column names
    print(header_row)

    # enumerate pairs every column name with its position in the row.
    for index, column_header in enumerate(header_row):
        print(index, column_header)
['STATION', 'NAME', 'DATE', 'PRCP', 'TAVG', 'TMAX', 'TMIN']
0 STATION
1 NAME
2 DATE
3 PRCP
4 TAVG
5 TMAX
6 TMIN
Extracting and Reading Data
import csv

filename = 'sitka_weather_07-2018_simple.csv'

# Read the header, list the column positions, then collect the daily high
# temperatures as integers.
# newline='' is what the csv module documentation recommends for csv.reader.
with open(filename, newline='') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # consume the header so only data rows remain
    print(header_row)
    for index, column_header in enumerate(header_row):
        print(index, column_header)

    # Column 5 is TMAX according to the header listing printed above.
    # The values arrive as strings, so convert them for plotting.
    highs = [int(row[5]) for row in reader]

print(highs)
[62, 58, 70, 70, 67, 59, 58, 62, 66, 59, 56, 63, 65, 58, 56, 59, 64, 60,
60, 61, 65, 65, 63, 59, 64, 65, 68, 66, 64, 67, 65]
from matplotlib import pyplot as plt

# Plot the high temperatures collected in `highs` and save the figure.
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and
# newer releases no longer accept the old name, so pick whichever exists.
seaborn_style = ('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available
                 else 'seaborn')
plt.style.use(seaborn_style)
fig, ax = plt.subplots()
ax.plot(highs, c='red')

# Format plot.
ax.set_title("Daily high temperatures - 2018-7", fontsize=24)
ax.set_xlabel('', fontsize=16)
ax.set_ylabel("Temperature (F)", fontsize=16)
ax.tick_params(axis='both', which='major', labelsize=16)

# Call plt.show() here instead to display the figure interactively.
plt.savefig('simple.jpg', dpi=300)
The datetime Module
from datetime import datetime

# Turn a date string into a datetime object using a matching format string.
first_date = datetime.strptime('2018-7-1', '%Y-%m-%d')

# Show, in order: the object's type, a custom strftime rendering, and the
# default string form of the datetime.
for rendering in (type(first_date), first_date.strftime('%B %d %Y'), first_date):
    print(rendering)
<class 'datetime.datetime'>
July 01 2018
2018-07-01 00:00:00
%A Weekday name, such as Monday
%B Month name, such as January
%m Month, as a number (01 to 12)
%d Day of the month, as a number (01 to 31)
%Y Four-digit year, such as 2015
%y Two-digit year, such as 15
%H Hour, in 24-hour format (00 to 23)
%I Hour, in 12-hour format (01 to 12)
%p am or pm
%M Minutes (00 to 59)
%S Seconds (00 to 61)
import csv
from datetime import datetime

from matplotlib import pyplot as plt

filename = 'sitka_weather_07-2018_simple.csv'

# Read the date and the high temperature from every data row.
# newline='' is what the csv module documentation recommends for csv.reader.
with open(filename, newline='') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # skip the header row

    dates, highs = [], []
    for row in reader:
        # Column 2 holds the date string, column 5 the high temperature.
        current_date = datetime.strptime(row[2], "%Y-%m-%d")
        dates.append(current_date)
        high = int(row[5])
        highs.append(high)

# Plot the highs against real dates.
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and
# newer releases no longer accept the old name, so pick whichever exists.
seaborn_style = ('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available
                 else 'seaborn')
plt.style.use(seaborn_style)
fig, ax = plt.subplots()
ax.plot(dates, highs, c='red')

# Format plot.
ax.set_title("Daily high temperatures - 2018-7", fontsize=24)
ax.set_xlabel('', fontsize=16)
fig.autofmt_xdate()  # slant the date labels so they do not overlap
ax.set_ylabel("Temperature (F)", fontsize=16)
ax.tick_params(axis='both', which='major', labelsize=16)

plt.savefig('simple.jpg', dpi=300)
import csv
from datetime import datetime

from matplotlib import pyplot as plt

filename = 'sitka_weather_2018_simple.csv'

# Read the date, high and low temperature from every data row.
# newline='' is what the csv module documentation recommends for csv.reader.
with open(filename, newline='') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # skip the header row

    dates, highs, lows = [], [], []
    for row in reader:
        # Column 2 is the date string; columns 5 and 6 are read as the
        # high and low temperatures.
        current_date = datetime.strptime(row[2], "%Y-%m-%d")
        dates.append(current_date)
        high = int(row[5])
        highs.append(high)
        low = int(row[6])
        lows.append(low)

# Plot data.
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and
# newer releases no longer accept the old name, so pick whichever exists.
seaborn_style = ('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available
                 else 'seaborn')
plt.style.use(seaborn_style)
fig, ax = plt.subplots()
ax.plot(dates, highs, c='red', alpha=0.5)
ax.plot(dates, lows, c='blue', alpha=0.5)
# Shade the band between the two series to show the daily range.
ax.fill_between(dates, highs, lows, facecolor='blue', alpha=0.1)

# Format plot. The title names both series, since lows are plotted too.
ax.set_title("Daily high and low temperatures - 2018", fontsize=24)
ax.set_xlabel('', fontsize=16)
fig.autofmt_xdate()  # slant the date labels so they do not overlap
ax.set_ylabel("Temperature (F)", fontsize=16)
ax.tick_params(axis='both', which='major', labelsize=16)

plt.show()
import csv
from datetime import datetime

from matplotlib import pyplot as plt

filename = 'death_valley_2018_simple.csv'

# Read dates, highs and lows, skipping any row whose values cannot be parsed.
# newline='' is what the csv module documentation recommends for csv.reader.
with open(filename, newline='') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # skip the header row

    dates, highs, lows = [], [], []
    for row in reader:
        # NOTE(review): high/low are read from columns 4 and 5 here, unlike
        # the Sitka scripts (5 and 6) - presumably this file has a different
        # column layout; confirm against its header row.
        try:
            current_date = datetime.strptime(row[2], "%Y-%m-%d")
            high = int(row[4])
            low = int(row[5])
        except ValueError:
            # Report the raw date text: current_date may be stale (or not
            # yet defined) when the date itself is what failed to parse.
            print(row[2], 'missing data')
        else:
            # Only rows that parsed completely contribute to the plot data,
            # which keeps the three lists the same length.
            dates.append(current_date)
            highs.append(high)
            lows.append(low)

# Plot data.
#--snip--
.JSON
import json

import matplotlib.pyplot as plt

filename = 'btc_close_2017.json'

# Load the records; the loop below treats the file as a list of dicts.
with open(filename) as f:
    btc_data = json.load(f)

# Pull the fields needed for plotting into parallel lists.
date = []
close = []
months = []
for btc_dict in btc_data:
    date.append(btc_dict['date'])
    months.append(int(btc_dict['month']))
    # Parse via float first, then truncate to a whole number
    # (presumably the values are stored as decimal strings - verify).
    close.append(int(float(btc_dict['close'])))

# The style must be selected before the figure is created, otherwise the
# already-built figure and axes do not pick it up.
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and
# newer releases no longer accept the old name, so pick whichever exists.
seaborn_style = ('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available
                 else 'seaborn')
plt.style.use(seaborn_style)
fig, ax = plt.subplots()

ax.plot(close, linewidth=0.5, c='red')
ax.scatter(date, close, s=5, c='red')

# Label only every 20th date so the x axis stays readable.
ax.set_xticks(date[::20])
ax.set_xticklabels(date[::20], rotation=45, fontsize=6)
ax.set_title('Close', fontsize=10)

plt.savefig('close.jpg', dpi=300)
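Many types of objects in Python qualify as being iterable (可迭代的), such as lists, tuples, and dictionaries. In Python, the mechanism for iteration is based upon the following conventions: an iterable is an object that produces an iterator via iter(obj), and an iterator is an object that returns its next element on each call to next(), raising StopIteration when there are no further elements.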
import itertools

# count(0, 2) is an endless iterator starting at 0 and stepping by 2.
nums = itertools.count(0, 2)

# Pull and print the first three values by hand with next().
for _ in range(3):
    print(next(nums))
0
2
4
import itertools

# count() never ends on its own, so the loop needs an explicit exit.
nums = itertools.count(0, 2)
for i in nums:
    if i > 6:
        break  # stop once the values pass 6
    print(i)
0
2
4
6
import itertools

# cycle() repeats the characters of 'ABC' forever, so count the items and
# break out after six of them have been printed.
cycle_strings = itertools.cycle('ABC')
for i, string in enumerate(cycle_strings, start=1):
    if i == 7:
        break
    print(string)
A
B
C
A
B
C
import itertools

# repeat(obj, n) yields the same object n times and then stops.
for item in itertools.repeat('hello', 3):
    print(item)
hello
hello
hello
import itertools
# repeat('hello', 3) is a finite iterator: it yields 'hello' three times.
nums = itertools.repeat('hello', 3)
print(next(nums))
print(next(nums))
print(next(nums))
# The iterator is exhausted now, so this fourth next() call raises
# StopIteration (left uncaught on purpose to show the traceback).
print(next(nums))
hello
hello
hello
Traceback (most recent call last):
print(next(nums))
StopIteration
groupby()
from itertools import groupby

# groupby only merges *consecutive* equal items, so the second run of 'a'
# comes out as its own group rather than joining the first.
for key, value_iter in groupby('aaabbbaaccd'):
    # value_iter is a lazy iterator over the group; list() materializes it.
    print(key, ':', list(value_iter))
a : ['a', 'a', 'a']
b : ['b', 'b', 'b']
a : ['a', 'a']
c : ['c', 'c']
d : ['d']
from itertools import groupby

# Group strings by length. Equal lengths are not adjacent in this list, so
# every element ends up in a group of its own.
data = ['a', 'bb', 'ccc', 'dd', 'eee', 'f']
for key, value_iter in groupby(data, len):
    print(key, ':', list(value_iter))
1 : ['a']
2 : ['bb']
3 : ['ccc']
2 : ['dd']
3 : ['eee']
1 : ['f']
from itertools import groupby

# Group strings by length. Here equal lengths sit next to each other, so
# neighbouring items with the same length are merged into one group.
data = ['a', 'bb', 'cc', 'ddd', 'eee', 'f']
for key, value_iter in groupby(data, len):
    print(key, ':', list(value_iter))
1 : ['a']
2 : ['bb', 'cc']
3 : ['ddd', 'eee']
1 : ['f']
zip()
>>> a = [1,2,3]
>>> b = [4,5,6]
>>> c = [4,5,6,7,8]
>>> zipped = zip(a,b)
>>> zipped
<zip object at 0x...>  # zip() returns an iterator, not a list
>>> list(zipped)
[(1, 4), (2, 5), (3, 6)]
>>> list(zip(a,c))
[(1, 4), (2, 5), (3, 6)]
>>> a = [1,2,3]
>>> b = [4,5,6]
>>> c = [4,5,6,7,8]
>>> zipped = zip(a,b)
>>> list(zip(*zipped))
[(1, 2, 3), (4, 5, 6)]
>>> zipped = zip(a,b)
>>> x,y = zip(*zipped)
>>> print(x)
(1, 2, 3)
from itertools import groupby

import matplotlib.pyplot as plt

# Average the closing values per month. `months` and `close` are the
# parallel lists built when the JSON file was loaded.
# groupby only merges consecutive items, so the (month, close) pairs are
# sorted first to bring each month's entries together.
xy_map = []
for month, pairs in groupby(sorted(zip(months, close)), key=lambda pair: pair[0]):
    month_closes = [price for _, price in pairs]
    xy_map.append([month, sum(month_closes) / len(month_closes)])

# Unzip the [month, mean] pairs into separate x and y sequences.
x_unique, y_mean = zip(*xy_map)

# The style must be selected before the figure is created, otherwise the
# already-built figure and axes do not pick it up.
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and
# newer releases no longer accept the old name, so pick whichever exists.
seaborn_style = ('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available
                 else 'seaborn')
plt.style.use(seaborn_style)
fig, ax = plt.subplots()

ax.plot(x_unique, y_mean, linewidth=1)
ax.scatter(x_unique, y_mean, s=20)
ax.set_title('Close', fontsize=10)

plt.savefig('close.jpg', dpi=300)