.CSV
from pathlib import Path
import csv

# Read the whole CSV file into memory and split it into lines for csv.reader.
path = Path('sitka_weather_07-2021_simple.csv')
lines = path.read_text().splitlines()

reader = csv.reader(lines)
# The first row produced by the reader holds the column headers.
header_row = next(reader)
print(header_row)

# Print every header next to its index so columns can be looked up by position.
for index, column_header in enumerate(header_row):
    print(index, column_header)
['STATION', 'NAME', 'DATE', 'TAVG', 'TMAX', 'TMIN']
0 STATION
1 NAME
2 DATE
3 TAVG
4 TMAX
5 TMIN
Extracting and Reading Data
from pathlib import Path
import csv

path = Path('sitka_weather_07-2021_simple.csv')
lines = path.read_text().splitlines()

reader = csv.reader(lines)
# Consume the header row so the remaining rows are data only.
header_row = next(reader)

# Column 4 is TMAX in this file; convert each reading from text to int.
highs = [int(row[4]) for row in reader]

# Print once, after all rows have been read.
print(highs)
[61, 60, 66, 60, 65, 59, 58, 58, 57, 60, 60, 60, 57, 58, 60, 61, 63, 63,
70, 64, 59, 63, 61, 58, 59, 64, 62, 70, 70, 73, 66]
from matplotlib import pyplot as plt

# Plot the high temperatures.
# The bare 'seaborn' style name was deprecated in Matplotlib 3.6 and later
# removed; prefer the renamed 'seaborn-v0_8' and fall back on older versions.
style_name = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(style_name)
fig, ax = plt.subplots()
ax.plot(highs, c='red')

# Format plot.
ax.set_title("Daily high temperatures, July 2021", fontsize=24)
ax.set_xlabel('', fontsize=16)
ax.set_ylabel("Temperature (F)", fontsize=16)
ax.tick_params(labelsize=16)

# Save to disk; call plt.show() instead to view the figure interactively.
plt.savefig('simple.jpg', dpi=300)
The datetime Module
from datetime import datetime

# strptime() parses text into a datetime according to the given format codes.
first_date = datetime.strptime('2021-7-1', '%Y-%m-%d')

# Show, in order: the object's type, a custom strftime() rendering,
# and the default string form of the datetime.
formatted = first_date.strftime('%B %d %Y')
for shown in (type(first_date), formatted, first_date):
    print(shown)
<class 'datetime.datetime'>
July 01 2021
2021-07-01 00:00:00
%A Weekday name, such as Monday
%B Month name, such as January
%m Month, as a number (01 to 12)
%d Day of the month, as a number (01 to 31)
%Y Four-digit year, such as 2015
%y Two-digit year, such as 15
%H Hour, in 24-hour format (00 to 23)
%I Hour, in 12-hour format (01 to 12)
%p am or pm
%M Minutes (00 to 59)
%S Seconds (00 to 61)
from pathlib import Path
import csv
from datetime import datetime

from matplotlib import pyplot as plt

path = Path('sitka_weather_07-2021_simple.csv')
lines = path.read_text().splitlines()

reader = csv.reader(lines)
# Consume the header row so the remaining rows are data only.
header_row = next(reader)

# Collect the date (column 2) and high temperature (column 4) of every row.
dates, highs = [], []
for row in reader:
    current_date = datetime.strptime(row[2], "%Y-%m-%d")
    dates.append(current_date)
    high = int(row[4])
    highs.append(high)

# The bare 'seaborn' style name was deprecated in Matplotlib 3.6 and later
# removed; prefer the renamed 'seaborn-v0_8' and fall back on older versions.
style_name = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(style_name)
fig, ax = plt.subplots()
ax.plot(dates, highs, c='red')

# Format plot.
ax.set_title("Daily high temperatures, July 2021", fontsize=24)
ax.set_xlabel('', fontsize=16)
# Rotate the date labels so they do not overlap.
fig.autofmt_xdate()
ax.set_ylabel("Temperature (F)", fontsize=16)
ax.tick_params(labelsize=16)

plt.savefig('simple.jpg', dpi=300)
from pathlib import Path
import csv
from datetime import datetime

from matplotlib import pyplot as plt

path = Path('sitka_weather_2021_simple.csv')
lines = path.read_text().splitlines()

reader = csv.reader(lines)
# Consume the header row so the remaining rows are data only.
header_row = next(reader)

# Collect the date (column 2), high (column 4) and low (column 5) of every row.
dates, highs, lows = [], [], []
for row in reader:
    current_date = datetime.strptime(row[2], "%Y-%m-%d")
    dates.append(current_date)
    high = int(row[4])
    highs.append(high)
    low = int(row[5])
    lows.append(low)

# Plot data.
# The bare 'seaborn' style name was deprecated in Matplotlib 3.6 and later
# removed; prefer the renamed 'seaborn-v0_8' and fall back on older versions.
style_name = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(style_name)
fig, ax = plt.subplots()
ax.plot(dates, highs, c='red', alpha=0.5)
ax.plot(dates, lows, c='blue', alpha=0.5)
# Shade the band between the two series to show the daily temperature range.
ax.fill_between(dates, highs, lows, facecolor='blue', alpha=0.1)

# Format plot.
ax.set_title("Daily high and low temperatures - 2021", fontsize=24)
ax.set_xlabel('', fontsize=16)
# Rotate the date labels so they do not overlap.
fig.autofmt_xdate()
ax.set_ylabel("Temperature (F)", fontsize=16)
ax.tick_params(labelsize=16)

plt.savefig('simple.jpg', dpi=300)
from pathlib import Path
import csv
from datetime import datetime

from matplotlib import pyplot as plt

path = Path('death_valley_2021_simple.csv')
lines = path.read_text().splitlines()

reader = csv.reader(lines)
# Consume the header row so the remaining rows are data only.
header_row = next(reader)

dates, highs, lows = [], [], []
# Rows with a missing reading are reported and skipped instead of aborting
# the whole run: int('') raises ValueError, which is caught below.
for row in reader:
    try:
        current_date = datetime.strptime(row[2], "%Y-%m-%d")
        high = int(row[3])
        low = int(row[4])
    except ValueError:
        print(current_date, 'missing data')
    else:
        # Only append when all three values were parsed, so the lists stay aligned.
        dates.append(current_date)
        highs.append(high)
        lows.append(low)
2021-05-04 00:00:00 missing data
.JSON
from pathlib import Path
from datetime import datetime
import json

import matplotlib.pyplot as plt

# Load the JSON file; the loop below iterates it as a sequence of dicts.
path = Path('btc_close_2017.json')
contents = path.read_text()
btc_data = json.loads(contents)

# Parallel lists: parsed date, integer closing price, and month number.
date = []
close = []
months = []
for btc_dict in btc_data:
    date.append(datetime.strptime(btc_dict['date'], "%Y-%m-%d"))
    months.append(int(btc_dict['month']))
    # Go through float() first so a value such as '123.45' can be truncated to int.
    close.append(int(float(btc_dict['close'])))

# The bare 'seaborn' style name was deprecated in Matplotlib 3.6 and later
# removed; prefer the renamed 'seaborn-v0_8' and fall back on older versions.
style_name = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(style_name)
fig, ax = plt.subplots()
# Draw the series as a thin line with a small marker on every data point.
ax.plot(date, close, linewidth=0.5, c='red')
ax.scatter(date, close, s=5, c='red')
ax.set_title('Close', fontsize=10)
# Rotate the date labels so they do not overlap.
fig.autofmt_xdate()
plt.savefig('close.jpg', dpi=300)
Many types of objects in Python qualify as being iterable (可迭代的), such as lists, tuples, and dictionaries. In Python, the mechanism for iteration is based upon the following conventions:
import itertools

# count(0, 2) is an endless iterator that starts at 0 and steps by 2.
nums = itertools.count(0, 2)

# Pull and print the first three values one at a time with next().
for _ in range(3):
    print(next(nums))
0
2
4
import itertools

# count(0, 2) never ends on its own, so the loop needs an explicit exit.
nums = itertools.count(0, 2)
for i in nums:
    if i > 6:
        break
    print(i)
0
2
4
6
import itertools

# cycle('ABC') repeats A, B, C forever; a manual counter stops the loop
# after six items have been printed.
cycle_strings = itertools.cycle('ABC')
i = 1
for string in cycle_strings:
    if i == 7:
        break
    print(string)
    i = i + 1
A
B
C
A
B
C
import itertools

# repeat('hello', 3) yields the same object three times, then stops.
for item in itertools.repeat('hello', 3):
    print(item)
hello
hello
hello
import itertools
# repeat('hello', 3) yields 'hello' exactly three times and is then exhausted.
nums = itertools.repeat('hello', 3)
print(next(nums))
print(next(nums))
print(next(nums))
# Fourth call: the iterator is exhausted, so next() raises StopIteration.
print(next(nums))
hello
hello
hello
Traceback (most recent call last):
print(next(nums))
StopIteration
groupby()
from itertools import groupby

# groupby() only merges *consecutive* equal items, so the second run of 'a'
# forms its own group instead of joining the first one.
for key, value_iter in groupby('aaabbbaaccd'):
    print(key, ':', list(value_iter))
a : ['a', 'a', 'a']
b : ['b', 'b', 'b']
a : ['a', 'a']
c : ['c', 'c']
d : ['d']
from itertools import groupby

# Group by string length. Equal lengths are not adjacent in this list,
# so every element ends up in a group of its own.
data = ['a', 'bb', 'ccc', 'dd', 'eee', 'f']
for key, value_iter in groupby(data, len):
    print(key, ':', list(value_iter))
1 : ['a']
2 : ['bb']
3 : ['ccc']
2 : ['dd']
3 : ['eee']
1 : ['f']
from itertools import groupby

# Group by string length. Here equal lengths sit next to each other,
# so adjacent items with the same length are merged into one group.
data = ['a', 'bb', 'cc', 'ddd', 'eee', 'f']
for key, value_iter in groupby(data, len):
    print(key, ':', list(value_iter))
1 : ['a']
2 : ['bb', 'cc']
3 : ['ddd', 'eee']
1 : ['f']
zip()
>>> a = [1,2,3]
>>> b = [4,5,6]
>>> c = [4,5,6,7,8]
>>> zipped = zip(a,b)
>>> zipped
<zip object at 0x...>  # zip() returns an iterator, not a list
>>> list(zipped)
[(1, 4), (2, 5), (3, 6)]
>>> list(zip(a,c))
[(1, 4), (2, 5), (3, 6)]
>>> a = [1,2,3]
>>> b = [4,5,6]
>>> c = [4,5,6,7,8]
>>> zipped = zip(a,b)
>>> list(zip(*zipped))
[(1, 2, 3), (4, 5, 6)]
>>> zipped = zip(a,b)
>>> x,y = zip(*zipped)
>>> print(x)
(1, 2, 3)
from itertools import groupby

import matplotlib.pyplot as plt

# Average the closing price per month.
# groupby() only merges consecutive items with the same key, so this assumes
# `months` and `close` are aligned and ordered so that each month's entries
# are contiguous -- TODO confirm against the input data.
xy_map = []
for x, y in groupby(zip(months, close), lambda w: w[0]):
    # Each grouped item is a (month, close) pair; keep only the close value.
    y_list = [second for _, second in y]
    xy_map.append([x, sum(y_list) / len(y_list)])

# Transpose the [month, mean] pairs into two parallel tuples for plotting.
x_unique, y_mean = zip(*xy_map)

# The bare 'seaborn' style name was deprecated in Matplotlib 3.6 and later
# removed; prefer the renamed 'seaborn-v0_8' and fall back on older versions.
style_name = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(style_name)
fig, ax = plt.subplots()
ax.plot(x_unique, y_mean, linewidth=1)
ax.scatter(x_unique, y_mean, s=20)
ax.set_title('Close', fontsize=10)
plt.savefig('close.jpg', dpi=300)