Python Programming

Lecture 12 Downloading Data, API

12.1 Downloading Data (2)

.JSON



import json

# Load the whole JSON document; the loop below treats it as a list of
# per-day dictionaries.
filename = 'btc_close_2017.json'
with open(filename) as f:
    btc_data = json.load(f)

# Template for one report line, filled in once per record.
report = "{} is month {} week {}, {}, the close price is {} RMB"

for btc_dict in btc_data:
    date = btc_dict['date']
    month = int(btc_dict['month'])
    week = int(btc_dict['week'])
    weekday = btc_dict['weekday']
    # Go through float() first: presumably the close price is stored as a
    # decimal string, which int() cannot parse directly.
    close = int(float(btc_dict['close']))
    print(report.format(date, month, week, weekday, close))

2017-01-01 is month 1 week 52, Sunday, the close price is 6928 RMB
2017-01-02 is month 1 week 1, Monday, the close price is 7070 RMB
2017-01-03 is month 1 week 1, Tuesday, the close price is 7175 RMB
2017-01-04 is month 1 week 1, Wednesday, the close price is 7835 RMB
2017-01-05 is month 1 week 1, Thursday, the close price is 6928 RMB
...



# NOTE(review): `pygal`, `math`, `dates` and `close` are not defined in this
# excerpt; presumably `dates` and `close` are parallel per-day lists collected
# from btc_data -- confirm against the full lecture code.

# Chart 1: the closing price as-is.
line_chart = pygal.Line(x_label_rotation=20, show_minor_x_labels=False) 
line_chart.title = '收盘价(¥)'
line_chart.x_labels = dates
N = 20  # X-axis shows the date for every 20 days 
# Every N-th date is marked as a major label; minor labels are switched off
# in the constructor above.
line_chart.x_labels_major = dates[::N]
line_chart.add('收盘价', close)
line_chart.render_to_file('收盘价折线图(¥).svg')

# Chart 2: same layout, but plotting log10 of each closing price.
line_chart = pygal.Line(x_label_rotation=20, show_minor_x_labels=False)
line_chart.title = '收盘价对数变换(¥)'
line_chart.x_labels = dates
N = 20
line_chart.x_labels_major = dates[::N]
# Base-10 logarithm of every value in `close`.
close_log = [math.log10(xx) for xx in close]
line_chart.add('log收盘价', close_log)
line_chart.render_to_file('收盘价对数变换折线图(¥).svg')

itertools


import itertools

# count(0, 2) yields 0, 2, 4, ... without end, so the loop has to stop itself.
nums = itertools.count(0, 2)
while True:
    i = next(nums)
    if i > 6:
        break
    print(i, end=" ")
#output
0 2 4 6 

import itertools

# cycle('ABC') repeats A, B, C forever; `i` counts the items printed so far
# and ends the loop once six of them have been shown.
cycle_strings = itertools.cycle('ABC')
i = 1
while True:
    string = next(cycle_strings)
    if i == 7:
        break
    print(i, string)
    i += 1
#output
1 A
2 B
3 C
4 A
5 B
6 C
  • We have already learned about generators (consumed with next() or a for loop).

  • 
    g = (x * x for x in range(10))
    
    def fib(max):
        """Yield the first ``max`` Fibonacci numbers, starting 1, 1, 2, ...

        When the generator is exhausted, the string 'done' is carried as the
        value of the resulting StopIteration.
        """
        emitted = 0
        prev, curr = 0, 1
        while True:
            if not emitted < max:
                return 'done'
            yield curr
            prev, curr = curr, prev + curr
            emitted += 1
    
  • Iterator is a more general concept: any object whose class has a next method (__next__ in Python 3) and an __iter__ method.

  • Every generator is an iterator, but not vice versa.

  • An iterator can only be traversed once.

  • Iterator and iterable are two different concepts


import itertools

# repeat(obj, 3) hands back the same object three times and then stops.
greetings = itertools.repeat('hello world', 3)
for item in greetings:
    print(item)

hello world
hello world
hello world

groupby()


from itertools import groupby

# groupby() starts a new group whenever the value changes, so the second run
# of 'a' is reported separately from the first one.
letters = 'aaabbbaaccd'
for key, value_iter in groupby(letters):
    print(key, ':', [*value_iter])

a : ['a', 'a', 'a']
b : ['b', 'b', 'b']
a : ['a', 'a']
c : ['c', 'c']
d : ['d']

from itertools import groupby

# Grouping by length: equal lengths that are not adjacent in `data` end up
# in separate groups.
data = ['a', 'bb', 'ccc', 'dd', 'eee', 'f']
for key, value_iter in groupby(data, key=len):
    print(key, ':', [*value_iter])

1 : ['a']
2 : ['bb']
3 : ['ccc']
2 : ['dd']
3 : ['eee']
1 : ['f']

from itertools import groupby

# Here strings of equal length sit next to each other, so each run of equal
# lengths is collected into one group.
data = ['a', 'bb', 'cc', 'ddd', 'eee', 'f']
for key, value_iter in groupby(data, key=len):
    print(key, ':', [*value_iter])

1 : ['a']
2 : ['bb', 'cc']
3 : ['ddd', 'eee']
1 : ['f']

zip()


>>> a = [1,2,3]
>>> b = [4,5,6]
>>> c = [4,5,6,7,8]
>>> zipped = zip(a,b)
>>> zipped
<zip object at 0x...> #返回的是一个对象(iterator)
>>> list(zipped)
[(1, 4), (2, 5), (3, 6)] #使用list()函数转换为列表
>>> list(zip(a,c))
[(1, 4), (2, 5), (3, 6)]

>>> a = [1,2,3]
>>> b = [4,5,6]
>>> c = [4,5,6,7,8]
>>> zipped = zip(a,b)
>>> list(zip(*zipped)) #解压也使用list进行转换
[(1, 2, 3), (4, 5, 6)]
>>> zipped = zip(a,b)
>>> x,y = zip(*zipped)
>>> print(x)
(1, 2, 3)

from itertools import groupby

def draw_line(x_data, y_data, title, y_legend):
    """Plot the mean of ``y_data`` for each distinct value in ``x_data``.

    The (x, y) pairs are sorted, grouped by x, and the y values of every
    group are averaged. The averages are drawn as a pygal line chart that is
    written to ``title + '.svg'``.

    Args:
        x_data: Iterable of group labels, paired positionally with ``y_data``.
            The pairs are sorted, so the values must be orderable.
        y_data: Iterable of numbers to average per label.
        title: Chart title; also used as the stem of the SVG file name.
        y_legend: Legend text for the plotted series.

    Returns:
        The configured ``pygal.Line`` chart.
    """
    # groupby() only merges adjacent items, hence the sort beforehand.
    pairs = sorted(zip(x_data, y_data))
    xy_map = []
    for x, group in groupby(pairs, key=lambda pair: pair[0]):
        y_list = [y for _, y in group]
        xy_map.append((x, sum(y_list) / len(y_list)))
    # zip(*[]) produces nothing, so a plain two-name unpack would raise
    # ValueError when there is no data; fall back to two empty series.
    x_unique, y_mean = zip(*xy_map) if xy_map else ((), ())
    line_chart = pygal.Line()
    line_chart.title = title
    line_chart.x_labels = x_unique
    line_chart.add(y_legend, y_mean)
    line_chart.render_to_file(title + '.svg')
    return line_chart

# NOTE(review): `dates`, `months` and `close` are not defined in this excerpt;
# presumably they are parallel per-day lists built from btc_data -- confirm.
# Find where '2017-12-01' sits in `dates`; the slices below keep only the
# entries before that position.
idx_month = dates.index('2017-12-01') # list.index() gives the first matching position
line_chart_month = draw_line(
    months[:idx_month], close[:idx_month], '收盘价月日均值(¥)', '月日均值')

12.2 API

GitHub

https://api.github.com/search/repositories?q=language:python&sort=stars


import requests

# Repository search: Python projects, sorted by stars.
url = 'https://api.github.com/search/repositories?q=language:python&sort=stars'

# Issue the request and report the HTTP status.
r = requests.get(url)
print("Status code:", r.status_code)

# Decode the JSON body and list its top-level keys.
response_dict = r.json()
print(response_dict.keys())

Status code: 200
dict_keys(['total_count', 'incomplete_results', 'items'])
  • Working with the Response Dictionary

import requests
url = 'https://api.github.com/search/repositories?q=language:python&sort=stars'

# Issue the request and report the HTTP status.
r = requests.get(url)
print("Status code:", r.status_code)

# Decode the JSON body into a dictionary.
response_dict = r.json()
print("Total repositories:", response_dict['total_count'])

# 'items' holds one dictionary per repository in this response.
repo_dicts = response_dict['items']
print("Repositories returned:", len(repo_dicts))

# Look more closely at the first repository: list its keys alphabetically.
repo_dict = repo_dicts[0]
print("\nKeys:", len(repo_dict))
for key in sorted(repo_dict):
    print(key)

# Pull out a handful of fields from that first repository.
print("\nSelected information about first repository:")
print('Name:', repo_dict['name'])
print('Owner:', repo_dict['owner']['login'])
print('Stars:', repo_dict['stargazers_count'])
print('Repository:', repo_dict['html_url'])
print('Created:', repo_dict['created_at'])
print('Updated:', repo_dict['updated_at'])
print('Description:', repo_dict['description'])

Status code: 200
Total repositories: 3675320
Repositories returned: 30

Selected information about first repository:
Name: public-apis
Owner: toddmotto
Stars: 57212
Repository: https://github.com/toddmotto/public-apis
Created: 2016-03-20T23:49:42Z
Updated: 2019-05-22T09:34:50Z
Description: A collective list of free APIs for use in software and web development.
  • Visualizing Repositories Using Pygal

import requests
import pygal
from pygal.style import LightColorizedStyle as LCS, LightenStyle as LS

# Repository search: Python projects ordered by star count.
# `stars` is the documented sort value (the earlier snippets in these notes
# use it as well); the previous `sort=star` is not one of the accepted values.
URL = 'https://api.github.com/search/repositories?q=language:python&sort=stars'
r = requests.get(URL)
print("Status code:", r.status_code)
response_dict = r.json()
print("Total repositories:", response_dict['total_count'])
repo_dicts = response_dict['items']

# Collect the bar labels (repository names) and bar heights (star counts).
names, stars = [], []
for repo_dict in repo_dicts:
    names.append(repo_dict['name'])
    stars.append(repo_dict['stargazers_count'])

# Make visualization: one bar per repository, labels rotated 45 degrees,
# legend hidden because there is only a single, unnamed series.
my_style = LS('#333366', base_style=LCS)
chart = pygal.Bar(style=my_style, x_label_rotation=45, show_legend=False)
chart.title = 'Most-Starred Python Projects on GitHub'
chart.x_labels = names

chart.add('', stars)
chart.render_to_file('python_repos.svg')

{
"by":"nns",
"descendants":297,
"id":9884165,
"kids":[9885099,9884723,9885165,9884789,9885604,9884137,
9886151,9885220,9885790,9884661,9885844,9885029,9884817,
9887342,9884545,9884372,9884499,9884881,9884109,9886496,
9884342,9887832,9885023,9884334,9884707,9887008,9885348,
9885131,9887539,9885880,9884196,9884640,9886534,9885152],
"score":558,
"time":1436875181,
"title":"New Horizons: Nasa spacecraft speeds past Pluto",
"type":"story",
"url":"http://www.bbc.co.uk/news/science-environment-33524589"
}

import requests
from operator import itemgetter
# Make an API call and store the response.
# Fetch the list of top-story ids.
url = 'https://hacker-news.firebaseio.com/v0/topstories.json'
r = requests.get(url)
print("Status code:", r.status_code)

submission_ids = r.json()
submission_dicts = []

# Only the first five ids are looked up; each one needs its own API call.
for submission_id in submission_ids[:5]:
    url = ('https://hacker-news.firebaseio.com/v0/item/' +
        str(submission_id) + '.json')
    submission_r = requests.get(url)
    print(submission_r.status_code)
    response_dict = submission_r.json()

    # Keep just the title, the discussion link and the comment count;
    # the count falls back to 0 when 'descendants' is absent.
    submission_dict = {
        'title': response_dict['title'],
        'link': 'http://news.ycombinator.com/item?id=' + str(submission_id),
        'comments': response_dict.get('descendants', 0)
        }
    submission_dicts.append(submission_dict)

# Most-commented submissions first.
submission_dicts.sort(key=itemgetter('comments'), reverse=True)
for submission_dict in submission_dicts:
    print("\nTitle:", submission_dict['title'])
    print("Discussion link:", submission_dict['link'])
    print("Comments:", submission_dict['comments'])

Working with APIs


import requests

# Request the weather data for city code 101020100 and show the HTTP status.
url = "http://t.weather.sojson.com/api/weather/city/101020100"
r = requests.get(url)
print(r.status_code)

# Drill down to the forecast list: response -> 'data' -> 'forecast'.
response_dict = r.json()
f = response_dict['data']
ff = f['forecast']

# First three forecast entries (going by the names, entry 0 is today).
ff_today, ff_1, ff_2 = ff[0], ff[1], ff[2]

def show(day):
    """Print each ``key: value`` pair of ``day`` on its own line.

    The listing is followed by two blank lines so consecutive calls are
    visually separated.
    """
    for field, value in day.items():
        print(field + ': ' + str(value))
    print('\n')
# Print the three forecast entries one after another.
for forecast in (ff_today, ff_1, ff_2):
    show(forecast)

Turing Robot


import requests     
    
KEY = '' # Fill in the API key you obtained here
    
def get_response(msg):
    """Send ``msg`` to the Turing Robot (tuling123) API and return its reply.

    Args:
        msg: The message text to send.

    Returns:
        The value of the ``'text'`` field in the JSON reply, or ``None`` when
        the request fails, the body is not valid JSON, the reply is not a
        JSON object, or it has no ``'text'`` field.
    """
    apiUrl = 'http://www.tuling123.com/openapi/api'
    data = {
        'key'    : KEY,
        'info'   : msg,
        'userid' : '路老师',
    }
    try:
        # requests waits indefinitely unless a timeout is given.
        reply = requests.post(apiUrl, data=data, timeout=10).json()
    except (requests.RequestException, ValueError):
        # Network/HTTP-level failure, or a body that is not valid JSON.
        # Anything else (e.g. KeyboardInterrupt) is no longer swallowed.
        return None
    # A JSON array or scalar has no .get(); treat it like a missing reply.
    if not isinstance(reply, dict):
        return None
    return reply.get('text')
print(get_response('你好'))

12.3 Wordcloud & Encoding


from matplotlib import pyplot as plt
from wordcloud import WordCloud
 
# Text to visualise. A single-quoted literal cannot contain a raw line break,
# so the sentence is split into adjacent literals inside parentheses; Python
# joins them into one string.
string = (
    'Importance of relative word frequencies for font-size. '
    'With relative_scaling=0, only word-ranks are considered. With '
    'relative_scaling=1, a word that is twice as frequent will have '
    'twice the size. If you want to consider the word frequencies '
    'and not only their rank, relative_scaling around .5 often looks good.'
)
font = r'C:\Windows\Fonts\Arial.TTF'
wc = WordCloud(font_path=font,  # required for Chinese text, otherwise the characters show up as boxes
               background_color='white',
               width=1000,
               height=800,
               ).generate(string)
wc.to_file('ss.png')  # save the image
plt.imshow(wc)  # draw the word cloud with pyplot
plt.axis('off')  # hide the axes
plt.show()  # display the figure

Character Encoding

  • ASCII
  • Unicode
  • UTF-8
  • Python Crash Course (Chapters we do not cover: Chapter 12 - 14, 18 - 20)

    • Chapter 12 - 14: Alien Invasion

    • Chapter 18 - 20: Django

  • Python for Everybody (Chapters we do not cover: Chapter 11 - 13, 15 - 16)

    • Chapter 11: Regular Expressions

    • Chapter 12: Networked Programs 12.4 - 12.8 (urllib, BeautifulSoup)

    • Chapter 13: Using Web Services (XML, JSON, API)

    • Chapter 15: Databases and SQL

    • Chapter 16: Visualizing data (Network, Word Cloud)

Summary

  • Downloading Data
  • API
  • Wordcloud
    • Reading: Python Crash Course, Chapter 16, 17