A string is a sequence of characters; its elements are characters. The empty string is '' (not ' ', which contains one space). You can access the characters one at a time with the bracket operator.
>>> fruit = 'banana'
>>> fruit[1]
'a'
>>> len(fruit)
6
>>> fruit = 'banana'
>>> fruit[1:3]
'an'
>>> fruit[3:]
'ana'
in operator
>>> print('a' in 'banana')
True
>>> print('seed' in 'banana')
False
>>> print('ana' not in 'banana')
False
Iteration
# Iterating over a string yields its characters one at a time.
fruit = 'banana'
for char in fruit:
    print(char)
Strings are immutable (similar to Tuples)
>>> greeting = 'Hello, world!'
>>> greeting[0] = 'J'
TypeError: 'str' object does not support item assignment
>>> greeting = 'Hello, world!'
>>> new_greeting = 'J' + greeting[1:]
>>> print(new_greeting)
Jello, world!
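Comparison operators compare strings character by character using Unicode code points, which is useful for putting words in alphabetical order. Note that all uppercase letters sort before lowercase ones (e.g. 'Zebra' < 'apple' is True).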
>>> print('apple'>'banana')
False
>>> print('ba' > 'banana')
False
>>> a_list = ["orange", "apple", "banana"]
>>> sorted(a_list)
['apple', 'banana', 'orange']
String methods do not modify the original string; they return a new value.
>>> name = "ada lovelace"
>>> print(name.title())
Ada Lovelace
>>> print(name)
ada lovelace
>>> name = "Ada Lovelace"
>>> print(name.upper())
ADA LOVELACE
>>> print(name.lower())
ada lovelace
>>> favorite_language = ' python '
>>> favorite_language.rstrip()
' python'
>>> favorite_language.lstrip()
'python '
>>> favorite_language.strip()
'python'
>>> favorite_language = 'python '
>>> favorite_language = favorite_language.rstrip()
>>> favorite_language
'python'
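.find() searches for a substring inside a string and returns the index of its first occurrence; an optional second argument gives the index at which the search starts.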
>>> word = 'banana'
>>> index = word.find('a')
>>> print(index)
1
>>> word.find('na')
2
>>> word.find('na', 3)
4
>>> data = 'From stephen.marquard@uct.ac.za Sat Jan 5 09:14:16 2008'
>>> atpos = data.find('@')
>>> print(atpos)
21
>>> sppos = data.find(' ',atpos)
>>> print(sppos)
31
>>> host = data[atpos+1 : sppos]
>>> print(host)
uct.ac.za
# 如果没有找到,则会得到 -1
.split() breaks a sentence into words and returns them as a list.
>>> s = 'break a sentence into words'
>>> t = s.split()
>>> print(t)
['break', 'a', 'sentence', 'into', 'words']
.isalpha() returns True if the string is non-empty and all of its characters are alphabetic (any Unicode letter, not only A–Z and a–z), otherwise False.
.isspace() returns True if the string is non-empty and contains only whitespace characters (spaces, tabs, newlines), otherwise False.
>>> print("Hello".isalpha()) # True(全是字母)
>>> print("Hello123".isalpha()) # False(包含数字)
>>> print("你好".isalpha()) # True(支持中文等非拉丁字符)
>>> print(" ".isspace()) # True(全是空格)
>>> print("\t\n".isspace()) # True(制表符和换行符也算空白)
>>> print("Hello".isspace()) # False(包含字母)
>>> number = 42
>>> print('I have spotted number camels.') #wrong: prints the literal word "number", not 42
>>> print('I have spotted '+str(number)+' camels.') #works, but clumsy
>>> number = 42
>>> print(f'I have spotted {number} camels.')
I have spotted 42 camels.
>>> animal = 'camels'
>>> number = 42.12345678
>>> print(f'I spotted {number:.2f} {animal}.')
I spotted 42.12 camels.
>>> print(f'I spotted {number:.0f} {animal}.')
I spotted 42 camels.
>>> print(f'I spotted {number:.2%} {animal}.')
I spotted 4212.35% camels.
The elements of a string are characters. Empty string ''
Features: Ordered, Repeatable, Immutable
Indexing and slicing work the same way as for tuples.
The in operator returns True or False depending on whether a string contains a given substring.
You can compare two strings; the result follows alphabetical order when both use the same letter case.
.upper(), .lower(), .title()
.rstrip(), .lstrip(), .strip()
.find(), .split(), .isalpha(), .isspace()
Formatted String Literals
如下代码读取了包含电影评论的txt文件,并以字符串形式赋值给contents变量。
from pathlib import Path

# Read the whole review file into a single string.
path = Path('comment.txt')
contents = path.read_text()

# Sensitive-word list.
words = ["bad", "boring", "awful", "terrible"]

# Normalize to lowercase (the entries in `words` are all lowercase).
contents = contents.lower()

# Remove punctuation: keep only letters and whitespace.
# str.join builds the result in one pass instead of growing a string
# one character at a time with +=.
cleaned_text = "".join(
    char for char in contents if char.isalpha() or char.isspace()
)
情感分析(Sentiment Analysis),也称为情绪分析或意见挖掘,广泛用于各种行业和场景,帮助企业和个人理解文本中的情感倾向(正面、负面或中性)。以下是几个重要的应用场景:产品评论分析, 市场调研, 舆情监测, 股票市场预测。
简约版的情感分析和上述练习类似:先对文本进行标准化处理,接着统计正负词出现的次数,最后根据规则(如正负词多少)判断整体情感。
# 1. Load the idiom dictionary.
from pathlib import Path

path = Path('idiom_dictionary.txt')
contents = path.read_text()
lines = contents.splitlines()

# d_game maps each idiom to the list of pinyin syllables parsed from its line.
# Expected line shape, as implied by the parsing below:
#   <idiom> 拼音:<space-separated syllables> 释义...
d_game = {}
for line in lines:
    # Skip blank / whitespace-only lines.
    if not line.strip():
        continue

    # str.find returns -1 when the marker is missing; using -1 as a slice
    # bound would silently produce a garbage entry, so skip such lines.
    endpoint = line.find("拼音")
    if endpoint == -1:
        continue
    pinyin_start = line.find(":", endpoint)
    pinyin_end = line.find("释义")
    if pinyin_start == -1 or pinyin_end == -1:
        continue

    idiom = line[:endpoint].strip()
    each = line[pinyin_start + 1:pinyin_end]
    pinyin_list = each.split()

    # An entry without syllables cannot take part in the game.
    if not pinyin_list:
        continue
    d_game[idiom] = pinyin_list
# print(len(d_game))
# 2. Given an idiom, print every idiom that can follow it, i.e. whose first
#    syllable equals the last syllable of the given idiom.
idiom = input("请输入第一个成语\n")
syllables = d_game.get(idiom)
if not syllables:
    # Unknown idiom (or one without pinyin): report it instead of raising
    # KeyError / IndexError.
    print("词典中没有这个成语")
else:
    char_4th = syllables[-1]
    for x, y in d_game.items():
        # `y and` guards against entries whose pinyin list is empty.
        if y and char_4th == y[0]:
            print(x)
# 3. Starting from a given idiom, keep chaining until no idiom can follow
#    (or the user types "q" at the prompt).
idiom = input("请输入第一个成语\n")
enter = ""
if not d_game.get(idiom):
    # Unknown idiom (or one without pinyin): nothing to chain from.
    print("词典中没有这个成语")
else:
    # Idioms already used in this chain; without this the chain can bounce
    # between the same idioms forever.
    used = {idiom}
    while enter != "q":
        char_4th = d_game[idiom][-1]
        for x, y in d_game.items():
            # Take the first unused idiom whose first syllable matches.
            if x not in used and y and char_4th == y[0]:
                idiom = x
                used.add(idiom)
                print(idiom)
                break
        else:
            # No unused idiom can follow: the chain ends here.
            break
        enter = input("continue?")
# Homophone matching: rebuild d_game with tone marks stripped from the
# pinyin, so syllables that differ only in tone compare equal.
# Compared with the plain loader, the additions are this import and the
# normalize() call below.
import unicodedata

d_game = {}
for line in lines:
    # Skip blank lines. Comparing against "\n" never matches lines produced
    # by splitlines(); strip() covers lines with or without a trailing newline.
    if not line.strip():
        continue

    # str.find returns -1 when a marker is missing; skip such lines rather
    # than slicing with -1 and storing a garbage entry.
    endpoint = line.find("拼音")
    if endpoint == -1:
        continue
    pinyin_start = line.find(":", endpoint)
    pinyin_end = line.find("释义")
    if pinyin_start == -1 or pinyin_end == -1:
        continue

    idiom = line[:endpoint].strip()
    each = line[pinyin_start + 1:pinyin_end]
    # NFKD splits accented letters into base letter + combining mark;
    # encoding to ASCII with 'ignore' then drops every non-ASCII character,
    # which removes the tone marks.
    each = unicodedata.normalize('NFKD', each).encode('ascii', 'ignore').decode()
    pinyin_list = each.split()

    # An entry without syllables cannot take part in the game.
    if not pinyin_list:
        continue
    d_game[idiom] = pinyin_list
from pathlib import Path

# Load the idiom dictionary file and split it into lines.
path = Path('idiom_dictionary.txt')
contents = path.read_text()
lines = contents.splitlines()

# d_game maps each idiom to the list of pinyin syllables parsed from its line.
# Expected line shape, as implied by the parsing below:
#   <idiom> 拼音:<space-separated syllables> 释义...
d_game = {}
for line in lines:
    # Skip blank / whitespace-only lines.
    if not line.strip():
        continue

    # str.find returns -1 when the marker is missing; using -1 as a slice
    # bound would silently produce a garbage entry, so skip such lines.
    endpoint = line.find("拼音")
    if endpoint == -1:
        continue
    pinyin_start = line.find(":", endpoint)
    pinyin_end = line.find("释义")
    if pinyin_start == -1 or pinyin_end == -1:
        continue

    idiom = line[:endpoint].strip()
    each = line[pinyin_start + 1:pinyin_end]
    pinyin_list = each.split()

    # An entry without syllables cannot take part in the game.
    if not pinyin_list:
        continue
    d_game[idiom] = pinyin_list