Python execl operation
Read the contents of the execL worksheet
The XLRD library needs to be installed
import xlrd # Introduce the XLRD library
# import workbook path
xlsx = xlrd.open_workbook('e:/ Summer training attendance list.xlsx ')
# determine which worksheet to read, or index by worksheet name
# table = xlsx.sheet_by_name("表名")
table = xlsx.sheet_by_index(0)
Print the coordinate value of the cell, be careful not to exceed all the contents of the cell, otherwise an error will be reported
print(table.cell_value(1.2))
print(table.cell(1.2).value)
print(table.row(1) [2].value)
Copy the code
Write to the execl worksheet
The XLWT library needs to be installed
# Introduce XLWT library
import xlwt
Create a new workbook
new_workbook = xlwt.Workbook()
Create a new worksheet
worksheet = new_workbook.add_sheet('sheet1')
Write values at the specified coordinates in the worksheet
worksheet.write(0.0.'test')
Save the workbook
new_workbook.save('test.xls')
Copy the code
Write to the execl table with format
Install the Xlutils library and copy the format of an Excel template into a new execl (this library is no longer supported since Py3.5, so I won’t cover it here).
Python operation database
Install the template Pymysql
Connecting to a Database
import pymysql
# connect to the database, the format for: pymysql. Connect (" address ", "user name", "password", "database name", "encoding format")
data = pymysql.connect("127.0.0.1"."user"."123456"."acg", charset='utf8')
Initialize pointer for database query
cursor = data.cursor()
Copy the code
Add, delete, change and check the essence of the execution of SQL statements, the main connection object is the database, and the operation object is the data table
increase
# increase statement
sql = "INSERT INTO users(name, password) VALUES ('Auto', '123456');"
Execute SQL statement
cursor.execute(sql)
Commit after modifying the stored data
data.commit()
# close the connection
data.close()
Copy the code
change
sql = "UPDATE users SET name='Autovy', password='123456' where id='81'"
cursor.execute(sql)
Commit after modifying the stored data
data.commit()
# close the connection
data.close()
Copy the code
check
1. Return a field as an object
# check
sql = "SELECT nr FROM comment where passage_id='129'"
cursor.execute(sql)
Get the data to which the pointer points
result = cursor.fetchall()
Output the tuple data obtained
print(result)
Copy the code
2. Return all contents as objects
Returns a tuple, so cannot be indexed by field
sql = "SELECT * FROM comment where passage_id='129'"
cursor.execute(sql)
result = cursor.fetchall()
print(result[1] [1])
Copy the code
3. Use aggregate function, need to group return
sql = "SELECT sum(id) FROM comment GROUP BY belong_id;"
cursor.execute(sql)
result = cursor.fetchall()
print(result)
Copy the code
delete
# delete
sql = "DELETE FROM users WHERE name='Auto' "
cursor.execute(sql)
Commit after modifying the stored data
data.commit()
# close the connection
data.close()
Copy the code
With XlWriter, OpenPyXL is more flexible with Excel
Other methods of operating Excel
xlwriter
Install the XlWriter template
Note that the w of the new Workbook is capitalized
Write xlsxwriter with the abbreviation Xw
import xlsxwriter as xw
Create a workbook
workbook = xw.Workbook('test.xlsx')
Create a new worksheet
sheet0 = workbook.add_worksheet('sheet0')
# write number
for i in range(0.300):
sheet0.write(0, i, i)
Close and save
workbook.close()
Copy the code
openpyxl
Install the OpenPyXL template
import openpyxl
# Open workbook
workbook = openpyxl.load_workbook('Summer training attendance sheet. XLSX')
# Open the worksheet
sheet0 = workbook['Sheet1']
Write values to cells, using alphanumeric combinations
sheet0['B3'] = '5'
sheet0['B4'] = '7'
# save file
workbook.save('Summer training.xlsx')
Copy the code
Practical case: File name sorted into Excel
OS templates are built into Python and do not require additional installation
Python OS templates handle files and directories
import os
import xlwt
# file path
file_dir = 'C: / Users/linghao/Desktop/programming notes'
Returns a list of specified file names
os.listdir(file_dir)
Create a workbook to store information
new_workbook = xlwt.Workbook()
worksheet = new_workbook.add_sheet('Programming Notes')
Read the contents of the list file_dir one by one with I
n = 0
for i in os.listdir(file_dir):
worksheet.write(n, 0, i)
n += 1
Save the workbook
new_workbook.save('Programming filenames.xls')
Copy the code
Practical example: Load web page judgment elements
import xlrd
from bs4 import BeautifulSoup
import urllib.request
import xlwt
xlsx = xlrd.open_workbook('684--1366.xlsx')
table = xlsx.sheet_by_index(0)
new_workbook = xlwt.Workbook()
worksheet = new_workbook.add_sheet('sheet1')
for i in range(530.683.1):
url = table.cell(i, 7).value
mysite = urllib.request.urlopen("{}".format(url)).read()
soup = BeautifulSoup(mysite, "html5lib")
choose = soup.select('ol')
if(choose):
worksheet.write(i, 0.'A')
new_workbook.save('test.xls')
Copy the code
Python action word
Write the word
Create a new document
from docx import Document
# alignment
from docx.enum.text import WD_ALIGN_PARAGRAPH
# pounds
from docx.shared import Pt
# Chinese format
from docx.oxml.ns import qn
# Introduce time templates
import time
# Enter today's price
price = input('Price:')
# object
company_list = ['Mr..Mr 'B'.'Mr C'.'Mr D']
Localtime (); time.strftime();
today1 = time.strftime("%Y-%m-%d", time.localtime())
# Format into year, month and day.
today = time.strftime("%Y{y}%m{m}%d{d}", time.localtime()).format(y='years', m='month', d='day')
Walk through the list of objects
for i in company_list:
Create a new document
document = Document()
Set the document base font for styles with an S
document.styles['Normal'].font.name = U 'be'
Set the document base Chinese font
document.styles['Normal'].element.rPr.rFonts.set(qn('w:eastAsia'), U 'be')
Create the first paragraph initially
p1 = document.add_paragraph()
# set the alignment of the first paragraph. Default is left
p1.alignment = WD_ALIGN_PARAGRAPH.CENTER
Add text to the first paragraph
run1 = p1.add_run("Notice on issuing price of % S product" % (today))
Set the font format
Set the font style and font size
run1.font.name = Microsoft Yahei # Western font
run1.element.rPr.rFonts.set(qn('w:eastAsia'), U 'Microsoft Yahei') # Chinese font
run1.font.size = Pt(21)
# Set bold
run1.font.bold = True
5 pounds behind the section
p1.space_after = Pt(5)
# 5 pounds in front
p1.space_after = Pt(5)
# Create a second paragraph
p2 = document.add_paragraph()
Add text in the second paragraph to address the user
run2 = p2.add_run(i + ':')
Set the font format
Set the font style and font size
run2.font.name = 'imitation song dynasty style typeface' # Western font
run2.element.rPr.rFonts.set(qn('w:eastAsia'), U 'imitation song dynasty style typeface') # Chinese font
run2.font.size = Pt(16)
# Set bold
run2.font.bold = True
Create a third paragraph
p3 = document.add_paragraph()
Add text in the third paragraph to address the user
run3 = p3.add_run('According to the arrangement of our company, our company will pay % S of wages today.' % price)
Set the font format
Set the font style and font size
run3.font.name = 'imitation song dynasty style typeface' # Western font
run3.element.rPr.rFonts.set(qn('w:eastAsia'), U 'imitation song dynasty style typeface') # Chinese font
run3.font.size = Pt(16)
# Set bold
run3.font.bold = True
# Save the customer name - price notification as the file name
document.save('%s- price notice.docx ' % i)
Copy the code
Insert table image
Create a new document
from docx import Document
# alignment
from docx.enum.text import WD_ALIGN_PARAGRAPH
# pounds
from docx.shared import Pt
# Chinese format
from docx.oxml.ns import qn
# Image size
from docx.shared import Inches
import time
Get the current date
today = time.strftime("%Y{y}%m{m}%d{d}", time.localtime()).format(y='years', m='month', d='day')
# Enter today's price
price = input('Price:')
# object
company_list = ['Mr..Mr 'B'.'Mr C'.'Mr D']
Walk through the list of objects
for i in company_list:
Create a new document
document = Document()
Set the document base font for styles with an S
document.styles['Normal'].font.name = U 'Microsoft Yahei'
document.styles['Normal'].font.size = Pt(14)
Set the document base Chinese font
document.styles['Normal'].element.rPr.rFonts.set(qn('w:eastAsia'), U 'Microsoft Yahei')
Insert the image into the document with a width of 6 inches
document.add_picture('test.jpg', width=Inches(6))
Insert table into document
Create a table with behavior 3 and column 3
table = document.add_table(rows=3, cols=3, style='Table Grid')
# merge tables, starting at upper left corner and ending at lower right corner
table.cell(0.0).merge(table.cell(0.2))
Fill in the form (one more table than the normal paragraph)
Fill in the first box
table_run1 = table.cell(0.0).paragraphs[0].add_run("Quotation Sheet for XX Products")
# Modify Chinese and Western fonts
table_run1.font.name = U 'belongs to'
table_run1.element.rPr.rFonts.set(qn('w:eastAsia'), U 'official script')
/ / A/s/s/s/s/s/s/s/s/s/s/s
table.cell(0.0).paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER
Fill in other Spaces, using the default font of the document
table.cell(1.0).text = 'date'
table.cell(1.1).text = 'price'
table.cell(1.2).text = 'note'
table.cell(2.0).text = today
table.cell(2.1).text = str(price)
table.cell(2.2).text = ' '
# pagination, insert other content on a new page
document.add_page_break()
document.save('%s- price notice.docx ' % i)
Copy the code
Reading word documents
1. Read plain text
from docx import Document
Open a document
document = Document("Information Security Association Introduction material 2.0.docx")
Get all paragraphs
all_paragraphs = document.paragraphs
# Read paragraph by paragraph, usually not read in run, because there will be text fragmentation problem
for paragraph in all_paragraphs:
print(paragraph.text)
Copy the code
2. Read the text in the table
from docx import Document
Open a document
document = Document("Mr. A - Price notice.docx")
Get all tables
all_tables = document.tables
for table in all_tables:
Get all rows
for row in table.rows:
Get all cells per row
for cell in row.cells:
Print the text inside the cell
print(cell.text)
Copy the code
3. More complex text extraction (tables, text mash-ups)
import zipfile
Open the Word document as a zipped package
Set the file path and unzip the file
word = zipfile.ZipFile('Information Security Association Introduction Material 2.0.docx')
# word document content is stored in word/document.xml file of the file, set the encoding mode
xml = word.read('word/document.xml').decode('utf-8')
# Split the string with
intervals to form a list
xml_list = xml.split('<w:t>')
# set an empty list of filtered characters
text_list = []
# Slice characters and store them as a list
for i in xml_list:
if i.find('</w:t>') +1:
text_list.append(i[:i.find('</w:t>')])
else:
pass
Extract the list to plain text
text = "".join(text_list)
print(text)
Copy the code
Word format
1. Create a template (Word)
The previous method of writing Word was optimized to do a good job in function encapsulation
Sample # old four
Create a new document
from docx import Document
# alignment
from docx.enum.text import WD_ALIGN_PARAGRAPH
# pounds
from docx.shared import Pt
# Chinese format
from docx.oxml.ns import qn
# user list
user_list = ['A'.'B'.'C']
for change in user_list:
# Basic setup
document = Document()
document.styles['Normal'].font.name = U 'bold'
document.styles['Normal'].element.rPr.rFonts.set(qn('w:eastAsia'), U 'bold')
Create a text template
def add_context(context) :
p = document.add_paragraph()
p.alignment = WD_ALIGN_PARAGRAPH.LEFT
r = p.add_run(str(context))
r.font.size = Pt(16)
p.space_after = Pt(5)
p.space_before = Pt(5)
add_context("Dear %s customer, % change)
add_context("Hello")
document.save('Letter to %s customer. Docx' % change)
Copy the code
2. Use a template (modify Word)
from docx import Document
# Read word file
document = Document("Information Security Association Introduction material 2.0.docx")
def change_text(old_text, new_text) :
# Modify plain text
Copy and paste word document contents including format
all_paragraphs = document.paragraphs
for paragraphs in all_paragraphs:
for run in paragraphs.runs:
# modify text
run_text = run.text.replace(old_text, new_text)
run.text = run_text
# alter table
all_tables = document.tables
for table in all_tables:
for row in table.rows:
for cell in row.cells:
cell_text = cell.text.replace(old_text, new_text)
cell.text = cell_text
# replace string
change_text('community'.'association')
You can override the save, but be careful to close the open program
document.save("Information Security Association Introduction material 2.0.docx")
Copy the code
Note: The table modification here will use the default format, so keep it as it is and set the document format separately
Word to PDF
Note that the path used must be absolute, because office is actually called and not Python
The library PyWin32 needs to be installed
# Use win32 template functionality
from win32com.client import Dispatch, constants, gencache
Determine the path to read the DOCX and save the PDF path
docx_path = 'C: / Users/linghao brochure/Desktop/test/information security association 2.0 docx'
pdf_path = 'C: / Users/linghao/Desktop/test/information security association brochure. PDF'
# point to the offic function and call it
gencache.EnsureModule('{00020905-0000-0000-C000-000000000046}'.0.8.4)
# Call word service to start conversion
wd = Dispatch("Word.Application")
doc = wd.Documents.Open(docx_path, ReadOnly=1)
doc.ExportAsFixedFormat(pdf_path, constants.wdExportFormatPDF, Item=constants.wdExportDocumentWithMarkup,CreateBookmarks=constants.wdExportCreateHeadingBookmarks)
Exit and save
wd.Quit(constants.wdDoNotSaveChanges)
Copy the code
Read the PDF
Install library PDfMiner3K (guaranteed compatibility with Python after 3.0)
# Import various libraries
# character unstructured transfer library
from io import StringIO
from pdfminer.pdfinterp import PDFResourceManager, process_pdf
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
# to open the PDF
pdf_file = open('Information Security Association Introduction. PDF'.'rb')
# Key data assignment
rsrcmgr = PDFResourceManager()
retstr = StringIO()
laparams = LAParams()
# Convert text
device = TextConverter(rsrcmgr=rsrcmgr, outfp=retstr, laparams=laparams)
# parameter pass
process_pdf(rsrcmgr=rsrcmgr, device=device, fp=pdf_file)
# close
device.close()
Read values in memory as IO streams
content = retstr.getvalue()
# close
retstr.close()
pdf_file.close()
print(content)
Copy the code
Pandas Imports Excel data
The PANDAS template needs to be installed
import pandas as pd
# Open excel with pandas
e_file = pd.ExcelFile('the original 684-1366. XLSX')
Create table name
data = e_file.parse('Sheet1')
print(data)
Copy the code
Python calls the API
Request url library
import urllib.request
Library for handling JSON data
import json
# constructs the url
key = 'test123'
question1 = input("Please enter your question:")
# Translate the Chinese problem, otherwise an error will be reported
question = urllib.request.quote(question1)
host = 'http://api.gochati.cn/jsapi.php?q=%s&token=%s' % (question, key)
Send the URL request
request = urllib.request.Request(host)
# request adds a header that determines the format of the data to be sent
request.add_header('Content-Type'.'application/json; charset=utf-8')
Receive the returned data
response = urllib.request.urlopen(request)
Read the returned content and translate the read data using JSON
content = response.read().decode("utf-8")
json_all = json.loads(content)
print(content)
Copy the code
Python wechat automatically sends messages
# Introduce a library to deal with wechat, and ensure that you can log in to the wechat web version
import itchat
# Login wechat, allow to generate the TWO-DIMENSIONAL code in the command line, CMD can normally display the two-dimensional code
itchat.auto_login(enableCmdQR=True, hotReload=True)
# Search for a friend name, you can get a nickname, personal introduction, comment name and other information
to_name = itchat.search_friends(name='brother head')
print(to_name)
# Text message sending
itchat.send('Hello duck! ', toUserName=to_name[0] ["UserName"])
# Send pictures
file_img = 'test.jpg'
itchat.send_image(file_img, toUserName=to_name[0] ["UserName"])
# File send
file = 'the original 684-1366. XLSX'
itchat.send_file(file, toUserName=to_name[0] ["UserName"])
Copy the code
Python automatically receives messages
import itchat
# Identify the content of the received message
from itchat.content import *
# function decorates to determine the received message type
# is executed for text
@itchat.msg_register(TEXT)
def text_rely(msg) :
print(msg.text)
reply_text = msg.text.replace('吗?'.'! ')
Send a message to reply to
return reply_text
# execute for file
@itchat.msg_register([PICTURE, RECORDING, ATTACHMENT,VIDEO])
def download_files(msg) :
# save file
msg.download(msg.fileName)
return 'received'
# Automatic and continuous login
itchat.auto_login(enableCmdQR=True, hotReload=True)
# Auto run
itchat.run()
Copy the code
Python modifies file sending information in real time
The message is sent when the file has been modified
import os
import time
import xlrd
import itchat
# Login to wechat
itchat.auto_login(enableCmdQR=True, hotReload=True)
Check excel file for date changes and save the initial data
change_time_save = time.ctime(os.stat('Summer training and learning summary. XLSX').st_mtime)
while True:
time.sleep(2)
Get the current file modification time
change_time = time.ctime(os.stat('Summer training and learning summary. XLSX').st_mtime)
If the file has not changed, skip it
if change_time_save == change_time:
pass
# If the file changes, send
else:
change_time_save = change_time
Read the workbook information below
xlsx = xlrd.open_workbook('Summer training and learning summary. XLSX')
table = xlsx.sheet_by_index(0)
content = str(table.cell_value(0.0))
to_name = itchat.search_friends(name='brother head')
itchat.send(content, toUserName=to_name[0] ["UserName"])
print("sent")
Copy the code
Python crawls static web pages
from bs4 import BeautifulSoup
import requests
word = input("Please enter the word you want to find:")
# constructs the url
url = 'https://youdao.com/w/eng/' + word
Visit a web page
web_data = requests.get(url).text
Parsing web pages with LXML
soup = BeautifulSoup(web_data, 'lxml')
# soup to get the page element, get_text() to get the text
meaning = soup.select('#phrsListTab > div.trans-container > ul > li')
The same element can be iterated through by a for loop
for i in meaning:
print(i.get_text())
Copy the code
Python crawls dynamic pages
Selenium libraries and PhantomJS tools need to be installed
from selenium import webdriver
import time
# Call Phantomjs and open the page using the mock browser
driver = webdriver.PhantomJS(executable_path="D: / phantomjs - 2.1.1 - Windows/phantomjs - 2.1.1 - Windows/bin/phantomjs. Exe")
# Open the page you want to crawl
driver.get("https://www.bilibili.com/video/BV14A411v7Sx?p=59")
Pause for 5 seconds to finish loading the page
time.sleep(5)
The selector filters the required parts
data = driver.find_element_by_css_selector("#viewbox_report > div:nth-child(3)").text
print(data)
# Close the page
driver.quit()
Copy the code
Python web pages automatically enter
from selenium import webdriver
# Keyboard mouse click simulation board
from selenium.webdriver.common.keys import Keys
import time
# Call Phantomjs and open the page using the mock browser
driver = webdriver.PhantomJS(executable_path="D: / phantomjs - 2.1.1 - Windows/phantomjs - 2.1.1 - Windows/bin/phantomjs. Exe")
# Open the page you want to crawl
driver.get("https://www.baidu.com/")
Pause for 5 seconds to finish loading the page
time.sleep(5)
# Locate the form and enter the text
driver.find_element_by_css_selector("#kw").send_keys("bilibili")
# Simulate click submission
# Simulate enter
driver.find_element_by_css_selector("#kw").send_keys(Keys.RETURN)
Pause for 5 seconds to finish loading the page
time.sleep(5)
# Simulate mouse click submission
driver.find_element_by_id("su").click()
time.sleep(5)
# Save a screenshot of the page
driver.save_screenshot("baidu2.png")
# Some operations on the browser
# driver. The back () back
# driver. Forward ()
# driver. The refresh () refresh
Copy the code
Added methods for simulating composite keys
driver.find_element_by_css_selector("#kw").send_keys(Keys.CONTROL, "a")
driver.find_element_by_css_selector("#kw").send_keys(Keys.CONTROL, "x")
Copy the code
Python Management files
import os
Initialize an empty list store file
list_all = []
# Search all folders and all files in a directory
for root, dirs, files in os.walk('C: / Users/linghao/Desktop/programming notes') :# get file
for name in files:
Get the file path
file_path = os.path.join(root, name)
# get file name
file_name = os.path.split(file_path)[-1]
list_all.append(file_name)
print(list_all)
Copy the code
Rename files with re
Import the re library
import re
import os
Matches all XLSX files
pattern = re.compile(r'.+\.docx')
Get file information
for root, dirs, files in os.walk('C: / Users/linghao/Desktop/sign in documents/') :# get file name
for name in files:
Get the file path
file_path = os.path.join(root, name)
# regular match
matching = pattern.search(file_path)
if matching:
# change the name (path-to-path). Note that the file name ends with a slash
os.rename(file_path, os.path.split(file_path)[-2] + '/233.docx')
Copy the code
Copy and delete files
Import the re library
import re
import os
Matches all XLSX files
pattern = re.compile(r'.+\.docx')
Get file information
for root, dirs, files in os.walk('C: / Users/linghao/Desktop/sign in documents/') :# get file name
for name in files:
Get the file path
file_path = os.path.join(root, name)
# regular match
matching = pattern.search(file_path)
if matching:
Call the system command line to copy files
command_line = 'Copy %s C:\ Users\\linghao\ Desktop\ backup' % file_path.replace('/'.'\ \')
os.system(command_line)
os.remove(file_path)
Copy the code