Office Automation in Python

Python execl operation

Read the contents of the execL worksheet

The XLRD library needs to be installed

import xlrd  # Introduce the XLRD library

# import workbook path
xlsx = xlrd.open_workbook('e:/ Summer training attendance list.xlsx ')

# determine which worksheet to read, or index by worksheet name
# table = xlsx.sheet_by_name("表名")
table = xlsx.sheet_by_index(0)

Print the coordinate value of the cell, be careful not to exceed all the contents of the cell, otherwise an error will be reported
print(table.cell_value(1.2))
print(table.cell(1.2).value)
print(table.row(1) [2].value)

Copy the code

Write to the execl worksheet

The XLWT library needs to be installed

# Introduce XLWT library
import xlwt

Create a new workbook
new_workbook = xlwt.Workbook()
Create a new worksheet
worksheet = new_workbook.add_sheet('sheet1')
Write values at the specified coordinates in the worksheet
worksheet.write(0.0.'test')
Save the workbook
new_workbook.save('test.xls')
Copy the code

Write to the execl table with format

Install the Xlutils library and copy the format of an Excel template into a new execl (this library is no longer supported since Py3.5, so I won’t cover it here).

Python operation database

Install the template Pymysql

Connecting to a Database

import pymysql
# connect to the database, the format for: pymysql. Connect (" address ", "user name", "password", "database name", "encoding format")
data = pymysql.connect("127.0.0.1"."user"."123456"."acg", charset='utf8')

Initialize pointer for database query
cursor = data.cursor()
Copy the code

Add, delete, change and check the essence of the execution of SQL statements, the main connection object is the database, and the operation object is the data table

increase

# increase statement
sql = "INSERT INTO users(name, password) VALUES ('Auto', '123456');"
Execute SQL statement
cursor.execute(sql)
Commit after modifying the stored data
data.commit()
# close the connection
data.close()
Copy the code

change

sql = "UPDATE users SET name='Autovy', password='123456' where id='81'"
cursor.execute(sql)
Commit after modifying the stored data
data.commit()
# close the connection
data.close()
Copy the code

check

1. Return a field as an object

# check
sql = "SELECT nr FROM comment where passage_id='129'"
cursor.execute(sql)
Get the data to which the pointer points
result = cursor.fetchall()
Output the tuple data obtained
print(result)
Copy the code

2. Return all contents as objects

Returns a tuple, so cannot be indexed by field

sql = "SELECT * FROM comment where passage_id='129'"
cursor.execute(sql)
result = cursor.fetchall()
print(result[1] [1])
Copy the code

3. Use aggregate function, need to group return

sql = "SELECT sum(id) FROM comment GROUP BY belong_id;"
cursor.execute(sql)
result = cursor.fetchall()
print(result)
Copy the code

delete

# delete
sql = "DELETE FROM users WHERE name='Auto' "
cursor.execute(sql)
Commit after modifying the stored data
data.commit()
# close the connection
data.close()
Copy the code

With XlWriter, OpenPyXL is more flexible with Excel

Other methods of operating Excel

xlwriter

Install the XlWriter template

Note that the w of the new Workbook is capitalized

Write xlsxwriter with the abbreviation Xw
import xlsxwriter as xw

Create a workbook
workbook = xw.Workbook('test.xlsx')
Create a new worksheet
sheet0 = workbook.add_worksheet('sheet0')
# write number
for i in range(0.300):
    sheet0.write(0, i, i)
Close and save
workbook.close()
Copy the code

openpyxl

Install the OpenPyXL template

import openpyxl
# Open workbook
workbook = openpyxl.load_workbook('Summer training attendance sheet. XLSX')
# Open the worksheet
sheet0 = workbook['Sheet1']
Write values to cells, using alphanumeric combinations
sheet0['B3'] = '5'
sheet0['B4'] = '7'
# save file
workbook.save('Summer training.xlsx')
Copy the code

Practical case: File name sorted into Excel

OS templates are built into Python and do not require additional installation

Python OS templates handle files and directories
import os
import xlwt

# file path
file_dir = 'C: / Users/linghao/Desktop/programming notes'

Returns a list of specified file names
os.listdir(file_dir)
Create a workbook to store information
new_workbook = xlwt.Workbook()
worksheet = new_workbook.add_sheet('Programming Notes')

Read the contents of the list file_dir one by one with I
n = 0
for i in os.listdir(file_dir):
    worksheet.write(n, 0, i)
    n += 1

Save the workbook
new_workbook.save('Programming filenames.xls')
Copy the code

Practical example: Load web page judgment elements

import xlrd
from bs4 import BeautifulSoup
import urllib.request
import xlwt


xlsx = xlrd.open_workbook('684--1366.xlsx')
table = xlsx.sheet_by_index(0)
new_workbook = xlwt.Workbook()
worksheet = new_workbook.add_sheet('sheet1')


for i in range(530.683.1):

    url = table.cell(i, 7).value
    mysite = urllib.request.urlopen("{}".format(url)).read()
    soup = BeautifulSoup(mysite, "html5lib")
    choose = soup.select('ol')

    if(choose):
        worksheet.write(i, 0.'A')



new_workbook.save('test.xls')

Copy the code

Python action word

Write the word

Create a new document
from docx import Document
# alignment
from docx.enum.text import WD_ALIGN_PARAGRAPH
# pounds
from docx.shared import Pt
# Chinese format
from docx.oxml.ns import qn
# Introduce time templates
import time

# Enter today's price
price = input('Price:')
# object
company_list = ['Mr..Mr 'B'.'Mr C'.'Mr D']
Localtime (); time.strftime();
today1 = time.strftime("%Y-%m-%d", time.localtime())
# Format into year, month and day.
today = time.strftime("%Y{y}%m{m}%d{d}", time.localtime()).format(y='years', m='month', d='day')


Walk through the list of objects
for i in company_list:
    Create a new document
    document = Document()
    Set the document base font for styles with an S
    document.styles['Normal'].font.name = U 'be'
    Set the document base Chinese font
    document.styles['Normal'].element.rPr.rFonts.set(qn('w:eastAsia'), U 'be')

    Create the first paragraph initially
    p1 = document.add_paragraph()
    # set the alignment of the first paragraph. Default is left
    p1.alignment = WD_ALIGN_PARAGRAPH.CENTER
    Add text to the first paragraph
    run1 = p1.add_run("Notice on issuing price of % S product" % (today))

    Set the font format
    Set the font style and font size
    run1.font.name = Microsoft Yahei  # Western font
    run1.element.rPr.rFonts.set(qn('w:eastAsia'), U 'Microsoft Yahei')  # Chinese font
    run1.font.size = Pt(21)
    # Set bold
    run1.font.bold = True
    5 pounds behind the section
    p1.space_after = Pt(5)
    # 5 pounds in front
    p1.space_after = Pt(5)


    # Create a second paragraph
    p2 = document.add_paragraph()
    Add text in the second paragraph to address the user
    run2 = p2.add_run(i + '：')

    Set the font format
    Set the font style and font size
    run2.font.name = 'imitation song dynasty style typeface'  # Western font
    run2.element.rPr.rFonts.set(qn('w:eastAsia'), U 'imitation song dynasty style typeface')  # Chinese font
    run2.font.size = Pt(16)
    # Set bold
    run2.font.bold = True

    Create a third paragraph
    p3 = document.add_paragraph()
    Add text in the third paragraph to address the user
    run3 = p3.add_run('According to the arrangement of our company, our company will pay % S of wages today.' % price)

    Set the font format
    Set the font style and font size
    run3.font.name = 'imitation song dynasty style typeface'  # Western font
    run3.element.rPr.rFonts.set(qn('w:eastAsia'), U 'imitation song dynasty style typeface')  # Chinese font
    run3.font.size = Pt(16)
    # Set bold
    run3.font.bold = True
    
    
    # Save the customer name - price notification as the file name
    document.save('%s- price notice.docx ' % i)
Copy the code

Insert table image

Create a new document
from docx import Document
# alignment
from docx.enum.text import WD_ALIGN_PARAGRAPH
# pounds
from docx.shared import Pt
# Chinese format
from docx.oxml.ns import qn
# Image size
from docx.shared import Inches
import time

Get the current date
today = time.strftime("%Y{y}%m{m}%d{d}", time.localtime()).format(y='years', m='month', d='day')

# Enter today's price
price = input('Price:')
# object
company_list = ['Mr..Mr 'B'.'Mr C'.'Mr D']

Walk through the list of objects
for i in company_list:
    Create a new document
    document = Document()
    Set the document base font for styles with an S
    document.styles['Normal'].font.name = U 'Microsoft Yahei'
    document.styles['Normal'].font.size = Pt(14)
    Set the document base Chinese font
    document.styles['Normal'].element.rPr.rFonts.set(qn('w:eastAsia'), U 'Microsoft Yahei')

    Insert the image into the document with a width of 6 inches
    document.add_picture('test.jpg', width=Inches(6))

    Insert table into document
    Create a table with behavior 3 and column 3
    table = document.add_table(rows=3, cols=3, style='Table Grid')
    # merge tables, starting at upper left corner and ending at lower right corner
    table.cell(0.0).merge(table.cell(0.2))

    Fill in the form (one more table than the normal paragraph)
    Fill in the first box
    table_run1 = table.cell(0.0).paragraphs[0].add_run("Quotation Sheet for XX Products")
    # Modify Chinese and Western fonts
    table_run1.font.name = U 'belongs to'
    table_run1.element.rPr.rFonts.set(qn('w:eastAsia'), U 'official script')
    / / A/s/s/s/s/s/s/s/s/s/s/s
    table.cell(0.0).paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

    Fill in other Spaces, using the default font of the document
    table.cell(1.0).text = 'date'
    table.cell(1.1).text = 'price'
    table.cell(1.2).text = 'note'
    table.cell(2.0).text = today
    table.cell(2.1).text = str(price)
    table.cell(2.2).text = ' '

    # pagination, insert other content on a new page
    document.add_page_break()

    document.save('%s- price notice.docx ' % i)
Copy the code

Reading word documents

1. Read plain text

from docx import Document

Open a document
document = Document("Information Security Association Introduction material 2.0.docx")
Get all paragraphs
all_paragraphs = document.paragraphs

# Read paragraph by paragraph, usually not read in run, because there will be text fragmentation problem
for paragraph in all_paragraphs:
    print(paragraph.text)
Copy the code

2. Read the text in the table

from docx import Document

Open a document
document = Document("Mr. A - Price notice.docx")
Get all tables
all_tables = document.tables

for table in all_tables:
    Get all rows
    for row in table.rows:
        Get all cells per row
        for cell in row.cells:
            Print the text inside the cell
            print(cell.text)
Copy the code

3. More complex text extraction (tables, text mash-ups)

import zipfile

Open the Word document as a zipped package
Set the file path and unzip the file
word = zipfile.ZipFile('Information Security Association Introduction Material 2.0.docx')
# word document content is stored in word/document.xml file of the file, set the encoding mode
xml = word.read('word/document.xml').decode('utf-8')

# Split the string with 
      
        intervals to form a list
      
xml_list = xml.split('<w:t>')
# set an empty list of filtered characters
text_list = []
# Slice characters and store them as a list
for i in xml_list:
    if i.find('</w:t>') +1:
        text_list.append(i[:i.find('</w:t>')])
    else:
        pass
Extract the list to plain text
text = "".join(text_list)
print(text)
Copy the code

Word format

1. Create a template (Word)

The previous method of writing Word was optimized to do a good job in function encapsulation

Sample # old four
Create a new document
from docx import Document
# alignment
from docx.enum.text import WD_ALIGN_PARAGRAPH
# pounds
from docx.shared import Pt
# Chinese format
from docx.oxml.ns import qn

# user list
user_list = ['A'.'B'.'C']

for change in user_list:

    # Basic setup
    document = Document()
    document.styles['Normal'].font.name = U 'bold'
    document.styles['Normal'].element.rPr.rFonts.set(qn('w:eastAsia'), U 'bold')

    Create a text template
    def add_context(context) :
        p = document.add_paragraph()
        p.alignment = WD_ALIGN_PARAGRAPH.LEFT
        r = p.add_run(str(context))
        r.font.size = Pt(16)
        p.space_after = Pt(5)
        p.space_before = Pt(5)

    add_context("Dear %s customer, % change)
    add_context("Hello")

    document.save('Letter to %s customer. Docx' % change)
Copy the code

2. Use a template (modify Word)

from docx import Document

# Read word file
document = Document("Information Security Association Introduction material 2.0.docx")

def change_text(old_text, new_text) :
    # Modify plain text
    Copy and paste word document contents including format
    all_paragraphs = document.paragraphs
    for paragraphs in all_paragraphs:
        for run in paragraphs.runs:
            # modify text
            run_text = run.text.replace(old_text, new_text)
            run.text = run_text

    # alter table
    all_tables = document.tables
    for table in all_tables:
        for row in table.rows:
           for cell in row.cells:
                cell_text = cell.text.replace(old_text, new_text)
                cell.text = cell_text

# replace string
change_text('community'.'association')
You can override the save, but be careful to close the open program
document.save("Information Security Association Introduction material 2.0.docx")
Copy the code

Note: The table modification here will use the default format, so keep it as it is and set the document format separately

Word to PDF

Note that the path used must be absolute, because office is actually called and not Python

The library PyWin32 needs to be installed

# Use win32 template functionality
from win32com.client import Dispatch, constants, gencache

Determine the path to read the DOCX and save the PDF path
docx_path = 'C: / Users/linghao brochure/Desktop/test/information security association 2.0 docx'
pdf_path = 'C: / Users/linghao/Desktop/test/information security association brochure. PDF'

# point to the offic function and call it
gencache.EnsureModule('{00020905-0000-0000-C000-000000000046}'.0.8.4)

# Call word service to start conversion
wd = Dispatch("Word.Application")
doc = wd.Documents.Open(docx_path, ReadOnly=1)
doc.ExportAsFixedFormat(pdf_path, constants.wdExportFormatPDF, Item=constants.wdExportDocumentWithMarkup,CreateBookmarks=constants.wdExportCreateHeadingBookmarks)

Exit and save
wd.Quit(constants.wdDoNotSaveChanges)
Copy the code

Read the PDF

Install library PDfMiner3K (guaranteed compatibility with Python after 3.0)

# Import various libraries
# character unstructured transfer library
from io import StringIO
from pdfminer.pdfinterp import PDFResourceManager, process_pdf
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams

# to open the PDF
pdf_file = open('Information Security Association Introduction. PDF'.'rb')
# Key data assignment
rsrcmgr = PDFResourceManager()
retstr = StringIO()
laparams = LAParams()

# Convert text
device = TextConverter(rsrcmgr=rsrcmgr, outfp=retstr, laparams=laparams)
# parameter pass
process_pdf(rsrcmgr=rsrcmgr, device=device, fp=pdf_file)
# close
device.close()
Read values in memory as IO streams
content = retstr.getvalue()
# close
retstr.close()
pdf_file.close()
print(content)
Copy the code

Pandas Imports Excel data

The PANDAS template needs to be installed

import pandas as pd

# Open excel with pandas
e_file = pd.ExcelFile('the original 684-1366. XLSX')
Create table name
data = e_file.parse('Sheet1')
print(data)
Copy the code

Python calls the API

Request url library
import urllib.request
Library for handling JSON data
import json



# constructs the url
key = 'test123'
question1 = input("Please enter your question:")
# Translate the Chinese problem, otherwise an error will be reported
question = urllib.request.quote(question1)
host = 'http://api.gochati.cn/jsapi.php?q=%s&token=%s' % (question, key)

Send the URL request
request = urllib.request.Request(host)
# request adds a header that determines the format of the data to be sent
request.add_header('Content-Type'.'application/json; charset=utf-8')

Receive the returned data
response = urllib.request.urlopen(request)
Read the returned content and translate the read data using JSON
content = response.read().decode("utf-8")
json_all = json.loads(content)
print(content)
Copy the code

Python wechat automatically sends messages

# Introduce a library to deal with wechat, and ensure that you can log in to the wechat web version
import itchat
# Login wechat, allow to generate the TWO-DIMENSIONAL code in the command line, CMD can normally display the two-dimensional code
itchat.auto_login(enableCmdQR=True, hotReload=True)
# Search for a friend name, you can get a nickname, personal introduction, comment name and other information
to_name = itchat.search_friends(name='brother head')
print(to_name)
# Text message sending
itchat.send('Hello duck! ', toUserName=to_name[0] ["UserName"])
# Send pictures
file_img = 'test.jpg'
itchat.send_image(file_img, toUserName=to_name[0] ["UserName"])
# File send
file = 'the original 684-1366. XLSX'
itchat.send_file(file, toUserName=to_name[0] ["UserName"])
Copy the code

Python automatically receives messages

import itchat
# Identify the content of the received message
from itchat.content import *

# function decorates to determine the received message type
# is executed for text
@itchat.msg_register(TEXT)
def text_rely(msg) :
    print(msg.text)
    reply_text = msg.text.replace('吗？'.'! ')
    Send a message to reply to
    return reply_text
# execute for file
@itchat.msg_register([PICTURE, RECORDING, ATTACHMENT,VIDEO])
def download_files(msg) :
    # save file
    msg.download(msg.fileName)
    return 'received'

# Automatic and continuous login
itchat.auto_login(enableCmdQR=True, hotReload=True)
# Auto run
itchat.run()
Copy the code

Python modifies file sending information in real time

The message is sent when the file has been modified

import os
import time
import xlrd
import itchat

# Login to wechat
itchat.auto_login(enableCmdQR=True, hotReload=True)
Check excel file for date changes and save the initial data
change_time_save = time.ctime(os.stat('Summer training and learning summary. XLSX').st_mtime)
while True:
    time.sleep(2)
    Get the current file modification time
    change_time = time.ctime(os.stat('Summer training and learning summary. XLSX').st_mtime)
    If the file has not changed, skip it
    if change_time_save == change_time:
        pass
    # If the file changes, send
    else:
        change_time_save = change_time
        Read the workbook information below
        xlsx = xlrd.open_workbook('Summer training and learning summary. XLSX')
        table = xlsx.sheet_by_index(0)
        content = str(table.cell_value(0.0))
        to_name = itchat.search_friends(name='brother head')
        itchat.send(content, toUserName=to_name[0] ["UserName"])
        print("sent")
Copy the code

Python crawls static web pages

from bs4 import BeautifulSoup
import requests

word = input("Please enter the word you want to find:")
# constructs the url
url = 'https://youdao.com/w/eng/' +  word

Visit a web page
web_data = requests.get(url).text
Parsing web pages with LXML
soup = BeautifulSoup(web_data, 'lxml')
# soup to get the page element, get_text() to get the text
meaning = soup.select('#phrsListTab > div.trans-container > ul > li')
The same element can be iterated through by a for loop
for i in meaning:
    print(i.get_text())
Copy the code

Python crawls dynamic pages

Selenium libraries and PhantomJS tools need to be installed

from selenium import webdriver
import time

# Call Phantomjs and open the page using the mock browser
driver = webdriver.PhantomJS(executable_path="D: / phantomjs - 2.1.1 - Windows/phantomjs - 2.1.1 - Windows/bin/phantomjs. Exe")
# Open the page you want to crawl
driver.get("https://www.bilibili.com/video/BV14A411v7Sx?p=59")
Pause for 5 seconds to finish loading the page
time.sleep(5)
The selector filters the required parts
data = driver.find_element_by_css_selector("#viewbox_report > div:nth-child(3)").text
print(data)
# Close the page
driver.quit() 
Copy the code

Python web pages automatically enter

from selenium import webdriver
# Keyboard mouse click simulation board
from selenium.webdriver.common.keys import Keys
import time

# Call Phantomjs and open the page using the mock browser
driver = webdriver.PhantomJS(executable_path="D: / phantomjs - 2.1.1 - Windows/phantomjs - 2.1.1 - Windows/bin/phantomjs. Exe")
# Open the page you want to crawl
driver.get("https://www.baidu.com/")
Pause for 5 seconds to finish loading the page
time.sleep(5)

# Locate the form and enter the text
driver.find_element_by_css_selector("#kw").send_keys("bilibili")

# Simulate click submission
# Simulate enter
driver.find_element_by_css_selector("#kw").send_keys(Keys.RETURN)
Pause for 5 seconds to finish loading the page
time.sleep(5)
# Simulate mouse click submission
driver.find_element_by_id("su").click()
time.sleep(5)
# Save a screenshot of the page
driver.save_screenshot("baidu2.png")

# Some operations on the browser
# driver. The back () back
# driver. Forward ()
# driver. The refresh () refresh
Copy the code

Added methods for simulating composite keys

driver.find_element_by_css_selector("#kw").send_keys(Keys.CONTROL, "a")    
driver.find_element_by_css_selector("#kw").send_keys(Keys.CONTROL, "x")    
Copy the code

Python Management files

import os
Initialize an empty list store file
list_all = []
# Search all folders and all files in a directory
for root, dirs, files in os.walk('C: / Users/linghao/Desktop/programming notes') :# get file
    for name in files:
        Get the file path
        file_path = os.path.join(root, name)
        # get file name
        file_name = os.path.split(file_path)[-1]
        list_all.append(file_name)
    print(list_all)

Copy the code

Rename files with re

Import the re library
import re
import os

Matches all XLSX files
pattern = re.compile(r'.+\.docx')
Get file information
for root, dirs, files in os.walk('C: / Users/linghao/Desktop/sign in documents/') :# get file name
    for name in files:
        Get the file path
        file_path = os.path.join(root, name)
        # regular match
        matching = pattern.search(file_path)
        if matching:
            # change the name (path-to-path). Note that the file name ends with a slash
            os.rename(file_path, os.path.split(file_path)[-2] + '/233.docx')
Copy the code

Copy and delete files

Import the re library
import re
import os

Matches all XLSX files
pattern = re.compile(r'.+\.docx')
Get file information
for root, dirs, files in os.walk('C: / Users/linghao/Desktop/sign in documents/') :# get file name
    for name in files:
        Get the file path
        file_path = os.path.join(root, name)
        # regular match
        matching = pattern.search(file_path)
        if matching:
            Call the system command line to copy files
            command_line = 'Copy %s C:\ Users\\linghao\ Desktop\ backup' % file_path.replace('/'.'\ \')
            os.system(command_line)
            os.remove(file_path)

Copy the code