1. Fundamentals
Procedure: 1. After the client splits the file into parts, it submits an upload request to the API server, which generates the corresponding presigned URLs (steps 1 and 2; if you want to limit the number of uploads per client, you can issue a fixed number of tokens at this stage). 2. The client uses each generated presigned URL to construct an HTTP request that uploads the part data to the S3 service (step 3). 3. After the client has finished uploading all parts, it submits a Complete request to the API server, which then sends the Complete request to the S3 service (steps 4 and 5). 4. The client receives the response from the API server and finishes the operation (consider reclaiming the tokens here).
2. Advantages and disadvantages
Advantages: 1. Access keys and secret keys are not stored on the client, which prevents key leakage. 2. Each presigned URL corresponds to one key name; within its validity period it can be used to upload to, and overwrite, the existing object under that key. 3. The server can be combined with various auth systems to authenticate and authorize clients, which makes it easy to integrate with existing business systems. 4. Uploading and downloading on the client side is flexible: once it has received a presigned URL, any client that supports the HTTP protocol can upload or download through that URL. 5. It is suitable for uploading large files. Compared with a plain (single-request) presigned upload, it supports concurrent uploads during the data-upload stage, which greatly improves upload efficiency.
Disadvantages: 1. The upload procedure requires multiple interactions, so the process is somewhat complicated. 2. It is constrained by the S3 multipart upload standard: this method does not apply to files smaller than 5 MB.
3. Implementation
Install server dependencies
pip install boto
pip install flask-restful
Copy the code
The server demo code is as follows:
# -*- coding: utf-8 -*-
"""Demo API server that issues presigned URLs for S3 multipart uploads.

Endpoints:
    POST /presign   -- initiate a multipart upload and return one presigned
                       PUT URL per requested part.
    POST /complete  -- ask the S3 service to assemble the uploaded parts.
"""
import hmac
import logging
import re
import time
from hashlib import sha1 as sha

import boto
import boto.s3.connection
from boto.s3.multipart import MultiPartUpload
from flask import Flask, request
from flask_restful import Api, Resource

py3k = False
try:  # Python 2 module layout
    from urllib import quote
    from urlparse import urlparse, unquote
    from base64 import encodestring
except ImportError:  # Python 3 module layout
    py3k = True
    from urllib.parse import quote, urlparse, unquote
    from base64 import encodebytes as encodestring

logger = logging.getLogger(__name__)

app = Flask(__name__)
api = Api(app)

# Form fields whose name is purely numeric carry "part number -> Content-MD5".
_PART_NUMBER_RE = re.compile(r'^\d+$')
# Form fields with this prefix carry user metadata for the object.
_METADATA_PREFIX = 'x-amz-meta-'


class MultiPartUpload_Presign(MultiPartUpload):
    """A ``MultiPartUpload`` handle rebuilt from a known upload id.

    Lets the server complete an upload whose parts were sent by a client
    directly to the S3 service through presigned URLs.
    """

    def __init__(self, id, bucket, key_name):
        MultiPartUpload.__init__(self)
        self.id = id
        self.bucket = bucket
        self.key_name = key_name

    def complete_upload(self):
        """Send the Complete request built from this upload's part list."""
        xml = self.to_xml()
        return self.bucket.complete_multipart_upload(self.key_name,
                                                     self.id, xml)


class S3PreSign(object):
    """Builds presigned part-upload URLs for one object in one bucket.

    Args:
        object_name: key name of the object being uploaded.
        metadata: optional dict passed to ``initiate_multipart_upload``.
        policy: optional canned ACL passed to ``initiate_multipart_upload``.
        upload_ID: id of an already-initiated multipart upload. When given,
            no new upload is initiated; when ``None`` (the default) a new
            multipart upload is started, as before.
    """

    def __init__(self, object_name, metadata=None, policy=None,
                 upload_ID=None):
        self.service_url = 's3.ceph.work'  # S3 service endpoint
        self.access_key = ''  # access key (fill in)
        self.secret_key = ''  # secret key (fill in)
        self.bucket_name = 'multi-upload'  # bucket name
        self.object_name = str(object_name)
        conn = boto.connect_s3(
            aws_access_key_id=self.access_key,
            aws_secret_access_key=self.secret_key,
            host=self.service_url,
            port=80,
            is_secure=False,  # plain HTTP; the built URLs use http:// too
            # Alternative for path-style endpoints:
            # calling_format=boto.s3.connection.OrdinaryCallingFormat(),
            calling_format=boto.s3.connection.SubdomainCallingFormat(),
        )
        self.bucket = conn.get_bucket(self.bucket_name)
        if upload_ID is None:
            upload_ID = self.Make_uploadID(self.object_name,
                                           metadata=metadata, policy=policy)
        self.upload_ID = upload_ID

    def Make_uploadID(self, object_name, metadata=None, policy=None):
        """Initiate a multipart upload and return its upload id."""
        mpu = self.bucket.initiate_multipart_upload(
            object_name, metadata=metadata, policy=policy)
        return mpu.id

    def complete_upload(self, upload_ID):
        """Complete the multipart upload identified by ``upload_ID``.

        Returns:
            200 when the Complete request succeeded, 422 when it raised.
        """
        mpu = MultiPartUpload_Presign(id=upload_ID, bucket=self.bucket,
                                      key_name=self.object_name)
        try:
            mpu.complete_upload()
        except Exception:
            # Request boundary: report the failure as a status code, but
            # keep the traceback in the log instead of discarding it.
            logger.exception("completing multipart upload %s failed",
                             upload_ID)
            return 422
        return 200

    def get_signature_str(self, sign_str):
        """Return the URL-encoded base64 HMAC-SHA1 of ``sign_str``."""
        key = self.secret_key
        msg = sign_str
        # hmac needs bytes on Python 3; on Python 2 ``str`` already is bytes.
        if not isinstance(key, bytes):
            key = key.encode('utf-8')
        if not isinstance(msg, bytes):
            msg = msg.encode('utf-8')
        digest = hmac.new(key, msg, digestmod=sha).digest()
        signature = encodestring(digest).strip()
        if not isinstance(signature, str):
            signature = signature.decode('ascii')
        # Percent-encode every reserved character ('+', '/', '=') so the
        # signature survives being placed in a query string.
        return quote(signature, safe='')

    def build_url(self, expires, partNumber, Signature):
        """Assemble the presigned URL for one part."""
        url_ = ("http://{bucket_name}.{service_url}/{object_name}"
                "?uploadId={uploadId}&partNumber={partNumber}"
                "&Expires={Expires}&AWSAccessKeyId={AWSAccessKeyId}"
                "&Signature={Signature}").format(
            bucket_name=self.bucket_name,
            service_url=self.service_url,
            object_name=self.object_name,
            uploadId=self.upload_ID,
            partNumber=partNumber,
            Expires=expires,
            AWSAccessKeyId=self.access_key,
            Signature=Signature,
        )
        return url_

    def build_url_with_partid(self, expires, partNumber, partMd5):
        """Sign a PUT of part ``partNumber`` and return its presigned URL.

        Args:
            expires: absolute expiry time (epoch seconds) placed in the URL.
            partNumber: part number within the multipart upload.
            partMd5: Content-MD5 value the client will send with the part.
        """
        sign_str = ("PUT\n{partMd5}\n\n{Expires}\n"
                    "/{bucket_name}/{object_name}"
                    "?partNumber={partNumber}&uploadId={uploadId}").format(
            partMd5=partMd5,
            Expires=expires,
            bucket_name=self.bucket_name,
            object_name=self.object_name,
            partNumber=partNumber,
            uploadId=self.upload_ID,
        )
        Signature_ = self.get_signature_str(sign_str)
        return self.build_url(expires, partNumber, Signature_)


class MultiPart_List(Resource):
    """``POST /presign``: start an upload and return per-part URLs."""

    def post(self):
        """Handle the presign request.

        Form fields read: ``keyname`` and ``expires`` (required),
        ``contenttype``, ``x-amz-acl``, ``x-amz-meta-*`` (optional), and any
        purely numeric field name, taken as "part number -> Content-MD5".

        Returns:
            ``({'UploadID': ..., '<part>': '<url>', ...}, 201)`` on success,
            or a message with status 400 for a missing/invalid field.
        """
        form = request.form
        if 'keyname' not in form:
            return "no key", 400
        keyname = form['keyname']
        if 'expires' not in form:
            return "no expires", 400
        try:
            lifetime = int(form['expires'])
        except ValueError:
            # Reject before an upload is initiated on the S3 service.
            return "invalid expires", 400

        metadata = {}
        policy = None
        part_md5 = {}
        if 'contenttype' in form:
            metadata['Content-Type'] = str(form['contenttype'])
        if 'x-amz-acl' in form:
            policy = str(form['x-amz-acl'])
        for field in form:
            if _PART_NUMBER_RE.match(field):
                part_md5[field] = form[field]
            if field.startswith(_METADATA_PREFIX):
                metadata[field[len(_METADATA_PREFIX):]] = str(form[field])
        logger.debug("presign request: metadata=%s policy=%s keyname=%s "
                     "expires=%s", metadata, policy, keyname, lifetime)

        # Forward the collected metadata/ACL so they reach the new upload.
        s3client = S3PreSign(keyname, metadata=metadata or None,
                             policy=policy)
        expires = int(time.time()) + lifetime
        result = {'UploadID': s3client.upload_ID}
        for number, md5_value in part_md5.items():
            result[number] = s3client.build_url_with_partid(
                expires, number, md5_value)
        return result, 201


class Complete_MultiPart(Resource):
    """``POST /complete``: finish a previously presigned upload."""

    def post(self):
        """Complete the upload named by form fields ``keyname``/``uploadid``.

        Returns:
            ``({'status_code': code}, code)`` where ``code`` is the value
            returned by ``S3PreSign.complete_upload``, or a message with
            status 400 when a field is missing.
        """
        form = request.form
        if 'keyname' not in form:
            return "no key", 400
        if 'uploadid' not in form:
            return "no UploadID", 400
        uploadid = form['uploadid']
        # Reuse the client's upload id instead of initiating a new upload.
        s3client = S3PreSign(form['keyname'], upload_ID=uploadid)
        result = s3client.complete_upload(uploadid)
        return {"status_code": result}, result


api.add_resource(MultiPart_List, '/presign')
api.add_resource(Complete_MultiPart, '/complete')

if __name__ == '__main__':
    app.run(debug=True)
Install client dependencies
pip install requests
Copy the code
The client demo code is as follows:
# -*- coding: utf-8 -*- import requests from base64 import encodestring from hashlib import md5 import os import json from multiprocessing import Pool def multipart_upload_with_part(url_, part_file_path, partMD5): headers = {} headers["Content-MD5"] = partMD5 with open(part_file_path,'r') as fh: response = requests.put(url_, headers=headers, data=fh.read()) if response.status_code == 200: print "{} upload Sucessful !" .format(part_file_path) class S3client(): def __init__(self, key_name, expires,part_num, uploadfile_path, policy=None, contenttype=None, metadata=None ,processes_num=2): self.multipart_data = {} if key_name: self.multipart_data['keyname'] = key_name if expires: self.multipart_data['expires'] = expires if policy: self.multipart_data['x-amz-acl'] = policy if contenttype: self.multipart_data['contenttype'] = contenttype if metadata: for k in metadata: self.multipart_data[k] = metadata[k] self.part_num = part_num self.processes_num = processes_num self.uploadfile_path = Uploadfile_path self.server = 'http://localhost:5000/' self.upload_file_list_ = {} def split_file(self): filelist = [] statinfo = os.stat(self.uploadfile_path) chunksize = statinfo.st_size / self.part_num print "File size: %d(MB)" % (statinfo.st_size / (1024 * 1024)) print self.uploadfile_path,chunksize with open(self.uploadfile_path, "rb") as f: index = 1 while True: chunk = f.read(chunksize) if (chunk): fn = "%s.part.%d" % (self.uploadfile_path, index) # print "creating", fn with open(fn, "wb") as fw: fw.write(chunk) partMD5 = self.compute_hash(fn) tmp_ = {} tmp_[fn] = str(partMD5) filelist.append(tmp_) index = index + 1 else: break return filelist def compute_hash(self, filepath, buf_size=8192, size=None, hash_algorithm=md5): hash_obj = hash_algorithm() with open(filepath) as fp: spos = fp.tell() if size and size < buf_size: s = fp.read(size) else: s = fp.read(buf_size) while s: if not isinstance(s, bytes): s = s.encode('utf-8') hash_obj.update(s) if size: size -= len(s) if size <= 0: 
break if size and size < buf_size: s = fp.read(size) else: s = fp.read(buf_size) base64_digest = encodestring(hash_obj.digest()).decode('utf-8') if base64_digest[-1] == '\n': base64_digest = base64_digest[0:-1] return base64_digest def make_upload_list(self): upload_file_list = self.split_file() for f in upload_file_list: part_path = f.keys()[0] partMD5 = f.values()[0] # partnum_ = f.keys()[0].split(".")[-1] yield {part_path:partMD5} def get_multipart_presignurl(self): upload_file_list = self.make_upload_list() for i in upload_file_list: self.multipart_data[i.keys()[0].split(".")[-1]] = i.values()[0] self.upload_file_list_[i.keys()[0].split(".")[-1]] = {i.keys()[0]:i.values()[0]} url_ = self.server + "presign" r = requests.post(url_, data=self.multipart_data) allurl_ = json.loads(r.text) UploadID = allurl_.pop('UploadID') return UploadID,allurl_ def complete(self,UploadID,key_name): data = {"uploadid":UploadID,'keyname':key_name} url_ = self.server + "complete" r = requests.post(url_, data=data) if r.status_code == 200: print "Multipart upload finished!" else: print "Multipart upload failed!" def upload_mulprocess(self,allurl_): p = Pool(processes=self.processes_num) for url in allurl_: partNUm = url tmp_file = self.upload_file_list_[partNUm] filepath = tmp_file.keys()[0] partMD5 = tmp_file.values()[0] put_url = allurl_[url] p.apply_async(multipart_upload_with_part, (put_url,filepath,partMD5,)) print 'Waiting for all subprocesses done... 
' p.close() p.join() if __name__ == "__main__": Key_name = 'abc.json' # Object name to upload part_num = 6 # Number of file splits Expires = 300 # Signature validity period file_path = '/ TMP /abc.json' # Local path of the uploaded file Processes_num = 2 # ContentType = 'application/json' # content-type policy = 'public-read' # Metadata = {'x-amz-meta-abc':'abcd'} #object metadata # step 1: initialize s3Client = S3client (key_name, expires, part_num file_path, policy, contenttype, metadata, and 2) step # 2: generate PresignURL UploadID, upload_file_list Upload_mulprocess (upload_file_list) = s3Client.get_multipart_presignURL () Step 4: Submit the compelte request to complete the final logical merge of each block of data.Copy the code