从Python脚本通过POST发送文件

问题描述:

有没有一种方法可以从Python脚本中通过POST请求发送文件?

是的。您需要使用urllib2模块,并以multipart/form-data内容类型对请求体进行编码。下面是一些帮助你入门的示例代码——它所做的不只是文件上传,但你应该能够通读代码,看明白它是如何工作的:

# NOTE(review): user_agent is not referenced by any code visible in this snippet.
user_agent = "image uploader" 
# Fallback string.Template text for the 'com' form field; $current and $total
# are substituted for each uploaded file.
default_message = "Image $current of $total" 

import logging 
import os 
from os.path import abspath, isabs, isdir, isfile, join 
import random 
import string 
import sys 
import mimetypes 
import urllib2 
import httplib 
import time 
import re 

def random_string(length):
    """Return a random string of exactly ``length`` ASCII letters.

    ``string.ascii_letters`` is used rather than ``string.letters`` because
    the latter is locale-dependent and can yield characters that are not
    valid in a multipart boundary.
    """
    return ''.join(random.choice(string.ascii_letters) for _ in range(length))

def encode_multipart_data(data, files):
    """Encode form fields and files as a multipart/form-data request body.

    Args:
        data: Mapping of form field name to value; every value is passed
            through ``str()``.
        files: Mapping of form field name to the path of the file whose
            contents are uploaded under that field.

    Returns:
        A ``(body, headers)`` tuple: the encoded body and a dict holding
        the matching ``content-type`` and ``content-length`` headers.
    """
    boundary = random_string(30)

    def get_content_type(filename):
        # Fall back to a generic binary type when the type cannot be guessed.
        return mimetypes.guess_type(filename)[0] or 'application/octet-stream'

    def encode_field(field_name):
        return ('--' + boundary,
                'Content-Disposition: form-data; name="%s"' % field_name,
                '', str(data[field_name]))

    def encode_file(field_name):
        filename = files[field_name]
        # Read inside a context manager so the handle is closed right away
        # instead of being left for the garbage collector.
        with open(filename, 'rb') as handle:
            content = handle.read()
        return ('--' + boundary,
                'Content-Disposition: form-data; name="%s"; filename="%s"' % (field_name, filename),
                'Content-Type: %s' % get_content_type(filename),
                '', content)

    lines = []
    for name in data:
        lines.extend(encode_field(name))
    for name in files:
        lines.extend(encode_file(name))
    # Closing delimiter; the empty string makes the body end with CRLF.
    lines.extend(('--%s--' % boundary, ''))
    body = '\r\n'.join(lines)

    headers = {'content-type': 'multipart/form-data; boundary=' + boundary,
               'content-length': str(len(body))}

    return body, headers

def send_post(url, data, files):
    """POST ``data`` and ``files`` to ``url`` as multipart/form-data.

    The response body, status and reason are logged at debug level;
    nothing is returned.

    Args:
        url: Target URL; its host and selector are taken from a
            ``urllib2.Request`` built from it.
        data: Form fields, forwarded to ``encode_multipart_data``.
        files: Field-name-to-path mapping, forwarded to
            ``encode_multipart_data``.
    """
    req = urllib2.Request(url)
    body, headers = encode_multipart_data(data, files)
    connection = httplib.HTTPConnection(req.get_host())
    try:
        connection.request('POST', req.get_selector(), body, headers)
        response = connection.getresponse()
        logging.debug('response = %s', response.read())
        logging.debug('Code: %s %s', response.status, response.reason)
    finally:
        # Always release the socket, even when the request fails.
        connection.close()

def make_upload_file(server, thread, delay=15, message=None,
                     username=None, email=None, password=None):
    """Create an uploader bound to one server and thread.

    The returned callable ``upload_file(path, current, total)`` posts the
    file at ``path`` through ``send_post`` and then sleeps for a random
    whole number of seconds between the effective delay and five seconds
    more. The effective delay is never below 15.
    """
    min_pause = max(int(delay or '0'), 15)

    def upload_file(path, current, total):
        assert isabs(path)
        assert isfile(path)

        logging.debug('Uploading %r to %r', path, server)
        template_text = message or default_message
        comment = string.Template(template_text).safe_substitute(
            current=current, total=total)

        form_fields = {
            'MAX_FILE_SIZE': '3145728',
            'sub': '',
            'mode': 'regist',
            'com': comment,
            'resto': thread,
            'name': username or '',
            'email': email or '',
            # A random password is generated when none was supplied.
            'pwd': password or random_string(20),
        }
        send_post(server, form_fields, {'upfile': path})
        logging.info('Uploaded %r', path)

        pause = random.randint(min_pause, min_pause + 5)
        logging.debug('Sleeping for %.2f seconds------------------------------\n\n', pause)
        time.sleep(pause)

    return upload_file

def upload_directory(path, upload_file):
    """Upload every image file found anywhere under ``path``.

    Files whose name ends in jpg/jpeg/gif/png (case-insensitive) are
    collected first, so each upload can be told its position and the total.

    Args:
        path: Absolute path of an existing directory.
        upload_file: Callable invoked as
            ``upload_file(file_path, current, total)`` with a 1-based
            ``current``.
    """
    assert isabs(path)
    assert isdir(path)

    file_matcher = re.compile(r'\.(?:jpe?g|gif|png)$', re.IGNORECASE)
    matching_filenames = []

    for dirpath, _dirnames, filenames in os.walk(path):
        for name in filenames:
            file_path = join(dirpath, name)
            logging.debug('Testing file_path %r', file_path)
            if file_matcher.search(file_path):
                matching_filenames.append(file_path)
            else:
                # Report the file that was skipped, not the directory being walked.
                logging.info('Ignoring non-image file %r', file_path)

    total_count = len(matching_filenames)
    for index, file_path in enumerate(matching_filenames, 1):
        upload_file(file_path, index, total_count)

def run_upload(options, paths):
    """Upload every file or directory listed in ``paths``.

    Args:
        options: Keyword arguments forwarded to ``make_upload_file``.
        paths: Iterable of file or directory paths; each one is made
            absolute with ``abspath``. Paths that are neither a file nor a
            directory are logged as errors and skipped.
    """
    upload_file = make_upload_file(**options)

    for arg in paths:
        path = abspath(arg)
        if isdir(path):
            upload_directory(path, upload_file)
        elif isfile(path):
            # upload_file takes (path, current, total); a single file given
            # directly is reported as item 1 of 1.
            upload_file(path, 1, 1)
        else:
            logging.error('No such path: %r', path)

    logging.info('Done!')
+1

在Windows上用Python 2.6.6运行此代码时,我遇到了多部分边界(boundary)解析错误。我必须把string.letters改为string.ascii_letters,详见http://stackoverflow.com/questions/2823316/generate-a-random-letter-in-python/2823331#2823331中的讨论。关于边界的要求,请参见:http://stackoverflow.com/questions/147451/what-are-valid-characters-for-creating-a-multipart-form-boundary/147467#147467 – 2011-01-19 12:32:28

+0

调用run_upload时,这一行会导致错误:upload_file(path),因为upload_file需要3个参数。所以我把它替换为:upload_file(path, 1, 1) – Radian 2011-10-19 23:13:00

你也可以看看httplib2及其示例(examples)。我发现使用httplib2比使用内置的HTTP模块更简洁。

+1

没有任何示例显示如何处理文件上传。 – dland 2011-09-15 09:45:37

+0

链接已过时+无内联示例。 – jlr 2016-10-17 18:18:16

Chris Atlee的poster库对此非常有效(特别是便捷函数poster.encode.multipart_encode())。另一个好处是,它支持大文件的流式传输,无需将整个文件加载到内存中。另见Python issue 3244。

阻止您直接对文件对象使用urlopen的唯一原因,是内置的file对象没有定义__len__。一个简单的解决办法是创建一个子类,为urlopen提供行为正确的文件对象。我还在下面的代码中修改了Content-Type请求头。

import os 
import urllib2 
class EnhancedFile(file):
    """A ``file`` subclass whose ``len()`` is the size reported by ``os.fstat``."""

    def __init__(self, *args, **keyws):
        super(EnhancedFile, self).__init__(*args, **keyws)

    def __len__(self):
        # st_size is the field found at index 6 of the fstat result tuple.
        size = os.fstat(self.fileno()).st_size
        return int(size)

theUrl = "http://example.com/abcde"
theHeaders = {'Content-Type': 'text/xml'}

# Open in binary mode: len(theFile) is the on-disk size, and text mode may
# translate line endings on some platforms so the bytes read would not match.
theFile = EnhancedFile('a.xml', 'rb')
try:
    theRequest = urllib2.Request(theUrl, theFile, theHeaders)
    response = urllib2.urlopen(theRequest)
finally:
    # Close the upload source even if building or sending the request fails.
    theFile.close()


for line in response:
    print(line)

http://docs.python-requests.org/en/latest/user/quickstart/#post-a-multipart-encoded-file

Requests使得上传多部分编码(multipart-encoded)的文件变得非常简单:

>>> with open('report.xls', 'rb') as f: r = requests.post('http://httpbin.org/post', files={'report.xls': f}) 

就是这样。我不是在开玩笑 - 这是一行代码。文件已发送。让我们来看看:

>>> r.text 
{ 
    "origin": "179.13.100.4", 
    "files": { 
    "report.xls": "<censored...binary...data>" 
    }, 
    "form": {}, 
    "url": "http://httpbin.org/post", 
    "args": {}, 
    "headers": { 
    "Content-Length": "3196", 
    "Accept-Encoding": "identity, deflate, compress, gzip", 
    "Accept": "*/*", 
    "User-Agent": "python-requests/0.8.0", 
    "Host": "httpbin.org:80", 
    "Content-Type": "multipart/form-data; boundary=127.0.0.1.502.21746.1321131593.786.1" 
    }, 
    "data": "" 
} 

看起来Python的requests库无法处理非常大的多部分文件。

该文档建议您查看requests-toolbelt

这是他们文档中的相关页面。

def visit_v2(device_code, camera_code):
    """POST two files plus form fields to ``/adopen/device/visit_v2``.

    Relies on names defined outside this block: ``server_ip``,
    ``server_port``, ``person_data``, ``json``, ``MultipartParam`` and
    ``multipart_encode``.

    Args:
        device_code: Sent as the ``device_code`` form field.
        camera_code: Accepted but not used by this function.

    Returns:
        The JSON-decoded response body on success, or ``None`` when the
        server answers with an HTTP error (its status and body are printed).
    """
    image1 = MultipartParam.from_file("files", "/home/yuzx/1.txt")
    image2 = MultipartParam.from_file("files", "/home/yuzx/2.txt")
    datagen, headers = multipart_encode([('device_code', device_code), ('position', 3), ('person_data', person_data), image1, image2])
    # Materialise the body once and reuse it: printing by iterating
    # ``datagen`` and then handing the same (presumably one-shot) iterator
    # to the request risks sending an already-consumed body.
    body = "".join(datagen)
    print(body)
    if server_port == 80:
        port_str = ""
    else:
        port_str = ":%s" % (server_port,)
    url_str = "http://" + server_ip + port_str + "/adopen/device/visit_v2"
    headers['nothing'] = 'nothing'
    request = urllib2.Request(url_str, body, headers)
    try:
        response = urllib2.urlopen(request)
        resp = response.read()
        print("http_status = %s" % (response.code,))
        result = json.loads(resp)
        print(resp)
        return result
    except urllib2.HTTPError as e:
        print("http_status = %s" % (e.code,))
        print(e.read())

我想测试Django的REST API,下面的写法对我有效:

def test_upload_file(self):
    """Upload a CSV file to the ``price-matrix-csv`` route and expect HTTP 200."""
    filename = "/Users/Ranvijay/tests/test_price_matrix.csv"
    client = APIClient()
    # client.credentials(HTTP_AUTHORIZATION='Token ' + token.key)
    # Keep the file open only for the duration of the request so the
    # handle is not leaked.
    with open(filename, 'rb') as upload:
        data = {'file': upload}
        response = client.post(reverse('price-matrix-csv'), data, format='multipart')

    print(response)
    self.assertEqual(response.status_code, status.HTTP_200_OK)