V2EX = way to explore
V2EX 是一个关于分享和探索的地方
现在注册
已注册用户请  登录
V2EX 提问指南
wangfeng3769
V2EX  ›  问与答

python 的 multiprocessing

  •  
  •   wangfeng3769 · 2014-08-04 14:59:45 +08:00 · 11709 次点击
    这是一个创建于 3771 天前的主题,其中的信息可能已经有所发展或是发生改变。
    python的multiprocessing在windows(4核)下不能正常运行,加了freeze_support()仍然不能正常运行,希望v友们能够帮忙一下。
    9 条回复    2014-08-05 16:10:11 +08:00
    wangfeng3769
        1
    wangfeng3769  
    OP
       2014-08-04 15:00:14 +08:00
    multiprocessing.freeze_support()
    # lock = multiprocessing.Lock()
    w = multiprocessing.Process(target=work, args=(url, ))
    nw = multiprocessing.Process(target=download, args=())
    w.start()
    nw.start()
    w.join()
    nw.join()
    wangfeng3769
        2
    wangfeng3769  
    OP
       2014-08-04 16:34:09 +08:00
    Traceback (most recent call last):
    File "<string>", line 1, in <module>
    File "D:\Python27\lib\multiprocessing\forking.py", line 380, in main
    You have to input a complete URL
    prepare(preparation_data)
    Traceback (most recent call last):
    File "D:\Python27\lib\multiprocessing\forking.py", line 495, in prepare
    File "<string>", line 1, in <module>
    '__parents_main__', file, path_name, etc
    File "D:\Python27\lib\multiprocessing\forking.py", line 380, in main
    File "D:\sworkspace\weixinspider.py", line 160, in <module>
    prepare(preparation_data)
    File "D:\Python27\lib\multiprocessing\forking.py", line 495, in prepare
    main(url)
    File "D:\sworkspace\weixinspider.py", line 147, in main
    '__parents_main__', file, path_name, etc
    File "D:\sworkspace\weixinspider.py", line 160, in <module>
    w.start()
    File "D:\Python27\lib\multiprocessing\process.py", line 130, in start
    main(url)
    File "D:\sworkspace\weixinspider.py", line 147, in main
    self._popen = Popen(self)
    File "D:\Python27\lib\multiprocessing\forking.py", line 258, in __init__
    w.start()
    File "D:\Python27\lib\multiprocessing\process.py", line 130, in start
    cmd = get_command_line() + [rhandle]
    self._popen = Popen(self)
    File "D:\Python27\lib\multiprocessing\forking.py", line 358, in get_command_li
    ne
    File "D:\Python27\lib\multiprocessing\forking.py", line 258, in __init__
    is not going to be frozen to produce a Windows executable.''')
    RuntimeError: cmd = get_command_line() + [rhandle]

    File "D:\Python27\lib\multiprocessing\forking.py", line 358, in get_command_li
    ne
    Attempt to start a new process before the current process
    has finished its bootstrapping phase.

    This probably means that you are on Windows and you have
    forgotten to use the proper idiom in the main module:

    if __name__ == '__main__':
    freeze_support()
    ...

    The "freeze_support()" line can be omitted if the program
    is not going to be frozen to produce a Windows executable.
    is not going to be frozen to produce a Windows executable.''')
    RuntimeError:
    Attempt to start a new process before the current process
    has finished its bootstrapping phase.

    This probably means that you are on Windows and you have
    forgotten to use the proper idiom in the main module:

    if __name__ == '__main__':
    freeze_support()
    ...

    The "freeze_support()" line can be omitted if the program
    is not going to be frozen to produce a Windows executable.
    wangfeng3769
        3
    wangfeng3769  
    OP
       2014-08-04 16:35:22 +08:00
    求大仙们说一声。
    wingao
        4
    wingao  
       2014-08-04 18:02:21 +08:00
    if __name__ == '__main__':
    w = multiprocessing.Process(target=work, args=(url, ))
    nw = multiprocessing.Process(target=download, args=())
    w.start()
    nw.start()
    w.join()
    nw.join()
    wingao
        5
    wingao  
       2014-08-04 18:03:41 +08:00
    这个错误应该是你在其他的子进程里又开了进程
    把创建进程放到 if __name__ == '__main__' 下
    wangfeng3769
        6
    wangfeng3769  
    OP
       2014-08-05 15:53:10 +08:00
    #coding:utf-8
    import re
    import os
    import requests as R
    from BeautifulSoup import BeautifulSoup as BS
    import multiprocessing
    import urlparse
    import time
    opt = "Mozilla/5.0 (Linux; U; Android 4.1.2; zh-cn; GT-I9300 Build/JZO54K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30 MicroMessenger/5.2.380"
    headers = {'User-Agent':opt}
    a,b = multiprocessing.Pipe()
    domain_url = "66365.m.weimob.com"
    G_queue_url = []
    G_spidered_url = []

    def is_existed(file_real_path):
    i=1
    while 1:
    if i==1:
    file_real_path_tem = file_real_path+'%s.htm'%""
    if os.path.isfile(file_real_path_tem):
    file_real_path_tem = file_real_path+'_%s.htm'%str(i)
    else:
    return file_real_path_tem

    i=i+1


    def get_web_page(url):
    try:
    r = R.get(url,headers=headers)
    html = r.text
    except:
    return None

    if html:
    return html
    else:
    return None

    def btree(O):
    if O:
    return BS(O,fromEncoding="utf-8")
    else:
    return None

    def download():
    url = "http://66365.m.weimob.com/weisite/home?pid=66365&bid=135666&wechatid=oAdrCtzBdLhgpyIOYtBNELkWXJ68&wxref=mp.weixin.qq.com"
    print 'download'
    checked_list = []

    while True:
    print 'I am busy'


    recv_data = b.recv()
    # recv_data = [url]
    # print recv_data
    if type(recv_data)!=type([]):
    if recv_data ==0:
    break

    for url in recv_data:
    print url
    if url in checked_list:
    # checked_list.append(url)
    continue
    else:
    checked_list.append(url)

    if re.search(domain_url,url):
    url_list = urlparse.urlparse(url)
    domain_folder = url_list[1]
    file_path = url_list.path
    real_path_r = os.path.sep.join([domain_folder,file_path])
    real_path_l = re.split(r'/|\\',real_path_r)
    # print real_path_l
    if len(real_path_l)==2:
    if not real_path_l[-1]:
    continue
    real_path_f = os.path.sep.join(real_path_l[0:-1])
    real_path_r = is_existed(real_path_r)
    try:
    if not os.path.exists(real_path_f) :
    os.makedirs(real_path_f)
    try:
    f = open(real_path_r,'w')
    except :
    open(real_path_r).close()
    f = open(real_path_r,'w')
    else:
    try:
    f = open(real_path_r,'w')
    except :
    open(real_path_r).close()
    f = open(real_path_r,'w')
    r = R.get(url)
    content = unicode(r.text).encode(r.encoding,'ignore')
    if not content:
    continue
    f.write(content)
    f.close()
    except:
    pass
    else:
    pass

    def get_links(html):
    soup = btree(html)
    # print soup
    if not soup:
    return []
    a_links = soup.findAll('a')
    if not a_links:
    return []
    link_list = []
    for link in a_links:
    # print link
    try:
    link = link.get('href')
    if not link:
    continue
    except:
    # print link
    continue

    if not re.search(domain_url,link) and not re.search('http', link):
    link_list.append("http://"+domain_url+link)
    return link_list

    def work(url):

    global G_spidered_url
    global G_queue_url
    # print G_spidered_url,G_queue_url
    G_spidered_url.append(url)
    html = get_web_page(url)
    all_links = get_links(html)
    send_list=[]
    if G_queue_url and all_links:
    for slink in all_links:
    if slink not in G_queue_url:
    send_list .append(slink)
    G_queue_url.append(slink)
    a.send(send_list)
    elif not G_queue_url and all_links :

    G_queue_url = all_links
    a.send(all_links)

    for url in G_queue_url:
    if url in G_spidered_url:
    continue
    else:
    G_spidered_url.append(url)
    work(url)
    a.send(0)

    def main(url):
    multiprocessing.freeze_support()
    lock = multiprocessing.Lock()
    w = multiprocessing.Process(target=work, args=(url, ))
    nw = multiprocessing.Process(target=download, args=())
    w.start()
    nw.start()
    w.join()
    nw.join()


    if __name__ == '__main__':
    url= "http://66365.m.weimob.com/weisite/home?pid=66365&bid=135666&wechatid=oAdrCtzBdLhgpyIOYtBNELkWXJ68&wxref=mp.weixin.qq.com"

    import sys
    try:
    url = sys.argv[1]
    except:
    print "You have to input a complete URL"
    # main(url)
    multiprocessing.freeze_support()
    lock = multiprocessing.Lock()
    w = multiprocessing.Process(target=work, args=(url, ))
    nw = multiprocessing.Process(target=download, args=())
    w.start()
    nw.start()
    w.join()
    nw.join()


    想说一下 在windows下无法运行download ,看一下怎么回事,专门扒人家网站的爬虫。希望copy下来试试。祝大家好运。
    wangfeng3769
        7
    wangfeng3769  
    OP
       2014-08-05 15:54:05 +08:00
    有点流氓但是呢 现在就这样了。
    wangfeng3769
        9
    wangfeng3769  
    OP
       2014-08-05 16:10:11 +08:00
    退出的时候有点小问题,不知道哪儿出了问题。
    关于   ·   帮助文档   ·   博客   ·   API   ·   FAQ   ·   实用小工具   ·   2635 人在线   最高记录 6679   ·     Select Language
    创意工作者们的社区
    World is powered by solitude
    VERSION: 3.9.8.5 · 31ms · UTC 10:52 · PVG 18:52 · LAX 02:52 · JFK 05:52
    Developed with CodeLauncher
    ♥ Do have faith in what you're doing.