Don't use the socket module directly — use more HTTP-oriented libraries instead. Also, you seem to be using Python 2; I have used only Python 3 for the last couple of years.
I paste below some pieces of code (sorry for the mess) that load things fine in Python 3, including through Cloudflare:
def getSacImage(sacimageid, sacfolder):
    """Download the JPEG for *sacimageid* into *sacfolder*.

    Fetches http://site.com/folder/<id>.jpg via getGeneric() and saves it as
    <sacfolder>/<id>_<DDMonYYYYHHMMSS>.jpg, where the date part comes from the
    server's Last-Modified header (so an unchanged image is not re-downloaded).

    Returns a (sacimageid, message) tuple; the message describes success or
    the reason the image was skipped / the error that occurred.
    """
    sacimageidstr = str(sacimageid)
    imageaddr = 'http://site.com/folder/' + sacimageidstr + '.jpg'
    opener, errormesg = getGeneric(imageaddr)
    if opener is None:  # network/HTTP failure; errormesg already includes the URL
        return sacimageid, errormesg
    # From here on the response is open: close it on EVERY exit path via finally
    # (the original closed it by hand before each return and leaked it when an
    # unexpected exception escaped, e.g. from strptime).
    try:
        imgtype = opener.headers.get("Content-Type")
        lastmod = opener.headers.get("Last-Modified")  # ex: Tue, 04 Aug 2009 17:47:52 GMT
        if imgtype != 'image/jpeg':  # only copy if it is a jpg
            return sacimageid, ' not an image (what is that!?)'
        if lastmod is None:
            # BUGFIX: the original passed None to strptime (TypeError) when the
            # server omitted Last-Modified; skip such responses instead.
            return sacimageid, ' had no Last-Modified header, skipped'
        # %Z matches a time-zone NAME like GMT; %z would be needed for a
        # +HHMM / -HHMM offset. (time.strptime() returns a different type
        # than datetime.strptime() — we want the datetime one.)
        image_datetime_obj = datetime.strptime(lastmod, '%a, %d %b %Y %H:%M:%S %Z')
        if image_datetime_obj + timedelta(days=1) < datetime.now():
            return sacimageid, ' image older than 24h, too old, skipped'
        # Build the date suffix for the file name: strip the leading weekday
        # ("Tue, ") and trailing " GMT", then drop spaces and colons,
        # e.g. "04Aug2009174752".
        filedatemod = lastmod[5:-4].replace(" ", "").replace(":", "")
        outpath = None  # defined up-front so the except handler can print it
        try:
            outpath = os.path.join(sacfolder, sacimageidstr + '_' + filedatemod + '.jpg')
        except Exception as e:
            print('Could not do OS path ', outpath, e.__class__, e)
            return sacimageid, ' was a local file path error'
        if os.path.isfile(outpath):  # same id + same date already on disk
            return sacimageid, ' already got the file downloaded before'
        try:
            local_file = open(outpath, 'wb')
        except Exception as e:
            print('Could not open ', outpath, e.__class__, e)
            return sacimageid, ' was a local file open error'
        try:
            with local_file:  # guarantees close even if write/read fails
                local_file.write(opener.read())
        except Exception as e:
            print('Could not read from web or write to local file: ', outpath, e.__class__, e)
            return sacimageid, ' was a local file write error'
        return sacimageid, ' was an image'
    finally:
        opener.close()
*********
def getGeneric(openthisurl):
    """Open *openthisurl* and return a (response, message) pair.

    On success returns (response, openthisurl) where response is the
    file-like object from urllib.request.urlopen() (an
    http.client.HTTPResponse for http/https URLs); the CALLER must close it.
    On any failure returns (None, message) where the message includes the URL
    and a short description of the error.

    See: https://docs.python.org/3/library/urllib.request.html
    (the two reference URLs below L77/L80 in the original were pasted as bare
    lines, which is a Python syntax error — they belong in comments).
    """
    # Without a timeout, the later read() can hang forever (not even ctrl+c
    # works on Windows). The timeout passed to urlopen() covers the socket
    # operations of this request, so the global socket.setdefaulttimeout()
    # workaround is not needed.
    mysockettimeout = 10  # seconds
    try:
        opener = urllib.request.urlopen(openthisurl, timeout=mysockettimeout)
    except HTTPError as e:
        # NOTE: if urlopen() raised, `opener` was never bound, so the
        # original's "close opener just in case" code in each handler was dead.
        if e.code == 404:  # not found: maybe not a model ID at all, or image deleted
            return None, openthisurl + ' = 404 not found error'
        # for example 503 service temporarily unavailable
        print('Could not urlretrieve HTTPError', openthisurl, e.__class__, e, e.code)
        return None, openthisurl + ' was an HTTPError: ' + str(e.code)
    except URLError as e:
        print('Could not urlretrieve URLError', openthisurl, e.__class__, e, e.args)
        if hasattr(e, 'reason'):
            print('We failed to reach a server. Reason: ', e.reason)
        elif hasattr(e, 'code'):
            print('The server could not fulfill the request. Error code: ', e.code)
        return None, openthisurl + ' was an url error'
    except Exception as e:
        print('Could not urlretrieve: ', openthisurl, e.__class__, e)
        return None, openthisurl + ' was an error'
    # urlopen succeeded: hand the open response (not None) back to the caller
    return opener, openthisurl
######## end of getGeneric()
*********
You may also want to multithread this so the loop runs fast (otherwise, running sequentially, it practically never finishes):
# A ProcessPoolExecutor would also work, but needs the
# `if __name__ == '__main__':` guard:
# with futures.ProcessPoolExecutor(max_workers=16) as poolexec:
with futures.ThreadPoolExecutor(max_workers=16) as poolexec:
    for one_model_id in includeids:  # fish for new accounts
        # submit(fn, *args) schedules getSacImage(one_model_id, sacfolder)
        # and returns a Future immediately. Jobs may FINISH in a different
        # order than submitted: some HTTP calls take longer, or time out.
        pending_job = poolexec.submit(getSacImage, one_model_id, sacfolder)
        # printresults(future) runs when each job completes or is cancelled,
        # taking the place of a direct printresults(xmodelid, sacresult) call.
        pending_job.add_done_callback(printresults)
print("Time End Loop: " + time.strftime("%H:%M:%S"))