mirror of
https://github.com/xtream1101/humblebundle-downloader.git
synced 2024-11-20 08:49:17 +01:00
Add support for command line options to verify downloaded files.
This commit is contained in:
parent
b99a903434
commit
d3f4fa145c
@ -55,6 +55,23 @@ def parse_args(args):
|
||||
action='store_true',
|
||||
help="Display progress bar for downloads",
|
||||
)
|
||||
parser.add_argument(
|
||||
'--verify',
|
||||
action='store_true',
|
||||
help="Verify downloaded files.",
|
||||
)
|
||||
parser.add_argument(
|
||||
'--verifyall',
|
||||
action='store_true',
|
||||
help=("Verify previously downloaded files, "
|
||||
"including those that have already been verified."),
|
||||
)
|
||||
parser.add_argument(
|
||||
'--verifyonly',
|
||||
action='store_true',
|
||||
help=("Do not download new files, "
|
||||
"only verify previously downloaded files."),
|
||||
)
|
||||
filter_ext = parser.add_mutually_exclusive_group()
|
||||
filter_ext.add_argument(
|
||||
'-e', '--exclude',
|
||||
@ -92,4 +109,7 @@ def cli():
|
||||
purchase_keys=cli_args.keys,
|
||||
trove=cli_args.trove,
|
||||
update=cli_args.update,
|
||||
verify=cli_args.verify,
|
||||
verifyall=cli_args.verifyall,
|
||||
verifyonly=cli_args.verifyonly,
|
||||
).start()
|
||||
|
@ -27,9 +27,12 @@ class DownloadLibrary:
|
||||
def __init__(self, library_path, cookie_path=None, cookie_auth=None,
|
||||
progress_bar=False, ext_include=None, ext_exclude=None,
|
||||
platform_include=None, purchase_keys=None, trove=False,
|
||||
update=False):
|
||||
update=False, verify=False, verifyall=False, verifyonly=False):
|
||||
self.library_path = library_path
|
||||
self.progress_bar = progress_bar
|
||||
self.verify = verify
|
||||
self.verifyall = verifyall
|
||||
self.verifyonly = verifyonly
|
||||
self.ext_include = [] if ext_include is None else list(map(str.lower, ext_include)) # noqa: E501
|
||||
self.ext_exclude = [] if ext_exclude is None else list(map(str.lower, ext_exclude)) # noqa: E501
|
||||
|
||||
@ -63,14 +66,32 @@ class DownloadLibrary:
|
||||
self.cache_data = self._load_cache_data(self.cache_file)
|
||||
self.purchase_keys = self.purchase_keys if self.purchase_keys else self._get_purchase_keys() # noqa: E501
|
||||
|
||||
if self.trove is True:
|
||||
logger.info("Only checking the Humble Trove...")
|
||||
for product in self._get_trove_products():
|
||||
title = _clean_name(product['human-name'])
|
||||
self._process_trove_product(title, product)
|
||||
else:
|
||||
for order_id in self.purchase_keys:
|
||||
self._process_order_id(order_id)
|
||||
# In verify-only mode the web is consulted only when some cached entry is
# missing data that verification needs ('md5' and/or 'local_filename_rel').
# The flag is stored on the instance, as before.
self.need_verify_update = False
if self.verifyonly is True:
    for cache_file_key, cache_file_info in self.cache_data.items():
        # Cache keys look like "<purchase key>:<file>"; partition() never
        # raises, even for a key that has no ':' separator.
        cache_purchase_key = cache_file_key.partition(":")[0]
        if self.trove is True:
            # Trove entries only need their local file location filled in.
            incomplete = (
                cache_purchase_key == 'trove'
                and 'local_filename_rel' not in cache_file_info
            )
        else:
            # Only entries of the purchases being processed are relevant.
            incomplete = (
                cache_purchase_key in self.purchase_keys
                and ('md5' not in cache_file_info
                     or 'local_filename_rel' not in cache_file_info)
            )
        if incomplete:
            # One incomplete entry is enough to require a refresh.
            self.need_verify_update = True
            break

# Walk the products unless we are verifying only and the cache is complete.
if (self.verifyonly is False) or (self.need_verify_update is True):
    if self.trove is True:
        logger.info("Only checking the Humble Trove...")
        for product in self._get_trove_products():
            title = _clean_name(product['human-name'])
            self._process_trove_product(title, product)
    else:
        for order_id in self.purchase_keys:
            self._process_order_id(order_id)

# Any of the three verify switches triggers the verification pass.
if (self.verify is True) or (self.verifyonly is True) or (self.verifyall is True):  # noqa: E501
    self._verify_cache_files()
|
||||
|
||||
def _get_trove_download_url(self, machine_name, web_name):
|
||||
try:
|
||||
@ -119,9 +140,21 @@ class DownloadLibrary:
|
||||
or download.get('timestamp')
|
||||
or product.get('date_added', '0')),
|
||||
'md5': download.get('md5', 'UNKNOWN_MD5'),
|
||||
'local_filename_rel': os.path.join('Humble Trove', title, web_name),
|
||||
}
|
||||
cache_file_info = self.cache_data.get(cache_file_key, {})
|
||||
|
||||
#If we are in verifyonly mode, update local file location and md5 if needed, then go to next file.
|
||||
if self.verifyonly is True:
|
||||
if not(cache_file_info == {}):
|
||||
if not('md5' in cache_file_info):
|
||||
cache_file_info['md5']=file_info['md5']
|
||||
self._update_cache_data(cache_file_key,cache_file_info)
|
||||
if not('local_filename_rel' in cache_file_info):
|
||||
cache_file_info['local_filename_rel']=file_info['local_filename_rel']
|
||||
self._update_cache_data(cache_file_key,cache_file_info)
|
||||
continue
|
||||
|
||||
if cache_file_info != {} and self.update is not True:
|
||||
# Do not care about checking for updates at this time
|
||||
continue
|
||||
@ -265,6 +298,17 @@ class DownloadLibrary:
|
||||
local_filename = os.path.join(product_folder, url_filename)
|
||||
cache_file_info = self.cache_data.get(cache_file_key, {})
|
||||
|
||||
#If we are in verifyonly mode, update local file location and md5 if needed, then go to next file.
|
||||
if self.verifyonly is True:
|
||||
if not(cache_file_info == {}):
|
||||
if not('md5' in cache_file_info):
|
||||
cache_file_info['md5']=file_type['md5']
|
||||
self._update_cache_data(cache_file_key,cache_file_info)
|
||||
if not('local_filename_rel' in cache_file_info):
|
||||
cache_file_info['local_filename_rel']=os.path.relpath(local_filename,self.library_path)
|
||||
self._update_cache_data(cache_file_key,cache_file_info)
|
||||
continue
|
||||
|
||||
if cache_file_info != {} and self.update is not True:
|
||||
# Do not care about checking for updates at this time
|
||||
continue
|
||||
@ -288,7 +332,7 @@ class DownloadLibrary:
|
||||
.format(product_r=product_r, url=url))
|
||||
file_info = {
|
||||
'url_last_modified': product_r.headers['Last-Modified'],
|
||||
'web_md5': file_type['md5'],
|
||||
'md5': file_type['md5'],
|
||||
'local_filename_rel': os.path.relpath(local_filename,self.library_path),
|
||||
}
|
||||
if file_info['url_last_modified'] != cache_file_info.get('url_last_modified'): # noqa: E501
|
||||
@ -345,9 +389,10 @@ class DownloadLibrary:
|
||||
if self.progress_bar:
|
||||
# Do not overwrite the progress bar on next print
|
||||
print()
|
||||
if ( file_info['web_md5'] != file_info['file_md5'] ):
|
||||
logger.warning("WARNING: Downloaded md5 mismatch in file {local_filename}\n Web md5:{web_md5}\n File md5:{file_md5}"
|
||||
.format(local_filename=local_filename,web_md5=file_info['web_md5'],file_md5=file_info['file_md5']))
|
||||
if 'md5' in file_info:
|
||||
if ( file_info['md5'] != file_info['file_md5'] ):
|
||||
logger.warning("WARNING: Downloaded md5 mismatch in file {local_filename}\n Web md5:{md5}\n File md5:{file_md5}"
|
||||
.format(local_filename=local_filename,md5=file_info['md5'],file_md5=file_info['file_md5']))
|
||||
self._update_cache_data(cache_file_key, file_info)
|
||||
|
||||
finally:
|
||||
@ -420,3 +465,55 @@ class DownloadLibrary:
|
||||
elif self.ext_exclude != []:
|
||||
return ext not in self.ext_exclude
|
||||
return True
|
||||
|
||||
def _verify_cache_files(self):
    """Check every cached download on disk against its recorded md5.

    For each entry in ``self.cache_data`` that has a
    ``'local_filename_rel'``, the file under ``self.library_path`` is
    hashed and compared with the stored ``'file_md5'``:

    * no stored ``'file_md5'``: the computed hash is recorded as the
      baseline;
    * stored hash differs, or the file is missing: an error is logged
      and any existing ``'verified'`` mark is cleared so the file is
      checked again on the next run;
    * otherwise the entry is marked ``'verified': True``.

    A difference between the web-reported ``'md5'`` and ``'file_md5'``
    is only logged as a warning. Entries already marked verified are
    skipped unless ``self.verifyall`` is True. Changes are handed to
    ``self._update_cache_data`` (presumably persisting the cache -
    confirm against that method).
    """
    logger.info("Verifying downloaded files...")

    # Iterate over a snapshot of the keys: _update_cache_data is called
    # inside the loop and may touch self.cache_data.
    for cache_file_key in list(self.cache_data):
        cache_file_info = self.cache_data.get(cache_file_key, {})

        # Already verified and not asked to re-verify everything.
        if self.verifyall is not True and cache_file_info.get('verified'):
            continue

        # Without a recorded location there is nothing to hash.
        if 'local_filename_rel' not in cache_file_info:
            continue

        local_filename = os.path.join(
            self.library_path, cache_file_info['local_filename_rel'])

        # Only the file read sits inside the try, so a FileNotFoundError
        # raised while saving the cache is not misreported as a missing
        # download.
        md5_hash = hashlib.md5()
        try:
            with open(local_filename, 'rb') as f:
                for chunk in iter(lambda: f.read(1024 * 1024), b''):
                    md5_hash.update(chunk)
        except FileNotFoundError:
            logger.error("ERROR: Local file not found: {local_filename}"
                         .format(local_filename=local_filename))
            # A missing file must not stay marked as verified.
            if cache_file_info.get('verified'):
                cache_file_info['verified'] = False
                self._update_cache_data(cache_file_key, cache_file_info)
            continue
        current_md5 = md5_hash.hexdigest()

        needs_save = False
        if 'file_md5' in cache_file_info:
            # Compare with the md5 stored when the file was downloaded.
            if cache_file_info['file_md5'] != current_md5:
                logger.error("ERROR: Downloaded md5 mismatch in file {local_filename}\n Saved File md5:{file_md5}\n Current File md5:{current_md5}"  # noqa: E501
                             .format(local_filename=local_filename,
                                     file_md5=cache_file_info['file_md5'],
                                     current_md5=current_md5))
                # Drop a stale mark so later --verify runs re-check it.
                if cache_file_info.get('verified'):
                    cache_file_info['verified'] = False
                    self._update_cache_data(cache_file_key, cache_file_info)
                continue
            logger.info("Downloaded md5 matches in file {local_filename}\n Saved File md5:{file_md5}\n Current File md5:{current_md5}"  # noqa: E501
                        .format(local_filename=local_filename,
                                file_md5=cache_file_info['file_md5'],
                                current_md5=current_md5))
        else:
            # No md5 was stored at download time: use the computed one
            # as the baseline.
            logger.info("Created initial file md5 for file {local_filename}"
                        .format(local_filename=local_filename))
            cache_file_info['file_md5'] = current_md5
            needs_save = True

        # Warn (but do not fail) if the web md5 disagrees with the file md5.
        if 'md5' in cache_file_info:
            if cache_file_info['md5'] != cache_file_info['file_md5']:
                logger.warning("WARNING: Downloaded md5 mismatch in file {local_filename}\n Web md5:{md5}\n File md5:{file_md5}"  # noqa: E501
                               .format(local_filename=local_filename,
                                       md5=cache_file_info['md5'],
                                       file_md5=cache_file_info['file_md5']))

        # Mark as verified, also when an earlier falsy mark exists.
        if cache_file_info.get('verified') is not True:
            cache_file_info['verified'] = True
            needs_save = True

        # Save once per entry, even if both md5 and mark changed.
        if needs_save:
            self._update_cache_data(cache_file_key, cache_file_info)
|
||||
|
Loading…
Reference in New Issue
Block a user