diff --git a/humblebundle_downloader/cli.py b/humblebundle_downloader/cli.py index 2810a02..fe709ba 100644 --- a/humblebundle_downloader/cli.py +++ b/humblebundle_downloader/cli.py @@ -55,6 +55,23 @@ def parse_args(args): action='store_true', help="Display progress bar for downloads", ) + parser.add_argument( + '--verify', + action='store_true', + help="Verify downloaded files.", + ) + parser.add_argument( + '--verifyall', + action='store_true', + help=("Verify previously downloaded files, " + "including those that have already been verified."), + ) + parser.add_argument( + '--verifyonly', + action='store_true', + help=("Do not download new files, " + "only verify previously downloaded files."), + ) filter_ext = parser.add_mutually_exclusive_group() filter_ext.add_argument( '-e', '--exclude', @@ -92,4 +109,7 @@ def cli(): purchase_keys=cli_args.keys, trove=cli_args.trove, update=cli_args.update, + verify=cli_args.verify, + verifyall=cli_args.verifyall, + verifyonly=cli_args.verifyonly, ).start() diff --git a/humblebundle_downloader/download_library.py b/humblebundle_downloader/download_library.py index 4623f25..75d10d4 100644 --- a/humblebundle_downloader/download_library.py +++ b/humblebundle_downloader/download_library.py @@ -27,9 +27,12 @@ class DownloadLibrary: def __init__(self, library_path, cookie_path=None, cookie_auth=None, progress_bar=False, ext_include=None, ext_exclude=None, platform_include=None, purchase_keys=None, trove=False, - update=False): + update=False, verify=False, verifyall=False, verifyonly=False): self.library_path = library_path self.progress_bar = progress_bar + self.verify = verify + self.verifyall = verifyall + self.verifyonly = verifyonly self.ext_include = [] if ext_include is None else list(map(str.lower, ext_include)) # noqa: E501 self.ext_exclude = [] if ext_exclude is None else list(map(str.lower, ext_exclude)) # noqa: E501 @@ -63,14 +66,32 @@ class DownloadLibrary: self.cache_data = self._load_cache_data(self.cache_file) 
self.purchase_keys = self.purchase_keys if self.purchase_keys else self._get_purchase_keys() # noqa: E501 - if self.trove is True: - logger.info("Only checking the Humble Trove...") - for product in self._get_trove_products(): - title = _clean_name(product['human-name']) - self._process_trove_product(title, product) - else: - for order_id in self.purchase_keys: - self._process_order_id(order_id) + #if we are only verifying, determine if we need to get information from web. + self.need_verify_update = False + if self.verifyonly is True: + for cache_file_key in self.cache_data.keys(): + cache_purchase_key, cache_purchase_file = cache_file_key.split(":",1) + if self.trove is True: + if (not('local_filename_rel' in cache_data[cache_file_key])) and (cache_purchase_key == 'trove'): + self.need_verify_update=True + break + else: + if ((not('md5' in self.cache_data[cache_file_key])) or (not('local_filename_rel' in self.cache_data[cache_file_key]))) and (cache_purchase_key in self.purchase_keys): + self.need_verify_update=True + break + + if (self.verifyonly is False) or (self.need_verify_update is True): + if self.trove is True: + logger.info("Only checking the Humble Trove...") + for product in self._get_trove_products(): + title = _clean_name(product['human-name']) + self._process_trove_product(title, product) + else: + for order_id in self.purchase_keys: + self._process_order_id(order_id) + + if (self.verify is True) or (self.verifyonly is True) or (self.verifyall) is True: + self._verify_cache_files() def _get_trove_download_url(self, machine_name, web_name): try: @@ -119,9 +140,21 @@ class DownloadLibrary: or download.get('timestamp') or product.get('date_added', '0')), 'md5': download.get('md5', 'UNKNOWN_MD5'), + 'local_filename_rel': os.path.join('Humble Trove', title, web_name), } cache_file_info = self.cache_data.get(cache_file_key, {}) + #If we are in verifyonly mode, update local file location and md5 if needed, then go to next file. 
+ if self.verifyonly is True: + if not(cache_file_info == {}): + if not('md5' in cache_file_info): + cache_file_info['md5']=file_info['md5'] + self._update_cache_data(cache_file_key,cache_file_info) + if not('local_filename_rel' in cache_file_info): + cache_file_info['local_filename_rel']=file_info['local_filename_rel'] + self._update_cache_data(cache_file_key,cache_file_info) + continue + if cache_file_info != {} and self.update is not True: # Do not care about checking for updates at this time continue @@ -265,6 +298,17 @@ class DownloadLibrary: local_filename = os.path.join(product_folder, url_filename) cache_file_info = self.cache_data.get(cache_file_key, {}) + #If we are in verifyonly mode, update local file location and md5 if needed, then go to next file. + if self.verifyonly is True: + if not(cache_file_info == {}): + if not('md5' in cache_file_info): + cache_file_info['md5']=file_type['md5'] + self._update_cache_data(cache_file_key,cache_file_info) + if not('local_filename_rel' in cache_file_info): + cache_file_info['local_filename_rel']=os.path.relpath(local_filename,self.library_path) + self._update_cache_data(cache_file_key,cache_file_info) + continue + if cache_file_info != {} and self.update is not True: # Do not care about checking for updates at this time continue @@ -288,7 +332,7 @@ class DownloadLibrary: .format(product_r=product_r, url=url)) file_info = { 'url_last_modified': product_r.headers['Last-Modified'], - 'web_md5': file_type['md5'], + 'md5': file_type['md5'], 'local_filename_rel': os.path.relpath(local_filename,self.library_path), } if file_info['url_last_modified'] != cache_file_info.get('url_last_modified'): # noqa: E501 @@ -345,9 +389,10 @@ class DownloadLibrary: if self.progress_bar: # Do not overwrite the progress bar on next print print() - if ( file_info['web_md5'] != file_info['file_md5'] ): - logger.warning("WARNING: Downloaded md5 mismatch in file {local_filename}\n Web md5:{web_md5}\n File md5:{file_md5}" - 
def _verify_cache_files(self):
    """Re-hash cached downloads on disk and record the outcome in the cache.

    For every entry of ``self.cache_data`` that carries a
    ``local_filename_rel``, the file below ``self.library_path`` is hashed
    with MD5 and compared with the entry's stored ``file_md5``:

    * no ``file_md5`` stored yet -> the computed digest is stored as the
      baseline and the entry counts as verified;
    * digests match -> the entry counts as verified;
    * digests differ, or the file cannot be read -> an error is logged and
      the entry is marked as not verified.

    Entries whose ``verified`` flag is already truthy are skipped unless
    ``self.verifyall`` is ``True``.  A mismatch between the web-reported
    ``md5`` and ``file_md5`` is only logged as a warning; it does not affect
    the ``verified`` flag.

    Returns:
        None.  Results are written into the cache entries and persisted via
        ``self._update_cache_data``.
    """
    print("Verifying downloaded files...")

    # Iterate over a snapshot of the keys: _update_cache_data is called
    # inside the loop and may touch self.cache_data.
    for cache_file_key in list(self.cache_data):
        cache_file_info = self.cache_data.get(cache_file_key, {})

        # Already verified and no full re-check requested: nothing to do.
        if self.verifyall is not True and cache_file_info.get('verified'):
            continue

        # Entries without a recorded location cannot be checked.
        if 'local_filename_rel' not in cache_file_info:
            continue

        local_filename = os.path.join(
            self.library_path, cache_file_info['local_filename_rel'])

        verified = False
        needs_save = False

        # Keep the try body limited to the file read so that errors raised
        # while persisting the cache are not misreported as a missing file.
        md5_hash = hashlib.md5()
        try:
            with open(local_filename, 'rb') as f:
                for chunk in iter(lambda: f.read(65536), b''):
                    md5_hash.update(chunk)
        except FileNotFoundError:
            logger.error("ERROR: Local file not found: {local_filename}"
                         .format(local_filename=local_filename))
        except OSError as error:
            # Permission problems, a directory in place of the file, etc.
            # must not abort verification of the remaining files.
            logger.error("ERROR: Could not read local file {local_filename}: {error}"
                         .format(local_filename=local_filename, error=error))
        else:
            current_md5 = md5_hash.hexdigest()

            if 'file_md5' not in cache_file_info:
                # No digest was stored at download time; adopt the current
                # one as the baseline.
                logger.info("Created initial file md5 for file {local_filename}"
                            .format(local_filename=local_filename))
                cache_file_info['file_md5'] = current_md5
                needs_save = True
                verified = True
            elif cache_file_info['file_md5'] == current_md5:
                logger.info("Downloaded md5 matches in file {local_filename}\n Saved File md5:{file_md5}\n Current File md5:{current_md5}"
                            .format(local_filename=local_filename,
                                    file_md5=cache_file_info['file_md5'],
                                    current_md5=current_md5))
                verified = True
            else:
                logger.error("ERROR: Downloaded md5 mismatch in file {local_filename}\n Saved File md5:{file_md5}\n Current File md5:{current_md5}"
                             .format(local_filename=local_filename,
                                     file_md5=cache_file_info['file_md5'],
                                     current_md5=current_md5))

            # Warn if the web md5 disagrees with the file md5.  The trove
            # path stores the placeholder 'UNKNOWN_MD5' when the site gives
            # no digest; comparing against it would always "mismatch".
            if (verified
                    and 'md5' in cache_file_info
                    and cache_file_info['md5'] != 'UNKNOWN_MD5'
                    and cache_file_info['md5'] != cache_file_info['file_md5']):
                logger.warning("WARNING: Downloaded md5 mismatch in file {local_filename}\n Web md5:{md5}\n File md5:{file_md5}"
                               .format(local_filename=local_filename,
                                       md5=cache_file_info['md5'],
                                       file_md5=cache_file_info['file_md5']))

        # Keep the flag in sync with this run's outcome: promote a stale
        # False to True on success, and clear a previous True when the file
        # no longer checks out, so later --verify runs do not skip it.
        if verified != bool(cache_file_info.get('verified')):
            cache_file_info['verified'] = verified
            needs_save = True

        # NOTE(review): assumes _update_cache_data stores the entry under
        # the key and persists it, so a single call with the final state is
        # sufficient - confirm against its definition.
        if needs_save:
            self._update_cache_data(cache_file_key, cache_file_info)