diff --git a/Modules/vdf/LICENSE b/Modules/vdf/LICENSE new file mode 100644 index 0000000..ee59795 --- /dev/null +++ b/Modules/vdf/LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2015 Rossen Georgiev + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
# Python source of Modules/vdf/__init__.py, recovered from a whitespace-flattened
# ``git diff`` in which runs of text between "<" and ">" had been stripped.
# Every place where stripped text had to be restored is marked NOTE(review).
"""
Module for deserializing/serializing to and from VDF
"""
__version__ = "3.4"
__author__ = "Rossen Georgiev"

import re
import sys
import struct
from binascii import crc32
from io import BytesIO
from io import StringIO as unicodeIO

try:
    from collections.abc import Mapping
except ImportError:
    # only a missing ``collections.abc`` should trigger the fallback
    from collections import Mapping

# Py2 & Py3 compatibility
if sys.version_info[0] >= 3:
    string_type = str
    int_type = int
    BOMS = '\ufffe\ufeff'

    def strip_bom(line):
        """Return ``line`` with any leading BOM characters removed."""
        return line.lstrip(BOMS)
else:
    from StringIO import StringIO as strIO
    string_type = basestring
    int_type = long
    BOMS = '\xef\xbb\xbf\xff\xfe\xfe\xff'
    BOMS_UNICODE = '\\ufffe\\ufeff'.decode('unicode-escape')

    def strip_bom(line):
        """Return ``line`` with any leading BOM bytes/characters removed."""
        return line.lstrip(BOMS if isinstance(line, str) else BOMS_UNICODE)

# string escaping
# maps a two character escape sequence to the character it stands for
_unescape_char_map = {
    r"\n": "\n",
    r"\t": "\t",
    r"\v": "\v",
    r"\b": "\b",
    r"\r": "\r",
    r"\f": "\f",
    r"\a": "\a",
    r"\\": "\\",
    r"\?": "?",
    r"\"": "\"",
    r"\'": "\'",
}
# inverse mapping: raw character -> escape sequence
_escape_char_map = {v: k for k, v in _unescape_char_map.items()}


def _re_escape_match(m):
    """``re.sub`` callback: replace the matched character with its escape sequence."""
    return _escape_char_map[m.group()]


def _re_unescape_match(m):
    """``re.sub`` callback: replace the matched escape sequence with its character."""
    return _unescape_char_map[m.group()]


def _escape(text):
    """Return ``text`` with every character listed in ``_escape_char_map`` escaped."""
    return re.sub(r"[\n\t\v\b\r\f\a\\\?\"']", _re_escape_match, text)


def _unescape(text):
    """Return ``text`` with every sequence listed in ``_unescape_char_map`` unescaped."""
    return re.sub(r"(\\n|\\t|\\v|\\b|\\r|\\f|\\a|\\\\|\\\?|\\\"|\\')", _re_unescape_match, text)


# parsing and dumping for KV1
def parse(fp, mapper=dict, merge_duplicate_keys=True, escaped=True):
    """
    Deserialize ``fp`` (an iterable file-like object yielding lines of VDF text)
    to a Python object.

    ``mapper`` specifies the Python object used after deserialization. ``dict`` is
    used by default. Alternatively, ``collections.OrderedDict`` can be used if you
    wish to preserve key order. Or any object that acts like a ``dict``.

    ``merge_duplicate_keys`` when ``True`` will merge multiple KeyValue lists with the
    same key into one instead of overwriting. You can set this to ``False`` if you are
    using ``VDFDict`` and need to preserve the duplicates.

    ``escaped`` when ``True`` runs keys and values through ``_unescape``.

    Raises ``TypeError`` if ``mapper`` is not a ``Mapping`` subclass or ``fp`` has no
    ``readline`` attribute, and ``SyntaxError`` on malformed input.
    """
    if not issubclass(mapper, Mapping):
        raise TypeError("Expected mapper to be subclass of dict, got %s" % type(mapper))
    if not hasattr(fp, 'readline'):
        raise TypeError("Expected fp to be a file-like object supporting line iteration")

    # stack[0] is the root mapping, stack[-1] the mapping currently being filled
    stack = [mapper()]
    expect_bracket = False

    # NOTE(review): the seven group names are the ones read through match.group()
    # below. The interior of the ``val`` alternative was lost to the same
    # stripping and has been rebuilt -- confirm it against the original file.
    re_keyvalue = re.compile(r'^("(?P<qkey>(?:\\.|[^\\"])*)"|(?P<key>#?[a-z0-9\-\_\\\?$%<>]+))'
                             r'([ \t]*('
                             r'"(?P<qval>(?:\\.|[^\\"])*)(?P<vq_end>")?'
                             r'|(?P<val>(?:(?<!/)/(?!/)|[a-z0-9\-\_\\\?\*\.$<> ])+)'
                             r'|(?P<sblock>{[ \t]*)(?P<eblock>})?'
                             r'))?',
                             flags=re.I)

    for lineno, line in enumerate(fp, 1):
        if lineno == 1:
            line = strip_bom(line)

        line = line.lstrip()

        # skip empty and comment lines
        if line == "" or line[0] == '/':
            continue

        # one level deeper
        if line[0] == "{":
            expect_bracket = False
            continue

        if expect_bracket:
            raise SyntaxError("vdf.parse: expected openning bracket",
                              (getattr(fp, 'name', '<%s>' % fp.__class__.__name__), lineno, 1, line))

        # one level back
        if line[0] == "}":
            if len(stack) > 1:
                stack.pop()
                continue

            raise SyntaxError("vdf.parse: one too many closing parenthasis",
                              (getattr(fp, 'name', '<%s>' % fp.__class__.__name__), lineno, 0, line))

        # parse keyvalue pairs
        while True:
            match = re_keyvalue.match(line)

            if not match:
                # no match yet: pull in the next line and retry on the joined text
                try:
                    line += next(fp)
                    continue
                except StopIteration:
                    raise SyntaxError("vdf.parse: unexpected EOF (open key quote?)",
                                      (getattr(fp, 'name', '<%s>' % fp.__class__.__name__), lineno, 0, line))

            key = match.group('key') if match.group('qkey') is None else match.group('qkey')
            val = match.group('qval')
            if val is None:
                val = match.group('val')
                if val is not None:
                    val = val.rstrip()
                    if val == "":
                        val = None

            if escaped:
                key = _unescape(key)

            # we have a key with value in parenthesis, so we make a new dict obj (level deeper)
            if val is None:
                if merge_duplicate_keys and key in stack[-1]:
                    _m = stack[-1][key]
                    # we've descended a level deeper, if value is str, we have to overwrite it to mapper
                    if not isinstance(_m, mapper):
                        _m = stack[-1][key] = mapper()
                else:
                    _m = mapper()
                    stack[-1][key] = _m

                if match.group('eblock') is None:
                    # only expect a bracket if it's not already closed or on the same line
                    stack.append(_m)
                    if match.group('sblock') is None:
                        expect_bracket = True

            # we've matched a simple keyvalue pair, map it to the last dict obj in the stack
            else:
                # the quoted value has no closing quote yet: consume one more line
                # and try to match again, until we get the whole KeyValue pair
                if match.group('vq_end') is None and match.group('qval') is not None:
                    try:
                        line += next(fp)
                        continue
                    except StopIteration:
                        raise SyntaxError("vdf.parse: unexpected EOF (open quote for value?)",
                                          (getattr(fp, 'name', '<%s>' % fp.__class__.__name__), lineno, 0, line))

                stack[-1][key] = _unescape(val) if escaped else val

            # exit the loop
            break

    if len(stack) != 1:
        raise SyntaxError("vdf.parse: unclosed parenthasis or quotes (EOF)",
                          (getattr(fp, 'name', '<%s>' % fp.__class__.__name__), lineno, 0, line))

    return stack.pop()


def loads(s, **kwargs):
    """
    Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a VDF
    document) to a Python object.

    Keyword arguments are passed through to ``parse``.
    Raises ``TypeError`` if ``s`` is not a string.
    """
    if not isinstance(s, string_type):
        raise TypeError("Expected s to be a str, got %s" % type(s))

    try:
        fp = unicodeIO(s)
    except TypeError:
        # ``strIO`` is only imported on the Python 2 branch above
        fp = strIO(s)

    return parse(fp, **kwargs)


def load(fp, **kwargs):
    """
    Deserialize ``fp`` (a ``.readline()``-supporting file-like object containing
    a VDF document) to a Python object.

    Keyword arguments are passed through to ``parse``.
    """
    return parse(fp, **kwargs)


def dumps(obj, pretty=False, escaped=True):
    """
    Serialize ``obj`` to a VDF formatted ``str``.

    ``pretty`` indents nested levels with tabs; ``escaped`` runs string keys and
    values through ``_escape``. Raises ``TypeError`` on wrongly typed arguments.
    """
    if not isinstance(obj, Mapping):
        raise TypeError("Expected data to be an instance of``dict``")
    if not isinstance(pretty, bool):
        raise TypeError("Expected pretty to be of type bool")
    if not isinstance(escaped, bool):
        raise TypeError("Expected escaped to be of type bool")

    return ''.join(_dump_gen(obj, pretty, escaped))


def dump(obj, fp, pretty=False, escaped=True):
    """
    Serialize ``obj`` as a VDF formatted stream to ``fp`` (a
    ``.write()``-supporting file-like object).

    ``pretty`` and ``escaped`` behave as in ``dumps``.
    Raises ``TypeError`` on wrongly typed arguments.
    """
    if not isinstance(obj, Mapping):
        raise TypeError("Expected data to be an instance of``dict``")
    if not hasattr(fp, 'write'):
        raise TypeError("Expected fp to have write() method")
    if not isinstance(pretty, bool):
        raise TypeError("Expected pretty to be of type bool")
    if not isinstance(escaped, bool):
        raise TypeError("Expected escaped to be of type bool")

    for chunk in _dump_gen(obj, pretty, escaped):
        fp.write(chunk)


def _dump_gen(data, pretty=False, escaped=True, level=0):
    """Yield the VDF text for ``data`` chunk by chunk, recursing into nested mappings."""
    indent = "\t"
    line_indent = ""

    if pretty:
        line_indent = indent * level

    for key, value in data.items():
        if escaped and isinstance(key, string_type):
            key = _escape(key)

        if isinstance(value, Mapping):
            yield '%s"%s"\n%s{\n' % (line_indent, key, line_indent)
            for chunk in _dump_gen(value, pretty, escaped, level+1):
                yield chunk
            yield "%s}\n" % line_indent
        else:
            if escaped and isinstance(value, string_type):
                value = _escape(value)

            yield '%s"%s" "%s"\n' % (line_indent, key, value)


# binary VDF
class BASE_INT(int_type):
    """Integer subclass whose ``repr`` shows the concrete class name."""

    def __repr__(self):
        return "%s(%d)" % (self.__class__.__name__, self)


class UINT_64(BASE_INT):
    pass


class INT_64(BASE_INT):
    pass


class POINTER(BASE_INT):
    pass


class COLOR(BASE_INT):
    pass


# one-byte type tags that precede every entry in binary VDF
BIN_NONE = b'\x00'
BIN_STRING = b'\x01'
BIN_INT32 = b'\x02'
BIN_FLOAT32 = b'\x03'
BIN_POINTER = b'\x04'
BIN_WIDESTRING = b'\x05'
BIN_COLOR = b'\x06'
BIN_UINT64 = b'\x07'
BIN_END = b'\x08'
BIN_INT64 = b'\x0A'
BIN_END_ALT = b'\x0B'


def binary_loads(b, mapper=dict, merge_duplicate_keys=True, alt_format=False, raise_on_remaining=True):
    """
    Deserialize ``b`` (``bytes`` containing a VDF in "binary form")
    to a Python object.

    ``mapper`` specifies the Python object used after deserialization. ``dict`` is
    used by default. Alternatively, ``collections.OrderedDict`` can be used if you
    wish to preserve key order. Or any object that acts like a ``dict``.

    ``merge_duplicate_keys`` when ``True`` will merge multiple KeyValue lists with the
    same key into one instead of overwriting. You can set this to ``False`` if you are
    using ``VDFDict`` and need to preserve the duplicates.

    ``alt_format`` and ``raise_on_remaining`` are forwarded to ``binary_load``.
    """
    if not isinstance(b, bytes):
        raise TypeError("Expected s to be bytes, got %s" % type(b))

    return binary_load(BytesIO(b), mapper, merge_duplicate_keys, alt_format, raise_on_remaining)


def binary_load(fp, mapper=dict, merge_duplicate_keys=True, alt_format=False, raise_on_remaining=False):
    """
    Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
    binary VDF) to a Python object.

    ``mapper`` specifies the Python object used after deserialization. ``dict`` is
    used by default. Alternatively, ``collections.OrderedDict`` can be used if you
    wish to preserve key order. Or any object that acts like a ``dict``.

    ``merge_duplicate_keys`` when ``True`` will merge multiple KeyValue lists with the
    same key into one instead of overwriting. You can set this to ``False`` if you are
    using ``VDFDict`` and need to preserve the duplicates.

    ``alt_format`` selects ``BIN_END_ALT`` instead of ``BIN_END`` as the block
    terminator. ``raise_on_remaining`` raises ``SyntaxError`` if data is left in
    ``fp`` after the root block has ended.
    """
    if not hasattr(fp, 'read') or not hasattr(fp, 'tell') or not hasattr(fp, 'seek'):
        raise TypeError("Expected fp to be a file-like object with tell()/seek() and read() returning bytes")
    if not issubclass(mapper, Mapping):
        raise TypeError("Expected mapper to be subclass of dict, got %s" % type(mapper))

    # helpers
    # NOTE(review): the text from here down to the ``len(stack) > 1`` check was
    # stripped from the reviewed copy. The struct formats, ``read_string`` and the
    # loop header below are rebuilt from the surviving call sites (little-endian,
    # since each lost format started with "<") -- confirm against the original.
    int32 = struct.Struct('<i')
    uint64 = struct.Struct('<Q')
    int64 = struct.Struct('<q')
    float32 = struct.Struct('<f')

    def read_string(fp, wide=False):
        """Read one NUL terminated string and leave ``fp`` just past the terminator."""
        buf, end = b'', -1
        offset = fp.tell()

        # locate string end
        while end == -1:
            chunk = fp.read(64)

            if chunk == b'':
                raise SyntaxError("Unterminated cstring (offset: %d)" % offset)

            buf += chunk
            end = buf.find(b'\x00\x00' if wide else b'\x00')

        if wide:
            # the terminator must start on a two byte boundary
            end += end % 2

        # rewind fp to the first byte after the terminator
        fp.seek(end - len(buf) + (2 if wide else 1), 1)

        # decode string
        result = buf[:end]

        if wide:
            result = result.decode('utf-16')
        elif bytes is not str:
            result = result.decode('utf-8', 'replace')
        else:
            # Python 2: keep plain ASCII as ``str``, decode anything else
            try:
                result.decode('ascii')
            except UnicodeDecodeError:
                result = result.decode('utf-8', 'replace')

        return result

    stack = [mapper()]
    CURRENT_BIN_END = BIN_END if not alt_format else BIN_END_ALT

    # read one type tag at a time until EOF or the root block is closed
    for t in iter(lambda: fp.read(1), b''):
        if t == CURRENT_BIN_END:
            if len(stack) > 1:
                stack.pop()
                continue
            break

        key = read_string(fp)

        if t == BIN_NONE:
            if merge_duplicate_keys and key in stack[-1]:
                _m = stack[-1][key]
                # same rule as parse(): a plain value under this key is
                # replaced by a fresh mapping instead of being descended into
                if not isinstance(_m, mapper):
                    _m = stack[-1][key] = mapper()
            else:
                _m = mapper()
                stack[-1][key] = _m
            stack.append(_m)
        elif t == BIN_STRING:
            stack[-1][key] = read_string(fp)
        elif t == BIN_WIDESTRING:
            stack[-1][key] = read_string(fp, wide=True)
        elif t in (BIN_INT32, BIN_POINTER, BIN_COLOR):
            val = int32.unpack(fp.read(int32.size))[0]

            if t == BIN_POINTER:
                val = POINTER(val)
            elif t == BIN_COLOR:
                val = COLOR(val)

            stack[-1][key] = val
        elif t == BIN_UINT64:
            stack[-1][key] = UINT_64(uint64.unpack(fp.read(uint64.size))[0])
        elif t == BIN_INT64:
            stack[-1][key] = INT_64(int64.unpack(fp.read(int64.size))[0])
        elif t == BIN_FLOAT32:
            stack[-1][key] = float32.unpack(fp.read(float32.size))[0]
        else:
            raise SyntaxError("Unknown data type at offset %d: %s" % (fp.tell() - 1, repr(t)))

    if len(stack) != 1:
        raise SyntaxError("Reached EOF, but Binary VDF is incomplete")
    if raise_on_remaining and fp.read(1) != b'':
        # step back over the probe byte before reporting
        fp.seek(-1, 1)
        raise SyntaxError("Binary VDF ended at offset %d, but there is more data remaining" % (fp.tell() - 1))

    return stack.pop()


def binary_dumps(obj, alt_format=False):
    """
    Serialize ``obj`` to a binary VDF formatted ``bytes``.

    ``alt_format`` is forwarded to ``binary_dump``.
    """
    buf = BytesIO()
    binary_dump(obj, buf, alt_format)
    return buf.getvalue()


def binary_dump(obj, fp, alt_format=False):
    """
    Serialize ``obj`` to a binary VDF formatted ``bytes`` and write it to ``fp`` filelike object

    Raises ``TypeError`` if ``obj`` is not a ``Mapping`` or ``fp`` has no ``write``.
    """
    if not isinstance(obj, Mapping):
        raise TypeError("Expected obj to be type of Mapping")
    if not hasattr(fp, 'write'):
        raise TypeError("Expected fp to have write() method")

    for chunk in _binary_dump_gen(obj, alt_format=alt_format):
        fp.write(chunk)


# NOTE(review): the reviewed text ends in the middle of ``_binary_dump_gen``.
# The visible fragment is preserved verbatim below and has NOT been completed;
# restore the rest of the generator from the original file before relying on
# ``binary_dump`` / ``binary_dumps``.
#
#   def _binary_dump_gen(obj, level=0, alt_format=False):
#       if level == 0 and len(obj) == 0:
#           return
#
#       int32 = struct.Struct('