mirror of https://github.com/twitter/the-algorithm.git
synced 2024-11-16 08:29:21 +01:00
[docx] split commit for file 6400
Signed-off-by: Ari Archer <ari.web.xyz@gmail.com>
This commit is contained in:
parent 4e32fcb29f
commit ac0fb2a2f2
@@ -1,162 +0,0 @@
#include "internal/error.h"
#include "internal/thrift.h"

#include <map>
#include <twml/ThriftWriter.h>
#include <twml/TensorRecordWriter.h>
#include <twml/io/IOError.h>

using namespace twml::io;

namespace twml {

static int32_t getRawThriftType(twml_type dtype) {
  // convert twml enum to tensor.thrift enum
  switch (dtype) {
    case TWML_TYPE_FLOAT:
      return DATA_TYPE_FLOAT;
    case TWML_TYPE_DOUBLE:
      return DATA_TYPE_DOUBLE;
    case TWML_TYPE_INT64:
      return DATA_TYPE_INT64;
    case TWML_TYPE_INT32:
      return DATA_TYPE_INT32;
    case TWML_TYPE_UINT8:
      return DATA_TYPE_UINT8;
    case TWML_TYPE_STRING:
      return DATA_TYPE_STRING;
    case TWML_TYPE_BOOL:
      return DATA_TYPE_BOOL;
    default:
      throw IOError(IOError::UNSUPPORTED_OUTPUT_TYPE);
  }
}

void TensorRecordWriter::writeTensor(const RawTensor &tensor) {
  if (tensor.getType() == TWML_TYPE_INT32) {
    m_thrift_writer.writeStructFieldHeader(TTYPE_STRUCT, GT_INT32);
    m_thrift_writer.writeStructFieldHeader(TTYPE_LIST, 1);
    m_thrift_writer.writeListHeader(TTYPE_I32, tensor.getNumElements());

    const int32_t *data = tensor.getData<int32_t>();

    for (uint64_t i = 0; i < tensor.getNumElements(); i++)
      m_thrift_writer.writeInt32(data[i]);

  } else if (tensor.getType() == TWML_TYPE_INT64) {
    m_thrift_writer.writeStructFieldHeader(TTYPE_STRUCT, GT_INT64);
    m_thrift_writer.writeStructFieldHeader(TTYPE_LIST, 1);
    m_thrift_writer.writeListHeader(TTYPE_I64, tensor.getNumElements());

    const int64_t *data = tensor.getData<int64_t>();

    for (uint64_t i = 0; i < tensor.getNumElements(); i++)
      m_thrift_writer.writeInt64(data[i]);

  } else if (tensor.getType() == TWML_TYPE_FLOAT) {
    m_thrift_writer.writeStructFieldHeader(TTYPE_STRUCT, GT_FLOAT);
    m_thrift_writer.writeStructFieldHeader(TTYPE_LIST, 1);
    m_thrift_writer.writeListHeader(TTYPE_DOUBLE, tensor.getNumElements());

    const float *data = tensor.getData<float>();

    for (uint64_t i = 0; i < tensor.getNumElements(); i++)
      m_thrift_writer.writeDouble(static_cast<double>(data[i]));

  } else if (tensor.getType() == TWML_TYPE_DOUBLE) {
    m_thrift_writer.writeStructFieldHeader(TTYPE_STRUCT, GT_DOUBLE);
    m_thrift_writer.writeStructFieldHeader(TTYPE_LIST, 1);
    m_thrift_writer.writeListHeader(TTYPE_DOUBLE, tensor.getNumElements());

    const double *data = tensor.getData<double>();

    for (uint64_t i = 0; i < tensor.getNumElements(); i++)
      m_thrift_writer.writeDouble(data[i]);

  } else if (tensor.getType() == TWML_TYPE_STRING) {
    m_thrift_writer.writeStructFieldHeader(TTYPE_STRUCT, GT_STRING);
    m_thrift_writer.writeStructFieldHeader(TTYPE_LIST, 1);
    m_thrift_writer.writeListHeader(TTYPE_STRING, tensor.getNumElements());

    const std::string *data = tensor.getData<std::string>();

    for (uint64_t i = 0; i < tensor.getNumElements(); i++)
      m_thrift_writer.writeString(data[i]);

  } else if (tensor.getType() == TWML_TYPE_BOOL) {
    m_thrift_writer.writeStructFieldHeader(TTYPE_STRUCT, GT_BOOL);
    m_thrift_writer.writeStructFieldHeader(TTYPE_LIST, 1);
    m_thrift_writer.writeListHeader(TTYPE_BOOL, tensor.getNumElements());

    const bool *data = tensor.getData<bool>();

    for (uint64_t i = 0; i < tensor.getNumElements(); i++)
      m_thrift_writer.writeBool(data[i]);

  } else {
    throw IOError(IOError::UNSUPPORTED_OUTPUT_TYPE);
  }

  // write tensor shape field
  m_thrift_writer.writeStructFieldHeader(TTYPE_LIST, 2);
  m_thrift_writer.writeListHeader(TTYPE_I64, tensor.getNumDims());

  for (uint64_t i = 0; i < tensor.getNumDims(); i++)
    m_thrift_writer.writeInt64(tensor.getDim(i));

  m_thrift_writer.writeStructStop();
  m_thrift_writer.writeStructStop();
}

void TensorRecordWriter::writeRawTensor(const RawTensor &tensor) {
  m_thrift_writer.writeStructFieldHeader(TTYPE_STRUCT, GT_RAW);

  // dataType field
  m_thrift_writer.writeStructFieldHeader(TTYPE_I32, 1);
  m_thrift_writer.writeInt32(getRawThriftType(tensor.getType()));

  // content field
  uint64_t type_size = getSizeOf(tensor.getType());
  m_thrift_writer.writeStructFieldHeader(TTYPE_STRING, 2);
  const uint8_t *data = reinterpret_cast<const uint8_t *>(tensor.getData<void>());
  m_thrift_writer.writeBinary(data, tensor.getNumElements() * type_size);

  // shape field
  m_thrift_writer.writeStructFieldHeader(TTYPE_LIST, 3);
  m_thrift_writer.writeListHeader(TTYPE_I64, tensor.getNumDims());

  for (uint64_t i = 0; i < tensor.getNumDims(); i++)
    m_thrift_writer.writeInt64(tensor.getDim(i));

  m_thrift_writer.writeStructStop();
  m_thrift_writer.writeStructStop();
}

TWMLAPI uint32_t TensorRecordWriter::getRecordsWritten() {
  return m_records_written;
}

// Caller (usually DataRecordWriter) must precede with struct header field
// like thrift_writer.writeStructFieldHeader(TTYPE_MAP, DR_GENERAL_TENSOR)
TWMLAPI uint64_t TensorRecordWriter::write(twml::TensorRecord &record) {
  uint64_t bytes_written_before = m_thrift_writer.getBytesWritten();

  m_thrift_writer.writeMapHeader(TTYPE_I64, TTYPE_STRUCT, record.getRawTensors().size());

  for (auto id_tensor_pairs : record.getRawTensors()) {
    m_thrift_writer.writeInt64(id_tensor_pairs.first);

    // all tensors written as RawTensor Thrift except for StringTensors
    // this avoids the overhead of converting little endian to big endian
    if (id_tensor_pairs.second.getType() == TWML_TYPE_STRING)
      writeTensor(id_tensor_pairs.second);
    else
      writeRawTensor(id_tensor_pairs.second);
  }

  m_records_written++;

  return m_thrift_writer.getBytesWritten() - bytes_written_before;
}

} // namespace twml
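The split above between writeTensor() and writeRawTensor() is an endianness trade-off: Thrift list elements are big-endian on the wire, so the typed path must byte-swap every element, while the RawTensor content field is an opaque binary blob that can be copied in host order. A minimal standalone illustration, not part of this commit (__builtin_bswap32 is a gcc/clang builtin standing in for the host-to-big-endian conversion on a little-endian machine):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  int32_t host_val = 1;
  uint8_t raw[4], wire[4];
  // RawTensor path: bulk copy, bytes stay in host (little-endian) order
  memcpy(raw, &host_val, sizeof(raw));
  // Typed list<i32> path: each element converted to big-endian first
  uint32_t be = __builtin_bswap32((uint32_t)host_val);
  memcpy(wire, &be, sizeof(wire));
  printf("raw:  %02x %02x %02x %02x\n", raw[0], raw[1], raw[2], raw[3]);     // 01 00 00 00
  printf("wire: %02x %02x %02x %02x\n", wire[0], wire[1], wire[2], wire[3]); // 00 00 00 01
  return 0;
}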
BIN twml/libtwml/src/lib/TensorRecordWriter.docx Normal file
Binary file not shown.
@@ -1,33 +0,0 @@
#include "internal/endianutils.h"

#include <twml/ThriftReader.h>
#include <twml/Error.h>

#include <cstring>

namespace twml {

uint8_t ThriftReader::readByte() {
  return readDirect<uint8_t>();
}

int16_t ThriftReader::readInt16() {
  return betoh16(readDirect<int16_t>());
}

int32_t ThriftReader::readInt32() {
  return betoh32(readDirect<int32_t>());
}

int64_t ThriftReader::readInt64() {
  return betoh64(readDirect<int64_t>());
}

double ThriftReader::readDouble() {
  double val;
  int64_t *val_proxy = reinterpret_cast<int64_t*>(&val);
  *val_proxy = readInt64();
  return val;
}

} // namespace twml
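readDouble() above reinterprets the byte-swapped int64 as a double through a pointer cast. A sketch of the same decode using memcpy instead, which sidesteps the strict-aliasing question raised by the int64_t* proxy (assumes a little-endian host; __builtin_bswap64 stands in for betoh64):

#include <cstdint>
#include <cstring>

double decode_be_double(const uint8_t bytes[8]) {
  uint64_t n;
  memcpy(&n, bytes, sizeof(n));  // raw big-endian payload from the stream
  n = __builtin_bswap64(n);      // betoh64 equivalent on little-endian hosts
  double d;
  memcpy(&d, &n, sizeof(d));     // bit-for-bit reinterpretation, no aliasing UB
  return d;
}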
BIN twml/libtwml/src/lib/ThriftReader.docx Normal file
Binary file not shown.
@@ -1,91 +0,0 @@
#include "internal/endianutils.h"
#include "internal/error.h"
#include "internal/thrift.h"

#include <twml/ThriftWriter.h>
#include <twml/Error.h>
#include <twml/io/IOError.h>

#include <cstring>

using namespace twml::io;

namespace twml {

template <typename T> inline
uint64_t ThriftWriter::write(T val) {
  if (!m_dry_run) {
    if (m_bytes_written + sizeof(T) > m_buffer_size)
      throw IOError(IOError::DESTINATION_LARGER_THAN_CAPACITY);
    memcpy(m_buffer, &val, sizeof(T));
    m_buffer += sizeof(T);
  }
  m_bytes_written += sizeof(T);
  return sizeof(T);
}

TWMLAPI uint64_t ThriftWriter::getBytesWritten() {
  return m_bytes_written;
}

TWMLAPI uint64_t ThriftWriter::writeStructFieldHeader(int8_t field_type, int16_t field_id) {
  return writeInt8(field_type) + writeInt16(field_id);
}

TWMLAPI uint64_t ThriftWriter::writeStructStop() {
  return writeInt8(static_cast<int8_t>(TTYPE_STOP));
}

TWMLAPI uint64_t ThriftWriter::writeListHeader(int8_t element_type, int32_t num_elems) {
  return writeInt8(element_type) + writeInt32(num_elems);
}

TWMLAPI uint64_t ThriftWriter::writeMapHeader(int8_t key_type, int8_t val_type, int32_t num_elems) {
  return writeInt8(key_type) + writeInt8(val_type) + writeInt32(num_elems);
}

TWMLAPI uint64_t ThriftWriter::writeDouble(double val) {
  int64_t bin_value;
  memcpy(&bin_value, &val, sizeof(int64_t));
  return writeInt64(bin_value);
}

TWMLAPI uint64_t ThriftWriter::writeInt8(int8_t val) {
  return write(val);
}

TWMLAPI uint64_t ThriftWriter::writeInt16(int16_t val) {
  return write(betoh16(val));
}

TWMLAPI uint64_t ThriftWriter::writeInt32(int32_t val) {
  return write(betoh32(val));
}

TWMLAPI uint64_t ThriftWriter::writeInt64(int64_t val) {
  return write(betoh64(val));
}

TWMLAPI uint64_t ThriftWriter::writeBinary(const uint8_t *bytes, int32_t num_bytes) {
  writeInt32(num_bytes);

  if (!m_dry_run) {
    if (m_bytes_written + num_bytes > m_buffer_size)
      throw IOError(IOError::DESTINATION_LARGER_THAN_CAPACITY);
    memcpy(m_buffer, bytes, num_bytes);
    m_buffer += num_bytes;
  }
  m_bytes_written += num_bytes;

  return 4 + num_bytes;
}

TWMLAPI uint64_t ThriftWriter::writeString(std::string str) {
  return writeBinary(reinterpret_cast<const uint8_t *>(str.data()), str.length());
}

TWMLAPI uint64_t ThriftWriter::writeBool(bool val) {
  return write(val);
}

} // namespace twml
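The m_dry_run branch in write() and writeBinary() suggests a two-pass usage: run the identical sequence of write calls once with no destination to learn the encoded size, then allocate and write for real. A hypothetical sketch of that pattern (SizingWriter is invented for illustration; the actual ThriftWriter constructor is not shown in this diff):

#include <cstdint>
#include <vector>

// Invented stand-in for a ThriftWriter in dry-run mode: it only
// accumulates byte counts, exactly like write() when m_dry_run is set.
struct SizingWriter {
  uint64_t bytes = 0;
  uint64_t writeInt32(int32_t) { bytes += sizeof(int32_t); return sizeof(int32_t); }
  uint64_t writeInt64(int64_t) { bytes += sizeof(int64_t); return sizeof(int64_t); }
};

int main() {
  SizingWriter sizer;
  sizer.writeInt32(7);   // pass 1: measure the encoded size
  sizer.writeInt64(42);
  std::vector<uint8_t> buf(sizer.bytes);  // 12 bytes; pass 2 would fill buf
  return buf.size() == 12 ? 0 : 1;
}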
BIN twml/libtwml/src/lib/ThriftWriter.docx Normal file
Binary file not shown.
@@ -1,167 +0,0 @@
#include "internal/interpolate.h"
#include "internal/error.h"
#include <twml/discretizer_impl.h>
#include <twml/optim.h>

namespace twml {
// it is assumed that start_compute and end_compute are valid
template<typename T>
void discretizerInfer(Tensor &output_keys,
                      Tensor &output_vals,
                      const Tensor &input_ids,
                      const Tensor &input_vals,
                      const Tensor &bin_ids,
                      const Tensor &bin_vals,
                      const Tensor &feature_offsets,
                      int output_bits,
                      const Map<int64_t, int64_t> &ID_to_index,
                      int64_t start_compute,
                      int64_t end_compute,
                      int64_t output_start) {
  auto out_keysData = output_keys.getData<int64_t>();
  auto out_valsData = output_vals.getData<T>();
  uint64_t out_keysStride = output_keys.getStride(0);
  uint64_t out_valsStride = output_vals.getStride(0);

  auto in_idsData = input_ids.getData<int64_t>();
  auto in_valsData = input_vals.getData<T>();
  uint64_t in_idsStride = input_ids.getStride(0);
  uint64_t in_valsStride = input_vals.getStride(0);

  auto xsData = bin_vals.getData<T>();
  auto ysData = bin_ids.getData<int64_t>();
  uint64_t xsStride = bin_vals.getStride(0);
  uint64_t ysStride = bin_ids.getStride(0);

  auto offsetData = feature_offsets.getData<int64_t>();

  uint64_t total_bins = bin_ids.getNumElements();
  uint64_t fsize = feature_offsets.getNumElements();

  uint64_t output_size = (1 << output_bits);

  for (uint64_t i = start_compute; i < end_compute; i++) {
    int64_t feature_ID = in_idsData[i * in_idsStride];
    T val = in_valsData[i * in_valsStride];

    auto iter = ID_to_index.find(feature_ID);
    if (iter == ID_to_index.end()) {
      // feature not calibrated
      // modulo add operation for new key from feature ID
      int64_t ikey = feature_ID % (output_size - total_bins) + total_bins;
      out_keysData[(i + output_start - start_compute) * out_keysStride] = ikey;
      out_valsData[(i + output_start - start_compute) * out_valsStride] = val;
      continue;
    }

    int64_t ikey = iter->second;

    // Perform interpolation
    uint64_t offset = offsetData[ikey];
    uint64_t next_offset = (ikey == (int64_t)(fsize - 1)) ? total_bins : offsetData[ikey + 1];
    uint64_t mainSize = next_offset - offset;

    const T *lxsData = xsData + offset;
    const int64_t *lysData = ysData + offset;
    int64_t okey;
    okey = interpolation<T, int64_t>(lxsData, xsStride,
                                     lysData, ysStride,
                                     val, mainSize,
                                     NEAREST, 0);
    out_keysData[(i + output_start - start_compute) * out_keysStride] = okey;
    out_valsData[(i + output_start - start_compute) * out_valsStride] = 1;
  }
}

void discretizerInfer(Tensor &output_keys,
                      Tensor &output_vals,
                      const Tensor &input_ids,
                      const Tensor &input_vals,
                      const Tensor &bin_ids,
                      const Tensor &bin_vals,
                      const Tensor &feature_offsets,
                      int output_bits,
                      const Map<int64_t, int64_t> &ID_to_index,
                      int start_compute,
                      int end_compute,
                      int output_start) {
  if (input_ids.getType() != TWML_TYPE_INT64) {
    throw twml::Error(TWML_ERR_TYPE, "input_ids must be a Long Tensor");
  }

  if (output_keys.getType() != TWML_TYPE_INT64) {
    throw twml::Error(TWML_ERR_TYPE, "output_keys must be a Long Tensor");
  }

  if (bin_ids.getType() != TWML_TYPE_INT64) {
    throw twml::Error(TWML_ERR_TYPE, "bin_ids must be a Long Tensor");
  }

  if (feature_offsets.getType() != TWML_TYPE_INT64) {
    throw twml::Error(TWML_ERR_TYPE, "feature_offsets must be a Long Tensor");
  }

  if (input_vals.getType() != bin_vals.getType()) {
    throw twml::Error(TWML_ERR_TYPE,
                      "Data type of input_vals does not match type of bin_vals");
  }

  if (bin_vals.getNumDims() != 1) {
    throw twml::Error(TWML_ERR_SIZE,
                      "bin_vals must be 1 Dimensional");
  }

  if (bin_ids.getNumDims() != 1) {
    throw twml::Error(TWML_ERR_SIZE,
                      "bin_ids must be 1 Dimensional");
  }

  if (bin_vals.getNumElements() != bin_ids.getNumElements()) {
    throw twml::Error(TWML_ERR_SIZE,
                      "Dimensions of bin_vals and bin_ids do not match");
  }

  if (feature_offsets.getStride(0) != 1) {
    throw twml::Error(TWML_ERR_SIZE,
                      "feature_offsets must be contiguous");
  }

  uint64_t size = input_ids.getDim(0);
  if (end_compute == -1) {
    end_compute = size;
  }

  if (start_compute < 0 || start_compute >= size) {
    throw twml::Error(TWML_ERR_SIZE,
                      "start_compute out of range");
  }

  if (end_compute < -1 || end_compute > size) {
    throw twml::Error(TWML_ERR_SIZE,
                      "end_compute out of range");
  }

  if (start_compute > end_compute && end_compute != -1) {
    throw twml::Error(TWML_ERR_SIZE,
                      "must have start_compute <= end_compute, or end_compute==-1");
  }

  switch (input_vals.getType()) {
    case TWML_TYPE_FLOAT:
      twml::discretizerInfer<float>(output_keys, output_vals,
                                    input_ids, input_vals,
                                    bin_ids, bin_vals, feature_offsets, output_bits, ID_to_index,
                                    start_compute, end_compute, output_start);
      break;
    case TWML_TYPE_DOUBLE:
      twml::discretizerInfer<double>(output_keys, output_vals,
                                     input_ids, input_vals,
                                     bin_ids, bin_vals, feature_offsets, output_bits, ID_to_index,
                                     start_compute, end_compute, output_start);
      break;
    default:
      throw twml::Error(TWML_ERR_TYPE,
                        "Unsupported datatype for discretizerInfer");
  }
}
} // namespace twml
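For uncalibrated features, the modulo-add fallback above folds the raw feature ID into the part of the key space that sits above the calibrated bins, so fallback keys cannot collide with bin IDs (for non-negative feature IDs). A worked example with invented values:

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t output_size = 1 << 4;  // output_bits = 4, so 16 keys total
  const uint64_t total_bins = 6;        // calibrated bins own keys 0..5
  const int64_t feature_ID = 123;       // some uncalibrated feature (invented)
  int64_t ikey = feature_ID % (output_size - total_bins) + total_bins;
  // 123 % 10 + 6 = 9, which lands in the reserved range 6..15
  assert(ikey >= (int64_t)total_bins && ikey < (int64_t)output_size);
  return 0;
}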
BIN twml/libtwml/src/lib/discretizer_impl.docx Normal file
Binary file not shown.
@@ -1,158 +0,0 @@
#include "internal/error.h"
#include "internal/murmur_hash3.h"
#include "internal/utf_converter.h"
#include <twml/functions.h>
#include <cstring>
#include <algorithm>
#include <stdexcept>  // std::invalid_argument, thrown below
#include <vector>     // heap fallback in twml_get_feature_id

namespace twml {

template<typename T>
void add1(Tensor &output, const Tensor input) {
  T *odata = output.getData<T>();
  const T *idata = input.getData<T>();
  const uint64_t num_elements = input.getNumElements();

  for (uint64_t i = 0; i < num_elements; i++) {
    odata[i] = idata[i] + 1;
  }
}

template<typename T>
void copy(Tensor &output, const Tensor input) {
  T *odata = output.getData<T>();
  const T *idata = input.getData<T>();
  const uint64_t num_elements = input.getNumElements();

  for (uint64_t i = 0; i < num_elements; i++) {
    odata[i] = idata[i];
  }
}

void add1(Tensor &output, const Tensor input) {
  auto type = input.getType();
  if (output.getType() != type) {
    throw twml::Error(TWML_ERR_TYPE, "Output type does not match input type");
  }

  if (output.getNumElements() != input.getNumElements()) {
    throw twml::Error(TWML_ERR_SIZE, "Output size does not match input size");
  }

  // TODO: Implement an easier dispatch function
  switch (type) {
    case TWML_TYPE_FLOAT:
      twml::add1<float>(output, input);
      break;
    case TWML_TYPE_DOUBLE:
      twml::add1<double>(output, input);
      break;
    default:
      throw twml::Error(TWML_ERR_TYPE, "add1 only supports float and double tensors");
  }
}

void copy(Tensor &output, const Tensor input) {
  auto type = input.getType();
  if (output.getType() != type) {
    throw twml::Error(TWML_ERR_TYPE, "Output type does not match input type");
  }

  if (output.getNumElements() != input.getNumElements()) {
    throw twml::Error(TWML_ERR_SIZE, "Output size does not match input size");
  }

  // TODO: Implement an easier dispatch function
  switch (type) {
    case TWML_TYPE_FLOAT:
      twml::copy<float>(output, input);
      break;
    case TWML_TYPE_DOUBLE:
      twml::copy<double>(output, input);
      break;
    default:
      throw twml::Error(TWML_ERR_TYPE, "copy only supports float and double tensors");
  }
}

int64_t featureId(const std::string &feature) {
  const char *str = feature.c_str();
  uint64_t len = feature.size();
  int64_t id = 0;
  TWML_CHECK(twml_get_feature_id(&id, len, str), "Error getting featureId");
  return id;
}
} // namespace twml

twml_err twml_add1(twml_tensor output, const twml_tensor input) {
  HANDLE_EXCEPTIONS(
    auto out = twml::getTensor(output);
    auto in = twml::getConstTensor(input);
    twml::add1(*out, *in););
  return TWML_ERR_NONE;
}

twml_err twml_copy(twml_tensor output, const twml_tensor input) {
  HANDLE_EXCEPTIONS(
    auto out = twml::getTensor(output);
    auto in = twml::getConstTensor(input);
    twml::copy(*out, *in););
  return TWML_ERR_NONE;
}

inline twml_err twml_get_feature_id_internal(int64_t *result,
                                             uint64_t out_size, uint16_t *out,
                                             uint64_t out2_size, uint16_t *out2,
                                             const uint64_t len, const char *str) {
  uint64_t k = 0;
  for (uint64_t i = 0; i < len; i++) {
    if (str[i] == '#') {
      k = i;
      break;
    }
  }

  uint8_t hash[16];
  if (k != 0) {
    ssize_t n = utf8_to_utf16((const uint8_t *) str, k, out, out_size);
    if (n < 0) throw std::invalid_argument("error while converting from utf8 to utf16");

    MurmurHash3_x64_128(out, n * sizeof(uint16_t), 0, out2);
    n = utf8_to_utf16((const uint8_t *) (str + k + 1), len - k - 1, &out2[4], out2_size - 8);
    if (n < 0) throw std::invalid_argument("error while converting from utf8 to utf16");

    MurmurHash3_x64_128(out2, (n * sizeof(uint16_t)) + 8, 0, hash);
  } else {
    ssize_t n = utf8_to_utf16((const uint8_t *)str, len, out, out_size);
    if (n < 0) throw std::invalid_argument("error while converting from utf8 to utf16");
    MurmurHash3_x64_128(out, n * sizeof(uint16_t), 0, hash);
  }
  int64_t id;
  memcpy(&id, hash, sizeof(int64_t));
  *result = id;

  return TWML_ERR_NONE;
}

static const int UTF16_STR_MAX_SIZE = 1024;

twml_err twml_get_feature_id(int64_t *result, const uint64_t len, const char *str) {
  try {
    uint16_t out[UTF16_STR_MAX_SIZE];
    uint16_t out2[UTF16_STR_MAX_SIZE];
    return twml_get_feature_id_internal(result,
                                        UTF16_STR_MAX_SIZE, out,
                                        UTF16_STR_MAX_SIZE, out2,
                                        len, str);
  } catch(const std::invalid_argument &ex) {
    // If the space on the stack is not enough, try using the heap.
    // len + 1 is needed because a null terminating character is added at the end.
    std::vector<uint16_t> out(len + 1);
    std::vector<uint16_t> out2(len + 1);
    return twml_get_feature_id_internal(result,
                                        len + 1, out.data(),
                                        len + 1, out2.data(),
                                        len, str);

  }
}
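A usage sketch for the feature-ID hash above: the function splits the string at the first '#' ("name#value" features), converts each part from UTF-8 to UTF-16, and runs MurmurHash3 x64-128, keeping the first 8 bytes of the digest as the ID. It assumes the twml headers are on the include path and that <twml/functions.h>, included above, declares the C entry point:

#include <twml/functions.h>  // assumed to declare twml_get_feature_id
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  const char *feature = "user.age#23";  // invented "name#value" feature string
  int64_t id = 0;
  if (twml_get_feature_id(&id, strlen(feature), feature) == TWML_ERR_NONE)
    printf("feature id: %lld\n", (long long)id);
  return 0;
}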
BIN twml/libtwml/src/lib/functions.docx Normal file
Binary file not shown.
@@ -1,241 +0,0 @@
#include "internal/linear_search.h"
#include "internal/error.h"
#include <twml/hashing_discretizer_impl.h>
#include <twml/optim.h>
#include <algorithm>

namespace twml {
template<typename Tx>
static int64_t lower_bound_search(const Tx *data, const Tx val, const int64_t buf_size) {
  auto index_temp = std::lower_bound(data, data + buf_size, val);
  return static_cast<int64_t>(index_temp - data);
}

template<typename Tx>
static int64_t upper_bound_search(const Tx *data, const Tx val, const int64_t buf_size) {
  auto index_temp = std::upper_bound(data, data + buf_size, val);
  return static_cast<int64_t>(index_temp - data);
}

template<typename Tx>
using search_method = int64_t (*)(const Tx *, const Tx, const int64_t);

typedef uint64_t (*hash_signature)(uint64_t, int64_t, uint64_t);

// uint64_t integer_multiplicative_hashing()
//
// A function to hash discretized feature_ids into one of 2**output_bits buckets.
// This function hashes the feature_ids to achieve a uniform distribution of
// IDs, so the hashed IDs are with high probability far apart.
// Then, bucket_indices can simply be added, resulting in unique new IDs with high probability.
// We integer hash again to again spread out the new IDs.
// Finally we take the upper output_bits bits as the output key.
// Required args:
//   feature_id:
//     The feature id of the feature to be hashed.
//   bucket_index:
//     The bucket index of the discretized feature value.
//   output_bits:
//     The number of bits of output space for the features to be hashed into.
//
// Note - feature_ids may have arbitrary distribution within int32s.
// Note - 64 bit feature_ids can be processed with this, but the upper
//        32 bits have no effect on the output.
// e.g. all feature ids 0 through 255 exist in movie-lens.
// this hashing constant is good for 32 LSBs. will use N=32. (can use N<32 also)
// this hashing constant is co-prime with 2**32, therefore we have that
//   a != b, a and b in [0,2**32)
// implies
//   f(a) != f(b) where f(x) = (hashing_constant * x) % (2**32)
// note that we are mostly ignoring the upper 32 bits, using modulo 2**32 arithmetic
uint64_t integer_multiplicative_hashing(uint64_t feature_id,
                                        int64_t bucket_index,
                                        uint64_t output_bits) {
  // possibly use 14695981039346656037 for 64 bit unsigned??
  // = 20921 * 465383 * 1509404459
  // alternatively, 14695981039346656039 is prime
  // We would also need to use N = 64
  const uint64_t hashing_constant = 2654435761;
  const uint64_t N = 32;
  // hash once to prevent problems from anomalous input id distributions
  feature_id *= hashing_constant;
  feature_id += bucket_index;
  // this hash enables the following right shift operation
  // without losing the bucket information (lower bits)
  feature_id *= hashing_constant;
  // output size is a power of 2
  feature_id >>= N - output_bits;
  uint64_t mask = (1UL << output_bits) - 1;  // 1UL: avoid 32-bit shift overflow for large output_bits
  return mask & feature_id;
}

uint64_t integer64_multiplicative_hashing(uint64_t feature_id,
                                          int64_t bucket_index,
                                          uint64_t output_bits) {
  const uint64_t hashing_constant = 14695981039346656039UL;
  const uint64_t N = 64;
  // hash once to prevent problems from anomalous input id distributions
  feature_id *= hashing_constant;
  feature_id += bucket_index;
  // this hash enables the following right shift operation
  // without losing the bucket information (lower bits)
  feature_id *= hashing_constant;
  // output size is a power of 2
  feature_id >>= N - output_bits;
  uint64_t mask = (1UL << output_bits) - 1;
  return mask & feature_id;
}

int64_t option_bits(int64_t options, int64_t high, int64_t low) {
  options >>= low;
  options &= (1 << (high - low + 1)) - 1;
  return options;
}

// it is assumed that start_compute and end_compute are valid
template<typename T>
void hashDiscretizerInfer(Tensor &output_keys,
                          Tensor &output_vals,
                          const Tensor &input_ids,
                          const Tensor &input_vals,
                          const Tensor &bin_vals,
                          int output_bits,
                          const Map<int64_t, int64_t> &ID_to_index,
                          int64_t start_compute,
                          int64_t end_compute,
                          int64_t n_bin,
                          int64_t options) {
  auto output_keys_data = output_keys.getData<int64_t>();
  auto output_vals_data = output_vals.getData<T>();

  auto input_ids_data = input_ids.getData<int64_t>();
  auto input_vals_data = input_vals.getData<T>();

  auto bin_vals_data = bin_vals.getData<T>();

  // The function pointer implementation removes the option_bits
  // function call (might be inlined) and corresponding branch from
  // the hot loop, but it prevents inlining these functions, so
  // there will be function call overhead. Uncertain which would
  // be faster, testing needed. Also, code optimizers do weird things...
  hash_signature hash_fn = integer_multiplicative_hashing;
  switch (option_bits(options, 4, 2)) {
    case 0:
      hash_fn = integer_multiplicative_hashing;
      break;
    case 1:
      hash_fn = integer64_multiplicative_hashing;
      break;
    default:
      hash_fn = integer_multiplicative_hashing;
  }

  search_method<T> search_fn = lower_bound_search;
  switch (option_bits(options, 1, 0)) {
    case 0:
      search_fn = lower_bound_search<T>;
      break;
    case 1:
      search_fn = linear_search<T>;
      break;
    case 2:
      search_fn = upper_bound_search<T>;
      break;
    default:
      search_fn = lower_bound_search<T>;
  }

  for (uint64_t i = start_compute; i < end_compute; i++) {
    int64_t id = input_ids_data[i];
    T val = input_vals_data[i];

    auto iter = ID_to_index.find(id);
    if (iter != ID_to_index.end()) {
      int64_t feature_idx = iter->second;
      const T *bin_vals_start = bin_vals_data + feature_idx * n_bin;
      int64_t out_bin_idx = search_fn(bin_vals_start, val, n_bin);
      output_keys_data[i] = hash_fn(id, out_bin_idx, output_bits);
      output_vals_data[i] = 1;
    } else {
      // feature not calibrated
      output_keys_data[i] = id & ((1UL << output_bits) - 1);
      output_vals_data[i] = val;
    }
  }
}

void hashDiscretizerInfer(Tensor &output_keys,
                          Tensor &output_vals,
                          const Tensor &input_ids,
                          const Tensor &input_vals,
                          int n_bin,
                          const Tensor &bin_vals,
                          int output_bits,
                          const Map<int64_t, int64_t> &ID_to_index,
                          int start_compute,
                          int end_compute,
                          int64_t options) {
  if (input_ids.getType() != TWML_TYPE_INT64) {
    throw twml::Error(TWML_ERR_TYPE, "input_ids must be a Long Tensor");
  }

  if (output_keys.getType() != TWML_TYPE_INT64) {
    throw twml::Error(TWML_ERR_TYPE, "output_keys must be a Long Tensor");
  }

  if (input_vals.getType() != bin_vals.getType()) {
    throw twml::Error(TWML_ERR_TYPE,
                      "Data type of input_vals does not match type of bin_vals");
  }

  if (bin_vals.getNumDims() != 1) {
    throw twml::Error(TWML_ERR_SIZE,
                      "bin_vals must be 1 Dimensional");
  }

  uint64_t size = input_ids.getDim(0);
  if (end_compute == -1) {
    end_compute = size;
  }

  if (start_compute < 0 || start_compute >= size) {
    throw twml::Error(TWML_ERR_SIZE,
                      "start_compute out of range");
  }

  if (end_compute < -1 || end_compute > size) {
    throw twml::Error(TWML_ERR_SIZE,
                      "end_compute out of range");
  }

  if (start_compute > end_compute && end_compute != -1) {
    throw twml::Error(TWML_ERR_SIZE,
                      "must have start_compute <= end_compute, or end_compute==-1");
  }

  if (output_keys.getStride(0) != 1 || output_vals.getStride(0) != 1 ||
      input_ids.getStride(0) != 1 || input_vals.getStride(0) != 1 ||
      bin_vals.getStride(0) != 1) {
    throw twml::Error(TWML_ERR_SIZE,
                      "All Strides must be 1.");
  }

  switch (input_vals.getType()) {
    case TWML_TYPE_FLOAT:
      twml::hashDiscretizerInfer<float>(output_keys, output_vals,
                                        input_ids, input_vals,
                                        bin_vals, output_bits, ID_to_index,
                                        start_compute, end_compute, n_bin, options);
      break;
    case TWML_TYPE_DOUBLE:
      twml::hashDiscretizerInfer<double>(output_keys, output_vals,
                                         input_ids, input_vals,
                                         bin_vals, output_bits, ID_to_index,
                                         start_compute, end_compute, n_bin, options);
      break;
    default:
      throw twml::Error(TWML_ERR_TYPE,
                        "Unsupported datatype for hashDiscretizerInfer");
  }
}
} // namespace twml
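A standalone demonstration of the multiplicative hashing described above: sequential feature IDs, which a plain modulo would bucket predictably, get spread across the 2**output_bits key space. This mirrors the 32-bit variant with Knuth's constant (illustration only, not part of this commit):

#include <cstdint>
#include <cstdio>

static uint64_t mult_hash(uint64_t feature_id, int64_t bucket_index, uint64_t output_bits) {
  const uint64_t c = 2654435761;  // co-prime with 2**32, as noted above
  feature_id *= c;                // spread anomalous input distributions
  feature_id += bucket_index;     // mix in the discretizer bucket
  feature_id *= c;                // protect the bucket bits before the shift
  feature_id >>= 32 - output_bits;
  return feature_id & ((1UL << output_bits) - 1);
}

int main() {
  for (uint64_t id = 0; id < 4; id++)
    printf("id %llu -> key %llu\n", (unsigned long long)id,
           (unsigned long long)mult_hash(id, 0, 8));
  return 0;
}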
BIN twml/libtwml/src/lib/hashing_discretizer_impl.docx Normal file
Binary file not shown.
BIN twml/libtwml/src/lib/internal/endianutils.docx Normal file
Binary file not shown.
@@ -1,137 +0,0 @@
//
// endian_fix.h
// ImageCore
//
// For OSes that use glibc < 2.9 (like RHEL5)
//
#pragma once

#ifdef __APPLE__
#include <libkern/OSByteOrder.h>
#define htobe16(x) OSSwapHostToBigInt16(x)
#define htole16(x) OSSwapHostToLittleInt16(x)
#define betoh16(x) OSSwapBigToHostInt16(x)
#define letoh16(x) OSSwapLittleToHostInt16(x)
#define htobe32(x) OSSwapHostToBigInt32(x)
#define htole32(x) OSSwapHostToLittleInt32(x)
#define betoh32(x) OSSwapBigToHostInt32(x)
#define letoh32(x) OSSwapLittleToHostInt32(x)
#define htobe64(x) OSSwapHostToBigInt64(x)
#define htole64(x) OSSwapHostToLittleInt64(x)
#define betoh64(x) OSSwapBigToHostInt64(x)
#define letoh64(x) OSSwapLittleToHostInt64(x)
#else
#include <endian.h>
#ifdef __USE_BSD
/* Conversion interfaces. */
#include <byteswap.h>

#if __BYTE_ORDER == __LITTLE_ENDIAN
#ifndef htobe16
#define htobe16(x) __bswap_16(x)
#endif
#ifndef htole16
#define htole16(x) (x)
#endif
#ifndef betoh16
#define betoh16(x) __bswap_16(x)
#endif
#ifndef letoh16
#define letoh16(x) (x)
#endif

#ifndef htobe32
#define htobe32(x) __bswap_32(x)
#endif
#ifndef htole32
#define htole32(x) (x)
#endif
#ifndef betoh32
#define betoh32(x) __bswap_32(x)
#endif
#ifndef letoh32
#define letoh32(x) (x)
#endif

#ifndef htobe64
#define htobe64(x) __bswap_64(x)
#endif
#ifndef htole64
#define htole64(x) (x)
#endif
#ifndef betoh64
#define betoh64(x) __bswap_64(x)
#endif
#ifndef letoh64
#define letoh64(x) (x)
#endif

#else /* __BYTE_ORDER == __LITTLE_ENDIAN */
#ifndef htobe16
#define htobe16(x) (x)
#endif
#ifndef htole16
#define htole16(x) __bswap_16(x)
#endif
#ifndef be16toh
#define be16toh(x) (x)
#endif
#ifndef le16toh
#define le16toh(x) __bswap_16(x)
#endif

#ifndef htobe32
#define htobe32(x) (x)
#endif
#ifndef htole32
#define htole32(x) __bswap_32(x)
#endif
#ifndef betoh32
#define betoh32(x) (x)
#endif
#ifndef letoh32
#define letoh32(x) __bswap_32(x)
#endif

#ifndef htobe64
#define htobe64(x) (x)
#endif
#ifndef htole64
#define htole64(x) __bswap_64(x)
#endif
#ifndef betoh64
#define betoh64(x) (x)
#endif
#ifndef letoh64
#define letoh64(x) __bswap_64(x)
#endif

#endif /* __BYTE_ORDER == __LITTLE_ENDIAN */

#else /* __USE_BSD */
#ifndef betoh16
#define betoh16 be16toh
#endif

#ifndef betoh32
#define betoh32 be32toh
#endif

#ifndef betoh64
#define betoh64 be64toh
#endif

#ifndef letoh16
#define letoh16 le16toh
#endif

#ifndef letoh32
#define letoh32 le32toh
#endif

#ifndef letoh64
#define letoh64 le64toh
#endif

#endif /* __USE_BSD */
#endif /* __APPLE__ */
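Usage sketch for the macros above: betoh32 is a byte swap on little-endian hosts and the identity on big-endian ones, so callers like ThriftReader stay platform-agnostic (illustration only, assuming the twml include paths):

#include <cstdint>
#include <cstdio>
#include "internal/endianutils.h"

int main() {
  uint32_t n = 0x01020304;
  printf("0x%08x\n", (uint32_t)betoh32(n));  // prints 0x04030201 on x86
  return 0;
}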
BIN twml/libtwml/src/lib/internal/error.docx Normal file
Binary file not shown.
@@ -1,29 +0,0 @@
#pragma once
#include <twml/Error.h>
#include <iostream>

#define HANDLE_EXCEPTIONS(fn) do { \
    try { \
      fn \
    } catch(const twml::Error &e) { \
      std::cerr << e.what() << std::endl; \
      return e.err(); \
    } catch(...) { \
      std::cerr << "Unknown error\n"; \
      return TWML_ERR_UNKNOWN; \
    } \
  } while(0)

#define TWML_CHECK(fn, msg) do { \
    twml_err err = fn; \
    if (err == TWML_ERR_NONE) break; \
    throw twml::Error(err, msg); \
  } while(0)


#define CHECK_THRIFT_TYPE(real_type, expected_type, type) do { \
    int real_type_val = real_type; \
    if (real_type_val != expected_type) { \
      throw twml::ThriftInvalidType(real_type_val, __func__, type); \
    } \
  } while(0)
BIN twml/libtwml/src/lib/internal/interpolate.docx Normal file
Binary file not shown.
@@ -1,74 +0,0 @@
#pragma once

#ifdef __cplusplus
#include <twml/optim.h>
namespace twml {

enum InterpolationMode {LINEAR, NEAREST};

template<typename Tx, typename Ty>
static Tx interpolation(const Tx *xsData, const int64_t xsStride,
                        const Ty *ysData, const int64_t ysStride,
                        const Tx val, const int64_t mainSize,
                        const InterpolationMode mode,
                        const int64_t lowest,
                        const bool return_local_index = false) {
  int64_t left = 0;
  int64_t right = mainSize - 1;

  if (val <= xsData[0]) {
    right = 0;
  } else if (val >= xsData[right * xsStride]) {
    left = right;
  } else {
    while (left < right) {
      int64_t middle = (left + right) / 2;

      if (middle < mainSize - 1 &&
          val >= xsData[middle * xsStride] &&
          val <= xsData[(middle + 1) * xsStride]) {
        left = middle;
        right = middle + 1;
        break;
      } else if (val > xsData[middle * xsStride]) {
        left = middle;
      } else {
        right = middle;
      }
    }
    if (lowest) {
      while (left > 0 &&
             val >= xsData[(left - 1) * xsStride] &&
             val == xsData[left * xsStride]) {
        left--;
        right--;
      }
    }
  }

  Ty out = 0;
  if (return_local_index) {
    out = left;
  } else if (mode == NEAREST) {
    out = ysData[left * ysStride];
  } else {
    int64_t leftys = left * ysStride;
    int64_t rightys = right * ysStride;
    int64_t leftxs = left * xsStride;
    int64_t rightxs = right * xsStride;
    if (right != left + 1 ||
        xsData[leftxs] == xsData[rightxs]) {
      out = ysData[leftys];
    } else {
      Tx xLeft = xsData[leftxs];
      Tx xRight = xsData[rightxs];
      Tx yLeft = ysData[leftys];
      Tx ratio = (val - xLeft) / (xRight - xLeft);
      out = ratio * (ysData[rightys] - yLeft) + yLeft;
    }
  }
  return out;
}

} // namespace twml
#endif
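A worked example of the search above (values invented; assumes the twml include paths, since interpolate.h pulls in <twml/optim.h>). discretizerInfer calls this with NEAREST, which returns the y of the left neighbor found by the binary search; LINEAR would blend both neighbors, giving 20 + 0.4 * (30 - 20) = 24 here:

#include <cstdint>
#include <cstdio>
#include "internal/interpolate.h"

int main() {
  double xs[] = {0.0, 1.0, 2.0};
  int64_t ys[] = {10, 20, 30};
  // val = 1.4 brackets to left = 1, right = 2
  double nearest = twml::interpolation<double, int64_t>(xs, 1, ys, 1, 1.4, 3,
                                                        twml::NEAREST, 0);
  printf("NEAREST -> %g\n", nearest);  // 20, i.e. ys[left]
  return 0;
}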
BIN twml/libtwml/src/lib/internal/khash.docx Normal file
Binary file not shown.
@@ -1,627 +0,0 @@
/* The MIT License

   Copyright (c) 2008, 2009, 2011 by Attractive Chaos <attractor@live.co.uk>

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be
   included in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   SOFTWARE.
*/

/*
  An example:

#include "khash.h"
KHASH_MAP_INIT_INT(32, char)
int main() {
  int ret, is_missing;
  khiter_t k;
  khash_t(32) *h = kh_init(32);
  k = kh_put(32, h, 5, &ret);
  kh_value(h, k) = 10;
  k = kh_get(32, h, 10);
  is_missing = (k == kh_end(h));
  k = kh_get(32, h, 5);
  kh_del(32, h, k);
  for (k = kh_begin(h); k != kh_end(h); ++k)
    if (kh_exist(h, k)) kh_value(h, k) = 1;
  kh_destroy(32, h);
  return 0;
}
*/

/*
  2013-05-02 (0.2.8):

  * Use quadratic probing. When the capacity is power of 2, stepping function
    i*(i+1)/2 guarantees to traverse each bucket. It is better than double
    hashing on cache performance and is more robust than linear probing.

    In theory, double hashing should be more robust than quadratic probing.
    However, my implementation is probably not for large hash tables, because
    the second hash function is closely tied to the first hash function,
    which reduce the effectiveness of double hashing.

  Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php

  2011-12-29 (0.2.7):

  * Minor code clean up; no actual effect.

  2011-09-16 (0.2.6):

  * The capacity is a power of 2. This seems to dramatically improve the
    speed for simple keys. Thank Zilong Tan for the suggestion. Reference:

    - http://code.google.com/p/ulib/
    - http://nothings.org/computer/judy/

  * Allow to optionally use linear probing which usually has better
    performance for random input. Double hashing is still the default as it
    is more robust to certain non-random input.

  * Added Wang's integer hash function (not used by default). This hash
    function is more robust to certain non-random input.

  2011-02-14 (0.2.5):

  * Allow to declare global functions.

  2009-09-26 (0.2.4):

  * Improve portability

  2008-09-19 (0.2.3):

  * Corrected the example
  * Improved interfaces

  2008-09-11 (0.2.2):

  * Improved speed a little in kh_put()

  2008-09-10 (0.2.1):

  * Added kh_clear()
  * Fixed a compiling error

  2008-09-02 (0.2.0):

  * Changed to token concatenation which increases flexibility.

  2008-08-31 (0.1.2):

  * Fixed a bug in kh_get(), which has not been tested previously.

  2008-08-31 (0.1.1):

  * Added destructor
*/


#ifndef __AC_KHASH_H
#define __AC_KHASH_H

/*!
  @header

  Generic hash table library.
*/

#define AC_VERSION_KHASH_H "0.2.8"

#include <stdlib.h>
#include <string.h>
#include <limits.h>

/* compiler specific configuration */

#if UINT_MAX == 0xffffffffu
typedef unsigned int khint32_t;
#elif ULONG_MAX == 0xffffffffu
typedef unsigned long khint32_t;
#endif

#if ULONG_MAX == ULLONG_MAX
typedef unsigned long khint64_t;
#else
typedef uint64_t khint64_t;
#endif

#ifndef kh_inline
#ifdef _MSC_VER
#define kh_inline __inline
#else
#define kh_inline inline
#endif
#endif /* kh_inline */

#ifndef klib_unused
#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3)
#define klib_unused __attribute__ ((__unused__))
#else
#define klib_unused
#endif
#endif /* klib_unused */

typedef khint32_t khint_t;
typedef khint_t khiter_t;

#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))

#define __ac_fsize(m) ((m) < 16? 1 : (m)>>4)

#ifndef kroundup32
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif

#ifndef kcalloc
#define kcalloc(N,Z) calloc(N,Z)
#endif
#ifndef kmalloc
#define kmalloc(Z) malloc(Z)
#endif
#ifndef krealloc
#define krealloc(P,Z) realloc(P,Z)
#endif
#ifndef kfree
#define kfree(P) free(P)
#endif

static const double __ac_HASH_UPPER = 0.77;

#define __KHASH_TYPE(name, khkey_t, khval_t) \
  typedef struct kh_##name##_s { \
    khint_t n_buckets, size, n_occupied, upper_bound; \
    khint32_t *flags; \
    khkey_t *keys; \
    khval_t *vals; \
  } kh_##name##_t;

#define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \
  extern kh_##name##_t *kh_init_##name(void); \
  extern void kh_destroy_##name(kh_##name##_t *h); \
  extern void kh_clear_##name(kh_##name##_t *h); \
  extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \
  extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \
  extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \
  extern void kh_del_##name(kh_##name##_t *h, khint_t x);

#define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
  SCOPE kh_##name##_t *kh_init_##name(void) { \
    return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t)); \
  } \
  SCOPE void kh_destroy_##name(kh_##name##_t *h) \
  { \
    if (h) { \
      kfree((void *)h->keys); kfree(h->flags); \
      kfree((void *)h->vals); \
      kfree(h); \
    } \
  } \
  SCOPE void kh_clear_##name(kh_##name##_t *h) \
  { \
    if (h && h->flags) { \
      memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \
      h->size = h->n_occupied = 0; \
    } \
  } \
  SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
  { \
    if (h->n_buckets) { \
      khint_t k, i, last, mask, step = 0; \
      mask = h->n_buckets - 1; \
      k = __hash_func(key); i = k & mask; \
      last = i; \
      while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
        i = (i + (++step)) & mask; \
        if (i == last) return h->n_buckets; \
      } \
      return __ac_iseither(h->flags, i)? h->n_buckets : i; \
    } else return 0; \
  } \
  SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
  { /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \
    khint32_t *new_flags = 0; \
    khint_t j = 1; \
    { \
      kroundup32(new_n_buckets); \
      if (new_n_buckets < 4) new_n_buckets = 4; \
      if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \
      else { /* hash table size to be changed (shrink or expand); rehash */ \
        new_flags = (khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
        if (!new_flags) return -1; \
        memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
        if (h->n_buckets < new_n_buckets) { /* expand */ \
          khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
          if (!new_keys) { kfree(new_flags); return -1; } \
          h->keys = new_keys; \
          if (kh_is_map) { \
            khval_t *new_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
            if (!new_vals) { kfree(new_flags); return -1; } \
            h->vals = new_vals; \
          } \
        } /* otherwise shrink */ \
      } \
    } \
    if (j) { /* rehashing is needed */ \
      for (j = 0; j != h->n_buckets; ++j) { \
        if (__ac_iseither(h->flags, j) == 0) { \
          khkey_t key = h->keys[j]; \
          khval_t val; \
          khint_t new_mask; \
          new_mask = new_n_buckets - 1; \
          if (kh_is_map) val = h->vals[j]; \
          __ac_set_isdel_true(h->flags, j); \
          while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \
            khint_t k, i, step = 0; \
            k = __hash_func(key); \
            i = k & new_mask; \
            while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \
            __ac_set_isempty_false(new_flags, i); \
            if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \
              { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
              if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
              __ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \
            } else { /* write the element and jump out of the loop */ \
              h->keys[i] = key; \
              if (kh_is_map) h->vals[i] = val; \
              break; \
            } \
          } \
        } \
      } \
      if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \
        h->keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
        if (kh_is_map) h->vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
      } \
      kfree(h->flags); /* free the working space */ \
      h->flags = new_flags; \
      h->n_buckets = new_n_buckets; \
      h->n_occupied = h->size; \
      h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
    } \
    return 0; \
  } \
  SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
  { \
    khint_t x; \
    if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \
      if (h->n_buckets > (h->size<<1)) { \
        if (kh_resize_##name(h, h->n_buckets - 1) < 0) { /* clear "deleted" elements */ \
          *ret = -1; return h->n_buckets; \
        } \
      } else if (kh_resize_##name(h, h->n_buckets + 1) < 0) { /* expand the hash table */ \
        *ret = -1; return h->n_buckets; \
      } \
    } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \
    { \
      khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \
      x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \
      if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \
      else { \
        last = i; \
        while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
          if (__ac_isdel(h->flags, i)) site = i; \
          i = (i + (++step)) & mask; \
          if (i == last) { x = site; break; } \
        } \
        if (x == h->n_buckets) { \
          if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \
          else x = i; \
        } \
      } \
    } \
    if (__ac_isempty(h->flags, x)) { /* not present at all */ \
      h->keys[x] = key; \
      __ac_set_isboth_false(h->flags, x); \
      ++h->size; ++h->n_occupied; \
      *ret = 1; \
    } else if (__ac_isdel(h->flags, x)) { /* deleted */ \
      h->keys[x] = key; \
      __ac_set_isboth_false(h->flags, x); \
      ++h->size; \
      *ret = 2; \
    } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \
    return x; \
  } \
  SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \
  { \
    if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \
      __ac_set_isdel_true(h->flags, x); \
      --h->size; \
    } \
  }

#define KHASH_DECLARE(name, khkey_t, khval_t) \
  __KHASH_TYPE(name, khkey_t, khval_t) \
  __KHASH_PROTOTYPES(name, khkey_t, khval_t)

#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
  __KHASH_TYPE(name, khkey_t, khval_t) \
  __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)

#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
  KHASH_INIT2(name, static kh_inline klib_unused, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)

/* --- BEGIN OF HASH FUNCTIONS --- */

/*! @function
  @abstract     Integer hash function
  @param  key   The integer [khint32_t]
  @return       The hash value [khint_t]
*/
#define kh_int_hash_func(key) (khint32_t)(key)
/*! @function
  @abstract     Integer comparison function
*/
#define kh_int_hash_equal(a, b) ((a) == (b))
/*! @function
  @abstract     64-bit integer hash function
  @param  key   The integer [khint64_t]
  @return       The hash value [khint_t]
*/
#define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11)
/*! @function
  @abstract     64-bit integer comparison function
*/
#define kh_int64_hash_equal(a, b) ((a) == (b))
/*! @function
  @abstract     const char* hash function
  @param  s     Pointer to a null terminated string
  @return       The hash value
*/
static kh_inline khint_t __ac_X31_hash_string(const char *s)
{
  khint_t h = (khint_t)*s;
  if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s;
  return h;
}
/*! @function
  @abstract     Another interface to const char* hash function
  @param  key   Pointer to a null terminated string [const char*]
  @return       The hash value [khint_t]
*/
#define kh_str_hash_func(key) __ac_X31_hash_string(key)
/*! @function
  @abstract     Const char* comparison function
*/
#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)

static kh_inline khint_t __ac_Wang_hash(khint_t key)
{
  key += ~(key << 15);
  key ^= (key >> 10);
  key += (key << 3);
  key ^= (key >> 6);
  key += ~(key << 11);
  key ^= (key >> 16);
  return key;
}
#define kh_int_hash_func2(key) __ac_Wang_hash((khint_t)key)

/* --- END OF HASH FUNCTIONS --- */

/* Other convenient macros... */

/*!
  @abstract Type of the hash table.
  @param  name  Name of the hash table [symbol]
*/
#define khash_t(name) kh_##name##_t

/*! @function
  @abstract     Initiate a hash table.
  @param  name  Name of the hash table [symbol]
  @return       Pointer to the hash table [khash_t(name)*]
*/
#define kh_init(name) kh_init_##name()

/*! @function
  @abstract     Destroy a hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
*/
#define kh_destroy(name, h) kh_destroy_##name(h)

/*! @function
  @abstract     Reset a hash table without deallocating memory.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
*/
#define kh_clear(name, h) kh_clear_##name(h)

/*! @function
  @abstract     Resize a hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  s     New size [khint_t]
*/
#define kh_resize(name, h, s) kh_resize_##name(h, s)

/*! @function
  @abstract     Insert a key to the hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  k     Key [type of keys]
  @param  r     Extra return code: -1 if the operation failed;
                0 if the key is present in the hash table;
                1 if the bucket is empty (never used); 2 if the element in
                the bucket has been deleted [int*]
  @return       Iterator to the inserted element [khint_t]
*/
#define kh_put(name, h, k, r) kh_put_##name(h, k, r)

/*! @function
  @abstract     Retrieve a key from the hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  k     Key [type of keys]
  @return       Iterator to the found element, or kh_end(h) if the element is absent [khint_t]
*/
#define kh_get(name, h, k) kh_get_##name(h, k)

/*! @function
  @abstract     Remove a key from the hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  k     Iterator to the element to be deleted [khint_t]
*/
#define kh_del(name, h, k) kh_del_##name(h, k)

/*! @function
  @abstract     Test whether a bucket contains data.
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  x     Iterator to the bucket [khint_t]
  @return       1 if containing data; 0 otherwise [int]
*/
#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))

/*! @function
  @abstract     Get key given an iterator
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  x     Iterator to the bucket [khint_t]
  @return       Key [type of keys]
*/
#define kh_key(h, x) ((h)->keys[x])

/*! @function
  @abstract     Get value given an iterator
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  x     Iterator to the bucket [khint_t]
  @return       Value [type of values]
  @discussion   For hash sets, calling this results in segfault.
*/
#define kh_val(h, x) ((h)->vals[x])

/*! @function
  @abstract     Alias of kh_val()
*/
#define kh_value(h, x) ((h)->vals[x])

/*! @function
  @abstract     Get the start iterator
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       The start iterator [khint_t]
*/
#define kh_begin(h) (khint_t)(0)

/*! @function
  @abstract     Get the end iterator
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       The end iterator [khint_t]
*/
#define kh_end(h) ((h)->n_buckets)

/*! @function
  @abstract     Get the number of elements in the hash table
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       Number of elements in the hash table [khint_t]
*/
#define kh_size(h) ((h)->size)

/*! @function
  @abstract     Get the number of buckets in the hash table
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       Number of buckets in the hash table [khint_t]
*/
#define kh_n_buckets(h) ((h)->n_buckets)

/*! @function
  @abstract     Iterate over the entries in the hash table
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  kvar  Variable to which key will be assigned
  @param  vvar  Variable to which value will be assigned
  @param  code  Block of code to execute
*/
#define kh_foreach(h, kvar, vvar, code) { khint_t __i; \
  for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
    if (!kh_exist(h,__i)) continue; \
    (kvar) = kh_key(h,__i); \
    (vvar) = kh_val(h,__i); \
    code; \
  } }

/*! @function
  @abstract     Iterate over the values in the hash table
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  vvar  Variable to which value will be assigned
  @param  code  Block of code to execute
*/
#define kh_foreach_value(h, vvar, code) { khint_t __i; \
  for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
    if (!kh_exist(h,__i)) continue; \
    (vvar) = kh_val(h,__i); \
    code; \
  } }

/* More convenient interfaces */

/*! @function
  @abstract     Instantiate a hash set containing integer keys
  @param  name  Name of the hash table [symbol]
|
||||
*/
|
||||
#define KHASH_SET_INIT_INT(name) \
|
||||
KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)
|
||||
|
||||
/*! @function
|
||||
@abstract Instantiate a hash map containing integer keys
|
||||
@param name Name of the hash table [symbol]
|
||||
@param khval_t Type of values [type]
|
||||
*/
|
||||
#define KHASH_MAP_INIT_INT(name, khval_t) \
|
||||
KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
|
||||
|
||||
/*! @function
|
||||
@abstract Instantiate a hash map containing 64-bit integer keys
|
||||
@param name Name of the hash table [symbol]
|
||||
*/
|
||||
#define KHASH_SET_INIT_INT64(name) \
|
||||
KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)
|
||||
|
||||
/*! @function
|
||||
@abstract Instantiate a hash map containing 64-bit integer keys
|
||||
@param name Name of the hash table [symbol]
|
||||
@param khval_t Type of values [type]
|
||||
*/
|
||||
#define KHASH_MAP_INIT_INT64(name, khval_t) \
|
||||
KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
|
||||
|
||||
typedef const char *kh_cstr_t;
|
||||
/*! @function
|
||||
@abstract Instantiate a hash map containing const char* keys
|
||||
@param name Name of the hash table [symbol]
|
||||
*/
|
||||
#define KHASH_SET_INIT_STR(name) \
|
||||
KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)
|
||||
|
||||
/*! @function
|
||||
@abstract Instantiate a hash map containing const char* keys
|
||||
@param name Name of the hash table [symbol]
|
||||
@param khval_t Type of values [type]
|
||||
*/
|
||||
#define KHASH_MAP_INIT_STR(name, khval_t) \
|
||||
KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
|
||||
|
||||
#endif /* __AC_KHASH_H */
|
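The macros above are the whole public surface of khash. As a quick, hedged illustration (not part of the original header), a minimal program that builds an int-to-int map with them:

/* Hedged usage sketch, assuming khash.h is on the include path. */
#include <stdio.h>
#include "khash.h"

KHASH_MAP_INIT_INT(i2i, int)  /* instantiates khash_t(i2i) and the kh_*_i2i() functions */

int main(void) {
  int ret;
  khash_t(i2i) *h = kh_init(i2i);
  khint_t it = kh_put(i2i, h, 42, &ret);   /* ret == 1: bucket was empty */
  kh_val(h, it) = 7;
  it = kh_get(i2i, h, 42);
  if (it != kh_end(h)) printf("42 -> %d\n", kh_val(h, it));
  int k, v;
  kh_foreach(h, k, v, printf("%d -> %d\n", k, v));
  kh_destroy(i2i, h);
  return 0;
}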
BIN twml/libtwml/src/lib/internal/linear_search.docx (Normal file, binary not shown)
@ -1,17 +0,0 @@
#pragma once

#ifdef __cplusplus
#include <twml/optim.h>
namespace twml {

// Scans the sorted array xsData from the left and returns the index of the
// first element that is >= val (or mainSize if every element is smaller).
template<typename Tx>
static int64_t linear_search(const Tx *xsData, const Tx val, const int64_t mainSize) {
  int64_t left = 0;
  int64_t right = mainSize-1;
  while(left <= right && val > xsData[left])
    left++;
  return left;
}

} // namespace twml
#endif
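Because linear_search returns the first index whose element is >= val, it matches std::lower_bound on sorted input. A small hypothetical check (assumes the header is reachable as internal/linear_search.h):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include "internal/linear_search.h"

int main() {
  const double xs[] = {0.1, 0.5, 0.9};
  for (double v : {0.0, 0.5, 1.0}) {
    const int64_t idx = twml::linear_search(xs, v, 3);
    // linear_search agrees with std::lower_bound on sorted data.
    assert(idx == std::lower_bound(xs, xs + 3, v) - xs);
  }
  return 0;
}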
BIN twml/libtwml/src/lib/internal/murmur_hash3.docx (Normal file, binary not shown)
@ -1,37 +0,0 @@
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.

#ifndef _MURMURHASH3_H_
#define _MURMURHASH3_H_

//-----------------------------------------------------------------------------
// Platform-specific functions and macros

// Microsoft Visual Studio

#if defined(_MSC_VER) && (_MSC_VER < 1600)

typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
typedef unsigned __int64 uint64_t;

// Other compilers

#else // defined(_MSC_VER)

#include <stdint.h>

#endif // !defined(_MSC_VER)

//-----------------------------------------------------------------------------

void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out );

void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );

void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );

//-----------------------------------------------------------------------------

#endif // _MURMURHASH3_H_
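A hedged usage sketch for these declarations (hash values are printed rather than asserted, since they depend on the implementation further below):

#include <cstdio>
#include <cstring>
#include "internal/murmur_hash3.h"

int main() {
  const char *msg = "hello";
  uint32_t h32 = 0;
  MurmurHash3_x86_32(msg, (int)strlen(msg), /*seed=*/42, &h32);
  uint64_t h128[2] = {0, 0};
  MurmurHash3_x64_128(msg, (int)strlen(msg), /*seed=*/42, h128);
  printf("x86_32: %u, x64_128: %llu %llu\n", h32,
         (unsigned long long)h128[0], (unsigned long long)h128[1]);
  return 0;
}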
BIN twml/libtwml/src/lib/internal/thrift.docx (Normal file, binary not shown)
@ -1,69 +0,0 @@
// For details of how to encode and decode thrift, check
// https://github.com/apache/thrift/blob/master/doc/specs/thrift-binary-protocol.md

// Definitions of the thrift binary format
typedef enum {
  TTYPE_STOP   = 0,
  TTYPE_VOID   = 1,
  TTYPE_BOOL   = 2,
  TTYPE_BYTE   = 3,
  TTYPE_DOUBLE = 4,
  TTYPE_I16    = 6,
  TTYPE_I32    = 8,
  TTYPE_I64    = 10,
  TTYPE_STRING = 11,
  TTYPE_STRUCT = 12,
  TTYPE_MAP    = 13,
  TTYPE_SET    = 14,
  TTYPE_LIST   = 15,
  TTYPE_ENUM   = 16,
} TTYPES;

// Fields of a batch prediction response
typedef enum {
  BPR_DUMMY       ,
  BPR_PREDICTIONS ,
} BPR_FIELDS;

// Fields of a datarecord
typedef enum {
  DR_CROSS             , // fake field for crosses
  DR_BINARY            ,
  DR_CONTINUOUS        ,
  DR_DISCRETE          ,
  DR_STRING            ,
  DR_SPARSE_BINARY     ,
  DR_SPARSE_CONTINUOUS ,
  DR_BLOB              ,
  DR_GENERAL_TENSOR    ,
  DR_SPARSE_TENSOR     ,
} DR_FIELDS;

// Fields for General tensor
typedef enum {
  GT_DUMMY  , // dummy field
  GT_RAW    ,
  GT_STRING ,
  GT_INT32  ,
  GT_INT64  ,
  GT_FLOAT  ,
  GT_DOUBLE ,
  GT_BOOL   ,
} GT_FIELDS;

typedef enum {
  SP_DUMMY , // dummy field
  SP_COO   ,
} SP_FIELDS;

// Enum values from tensor.thrift
typedef enum {
  DATA_TYPE_FLOAT  ,
  DATA_TYPE_DOUBLE ,
  DATA_TYPE_INT32  ,
  DATA_TYPE_INT64  ,
  DATA_TYPE_UINT8  ,
  DATA_TYPE_STRING ,
  DATA_TYPE_BYTE   ,
  DATA_TYPE_BOOL   ,
} DATA_TYPES;
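Per the thrift-binary-protocol spec linked above, a list header is one element-type byte followed by a big-endian int32 element count. A standalone encoder for illustration only (writeListHeader is an invented helper, not the twml ThriftWriter API):

#include <cstddef>
#include <cstdint>

// Hypothetical sketch: serialize a binary-protocol list header
// (1 element-type byte + big-endian int32 count) into buf; returns bytes written.
static size_t writeListHeader(uint8_t *buf, uint8_t elemType, int32_t count) {
  buf[0] = elemType;  // e.g. TTYPE_I64 for a list of 64-bit ints
  buf[1] = (uint8_t)((count >> 24) & 0xFF);
  buf[2] = (uint8_t)((count >> 16) & 0xFF);
  buf[3] = (uint8_t)((count >> 8) & 0xFF);
  buf[4] = (uint8_t)(count & 0xFF);
  return 5;
}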
BIN twml/libtwml/src/lib/internal/utf_converter.docx (Normal file, binary not shown)
@ -1,10 +0,0 @@
#ifndef _UTF_CONVERTER_H_
#define _UTF_CONVERTER_H_

#include <stddef.h>
#include <stdint.h>
#include <sys/types.h>

ssize_t utf8_to_utf16(const uint8_t *in, uint64_t in_len, uint16_t *out, uint64_t max_out);

#endif
@ -1,61 +0,0 @@
#include <twml/io/IOError.h>

namespace twml {
namespace io {

namespace {
std::string messageFromStatus(IOError::Status status) {
  switch (status) {
    case IOError::OUT_OF_RANGE:
      return "failed to read enough input";
    case IOError::WRONG_MAGIC:
      return "wrong magic in stream";
    case IOError::WRONG_HEADER:
      return "wrong header in stream";
    case IOError::ERROR_HEADER_CHECKSUM:
      return "header checksum doesn't match";
    case IOError::INVALID_METHOD:
      return "using invalid method";
    case IOError::USING_RESERVED:
      return "using reserved flag";
    case IOError::ERROR_HEADER_EXTRA_FIELD_CHECKSUM:
      return "extra header field checksum doesn't match";
    case IOError::CANT_FIT_OUTPUT:
      return "can't fit output in the given space";
    case IOError::SPLIT_FILE:
      return "split files aren't supported";
    case IOError::BLOCK_SIZE_TOO_LARGE:
      return "block size is too large";
    case IOError::SOURCE_LARGER_THAN_DESTINATION:
      return "source is larger than destination";
    case IOError::DESTINATION_LARGER_THAN_CAPACITY:
      return "destination buffer is too small to fit uncompressed result";
    case IOError::HEADER_FLAG_MISMATCH:
      return "failed to match flags for compressed and decompressed data";
    case IOError::NOT_ENOUGH_INPUT:
      return "not enough input to proceed with decompression";
    case IOError::ERROR_SOURCE_BLOCK_CHECKSUM:
      return "source block checksum doesn't match";
    case IOError::COMPRESSED_DATA_VIOLATION:
      return "error occurred while decompressing the data";
    case IOError::ERROR_DESTINATION_BLOCK_CHECKSUM:
      return "destination block checksum doesn't match";
    case IOError::EMPTY_RECORD:
      return "can't write an empty record";
    case IOError::MALFORMED_MEMORY_RECORD:
      return "can't write malformed record";
    case IOError::UNSUPPORTED_OUTPUT_TYPE:
      return "output data type is not supported";
    case IOError::OTHER_ERROR:
    default:
      return "unknown error occurred";
  }
}
}  // namespace

IOError::IOError(Status status): twml::Error(TWML_ERR_IO, "Found error while processing stream: " +
                                             messageFromStatus(status)), m_status(status) {}

}  // namespace io
}  // namespace twml
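A hedged sketch of a call site (hypothetical; the ops later in this commit catch these as std::exception and surface e.what(), which carries the messageFromStatus() text):

#include <iostream>
#include <twml/io/IOError.h>

void readSomething() {
  try {
    // Stand-in for a twml I/O call that fails on a bad stream.
    throw twml::io::IOError(twml::io::IOError::WRONG_MAGIC);
  } catch (const std::exception &e) {
    // Prints: "Found error while processing stream: wrong magic in stream"
    std::cerr << e.what() << std::endl;
  }
}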
BIN twml/libtwml/src/lib/io/IOError.docx (Normal file, binary not shown)
@ -1,335 +0,0 @@
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.

// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.

#include "internal/murmur_hash3.h"

//-----------------------------------------------------------------------------
// Platform-specific functions and macros

// Microsoft Visual Studio

#if defined(_MSC_VER)

#define FORCE_INLINE __forceinline

#include <stdlib.h>

#define ROTL32(x,y) _rotl(x,y)
#define ROTL64(x,y) _rotl64(x,y)

#define BIG_CONSTANT(x) (x)

// Other compilers

#else // defined(_MSC_VER)

#define FORCE_INLINE inline __attribute__((always_inline))

FORCE_INLINE uint32_t rotl32 ( uint32_t x, int8_t r )
{
  return (x << r) | (x >> (32 - r));
}

FORCE_INLINE uint64_t rotl64 ( uint64_t x, int8_t r )
{
  return (x << r) | (x >> (64 - r));
}

#define ROTL32(x,y) rotl32(x,y)
#define ROTL64(x,y) rotl64(x,y)

#define BIG_CONSTANT(x) (x##LLU)

#endif // !defined(_MSC_VER)

//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here

FORCE_INLINE uint32_t getblock32 ( const uint32_t * p, int i )
{
  return p[i];
}

FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i )
{
  return p[i];
}

//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche

FORCE_INLINE uint32_t fmix32 ( uint32_t h )
{
  h ^= h >> 16;
  h *= 0x85ebca6b;
  h ^= h >> 13;
  h *= 0xc2b2ae35;
  h ^= h >> 16;

  return h;
}

//----------

FORCE_INLINE uint64_t fmix64 ( uint64_t k )
{
  k ^= k >> 33;
  k *= BIG_CONSTANT(0xff51afd7ed558ccd);
  k ^= k >> 33;
  k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
  k ^= k >> 33;

  return k;
}

//-----------------------------------------------------------------------------

void MurmurHash3_x86_32 ( const void * key, int len,
                          uint32_t seed, void * out )
{
  const uint8_t * data = (const uint8_t*)key;
  const int nblocks = len / 4;

  uint32_t h1 = seed;

  const uint32_t c1 = 0xcc9e2d51;
  const uint32_t c2 = 0x1b873593;

  //----------
  // body

  const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);

  for(int i = -nblocks; i; i++)
  {
    uint32_t k1 = getblock32(blocks,i);

    k1 *= c1;
    k1 = ROTL32(k1,15);
    k1 *= c2;

    h1 ^= k1;
    h1 = ROTL32(h1,13);
    h1 = h1*5+0xe6546b64;
  }

  //----------
  // tail

  const uint8_t * tail = (const uint8_t*)(data + nblocks*4);

  uint32_t k1 = 0;

  // Intentional fall-through: each case consumes one remaining tail byte.
  switch(len & 3)
  {
  case 3: k1 ^= tail[2] << 16;
  case 2: k1 ^= tail[1] << 8;
  case 1: k1 ^= tail[0];
          k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
  };

  //----------
  // finalization

  h1 ^= len;

  h1 = fmix32(h1);

  *(uint32_t*)out = h1;
}

//-----------------------------------------------------------------------------

void MurmurHash3_x86_128 ( const void * key, const int len,
                           uint32_t seed, void * out )
{
  const uint8_t * data = (const uint8_t*)key;
  const int nblocks = len / 16;

  uint32_t h1 = seed;
  uint32_t h2 = seed;
  uint32_t h3 = seed;
  uint32_t h4 = seed;

  const uint32_t c1 = 0x239b961b;
  const uint32_t c2 = 0xab0e9789;
  const uint32_t c3 = 0x38b34ae5;
  const uint32_t c4 = 0xa1e38b93;

  //----------
  // body

  const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);

  for(int i = -nblocks; i; i++)
  {
    uint32_t k1 = getblock32(blocks,i*4+0);
    uint32_t k2 = getblock32(blocks,i*4+1);
    uint32_t k3 = getblock32(blocks,i*4+2);
    uint32_t k4 = getblock32(blocks,i*4+3);

    k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;

    h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;

    k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;

    h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;

    k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;

    h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;

    k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;

    h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
  }

  //----------
  // tail

  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);

  uint32_t k1 = 0;
  uint32_t k2 = 0;
  uint32_t k3 = 0;
  uint32_t k4 = 0;

  // Intentional fall-through: each case consumes one remaining tail byte.
  switch(len & 15)
  {
  case 15: k4 ^= tail[14] << 16;
  case 14: k4 ^= tail[13] << 8;
  case 13: k4 ^= tail[12] << 0;
           k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;

  case 12: k3 ^= tail[11] << 24;
  case 11: k3 ^= tail[10] << 16;
  case 10: k3 ^= tail[ 9] << 8;
  case  9: k3 ^= tail[ 8] << 0;
           k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;

  case  8: k2 ^= tail[ 7] << 24;
  case  7: k2 ^= tail[ 6] << 16;
  case  6: k2 ^= tail[ 5] << 8;
  case  5: k2 ^= tail[ 4] << 0;
           k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;

  case  4: k1 ^= tail[ 3] << 24;
  case  3: k1 ^= tail[ 2] << 16;
  case  2: k1 ^= tail[ 1] << 8;
  case  1: k1 ^= tail[ 0] << 0;
           k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
  };

  //----------
  // finalization

  h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;

  h1 += h2; h1 += h3; h1 += h4;
  h2 += h1; h3 += h1; h4 += h1;

  h1 = fmix32(h1);
  h2 = fmix32(h2);
  h3 = fmix32(h3);
  h4 = fmix32(h4);

  h1 += h2; h1 += h3; h1 += h4;
  h2 += h1; h3 += h1; h4 += h1;

  ((uint32_t*)out)[0] = h1;
  ((uint32_t*)out)[1] = h2;
  ((uint32_t*)out)[2] = h3;
  ((uint32_t*)out)[3] = h4;
}

//-----------------------------------------------------------------------------

void MurmurHash3_x64_128 ( const void * key, const int len,
                           const uint32_t seed, void * out )
{
  const uint8_t * data = (const uint8_t*)key;
  const int nblocks = len / 16;

  uint64_t h1 = seed;
  uint64_t h2 = seed;

  const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
  const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);

  //----------
  // body

  const uint64_t * blocks = (const uint64_t *)(data);

  for(int i = 0; i < nblocks; i++)
  {
    uint64_t k1 = getblock64(blocks,i*2+0);
    uint64_t k2 = getblock64(blocks,i*2+1);

    k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;

    h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;

    k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;

    h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
  }

  //----------
  // tail

  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);

  uint64_t k1 = 0;
  uint64_t k2 = 0;

  // Intentional fall-through: each case consumes one remaining tail byte.
  switch(len & 15)
  {
  case 15: k2 ^= ((uint64_t)tail[14]) << 48;
  case 14: k2 ^= ((uint64_t)tail[13]) << 40;
  case 13: k2 ^= ((uint64_t)tail[12]) << 32;
  case 12: k2 ^= ((uint64_t)tail[11]) << 24;
  case 11: k2 ^= ((uint64_t)tail[10]) << 16;
  case 10: k2 ^= ((uint64_t)tail[ 9]) << 8;
  case  9: k2 ^= ((uint64_t)tail[ 8]) << 0;
           k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;

  case  8: k1 ^= ((uint64_t)tail[ 7]) << 56;
  case  7: k1 ^= ((uint64_t)tail[ 6]) << 48;
  case  6: k1 ^= ((uint64_t)tail[ 5]) << 40;
  case  5: k1 ^= ((uint64_t)tail[ 4]) << 32;
  case  4: k1 ^= ((uint64_t)tail[ 3]) << 24;
  case  3: k1 ^= ((uint64_t)tail[ 2]) << 16;
  case  2: k1 ^= ((uint64_t)tail[ 1]) << 8;
  case  1: k1 ^= ((uint64_t)tail[ 0]) << 0;
           k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
  };

  //----------
  // finalization

  h1 ^= len; h2 ^= len;

  h1 += h2;
  h2 += h1;

  h1 = fmix64(h1);
  h2 = fmix64(h2);

  h1 += h2;
  h2 += h1;

  ((uint64_t*)out)[0] = h1;
  ((uint64_t*)out)[1] = h2;
}

//-----------------------------------------------------------------------------
BIN twml/libtwml/src/lib/murmur_hash3.docx (Normal file, binary not shown)
@ -1,274 +0,0 @@
#include "internal/interpolate.h"
#include "internal/error.h"
#include <twml/optim.h>

namespace twml {
template<typename T>
void mdlInfer(Tensor &output_keys, Tensor &output_vals,
              const Tensor &input_keys, const Tensor &input_vals,
              const Tensor &bin_ids,
              const Tensor &bin_vals,
              const Tensor &feature_offsets,
              bool return_bin_indices) {
  auto okeysData = output_keys.getData<int64_t>();
  auto ovalsData = output_vals.getData<T>();
  uint64_t okeysStride = output_keys.getStride(0);
  uint64_t ovaluesStride = output_vals.getStride(0);

  auto ikeysData = input_keys.getData<int64_t>();
  auto ivalsData = input_vals.getData<T>();
  uint64_t ikeysStride = input_keys.getStride(0);
  uint64_t ivaluesStride = input_vals.getStride(0);

  auto xsData = bin_vals.getData<T>();
  auto ysData = bin_ids.getData<int64_t>();
  uint64_t xsStride = bin_vals.getStride(0);
  uint64_t ysStride = bin_ids.getStride(0);

  auto offsetData = feature_offsets.getData<int64_t>();

  uint64_t size = input_keys.getDim(0);
  uint64_t total_bins = bin_ids.getNumElements();
  uint64_t fsize = feature_offsets.getNumElements();

  for (uint64_t i = 0; i < size; i++) {
    int64_t ikey = ikeysData[i * ikeysStride] - TWML_INDEX_BASE;
    T val = ivalsData[i * ivaluesStride];
    if (ikey == -1) {
      ovalsData[i * ovaluesStride] = val;
      continue;
    }

    // Perform interpolation
    uint64_t offset = offsetData[ikey];
    uint64_t next_offset = (ikey == (int64_t)(fsize - 1)) ? total_bins : offsetData[ikey + 1];
    uint64_t mainSize = next_offset - offset;

    const T *lxsData = xsData + offset;
    const int64_t *lysData = ysData + offset;
    int64_t okey = interpolation<T, int64_t>(lxsData, xsStride,
                                             lysData, ysStride,
                                             val, mainSize, NEAREST, 0,
                                             return_bin_indices);
    okeysData[i * okeysStride] = okey + TWML_INDEX_BASE;
    ovalsData[i * ovaluesStride] = 1;
  }
}

void mdlInfer(Tensor &output_keys, Tensor &output_vals,
              const Tensor &input_keys, const Tensor &input_vals,
              const Tensor &bin_ids,
              const Tensor &bin_vals,
              const Tensor &feature_offsets,
              bool return_bin_indices) {
  if (input_keys.getType() != TWML_TYPE_INT64) {
    throw twml::Error(TWML_ERR_TYPE, "input_keys must be a Long Tensor");
  }

  if (output_keys.getType() != TWML_TYPE_INT64) {
    throw twml::Error(TWML_ERR_TYPE, "output_keys must be a Long Tensor");
  }

  if (bin_ids.getType() != TWML_TYPE_INT64) {
    throw twml::Error(TWML_ERR_TYPE, "bin_ids must be a Long Tensor");
  }

  if (feature_offsets.getType() != TWML_TYPE_INT64) {
    throw twml::Error(TWML_ERR_TYPE, "feature_offsets must be a Long Tensor");
  }

  if (input_vals.getType() != bin_vals.getType()) {
    throw twml::Error(TWML_ERR_TYPE,
      "Data type of input_vals does not match type of bin_vals");
  }

  if (bin_vals.getNumDims() != 1) {
    throw twml::Error(TWML_ERR_SIZE,
      "bin_vals must be 1 Dimensional");
  }

  if (bin_ids.getNumDims() != 1) {
    throw twml::Error(TWML_ERR_SIZE,
      "bin_ids must be 1 Dimensional");
  }

  if (bin_vals.getNumElements() != bin_ids.getNumElements()) {
    throw twml::Error(TWML_ERR_SIZE,
      "Dimensions of bin_vals and bin_ids do not match");
  }

  if (feature_offsets.getStride(0) != 1) {
    throw twml::Error(TWML_ERR_SIZE,
      "feature_offsets must be contiguous");
  }

  switch (input_vals.getType()) {
    case TWML_TYPE_FLOAT:
      twml::mdlInfer<float>(output_keys, output_vals,
                            input_keys, input_vals,
                            bin_ids, bin_vals, feature_offsets,
                            return_bin_indices);
      break;
    case TWML_TYPE_DOUBLE:
      twml::mdlInfer<double>(output_keys, output_vals,
                             input_keys, input_vals,
                             bin_ids, bin_vals, feature_offsets,
                             return_bin_indices);
      break;
    default:
      throw twml::Error(TWML_ERR_TYPE,
        "Unsupported datatype for mdlInfer");
  }
}

const int DEFAULT_INTERPOLATION_LOWEST = 0;
/**
 * @param output tensor to hold linear or nearest interpolation output.
 *               This function does not allocate space.
 *               The output tensor must have space allocated.
 * @param input input tensor; size must match output.
 *              input is assumed to have size [batch_size, number_of_labels].
 * @param xs the bins.
 * @param ys the values for the bins.
 * @param mode: linear or nearest InterpolationMode.
 *              linear is used for isotonic calibration.
 *              nearest is used for MDL calibration and MDL inference.
 *
 * @return Returns nothing. Output is stored into the output tensor.
 *
 * This is used by IsotonicCalibration inference.
 */
template <typename T>
void interpolation(
    Tensor output,
    const Tensor input,
    const Tensor xs,
    const Tensor ys,
    const InterpolationMode mode) {
  // Sanity check: input and output should have two dims.
  if (input.getNumDims() != 2 || output.getNumDims() != 2) {
    throw twml::Error(TWML_ERR_TYPE,
      "input and output should have 2 dimensions.");
  }

  // Sanity check: input and output size should match.
  for (int i = 0; i < input.getNumDims(); i++) {
    if (input.getDim(i) != output.getDim(i)) {
      throw twml::Error(TWML_ERR_TYPE,
        "input and output mismatch in size.");
    }
  }

  // Sanity check: number of labels in input should match
  // number of labels in xs / ys.
  if (input.getDim(1) != xs.getDim(0)
      || input.getDim(1) != ys.getDim(0)) {
    throw twml::Error(TWML_ERR_TYPE,
      "input, xs, ys should have the same number of labels.");
  }

  const uint64_t inputStride0 = input.getStride(0);
  const uint64_t inputStride1 = input.getStride(1);
  const uint64_t outputStride0 = output.getStride(0);
  const uint64_t outputStride1 = output.getStride(1);
  const uint64_t xsStride0 = xs.getStride(0);
  const uint64_t xsStride1 = xs.getStride(1);
  const uint64_t ysStride0 = ys.getStride(0);
  const uint64_t ysStride1 = ys.getStride(1);
  const uint64_t mainSize = xs.getDim(1);

  // for each value in the input matrix, compute output value by
  // calling interpolation.
  auto inputData = input.getData<T>();
  auto outputData = output.getData<T>();
  auto xsData = xs.getData<T>();
  auto ysData = ys.getData<T>();

  for (uint64_t i = 0; i < input.getDim(0); i++) {
    for (uint64_t j = 0; j < input.getDim(1); j++) {
      const T val = inputData[i * inputStride0 + j * inputStride1];
      const T *lxsData = xsData + j * xsStride0;
      const T *lysData = ysData + j * ysStride0;
      const T res = interpolation(
          lxsData, xsStride1,
          lysData, ysStride1,
          val,
          mainSize,
          mode,
          DEFAULT_INTERPOLATION_LOWEST);
      outputData[i * outputStride0 + j * outputStride1] = res;
    }
  }
}

void linearInterpolation(
    Tensor output,
    const Tensor input,
    const Tensor xs,
    const Tensor ys) {
  switch (input.getType()) {
    case TWML_TYPE_FLOAT:
      twml::interpolation<float>(output, input, xs, ys, LINEAR);
      break;
    case TWML_TYPE_DOUBLE:
      twml::interpolation<double>(output, input, xs, ys, LINEAR);
      break;
    default:
      throw twml::Error(TWML_ERR_TYPE,
        "Unsupported datatype for linearInterpolation.");
  }
}

void nearestInterpolation(
    Tensor output,
    const Tensor input,
    const Tensor xs,
    const Tensor ys) {
  switch (input.getType()) {
    case TWML_TYPE_FLOAT:
      twml::interpolation<float>(output, input, xs, ys, NEAREST);
      break;
    case TWML_TYPE_DOUBLE:
      twml::interpolation<double>(output, input, xs, ys, NEAREST);
      break;
    default:
      throw twml::Error(TWML_ERR_TYPE,
        "Unsupported datatype for nearestInterpolation.");
  }
}
}  // namespace twml

twml_err twml_optim_mdl_infer(twml_tensor output_keys,
                              twml_tensor output_vals,
                              const twml_tensor input_keys,
                              const twml_tensor input_vals,
                              const twml_tensor bin_ids,
                              const twml_tensor bin_vals,
                              const twml_tensor feature_offsets,
                              bool return_bin_indices) {
  HANDLE_EXCEPTIONS(
    using namespace twml;
    mdlInfer(*getTensor(output_keys),
             *getTensor(output_vals),
             *getConstTensor(input_keys),
             *getConstTensor(input_vals),
             *getConstTensor(bin_ids),
             *getConstTensor(bin_vals),
             *getConstTensor(feature_offsets),
             return_bin_indices););
  return TWML_ERR_NONE;
}

twml_err twml_optim_nearest_interpolation(
    twml_tensor output,
    const twml_tensor input,
    const twml_tensor xs,
    const twml_tensor ys) {
  HANDLE_EXCEPTIONS(
    using namespace twml;
    nearestInterpolation(*getTensor(output),
                         *getConstTensor(input),
                         *getConstTensor(xs),
                         *getConstTensor(ys)););
  return TWML_ERR_NONE;
}
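To make the linear-versus-nearest distinction in the doc comment above concrete, a standalone conceptual sketch over plain arrays (not the twml kernel, which additionally handles tensors, strides, and the lowest-bin parameter):

#include <cstdio>

// Conceptual sketch of the two interpolation modes along one bin axis.
// xs must be sorted; values outside [xs[0], xs[n-1]] are clamped here.
static double interpolate(const double *xs, const double *ys, int n,
                          double v, bool linear) {
  if (v <= xs[0]) return ys[0];
  if (v >= xs[n - 1]) return ys[n - 1];
  int i = 1;
  while (xs[i] < v) i++;             // first bin edge >= v
  if (!linear)                       // nearest: pick the closer edge
    return (v - xs[i - 1] < xs[i] - v) ? ys[i - 1] : ys[i];
  const double t = (v - xs[i - 1]) / (xs[i] - xs[i - 1]);
  return ys[i - 1] + t * (ys[i] - ys[i - 1]);
}

int main() {
  const double xs[] = {0.0, 1.0}, ys[] = {0.0, 10.0};
  printf("linear(0.25)  = %g\n", interpolate(xs, ys, 2, 0.25, true));   // 2.5
  printf("nearest(0.25) = %g\n", interpolate(xs, ys, 2, 0.25, false));  // 0
  return 0;
}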
BIN twml/libtwml/src/lib/optim.docx (Normal file, binary not shown)
@ -1,53 +0,0 @@
#include "internal/utf_converter.h"

ssize_t utf8_to_utf16(const uint8_t *in, uint64_t in_len, uint16_t *out, uint64_t max_out) {
  uint64_t num_out = 0;
  uint64_t num_in = 0;
  while (num_in < in_len) {
    uint32_t uni;
    uint64_t todo;
    uint8_t ch = in[num_in];
    num_in++;
    if (ch <= 0x7F) {
      uni = ch;
      todo = 0;
    } else if (ch <= 0xBF) {
      return -1;
    } else if (ch <= 0xDF) {
      uni = ch & 0x1F;
      todo = 1;
    } else if (ch <= 0xEF) {
      uni = ch & 0x0F;
      todo = 2;
    } else if (ch <= 0xF7) {
      uni = ch & 0x07;
      todo = 3;
    } else {
      return -1;
    }
    for (uint64_t j = 0; j < todo; ++j) {
      if (num_in == in_len) return -1;
      uint8_t ch = in[num_in];
      num_in++;
      if (ch < 0x80 || ch > 0xBF) return -1;
      uni <<= 6;
      uni += ch & 0x3F;
    }
    if (uni >= 0xD800 && uni <= 0xDFFF) return -1;
    if (uni > 0x10FFFF) return -1;
    if (uni <= 0xFFFF) {
      if (num_out == max_out) return -1;
      out[num_out] = uni;
      num_out++;
    } else {
      uni -= 0x10000;
      if (num_out + 1 >= max_out) return -1;
      out[num_out] = (uni >> 10) + 0xD800;
      out[num_out + 1] = (uni & 0x3FF) + 0xDC00;
      num_out += 2;
    }
  }
  if (num_out == max_out) return -1;
  out[num_out] = 0;
  return num_out;
}
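A hedged usage sketch for utf8_to_utf16 (the byte literals spell "héllo"; the function returns the number of UTF-16 units written, excluding the terminating zero, or -1 on malformed input or insufficient output space):

#include <cstdio>
#include "internal/utf_converter.h"

int main() {
  // "héllo": 'é' is the two-byte UTF-8 sequence 0xC3 0xA9.
  const uint8_t utf8[] = {0x68, 0xC3, 0xA9, 0x6C, 0x6C, 0x6F};
  uint16_t utf16[16];
  ssize_t n = utf8_to_utf16(utf8, sizeof(utf8), utf16, 16);
  printf("wrote %zd UTF-16 units\n", n);  // expected: 5
  return 0;
}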
BIN twml/libtwml/src/lib/utf_converter.docx (Normal file, binary not shown)
BIN twml/libtwml/src/ops/CMakeLists.docx (Normal file, binary not shown)
@ -1,79 +0,0 @@
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR})
cmake_minimum_required(VERSION 2.8 FATAL_ERROR)
cmake_policy(VERSION 2.8)
set(CMAKE_MACOSX_RPATH 1)

file(GLOB_RECURSE sources *.cpp)

set (CMAKE_CXX_FLAGS "-Wall -std=c++11 -fno-stack-protector ${CMAKE_CXX_FLAGS}")

execute_process(
  COMMAND
  $ENV{LIBTWML_HOME}/src/ops/scripts/get_inc.sh
  RESULT_VARIABLE
  TF_RES
  OUTPUT_VARIABLE
  TF_INC)

if (NOT (${TF_RES} EQUAL "0"))
  message(${TF_RES})
  message(FATAL_ERROR "Failed to get include path for tensorflow")
endif()

execute_process(
  COMMAND
  $ENV{LIBTWML_HOME}/src/ops/scripts/get_lib.sh
  RESULT_VARIABLE
  TF_RES
  OUTPUT_VARIABLE
  TF_LIB)

if (NOT (${TF_RES} EQUAL "0"))
  message(${TF_RES})
  message(FATAL_ERROR "Failed to get lib path for tensorflow")
endif()

find_path(
  TWML_INC
  NAMES "twml.h"
  PATHS $ENV{LIBTWML_HOME}/include)

add_library(twml_tf MODULE ${sources})

set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "$ENV{LIBTWML_HOME}/cmake")

if (UNIX)
  if (APPLE)
    set (CMAKE_CXX_FLAGS "-undefined dynamic_lookup -stdlib=libc++ ${CMAKE_CXX_FLAGS}")
    # -Wl,-all_load ensures symbols not used by twml_tf are also included.
    # -Wl,-noall_load limits the scope of the previous flag.
    set (LINK_ALL_OPTION "-Wl,-all_load")
    set (NO_LINK_ALL_OPTION "-Wl,-noall_load")
    set(TF_FRAMEWORK_LIB ${TF_LIB}/libtensorflow_framework.1.dylib)
  else()
    # -Wl,--whole-archive ensures symbols not used by twml_tf are also included.
    # -Wl,--no-whole-archive limits the scope of the previous flag.
    set (LINK_ALL_OPTION "-Wl,--whole-archive")
    set (NO_LINK_ALL_OPTION "-Wl,--no-whole-archive")
    set(TF_FRAMEWORK_LIB ${TF_LIB}/libtensorflow_framework.so.1)
  endif()
endif()


target_include_directories(
  twml_tf
  PRIVATE
  ${CMAKE_CURRENT_SOURCE_DIR}
  ${TWML_INC}
  # TF_INC needs to be the last to avoid some weird white-spacing issues with generated Makefile.
  ${TF_INC}  # Needed because of some header files auto-generated during build time.
  ${TF_INC}/external/nsync/public/
)

target_link_libraries(twml_tf
  PUBLIC
  # Since we are using twml_tf as the "one" dynamic library,
  # we want it to have the C function symbols needed for other functions as well.
  ${LINK_ALL_OPTION} twml ${NO_LINK_ALL_OPTION}
  ${TF_FRAMEWORK_LIB}
)
@ -1,92 +0,0 @@
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/op_kernel.h"

using namespace tensorflow;

REGISTER_OP("Add1")
.Attr("T: {float, double, int32}")
.Input("input1: T")
.Output("output: T")
.SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
  c->set_output(0, c->input(0));
  return Status::OK();
});


template<typename T>
class Add1 : public OpKernel {
 public:
  explicit Add1(OpKernelConstruction* context) : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    // Grab the input tensor
    const Tensor& input_tensor = context->input(0);
    auto input = input_tensor.flat<T>();

    // Create an output tensor
    Tensor* output_tensor = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(),
                                                     &output_tensor));
    auto output_flat = output_tensor->flat<T>();

    // Add 1 to input and assign to output
    const int N = input.size();
    for (int i = 0; i < N; i++) {
      output_flat(i) = input(i) + 1;
    }
  }
};


REGISTER_OP("Add1Grad")
.Attr("T: {float, double, int32}")
.Input("grad_output: T")
.Output("grad_input: T")
.SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
  c->set_output(0, c->input(0));
  return Status::OK();
});

template<typename T>
class Add1Grad : public OpKernel {
 public:
  explicit Add1Grad(OpKernelConstruction* context) : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    // Grab the input tensor
    const Tensor& grad_output_tensor = context->input(0);
    auto grad_output = grad_output_tensor.flat<T>();

    // Create a grad_input tensor
    Tensor* grad_input_tensor = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(0, grad_output_tensor.shape(),
                                                     &grad_input_tensor));

    auto grad_input_flat = grad_input_tensor->flat<T>();

    // Copy from grad_output to grad_input: d(x + 1)/dx == 1, so the upstream
    // gradient passes through unchanged.
    const int N = grad_output.size();
    for (int i = 0; i < N; i++) {
      grad_input_flat(i) = grad_output(i);
    }
  }
};

#define REGISTER(Type)                        \
                                              \
  REGISTER_KERNEL_BUILDER(                    \
      Name("Add1")                            \
      .Device(DEVICE_CPU)                     \
      .TypeConstraint<Type>("T"),             \
      Add1<Type>);                            \
                                              \
  REGISTER_KERNEL_BUILDER(                    \
      Name("Add1Grad")                        \
      .Device(DEVICE_CPU)                     \
      .TypeConstraint<Type>("T"),             \
      Add1Grad<Type>);                        \

REGISTER(float);
REGISTER(double);
REGISTER(int32);
BIN twml/libtwml/src/ops/add1.docx (Normal file, binary not shown)
@ -1,183 +0,0 @@
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/op_kernel.h"

#include <twml.h>
#include "tensorflow_utils.h"
#include "resource_utils.h"

REGISTER_OP("DecodeAndHashBatchPredictionRequest")
.Input("input_bytes: uint8")
.Attr("keep_features: list(int)")
.Attr("keep_codes: list(int)")
.Attr("decode_mode: int = 0")
.Output("hashed_data_record_handle: resource")
.SetShapeFn(shape_inference::ScalarShape)
.Doc(R"doc(
A tensorflow OP that decodes a batch prediction request and creates a handle to the batch of hashed data records.

Attr
  keep_features: a list of int ids to keep.
  keep_codes: their corresponding code.
  decode_mode: integer, indicates which decoding method to use. Let a sparse continuous
  have a feature_name and a dict of {name: value}. 0 indicates feature_ids are computed
  as hash(name). 1 indicates feature_ids are computed as hash(feature_name, name)
  shared_name: name used by the resource handle inside the resource manager.
  container: name used by the container of the resources.

shared_name and container are required when inheriting from ResourceOpKernel.

Input
  input_bytes: Input tensor containing the serialized batch of BatchPredictionRequest.

Outputs
  hashed_data_record_handle: A resource handle to the HashedDataRecordResource containing the batch of HashedDataRecords.
)doc");

class DecodeAndHashBatchPredictionRequest : public OpKernel {
 public:
  explicit DecodeAndHashBatchPredictionRequest(OpKernelConstruction* context)
    : OpKernel(context) {
    std::vector<int64> keep_features;
    std::vector<int64> keep_codes;

    OP_REQUIRES_OK(context, context->GetAttr("keep_features", &keep_features));
    OP_REQUIRES_OK(context, context->GetAttr("keep_codes", &keep_codes));
    OP_REQUIRES_OK(context, context->GetAttr("decode_mode", &m_decode_mode));

    OP_REQUIRES(context, keep_features.size() == keep_codes.size(),
                errors::InvalidArgument("keep keys and values must have same size."));

#ifdef USE_DENSE_HASH
    m_keep_map.set_empty_key(0);
#endif  // USE_DENSE_HASH

    for (uint64_t i = 0; i < keep_features.size(); i++) {
      m_keep_map[keep_features[i]] = keep_codes[i];
    }
  }

 private:
  twml::Map<int64_t, int64_t> m_keep_map;
  int64 m_decode_mode;

  void Compute(OpKernelContext* context) override {
    try {
      HashedDataRecordResource *resource = nullptr;
      OP_REQUIRES_OK(context, makeResourceHandle<HashedDataRecordResource>(context, 0, &resource));

      // Store the input bytes in the resource so it isn't freed before the resource.
      // This is necessary because we are not copying the contents for tensors.
      resource->input = context->input(0);
      const uint8_t *input_bytes = resource->input.flat<uint8>().data();
      twml::HashedDataRecordReader reader;
      twml::HashedBatchPredictionRequest bpr;
      reader.setKeepMap(&m_keep_map);
      reader.setBuffer(input_bytes);
      reader.setDecodeMode(m_decode_mode);
      bpr.decode(reader);

      resource->common = std::move(bpr.common());
      resource->records = std::move(bpr.requests());

      // Each datarecord has a copy of common features.
      // Initialize total_size by common_size * num_records
      int64 common_size = static_cast<int64>(resource->common.totalSize());
      int64 num_records = static_cast<int64>(resource->records.size());
      int64 total_size = common_size * num_records;
      for (const auto &record : resource->records) {
        total_size += static_cast<int64>(record.totalSize());
      }

      resource->total_size = total_size;
      resource->num_labels = 0;
      resource->num_weights = 0;
    } catch (const std::exception &e) {
      context->CtxFailureWithWarning(errors::InvalidArgument(e.what()));
    }
  }
};

REGISTER_KERNEL_BUILDER(
    Name("DecodeAndHashBatchPredictionRequest").Device(DEVICE_CPU),
    DecodeAndHashBatchPredictionRequest);

REGISTER_OP("DecodeBatchPredictionRequest")
.Input("input_bytes: uint8")
.Attr("keep_features: list(int)")
.Attr("keep_codes: list(int)")
.Output("data_record_handle: resource")
.SetShapeFn(shape_inference::ScalarShape)
.Doc(R"doc(
A tensorflow OP that decodes a batch prediction request and creates a handle to the batch of data records.

Attr
  keep_features: a list of int ids to keep.
  keep_codes: their corresponding code.
  shared_name: name used by the resource handle inside the resource manager.
  container: name used by the container of the resources.

shared_name and container are required when inheriting from ResourceOpKernel.

Input
  input_bytes: Input tensor containing the serialized batch of BatchPredictionRequest.

Outputs
  data_record_handle: A resource handle to the DataRecordResource containing the batch of DataRecords.
)doc");

class DecodeBatchPredictionRequest : public OpKernel {
 public:
  explicit DecodeBatchPredictionRequest(OpKernelConstruction* context)
    : OpKernel(context) {
    std::vector<int64> keep_features;
    std::vector<int64> keep_codes;

    OP_REQUIRES_OK(context, context->GetAttr("keep_features", &keep_features));
    OP_REQUIRES_OK(context, context->GetAttr("keep_codes", &keep_codes));

    OP_REQUIRES(context, keep_features.size() == keep_codes.size(),
                errors::InvalidArgument("keep keys and values must have same size."));

#ifdef USE_DENSE_HASH
    m_keep_map.set_empty_key(0);
#endif  // USE_DENSE_HASH

    for (uint64_t i = 0; i < keep_features.size(); i++) {
      m_keep_map[keep_features[i]] = keep_codes[i];
    }
  }

 private:
  twml::Map<int64_t, int64_t> m_keep_map;

  void Compute(OpKernelContext* context) override {
    try {
      DataRecordResource *resource = nullptr;
      OP_REQUIRES_OK(context, makeResourceHandle<DataRecordResource>(context, 0, &resource));

      // Store the input bytes in the resource so it isn't freed before the resource.
      // This is necessary because we are not copying the contents for tensors.
      resource->input = context->input(0);
      const uint8_t *input_bytes = resource->input.flat<uint8>().data();
      twml::DataRecordReader reader;
      twml::BatchPredictionRequest bpr;
      reader.setKeepMap(&m_keep_map);
      reader.setBuffer(input_bytes);
      bpr.decode(reader);

      resource->common = std::move(bpr.common());
      resource->records = std::move(bpr.requests());

      resource->num_weights = 0;
      resource->num_labels = 0;
      resource->keep_map = &m_keep_map;
    } catch (const std::exception &e) {
      context->CtxFailureWithWarning(errors::InvalidArgument(e.what()));
    }
  }
};

REGISTER_KERNEL_BUILDER(
    Name("DecodeBatchPredictionRequest").Device(DEVICE_CPU),
    DecodeBatchPredictionRequest);
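To make the decode_mode attribute documented above concrete, a purely hypothetical sketch of the two feature-id schemes; MurmurHash3 and the name-combination rule here are invented stand-ins for whatever hashing twml's readers actually apply:

#include <cstdint>
#include <string>
#include "internal/murmur_hash3.h"

// Hypothetical illustration of the documented modes:
// mode 0: feature_id = hash(name); mode 1: feature_id = hash(feature_name, name).
static int64_t hashName(const std::string &s) {
  uint64_t out[2];
  MurmurHash3_x64_128(s.data(), (int)s.size(), 0, out);
  return (int64_t)out[0];
}

static int64_t featureId(int decode_mode, const std::string &feature_name,
                         const std::string &name) {
  if (decode_mode == 0) return hashName(name);
  return hashName(feature_name + '\0' + name);  // mode 1: combine both names
}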
BIN twml/libtwml/src/ops/batch_prediction_request.docx (Normal file, binary not shown)
@ -1,224 +0,0 @@
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/op_kernel.h"

#include <cstdint>
#include <twml.h>
#include "tensorflow_utils.h"
#include "resource_utils.h"

#include <iterator>

template<typename InputType, typename RecordType>
class DecodeBatchPredictionRequestKernel : public OpKernel {
 public:
  explicit DecodeBatchPredictionRequestKernel(OpKernelConstruction* context)
    : OpKernel(context) {
    std::vector<int64> keep_features;
    std::vector<int64> keep_codes;

    std::vector<int64> label_features;
    std::vector<int64> weight_features;

    OP_REQUIRES_OK(context, context->GetAttr("keep_features", &keep_features));
    OP_REQUIRES_OK(context, context->GetAttr("keep_codes", &keep_codes));

    OP_REQUIRES_OK(context, context->GetAttr("label_features", &label_features));
    OP_REQUIRES_OK(context, context->GetAttr("weight_features", &weight_features));
    OP_REQUIRES_OK(context, context->GetAttr("decode_mode", &m_decode_mode));

    OP_REQUIRES(context, keep_features.size() == keep_codes.size(),
                errors::InvalidArgument("keep keys and values must have same size."));

#ifdef USE_DENSE_HASH
    m_keep_map.set_empty_key(0);
    m_labels_map.set_empty_key(0);
    m_weights_map.set_empty_key(0);
#endif  // USE_DENSE_HASH

    for (uint64_t i = 0; i < keep_features.size(); i++) {
      m_keep_map[keep_features[i]] = keep_codes[i];
    }

    for (uint64_t i = 0; i < label_features.size(); i++) {
      m_labels_map[label_features[i]] = i;
    }

    for (uint64_t i = 0; i < weight_features.size(); i++) {
      m_weights_map[weight_features[i]] = i;
    }
  }

 protected:
  twml::Map<int64_t, int64_t> m_keep_map;
  twml::Map<int64_t, int64_t> m_labels_map;
  twml::Map<int64_t, int64_t> m_weights_map;
  int64 m_decode_mode;

  template<typename ResourceType>
  void Decode(OpKernelContext* context, ResourceType *resource) {
    resource->input = context->input(0);
    const uint8_t *input_bytes = getInputBytes<InputType>(resource->input, 0);
    int num_labels = static_cast<int>(m_labels_map.size());
    int num_weights = static_cast<int>(m_weights_map.size());

    typename RecordType::Reader reader;
    twml::GenericBatchPredictionRequest<RecordType> bpr(num_labels, num_weights);

    reader.setKeepMap(&m_keep_map);
    reader.setLabelsMap(&m_labels_map);
    reader.setBuffer(input_bytes);
    reader.setDecodeMode(m_decode_mode);
    // Do not set weight map if it is empty. This will take a faster path.
    if (num_weights != 0) {
      reader.setWeightsMap(&m_weights_map);
    }
    bpr.decode(reader);

    resource->common = std::move(bpr.common());
    resource->records = std::move(bpr.requests());

    resource->num_labels = num_labels;
    resource->num_weights = num_weights;
  }
};


REGISTER_OP("DecodeAndHashBatchPredictionRequestV2")
.Attr("InputType: {uint8, string}")
.Input("input_bytes: InputType")
.Attr("keep_features: list(int)")
.Attr("keep_codes: list(int)")
.Attr("label_features: list(int)")
.Attr("weight_features: list(int) = []")
.Attr("decode_mode: int = 0")
.Output("hashed_data_record_handle: resource")
.SetShapeFn(shape_inference::ScalarShape)
.Doc(R"doc(
A tensorflow OP that decodes a list/batch of data records and creates a handle to the batch of hashed data records.

Compared to DecodeAndHashBatchPredictionRequest, DecodeAndHashBatchPredictionRequestV2 is used for training instead
of serving. Thus label_features and weight_features (optional) must be passed, and labels and weights are extracted in
the output.
DecodeAndHashBatchPredictionRequestV2 controls what DataRecords we want to process together in a batch in training.
For instance, we can put all instances for a query in the same batch when training a ranking model.
Notice that this OP was added separately to make sure we would not break the API for DecodeAndHashBatchPredictionRequest.
Merging the two ops into a single .cpp file would require further discussion in a future API revision.

Attr
  keep_features: a list of int ids to keep.
  keep_codes: their corresponding code.
  label_features: list of feature ids representing the labels.
  weight_features: list of feature ids representing the weights. Defaults to empty list.
  decode_mode: integer, indicates which decoding method to use. Let a sparse continuous
  have a feature_name and a dict of {name: value}. 0 indicates feature_ids are computed
  as hash(name). 1 indicates feature_ids are computed as hash(feature_name, name)

Input
  input_bytes: Input tensor containing the serialized batch of BatchPredictionRequest.

Outputs
  hashed_data_record_handle: A resource handle to the HashedDataRecordResource containing the batch of HashedDataRecords.
)doc");

template<typename InputType>
class DecodeAndHashBatchPredictionRequestV2 :
  public DecodeBatchPredictionRequestKernel<InputType, twml::HashedDataRecord> {

 public:
  DecodeAndHashBatchPredictionRequestV2(OpKernelConstruction *context)
    : DecodeBatchPredictionRequestKernel<InputType, twml::HashedDataRecord>(context) {
  }

 private:
  void Compute(OpKernelContext* context) override {
    try {
      HashedDataRecordResource *resource = nullptr;
      OP_REQUIRES_OK(
        context,
        makeResourceHandle<HashedDataRecordResource>(context, 0, &resource));

      this->Decode(context, resource);

      // Each datarecord has a copy of common features.
      // Initialize total_size by common_size * num_records
      int64 common_size = static_cast<int64>(resource->common.totalSize());
      int64 num_records = static_cast<int64>(resource->records.size());
      int64 total_size = common_size * num_records;
      for (const auto &record : resource->records) {
        total_size += static_cast<int64>(record.totalSize());
      }

      resource->total_size = total_size;
    } catch (const std::exception &e) {
      context->CtxFailureWithWarning(errors::InvalidArgument(e.what()));
    }
  }
};

REGISTER_OP("DecodeBatchPredictionRequestV2")
.Attr("InputType: {uint8, string}")
.Input("input_bytes: InputType")
.Attr("keep_features: list(int)")
.Attr("keep_codes: list(int)")
.Attr("label_features: list(int)")
.Attr("weight_features: list(int) = []")
.Attr("decode_mode: int = 0")
.Output("data_record_handle: resource")
.SetShapeFn(shape_inference::ScalarShape)
.Doc(R"doc(
A tensorflow OP that decodes a batch prediction request and creates a handle to the batch of data records.

Attr
  keep_features: a list of int ids to keep.
  keep_codes: their corresponding code.
  shared_name: name used by the resource handle inside the resource manager.
  label_features: list of feature ids representing the labels.
  weight_features: list of feature ids representing the weights. Defaults to empty list.
  decode_mode: reserved, do not use.

Input
  input_bytes: Input tensor containing the serialized batch of BatchPredictionRequest.

Outputs
  data_record_handle: A resource handle to the DataRecordResource containing the batch of DataRecords.
)doc");


template<typename InputType>
class DecodeBatchPredictionRequestV2 :
  public DecodeBatchPredictionRequestKernel<InputType, twml::DataRecord> {
 public:
  DecodeBatchPredictionRequestV2(OpKernelConstruction *context)
    : DecodeBatchPredictionRequestKernel<InputType, twml::DataRecord>(context) {
  }

 private:
  void Compute(OpKernelContext* context) override {
    try {
      DataRecordResource *resource = nullptr;
      OP_REQUIRES_OK(
        context,
        makeResourceHandle<DataRecordResource>(context, 0, &resource));
      this->Decode(context, resource);
      resource->keep_map = &(this->m_keep_map);
    } catch (const std::exception &e) {
      context->CtxFailureWithWarning(errors::InvalidArgument(e.what()));
    }
  }
};

#define REGISTER_DECODE_OPS(InputType)                          \
  REGISTER_KERNEL_BUILDER(                                      \
      Name("DecodeAndHashBatchPredictionRequestV2")             \
      .Device(DEVICE_CPU)                                       \
      .TypeConstraint<InputType>("InputType"),                  \
      DecodeAndHashBatchPredictionRequestV2<InputType>);        \
  REGISTER_KERNEL_BUILDER(                                      \
      Name("DecodeBatchPredictionRequestV2")                    \
      .Device(DEVICE_CPU)                                       \
      .TypeConstraint<InputType>("InputType"),                  \
      DecodeBatchPredictionRequestV2<InputType>);               \

REGISTER_DECODE_OPS(uint8)
REGISTER_DECODE_OPS(string)
BIN twml/libtwml/src/ops/batch_prediction_request_v2.docx (Normal file, binary not shown)
@ -1,82 +0,0 @@
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/op_kernel.h"

#include <twml.h>
#include "tensorflow_utils.h"

using namespace tensorflow;

REGISTER_OP("BatchPredictionResponseWriter")
.Attr("T: {float, double}")
.Input("keys: int64")
.Input("values: T")
.Output("result: uint8")
.SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
  return Status::OK();
}).Doc(R"doc(

A tensorflow OP that packages keys and values into a BatchPredictionResponse.

values: input feature values. (float/double)
keys: feature ids from the original BatchPredictionRequest. (int64)

Outputs
  result: output BatchPredictionResponse serialized using Thrift into a uint8 tensor.
)doc");

template<typename T>
class BatchPredictionResponseWriter : public OpKernel {
 public:
  explicit BatchPredictionResponseWriter(OpKernelConstruction* context)
      : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    const Tensor& keys = context->input(0);
    const Tensor& values = context->input(1);

    try {
      // Ensure the inner dimensions match.
      if (values.dim_size(values.dims() - 1) != keys.dim_size(keys.dims() - 1)) {
        throw std::runtime_error("The sizes of keys and values need to match");
      }

      // Set inputs as twml::Tensor.
      const twml::Tensor in_keys_ = TFTensor_to_twml_tensor(keys);
      const twml::Tensor in_values_ = TFTensor_to_twml_tensor(values);
      // No dense tensors in this op.
      const twml::Tensor dummy_dense_keys_;
      const std::vector<twml::RawTensor> dummy_dense_values_;

      // Construct the BatchPredictionResponse.
      twml::BatchPredictionResponse tempResult(
          in_keys_, in_values_, dummy_dense_keys_, dummy_dense_values_);

      // Determine the length of the result.
      int len = tempResult.encodedSize();
      TensorShape result_shape = {1, len};

      // Create an output tensor; its size is determined by the content of the input.
      Tensor* result = nullptr;
      OP_REQUIRES_OK(context, context->allocate_output(0, result_shape,
                                                       &result));
      twml::Tensor out_result = TFTensor_to_twml_tensor(*result);

      // Serialize the BatchPredictionResponse into the output tensor.
      tempResult.write(out_result);
    } catch (const std::exception &e) {
      context->CtxFailureWithWarning(errors::InvalidArgument(e.what()));
    }
  }
};

#define REGISTER(Type)                            \
                                                  \
  REGISTER_KERNEL_BUILDER(                        \
      Name("BatchPredictionResponseWriter")       \
          .Device(DEVICE_CPU)                     \
          .TypeConstraint<Type>("T"),             \
      BatchPredictionResponseWriter<Type>);       \

REGISTER(float);
REGISTER(double);
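The writer above sizes its output with encodedSize() before serializing with write(). A minimal standalone sketch of that two-phase pattern, with FakeResponse standing in for twml::BatchPredictionResponse (only the encodedSize/write pairing mirrors the API used above):

#include <cstdint>
#include <cstring>
#include <vector>

// Stand-in for twml::BatchPredictionResponse: it can report its encoded
// size up front, then serialize into a caller-provided buffer.
struct FakeResponse {
  std::vector<uint8_t> payload;
  int encodedSize() const { return static_cast<int>(payload.size()); }
  void write(uint8_t* out) const {
    std::memcpy(out, payload.data(), payload.size());
  }
};

int main() {
  FakeResponse r{{1, 2, 3, 4}};
  std::vector<uint8_t> out(r.encodedSize());  // allocate exactly once
  r.write(out.data());                        // then serialize in place
  return 0;
}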
BIN twml/libtwml/src/ops/batch_prediction_response_writer.docx (binary file not shown)
@ -1,81 +0,0 @@
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/op_kernel.h"

#include <twml.h>
#include "tensorflow_utils.h"

using namespace tensorflow;

REGISTER_OP("BatchPredictionTensorResponseWriter")
.Attr("T: list({string, int32, int64, float, double})")
.Input("keys: int64")
.Input("values: T")
.Output("result: uint8")
.SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
  return Status::OK();
}).Doc(R"doc(

A tensorflow OP that packages keys and dense tensors into a BatchPredictionResponse.

values: list of tensors
keys: feature ids from the original BatchPredictionRequest. (int64)

Outputs
  result: output BatchPredictionResponse serialized using Thrift into a uint8 tensor.
)doc");

class BatchPredictionTensorResponseWriter : public OpKernel {
 public:
  explicit BatchPredictionTensorResponseWriter(OpKernelConstruction* context)
      : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    const Tensor& keys = context->input(0);

    try {
      // Set keys as twml::Tensor.
      const twml::Tensor in_keys_ = TFTensor_to_twml_tensor(keys);

      // Check sizes.
      uint64_t num_keys = in_keys_.getNumElements();
      uint64_t num_values = context->num_inputs() - 1;

      OP_REQUIRES(context, num_values % num_keys == 0,
                  errors::InvalidArgument("Number of dense tensors is not a multiple of the number of dense keys"));

      // Set dense tensor values.
      std::vector<twml::RawTensor> in_values_;
      for (int i = 1; i < context->num_inputs(); i++) {
        in_values_.push_back(TFTensor_to_twml_raw_tensor(context->input(i)));
      }

      // No continuous predictions in this op, only tensors.
      const twml::Tensor dummy_cont_keys_;
      const twml::Tensor dummy_cont_values_;

      // Construct the BatchPredictionResponse.
      twml::BatchPredictionResponse tempResult(
          dummy_cont_keys_, dummy_cont_values_, in_keys_, in_values_);

      // Determine the length of the result.
      int len = tempResult.encodedSize();
      TensorShape result_shape = {1, len};

      // Create an output tensor; its size is determined by the content of the input.
      Tensor* result = nullptr;
      OP_REQUIRES_OK(context, context->allocate_output(0, result_shape,
                                                       &result));
      twml::Tensor out_result = TFTensor_to_twml_tensor(*result);

      // Serialize the BatchPredictionResponse into the output tensor.
      tempResult.write(out_result);
    } catch (const std::exception &e) {
      context->CtxFailureWithWarning(errors::InvalidArgument(e.what()));
    }
  }
};

REGISTER_KERNEL_BUILDER(
    Name("BatchPredictionTensorResponseWriter").Device(DEVICE_CPU),
    BatchPredictionTensorResponseWriter);
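The num_values % num_keys == 0 check above allows several dense tensors per key; a tiny sketch of the grouping that the check implies (how consumers pair keys with tensors is an assumption here):

#include <cstdio>

int main() {
  const int num_keys = 2;
  const int num_values = 6;  // six tensor inputs after the keys input
  if (num_values % num_keys == 0) {
    const int tensors_per_key = num_values / num_keys;
    std::printf("%d tensors per key\n", tensors_per_key);  // prints: 3
  }
  return 0;
}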
@ -1,330 +0,0 @@
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// TWML modified to optimize binary features:
//  - Sparse tensor values are assumed to be binary, so only an add operation
//    is done rather than a mul-add;
//  - An in-house version of vectorization is used instead of Eigen;
//  - Sharding and multithreading are enabled.

#define EIGEN_USE_THREADS

#include "binary_sparse_dense_matmul.h"
#include "binary_sparse_dense_matmul_impl.h"

#include "tensorflow/core/framework/bounds_check.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/common_shape_fns.h"
#include "tensorflow/core/framework/shape_inference.h"

namespace tensorflow {

namespace shape_inference {
// TODO: The `a_values` are supposed to be all ones.
// Users should not call this op directly but use it from the `sparse_op` python library.
// To keep it consistent with the original op, the signature currently remains the same;
// we will think of a better way to constrain correct use of this op.
// CX-18174
REGISTER_OP("BinarySparseTensorDenseMatMul")
    .Input("a_indices: Tindices")
    .Input("a_values: T")
    .Input("a_shape: int64")
    .Input("b: T")
    .Output("product: T")
    .Attr("T: type")
    .Attr("Tindices: {int32,int64} = DT_INT64")
    .Attr("adjoint_a: bool = false")
    .Attr("adjoint_b: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      DimensionHandle unused_dim;
      ShapeHandle unused;
      ShapeHandle b;
      ShapeHandle a_shape;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &unused));  // a_indices
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &unused));  // a_values
      TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(2, &a_shape));
      TF_RETURN_IF_ERROR(c->WithRank(a_shape, 2, &a_shape));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 2, &b));

      bool adjoint_a;
      bool adjoint_b;
      TF_RETURN_IF_ERROR(c->GetAttr("adjoint_a", &adjoint_a));
      TF_RETURN_IF_ERROR(c->GetAttr("adjoint_b", &adjoint_b));

      DimensionHandle output_right = c->Dim(b, adjoint_b ? 0 : 1);
      DimensionHandle output_left = c->Dim(a_shape, adjoint_a ? 1 : 0);
      DimensionHandle inner_left = c->Dim(a_shape, adjoint_a ? 0 : 1);
      DimensionHandle inner_right = c->Dim(b, adjoint_b ? 1 : 0);
      TF_RETURN_IF_ERROR(c->Merge(inner_left, inner_right, &unused_dim));
      c->set_output(0, c->Matrix(output_left, output_right));
      return Status::OK();
    });
}  // namespace shape_inference

typedef Eigen::ThreadPoolDevice CPUDevice;

template <typename Device, typename T, typename Tindices>
class BinarySparseTensorDenseMatMulOp : public OpKernel {
 public:
  explicit BinarySparseTensorDenseMatMulOp(OpKernelConstruction* ctx)
      : OpKernel(ctx) {
    OP_REQUIRES_OK(ctx, ctx->GetAttr("adjoint_a", &adjoint_a_));
    OP_REQUIRES_OK(ctx, ctx->GetAttr("adjoint_b", &adjoint_b_));
  }

  void Compute(OpKernelContext* ctx) override {
    const Tensor* a_indices;
    const Tensor* a_values;
    const Tensor* a_shape;
    const Tensor* b;
    OP_REQUIRES_OK(ctx, ctx->input("a_indices", &a_indices));
    OP_REQUIRES_OK(ctx, ctx->input("a_values", &a_values));
    OP_REQUIRES_OK(ctx, ctx->input("a_shape", &a_shape));
    OP_REQUIRES_OK(ctx, ctx->input("b", &b));

    // Check that the dimensions of the two matrices are valid.
    OP_REQUIRES(ctx, TensorShapeUtils::IsMatrix(b->shape()),
                errors::InvalidArgument("Tensor 'b' is not a matrix"));

    OP_REQUIRES(ctx, TensorShapeUtils::IsVector(a_shape->shape()),
                errors::InvalidArgument("Tensor 'a_shape' is not a vector"));

    OP_REQUIRES(
        ctx, a_shape->NumElements() == 2,
        errors::InvalidArgument("Tensor 'a_shape' must have 2 elements"));

    OP_REQUIRES(ctx, TensorShapeUtils::IsVector(a_values->shape()),
                errors::InvalidArgument("Tensor 'a_values' is not a vector"));

    OP_REQUIRES(ctx, TensorShapeUtils::IsMatrix(a_indices->shape()),
                errors::InvalidArgument("Tensor 'a_indices' is not a matrix"));

    const int64 nnz = a_indices->shape().dim_size(0);
    OP_REQUIRES(ctx, nnz == a_values->NumElements(),
                errors::InvalidArgument("Number of rows of a_indices does not "
                                        "match number of entries in a_values"));

    OP_REQUIRES(
        ctx, a_indices->shape().dim_size(1) == a_shape->NumElements(),
        errors::InvalidArgument("Number of columns of a_indices does not match "
                                "number of entries in a_shape"));

    auto a_shape_t = a_shape->vec<int64>();
    const int64 outer_left = (adjoint_a_) ? a_shape_t(1) : a_shape_t(0);
    const int64 outer_right =
        (adjoint_b_) ? b->shape().dim_size(0) : b->shape().dim_size(1);
    const int64 inner_left = (adjoint_a_) ? a_shape_t(0) : a_shape_t(1);
    const int64 inner_right =
        (adjoint_b_) ? b->shape().dim_size(1) : b->shape().dim_size(0);

    OP_REQUIRES(
        ctx, inner_right == inner_left,
        errors::InvalidArgument(
            "Cannot multiply A and B because inner dimension does not match: ",
            inner_left, " vs. ", inner_right,
            ". Did you forget a transpose? "
            "Dimensions of A: [",
            a_shape_t(0), ", ", a_shape_t(1),
            "). Dimensions of B: ", b->shape().DebugString()));

    TensorShape out_shape({outer_left, outer_right});
    Tensor* out = nullptr;
    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, out_shape, &out));

    if (out->NumElements() == 0) {
      // If a has shape [0, x] or b has shape [x, 0], the output shape
      // is a 0-element matrix, so there is nothing to do.
      return;
    }

    if (a_values->NumElements() == 0 || b->NumElements() == 0) {
      // If a has shape [x, 0] and b has shape [0, y], the
      // output shape is [x, y] where x and y are non-zero, so we fill
      // the output with zeros.
      out->flat<T>().device(ctx->eigen_device<Device>()) =
          out->flat<T>().constant(T(0));
      return;
    }

#define MAYBE_ADJOINT(ADJ_A, ADJ_B)                                   \
  if (adjoint_a_ == ADJ_A && adjoint_b_ == ADJ_B) {                   \
    Status functor_status = functor::SparseTensorDenseMatMulFunctor<  \
        Device, T, Tindices, ADJ_A,                                   \
        ADJ_B>::Compute(ctx, a_indices, a_values, a_shape, b, out);   \
    OP_REQUIRES_OK(ctx, functor_status);                              \
  }

    MAYBE_ADJOINT(false, false);
    MAYBE_ADJOINT(false, true);
    MAYBE_ADJOINT(true, false);
    MAYBE_ADJOINT(true, true);

#undef MAYBE_ADJOINT
  }

 private:
  bool adjoint_a_;
  bool adjoint_b_;
};

#define REGISTER_CPU(TypeT, TypeIndex)           \
  REGISTER_KERNEL_BUILDER(                       \
      Name("BinarySparseTensorDenseMatMul")      \
          .Device(DEVICE_CPU)                    \
          .TypeConstraint<TypeT>("T")            \
          .TypeConstraint<TypeIndex>("Tindices") \
          .HostMemory("a_shape"),                \
      BinarySparseTensorDenseMatMulOp<CPUDevice, TypeT, TypeIndex>);

#define REGISTER_KERNELS_CPU(T) \
  REGISTER_CPU(T, int64);       \
  REGISTER_CPU(T, int32)

REGISTER_KERNELS_CPU(float);
REGISTER_KERNELS_CPU(double);
REGISTER_KERNELS_CPU(int32);
REGISTER_KERNELS_CPU(complex64);
REGISTER_KERNELS_CPU(complex128);

namespace functor {

namespace {
Status KOutOfBoundsError(int64 k, std::size_t i, int rhs_index_a,
                         std::size_t lhs_right) {
  return errors::InvalidArgument("k (", k, ") from index[", i, ",", rhs_index_a,
                                 "] out of bounds (>=", lhs_right, ")");
}

Status MOutOfBoundsError(int64 m, std::size_t i, int lhs_index_a,
                         int64 out_dim0) {
  return errors::InvalidArgument("m (", m, ") from index[", i, ",", lhs_index_a,
                                 "] out of bounds (>=", out_dim0, ")");
}

}  // namespace

// The general functor just borrows the code from tf, except that an add is
// computed instead of a mul-add.
template <typename T, typename Tindices, bool ADJ_A, bool ADJ_B>
struct SparseTensorDenseMatMulFunctor<CPUDevice, T, Tindices, ADJ_A, ADJ_B> {
  // Vectorize certain operations above this size.
  static const std::size_t kNumVectorize = 32;

  static Status Compute(OpKernelContext* ctx,
                        const Tensor *a_indices,
                        const Tensor *a_values,
                        const Tensor *a_shape,
                        const Tensor *b,
                        Tensor *out) {
    return EigenCompute(ctx->eigen_device<CPUDevice>(), out->matrix<T>(),
                        a_indices->matrix<Tindices>(), a_values->vec<T>(),
                        b->matrix<T>());
  }

  static Status EigenCompute(const CPUDevice& d, typename TTypes<T>::Matrix out,
                             typename TTypes<Tindices>::ConstMatrix a_indices,
                             typename TTypes<T>::ConstVec a_values,
                             typename TTypes<T>::ConstMatrix b) {
    const std::size_t nnz = a_values.size();
    const std::size_t rhs_right = (ADJ_B ? b.dimension(0) : b.dimension(1));
    const std::size_t lhs_right = (ADJ_B ? b.dimension(1) : b.dimension(0));
    const int lhs_index_a = ADJ_A ? 1 : 0;
    const int rhs_index_a = ADJ_A ? 0 : 1;

    out.setZero();

    if (rhs_right < kNumVectorize) {
      // Disable vectorization if the RHS of the output is too small.
      auto maybe_adjoint_b = MaybeAdjoint<decltype(b), ADJ_B>(b);

      for (std::size_t i = 0; i < nnz; ++i) {
        const Tindices m = internal::SubtleMustCopy(a_indices(i, lhs_index_a));
        const Tindices k = internal::SubtleMustCopy(a_indices(i, rhs_index_a));
        if (!FastBoundsCheck(k, lhs_right)) {
          return KOutOfBoundsError(k, i, rhs_index_a, lhs_right);
        }
        if (!FastBoundsCheck(m, out.dimension(0))) {
          return MOutOfBoundsError(m, i, lhs_index_a, out.dimension(0));
        }
        for (std::size_t n = 0; n < rhs_right; ++n) {
          const T b_value = maybe_adjoint_b(k, n);
          out(m, n) += b_value;
        }
      }
    } else {
      // Vectorization via Eigen.
      const int b_chip_index = ADJ_B ? 1 : 0;

#define LOOP_NNZ(b_passed)                                                  \
  for (std::size_t i = 0; i < nnz; ++i) {                                   \
    const Tindices m = internal::SubtleMustCopy(a_indices(i, lhs_index_a)); \
    const Tindices k = internal::SubtleMustCopy(a_indices(i, rhs_index_a)); \
    if (!FastBoundsCheck(k, lhs_right)) {                                   \
      return KOutOfBoundsError(k, i, rhs_index_a, lhs_right);               \
    }                                                                       \
    if (!FastBoundsCheck(m, out.dimension(0))) {                            \
      return MOutOfBoundsError(m, i, lhs_index_a, out.dimension(0));        \
    }                                                                       \
    out.template chip<0>(m) += b_passed.template chip<b_chip_index>(k);     \
  }

      if (ADJ_B) {
        // Perform transpose and conjugation on B once, since we chip out B's
        // columns in the nnz loop.
        Eigen::array<int, 2> shuffle;  // preserve dimension order
        shuffle[0] = 1; shuffle[1] = 0;
        Eigen::Tensor<T, 2, Eigen::ColMajor> col_major_conj_b =
            b.swap_layout().shuffle(shuffle).conjugate();
        LOOP_NNZ(col_major_conj_b);
      } else {
        LOOP_NNZ(b);
      }
#undef LOOP_NNZ
    }
    return Status::OK();
  }
};

// We have only specialized and optimized the case with no matrix transpose,
// since it is the most typical usage in production.
template <typename Tindices>
struct SparseTensorDenseMatMulFunctor<CPUDevice,
                                      float, Tindices, false, false> {
  static Status Compute(OpKernelContext* ctx,
                        const Tensor *a_indices,
                        const Tensor *a_values,
                        const Tensor *a_shape,
                        const Tensor *b,
                        Tensor *out) {
    auto a_indices_ptr = a_indices->flat<Tindices>().data();
    auto b_ptr = b->flat<float>().data();
    auto out_ptr = out->flat<float>().data();
    const int64 nnz = a_indices->shape().dim_size(0);
    const int64 outer_left = a_shape->vec<int64>()(0);
    const int64 outer_right = b->shape().dim_size(1);
    ParallelLookupAndSegmentSum<Tindices>(ctx, a_indices_ptr, b_ptr, nnz,
                                          outer_left, outer_right, out_ptr);
    return Status::OK();
  }
};

}  // namespace functor

}  // namespace tensorflow
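For reference, a minimal plain-array sketch of the optimization this kernel implements (assuming row-major inputs and no adjoints): since every a_value is assumed to be 1, each nonzero (m, k) adds row b[k, :] into out[m, :] with no multiply:

#include <cstddef>
#include <vector>

// out[M x N] += sum over nonzeros (m, k) of b[k, :], since a_values are all 1.
// a_indices holds nnz (row, col) pairs flattened as in the kernel above.
void BinarySparseDenseMatMul(const std::vector<long long>& a_indices,  // 2*nnz
                             const std::vector<float>& b,              // K x N
                             std::size_t N,
                             std::vector<float>* out) {                // M x N, zeroed
  const std::size_t nnz = a_indices.size() / 2;
  for (std::size_t i = 0; i < nnz; ++i) {
    const long long m = a_indices[2 * i];
    const long long k = a_indices[2 * i + 1];
    for (std::size_t n = 0; n < N; ++n) {
      (*out)[m * N + n] += b[k * N + n];  // add only: no multiply by a_value
    }
  }
}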
BIN twml/libtwml/src/ops/binary_sparse_dense_matmul.docx (binary file not shown)
@ -1,75 +0,0 @@
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// TWML modified to optimize binary features
#ifndef TENSORFLOW_CORE_KERNELS_BINARY_SPARSE_TENSOR_DENSE_MATMUL_OP_H_
#define TENSORFLOW_CORE_KERNELS_BINARY_SPARSE_TENSOR_DENSE_MATMUL_OP_H_

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/lib/core/errors.h"

namespace tensorflow {

namespace functor {

template <typename Device, typename T, typename Tindices, bool ADJ_A,
          bool ADJ_B>
struct SparseTensorDenseMatMulFunctor {
  static EIGEN_ALWAYS_INLINE Status Compute(
      const Device& d, typename TTypes<T>::Matrix out,
      typename TTypes<Tindices>::ConstMatrix a_indices,
      typename TTypes<T>::ConstVec a_values, typename TTypes<T>::ConstMatrix b);
};

template <typename MATRIX, bool ADJ>
class MaybeAdjoint;

template <typename MATRIX>
class MaybeAdjoint<MATRIX, false> {
 public:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE MaybeAdjoint(MATRIX m) : m_(m) {}
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename MATRIX::Scalar operator()(
      const typename MATRIX::Index i, const typename MATRIX::Index j) const {
    return m_(i, j);
  }

 private:
  const MATRIX m_;
};

template <typename T>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T MaybeConj(T v) {
  return v;
}

template <typename MATRIX>
class MaybeAdjoint<MATRIX, true> {
 public:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE MaybeAdjoint(MATRIX m) : m_(m) {}
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename MATRIX::Scalar operator()(
      const typename MATRIX::Index i, const typename MATRIX::Index j) const {
    return Eigen::numext::conj(m_(j, i));
  }

 private:
  const MATRIX m_;
};

}  // end namespace functor
}  // end namespace tensorflow

#endif  // TENSORFLOW_CORE_KERNELS_BINARY_SPARSE_TENSOR_DENSE_MATMUL_OP_H_
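A small standalone sketch of what the two MaybeAdjoint specializations provide, using lambdas over a plain array instead of an Eigen matrix type: the adjoint case reads conj(m(j, i)), which for real scalars is just the transpose:

#include <cassert>

int main() {
  // 2x3 row-major matrix stand-in.
  const float m[2][3] = {{1, 2, 3}, {4, 5, 6}};

  // MaybeAdjoint<..., false>(i, j) reads m(i, j);
  // MaybeAdjoint<..., true>(i, j) reads conj(m(j, i)) -- for float, conj is
  // the identity, so this is plain transposed access. The same (i, j) call
  // site works either way.
  auto plain   = [&](int i, int j) { return m[i][j]; };
  auto adjoint = [&](int i, int j) { return m[j][i]; };

  assert(plain(0, 2) == 3.0f);
  assert(adjoint(2, 0) == 3.0f);  // element (0, 2) seen through the adjoint
  return 0;
}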
BIN twml/libtwml/src/ops/binary_sparse_dense_matmul_impl.docx (binary file not shown)
@ -1,145 +0,0 @@
#ifndef TENSORFLOW_CORE_KERNELS_BINARY_SPARSE_TENSOR_DENSE_MATMUL_IMPL_H_
#define TENSORFLOW_CORE_KERNELS_BINARY_SPARSE_TENSOR_DENSE_MATMUL_IMPL_H_

#include <atomic>

#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/lib/core/blocking_counter.h"
#include "tensorflow/core/lib/core/threadpool.h"

namespace tensorflow {
namespace functor {

// `ConservativeShard` is adopted rather than tensorflow's `Shard` because the
// original `Shard` may generate more shards than the number of threads, which
// is not ideal for this case, as it may cause too much overhead.
static void ConservativeShard(int max_parallelism, thread::ThreadPool *workers,
                              int64 total, int64 cost_per_unit,
                              std::function<void(int64, int64)> work) {
  if (total == 0) {
    return;
  }
  max_parallelism = std::min(max_parallelism, workers->NumThreads());
  if (max_parallelism <= 1) {
    // Just inline the whole work since we only have 1 thread (core).
    work(0, total);
    return;
  }
  cost_per_unit = std::max(1LL, cost_per_unit);
  // We shard [0, total) into "num_shards" shards.
  //   1 <= num_shards <= num worker threads
  //
  // If total * cost_per_unit is small, it is not worth sharding too
  // much. Let us assume each cost unit is 1ns; kMinCostPerShard=10000
  // is 10us.
  static const int64 kMinCostPerShard = 10000;
  const int num_shards =
      std::max<int>(1, std::min(static_cast<int64>(max_parallelism),
                                total * cost_per_unit / kMinCostPerShard));

  // Each shard contains up to "block_size" units. [0, total) is sharded
  // into:
  //   [0, block_size), [block_size, 2*block_size), ...
  // The 1st shard is done by the caller thread and the other shards
  // are dispatched to the worker threads. The last shard may be smaller than
  // block_size.
  const int64 block_size = (total + num_shards - 1) / num_shards;
  if (block_size >= total) {
    work(0, total);
    return;
  }
  const int num_shards_used = (total + block_size - 1) / block_size;
  BlockingCounter counter(num_shards_used - 1);
  for (int64 start = block_size; start < total; start += block_size) {
    auto limit = std::min(start + block_size, total);
    workers->Schedule([&work, &counter, start, limit]() {
      work(start, limit);        // Compute the shard.
      counter.DecrementCount();  // The shard is done.
    });
  }

  // Inline execute the 1st shard.
  work(0, std::min(block_size, total));
  counter.Wait();
}

static inline void VectorSum(float *a, const float *b, int n) {
  for (int i = 0; i < n; ++i) {
    a[i] += b[i];
  }
}

// This function vectorizes the computation of the segment sum.
template<typename Tindices>
static void LookupAndSegmentSum(const Tindices *a_indices, const float *b,
                                int nnz, int outer_right, float *output) {
  for (int i = 0; i < nnz; ++i) {
    const Tindices m = a_indices[i * 2];
    const Tindices k = a_indices[i * 2 + 1];
    auto output_row_m = output + m * outer_right;
    auto b_row_k = b + k * outer_right;
    VectorSum(output_row_m, b_row_k, outer_right);
  }
}

// This function enables sharding and multithreading. It comes with the
// overhead of duplicating the output buffer to achieve lock-free output,
// so there should not be too many threads.
template<typename Tindices>
static void ParallelLookupAndSegmentSum(OpKernelContext *ctx,
                                        const Tindices *a_indices,
                                        const float *b, int nnz, int outer_left,
                                        int outer_right, float *output) {
  auto worker_threads = *(ctx->device()->tensorflow_cpu_worker_threads());
  int out_size = outer_left * outer_right;
  if (worker_threads.num_threads <= 1) {
    memset(output, 0, out_size * sizeof(float));
    LookupAndSegmentSum<Tindices>(a_indices, b,
                                  nnz, outer_right,
                                  output);
    return;
  }

  // This is to make the buffer align with kAllocatorAlignment.
  int padded_out_size = (out_size + (Allocator::kAllocatorAlignment - 1)) &
                        ~(Allocator::kAllocatorAlignment - 1);
  std::size_t num_bytes =
      (worker_threads.num_threads - 1) * padded_out_size * sizeof(float);
  // Use a custom deleter so the aligned allocation is released with
  // port::AlignedFree; a plain std::unique_ptr<float> would call delete,
  // which does not match port::AlignedMalloc.
  std::unique_ptr<float, void (*)(void *)> buffer(
      reinterpret_cast<float *>(
          port::AlignedMalloc(num_bytes, Allocator::kAllocatorAlignment)),
      port::AlignedFree);
  float *temp_out = buffer.get();

  std::atomic<int> thread_index(0);

  auto task = [&](int64 start, int64 limit) {
    int local_thread_index = thread_index++;
    float *buf_ptr = nullptr;
    if (local_thread_index == 0) {
      buf_ptr = output;
    } else {
      buf_ptr = temp_out + (local_thread_index - 1) * padded_out_size;
    }
    memset(buf_ptr, 0, out_size * sizeof(float));

    LookupAndSegmentSum<Tindices>(a_indices + start * 2, b,
                                  limit - start, outer_right,
                                  buf_ptr);
  };

  int cost_per_unit = outer_right;

  // We don't use the tensorflow shard function, as tf may create more shards
  // than the number of threads.
  ConservativeShard(worker_threads.num_threads, worker_threads.workers, nnz,
                    static_cast<int64>(cost_per_unit), task);

  for (int i = 1; i < thread_index; ++i) {
    VectorSum(output, temp_out + (i - 1) * padded_out_size, out_size);
  }
}

}  // namespace functor

}  // namespace tensorflow

#endif  // TENSORFLOW_CORE_KERNELS_BINARY_SPARSE_TENSOR_DENSE_MATMUL_IMPL_H_
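A worked sketch of the shard-count arithmetic in ConservativeShard, with illustrative numbers: 8 threads, total = 1000 units at cost_per_unit = 100 gives total * cost / kMinCostPerShard = 10, so num_shards is capped at 8 and block_size = 125:

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  const int64_t kMinCostPerShard = 10000;  // same constant as above
  const int max_parallelism = 8;
  const int64_t total = 1000;
  const int64_t cost_per_unit = 100;

  const int num_shards = std::max<int>(
      1, std::min<int64_t>(max_parallelism,
                           total * cost_per_unit / kMinCostPerShard));
  const int64_t block_size = (total + num_shards - 1) / num_shards;
  // Prints: num_shards=8 block_size=125
  std::printf("num_shards=%d block_size=%lld\n", num_shards,
              static_cast<long long>(block_size));
  return 0;
}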
@ -1,243 +0,0 @@
#include "block_format_reader.h"

#include "tensorflow/core/framework/dataset.h"
#include "tensorflow/core/framework/partial_tensor_shape.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/lib/io/random_inputstream.h"

#if !defined(DISABLE_ZLIB)
#include "tensorflow/core/lib/io/zlib_inputstream.h"
#endif

#include <twml.h>

#include <cstdio>
#include <algorithm>
#include <iterator>

using namespace tensorflow;

inline std::string stripPath(std::string const &file_name) {
  const auto pos = file_name.find_last_of("/");
  if (pos == std::string::npos) return file_name;
  return file_name.substr(pos + 1);
}

inline std::string getExtension(std::string const &file_name) {
  const auto stripped_file_name = stripPath(file_name);
  const auto pos = stripPath(stripped_file_name).find_last_of(".");
  if (pos == std::string::npos) return "";
  return stripped_file_name.substr(pos + 1);
}

REGISTER_OP("BlockFormatDatasetV2")
.Input("filenames: string")
.Input("compression_type: string")
.Input("buffer_size: int64")
.Output("handle: variant")
.SetIsStateful()
.SetShapeFn(shape_inference::ScalarShape)
.Doc(R"doc(

Creates a dataset for streaming BlockFormat data in compressed (e.g. gzip) or uncompressed form.
This op can also stream a dataset containing files in a mix of the formats mentioned above.

filenames: A scalar or vector containing the name(s) of the file(s) to be read.
compression_type: A scalar string denoting the compression type. Can be 'none', 'zlib', 'auto'.
buffer_size: A scalar denoting the buffer size to use during decompression.

Outputs
handle: A handle to the dataset. This handle is later used to create an iterator to stream the data from the dataset.

)doc");

class BlockFormatDatasetV2 : public DatasetOpKernel {
 public:
  using DatasetOpKernel::DatasetOpKernel;

  void MakeDataset(OpKernelContext* ctx, DatasetBase **output) override {
    const Tensor* filenames_tensor;
    OP_REQUIRES_OK(ctx, ctx->input("filenames", &filenames_tensor));
    OP_REQUIRES(
        ctx, filenames_tensor->dims() <= 1,
        errors::InvalidArgument("`filenames` must be a scalar or a vector."));

    const auto filenames_flat = filenames_tensor->flat<string>();
    const int64 num_files = filenames_tensor->NumElements();
    std::vector<string> filenames;
    filenames.reserve(num_files);
    std::copy(filenames_flat.data(),
              filenames_flat.data() + num_files,
              std::back_inserter(filenames));

    string compression_type;
    OP_REQUIRES_OK(
        ctx, tensorflow::data::ParseScalarArgument<string>(
            ctx, "compression_type", &compression_type));

    int64 buffer_size = -1;
    OP_REQUIRES_OK(
        ctx, tensorflow::data::ParseScalarArgument<int64>(
            ctx, "buffer_size", &buffer_size));

    OP_REQUIRES(ctx, buffer_size >= 0,
                errors::InvalidArgument(
                    "`buffer_size` must be >= 0 (0 == no buffering)"));

    OP_REQUIRES(ctx,
                compression_type == "auto" ||
                compression_type == "gz" ||
                compression_type == "",
                errors::InvalidArgument("Unknown extension: ", compression_type));

    *output = new Dataset(ctx, std::move(filenames), compression_type, buffer_size);
  }

 private:
  class Dataset : public DatasetBase {
   public:
    Dataset(OpKernelContext* ctx,
            std::vector<string> filenames,
            std::string compression_type,
            int64 buffer_size)
        : DatasetBase(DatasetContext(ctx)),
          compression_type_(compression_type),
          buffer_size_(buffer_size),
          filenames_(std::move(filenames))
    {}

    const DataTypeVector& output_dtypes() const override {
      static DataTypeVector* dtypes = new DataTypeVector({DT_STRING});
      return *dtypes;
    }

    const std::vector<PartialTensorShape>& output_shapes() const override {
      static std::vector<PartialTensorShape>* shapes =
          new std::vector<PartialTensorShape>({{}});
      return *shapes;
    }

    string DebugString() const override { return "BlockFormatDatasetV2::Dataset"; }

   protected:
    Status AsGraphDefInternal(SerializationContext* ctx,
                              DatasetGraphDefBuilder* b,
                              Node** output) const override {
      Node* filenames = nullptr;
      Node* compression_type = nullptr;
      Node* buffer_size = nullptr;
      TF_RETURN_IF_ERROR(b->AddVector(filenames_, &filenames));
      TF_RETURN_IF_ERROR(b->AddScalar(compression_type_, &compression_type));
      TF_RETURN_IF_ERROR(
          b->AddScalar(buffer_size_, &buffer_size));
      TF_RETURN_IF_ERROR(b->AddDataset(
          this, {filenames, compression_type, buffer_size}, output));
      return Status::OK();
    }

   private:
    std::unique_ptr<IteratorBase> MakeIteratorInternal(
        const string& prefix) const override {
      return std::unique_ptr<IteratorBase>(
          new Iterator({this, strings::StrCat(prefix, "::BlockFormat")}));
    }

    class Iterator : public DatasetIterator<Dataset> {
     public:
      explicit Iterator(const Params &params)
          : DatasetIterator<Dataset>(params) {}

      Status GetNextInternal(IteratorContext* ctx,
                             std::vector<Tensor>* out_tensors,
                             bool* end_of_sequence) override {
        mutex_lock l(mu_);
        do {
          // We are currently processing a file, so try to read the next record.
          if (reader_) {
            Tensor result_tensor(cpu_allocator(), DT_STRING, {});
            Status s = reader_->ReadNext(&result_tensor.scalar<string>()());
            if (s.ok()) {
              out_tensors->emplace_back(std::move(result_tensor));
              *end_of_sequence = false;
              return Status::OK();
            } else if (!errors::IsOutOfRange(s)) {
              return s;
            }

            // We have reached the end of the current file, so maybe
            // move on to the next file.
            reader_.reset();
            ++current_file_index_;
          }

          // Iteration ends when there are no more files to process.
          if (current_file_index_ == dataset()->filenames_.size()) {
            *end_of_sequence = true;
            return Status::OK();
          }

          // Actually move on to the next file.
          const string& next_filename =
              dataset()->filenames_[current_file_index_];

          auto compression_type = dataset()->compression_type_;
          int64 buffer_size = dataset()->buffer_size_;

          if (compression_type == "auto") {
            compression_type = getExtension(next_filename);
          }

          if (compression_type != "gz" && compression_type != "") {
            return errors::InvalidArgument("Unknown extension: ", compression_type);
          }

          tensorflow::Env* env = tensorflow::Env::Default();
          TF_CHECK_OK(env->NewRandomAccessFile(next_filename, &file_));

          // RandomAccessInputStream defaults its second parameter to "false".
          // That second parameter is the key issue: "false" assumes ownership
          // of the file is elsewhere. Making it "true" causes segfaults down
          // the line, so keep the ownership of "file_" in this class and
          // clean up properly.
          file_stream_.reset(new tensorflow::io::RandomAccessInputStream(file_.get(), false));

          if (compression_type == "gz") {
            // unpack_stream does not take ownership of file_stream_.
#if !defined(DISABLE_ZLIB)
            unpack_stream_.reset(new tensorflow::io::ZlibInputStream(
                file_stream_.get(),
                buffer_size,
                buffer_size,
                tensorflow::io::ZlibCompressionOptions::GZIP()));
            reader_.reset(new BlockFormatReader(unpack_stream_.get()));
#else
            return errors::InvalidArgument("libtwml compiled without zlib support");
#endif
          } else {
            unpack_stream_.reset(nullptr);
            reader_.reset(new BlockFormatReader(file_stream_.get()));
          }
        } while (true);
      }

     private:
      mutex mu_;
      uint64_t current_file_index_ GUARDED_BY(mu_) = 0;
      std::unique_ptr<tensorflow::RandomAccessFile> file_;
      std::unique_ptr<tensorflow::io::InputStreamInterface> file_stream_;
      std::unique_ptr<tensorflow::io::InputStreamInterface> unpack_stream_;
      std::unique_ptr<BlockFormatReader> reader_ GUARDED_BY(mu_);
    };

    const std::string compression_type_;
    const int64 buffer_size_;
    const std::vector<string> filenames_;
  };
};

REGISTER_KERNEL_BUILDER(
    Name("BlockFormatDatasetV2")
        .Device(DEVICE_CPU),
    BlockFormatDatasetV2);
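A small standalone sketch of the 'auto' compression resolution performed in GetNextInternal, using copies of the stripPath/getExtension helpers above:

#include <cassert>
#include <string>

static std::string stripPath(const std::string& f) {
  const auto pos = f.find_last_of('/');
  return pos == std::string::npos ? f : f.substr(pos + 1);
}

static std::string getExtension(const std::string& f) {
  const auto name = stripPath(f);
  const auto pos = name.find_last_of('.');
  return pos == std::string::npos ? "" : name.substr(pos + 1);
}

int main() {
  // With compression_type == "auto", the file extension decides the codec:
  assert(getExtension("/data/part-00000.gz") == "gz");  // -> ZlibInputStream
  assert(getExtension("/data/part-00000") == "");       // -> raw stream
  return 0;
}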
BIN twml/libtwml/src/ops/block_format_dataset.docx (binary file not shown)
BIN twml/libtwml/src/ops/block_format_reader.docx (binary file not shown)
@ -1,50 +0,0 @@
#pragma once

#include "tensorflow/core/framework/common_shape_fns.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/lib/io/random_inputstream.h"

#include <twml.h>

#include <string>

using tensorflow::int64;
using tensorflow::Status;
using std::string;

class BlockFormatReader : twml::BlockFormatReader {
 public:
  explicit BlockFormatReader(tensorflow::io::InputStreamInterface *stream)
      : twml::BlockFormatReader(), stream_(stream) {
  }

  // Read the next record.
  // Returns OK on success, OUT_OF_RANGE at end of file,
  // or something else on error.
  Status ReadNext(string* record) {
    if (this->next()) {
      return stream_->ReadNBytes(this->current_size(), record);
    }
    return tensorflow::errors::OutOfRange("eof");
  }

  uint64_t read_bytes(void *dest, int size, int count) {
    uint64_t bytesToRead = size * count;
    std::string current;
    // TODO: Try to merge ReadNBytes and the memcpy below;
    // ReadNBytes already performs a memory copy.
    Status status = stream_->ReadNBytes(bytesToRead, &current);
    if (!status.ok()) {
      return 0;
    }
    memcpy(dest, current.c_str(), bytesToRead);
    return count;
  }

 private:
  tensorflow::io::InputStreamInterface *stream_;
  TF_DISALLOW_COPY_AND_ASSIGN(BlockFormatReader);
};
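A minimal sketch of how a caller can drive this reader under the OK / OUT_OF_RANGE contract documented above (the consume callback is an assumption of this sketch):

#include <string>
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"

// Drain a BlockFormatReader: OK yields a record, OUT_OF_RANGE is a clean
// end of file, anything else is a real error to propagate.
template <typename Reader, typename Consume>
tensorflow::Status DrainReader(Reader* reader, Consume consume) {
  std::string record;
  while (true) {
    tensorflow::Status s = reader->ReadNext(&record);
    if (s.ok()) {
      consume(record);
      continue;
    }
    if (tensorflow::errors::IsOutOfRange(s)) {
      return tensorflow::Status::OK();  // eof reached cleanly
    }
    return s;  // propagate the error
  }
}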
@ -1,138 +0,0 @@
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/op_kernel.h"

#include <algorithm>  // std::fill_n

using namespace tensorflow;

REGISTER_OP("CompressSampleIds")
.Attr("T: {int32}")
.Input("input: T")
.Output("output: T")
.SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
  c->set_output(0, c->Vector(c->kUnknownDim));
  return Status::OK();
});

template<typename T>
class CompressSampleIds : public OpKernel {
 public:
  explicit CompressSampleIds(OpKernelConstruction* context) : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    // Grab the input tensor.
    const Tensor& input_tensor = context->input(0);
    auto input = input_tensor.flat<T>();
    const int N = input.size();

    // Check for improper input.
    bool error = (N > 0 && input(0) < 0);
    for (int i = 1; !error && i < N; i++) {
      error = input(i - 1) > input(i);
    }

    OP_REQUIRES(
        context, !error,
        errors::InvalidArgument(
            "Error in CompressSampleIds. SampleIds must be non-negative and non-decreasing"
        )
    );

    // Choose the output size: either last input element + 1, or 0.
    int output_size = 0;
    if (N > 0) {
      output_size = input(N - 1) + 1;
    }

    // Create an output tensor.
    Tensor* output_tensor = nullptr;
    OP_REQUIRES_OK(
        context,
        context->allocate_output(0, TensorShape({output_size}), &output_tensor)
    );
    auto output_flat = output_tensor->flat<T>();

    // Zero-initialize the output.
    for (int i = 0; i < output_size; i++) {
      output_flat(i) = 0;
    }

    // Count the occurrences of each input element.
    for (int i = 0; i < N; i++) {
      output_flat(input(i))++;
    }
  }
};

REGISTER_OP("DecompressSampleIds")
.Attr("T: {int32}")
.Input("input: T")
.Output("output: T")
.SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
  c->set_output(0, c->Vector(c->kUnknownDim));
  return Status::OK();
});

template<typename T>
class DecompressSampleIds : public OpKernel {
 public:
  explicit DecompressSampleIds(OpKernelConstruction* context) : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    // Grab the input tensor.
    const Tensor& input_tensor = context->input(0);
    auto input = input_tensor.flat<T>();
    const int N = input.size();

    // Check for improper input.
    bool error = false;
    int output_size = 0;
    for (int i = 0; !error && i < N; i++) {
      error = input(i) < 0;
      output_size += input(i);
    }

    OP_REQUIRES(
        context, !error,
        errors::InvalidArgument(
            "Error in DecompressSampleIds. Inputs must be non-negative."
        )
    );

    // Create an output tensor.
    Tensor* output_tensor = nullptr;
    OP_REQUIRES_OK(
        context,
        context->allocate_output(0, TensorShape({output_size}), &output_tensor)
    );
    auto output_flat = output_tensor->flat<T>();

    T *output_data = output_flat.data();
    for (int current_sample = 0; current_sample < N; current_sample++) {
      std::fill_n(output_data, input(current_sample), current_sample);
      output_data += input(current_sample);
    }
  }
};

#define REGISTER(Type)                      \
                                            \
  REGISTER_KERNEL_BUILDER(                  \
      Name("CompressSampleIds")             \
          .Device(DEVICE_CPU)               \
          .TypeConstraint<Type>("T"),       \
      CompressSampleIds<Type>);             \
                                            \
  REGISTER_KERNEL_BUILDER(                  \
      Name("DecompressSampleIds")           \
          .Device(DEVICE_CPU)               \
          .TypeConstraint<Type>("T"),       \
      DecompressSampleIds<Type>);           \

REGISTER(int32);
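A worked sketch of the two ops' semantics on plain vectors (not the kernels themselves): compressing the sample ids [0, 0, 1, 3] counts occurrences per id, giving [2, 1, 0, 1], and decompressing inverts that:

#include <cassert>
#include <cstddef>
#include <vector>

std::vector<int> Compress(const std::vector<int>& ids) {  // non-decreasing, >= 0
  std::vector<int> counts(ids.empty() ? 0 : ids.back() + 1, 0);
  for (int id : ids) counts[id]++;
  return counts;
}

std::vector<int> Decompress(const std::vector<int>& counts) {
  std::vector<int> ids;
  for (int sample = 0; sample < static_cast<int>(counts.size()); ++sample) {
    ids.insert(ids.end(), static_cast<size_t>(counts[sample]), sample);
  }
  return ids;
}

int main() {
  const std::vector<int> ids = {0, 0, 1, 3};
  assert(Compress(ids) == std::vector<int>({2, 1, 0, 1}));
  assert(Decompress(Compress(ids)) == ids);  // round-trips exactly
  return 0;
}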
BIN twml/libtwml/src/ops/compress_sample_ids.docx (binary file not shown)
@ -1,116 +0,0 @@
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/op_kernel.h"

#include <twml.h>
#include "../tensorflow_utils.h"
#include "../resource_utils.h"

#include <string>
#include <set>

using std::string;

void join(const std::set<string>& v, char c, string& s) {
  s.clear();
  std::set<std::string>::iterator it = v.begin();
  while (it != v.end()) {
    s += *it;
    it++;
    if (it != v.end()) s += c;
  }
}

// C++ function that computes the substrings of a given word.
std::string computeSubwords(std::string word, int32_t minn, int32_t maxn) {
  std::string word2 = "<" + word + ">";
  std::set<string> ngrams;
  std::string s;
  ngrams.insert(word);
  ngrams.insert(word2);
  for (size_t i = 0; i < word2.size(); i++) {
    if ((word2[i] & 0xC0) == 0x80) continue;  // skip UTF-8 continuation bytes
    for (size_t j = minn; i + j <= word2.size() && j <= maxn; j++) {
      ngrams.insert(word2.substr(i, j));
    }
  }
  join(ngrams, ';', s);
  ngrams.clear();
  return s;
}

// tf-op function that computes substrings for a given tensor of words.
template<typename ValueType>
void ComputeSubStringsTensor(OpKernelContext *context, int32 min_n, int32 max_n) {
  try {
    const Tensor& values = context->input(0);

    auto values_flat = values.flat<ValueType>();

    // batch_size from input size:
    const int batch_size = values_flat.size();

    // Define the output tensor.
    Tensor* substrings = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(0, values.shape(), &substrings));

    auto substrings_flat = substrings->flat<ValueType>();
    // Compute substrings for the given tensor values.
    for (int64 i = 0; i < batch_size; i++) {
      substrings_flat(i) = computeSubwords(values_flat(i), min_n, max_n);
    }
  }
  catch (const std::exception &err) {
    context->CtxFailureWithWarning(errors::InvalidArgument(err.what()));
  }
}

REGISTER_OP("GetSubstrings")
.Attr("ValueType: {string}")
.Attr("min_n: int")
.Attr("max_n: int")
.Input("values: ValueType")
.Output("substrings: ValueType")
.SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
  c->set_output(0, c->input(0));
  return Status::OK();
}).Doc(R"doc(

A tensorflow OP to convert a word to its substrings of length between min_n and max_n.

Attr
  min_n, max_n: The size range of the substrings.

Input
  values: 1D input tensor containing the values.

Outputs
  substrings: A string tensor where substrings are joined by ";".
)doc");

template<typename ValueType>
class GetSubstrings : public OpKernel {
 public:
  explicit GetSubstrings(OpKernelConstruction *context) : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("min_n", &min_n));
    OP_REQUIRES_OK(context, context->GetAttr("max_n", &max_n));
  }

 private:
  int32 min_n;
  int32 max_n;
  void Compute(OpKernelContext *context) override {
    ComputeSubStringsTensor<ValueType>(context, min_n, max_n);
  }
};

#define REGISTER_SUBSTRINGS(ValueType)              \
  REGISTER_KERNEL_BUILDER(                          \
      Name("GetSubstrings")                         \
          .Device(DEVICE_CPU)                       \
          .TypeConstraint<ValueType>("ValueType"),  \
      GetSubstrings<ValueType>);                    \

REGISTER_SUBSTRINGS(string)
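A worked standalone sketch of computeSubwords' behavior for ASCII input (the UTF-8 continuation-byte check is omitted here): for the word 'cat' with min_n = 2 and max_n = 3, the set holds 'cat', '<cat>' and every 2-3 byte window of '<cat>', joined by ';':

#include <iostream>
#include <set>
#include <string>

int main() {
  const std::string word = "cat";
  const size_t minn = 2, maxn = 3;

  std::string word2 = "<" + word + ">";
  std::set<std::string> ngrams = {word, word2};
  for (size_t i = 0; i < word2.size(); i++) {
    for (size_t j = minn; i + j <= word2.size() && j <= maxn; j++) {
      ngrams.insert(word2.substr(i, j));
    }
  }

  std::string joined;
  for (const auto& g : ngrams) joined += (joined.empty() ? "" : ";") + g;
  std::cout << joined << "\n";  // prints: <c;<ca;<cat>;at;at>;ca;cat;t>
  return 0;
}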
BIN twml/libtwml/src/ops/contrib/get_substrings.docx (binary file not shown)
File diff suppressed because it is too large.
BIN twml/libtwml/src/ops/data_record.docx (binary file not shown)
@ -1,81 +0,0 @@
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/op_kernel.h"

#include <twml.h>
#include "tensorflow_utils.h"

using namespace tensorflow;

REGISTER_OP("DataRecordTensorWriter")
.Attr("T: list({string, int32, int64, float, double, bool})")
.Input("keys: int64")
.Input("values: T")
.Output("result: uint8")
.SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
  return Status::OK();
}).Doc(R"doc(

A tensorflow OP that packages keys and dense tensors into a DataRecord.

values: list of tensors
keys: feature ids from the original DataRecord (int64)

Outputs
  result: output DataRecord serialized using Thrift into a uint8 tensor.
)doc");

class DataRecordTensorWriter : public OpKernel {
 public:
  explicit DataRecordTensorWriter(OpKernelConstruction* context)
      : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    const Tensor& keys = context->input(0);

    try {
      // Set keys as twml::Tensor.
      const twml::Tensor in_keys_ = TFTensor_to_twml_tensor(keys);

      // Check sizes.
      uint64_t num_keys = in_keys_.getNumElements();
      uint64_t num_values = context->num_inputs() - 1;

      OP_REQUIRES(context, num_keys == num_values,
                  errors::InvalidArgument("Number of dense keys and dense tensors do not match"));

      // Populate the DataRecord object.
      const int64_t *key_data = in_keys_.getData<int64_t>();
      twml::DataRecord record = twml::DataRecord();

      for (int i = 1; i < context->num_inputs(); i++) {
        const twml::RawTensor& value = TFTensor_to_twml_raw_tensor(context->input(i));
        record.addRawTensor(key_data[i - 1], value);
      }

      // Determine the length of the encoded result (no memory is copied).
      twml::ThriftWriter thrift_dry_writer = twml::ThriftWriter(nullptr, 0, true);
      twml::DataRecordWriter record_dry_writer = twml::DataRecordWriter(thrift_dry_writer);
      record_dry_writer.write(record);
      int len = thrift_dry_writer.getBytesWritten();
      TensorShape result_shape = {1, len};

      // Allocate the output tensor.
      Tensor* result = nullptr;
      OP_REQUIRES_OK(context, context->allocate_output(0, result_shape, &result));
      twml::Tensor out_result = TFTensor_to_twml_tensor(*result);

      // Write to the output tensor.
      uint8_t *buffer = out_result.getData<uint8_t>();
      twml::ThriftWriter thrift_writer = twml::ThriftWriter(buffer, len, false);
      twml::DataRecordWriter record_writer = twml::DataRecordWriter(thrift_writer);
      record_writer.write(record);
    } catch (const std::exception &e) {
      context->CtxFailureWithWarning(errors::InvalidArgument(e.what()));
    }
  }
};

REGISTER_KERNEL_BUILDER(
    Name("DataRecordTensorWriter").Device(DEVICE_CPU),
    DataRecordTensorWriter);
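The dry-run pass above sizes the record without copying memory. A standalone sketch of that dry-writer idea (CountingWriter is an illustrative stand-in; only the buffer/size/dry constructor shape mirrors the twml::ThriftWriter usage above):

#include <cstddef>
#include <cstdint>
#include <cstring>

// Stand-in writer: in dry mode it only counts bytes, so the same
// serialization code can size a buffer before filling it.
class CountingWriter {
 public:
  CountingWriter(uint8_t* buffer, size_t size, bool dry)
      : buffer_(buffer), size_(size), dry_(dry) {}

  void writeBytes(const uint8_t* data, size_t n) {
    if (!dry_ && written_ + n <= size_) {
      std::memcpy(buffer_ + written_, data, n);
    }
    written_ += n;  // counted in both modes
  }

  size_t getBytesWritten() const { return written_; }

 private:
  uint8_t* buffer_;
  size_t size_;
  bool dry_;
  size_t written_ = 0;
};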
BIN twml/libtwml/src/ops/data_record_tensor_writer.docx (binary file not shown)
@ -1,293 +0,0 @@
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/op_kernel.h"

#include <twml.h>
#include "tensorflow_utils.h"

using namespace tensorflow;

void ComputeDiscretizers(OpKernelContext* context, const bool return_bin_indices = false) {
  const Tensor& keys = context->input(0);
  const Tensor& vals = context->input(1);
  const Tensor& bin_ids = context->input(2);
  const Tensor& bin_vals = context->input(3);
  const Tensor& feature_offsets = context->input(4);

  Tensor* new_keys = nullptr;
  OP_REQUIRES_OK(context, context->allocate_output(0, keys.shape(),
                                                   &new_keys));
  Tensor* new_vals = nullptr;
  OP_REQUIRES_OK(context, context->allocate_output(1, keys.shape(),
                                                   &new_vals));

  try {
    twml::Tensor out_keys_ = TFTensor_to_twml_tensor(*new_keys);
    twml::Tensor out_vals_ = TFTensor_to_twml_tensor(*new_vals);

    const twml::Tensor in_keys_ = TFTensor_to_twml_tensor(keys);
    const twml::Tensor in_vals_ = TFTensor_to_twml_tensor(vals);
    const twml::Tensor bin_ids_ = TFTensor_to_twml_tensor(bin_ids);
    const twml::Tensor bin_vals_ = TFTensor_to_twml_tensor(bin_vals);
    const twml::Tensor feature_offsets_ = TFTensor_to_twml_tensor(feature_offsets);
    twml::mdlInfer(out_keys_, out_vals_,
                   in_keys_, in_vals_,
                   bin_ids_, bin_vals_,
                   feature_offsets_,
                   return_bin_indices);
  } catch (const std::exception &e) {
    context->CtxFailureWithWarning(errors::InvalidArgument(e.what()));
  }
}

REGISTER_OP("MDL")
.Attr("T: {float, double}")
.Input("keys: int64")
.Input("vals: T")
.Input("bin_ids: int64")
.Input("bin_vals: T")
.Input("feature_offsets: int64")
.Output("new_keys: int64")
.Output("new_vals: T")
.SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
  // TODO: check sizes
  c->set_output(0, c->input(0));
  c->set_output(1, c->input(0));
  return Status::OK();
}).Doc(R"doc(

This operation discretizes a tensor containing continuous features.

Input
  keys: A tensor containing feature ids.
  vals: A tensor containing values at corresponding feature ids.
  bin_ids: A tensor containing the discretized feature id for a given bin.
  bin_vals: A tensor containing the bin boundaries for the value at a given feature id.
  feature_offsets: Specifies the starting location of bins for a given feature id.

Expected Sizes:
  keys, vals: [N].
  bin_ids, bin_vals: [sum_{n=1}^{n=num_classes} num_bins(n)]

  where
  - N is the number of sparse features in the current batch.
  - [0, num_classes) represents the range each feature id can take.
  - num_bins(n) is the number of bins for a given feature id.
  - If num_bins is fixed, then xs, ys are of size [num_classes * num_bins].

Expected Types:
  keys, bin_ids: int64.
  vals: float or double.
  bin_vals: same as vals.

Before using MDL, you should use a hashmap to get the intersection of
input `keys` with the features that MDL knows about:
::
  keys, vals  # keys can be in range [0, 1 << 63)
  mdl_keys = hashmap.find(keys)  # mdl_keys are now in range [0, num_classes_from_calibration)
  mdl_keys = where(mdl_keys != -1)  # Ignore keys not found


Inside MDL, the following is happening:
::
  start = offsets[key[i]]
  end = offsets[key[i] + 1]
  idx = binary_search for val[i] in [bin_vals[start], bin_vals[end]]

  result_keys[i] = bin_ids[idx]
  val[i] = 1  # binary feature value

Outputs
  new_keys: The discretized feature ids with same shape and size as keys.
  new_vals: The discretized values with the same shape and size as vals.

)doc");

template<typename T>
class MDL : public OpKernel {
 public:
  explicit MDL(OpKernelConstruction* context) : OpKernel(context) {
  }

  void Compute(OpKernelContext* context) override {
    ComputeDiscretizers(context);
  }
};
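A sketch of the per-feature bin lookup the doc describes, with std::upper_bound standing in for the binary search (twml::mdlInfer is the actual implementation; the boundary conventions below are illustrative):

#include <algorithm>
#include <cstdint>
#include <vector>

// For feature `key` with continuous value `val`, find its bin inside
// [offsets[key], offsets[key + 1]) and emit the discretized id, as the
// doc above outlines. The emitted value itself becomes 1 (binary feature).
int64_t DiscretizeOne(int64_t key, double val,
                      const std::vector<int64_t>& bin_ids,
                      const std::vector<double>& bin_vals,
                      const std::vector<int64_t>& offsets) {
  const int64_t start = offsets[key];
  const int64_t end = offsets[key + 1];
  const auto it = std::upper_bound(bin_vals.begin() + start,
                                   bin_vals.begin() + end, val);
  const int64_t idx = std::max<int64_t>(start, (it - bin_vals.begin()) - 1);
  return bin_ids[idx];
}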
REGISTER_OP("PercentileDiscretizer")
|
||||
.Attr("T: {float, double}")
|
||||
.Input("keys: int64")
|
||||
.Input("vals: T")
|
||||
.Input("bin_ids: int64")
|
||||
.Input("bin_vals: T")
|
||||
.Input("feature_offsets: int64")
|
||||
.Output("new_keys: int64")
|
||||
.Output("new_vals: T")
|
||||
.SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
|
||||
// TODO: check sizes
|
||||
c->set_output(0, c->input(0));
|
||||
c->set_output(1, c->input(0));
|
||||
return Status::OK();
|
||||
}).Doc(R"doc(
|
||||
|
||||
This operation discretizes a tensor containing continuous features.
|
||||
|
||||
Input
|
||||
keys: A tensor containing feature ids.
|
||||
vals: A tensor containing values at corresponding feature ids.
|
||||
bin_ids: A tensor containing the discretized feature id for a given bin.
|
||||
bin_vals: A tensor containing the bin boundaries for value at a given feature id.
|
||||
feature_offsets: Specifies the starting location of bins for a given feature id.
|
||||
|
||||
Expected Sizes:
|
||||
keys, vals: [N].
|
||||
bin_ids, bin_vals: [sum_{n=1}^{n=num_classes} num_bins(n)]
|
||||
|
||||
where
|
||||
- N is the number of sparse features in the current batch.
|
||||
- [0, num_classes) represents the range each feature id can take.
|
||||
- num_bins(n) is the number of bins for a given feature id.
|
||||
- If num_bins is fixed, then xs, ys are of size [num_classes * num_bins].
|
||||
|
||||
Expected Types:
|
||||
keys, bin_ids: int64.
|
||||
vals: float or double.
|
||||
bin_vals: same as vals.
|
||||
|
||||
Before using PercentileDiscretizer, you should use a hashmap to get the intersection of
|
||||
input `keys` with the features that PercentileDiscretizer knows about:
|
||||
::
|
||||
keys, vals # keys can be in range [0, 1 << 63)
|
||||
percentile_discretizer_keys = hashmap.find(keys) # percentile_discretizer_keys are now in range [0, num_classes_from_calibration)
|
||||
percentile_discretizer_keys = where (percentile_discretizer_keys != -1) # Ignore keys not found
|
||||
|
||||
|
||||
Inside PercentileDiscretizer, the following is happening:
|
||||
::
|
||||
start = offsets[key[i]]
|
||||
end = offsets[key[i] + 1]
|
||||
idx = binary_search for val[i] in [bin_vals[start], bin_vals[end]]
|
||||
|
||||
result_keys[i] = bin_ids[idx]
|
||||
val[i] = 1 # binary feature value
|
||||
|
||||
Outputs
|
||||
new_keys: The discretized feature ids with same shape and size as keys.
|
||||
new_vals: The discretized values with the same shape and size as vals.
|
||||
|
||||
)doc");

template<typename T>
class PercentileDiscretizer : public OpKernel {
 public:
  explicit PercentileDiscretizer(OpKernelConstruction* context) : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    ComputeDiscretizers(context);
  }
};


REGISTER_OP("PercentileDiscretizerBinIndices")
  .Attr("T: {float, double}")
  .Input("keys: int64")
  .Input("vals: T")
  .Input("bin_ids: int64")
  .Input("bin_vals: T")
  .Input("feature_offsets: int64")
  .Output("new_keys: int64")
  .Output("new_vals: T")
  .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
    // TODO: check sizes
    c->set_output(0, c->input(0));
    c->set_output(1, c->input(0));
    return Status::OK();
  }).Doc(R"doc(

This operation discretizes a tensor containing continuous features.
If the feature id and bin index of a discretized value are the same across
multiple runs, it will always be assigned the same output key and value,
regardless of the bin_id assigned during calibration.

Input
  keys: A tensor containing feature ids.
  vals: A tensor containing values at corresponding feature ids.
  bin_ids: A tensor containing the discretized feature id for a given bin.
  bin_vals: A tensor containing the bin boundaries for values at a given feature id.
  feature_offsets: Specifies the starting location of bins for a given feature id.

Expected Sizes:
  keys, vals: [N].
  bin_ids, bin_vals: [sum_{n=1}^{n=num_classes} num_bins(n)]

  where
    - N is the number of sparse features in the current batch.
    - [0, num_classes) represents the range each feature id can take.
    - num_bins(n) is the number of bins for a given feature id.
    - If num_bins is fixed, then xs, ys are of size [num_classes * num_bins].

Expected Types:
  keys, bin_ids: int64.
  vals: float or double.
  bin_vals: same as vals.

Before using PercentileDiscretizerBinIndices, you should use a hashmap to get the intersection of
input `keys` with the features that PercentileDiscretizerBinIndices knows about:
::

  keys, vals  # keys can be in range [0, 1 << 63)
  percentile_discretizer_keys = hashmap.find(keys)  # percentile_discretizer_keys are now in range [0, num_classes_from_calibration)
  percentile_discretizer_keys = where(percentile_discretizer_keys != -1)  # Ignore keys not found

Inside PercentileDiscretizerBinIndices, the following is happening:
::

  start = offsets[key[i]]
  end = offsets[key[i] + 1]
  idx = binary_search for val[i] in [bin_vals[start], bin_vals[end]]

  result_keys[i] = bin_ids[idx]
  val[i] = 1  # binary feature value

Outputs
  new_keys: The discretized feature ids, with the same shape and size as keys.
  new_vals: The discretized values, with the same shape and size as vals.

)doc");

template<typename T>
class PercentileDiscretizerBinIndices : public OpKernel {
 public:
  explicit PercentileDiscretizerBinIndices(OpKernelConstruction* context) : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    ComputeDiscretizers(context, true);
  }
};


#define REGISTER(Type)                              \
                                                    \
  REGISTER_KERNEL_BUILDER(                          \
      Name("PercentileDiscretizerBinIndices")       \
          .Device(DEVICE_CPU)                       \
          .TypeConstraint<Type>("T"),               \
      PercentileDiscretizerBinIndices<Type>);       \
                                                    \
  REGISTER_KERNEL_BUILDER(                          \
      Name("PercentileDiscretizer")                 \
          .Device(DEVICE_CPU)                       \
          .TypeConstraint<Type>("T"),               \
      PercentileDiscretizer<Type>);                 \
                                                    \
  REGISTER_KERNEL_BUILDER(                          \
      Name("MDL")                                   \
          .Device(DEVICE_CPU)                       \
          .TypeConstraint<Type>("T"),               \
      MDL<Type>);

REGISTER(float);
REGISTER(double);
BIN twml/libtwml/src/ops/discretizer.docx (new file, binary not shown)
@ -1,134 +0,0 @@

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/op_kernel.h"

#include <twml.h>
#include "tensorflow_utils.h"
#include <map>
#include <vector>

REGISTER_OP("FeatureExtractor")
  .Attr("T: {float, double} = DT_FLOAT")
  .Input("mask_in: bool")
  .Input("ids_in: int64")
  .Input("keys_in: int64")
  .Input("values_in: T")
  .Input("codes_in: int64")
  .Input("types_in: int8")
  .Output("ids_out: int64")
  .Output("keys_out: int64")
  .Output("values_out: T")
  .Output("codes_out: int64")
  .Output("types_out: int8")
  .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
    return Status::OK();
  }).Doc(R"doc(

A tensorflow OP that extracts the desired indices of a Tensor based on a mask.

Input
  mask_in: boolean Tensor that determines which indices are kept (bool)
  ids_in: input indices Tensor (int64)
  keys_in: input keys Tensor (int64)
  values_in: input values Tensor (float/double)
  codes_in: input codes Tensor (int64)
  types_in: input types Tensor (int8)

Outputs
  ids_out: output indices Tensor (int64)
  keys_out: output keys Tensor (int64)
  values_out: output values Tensor (float/double)
  codes_out: output codes Tensor (int64)
  types_out: output types Tensor (int8)

)doc");
template <typename T>
class FeatureExtractor : public OpKernel {
 public:
  explicit FeatureExtractor(OpKernelConstruction* context)
    : OpKernel(context) {}

  template <typename A, typename U>
  bool allequal(const A &t, const U &u) {
    return t == u;
  }

  template <typename A, typename U, typename... Others>
  bool allequal(const A &t, const U &u, Others const &... args) {
    return (t == u) && allequal(u, args...);
  }
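
  // allequal above folds pairwise comparisons left to right, so
  // allequal(a, b, c, d) evaluates (a == b) && (b == c) && (c == d).
  // Compute uses it to require that all six inputs have equal length.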

  void Compute(OpKernelContext* context) override {
    // Get input tensors
    const Tensor& input_mask = context->input(0);
    const Tensor& input_ids = context->input(1);
    const Tensor& input_keys = context->input(2);
    const Tensor& input_values = context->input(3);
    const Tensor& input_codes = context->input(4);
    const Tensor& input_types = context->input(5);

    auto mask = input_mask.flat<bool>();
    auto ids = input_ids.flat<int64>();
    auto keys = input_keys.flat<int64>();
    auto codes = input_codes.flat<int64>();
    auto values = input_values.flat<T>();
    auto types = input_types.flat<int8>();

    // Verify that all input tensors have the same size.
    OP_REQUIRES(context, allequal(mask.size(), ids.size(), keys.size(), codes.size(), values.size(), types.size()),
                errors::InvalidArgument("all input vectors must be the same size."));

    // Get the size of the output vectors by counting the trues in the mask.
    int total_size = 0;
    for (int i = 0; i < mask.size(); i++) {
      if (mask(i))
        total_size += 1;
    }

    // The output shape is the number of trues in the mask.
    TensorShape shape_out = {total_size};

    // Create the output tensors
    Tensor* output_codes = nullptr;
    Tensor* output_ids = nullptr;
    Tensor* output_values = nullptr;
    Tensor* output_types = nullptr;
    Tensor* output_keys = nullptr;

    OP_REQUIRES_OK(context, context->allocate_output(0, shape_out, &output_ids));
    OP_REQUIRES_OK(context, context->allocate_output(1, shape_out, &output_keys));
    OP_REQUIRES_OK(context, context->allocate_output(2, shape_out, &output_values));
    OP_REQUIRES_OK(context, context->allocate_output(3, shape_out, &output_codes));
    OP_REQUIRES_OK(context, context->allocate_output(4, shape_out, &output_types));

    auto output_ids_ = output_ids->flat<int64>();
    auto output_keys_ = output_keys->flat<int64>();
    auto output_codes_ = output_codes->flat<int64>();
    auto output_values_ = output_values->flat<T>();
    auto output_types_ = output_types->flat<int8>();

    // Iterate through the mask and copy kept entries to the output tensors.
    int j = 0;
    for (int i = 0; i < mask.size(); i++) {
      if (mask(i)) {
        output_ids_(j) = ids(i);
        output_keys_(j) = keys(i);
        output_values_(j) = values(i);
        output_codes_(j) = codes(i);
        output_types_(j) = types(i);
        ++j;
      }
    }
  }
};

#define REGISTER(Type)                    \
                                          \
  REGISTER_KERNEL_BUILDER(                \
      Name("FeatureExtractor")            \
          .Device(DEVICE_CPU)             \
          .TypeConstraint<Type>("T"),     \
      FeatureExtractor<Type>);

REGISTER(float);
REGISTER(double);
BIN twml/libtwml/src/ops/feature_extractor.docx (new file, binary not shown)
@ -1,58 +0,0 @@

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/op_kernel.h"

#include <twml.h>
#include "tensorflow_utils.h"

using namespace tensorflow;

REGISTER_OP("FeatureId")
  .Attr("feature_names: list(string)")
  .Output("output: int64")
  .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
    return Status::OK();
  }).Doc(R"doc(

A tensorflow OP that hashes a list of strings into int64. This is used for feature name hashing.

Attr
  feature_names: a list of string feature names (list(string)).

Outputs
  output: hashes corresponding to the string feature names (int64).
)doc");


class FeatureId : public OpKernel {
 private:
  std::vector<string> input_vector;

 public:
  explicit FeatureId(OpKernelConstruction* context) : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("feature_names", &input_vector));
  }

  void Compute(OpKernelContext* context) override {
    // Get the size of input_vector and create the TensorShape
    const int total_size = static_cast<int>(input_vector.size());
    TensorShape shape = {total_size};

    // Create an output tensor
    Tensor* output_tensor = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(0, shape,
                                                     &output_tensor));
    auto output_flat = output_tensor->flat<int64>();

    // Hash each feature name into an int64
    for (int i = 0; i < total_size; i++) {
      output_flat(i) = twml::featureId(input_vector[i]);
    }
  }
};

REGISTER_KERNEL_BUILDER(
    Name("FeatureId")
        .Device(DEVICE_CPU),
    FeatureId);
BIN twml/libtwml/src/ops/feature_id.docx (new file, binary not shown)
@ -1,83 +0,0 @@

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/op_kernel.h"

#include <twml.h>
#include "tensorflow_utils.h"
#include <map>
#include <vector>
#include <set>

REGISTER_OP("FeatureMask")
  .Attr("T: {int64, int8}")
  .Input("keep: T")
  .Attr("list_keep: list(int)")
  .Output("mask: bool")

  .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
    return Status::OK();
  }).Doc(R"doc(

A tensorflow OP that creates a mask of the indices that should be kept.

Attribute
  list_keep: list of values which should be kept (list(int))

Input
  keep: Tensor for which we will apply the mask (int64, int8)

Outputs
  mask: boolean Tensor (bool)

)doc");
template <typename T>
class FeatureMask : public OpKernel {
 private:
  std::set<int64> feature_set_keep;

 public:
  explicit FeatureMask(OpKernelConstruction* context)
    : OpKernel(context) {
    std::vector<int64> feature_list_keep;
    OP_REQUIRES_OK(context, context->GetAttr("list_keep", &feature_list_keep));
    // Copy the contents of list_keep into a set, since tensorflow does not
    // allow reading the attribute directly into a set.
    feature_set_keep = std::set<int64>(feature_list_keep.begin(), feature_list_keep.end());
  }

  void Compute(OpKernelContext* context) override {
    // Get the input tensor and flatten it
    const Tensor& input = context->input(0);

    auto keep = input.flat<T>();

    // Create an output tensor
    Tensor* output_mask = nullptr;

    // The output shape matches the input, so its contents can be filled in directly.
    const int total_size_out = static_cast<int>(keep.size());

    TensorShape shape_out = {total_size_out};

    OP_REQUIRES_OK(context, context->allocate_output(0, shape_out, &output_mask));

    auto output_mask_ = output_mask->flat<bool>();

    // Check whether each value is in the set; the output is boolean.
    for (int j = 0; j < keep.size(); j++) {
      output_mask_(j) = (feature_set_keep.count(keep(j)));
    }
  }
};

#define REGISTER(Type)                    \
                                          \
  REGISTER_KERNEL_BUILDER(                \
      Name("FeatureMask")                 \
          .Device(DEVICE_CPU)             \
          .TypeConstraint<Type>("T"),     \
      FeatureMask<Type>);

REGISTER(int64);
REGISTER(int8);
BIN twml/libtwml/src/ops/feature_mask.docx (new file, binary not shown)
@ -1,190 +0,0 @@

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/op_kernel.h"

#include <twml.h>
#include "tensorflow_utils.h"
#include "resource_utils.h"

#include <algorithm>
using std::string;

template<typename IndexType, typename ValueType, bool calc_batch_size>
void ComputeFixedLengthTensor(OpKernelContext *context, int64 max_length_) {
  try {
    const Tensor& segment_ids = context->input(0);
    const Tensor& values = context->input(1);
    const Tensor& pad_value = context->input(2);

    auto indices_flat = segment_ids.flat<IndexType>();
    auto values_flat = values.flat<ValueType>();

    auto pad_value_scalar = pad_value.scalar<ValueType>()();

    // Get the maximum length from the batch if the user hasn't specified it.
    int64 max_length = max_length_;
    if (max_length < 0 && indices_flat.size() > 0) {
      int64 current_id = indices_flat(0);
      int64 current_length = 1;

      for (int64 i = 1; i < indices_flat.size(); i++) {
        if (current_id == indices_flat(i)) {
          current_length++;
        } else {
          current_id = indices_flat(i);
          max_length = std::max(max_length, current_length);
          current_length = 1;
        }
      }
      // This is needed if the last segment is the longest sequence.
      max_length = std::max(max_length, current_length);
    }

    int64 batch_size = 0;
    if (calc_batch_size) {
      if (indices_flat.size() > 0) {
        // The last value of segment_ids will be batch_size - 1.
        batch_size = 1 + indices_flat(indices_flat.size() - 1);
      } else {
        batch_size = 0;
      }
    } else {
      const Tensor& batch_size_tensor = context->input(3);
      batch_size = batch_size_tensor.flat<int64>()(0);
    }

    TensorShape output_shape = {batch_size, max_length};
    Tensor* fixed_length = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &fixed_length));

    auto fixed_length_flat = fixed_length->flat<ValueType>();

    int64 n = 0;
    int64 offset = 0;
    for (int64 i = 0; i < batch_size; i++) {
      for (int64 j = 0; j < max_length; j++) {
        if (n < indices_flat.size() && indices_flat(n) == i) {
          // Copy from the variable-length tensor.
          fixed_length_flat(offset + j) = values_flat(n);
          n++;
        } else {
          // Pad to fixed length.
          fixed_length_flat(offset + j) = pad_value_scalar;
        }
      }
      // Corner case: truncate to max_length if the user specified max_length < current length.
      while (n < indices_flat.size() && i == indices_flat(n)) n++;

      // Update the output offset.
      offset += max_length;
    }
  } catch (const std::exception &err) {
    context->CtxFailureWithWarning(errors::InvalidArgument(err.what()));
  }
}

REGISTER_OP("FixedLengthTensor")
  .Attr("IndexType: {int64, int32}")
  .Attr("ValueType: {int64, int32, string}")
  .Attr("max_length: int")
  .Input("segment_ids: IndexType")
  .Input("values: ValueType")
  .Input("pad_value: ValueType")
  .Output("fixed_length: ValueType")
  .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
    return Status::OK();
  }).Doc(R"doc(

A tensorflow OP to convert variable-length segments into a fixed-length tensor.

Attr
  max_length: The size of the innermost (i.e. last) dimension.

Input
  segment_ids: 1D input tensor containing the sorted segment_ids.
  values: 1D input tensor containing the values.
  pad_value: The value used for padding the fixed-length tensor.

Outputs
  fixed_length: A fixed-length tensor of size [batch_size, max_length].
)doc");

template<typename IndexType, typename ValueType>
class FixedLengthTensor: public OpKernel {
 public:
  explicit FixedLengthTensor(OpKernelConstruction *context) : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("max_length", &max_length_));
  }

 private:
  int64 max_length_;

  void Compute(OpKernelContext *context) override {
    ComputeFixedLengthTensor<IndexType, ValueType, true>(context, max_length_);
  }
};

REGISTER_OP("FixedLengthTensorV2")
  .Attr("IndexType: {int64, int32}")
  .Attr("ValueType: {int64, int32, string}")
  .Attr("max_length: int")
  .Input("segment_ids: IndexType")
  .Input("values: ValueType")
  .Input("pad_value: ValueType")
  .Input("batch_size: int64")
  .Output("fixed_length: ValueType")
  .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
    return Status::OK();
  }).Doc(R"doc(

A tensorflow OP to convert variable-length segments into a fixed-length tensor.

Attr
  max_length: The size of the innermost (i.e. last) dimension.

Input
  segment_ids: 1D input tensor containing the sorted segment_ids.
  values: 1D input tensor containing the values.
  pad_value: The value used for padding the fixed-length tensor.
  batch_size: The batch size to use.

Outputs
  fixed_length: A fixed-length tensor of size [batch_size, max_length].
)doc");

template<typename IndexType, typename ValueType>
class FixedLengthTensorV2: public OpKernel {
 public:
  explicit FixedLengthTensorV2(OpKernelConstruction *context) : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("max_length", &max_length_));
  }

 private:
  int64 max_length_;

  void Compute(OpKernelContext *context) override {
    ComputeFixedLengthTensor<IndexType, ValueType, false>(context, max_length_);
  }
};

#define REGISTER_SPARSE_TO_FIXED_LENGTH(IndexType, ValueType)  \
  REGISTER_KERNEL_BUILDER(                                     \
      Name("FixedLengthTensor")                                \
          .Device(DEVICE_CPU)                                  \
          .TypeConstraint<IndexType>("IndexType")              \
          .TypeConstraint<ValueType>("ValueType"),             \
      FixedLengthTensor<IndexType, ValueType>);                \
                                                               \
  REGISTER_KERNEL_BUILDER(                                     \
      Name("FixedLengthTensorV2")                              \
          .Device(DEVICE_CPU)                                  \
          .TypeConstraint<IndexType>("IndexType")              \
          .TypeConstraint<ValueType>("ValueType"),             \
      FixedLengthTensorV2<IndexType, ValueType>);

REGISTER_SPARSE_TO_FIXED_LENGTH(int64, int64)
REGISTER_SPARSE_TO_FIXED_LENGTH(int64, int32)
REGISTER_SPARSE_TO_FIXED_LENGTH(int64, string)
REGISTER_SPARSE_TO_FIXED_LENGTH(int32, int64)
REGISTER_SPARSE_TO_FIXED_LENGTH(int32, int32)
REGISTER_SPARSE_TO_FIXED_LENGTH(int32, string)
BIN twml/libtwml/src/ops/fixed_length_tensor.docx (new file, binary not shown)
@ -1,520 +0,0 @@

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/op_kernel.h"

#include <twml.h>
#include "tensorflow_utils.h"
#include "resource_utils.h"

#include <functional>

REGISTER_OP("DecodeAndHashDataRecord")
  .Attr("InputType: {uint8, string}")
  .Input("input_bytes: InputType")
  .Attr("keep_features: list(int)")
  .Attr("keep_codes: list(int)")
  .Attr("label_features: list(int)")
  .Attr("weight_features: list(int) = []")
  .Attr("decode_mode: int = 0")
  .Output("hashed_data_record_handle: resource")
  .SetShapeFn(shape_inference::ScalarShape)
  .Doc(R"doc(
A tensorflow OP that creates a handle for the hashed data record.

Attr
  keep_features: a list of int ids to keep.
  keep_codes: their corresponding codes.
  label_features: list of feature ids representing the labels.
  weight_features: list of feature ids representing the weights. Defaults to an empty list.
  decode_mode: integer indicating which decoding method to use. Let a sparse continuous
    feature have a feature_name and a dict of {name: value}. 0 indicates feature_ids are
    computed as hash(name). 1 indicates feature_ids are computed as hash(feature_name, name).
  shared_name: name used by the resource handle inside the resource manager.
  container: name used by the container of the resources.

Input
  input_bytes: Input tensor containing the serialized batch of HashedDataRecords.

Outputs
  hashed_data_record_handle: A resource handle to the batch of HashedDataRecords.
)doc");

template<typename InputType>
class DecodeAndHashDataRecord : public OpKernel {
 public:
  explicit DecodeAndHashDataRecord(OpKernelConstruction* context)
    : OpKernel(context) {
    std::vector<int64> keep_features;
    std::vector<int64> keep_codes;

    std::vector<int64> label_features;
    std::vector<int64> weight_features;

    OP_REQUIRES_OK(context, context->GetAttr("keep_features", &keep_features));
    OP_REQUIRES_OK(context, context->GetAttr("keep_codes", &keep_codes));
    OP_REQUIRES_OK(context, context->GetAttr("label_features", &label_features));
    OP_REQUIRES_OK(context, context->GetAttr("weight_features", &weight_features));
    OP_REQUIRES_OK(context, context->GetAttr("decode_mode", &m_decode_mode));

    OP_REQUIRES(context, keep_features.size() == keep_codes.size(),
                errors::InvalidArgument("keep keys and values must have the same size."));

#ifdef USE_DENSE_HASH
    m_keep_map.set_empty_key(0);
    m_labels_map.set_empty_key(0);
    m_weights_map.set_empty_key(0);
#endif  // USE_DENSE_HASH

    for (uint64_t i = 0; i < keep_features.size(); i++) {
      m_keep_map[keep_features[i]] = keep_codes[i];
    }

    for (uint64_t i = 0; i < label_features.size(); i++) {
      m_labels_map[label_features[i]] = i;
    }

    for (uint64_t i = 0; i < weight_features.size(); i++) {
      m_weights_map[weight_features[i]] = i;
    }
  }

 private:
  twml::Map<int64_t, int64_t> m_keep_map;
  twml::Map<int64_t, int64_t> m_labels_map;
  twml::Map<int64_t, int64_t> m_weights_map;
  int64 m_decode_mode;

  void Compute(OpKernelContext* context) override {
    try {
      HashedDataRecordResource *resource = nullptr;
      OP_REQUIRES_OK(context, makeResourceHandle<HashedDataRecordResource>(context, 0, &resource));

      // Store the input bytes in the resource so they aren't freed before the resource.
      // This is necessary because we are not copying the contents of the tensors.
      resource->input = context->input(0);
      int batch_size = getBatchSize<InputType>(resource->input);
      int num_labels = static_cast<int>(m_labels_map.size());
      int num_weights = static_cast<int>(m_weights_map.size());

      twml::HashedDataRecordReader reader;
      reader.setKeepMap(&m_keep_map);
      reader.setLabelsMap(&m_labels_map);
      reader.setDecodeMode(m_decode_mode);

      // Do not set the weights map if it is empty. This will take a faster path.
      if (num_weights != 0) {
        reader.setWeightsMap(&m_weights_map);
      }

      resource->records.clear();
      resource->records.reserve(batch_size);

      int64 total_size = 0;

      for (int id = 0; id < batch_size; id++) {
        const uint8_t *input_bytes = getInputBytes<InputType>(resource->input, id);
        reader.setBuffer(input_bytes);
        resource->records.emplace_back(num_labels, num_weights);
        resource->records[id].decode(reader);
        total_size += static_cast<int64>(resource->records[id].totalSize());
      }

      resource->total_size = total_size;
      resource->num_labels = num_labels;
      resource->num_weights = num_weights;
    } catch (const std::exception &e) {
      context->CtxFailureWithWarning(errors::InvalidArgument(e.what()));
    }
  }
};

REGISTER_OP("GetIdsFromHashedDataRecord")
  .Input("hashed_data_record_handle: resource")
  .Output("ids: int64")
  .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
    return Status::OK();
  }).Doc(R"doc(
A tensorflow OP that returns unhashed ids from the hashed data record.
Input
  hashed_data_record_handle: Resource handle to DataRecord

Outputs
  ids: the index of records[id] in the batch for each key/value slot (int64)
)doc");

// This kernel is used for both training and serving once the resource is created.
class GetIdsFromHashedDataRecord : public OpKernel {
 public:
  explicit GetIdsFromHashedDataRecord(OpKernelConstruction* context)
    : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    try {
      auto handle = getHandle<HashedDataRecordResource>(context, 0);
      const auto &records = handle->records;
      const auto &common = handle->common;
      const int64 common_size = static_cast<int64>(common.totalSize());
      const int64 total_size = handle->total_size;
      TensorShape shape = {total_size};

      Tensor *ids;
      OP_REQUIRES_OK(context, context->allocate_output(0, shape, &ids));

      int id = 0;
      int64 offset = 0;
      auto ids_flat = ids->flat<int64>();
      for (const auto &record : records) {
        // Since common features are added to each input, add the common_size to the current size.
        // For training common_size == 0; for serving it can be a non-zero value.
        int64 curr_size = static_cast<int64>(record.totalSize()) + common_size;
        std::fill(ids_flat.data() + offset, ids_flat.data() + offset + curr_size, id);
        offset += curr_size;
        id++;
      }
    } catch (const std::exception &e) {
      context->CtxFailureWithWarning(errors::InvalidArgument(e.what()));
    }
  }
};


// OutType: output tensor type. FieldType: the storage type used inside HashedDataRecord.
template<typename OutType, typename FieldType>
class GetOutputFromHashedDataRecord : public OpKernel {
 protected:
  using Getter = std::function<const std::vector<FieldType>&(const twml::HashedDataRecord &)>;
  Getter getter;

 public:
  explicit GetOutputFromHashedDataRecord(OpKernelConstruction* context)
    : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    try {
      auto handle = getHandle<HashedDataRecordResource>(context, 0);
      const auto &records = handle->records;
      const auto &common = handle->common;
      const int64 total_size = handle->total_size;
      TensorShape shape = {total_size};

      Tensor *output;
      OP_REQUIRES_OK(context, context->allocate_output(0, shape, &output));

      const auto &common_output = getter(common);

      auto output_data = output->flat<OutType>().data();
      for (const auto &record : records) {
        // This does not copy anything during training, as common_size == 0.
        // It will copy the relevant common features coming from a batch prediction request.
        output_data = std::copy(common_output.begin(), common_output.end(), output_data);

        // Copy the current record to the output.
        const auto& rec_output = getter(record);
        output_data = std::copy(rec_output.begin(), rec_output.end(), output_data);
      }
    } catch (const std::exception &e) {
      context->CtxFailureWithWarning(errors::InvalidArgument(e.what()));
    }
  }
};

REGISTER_OP("GetUKeysFromHashedDataRecord")
  .Input("hashed_data_record_handle: resource")
  .Output("ukeys: int64")
  .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
    return Status::OK();
  }).Doc(R"doc(
A tensorflow OP that returns unhashed keys from the hashed data record.
Input
  hashed_data_record_handle: Resource handle to DataRecord

Outputs
  ukeys: unhashed keys / raw feature ids from the original request.
)doc");

class GetUKeysFromHashedDataRecord : public GetOutputFromHashedDataRecord<int64, int64_t> {
 public:
  explicit GetUKeysFromHashedDataRecord(OpKernelConstruction* context)
    : GetOutputFromHashedDataRecord<int64, int64_t>(context) {
    getter = [](const twml::HashedDataRecord &record) -> const std::vector<int64_t> & {
      return record.keys();
    };
  }
};

REGISTER_OP("GetKeysFromHashedDataRecord")
  .Input("hashed_data_record_handle: resource")
  .Output("keys: int64")
  .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
    return Status::OK();
  }).Doc(R"doc(
A tensorflow OP that returns keys from the hashed data record.
Input
  hashed_data_record_handle: Resource handle to DataRecord

Outputs
  keys: keys after raw feature ids are hashed with values (int64)
)doc");

class GetKeysFromHashedDataRecord : public GetOutputFromHashedDataRecord<int64, int64_t> {
 public:
  explicit GetKeysFromHashedDataRecord(OpKernelConstruction* context)
    : GetOutputFromHashedDataRecord<int64, int64_t>(context) {
    getter = [](const twml::HashedDataRecord &record) -> const std::vector<int64_t> & {
      return record.transformed_keys();
    };
  }
};

REGISTER_OP("GetValuesFromHashedDataRecord")
  .Input("hashed_data_record_handle: resource")
  .Output("values: float")
  .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
    return Status::OK();
  }).Doc(R"doc(
A tensorflow OP that returns values from the hashed data record.
Input
  hashed_data_record_handle: Resource handle to DataRecord

Outputs
  values: feature values.
)doc");

class GetValuesFromHashedDataRecord : public GetOutputFromHashedDataRecord<float, double> {
 public:
  explicit GetValuesFromHashedDataRecord(OpKernelConstruction* context)
    : GetOutputFromHashedDataRecord<float, double>(context) {
    getter = [](const twml::HashedDataRecord &record) -> const std::vector<double> & {
      return record.values();
    };
  }
};

REGISTER_OP("GetCodesFromHashedDataRecord")
  .Input("hashed_data_record_handle: resource")
  .Output("codes: int64")
  .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
    return Status::OK();
  }).Doc(R"doc(
A tensorflow OP that returns codes from the hashed data record.
Input
  hashed_data_record_handle: Resource handle to DataRecord

Outputs
  codes: deepbird feature codes, usually from A, B, C, D, ... in the config.
)doc");

class GetCodesFromHashedDataRecord : public GetOutputFromHashedDataRecord<int64, int64_t> {
 public:
  explicit GetCodesFromHashedDataRecord(OpKernelConstruction* context)
    : GetOutputFromHashedDataRecord<int64, int64_t>(context) {
    getter = [](const twml::HashedDataRecord &record) -> const std::vector<int64_t> & {
      return record.codes();
    };
  }
};

REGISTER_OP("GetTypesFromHashedDataRecord")
  .Input("hashed_data_record_handle: resource")
  .Output("types: int8")
  .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
    return Status::OK();
  }).Doc(R"doc(
A tensorflow OP that returns types from the hashed data record.
Input
  hashed_data_record_handle: Resource handle to DataRecord

Outputs
  types: feature types corresponding to BINARY, DISCRETE, etc.
)doc");

class GetTypesFromHashedDataRecord : public GetOutputFromHashedDataRecord<int8, uint8_t> {
 public:
  explicit GetTypesFromHashedDataRecord(OpKernelConstruction* context)
    : GetOutputFromHashedDataRecord<int8, uint8_t>(context) {
    getter = [](const twml::HashedDataRecord &record) -> const std::vector<uint8_t> & {
      return record.types();
    };
  }
};

REGISTER_OP("GetBatchSizeFromHashedDataRecord")
  .Input("hashed_data_record_handle: resource")
  .Output("batch_size: int64")
  .SetShapeFn(shape_inference::ScalarShape)
  .Doc(R"doc(
A tensorflow OP that returns the batch size from the hashed data record.
Input
  hashed_data_record_handle: Resource handle to DataRecord

Outputs
  batch_size: Number of records held in the handle.
)doc");

class GetBatchSizeFromHashedDataRecord : public OpKernel {
 public:
  explicit GetBatchSizeFromHashedDataRecord(OpKernelConstruction* context)
    : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    try {
      auto handle = getHandle<HashedDataRecordResource>(context, 0);
      Tensor *output;
      OP_REQUIRES_OK(context, context->allocate_output(0, TensorShape({}), &output));
      output->scalar<int64>()() = handle->records.size();
    } catch (const std::exception &e) {
      context->CtxFailureWithWarning(errors::InvalidArgument(e.what()));
    }
  }
};

REGISTER_OP("GetTotalSizeFromHashedDataRecord")
  .Input("hashed_data_record_handle: resource")
  .Output("total_size: int64")
  .SetShapeFn(shape_inference::ScalarShape)
  .Doc(R"doc(
A tensorflow OP that returns the total size from the hashed data record.
Input
  hashed_data_record_handle: Resource handle to DataRecord

Outputs
  total_size: Total number of keys / values in the batch.
)doc");

class GetTotalSizeFromHashedDataRecord : public OpKernel {
 public:
  explicit GetTotalSizeFromHashedDataRecord(OpKernelConstruction* context)
    : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    try {
      auto handle = getHandle<HashedDataRecordResource>(context, 0);

      Tensor *output;
      OP_REQUIRES_OK(context, context->allocate_output(0, TensorShape({}), &output));
      output->scalar<int64>()() = handle->total_size;
    } catch (const std::exception &e) {
      context->CtxFailureWithWarning(errors::InvalidArgument(e.what()));
    }
  }
};

REGISTER_OP("GetLabelsFromHashedDataRecord")
  .Input("hashed_data_record_handle: resource")
  .Output("labels: float")
  .Attr("default_label: float")
  .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
    return Status::OK();
  }).Doc(R"doc(
A tensorflow OP that returns labels from the hashed data record.
Input
  hashed_data_record_handle: Resource handle to DataRecord

Outputs
  labels: A 2D tensor of size [batch_size, num_labels] containing the label values.
)doc");

class GetLabelsFromHashedDataRecord : public OpKernel {
 private:
  float default_label;

 public:
  explicit GetLabelsFromHashedDataRecord(OpKernelConstruction* context)
    : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("default_label", &default_label));
  }

  void Compute(OpKernelContext* context) override {
    try {
      auto handle = getHandle<HashedDataRecordResource>(context, 0);
      const auto &records = handle->records;
      const int num_labels = static_cast<int>(handle->num_labels);
      TensorShape shape = {static_cast<int64>(handle->records.size()), num_labels};

      Tensor *labels;
      OP_REQUIRES_OK(context, context->allocate_output(0, shape, &labels));

      // A label that is not present in the data record is stored as NaN.
      // For those labels, substitute default_label; otherwise keep the label.
      auto func = [this](float label) -> float {
        return std::isnan(label) ? default_label : label;
      };

      auto labels_data = labels->flat<float>().data();
      for (const auto &record : records) {
        const auto& rec_labels = record.labels();
        labels_data = std::transform(rec_labels.begin(), rec_labels.end(), labels_data, func);
      }
    } catch (const std::exception &e) {
      context->CtxFailureWithWarning(errors::InvalidArgument(e.what()));
    }
  }
};

REGISTER_OP("GetWeightsFromHashedDataRecord")
  .Input("hashed_data_record_handle: resource")
  .Output("weights: float")
  .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
    return Status::OK();
  }).Doc(R"doc(
A tensorflow OP that returns weights from the hashed data record.
Input
  hashed_data_record_handle: Resource handle to DataRecord

Outputs
  weights: A 2D tensor of size [batch_size, num_weights] containing the weight values.
)doc");

class GetWeightsFromHashedDataRecord : public OpKernel {
 public:
  explicit GetWeightsFromHashedDataRecord(OpKernelConstruction* context)
    : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    try {
      auto handle = getHandle<HashedDataRecordResource>(context, 0);
      const auto &records = handle->records;
      const int num_weights = static_cast<int>(handle->num_weights);
      TensorShape shape = {static_cast<int64>(handle->records.size()), num_weights};

      Tensor *weights;
      OP_REQUIRES_OK(context, context->allocate_output(0, shape, &weights));

      auto weights_data = weights->flat<float>().data();
      for (const auto &record : records) {
        const auto& rec_weights = record.weights();
        weights_data = std::copy(rec_weights.begin(), rec_weights.end(), weights_data);
      }
    } catch (const std::exception &e) {
      context->CtxFailureWithWarning(errors::InvalidArgument(e.what()));
    }
  }
};


#define REGISTER_DECODE_AND_HASH(InputType)        \
  REGISTER_KERNEL_BUILDER(                         \
      Name("DecodeAndHashDataRecord")              \
          .Device(DEVICE_CPU)                      \
          .TypeConstraint<InputType>("InputType"), \
      DecodeAndHashDataRecord<InputType>);

REGISTER_DECODE_AND_HASH(uint8)
REGISTER_DECODE_AND_HASH(string)

#define REGISTER_GETTER(FIELD)                    \
  REGISTER_KERNEL_BUILDER(                        \
      Name("Get" #FIELD "FromHashedDataRecord")   \
          .Device(DEVICE_CPU),                    \
      Get##FIELD##FromHashedDataRecord);
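
// REGISTER_GETTER builds both names by token pasting, e.g.
// REGISTER_GETTER(Labels) registers the op "GetLabelsFromHashedDataRecord"
// with the kernel class GetLabelsFromHashedDataRecord.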

REGISTER_GETTER(Ids)
REGISTER_GETTER(UKeys)
REGISTER_GETTER(Keys)
REGISTER_GETTER(Values)
REGISTER_GETTER(Codes)
REGISTER_GETTER(Types)
REGISTER_GETTER(BatchSize)
REGISTER_GETTER(TotalSize)
REGISTER_GETTER(Labels)
REGISTER_GETTER(Weights)
BIN twml/libtwml/src/ops/hashed_data_record.docx (new file, binary not shown)
@ -1,260 +0,0 @@

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/util/work_sharder.h"

#include <twml.h>
#include "tensorflow_utils.h"

using namespace tensorflow;

void ComputeHashingDiscretizer(
    OpKernelContext*,
    int64_t,
    const twml::Map<int64_t, int64_t> &,
    int64_t,
    int64_t,
    int64_t);

REGISTER_OP("HashingDiscretizer")
  .Attr("T: {float, double}")
  .Input("input_ids: int64")
  .Input("input_vals: T")
  .Input("bin_vals: T")
  .Attr("feature_ids: tensor = { dtype: DT_INT64 }")
  .Attr("n_bin: int")
  .Attr("output_bits: int")
  .Attr("cost_per_unit: int")
  .Attr("options: int")
  .Output("new_keys: int64")
  .Output("new_vals: T")
  .SetShapeFn(
    [](::tensorflow::shape_inference::InferenceContext* c) {
      c->set_output(0, c->input(0));
      c->set_output(1, c->input(1));
      return Status::OK();
    }
  )
  .Doc(R"doc(

This operation discretizes a tensor containing continuous features (if calibrated).
- note - the choice of float or double should be consistent among inputs/output

Input
  input_ids(int64): A tensor containing input feature ids (direct from the data record).
  input_vals(float/double): A tensor containing input values at the corresponding feature ids.
    - i.e. input_ids[i] <-> input_vals[i] for each i
  bin_vals(float/double): A tensor containing the bin boundaries for values of a given feature.
    - float or double, matching input_vals
  feature_ids(int64 attr): 1D TensorProto of feature IDs seen during calibration
    -> hint: look up make_tensor_proto:
         proto_init = np.array(values, dtype=np.int64)
         tensor_attr = tf.make_tensor_proto(proto_init)
  n_bin(int): The number of bin boundary values per feature
    -> hence, n_bin + 1 buckets for each feature
  output_bits(int): The maximum number of bits to use for the output IDs.
  cost_per_unit(int): An estimate of the number of CPU cycles (or nanoseconds
    if not CPU-bound) to complete a unit of work. Overestimating creates too
    many shards and CPU time will be dominated by per-shard overhead, such as
    Context creation. Underestimating may not fully make use of the specified
    parallelism.
  options(int): selects the behavior of the op.
    0x00 in bits{1:0} for std::lower_bound bucket search.
    0x01 in bits{1:0} for linear bucket search
    0x02 in bits{1:0} for std::upper_bound bucket search
    0x00 in bits{4:2} for integer_multiplicative_hashing
    0x01 in bits{4:2} for integer64_multiplicative_hashing
    higher bits/other values are reserved for future extensions

Outputs
  new_keys(int64): The discretized feature ids, with the same shape and size as input_ids.
  new_vals(float or double): The discretized values, with the same shape and size as input_vals.

Operation
  Note that the discretization operation maps observation vectors to higher-dimensional
  observation vectors. Here, we describe this mapping.

  Let a calibrated feature observation be given by (F,x), where F is the ID of the
  feature, and x is some real value (i.e., continuous feature). This kind of
  representation is useful for sparse vectors, where there are many zeros.

  For example, for a dense feature vector [1.2, 2.4, 3.6], we might have
  (0, 1.2), (1, 2.4) and (2, 3.6), with feature IDs indicating the 0th, 1st, and 2nd
  elements of the vector.

  The discretizer performs the following operation:
    (F,x) -> (map(x|F),1).
  Hence, we have that map(x|F) is a new feature ID, and the value observed for that
  feature is 1. We might read map(x|F) as 'the map of x for feature F'.

  For each feature F, we associate a (discrete, finite) set of new feature IDs, newIDs(F).
  We will then have that map(x|F) is in the set newIDs(F) for any value of x. Each
  set member of newIDs(F) is associated with a 'bin', as defined by the bin
  boundaries given in the bin_vals input array. For any two different feature IDs F
  and G, we would ideally have that INTERSECT(newIDs(F),newIDs(G)) is the empty set.
  However, this is not guaranteed for this discretizer.

  In the case of this hashing discretizer, map(x|F) can actually be written as follows:
    let bucket = bucket(x|F) be the bucket index for x, according to the
    calibration on F. (This is an integer value in [0,n_bin], inclusive.)
    F is an integer ID. Here, we have that map(x|F) = hash_fn(F,bucket). This has
    the desirable property that the new ID depends only on the calibration data
    supplied for feature F, and not on any other features in the dataset (e.g.,
    the number of other features present in the calibration data, or the order of
    features in the dataset). Note that PercentileDiscretizer does NOT have this
    property. This comes at the expense of the possibility of output ID collisions,
    which we try to minimize through the design of hash_fn.

  Example - consider an input vector with a single element, i.e. [x].
  Let's discretize to one of 2 values, as follows:
    Let F=0 for the ID of the single feature in the vector.
    Let the bin boundary of feature F=0 be BNDRY(F) = BNDRY(0) since F=0
    bucket = bucket(x|F=0) = 0 if x<=BNDRY(0) else 1
    Let map(x|F) = hash_fn(F=0,bucket=0) if x<=BNDRY(0) else hash_fn(F=0,bucket=1)
  If we had another element y in the vector, i.e. [x, y], then we might additionally
    Let F=1 for element y.
    Let the bin boundary be BNDRY(F) = BNDRY(1) since F=1
    bucket = bucket(y|F=1) = 0 if y<=BNDRY(1) else 1
    Let map(y|F) = hash_fn(F=1,bucket=0) if y<=BNDRY(1) else hash_fn(F=1,bucket=1)
  Note how the construction of map(y|F=1) does not depend on whether map(x|F=0)
  was constructed.
)doc");

template<typename T>
class HashingDiscretizer : public OpKernel {
 public:
  explicit HashingDiscretizer(OpKernelConstruction* context) : OpKernel(context) {
    OP_REQUIRES_OK(context,
                   context->GetAttr("n_bin", &n_bin_));
    OP_REQUIRES(context,
                n_bin_ > 0,
                errors::InvalidArgument("Must have n_bin_ > 0."));

    OP_REQUIRES_OK(context,
                   context->GetAttr("output_bits", &output_bits_));
    OP_REQUIRES(context,
                output_bits_ > 0,
                errors::InvalidArgument("Must have output_bits_ > 0."));

    OP_REQUIRES_OK(context,
                   context->GetAttr("cost_per_unit", &cost_per_unit_));
    OP_REQUIRES(context,
                cost_per_unit_ >= 0,
                errors::InvalidArgument("Must have cost_per_unit >= 0."));

    OP_REQUIRES_OK(context,
                   context->GetAttr("options", &options_));

    // construct the ID_to_index hash map
    Tensor feature_IDs;

    // extract the tensor
    OP_REQUIRES_OK(context,
                   context->GetAttr("feature_ids", &feature_IDs));

    // for access to the data
    // the int64_t data type is set in the to_layer function of the calibrator objects in Python
    auto feature_IDs_flat = feature_IDs.flat<int64>();

    // verify proper dimension constraints
    OP_REQUIRES(context,
                feature_IDs.shape().dims() == 1,
                errors::InvalidArgument("feature_ids must be 1D."));

    // reserve space in the hash map and fill in the values
    int64_t num_features = feature_IDs.shape().dim_size(0);
#ifdef USE_DENSE_HASH
    ID_to_index_.set_empty_key(0);
    ID_to_index_.resize(num_features);
#else
    ID_to_index_.reserve(num_features);
#endif  // USE_DENSE_HASH
    for (int64_t i = 0; i < num_features; i++) {
      ID_to_index_[feature_IDs_flat(i)] = i;
    }
  }

  void Compute(OpKernelContext* context) override {
    ComputeHashingDiscretizer(
        context,
        output_bits_,
        ID_to_index_,
        n_bin_,
        cost_per_unit_,
        options_);
  }

 private:
  twml::Map<int64_t, int64_t> ID_to_index_;
  int n_bin_;
  int output_bits_;
  int cost_per_unit_;
  int options_;
};

#define REGISTER(Type)                    \
  REGISTER_KERNEL_BUILDER(                \
      Name("HashingDiscretizer")          \
          .Device(DEVICE_CPU)             \
          .TypeConstraint<Type>("T"),     \
      HashingDiscretizer<Type>);

REGISTER(float);
REGISTER(double);

void ComputeHashingDiscretizer(
    OpKernelContext* context,
    int64_t output_bits,
    const twml::Map<int64_t, int64_t> &ID_to_index,
    int64_t n_bin,
    int64_t cost_per_unit,
    int64_t options) {
  const Tensor& keys = context->input(0);
  const Tensor& vals = context->input(1);
  const Tensor& bin_vals = context->input(2);

  const int64 output_size = keys.dim_size(0);

  TensorShape output_shape;
  OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(&output_size, 1, &output_shape));

  Tensor* new_keys = nullptr;
  OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &new_keys));
  Tensor* new_vals = nullptr;
  OP_REQUIRES_OK(context, context->allocate_output(1, output_shape, &new_vals));

  try {
    twml::Tensor out_keys_ = TFTensor_to_twml_tensor(*new_keys);
    twml::Tensor out_vals_ = TFTensor_to_twml_tensor(*new_vals);

    const twml::Tensor in_keys_ = TFTensor_to_twml_tensor(keys);
    const twml::Tensor in_vals_ = TFTensor_to_twml_tensor(vals);
    const twml::Tensor bin_vals_ = TFTensor_to_twml_tensor(bin_vals);

    // retrieve the thread pool from the op context
    auto worker_threads = *(context->device()->tensorflow_cpu_worker_threads());

    // Definition of the computation thread
    auto task = [&](int64 start, int64 limit) {
      twml::hashDiscretizerInfer(out_keys_, out_vals_,
                                 in_keys_, in_vals_,
                                 n_bin,
                                 bin_vals_,
                                 output_bits,
                                 ID_to_index,
                                 start, limit,
                                 options);
    };

    // let Tensorflow split up the work as it sees fit
    Shard(worker_threads.num_threads,
          worker_threads.workers,
          output_size,
          static_cast<int64>(cost_per_unit),
          task);
  } catch (const std::exception &e) {
    context->CtxFailureWithWarning(errors::InvalidArgument(e.what()));
  }
}
BIN twml/libtwml/src/ops/hashing_discretizer.docx (new file, binary not shown)
@ -1,84 +0,0 @@

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/op_kernel.h"

#include <twml.h>

#include <mutex>

using namespace tensorflow;

REGISTER_OP("Hashmap")
  .Input("keys: int64")
  .Input("hash_keys: int64")
  .Input("hash_values: int64")
  .Output("values: int64")
  .Output("mask: int8")
  .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
    // TODO: check if the sizes are different in the input
    c->set_output(0, c->input(0));
    c->set_output(1, c->input(0));
    return Status::OK();
  });
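
// The kernel below builds its lookup table from hash_keys/hash_values only
// once (std::call_once); later calls reuse the same twml::HashMap. For each
// input key, mask appears to flag whether the key was found via
// twml::HashMap::get, with `values` then holding the mapped value.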
|
||||
|
||||
|
||||
class Hashmap : public OpKernel {
|
||||
private:
|
||||
twml::HashMap hmap;
|
||||
std::once_flag flag;
|
||||
|
||||
public:
|
||||
explicit Hashmap(OpKernelConstruction* context) : OpKernel(context) {}
|
||||
|
||||
void Compute(OpKernelContext* context) override {
|
||||
try {
|
||||
// Quick hack
|
||||
const Tensor& keys = context->input(0);
|
||||
|
||||
std::call_once(this->flag, [this, context](){
|
||||
const Tensor& hash_keys = context->input(1);
|
||||
const Tensor& hash_values = context->input(2);
|
||||
const auto hash_keys_flat = hash_keys.flat<int64>();
|
||||
const auto hash_values_flat = hash_values.flat<int64>();
|
||||
const int64 N = hash_keys_flat.size();
|
||||
|
||||
for (int64 i = 0; i < N; i++) {
|
||||
hmap.insert(hash_keys_flat(i), hash_values_flat(i));
|
||||
}
|
||||
});
|
||||
|
||||
Tensor* values = nullptr;
|
||||
OP_REQUIRES_OK(context, context->allocate_output(0, keys.shape(),
|
||||
&values));
|
||||
|
||||
Tensor* mask = nullptr;
|
||||
OP_REQUIRES_OK(context, context->allocate_output(1, keys.shape(),
|
||||
&mask));
|
||||
|
||||
// copy the values without sharing a storage
|
||||
values->flat<int64>() = keys.flat<int64>();
|
||||
|
||||
auto keys_flat = keys.flat<int64>();
|
||||
auto values_flat = values->flat<int64>();
|
||||
auto mask_flat = mask->flat<int8>();
|
||||
|
||||
// TODO: use twml tensor
|
||||
const int64 N = keys_flat.size();
|
||||
for (int64 i = 0; i < N; i++) {
|
||||
// values_flat(i), keys_flat(i) return references to tensorflow::int64.
|
||||
// Using them in hmap.get() was causing issues because of automatic casting.
|
||||
int64_t val = values_flat(i);
|
||||
int64_t key = keys_flat(i);
|
||||
mask_flat(i) = hmap.get(val, key);
|
||||
values_flat(i) = val;
|
||||
}
|
||||
} catch (const std::exception &e) {
|
||||
context->CtxFailureWithWarning(errors::InvalidArgument(e.what()));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
REGISTER_KERNEL_BUILDER(
|
||||
Name("Hashmap")
|
||||
.Device(DEVICE_CPU),
|
||||
Hashmap);
|
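For context, a hedged sketch of driving this op from Python: the op name and signature follow REGISTER_OP("Hashmap") above, but the shared-library path is an assumption.

import tensorflow.compat.v1 as tf

twml_ops = tf.load_op_library("libtwml_tf.so")  # assumed library path

keys = tf.constant([10, 20, 30], dtype=tf.int64)
hash_keys = tf.constant([10, 30], dtype=tf.int64)
hash_values = tf.constant([1, 3], dtype=tf.int64)

# values holds the mapped value where found (else the original key);
# mask is 1 where the key was found in the map, 0 otherwise.
values, mask = twml_ops.hashmap(keys, hash_keys, hash_values)
with tf.Session() as sess:
    v, m = sess.run([values, mask])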
BIN
twml/libtwml/src/ops/hashmap.docx
Normal file
Binary file not shown.
@ -1,81 +0,0 @@
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/op_kernel.h"

#include <twml.h>
#include "tensorflow_utils.h"

using namespace tensorflow;

REGISTER_OP("IsotonicCalibration")
    .Attr("T: {float, double}")
    .Input("input: T")
    .Input("xs: T")
    .Input("ys: T")
    .Output("output: T")
    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
      // output shape should be the same as input shape.
      c->set_output(0, c->input(0));
      return Status::OK();
    }).Doc(R"doc(

This operation calibrates probabilities by fitting them to a piece-wise non-decreasing function.

Input
  input: A tensor containing uncalibrated probabilities.
  xs: A tensor containing the boundaries of the bins.
  ys: A tensor containing calibrated values for the corresponding bins.

Expected Sizes:
  input: [batch_size, num_labels].
  xs, ys: [num_labels, num_bins].

Expected Types:
  input: float or double.
  xs, ys: same as input.

Outputs
  output: A tensor containing calibrated probabilities with the same shape and size as input.

)doc");

template<typename T>
class IsotonicCalibration : public OpKernel {
 public:
  explicit IsotonicCalibration(OpKernelConstruction* context)
      : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    const Tensor& input = context->input(0);
    const Tensor& xs = context->input(1);
    const Tensor& ys = context->input(2);

    Tensor* output = nullptr;
    OP_REQUIRES_OK(
        context,
        context->allocate_output(0, input.shape(), &output));

    try {
      const twml::Tensor twml_input = TFTensor_to_twml_tensor(input);
      const twml::Tensor twml_xs = TFTensor_to_twml_tensor(xs);
      const twml::Tensor twml_ys = TFTensor_to_twml_tensor(ys);
      twml::Tensor twml_output = TFTensor_to_twml_tensor(*output);

      twml::linearInterpolation(twml_output, twml_input, twml_xs, twml_ys);
    } catch (const std::exception &e) {
      context->CtxFailureWithWarning(errors::InvalidArgument(e.what()));
    }
  }
};

#define REGISTER(Type)                      \
                                            \
  REGISTER_KERNEL_BUILDER(                  \
      Name("IsotonicCalibration")           \
          .Device(DEVICE_CPU)               \
          .TypeConstraint<Type>("T"),       \
      IsotonicCalibration<Type>);           \

REGISTER(float);
REGISTER(double);
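The kernel delegates to twml::linearInterpolation, which for each label looks up the input value against that label's bin boundaries xs and linearly interpolates the calibrated ys. A hedged NumPy sketch of that per-label mapping (names are illustrative; this is not the twml implementation, and clamping behavior at the ends is an assumption):

import numpy as np

def calibrate(inputs, xs, ys):
    # inputs: [batch, num_labels]; xs, ys: [num_labels, num_bins].
    # Piecewise-linearly interpolate each label's column over (xs, ys),
    # clamping to the first/last calibrated value outside the bin range.
    out = np.empty_like(inputs)
    for j in range(inputs.shape[1]):
        out[:, j] = np.interp(inputs[:, j], xs[j], ys[j])
    return out

probs = np.array([[0.05, 0.9], [0.5, 0.2]])
xs = np.array([[0.0, 0.5, 1.0], [0.0, 0.5, 1.0]])
ys = np.array([[0.1, 0.4, 0.8], [0.2, 0.5, 0.9]])
print(calibrate(probs, xs, ys))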
BIN
twml/libtwml/src/ops/isotonic_calibration.docx
Normal file
Binary file not shown.
@ -1,39 +0,0 @@
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/common_shape_fns.h"

using namespace tensorflow;

REGISTER_OP("NumIntraOpThreads")
    .Input("x: float32")
    .Output("num_intra_op_threads: int32")
    .SetShapeFn(tensorflow::shape_inference::ScalarShape)
    .Doc(R"doc(
A TensorFlow OP that returns the number of threads in the intra_op_parallelism pool.
This is not exposed by the TensorFlow API as of this writing, so a custom
operation is the best recourse.
Input
  x: Dummy placeholder so that constant folding is not done by the TF GraphOptimizer.
     See https://github.com/tensorflow/tensorflow/issues/22546 for more details.
Output
  num_intra_op_threads: A scalar tensor holding the number of threads in
  the intra_op_parallelism pool.
)doc");

class NumIntraOpThreads : public OpKernel {
 public:
  explicit NumIntraOpThreads(OpKernelConstruction* context)
      : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    int num_intra_op_threads = context->device()->tensorflow_cpu_worker_threads()->num_threads;
    Tensor* output_tensor = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(0, TensorShape({}), &output_tensor));
    auto output_flat = output_tensor->flat<int32>();
    output_flat(0) = num_intra_op_threads;
  }
};

REGISTER_KERNEL_BUILDER(Name("NumIntraOpThreads").Device(DEVICE_CPU), NumIntraOpThreads);
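A hedged usage sketch (library path assumed; op name follows REGISTER_OP): feed any float tensor as the dummy input and read back the scalar thread count.

import tensorflow.compat.v1 as tf

twml_ops = tf.load_op_library("libtwml_tf.so")  # assumed library path

# The float input exists only to defeat constant folding (see the doc above).
dummy = tf.placeholder(tf.float32, shape=[1])
n = twml_ops.num_intra_op_threads(dummy)

with tf.Session() as sess:
    print(sess.run(n, feed_dict={dummy: [0.0]}))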
BIN
twml/libtwml/src/ops/num_intra_op_threads.docx
Normal file
Binary file not shown.
@ -1,75 +0,0 @@
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/util/work_sharder.h"
#include "tensorflow/core/lib/core/threadpool.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/logging.h"
#include <iostream>

#include <vector>

using namespace tensorflow;

REGISTER_OP("ParAdd")
    .Input("input_a: float")
    .Input("input_b: float")
    .Output("a_plus_b: float")
    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
      c->set_output(0, c->input(0));
      return Status::OK();
    });

class ParAddOp : public OpKernel {
 public:
  explicit ParAddOp(OpKernelConstruction* context) : OpKernel(context) {
  }

  void Compute(OpKernelContext* context) override {
    // grab the input tensors
    const Tensor& input_tensor0 = context->input(0);
    auto input_flat0 = input_tensor0.flat<float>();
    const Tensor& input_tensor1 = context->input(1);
    auto input_flat1 = input_tensor1.flat<float>();

    OP_REQUIRES(context, input_tensor0.shape() == input_tensor1.shape(),
                errors::InvalidArgument("Input tensors must have identical shapes."));

    // create an output tensor
    Tensor* output_tensor = nullptr;
    OP_REQUIRES_OK(context,
                   context->allocate_output(0,
                                            input_tensor0.shape(),
                                            &output_tensor));
    auto output_flat = output_tensor->flat<float>();

    // PARALLEL ADD
    const int N = input_flat0.size();

    // retrieve the thread pool from the op context
    auto worker_threads = *(context->device()->tensorflow_cpu_worker_threads());

    // Definition of the computation thread
    auto task = [=, &input_flat0, &input_flat1, &output_flat](int64 start, int64 limit) {
      for (; start < limit; ++start) {
        output_flat(start) = input_flat0(start) + input_flat1(start);
      }
    };

    // cost_per_unit is a heuristic: a high value makes the work likely
    // to be sharded into smaller pieces
    int64 cost_per_unit = 1;

    // let Tensorflow split up the work as it sees fit
    Shard(worker_threads.num_threads,
          worker_threads.workers,
          N,
          cost_per_unit,
          task);
  }
};

REGISTER_KERNEL_BUILDER(Name("ParAdd").Device(DEVICE_CPU), ParAddOp);
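A hedged sketch of exercising ParAdd from Python (the op name follows REGISTER_OP above; the library path is an assumption):

import tensorflow.compat.v1 as tf

twml_ops = tf.load_op_library("libtwml_tf.so")  # assumed library path

a = tf.constant([1.0, 2.0, 3.0])
b = tf.constant([10.0, 20.0, 30.0])
c = twml_ops.par_add(a, b)  # element-wise sum, sharded across the pool

with tf.Session() as sess:
    print(sess.run(c))  # [11. 22. 33.]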
BIN
twml/libtwml/src/ops/par_add.docx
Normal file
Binary file not shown.
@ -1,125 +0,0 @@
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/op_kernel.h"

#include <twml.h>
#include "tensorflow_utils.h"

using namespace tensorflow;

REGISTER_OP("PartitionSparseTensorMod")
    .Attr("T: {float, double}")
    .Input("indices: int64")
    .Input("values: T")
    .Output("result: output_types")
    .Attr("num_partitions: int")
    .Attr("output_types: list({int64, float, double})")
    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
      return Status::OK();
    }).Doc(R"doc(

A TensorFlow OP that partitions an input batch represented as a sparse tensor
(indices are [ids, keys]) into separate sparse tensors, to more optimally place
sparse computations in distributed training.

Inputs
  indices: Indices from the sparse tensor ([ids, keys] from the batch).
  values: Batch values from the original features dict.

Attr
  num_partitions: Number of partitions to generate.
  output_types: A list of types for the output tensors, like
    [tf.int64, tf.float32, tf.int64, tf.float32, ...]
    The length must be 2 * num_partitions (see Outputs below).

Outputs
  List of dense tensors containing, for each partition:
    - the partitioned indices tensor ([ids, keys] from the partitioned batch)
    - the partitioned values tensor
  The list length is 2 * num_partitions. Example:
    [ [ids_1, keys_1], values_1, [ids_2, keys_2], values_2, ... ]
)doc");

template<typename T>
class PartitionSparseTensorMod : public OpKernel {
 private:
  int64 num_partitions;

 public:
  explicit PartitionSparseTensorMod(OpKernelConstruction* context) : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("num_partitions", &num_partitions));
    OP_REQUIRES(context, num_partitions > 0,
                errors::InvalidArgument("Number of partitions must be positive"));
  }

  void Compute(OpKernelContext* context) override {
    // grab input tensors
    const Tensor& indices_tensor = context->input(0);  // (ids, keys)
    const Tensor& values_tensor = context->input(1);

    // check sizes
    int64 num_keys = indices_tensor.shape().dim_size(0);
    OP_REQUIRES(context, indices_tensor.dims() == 2,
                errors::InvalidArgument("Indices tensor must be 2D [ids, keys]"));
    OP_REQUIRES(context, indices_tensor.shape().dim_size(1) == 2,
                errors::InvalidArgument("Indices tensor must have 2 cols [ids, keys]"));
    OP_REQUIRES(context, values_tensor.shape().dim_size(0) == num_keys,
                errors::InvalidArgument("Number of values must match number of keys"));

    // grab input vectors
    auto indices = indices_tensor.flat<int64>();
    auto values = values_tensor.flat<T>();

    // count the number of features that fall in each partition
    std::vector<int64> partition_counts(num_partitions);

    for (int i = 0; i < num_keys; i++) {
      int64 key = indices(2 * i + 1);
      int64 partition_id = key % num_partitions;
      partition_counts[partition_id]++;
    }

    // allocate outputs for each partition and keep references
    std::vector<int64*> output_indices_partitions;
    std::vector<T*> output_values_partitions;
    output_indices_partitions.reserve(num_partitions);
    output_values_partitions.reserve(num_partitions);

    for (int i = 0; i < num_partitions; i++) {
      Tensor *output_indices = nullptr, *output_values = nullptr;
      TensorShape shape_indices = TensorShape({partition_counts[i], 2});
      TensorShape shape_values = TensorShape({partition_counts[i]});

      OP_REQUIRES_OK(context, context->allocate_output(2 * i, shape_indices, &output_indices));
      OP_REQUIRES_OK(context, context->allocate_output(2 * i + 1, shape_values, &output_values));

      output_indices_partitions.push_back(output_indices->flat<int64>().data());
      output_values_partitions.push_back(output_values->flat<T>().data());
    }

    // assign a partition id to each feature and
    // populate the tensors for each partition
    std::vector<int64> partition_indices(num_partitions);

    for (int i = 0; i < num_keys; i++) {
      int64 key = indices(2 * i + 1);
      int64 pid = key % num_partitions;  // partition id
      int64 idx = partition_indices[pid]++;

      output_indices_partitions[pid][2 * idx] = indices(2 * i);
      output_indices_partitions[pid][2 * idx + 1] = key / num_partitions;
      output_values_partitions[pid][idx] = values(i);
    }
  }
};

#define REGISTER(Type)                          \
                                                \
  REGISTER_KERNEL_BUILDER(                      \
      Name("PartitionSparseTensorMod")          \
          .Device(DEVICE_CPU)                   \
          .TypeConstraint<Type>("T"),           \
      PartitionSparseTensorMod<Type>);          \

REGISTER(float);
REGISTER(double);
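The partitioning rule is easy to state in NumPy: a key goes to partition key % num_partitions and is renumbered within that partition as key // num_partitions. A hedged sketch of just that indexing logic (illustrative helper, not the op itself):

import numpy as np

def partition_mod(indices, values, num_partitions):
    # indices: [n, 2] array of (id, key); values: [n]. Returns a list of
    # (indices, values) pairs, one per partition, mirroring the op's outputs.
    keys = indices[:, 1]
    out = []
    for pid in range(num_partitions):
        sel = keys % num_partitions == pid
        part = indices[sel].copy()
        part[:, 1] //= num_partitions  # renumber keys within the partition
        out.append((part, values[sel]))
    return out

idx = np.array([[0, 5], [0, 6], [1, 7]])
vals = np.array([0.5, 0.6, 0.7])
for part_idx, part_vals in partition_mod(idx, vals, 2):
    print(part_idx.tolist(), part_vals.tolist())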
BIN
twml/libtwml/src/ops/partition_sparse_tensor.docx
Normal file
Binary file not shown.
@ -1,241 +0,0 @@
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/util/work_sharder.h"

#include <twml.h>
#include "tensorflow_utils.h"

using namespace tensorflow;

void CombinedComputeDiscretizers(
    OpKernelContext*,
    int64_t,
    const twml::Map<int64_t, int64_t>&,
    int64_t);

REGISTER_OP("PercentileDiscretizerV2")
    .Attr("T: {float, double}")
    .Input("input_ids: int64")
    .Input("input_vals: T")
    .Input("bin_ids: int64")
    .Input("bin_vals: T")
    .Input("feature_offsets: int64")
    .Input("start_compute: int64")
    .Input("end_compute: int64")
    .Attr("output_bits: int")
    .Attr("feature_ids: tensor = { dtype: DT_INT64 }")
    .Attr("feature_indices: tensor = { dtype: DT_INT64 }")
    .Attr("cost_per_unit: int")
    .Output("new_keys: int64")
    .Output("new_vals: T")
    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
      // TODO: check sizes
      c->set_output(0, c->input(0));
      c->set_output(1, c->input(0));
      return Status::OK();
    }).Doc(R"doc(

This operation discretizes a tensor containing continuous features (if calibrated).
- note - the choice of float or double should be consistent among inputs/output

Input
  input_ids(int64): A tensor containing input feature ids (direct from the data record).
  input_vals: A tensor containing input values at the corresponding feature ids.
    - i.e. input_ids[i] <-> input_vals[i] for each i
    - float or double
  bin_ids(int64): A tensor containing the discretized feature id for each bin.
  bin_vals: A tensor containing the bin boundaries for values of a given feature.
    - float or double
  feature_offsets(int64): Specifies the starting location of bins for a given feature id.
  start_compute(int64 scalar tensor): the index at which to start the computation
  end_compute(int64 scalar tensor): the index right before which to end the computation
    -> for example, (start_compute, end_compute) = (0, 10) would compute on 0 through 9
  output_bits(int): The maximum number of bits to use for the output IDs.
    -> 2**output_bits must be greater than bin_ids.size
  feature_ids(int64): 1D TensorProto of feature IDs seen during calibration
  feature_indices(int64): 1D TensorProto of feature indices corresponding with feature_ids
    -> hint: look up make_tensor_proto:
       proto_init = np.array(values, dtype=np.int64)
       tensor_attr = tf.make_tensor_proto(proto_init)
  cost_per_unit(int): An estimate of the number of CPU cycles (or nanoseconds
    if not CPU-bound) to complete a unit of work. Overestimating creates too
    many shards and CPU time will be dominated by per-shard overhead, such as
    Context creation. Underestimating may not fully make use of the specified
    parallelism.

Outputs
  new_keys(int64): The discretized feature ids, with the same shape and size as keys.
  new_vals(float or double): The discretized values, with the same shape and size as vals.

Operation
  Note that the discretization operation maps observation vectors to higher-dimensional
  observation vectors. Here, we describe this mapping.

  Let a calibrated feature observation be given by (F, x), where F is the ID of the
  feature and x is some real value (i.e., a continuous feature). This kind of
  representation is useful for sparse vectors, where there are many zeros.

  For example, for a dense feature vector [1.2, 2.4, 3.6], we might have
  (0, 1.2), (1, 2.4) and (2, 3.6), with feature IDs indicating the 0th, 1st, and 2nd
  elements of the vector.

  The discretizer performs the following operation:
    (F, x) -> (map(x|F), 1).
  Hence, map(x|F) is a new feature ID, and the value observed for that
  feature is 1. We might read map(x|F) as 'the map of x for feature F'.

  For each feature F, we associate a (discrete, finite) set of new feature IDs, newIDs(F).
  map(x|F) is in the set newIDs(F) for any value of x. Each member
  of newIDs(F) is associated with a 'bin', as defined by the bin boundaries given in
  the bin_vals input array. For any two different feature IDs F and G,
  INTERSECT(newIDs(F), newIDs(G)) is the empty set.

  Example - consider an input vector with a single element, i.e. [x].
  Let's discretize to one of 2 values, as follows:
    Let F=0 for the ID of the single feature in the vector.
    Let the bin boundary of feature F=0 be BNDRY(F) = BNDRY(0) since F=0.
    Let newIDs(F) = newIDs(0) = {0,1}.
    Let map(x|F) = map(x|0) = 0 if x<=BNDRY else 1.
  If we had another element y in the vector, i.e. [x, y], then we might additionally
    Let F=1 for element y.
    Let the bin boundary be BNDRY(F) = BNDRY(1) since F=1.
    Let newIDs(F) = newIDs(1) = {2,3} (so as to have an empty intersection with newIDs(0)).
    Let map(x|F) = map(x|1) = 2 if x<=BNDRY else 3.
  Consider the vector observation [-0.1, 0.2]. We then represent this as [(0, -0.1), (1, 0.2)].
  Let BNDRY(0) = BNDRY(1) = 0. When we discretize the vector, we get:
    (0, -0.1) -> (map(-0.1|0), 1) = (0, 1)
    (1,  0.2) -> (map( 0.2|1), 1) = (3, 1)
  Our output vector is then represented sparsely as [(0, 1), (3, 1)], and the dense
  representation of this could be [1, 0, 0, 1].

)doc");

template<typename T>
class PercentileDiscretizerV2 : public OpKernel {
 public:
  explicit PercentileDiscretizerV2(OpKernelConstruction* context) : OpKernel(context) {
    // get the number of output bits,
    // for use with features that have not been calibrated
    OP_REQUIRES_OK(context,
                   context->GetAttr("output_bits", &output_bits_));
    OP_REQUIRES_OK(context,
                   context->GetAttr("cost_per_unit", &cost_per_unit_));
    OP_REQUIRES(context, cost_per_unit_ >= 0,
                errors::InvalidArgument("Must have cost_per_unit >= 0."));

    // construct the ID_to_index hash map
    Tensor feature_IDs;
    Tensor feature_indices;

    // extract the tensors
    OP_REQUIRES_OK(context,
                   context->GetAttr("feature_ids", &feature_IDs));
    OP_REQUIRES_OK(context,
                   context->GetAttr("feature_indices", &feature_indices));

    // for access to the data;
    // the int64_t data type is set in the to_layer function of the calibrator objects in Python
    auto feature_IDs_flat = feature_IDs.flat<int64>();
    auto feature_indices_flat = feature_indices.flat<int64>();

    // verify proper dimension constraints
    OP_REQUIRES(context, feature_IDs.shape() == feature_indices.shape(),
                errors::InvalidArgument("feature_ids and feature_indices must be identical shape."));
    OP_REQUIRES(context, feature_IDs.shape().dims() == 1,
                errors::InvalidArgument("feature_ids and feature_indices must be 1D."));

    // reserve space in the hash map and fill in the values
    int num_features = feature_IDs.shape().dim_size(0);

#ifdef USE_DENSE_HASH
    ID_to_index_.set_empty_key(0);
    ID_to_index_.resize(num_features);
#else
    ID_to_index_.reserve(num_features);
#endif  // USE_DENSE_HASH
    for (int i = 0; i < num_features; i++) {
      ID_to_index_[feature_IDs_flat(i)] = feature_indices_flat(i);
    }
  }

  void Compute(OpKernelContext* context) override {
    CombinedComputeDiscretizers(
        context,
        output_bits_,
        ID_to_index_,
        cost_per_unit_);
  }

 private:
  twml::Map<int64_t, int64_t> ID_to_index_;
  int output_bits_;
  int cost_per_unit_;
};

#define REGISTER(Type)                        \
  REGISTER_KERNEL_BUILDER(                    \
      Name("PercentileDiscretizerV2")         \
          .Device(DEVICE_CPU)                 \
          .TypeConstraint<Type>("T"),         \
      PercentileDiscretizerV2<Type>);         \

REGISTER(float);
REGISTER(double);

void CombinedComputeDiscretizers(
    OpKernelContext* context,
    int64_t output_bits,
    const twml::Map<int64_t, int64_t> &ID_to_index,
    int64_t cost_per_unit) {
  const Tensor& keys = context->input(0);
  const Tensor& vals = context->input(1);
  const Tensor& bin_ids = context->input(2);
  const Tensor& bin_vals = context->input(3);
  const Tensor& feature_offsets = context->input(4);

  uint64 full_size = keys.dim_size(0);
  const int64 total_size = static_cast<int64>(full_size);
  TensorShape output_shape = {total_size};

  Tensor* new_keys = nullptr;
  OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &new_keys));
  Tensor* new_vals = nullptr;
  OP_REQUIRES_OK(context, context->allocate_output(1, output_shape, &new_vals));

  try {
    twml::Tensor out_keys_ = TFTensor_to_twml_tensor(*new_keys);
    twml::Tensor out_vals_ = TFTensor_to_twml_tensor(*new_vals);

    const twml::Tensor in_keys_ = TFTensor_to_twml_tensor(keys);
    const twml::Tensor in_vals_ = TFTensor_to_twml_tensor(vals);
    const twml::Tensor bin_ids_ = TFTensor_to_twml_tensor(bin_ids);
    const twml::Tensor bin_vals_ = TFTensor_to_twml_tensor(bin_vals);
    const twml::Tensor feature_offsets_ = TFTensor_to_twml_tensor(feature_offsets);

    // retrieve the thread pool from the op context
    auto worker_threads = *(context->device()->tensorflow_cpu_worker_threads());

    // Definition of the computation thread
    auto task = [&](int64 start, int64 limit) {
      twml::discretizerInfer(out_keys_, out_vals_,
                             in_keys_, in_vals_,
                             bin_ids_, bin_vals_,
                             feature_offsets_, output_bits,
                             ID_to_index,
                             start, limit,
                             start);
    };

    // let Tensorflow split up the work as it sees fit
    Shard(worker_threads.num_threads,
          worker_threads.workers,
          full_size,
          static_cast<int64>(cost_per_unit),
          task);
  } catch (const std::exception &e) {
    context->CtxFailureWithWarning(errors::InvalidArgument(e.what()));
  }
}
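The worked example in the doc string is easy to check numerically. A hedged NumPy sketch of map(x|F) for the two-feature case above (illustrative only; the real op also handles calibration tables, feature offsets, and output_bits):

import numpy as np

def discretize(ids, vals, bndry, bins_per_feature=2):
    # ids, vals: parallel lists of (feature id, value).
    # Each feature F owns new IDs {F*bins_per_feature, F*bins_per_feature+1};
    # map(x|F) picks the bin of x against that feature's boundary.
    new_keys = []
    for f, x in zip(ids, vals):
        base = f * bins_per_feature
        new_keys.append(base if x <= bndry[f] else base + 1)
    return np.array(new_keys), np.ones(len(ids))

keys, ones = discretize([0, 1], [-0.1, 0.2], bndry={0: 0.0, 1: 0.0})
print(list(zip(keys.tolist(), ones.tolist())))  # [(0, 1.0), (3, 1.0)]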
BIN
twml/libtwml/src/ops/percentile_discretizer_v2.docx
Normal file
Binary file not shown.
BIN
twml/libtwml/src/ops/resource_utils.docx
Normal file
Binary file not shown.
@ -1,126 +0,0 @@
#pragma once

#include <twml.h>

#include <atomic>
#include <string>
#include <vector>

// Add these to make gcc ignore the warnings from tensorflow.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wsign-compare"

#include "tensorflow/core/framework/resource_mgr.h"
#include "tensorflow/core/framework/resource_op_kernel.h"

#pragma GCC diagnostic pop

#include <memory>
#include <functional>

template<typename T>
void unrefHandle(T *handle) {
  handle->Unref();
}

template <typename T>
using unique_handle = std::unique_ptr<T, std::function<void(T *)> >;

// As std::type_index is not ABI compatible, we bypass the hash_code checks.
// https://github.com/tensorflow/tensorflow/commit/15275d3a14c77e2244ae1155f93243256f08e3ed
#ifdef __APPLE__
template <typename T>
Status CreateTwmlResource(OpKernelContext* ctx, const ResourceHandle& p, T* value) {
  return ctx->resource_manager()->Create(p.container(), p.name(), value);
}

template <typename T>
Status LookupTwmlResource(OpKernelContext* ctx, const ResourceHandle& p,
                          T** value) {
  return ctx->resource_manager()->Lookup(p.container(), p.name(), value);
}
#endif  // __APPLE__

template<typename T>
unique_handle<T> getHandle(tensorflow::OpKernelContext* context, int input_idx) {
  using namespace tensorflow;
  T *ptr = nullptr;
#ifdef __APPLE__
  auto s = LookupTwmlResource(context, HandleFromInput(context, input_idx), &ptr);
#else
  auto s = LookupResource(context, HandleFromInput(context, input_idx), &ptr);
#endif  // __APPLE__

  if (!s.ok()) {
    throw std::runtime_error("Failed to get resource handle");
  }
  return unique_handle<T>(ptr, unrefHandle<T>);
}

template<typename InputType>
const uint8_t *getInputBytes(const Tensor &input, int id) {
  return reinterpret_cast<const uint8_t *>(input.flat<InputType>().data());
}

template<>
inline const uint8_t *getInputBytes<string>(const Tensor &input, int id) {
  return reinterpret_cast<const uint8_t *>(input.flat<string>()(id).c_str());
}

template<typename InputType>
const int getBatchSize(const Tensor &input) {
  return 1;
}

template<>
inline const int getBatchSize<string>(const Tensor &input) {
  return static_cast<int>(input.NumElements());
}

class DataRecordResource : public ResourceBase {
 public:
  Tensor input;
  int64 num_labels;
  int64 num_weights;
  twml::DataRecord common;
  std::vector<twml::DataRecord> records;
  twml::Map<int64_t, int64_t> *keep_map;
  string DebugString() const override { return "DataRecords resource"; }
};

// A thin layer around a batch of HashedDataRecords
class HashedDataRecordResource : public ResourceBase {
 public:
  Tensor input;
  int64 total_size;
  int64 num_labels;
  int64 num_weights;
  twml::HashedDataRecord common;
  std::vector<twml::HashedDataRecord> records;
  string DebugString() const override { return "HashedDataRecord Resource"; }
};

#define TF_CHECK_STATUS(fn) do {  \
  Status s = fn;                  \
  if (!s.ok()) return s;          \
} while (0)

template<typename ResourceType>
Status makeResourceHandle(OpKernelContext* context, int out_idx, ResourceType **resource_) {
  static std::atomic<int64> id;
  Tensor* handle_tensor;
  TF_CHECK_STATUS(context->allocate_output(out_idx, TensorShape({}), &handle_tensor));

  ResourceType *resource = new ResourceType();
  const auto resource_name = typeid(ResourceType).name() + std::to_string(id++);
  ResourceHandle handle = MakePerStepResourceHandle<ResourceType>(context, resource_name);
#ifdef __APPLE__
  TF_CHECK_STATUS(CreateTwmlResource(context, handle, resource));
#else
  TF_CHECK_STATUS(CreateResource(context, handle, resource));
#endif  // __APPLE__
  handle_tensor->scalar<ResourceHandle>()() = handle;

  *resource_ = resource;
  return Status::OK();
}
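unique_handle pairs each looked-up resource with an Unref deleter, so the reference count drops as soon as the handle leaves scope. A hedged Python analogue of that RAII pattern (illustrative only; TensorFlow's Python API manages resource lifetimes differently):

import contextlib

class RefCounted:
    def __init__(self, name):
        self.name, self.refs = name, 1
    def unref(self):
        self.refs -= 1
        print(f"{self.name}: refs={self.refs}")

@contextlib.contextmanager
def unique_handle(resource):
    # Yield the resource and unref it on scope exit, mirroring
    # unique_handle<T>'s custom deleter above.
    try:
        yield resource
    finally:
        resource.unref()

with unique_handle(RefCounted("DataRecordResource")) as r:
    print("using", r.name)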
BIN
twml/libtwml/src/ops/scripts/get_inc.docx
Normal file
Binary file not shown.
@ -1,5 +0,0 @@
"""Gets the path of headers for the current Tensorflow library"""

import tensorflow.compat.v1 as tf

print(tf.sysconfig.get_include(), end='')
@ -1,2 +0,0 @@
#!/bin/sh
PEX_INTERPRETER=1 "$PYTHON_ENV" "$LIBTWML_HOME"/src/ops/scripts/get_inc.py
BIN
twml/libtwml/src/ops/scripts/get_lib.docx
Normal file
Binary file not shown.
@ -1,5 +0,0 @@
"""Gets the path of the current Tensorflow library"""

import tensorflow.compat.v1 as tf

print(tf.sysconfig.get_lib(), end='')
@ -1,2 +0,0 @@
#!/bin/sh
PEX_INTERPRETER=1 "$PYTHON_ENV" "$LIBTWML_HOME"/src/ops/scripts/get_lib.py
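These two script pairs exist so the build can locate TensorFlow's headers and shared library. A hedged sketch of how such paths are typically turned into compile flags for a custom op (an assumed usage pattern, not taken from this diff; tf.sysconfig also exposes ready-made flag lists):

import tensorflow.compat.v1 as tf

compile_flags = tf.sysconfig.get_compile_flags()  # includes -I<get_include()>
link_flags = tf.sysconfig.get_link_flags()        # includes -L<get_lib()>

# illustrative build command for a hypothetical my_op.cc
print("g++ -std=c++11 -shared my_op.cc -o my_op.so -fPIC",
      " ".join(compile_flags), " ".join(link_flags))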
BIN
twml/libtwml/src/ops/scripts/symlink.docx
Normal file
Binary file not shown.
Some files were not shown because too many files have changed in this diff.