or a numpy array, or a `scipy.sparse` matrix.
if scipy.sparse.issparse(doc):
vec = doc.toarray().flatten()
elif isinstance(doc, numpy.ndarray):
vec = doc
else:
vec = matutils.sparse2full(doc, self.numFeatures)
vec = numpy.asfortranarray(vec, dtype=self.corpus.dtype).reshape(self.numFeatures, 1)
// compute cosine similarity against every other document in the collection
gemv = matutils.blas("gemv", self.corpus)
allSims = gemv(1.0, self.corpus, vec) // N x T * T x 1 = N x 1
allSims = list(allSims.flat) // convert to plain python list
assert len(allSims) == self.corpus.shape[0] // make sure no document got lost!
return allSims
//endclass MatrixSimilarity
After Change
// Multiply against the transposed query and transpose the result back: this
// stops numpy.dot from internally copying self.corpus, which is very slow.
return numpy.dot(self.corpus, query.T).T // XXX: removed casting the result to list; does anyone care?
//endclass MatrixSimilarity