These seem to be a bit better:
import numpy

a = numpy.random.ranf(size=(5, 6))
normalized_a = a / numpy.nansum(a)

def original(a, normalized_a):
    # Repeat each row/column index proportionally to its weight,
    # then take a plain average -> weighted centroid.
    row_values = []
    col_values = []
    for row, col in numpy.ndindex(normalized_a.shape):
        weight = int(normalized_a[row, col] * 100)
        row_values.extend([row] * weight)
        col_values.extend([col] * weight)
    return sum(row_values) / float(len(row_values)), sum(col_values) / float(len(col_values))

def new(a, normalized_a):
    # Vectorized version using dense index grids from numpy.mgrid.
    weights = numpy.floor(normalized_a * 100)
    nx, ny = a.shape
    rows, columns = numpy.mgrid[:nx, :ny]
    row_values = numpy.sum(rows * weights) / numpy.sum(weights)
    col_values = numpy.sum(columns * weights) / numpy.sum(weights)
    return row_values, col_values

def new2(a, normalized_a):
    # Same as new(), but numpy.ogrid returns open (broadcastable) grids,
    # so the full index arrays are never materialized.
    weights = numpy.floor(normalized_a * 100)
    nx, ny = a.shape
    rows, columns = numpy.ogrid[:nx, :ny]
    row_values = numpy.sum(rows * weights) / numpy.sum(weights)
    col_values = numpy.sum(columns * weights) / numpy.sum(weights)
    return row_values, col_values

print original(a, normalized_a)
print new(a, normalized_a)
print new2(a, normalized_a)

print "timing!!!"
import timeit
print timeit.timeit('original(a, normalized_a)', 'from __main__ import original, a, normalized_a', number=10000)
print timeit.timeit('new(a, normalized_a)', 'from __main__ import new, a, normalized_a', number=10000)
print timeit.timeit('new2(a, normalized_a)', 'from __main__ import new2, a, normalized_a', number=10000)
The results on my machine:
(1.8928571428571428, 2.630952380952381)
(1.8928571428571428, 2.6309523809523809)
(1.8928571428571428, 2.6309523809523809)
timing!!!
1.05751299858
0.64871096611
0.497050046921
I used some of numpy's index tricks to vectorize the calculation. I'm actually a little surprised we didn't do better: np.ogrid is only about twice as fast as the original on your test array, and np.mgrid lands somewhere in between.
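For what it's worth, here is a minimal sketch (separate from the timed code above, with a hypothetical 5x6 shape and all-ones weights) of why ogrid tends to edge out mgrid: mgrid builds two full (5, 6) index arrays, while ogrid returns a (5, 1) column and a (1, 6) row that numpy broadcasts during the multiplication, so less memory is allocated and copied.

import numpy

# mgrid materializes dense index arrays; ogrid returns open, broadcastable ones.
rows_m, cols_m = numpy.mgrid[:5, :6]
rows_o, cols_o = numpy.ogrid[:5, :6]

print rows_m.shape, cols_m.shape   # (5, 6) (5, 6)
print rows_o.shape, cols_o.shape   # (5, 1) (1, 6)

# Both forms give the same weighted sums, since broadcasting expands the
# ogrid vectors to the full shape only conceptually.
weights = numpy.ones((5, 6))
print numpy.allclose(numpy.sum(rows_m * weights), numpy.sum(rows_o * weights))  # True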