# Gaussian Mixture Model (GMM) clustering with Spark MLlib.
# NOTE(review): assumes `sc` (SparkContext) is created elsewhere — confirm.
import numpy as np

from pyspark.mllib.clustering import GaussianMixture, GaussianMixtureModel

# Load and parse the data: one space-separated feature vector per line.
data = sc.textFile("data/mllib/gmm_data.txt")
parsed_data = data.map(
    lambda line: np.array([float(i) for i in line.strip().split(' ')])
)

# Fit a mixture of 2 Gaussians to the parsed vectors.
gmm = GaussianMixture.train(parsed_data, 2)

# Print the learned mixture parameters (weight, mean, covariance per component).
for w, g in zip(gmm.weights, gmm.gaussians):
    print("weight = ", w, "mu = ", g.mu, "sigma = ", g.sigma.toArray())

# Persist the model and load it back.
gmm.save(sc, "model_path")
same_model = GaussianMixtureModel.load(sc, "model_path")
Latent Dirichlet Allocation (LDA)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
# Latent Dirichlet Allocation (LDA) topic modelling with Spark MLlib.
# NOTE(review): assumes `sc` (SparkContext) is created elsewhere — confirm.
from pyspark.mllib.clustering import LDA, LDAModel
from pyspark.mllib.linalg import Vectors

# Load and parse the data: one space-separated word-count vector per line.
data = sc.textFile("data/mllib/sample_lda_data.txt")
parsed_data = data.map(
    lambda line: Vectors.dense([float(i) for i in line.strip().split(' ')])
)

# LDA.train expects a corpus of [document-id, term-count-vector] pairs.
corpus = parsed_data.zipWithIndex().map(lambda x: [x[1], x[0]]).cache()

# Cluster the documents into 3 topics.
ldaModel = LDA.train(corpus, k=3)

# topicsMatrix() is vocabSize x k; entry [word][topic] is the word's
# (unnormalized) weight in that topic.
topics = ldaModel.topicsMatrix()
for topic in range(3):
    print("Topic " + str(topic) + ":")
    for word in range(0, ldaModel.vocabSize()):
        print(" " + str(topics[word][topic]))

# Persist the model and load it back (load also requires the SparkContext).
ldaModel.save(sc, "model_path")
same_model = LDAModel.load(sc, "model_path")
# K.is_keras_tensor distinguishes real Keras tensors from arrays and variables.
np_var = np.array([1, 2])
K.is_keras_tensor(np_var)             # False: a plain numpy array
keras_var = K.variable(np_var)
K.is_keras_tensor(keras_var)          # False: a variable is not a Tensor
keras_placeholder = K.placeholder(shape=(2, 4, 5))
K.is_keras_tensor(keras_placeholder)  # True: a placeholder is a Tensor
from keras import backend as K

# K.is_sparse reports whether a placeholder was declared sparse.
dense_ph = K.placeholder((2, 2), sparse=False)
print(K.is_sparse(dense_ph))   # False
sparse_ph = K.placeholder((2, 2), sparse=True)
print(K.is_sparse(sparse_ph))  # True
# K.shape returns the (symbolic) shape of a variable or placeholder.
from keras import backend as K
import numpy as np

tf_session = K.get_session()
val = np.array([[1, 2], [3, 4]])
kvar = K.variable(value=val)
# Use K.placeholder — bare `keras` is not imported in this snippet.
# Renamed from `input` to avoid shadowing the builtin.
inp = K.placeholder(shape=(2, 4, 5))

K.shape(kvar)  # <tf.Tensor 'Shape_8:0' shape=(2,) dtype=int32>
K.shape(inp)   # <tf.Tensor 'Shape_9:0' shape=(3,) dtype=int32>

# To get an integer shape, evaluate the tensor (or use K.int_shape(x)):
K.shape(kvar).eval(session=tf_session)  # array([2, 2], dtype=int32)
K.shape(inp).eval(session=tf_session)   # array([2, 4, 5], dtype=int32)
int_shape
1
def int_shape(x)
返回张量shape
返回值:tuple(int)/None
1 2 3 4 5 6 7 8
from keras import backend as K

# K.int_shape returns the static shape as a tuple of ints (or None entries).
placeholder_3d = K.placeholder(shape=(2, 4, 5))
K.int_shape(placeholder_3d)  # (2, 4, 5)

val = np.array([[1, 2], [3, 4]])
kvar = K.variable(value=val)
K.int_shape(kvar)  # (2, 2)
ndim
1
def ndim(x)
返回张量的阶数
返回值:int
1 2 3 4 5 6 7 8
from keras import backend as K

# K.ndim returns the rank (number of axes) of a tensor or variable.
placeholder_3d = K.placeholder(shape=(2, 4, 5))
val = np.array([[1, 2], [3, 4]])
kvar = K.variable(value=val)
K.ndim(placeholder_3d)  # 3
K.ndim(kvar)            # 2
dtype
1
def dtype(x)
返回张量的数据类型
返回值:str
float32
float32_ref
1 2 3 4 5 6 7 8 9 10 11 12 13 14
# K.dtype returns the data type of a tensor or variable as a string.
from keras import backend as K

# Keras placeholders
K.dtype(K.placeholder(shape=(2, 4, 5)))                   # 'float32'
K.dtype(K.placeholder(shape=(2, 4, 5), dtype='float32'))  # 'float32'
K.dtype(K.placeholder(shape=(2, 4, 5), dtype='float64'))  # 'float64'

# Keras variables (TF backend reports variables with a '_ref' suffix)
kvar = K.variable(np.array([[1, 2], [3, 4]]))
K.dtype(kvar)  # 'float32_ref'
kvar = K.variable(np.array([[1, 2], [3, 4]]), dtype='float32')
K.dtype(kvar)  # 'float32_ref'
eval
1
def eval(x)
求得张量的值
返回值:numpy.ndarray
1 2 3 4 5
from keras import backend as K

# K.eval evaluates a variable and returns its value as a numpy array.
kvar = K.variable(np.array([[1, 2], [3, 4]]), dtype='float32')
K.eval(kvar)
# array([[ 1.,  2.],
#        [ 3.,  4.]], dtype=float32)
zeros
1 2 3 4 5
def zeros(shape, dtype='float32', name=None)
生成shape大小的全0张量
1 2 3 4 5 6
from keras import backend as K

# K.zeros builds an all-zero variable of the given shape.
kvar = K.zeros((3, 4))
K.eval(kvar)
# array([[ 0.,  0.,  0.,  0.],
#        [ 0.,  0.,  0.,  0.],
#        [ 0.,  0.,  0.,  0.]], dtype=float32)
# Stacking LocallyConnected2D layers (unshared convolution weights).
# NOTE(review): assumes Sequential and LocallyConnected2D are imported
# from keras elsewhere — confirm.
model = Sequential()

# A 3x3 unshared-weights convolution with 64 output filters on a
# 32x32 RGB image, with `data_format="channels_last"`.
model.add(LocallyConnected2D(64, (3, 3), input_shape=(32, 32, 3)))
# now model.output_shape == (None, 30, 30, 64)
# notice that this layer will consume (30*30)*(3*3*3*64) + (30*30)*64 parameters

# Add a 3x3 unshared-weights convolution on top, with 32 output filters.
model.add(LocallyConnected2D(32, (3, 3)))
# now model.output_shape == (None, 28, 28, 32)