grace_t.scripts.keras_multi_demo 源代码
#!/usr/bin/env python
# -*- coding: utf8 -*-
# author: flyzzaway
##export PATH=/opt/compiler/gcc-4.8.2/bin/:$PATH
##import theano
##theano.config.openmp = True
##OMP_NUM_THREADS=20 python xxx.py
## With the settings above Theano can run on multiple CPU cores. Installing htop (`jumbo install htop`) also lets you watch the load on each core.
import numpy as np
import pandas as pd
import gc
from keras.models import Sequential
from keras.layers import Dense,Dropout,Embedding,Concatenate,Activation,Flatten
from keras.layers.advanced_activations import PReLU
from keras.layers.normalization import BatchNormalization
from keras import regularizers
from ml_metrics import mapk
from keras.preprocessing.sequence import pad_sequences
from keras.utils import plot_model
def get_max():
    '''
    Return the hard-coded ``max_<feature>`` table.

    Each key names one categorical feature and maps to an integer constant.
    ``sub_input_model`` reads these values as the ``input_dim`` of the
    matching Embedding layer.  A new dict is built on every call, so callers
    may modify the result freely.

    NOTE(review): the numbers were presumably measured on the training
    data -- confirm before reusing them with another dataset.
    '''
    return {
        'max_displayid': 172668,
        'max_adid': 353356,
        'max_platform': 4,
        'max_hour': 6,
        'max_weekday': 1,
        'max_uid': 166049,
        'max_documentid': 1802181,
        'max_campaignid': 29471,
        'max_advertiserid': 4036,
        'max_sourceidx': 14403,
        'max_categoryid': 95,
        'max_entityid': 1326010,
        'max_topicid': 301,
        'max_doctrfids': 2998870,
        'max_sourceidy': 14404,
        'max_docids': 2997096,
    }
def sub_input_model(max_dict, embedding_size, max_name, input_shape_dim1, name,
                    dense_units=None):
    '''
    Build one input branch: Embedding -> Flatten -> Dense.

    Args:
        max_dict: mapping that holds the Embedding ``input_dim`` for this
            feature under the key ``max_name``.
        embedding_size: ``output_dim`` of the Embedding layer.
        max_name: key to look up in ``max_dict``.
        input_shape_dim1: number of ids fed per example; used as
            ``input_shape=(input_shape_dim1,)`` (e.g. 1 for a single id).
        name: name of the Embedding layer, also used as the suffix of the
            ``Flatten<name>`` and ``Dense<name>`` layer names.
        dense_units: width of the trailing Dense layer.  Defaults to
            ``embedding_size`` when omitted.

    Returns:
        The Sequential model holding the three layers.

    Raises:
        KeyError: if ``max_name`` is not a key of ``max_dict``.
    '''
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print("sub_input_model")

    # The original called Dense() without ``units``, which is a required
    # argument and made every call fail.  The intended width is not recorded
    # anywhere in this file; falling back to the embedding width is an
    # assumption -- pass dense_units explicitly to override it.
    if dense_units is None:
        dense_units = embedding_size

    branch = Sequential()
    # NOTE(review): Keras expects input_dim to be (largest id + 1).  If
    # max_dict stores the largest id itself, this table is one row short --
    # confirm how the values in max_dict were produced.
    branch.add(Embedding(input_dim=max_dict[max_name],
                         output_dim=embedding_size,
                         input_shape=(input_shape_dim1,),
                         name=name))
    branch.add(Flatten(name='Flatten' + name))
    branch.add(Dense(dense_units, name='Dense' + name))
    return branch
def multi_input_model(model, max_dict):
    '''
    Build, compile and plot the 16-branch click model.

    One embedding branch is created per feature with ``sub_input_model``.
    The branch outputs are concatenated along axis 1 and passed through
    ``Dense(30, relu)`` and ``Dense(1, sigmoid)``.  The result is compiled
    with binary cross-entropy / rmsprop and drawn to ``model.png``.

    The original code tried to add ``Concatenate([...], mode='concat',
    concat_axis=1)`` to a Sequential model.  Those arguments belong to the
    Keras 1 ``Merge`` layer; the Keras 2 ``Concatenate`` layer rejects them,
    and a Sequential model cannot host a multi-input merge.  The graph is
    therefore assembled with the functional API.

    Args:
        model: kept only so existing calls keep working.  It is NOT
            modified; use the returned model instead.
        max_dict: mapping with the ``max_*`` keys listed in the branch table
            below (see ``get_max``).

    Returns:
        The compiled multi-input ``keras.models.Model``.  Its inputs are in
        the same order as the branch table.
    '''
    # Local imports: these names are not part of the file's import block.
    from keras import backend as K
    from keras.models import Model

    print("multi_input_model")

    # (layer name, key in max_dict, ids per example) -- order defines the
    # order of the model inputs.
    branch_specs = [
        ('displayid', 'max_displayid', 1),
        ('adid', 'max_adid', 1),
        ('platform', 'max_platform', 1),
        ('hour', 'max_hour', 1),
        ('weekday', 'max_weekday', 1),
        ('uid', 'max_uid', 1),
        ('documentid', 'max_documentid', 1),
        ('campaignid', 'max_campaignid', 1),
        ('advertiserid', 'max_advertiserid', 1),
        ('sourceid', 'max_sourceidx', 1),
        ('categoryid', 'max_categoryid', 2),
        ('entityid', 'max_entityid', 10),
        ('topicid', 'max_topicid', 39),
        ('uidview_doc', 'max_doctrfids', 306),
        ('uidview_source', 'max_sourceidy', 160),
        ('uidview_onehour_doc', 'max_docids', 123),
    ]
    branches = [
        sub_input_model(max_dict, embedding_size=50, max_name=max_name,
                        input_shape_dim1=ids_per_example, name=layer_name)
        for layer_name, max_name, ids_per_example in branch_specs
    ]

    # Every branch ends in Flatten -> Dense, so each output is 2-D and the
    # feature axis to join on is axis 1.
    merged = Concatenate(axis=1)([branch.output for branch in branches])
    # Messages are formatted into one string so the output is the same on
    # Python 2 and Python 3.
    print("the model's input shape %s"
          % ([branch.input_shape for branch in branches],))
    print("the mode's output shape %s" % (K.int_shape(merged),))

    hidden = Dense(30, activation='relu', name='Dense_1')(merged)
    print("%s" % (K.int_shape(hidden),))

    prediction = Dense(1, activation='sigmoid', name='Dense_2')(hidden)
    print("the final model's shape %s" % (K.int_shape(prediction),))

    merged_model = Model(inputs=[branch.input for branch in branches],
                         outputs=prediction)
    merged_model.compile(loss='binary_crossentropy',
                         optimizer='rmsprop',
                         metrics=['accuracy', 'mae'])
    plot_model(merged_model, to_file='model.png', show_shapes=True)
    return merged_model
if __name__ == "__main__":
    # Script entry point: build the multi-input model once.  multi_input_model
    # also compiles it and writes the architecture diagram.
    model = Sequential()
    max_dict = get_max()
    multi_input_model(model, max_dict)

    # Disabled example, kept for reference: 300 random training examples
    # followed by fit / evaluate calls.
    # x_train_displayid = np.random.randint(30000,size=(300,1))
    # x_train_adid = np.random.randint(220000,size=(300,1))
    # x_train_entityids = np.random.randint(4000000,size=(300,10))
    # y_train = np.random.randint(1,size=(300,1+1+10,1))
    # print y_train.shape
    # model.fit([x_train_displayid, x_train_adid, x_train_entityids], y_train, batch_size=16, epochs=10)
    # score = model.evaluate([x_test_1, x_test_2], y_test, batch_size=16)
    # print score