# Source code for grace_t.scripts.keras_multi_demo

#!/usr/bin/env python
# -*- coding: utf8 -*-
# author: flyzzaway

## To run on multiple CPU cores (Theano with OpenMP enabled):
##   export PATH=/opt/compiler/gcc-4.8.2/bin/:$PATH
##   import theano
##   theano.config.openmp = True
##   OMP_NUM_THREADS=20 python xxx.py
## Installing htop (`jumbo install htop`) lets you watch the load on each core.

import numpy as np
import pandas as pd
import gc
from keras.models import Sequential
from keras.layers import Dense,Dropout,Embedding,Concatenate,Activation,Flatten
from keras.layers.advanced_activations import PReLU
from keras.layers.normalization import BatchNormalization
from keras import regularizers
from ml_metrics import mapk
from keras.preprocessing.sequence import pad_sequences
from keras.utils import plot_model
def get_max():
    '''
    Return the hard-coded size table for every categorical feature.

    Each value is looked up by ``sub_input_model`` and passed to Keras as the
    ``input_dim`` of that feature's ``Embedding`` layer.

    NOTE(review): ``Embedding`` needs ``input_dim`` to be strictly greater
    than the largest index it will see -- confirm these numbers are
    vocabulary sizes and not the largest observed ids.

    Returns:
        dict: a new dict on every call, mapping ``'max_<feature>'`` to an int.
    '''
    return {
        'max_displayid': 172668,
        'max_adid': 353356,
        'max_platform': 4,
        'max_hour': 6,
        'max_weekday': 1,
        'max_uid': 166049,
        'max_documentid': 1802181,
        'max_campaignid': 29471,
        'max_advertiserid': 4036,
        'max_sourceidx': 14403,
        'max_categoryid': 95,
        'max_entityid': 1326010,
        'max_topicid': 301,
        'max_doctrfids': 2998870,
        'max_sourceidy': 14404,
        'max_docids': 2997096,
    }


def sub_input_model(max_dict,embedding_size,max_name,input_shape_dim1,name,dense_units=None):
    '''
    Build one feature branch: Embedding -> Flatten -> Dense.

    Args:
        max_dict: dict holding the embedding ``input_dim`` for each feature.
        embedding_size: ``output_dim`` of the Embedding layer.
        max_name: key into ``max_dict`` selecting this feature's ``input_dim``.
        input_shape_dim1: number of ids fed per sample (``input_shape`` is
            ``(input_shape_dim1,)``).
        name: name of the Embedding layer; the Flatten and Dense layers are
            named ``'Flatten' + name`` and ``'Dense' + name``.
        dense_units: width of the trailing Dense layer. Defaults to
            ``embedding_size``.
            NOTE(review): the original call omitted ``units`` altogether
            (which raises TypeError in Keras 2), so this default is an
            assumption -- confirm the intended width.

    Returns:
        The (uncompiled) ``Sequential`` branch model.
    '''
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("sub_input_model")

    if dense_units is None:
        dense_units = embedding_size

    sub_model = Sequential()
    sub_model.add(Embedding(input_dim=max_dict[max_name],
                            output_dim=embedding_size,
                            input_shape=(input_shape_dim1,),
                            name=name))
    # Collapse (input_shape_dim1, embedding_size) into one vector per sample.
    sub_model.add(Flatten(name='Flatten' + name))
    # Dense requires ``units`` as its first argument.
    sub_model.add(Dense(dense_units, name='Dense' + name))
    return sub_model

def multi_input_model(model,max_dict):
    '''
    Build, compile and plot the multi-input model.

    One branch per feature is created with ``sub_input_model``; the branch
    outputs are concatenated along axis 1 and fed through
    ``Dense(30, relu)`` and ``Dense(1, sigmoid)``. The result is compiled
    with binary cross-entropy / rmsprop and drawn to ``model.png``.

    The original implementation called
    ``model.add(Concatenate([...], mode='concat', concat_axis=1))``, which
    is the Keras 1 ``Merge`` calling convention: Keras 2 ``Concatenate``
    only takes ``axis`` and must be applied to a list of tensors, and a
    ``Sequential`` cannot express a multi-input graph. The network is
    therefore assembled with the functional API.

    Args:
        model: kept for signature compatibility; it is not modified,
            because the multi-input graph cannot be added to a
            ``Sequential``. Use the returned model instead.
        max_dict: dict of embedding ``input_dim`` values (see ``get_max``).

    Returns:
        The compiled functional ``keras.models.Model``. Its inputs are in
        the order of ``branch_specs`` below.
    '''
    # Imported locally so this function does not depend on edits to the
    # file-level import block.
    from keras import backend as K
    from keras.models import Model

    print("multi_input_model")

    # (embedding layer name, key in max_dict, ids per sample)
    branch_specs = [
        ('displayid', 'max_displayid', 1),
        ('adid', 'max_adid', 1),
        ('platform', 'max_platform', 1),
        ('hour', 'max_hour', 1),
        ('weekday', 'max_weekday', 1),
        ('uid', 'max_uid', 1),
        ('documentid', 'max_documentid', 1),
        ('campaignid', 'max_campaignid', 1),
        ('advertiserid', 'max_advertiserid', 1),
        ('sourceid', 'max_sourceidx', 1),
        ('categoryid', 'max_categoryid', 2),
        ('entityid', 'max_entityid', 10),
        ('topicid', 'max_topicid', 39),
        ('uidview_doc', 'max_doctrfids', 306),
        ('uidview_source', 'max_sourceidy', 160),
        ('uidview_onehour_doc', 'max_docids', 123),
    ]

    branches = []
    for branch_name, max_name, input_len in branch_specs:
        branches.append(sub_input_model(max_dict,
                                        embedding_size=50,
                                        max_name=max_name,
                                        input_shape_dim1=input_len,
                                        name=branch_name))

    branch_inputs = [branch.input for branch in branches]
    branch_outputs = [branch.output for branch in branches]

    # Each branch ends in Flatten + Dense, so axis 1 is the feature axis.
    merged = Concatenate(axis=1)(branch_outputs)

    print('the model\'s input shape ', [K.int_shape(t) for t in branch_inputs])
    print ('the mode\'s output shape ', K.int_shape(merged))

    hidden = Dense(30,activation = 'relu',name = 'Dense_1')(merged)
    print(K.int_shape(hidden))

    prediction = Dense(1, activation='sigmoid',name = 'Dense_2')(hidden)
    print('the final model\'s shape', K.int_shape(prediction))

    full_model = Model(inputs=branch_inputs, outputs=prediction)
    full_model.compile(loss='binary_crossentropy',
                       optimizer='rmsprop',
                       metrics=['accuracy', 'mae'])

    plot_model(full_model,to_file='model.png', show_shapes=True)
    return full_model


if __name__ == "__main__":
    # Script entry point: fetch the feature-size table, create an empty
    # Sequential container and hand both to multi_input_model().
    #
    # Disabled scaffolding for 300 random training examples:
    #    x_train_displayid = np.random.randint(30000,size=(300,1))
    #    x_train_adid = np.random.randint(220000,size=(300,1))
    #    x_train_entityids = np.random.randint(4000000,size=(300,10))
    #    y_train = np.random.randint(1,size=(300,1+1+10,1))
    #    print y_train.shape
    feature_sizes = get_max()
    multi_input_model(Sequential(), feature_sizes)

    # Disabled training / evaluation calls:
    #    model.fit([x_train_displayid, x_train_adid, x_train_entityids], y_train, batch_size=16, epochs=10)
    #    score = model.evaluate([x_test_1, x_test_2], y_test, batch_size=16)
    #    print score