Tensorflow-keras实战(九):Estimator实战

你的名字 2024-04-19 15:08 181阅读 0赞

目录:

1.泰坦尼克问题和feature_column结合使用
2.预定义estimator使用
3.交叉验证特征实战

1.泰坦尼克问题和feature_column结合使用

  1. import matplotlib as mpl
  2. import matplotlib.pyplot as plt
  3. %matplotlib inline
  4. import numpy as np
  5. import pandas as pd
  6. import os
  7. import sklearn
  8. import sys
  9. import time
  10. import tensorflow as tf
  11. from tensorflow import keras
  12. print(tf.__version__)
  13. print(sys.version_info)
  14. for module in mpl,np,pd,sklearn,tf,keras:
  15. print(module.__name__, module.__version__)
  16. # 泰坦尼克问题
  17. # https://storage.googleapis.com/tf-datasets/titanic/train.csv
  18. # https://storage.googleapis.com/tf-datasets/titanic/eval.csv
  19. train_file = "./data/titanic/train.csv"
  20. eval_file = "./data/titanic/eval.csv"
  21. train_df = pd.read_csv(train_file)
  22. eval_df = pd.read_csv(eval_file)
  23. print(train_df.head())
  24. print(eval_df.head())
  25. y_train = train_df.pop('survived')
  26. y_eval = eval_df.pop('survived')
  27. print(train_df.head())
  28. print(eval_df.head())
  29. print(y_train.head())
  30. print(y_eval.head())
  31. train_df.describe()
  32. print(train_df.shape,eval_df.shape)
  33. train_df.age.hist(bins = 20)
  34. train_df.sex.value_counts().plot(kind = 'barh')
  35. train_df.sex.value_counts().plot(kind = 'barv')
  36. train_df['class'].value_counts().plot(kind = 'barh')
  37. pd.concat([train_df,y_train],axis = 1).groupby('sex').survived.mean().plot(kind="barh")
  38. categorical_columns = ['sex','n_siblings_spouses','parch','class','deck','embark_town','alone']
  39. numeric_columns = ['age','fare']
  40. feature_columns=[]
  41. for categorical_column in categorical_columns:
  42. vocab = train_df[categorical_column].unique()
  43. print(categorical_column,vocab)
  44. feature_columns.append(
  45. tf.feature_column.indicator_column(
  46. tf.feature_column.categorical_column_with_vocabulary_list(
  47. categorical_column,vocab)))
  48. for categorical_column in numeric_columns:
  49. feature_columns.append(
  50. tf.feature_column.numeric_column(
  51. categorical_column,dtype=tf.float32))
  52. def make_dataset(data_df,label_df,epochs=10,shuffle=True,batch_size=32):
  53. dataset = tf.data.Dataset.from_tensor_slices((dict(data_df),label_df))
  54. if shuffle:
  55. dataset=dataset.shuffle(10000)
  56. dataset = dataset.repeat(epochs).batch(batch_size)
  57. return dataset
  58. train_dataset = make_dataset(train_df,y_train,batch_size=5)
  59. for x,y in train_dataset.take(1):
  60. print(x,y)
  61. #keras.layers.DenseFeature 把feature_columns和dataset结合
  62. for x,y in train_dataset.take(1):
  63. age_column = feature_columns[7]
  64. gender_column = feature_columns[0]
  65. print(keras.layers.DenseFeatures(age_column)(x).numpy())
  66. print(keras.layers.DenseFeatures(gender_column)(x).numpy())
  67. for x,y in train_dataset.take(1):
  68. print(keras.layers.DenseFeatures(feature_columns)(x).numpy())
  69. model = keras.models.Sequential([
  70. keras.layers.DenseFeatures(feature_columns),
  71. keras.layers.Dense(100,activation='relu'),
  72. keras.layers.Dense(100,activation='relu'),
  73. keras.layers.Dense(2,activation='softmax'),
  74. ])
  75. model.compile(loss='sparse_categorical_crossentropy',
  76. optimizer = keras.optimizers.SGD(lr=0.01),
  77. metrics=['accuracy'])
  78. # 1.model.fit
  79. # 2.model->estimator->train
  80. train_dataset = make_dataset(train_df,y_train,epochs=100)
  81. eval_dataset = make_dataset(eval_df,y_eval,epochs=1,shuffle=False)
  82. history = model.fit(train_dataset,
  83. validation_data=eval_dataset,
  84. steps_per_epoch=20,
  85. validation_steps = 8,
  86. epochs=100)
  87. estimator = keras.estimator.model_to_estimator(model)
  88. # input_fn 1.function
  89. #2.return a.(features,labels) b.dataset->(feature,label)
  90. estimator.train(input_fn = lambda : make_dataset(
  91. train_df,y_train,epochs=100))
  92. #2.0bug 名字没有被保存下来

2.预定义estimator使用

  1. import matplotlib as mpl
  2. import matplotlib.pyplot as plt
  3. %matplotlib inline
  4. import numpy as np
  5. import pandas as pd
  6. import os
  7. import sklearn
  8. import sys
  9. import time
  10. import tensorflow as tf
  11. from tensorflow import keras
  12. print(tf.__version__)
  13. print(sys.version_info)
  14. for module in mpl,np,pd,sklearn,tf,keras:
  15. print(module.__name__, module.__version__)
  16. # https://storage.googleapis.com/tf-datasets/titanic/train.csv
  17. # https://storage.googleapis.com/tf-datasets/titanic/eval.csv
  18. train_file = "./data/titanic/train.csv"
  19. eval_file = "./data/titanic/eval.csv"
  20. train_df = pd.read_csv(train_file)
  21. eval_df = pd.read_csv(eval_file)
  22. print(train_df.head())
  23. print(eval_df.head())
  24. y_train = train_df.pop('survived')
  25. y_eval = eval_df.pop('survived')
  26. print(train_df.head())
  27. print(eval_df.head())
  28. print(y_train.head())
  29. print(y_eval.head())
  30. train_df.describe()
  31. categorical_columns = ['sex','n_siblings_spouses','parch','class','deck','embark_town','alone']
  32. numeric_columns = ['age','fare']
  33. feature_columns=[]
  34. for categorical_column in categorical_columns:
  35. vocab = train_df[categorical_column].unique()
  36. print(categorical_column,vocab)
  37. feature_columns.append(
  38. tf.feature_column.indicator_column(
  39. tf.feature_column.categorical_column_with_vocabulary_list(
  40. categorical_column,vocab)))
  41. for categorical_column in numeric_columns:
  42. feature_columns.append(
  43. tf.feature_column.numeric_column(
  44. categorical_column,dtype=tf.float32))
  45. def make_dataset(data_df,label_df,epochs=10,shuffle=True,batch_size=32):
  46. dataset = tf.data.Dataset.from_tensor_slices((dict(data_df),label_df))
  47. if shuffle:
  48. dataset=dataset.shuffle(10000)
  49. dataset = dataset.repeat(epochs).batch(batch_size)
  50. return dataset
  51. output_dir = 'baseline_model'
  52. if not os.path.exists(output_dir):
  53. os.mkdir(output_dir)
  54. baseline_estimator = tf.estimator.BaselineClassifier(
  55. model_dir = output_dir,n_classes=2)
  56. baseline_estimator.train(input_fn=lambda : make_dataset(train_df,y_train,epochs=100))
  57. baseline_estimator.evaluate(input_fn=lambda : make_dataset(
  58. eval_df,y_eval,epochs=1,shuffle=False,batch_size=20))
  59. linear_output_dir = 'linear_model'
  60. if not os.path.exists(output_dir):
  61. os.mkdir(linear_output_dir)
  62. linear_estimator = tf.estimator.LinearClassifier(
  63. model_dir = linear_output_dir,n_classes=2,feature_columns = feature_columns)
  64. linear_estimator.train(input_fn = lambda : make_dataset(train_df,y_train,epochs=100))
  65. linear_estimator.evaluate(input_fn=lambda : make_dataset(
  66. eval_df,y_eval,epochs=1,shuffle=False))
  67. dnn_output_dir = './dnn_model'
  68. if not os.path.exists(dnn_output_dir):
  69. os.mkdir(dnn_output_dir)
  70. dnn_estimator = tf.estimator.DNNClassifier(
  71. model_dir = dnn_output_dir,n_classes=2,
  72. feature_columns = feature_columns,hidden_units = [128,128],
  73. activation_fn = tf.nn.relu,optimizer = 'Adam')
  74. dnn_estimator.train(input_fn = lambda : make_dataset(train_df,y_train,epochs=100))
  75. dnn_estimator.evaluate(input_fn=lambda : make_dataset(
  76. eval_df,y_eval,epochs=1,shuffle=False))

3.交叉验证特征实战(泰坦尼克问题)

  1. categorical_columns = ['sex','n_siblings_spouses','parch','class','deck','embark_town','alone']
  2. numeric_columns = ['age','fare']
  3. feature_columns=[]
  4. for categorical_column in categorical_columns:
  5. vocab = train_df[categorical_column].unique()
  6. print(categorical_column,vocab)
  7. feature_columns.append(
  8. tf.feature_column.indicator_column(
  9. tf.feature_column.categorical_column_with_vocabulary_list(
  10. categorical_column,vocab)))
  11. for numeric_column in numeric_columns:
  12. feature_columns.append(
  13. tf.feature_column.numeric_column(
  14. numeric_column,dtype=tf.float32))
  15. # cross feture:age:[1,2,3,4,5],gender:[male,female]
  16. # age_x_gender:[(1,male),(2,male),...,(5,male),...,(5,female)]
  17. #100000:100->hash(100000values)%100
  18. feature_columns.append(
  19. tf.feature_column.indicator_column(
  20. tf.feature_column.crossed_column(
  21. ['age','sex'],hash_bucket_size=100)))

发表评论

表情:
评论列表 (有 0 条评论,181人围观)

还没有评论,来说两句吧...

相关阅读

    相关 Redis实战 - 有序集合

            有序集合相对于哈希、列表、集合来说会有一点点陌生,但既然叫有序集合,那么它和集合必然有着联系,它保留了集合不能有重复成员的特性,但不同的是,有序集合中的元素可以