Simple Introduction to Tensorboard Embedding Visualisation (in R)


– web:  http://www.pinchofintelligence.com/simple-introduction-to-tensorboard-embedding-visualisation/
– github:  https://github.com/rmeertens/Simplest-Tensorflow-Tensorboard-MNIST-Embedding-Visualisation/blob/master/Minimal%20example%20embeddings.ipynb

 

Preparing the sprite & metadata

# Visualisation helper functions

# The sprites
# If you don't load sprites, each digit is represented as a simple point, which does not tell you much.
# To add the actual digit images, you have to create a 'sprite map': one big image that tiles everything you want to visualise.

########################################################################
# There are 3 functions which are quite important for the visualisation.

### create_sprite_image:
# neatly aligns image sprites on a square canvas, as specified in the images section here:
# (https://www.tensorflow.org/get_started/embedding_viz)
# """Returns a sprite image consisting of images passed as argument. Images should be count x width x height"""

import numpy as np

def create_sprite_image(images):
    
    if isinstance(images, list):
        images = np.array(images)
    img_h = images.shape[1]
    img_w = images.shape[2]
    n_plots = int(np.ceil(np.sqrt(images.shape[0])))

    spriteimage = np.ones((img_h * n_plots ,img_w * n_plots ))

    for i in range(n_plots):
        for j in range(n_plots):
            this_filter = i * n_plots + j
            if this_filter < images.shape[0]:
                this_img = images[this_filter]
                spriteimage[i * img_h:(i + 1) * img_h,
                  j * img_w:(j + 1) * img_w] = this_img
    return spriteimage

    
create_sprite_image <- function(images){
    # images: array of shape (count, img_h, img_w)
    if (is.list(images)) images <- aperm(simplify2array(images), c(3, 1, 2))
    n_imgs <- dim(images)[1]
    img_h  <- dim(images)[2]
    img_w  <- dim(images)[3]
    n_plots <- ceiling(sqrt(n_imgs))

    spriteimage <- matrix(1, nrow = img_h * n_plots, ncol = img_w * n_plots)

    for (i in 0:(n_plots - 1)) {
        for (j in 0:(n_plots - 1)) {
            this_filter <- i * n_plots + j + 1      # R indexing starts at 1
            if (this_filter <= n_imgs) {
                spriteimage[(i * img_h + 1):((i + 1) * img_h),
                            (j * img_w + 1):((j + 1) * img_w)] <- images[this_filter, , ]
            }
        }
    }
    spriteimage
}
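
# A quick sanity check of the R version with random data (purely illustrative):
dummy <- array(runif(10 * 28 * 28), dim = c(10, 28, 28))
dim(create_sprite_image(dummy))   # 10 images tile onto a 4x4 grid: 112 x 112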
    
    

### vector_to_matrix_mnist:
# MNIST characters are loaded as a vector, not as an image… this function turns them into images
# """Reshapes normal mnist digit (batch,28*28) to matrix (batch,28,28)"""
def vector_to_matrix_mnist(mnist_digits):
    
    return np.reshape(mnist_digits,(-1,28,28))

vector_to_matrix_mnist <- function(mnist_digits){
    # np$reshape fills row-major while R arrays are column-major,
    # so reshape to (batch, 28, 28) and then swap the last two axes
    aperm(array(mnist_digits, dim = c(nrow(mnist_digits), 28, 28)), c(1, 3, 2))
}

### invert_grayscale:
# matplotlib treats a 0 as black, and a 1 as white.
# The tensorboard embeddings visualisation looks way better with white backgrounds,
# so we invert them for the visualisation

def invert_grayscale(mnist_digits):
    """ Makes black white, and white black """
    return 1-mnist_digits
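
# The R equivalent is a one-liner (a minimal sketch; MNIST pixel values are already in [0, 1]):
invert_grayscale <- function(mnist_digits){
    # makes black white, and white black
    1 - mnist_digits
}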

### Save the sprite image
# Pretty straightforward:
# convert our vectors to images, invert the grayscale, and create and save the sprite image.
import matplotlib.pyplot as plt

to_visualise = batch_xs
to_visualise = vector_to_matrix_mnist(to_visualise)
to_visualise = invert_grayscale(to_visualise)

sprite_image = create_sprite_image(to_visualise)

plt.imsave(path_for_mnist_sprites,sprite_image,cmap='gray')
plt.imshow(sprite_image,cmap='gray')
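
# A possible R version of this step, using the 'png' package to write the sprite
# (an assumption -- any writer that accepts a [0, 1] grayscale matrix will do).
# batch_xs and PATH_FOR_MNIST_SPRITES are only created in the R script further down,
# so run this after that point.
library(png)      # install.packages("png") if it is not available

to_visualise <- vector_to_matrix_mnist(batch_xs)
to_visualise <- invert_grayscale(to_visualise)
sprite_image <- create_sprite_image(to_visualise)

writePNG(sprite_image, PATH_FOR_MNIST_SPRITES)   # save the sprite as a grayscale PNG
grid::grid.raster(sprite_image)                  # quick visual check in the plot pane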

### Save the metadata
# To add colors to your mnist digits,
# the embedding visualisation tool needs to know what label each image has.
# This is saved in a "TSV" (tab-separated values) file.

# Each line of our file contains the following:
# "Index" , "Label"
# The Index is simply the index in our embedding matrix.
# The label is the label of the MNIST character.

# This code writes our data to the metadata file.
with open(path_for_mnist_metadata,'w') as f:
    f.write("Index\tLabel\n")
    for index,label in enumerate(batch_ys):
        f.write("%d\t%d\n" % (index,label))

 

### Visualising embeddings
# Embedding visualisation lets you see what the algorithm is learning and check whether it is really learning what you intend.
# It is a built-in feature of Tensorboard.
#
# Below is a small script that runs a simple visualisation of the MNIST digits.

library(tensorflow)

# Module
projector <- tf$contrib$tensorboard$plugins$projector
input_data <- tf$examples$tutorials$mnist$input_data
#input_data <- tf$contrib$learn$datasets$mnist

R_HOME  <- "/Users/onesixx/Dropbox/Rhome/tensorflow"
LOG_DIR <- paste(R_HOME,'tmp', sep="/")
NAME_TO_VISUALISE_VARIABLE <- "mnistembedding"
TO_EMBED_COUNT <- 500L

PATH_FOR_MNIST_SPRITES  <- paste(LOG_DIR, 'mnistdigits.png', sep="/")
PATH_FOR_MNIST_METADATA <- paste(LOG_DIR, 'metadata.tsv', sep="/")
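
# All outputs (checkpoint, sprite, metadata, projector config) go into LOG_DIR,
# so make sure it exists (plain base R):
if (!dir.exists(LOG_DIR)) dir.create(LOG_DIR, recursive = TRUE)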


### What to visualise
# The embedding visualizer is only a tool for visualising the embedding you get after training,
# but you need to know how to use it, so here it is applied to a basic visualisation of the MNIST digits.
# Each digit is represented as a 28*28 (= 784) dimensional vector.

#mnist <- input_data$read_data_sets("MNIST-data", one_hot = TRUE)
# Because of an [SSL: CERTIFICATE_VERIFY_FAILED] error, the four .gz archives were placed manually in the folder below
# http://yann.lecun.com/exdb/mnist/
mnist <- input_data$read_data_sets("MNIST-data/", one_hot=FALSE)
batch <- mnist$train$next_batch(TO_EMBED_COUNT)
batch_xs <- batch[[1]]
batch_ys <- batch[[2]]

### Creating the embeddings
# The embeddings in this example are really simple: the embedding is just the raw training data.
# Your graph will probably be more complicated, but what matters is that you get the variable you want out of it (with its name).

embedding_var  <- tf$Variable(batch_xs, name=NAME_TO_VISUALISE_VARIABLE)
summary_writer <- tf$summary$FileWriter(file.path(LOG_DIR))  # the checkpoint files are saved in the same LOG_DIR folder

### Create the embedding projector
# This is the important part of your embedding visualisation.
# Here you specify what variable you want to project, what the metadata path is (the names and classes),
# and where you save the sprites.

config <- projector$ProjectorConfig()     # format: projector_config.proto from the projector module
embedding <- config$embeddings$add()      # you can add multiple embeddings (here we add just one)
embedding$tensor_name <- embedding_var$name
# link this tensor to its metadata file (e.g. the labels)
embedding$metadata_path <- PATH_FOR_MNIST_METADATA #'metadata.tsv'

# Specify where you find the sprite (we will create this later)
embedding$sprite$image_path <- PATH_FOR_MNIST_SPRITES  #'mnistdigits.png'
embedding$sprite$single_image_dim$extend(c(28L, 28L))   # integer dimensions for the proto field

# Say that you want to visualise the embeddings
# The next line writes a projector_config.pbtxt in the LOG_DIR.
# TensorBoard will read this file during startup.
projector$visualize_embeddings(summary_writer, config)

################
# /tmp/tensorlog|⇒ vi "projector_config.pbtxt"
# (the tensor name and paths in your file will reflect the values set above)
#
# embeddings {
#   tensor_name: "word_embedding_1:0"
#   metadata_path: "/tmp/tensorlog/metadata.tsv"
# }
#################

### Saving the data
# Note: initialize_all_variables() is used here instead of global_variables_initializer().
#
# So far no checkpoint has been saved.
# To save your model, create a tf$train$Saver and save it periodically
# by calling saver$save(session, "LOG_DIR/model.ckpt", step).
#
# Tensorboard loads the saved variable from the saved graph.
# Initialise a session and the variables, and
# save them in your logging directory,
# e.g. 'tmp/model.ckpt-1'

sess <- tf$InteractiveSession()
init <- tf$initialize_all_variables()   # global_variables_initializer()
sess$run(init)
saver <- tf$train$Saver()
saver$save(sess, paste(LOG_DIR, 'model.ckpt', sep="/"))

# with(tf$Session() %as% sess, {
#     init <- tf$initialize_all_variables()   # global_variables_initializer()
#     sess$run(init)
#     saver <- tf$train$Saver()
#     saver$save(sess, paste(LOG_DIR, 'model.ckpt', sep="/"))
# })
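
# At this point LOG_DIR should contain the checkpoint files, projector_config.pbtxt,
# and (once written) the sprite and metadata files. A quick check:
list.files(LOG_DIR)
# something like: "checkpoint" "metadata.tsv" "mnistdigits.png"
#                 "model.ckpt.data-00000-of-00001" "model.ckpt.index" "model.ckpt.meta"
#                 "projector_config.pbtxt"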

### Running tensorboard
tensorboard(log_dir=LOG_DIR)
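
Once Tensorboard is running, open the Projector tab in the browser: the 500 digits show up as their sprite thumbnails and can be coloured by the Label column from metadata.tsv.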

 

 
