{
" cells " : [
{
" cell_type " : " markdown " ,
" metadata " : {
" colab_type " : " text " ,
" id " : " D7tqLMoKF6uq "
} ,
" source " : [
" Deep Learning with TensorFlow \n " ,
" ============= \n " ,
" \n " ,
" Credits: Forked from [TensorFlow](https://github.com/tensorflow/tensorflow) by Google \n " ,
" \n " ,
" Setup \n " ,
" ------------ \n " ,
" \n " ,
" Refer to the [setup instructions](https://github.com/donnemartin/data-science-ipython-notebooks/tree/feature/deep-learning/deep-learning/tensor-flow-exercises/README.md). \n " ,
" \n " ,
" Exercise 5 \n " ,
" ------------ \n " ,
" \n " ,
" The goal of this exercise is to train a skip-gram model over [Text8](http://mattmahoney.net/dc/textdata) data. "
]
} ,
{
" cell_type " : " code " ,
" execution_count " : null ,
" metadata " : {
" cellView " : " both " ,
" colab " : {
" autoexec " : {
" startup " : false ,
" wait_interval " : 0
}
} ,
" colab_type " : " code " ,
" collapsed " : true ,
" id " : " 0K1ZyLn04QZf "
} ,
" outputs " : [ ] ,
" source " : [
" # These are all the modules we ' ll be using later. Make sure you can import them \n " ,
" # before proceeding further. \n " ,
" import collections \n " ,
" import math \n " ,
" import numpy as np \n " ,
" import os \n " ,
" import random \n " ,
" import tensorflow as tf \n " ,
" import urllib \n " ,
" import zipfile \n " ,
" from matplotlib import pylab \n " ,
" from sklearn.manifold import TSNE "
]
} ,
{
" cell_type " : " markdown " ,
" metadata " : {
" colab_type " : " text " ,
" id " : " aCjPJE944bkV "
} ,
" source " : [
" Download the data from the source website if necessary. "
]
} ,
{
" cell_type " : " code " ,
" execution_count " : null ,
" metadata " : {
" cellView " : " both " ,
" colab " : {
" autoexec " : {
" startup " : false ,
" wait_interval " : 0
} ,
" output_extras " : [
{
" item_id " : 1
}
]
} ,
" colab_type " : " code " ,
" collapsed " : false ,
" executionInfo " : {
" elapsed " : 14640 ,
" status " : " ok " ,
" timestamp " : 1445964482948 ,
" user " : {
" color " : " #1FA15D " ,
" displayName " : " Vincent Vanhoucke " ,
" isAnonymous " : false ,
" isMe " : true ,
" permissionId " : " 05076109866853157986 " ,
" photoUrl " : " //lh6.googleusercontent.com/-cCJa7dTDcgQ/AAAAAAAAAAI/AAAAAAAACgw/r2EZ_8oYer4/s50-c-k-no/photo.jpg " ,
" sessionId " : " 2f1ffade4c9f20de " ,
" userId " : " 102167687554210253930 "
} ,
" user_tz " : 420
} ,
" id " : " RJ-o3UBUFtCw " ,
" outputId " : " c4ec222c-80b5-4298-e635-93ca9f79c3b7 "
} ,
" outputs " : [
{
" name " : " stdout " ,
" output_type " : " stream " ,
" text " : [
" Found and verified text8.zip \n "
]
}
] ,
" source " : [
" url = ' http://mattmahoney.net/dc/ ' \n " ,
" \n " ,
" def maybe_download(filename, expected_bytes): \n " ,
" \" \" \" Download a file if not present, and make sure it ' s the right size. \" \" \" \n " ,
" if not os.path.exists(filename): \n " ,
" filename, _ = urllib.urlretrieve(url + filename, filename) \n " ,
" statinfo = os.stat(filename) \n " ,
" if statinfo.st_size == expected_bytes: \n " ,
" print ' Found and verified ' , filename \n " ,
" else: \n " ,
" print statinfo.st_size \n " ,
" raise Exception( \n " ,
" ' Failed to verify ' + filename + ' . Can you get to it with a browser? ' ) \n " ,
" return filename \n " ,
" \n " ,
" filename = maybe_download( ' text8.zip ' , 31344016) "
]
} ,
{
" cell_type " : " markdown " ,
" metadata " : {
" colab_type " : " text " ,
" id " : " Zqz3XiqI4mZT "
} ,
" source " : [
" Read the data into a string. "
]
} ,
{
" cell_type " : " code " ,
" execution_count " : null ,
" metadata " : {
" cellView " : " both " ,
" colab " : {
" autoexec " : {
" startup " : false ,
" wait_interval " : 0
} ,
" output_extras " : [
{
" item_id " : 1
}
]
} ,
" colab_type " : " code " ,
" collapsed " : false ,
" executionInfo " : {
" elapsed " : 28844 ,
" status " : " ok " ,
" timestamp " : 1445964497165 ,
" user " : {
" color " : " #1FA15D " ,
" displayName " : " Vincent Vanhoucke " ,
" isAnonymous " : false ,
" isMe " : true ,
" permissionId " : " 05076109866853157986 " ,
" photoUrl " : " //lh6.googleusercontent.com/-cCJa7dTDcgQ/AAAAAAAAAAI/AAAAAAAACgw/r2EZ_8oYer4/s50-c-k-no/photo.jpg " ,
" sessionId " : " 2f1ffade4c9f20de " ,
" userId " : " 102167687554210253930 "
} ,
" user_tz " : 420
} ,
" id " : " Mvf09fjugFU_ " ,
" outputId " : " e3a928b4-1645-4fe8-be17-fcf47de5716d "
} ,
" outputs " : [
{
" name " : " stdout " ,
" output_type " : " stream " ,
" text " : [
" Data size 17005207 \n "
]
}
] ,
" source " : [
" def read_data(filename): \n " ,
" f = zipfile.ZipFile(filename) \n " ,
" for name in f.namelist(): \n " ,
" return f.read(name).split() \n " ,
" f.close() \n " ,
" \n " ,
" words = read_data(filename) \n " ,
" print ' Data size ' , len(words) "
]
} ,
{
" cell_type " : " markdown " ,
" metadata " : {
" colab_type " : " text " ,
" id " : " Zdw6i4F8glpp "
} ,
" source " : [
" Build the dictionary and replace rare words with UNK token. "
]
} ,
{
" cell_type " : " code " ,
" execution_count " : null ,
" metadata " : {
" cellView " : " both " ,
" colab " : {
" autoexec " : {
" startup " : false ,
" wait_interval " : 0
} ,
" output_extras " : [
{
" item_id " : 1
}
]
} ,
" colab_type " : " code " ,
" collapsed " : false ,
" executionInfo " : {
" elapsed " : 28849 ,
" status " : " ok " ,
" timestamp " : 1445964497178 ,
" user " : {
" color " : " #1FA15D " ,
" displayName " : " Vincent Vanhoucke " ,
" isAnonymous " : false ,
" isMe " : true ,
" permissionId " : " 05076109866853157986 " ,
" photoUrl " : " //lh6.googleusercontent.com/-cCJa7dTDcgQ/AAAAAAAAAAI/AAAAAAAACgw/r2EZ_8oYer4/s50-c-k-no/photo.jpg " ,
" sessionId " : " 2f1ffade4c9f20de " ,
" userId " : " 102167687554210253930 "
} ,
" user_tz " : 420
} ,
" id " : " gAL1EECXeZsD " ,
" outputId " : " 3fb4ecd1-df67-44b6-a2dc-2291730970b2 "
} ,
" outputs " : [
{
" name " : " stdout " ,
" output_type " : " stream " ,
" text " : [
" Most common words (+UNK) [[ ' UNK ' , 418391], ( ' the ' , 1061396), ( ' of ' , 593677), ( ' and ' , 416629), ( ' one ' , 411764)] \n " ,
" Sample data [5243, 3083, 12, 6, 195, 2, 3136, 46, 59, 156] \n "
]
}
] ,
" source " : [
" vocabulary_size = 50000 \n " ,
" \n " ,
" def build_dataset(words): \n " ,
" count = [[ ' UNK ' , -1]] \n " ,
" count.extend(collections.Counter(words).most_common(vocabulary_size - 1)) \n " ,
" dictionary = dict() \n " ,
" for word, _ in count: \n " ,
" dictionary[word] = len(dictionary) \n " ,
" data = list() \n " ,
" unk_count = 0 \n " ,
" for word in words: \n " ,
" if word in dictionary: \n " ,
" index = dictionary[word] \n " ,
" else: \n " ,
" index = 0 # dictionary[ ' UNK ' ] \n " ,
" unk_count = unk_count + 1 \n " ,
" data.append(index) \n " ,
" count[0][1] = unk_count \n " ,
" reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys())) \n " ,
" return data, count, dictionary, reverse_dictionary \n " ,
" \n " ,
" data, count, dictionary, reverse_dictionary = build_dataset(words) \n " ,
" print ' Most common words (+UNK) ' , count[:5] \n " ,
" print ' Sample data ' , data[:10] \n " ,
" del words # Hint to reduce memory. "
]
} ,
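{
" cell_type " : " markdown " ,
" metadata " : { } ,
" source " : [
" The sample above is a list of integer word IDs. As an optional sanity check (an illustrative addition, not part of the original exercise text), the next cell maps those IDs back to words with `reverse_dictionary`; the exact words depend on the Text8 download. "
]
} ,
{
" cell_type " : " code " ,
" execution_count " : null ,
" metadata " : { } ,
" outputs " : [ ] ,
" source " : [
" # Illustrative sanity check: decode the first few integer IDs back into words. \n " ,
" print 'Sample data (as words)', [reverse_dictionary[i] for i in data[:10]] "
]
} ,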
{
" cell_type " : " markdown " ,
" metadata " : {
" colab_type " : " text " ,
" id " : " lFwoyygOmWsL "
} ,
" source " : [
" Function to generate a training batch for the skip-gram model. "
]
} ,
{
" cell_type " : " code " ,
" execution_count " : null ,
" metadata " : {
" cellView " : " both " ,
" colab " : {
" autoexec " : {
" startup " : false ,
" wait_interval " : 0
} ,
" output_extras " : [
{
" item_id " : 1
}
]
} ,
" colab_type " : " code " ,
" collapsed " : false ,
" executionInfo " : {
" elapsed " : 113 ,
" status " : " ok " ,
" timestamp " : 1445964901989 ,
" user " : {
" color " : " #1FA15D " ,
" displayName " : " Vincent Vanhoucke " ,
" isAnonymous " : false ,
" isMe " : true ,
" permissionId " : " 05076109866853157986 " ,
" photoUrl " : " //lh6.googleusercontent.com/-cCJa7dTDcgQ/AAAAAAAAAAI/AAAAAAAACgw/r2EZ_8oYer4/s50-c-k-no/photo.jpg " ,
" sessionId " : " 2f1ffade4c9f20de " ,
" userId " : " 102167687554210253930 "
} ,
" user_tz " : 420
} ,
" id " : " w9APjA-zmfjV " ,
" outputId " : " 67cccb02-cdaf-4e47-d489-43bcc8d57bb8 "
} ,
" outputs " : [
{
" name " : " stdout " ,
" output_type " : " stream " ,
" text " : [
" 3083 -> 5243 \n " ,
" originated -> anarchism \n " ,
" 3083 -> 12 \n " ,
" originated -> as \n " ,
" 12 -> 3083 \n " ,
" as -> originated \n " ,
" 12 -> 6 \n " ,
" as -> a \n " ,
" 6 -> 12 \n " ,
" a -> as \n " ,
" 6 -> 195 \n " ,
" a -> term \n " ,
" 195 -> 6 \n " ,
" term -> a \n " ,
" 195 -> 2 \n " ,
" term -> of \n "
]
}
] ,
" source " : [
" data_index = 0 \n " ,
" \n " ,
" def generate_batch(batch_size, num_skips, skip_window): \n " ,
" global data_index \n " ,
" assert batch_size % num_skips == 0 \n " ,
" assert num_skips <= 2 * skip_window \n " ,
" batch = np.ndarray(shape=(batch_size), dtype=np.int32) \n " ,
" labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32) \n " ,
" span = 2 * skip_window + 1 # [ skip_window target skip_window ] \n " ,
" buffer = collections.deque(maxlen=span) \n " ,
" for _ in range(span): \n " ,
" buffer.append(data[data_index]) \n " ,
" data_index = (data_index + 1) % le n(data) \n " ,
" for i in range(batch_size / num_skips): \n " ,
" target = skip_window # target label at the center of the buffer \n " ,
" targets_to_avoid = [ skip_window ] \n " ,
" for j in range(num_skips): \n " ,
" while target in targets_to_avoid: \n " ,
" target = random.randint(0, span - 1) \n " ,
" targets_to_avoid.append(target) \n " ,
" batch[i * num_skips + j] = buffer[skip_window] \n " ,
" labels[i * num_skips + j, 0] = buffer[target] \n " ,
" buffer.append(data[data_index]) \n " ,
" data_index = (data_index + 1) % le n(data) \n " ,
" return batch, labels \n " ,
" \n " ,
" batch, labels = generate_batch(batch_size=8, num_skips=2, skip_window=1) \n " ,
" for i in range(8): \n " ,
" print batch[i], ' -> ' , labels[i, 0] \n " ,
" print reverse_dictionary[batch[i]], ' -> ' , reverse_dictionary[labels[i, 0]] "
]
} ,
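{
" cell_type " : " markdown " ,
" metadata " : { } ,
" source " : [
" To make the roles of `skip_window` and `num_skips` concrete, here is one more illustrative call (an addition, not part of the original exercise): with `skip_window=2` each center word has four context words, and `num_skips=4` reuses the center word once for each of them. "
]
} ,
{
" cell_type " : " code " ,
" execution_count " : null ,
" metadata " : { } ,
" outputs " : [ ] ,
" source " : [
" # Illustrative only: a wider context window. Constraints: batch_size % num_skips == 0 \n " ,
" # and num_skips <= 2 * skip_window, both satisfied here. \n " ,
" data_index = 0  # restart at the beginning of the corpus for a reproducible batch \n " ,
" batch, labels = generate_batch(batch_size=8, num_skips=4, skip_window=2) \n " ,
" for i in range(8): \n " ,
"   print batch[i], reverse_dictionary[batch[i]], '->', labels[i, 0], reverse_dictionary[labels[i, 0]] "
]
} ,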
{
" cell_type " : " markdown " ,
" metadata " : {
" colab_type " : " text " ,
" id " : " Ofd1MbBuwiva "
} ,
" source " : [
" Train a skip-gram model. "
]
} ,
{
" cell_type " : " code " ,
" execution_count " : null ,
" metadata " : {
" cellView " : " both " ,
" colab " : {
" autoexec " : {
" startup " : false ,
" wait_interval " : 0
}
} ,
" colab_type " : " code " ,
" collapsed " : true ,
" id " : " 8pQKsV4Vwlzy "
} ,
" outputs " : [ ] ,
" source " : [
" batch_size = 128 \n " ,
" embedding_size = 128 # Dimension of the embedding vector. \n " ,
" skip_window = 1 # How many words to consider left and right. \n " ,
" num_skips = 2 # How many times to reuse an input to generate a label. \n " ,
" # We pick a random validation set to sample nearest neighbors. here we limit the \n " ,
" # validation samples to the words that have a low numeric ID, which by \n " ,
" # construction are also the most frequent. \n " ,
" valid_size = 16 # Random set of words to evaluate similarity on. \n " ,
" valid_window = 100 # Only pick dev samples in the head of the distribution. \n " ,
" valid_examples = np.array(random.sample(xrange(valid_window), valid_size)) \n " ,
" num_sampled = 64 # Number of negative examples to sample. \n " ,
" \n " ,
" graph = tf.Graph() \n " ,
" \n " ,
" with graph.as_default(): \n " ,
" \n " ,
" # Input data. \n " ,
" train_dataset = tf.placeholder(tf.int32, shape=[batch_size]) \n " ,
" train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1]) \n " ,
" valid_dataset = tf.constant(valid_examples, dtype=tf.int32) \n " ,
" \n " ,
" # Variables. \n " ,
" embeddings = tf.Variable( \n " ,
" tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0)) \n " ,
" softmax_weights = tf.Variable( \n " ,
" tf.truncated_normal([vocabulary_size, embedding_size], \n " ,
" stddev=1.0 / math.sqrt(embedding_size))) \n " ,
" softmax_biases = tf.Variable(tf.zeros([vocabulary_size])) \n " ,
" \n " ,
" # Model. \n " ,
" # Look up embeddings for inputs. \n " ,
" embed = tf.nn.embedding_lookup(embeddings, train_dataset) \n " ,
" # Compute the softmax loss, using a sample of the negative labels each time. \n " ,
" loss = tf.reduce_mean( \n " ,
" tf.nn.sampled_softmax_loss(softmax_weights, softmax_biases, embed, \n " ,
" train_labels, num_sampled, vocabulary_size)) \n " ,
" \n " ,
" # Optimizer. \n " ,
" optimizer = tf.train.AdagradOptimizer(1.0).minimize(loss) \n " ,
" \n " ,
" # Compute the similarity between minibatch examples and all embeddings. \n " ,
" # We use the cosine distance: \n " ,
" norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True)) \n " ,
" normalized_embeddings = embeddings / norm \n " ,
" valid_embeddings = tf.nn.embedding_lookup( \n " ,
" normalized_embeddings, valid_dataset) \n " ,
" similarity = tf.matmul(valid_embeddings, tf.transpose(normalized_embeddings)) "
]
} ,
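{
" cell_type " : " markdown " ,
" metadata " : { } ,
" source " : [
" A note on the graph above (added for clarity, not part of the original assignment text): `sampled_softmax_loss` avoids computing the full 50,000-way softmax at every step by contrasting the true context word against only `num_sampled` randomly drawn negative words. The `similarity` op computes cosine similarity: every row of `normalized_embeddings` has unit L2 norm, so the matrix product with the validation embeddings yields \n " ,
" \n " ,
" $$\\mathrm{sim}(i, j) = \\frac{e_i \\cdot e_j}{\\lVert e_i \\rVert \\, \\lVert e_j \\rVert} \n " ,
" $$ \n " ,
" \n " ,
" for each validation word $i$ and vocabulary word $j$. "
]
} ,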
{
" cell_type " : " code " ,
" execution_count " : null ,
" metadata " : {
" cellView " : " both " ,
" colab " : {
" autoexec " : {
" startup " : false ,
" wait_interval " : 0
} ,
" output_extras " : [
{
" item_id " : 23
} ,
{
" item_id " : 48
} ,
{
" item_id " : 61
}
]
} ,
" colab_type " : " code " ,
" collapsed " : false ,
" executionInfo " : {
" elapsed " : 436189 ,
" status " : " ok " ,
" timestamp " : 1445965429787 ,
" user " : {
" color " : " #1FA15D " ,
" displayName " : " Vincent Vanhoucke " ,
" isAnonymous " : false ,
" isMe " : true ,
" permissionId " : " 05076109866853157986 " ,
" photoUrl " : " //lh6.googleusercontent.com/-cCJa7dTDcgQ/AAAAAAAAAAI/AAAAAAAACgw/r2EZ_8oYer4/s50-c-k-no/photo.jpg " ,
" sessionId " : " 2f1ffade4c9f20de " ,
" userId " : " 102167687554210253930 "
} ,
" user_tz " : 420
} ,
" id " : " 1bQFGceBxrWW " ,
" outputId " : " 5ebd6d9a-33c6-4bcd-bf6d-252b0b6055e4 "
} ,
" outputs " : [
{
" name " : " stdout " ,
" output_type " : " stream " ,
" text " : [
" Initialized \n " ,
" Average loss at step 0 : 8.58149623871 \n " ,
" Nearest to been: unfavourably, marmara, ancestral, legal, bogart, glossaries, worst, rooms, \n " ,
" Nearest to time: conformist, strawberries, sindhi, waterfall, xia, nominates, psp, sensitivity, \n " ,
" Nearest to over: overlord, panda, golden, semigroup, rawlings, involved, shreveport, handling, \n " ,
" Nearest to not: hymenoptera, reintroducing, lamiaceae, because, davao, omnipotent, combustion, debilitating, \n " ,
" Nearest to three: catalog, koza, gn, braque, holstein, postgresql, luddite, justine, \n " ,
" Nearest to if: chilled, vince, fiddler, represented, sandinistas, happiness, lya, glands, \n " ,
" Nearest to there: coast, photosynthetic, kimmei, legally, inner, illyricum, formats, fullmetal, \n " ,
" Nearest to between: chuvash, prinz, suitability, wolfe, guideline, computability, diminutive, paulo, \n " ,
" Nearest to from: tanganyika, workshop, elphinstone, spearhead, resurrected, kevlar, shangri, loves, \n " ,
" Nearest to state: sextus, wuppertal, glaring, inches, unrounded, courageous, adler, connie, \n " ,
" Nearest to on: gino, phocas, rhine, jg, macrocosm, jackass, jays, theorie, \n " ,
" Nearest to and: standings, towed, reyes, willard, equality, juggling, wladislaus, faked, \n " ,
" Nearest to eight: gresham, dogg, moko, tennis, superseded, telegraphy, scramble, vinod, \n " ,
" Nearest to they: prisons, divisor, coder, ribeira, willingness, factional, nne, lotta, \n " ,
" Nearest to more: blues, fur, sterling, tangier, khwarizmi, discouraged, cal, deicide, \n " ,
" Nearest to other: enemies, bogged, brassicaceae, lascaux, dispense, alexandrians, crimea, dou, \n " ,
" Average loss at step 2000 : 4.39983723116 \n " ,
" Average loss at step 4000 : 3.86921076906 \n " ,
" Average loss at step 6000 : 3.72542127335 \n " ,
" Average loss at step 8000 : 3.57835536212 \n " ,
" Average loss at step 10000 : 3.61056993055 \n " ,
" Nearest to been: glossaries, legal, unfavourably, be, hadad, wore, scarcity, were, \n " ,
" Nearest to time: strawberries, conformist, gleichschaltung, waterfall, molality, nominates, baal, dole, \n " ,
" Nearest to over: golden, semigroup, catus, motorways, brick, shehri, mussolini, overlord, \n " ,
" Nearest to not: hinayana, it, often, they, boots, also, noaa, lindsey, \n " ,
" Nearest to three: four, seven, six, five, nine, eight, two, zero, \n " ,
" Nearest to if: glands, euros, wallpaper, redefine, toho, confuse, unsound, shepherd, \n " ,
" Nearest to there: it, they, fullmetal, pace, legally, harpsichord, mma, bug, \n " ,
" Nearest to between: chuvash, wandering, from, kirsch, pursuant, eurocents, suitability, jackie, \n " ,
" Nearest to from: into, in, workshop, to, at, misogynist, elphinstone, spearhead, \n " ,
" Nearest to state: sextus, glaring, connie, adler, esoteric, didactic, handedness, presidents, \n " ,
" Nearest to on: in, at, for, ruminants, wakefulness, torrey, foley, gino, \n " ,
" Nearest to and: or, who, but, zelda, of, for, thirst, chisel, \n " ,
" Nearest to eight: nine, six, seven, five, four, three, zero, two, \n " ,
" Nearest to they: he, prisons, there, we, hydrate, it, not, cumbersome, \n " ,
" Nearest to more: skye, blues, trypomastigotes, deicide, most, readable, used, sterling, \n " ,
" Nearest to other: trochaic, hush, surveyors, joachim, differentiation, attackers, reverence, attestation, \n " ,
" Average loss at step 12000 : 3.66169466591 \n " ,
" Average loss at step 14000 : 3.60342905837 \n " ,
" Average loss at step 16000 : 3.57761328053 \n " ,
" Average loss at step 18000 : 3.57667332476 \n " ,
" Average loss at step 20000 : 3.53310145146 \n " ,
" Nearest to been: be, become, was, hadad, unfavourably, were, wore, partido, \n " ,
" Nearest to time: gleichschaltung, strawberries, year, nominates, conformist, etch, admittedly, treasuries, \n " ,
" Nearest to over: golden, semigroup, motorways, rawlings, triangle, trey, ustawa, mattingly, \n " ,
" Nearest to not: they, boots, often, dieppe, still, hinayana, nearly, be, \n " ,
" Nearest to three: two, four, five, seven, eight, six, nine, one, \n " ,
" Nearest to if: wallpaper, euros, before, toho, unsound, so, bg, pfc, \n " ,
" Nearest to there: they, it, he, usually, which, we, not, transactions, \n " ,
" Nearest to between: from, with, about, near, reactance, eurocents, wandering, voltaire, \n " ,
" Nearest to from: into, workshop, by, between, in, on, elphinstone, under, \n " ,
" Nearest to state: glaring, esoteric, succeeding, sextus, vorarlberg, presidents, depends, connie, \n " ,
" Nearest to on: in, at, upon, during, from, janis, foley, nubian, \n " ,
" Nearest to and: or, thirst, but, where, s, who, pfaff, including, \n " ,
" Nearest to eight: nine, seven, six, five, four, three, zero, one, \n " ,
" Nearest to they: there, he, we, not, it, you, prisons, who, \n " ,
" Nearest to more: less, most, deicide, skye, trypomastigotes, interventionism, toed, drummond, \n " ,
" Nearest to other: such, joachim, hush, attackers, surveyors, trochaic, differentiation, reverence, \n " ,
" Average loss at step 22000 : 3.59519316927 \n " ,
" Average loss at step 24000 : 3.55378576797 \n " ,
" Average loss at step 26000 : 3.56455037558 \n " ,
" Average loss at step 28000 : 3.5040882225 \n " ,
" Average loss at step 30000 : 3.39208897972 \n " ,
" Nearest to been: become, be, were, was, spotless, hadad, by, hausdorff, \n " ,
" Nearest to time: gleichschaltung, year, day, nominates, jesus, strawberries, way, admittedly, \n " ,
" Nearest to over: golden, semigroup, motorways, rawlings, interventionism, counternarcotics, adaption, brick, \n " ,
" Nearest to not: often, they, it, never, still, nor, boots, pki, \n " ,
" Nearest to three: four, six, two, eight, five, seven, nine, zero, \n " ,
" Nearest to if: when, before, so, should, toho, where, bg, wallpaper, \n " ,
" Nearest to there: they, it, which, usually, he, that, also, now, \n " ,
" Nearest to between: with, from, in, panasonic, presupposes, churchmen, hijacking, where, \n " ,
" Nearest to from: into, elphinstone, workshop, between, through, speculates, sosa, in, \n " ,
" Nearest to state: esoteric, glaring, presidents, vorarlberg, atmosphere, succeeding, lute, connie, \n " ,
" Nearest to on: upon, in, janis, during, torrey, against, infield, catalans, \n " ,
" Nearest to and: or, thirst, in, but, of, sobib, cleaves, including, \n " ,
" Nearest to eight: nine, six, four, seven, three, zero, five, one, \n " ,
" Nearest to they: we, there, he, you, it, these, who, i, \n " ,
" Nearest to more: less, most, deicide, faster, toed, very, skye, tonic, \n " ,
" Nearest to other: different, attackers, joachim, various, such, many, differentiation, these, \n " ,
" Average loss at step 32000 : 3.49501452419 \n " ,
" Average loss at step 34000 : 3.48593705952 \n " ,
" Average loss at step 36000 : 3.50112806576 \n " ,
" Average loss at step 38000 : 3.49244426501 \n " ,
" Average loss at step 40000 : 3.3890105716 \n " ,
" Nearest to been: become, be, were, was, jolie, hausdorff, spotless, had, \n " ,
" Nearest to time: year, way, gleichschaltung, period, day, stanislav, stage, outcome, \n " ,
" Nearest to over: through, semigroup, rawlings, golden, about, brick, on, motorways, \n " ,
" Nearest to not: they, radiated, never, pki, still, omnipotent, hinayana, really, \n " ,
" Nearest to three: four, six, five, two, seven, eight, one, nine, \n " ,
" Nearest to if: when, before, where, then, bg, because, can, should, \n " ,
" Nearest to there: they, it, he, usually, this, typically, still, often, \n " ,
" Nearest to between: with, in, from, about, against, churchmen, johansen, presupposes, \n " ,
" Nearest to from: into, through, elphinstone, in, workshop, between, suing, under, \n " ,
" Nearest to state: esoteric, presidents, atmosphere, vorarlberg, lute, succeeding, glaring, didactic, \n " ,
" Nearest to on: upon, at, in, during, unitarians, under, catalans, batavians, \n " ,
" Nearest to and: or, but, s, incapacitation, including, while, of, which, \n " ,
" Nearest to eight: nine, six, seven, four, five, three, one, two, \n " ,
" Nearest to they: we, he, there, you, she, i, not, it, \n " ,
" Nearest to more: less, most, deicide, toed, greater, faster, quite, longer, \n " ,
" Nearest to other: various, different, attackers, joachim, clutter, nz, trochaic, apulia, \n " ,
" Average loss at step 42000 : 3.45294014364 \n " ,
" Average loss at step 44000 : 3.47660055941 \n " ,
" Average loss at step 46000 : 3.47458503014 \n " ,
" Average loss at step 48000 : 3.47261548793 \n " ,
" Average loss at step 50000 : 3.45390708435 \n " ,
" Nearest to been: become, be, had, was, were, hausdorff, prem, remained, \n " ,
" Nearest to time: way, year, period, stv, day, gleichschaltung, stage, outcome, \n " ,
" Nearest to over: through, golden, semigroup, about, brick, counternarcotics, theremin, mattingly, \n " ,
" Nearest to not: they, still, never, really, sometimes, it, kiwifruit, nearly, \n " ,
" Nearest to three: five, four, six, seven, two, eight, one, nine, \n " ,
" Nearest to if: when, before, where, because, connexion, though, so, whether, \n " ,
" Nearest to there: they, it, he, this, now, often, usually, still, \n " ,
" Nearest to between: with, from, fashioned, churchmen, panasonic, explores, within, racial, \n " ,
" Nearest to from: into, through, under, elphinstone, between, workshop, circumpolar, idiom, \n " ,
" Nearest to state: atmosphere, vorarlberg, esoteric, presidents, madhya, majority, moulin, bowmen, \n " ,
" Nearest to on: upon, in, catalans, tezuka, minotaurs, wakefulness, batavians, guglielmo, \n " ,
" Nearest to and: or, but, thirst, signifier, which, however, including, unattractive, \n " ,
" Nearest to eight: six, nine, seven, five, four, three, zero, two, \n " ,
" Nearest to they: we, there, he, you, it, she, these, not, \n " ,
" Nearest to more: less, most, quite, very, further, faster, toed, deicide, \n " ,
" Nearest to other: various, different, many, attackers, are, joachim, nihilo, reject, \n " ,
" Average loss at step 52000 : 3.43597227755 \n " ,
" Average loss at step 54000 : 3.25126817495 \n " ,
" Average loss at step 56000 : 3.35102432287 \n " ,
" Average loss at step 58000 : 3.44654818082 \n " ,
" Average loss at step 60000 : 3.4287913968 \n " ,
" Nearest to been: become, be, was, prem, had, remained, hadad, stanislavsky, \n " ,
" Nearest to time: year, way, period, stv, barely, name, stage, restoring, \n " ,
" Nearest to over: about, through, golden, adaption, counternarcotics, up, mattingly, brick, \n " ,
" Nearest to not: still, never, nor, kiwifruit, they, nearly, therefore, rarely, \n " ,
" Nearest to three: two, five, four, six, seven, eight, one, nine, \n " ,
" Nearest to if: when, though, before, where, although, because, can, could, \n " ,
" Nearest to there: they, it, he, still, she, we, this, often, \n " ,
" Nearest to between: with, from, churchmen, among, ethical, within, vma, panasonic, \n " ,
" Nearest to from: through, into, under, during, between, in, suing, across, \n " ,
" Nearest to state: atmosphere, infringe, madhya, vorarlberg, government, bowmen, vargas, republic, \n " ,
" Nearest to on: upon, through, within, ridiculous, janis, in, under, over, \n " ,
" Nearest to and: or, while, including, but, of, like, whose, bannister, \n " ,
" Nearest to eight: nine, six, five, four, seven, zero, three, two, \n " ,
" Nearest to they: we, there, you, he, it, these, she, prisons, \n " ,
" Nearest to more: less, most, quite, further, toed, very, faster, rather, \n " ,
" Nearest to other: different, various, many, nihilo, these, amour, including, screenplays, \n " ,
" Average loss at step 62000 : 3.38358767056 \n " ,
" Average loss at step 64000 : 3.41693099326 \n " ,
" Average loss at step 66000 : 3.39588000977 \n " ,
" Average loss at step 68000 : 3.35567189544 \n " ,
" Average loss at step 70000 : 3.38878934443 \n " ,
" Nearest to been: become, be, was, prem, remained, were, being, discounts, \n " ,
" Nearest to time: year, way, day, period, barely, ethos, stage, reason, \n " ,
" Nearest to over: about, through, fortunately, semigroup, theremin, off, loudest, up, \n " ,
" Nearest to not: still, nor, never, they, actually, nearly, unelected, therefore, \n " ,
" Nearest to three: five, two, four, six, seven, eight, nine, zero, \n " ,
" Nearest to if: when, though, before, where, because, then, after, since, \n " ,
" Nearest to there: they, it, he, often, she, we, usually, still, \n " ,
" Nearest to between: among, with, within, from, ethical, churchmen, racial, prentice, \n " ,
" Nearest to from: through, into, within, during, under, until, between, across, \n " ,
" Nearest to state: city, atmosphere, desks, surrounding, preservation, bohr, principal, republic, \n " ,
" Nearest to on: upon, tezuka, through, within, wakefulness, catalans, at, ingeborg, \n " ,
" Nearest to and: or, but, while, including, thirst, jerzy, massing, abadan, \n " ,
" Nearest to eight: seven, six, nine, five, four, three, two, zero, \n " ,
" Nearest to they: we, you, he, there, she, it, prisons, who, \n " ,
" Nearest to more: less, most, quite, very, faster, smaller, further, larger, \n " ,
" Nearest to other: various, different, some, screenplays, lab, many, including, debugging, \n " ,
" Average loss at step 72000 : 3.41103189731 \n " ,
" Average loss at step 74000 : 3.44926435578 \n " ,
" Average loss at step 76000 : 3.4423020488 \n " ,
" Average loss at step 78000 : 3.41976813722 \n " ,
" Average loss at step 80000 : 3.39511853886 \n " ,
" Nearest to been: become, be, remained, was, grown, were, prem, already, \n " ,
" Nearest to time: year, way, period, reason, barely, distance, stage, day, \n " ,
" Nearest to over: about, fortunately, through, semigroup, further, mattingly, rawlings, golden, \n " ,
" Nearest to not: still, they, nor, never, we, kiwifruit, noaa, really, \n " ,
" Nearest to three: five, two, seven, four, eight, six, nine, zero, \n " ,
" Nearest to if: when, where, though, before, since, because, although, follows, \n " ,
" Nearest to there: they, it, he, we, she, still, typically, actually, \n " ,
" Nearest to between: with, among, within, in, racial, around, from, serapeum, \n " ,
" Nearest to from: into, through, in, within, under, using, during, towards, \n " ,
" Nearest to state: city, atmosphere, ferro, vorarlberg, surrounding, republic, madhya, national, \n " ,
" Nearest to on: upon, poll, in, from, tezuka, janis, through, within, \n " ,
" Nearest to and: or, but, including, while, s, which, thirst, although, \n " ,
" Nearest to eight: nine, seven, six, five, four, three, zero, two, \n " ,
" Nearest to they: we, you, there, he, she, it, these, not, \n " ,
" Nearest to more: less, most, smaller, very, faster, quite, rather, larger, \n " ,
" Nearest to other: various, different, joachim, including, theos, smaller, individual, screenplays, \n " ,
" Average loss at step 82000 : 3.40933967865 \n " ,
" Average loss at step 84000 : 3.41618054378 \n " ,
" Average loss at step 86000 : 3.31485116804 \n " ,
" Average loss at step 88000 : 3.37068593091 \n " ,
" Average loss at step 90000 : 3.2785516749 \n " ,
" Nearest to been: become, be, was, prem, remained, grown, recently, already, \n " ,
" Nearest to time: year, way, period, day, barely, battle, buds, name, \n " ,
" Nearest to over: through, about, fortunately, off, theremin, semigroup, extraterrestrial, mattingly, \n " ,
" Nearest to not: nor, still, never, otherwise, generally, separately, gown, hydrate, \n " ,
" Nearest to three: four, five, six, two, eight, seven, nine, zero, \n " ,
" Nearest to if: when, where, before, though, because, since, then, while, \n " ,
" Nearest to there: they, it, he, we, she, still, typically, fiorello, \n " ,
" Nearest to between: with, among, within, from, churchmen, prentice, racial, panasonic, \n " ,
" Nearest to from: through, into, across, during, towards, until, at, within, \n " ,
" Nearest to state: bohr, city, atmosphere, ferro, bowmen, republic, retaliation, vorarlberg, \n " ,
" Nearest to on: upon, in, tezuka, at, during, within, via, catalans, \n " ,
" Nearest to and: or, including, but, while, like, thirst, with, schuman, \n " ,
" Nearest to eight: seven, nine, six, five, four, three, zero, two, \n " ,
" Nearest to they: we, there, he, you, she, it, prisons, these, \n " ,
" Nearest to more: less, most, very, faster, larger, quite, smaller, better, \n " ,
" Nearest to other: different, various, tamara, prosthetic, including, individual, failing, restaurants, \n " ,
" Average loss at step 92000 : 3.40355363208 \n " ,
" Average loss at step 94000 : 3.35647508007 \n " ,
" Average loss at step 96000 : 3.34374570692 \n " ,
" Average loss at step 98000 : 3.4230104093 \n " ,
" Average loss at step 100000 : 3.36909827 \n " ,
" Nearest to been: become, be, grown, was, being, already, remained, prem, \n " ,
" Nearest to time: way, year, day, period, years, days, mothersbaugh, separators, \n " ,
" Nearest to over: through, about, semigroup, further, fortunately, off, into, theremin, \n " ,
" Nearest to not: never, nor, still, dieppe, really, unelected, actually, now, \n " ,
" Nearest to three: four, two, five, seven, six, eight, nine, zero, \n " ,
" Nearest to if: when, though, where, before, is, abe, then, follows, \n " ,
" Nearest to there: they, it, he, we, still, she, typically, often, \n " ,
" Nearest to between: within, with, among, churchmen, around, explores, from, reactance, \n " ,
" Nearest to from: into, through, within, across, in, between, using, workshop, \n " ,
" Nearest to state: atmosphere, bohr, national, ferro, germ, desks, city, unpaid, \n " ,
" Nearest to on: upon, in, within, tezuka, janis, batavians, about, macrocosm, \n " ,
" Nearest to and: or, but, purview, thirst, sukkot, epr, including, honesty, \n " ,
" Nearest to eight: seven, nine, six, four, five, three, zero, one, \n " ,
" Nearest to they: we, there, you, he, she, prisons, it, these, \n " ,
" Nearest to more: less, most, very, quite, faster, larger, rather, smaller, \n " ,
" Nearest to other: various, different, tamara, theos, some, cope, many, others, \n "
]
}
] ,
" source " : [
" num_steps = 100001 \n " ,
" \n " ,
" with tf.Session(graph=graph) as session: \n " ,
" tf.global_variables_initializer().run() \n " ,
" print \" Initialized \" \n " ,
" average_loss = 0 \n " ,
" for step in xrange(num_steps): \n " ,
" batch_data, batch_labels = generate_batch( \n " ,
" batch_size, num_skips, skip_window) \n " ,
" feed_dict = { train_dataset : batch_data, train_labels : batch_labels} \n " ,
" _, l = session.run([optimizer, loss], feed_dict=feed_dict) \n " ,
" average_loss += l \n " ,
" if step % 2000 == 0: \n " ,
" if step > 0: \n " ,
" average_loss = average_loss / 2000 \n " ,
" # The average loss is an estimate of the loss over the last 2000 batches. \n " ,
" print \" Average loss at step \" , step, \" : \" , average_loss \n " ,
" average_loss = 0 \n " ,
" # note that this is expensive (~20 % s lowdown if computed every 500 steps) \n " ,
" if step % 10000 == 0: \n " ,
" sim = similarity.eval() \n " ,
" for i in xrange(valid_size): \n " ,
" valid_word = reverse_dictionary[valid_examples[i]] \n " ,
" top_k = 8 # number of nearest neighbors \n " ,
" nearest = (-sim[i, :]).argsort()[1:top_k+1] \n " ,
" log = \" Nearest to %s : \" % valid_word \n " ,
" for k in xrange(top_k): \n " ,
" close_word = reverse_dictionary[nearest[k]] \n " ,
" log = \" %s %s , \" % (log, close_word) \n " ,
" print log \n " ,
" final_embeddings = normalized_embeddings.eval() "
]
} ,
{
" cell_type " : " code " ,
" execution_count " : null ,
" metadata " : {
" cellView " : " both " ,
" colab " : {
" autoexec " : {
" startup " : false ,
" wait_interval " : 0
}
} ,
" colab_type " : " code " ,
" collapsed " : true ,
" id " : " jjJXYA_XzV79 "
} ,
" outputs " : [ ] ,
" source " : [
" num_points = 400 \n " ,
" \n " ,
" tsne = TSNE(perplexity=30, n_components=2, init= ' pca ' , n_iter=5000) \n " ,
" two_d_embeddings = tsne.fit_transform(final_embeddings[1:num_points+1, :]) "
]
} ,
{
" cell_type " : " code " ,
" execution_count " : null ,
" metadata " : {
" cellView " : " both " ,
" colab " : {
" autoexec " : {
" startup " : false ,
" wait_interval " : 0
} ,
" output_extras " : [
{
" item_id " : 1
}
]
} ,
" colab_type " : " code " ,
" collapsed " : false ,
" executionInfo " : {
" elapsed " : 4763 ,
" status " : " ok " ,
" timestamp " : 1445965465525 ,
" user " : {
" color " : " #1FA15D " ,
" displayName " : " Vincent Vanhoucke " ,
" isAnonymous " : false ,
" isMe " : true ,
" permissionId " : " 05076109866853157986 " ,
" photoUrl " : " //lh6.googleusercontent.com/-cCJa7dTDcgQ/AAAAAAAAAAI/AAAAAAAACgw/r2EZ_8oYer4/s50-c-k-no/photo.jpg " ,
" sessionId " : " 2f1ffade4c9f20de " ,
" userId " : " 102167687554210253930 "
} ,
" user_tz " : 420
} ,
" id " : " o_e0D_UezcDe " ,
" outputId " : " df22e4a5-e8ec-4e5e-d384-c6cf37c68c34 "
} ,
" outputs " : [
{
" data " : {
" image/png " : " iVBORw0KGgoAAAANSUhEUgAAA3MAAANpCAYAAAChBGCHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz \n AAALEgAACxIB0t1+/AAAIABJREFUeJzs3XdAldUfx/H3BdlbQEVzoyDukZaae5aZ5tbcIzUz9x5Z \n jhwNNXMVztTExFHqT9Ny50hFc+ZKEVBwAbLh/v4gSXILChc/r3+69/Lc53yfewL8cM5zjsFoNBoR \n ERERERERk2KW0QWIiIiIiIjI01OYExERERERMUEKcyIiIiIiIiZIYU5ERERERMQEKcyJiIiIiIiY \n IIU5ERERERERE5TmMBceHk7fvn1p2LAhb775JgEBAdy6dYvOnTtTv359unTpQnh4eHrUKiIiIiIi \n Iv8wpHWfuaFDh/Lqq6/SvHlzEhISiI6OZvbs2bi4uNC9e3fmzZtHeHg4gwYNSq+aRUREREREXnpp \n GpmLiIjg4MGDNG/eHIBs2bLh4ODAtm3baNq0KQBNmzbll19+SXulIiIiIiIikiJbWt4cGBhI9uzZ \n GT58OKdOnaJ48eKMGDGC69ev4+bmBoCbmxvXr19Pl2JFREREREQkWZpG5hISEjhx4gRt2rTB398f \n Gxsb5s2bl+oYg8GAwWBIU5EiIiIiIiKSWprCXK5cuciZMyelSpUCoH79+pw4cQI3NzdCQ0MBuHbt \n GtmzZ3/kedJ4256IiIiIiMhLJ03TLN3d3fHw8ODChQsULFiQvXv34unpiaenJ/7+/vTo0YM1a9ZQ \n p06dR57HYDAQGhqRllIkE3N3d1D/ZmHq36xLfZu1qX+zLvVt1qb+zbrc3R2e+j1pCnMAo0ePZtCg \n QcTHx5MvXz4mTZpEYmIi/fr148cffyRPnjx89dVXaW1GRERERERE7pHmMOft7c2PP/543+sLFy5M \n 66lFRERERETkIdK8abiIiIiIiIi8eApzIiIiIiIiJkhhTkRERERExAQpzImIiIiIiJgghTkRERER \n ERETpDAnIiIiIiJighTmRERERERETJDCnIiIiIiIiAlSmBMRERERETFBCnMiIiIiIiImSGFORERE \n RETEBCnMiYiIiIiImCCFOREREREREROkMCciIiIiImKCFOZERERERERMkMKciIiIiIiICVKYExER \n ERERMUEKcyIiIiIiIiZIYU5ERERERMQEKcyJiIiIiIiYIIU5ERERERERE6QwJyIiIiIiYoIU5kRE \n REREREyQwpyIiIiIiIgJUpgTERERERExQQpzIiIiIiIiJkhhTkRERERExAQpzImIiIiIiJgghTkR \n ERERERETpDAnIiIiIiJighTmRERERERETJDCnIiIiIiIiAlSmBMRERERETFBCnMiIiIiIiImSGFO \n RERERETEBCnMiYiIiIiImCCFOREREREREROkMCciIiIiImKCFOZERERERERMkMKciIiIiIiICVKY \n ExERERERMUEKcyIiIiIiIiZIYU5ERERERMQEKcyJiIiIiIiYIIU5ERERERERE6QwJyIiIiIiYoIU \n 5kREREREREyQwpyIiIiIiIgJUpgTERERERExQQpzIiIiIiIiJkhhTkRERERExAQpzImIiIiIiJgg \n hTkRERERERETpDAnIiIiIiJighTmRERERERETJDCnIiIiIiIiAlSmBMRERERETFBCnMiIiIiIiIm \n SGFORERERETEBCnMiYiIiIiImCCFOREREREREROkMCciIiIiImKCFOZERERERERMkMKciIiIiIiI \n CVKYExERERERMUEKcyIiIiIiIiZIYU5ERERERMQEKcyJiIiIiIiYIIU5ERERERERE6QwJyIiIiIi \n YoIU5kREREREREyQwpyIiIiIiIgJUpgTERERERExQQpzIiIiIiIiJkhhTkRERERExAQpzImIiIiI \n iJgghTkRERERERETpDAnIiIiIiJighTmRERERERETJDCnIiIiIiIiAlSmBMRERERETFBCnMiIiIi \n IiImSGFORERERETEBCnMiYiIiIiImCCFOREREREREROULaMLEBERkcxnxYqlbNiwHoBGjZpQrVoN \n Bg78kFKlyvLnnwG4u+dg0qTPsbKy4sqVQL74Ygq3bt3E2tqaoUNHki9fgYy9ABGRl4BG5kRERCSV \n U6dOsnHjT8yfv4i5cxeyfr0/ERHhBAZeplmzlixZshJ7ewe2b98GwJQpE+jffzDffbeE3r0/4vPP \n J2fwFYiIvBw0MiciIiKpHD16hGrVamJlZQ1A9eq1CAg4jIdHHjw9iwDg5eVNcHAQ0dHRHDt2lNGj \n h6a8Pz4+IUPqFhF52SjMiYiISCoGg+GBr1taWqQ8NjMzJykpDqMxCQcHBxYsWPaiyhMRkX9omqWI \n iIikUrp0GXbs+I3Y2Biio6PZseNXSpcue99xRqMRW1s7cufOza+//pLy2tmzf73okkVEXkoamRMR \n EZFUihb15s03G9G9e0cA3n67KQ4OjveN2N19PmbMeKZN+4xFi3xJSEigTp16KdMxRUTk+TEYjUZj \n RhcBEBoakdElyHPi7u6g/s3C1L9Zl/o2a1P/Zl3q26xN/Zt1ubs7PPV7NM1SREREntnx4+eYP38j \n f/xxMqNLERF56SjMiYiIyDNZt24/LVpEM3JkC1q1smLhwu0ZXZKIyEtFYU5ERESeyeLFNwgLex0w \n EB5emqVLYzK6JBGRl4rCnIiIiDwTo9HwyOciIvJ8KcyJiIjIM2nd2gEXl0MA2NmdpkULLZItIvIi \n 6aeuiIiIPJMWLSpToMAJ9u3zo2TJXFSvXiujSxIReakozImIiMgze/VVH1591SejyxAReSlpmqWI \n iIiIiIgJUpgTERERERExQQpzIiIiIiIiJkhhTkRERERExAQpzImIiGSAyMhI/P1XZXQZIiJiwhTm \n REREMkBERDj+/n4ZXYaIiJgwbU0gIiKSAebMmcmVK4F07tyWIkW8qFatJlWrVmP48EE4OjoyfPgY \n fvppLUFBV+jRozcrVixlw4b1ADRq1ISWLdtk8BWIiEhG08iciIhIBujVqy958rzCggXLqFTpdY4e \n PQxAWNg1/v77IgBHjx6hbNlynDp1ko0bf2L+/EXMnbuQ9ev9+euv0xlYvYiIZAYKcyIiIhnAaDSm \n PC5VqgwBAUe4ePECBQsWxsUlO9evh3H8+DFKlCjN0aNHqFatJlZW1tjY2FC9ei0CAg5nYPUiIpIZ \n aJqliIhIBnN3z0FkZAT79u2hdOmyhIeHs3XrFmxtbbGxscFgMKQ63mg03veaiIi8fDQyJyIikgFs \n bW2JiopKeV68eElWrlxOmTLlKF26DCtWLKVUqbIAlC5dhh07fiM2Nobo6Gh27vwt5WsiIvLy0sic \n 
iIhIBnBycqZkydJ06NCK116rTKlSZThwYB958rxCzpy5iIgIp3Tp5MBWtKg3b77ZiO7dOwLw9ttN \n KVKkaEaWLyIimYDBeO+k/QwUGhqR0SXIc+Lu7qD+zcLUv1mX+jbzuHr1KsHBoXh7e2JtbZ0u51T/ \n Zl3q26xN/Zt1ubs7PPV7NM1SREQkE/P13U61apepV8+DRo02cflySEaXJCIimYTCnIiISCYVHx/P \n rFnx3LxZEyjA0aPt+eKLgxldloiIZBIKcyIiIplUXFwcUVH2qV6LibHMoGpERCSzUZgTERHJpOzs \n 7Kha9W8gGgBHx8M0bOiUsUWJiEimodUsRUREMrHZs5tRvPhPhIVBzZq5qF27UkaXJCIimYTCnIiI \n SCaWLVs2+vVrkNFliIhIJqRpliIiIiIiIiZIYU5ERERERMQEKcyJiIjIA02ePJ6LFy9kdBkiIvIQ \n umdOREREHmjo0FEZXYKIiDyCwpyIiMhLIDo6mjFjhhEaGkpSUiIfftgHBwc3vv76S6Kjo3FwcMBo \n NBIaGsrVq8GMGPEx/v5+tG79Ht98M4OkpCQGDRrGwoXfcvr0SfLnL8DkyV/i6upGnz49KF68JIcO \n HSQyMoJhw8ZQunSZjL5
" text/plain " : [
" <matplotlib.figure.Figure at 0x2ee65e10> "
]
} ,
" metadata " : { } ,
" output_type " : " display_data "
}
] ,
" source " : [
" def plot(embeddings, labels): \n " ,
" assert embeddings.shape[0] >= len(labels), ' More labels than embeddings ' \n " ,
" pylab.figure(figsize=(15,15)) # in inches \n " ,
" for i, label in enumerate(labels): \n " ,
" x, y = embeddings[i,:] \n " ,
" pylab.scatter(x, y) \n " ,
" pylab.annotate(label, xy=(x, y), xytext=(5, 2), textcoords= ' offset points ' , \n " ,
" ha= ' right ' , va= ' bottom ' ) \n " ,
" pylab.show() \n " ,
" \n " ,
" words = [reverse_dictionary[i] for i in xrange(1, num_points+1)] \n " ,
" plot(two_d_embeddings, words) "
]
}
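,
{
" cell_type " : " markdown " ,
" metadata " : { } ,
" source " : [
" As a final illustrative step (an addition beyond the original exercise), the cell below queries nearest neighbors of an arbitrary word directly from `final_embeddings` with NumPy. The word `france` is only an example; any key of `dictionary` works. "
]
} ,
{
" cell_type " : " code " ,
" execution_count " : null ,
" metadata " : { } ,
" outputs " : [ ] ,
" source " : [
" # Illustrative only: rows of final_embeddings are L2-normalized, so a dot product \n " ,
" # against one row gives cosine similarities to every word in the vocabulary. \n " ,
" def print_nearest(word, top_k=8): \n " ,
"   if word not in dictionary: \n " ,
"     print word, 'is not in the vocabulary' \n " ,
"     return \n " ,
"   sims = np.dot(final_embeddings, final_embeddings[dictionary[word]]) \n " ,
"   for idx in (-sims).argsort()[1:top_k + 1]:  # skip index 0, the word itself \n " ,
"     print reverse_dictionary[idx], sims[idx] \n " ,
" \n " ,
" print_nearest('france')  # example query; substitute any vocabulary word "
]
}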
] ,
" metadata " : {
" colabVersion " : " 0.3.2 " ,
" colab_default_view " : { } ,
" colab_views " : { } ,
" kernelspec " : {
" display_name " : " Python 2 " ,
" language " : " python " ,
" name " : " python2 "
} ,
" language_info " : {
" codemirror_mode " : {
" name " : " ipython " ,
" version " : 2
} ,
" file_extension " : " .py " ,
" mimetype " : " text/x-python " ,
" name " : " python " ,
" nbconvert_exporter " : " python " ,
" pygments_lexer " : " ipython2 " ,
" version " : " 2.7.12 "
}
} ,
" nbformat " : 4 ,
" nbformat_minor " : 0
}