Word Embedding Demo

UMass CS 490A, 2020-10-06 and 2020-10-08

Using the is the smallest download of GLOVE pre-trained word embeddings from https://nlp.stanford.edu/projects/glove/

In [54]:
import numpy as np
In [3]:
lines = open("/d/lexical/glove/glove.6B.50d.txt").readlines()
In [2]:
len(lines)
Out[2]:
400000
In [3]:
lines[0]
Out[3]:
'the 0.418 0.24968 -0.41242 0.1217 0.34527 -0.044457 -0.49688 -0.17862 -0.00066023 -0.6566 0.27843 -0.14767 -0.55677 0.14658 -0.0095095 0.011658 0.10204 -0.12792 -0.8443 -0.12181 -0.016801 -0.33279 -0.1552 -0.23131 -0.19181 -1.8823 -0.76746 0.099051 -0.42125 -0.19526 4.0071 -0.18594 -0.52287 -0.31681 0.00059213 0.0074449 0.17778 -0.15897 0.012041 -0.054223 -0.29871 -0.15749 -0.34758 -0.045637 -0.44251 0.18785 0.0027849 -0.18411 -0.11514 -0.78581\n'
In [4]:
vocab = np.array([line.split()[0] for line in lines])
In [5]:
vocab
Out[5]:
array(['the', ',', '.', ..., 'rolonda', 'zsombor', 'sandberger'],
      dtype='<U68')
In [6]:
wordvecs = [ np.array( [float(x) for x in line.split()[1:]] )  for line in lines]
In [7]:
wordvecs = np.array(wordvecs)
In [18]:
wordvecs.shape
Out[18]:
(400000, 50)
In [24]:
np.where((vocab=='dog'))[0][0]
Out[24]:
2926
In [27]:
plane = wordvecs[np.where((vocab=='plane'))[0][0], :]
bird = wordvecs[np.where((vocab=='bird'))[0][0], :]
In [32]:
np.linalg.norm
Out[32]:
<function numpy.linalg.norm(x, ord=None, axis=None, keepdims=False)>
In [33]:
np.dot(bird, plane) / np.linalg.norm(plane) / np.linalg.norm(bird)
Out[33]:
0.361571250512347
In [11]:
def cossim(x,y):
    return np.dot(x,y) / np.linalg.norm(x) / np.linalg.norm(y)
In [35]:
cossim(wordvecs[np.where((vocab=='eagle'))[0][0], :],
       wordvecs[np.where((vocab=='bird'))[0][0], :])
Out[35]:
0.48209728826817394
In [36]:
cossim(wordvecs[np.where((vocab=='sky'))[0][0], :],
       wordvecs[np.where((vocab=='bird'))[0][0], :])
Out[36]:
0.3717682086716736
In [37]:
cossim(wordvecs[np.where((vocab=='dream'))[0][0], :],
       wordvecs[np.where((vocab=='bird'))[0][0], :])
Out[37]:
0.2771980452737902
In [38]:
cossim(wordvecs[np.where((vocab=='water'))[0][0], :],
       wordvecs[np.where((vocab=='sea'))[0][0], :])
Out[38]:
0.7154357446726445
In [39]:
cossim(wordvecs[np.where((vocab=='water'))[0][0], :],
       wordvecs[np.where((vocab=='h2o'))[0][0], :])
Out[39]:
0.25256160059745203
In [40]:
cossim(wordvecs[np.where((vocab=='soccer'))[0][0], :],
       wordvecs[np.where((vocab=='football'))[0][0], :])
Out[40]:
0.8964510705423745
In [41]:
cossim(wordvecs[np.where((vocab=='soccer'))[0][0], :],
       wordvecs[np.where((vocab=='baseball'))[0][0], :])
Out[41]:
0.71647383733098
In [43]:
target = wordvecs[np.where((vocab=='football'))[0][0], :]
In [44]:
target = wordvecs[np.where((vocab=='football'))[0][0], :]
scores = [cossim(target,  row) for row in wordvecs]
In [45]:
scores=np.array(scores)
In [49]:
highest_word_nums = np.argsort(-scores)[:5]
scores[highest_word_nums]
Out[49]:
array([1.        , 0.89645107, 0.88899709, 0.87867881, 0.86123517])
In [50]:
vocab[highest_word_nums]
Out[50]:
array(['football', 'soccer', 'league', 'basketball', 'club'], dtype='<U68')
In [51]:
highest_word_nums = np.argsort(-scores)[:30]
for i in highest_word_nums:
    print(i, scores[i], vocab[i])
535 0.9999999999999999 football
1733 0.8964510705423745 soccer
292 0.8889970924854076 league
1788 0.8786788106585918 basketball
449 0.8612351665848952 club
2354 0.8606578949666054 hockey
2707 0.8488245275584447 rugby
145 0.8386602505678181 team
1444 0.7990507471765447 baseball
4492 0.7885136944626482 coaching
648 0.786394624937422 player
776 0.7814492556485999 teams
6797 0.7754027369268953 coached
334 0.7749550183020617 played
612 0.7749485683962433 coach
3657 0.7649336093396811 athletic
2652 0.7640478921926308 nfl
6154 0.7633808470064196 leagues
479 0.7616741988209906 players
879 0.7608774693533801 championship
195 0.7551038160754967 season
697 0.7549335728029329 playing
2269 0.7432138584791385 junior
1162 0.7404312170094726 professional
2463 0.7381250019235271 clubs
13717 0.7352879839438771 varsity
2809 0.7326056482020311 nba
1664 0.7293409084866613 champions
2327 0.7281805372909017 squad
290 0.7261293935856018 games
In [53]:
target = wordvecs[np.where((vocab=='professor'))[0][0], :]
scores = np.array([cossim(target,  row) for row in wordvecs])
highest_word_nums = np.argsort(-scores)[:30]
for i in highest_word_nums:
    print(i, scores[i], vocab[i])
1282 1.0 professor
3332 0.8490980128362392 harvard
221 0.8417033708024436 university
3891 0.8324161464689673 associate
12589 0.8307599515252821 sociology
13274 0.8251417435031112 emeritus
10353 0.8136281050222984 psychologist
6601 0.8120549079540911 yale
10668 0.8077565302607912 lecturer
3674 0.8068307513881633 economics
1344 0.8046660038039287 dr.
6239 0.8041027140571557 researcher
4756 0.8030246960751849 scientist
1064 0.8003226432234166 institute
2599 0.7992411291418161 studied
18840 0.7968824406706098 sociologist
15872 0.7962586890180728 prof.
9917 0.7934651362033632 ph.d.
6104 0.7924675309350806 scholar
6878 0.7914219244138478 psychology
3482 0.783014433395256 graduate
1121 0.7772168877104714 science
3495 0.7741190204332098 graduated
14795 0.7724618251652826 anthropology
10034 0.767776812023105 cornell
8296 0.7672549324674627 teaches
3323 0.7588455737351705 taught
7030 0.7586951330951585 berkeley
7867 0.7524475130689451 princeton
7976 0.7520392251222152 hopkins

Clustering demo (2020-10-08)

In [8]:
from sklearn.cluster import KMeans
In [13]:
km = KMeans(30)

xx = wordvecs[:50000,:]
for i in range(xx.shape[0]):
    xx[i,:] = xx[i,:] / np.linalg.norm(xx[i,:])

km.fit(xx)
Out[13]:
KMeans(n_clusters=30)
In [14]:
for k,centroid in enumerate(km.cluster_centers_):
    print("CLUSTER",k)
    scores = np.array([cossim(centroid,  row) for row in xx])
    highest_word_nums = np.argsort(-scores)[:20]
    print(", ".join(["{:} ({:.3f})".format(vocab[i], scores[i])   for i in highest_word_nums ]))
CLUSTER 0
3.85 (0.933), 1.76 (0.929), 1.87 (0.924), 1.36 (0.923), 1.74 (0.923), 2.02 (0.923), 1.86 (0.922), 1.79 (0.922), 1.92 (0.921), 1.24 (0.920), 1.33 (0.919), 1.27 (0.919), 1.73 (0.918), 1.67 (0.918), 1.46 (0.918), 1.47 (0.917), 2.05 (0.917), 1.26 (0.916), 1.89 (0.916), 1.62 (0.915)
CLUSTER 1
belief (0.841), moral (0.833), embodied (0.822), morality (0.813), relevance (0.812), ideals (0.810), beliefs (0.808), sense (0.804), individuality (0.801), profound (0.796), wisdom (0.791), notions (0.789), genuine (0.788), necessity (0.786), spirituality (0.785), conceptions (0.779), desires (0.779), patriotism (0.776), manifestation (0.775), ignorance (0.772)
CLUSTER 2
well (0.936), making (0.934), but (0.931), this (0.926), to (0.926), only (0.925), for (0.924), though (0.922), same (0.922), still (0.921), even (0.921), as (0.919), . (0.918), because (0.917), that (0.916), now (0.914), both (0.914), instead (0.913), it (0.907), be (0.907)
CLUSTER 3
maggie (0.857), ex-wife (0.819), lillian (0.814), hannah (0.791), fonda (0.791), co-star (0.786), carrie (0.781), dorothy (0.779), lucy (0.775), scarlett (0.773), bridget (0.768), annie (0.766), alice (0.766), molly (0.765), bette (0.765), kate (0.765), aunt (0.760), niece (0.753), bessie (0.752), fiancée (0.751)
CLUSTER 4
wehr (0.824), moo (0.809), hoo (0.805), bakri (0.804), yasin (0.797), ah (0.795), mook (0.791), sahm (0.787), moh (0.784), nah (0.779), obeidi (0.778), ih (0.777), nee (0.775), behr (0.771), abdallah (0.769), beel (0.768), sy (0.768), adel (0.767), mee (0.763), bahr (0.758)
CLUSTER 5
downright (0.874), ironic (0.868), undeniably (0.864), simplistic (0.859), comical (0.855), contrived (0.843), admittedly (0.842), banal (0.837), formulaic (0.832), delightfully (0.832), amusing (0.832), incongruous (0.831), deliciously (0.829), endearing (0.828), oddly (0.828), pretentious (0.828), comically (0.822), glib (0.818), wonderfully (0.817), idiosyncratic (0.817)
CLUSTER 6
semifinal (0.861), semifinals (0.858), quarterfinals (0.844), finals (0.829), champion (0.822), clinched (0.812), quarterfinal (0.810), tournament (0.803), qualifying (0.797), slam (0.785), federer (0.785), titlist (0.782), compatriot (0.779), match (0.776), nadal (0.774), round (0.773), champions (0.773), wimbledon (0.772), agassi (0.771), hingis (0.771)
CLUSTER 7
parameters (0.872), linear (0.856), discrete (0.829), analogous (0.825), configurations (0.817), measurement (0.814), computation (0.810), functional (0.806), optimized (0.802), static (0.798), algorithms (0.796), modes (0.793), function (0.791), synchronization (0.791), interfaces (0.786), interface (0.786), binary (0.784), template (0.784), nonlinear (0.782), components (0.780)
CLUSTER 8
aircraft (0.844), helicopter (0.842), jet (0.833), flying (0.829), unmanned (0.804), planes (0.802), boats (0.801), ship (0.801), submarine (0.798), airplane (0.798), landing (0.798), ships (0.796), aboard (0.791), boat (0.790), airplanes (0.788), helicopters (0.787), towed (0.787), submarines (0.782), towing (0.778), vessel (0.768)
CLUSTER 9
1816 (0.962), 1795 (0.954), 1818 (0.953), 1817 (0.953), 1823 (0.953), 1792 (0.952), 1802 (0.949), 1826 (0.948), 1822 (0.947), 1784 (0.947), 1821 (0.947), 1786 (0.946), 1764 (0.945), 1768 (0.945), 1808 (0.945), 1799 (0.944), 1827 (0.943), 1790 (0.943), 1797 (0.942), 1811 (0.942)
CLUSTER 10
balochistan (0.827), peloponnese (0.807), baluchistan (0.797), northeastern (0.786), cotabato (0.782), province (0.782), andalusia (0.771), manipur (0.768), qinghai (0.764), huambo (0.763), mindanao (0.761), gansu (0.756), guangxi (0.756), visayas (0.756), guizhou (0.754), navarra (0.752), southwestern (0.748), northwestern (0.747), isabela (0.747), medan (0.747)
CLUSTER 11
seahawks (0.865), mavs (0.856), bruins (0.854), sixers (0.853), celtics (0.852), favre (0.850), finley (0.848), payton (0.847), iverson (0.841), mets (0.841), astros (0.838), mavericks (0.838), knicks (0.837), garnett (0.835), bibby (0.834), oilers (0.826), bengals (0.826), gooden (0.820), sonics (0.820), shanahan (0.820)
CLUSTER 12
djp (0.796), str (0.751), ^ (0.743), cn (0.741), est (0.734), eb (0.732), rr (0.726), dk (0.723), lk (0.717), w (0.710), cq (0.704), sf (0.703), 00 (0.703), -0 (0.703), pvs (0.700), --- (0.699), ------ (0.699), twx (0.698), -1 (0.697), / (0.695)
CLUSTER 13
chuan (0.843), tung (0.802), ying (0.799), thapa (0.795), bahadur (0.792), rong (0.787), guan (0.777), lal (0.775), lim (0.774), tong (0.771), ping (0.769), ching (0.767), hsiung (0.766), chun (0.765), jian (0.761), chiu (0.760), yong (0.760), kun (0.758), peng (0.756), tien (0.755)
CLUSTER 14
infection (0.862), disease (0.837), diabetes (0.836), diseases (0.833), cancers (0.832), asthma (0.825), respiratory (0.824), infections (0.822), treating (0.820), patients (0.809), symptoms (0.805), hepatitis (0.801), disorders (0.799), illnesses (0.796), liver (0.795), treat (0.794), fetal (0.793), viral (0.788), treatments (0.788), brain (0.785)
CLUSTER 15
adjacent (0.834), adjoining (0.834), walled (0.833), hillside (0.829), courtyard (0.808), buildings (0.799), houses (0.795), walls (0.787), overlooking (0.785), surrounded (0.780), dotted (0.779), nestled (0.778), wooded (0.775), terraces (0.771), ruins (0.769), brick (0.767), enclosed (0.767), sprawling (0.766), constructed (0.753), picturesque (0.743)
CLUSTER 16
savings (0.864), payments (0.863), costs (0.854), funds (0.832), expenses (0.830), taxes (0.829), fees (0.828), income (0.827), incentive (0.825), loans (0.825), benefits (0.822), purchases (0.816), paying (0.815), expense (0.815), borrowing (0.811), tax (0.810), salaries (0.810), cash (0.807), revenues (0.807), payroll (0.802)
CLUSTER 17
sleeves (0.831), dangling (0.823), oversized (0.819), fingers (0.807), tucked (0.801), underneath (0.798), nose (0.795), waist (0.794), slung (0.792), strung (0.791), oversize (0.785), forehead (0.781), sewn (0.780), pierced (0.780), shiny (0.780), padded (0.777), rope (0.775), bare (0.774), shoulders (0.772), protruding (0.772)
CLUSTER 18
345 (0.950), 390 (0.950), 380 (0.945), 385 (0.942), 330 (0.939), 260 (0.939), 315 (0.937), 265 (0.937), 280 (0.936), 285 (0.936), 248 (0.936), 295 (0.936), 340 (0.934), 520 (0.933), 540 (0.932), 312 (0.930), 290 (0.930), 176 (0.929), 305 (0.929), 445 (0.929)
CLUSTER 19
subsidiary (0.844), corp. (0.838), inc. (0.817), mci (0.811), honeywell (0.805), conglomerate (0.794), lucent (0.785), co. (0.784), ltd. (0.781), telecom (0.780), nortel (0.766), shares (0.763), holdings (0.760), sbc (0.756), llc (0.755), mobil (0.753), itt (0.753), pepsico (0.753), company (0.748), corp (0.743)
CLUSTER 20
reconsider (0.849), arguing (0.846), accept (0.834), rejecting (0.829), insisting (0.822), approve (0.821), consider (0.821), rejected (0.820), agreeing (0.816), decision (0.816), demanded (0.811), proposals (0.811), demands (0.806), proposal (0.804), consideration (0.804), approval (0.804), pending (0.801), rules (0.799), request (0.799), considering (0.798)
CLUSTER 21
schmid (0.795), werner (0.775), jens (0.769), herzog (0.768), christoph (0.766), klaus (0.748), rudolf (0.739), fischer (0.737), gruber (0.731), anders (0.731), josef (0.726), gunnar (0.724), hans (0.724), bernhard (0.723), andreas (0.722), gerd (0.720), joachim (0.720), baumann (0.717), kraus (0.716), mueller (0.716)
CLUSTER 22
bates (0.874), boyd (0.871), lyons (0.848), gould (0.843), moran (0.842), randall (0.841), gardner (0.840), mcintyre (0.837), garrett (0.836), jarvis (0.830), duffy (0.829), griffin (0.827), freeman (0.824), cunningham (0.824), dunn (0.823), jenkins (0.823), cowan (0.821), kelley (0.818), collins (0.817), barrett (0.815)
CLUSTER 23
musical (0.870), music (0.856), acclaimed (0.831), classics (0.827), anthology (0.826), genre (0.825), theatrical (0.825), songs (0.823), comic (0.822), featured (0.821), soundtrack (0.817), films (0.811), tunes (0.809), parody (0.803), soundtracks (0.794), titled (0.790), poetry (0.789), comedy (0.781), feature (0.780), dance (0.779)
CLUSTER 24
caballero (0.851), pereira (0.830), luis (0.821), osorio (0.817), joao (0.814), vicente (0.812), roberto (0.812), andrade (0.810), oliveira (0.804), humberto (0.802), carvalho (0.800), rocha (0.797), pinto (0.796), mendoza (0.795), julio (0.795), santos (0.795), henrique (0.794), carlos (0.794), souza (0.794), alves (0.793)
CLUSTER 25
persistent (0.833), exacerbated (0.831), worsening (0.830), turmoil (0.824), instability (0.813), escalating (0.810), inevitably (0.810), downturn (0.808), lingering (0.799), tumult (0.795), prolonged (0.791), spiraling (0.791), fears (0.789), jitters (0.789), upheaval (0.788), inevitable (0.788), uncertainty (0.785), repercussions (0.784), troubles (0.783), intensifying (0.782)
CLUSTER 26
instinctively (0.843), annoyed (0.828), amused (0.825), understandably (0.819), intimidated (0.819), blithely (0.817), disgusted (0.810), astonished (0.802), cheerfully (0.799), embarrassed (0.796), ashamed (0.795), aghast (0.793), loathe (0.792), reminding (0.791), comforted (0.789), disheartened (0.788), impatient (0.787), pretended (0.783), bewildered (0.783), frightened (0.775)
CLUSTER 27
roasted (0.866), beans (0.862), canned (0.850), tomato (0.849), fruit (0.840), potatoes (0.837), dried (0.837), baked (0.828), honey (0.828), vegetables (0.828), syrup (0.827), butter (0.823), juice (0.822), cooked (0.820), soup (0.812), berries (0.811), fermented (0.811), sauce (0.810), edible (0.806), pumpkin (0.805)
CLUSTER 28
militants (0.865), extremists (0.858), guerrillas (0.852), armed (0.834), militias (0.828), rebels (0.824), militant (0.818), civilians (0.816), insurgents (0.814), militiamen (0.810), separatists (0.804), suspected (0.804), fleeing (0.791), sympathizers (0.790), fundamentalists (0.787), muslim (0.787), islamists (0.785), dissidents (0.781), soldiers (0.780), kurdish (0.777)
CLUSTER 29
camden (0.864), bedford (0.853), milford (0.852), fairfield (0.844), worcester (0.834), newtown (0.826), bangor (0.824), malvern (0.819), roanoke (0.818), dorchester (0.811), burlington (0.808), cumberland (0.807), rochester (0.805), bridgeport (0.801), litchfield (0.798), middletown (0.798), stratford (0.797), exeter (0.795), richmond (0.792), monmouth (0.789)
In [15]:
km = KMeans(100)

xx = wordvecs[:50000,:]
for i in range(xx.shape[0]):
    xx[i,:] = xx[i,:] / np.linalg.norm(xx[i,:])

km.fit(xx)
Out[15]:
KMeans(n_clusters=100)
In [16]:
for k,centroid in enumerate(km.cluster_centers_):
    print("CLUSTER",k)
    scores = np.array([cossim(centroid,  row) for row in xx])
    highest_word_nums = np.argsort(-scores)[:20]
    print(", ".join(["{:} ({:.3f})".format(vocab[i], scores[i])   for i in highest_word_nums ]))
CLUSTER 0
improve (0.878), enhance (0.863), improving (0.862), aim (0.859), establishing (0.858), ensuring (0.858), efforts (0.853), initiatives (0.852), facilitate (0.849), ensure (0.849), aims (0.849), focus (0.848), strengthen (0.846), strengthening (0.844), maintaining (0.841), promote (0.838), development (0.836), establish (0.836), creating (0.827), focusing (0.826)
CLUSTER 1
schumacher (0.820), coulthard (0.819), slalom (0.815), hakkinen (0.815), cuche (0.799), raikkonen (0.797), devers (0.790), 400-meter (0.790), barrichello (0.788), biffle (0.787), 100-meter (0.787), motogp (0.784), vonn (0.781), villeneuve (0.779), mclaren (0.778), vasser (0.776), eberharter (0.774), zanardi (0.773), raich (0.771), ullrich (0.768)
CLUSTER 2
habsburg (0.840), throne (0.836), vassal (0.822), emperor (0.819), emperors (0.799), charlemagne (0.788), frankish (0.786), augustus (0.778), sigismund (0.777), vassals (0.776), constantine (0.772), ruler (0.763), viii (0.762), fief (0.761), mughal (0.759), reign (0.759), prussia (0.755), king (0.753), son-in-law (0.750), father-in-law (0.747)
CLUSTER 3
snakes (0.854), birds (0.813), insects (0.808), frog (0.806), crocodiles (0.805), turtles (0.804), lizards (0.804), mammals (0.799), squirrel (0.798), shark (0.795), rabbits (0.795), rodent (0.794), domesticated (0.792), beetles (0.790), ants (0.787), squirrels (0.785), goats (0.784), boar (0.784), butterflies (0.783), crabs (0.781)
CLUSTER 4
la (0.797), casa (0.789), grande (0.781), plata (0.773), del (0.772), tierra (0.763), norte (0.759), oro (0.757), luz (0.755), hora (0.752), una (0.745), mexicana (0.744), marche (0.738), fuego (0.735), de (0.734), centro (0.726), dans (0.714), brasil (0.713), vida (0.712), españa (0.711)
CLUSTER 5
came (0.927), on (0.925), first (0.924), while (0.919), in (0.917), time (0.916), took (0.915), only (0.914), . (0.914), one (0.909), over (0.904), for (0.903), last (0.903), three (0.903), before (0.900), next (0.899), five (0.898), from (0.896), six (0.896), since (0.895)
CLUSTER 6
championship (0.878), championships (0.861), tournaments (0.853), tournament (0.847), volleyball (0.826), competitions (0.817), finals (0.814), tourney (0.810), athletics (0.788), badminton (0.784), ncaa (0.782), qualifying (0.772), champions (0.769), aau (0.766), qualification (0.765), euroleague (0.763), qualifiers (0.760), asiad (0.760), softball (0.758), competed (0.754)
CLUSTER 7
luis (0.876), mendoza (0.870), flores (0.865), osorio (0.861), alvarez (0.860), enrique (0.856), humberto (0.854), gerardo (0.852), carlos (0.850), caballero (0.848), castillo (0.846), manuel (0.839), armando (0.835), moreno (0.835), jaime (0.835), julio (0.835), mendez (0.834), jorge (0.833), machado (0.833), pedro (0.831)
CLUSTER 8
spine (0.861), pelvis (0.852), bruise (0.844), forearm (0.837), throat (0.833), muscles (0.832), abdomen (0.827), scar (0.822), forehead (0.821), neck (0.816), blisters (0.815), tendons (0.814), abdominal (0.814), jaw (0.812), nose (0.811), chest (0.811), pelvic (0.809), spinal (0.808), cord (0.806), teeth (0.805)
CLUSTER 9
paintings (0.916), sculptures (0.901), painting (0.875), sculpture (0.865), masterpieces (0.858), drawings (0.837), decorative (0.835), reproductions (0.835), collection (0.825), artwork (0.822), portraits (0.820), murals (0.819), artworks (0.816), architectural (0.800), portrait (0.796), mural (0.790), collections (0.787), sculptural (0.786), art (0.784), artifacts (0.783)
CLUSTER 10
berman (0.842), shapiro (0.821), schwartz (0.820), greenberg (0.816), jaffe (0.814), siegel (0.814), abramson (0.810), schulman (0.806), kaplan (0.802), cramer (0.801), falk (0.800), feldman (0.794), berkowitz (0.788), shaffer (0.788), rosen (0.781), mandel (0.779), fishman (0.778), meyers (0.775), barnett (0.775), eisenberg (0.772)
CLUSTER 11
inherently (0.833), sufficiently (0.825), incapable (0.820), problematic (0.814), constrained (0.806), morally (0.798), insufficiently (0.797), technically (0.793), preferable (0.793), ethically (0.792), acutely (0.791), indistinguishable (0.791), fundamentally (0.780), irrelevant (0.774), impractical (0.772), admittedly (0.770), woefully (0.766), poorly (0.764), reasonably (0.763), socially (0.763)
CLUSTER 12
cronin (0.839), hodges (0.820), plummer (0.816), garrett (0.814), dickerson (0.814), finley (0.801), orr (0.795), mcdaniel (0.794), maddox (0.794), corey (0.791), callahan (0.787), coffey (0.785), chandler (0.785), zimmerman (0.784), nolan (0.782), humphries (0.780), boyd (0.779), westbrook (0.778), melvin (0.778), mullins (0.778)
CLUSTER 13
1926 (0.973), 1922 (0.973), 1935 (0.972), 1929 (0.972), 1934 (0.971), 1927 (0.969), 1925 (0.969), 1951 (0.968), 1921 (0.967), 1938 (0.966), 1950 (0.963), 1930 (0.962), 1937 (0.962), 1933 (0.962), 1958 (0.961), 1920 (0.961), 1931 (0.961), 1913 (0.960), 1953 (0.960), 1923 (0.959)
CLUSTER 14
tubes (0.840), cylinders (0.833), wheels (0.830), hose (0.825), pipe (0.819), fitted (0.804), wires (0.798), folding (0.791), pipes (0.786), tube (0.785), screws (0.783), screw (0.783), removable (0.775), wheel (0.775), tubing (0.767), casing (0.767), plastic (0.767), valves (0.766), cylinder (0.766), mesh (0.764)
CLUSTER 15
blatant (0.841), unwarranted (0.823), shameful (0.806), immoral (0.804), disgraceful (0.797), immorality (0.795), justifying (0.792), egregious (0.792), callous (0.792), odious (0.790), stupidity (0.789), unjust (0.788), deliberate (0.788), resorting (0.786), outrageous (0.784), insidious (0.783), unjustified (0.783), cowardice (0.782), irresponsible (0.774), reprehensible (0.774)
CLUSTER 16
bujumbura (0.865), bouake (0.861), jaffna (0.859), aceh (0.845), baluchistan (0.829), khartoum (0.822), kinshasa (0.820), mogadishu (0.816), krajina (0.816), kisangani (0.813), huambo (0.811), burundi (0.810), zaire (0.809), herat (0.809), sudanese (0.806), mindanao (0.805), zairean (0.799), goma (0.798), restive (0.795), banja (0.791)
CLUSTER 17
annoyed (0.892), understandably (0.883), amused (0.870), miffed (0.868), disheartened (0.863), mystified (0.861), dismayed (0.860), displeased (0.854), astonished (0.844), perplexed (0.843), embarrassed (0.841), aback (0.840), aghast (0.839), disgusted (0.839), apprehensive (0.832), puzzled (0.829), leery (0.819), elated (0.815), offended (0.810), irritated (0.810)
CLUSTER 18
yu (0.900), ching (0.883), guan (0.874), jian (0.871), chong (0.869), yong (0.867), shu (0.867), ying (0.865), yun (0.864), hua (0.860), yi (0.860), tien (0.858), chung (0.856), lai (0.853), kang (0.853), chou (0.852), ping (0.851), rong (0.850), ting (0.849), cheng (0.847)
CLUSTER 19
diabetes (0.887), infection (0.870), asthma (0.868), respiratory (0.848), cancers (0.841), symptoms (0.841), disease (0.839), infections (0.836), patients (0.833), treating (0.832), illnesses (0.830), diseases (0.825), alzheimer (0.824), disorders (0.824), hepatitis (0.822), complications (0.821), ailments (0.821), sufferers (0.817), illness (0.808), epilepsy (0.804)
CLUSTER 20
desktop (0.868), user (0.848), functionality (0.845), interface (0.838), server (0.832), software (0.830), built-in (0.828), messaging (0.827), handheld (0.818), ipod (0.817), compatible (0.815), interfaces (0.812), servers (0.810), users (0.810), hardware (0.809), digital (0.809), computers (0.806), audio (0.792), pcs (0.791), devices (0.788)
CLUSTER 21
particular (0.943), example (0.935), rather (0.925), certain (0.915), this (0.911), instance (0.911), similar (0.910), same (0.909), well (0.907), most (0.900), these (0.899), generally (0.898), although (0.896), fact (0.892), important (0.890), different (0.888), as (0.888), means (0.886), given (0.886), any (0.884)
CLUSTER 22
werner (0.818), kraus (0.790), ludwig (0.784), müller (0.783), gruber (0.781), fuchs (0.777), fritz (0.776), muller (0.775), gottlieb (0.770), johann (0.760), winkler (0.758), vogel (0.753), gunther (0.753), cleary (0.752), bernhard (0.752), jensen (0.748), heinrich (0.746), nils (0.744), florian (0.744), gunnar (0.742)
CLUSTER 23
2.07 (0.983), 2.04 (0.976), 2.05 (0.974), 1.97 (0.973), 1.71 (0.973), 2.02 (0.972), 2.11 (0.970), 1.81 (0.969), 2.06 (0.968), 2.09 (0.967), 1.92 (0.966), 2.03 (0.966), 1.89 (0.966), 1.87 (0.965), 1.74 (0.965), 1.79 (0.964), 2.12 (0.964), 1.91 (0.964), 1.59 (0.963), 2.01 (0.963)
CLUSTER 24
decline (0.882), expectations (0.877), weaker (0.874), slowing (0.874), declines (0.872), rise (0.871), gains (0.858), offset (0.855), declining (0.854), inflation (0.853), slowdown (0.852), prices (0.850), rising (0.843), steady (0.841), sluggish (0.837), boosted (0.830), falling (0.825), surge (0.823), growth (0.822), risen (0.820)
CLUSTER 25
roasted (0.897), tomato (0.895), sauce (0.894), butter (0.891), potatoes (0.882), beans (0.874), baked (0.873), canned (0.868), mashed (0.866), cooked (0.862), garlic (0.861), juice (0.855), soup (0.855), cheese (0.851), vinegar (0.849), creamy (0.845), pasta (0.845), broth (0.843), cream (0.842), custard (0.841)
CLUSTER 26
magnetic (0.889), electromagnetic (0.857), sensor (0.824), vibration (0.823), flux (0.820), velocity (0.819), gravity (0.814), vibrations (0.809), optical (0.800), amplitude (0.794), measurements (0.794), sensors (0.792), wavelength (0.788), x-ray (0.782), velocities (0.779), measurement (0.778), frequency (0.778), photon (0.777), plasma (0.776), electron (0.775)
CLUSTER 27
acknowledging (0.869), assertion (0.851), criticisms (0.844), suggestion (0.837), statements (0.832), reiterating (0.831), remarks (0.824), acknowledgment (0.824), assertions (0.823), suggestions (0.822), expressing (0.819), alluding (0.814), denials (0.810), rejecting (0.810), comments (0.804), criticism (0.798), apology (0.797), ignoring (0.795), asserting (0.790), regret (0.789)
CLUSTER 28
solana (0.813), kofi (0.800), albright (0.793), envoy (0.788), armitage (0.785), miliband (0.784), condoleezza (0.780), holbrooke (0.778), zoellick (0.771), annan (0.764), lavrov (0.764), ivanov (0.761), kozyrev (0.758), kouchner (0.757), ambassador (0.752), mccormack (0.739), akashi (0.738), bildt (0.733), ghali (0.730), briefed (0.726)
CLUSTER 29
lingering (0.855), exacerbated (0.840), instability (0.837), persists (0.835), anxieties (0.832), persistent (0.829), persisting (0.823), tumult (0.820), repercussions (0.816), mistrust (0.813), troubles (0.812), exacerbating (0.810), escalating (0.808), deepening (0.807), worsening (0.804), inevitably (0.803), turmoil (0.801), unease (0.800), malaise (0.797), persist (0.796)
CLUSTER 30
312 (0.980), 248 (0.979), 232 (0.973), 258 (0.973), 244 (0.972), 263 (0.970), 345 (0.970), 318 (0.969), 341 (0.969), 236 (0.967), 237 (0.967), 374 (0.966), 292 (0.966), 305 (0.965), 295 (0.964), 352 (0.964), 378 (0.964), 358 (0.964), 243 (0.964), 298 (0.964)
CLUSTER 31
honeywell (0.836), lucent (0.807), mci (0.806), subsidiary (0.796), corp. (0.794), pepsico (0.791), nortel (0.789), mobil (0.773), pharmacia (0.769), telecom (0.769), alcoa (0.766), inc. (0.765), unilever (0.756), vodafone (0.756), kpn (0.751), pfizer (0.750), amoco (0.749), alcatel (0.748), nestle (0.748), conglomerate (0.748)
CLUSTER 32
jacket (0.880), oversized (0.880), shiny (0.866), worn (0.864), embroidered (0.863), pants (0.862), satin (0.859), colored (0.858), trousers (0.856), sleeves (0.851), dyed (0.848), dresses (0.844), hats (0.843), scarf (0.843), socks (0.840), wears (0.839), oversize (0.838), pleated (0.832), blouse (0.832), leather (0.831)
CLUSTER 33
prosecution (0.897), charges (0.895), criminal (0.878), guilty (0.877), prosecutors (0.862), defendants (0.858), alleged (0.855), trial (0.828), acquitted (0.825), prosecuted (0.824), indictment (0.824), charged (0.820), convicted (0.819), fraud (0.818), arrest (0.814), felony (0.813), pleaded (0.812), conviction (0.810), wrongdoing (0.809), allegations (0.808)
CLUSTER 34
animated (0.847), comedy (0.844), mtv (0.842), theatrical (0.834), aired (0.828), tv (0.820), films (0.818), movies (0.816), premiere (0.814), hbo (0.812), airing (0.812), movie (0.811), live-action (0.810), television (0.809), productions (0.803), featured (0.802), episodes (0.801), specials (0.797), show (0.797), documentary (0.795)
CLUSTER 35
moore (0.929), collins (0.926), smith (0.925), allen (0.919), anderson (0.913), clark (0.908), cooper (0.906), griffin (0.906), bennett (0.905), campbell (0.904), thompson (0.903), freeman (0.901), harris (0.900), walker (0.899), russell (0.897), sullivan (0.897), gardner (0.897), robinson (0.896), parker (0.896), murphy (0.896)
CLUSTER 36
mets (0.880), seahawks (0.873), celtics (0.871), astros (0.868), mavs (0.866), sixers (0.862), bruins (0.861), payton (0.855), dodgers (0.850), mariners (0.850), mavericks (0.849), knicks (0.847), iverson (0.843), sox (0.842), sonics (0.842), oilers (0.841), yankees (0.841), orioles (0.840), finley (0.839), lakers (0.835)
CLUSTER 37
eto'o (0.857), midfielder (0.853), trezeguet (0.853), ronaldo (0.851), anelka (0.850), drogba (0.849), ballack (0.844), mancini (0.843), lampard (0.842), ronaldinho (0.842), messi (0.842), striker (0.842), rivaldo (0.837), ibrahimovic (0.833), bergkamp (0.828), ribery (0.823), totti (0.819), kaka (0.819), zidane (0.817), veron (0.816)
CLUSTER 38
corps (0.854), battalion (0.842), battalions (0.840), squadrons (0.834), marines (0.830), deployed (0.824), artillery (0.822), detachment (0.821), expeditionary (0.816), infantry (0.816), army (0.815), stationed (0.814), cavalry (0.811), squadron (0.810), command (0.810), reconnaissance (0.806), brigade (0.804), detachments (0.799), platoon (0.799), commando (0.798)
CLUSTER 39
chandigarh (0.843), fuzhou (0.819), izmir (0.814), prefecture (0.805), changsha (0.802), hefei (0.792), wenzhou (0.791), hebei (0.790), ningbo (0.784), lucknow (0.784), adana (0.783), kagoshima (0.781), mashhad (0.779), patna (0.778), hubei (0.776), nanchang (0.776), zhejiang (0.775), peloponnese (0.774), heilongjiang (0.773), hangzhou (0.771)
CLUSTER 40
restaurants (0.817), diners (0.805), dining (0.798), catering (0.795), eateries (0.792), meals (0.791), cafes (0.782), catered (0.770), establishments (0.765), shops (0.749), shopping (0.745), picnic (0.744), patrons (0.744), fancy (0.741), breakfasts (0.738), cater (0.736), lunches (0.734), menus (0.734), dinners (0.730), gourmet (0.729)
CLUSTER 41
ball (0.823), throws (0.818), scoring (0.816), tying (0.815), volley (0.809), missed (0.805), shots (0.801), bounced (0.793), straight (0.791), kick (0.790), lob (0.786), forehand (0.784), backhand (0.776), crossbar (0.767), smacked (0.767), score (0.766), chipped (0.765), footer (0.764), 3-pointers (0.763), minute (0.759)
CLUSTER 42
nemtsov (0.826), orban (0.806), tanaka (0.801), tariceanu (0.790), zeman (0.785), popescu (0.775), aleksander (0.754), buzek (0.751), topolanek (0.749), meciar (0.749), yilmaz (0.749), kudrin (0.746), zivkovic (0.742), gyurcsany (0.740), leszek (0.740), oleksy (0.735), parvanov (0.730), persson (0.729), yukio (0.729), jaroslaw (0.728)
CLUSTER 43
beast (0.818), monsters (0.811), unicorn (0.806), vampire (0.777), dragon (0.777), ape (0.771), tarzan (0.770), undead (0.770), monster (0.759), ninja (0.757), wizard (0.755), scorpion (0.743), werewolf (0.740), spider (0.739), slayer (0.737), monkey (0.735), superhero (0.732), demon (0.726), zombie (0.725), lassie (0.703)
CLUSTER 44
frustrate (0.814), dissuade (0.811), distract (0.810), rethink (0.800), resist (0.784), confine (0.782), restrain (0.779), overwhelm (0.778), redouble (0.777), legitimize (0.770), embarrass (0.769), isolate (0.768), hinder (0.766), rationalize (0.764), lest (0.761), prod (0.759), hesitate (0.759), embolden (0.757), deprive (0.755), convince (0.754)
CLUSTER 45
17.1 (0.989), 15.6 (0.988), 19.3 (0.987), 14.3 (0.986), 17.7 (0.986), 19.4 (0.986), 16.3 (0.985), 16.9 (0.984), 18.7 (0.984), 17.9 (0.984), 15.9 (0.983), 14.7 (0.982), 18.8 (0.982), 14.9 (0.982), 17.3 (0.982), 16.6 (0.982), 16.2 (0.982), 16.1 (0.982), 18.3 (0.982), 15.8 (0.982)
CLUSTER 46
christ (0.869), congregation (0.838), holy (0.831), worship (0.830), liturgy (0.808), church (0.802), rites (0.794), catholic (0.792), orthodox (0.792), sacred (0.790), anglican (0.774), latter-day (0.772), disciples (0.766), prayer (0.766), lutheran (0.765), episcopal (0.764), holiness (0.762), teachings (0.761), lds (0.760), believers (0.758)
CLUSTER 47
caused (0.869), devastating (0.859), triggered (0.857), causing (0.845), triggering (0.843), flooding (0.836), floods (0.830), severe (0.811), fires (0.807), sudden (0.800), storms (0.799), disturbances (0.797), landslides (0.791), havoc (0.789), torrential (0.789), deadly (0.788), sparking (0.786), panic (0.785), rains (0.779), aftermath (0.778)
CLUSTER 48
sachs (0.854), jpmorgan (0.853), merrill (0.842), nikko (0.841), stearns (0.836), citigroup (0.832), j.p. (0.828), lazard (0.828), nomura (0.828), brokerage (0.826), prudential (0.826), witter (0.826), ubs (0.819), securities (0.813), dresdner (0.808), barclays (0.807), warburg (0.803), suisse (0.800), salomon (0.797), daiwa (0.796)
CLUSTER 49
mohammed (0.843), ahmad (0.842), ahmed (0.834), abdallah (0.830), osman (0.830), hassan (0.825), rashid (0.824), ibrahim (0.822), nasir (0.821), yusuf (0.820), abdul (0.817), saleh (0.817), ali (0.816), othman (0.815), majid (0.814), muhammed (0.811), aziz (0.811), haji (0.810), mohammad (0.809), tahir (0.802)
CLUSTER 50
filipinos (0.822), expatriates (0.819), haitians (0.806), foreigners (0.798), mexicans (0.795), afghans (0.794), bangladeshis (0.790), somalis (0.790), indonesians (0.785), pakistanis (0.784), nigerians (0.780), immigrants (0.779), villagers (0.775), migrants (0.773), peasants (0.771), africans (0.769), colombians (0.766), guatemalans (0.763), asians (0.762), thais (0.762)
CLUSTER 51
courtyard (0.892), brick (0.868), walled (0.841), tiled (0.839), walls (0.835), floors (0.827), roof (0.827), facade (0.825), enclosed (0.814), spacious (0.802), buildings (0.797), porch (0.796), marble (0.791), wooden (0.791), ramshackle (0.790), facades (0.788), roofed (0.784), courtyards (0.784), houses (0.783), three-story (0.773)
CLUSTER 52
riverside (0.825), scottsdale (0.802), glendale (0.797), oceanside (0.791), pasadena (0.790), biltmore (0.771), chatsworth (0.770), palisades (0.764), riverfront (0.760), deerfield (0.743), inglewood (0.742), calabasas (0.738), fairmont (0.735), malibu (0.734), lakewood (0.734), oaks (0.731), tempe (0.729), fremont (0.721), aspen (0.719), savannah (0.719)
CLUSTER 53
graduate (0.903), undergraduate (0.888), faculty (0.873), teaching (0.870), postgraduate (0.844), harvard (0.838), academic (0.825), studying (0.821), doctoral (0.821), psychology (0.821), adjunct (0.817), yale (0.814), ph.d. (0.813), doctorate (0.811), post-graduate (0.808), studies (0.807), university (0.807), humanities (0.805), taught (0.804), phd (0.803)
CLUSTER 54
biographical (0.865), articles (0.846), unpublished (0.835), translations (0.832), printed (0.828), essays (0.823), editions (0.819), publish (0.812), excerpts (0.811), texts (0.809), text (0.808), translated (0.801), preface (0.800), commentaries (0.797), references (0.792), biographies (0.786), books (0.786), annotated (0.783), quotations (0.783), pages (0.778)
CLUSTER 55
2.5 (0.882), 1.25 (0.880), 3.5 (0.876), 1.75 (0.876), 1.8 (0.870), 1.5 (0.870), 3.6 (0.867), 1.4 (0.866), 4.5 (0.865), 2.8 (0.862), 2.2 (0.862), 7.5 (0.862), 1.1 (0.861), 1.7 (0.861), 1.2 (0.858), eur (0.856), 3.3 (0.855), 2.4 (0.853), 1.3 (0.852), 12.5 (0.851)
CLUSTER 56
violate (0.871), permitting (0.860), provisions (0.860), impose (0.849), enforce (0.849), laws (0.846), stipulates (0.836), requirement (0.834), mandating (0.832), provision (0.828), prohibit (0.827), prohibiting (0.827), rules (0.822), granting (0.820), clause (0.819), abolishing (0.816), statutes (0.816), applies (0.814), abolish (0.813), regulations (0.807)
CLUSTER 57
sen. (0.880), gephardt (0.871), mcconnell (0.860), cornyn (0.853), rep. (0.851), mccollum (0.843), kasich (0.841), gramm (0.837), armey (0.830), schumer (0.822), feinstein (0.821), dewine (0.821), harkin (0.818), boehner (0.814), brownback (0.813), moynihan (0.813), senator (0.812), crist (0.808), corzine (0.807), dodd (0.806)
CLUSTER 58
easygoing (0.864), delightfully (0.851), genial (0.843), wonderfully (0.841), amiable (0.839), undeniably (0.837), astonishingly (0.835), incredibly (0.835), deceptively (0.835), oddly (0.833), characteristically (0.833), refreshingly (0.829), amazingly (0.829), unbelievably (0.826), playful (0.826), awfully (0.823), decidedly (0.823), glib (0.822), downright (0.820), endearing (0.820)
CLUSTER 59
berisha (0.855), wahid (0.818), rajoelina (0.817), obasanjo (0.817), sali (0.816), ravalomanana (0.815), ranariddh (0.813), zia (0.805), museveni (0.799), aristide (0.794), bakiyev (0.788), rabbani (0.785), muluzi (0.784), deby (0.784), rakhmonov (0.781), gusmao (0.780), kabila (0.778), zelaya (0.776), rainsy (0.775), kumaratunga (0.774)
CLUSTER 60
inquirer (0.834), newsday (0.784), newspaper (0.783), ha'aretz (0.767), newsweek (0.762), editorial (0.759), sonntag (0.757), kommersant (0.752), dailies (0.747), handelsblatt (0.743), herald (0.741), news (0.741), gazeta (0.741), newsletter (0.734), npr (0.732), bild (0.730), blog (0.726), bulletin (0.725), ctk (0.725), watan (0.723)
CLUSTER 61
gilchrist (0.878), spinner (0.852), ponting (0.852), flintoff (0.851), mcgrath (0.848), pietersen (0.843), harmison (0.833), hooper (0.828), waugh (0.828), gayle (0.823), hoggard (0.821), batsman (0.818), vettori (0.815), kumble (0.815), warne (0.814), paceman (0.811), bowled (0.809), graeme (0.809), rounder (0.807), martyn (0.804)
CLUSTER 62
marshy (0.861), shoreline (0.855), lakes (0.849), valleys (0.848), swamps (0.846), dunes (0.844), grasslands (0.843), swampy (0.841), low-lying (0.840), arid (0.833), barren (0.830), forests (0.830), lowlands (0.823), coastline (0.823), basin (0.817), plains (0.814), vegetation (0.812), grassland (0.803), slopes (0.801), marshland (0.799)
CLUSTER 63
parliamentary (0.872), candidates (0.865), elected (0.845), vote (0.844), legislative (0.842), election (0.842), elections (0.839), legislature (0.834), legislators (0.833), parliament (0.827), democrats (0.826), democratic (0.826), votes (0.824), deputies (0.817), mps (0.817), party (0.815), voting (0.814), electing (0.813), lawmakers (0.806), senate (0.806)
CLUSTER 64
bastia (0.844), piacenza (0.842), boavista (0.842), getafe (0.840), porto (0.834), zaragoza (0.833), fiorentina (0.831), livorno (0.830), fc (0.829), benfica (0.827), nantes (0.823), verona (0.822), levante (0.822), valladolid (0.819), brescia (0.817), internazionale (0.815), bilbao (0.814), rostock (0.814), udinese (0.813), sevilla (0.812)
CLUSTER 65
companies (0.933), businesses (0.905), commercial (0.895), business (0.886), industry (0.873), firms (0.873), company (0.872), buying (0.855), products (0.844), sell (0.840), purchasing (0.840), operating (0.835), selling (0.832), market (0.831), suppliers (0.826), sales (0.825), industries (0.821), sector (0.816), manufacturing (0.815), domestic (0.813)
CLUSTER 66
0100 (0.832), 9:30 (0.828), 4:30 (0.826), 0800 (0.825), 11:00 (0.817), 8:30 (0.815), 0900 (0.814), 2200 (0.814), 10:00 (0.805), 12:30 (0.804), 0500 (0.803), 2100 (0.800), a.m (0.798), 3:30 (0.798), 9:00 (0.797), 2:30 (0.797), 10-11 (0.797), 0630 (0.795), edt (0.795), 0200 (0.794)
CLUSTER 67
resumption (0.815), stalled (0.805), monthlong (0.803), resuming (0.798), resume (0.797), yearlong (0.792), 10-day (0.791), negotiations (0.786), delaying (0.773), timetable (0.772), impasse (0.772), restarting (0.767), postponing (0.766), negotiation (0.764), delayed (0.753), halting (0.746), truce (0.745), protracted (0.745), stalemate (0.744), groundwork (0.744)
CLUSTER 68
7-5 (0.923), 6-1 (0.922), 6-2 (0.921), 6-3 (0.918), 6-4 (0.915), 6-0 (0.909), 7-6 (0.890), robredo (0.882), outlasted (0.880), schnyder (0.879), roddick (0.874), sugiyama (0.869), 3-6 (0.867), 2-6 (0.867), 4-6 (0.865), soderling (0.864), dementieva (0.861), zvonareva (0.857), rafter (0.856), seeded (0.855)
CLUSTER 69
suspected (0.893), militants (0.887), extremists (0.848), terrorists (0.845), insurgents (0.839), guerrillas (0.836), militant (0.835), armed (0.831), gunmen (0.821), suspects (0.816), plotting (0.803), criminals (0.802), attackers (0.802), terrorist (0.801), arresting (0.800), attacks (0.793), rebels (0.790), arrested (0.786), qaeda (0.785), civilians (0.783)
CLUSTER 70
germanic (0.879), slavic (0.877), turkic (0.844), javanese (0.832), iberian (0.803), berber (0.798), igbo (0.788), malay (0.785), archaic (0.782), norse (0.773), dialect (0.769), aramaic (0.768), anglo-saxon (0.767), mythology (0.762), antiquity (0.758), folklore (0.752), indo-european (0.751), andalusian (0.750), paganism (0.740), ancient (0.738)
CLUSTER 71
suv (0.834), sedans (0.832), sedan (0.832), wagon (0.811), chassis (0.804), camry (0.803), lexus (0.792), minivans (0.791), chevrolet (0.781), audi (0.779), roadster (0.778), 4x4 (0.778), minivan (0.776), mercedes (0.775), full-size (0.768), suvs (0.768), prius (0.766), taurus (0.761), jeep (0.759), bikes (0.756)
CLUSTER 72
flailing (0.849), bouncing (0.833), poking (0.830), crawled (0.825), darting (0.819), gingerly (0.819), poked (0.809), darted (0.801), awkwardly (0.794), trotted (0.790), trudged (0.788), ducked (0.787), zipped (0.787), tugged (0.785), clawing (0.783), waded (0.780), veering (0.777), hopped (0.769), stomped (0.767), glided (0.765)
CLUSTER 73
35th (0.976), 23rd (0.971), 41st (0.967), 34th (0.967), 32nd (0.966), 39th (0.965), 26th (0.965), 22nd (0.964), 33rd (0.964), 31st (0.962), 29th (0.960), 37th (0.960), 36th (0.960), 27th (0.959), 49th (0.959), 28th (0.958), 45th (0.957), 38th (0.955), 51st (0.953), 48th (0.953)
CLUSTER 74
ideology (0.883), nationalism (0.845), reactionary (0.837), liberalism (0.834), secular (0.813), nationalistic (0.811), radicalism (0.799), leanings (0.798), totalitarian (0.793), fervor (0.792), fascism (0.792), capitalism (0.783), right-wing (0.782), communism (0.778), espoused (0.775), ideologies (0.771), tendencies (0.770), espouse (0.767), rooted (0.767), radical (0.765)
CLUSTER 75
alden (0.818), cowan (0.798), harwood (0.793), fenton (0.790), langford (0.787), ingham (0.783), gould (0.782), barlow (0.781), lyons (0.775), metcalf (0.774), hardwick (0.766), bates (0.761), davison (0.761), keene (0.760), lawson (0.758), forsyth (0.755), bingham (0.754), chadwick (0.753), poole (0.753), kimball (0.753)
CLUSTER 76
42-year (0.878), 44-year (0.871), 43-year (0.866), 41-year (0.854), 8-year (0.853), schoolteacher (0.852), 9-year (0.852), 39-year (0.851), 55-year (0.838), 53-year (0.836), 49-year (0.835), 6-year (0.831), 28-year (0.830), 16-year (0.828), 31-year (0.828), 48-year (0.826), 45-year (0.825), 33-year (0.824), 24-year (0.823), 52-year (0.823)
CLUSTER 77
aboard (0.873), ship (0.856), jet (0.839), freighter (0.838), docked (0.832), ships (0.828), plane (0.828), planes (0.827), airliner (0.827), boat (0.826), aircraft (0.825), cargo (0.825), vessel (0.823), airplane (0.822), landing (0.818), flight (0.817), sail (0.813), helicopter (0.812), hijacked (0.809), 747 (0.803)
CLUSTER 78
government (0.889), hoped (0.888), calls (0.876), hold (0.874), would (0.870), officials (0.869), decision (0.868), demanded (0.867), agreed (0.866), sought (0.862), promised (0.861), saying (0.860), seeking (0.860), calling (0.860), consider (0.858), should (0.858), considering (0.858), support (0.858), insisted (0.857), to (0.857)
CLUSTER 79
sussex (0.877), maidstone (0.873), harrogate (0.873), norwich (0.871), cheshire (0.870), peterborough (0.865), yorkshire (0.864), nottingham (0.860), essex (0.860), shrewsbury (0.858), aberdeen (0.856), ipswich (0.856), lancashire (0.854), lincolnshire (0.854), southport (0.853), exeter (0.852), colchester (0.850), buckinghamshire (0.846), warrington (0.845), chesterfield (0.844)
CLUSTER 80
secretariat (0.809), ec (0.773), ilo (0.770), fsc (0.752), intergovernmental (0.739), igad (0.732), supervisory (0.724), sadc (0.720), csrc (0.709), unep (0.708), eac (0.706), directorate (0.704), igc (0.702), accrediting (0.698), oau (0.696), psc (0.694), idb (0.694), comesa (0.692), arf (0.690), consultative (0.684)
CLUSTER 81
formula_1 (0.916), finite (0.901), formula_3 (0.896), formula_2 (0.887), formula_4 (0.884), linear (0.875), formula_5 (0.874), formula_6 (0.870), equivalently (0.863), formula_8 (0.854), formula_9 (0.854), discrete (0.853), formula_7 (0.853), parameter (0.853), corresponds (0.841), formula_12 (0.840), formula_10 (0.839), analogous (0.837), variables (0.836), formula_13 (0.832)
CLUSTER 82
surreal (0.887), comical (0.864), juxtaposition (0.849), fascinating (0.846), cinematic (0.834), captivating (0.830), poignant (0.829), whimsical (0.829), melodramatic (0.827), mesmerizing (0.824), retelling (0.823), quirky (0.821), evokes (0.818), amusing (0.815), riveting (0.812), bizarre (0.809), beguiling (0.808), evocative (0.805), unsettling (0.805), haunting (0.804)
CLUSTER 83
ih (0.894), beel (0.888), nee (0.871), dur (0.867), kee (0.864), kah (0.863), moh (0.861), ree (0.854), sah (0.853), sahm (0.851), ee (0.849), hahm (0.849), bah (0.846), koht (0.846), uh (0.843), suh (0.843), ehs (0.843), duh (0.843), yah (0.840), ah (0.838)
CLUSTER 84
roanoke (0.870), rochester (0.843), middletown (0.843), danville (0.843), charlottesville (0.839), farmington (0.839), bloomington (0.831), springfield (0.830), burlington (0.824), wilmington (0.823), albany (0.823), wichita (0.823), connecticut (0.822), missouri (0.817), camden (0.814), fayetteville (0.813), milford (0.813), maryland (0.808), illinois (0.806), macon (0.805)
CLUSTER 85
'cause (0.891), gotta (0.854), fucking (0.845), damn (0.845), kidding (0.835), yeah (0.820), pretend (0.810), fool (0.809), kinda (0.807), gonna (0.802), hey (0.798), anymore (0.787), crazy (0.787), crap (0.786), damned (0.783), ’re (0.778), darn (0.776), wanna (0.772), ?! (0.772), heck (0.770)
CLUSTER 86
maggie (0.840), christina (0.836), emily (0.817), lillian (0.816), katherine (0.811), julie (0.811), née (0.809), ann (0.807), michelle (0.806), carrie (0.805), esther (0.805), vanessa (0.803), laura (0.803), caroline (0.802), pamela (0.796), sophie (0.795), lisa (0.794), emma (0.793), josephine (0.792), janice (0.789)
CLUSTER 87
ablaze (0.851), burned (0.838), grenades (0.806), sprayed (0.802), smashed (0.796), rubble (0.792), debris (0.789), hurled (0.784), blew (0.783), burning (0.777), littered (0.775), bombs (0.773), bullets (0.773), parked (0.770), blown (0.768), ripped (0.768), burnt (0.765), explosives (0.761), assailants (0.759), strewn (0.758)
CLUSTER 88
nitrogen (0.862), chlorine (0.838), ammonia (0.830), impurities (0.815), purified (0.814), sulfur (0.812), chemicals (0.810), substances (0.805), mixtures (0.801), liquid (0.799), byproducts (0.796), contaminants (0.794), toxins (0.794), toxic (0.790), iodine (0.788), radioactive (0.787), additives (0.785), methanol (0.783), synthetic (0.777), formaldehyde (0.775)
CLUSTER 89
clooney (0.856), costner (0.845), depp (0.843), duvall (0.840), keaton (0.831), travolta (0.829), hanks (0.826), starring (0.825), barrymore (0.821), carrey (0.818), eastwood (0.815), stiller (0.809), streep (0.800), julianne (0.800), neeson (0.791), bogart (0.787), kidman (0.785), crowe (0.785), spacey (0.784), malkovich (0.783)
CLUSTER 90
rr (0.864), eb (0.824), str (0.810), djp (0.797), ts (0.785), scw (0.784), js (0.783), lk (0.779), pfg (0.777), db (0.777), bm (0.773), nr (0.764), rac (0.763), vg (0.762), pd (0.760), twx (0.759), dg (0.759), kh (0.758), agh (0.753), cl (0.753)
CLUSTER 91
1764 (0.984), 1768 (0.982), 1766 (0.980), 1716 (0.977), 1682 (0.976), 1711 (0.975), 1684 (0.975), 1744 (0.973), 1754 (0.972), 1736 (0.972), 1761 (0.972), 1762 (0.972), 1612 (0.971), 1722 (0.971), 1686 (0.971), 1681 (0.969), 1698 (0.969), 1767 (0.967), 1752 (0.967), 1676 (0.966)
CLUSTER 92
shouts (0.914), clapping (0.854), cheers (0.853), shouting (0.841), loud (0.836), loudly (0.830), screaming (0.826), screamed (0.826), cries (0.824), screams (0.817), yelling (0.816), applause (0.812), laughter (0.811), cheering (0.802), obscenities (0.800), shaking (0.795), cursing (0.795), booing (0.792), whispers (0.791), waving (0.788)
CLUSTER 93
songs (0.846), singers (0.839), guitar (0.836), duets (0.833), singing (0.829), tunes (0.825), duet (0.820), vocals (0.815), music (0.813), sings (0.811), pop (0.810), musical (0.808), jazz (0.805), piano (0.803), soundtrack (0.799), song (0.799), band (0.794), album (0.794), compositions (0.790), dance (0.787)
CLUSTER 94
empirical (0.843), definitions (0.841), methodology (0.835), concepts (0.832), qualitative (0.830), aspects (0.828), context (0.827), assumptions (0.818), mathematical (0.816), contexts (0.813), complexity (0.809), relevance (0.803), analyses (0.801), linguistic (0.800), subjective (0.798), parameters (0.792), reasoning (0.788), methodologies (0.786), methods (0.783), terminology (0.781)
CLUSTER 95
humility (0.873), individuality (0.854), empathy (0.847), honesty (0.839), generosity (0.827), reverence (0.822), newfound (0.820), persistence (0.820), sense (0.818), sacrificing (0.815), perseverance (0.812), genuine (0.806), clarity (0.804), familiarity (0.803), undeniable (0.803), originality (0.802), innate (0.801), greatness (0.800), ingenuity (0.800), imagination (0.799)
CLUSTER 96
payments (0.891), savings (0.840), payment (0.832), fees (0.831), cash (0.830), expenses (0.826), bonuses (0.822), paying (0.819), payouts (0.811), hefty (0.809), taxpayer (0.808), funds (0.807), monies (0.806), taxes (0.805), loans (0.804), pension (0.797), incentive (0.795), refund (0.794), expense (0.793), dividends (0.790)
CLUSTER 97
so (0.962), sure (0.959), something (0.958), always (0.954), how (0.951), everyone (0.948), really (0.948), even (0.948), what (0.947), anything (0.946), come (0.943), n't (0.942), why (0.942), nothing (0.942), we (0.941), else (0.938), way (0.938), know (0.937), 're (0.934), simply (0.933)
CLUSTER 98
connecting (0.886), corridor (0.861), connects (0.854), north-south (0.851), route (0.841), roadway (0.838), traverses (0.832), terminus (0.831), crossing (0.821), crosses (0.809), east-west (0.807), highway (0.804), stretches (0.801), traversing (0.801), intersection (0.798), lanes (0.795), bridge (0.786), intersecting (0.783), loop (0.782), routes (0.779)
CLUSTER 99
23,000 (0.975), 33,000 (0.971), 4,500 (0.968), 24,000 (0.967), 22,000 (0.966), 27,000 (0.966), 32,000 (0.964), 2,800 (0.964), 19,000 (0.962), 14,000 (0.961), 2,400 (0.961), 2,600 (0.960), 36,000 (0.960), 48,000 (0.960), 12,500 (0.959), 3,200 (0.959), 18,000 (0.958), 85,000 (0.958), 13,000 (0.956), 16,000 (0.956)