Question
Please help, clustering is very hard for me.

Given the following data points, cluster these points according to the following three distance measurements: single link or MIN, complete link or MAX, and group average, to calculate the distance between two clusters. Each answer should include both nested kop [sic] and hierarchical clustering. x-and! 7. oo | 5,0D | le.。。| 2,oo |13,SDIhu |Sco |气50
0 0
Add a comment Improve this question Transcribed image text
Answer #1

import java.util.*;
public class NoOfwords {

    /**
     * Counts the words in a piece of text.
     *
     * <p>Counting starts at the first letter found in {@code s}; everything before
     * that letter is ignored. From there on, a word is a maximal run of characters
     * other than the space character {@code ' '}. Consecutive spaces and trailing
     * spaces therefore do not produce extra (empty) words.
     *
     * @param s the text to inspect; may be {@code null}
     * @return the number of words, or 0 when {@code s} is {@code null}, empty,
     *         or contains no letter at all
     */
    public int wordCount(String s) {
        if (s == null) {
            return 0;
        }
        final int length = s.length();

        // Skip everything in front of the first letter; with no letter there is no word.
        int start = 0;
        while (start < length && !Character.isLetter(s.charAt(start))) {
            start++;
        }

        int counter = 0;
        boolean inWord = false;
        for (int i = start; i < length; i++) {
            if (s.charAt(i) == ' ') {
                inWord = false;
            } else if (!inWord) {
                // Count a word once, on the transition from "between words" to "inside a word".
                inWord = true;
                counter++;
            }
        }
        return counter;
    }
}

/**
 * Stemmer, implementing the Porter Stemming Algorithm.
 *
 * <p>The Stemmer class transforms a word into its root form. The input word can be
 * provided a character at a time (by calling {@link #add(char)}) or in bulk (by
 * calling {@link #add(char[], int)}); {@link #stem()} then reduces it in place.
 *
 * <p>The word to be stemmed is expected to be in lower case: forcing lower case
 * must be done outside the Stemmer class.
 *
 * <p>Typical use:
 * <pre>
 *   Stemmer s = new Stemmer();
 *   s.add(word, wordLength);   // or s.add(ch) once per character
 *   s.stem();
 *   String root = s.toString();
 * </pre>
 */
class Stemmer {

    /** Unit of size whereby b is increased. */
    private static final int INC = 50;

    /** Buffer holding the word being stemmed (and, after stem(), the result). */
    private char[] b;
    /** Offset into b: number of characters added so far. */
    private int i;
    /** Offset to end of stemmed word. */
    private int i_end;
    /** General offset into b, set by ends() to the last character in front of a matched suffix. */
    private int j;
    /** Offset of the last character of the word as it currently stands. */
    private int k;

    public Stemmer() {
        b = new char[INC];
        i = 0;
        i_end = 0;
    }

    /**
     * Add a character to the word being stemmed. When you are finished
     * adding characters, you can call stem() to stem the word.
     *
     * @param ch the next character of the word
     */
    public void add(char ch) {
        if (i == b.length) {
            char[] grown = new char[i + INC];
            System.arraycopy(b, 0, grown, 0, i);
            b = grown;
        }
        b[i++] = ch;
    }

    /**
     * Adds wLen characters to the word being stemmed contained in a portion
     * of a char[] array. This is like repeated calls of add(char ch), but
     * faster.
     *
     * <p>A {@code wLen} of zero or less adds nothing. If {@code wLen} exceeds
     * {@code w.length} the copy fails with an index-out-of-bounds exception before
     * any character has been added.
     *
     * @param w    array whose first {@code wLen} characters are appended
     * @param wLen number of characters to take from {@code w}
     */
    public void add(char[] w, int wLen) {
        if (i + wLen >= b.length) {
            char[] grown = new char[i + wLen + INC];
            System.arraycopy(b, 0, grown, 0, i);
            b = grown;
        }
        if (wLen > 0) {
            System.arraycopy(w, 0, b, i, wLen);
            i += wLen;
        }
    }

    /**
     * After a word has been stemmed, it can be retrieved by toString(),
     * or a reference to the internal buffer can be retrieved by getResultBuffer
     * and getResultLength (which is generally more efficient.)
     */
    @Override
    public String toString() {
        return new String(b, 0, i_end);
    }

    /**
     * Returns the length of the word resulting from the stemming process.
     */
    public int getResultLength() {
        return i_end;
    }

    /**
     * Returns a reference to a character buffer containing the results of
     * the stemming process. You also need to consult getResultLength()
     * to determine the length of the result.
     */
    public char[] getResultBuffer() {
        return b;
    }

    /* cons(pos) is true <=> b[pos] is a consonant. A 'y' counts as a consonant at
       position 0 or when it follows a vowel. */
    private boolean cons(int pos) {
        switch (b[pos]) {
            case 'a': case 'e': case 'i': case 'o': case 'u':
                return false;
            case 'y':
                return (pos == 0) ? true : !cons(pos - 1);
            default:
                return true;
        }
    }

    /* m() measures the number of consonant sequences between 0 and j. if c is
       a consonant sequence and v a vowel sequence, and <..> indicates arbitrary
       presence,

          <c><v>       gives 0
          <c>vc<v>     gives 1
          <c>vcvc<v>   gives 2
          <c>vcvcvc<v> gives 3
          ....
    */
    private int m() {
        int n = 0;
        int pos = 0;
        // Skip the optional leading consonant sequence.
        while (true) {
            if (pos > j) return n;
            if (!cons(pos)) break;
            pos++;
        }
        pos++;
        while (true) {
            // Skip a vowel sequence ...
            while (true) {
                if (pos > j) return n;
                if (cons(pos)) break;
                pos++;
            }
            pos++;
            n++;
            // ... then the consonant sequence that completes one "vc" pair.
            while (true) {
                if (pos > j) return n;
                if (!cons(pos)) break;
                pos++;
            }
            pos++;
        }
    }

    /* vowelinstem() is true <=> 0,...j contains a vowel */
    private boolean vowelinstem() {
        for (int pos = 0; pos <= j; pos++) {
            if (!cons(pos)) return true;
        }
        return false;
    }

    /* doublec(pos) is true <=> pos,(pos-1) contain a double consonant. */
    private boolean doublec(int pos) {
        if (pos < 1) return false;
        if (b[pos] != b[pos - 1]) return false;
        return cons(pos);
    }

    /* cvc(pos) is true <=> pos-2,pos-1,pos has the form consonant - vowel - consonant
       and also if the second c is not w,x or y. this is used when trying to
       restore an e at the end of a short word. e.g.

          cav(e), lov(e), hop(e), crim(e), but
          snow, box, tray.
    */
    private boolean cvc(int pos) {
        if (pos < 2 || !cons(pos) || cons(pos - 1) || !cons(pos - 2)) return false;
        int ch = b[pos];
        return !(ch == 'w' || ch == 'x' || ch == 'y');
    }

    /* ends(s) is true <=> 0,...k ends with the string s. On a match j is set to the
       offset of the last character in front of the suffix; on a mismatch j is left
       untouched. */
    private boolean ends(String s) {
        int l = s.length();
        int o = k - l + 1;
        if (o < 0) return false;
        for (int c = 0; c < l; c++) {
            if (b[o + c] != s.charAt(c)) return false;
        }
        j = k - l;
        return true;
    }

    /* setto(s) sets (j+1),...k to the characters in the string s, readjusting
       k. */
    private void setto(String s) {
        int l = s.length();
        int o = j + 1;
        for (int c = 0; c < l; c++) {
            b[o + c] = s.charAt(c);
        }
        k = j + l;
    }

    /* r(s) is used further down: replace the matched suffix by s, but only when
       the stem in front of it has m() > 0. */
    private void r(String s) {
        if (m() > 0) setto(s);
    }

    /* step1() gets rid of plurals and -ed or -ing. e.g.

           caresses  ->  caress
           ponies    ->  poni
           ties      ->  ti
           caress    ->  caress
           cats      ->  cat

           feed      ->  feed
           agreed    ->  agree
           disabled  ->  disable

           matting   ->  mat
           mating    ->  mate
           meeting   ->  meet
           milling   ->  mill
           messing   ->  mess

           meetings  ->  meet
    */
    private void step1() {
        if (b[k] == 's') {
            if (ends("sses")) {
                k -= 2;
            } else if (ends("ies")) {
                setto("i");
            } else if (b[k - 1] != 's') {
                k--;
            }
        }
        if (ends("eed")) {
            if (m() > 0) k--;
        } else if ((ends("ed") || ends("ing")) && vowelinstem()) {
            k = j;
            if (ends("at")) {
                setto("ate");
            } else if (ends("bl")) {
                setto("ble");
            } else if (ends("iz")) {
                setto("ize");
            } else if (doublec(k)) {
                k--;
                int ch = b[k];
                // -ll, -ss and -zz keep their double consonant.
                if (ch == 'l' || ch == 's' || ch == 'z') k++;
            } else if (m() == 1 && cvc(k)) {
                setto("e");
            }
        }
    }

    /* step2() turns terminal y to i when there is another vowel in the stem. */
    private void step2() {
        if (ends("y") && vowelinstem()) b[k] = 'i';
    }

    /* step3() maps double suffixes to single ones. so -ization ( = -ize plus
       -ation) maps to -ize etc. note that the string before the suffix must give
       m() > 0. */
    private void step3() {
        if (k == 0) return; /* For Bug 1 */
        switch (b[k - 1]) {
            case 'a':
                if (ends("ational")) { r("ate"); break; }
                if (ends("tional")) { r("tion"); break; }
                break;
            case 'c':
                if (ends("enci")) { r("ence"); break; }
                if (ends("anci")) { r("ance"); break; }
                break;
            case 'e':
                if (ends("izer")) { r("ize"); break; }
                break;
            case 'l':
                if (ends("bli")) { r("ble"); break; }
                if (ends("alli")) { r("al"); break; }
                if (ends("entli")) { r("ent"); break; }
                if (ends("eli")) { r("e"); break; }
                if (ends("ousli")) { r("ous"); break; }
                break;
            case 'o':
                if (ends("ization")) { r("ize"); break; }
                if (ends("ation")) { r("ate"); break; }
                if (ends("ator")) { r("ate"); break; }
                break;
            case 's':
                if (ends("alism")) { r("al"); break; }
                if (ends("iveness")) { r("ive"); break; }
                if (ends("fulness")) { r("ful"); break; }
                if (ends("ousness")) { r("ous"); break; }
                break;
            case 't':
                if (ends("aliti")) { r("al"); break; }
                if (ends("iviti")) { r("ive"); break; }
                if (ends("biliti")) { r("ble"); break; }
                break;
            case 'g':
                if (ends("logi")) { r("log"); break; }
        }
    }

    /* step4() deals with -ic-, -full, -ness etc. similar strategy to step3. */
    private void step4() {
        switch (b[k]) {
            case 'e':
                if (ends("icate")) { r("ic"); break; }
                if (ends("ative")) { r(""); break; }
                if (ends("alize")) { r("al"); break; }
                break;
            case 'i':
                if (ends("iciti")) { r("ic"); break; }
                break;
            case 'l':
                if (ends("ical")) { r("ic"); break; }
                if (ends("ful")) { r(""); break; }
                break;
            case 's':
                if (ends("ness")) { r(""); break; }
                break;
        }
    }

    /* step5() takes off -ant, -ence etc., in context <c>vcvc<v>. Each case either
       breaks out of the switch with j marking the stem (suffix matched) or returns
       without touching the word. */
    private void step5() {
        if (k == 0) return; /* for Bug 1 */
        switch (b[k - 1]) {
            case 'a':
                if (ends("al")) break;
                return;
            case 'c':
                if (ends("ance")) break;
                if (ends("ence")) break;
                return;
            case 'e':
                if (ends("er")) break;
                return;
            case 'i':
                if (ends("ic")) break;
                return;
            case 'l':
                if (ends("able")) break;
                if (ends("ible")) break;
                return;
            case 'n':
                if (ends("ant")) break;
                if (ends("ement")) break;
                if (ends("ment")) break;
                /* element etc. not stripped before the m */
                if (ends("ent")) break;
                return;
            case 'o':
                /* j >= 0 fixes Bug 2 */
                if (ends("ion") && j >= 0 && (b[j] == 's' || b[j] == 't')) break;
                /* takes care of -ous */
                if (ends("ou")) break;
                return;
            case 's':
                if (ends("ism")) break;
                return;
            case 't':
                if (ends("ate")) break;
                if (ends("iti")) break;
                return;
            case 'u':
                if (ends("ous")) break;
                return;
            case 'v':
                if (ends("ive")) break;
                return;
            case 'z':
                if (ends("ize")) break;
                return;
            default:
                return;
        }
        if (m() > 1) k = j;
    }

    /* step6() removes a final -e if m() > 1 (or if m() == 1 and the stem does not
       end consonant-vowel-consonant), and reduces a final -ll to -l if m() > 1. */
    private void step6() {
        j = k;
        if (b[k] == 'e') {
            int a = m();
            if (a > 1 || a == 1 && !cvc(k - 1)) k--;
        }
        if (b[k] == 'l' && doublec(k) && m() > 1) k--;
    }

    /**
     * Stem the word placed into the Stemmer buffer through calls to add().
     * This method returns nothing; retrieve the result with
     * getResultLength()/getResultBuffer() or toString().
     *
     * <p>Words of one or two characters are left unchanged. After the call the
     * add offset is reset, so the next add() starts a new word in the same buffer.
     */
    public void stem() {
        k = i - 1;
        if (k > 1) {
            step1();
            step2();
            step3();
            step4();
            step5();
            step6();
        }
        i_end = k + 1;
        i = 0;
    }
}


import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.Properties;
import java.util.StringTokenizer;

public class Indexing {

    /** Input file read when no path is given on the command line. */
    private static final String DEFAULT_INPUT_PATH = "D:\\prsStemmFiles";

    /**
     * Reads a text file, splits it into whitespace-separated tokens, feeds every
     * token to a {@code searchForRepeat} instance in file order and finally asks
     * that instance to display its result.
     *
     * @param args optional; {@code args[0]} overrides the default input path
     * @throws Exception if the input file cannot be opened or read, or if
     *                   {@code searchForRepeat} fails
     */
    public static void main(String[] args) throws Exception {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
        ArrayList<String> al = readTokens(path);

        searchForRepeat ser = new searchForRepeat();
        for (String token : al) {
            ser.search(token);
        }
        ser.display();
    }

    /**
     * Returns all whitespace-separated tokens of the file at {@code path}, in order.
     *
     * <p>The file is decoded with the platform default charset and is always closed,
     * even when reading fails. Tokenizing line by line yields the same tokens as
     * tokenizing the whole content, because line terminators are themselves
     * {@link StringTokenizer} delimiters.
     */
    private static ArrayList<String> readTokens(String path) throws java.io.IOException {
        ArrayList<String> tokens = new ArrayList<>();
        try (BufferedReader in = new BufferedReader(new FileReader(path))) {
            String line;
            while ((line = in.readLine()) != null) {
                StringTokenizer str = new StringTokenizer(line);
                while (str.hasMoreTokens()) {
                    tokens.add(str.nextToken());
                }
            }
        }
        return tokens;
    }
}

Add a comment
Know the answer?
Add Answer to:
Please help, clustering is very hard for me. Given the following data points. Cluster these points...
Your Answer:

Post as a guest

Your Name:

What's your source?

Earn Coins

Coins can be redeemed for fabulous gifts.

Not the answer you're looking for? Ask your own homework help question. Our experts will answer your question WITHIN MINUTES for Free.
Similar Homework Help Questions
  • K-means clustering K-means clustering is a very well-known method of clustering unlabeled data. The simplicity of...

    K-means clustering K-means clustering is a very well-known method of clustering unlabeled data. The simplicity of the process made it popular to data analysts. The task is to form clusters of similar data objects (points, properties etc.). When the dataset given is unlabeled, we try to make some conclusion about the data by forming clusters. Now, the number of clusters can be pre-determined and number of points can have any range. The main idea behind the process is finding nearest...

  • Task 1 Following the Naïve Bayes classification example in the slides, please predict if the player...

    Task 1 Following the Naïve Bayes classification example in the slides, please predict if the player go out to play if 1. the outlook is Overcast 2. the temperature is cool 3. the humidity is high 4. not windy Task 2 Please import the “admit.csv” into Rstudio. In this dataset, we know the GRE score, the GPA, and the rank of 400 applicants for a graduate program. We also know if each of the candidates is admitted. In the admit...

  • please help me with this problem for statics very hard Three forces act on the anchor...

    please help me with this problem for statics very hard Three forces act on the anchor bolt connection shown below. The forces are concurrent at point A. Express all answers to three significant figures with the appropriate units F2 35 kN 2 F3 10 kN 0 Problem #1 Express force F1 in rectangular vector form. Take F, 20 kN and θ,-50". Problem#2 Express force Fa in rectangular vector form. Problem #3 Express force Fs in rectangular vector form. Problem #4...

  • can somebody please help me with the following two questions: List and explain three differences between...

    can somebody please help me with the following two questions: List and explain three differences between distance vector routing and link state routing. Explain some problems encountered with distance vector and link state routing. 2. In case a change occurs in the network (for example: a node/communication link comes up or goes down), which routing approach converges faster and why?

  • Detail explanation please. Consider the following set of data points: 1.5 1.0 0.5 0.0 -0.5 -1.0...

    Detail explanation please. Consider the following set of data points: 1.5 1.0 0.5 0.0 -0.5 -1.0 -1.5 -1.5 -1.0 -0.5 0.0 0.5 1.0 1.5 Single-linkage, average-linkage, and complete linkage clustering were all performed on this data. Which of the dendrograms below is the result of single-linkage, which is the result of average-linkage, and which is the result of complete-linkage? Explain your reasoning. 35 3.0 3.0 3.0 2.5 2.31 20 2.0 15 1.5 1.5 10 10 1.0 054 051 0.5 00...

  • Please help as it is very important task for me please write in Python code and...

    Please help as it is very important task for me please write in Python code and divide this into function and write comments for it 1. Ask the player’s name and welcome them to the game using their name. 2. Ask the player what is par for this game (number between 3-5 inclusive) 3. Ask the player what the distance to the hole for this game is (whole number between 195 and 250 inclusive) 4. Show the game menu: (I)nstructions...

  • can you please help me out on creating this product and all its costs? I'm very...

    can you please help me out on creating this product and all its costs? I'm very confused, thank you in advance!!!! That is the full question, I have to create a product and include/create all that info for it Please identify a product you will manufacture. Please complete the following steps for your product: 1) List at least twelve costs related to your product - you must use each category at least once. 2) Identify the per unit cost for...

  • I need help with certain questions. Please. 18. 4 points For each Rental, list the Rental...

    I need help with certain questions. Please. 18. 4 points For each Rental, list the Rental ID, Rental date, customer ID, customer first name, customer last name, and count of disks rented; sort by Rental ID. Show the Rental date formatted as ‘mm-dd-yyyy.’ Hint: use a GROUP BY clause. 19. 4 points List the disk ID, title name, rating, format description, and fee amount for all copies rented in Rental 3; sort by disk ID. Show the fee amount formatted...

  • Please help me in both questions, thank you very much :) QUESTION 6 According to the...

    Please help me in both questions, thank you very much :) QUESTION 6 According to the Herald Sun Newspaper, a study by the Centre for International Finance found that Australian monopolies perform better compared to monopolies elsewhere. Surprisingly, the research found that companies with monopoly power in Australia were three times more likely to invest in research and development than companies in competitive markets Which of the following are valid explanations for the existence of monopolies? 1. They have exclusive...

  • Please help me code the following in: JAVA Please create the code in as basic way...

    Please help me code the following in: JAVA Please create the code in as basic way as possible, so I can understand it better :) Full points will be awarded, thanks in advance! Program Description Write a program that demonstrates the skills we've learned throughout this quarter. This type of project offers only a few guidelines, allowing you to invest as much time and polish as you want, as long as you meet the program requirements described below. You can...

ADVERTISEMENT
Free Homework Help App
Download From Google Play
Scan Your Homework
to Get Instant Free Answers
Need Online Homework Help?
Ask a Question
Get Answers For Free
Most questions answered within 3 hours.
ADVERTISEMENT
ADVERTISEMENT
ADVERTISEMENT