K-中心点算法是对K-Means均值算法的改进由于样本数据可能具有很大的极端值对象这些数据会严重的扭曲数据的分布而平方误差和的使用可能会更加恶化这一影响。而k-Medoide算法不是选取簇中对象的均值作为质心而是在每一个簇内选出一个实际的对象来代表该簇这个对象就称之为簇的中心点。
算法实现步骤
1.任意选择k个对象作为k个中心点
2.计算每个非中心点的对象到每个中心点之间的距离
3.把每一个非中心点对象分配到距离它最近的的中心点所代表的簇中
4.在每一个聚簇中按照顺序依次选取点计算该点到当前聚簇中所有点的距离之和最终距离之和最小的点则视为新的中心点
5 重复步骤2-4直到各个聚簇的中心点不在发生改变。
Java代码如下
package com.kmedoids;
import java.awt.font.TextHitInfo;
import java.util.ArrayList;
public class Cluster {
private String clusterName;//类簇名
private Medoid medoid;//类簇的质点
private ArrayList dataPoints;//类簇中个样本点
//Generate Constructor using Fileds
//构造方法完成簇类的初始化工作
public Cluster(String clusterName){
//super();
this.clusterName clusterName;
this.medoid null;
dataPoints new ArrayList();
}
public void setMedoid(Medoid c){
this.medoid c;
}
public Medoid getMedoid(){
return this.medoid;
}
//添加样本点
public void addDataPoint(DataPoint dp){
dp.setCluster(this);
this.dataPoints.add(dp);
}
public ArrayList getDataPoints() {
// TODO Auto-generated method stub
return this.dataPoints;
}
public void removeDataPoint(DataPoint dp){
this.dataPoints.remove(dp);
}
public int getNumDataPoints(){
return dataPoints.size();
}
public DataPoint getDataPoint(int pos){
return (DataPoint)this.dataPoints.get(pos);
}
public String getName(){
return this.clusterName;
}
}
/*******************************************/
package com.kmedoids;
import java.util.ArrayList;
public class DataPoint{
private double dimension[];//样本点的维度
private String PointName;//样本点的名字
private Cluster cluster;//类簇
private double euDt;//样本点到质点的距离
public DataPoint(double[] dimension, String pointName) {
//super();
this.dimension dimension;
//PointName pointName;
this.PointName pointName;
this.cluster null;
}
public Cluster getCluster() {
return cluster;
}
public void setCluster(Cluster cluster) {
this.cluster cluster;
}
//计算欧几里得距离
public double calEuclideanDistanceSum(){
double sum 0.0;
Cluster cluster this.getCluster();
ArrayList dataPoints cluster.getDataPoints();
for(int i0;i
double []dims dataPoints.get(i).getDimensioin();
for(int j0;j
double temp Math.pow(dims[j]-this.dimension[j],2);
sum temp;
}
}
return Math.sqrt(sum);
}
public double[] getDimensioin() {
// TODO Auto-generated method stub
return this.dimension;
}
public double testEuclideanDistance(Medoid md){
double sum 0.0;
double [] cDim md.getDimension();
for(int i0;i
double temp Math.pow(dimension[i]-cDim[i], 2);
sum temp;
}
return Math.sqrt(sum);
}
public String getPointName() {
return this.PointName;
}
public void setPointName(String pointName) {
PointName pointName;
}
public double getCurrentEudt(){
return this.euDt;
}
}
/*******************************************************/
package com.kmedoids;
import java.util.ArrayList;
import javax.xml.crypto.Data;
public class Medoid{
private double dimension[];//质点的维度
private Cluster cluster;//所属类簇
private double etdDisSum;//Medoid到本类簇中的所有的所有欧氏距离之和
public Medoid(double dimension[]){
this.dimensiondimension;
}
public double[] getDimension() {
return dimension;
}
public void setDimension(double[] dimension) {
this.dimension dimension;
}
public Cluster getCluster() {
return cluster;
}
public void setCluster(Cluster cluster) {
this.cluster cluster;
}
//取代价最小的函数点
public void calcMedoid(){
calcEtdDisSum();
double minEucDisSum this.etdDisSum;
ArrayList dps this.cluster.getDataPoints();
for(int i 0;i
//get()方法获得类簇上面的指定的点
double tempeucDisSum dps.get(i).calEuclideanDistanceSum();
if (tempeucDisSum
dimension dps.get(i).getDimensioin();
minEucDisSumtempeucDisSum;
}
}
}
//计算该Medoid到同类簇所有样本点的欧氏距离和
private void calcEtdDisSum() {
// TODO Auto-generated method stub
double sum 0.0;
Cluster cluster this.getCluster();
ArrayList dataPoints cluster.getDataPoints();
for(int i0;i
double [] dims dataPoints.get(i).getDimensioin();
for(int j0;j
double temp Math.abs(dims[j]-this.dimension[j]);
sum temp;
}
}
etdDisSum sum;
}
}
/*****************************************************/
package com.kmedoids;
import java.util.ArrayList;
import javax.xml.crypto.Data;
public class ClusterAnalysis{
private Cluster[] clusters;//所有类簇
private int miter;//迭代次数
//所有样本点
private ArrayList dataPoints new ArrayList();
//维度
private int dimNum;
public ClusterAnalysis(int k,int iter, ArrayList dataPoints, int dimNum) {
//super();
clusters new Cluster[k];//类簇的种类数
for(int i0;i
//调用Cluster的public方法
clusters[i]new Cluster("Cluster: " i);
}
this.miter iter;
this.dataPoints dataPoints;
this.dimNum dimNum;
}
public int getIterations(){
return miter;
}
public ArrayList[] getClusterOutput(){
ArrayList data[] new ArrayList[clusters.length];
for(int i0;i
data[i] clusters[i].getDataPoints();
}
return data;
}
public void startAnalysis(double[][] medoids){
setInitiaMedoids(medoids);
double[][]newMedoids medoids;
double[][]oldMedoids new double[medoids.length][this.dimNum];
while(!isEqual(oldMedoids,newMedoids)){
//每次迭代开始时清空各类簇的点
for(int m0;m
clusters[m].getDataPoints().clear();
}
for(int j 0;j
int clusterIndex 0;
double minDistance Double.MAX_VALUE;
//判断样本点属于哪一个类簇
for(int k0;k
double eucDistance dataPoints.get(j).testEuclideanDistance(clusters[k].getMedoid());
if(eucDistance
minDistance eucDistance;
clusterIndex k;
}
}
//将样本点添加到该类簇中
clusters[clusterIndex].addDataPoint(dataPoints.get(j));
}
//重新计算各类簇的质点
for(int m0;m
clusters[m].getMedoid().calcMedoid();
}
// medoids是一个二维数组
for(int i0;i
for(int j 0 ;j
oldMedoids[i][j] newMedoids[i][j];
}
}
for(int n0;n
newMedoids[n]clusters[n].getMedoid().getDimension();
}
this.miter ;
}
}
private boolean isEqual(double[][] oldMedoids, double[][] newMedoids) {
// TODO Auto-generated method stub
boolean flag false;
for(int i0;i
for(int j0;j
if(oldMedoids[i][j] ! newMedoids[i][j]){
return flag;
}
}
}
flag true;
return flag;
}
private void setInitiaMedoids(double[][] medoids) {
// TODO Auto-generated method stub
for(int n 0;n
Medoid medoid new Medoid(medoids[n]);
clusters[n].setMedoid(medoid);
medoid.setCluster(clusters[n]);
}
}
}
/***************************************************/
package com.kmedoids;
import java.util.ArrayList;
import java.util.Iterator;
public class TestMain{
public static void main(String[] args) {
ArrayList dataPoints new ArrayList();
double[] a{2,3};
double[] b{2,4};
double[] c{1,4};
double[] d{1,3};
double[] e{2,2};
double[] f{3,2};
double[] g{8,7};
double[] h{8,6};
double[] i{7,7};
double[] j{7,6};
double[] k{8,5};
//double[] gg{18,7};
//double[] hh{8,16};
//double[] ii{7,17};
//double[] jj{7,16};
//double[] kk{8,51};
double[] l{100,200};//孤立点
double[] m{8,20};
double[] n{8,19};
double[] o{7,18};
double[] p{7,17};
double[] q{7,20};
dataPoints.add(new DataPoint(a,"a"));
dataPoints.add(new DataPoint(b,"b"));
dataPoints.add(new DataPoint(c,"c"));
dataPoints.add(new DataPoint(d,"d"));
dataPoints.add(new DataPoint(e,"e"));
dataPoints.add(new DataPoint(f,"f"));
dataPoints.add(new DataPoint(g,"g"));
dataPoints.add(new DataPoint(h,"h"));
dataPoints.add(new DataPoint(i,"i"));
dataPoints.add(new DataPoint(j,"j"));
dataPoints.add(new DataPoint(k,"k"));
//dataPoints.add(new DataPoint(gg,"gg"));
//dataPoints.add(new DataPoint(hh,"hh"));
//dataPoints.add(new DataPoint(ii,"ii"));
//dataPoints.add(new DataPoint(jj,"jj"));
//dataPoints.add(new DataPoint(kk,"kk"));
dataPoints.add(new DataPoint(l,"l"));
dataPoints.add(new DataPoint(m,"m"));
dataPoints.add(new DataPoint(n,"n"));
dataPoints.add(new DataPoint(o,"o"));
dataPoints.add(new DataPoint(p,"p"));
dataPoints.add(new DataPoint(q,"q"));
//设置初始k值 初始迭代次数 样本点 样本维度
ClusterAnalysis ca new ClusterAnalysis(5, 0, dataPoints, 2);
double[][] cen {
//初始中心点的具体值是哪一个
//K-Medoide算法的初始质心是原样本中的点
{8,7},{8,6},{7,7},{8,19},{7,20}
};
ca.startAnalysis(cen);
ArrayList[]v ca.getClusterOutput();
for(int dti0;dti
ArrayList tempV v[dti];
System.out.println("类簇类别"dti":");
Iterator iter tempV.iterator();
//如果iter内还有元素可以进行迭代 则返回true
while(iter.hasNext())
{
DataPoint dpTemp (DataPoint)iter.next();
System.out.printf(dpTemp.getPointName()"\t");
}
System.out.println();
}
}
}
/********************************************/
运行结果