Characteristics of HDFS:
Data redundancy and hardware fault tolerance (each block is kept as 3 replicas)
Streaming data access (write once, read many; files cannot be edited in place — an "update" means writing new blocks and deleting the old file, as sketched below)
Designed for large files (lots of small files bloat the NameNode's metadata: a "big head, small body" problem)
Suitability and limitations:
Good for batch reads and writes, with high throughput
Not suited to interactive applications; low latency is hard to guarantee
Good for write-once, read-many, sequential access
No support for multiple users writing the same file concurrently
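To make the write-once point concrete, here is a minimal sketch using the org.apache.hadoop.fs.FileSystem API; the paths are hypothetical, and replacing a file by rewriting it is just one common workaround, not the only one:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WriteOnceDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        Path old = new Path("/data/log.txt");       // hypothetical path
        Path tmp = new Path("/data/log.txt.new");   // hypothetical path

        // There is no API for editing the middle of an existing HDFS file.
        // To "modify" it, write a new file and swap it in for the old one.
        FSDataOutputStream out = fs.create(tmp, true);
        out.writeBytes("rewritten content\n");
        out.close();

        fs.delete(old, false);   // remove the old file
        fs.rename(tmp, old);     // the newly written blocks replace it
    }
}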
HDFS file read flow: the client asks the NameNode for the file's block locations, reads the blocks from the DataNodes that hold them, and assembles them into the file.
HDFS file write flow: the file is split into blocks, the NameNode assigns DataNodes for each block, the client streams each block to the first DataNode, which pipelines it to the other replicas, and the NameNode records the new metadata.
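A minimal client-side sketch of both flows through the org.apache.hadoop.fs.FileSystem API; the file path is hypothetical, and the comments summarize what happens behind each call:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsReadWrite {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);   // talks to the NameNode configured in core-site.xml

        // Write flow: the NameNode allocates blocks, then the client streams data
        // to a pipeline of DataNodes; replication happens inside that pipeline.
        Path file = new Path("/tmp/demo.txt");  // hypothetical path
        FSDataOutputStream out = fs.create(file);
        out.writeUTF("hello hdfs");
        out.close();

        // Read flow: the NameNode returns the block locations, then the client
        // reads each block directly from a DataNode that holds a replica.
        FSDataInputStream in = fs.open(file);
        System.out.println(in.readUTF());
        in.close();
    }
}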
HDFS data management:
1. Block replicas: each data block has 3 replicas, spread over 2 racks and 3 nodes for fault tolerance (see the sketch below for reading and changing a file's replication factor).
2. Heartbeat detection: every DataNode periodically sends a heartbeat to the NameNode to report that it is still alive.
3. Secondary NameNode: the NameNode periodically syncs its metadata image to the Secondary NameNode, so that the metadata can be recovered, and the secondary promoted, if the NameNode fails.
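A small sketch, assuming a cluster reachable through the default configuration and a hypothetical file path, of how the per-file replication factor behind point 1 can be inspected and changed:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReplicationDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // dfs.replication is the cluster-wide default (normally 3)
        System.out.println("default replication = " + conf.get("dfs.replication", "3"));

        FileSystem fs = FileSystem.get(conf);
        Path file = new Path("/tmp/demo.txt");        // hypothetical path
        FileStatus status = fs.getFileStatus(file);
        System.out.println("current replication = " + status.getReplication());

        // Ask the NameNode to keep 2 replicas of this file instead of the default
        fs.setReplication(file, (short) 2);
    }
}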
HDFS files are split into blocks for storage. The default block size is 64 MB (in Hadoop 1.x), and the block is the logical unit in which a file is stored and processed.
HDFS has two kinds of nodes: the NameNode and the DataNode.
The NameNode is the management node. It holds the file system metadata, which consists of two parts:
the mapping from files to data blocks
the mapping from data blocks to DataNodes
The NameNode is the single management node and holds all of this metadata. When a client makes a request, it first consults the NameNode to find out which nodes hold the file's blocks, then fetches the blocks from those DataNodes and assembles them into the file it wants.
The DataNode is the worker node of HDFS; it stores the data blocks.
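A small sketch, assuming the default FileSystem from the cluster configuration and a hypothetical file path, of how a client can ask the NameNode for exactly these two mappings through the public API:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BlockLocations {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        Path file = new Path("/tmp/bigfile.dat");          // hypothetical path
        FileStatus status = fs.getFileStatus(file);

        // file -> block mapping and block -> DataNode mapping, as reported by the NameNode
        BlockLocation[] blocks = fs.getFileBlockLocations(status, 0, status.getLen());
        for (BlockLocation block : blocks) {
            System.out.println("offset " + block.getOffset()
                    + ", length " + block.getLength()
                    + ", stored on " + String.join(",", block.getHosts()));
        }
    }
}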
Running a Java program under Linux:
First write the Java source file,
then compile it (with javac),
then package it into a jar (with the jar tool).
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class WordCount {

    // Mapper: for each input line, emit (word, 1) for every token in the line.
    public static class WordCountMap extends
            Mapper<LongWritable, Text, Text, IntWritable> {
        private final IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            StringTokenizer token = new StringTokenizer(line);
            while (token.hasMoreTokens()) {
                word.set(token.nextToken());
                context.write(word, one);
            }
        }
    }

    // Reducer: sum the counts emitted for each word and write (word, total).
    public static class WordCountReduce extends
            Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = new Job(conf);
        job.setJarByClass(WordCount.class);
        job.setJobName("wordcount");

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setMapperClass(WordCountMap.class);
        job.setReducerClass(WordCountReduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // args[0]: input directory on HDFS, args[1]: output directory (must not exist yet)
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.waitForCompletion(true);
    }
}
#include <stdio.h>

void function(int a, int b, int c);

int main(void)
{
    int x;
    x = 0;
    function(1, 2, 3);
    x = x + 1;          /* the instruction the overflow below tries to skip */
    printf("%d\n", x);
    return 0;
}

void function(int a, int b, int c)
{
    char buffer1[15];
    char buffer2[10];
    int *ret;
    /* point past buffer1 into the saved return address on the stack
       (the offset 28 is platform and compiler dependent) */
    ret = (int *)(buffer1 + 28);
    /* advance the return address by 5 bytes so that "x = x + 1" is skipped */
    (*ret) += 5;
}
#include <stdio.h>

void function(int a, int b, int c);

int main(void)
{
    int x;
    x = 0;
    function(1, 2, 3);
    x = 1;              /* the assignment the overflow below tries to skip */
    printf("%d\n", x);
    return 0;
}

void function(int a, int b, int c)
{
    char buffer1[5];
    char buffer2[10];
    int *ret;
    /* point past buffer1 into the saved return address
       (the offset 12 is platform and compiler dependent) */
    ret = (int *)(buffer1 + 12);
    /* advance the return address by 8 bytes so "x = 1" is skipped and 0 is printed */
    (*ret) += 8;
}
#include <stdio.h>

void function(int a, int b, int c)
{
    char buffer1[5];
    char buffer2[10];
    int *ret;
    ret = (int *)(buffer1 + 28);    /* stack offset; platform and compiler dependent */
    // (*ret) += 8;
    /* format-string write primitive: %hn stores the number of characters printed
       so far (237 from the padded %x) into the address passed as the next
       argument, here &ret; the trailing "n" is printed literally */
    printf("%237x%hnn\n", 0, (int *)&ret);
}

int main(void)
{
    int x;
    x = 0;
    function(1, 2, 3);
    x = 1;
    printf("%d\n", x);
    return 0;
}
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class Sort {

    // Mapper: parse each input line as an integer and emit (number, 1).
    public static class Map extends
            Mapper<Object, Text, IntWritable, IntWritable> {
        private static IntWritable data = new IntWritable();

        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            data.set(Integer.parseInt(line));
            context.write(data, new IntWritable(1));
        }
    }

    // Reducer: keys arrive in sorted order, so emit (rank, number),
    // writing the number once per occurrence.
    public static class Reduce extends
            Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
        private static IntWritable linenum = new IntWritable(1);

        @Override
        public void reduce(IntWritable key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            for (IntWritable val : values) {
                context.write(linenum, key);
                linenum = new IntWritable(linenum.get() + 1);
            }
        }
    }

    // Partitioner: split the key range [0, MaxNumber] into numPartitions
    // equal-width buckets so that the partitions are globally ordered.
    public static class Partition extends Partitioner<IntWritable, IntWritable> {
        @Override
        public int getPartition(IntWritable key, IntWritable value,
                int numPartitions) {
            int MaxNumber = 65223;
            int bound = MaxNumber / numPartitions + 1;
            int keynumber = key.get();
            for (int i = 0; i < numPartitions; i++) {
                if (keynumber >= bound * i && keynumber < bound * (i + 1))
                    return i;
            }
            return 0;   // fallback for keys outside the expected range
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args)
                .getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: Sort <in> <out>");
            System.exit(2);
        }

        Job job = new Job(conf, "Sort");
        job.setJarByClass(Sort.class);
        job.setMapperClass(Map.class);
        job.setPartitionerClass(Partition.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
You need to configure the Hadoop environment variables.
HDFS / Hadoop:
high cost (traditional commercial storage)
low cost (Hadoop runs on commodity hardware)
mature ecosystem
Hive
Hadoop
Big data is a good thing.