文本的简单读:
String ReadPath = "dataset/textData/De_";
FileReader f2= new FileReader(ReadPath+(ij+1)+"_.txt");
BufferedReader readerText= new BufferedReader(f2);
for(String line = readerText.readLine(); line != null; line = readerText.readLine()){
if (line.replaceAll("[,。、|~!@#¥; 【》‘“”、】《·\\-??:,:{}%……&*()\\=—+ A-Za-z0-9]","").trim().equals("")) {
continue;}
//去除纯数字与字母的句子
if (line.trim().equals("")) {//去除空行
continue;}
System.out.println(line);
}
readerText.close();
简单写:
// Base path of the file where the training data is saved; "_.txt" is appended below.
String WritPath = "dataset/TrainInput/train_set";
// The second constructor argument (true) opens the file in append mode.
// try-with-resources flushes and closes the writer even when write() throws.
try (FileWriter writerText = new FileWriter(WritPath + "_.txt", true)) {
    // `text` is the String to append; it must be supplied by the surrounding code.
    writerText.write(text);
    // Terminate the record with a Windows-style line break.
    writerText.write("\r\n");
}
按编码读:
// 获取文件名
File r1 = new File("filename");
// 文件流读取
FileInputStream r2 = new FileInputStream(r1);
// 文件流写入缓存
BufferedInputStream r3 = new BufferedInputStream(r2);
// 按编码流读取
InputStreamReader r4 = new InputStreamReader(r3,"utf-8");
// 从缓存读取
BufferedReader r5 = new BufferedReader(r4,1024*1024);
for (String r6 = r5.readLine(); r5.readLine()!=null; r6=r5.readLine()) {
System.out.println(r6);
}
r5.close();
按编码写:
// 文件名获取
File w1 = new File("filename");
// 文件流输出
FileOutputStream w2 = new FileOutputStream(w1);
// 以编码流的形式输出
OutputStreamWriter w3 = new OutputStreamWriter(w2,"GBK");
w3.write("");
w3.close();
获取TXT文本编码格式(根据文件开头的BOM字节判断):
/**
 * Guesses the charset of a text file from its first two bytes (byte-order mark).
 *
 * <ul>
 *   <li>{@code EF BB} returns {@code "UTF-8"} (only the first two bytes of the
 *       three-byte UTF-8 BOM are inspected)</li>
 *   <li>{@code FF FE} returns {@code "Unicode"}</li>
 *   <li>{@code FE FF} returns {@code "UTF-16BE"}</li>
 *   <li>anything else, including files shorter than two bytes, returns {@code "GBK"}</li>
 * </ul>
 *
 * @param fileName path of the file to inspect
 * @return the charset name chosen from the table above; never {@code null}
 * @throws IOException if the file cannot be opened or read
 */
private static String getCharsetCode(String fileName) throws IOException {
    // try-with-resources so the file handle is released on every path.
    try (BufferedInputStream bin = new BufferedInputStream(new FileInputStream(fileName))) {
        int first = bin.read();
        int second = bin.read();
        // read() returns -1 at end of stream. Folding that into the arithmetic
        // below would let a lone 0xFF byte look like the FE FF mark, so files
        // with fewer than two bytes go straight to the default.
        if (first < 0 || second < 0) {
            return "GBK";
        }
        int p = (first << 8) + second;
        switch (p) {
            case 0xefbb:
                return "UTF-8";
            case 0xfffe:
                return "Unicode";
            case 0xfeff:
                return "UTF-16BE";
            default:
                return "GBK";
        }
    }
}
点击查看更多内容
1人点赞
评论
共同学习,写下你的评论
评论加载中...
作者其他优质文章
正在加载中
感谢您的支持,我会继续努力的~
扫码打赏,你说多少就多少
赞赏金额会直接到老师账户
支付方式
打开微信扫一扫,即可进行扫码打赏哦