Android使用webview获取网站源码
最近在使用WebView获取网页的源码，但是对于百度百家号的文章，获取到的网页源代码只有标题，正文内容不见了，想问问有没有大佬知道这个问题的原因。用Python直接获取的话内容是正常的，所以应该不是网站限制的原因。
使用python的代码
#encoding:UTF-8
# NOTE: this is a Python 2 script (it relies on reload(), sys.setdefaultencoding()
# and urllib.urlopen(), none of which exist under those names in Python 3).
import urllib
import sys

# Python 2-only hack: switch the implicit str<->unicode codec from ASCII to UTF-8,
# presumably so that printing the decoded page below does not raise on non-ASCII text.
reload(sys)
sys.setdefaultencoding( "utf-8" )

# Download the raw page bytes and decode them to text.
url = "https://baijiahao.baidu.com/s?id=1647156988761941852&wfr=spider&for=pc"
data = urllib.urlopen(url).read()
data = data.decode('UTF-8')


def storageToLocalFiles(storagePath, data):
    """Write ``data`` to the file at ``storagePath``, replacing any existing file.

    :param storagePath: path of the file to create or overwrite.
    :param data: page content, either text or raw bytes; text is encoded as
        UTF-8 before it is written.
    """
    # The file is opened in binary mode, so encode text explicitly instead of
    # depending on the interpreter's default codec.
    payload = data if isinstance(data, bytes) else data.encode("utf-8")
    # The with-block closes the handle even if write() raises.
    with open(storagePath, "wb") as fhandle:
        fhandle.write(payload)


storagePath = r'\Users\roger\Desktop\FridaHook\1.html'
storageToLocalFiles(storagePath, data)
print(data)
使用Android Studio实现的获取网页源码的代码
package com.example.webview;
import android.app.Activity;
import android.content.Context;
import android.graphics.Bitmap;
import android.os.Build;
import android.os.Bundle;
import android.util.Log;
import android.webkit.WebView;
import android.webkit.WebViewClient;
import android.annotation.SuppressLint;
import android.webkit.JavascriptInterface;
import androidx.annotation.RequiresApi;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
public class MainActivity extends Activity {
private WebView webView;
@RequiresApi(api = Build.VERSION_CODES.JELLY_BEAN)
@SuppressLint("JavascriptInterface")
/** Called when the activity is first created. */
@Override
public void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_main);
webView = (WebView)findViewById(R.id.webview);
webView.getSettings().setAllowFileAccess(true);
webView.getSettings().setDomStorageEnabled(true);
webView.getSettings().setJavaScriptEnabled(true);
webView.getSettings().setAllowFileAccessFromFileURLs(true);
webView.getSettings().setDefaultTextEncodingName("utf-8");
//webView.getSettings().setUserAgentString("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0");
webView.addJavascriptInterface(new InJavaScriptLocalObj(), "local_obj");
webView.setWebViewClient(new MyWebViewClient());
webView.loadUrl("https://www.baidu.com");
}
final class MyWebViewClient extends WebViewClient{
public boolean shouldOverrideUrlLoading(WebView view, String url) {
view.loadUrl(url);
return true;
}
public void onPageStarted(WebView view, String url, Bitmap favicon) {
Log.d("WebView","onPageStarted");
super.onPageStarted(view, url, favicon);
}
public void onPageFinished(WebView view, String url) {
Log.d("WebView","onPageFinished ");
view.loadUrl("javascript:window.local_obj.showSource('<head>'+" +
"document.getElementsByTagName('html')[0].innerHTML+'</head>');");
super.onPageFinished(view, url);https://www.sportsfactbook.com
}
}
final class InJavaScriptLocalObj {
@SuppressLint("JavascriptInterface")
@JavascriptInterface
public void showSource(String html) {
Log.e("123", "123");
Log.e("HTML", html);
//将网页源码输出到/data/data/files/data中
FileOutputStream out = null;
BufferedWriter writer = null;
try{
Log.e("info1","write");
out = openFileOutput("data", Context.MODE_PRIVATE);
writer = new BufferedWriter(new OutputStreamWriter(out));
writer.write(html);
}catch (IOException e){
e.printStackTrace();
Log.e("info2","notwrite");
}finally {
try {
Log.e("info","write");
if (writer != null) {
writer.close();
}
} catch (IOException e) {
Log.e("error","notwrite");
e.printStackTrace();
}
}
}
}
}
共同学习,写下你的评论
评论加载中...
作者其他优质文章