PHP代理实现用老人机看小说
评论 0 热度 990
为了应对当前HTML格式和js脚本对老人机的不友好,我们需要处理一下某些小说网站,让它回到十几年前的样子,剔除大量CSS和JS,精简HTML标签,这样就能够让老人机快速、友好地看小说了。
下面的脚本已经可以实现这一点:它为老人机提供简单的浏览功能和必备的搜索功能,并且支持缓存。
简单说明
_remoteurl_:填写链接
_band_word_:删除关键词
_band_href_:删除URL包含该值的链接
_band_ele_:把某些没有替换掉的元素删除掉,这个设置的存在是由一个正则匹配引起的,一般不要修改。
关于缓存:在源码内搜索 $keeptime 即可找到相关代码。
快速配置
1. 基本配置
define处修改remoteurl和其他配置,注意看注释,别乱来。
2. 搜索配置
找到 if ($_GET['search']) : 这一行,下面有
$html = c_get('https://search2.booktxt.net/s.php', 'post', 't=1&keyword='.urlencode(_iconv($_GET['search'], 'GBK')));
这样一行,修改c_get的参数即可。
第一个参数是url,第二个是请求方式(post/get),第三个是传输过去的请求字符串,可能要用到转码功能,具体看对方页面的编码,用 _iconv(脚本自构的函数)转换。具体写什么需要手动抓取网站信息后手动填好。
例子:
# define_remoteurl:
define('_remoteurl_', 'https://www.biqutxt.com/');
# $_GET['search']后边:
$html = c_get('https://www.biqutxt.com/modules/article/search.php', 'post', 'searchtype=articlename&action=login&searchkey='.urlencode(_iconv($_GET['search'], 'GBK')));
若出现乱码,则可能为转码问题,也可能为原网页的问题,需排查,可以把URL上面的page参数进行urldecode+base64decode即得到源URL。
搬上完整代码:
<?php
/*
author: foxnes/luuljh
*/
// Report everything except warnings and notices.
error_reporting(E_ALL ^ E_WARNING ^ E_NOTICE);
date_default_timezone_set("PRC");

// Site being proxied. Must end with "/" and start with http(s)://
define('_remoteurl_', 'https://m.booktxt.net/');
// Regex alternation of text removed from every page. Separate entries with "|"; must not be empty.
define('_band_word_', '字体:|顶点小说移动网(m\.booktxt\.net)|37小说网');
// Regex alternation; anchors whose tag matches it are removed. Same rules as above.
define('_band_href_', 'cnzz\.com|mybook\.php');
// Regex alternation of tag names that are stripped by a dedicated pass.
define('_band_ele_', 'link');

// One cache file per distinct query string (hashed together with a fixed suffix).
$cachee = "cachepage/" . md5($_SERVER["QUERY_STRING"].'haha');
if (!file_exists("./cachepage/")) {
    mkdir('cachepage');
}

if (file_exists($cachee)) {
    // Requests carrying ?page= are kept for 1 year, everything else for 3 days.
    $keeptime = $_GET['page'] ? 31536000 : 259200;
    if (time() - filectime($cachee) < $keeptime) {
        // Still fresh: serve the stored copy and stop here.
        echo file_get_contents($cachee);
        exit;
    }
    // Expired: remove it and fall through to rebuild the page.
    unlink($cachee);
}
// Buffer everything from here on; the end of the script reads the buffer back
// so the finished page can be echoed and written to the cache file.
ob_start();
echo '<?xml version="1.0" encoding="UTF-8"?>';
?>
<!DOCTYPE html PUBLIC "-//WAPFORUM//DTD XHTML Mobile 1.2//EN"
"http://www.openmobilealliance.org/tech/DTD/xhtml-mobile12.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<title><?php
// The decoded ?page= value is attacker-controllable, so it is HTML-escaped
// before being echoed (the raw value would allow script injection, and the
// injected markup would also end up in the cached copy of the page).
// ENT_SUBSTITUTE keeps the title from collapsing to '' on invalid UTF-8 bytes.
echo 'WT ' . ($_GET['page'] ? htmlspecialchars(base64_decode($_GET['page']), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') : '');
?></title>
<meta name="viewport" content="width=device-width, minimum-scale=1.0, maximum-scale=2.0"/>
<style type="text/css">body{font-size: 13px;line-height: 19px} p{margin: 3px;} a+a{display: block}</style>
</head>
<body>
<a href="?">回到主页</a>
<br />
<form action="" method="get">
搜索小说:
<input type="text" name="search" />
<button type='submit'>搜索</button>
</form>
<?php
// Decide what to fetch: a search request, a proxied page, or the home page.
if ($_GET['search']) :
    $html = c_get('https://search2.booktxt.net/s.php', 'post', 't=1&keyword='.urlencode(_iconv($_GET['search'], 'GBK')));
elseif ($_GET['page']):
    $target = base64_decode($_GET['page']);
    // Only http(s) targets are fetched; anything else (file://, gopher://, ...)
    // is reported as a failed request instead of being handed to cURL.
    // NOTE(review): any http(s) host is still accepted, so this remains an
    // open proxy (internal addresses included) - consider a host allow-list.
    $html = preg_match('~^https?://~i', (string)$target)
        ? c_get($target)
        : array('', 'Invalid page URL');
else:
    $html = c_get(_remoteurl_);
endif;

// c_get() returns array(body, headers) on success, but its error paths may
// return a plain string; normalise so neither case is mis-indexed.
$headers = is_array($html) ? (string)$html[1] : (string)$html;
// Match the status line itself: HTTP/2 responses carry no "OK" reason phrase,
// so searching for the text "200 OK" rejects valid pages.
$fetched = is_array($html) && preg_match('/^HTTP\/\d+(?:\.\d+)?\s+200\b/mi', $headers);

if (!$fetched) {
    echo '服务器可能出现了点问题噢(⊙o⊙)?';
    // Header text comes from the remote side, so escape it before printing.
    echo "<br />返回:".str_replace(array("\r\n", "\n", "\r"), '<br />', htmlspecialchars($headers, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'));
    // Never cache an error page.
    $cachee = false;
} else {
    // Convert the body to UTF-8, then strip it down step by step.
    $html = _iconv($html[0]);
    // 1. Drop <style>/<script> elements together with their content.
    $html = preg_replace('/<(style|script)[^>]*?>[\s\S]*?<\/\1>/i', '', $html);
    // 2. Drop the configured tags (_band_ele_); needed because the prefix-based
    //    filter in step 3 would otherwise keep e.g. <link> (it starts with "li").
    $html = preg_replace('/<(?:'._band_ele_.')[^>]*?>/i', '', $html);
    // 3. Drop every tag whose name does not START with one of the kept names
    //    (a, p, br, li, table, td, tr and their closing forms).
    $html = preg_replace('/<(?!a|\/a|p|\/p|br|li|\/li|table|\/table|td|\/td|tr|\/tr)(?:[^>]*?)>/i', '', $html);
    // 4. Remove presentational / scripting attributes.
    $html = preg_replace('/(id|class|title|style|target|alt|onclick)=("|\').*?\2/i', '', $html);
    // 5. Remove anchors that use javascript: or match _band_href_.
    $html = preg_replace('/<a[^>]*?=[^>]*?(?:javascript\:|'._band_href_.')[\s\S]*?<\/a>/i', '', $html);
    // 6. Collapse whitespace runs to one space.
    //    NOTE(review): the second alternative used to be a literal space, which
    //    \s already covers; it looks like an "&nbsp;" that was HTML-decoded when
    //    the script was published. Both the entity and the raw UTF-8 NBSP bytes
    //    are handled here - confirm this matches the intent.
    $html = preg_replace('/[\n\r\s]+|(?:&nbsp;|\xC2\xA0)+/i', ' ', $html);
    // 7. Remove the configured unwanted phrases (_band_word_).
    $html = preg_replace('/'._band_word_.'/i', '', $html);
    // 8. Remove empty paragraphs and anchors left with no href or no text.
    $html = preg_replace('/<p[^>]*?><\/p>|<a >.*?<\/a>|<a[^>]*?><\/a>/i', '', $html);

    // 9. Point every remaining link back at this script: ?page=<base64 absolute URL>.
    //    Done in a single pass over the href attributes themselves.
    $rewritten = preg_replace_callback(
        '/(<a[^>]*?href=)("|\')([^>]*?)\2/i',
        function ($m) {
            // Very short hrefs are left untouched.
            if (!(strlen($m[3]) > 5)) {
                return $m[0];
            }
            // Attribute values are entity-encoded in HTML ("&amp;" etc.);
            // decode before resolving so query strings survive intact.
            $href = html_entity_decode($m[3], ENT_QUOTES, 'UTF-8');
            return $m[1] . $m[2] . "?page=" . urlencode(base64_encode(rel2abs($href))) . $m[2];
        },
        $html
    );
    // preg_replace_callback() returns null on a PCRE failure; keep the
    // un-rewritten markup in that case rather than printing nothing.
    if ($rewritten !== null) {
        $html = $rewritten;
    }
    echo $html;
}
?>
<br />
<a href="?">回到主页</a>
<small>[<?php echo date("y-m-d H:i:s"); ?>]</small>
</body>
</html><?php
// Pull the finished page out of the output buffer, send it to the client,
// and store it in the cache file unless $cachee is falsy.
$page = ob_get_clean();
echo $page;
if ($cachee) {
    file_put_contents($cachee, $page);
}
/**
 * Fetch a URL with cURL and return the response split into body and headers.
 *
 * @param string $url       URL to request (only http and https are allowed).
 * @param string $method    "post" sends $data as the request body; any other value performs a GET.
 * @param string $data      Request body used when $method is "post".
 * @param string $referer   Value of the Referer header.
 * @param int    $timeout   Total timeout in seconds.
 * @param bool   $useck     Send cookies from $ckfile if that file exists.
 * @param bool   $saveck    Store received cookies in $ckfile.
 * @param string $ckfile    Cookie jar path.
 * @param int    $maxredirs Maximum number of redirects to follow.
 * @return array array(0 => body, 1 => raw header text). On failure the body is ''
 *               and element 1 holds the error message, so callers can always
 *               index the result the same way.
 */
function c_get($url, $method = 'get', $data = '', $referer = _remoteurl_, $timeout = 10, $useck = false, $saveck = false, $ckfile = "ck.txt", $maxredirs = 5) {
    $maxredirs = max(0, (int)$maxredirs);
    $headerinfo = array(
        "User-Agent: Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
    );
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $headerinfo);
    curl_setopt($ch, CURLOPT_TIMEOUT_MS, $timeout * 1000);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_REFERER, $referer);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_MAXREDIRS, $maxredirs);
    // Headers are returned in front of the body and split off below.
    curl_setopt($ch, CURLOPT_HEADER, true);
    // NOTE(review): certificate verification is disabled, which allows
    // man-in-the-middle tampering. Left as-is so sites with broken
    // certificates keep working; enable it if the target site permits.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    // The URL can originate from user input (?page=...), so refuse every
    // scheme except http/https, for the first request and for redirects.
    if (defined('CURLOPT_PROTOCOLS') && defined('CURLPROTO_HTTP') && defined('CURLPROTO_HTTPS')) {
        curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
        curl_setopt($ch, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
    }
    if ($saveck == true) {
        curl_setopt($ch, CURLOPT_COOKIEJAR, $ckfile);
    }
    if (file_exists($ckfile) && $useck == true) {
        curl_setopt($ch, CURLOPT_COOKIEFILE, $ckfile);
    }
    if ($method == "post") {
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
    }

    $content = curl_exec($ch);
    if (curl_errno($ch)) {
        $error = 'Curl error: ' . curl_error($ch);
        curl_close($ch);
        return array('', $error);
    }
    if ($content === false || $content === '') {
        curl_close($ch);
        return array('', "Get content false!");
    }

    $headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
    $status = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    $header = substr($content, 0, $headerSize);
    $body = substr($content, $headerSize);

    // Fallback for the case where cURL itself did not follow a redirect:
    // follow it by hand, but only when a Location header is actually present
    // and the redirect budget is not used up (prevents endless recursion).
    if (in_array($status, array(301, 302), true)
        && $maxredirs > 0
        && preg_match('@^location:\s*(\S+)@im', $header, $tmpgo)) {
        return c_get($tmpgo[1], 'get', '', $referer, $timeout, $useck, $saveck, $ckfile, $maxredirs - 1);
    }

    return array(
        $body,
        $header
    );
}
/**
 * Resolve a link found on the page currently being proxied to an absolute URL.
 *
 * The base document is the URL carried in $_GET['page'] (base64) when present
 * and well-formed, otherwise the configured _remoteurl_.
 *
 * @param string $n Link as written in the page (absolute, protocol-relative,
 *                  root-relative, query-only or path-relative). Any "#fragment"
 *                  is discarded.
 * @return string|false Absolute URL, or false when nothing is left after
 *                      removing the fragment.
 */
function rel2abs($n){
    $base = _remoteurl_;
    if (!empty($_GET['page'])) {
        // $_GET is already URL-decoded by PHP; decoding it a second time would
        // turn "+" (a valid base64 character) into a space and corrupt the URL.
        $decoded = base64_decode($_GET['page']);
        if (is_string($decoded) && preg_match('~^https?://[^/?#]+~i', $decoded)) {
            $base = $decoded;
        }
    }

    // Split the base into scheme, origin (scheme://host), path and directory.
    if (preg_match('~^(https?):(//[^/?#]*)([^?#]*)~i', $base, $parts)) {
        $scheme = strtolower($parts[1]);
        $origin = $scheme . ':' . $parts[2];
        $path = $parts[3];
    } else {
        // Misconfigured base: fall back to treating it as a bare origin.
        $scheme = 'http';
        $origin = rtrim($base, '/');
        $path = '';
    }
    // Directory = path up to and including its last "/". Working on the path
    // only (query excluded) keeps "/" inside a query string from confusing it,
    // and a base ending in "/" keeps its full directory.
    $slash = strrpos($path, '/');
    $dir = ($slash === false) ? '/' : substr($path, 0, $slash + 1);

    $hash = strpos($n, "#");
    if ($hash !== false) {
        $n = substr($n, 0, $hash);
    }

    if (preg_match('~^https?://~i', $n)) {
        return $n;
    }
    if (substr($n, 0, 2) == "//") {
        // Protocol-relative: inherit the scheme of the base document.
        return $scheme . ":" . $n;
    }
    if (empty($n)) {
        return false;
    }
    if (substr($n, 0, 1) == "/") {
        // Root-relative: resolve against the host the page actually came from.
        return $origin . $n;
    }
    if (substr($n, 0, 1) == "?") {
        // Query-only: same document, different query string.
        return $origin . ($path === '' ? '/' : $path) . $n;
    }
    return $origin . $dir . $n;
}
/**
 * Convert a string - or every key and value of a (nested) array - to $output,
 * detecting the source encoding from a fixed candidate list.
 *
 * @param mixed  $data   String or array of strings/arrays. Other scalars are
 *                       cast to string before conversion.
 * @param string $output Target encoding name understood by mbstring.
 * @return string|array  Converted data. A string whose encoding cannot be
 *                       detected is returned unchanged.
 */
function _iconv($data, $output = 'utf-8') {
    if (is_array($data)) {
        // Rebuild the array so re-encoded keys replace the originals instead
        // of being added next to them; each element is detected on its own.
        $converted = array();
        foreach ($data as $key => $val) {
            $newKey = is_string($key) ? _iconv($key, $output) : $key;
            $converted[$newKey] = _iconv($val, $output);
        }
        return $converted;
    }

    // Detection must only ever see a string (passing an array is an error).
    $data = (string)$data;
    $encode_arr = array('UTF-8','ASCII','GBK','GB2312','BIG5','JIS','eucjp-win','sjis-win','EUC-JP');
    $encoded = mb_detect_encoding($data, $encode_arr);
    if ($encoded === false) {
        // No candidate matched; converting with an unknown source encoding
        // would fail, so hand the bytes back untouched.
        return $data;
    }
    return mb_convert_encoding($data, $output, $encoded);
}