require_once 'simple_html_dom.php';
/**
 * Download a remote image into the uploads directory, register it as an
 * attachment of the given post and set it as that post's featured image.
 *
 * The file name is taken from the path component of the URL (any query
 * string is ignored) and made unique inside the target directory so an
 * existing upload is never overwritten. Nothing is written when the download
 * fails or when WordPress does not recognise the file type.
 *
 * @param string $image_url Absolute URL of the image to download.
 * @param int    $post_id   ID of the post that receives the thumbnail.
 * @return int|false Attachment ID on success, false on any failure.
 */
function Generate_Featured_Image( $image_url, $post_id ){
    if ( empty( $image_url ) || empty( $post_id ) ) {
        return false;
    }

    // Use only the URL path so "photo.jpg?v=2" becomes "photo.jpg".
    $url_path = parse_url( $image_url, PHP_URL_PATH );
    $filename = sanitize_file_name( basename( $url_path ? $url_path : $image_url ) );
    if ( '' === $filename ) {
        return false;
    }

    // Refuse files whose extension is not an allowed upload type; this keeps
    // arbitrary remote content (e.g. scripts) out of the uploads directory.
    $wp_filetype = wp_check_filetype( $filename, null );
    if ( empty( $wp_filetype['type'] ) ) {
        return false;
    }

    $image_data = file_get_contents( $image_url );
    if ( false === $image_data || '' === $image_data ) {
        return false;
    }

    // Prefer the current year/month upload folder, fall back to the base dir.
    $upload_dir = wp_upload_dir();
    $target_dir = wp_mkdir_p( $upload_dir['path'] ) ? $upload_dir['path'] : $upload_dir['basedir'];

    // Avoid clobbering a file that already belongs to another attachment.
    $filename = wp_unique_filename( $target_dir, $filename );
    $file     = $target_dir . '/' . $filename;

    if ( false === file_put_contents( $file, $image_data ) ) {
        return false;
    }

    $attachment = array(
        'post_mime_type' => $wp_filetype['type'],
        'post_title'     => sanitize_file_name( $filename ),
        'post_content'   => '',
        'post_status'    => 'inherit'
    );

    $attach_id = wp_insert_attachment( $attachment, $file, $post_id );
    if ( is_wp_error( $attach_id ) || ! $attach_id ) {
        return false;
    }

    // wp_generate_attachment_metadata() lives in an admin-only include.
    require_once( ABSPATH . 'wp-admin/includes/image.php' );
    $attach_data = wp_generate_attachment_metadata( $attach_id, $file );
    wp_update_attachment_metadata( $attach_id, $attach_data );
    set_post_thumbnail( $post_id, $attach_id );

    return $attach_id;
}
/**
 * Crawl the news listing at https://vnisinvestment.com/tin-tuc and import
 * every article that does not exist yet (matched by title) as a published
 * post in category 1, using the listing thumbnail as featured image.
 *
 * Runs only on front-end requests that carry `?act=crawl`; such a request is
 * terminated with die() once the crawl has finished. All other requests
 * return immediately.
 *
 * NOTE(review): the trigger has no capability or nonce check, so any visitor
 * can start a crawl. It is left as-is because an external scheduler may rely
 * on the bare URL -- consider protecting it with a secret token.
 *
 * @return void
 */
function add_crawl_data(){
    if ( is_admin() || ! isset( $_GET['act'] ) || 'crawl' !== $_GET['act'] ) {
        return;
    }

    // post_exists() is only loaded in the admin by default.
    if ( ! function_exists( 'post_exists' ) ) {
        require_once( ABSPATH . 'wp-admin/includes/post.php' );
    }

    // file_get_html() returns false when the page cannot be fetched/parsed.
    $html_content = file_get_html( 'https://vnisinvestment.com/tin-tuc' );
    $list_post    = $html_content
        ? $html_content->find( '#tth-content .list_item .row_item .col_item' )
        : array();

    foreach ( $list_post as $item ) {
        $link_node = $item->find( '.news_img a', 0 );
        $post_link = $link_node ? $link_node->href : '';
        if ( empty( $post_link ) ) {
            continue;
        }

        // The thumbnail is a CSS background: style="background-image:url('...')".
        // Reset per item so a post without one never inherits the previous image.
        $img = '';
        foreach ( $item->find( '.news_img a .img' ) as $thumb ) {
            if ( preg_match( '/\(([^)]+)\)/', (string) $thumb->style, $match ) ) {
                $img = trim( str_replace( array( "'", '"' ), '', $match[1] ) );
            }
        }

        // Post detail page.
        $html_detail = file_get_html( $post_link );
        if ( ! $html_detail ) {
            continue;
        }

        $title_node   = $html_detail->find( '#item_detail h1', 0 );
        $content_node = $html_detail->find( '#item_detail .item-content', 0 );
        $date_node    = $html_detail->find( '#item_detail .item-date .date', 0 );
        $time_node    = $html_detail->find( '#item_detail .item-date .time', 0 );

        $title   = $title_node ? trim( $title_node->plaintext ) : '';
        $content = $content_node ? $content_node->innertext : '';

        // The site prints dates as d/m/Y; strtotime() reads "d-m-Y" as
        // day-first, whereas "d/m/Y" would be parsed as month-first.
        $post_date = '';
        if ( $date_node ) {
            $date      = str_replace( '/', '-', trim( $date_node->innertext ) );
            $time      = $time_node ? trim( $time_node->innertext ) : '';
            $timestamp = strtotime( trim( $date . ' ' . $time ) );
            if ( false === $timestamp ) {
                $timestamp = strtotime( $date );
            }
            if ( false !== $timestamp ) {
                $post_date = date( 'Y-m-d H:i:s', $timestamp );
            }
        }

        // simple_html_dom trees hold circular references; release them
        // explicitly so memory does not grow with every crawled article.
        $html_detail->clear();
        unset( $html_detail );

        if ( '' === $title || 0 !== post_exists( $title ) ) {
            continue;
        }

        $args = array(
            'post_type'    => 'post',
            'post_status'  => 'publish',
            'post_title'   => $title,
            'post_content' => $content
        );
        // Without a parsable date WordPress falls back to the current time.
        if ( '' !== $post_date ) {
            $args['post_date'] = $post_date;
        }

        $post_id = wp_insert_post( $args );
        if ( is_wp_error( $post_id ) || ! $post_id ) {
            continue;
        }

        wp_set_post_terms( $post_id, array( 1 ), 'category' );
        if ( '' !== $img ) {
            Generate_Featured_Image( $img, $post_id );
        }
    }

    if ( $html_content ) {
        $html_content->clear();
    }

    // A crawl request never renders a page.
    die();
}
// Register add_crawl_data() on the `init` action; the callback itself decides
// whether the current request is a crawl request (`?act=crawl`).
add_action('init', 'add_crawl_data');