image cad area size
This commit is contained in:
Binary file not shown.
@@ -0,0 +1,76 @@
|
||||
from flask import Flask, request, jsonify
|
||||
from flask_cors import CORS
|
||||
import traceback
|
||||
|
||||
from cad_vector_area import calculate_pdf_vector_area
|
||||
|
||||
# WSGI application object; the route decorators in this module register on it.
app = Flask(__name__)
|
||||
# Attach flask_cors to the app with its default settings (no origin
# restrictions are configured here) -- NOTE(review): confirm this is intended
# outside local development.
CORS(app)
|
||||
|
||||
|
||||
@app.route("/health", methods=["GET"])
def health():
    """Liveness probe: report that the service process is up.

    Returns:
        A JSON response with ``success`` set to ``True`` and a fixed
        human-readable ``message``.
    """
    payload = {
        "success": True,
        "message": "Python CAD Area service is running",
    }
    return jsonify(payload)
|
||||
|
||||
|
||||
@app.route("/calculate", methods=["POST"])
def calculate():
    """Compute the profile area of an uploaded vector PDF.

    Expects a multipart form with:
        file: the PDF to analyse (filename must end in ``.pdf``).
        scale_ratio: optional drawing scale (``N`` for an ``N:1`` drawing).
            Missing, non-numeric, non-finite or non-positive values fall
            back to ``1.0``.

    Returns:
        ``(json, status)`` where status is 400 for a bad upload, 200 when
        ``calculate_pdf_vector_area`` reports success, 422 when it does not,
        and 500 on any unexpected error.
    """
    # Local import keeps this block self-contained without touching the
    # module's import section.
    import math

    def _bad_request(message):
        return jsonify({"success": False, "message": message}), 400

    try:
        if "file" not in request.files:
            return _bad_request("No PDF file received")

        uploaded_file = request.files["file"]

        # ``filename`` may be None as well as "" when the client sends a
        # part without a name; treat both as an empty filename instead of
        # crashing on ``None.lower()``.
        if not uploaded_file.filename:
            return _bad_request("Empty filename")

        if not uploaded_file.filename.lower().endswith(".pdf"):
            return _bad_request("Only PDF files are allowed")

        pdf_bytes = uploaded_file.read()

        try:
            scale_ratio = float(request.form.get("scale_ratio", "1"))
        except (TypeError, ValueError):
            scale_ratio = 1.0

        # float() happily parses "nan" and "inf"; neither compares <= 0, and
        # both would poison every area computed downstream.
        if not math.isfinite(scale_ratio) or scale_ratio <= 0:
            scale_ratio = 1.0

        result = calculate_pdf_vector_area(
            pdf_bytes=pdf_bytes,
            filename=uploaded_file.filename,
            scale_ratio=scale_ratio,
        )

        status_code = 200 if result.get("success") else 422
        return jsonify(result), status_code

    except Exception as e:
        # Top-level boundary: record the failure server-side as well.
        app.logger.exception("PDF area calculation failed")
        # NOTE(review): the traceback is returned to the client for
        # debugging; it exposes internals and should be dropped before this
        # service is reachable by untrusted callers.
        return jsonify({
            "success": False,
            "message": str(e),
            "trace": traceback.format_exc(),
        }), 500
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Development entry point: serve on the loopback interface only.
    # NOTE(review): debug=True turns on Flask's debug mode; do not use this
    # entry point for a deployment reachable from other machines.
    run_options = {
        "host": "127.0.0.1",
        "port": 5055,
        "debug": True,
    }
    app.run(**run_options)
|
||||
@@ -0,0 +1,375 @@
|
||||
import fitz
|
||||
from shapely.geometry import Polygon
|
||||
from shapely.validation import make_valid
|
||||
import math
|
||||
|
||||
|
||||
# Millimetres per PDF point: 25.4 mm per inch divided by 72 points per inch.
POINT_TO_MM = 25.4 / 72.0
|
||||
|
||||
|
||||
def point_to_tuple(point):
    """Convert an object exposing ``.x`` and ``.y`` into an ``(x, y)`` float pair."""
    x_coord = float(point.x)
    y_coord = float(point.y)
    return x_coord, y_coord
|
||||
|
||||
|
||||
def distance(p1, p2):
    """Return the Euclidean distance between two ``(x, y)`` pairs."""
    delta_x = p1[0] - p2[0]
    delta_y = p1[1] - p2[1]
    return math.hypot(delta_x, delta_y)
|
||||
|
||||
|
||||
def rect_to_polygon(rect):
    """Expand a rectangle into a closed five-point ring.

    ``rect`` must expose ``x0``, ``y0``, ``x1`` and ``y1``. The ring starts at
    ``(x0, y0)``, visits ``(x1, y0)``, ``(x1, y1)``, ``(x0, y1)`` and repeats
    the first corner at the end.
    """
    left, top = float(rect.x0), float(rect.y0)
    right, bottom = float(rect.x1), float(rect.y1)

    corners = [(left, top), (right, top), (right, bottom), (left, bottom)]
    # Repeat the first corner so the ring is explicitly closed.
    return corners + corners[:1]
|
||||
|
||||
|
||||
def cubic_bezier_points(p0, p1, p2, p3, steps=32):
    """Sample a cubic Bezier curve into a polyline.

    Args:
        p0, p1, p2, p3: start point, two control points and end point, each
            an ``(x, y)`` pair.
        steps: number of samples to produce.

    Returns:
        ``steps`` points at ``t = 1/steps, 2/steps, ..., 1``. The start point
        (``t = 0``) is intentionally not included; the last point is at
        ``t = 1``.
    """
    def sample(t):
        u = 1 - t
        # Bernstein basis weights for a cubic curve.
        w0 = u ** 3
        w1 = 3 * u ** 2 * t
        w2 = 3 * u * t ** 2
        w3 = t ** 3
        return (
            w0 * p0[0] + w1 * p1[0] + w2 * p2[0] + w3 * p3[0],
            w0 * p0[1] + w1 * p1[1] + w2 * p2[1] + w3 * p3[1],
        )

    return [sample(k / steps) for k in range(1, steps + 1)]
|
||||
|
||||
|
||||
def polygon_area_mm2(points, scale_ratio=1.0):
    """Return the area enclosed by ``points`` in square millimetres.

    Args:
        points: ring of ``(x, y)`` pairs in PDF points.
        scale_ratio: drawing scale; the area is divided by its square.

    Returns:
        The area as a float, or ``None`` when the geometry is empty after
        an invalid polygon has been repaired with ``make_valid``.
    """
    shape = Polygon(points)
    if not shape.is_valid:
        # Self-intersecting or otherwise invalid rings are repaired first.
        shape = make_valid(shape)

    if shape.is_empty:
        return None

    paper_area_mm2 = abs(float(shape.area)) * (POINT_TO_MM ** 2)
    return paper_area_mm2 / (scale_ratio ** 2)
|
||||
|
||||
|
||||
def get_bounds_mm(points, scale_ratio=1.0):
    """Describe the bounding box of ``points``.

    Args:
        points: ring of ``(x, y)`` pairs in PDF points.
        scale_ratio: drawing scale; the millimetre sizes are divided by it.

    Returns:
        A dict with the box corners in PDF points (``x_min``, ``y_min``,
        ``x_max``, ``y_max``) and its size in scale-corrected millimetres
        (``width_mm``, ``height_mm``).
    """
    left, bottom, right, top = Polygon(points).bounds

    span_x = right - left
    span_y = top - bottom

    return {
        "x_min": left,
        "y_min": bottom,
        "x_max": right,
        "y_max": top,
        "width_mm": span_x * POINT_TO_MM / scale_ratio,
        "height_mm": span_y * POINT_TO_MM / scale_ratio,
    }
|
||||
|
||||
|
||||
def extract_points_from_drawing(drawing):
    """Flatten one drawing dict into a list of ``(x, y)`` points.

    Handles three item commands found under ``drawing["items"]``:
        ``"l"``  - line: appends the start (if it moved) and the end point.
        ``"c"``  - cubic curve with at least 5 fields: appends the start
                   (if it moved) and 32 sampled curve points.
        ``"re"`` - rectangle: returns immediately with the rectangle's ring,
                   discarding anything collected so far.
    Any other command is ignored.

    Returns:
        ``(points, source_type)`` where ``source_type`` is ``"rectangle"``
        for the early rectangle return and ``"path"`` otherwise.
    """
    outline = []

    def begin_segment(start):
        # Add the segment start only when it does not coincide (within
        # 0.01 pt) with the point the outline already ends on.
        if not outline or distance(outline[-1], start) > 0.01:
            outline.append(start)

    for item in drawing.get("items", []):
        kind = item[0]

        if kind == "re":
            return rect_to_polygon(item[1]), "rectangle"

        if kind == "l":
            start = point_to_tuple(item[1])
            end = point_to_tuple(item[2])
            begin_segment(start)
            outline.append(end)

        elif kind == "c" and len(item) >= 5:
            # Expected layout: ("c", start, control_1, control_2, end).
            start, ctrl_a, ctrl_b, end = (point_to_tuple(p) for p in item[1:5])
            begin_segment(start)
            outline.extend(cubic_bezier_points(start, ctrl_a, ctrl_b, end, steps=32))

    return outline, "path"
|
||||
|
||||
|
||||
def is_closed(points, tolerance_points=1.5):
    """Tell whether a point list forms a closed ring.

    A list counts as closed when it has at least 4 points and its first and
    last points are no further apart than ``tolerance_points``.
    """
    return len(points) >= 4 and distance(points[0], points[-1]) <= tolerance_points
|
||||
|
||||
|
||||
def is_simple_rectangle(points, source_type):
    """Tell whether a path should be treated as a plain box.

    True when the path came from a rectangle item, or when it has 5 points
    or fewer (most CAD frames, dimension boxes and table lines become
    5-point rectangles).
    """
    return source_type == "rectangle" or len(points) <= 5
|
||||
|
||||
|
||||
def reject_reason(points, page_rect, source_type, area_mm2, scale_ratio=1.0):
    """Explain why a path is not a plausible profile section.

    Args:
        points: ``(x, y)`` pairs of the path in PDF points.
        page_rect: page rectangle exposing ``width`` and ``height`` in points.
        source_type: ``"rectangle"`` or ``"path"``.
        area_mm2: scale-corrected area of the path, or ``None``.
        scale_ratio: drawing scale used for ``area_mm2`` and the bounds.

    Returns:
        A short reason string, or ``None`` when the path passes every check.
    """
    # Rectangle items always arrive as 5-point rings, so they must be
    # labelled before the point-count check or "rectangle_or_box" could
    # never be reported for them.
    if source_type == "rectangle":
        return "rectangle_or_box"

    if len(points) < 6:
        return "too_few_points"

    if not is_closed(points):
        return "not_closed"

    if is_simple_rectangle(points, source_type):
        return "rectangle_or_box"

    if area_mm2 is None or area_mm2 <= 0:
        return "zero_area"

    bounds = get_bounds_mm(points, scale_ratio)
    width_mm = bounds["width_mm"]
    height_mm = bounds["height_mm"]

    if width_mm <= 0 or height_mm <= 0:
        return "invalid_bounds"

    # Reject thin long rectangles/lines: a long frame/table line was
    # previously being selected as the area.
    min_side = min(width_mm, height_mm)
    max_side = max(width_mm, height_mm)

    if min_side < 1.0:
        return "thin_line_or_stroke"

    if max_side / min_side > 80:
        return "extreme_aspect_ratio"

    # Reject page frames / title blocks. ``area_mm2`` is already divided by
    # scale_ratio ** 2, so the page area gets the same correction; otherwise
    # the 5 % share would compare two different units whenever the scale is
    # not 1.
    page_area_mm2 = (page_rect.width * POINT_TO_MM) * (page_rect.height * POINT_TO_MM)
    page_area_mm2 /= scale_ratio ** 2

    if area_mm2 > page_area_mm2 * 0.05:
        return "too_large_page_element"

    # Reject text glyphs / arrows / tiny symbols.
    if area_mm2 < 20:
        return "too_small_detail"

    # Reasonable technical-section limits for this first version; these
    # could later be made user-configurable.
    if width_mm > 250 or height_mm > 250:
        return "too_large_for_profile"

    return None
|
||||
|
||||
|
||||
def candidate_score(candidate):
    """Rank how plausible a candidate is as a rubber/profile section.

    Reads ``area_mm2``, ``width_mm`` and ``height_mm`` from ``candidate``.
    A higher score means a more plausible section; it filters obvious false
    positives but does not guarantee correctness.
    """
    area = candidate["area_mm2"]
    width = candidate["width_mm"]
    height = candidate["height_mm"]

    short_side, long_side = min(width, height), max(width, height)
    aspect = long_side / short_side if short_side > 0 else 9999

    score = 0

    # Area bonuses are cumulative: each threshold reached adds its bonus.
    for threshold, bonus in ((50, 20), (100, 20), (300, 10)):
        if area >= threshold:
            score += bonus

    # Compact shapes are rewarded, elongated ones penalised.
    if aspect <= 10:
        score += 20
    elif aspect <= 25:
        score += 5
    else:
        score -= 20

    # Very large bounding boxes are less likely to be a section.
    if max(width, height) > 120:
        score -= 10

    return score
|
||||
|
||||
|
||||
def _round_or_none(value, digits):
    """Round ``value`` to ``digits`` places, passing ``None`` through."""
    return None if value is None else round(value, digits)


def _build_candidate(index, drawing, points, source_type, area_mm2, bounds_data):
    """Assemble the JSON-ready description of one drawing path.

    ``area_mm2`` and ``bounds_data`` may be ``None`` (open paths, empty
    polygons); the corresponding fields are then reported as ``None``.
    """
    if bounds_data is None:
        width_mm = height_mm = bounds_points = None
    else:
        width_mm = round(bounds_data["width_mm"], 3)
        height_mm = round(bounds_data["height_mm"], 3)
        bounds_points = {
            "x_min": bounds_data["x_min"],
            "y_min": bounds_data["y_min"],
            "x_max": bounds_data["x_max"],
            "y_max": bounds_data["y_max"],
        }

    has_area = area_mm2 is not None

    return {
        "drawing_index": index,
        "source_type": source_type,
        "drawing_type": drawing.get("type"),
        "points_count": len(points),
        "area_mm2": _round_or_none(area_mm2, 6),
        "area_cm2": round(area_mm2 / 100.0, 6) if has_area else None,
        "area_m2": round(area_mm2 / 1_000_000.0, 9) if has_area else None,
        "width_mm": width_mm,
        "height_mm": height_mm,
        "bounds_points": bounds_points,
        "fill": drawing.get("fill"),
        "color": drawing.get("color"),
    }


def _analyze_first_page(doc, filename, scale_ratio):
    """Scan the first page of an open document for a profile candidate."""
    if len(doc) == 0:
        return {
            "success": False,
            "message": "PDF has no pages"
        }

    page = doc[0]
    drawings = page.get_drawings()

    diagnostics = {
        "filename": filename,
        "pages": len(doc),
        "page_width_points": float(page.rect.width),
        "page_height_points": float(page.rect.height),
        "drawings_count": len(drawings),
        "scale_ratio_used": scale_ratio,
        "raw_closed_candidates_count": 0,
        "accepted_candidates_count": 0,
        "rejected_candidates_count": 0,
    }

    if not drawings:
        return {
            "success": False,
            "message": "No vector drawings found. This PDF may be raster/scanned.",
            "confidence": "low",
            "diagnostics": diagnostics
        }

    accepted_candidates = []
    rejected_candidates = []
    rejected_total = 0

    for index, drawing in enumerate(drawings):
        points, source_type = extract_points_from_drawing(drawing)

        if len(points) < 4:
            continue

        area_mm2 = None
        bounds_data = None

        if is_closed(points):
            diagnostics["raw_closed_candidates_count"] += 1
            area_mm2 = polygon_area_mm2(points, scale_ratio=scale_ratio)

            if area_mm2 is not None and area_mm2 > 0:
                bounds_data = get_bounds_mm(points, scale_ratio=scale_ratio)

        reason = reject_reason(
            points=points,
            page_rect=page.rect,
            source_type=source_type,
            area_mm2=area_mm2,
            scale_ratio=scale_ratio
        )

        # Open paths and empty polygons have no area/bounds; the builder
        # reports those fields as None instead of failing on round(None).
        candidate = _build_candidate(
            index, drawing, points, source_type, area_mm2, bounds_data
        )

        if reason is None:
            candidate["score"] = candidate_score(candidate)
            accepted_candidates.append(candidate)
        else:
            candidate["rejected_reason"] = reason
            rejected_total += 1

            # Keep only useful rejected diagnostics, not thousands of tiny
            # glyphs; the total is still counted above.
            if len(rejected_candidates) < 80:
                rejected_candidates.append(candidate)

    diagnostics["accepted_candidates_count"] = len(accepted_candidates)
    diagnostics["rejected_candidates_count"] = rejected_total

    accepted_candidates.sort(key=lambda item: item["score"], reverse=True)

    if not accepted_candidates:
        return {
            "success": False,
            "message": (
                "No reliable closed profile found. "
                "False positives such as rectangles, frames, dimension lines and text were rejected. "
                "This PDF probably needs stitched-contour reconstruction."
            ),
            "confidence": "low",
            "diagnostics": diagnostics,
            "rejected_candidates_preview": rejected_candidates[:30]
        }

    best = accepted_candidates[0]
    area_mm2 = best["area_mm2"]

    # In this MVP, even accepted candidates need validation: a wrong number
    # must not be presented as final production data.
    confidence = "needs_validation"

    return {
        "success": True,
        "message": (
            "Candidate found after rejecting rectangles, frames and thin lines. "
            "Validate the selected candidate before using it as final area."
        ),
        "area_mm2": round(area_mm2, 6),
        "area_cm2": round(area_mm2 / 100.0, 6),
        "area_m2": round(area_mm2 / 1_000_000.0, 9),
        "scale_detected": f"{scale_ratio}:1 manual/default",
        "confidence": confidence,
        "selected_candidate": best,
        "diagnostics": diagnostics,
        "accepted_candidates_preview": accepted_candidates[:20],
        "rejected_candidates_preview": rejected_candidates[:30]
    }


def calculate_pdf_vector_area(pdf_bytes, filename="uploaded.pdf", scale_ratio=1.0):
    """Find the most plausible closed profile on the first page of a PDF.

    Args:
        pdf_bytes: raw bytes of the PDF file.
        filename: name echoed back in the diagnostics.
        scale_ratio: drawing scale (``N`` for an ``N:1`` drawing); areas are
            divided by its square and lengths by the ratio itself.

    Returns:
        A JSON-serialisable dict. ``success`` is ``True`` when a candidate
        was accepted; the best-scoring one is reported with its area in
        mm2/cm2/m2 plus previews of accepted and rejected candidates.
        Otherwise ``success`` is ``False`` with an explanatory ``message``.
        ``diagnostics["rejected_candidates_count"]`` is the total number of
        rejected paths; the stored rejected list itself is capped at 80.

    Raises:
        Whatever ``fitz.open`` raises for unreadable input.
    """
    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    try:
        return _analyze_first_page(doc, filename, scale_ratio)
    finally:
        # Every result is plain data, so the document can be released as
        # soon as the analysis returns or fails.
        doc.close()
|
||||
Binary file not shown.
Reference in New Issue
Block a user